Automate Excel using openpyxl in Python

Automate Excel using openpyxl in Python

openpyxl: openpyxl is a Python library to read/write Excel 2010 xlsx/xlsm/xltx/xltm files.

Install openpyxl

pip install openpyxl

Example 1: Write into Cells where we are referring “test.xlsx” file and write data into two cells

from openpyxl import Workbook

workbook = Workbook()
sheet = workbook.active

sheet["A1"] = "myTesting One"
sheet["B1"] = "myTesting two"

workbook.save(filename="test.xlsx")

Example 2: let’s play with sheets in a “test.xlsx” workbook

#Import library
from openpyxl import load_workbook
#load excel test
workbook = load_workbook(filename="test.xlsx")
#print sheetnames
print(workbook.sheetnames)
#take active sheet reference
sheet = workbook.active
#print sheet object
print(sheet)
#print sheet name
print(sheet.title)

Example 3: Value attribute returns actual value of a cell

#Import library
from openpyxl import load_workbook
#load excel test
workbook = load_workbook(filename="test.xlsx")
#print sheetnames
print(workbook.sheetnames)
#take active sheet reference
sheet = workbook.active
#print value
print(sheet["A1"].value)

Example 4: Cell object which helps to retrieve value from specific range. It takes two arguments row and column the given example will refer “E10” in sheet where Column=”E” and Row Number 10.

#Import library
from openpyxl import load_workbook
#load excel test
workbook = load_workbook(filename="test.xlsx")
#print sheetnames
print(workbook.sheetnames)
#take active sheet reference
sheet = workbook.active
#print value using cell object
print(sheet.cell(row=10,column=5).value)

Example 5: Importing data from a spreadsheet with different methods

Within specific range, in this example we will see a Range starting from “A1” and ending up till “D5” as shown below

#Import library
from openpyxl import load_workbook
#load excel test
workbook = load_workbook(filename="test.xlsx")
#print sheetnames
print(workbook.sheetnames)
#take active sheet reference
sheet = workbook.active
#print value between range
print(sheet["A1:D5"].value)

Example 6: iterating entire column

#Import library
from openpyxl import load_workbook
#load excel test
workbook = load_workbook(filename="test.xlsx")
#print sheetnames
print(workbook.sheetnames)
#take active sheet reference
sheet = workbook.active
#print value of entire column
print(sheet["A"].value)

Example 7: Iterating more than one column

#Import library
from openpyxl import load_workbook
#load excel test
workbook = load_workbook(filename="test.xlsx")
#print sheetnames
print(workbook.sheetnames)
#take active sheet reference
sheet = workbook.active
#print more than one column
print(sheet["A:C"].value)

Example 8: Iterating specific Row

#Import library
from openpyxl import load_workbook
#load excel test
workbook = load_workbook(filename="test.xlsx")
#print sheetnames
print(workbook.sheetnames)
#take active sheet reference
sheet = workbook.active
#print specific row
print(sheet["5"].value)

Example 9: Iterate rows between range

#Import library
from openpyxl import load_workbook
#load excel test
workbook = load_workbook(filename="test.xlsx")
#print sheetnames
print(workbook.sheetnames)
#take active sheet reference
sheet = workbook.active
#print rows between range
print(sheet["5:9"].value)

Let’s learn other ways to iterate row, column, iter_rows(), iter_cols() these are the methods and takes for arguments as follow, Note given examples will result to object not actual values. To return values we need to add another parameter which refers to boolean value “values_only“:

  • min_row
  • max_row
  • min_col
  • max_col

Using iter_rows():

#Import library
from openpyxl import load_workbook
#load excel test
workbook = load_workbook(filename="test.xlsx")
#print sheetnames
print(workbook.sheetnames)
#take active sheet reference
sheet = workbook.active

for row in sheet.iter_rows(min_row=1, max_row=2, min_col=1, max_col=3):
	print(row)

Output:

(<Cell 'Sheet 1'.A1>, <Cell 'Sheet 1'.B1>, <Cell 'Sheet 1'.C1>)
(<Cell 'Sheet 1'.A2>, <Cell 'Sheet 1'.B2>, <Cell 'Sheet 1'.C2>)

Reference:

Using iter_cols():

#Import library
from openpyxl import load_workbook
#load excel test
workbook = load_workbook(filename="test.xlsx")
#print sheetnames
print(workbook.sheetnames)
#take active sheet reference
sheet = workbook.active

for column in sheet.iter_cols(min_row=1, max_row=2, min_col=1, max_col=3):
	print(column)

Output:

(<Cell 'Sheet 1'.A1>, <Cell 'Sheet 1'.A2>)
(<Cell 'Sheet 1'.B1>, <Cell 'Sheet 1'.B2>)
(<Cell 'Sheet 1'.C1>, <Cell 'Sheet 1'.C2>)

Using values_only:

#Import library
from openpyxl import load_workbook
#load excel test
workbook = load_workbook(filename="test.xlsx")
#print sheetnames
print(workbook.sheetnames)
#take active sheet reference
sheet = workbook.active

for value in sheet.iter_rows(min_row=1, max_row=2, min_col=1, max_col=3, values_only=True):
	print(value)

Output:

Next: Create json from Excel data using Python

Leave a Reply

Your email address will not be published. Required fields are marked *