I have the Python code below that reads two CSV files and finds matching rows. Both CSV files have a header row, so I need to reset file2/reader2 to the second line (the first data row) on each iteration of reader1.
How do I reset the position of file2/reader2 to the first data row to search for a match?
import csv
# Open both input CSV files; each DictReader takes its column names
# from the file's header row
with open('file1.csv') as file1, open('file2.csv') as file2:
    reader1 = csv.DictReader(file1)
    reader2 = csv.DictReader(file2)
    # Collects every merged row that will be written out at the end
    merged_rows = []
    # Walk through the first file one record at a time
    for row1 in reader1:
        # Contact number to look for in the second file
        contact_no = row1['contact_no']
        ### file2 seek to start of second line to find matches again
        # Scan the second file for records with the same contact number
        for row2 in reader2:
            # Skip records whose contact number differs
            if row2['contact_no'] != contact_no:
                continue
            # Matching record: fold the second file's columns into the
            # row from the first file and keep the result
            row1.update(row2)
            merged_rows.append(row1)
# Write the merged rows to a new CSV file
with open('merged_file.csv', 'w') as out_file:
    # The header is taken from the keys of the last row read from file1
    writer = csv.DictWriter(out_file, fieldnames=row1.keys())
    writer.writeheader()
    # Emit all merged rows in the order they were collected
    writer.writerows(merged_rows)
Unless `file2.csv` is very large, I would load it into memory and keep it there:
...
reader2 = csv.DictReader(file2)
# Read all of file2's data rows into a list once; unlike the reader,
# a list can be iterated again on every pass of the outer loop
reader2_rows = list(reader2)
...
for row1 in reader1:
    ...
    # Iterate over the in-memory rows instead of the reader
    for row2 in reader2_rows:
        ...
Putting this into your code, it becomes:
import csv
def merge_csv_files(path1='file1.csv', path2='file2.csv',
                    out_path='merged_file.csv', key='contact_no'):
    """Merge rows of two CSV files that share the same value in *key*.

    Every row of *path1* is compared with every row of *path2*; for each
    pair whose *key* columns are equal, a merged row (columns of the first
    file, overlaid with the columns of the second) is written to
    *out_path*.

    Args:
        path1: Path of the first CSV file (must have a header row).
        path2: Path of the second CSV file (must have a header row).
        out_path: Path of the CSV file to create.
        key: Name of the column used to match rows.

    Returns:
        The list of merged rows, in the order they were written.

    Raises:
        KeyError: If a data row lacks the *key* column.
    """
    # Read in the data from the two CSV files.  newline='' lets the csv
    # module handle line endings itself, as its documentation requires.
    with open(path1, newline='') as file1, open(path2, newline='') as file2:
        reader1 = csv.DictReader(file1)
        reader2 = csv.DictReader(file2)
        # load reader2 rows into memory since we reuse them multiple times
        # (a reader can only be iterated once)
        reader2_rows = list(reader2)
        # Output columns come from the two headers, not from whichever row
        # happened to be read last: file1's columns first, then any column
        # that only exists in file2.  `or []` covers a completely empty file.
        fieldnames = list(reader1.fieldnames or [])
        for name in reader2.fieldnames or []:
            if name not in fieldnames:
                fieldnames.append(name)
        # Create an empty list to store the merged data
        merged_rows = []
        # Iterate over the rows in the first file
        for row1 in reader1:
            # Get the contact number from the current row
            contact_no = row1[key]
            # Iterate over the (in-memory) rows of the second file.  For
            # very large inputs, an index dict keyed on the contact number
            # would avoid rescanning the list for every row1.
            for row2 in reader2_rows:
                if row2[key] == contact_no:
                    # Build a fresh dict per match so that several matches
                    # for the same row1 do not all alias one object.
                    merged_rows.append({**row1, **row2})
    # Write the merged rows to a new CSV file
    with open(out_path, 'w', newline='') as out_file:
        # Create a writer object and write the header row
        writer = csv.DictWriter(out_file, fieldnames=fieldnames)
        writer.writeheader()
        # Write each of the merged rows to the output file
        writer.writerows(merged_rows)
    return merged_rows


if __name__ == '__main__':
    merge_csv_files()
精彩评论