I am working in python trying to parse an excel file but also add two things tha
ID: 3821471 • Letter: I
Question
I am working in python trying to parse an excel file but also add two things that are not there. 1 being dayOfWeek and the other being latLong. Here is my code that I have tried moving around several times. It will parse the data but will not add dayOfWeek or latLong. Not all of the entries have a date and the dayOfWeek code is supposed to skip those but still provide latLong in the parsed output.
import csv
import datetime
# Put the full path to your CSV/Excel file here
MY_FILE = "c:\TestData\houston_crime_data_jan17_clearLake.csv"
def parse(raw_file, delimiter):
"""Parses a raw CSV file to a JSON-like object"""
# Setup an empty list
parsed_data = []
# Open CSV file, and safely close it when we're done
with open(raw_file, newline='') as opened_file:
# Read the CSV data
csv_data = csv.reader(opened_file, delimiter=delimiter)
# Skip over the first line of the file for the headers
field_labels = next(csv_data)
#print(', '.join(field_labels))
# Iterate over each row of the csv file, zip together field -> value
for row in csv_data:
parsed_data.append(dict(zip(field_labels, row)))
return parsed_data
def dayOfWeek(parsed_data):
for record in parsed_data:
date = record['Date']
year = date[-4:]
month = date[:date.index('/')]
day = date[date.index('/')+1:-5]
d = datetime.date(int(year),int(month),int(day))
dayOfWeek = d.weekday()
dayList = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
record['DayOfWeek']=dayList[dayOfWeek]
return daylist
def latLong(parsed_data):
from geopy.geocoders import Nominatim
geolocator = Nominatim()
for record in parsed_data:
street = record['StreetName']+' '+record['Type']
location = geolocator.geocode(street + " HOUSTON TX")
if location != None:
record['Y'] = location.latitude
record['X'] = location.longitude
else:
# set coordinates to 0,0 and then make sure to ignore those
record['Y'] = '0'
record['X'] = '0'
return X, Y
def main():
# Call our parse function and give it the needed parameters
new_data = parse(MY_FILE, ",")
# Let's see what the data looks like!
print (new_data)
if __name__ == "__main__":
main()
Here is the data that I am using from the excel file.
Explanation / Answer
There are small corrections in the code.
You are trying to parse the day in dayOfWeek method but returning the defined daylist in that, instead I modified to validate the date and find the day of week if the date is valid and append the data to the list, if date is not valid, directly appending data with out day of week.
In latLong method, you are trying to return X,Y which are not defined and they are the keys to store lat and long values, so I modified to append the lat and long values to the data and return the entire parsed data.
Now the code is working fine,
I made some changes and also I described the issues and mentioned the new changes in the corresponding comments please go through the code.
import csv
import datetime
import re
# Put the full path to your CSV/Excel file here
MY_FILE = "houston_crime_data_jan17_clearLake.csv"
def parse(raw_file, delimiter):
"""Parses a raw CSV file to a JSON-like object"""
# Setup an empty list
parsed_data = []
# Open CSV file, and safely close it when we're done
with open(raw_file) as opened_file:
# Read the CSV data
csv_data = csv.reader(opened_file, delimiter=delimiter)
# Skip over the first line of the file for the headers
field_labels = next(csv_data)
#print(', '.join(field_labels))
# Iterate over each row of the csv file, zip together field -> value
for row in csv_data:
parsed_data.append(dict(zip(field_labels, row)))
#print parsed_data
pass
return parsed_data
def dayOfWeek(parsed_data):
new_data = []
for record in parsed_data:
date = record['Date']
#print date
#print validate(date)
"""validate the date, if the date is valid then the day of
week should be found and appended to the record otherwise should be skipped."""
if(validate(date)):
year = date[-4:]
month = date[:date.index('/')]
day = date[date.index('/')+1:-5]
d = datetime.date(int(year),int(month),int(day))
dayOfWeek = d.weekday()
#print dayOfWeek
dayList = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
record['DayOfWeek']=dayList[dayOfWeek]
new_data.append(record)
#return daylist ### returning dayList, which is defined list of days in a week, should append DayOfWeek to the data and return.
#appending parsed data with or with out day of week record, and returning it.
return new_data
def validate(date_text):
"""validate date_text is actual date in format dd/mm/yyyy or not"""
try:
datetime.datetime.strptime(date_text, '%d/%m/%Y')
return 1
except ValueError:
return 0
def latLong(parsed_data):
from geopy.geocoders import Nominatim
geolocator = Nominatim()
new_data = []
for record in parsed_data:
street = record['StreetName']+' '+record['Type']
location = geolocator.geocode(street + " HOUSTON TX")
if location != None:
record['Y'] = location.latitude
record['X'] = location.longitude
else:
# set coordinates to 0,0 and then make sure to ignore those
record['Y'] = '0'
record['X'] = '0'
new_data.append(record) #appending the record with lat and long values
#return X, Y ##The X, Y are not defined, these are the keys being used to store the values of long and lat respectively
return new_data #returning the parsed_data after appending the lat and long values.
def main():
# Call our parse function and give it the needed parameters
new_data = parse(MY_FILE, ",")
# Let's see what the data looks like!
#print (new_data)
new_data = dayOfWeek(new_data)
#print (new_data)
latLong(new_data)
print (new_data)
if __name__ == "__main__":
main()
Related Questions
drjack9650@gmail.com
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.