I am trying to geocode a CSV file that contains the name of each location and a parsed-out address (address number, street name, city, zip, country). I want to use the ArcGIS geocoding service through geopy. I would like a script that loops through my CSV of 5000+ entries and writes the latitude and longitude into separate columns of the CSV. Can anyone provide me with some code to get started? Thanks!
Here is my script:
import csv
from geopy.geocoders import ArcGIS
geolocator = ArcGIS()  # here some parameters are needed

# Python 3: the csv module wants text-mode files opened with newline='' so that
# it controls line endings itself (otherwise Windows output gets blank rows).
with open('C:/Users/v-albaut/Desktop/Test_Geo.csv', newline='') as csvinput, \
        open('output.csv', 'w', newline='') as csvoutput:
    output_fieldnames = ['Name', 'Address', 'Latitude', 'Longitude']
    writer = csv.DictWriter(csvoutput, delimiter=',', fieldnames=output_fieldnames)
    # Write the column names so the output is self-describing.
    writer.writeheader()
    reader = csv.DictReader(csvinput)
    for row in reader:
        # here you have to replace the dict item by your csv column names
        query = ','.join(str(x) for x in (row['Name'], row['Address']))

        # geocode() returns None when the service finds no match, so the
        # result must be checked before reading coordinates from it.
        location = geolocator.geocode(query)
        if location is None:
            address = row['Address']  # nothing matched: keep the input address
            latitude = 'N/A'
            longitude = 'N/A'
        else:
            address = location.address  # address as matched by the geocoder
            latitude = location.latitude
            longitude = location.longitude

        # here is the writing section
        writer.writerow({
            'Name': row['Name'],
            'Address': address,
            'Latitude': latitude,
            'Longitude': longitude,
        })
I've been using this script to do some batch-geocoding from .csv. It requires that one column contain the complete text address that you wish to geocode, and that one column be titled 'UniqueID', which has a unique identifier for each item in the .csv. It will also print out a list of any addresses that it failed to geocode. It also does a quick check to see if the zip code might be incorrect/throwing off the geocoding:
def main(path, filename, geolocator=None):
    """Batch-geocode the addresses in a CSV file and save the coordinates.

    The input CSV must have an 'Address' column holding the full text address
    and a 'UniqueID' column holding a unique identifier for each row. A copy
    of the data with two extra columns, 'Lat' and 'Long', is written to the
    current working directory as '<filename without extension>_RESULTS.csv'.
    Addresses that could not be geocoded are printed at the end and keep NaN
    in both coordinate columns.

    Args:
        path: Directory where the input .csv lives.
        filename: Name of the input .csv inside ``path``.
        geolocator: Optional object with a ``geocode(query)`` method that
            returns None or an object exposing ``latitude`` and ``longitude``.
            When omitted, a default ``geopy.geocoders.ArcGIS()`` is created.
    """
    # Imports are kept local, as in the original snippet, so that the function
    # can be pasted into any script on its own.
    import os

    import pandas as pd

    if geolocator is None:
        from geopy.geocoders import ArcGIS
        geolocator = ArcGIS()  # some parameters here

    targets = pd.read_csv(os.path.join(path, filename))
    targets['Lat'] = float('nan')
    targets['Long'] = float('nan')
    indexed_targets = targets.set_index('UniqueID')

    fails = []
    for index, row in indexed_targets.iterrows():
        address = row['Address']

        # Empty cells are read by pandas as NaN (a float); there is nothing to
        # send to the geocoder, so record them as failures straight away.
        if not isinstance(address, str) or not address.strip():
            fails.append(address)
            continue

        result = geolocator.geocode(address)
        if result is None:
            # Retry without the last 7 characters - presumably a trailing zip
            # code such as ', 12345' that may be wrong and throw the match off.
            result = geolocator.geocode(address[:-7])
        if result is None:
            fails.append(address)
            continue

        indexed_targets.at[index, 'Lat'] = result.latitude
        indexed_targets.at[index, 'Long'] = result.longitude

    for address in fails:
        print(address)

    indexed_targets.to_csv(os.path.splitext(filename)[0] + "_RESULTS.csv")


if __name__ == '__main__':
    main(path, filename)  # whatever these are for you...
This will output a new csv with "_RESULTS" (e.g., an input of 'addresses.csv' will output 'addresses_RESULTS.csv') with two new columns for 'Lat' and 'Long'.
This is just a beginning; tell me if it helps. It does not write to the CSV, but I'll edit my answer later if you need that part too.
import csv
from geopy.geocoders import ArcGIS
from geopy.exc import GeopyError

geolocator = ArcGIS()  # here some parameters are needed

# Python 3: the csv module wants text-mode files opened with newline='' so that
# it controls line endings itself (otherwise Windows output gets blank rows).
with open('C:/Users/v-albaut/Desktop/Test_Geo.csv', newline='') as csvinput, \
        open('output.csv', 'w', newline='') as csvoutput:
    output_fieldnames = ['Name', 'Address', 'Latitude', 'Longitude']
    writer = csv.DictWriter(csvoutput, delimiter=',', fieldnames=output_fieldnames)
    # Write the column names so the output is self-describing.
    writer.writeheader()
    reader = csv.DictReader(csvinput)
    for row in reader:
        # here you have to replace the dict item by your csv column names
        query = ','.join(str(x) for x in (row['Name'], row['Address']))

        # Best effort: one failed lookup must not abort a 5000-row batch, but
        # only geocoder errors are swallowed - a KeyError from a wrong column
        # name or a Ctrl-C still surfaces instead of silently writing 'N/A'.
        try:
            location = geolocator.geocode(query)
        except GeopyError:
            location = None

        # geocode() also returns None when the service finds no match.
        if location is None:
            latitude = 'N/A'
            longitude = 'N/A'
        else:
            latitude = location.latitude
            longitude = location.longitude

        # here is the writing section
        writer.writerow({
            'Name': row['Name'],
            'Address': row['Address'],
            'Latitude': latitude,
            'Longitude': longitude,
        })
doc:
- http://geopy.readthedocs.org/en/latest/#geopy.geocoders.ArcGIS
- https://developers.arcgis.com/rest/geocode/api-reference/overview-world-geocoding-service.htm
- https://docs.python.org/2/library/csv.html