Numpy Array Math
Straight forward python with a for loop
from math import sin,cos,atan2,sqrt,pi # import from MATH
# earth's mean radius = 6,371km
EARTHRADIUS = 6371.0
def getDistanceByHaversine(loc1, loc2):
'''Haversine formula - give coordinates as
(lat_decimal,lon_decimal) tuples'''
#
# Unpack our tuples
lat1, lon1 = loc1
lat2, lon2 = loc2
#
#
#
#### convert to radians ####
lon1 = lon1 * pi / 180.0
lon2 = lon2 * pi / 180.0
lat1 = lat1 * pi / 180.0
lat2 = lat2 * pi / 180.0
#
### haversine formula ####
dlon = lon2 - lon1
dlat = lat2 - lat1
a = (sin(dlat/2))**2 + cos(lat1) * cos(lat2) * (sin(dlon/2.0))**2
c = 2.0 * atan2(sqrt(a), sqrt(1.0-a))
km = EARTHRADIUS * c
return km
#lets create 5000 random lat lon pairs
import random
r=random.random
positions1 = [ ( r()*360-180 , r()*180 ) for i in range(5000) ]
positions2 = [ ( r()*360-180 , r()*180 ) for i in range(5000) ]
#Evaluate all 500 latlon pairs in a "for loop"
distances = [ getDistanceByHaversine(positions1[i], positions2[i] )\
for i in range( len( positions1 ) ) ]
Final line evaluates in 27.866ms
This is an example on how to vectorize your math using numpy. This example shows how little one has to change, to remove the for loop, and achcieve a tenfold speedup, on only 500 items.
Vectorized using numpy
from numpy import sin,cos,arctan2,sqrt,pi # import from numpy
# earth's mean radius = 6,371km
EARTHRADIUS = 6371.0
def getDistanceByHaversine(loc1, loc2):
'''Haversine formula - give coordinates as a 2D numpy array of
(lat_decimal,lon_decimal) pairs'''
#
# "unpack" our numpy array, this extracts column wise arrays
lat1 = loc1[:,0]
lon1 = loc1[:,1]
lat2 = loc2[:,0]
lon2 = loc2[:,1]
#
# convert to radians ##### Completely identical
lon1 = lon1 * pi / 180.0
lon2 = lon2 * pi / 180.0
lat1 = lat1 * pi / 180.0
lat2 = lat2 * pi / 180.0
#
# haversine formula #### Same, but atan2 named arctan2 in numpy
dlon = lon2 - lon1
dlat = lat2 - lat1
a = (sin(dlat/2))**2 + cos(lat1) * cos(lat2) * (sin(dlon/2.0))**2
c = 2.0 * arctan2(sqrt(a), sqrt(1.0-a))
km = EARTHRADIUS * c
return km
#lets create 5000 random lat lon pairs
import random
r=random.random
positions1 = [ ( r()*360-180 , r()*180 ) for i in range(5000) ]
positions2 = [ ( r()*360-180 , r()*180 ) for i in range(5000) ]
#Create 2D numpy arrays from regular arrays of tuples
npos1 = numpy.array( positions1 )
npos2 = numpy.array( positions2 )
#Evaluate all 500 latlon pairs without the for loop
distances = getDistanceByHaversine(npos1, npos2 )
Final line evaluates in 3.943ms