Numpy Array Math

Straight forward python with a for loop

from math import sin,cos,atan2,sqrt,pi # import from MATH

# earth's mean radius = 6,371km

EARTHRADIUS = 6371.0

def getDistanceByHaversine(loc1, loc2):

'''Haversine formula - give coordinates as

(lat_decimal,lon_decimal) tuples'''

#

# Unpack our tuples

lat1, lon1 = loc1

lat2, lon2 = loc2

#

#

#

#### convert to radians ####

lon1 = lon1 * pi / 180.0

lon2 = lon2 * pi / 180.0

lat1 = lat1 * pi / 180.0

lat2 = lat2 * pi / 180.0

#

### haversine formula ####

dlon = lon2 - lon1

dlat = lat2 - lat1

a = (sin(dlat/2))**2 + cos(lat1) * cos(lat2) * (sin(dlon/2.0))**2

c = 2.0 * atan2(sqrt(a), sqrt(1.0-a))

km = EARTHRADIUS * c

return km

#lets create 5000 random lat lon pairs

import random

r=random.random

positions1 = [ ( r()*360-180 , r()*180 ) for i in range(5000) ]

positions2 = [ ( r()*360-180 , r()*180 ) for i in range(5000) ]

#Evaluate all 500 latlon pairs in a "for loop"

distances = [ getDistanceByHaversine(positions1[i], positions2[i] )\

for i in range( len( positions1 ) ) ]

Final line evaluates in 27.866ms

This is an example on how to vectorize your math using numpy. This example shows how little one has to change, to remove the for loop, and achcieve a tenfold speedup, on only 500 items.

Vectorized using numpy

from numpy import sin,cos,arctan2,sqrt,pi # import from numpy

# earth's mean radius = 6,371km

EARTHRADIUS = 6371.0

def getDistanceByHaversine(loc1, loc2):

'''Haversine formula - give coordinates as a 2D numpy array of

(lat_decimal,lon_decimal) pairs'''

#

# "unpack" our numpy array, this extracts column wise arrays

lat1 = loc1[:,0]

lon1 = loc1[:,1]

lat2 = loc2[:,0]

lon2 = loc2[:,1]

#

# convert to radians ##### Completely identical

lon1 = lon1 * pi / 180.0

lon2 = lon2 * pi / 180.0

lat1 = lat1 * pi / 180.0

lat2 = lat2 * pi / 180.0

#

# haversine formula #### Same, but atan2 named arctan2 in numpy

dlon = lon2 - lon1

dlat = lat2 - lat1

a = (sin(dlat/2))**2 + cos(lat1) * cos(lat2) * (sin(dlon/2.0))**2

c = 2.0 * arctan2(sqrt(a), sqrt(1.0-a))

km = EARTHRADIUS * c

return km

#lets create 5000 random lat lon pairs

import random

r=random.random

positions1 = [ ( r()*360-180 , r()*180 ) for i in range(5000) ]

positions2 = [ ( r()*360-180 , r()*180 ) for i in range(5000) ]

#Create 2D numpy arrays from regular arrays of tuples

npos1 = numpy.array( positions1 )

npos2 = numpy.array( positions2 )

#Evaluate all 500 latlon pairs without the for loop

distances = getDistanceByHaversine(npos1, npos2 )

Final line evaluates in 3.943ms