sparse_vs_dense_grav
import numpy as np
import pandas as pd
from gravity import Gravity, Production, Attraction, Doubly, BaseGravity
from entropy import Unconstrained, ProductionConstrained, AttractionConstrained, DoublyConstrained
import statsmodels.formula.api as smf
from statsmodels.api import families
import matplotlib.pyplot as plt
%pylab inline
import time
def timeit(method):
def timed(*args, **kw):
ts = time.time()
result = method(*args, **kw)
te = time.time()
elapsed = te-ts
#print '%2.8f sec' % \
#(elapsed)
return result, elapsed
return timed
@timeit
def gravity(f ,o, d, o_vars, d_vars, dij, cost='exp', framework='glm'):
results = Gravity(f, o_vars, d_vars, dij, cost, framework=framework)
return results
@timeit
def production(f ,o, d, o_vars, d_vars, dij, cost='exp', framework='glm'):
results = Production(f, o, d_vars, dij, 'exp', framework=framework)
return results
@timeit
def attraction(f ,o, d, o_vars, d_vars, dij, cost='exp', framework='glm'):
results = Attraction(f, d, o_vars, dij, 'exp', framework=framework)
return results
@timeit
def doubly(f ,o, d, o_vars, d_vars, dij, cost='exp', framework='glm'):
results = Doubly(f, o, d, dij, 'exp', framework=framework)
return results
def sim_data(n):
o = np.tile(np.arange(n),n)
d = np.repeat(np.arange(n),n)
loc_size = np.random.randint(25000,500000, n)
o_vars = np.tile(loc_size, n)
d_vars = np.repeat(loc_size, n)
dij = np.random.exponential(2500, n**2)
f = o_vars**.3*d_vars**.4*np.exp(dij*-.00005)
o = np.reshape(o, (-1,1))
d = np.reshape(d, (-1,1))
o_vars = np.reshape(o_vars, (-1,1))
d_vars = np.reshape(d_vars, (-1,1))
dij = np.reshape(dij, (-1,1))
f = np.reshape(f, (-1,1))
f = f.astype(np.int64)
return f, o, d, o_vars, d_vars, dij
def loop(func, start, stop, step, framework='glm'):
results = []
for n in np.arange(start, stop, step):
f, o, d, o_vars, d_vars, dij = sim_data(n)
out, elapsed = func(f, o, d, o_vars, d_vars, dij, 'exp', framework=framework)
print out.params[-2:]
results.append(elapsed)
return results
#copy results over from notebook in sparse branch
sp_grav = np.array([0.009379863739013672,
0.010191917419433594,
0.013580083847045898,
0.02336597442626953,
0.023818016052246094,
0.03525900840759277,
0.03714299201965332,
0.047389984130859375,
0.055455923080444336,
0.07191991806030273,
0.07992720603942871,
0.10130095481872559,
0.10898685455322266,
0.1385822296142578,
0.15839409828186035,
0.18109512329101562,
0.19623899459838867,
0.2373828887939453,
0.24484801292419434])
sp_prod = np.array([0.008792877197265625,
0.012892961502075195,
0.018072128295898438,
0.02672410011291504,
0.036765098571777344,
0.05049705505371094,
0.06424093246459961,
0.08189702033996582,
0.1013631820678711,
0.1273641586303711,
0.15144896507263184,
0.17901921272277832,
0.2195439338684082,
0.2700672149658203,
0.2963399887084961,
0.332568883895874,
0.3813331127166748,
0.44652509689331055,
0.4651219844818115])
sp_att = np.array([0.00892496109008789,
0.01181483268737793,
0.017676830291748047,
0.025489091873168945,
0.03578901290893555,
0.04750704765319824,
0.06312990188598633,
0.07956314086914062,
0.09888005256652832,
0.11808300018310547,
0.14717316627502441,
0.175645112991333,
0.20492291450500488,
0.23653817176818848,
0.2717468738555908,
0.2954838275909424,
0.3636939525604248,
0.4200310707092285,
0.43506598472595215])
sp_doub = np.array([0.01020193099975586,
0.015948057174682617,
0.025825023651123047,
0.03924298286437988,
0.05489301681518555,
0.08008694648742676,
0.10814595222473145,
0.14121484756469727,
0.18188691139221191,
0.2283778190612793,
0.2879500389099121,
0.3248469829559326,
0.3642420768737793,
0.44018101692199707,
0.506378173828125,
0.5851249694824219,
0.6679189205169678,
0.7664568424224854,
0.8878300189971924])
grav = loop(gravity, 25, 500, 25)
prod = loop(production, 25, 500, 25)
att = loop(attraction, 25, 500, 25)
doub = loop(doubly, 25, 500, 25)
x = np.arange(25, 500, 25)
plt.plot(x, grav, x, sp_grav)
plt.legend(('dense', 'sparse'))
plt.title('Sparse vs Dense GLM Unconstrained Gravity Model')
plt.xlabel('Sample Size')
plt.ylabel('Seconds')
plt.plot(x, prod, x, sp_prod)
plt.legend(('dense', 'sparse'))
plt.title('Sparse vs Dense GLM Production-Constrained Gravity Model')
plt.xlabel('Sample Size')
plt.ylabel('Seconds')
plt.plot(x, att, x, sp_att)
plt.legend(('dense', 'sparse'))
plt.title('Sparse vs Dense GLM Attraction-Constrained Gravity Model')
plt.xlabel('Sample Size')
plt.ylabel('Seconds')
plt.plot(x, doub, x, sp_doub)
plt.legend(('dense', 'sparse'))
plt.title('Sparse vs Dense GLM Doubly-Constrained Gravity Model')
plt.xlabel('Sample Size')
plt.ylabel('Seconds')