glm_speed
import numpy as np
import pandas as pd
from spint.gravity import Gravity, Production, Attraction, Doubly, BaseGravity
#from entropy import Unconstrained, ProductionConstrained, AttractionConstrained, DoublyConstrained
import statsmodels.formula.api as smf
from statsmodels.api import families
import matplotlib.pyplot as plt
%pylab inline
import time
def timeit(method):
def timed(*args, **kw):
ts = time.time()
result = method(*args, **kw)
te = time.time()
elapsed = te-ts
return result, elapsed
return timed
@timeit
def gravity(f ,o, d, o_vars, d_vars, dij, cost='exp', framework='glm'):
results = Gravity(f, o_vars, d_vars, dij, cost, framework=framework)
return results
@timeit
def production(f ,o, d, o_vars, d_vars, dij, cost='exp', framework='glm'):
results = Production(f, o, d_vars, dij, 'exp', framework=framework)
return results
@timeit
def attraction(f ,o, d, o_vars, d_vars, dij, cost='exp', framework='glm'):
results = Attraction(f, d, o_vars, dij, 'exp', framework=framework)
return results
@timeit
def doubly(f ,o, d, o_vars, d_vars, dij, cost='exp', framework='glm'):
results = Doubly(f, o, d, dij, 'exp', framework=framework)
return results
def sim_data(n):
o = np.tile(np.arange(n),n)
d = np.repeat(np.arange(n),n)
loc_size = np.random.randint(25000,500000, n)
o_vars = np.tile(loc_size, n)
d_vars = np.repeat(loc_size, n)
dij = np.random.exponential(2500, n**2)
f = o_vars**.3*d_vars**.4*np.exp(dij*-.00005)
o = np.reshape(o, (-1,1))
d = np.reshape(d, (-1,1))
o_vars = np.reshape(o_vars, (-1,1))
d_vars = np.reshape(d_vars, (-1,1))
dij = np.reshape(dij, (-1,1))
f = np.reshape(f, (-1,1))
f = f.astype(np.int64)
return f, o, d, o_vars, d_vars, dij
def loop(func, start, stop, step, framework='glm'):
results = []
for n in np.arange(start, stop, step):
f, o, d, o_vars, d_vars, dij = sim_data(n)
out, elapsed = func(f, o, d, o_vars, d_vars, dij, 'exp', framework=framework)
print(out.params[-2:])
results.append(elapsed)
return results
glm_grav = loop(gravity, 50, 250, 50)
glm_prod = loop(production, 50, 250, 50)
glm_att = loop(attraction, 50, 250, 50)
glm_doub = loop(doubly, 50, 250, 50)
smglm_grav = loop(gravity, 50, 250, 50, framework='sm_glm')
smglm_prod = loop(production, 50, 250, 50, framework='sm_glm')
smglm_att = loop(attraction, 50, 250, 50, framework='sm_glm')
smglm_doub = loop(doubly, 50, 250, 50, framework='sm_glm')
x = np.arange(50, 250, 50)
plt.plot(x, glm_grav, x, glm_prod, x, glm_att, x, glm_doub)
plt.legend(('unconstrained', 'production', 'attraction', 'doubly'))
plt.title('Custom GLM Framework')
plt.xlabel('Sample Size')
plt.ylabel('Seconds')
plt.plot(x, smglm_grav, x, smglm_prod, x, smglm_att, x, smglm_doub)
plt.legend(('unconstrained', 'production', 'attraction', 'doubly'))
plt.legend(('unconstrained', 'production', 'attraction', 'doubly'))
plt.title('Statsmodels GLM Framework')
plt.xlabel('Sample Size')
plt.ylabel('Seconds')
f, o, d, o_vars, d_vars, dij = sim_data(100)
test = Production(f, o, d_vars, dij, 'exp')
test.