Search
gini
import sys
import os
import matplotlib.pyplot as plt
%pylab inline
Populating the interactive namespace from numpy and matplotlib
sys.path.append(os.path.abspath('..'))
import inequality
import libpysal
libpysal.examples.available()
['georgia',
 '__pycache__',
 'tests',
 'newHaven',
 'Polygon_Holes',
 'nat',
 'Polygon',
 '10740',
 'berlin',
 'rio_grande_do_sul',
 'sids2',
 'sacramento2',
 'burkitt',
 'arcgis',
 'calemp',
 'stl',
 'virginia',
 'geodanet',
 'desmith',
 'book',
 'nyc_bikes',
 'Line',
 'south',
 'snow_maps',
 'Point',
 'street_net_pts',
 'guerry',
 '__pycache__',
 'baltim',
 'networks',
 'us_income',
 'taz',
 'columbus',
 'tokyo',
 'mexico',
 '__pycache__',
 'chicago',
 'wmat',
 'juvenile',
 'clearwater']
libpysal.examples.explain('mexico')
{'name': 'mexico',
 'description': 'Decennial per capita incomes of Mexican states 1940-2000',
 'explanation': ['* mexico.csv: attribute data. (n=32, k=13)',
  '* mexico.gal: spatial weights in GAL format.',
  '* mexicojoin.shp: Polygon shapefile. (n=32)',
  'Data used in Rey, S.J. and M.L. Sastre Gutierrez. (2010) "Interregional inequality dynamics in Mexico." Spatial Economic Analysis, 5: 277-298.']}
import geopandas
pth = libpysal.examples.get_path("mexicojoin.shp")
gdf = geopandas.read_file(pth)

from libpysal.weights import Queen, Rook, KNN
%matplotlib inline
import matplotlib.pyplot as plt
ax = gdf.plot()
ax.set_axis_off()
gdf.head()
POLY_ID AREA CODE NAME PERIMETER ACRES HECTARES PCGDP1940 PCGDP1950 PCGDP1960 ... GR9000 LPCGDP40 LPCGDP50 LPCGDP60 LPCGDP70 LPCGDP80 LPCGDP90 LPCGDP00 TEST geometry
0 1 7.252751e+10 MX02 Baja California Norte 2040312.385 1.792187e+07 7252751.376 22361.0 20977.0 17865.0 ... 0.05 4.35 4.32 4.25 4.40 4.47 4.43 4.48 1.0 (POLYGON ((-113.1397171020508 29.0177764892578...
1 2 7.225988e+10 MX03 Baja California Sur 2912880.772 1.785573e+07 7225987.769 9573.0 16013.0 16707.0 ... 0.00 3.98 4.20 4.22 4.39 4.46 4.41 4.42 2.0 (POLYGON ((-111.2061233520508 25.8027763366699...
2 3 2.731957e+10 MX18 Nayarit 1034770.341 6.750785e+06 2731956.859 4836.0 7515.0 7621.0 ... -0.05 3.68 3.88 3.88 4.04 4.13 4.11 4.06 3.0 (POLYGON ((-106.6210784912109 21.5653114318847...
3 4 7.961008e+10 MX14 Jalisco 2324727.436 1.967200e+07 7961008.285 5309.0 8232.0 9953.0 ... 0.03 3.73 3.92 4.00 4.21 4.32 4.30 4.33 4.0 POLYGON ((-101.52490234375 21.85663986206055, ...
4 5 5.467030e+09 MX01 Aguascalientes 313895.530 1.350927e+06 546702.985 10384.0 6234.0 8714.0 ... 0.13 4.02 3.79 3.94 4.21 4.32 4.32 4.44 5.0 POLYGON ((-101.8461990356445 22.01176071166992...

5 rows × 35 columns

ax = gdf.plot(column='PCGDP1940',k=5,scheme='Quantiles',legend=True)
ax.set_axis_off()
#ax.set_title("PC GDP 1940")
plt.savefig('1940.png')
/home/serge/anaconda3/envs/libpysal/lib/python3.6/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
gini_1940 = inequality.gini.Gini(gdf['PCGDP1940'])
gini_1940.g
0.3537237117345285
decades = range(1940, 2010, 10)
decades
range(1940, 2010, 10)
ginis = [ inequality.gini.Gini(gdf["PCGDP%s"%decade]).g for decade in decades]
ginis
[0.3537237117345285,
 0.29644613439022827,
 0.2537183285655905,
 0.25513356494927303,
 0.24505338049421577,
 0.25181825879538217,
 0.2581130824882791]
inequality.gini.Gini_Spatial
inequality.gini.Gini_Spatial
regimes = gdf['HANSON98']
w = libpysal.weights.block_weights(regimes)
/home/serge/Dropbox/p/pysal/src/subpackages/libpysal/libpysal/weights/weights.py:170: UserWarning: The weights matrix is not fully connected. There are 5 components
  warnings.warn("The weights matrix is not fully connected. There are %d components" % self.n_components)
ax = gdf.plot(column='HANSON98', categorical=True)
#ax.set_title('Regions')
ax.set_axis_off()
plt.savefig('regions.png')
import numpy as np
np.random.seed(12345)
gs = inequality.gini.Gini_Spatial(gdf['PCGDP1940'],w)
gs.p_sim
0.01
gs_all = [ inequality.gini.Gini_Spatial(gdf["PCGDP%s"%decade], w) for decade in decades]
p_values = [gs.p_sim for gs in gs_all]
p_values
[0.04, 0.01, 0.01, 0.01, 0.02, 0.01, 0.01]
wgs = [gs.wcg_share for gs in gs_all]
wgs
[0.2940179879590452,
 0.24885041274552472,
 0.21715641601961586,
 0.2212882581200239,
 0.20702733316567423,
 0.21270360014540865,
 0.2190953550725723]
bgs = [ 1 - wg for wg in wgs]
bgs
[0.7059820120409548,
 0.7511495872544753,
 0.7828435839803841,
 0.778711741879976,
 0.7929726668343258,
 0.7872963998545913,
 0.7809046449274277]
%pylab inline
Populating the interactive namespace from numpy and matplotlib
years = np.array(decades)
years
array([1940, 1950, 1960, 1970, 1980, 1990, 2000])
fig, ax1 = plt.subplots()
t = years
s1 = ginis
ax1.plot(t, s1, 'b-')
ax1.set_xlabel('Year')
# Make the y-axis label, ticks and tick labels match the line color.
ax1.set_ylabel('Gini', color='b')
ax1.tick_params('y', colors='b')

ax2 = ax1.twinx()
s2 = bgs
ax2.plot(t, s2, 'r-.')
ax2.set_ylabel('Spatial Inequality Share', color='r')
ax2.tick_params('y', colors='r')

fig.tight_layout()

plt.savefig('share.png')