gini

import sys
import os
import matplotlib.pyplot as plt
%pylab inline

Populating the interactive namespace from numpy and matplotlib

sys.path.append(os.path.abspath('..'))
import inequality
import libpysal

# List the names of the example datasets that libpysal reports as available.
libpysal.examples.available()

['georgia',
 '__pycache__',
 'tests',
 'newHaven',
 'Polygon_Holes',
 'nat',
 'Polygon',
 '10740',
 'berlin',
 'rio_grande_do_sul',
 'sids2',
 'sacramento2',
 'burkitt',
 'arcgis',
 'calemp',
 'stl',
 'virginia',
 'geodanet',
 'desmith',
 'book',
 'nyc_bikes',
 'Line',
 'south',
 'snow_maps',
 'Point',
 'street_net_pts',
 'guerry',
 '__pycache__',
 'baltim',
 'networks',
 'us_income',
 'taz',
 'columbus',
 'tokyo',
 'mexico',
 '__pycache__',
 'chicago',
 'wmat',
 'juvenile',
 'clearwater']

# Show the description and file inventory of the 'mexico' example dataset.
libpysal.examples.explain('mexico')

{'name': 'mexico',
 'description': 'Decennial per capita incomes of Mexican states 1940-2000',
 'explanation': ['* mexico.csv: attribute data. (n=32, k=13)',
  '* mexico.gal: spatial weights in GAL format.',
  '* mexicojoin.shp: Polygon shapefile. (n=32)',
  'Data used in Rey, S.J. and M.L. Sastre Gutierrez. (2010) "Interregional inequality dynamics in Mexico." Spatial Economic Analysis, 5: 277-298.']}

import geopandas
# Resolve the path of the example shapefile through libpysal, then read it
# with geopandas; `gdf` is the table used by every later cell.
pth = libpysal.examples.get_path("mexicojoin.shp")
gdf = geopandas.read_file(pth)

from libpysal.weights import Queen, Rook, KNN

%matplotlib inline
import matplotlib.pyplot as plt

# Plain map of the geometries in `gdf`, with the axis frame and ticks hidden.
ax = gdf.plot()
ax.set_axis_off()

# Preview the first rows of the table (attribute columns plus geometry).
gdf.head()

	POLY_ID	AREA	CODE	NAME	PERIMETER	ACRES	HECTARES	PCGDP1940	PCGDP1950	PCGDP1960	...	GR9000	LPCGDP40	LPCGDP50	LPCGDP60	LPCGDP70	LPCGDP80	LPCGDP90	LPCGDP00	TEST	geometry
0	1	7.252751e+10	MX02	Baja California Norte	2040312.385	1.792187e+07	7252751.376	22361.0	20977.0	17865.0	...	0.05	4.35	4.32	4.25	4.40	4.47	4.43	4.48	1.0	(POLYGON ((-113.1397171020508 29.0177764892578...
1	2	7.225988e+10	MX03	Baja California Sur	2912880.772	1.785573e+07	7225987.769	9573.0	16013.0	16707.0	...	0.00	3.98	4.20	4.22	4.39	4.46	4.41	4.42	2.0	(POLYGON ((-111.2061233520508 25.8027763366699...
2	3	2.731957e+10	MX18	Nayarit	1034770.341	6.750785e+06	2731956.859	4836.0	7515.0	7621.0	...	-0.05	3.68	3.88	3.88	4.04	4.13	4.11	4.06	3.0	(POLYGON ((-106.6210784912109 21.5653114318847...
3	4	7.961008e+10	MX14	Jalisco	2324727.436	1.967200e+07	7961008.285	5309.0	8232.0	9953.0	...	0.03	3.73	3.92	4.00	4.21	4.32	4.30	4.33	4.0	POLYGON ((-101.52490234375 21.85663986206055, ...
4	5	5.467030e+09	MX01	Aguascalientes	313895.530	1.350927e+06	546702.985	10384.0	6234.0	8714.0	...	0.13	4.02	3.79	3.94	4.21	4.32	4.32	4.44	5.0	POLYGON ((-101.8461990356445 22.01176071166992...

5 rows × 35 columns

# Choropleth of the PCGDP1940 column classified into five quantile classes,
# drawn without axes and written to 1940.png.
ax = gdf.plot(
    column='PCGDP1940',
    scheme='Quantiles',
    k=5,
    legend=True,
)
ax.set_axis_off()
#ax.set_title("PC GDP 1940")
plt.savefig('1940.png')

/home/serge/anaconda3/envs/libpysal/lib/python3.6/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval

# Gini object for the PCGDP1940 column; its `.g` attribute is displayed below.
gini_1940 = inequality.gini.Gini(gdf['PCGDP1940'])

gini_1940.g

0.3537237117345285

# Decades matching the PCGDP<year> columns: 1940 through 2000 in ten-year steps.
# The stop value is one step past the last decade because range() excludes it.
start_year, end_year, step = 1940, 2000, 10
decades = range(start_year, end_year + step, step)
decades

range(1940, 2010, 10)

# One Gini value (the `.g` attribute) per decade, in the order of `decades`.
income_columns = [f"PCGDP{year}" for year in decades]
ginis = [inequality.gini.Gini(gdf[column]).g for column in income_columns]

ginis

[0.3537237117345285,
 0.29644613439022827,
 0.2537183285655905,
 0.25513356494927303,
 0.24505338049421577,
 0.25181825879538217,
 0.2581130824882791]

# Display the Gini_Spatial object itself (no call) to confirm where it lives.
inequality.gini.Gini_Spatial

inequality.gini.Gini_Spatial

# Regime label of each observation, taken from the HANSON98 column.
regimes = gdf['HANSON98']

# Weights built from the regime labels via libpysal's block_weights.
# NOTE(review): presumably observations sharing a label become neighbors,
# which would explain the "not fully connected" warning — confirm in the docs.
w = libpysal.weights.block_weights(regimes)

/home/serge/Dropbox/p/pysal/src/subpackages/libpysal/libpysal/weights/weights.py:170: UserWarning: The weights matrix is not fully connected. There are 5 components
  warnings.warn("The weights matrix is not fully connected. There are %d components" % self.n_components)

# Categorical map of the HANSON98 regime labels, drawn without axes and
# written to regions.png.
ax = gdf.plot(categorical=True, column='HANSON98')
ax.set_axis_off()
#ax.set_title('Regions')
plt.savefig('regions.png')

import numpy as np
# Fix NumPy's global random seed before building the spatial Gini.
# NOTE(review): presumably Gini_Spatial draws random permutations from
# np.random to compute p_sim, so the seed makes the value below repeatable.
np.random.seed(12345)
# Spatial Gini for the PCGDP1940 column using the regime-based weights `w`.
gs = inequality.gini.Gini_Spatial(gdf['PCGDP1940'],w)

# Pseudo p-value attribute of the 1940 result.
gs.p_sim

0.01

# Spatial Gini for every decade, all using the same regime weights `w`.
# NOTE(review): the random seed is not reset before this cell, so these
# p-values presumably depend on RNG state left by earlier cells (the 1940
# entry displayed below differs from gs.p_sim above) — re-seed here if
# reproducibility matters.
gs_all = [inequality.gini.Gini_Spatial(gdf[f"PCGDP{year}"], w) for year in decades]

# Loop variable is named `result` so it does not reuse the name of the
# 1940-only object `gs`.
p_values = [result.p_sim for result in gs_all]

p_values

[0.04, 0.01, 0.01, 0.01, 0.02, 0.01, 0.01]

# Collect the `wcg_share` attribute of each decade's Gini_Spatial result.
wgs = [result.wcg_share for result in gs_all]

wgs

[0.2940179879590452,
 0.24885041274552472,
 0.21715641601961586,
 0.2212882581200239,
 0.20702733316567423,
 0.21270360014540865,
 0.2190953550725723]

# Complement (1 - share) of every value in `wgs`, in the same order; this is
# the series later plotted as 'Spatial Inequality Share'.
bgs = [1 - share for share in wgs]

bgs

[0.7059820120409548,
 0.7511495872544753,
 0.7828435839803841,
 0.778711741879976,
 0.7929726668343258,
 0.7872963998545913,
 0.7809046449274277]

# NOTE(review): `plt` and `np` are already imported explicitly in earlier
# cells and `%matplotlib inline` has already been run, so this %pylab call
# looks redundant — confirm nothing below relies on the names it injects.
%pylab inline

Populating the interactive namespace from numpy and matplotlib

# Materialize the decade range as a NumPy array for use as the plot's x values.
years = np.array(decades)

years

array([1940, 1950, 1960, 1970, 1980, 1990, 2000])

# Twin-axis figure over the decade years: the Gini series on the left axis
# (solid blue) and the `bgs` series on the right axis (dash-dot red).
# The result is written to share.png.
fig, ax1 = plt.subplots()
t, s1, s2 = years, ginis, bgs

# Left axis: label and ticks are coloured to match the blue line.
ax1.plot(t, s1, 'b-')
ax1.set_xlabel('Year')
ax1.set_ylabel('Gini', color='b')
ax1.tick_params(axis='y', colors='b')

# Right axis shares the x axis with ax1; coloured to match the red line.
ax2 = ax1.twinx()
ax2.plot(t, s2, 'r-.')
ax2.set_ylabel('Spatial Inequality Share', color='r')
ax2.tick_params(axis='y', colors='r')

fig.tight_layout()

plt.savefig('share.png')