"""Theil Inequality metrics"""
__author__ = "Sergio J. Rey <srey@sdsu.edu>"
import numpy
__all__ = ["Theil", "TheilD", "TheilDSim"]
SMALL = numpy.finfo("float").tiny
[docs]
class Theil:
"""
Classic Theil measure of inequality.
.. math::
T = \\sum_{i=1}^n
\\left( \\frac{y_i}{\\sum_{i=1}^n y_i} \\ln
\\left[ N \\frac{y_i}{\\sum_{i=1}^n y_i}\\right]
\\right
)
Parameters
----------
y : numpy.array
An array in the shape :math:`(n,t)` or :math:`(n,)`
with :math:`n` taken as the observations across which inequality is
calculated. If ``y`` is :math:`(n,)` then a scalar inequality value is
determined. If ``y`` is :math:`(n,t)` then an array of inequality values are
determined, one value for each column in ``y``.
Attributes
----------
T : numpy.array
An array in the shape :math:`(t,)` or :math:`(1,)`
containing Theil's *T* for each column of ``y``.
Notes
-----
This computation involves natural logs. To prevent ``ln[0]`` from occurring, a
small value is added to each element of ``y`` before beginning the computation.
Examples
--------
>>> import libpysal
>>> import numpy
>>> from inequality.theil import Theil
>>> f = libpysal.io.open(libpysal.examples.get_path('mexico.csv'))
>>> vnames = [f'pcgdp{dec}' for dec in range(1940, 2010, 10)]
>>> y = numpy.array([f.by_col[v] for v in vnames]).T
>>> theil_y = Theil(y)
>>> theil_y.T
array([0.20894344, 0.15222451, 0.10472941, 0.10194725, 0.09560113,
0.10511256, 0.10660832])
"""
[docs]
def __init__(self, y):
n = len(y)
y = y + SMALL * (y == 0) # can't have 0 values
yt = y.sum(axis=0)
s = y / (yt * 1.0)
lns = numpy.log(n * s)
slns = s * lns
t = sum(slns)
self.T = t
[docs]
class TheilD:
"""Decomposition of Theil's *T* based on partitioning of
observations into exhaustive and mutually exclusive groups.
Parameters
----------
y : numpy.array
An array in the shape :math:`(n,t)` or :math:`(n,)`
with :math:`n` taken as the observations across which inequality is
calculated. If ``y`` is :math:`(n,)` then a scalar inequality value is
determined. If ``y`` is :math:`(n,t)` then an array of inequality values are
determined, one value for each column in ``y``.
partition : numpy.array
An array in the shape :math:`(n,)` of elements indicating which partition
each observation belongs to. These are assumed to be exhaustive.
Attributes
----------
T : numpy.array
An array in the shape :math:`(t,)` or :math:`(1,)`
containing the global inequality *T*.
bg : numpy.array
An array in the shape :math:`(n,t)` or :math:`(n,)`
representing between group inequality.
wg : numpy.array
An array in the shape :math:`(n,t)` or :math:`(n,)`
representing within group inequality.
Examples
--------
>>> import libpysal
>>> import numpy
>>> from inequality.theil import TheilD
>>> f = libpysal.io.open(libpysal.examples.get_path('mexico.csv'))
>>> vnames = [f'pcgdp{dec}' for dec in range(1940, 2010, 10)]
>>> y = numpy.array([f.by_col[v] for v in vnames]).T
>>> regimes = numpy.array(f.by_col('hanson98'))
>>> theil_d = TheilD(y, regimes)
>>> theil_d.bg
array([0.0345889 , 0.02816853, 0.05260921, 0.05931219, 0.03205257,
0.02963731, 0.03635872])
>>> theil_d.wg
array([0.17435454, 0.12405598, 0.0521202 , 0.04263506, 0.06354856,
0.07547525, 0.0702496 ])
"""
[docs]
def __init__(self, y, partition):
groups = numpy.unique(partition)
T = Theil(y).T # noqa N806
ytot = y.sum(axis=0)
# group totals
gtot = numpy.array([y[partition == gid].sum(axis=0) for gid in groups])
if ytot.size == 1: # y is 1-d
sg = gtot / (ytot * 1.0)
sg.shape = (sg.size, 1)
else:
sg = numpy.dot(gtot, numpy.diag(1.0 / ytot))
ng = numpy.array([sum(partition == gid) for gid in groups])
ng.shape = (ng.size,) # ensure ng is 1-d
n = y.shape[0]
# between group inequality
sg = sg + SMALL * (sg == 0) # can't have 0 values
bg = numpy.multiply(sg, numpy.log(numpy.dot(numpy.diag(n * 1.0 / ng), sg))).sum(
axis=0
)
self.T = T
self.bg = bg
self.wg = T - bg
[docs]
class TheilDSim:
"""Random permutation based inference on Theil's inequality decomposition.
Provides for computationally based inference regarding the inequality
decomposition using random spatial permutations.
See :cite:`rey_interregional_2010`.
Parameters
----------
y : numpy.array
An array in the shape :math:`(n,t)` or :math:`(n,)`
with :math:`n` taken as the observations across which inequality is
calculated. If ``y`` is :math:`(n,)` then a scalar inequality value is
determined. If ``y`` is :math:`(n,t)` then an array of inequality values are
determined, one value for each column in ``y``.
partition : numpy.array
An array in the shape :math:`(n,)` of elements indicating which partition
each observation belongs to. These are assumed to be exhaustive.
permutations : int
The number of random spatial permutations for computationally
based inference on the decomposition.
Attributes
----------
observed : numpy.array
An array in the shape :math:`(n,t)` or :math:`(n,)`
representing a ``TheilD`` instance for the observed data.
bg : numpy.array
An array in the shape ``(permutations+1, t)``
representing between group inequality.
bg_pvalue : numpy.array
An array in the shape :math:`(t,1)` representing the :math:`p`-value
for the between group measure. Measures the percentage of the realized
values that were greater than or equal to the observed ``bg`` value.
Includes the observed value.
wg : numpy.array
An array in the shape ``(permutations+1)``
representing within group inequality. Depending on the
shape of ``y``, the array may be 1- or 2-dimensional.
Examples
--------
>>> import libpysal
>>> import numpy
>>> from inequality.theil import TheilDSim
>>> f = libpysal.io.open(libpysal.examples.get_path('mexico.csv'))
>>> vnames = [f'pcgdp{dec}' for dec in range(1940, 2010, 10)]
>>> y = numpy.array([f.by_col[v] for v in vnames]).T
>>> regimes = numpy.array(f.by_col('hanson98'))
>>> numpy.random.seed(10)
>>> theil_ds = TheilDSim(y, regimes, 999)
>>> theil_ds.bg_pvalue
array([0.4 , 0.344, 0.001, 0.001, 0.034, 0.072, 0.032])
"""
[docs]
def __init__(self, y, partition, permutations=99):
observed = TheilD(y, partition)
bg_ct = observed.bg == observed.bg # already have one extreme value
bg_ct = bg_ct * 1.0
results = [observed]
for _ in range(permutations):
yp = numpy.random.permutation(y)
t = TheilD(yp, partition)
bg_ct += 1.0 * t.bg >= observed.bg
results.append(t)
self.results = results
self.T = observed.T
self.bg_pvalue = bg_ct / (permutations * 1.0 + 1)
self.bg = numpy.array([r.bg for r in results])
self.wg = numpy.array([r.wg for r in results])