Source code for inequality.theil

"""Theil Inequality metrics"""

__author__ = "Sergio J. Rey <srey@sdsu.edu>"

import numpy

__all__ = ["Theil", "TheilD", "TheilDSim"]

SMALL = numpy.finfo("float").tiny



[docs]
class Theil:
    """
    Classic Theil measure of inequality.

    .. math::

        T = \\sum_{i=1}^n
            \\left( \\frac{y_i}{\\sum_{i=1}^n y_i} \\ln
                \\left[ N \\frac{y_i}{\\sum_{i=1}^n y_i}\\right]
            \\right
        )

    Parameters
    ----------

    y : numpy.array
        An array in the shape :math:`(n,t)` or :math:`(n,)`
        with :math:`n` taken as the observations across which inequality is
        calculated.  If ``y`` is :math:`(n,)` then a scalar inequality value is
        determined. If ``y`` is :math:`(n,t)` then an array of inequality values are
        determined, one value for each column in ``y``.

    Attributes
    ----------

    T : numpy.array
        An array in the shape :math:`(t,)` or :math:`(1,)`
        containing Theil's *T* for each column of ``y``.

    Notes
    -----
    This computation involves natural logs. To prevent ``ln[0]`` from occurring, a
    small value is added to each element of ``y`` before beginning the computation.

    Examples
    --------

    >>> import libpysal
    >>> import numpy
    >>> from inequality.theil import Theil

    >>> f = libpysal.io.open(libpysal.examples.get_path('mexico.csv'))
    >>> vnames = [f'pcgdp{dec}' for dec in range(1940, 2010, 10)]
    >>> y = numpy.array([f.by_col[v] for v in vnames]).T
    >>> theil_y = Theil(y)

    >>> theil_y.T
    array([0.20894344, 0.15222451, 0.10472941, 0.10194725, 0.09560113,
           0.10511256, 0.10660832])

    """


[docs]
    def __init__(self, y):
        n = len(y)
        y = y + SMALL * (y == 0)  # can't have 0 values
        yt = y.sum(axis=0)
        s = y / (yt * 1.0)
        lns = numpy.log(n * s)
        slns = s * lns
        t = sum(slns)
        self.T = t





[docs]
class TheilD:
    """Decomposition of Theil's *T* based on partitioning of
    observations into exhaustive and mutually exclusive groups.

    Parameters
    ----------

    y : numpy.array
        An array in the shape :math:`(n,t)` or :math:`(n,)`
        with :math:`n` taken as the observations across which inequality is
        calculated.  If ``y`` is :math:`(n,)` then a scalar inequality value is
        determined. If ``y`` is :math:`(n,t)` then an array of inequality values are
        determined, one value for each column in ``y``.
    partition : numpy.array
        An array in the shape :math:`(n,)` of elements indicating which partition
        each observation belongs to. These are assumed to be exhaustive.

    Attributes
    ----------

    T : numpy.array
        An array in the shape :math:`(t,)` or :math:`(1,)`
        containing the global inequality *T*.
    bg : numpy.array
        An array in the shape :math:`(n,t)` or :math:`(n,)`
        representing between group inequality.
    wg : numpy.array
        An array in the shape :math:`(n,t)` or :math:`(n,)`
        representing within group inequality.

    Examples
    --------

    >>> import libpysal
    >>> import numpy
    >>> from inequality.theil import TheilD

    >>> f = libpysal.io.open(libpysal.examples.get_path('mexico.csv'))
    >>> vnames = [f'pcgdp{dec}' for dec in range(1940, 2010, 10)]
    >>> y = numpy.array([f.by_col[v] for v in vnames]).T
    >>> regimes = numpy.array(f.by_col('hanson98'))
    >>> theil_d = TheilD(y, regimes)

    >>> theil_d.bg
    array([0.0345889 , 0.02816853, 0.05260921, 0.05931219, 0.03205257,
           0.02963731, 0.03635872])

    >>> theil_d.wg
    array([0.17435454, 0.12405598, 0.0521202 , 0.04263506, 0.06354856,
           0.07547525, 0.0702496 ])

    """


[docs]
    def __init__(self, y, partition):
        groups = numpy.unique(partition)
        T = Theil(y).T  # noqa N806
        ytot = y.sum(axis=0)

        # group totals
        gtot = numpy.array([y[partition == gid].sum(axis=0) for gid in groups])

        if ytot.size == 1:  # y is 1-d
            sg = gtot / (ytot * 1.0)
            sg.shape = (sg.size, 1)
        else:
            sg = numpy.dot(gtot, numpy.diag(1.0 / ytot))
        ng = numpy.array([sum(partition == gid) for gid in groups])
        ng.shape = (ng.size,)  # ensure ng is 1-d
        n = y.shape[0]
        # between group inequality
        sg = sg + SMALL * (sg == 0)  # can't have 0 values

        bg = numpy.multiply(sg, numpy.log(numpy.dot(numpy.diag(n * 1.0 / ng), sg))).sum(
            axis=0
        )

        self.T = T
        self.bg = bg
        self.wg = T - bg





[docs]
class TheilDSim:
    """Random permutation based inference on Theil's inequality decomposition.
    Provides for computationally based inference regarding the inequality
    decomposition using random spatial permutations.
    See :cite:`rey_interregional_2010`.

    Parameters
    ----------

    y : numpy.array
        An array in the shape :math:`(n,t)` or :math:`(n,)`
        with :math:`n` taken as the observations across which inequality is
        calculated.  If ``y`` is :math:`(n,)` then a scalar inequality value is
        determined. If ``y`` is :math:`(n,t)` then an array of inequality values are
        determined, one value for each column in ``y``.
    partition : numpy.array
        An array in the shape :math:`(n,)` of elements indicating which partition
        each observation belongs to. These are assumed to be exhaustive.
    permutations : int
        The number of random spatial permutations for computationally
        based inference on the decomposition.

    Attributes
    ----------

    observed : numpy.array
        An array in the shape :math:`(n,t)` or :math:`(n,)`
        representing a ``TheilD`` instance for the observed data.
    bg : numpy.array
        An array in the shape ``(permutations+1, t)``
        representing between group inequality.
    bg_pvalue : numpy.array
        An array in the shape :math:`(t,1)` representing the :math:`p`-value
        for the between group measure. Measures the percentage of the realized
        values that were greater than or equal to the observed ``bg`` value.
        Includes the observed value.
    wg : numpy.array
        An array in the shape ``(permutations+1)``
        representing within group inequality. Depending on the
        shape of ``y``, the array may be 1- or 2-dimensional.

    Examples
    --------

    >>> import libpysal
    >>> import numpy
    >>> from inequality.theil import TheilDSim

    >>> f = libpysal.io.open(libpysal.examples.get_path('mexico.csv'))
    >>> vnames = [f'pcgdp{dec}' for dec in range(1940, 2010, 10)]
    >>> y = numpy.array([f.by_col[v] for v in vnames]).T
    >>> regimes = numpy.array(f.by_col('hanson98'))
    >>> numpy.random.seed(10)
    >>> theil_ds = TheilDSim(y, regimes, 999)

    >>> theil_ds.bg_pvalue
    array([0.4  , 0.344, 0.001, 0.001, 0.034, 0.072, 0.032])

    """


[docs]
    def __init__(self, y, partition, permutations=99):
        observed = TheilD(y, partition)
        bg_ct = observed.bg == observed.bg  # already have one extreme value
        bg_ct = bg_ct * 1.0
        results = [observed]
        for _ in range(permutations):
            yp = numpy.random.permutation(y)
            t = TheilD(yp, partition)
            bg_ct += 1.0 * t.bg >= observed.bg
            results.append(t)
        self.results = results
        self.T = observed.T
        self.bg_pvalue = bg_ct / (permutations * 1.0 + 1)
        self.bg = numpy.array([r.bg for r in results])
        self.wg = numpy.array([r.wg for r in results])