Source code for segregation.multigroup.multi_diversity

"""Multigroup Diversity index"""

__author__ = "Renan X. Cortes <renanc@ucr.edu>, Sergio J. Rey <sergio.rey@ucr.edu> and Elijah Knaap <elijah.knaap@ucr.edu>"

import numpy as np
from geopandas import GeoDataFrame

from .._base import MultiGroupIndex, SpatialImplicitIndex

np.seterr(divide="ignore", invalid="ignore")


def _multi_diversity(data, groups, normalized=False):
    """Calculate of Multigroup Diveristy Index

    Parameters
    ----------
    data   : a pandas DataFrame
    groups : list of strings.
             The variables names in data of the groups of interest of the analysis.

    Returns
    -------

    statistic  : float
                 Multigroup Diversity Index
    core_data  : a pandas DataFrame
                 A pandas DataFrame that contains the columns used to perform the estimate.
    normalized : bool. Default is False.
                 Wheter the resulting index will be divided by its maximum (natural log of the number of groups)

    Notes
    -----
    Based on Reardon, Sean F., and Glenn Firebaugh. "Measures of multigroup segregation." Sociological methodology 32.1 (2002): 33-67 and Theil, Henry. "Statistical decomposition analysis; with applications in the social and administrative sciences". No. 04; HA33, T4.. 1972.

    This is also know as Theil's Entropy Index (Equation 2 of page 37 of Reardon, Sean F., and Glenn Firebaugh. "Measures of multigroup segregation." Sociological methodology 32.1 (2002): 33-67)

    High diversity means less segregation.

    Reference: :cite:`reardon2002measures`.

    """

    core_data = data[groups]
    df = np.array(core_data)

    Pk = df.sum(axis=0) / df.sum()

    E = -(Pk * np.log(Pk)).sum()

    if normalized:
        K = df.shape[1]
        E = E / np.log(K)
    if isinstance(data, GeoDataFrame):
        core_data = data[[data.geometry.name]].join(core_data)
    return E, core_data, groups



[docs]
class MultiDiversity(MultiGroupIndex, SpatialImplicitIndex):
    """Multigroup Diversity Index.

    Parameters
    ----------
    data : pandas.DataFrame or geopandas.GeoDataFrame, required
        dataframe or geodataframe if spatial index holding data for location of interest
    groups : list, required
        list of columns on dataframe holding population totals for each group
    w : libpysal.weights.KernelW, optional
        lipysal spatial kernel weights object used to define an egohood
    network : pandana.Network
        pandana Network object representing the study area
    distance : int
        Maximum distance (in units of geodataframe CRS) to consider the extent of the egohood
    decay : str
        type of decay function to apply. Options include
    precompute : bool
        Whether to precompute the pandana Network object
    normalized : bool. Default is False.
        Whether the resulting index will be divided by its maximum (natural log of the number of groups)
    
    Attributes
    ----------
    statistic : float
        Multigroup Dissimilarity Index value
    core_data : a pandas DataFrame
        DataFrame that contains the columns used to perform the estimate.

    Notes
    -----
    Based on Reardon, Sean F., and Glenn Firebaugh. "Measures of multigroup segregation." Sociological methodology 32.1 (2002): 33-67 and Theil, Henry. "Statistical decomposition analysis; with applications in the social and administrative sciences". No. 04; HA33, T4.. 1972.

    This is also know as Theil's Entropy Index (Equation 2 of page 37 of Reardon, Sean F., and Glenn Firebaugh. "Measures of multigroup segregation." Sociological methodology 32.1 (2002): 33-67)

    High diversity means less segregation.

    Reference: :cite:`reardon2002measures`.
    """


[docs]
    def __init__(
        self,
        data,
        groups,
        w=None,
        normalized=False,
        network=None,
        distance=None,
        decay=None,
        precompute=None,
        function='triangular',
        **kwargs
    ):
        """Init."""
        MultiGroupIndex.__init__(self, data, groups)
        self.normalized = normalized
        if any([w, network, distance]):
            SpatialImplicitIndex.__init__(self, w, network, distance, decay, function, precompute)
        aux = _multi_diversity(self.data, self.groups, normalized=self.normalized)

        self.statistic = aux[0]
        self.data = aux[1]
        self.groups = aux[2]
        self._function = _multi_diversity