Source code for segregation.local.local_distortion

import geopandas as gpd

from .._base import MultiGroupIndex, SpatialExplicitIndex
from ..dynamics import compute_divergence_profiles


def _local_distortion(
    gdf, groups, metric="euclidean", network=None, distance_matrix=None, normalize=False
):
    """
    A segregation metric, using Kullback-Leiber (KL) divergence to quantify the
    difference in the population characteristics between (1) an area and (2) the total population.

    This function utilises the methodology proposed in
    Olteanu et al. (2019): 'Segregation through the multiscalar lens'. Which can be
    found here: https://doi.org/10.1073/pnas.1900192116

    Parameters
    ----------
    data : pandas.DataFrame or geopandas.GeoDataFrame, required
        dataframe or geodataframe if spatial index holding data for location of interest
    groups : list, required
        list of columns on dataframe holding population totals for each group
    metric : str (optional; 'euclidean' by default)
        Distance metric for calculating pairwise distances,
        Accepts any inputs to `scipy.spatial.distance.pdist`.
        Ignored if passing a network or distance matrix
    network: pandana.Network object (optional, None by default)
        A pandana Network object used to compute distance between observations
    distance_matrix: numpy.array (optional; None by default)
        numpy array of distances between observations in the dataset
    normalize: bool
        NOT YET IMPLEMENTED


    Returns
    ----------
    aux : geopandas.GeoDataFrame
        geodataframe of distortion coefficient values

    """
    # Store the observation index to return with the results
    geoms = gdf[gdf.geometry.name]
    centroids = gdf.geometry.centroid

    aux = compute_divergence_profiles(
        gdf=gdf,
        groups=groups,
        network=network,
        metric=metric,
        distance_matrix=distance_matrix,
    )
    # divergence --> distortion by summing at each location
    aux = gpd.GeoDataFrame(
        aux.groupby("observation").sum()[["divergence"]], geometry=geoms
    ).rename(columns={"divergence": "distortion"})
    if normalize:
        raise Exception("Not yet implemented")
        # Need to write a routine to determine the scaling factor... From the paper:

        # the maximum distortion coefficient in a theoretical extreme case of segregation.
        # Theoretically, the maximal-segregation distortion coefficient is achieved when sorting
        # the k groups into k ghettos, ordered by sizes, and then computing the coefficient for
        # the most isolated person in the smallest group

    return aux


[docs]class LocalDistortion(MultiGroupIndex, SpatialExplicitIndex):
    """Multigroup Local Distortion Coefficients.

    Parameters
    ----------
    data : pandas.DataFrame or geopandas.GeoDataFrame, required
        dataframe or geodataframe if spatial index holding data for location of interest
    groups : list, required
        list of columns on dataframe holding population totals for each group
    metric : str (optional; 'euclidean' by default)
        Distance metric for calculating pairwise distances,
        Accepts any inputs to `scipy.spatial.distance.pdist`.
        Ignored if passing a network or distance matrix
    network: pandana.Network object (optional; None by default)
        A pandana Network object used to compute distance between observations
    distance_matrix:
        numpy array of distances between observations in the dataset
    normalization:
        NOT YET IMPLEMENTED

    Attributes
    ----------
    statistics : pandas.Series
        KL Divergence coefficients
    core_data : a pandas DataFrame
        DataFrame that contains the columns used to perform the estimate.

    Notes
    -----
    Olteanu et al. (2019): 'Segregation through the multiscalar lens'.  https://doi.org/10.1073/pnas.1900192116

    """

[docs]    def __init__(
        self,
        data,
        groups=None,
        metric="euclidean",
        network=None,
        distance_matrix=None,
        normalize=False,
        **kwargs
    ):
        """Init."""

        MultiGroupIndex.__init__(self, data, groups)
        SpatialExplicitIndex.__init__(self)

        aux = _local_distortion(
            self.data,
            self.groups,
            network=network,
            metric=metric,
            normalize=normalize,
            distance_matrix=distance_matrix,
        )

        self.statistics = aux["distortion"]
        self.data = aux
        self._function = _local_distortion