# Source code for segregation.singlegroup.conprof

"""ConProf Segregation Index."""

__author__ = "Renan X. Cortes <renanc@ucr.edu>, Sergio J. Rey <sergio.rey@ucr.edu> and Elijah Knaap <elijah.knaap@ucr.edu>"

import geopandas as gpd
import numpy as np
import pandas as pd

from .._base import SingleGroupIndex, SpatialImplicitIndex


def _conprof(data, group_pop_var, total_pop_var, m=1000):
    """Calculation of Concentration Profile.

    Parameters
    ----------
    data : pandas.DataFrame or geopandas.GeoDataFrame
        Dataframe or geodataframe if spatial index holding data for location of interest
    group_pop_var : string
        Variable containing the population count of the group of interest
    total_pop_var : string
        Variable in data that contains the total population count of the unit

    Returns
    ----------
    statistic : float
        MinMax index statistic value
    core_data : pandas.DataFrame
        A pandas DataFrame that contains the columns used to perform the estimate.

    Notes
    -----
    Based on Hong, Seong-Yun, and Yukio Sadahiro. "Measuring geographic segregation: a graph-based approach." Journal of Geographical Systems 16.2 (2014): 211-231.

    Reference: :cite:`hong2014measuring`.

    """
    if type(m) is not int:
        raise TypeError("m must be a string.")

    if m < 2:
        raise ValueError("m must be greater than 1.")

    x = np.array(data[group_pop_var])
    t = np.array(data[total_pop_var])

    if any(t < x):
        raise ValueError(
            "Group of interest population must equal or lower than the total population of the units."
        )

    def calculate_vt(th):
        g_t_i = np.where(x / t >= th, 1, 0)
        v_t = (g_t_i * x).sum() / x.sum()
        return v_t

    grid = np.linspace(0, 1, m)
    curve = np.array(list(map(calculate_vt, grid)))

    threshold = x.sum() / t.sum()
    R = (
        threshold
        - ((curve[grid < threshold]).sum() / m - (curve[grid >= threshold]).sum() / m)
    ) / (1 - threshold)

    return R, grid, curve, data


class ConProf(SingleGroupIndex, SpatialImplicitIndex):
    """ConProf Index.

    Parameters
    ----------
    data : pandas.DataFrame or geopandas.GeoDataFrame, required
        dataframe or geodataframe if spatial index holding data for location of interest
    group_pop_var : str, required
        name of column on dataframe holding population totals for focal group
    total_pop_var : str, required
        name of column on dataframe holding total overall population
    w : libpysal.weights.KernelW, optional
        libpysal spatial kernel weights object used to define an egohood
    network : pandana.Network
        pandana Network object representing the study area
    distance : int
        Maximum distance (in units of geodataframe CRS) to consider the extent of the egohood
    decay : str
        type of decay function to apply. Passed through to
        ``SpatialImplicitIndex``; see that class for the accepted options.
    function : str
        Passed through to ``SpatialImplicitIndex``. Default is "triangular".
    precompute : bool
        Whether to precompute the pandana Network object
    **kwargs
        Accepted for call compatibility; not used by this class.

    Attributes
    ----------
    statistic : float
        ConProf Index
    grid : numpy.ndarray
        Thresholds at which the concentration profile was evaluated.
    curve : numpy.ndarray
        Concentration profile value at each threshold in ``grid``.
    core_data : a pandas DataFrame
        A pandas DataFrame that contains the columns used to perform the estimate.

    Notes
    -----
    Based on Hong, Seong-Yun, and Yukio Sadahiro. "Measuring geographic segregation: a graph-based approach." Journal of Geographical Systems 16.2 (2014): 211-231.

    Reference: :cite:`hong2014measuring`.

    """

    def __init__(
        self,
        data,
        group_pop_var,
        total_pop_var,
        w=None,
        network=None,
        distance=None,
        decay=None,
        function="triangular",
        precompute=None,
        **kwargs
    ):
        """Init."""
        SingleGroupIndex.__init__(self, data, group_pop_var, total_pop_var)
        # The spatial initialisation runs only when at least one of the
        # spatial arguments is supplied (truthy).
        if any([w, network, distance]):
            SpatialImplicitIndex.__init__(
                self, w, network, distance, decay, function, precompute
            )
        aux = _conprof(self.data, self.group_pop_var, self.total_pop_var)

        self.statistic = aux[0]
        self.grid = aux[1]
        self.curve = aux[2]
        self.core_data = aux[3]
        # Reference to the estimator function; presumably consumed by the
        # base classes or by inference helpers -- confirm against _base.
        self._function = _conprof

    def plot(self):
        """Plot the Concentration Profile.

        Returns
        -------
        graph
            The object returned by ``matplotlib.pyplot.scatter`` for the
            profile (``grid`` on the x axis, ``curve`` on the y axis).

        Raises
        ------
        ImportError
            If ``matplotlib`` is not installed.
        """
        try:
            import matplotlib.pyplot as plt
        except ImportError as err:
            # Chain the original failure so the underlying cause stays visible.
            raise ImportError("plotting requires `matplotlib`") from err
        graph = plt.scatter(self.grid, self.curve, s=0.1)
        return graph