# Source code for segregation.singlegroup.absolute_clustering

"""Absolute Clustering Index."""

__author__ = "Renan X. Cortes <renanc@ucr.edu>, Sergio J. Rey <sergio.rey@ucr.edu> and Elijah Knaap <elijah.knaap@ucr.edu>"

import numpy as np
import pandas as pd
from ..util import generate_distance_matrix
from .._base import SingleGroupIndex, SpatialExplicitIndex


def _absolute_clustering(data, group_pop_var, total_pop_var, alpha=0.6, beta=0.5):
    """Calculation of Absolute Clustering index

    Parameters
    ----------
    data : geopandas.GeoDataFrame
        a GeoDataFrame with a geometry column
    group_pop_var : string
        The name of variable in data that contains the population size of the
        group of interest
    total_pop_var : string
        The name of variable in data that contains the total population of the
        unit
    alpha : float
        A parameter that estimates the extent of the proximity within the same
        unit. Default value is 0.6
    beta : float
        A parameter that estimates the extent of the proximity within the same
        unit. Default value is 0.5

    Returns
    ----------
    statistic : float
        Absolute Clustering Index
    core_data : a geopandas DataFrame
        A geopandas DataFrame that contains the columns used to perform the estimate.

    Notes
    -----
    Based on Massey, Douglas S., and Nancy A. Denton. "The dimensions of residential segregation." Social forces 67.2 (1988): 281-315.

    The pairwise distance between unit i and itself is (alpha * area_of_unit_i) ^ beta.

    Reference: :cite:`massey1988dimensions`.

    """
    if alpha < 0:
        raise ValueError("alpha must be greater than zero.")

    if beta < 0:
        raise ValueError("beta must be greater than zero.")

    X = data[group_pop_var].values.sum()

    x = data[group_pop_var].values
    t = data[total_pop_var].values
    n = len(data)

    dist = generate_distance_matrix(data)
    np.fill_diagonal(dist, val=np.exp(-((alpha * data.area.values) ** (beta))))

    c = 1 - dist.copy()  # proximity matrix
    ACL = ((((x / X) * (c * x).sum(axis=1)).sum()) - ((X / n ** 2) * c.sum())) / (
        (((x / X) * (c * t).sum(axis=1)).sum()) - ((X / n ** 2) * c.sum())
    )

    core_data = data[[group_pop_var, total_pop_var, data.geometry.name]]

    return ACL, core_data


class AbsoluteClustering(SingleGroupIndex, SpatialExplicitIndex):
    """Absolute Clustering Index.

    Parameters
    ----------
    data : pandas.DataFrame or geopandas.GeoDataFrame, required
        dataframe or geodataframe if spatial index holding data for location
        of interest
    group_pop_var : str, required
        name of column on dataframe holding population totals for focal group
    total_pop_var : str, required
        name of column on dataframe holding total overall population
    alpha : float
        Multiplier applied to the area of each unit when the within-unit
        term is built. Default value is 0.6
    beta : float
        Exponent applied to ``alpha * area`` when the within-unit term is
        built. Default value is 0.5
    **kwargs
        Accepted but not used by this constructor.

    Attributes
    ----------
    statistic : float
        Absolute Clustering Index
    core_data : a pandas DataFrame
        A pandas DataFrame that contains the columns used to perform the
        estimate.

    Notes
    -----
    Based on Massey, Douglas S., and Nancy A. Denton. "The dimensions of residential segregation." Social forces 67.2 (1988): 281-315.

    The within-unit term for unit i is derived from
    ``(alpha * area_of_unit_i) ** beta``.

    Reference: :cite:`massey1988dimensions`.

    """

    def __init__(
        self,
        data,
        group_pop_var,
        total_pop_var,
        alpha=0.6,
        beta=0.5,
        **kwargs,
    ):
        """Init."""
        # Both bases are initialised explicitly because their signatures differ.
        SingleGroupIndex.__init__(self, data, group_pop_var, total_pop_var)
        SpatialExplicitIndex.__init__(self)

        self.alpha = alpha
        self.beta = beta

        # The estimate runs on the attributes set by SingleGroupIndex.__init__.
        aux = _absolute_clustering(
            self.data,
            self.group_pop_var,
            self.total_pop_var,
            self.alpha,
            self.beta,
        )

        self.statistic = aux[0]
        self.core_data = aux[1]
        # Keep a handle on the estimator function used for this index.
        self._function = _absolute_clustering