# Source code for segregation.singlegroup.relative_clustering

"""Relative Clustering Index."""

__author__ = "Renan X. Cortes <renanc@ucr.edu>, Sergio J. Rey <sergio.rey@ucr.edu> and Elijah Knaap <elijah.knaap@ucr.edu>"

import numpy as np
import pandas as pd
from ..util import generate_distance_matrix
from .._base import SingleGroupIndex, SpatialExplicitIndex


def _relative_clustering(data, group_pop_var, total_pop_var, alpha=0.6, beta=0.5):
    """Calculate the Relative Clustering index (RCL).

    The index compares the average proximity between members of the group of
    interest (``Pxx``) with the average proximity between members of the
    complementary population (``Pyy``) and returns ``Pxx / Pyy - 1``.

    Parameters
    ----------
    data          : a geopandas DataFrame with a geometry column.
    group_pop_var : string
                    The name of variable in data that contains the population size of the group of interest
    total_pop_var : string
                    The name of variable in data that contains the total population of the unit
    alpha         : float
                    Non-negative scale applied to the area of each unit when building the
                    within-unit (diagonal) term. Default value is 0.6
    beta          : float
                    Non-negative exponent applied to ``alpha * area`` when building the
                    within-unit (diagonal) term. Default value is 0.5

    Returns
    -------
    statistic : float
                Relative Clustering Index
    core_data : a geopandas DataFrame
                A geopandas DataFrame that contains the columns used to perform the estimate
                (``group_pop_var``, ``total_pop_var`` and the geometry column).

    Raises
    ------
    ValueError
        If ``alpha`` or ``beta`` is negative, if the group of interest or the
        complementary population sums to zero, or if the resulting index is NaN.

    Notes
    -----
    Based on Massey, Douglas S., and Nancy A. Denton. "The dimensions of residential segregation." Social forces 67.2 (1988): 281-315.

    With ``c`` the proximity matrix, ``x`` the group population per unit and
    ``X`` its total, the average proximity is the pairwise sum

        Pxx = sum_i sum_j x_i * x_j * c_ij / X ** 2

    (and analogously ``Pyy`` for the complementary population ``y``).

    The pairwise distance between unit i and itself is (alpha * area_of_unit_i) ^ beta.
    NOTE(review): the code stores ``exp(-(alpha * area) ** beta)`` on the diagonal
    of the matrix returned by ``generate_distance_matrix`` and then uses
    ``1 - matrix`` as proximity; confirm that helper returns values on the same
    transformed scale.

    Reference: :cite:`massey1988dimensions`.

    """
    # Zero is accepted for both parameters, so the messages say "non-negative".
    if alpha < 0:
        raise ValueError("alpha must be non-negative.")

    if beta < 0:
        raise ValueError("beta must be non-negative.")

    # xi: group of interest per unit; yi: everyone else in the unit.
    data = data.assign(
        xi=data[group_pop_var], yi=data[total_pop_var] - data[group_pop_var]
    )

    X = data.xi.sum()
    Y = data.yi.sum()

    # A zero total makes Pxx or Pyy a 0/0 division; report the real cause instead
    # of letting it surface later as a misleading "distance" NaN error.
    if X == 0:
        raise ValueError(
            "The population of the group of interest sums to zero; the index is undefined."
        )
    if Y == 0:
        raise ValueError(
            "The complementary population sums to zero; the index is undefined."
        )

    dist = generate_distance_matrix(data)

    # Replace the self-distance entries with the area-based within-unit term.
    np.fill_diagonal(dist, val=np.exp(-((alpha * data.area.values) ** (beta))))

    c = 1 - dist  # proximity matrix (the subtraction already yields a new array)

    xi = data.xi.values
    yi = data.yi.values

    # Quadratic forms v' C v == sum_i sum_j v_i * v_j * c_ij. An element-wise
    # ``v * v * c`` would instead weight column j by v_j ** 2 and drop the
    # pairwise interaction between different units.
    Pxx = (xi @ c @ xi) / (X ** 2)
    Pyy = (yi @ c @ yi) / (Y ** 2)
    RCL = (Pxx / Pyy) - 1

    if np.isnan(RCL):
        raise ValueError(
            "It is not possible to determine the distance between, at least, one pair of units. This is probably due to the magnitude of the number of the centroids. We recommend to reproject the geopandas DataFrame."
        )

    core_data = data[[group_pop_var, total_pop_var, data.geometry.name]]

    return RCL, core_data


class RelativeClustering(SingleGroupIndex, SpatialExplicitIndex):
    """Relative Clustering Index.

    Parameters
    ----------
    data : pandas.DataFrame or geopandas.GeoDataFrame, required
        dataframe or geodataframe if spatial index holding data for location of interest
    group_pop_var : str, required
        name of column on dataframe holding population totals for focal group
    total_pop_var : str, required
        name of column on dataframe holding total overall population
    alpha : float
        Scale applied to the area of each unit when building the within-unit
        proximity term. Default value is 0.6
    beta : float
        Exponent applied to ``alpha * area`` when building the within-unit
        proximity term. Default value is 0.5
    **kwargs
        Accepted for signature compatibility; not used by this constructor.

    Attributes
    ----------
    statistic : float
        Relative Clustering Index
    core_data : a pandas DataFrame
        A pandas DataFrame that contains the columns used to perform the estimate.

    Notes
    -----
    Based on Massey, Douglas S., and Nancy A. Denton. "The dimensions of residential segregation." Social forces 67.2 (1988): 281-315.

    The pairwise distance between unit i and itself is (alpha * area_of_unit_i) ^ beta.

    Reference: :cite:`massey1988dimensions`.

    """

    def __init__(
        self,
        data,
        group_pop_var,
        total_pop_var,
        alpha=0.6,
        beta=0.5,
        **kwargs,
    ):
        """Init."""
        # Both base initialisers are called explicitly rather than via super().
        # Presumably SingleGroupIndex.__init__ stores data / group_pop_var /
        # total_pop_var on the instance, since they are read back below.
        SingleGroupIndex.__init__(self, data, group_pop_var, total_pop_var)
        SpatialExplicitIndex.__init__(self)

        self.alpha = alpha
        self.beta = beta

        self.statistic, self.core_data = _relative_clustering(
            self.data,
            self.group_pop_var,
            self.total_pop_var,
            self.alpha,
            self.beta,
        )

        # Keep a handle on the estimator that produced the statistic.
        self._function = _relative_clustering