# Source code for segregation.singlegroup.spatial_dissim

"""Spatial Dissimilarity Index."""

__author__ = "Renan X. Cortes <renanc@ucr.edu>, Sergio J. Rey <sergio.rey@ucr.edu> and Elijah Knaap <elijah.knaap@ucr.edu>"

import libpysal
import numpy as np
from libpysal.weights import Queen

from .._base import SingleGroupIndex, SpatialExplicitIndex
from .dissim import _dissim


def _spatial_dissim(data, group_pop_var, total_pop_var, w=None, standardize=False):
    """Calculate the Spatial Dissimilarity index.

    The statistic is ``D - sum(cij * |pi - pj|) / sum(cij)``, where ``D`` is
    the first value returned by ``_dissim`` for the same inputs, ``pi`` is the
    share of the group of interest in unit ``i`` and ``cij`` are the spatial
    weights between units ``i`` and ``j``.

    Parameters
    ----------
    data : a geopandas DataFrame with a geometry column.
    group_pop_var : string
        The name of variable in data that contains the population size of the group of interest
    total_pop_var : string
        The name of variable in data that contains the total population of the unit
    w : W
        A PySAL weights object. If not provided, Queen contiguity matrix is used.
    standardize : boolean
        A condition for row standardisation of the weights matrices. If True, the values of cij in the formulas gets row standardized.
        Rows without any neighbour (islands) are left as all zeros.
        For the sake of comparison, the seg R package of Hong, Seong-Yun, David O'Sullivan, and Yukio Sadahiro. "Implementing spatial segregation measures in R." PloS one 9.11 (2014): e113767.
        works by default with row standardization.

    Returns
    -------
    statistic : float
        Spatial Dissimilarity Index
    core_data : a geopandas DataFrame
        A geopandas DataFrame that contains the columns used to perform the estimate.

    Raises
    ------
    TypeError
        If ``standardize`` is not a bool, or if the weights object (given or
        built) is not a ``libpysal.weights.W``.

    Notes
    -----
    Based on Morrill, R. L. (1991) "On the Measure of Geographic Segregation". Geography Research Forum.

    Reference: :cite:`morrill1991measure`.

    """
    if not isinstance(standardize, bool):
        raise TypeError("standardize is not a boolean object")

    # Fall back to Queen contiguity built from the geometries when no weights are given.
    w_object = Queen.from_dataframe(data) if w is None else w

    if not isinstance(w_object, libpysal.weights.W):
        raise TypeError("w is not a PySAL weights object")

    D = _dissim(data, group_pop_var, total_pop_var)[0]

    x = np.asarray(data[group_pop_var], dtype=float)
    t = np.asarray(data[total_pop_var], dtype=float)

    # If a unit has zero population, the group of interest frequency is zero.
    # The masked divide skips those units instead of computing x / 0 first,
    # which would emit divide-by-zero warnings.
    pi = np.divide(x, t, out=np.zeros_like(t), where=t != 0)

    cij = w_object.sparse.toarray()
    if standardize:
        row_sums = cij.sum(axis=1, keepdims=True)
        # Rows that sum to zero (units without neighbours) stay all-zero
        # rather than turning into NaN through a 0 / 0 division.
        cij = np.divide(
            cij,
            row_sums,
            out=np.zeros_like(cij, dtype=float),
            where=row_sums != 0,
        )

    # Pairwise absolute differences |pi - pj| via broadcasting. Inspired in
    # (second solution): https://stackoverflow.com/questions/22720864/efficiently-calculating-a-euclidean-distance-matrix-using-numpy
    abs_dist = np.abs(pi[:, np.newaxis] - pi)

    # Weighted mean absolute difference between connected units.
    num = np.multiply(abs_dist, cij).sum()
    den = cij.sum()
    SD = D - num / den

    core_data = data[[group_pop_var, total_pop_var, data.geometry.name]]

    return SD, core_data


class SpatialDissim(SingleGroupIndex, SpatialExplicitIndex):
    """Spatial Dissimilarity Index.

    Parameters
    ----------
    data : pandas.DataFrame or geopandas.GeoDataFrame, required
        dataframe or geodataframe if spatial index holding data for location of interest
    group_pop_var : str, required
        name of column on dataframe holding population totals for focal group
    total_pop_var : str, required
        name of column on dataframe holding total overall population
    w : libpysal.weights.W
        pysal spatial weights object measuring connectivity between geographic units.
        If None, a Queen object will be created
    standardize : boolean
        A condition for row standardisation of the weights matrices. If True, the values of cij in the formulas gets row standardized.
        For the sake of comparison, the seg R package of Hong, Seong-Yun, David O'Sullivan, and Yukio Sadahiro. "Implementing spatial segregation measures in R." PloS one 9.11 (2014): e113767.
        works by default with row standardization.

    Attributes
    ----------
    statistic : float
        SpatialDissim Index
    core_data : a pandas DataFrame
        A pandas DataFrame that contains the columns used to perform the estimate.

    Notes
    -----
    Based on Morrill, R. L. (1991) "On the Measure of Geographic Segregation". Geography Research Forum.

    Reference: :cite:`morrill1991measure`.

    """

    def __init__(
        self, data, group_pop_var, total_pop_var, w=None, standardize=False, **kwargs
    ):
        """Init.

        Initialises both base classes, stores ``w`` and ``standardize``, and
        computes the index through ``_spatial_dissim``. Extra keyword
        arguments are accepted but not used in this method.
        """
        SingleGroupIndex.__init__(self, data, group_pop_var, total_pop_var)
        SpatialExplicitIndex.__init__(self)
        self.w = w
        self.standardize = standardize

        # _spatial_dissim returns (statistic, core_data).
        aux = _spatial_dissim(
            self.data,
            self.group_pop_var,
            self.total_pop_var,
            self.w,
            self.standardize,
        )
        self.statistic = aux[0]
        self.core_data = aux[1]
        # Keep a handle on the estimator function that produced the statistic.
        self._function = _spatial_dissim