Source code for segregation.multigroup.multi_gini
"""Multigroup Gini index"""
__author__ = "Renan X. Cortes <renanc@ucr.edu>, Sergio J. Rey <sergio.rey@ucr.edu> and Elijah Knaap <elijah.knaap@ucr.edu>"
import numpy as np
from sklearn.metrics.pairwise import manhattan_distances
from geopandas import GeoDataFrame
from .._base import MultiGroupIndex, SpatialImplicitIndex
np.seterr(divide="ignore", invalid="ignore")
def _multi_gini_seg(data, groups):
"""Calculate Multigroup Gini Segregation index.
Parameters
----------
data : a pandas DataFrame
dataframe holding group data
groups : list of strings.
The variables names in data of the groups of interest of the analysis.
Returns
-------
statistic : float
Multigroup Gini Segregation Index
core_data : a pandas DataFrame
A pandas DataFrame that contains the columns used to perform the estimate.
Notes
-----
Based on Reardon, Sean F., and Glenn Firebaugh. "Measures of multigroup segregation." Sociological methodology 32.1 (2002): 33-67.
Reference: :cite:`reardon2002measures`.
"""
core_data = data[groups]
df = np.array(core_data)
K = df.shape[1]
T = df.sum()
ti = df.sum(axis=1)
pik = df / ti[:, None]
pik = np.nan_to_num(pik) # Replace NaN from zerodivision when unit has no population
Pk = df.sum(axis=0) / df.sum()
Is = (Pk * (1 - Pk)).sum()
elements_sum = np.empty(K)
for k in range(K):
aux = np.multiply(
np.outer(ti, ti), manhattan_distances(pik[:, k].reshape(-1, 1))
).sum()
elements_sum[k] = aux
multi_Gini_Seg = elements_sum.sum() / (2 * (T ** 2) * Is)
if isinstance(data, GeoDataFrame):
core_data = data[[data.geometry.name]].join(core_data)
return multi_Gini_Seg, core_data, groups
[docs]class MultiGini(MultiGroupIndex, SpatialImplicitIndex):
"""Multigroup Gini Index.
Parameters
----------
data : pandas.DataFrame or geopandas.GeoDataFrame, required
dataframe or geodataframe if spatial index holding data for location of interest
groups : list, required
list of columns on dataframe holding population totals for each group
w : libpysal.weights.KernelW, optional
lipysal spatial kernel weights object used to define an egohood
network : pandana.Network
pandana Network object representing the study area
distance : int
Maximum distance (in units of geodataframe CRS) to consider the extent of the egohood
decay : str
type of decay function to apply. Options include
precompute : bool
Whether to precompute the pandana Network object
Attributes
----------
statistic : float
Multigroup Dissimilarity Index value
core_data : a pandas DataFrame
DataFrame that contains the columns used to perform the estimate.
"""
[docs] def __init__(
self,
data,
groups,
w=None,
network=None,
distance=None,
decay='linear',
function='triangular',
precompute=False,
**kwargs
):
"""Init."""
MultiGroupIndex.__init__(self, data, groups)
if any([w, network, distance]):
SpatialImplicitIndex.__init__(self, w, network, distance, decay, function, precompute)
aux = _multi_gini_seg(self.data, self.groups)
self.statistic = aux[0]
self.data = aux[1]
self.groups = aux[2]
self._function = _multi_gini_seg