"""Gini Segregation Index."""
__author__ = "Renan X. Cortes <renanc@ucr.edu>, Sergio J. Rey <sergio.rey@ucr.edu> and Elijah Knaap <elijah.knaap@ucr.edu>"
import geopandas as gpd
import numpy as np
from .._base import SingleGroupIndex, SpatialImplicitIndex
# numba is an optional accelerator: when it is missing, fall back to no-op
# stand-ins so the decorated functions below still run as plain Python/NumPy.
try:
    from numba import njit, jit, prange, boolean
except ImportError:  # ModuleNotFoundError is a subclass of ImportError

    def jit(*dec_args, **dec_kwargs):
        """Stand-in for ``numba.jit`` that leaves the function untouched.

        Both decorator spellings are supported:

        * ``@jit`` (bare) -- the function itself arrives as the only
          positional argument and is returned unchanged.
        * ``@jit(...)`` (with options) -- the options are ignored and a
          decorator returning the function unchanged is produced.
        """
        # Bare use: the sole positional argument is the function to decorate.
        if len(dec_args) == 1 and not dec_kwargs and callable(dec_args[0]):
            return dec_args[0]

        def intercepted_function(f, *f_args, **f_kwargs):
            return f

        return intercepted_function

    njit = jit
    prange = range
    boolean = bool
@njit(parallel=True, fastmath=True)
def _gini_vecp(pi: np.ndarray, ti: np.ndarray):
    """Memory efficient calculation of Gini.

    The pairwise sum ``sum_i sum_j t_i * t_j * |p_i - p_j|`` is accumulated
    one row at a time over the pairs ``i < j`` (then doubled), so only a
    one-dimensional slice is held at any moment instead of a full
    ``n`` by ``n`` matrix.

    Parameters
    ----------
    pi : np.ndarray
        area minority population counts
    ti : np.ndarray
        area total population counts

    Returns
    -------
    gini : float
        Gini coefficient, i.e. the pairwise sum above divided by
        ``2 * T**2 * P * (1 - P)`` where ``T`` is the total population and
        ``P`` the overall minority share. No guard is applied when that
        denominator is zero.
    """
    n = ti.shape[0]
    T = ti.sum()
    P = pi.sum() / T

    # Per-unit minority share; units with zero population get a share of 0.
    share = np.where(ti == 0, 0, pi / ti)

    # One-element array accumulator, updated in place inside the prange loop.
    num = np.zeros(1)
    for i in prange(n - 1):
        # Contribution of unit i against every later unit j > i.
        num += (ti[i] * ti[i + 1:] * np.abs(share[i] - share[i + 1:])).sum()

    # Each unordered pair was visited once; the full double sum counts it twice.
    num *= 2
    den = 2 * T * T * P * (1 - P)
    return (num / den)[0]
def _gini_seg(data, group_pop_var, total_pop_var):
    """Calculate Gini segregation index.

    Parameters
    ----------
    data : pandas.DataFrame or geopandas.GeoDataFrame
        Dataframe or geodataframe if spatial index holding data for location of interest
    group_pop_var : string
        Variable containing the population count of the group of interest
    total_pop_var : string
        Variable in data that contains the total population count of the unit

    Returns
    -------
    statistic : float
        Gini index statistic value
    core_data : pandas.DataFrame
        A pandas DataFrame that contains the columns used to perform the estimate
        (plus the active geometry column when ``data`` is a GeoDataFrame).

    Notes
    -----
    Based on Massey, Douglas S., and Nancy A. Denton. "The dimensions of residential segregation." Social forces 67.2 (1988): 281-315.

    Reference: :cite:`massey1988dimensions`.
    """
    # Raw counts are passed straight through; `_gini_vecp` derives the
    # per-unit shares itself and treats zero-population units as share 0.
    # No helper columns are added to `data`, so user columns that happen to
    # be called "ti" or "pi" are never overwritten.
    pi = data[group_pop_var].values
    ti = data[total_pop_var].values
    G = _gini_vecp(pi, ti)

    # Keep only the columns that fed the estimate (and geometry, if any).
    keep = [group_pop_var, total_pop_var]
    if isinstance(data, gpd.GeoDataFrame):
        keep.append(data.geometry.name)

    return G, data[keep]
class Gini(SingleGroupIndex, SpatialImplicitIndex):
    """Gini Index.

    Parameters
    ----------
    data : pandas.DataFrame or geopandas.GeoDataFrame, required
        dataframe or geodataframe if spatial index holding data for location of interest
    group_pop_var : str, required
        name of column on dataframe holding population totals for focal group
    total_pop_var : str, required
        name of column on dataframe holding total overall population
    w : libpysal.weights.KernelW, optional
        libpysal spatial kernel weights object used to define an egohood
    network : pandana.Network
        pandana Network object representing the study area
    distance : int
        Maximum distance (in units of geodataframe CRS) to consider the extent of the egohood
    decay : str
        type of decay function to apply; forwarded unchanged to
        ``SpatialImplicitIndex.__init__``
    function : str
        forwarded unchanged to ``SpatialImplicitIndex.__init__``; defaults to
        ``"triangular"`` (presumably the kernel function name -- confirm
        against ``SpatialImplicitIndex``)
    precompute : bool
        Whether to precompute the pandana Network object
    **kwargs
        accepted for call compatibility; not used by this constructor

    Attributes
    ----------
    statistic : float
        Gini Index
    data : a pandas DataFrame
        A pandas DataFrame that contains the columns used to perform the estimate.

    Notes
    -----
    Based on Massey, Douglas S., and Nancy A. Denton. "The dimensions of residential segregation." Social forces 67.2 (1988): 281-315.

    Reference: :cite:`massey1988dimensions`.
    """

    def __init__(
        self,
        data,
        group_pop_var,
        total_pop_var,
        w=None,
        network=None,
        distance=None,
        decay=None,
        function="triangular",
        precompute=None,
        **kwargs
    ):
        """Init."""
        SingleGroupIndex.__init__(self, data, group_pop_var, total_pop_var)

        # The spatial base class is only initialised when at least one of the
        # spatial arguments is truthy.
        if any([w, network, distance]):
            SpatialImplicitIndex.__init__(
                self, w, network, distance, decay, function, precompute
            )

        # Compute the index on whatever the base initialisers left in
        # self.data, then keep only the columns used for the estimate.
        statistic, core_data = _gini_seg(
            self.data, self.group_pop_var, self.total_pop_var
        )
        self.statistic = statistic
        self.data = core_data
        self._function = _gini_seg