Source code for segregation.dynamics.segregation_profile

"""Compute multiscalar segregation profiles."""

import warnings

import numpy as np
import pandas as pd
from libpysal.weights import Kernel
from pyproj.crs import CRS


def compute_multiscalar_profile(
    gdf,
    segregation_index=None,
    groups=None,
    group_pop_var=None,
    total_pop_var=None,
    distances=None,
    network=None,
    decay="linear",
    function="triangular",
    precompute=True,
    **kwargs
):
    """Compute multiscalar segregation profile.

    The requested segregation index is evaluated once without any spatial
    weighting (stored under distance ``0``) and then once for every value in
    ``distances``, each time with a larger local environment.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        geodataframe with rows as observations and columns as population
        variables. Note that if using a network distance, the coordinate
        system for this gdf should be 4326 (it is reprojected to EPSG:4326
        when it is not). If using Euclidean distance, this must be projected
        into planar coordinates like state plane or UTM.
    segregation_index : SpatialImplicit SegregationIndex Class
        a class from the library such as MultiInformationTheory, or MinMax
    groups : list
        list of population groups for calculating multigroup indices
    group_pop_var : str
        name of population group on gdf for calculating single group indices
    total_pop_var : str
        name of total population on gdf for calculating single group indices
    distances : list
        list of floats representing bandwidth distances that define a local
        environment.
    network : pandana.Network (optional)
        A pandana.Network likely created with
        `segregation.network.get_osm_network`.
    decay : str (optional)
        decay type to be used in pandana accessibility calculation
        options are {'linear', 'exp', 'flat'}. The default is 'linear'.
    function: 'str' (optional)
        which weighting function should be passed to libpysal.weights.Kernel
        must be one of: 'triangular','uniform','quadratic','quartic','gaussian'
    precompute: bool
        Whether the pandana.Network instance should precompute the range
        queries. This is True by default
    **kwargs : dict
        additional keyword arguments passed to each index (e.g. for setting a random
        seed in indices like ModifiedGini or ModifiedDissm)


    Returns
    -------
    pandas.Series
        Series with distances as index and index statistics as values. The
        entry at distance ``0`` is the index computed without a network or
        kernel weights. The series is named after the class of the computed
        index objects.

    Raises
    ------
    ValueError
        If ``segregation_index`` is missing, if neither ``groups`` nor
        ``group_pop_var`` is given, or if ``distances`` is missing or empty.

    Notes
    -----
    Based on Sean F. Reardon, Stephen A. Matthews, David O’Sullivan, Barrett A. Lee, Glenn Firebaugh, Chad R. Farrell, & Kendra Bischoff. (2008). The Geographic Scale of Metropolitan Racial Segregation. Demography, 45(3), 489–514. https://doi.org/10.1353/dem.0.0019.

    Reference: :cite:`reardon2008`.

    """
    if not segregation_index:
        raise ValueError("You must pass a segregation SpatialImplicit Index Class")
    if not groups and not group_pop_var:
        raise ValueError(
            "You must pass either `groups` (multigroup indices) or "
            "`group_pop_var` (single group indices)"
        )
    # Materialise once so one-shot iterables survive both `max()` and the loop.
    distances = [] if distances is None else list(distances)
    if not distances:
        raise ValueError("You must pass a non-empty list of distances")

    gdf = gdf.copy()
    indices = {}

    # Index without spatial weighting, recorded as the zero-distance entry.
    if groups:
        gdf[groups] = gdf[groups].astype(float)
        indices[0] = segregation_index(gdf, groups=groups, **kwargs).statistic
    else:
        indices[0] = segregation_index(
            gdf, group_pop_var=group_pop_var, total_pop_var=total_pop_var, **kwargs
        ).statistic

    # Population arguments shared by every distance-specific call below. When
    # both `group_pop_var` and `groups` are supplied, these calls use the
    # single-group variables (the zero-distance entry above uses `groups`).
    if group_pop_var:
        pop_args = {"group_pop_var": group_pop_var, "total_pop_var": total_pop_var}
    else:
        pop_args = {"groups": groups}

    # Every warning raised while the per-distance indices are built is silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        if network is not None:
            if not gdf.crs.equals(CRS(4326)):
                gdf = gdf.to_crs(epsg=4326)
            if precompute:
                # Precompute once for the largest distance; each index call
                # below is told not to precompute again.
                network.precompute(max(distances))
            for distance in distances:
                # Builtin float: the `np.float` alias no longer exists in
                # current NumPy releases.
                distance = float(distance)
                idx = segregation_index(
                    gdf,
                    network=network,
                    decay=decay,
                    distance=distance,
                    precompute=False,
                    **pop_args,
                    **kwargs
                )
                indices[distance] = idx.statistic
        else:
            for distance in distances:
                w = Kernel.from_dataframe(gdf, bandwidth=distance, function=function)
                idx = segregation_index(gdf, w=w, **pop_args, **kwargs)
                indices[distance] = idx.statistic

    series = pd.Series(indices, name=type(idx).__name__)
    series.index.name = "distance"
    return series