# Source code for segregation.batch.batch_compute

"""Batch compute wrappers for calculating all relevant statistics at once."""

import inspect
import warnings

import pandas as pd
from tqdm.auto import tqdm

from .. import multigroup, singlegroup
from .._base import SpatialImplicitIndex
from ..dynamics import compute_multiscalar_profile

# Registries populated once, at import time, by scanning the index modules.
# ``*_classes`` map the member name of every class found in the module to the
# class object; ``implicit_*_indices`` keep only the classes that list
# ``SpatialImplicitIndex`` among their *direct* bases.
singlegroup_classes = {}
implicit_single_indices = {}
for name, obj in inspect.getmembers(singlegroup):
    if not inspect.isclass(obj):
        continue
    singlegroup_classes[name] = obj
    # Bases are matched through their string representation, not by identity.
    if any(str(base) == str(SpatialImplicitIndex) for base in obj.__bases__):
        implicit_single_indices[name] = obj

multigroup_classes = {}
implicit_multi_indices = {}
for name, obj in inspect.getmembers(multigroup):
    if not inspect.isclass(obj):
        continue
    multigroup_classes[name] = obj
    # Bases are matched through their string representation, not by identity.
    if any(str(base) == str(SpatialImplicitIndex) for base in obj.__bases__):
        implicit_multi_indices[name] = obj



# [docs]  (Sphinx "viewcode" link text left over from the HTML page; not code)
def batch_compute_singlegroup(
    gdf, group_pop_var, total_pop_var, progress_bar=True, **kwargs
):
    """Batch compute single-group indices.

    Every class registered in the module-level ``singlegroup_classes`` mapping
    is instantiated, in alphabetical order of its name, as
    ``cls(gdf, group_pop_var, total_pop_var, **kwargs)`` and its ``statistic``
    attribute is collected. All warnings emitted during the computation are
    suppressed.

    Parameters
    ----------
    gdf : DataFrame or GeoDataFrame
        DataFrame holding demographic data for study region
    group_pop_var : str
        The name of variable in data that contains the population size of the
        group of interest
    total_pop_var : str
        Variable in data that contains the total population count of the unit
    progress_bar: bool
        Whether to show a progress bar during calculation
    **kwargs : dict
        additional keyword arguments passed to each index (e.g. for setting a
        random seed in indices like ModifiedGini or ModifiedDissm)

    Returns
    -------
    pandas.DataFrame
        dataframe with statistic name as dataframe index (named ``"Name"``) and
        the statistic value, rounded to 4 decimals, in a single ``"Statistic"``
        column
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        index_names = sorted(singlegroup_classes)
        fitted = {}
        # One loop serves both modes: with ``disable=True`` every tqdm call is a
        # no-op, and the context manager closes the bar even if an index raises.
        with tqdm(total=len(index_names), disable=not progress_bar) as pbar:
            for index_name in index_names:
                pbar.set_description(index_name)
                fitted[index_name] = singlegroup_classes[index_name](
                    gdf, group_pop_var, total_pop_var, **kwargs
                ).statistic
                pbar.update(1)
        fitted = pd.DataFrame.from_dict(fitted, orient="index").round(4)
        fitted.columns = ["Statistic"]
        fitted.index.name = "Name"
    return fitted




# [docs]  (Sphinx "viewcode" link text left over from the HTML page; not code)
def batch_compute_multigroup(gdf, groups, **kwargs):
    """Batch compute multi-group indices.

    Each class in the module-level ``multigroup_classes`` mapping is
    instantiated, in alphabetical order of its name, as
    ``cls(gdf, groups, **kwargs)`` and its ``statistic`` attribute is gathered
    into one table. Warnings raised along the way are silenced.

    Parameters
    ----------
    gdf : DataFrame or GeoDataFrame
        DataFrame holding demographic data for study region
    groups : list
        The variables names in data of the groups of interest of the analysis.
    **kwargs : dict
        additional keyword arguments passed to each index (e.g. for setting a
        random seed in indices like ModifiedGini or ModifiedDissm)

    Returns
    -------
    pandas.DataFrame
        dataframe indexed by statistic name (index named ``"Name"``) with the
        statistic value, rounded to 4 decimals, in a ``"Statistic"`` column
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        statistics = {
            index_name: multigroup_classes[index_name](
                gdf, groups, **kwargs
            ).statistic
            for index_name in sorted(multigroup_classes)
        }
        table = pd.DataFrame.from_dict(statistics, orient="index").round(4)
        table.columns = ["Statistic"]
        table.index.name = "Name"
    return table




# [docs]  (Sphinx "viewcode" link text left over from the HTML page; not code)
def batch_multiscalar_singlegroup(
    gdf, distances, group_pop_var, total_pop_var, progress_bar=True, **kwargs
):
    """Batch compute multiscalar profiles for single-group indices.

    For every class in the module-level ``implicit_single_indices`` mapping
    (taken in alphabetical order of its name) ``compute_multiscalar_profile``
    is called, and the resulting profiles are concatenated column-wise.
    All warnings emitted during the computation are suppressed.

    Parameters
    ----------
    gdf : DataFrame or GeoDataFrame
        DataFrame holding demographic data for study region
    distances : list
        list of floats representing bandwidth distances that define a local
        environment.
    group_pop_var : str
        The name of variable in data that contains the population size of the
        group of interest
    total_pop_var : str
        Variable in data that contains the total population count of the unit
    progress_bar: bool
        Whether to show a progress bar during calculation
    **kwargs : dict
        additional keyword arguments forwarded to
        ``compute_multiscalar_profile`` for every index

    Returns
    -------
    pandas.DataFrame
        pandas Dataframe with distance as dataframe index and each segregation
        statistic as dataframe columns
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        index_names = sorted(implicit_single_indices)
        profs = []
        # One loop serves both modes: with ``disable=True`` every tqdm call is a
        # no-op, and the context manager closes the bar even if a profile fails.
        with tqdm(total=len(index_names), disable=not progress_bar) as pbar:
            for index_name in index_names:
                pbar.set_description(index_name)
                profs.append(
                    compute_multiscalar_profile(
                        gdf=gdf,
                        segregation_index=implicit_single_indices[index_name],
                        distances=distances,
                        group_pop_var=group_pop_var,
                        total_pop_var=total_pop_var,
                        **kwargs
                    )
                )
                pbar.update(1)
        return pd.concat(profs, axis=1)




# [docs]  (Sphinx "viewcode" link text left over from the HTML page; not code)
def batch_multiscalar_multigroup(gdf, distances, groups, progress_bar=True, **kwargs):
    """Batch compute multiscalar profiles for multi-group indices.

    For every class in the module-level ``implicit_multi_indices`` mapping
    (taken in alphabetical order of its name) ``compute_multiscalar_profile``
    is called, and the resulting profiles are concatenated column-wise.
    All warnings emitted during the computation are suppressed.

    Parameters
    ----------
    gdf : DataFrame or GeoDataFrame
        DataFrame holding demographic data for study region
    distances : list
        list of floats representing bandwidth distances that define a local
        environment.
    groups : list
        The variables names in data of the groups of interest of the analysis.
    progress_bar: bool
        Whether to show a progress bar during calculation
    **kwargs : dict
        additional keyword arguments forwarded to
        ``compute_multiscalar_profile`` for every index

    Returns
    -------
    pandas.DataFrame
        pandas Dataframe with distance as dataframe index and each segregation
        statistic as dataframe columns
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        index_names = sorted(implicit_multi_indices)
        profs = []
        # One loop serves both modes: with ``disable=True`` every tqdm call is a
        # no-op, and the context manager closes the bar even if a profile fails.
        with tqdm(total=len(index_names), disable=not progress_bar) as pbar:
            for index_name in index_names:
                pbar.set_description(index_name)
                profs.append(
                    compute_multiscalar_profile(
                        gdf=gdf,
                        segregation_index=implicit_multi_indices[index_name],
                        distances=distances,
                        groups=groups,
                        **kwargs
                    )
                )
                pbar.update(1)
        return pd.concat(profs, axis=1)