# Source code for segregation.batch.batch_compute

"""Batch compute wrappers for calculating all relevant statistics at once."""

import inspect
import warnings

import pandas as pd
from tqdm.auto import tqdm

from .. import multigroup, singlegroup
from .._base import SpatialImplicitIndex
from ..dynamics import compute_multiscalar_profile

# Registries mapping class name -> class object, filled by inspecting the
# ``singlegroup`` and ``multigroup`` modules. The ``implicit_*`` registries keep
# only the classes whose *direct* bases include SpatialImplicitIndex (compared
# by string representation).
singlegroup_classes = {}
multigroup_classes = {}
implicit_single_indices = {}
implicit_multi_indices = {}

for name, obj in inspect.getmembers(singlegroup):
    if not inspect.isclass(obj):
        continue
    singlegroup_classes[name] = obj
    if any(str(base) == str(SpatialImplicitIndex) for base in obj.__bases__):
        implicit_single_indices[name] = obj

for name, obj in inspect.getmembers(multigroup):
    if not inspect.isclass(obj):
        continue
    multigroup_classes[name] = obj
    if any(str(base) == str(SpatialImplicitIndex) for base in obj.__bases__):
        implicit_multi_indices[name] = obj


def batch_compute_singlegroup(
    gdf, group_pop_var, total_pop_var, progress_bar=True, **kwargs
):
    """Batch compute single-group indices.

    Each class registered in ``singlegroup_classes`` is instantiated as
    ``cls(gdf, group_pop_var, total_pop_var, **kwargs)`` (in sorted name order)
    and its ``statistic`` attribute is collected. Warnings emitted while
    fitting are suppressed.

    Parameters
    ----------
    gdf : DataFrame or GeoDataFrame
        DataFrame holding demographic data for study region
    group_pop_var : str
        The name of variable in data that contains the population size of the
        group of interest
    total_pop_var : str
        Variable in data that contains the total population count of the unit
    progress_bar : bool
        Whether to show a progress bar during calculation
    **kwargs : dict
        additional keyword arguments passed to each index (e.g. for setting a
        random seed in indices like ModifiedGini or ModifiedDissim)

    Returns
    -------
    pandas.DataFrame
        dataframe with statistic name as dataframe index (named ``"Name"``)
        and statistic value, rounded to 4 decimals, in a single
        ``"Statistic"`` column
    """
    index_names = sorted(singlegroup_classes)
    fitted = {}
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # ``disable`` turns the bar into a no-op, so a single loop serves both
        # modes; the context manager closes the bar even if an index raises.
        with tqdm(total=len(index_names), disable=not progress_bar) as pbar:
            for each in index_names:
                pbar.set_description(each)
                fitted[each] = singlegroup_classes[each](
                    gdf, group_pop_var, total_pop_var, **kwargs
                ).statistic
                pbar.update(1)
        fitted = pd.DataFrame.from_dict(fitted, orient="index").round(4)
        fitted.columns = ["Statistic"]
        fitted.index.name = "Name"
    return fitted
def batch_compute_multigroup(gdf, groups, progress_bar=False, **kwargs):
    """Batch compute multi-group indices.

    Each class registered in ``multigroup_classes`` is instantiated as
    ``cls(gdf, groups, **kwargs)`` (in sorted name order) and its
    ``statistic`` attribute is collected. Warnings emitted while fitting are
    suppressed.

    Parameters
    ----------
    gdf : DataFrame or GeoDataFrame
        DataFrame holding demographic data for study region
    groups : list
        The variables names in data of the groups of interest of the analysis.
    progress_bar : bool
        Whether to show a progress bar during calculation. Defaults to False
        so that existing callers keep the previous silent behavior.
    **kwargs : dict
        additional keyword arguments passed to each index (e.g. for setting a
        random seed in indices like ModifiedGini or ModifiedDissim)

    Returns
    -------
    pandas.DataFrame
        dataframe with statistic name as dataframe index (named ``"Name"``)
        and statistic value, rounded to 4 decimals, in a single
        ``"Statistic"`` column
    """
    index_names = sorted(multigroup_classes)
    fitted = {}
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # A disabled bar is a no-op, so the loop is identical in both modes;
        # the context manager closes the bar even if an index raises.
        with tqdm(total=len(index_names), disable=not progress_bar) as pbar:
            for each in index_names:
                pbar.set_description(each)
                fitted[each] = multigroup_classes[each](
                    gdf, groups, **kwargs
                ).statistic
                pbar.update(1)
        fitted = pd.DataFrame.from_dict(fitted, orient="index").round(4)
        fitted.columns = ["Statistic"]
        fitted.index.name = "Name"
    return fitted
def batch_multiscalar_singlegroup(
    gdf, distances, group_pop_var, total_pop_var, progress_bar=True, **kwargs
):
    """Batch compute multiscalar profiles for single-group indices.

    ``compute_multiscalar_profile`` is called once for each class registered
    in ``implicit_single_indices`` (in sorted name order) and the resulting
    profiles are concatenated column-wise. Warnings emitted during the
    computation are suppressed.

    Parameters
    ----------
    gdf : DataFrame or GeoDataFrame
        DataFrame holding demographic data for study region
    distances : list
        list of floats representing bandwidth distances that define a local
        environment.
    group_pop_var : str
        The name of variable in data that contains the population size of the
        group of interest
    total_pop_var : str
        Variable in data that contains the total population count of the unit
    progress_bar : bool
        Whether to show a progress bar during calculation
    **kwargs : dict
        additional keyword arguments passed to each index (e.g. for setting a
        random seed in indices like ModifiedGini or ModifiedDissim)

    Returns
    -------
    pandas.DataFrame
        pandas Dataframe with distance as dataframe index and each segregation
        statistic as dataframe columns
    """
    index_names = sorted(implicit_single_indices)
    profs = []
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # ``disable`` turns the bar into a no-op, so a single loop serves both
        # modes; the context manager closes the bar even if a profile raises.
        with tqdm(total=len(index_names), disable=not progress_bar) as pbar:
            for idx in index_names:
                pbar.set_description(idx)
                profs.append(
                    compute_multiscalar_profile(
                        gdf=gdf,
                        segregation_index=implicit_single_indices[idx],
                        distances=distances,
                        group_pop_var=group_pop_var,
                        total_pop_var=total_pop_var,
                        **kwargs
                    )
                )
                pbar.update(1)
        df = pd.concat(profs, axis=1)
    return df
def batch_multiscalar_multigroup(gdf, distances, groups, progress_bar=True, **kwargs):
    """Batch compute multiscalar profiles for multi-group indices.

    ``compute_multiscalar_profile`` is called once for each class registered
    in ``implicit_multi_indices`` (in sorted name order) and the resulting
    profiles are concatenated column-wise. Warnings emitted during the
    computation are suppressed.

    Parameters
    ----------
    gdf : DataFrame or GeoDataFrame
        DataFrame holding demographic data for study region
    distances : list
        list of floats representing bandwidth distances that define a local
        environment.
    groups : list
        The variables names in data of the groups of interest of the analysis.
    progress_bar : bool
        Whether to show a progress bar during calculation
    **kwargs : dict
        additional keyword arguments passed to each index (e.g. for setting a
        random seed in indices like ModifiedGini or ModifiedDissim)

    Returns
    -------
    pandas.DataFrame
        pandas Dataframe with distance as dataframe index and each segregation
        statistic as dataframe columns
    """
    index_names = sorted(implicit_multi_indices)
    profs = []
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # ``disable`` turns the bar into a no-op, so a single loop serves both
        # modes; the context manager closes the bar even if a profile raises.
        with tqdm(total=len(index_names), disable=not progress_bar) as pbar:
            for idx in index_names:
                pbar.set_description(idx)
                profs.append(
                    compute_multiscalar_profile(
                        gdf=gdf,
                        segregation_index=implicit_multi_indices[idx],
                        distances=distances,
                        groups=groups,
                        **kwargs
                    )
                )
                pbar.update(1)
        df = pd.concat(profs, axis=1)
    return df