# Source code for segregation.decomposition.decompose_segregation

"""
Decomposition Segregation based Metrics
"""

__author__ = "Renan X. Cortes <renanc@ucr.edu>, Elijah Knaap <elijah.knaap@ucr.edu>, and Sergio J. Rey <sergio.rey@ucr.edu>"


import warnings
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from segregation.inference.comparative import _generate_counterfactual

# Including old and new API in __all__ so users can use both.
# NOTE(review): only ``DecomposeSegregation`` is listed below -- confirm
# whether any legacy names are still meant to be exported.

__all__ = ["DecomposeSegregation"]

# The deprecation calls of the classes are located at the end of this script.
# NOTE(review): no deprecation calls appear in the part of the file reviewed
# here -- verify that they still exist.


def _decompose_segregation(index1, index2, counterfactual_approach="composition"):
    """Decompose segregation differences into spatial and attribute components.

    Given two segregation indices of the same type, use Shapley decomposition
    to measure whether the differences between index measures arise from
    differences in spatial structure or population structure.

    Parameters
    ----------
    index1 : segregation.SegIndex class
        First SegIndex class to compare.
    index2 : segregation.SegIndex class
        Second SegIndex class to compare.
    counterfactual_approach : str, one of
                              ["composition", "share", "dual_composition"]
        The technique used to generate the counterfactual population
        distributions. Forwarded unchanged to ``_generate_counterfactual``.

    Returns
    -------
    tuple
        (shapley spatial component,
         shapley attribute component,
         core data of index1 (a copy of ``index1.data``),
         core data of index2 (a copy of ``index2.data``),
         data with counterfactual variables for index1,
         data with counterfactual variables for index2,
         the ``counterfactual_approach`` that was passed in,
         dict of the four index values keyed
         ``"s1_a1"``, ``"s1_a2"``, ``"s2_a1"``, ``"s2_a2"``)

    Raises
    ------
    AssertionError
        If ``index1._function`` and ``index2._function`` do not compare equal.

    """
    # Validate before doing any work. An explicit raise is used instead of an
    # ``assert`` statement so the check is not stripped when Python runs with
    # ``-O``; the exception type and message are kept for existing callers.
    if not (index1._function == index2._function):
        raise AssertionError("Segregation indices must be of the same type")

    df1 = index1.data.copy()
    df2 = index2.data.copy()

    counterfac_df1, counterfac_df2 = _generate_counterfactual(
        df1,
        df2,
        index1.group_pop_var,
        index1.total_pop_var,
        index2.group_pop_var,
        index2.total_pop_var,
        counterfactual_approach=counterfactual_approach,
    )

    seg_func = index1._function

    # index for spatial 1, attribute 1
    G_S1_A1 = index1.statistic

    # index for spatial 2, attribute 2
    G_S2_A2 = index2.statistic

    # index for spatial 1 attribute 2 (counterfactual population for structure 1);
    # only the first element of what the segregation function returns is used
    G_S1_A2 = seg_func(
        counterfac_df1, "counterfactual_group_pop", "counterfactual_total_pop"
    )[0]

    # index for spatial 2 attribute 1 (counterfactual population for structure 2)
    G_S2_A1 = seg_func(
        counterfac_df2, "counterfactual_group_pop", "counterfactual_total_pop"
    )[0]

    # take the average difference in spatial structure, holding attributes constant
    C_S = 1 / 2 * (G_S1_A1 - G_S2_A1 + G_S1_A2 - G_S2_A2)

    # take the average difference in attributes, holding spatial structure constant
    C_A = 1 / 2 * (G_S1_A1 - G_S1_A2 + G_S2_A1 - G_S2_A2)

    results = {"s1_a1": G_S1_A1, "s1_a2": G_S1_A2, "s2_a1": G_S2_A1, "s2_a2": G_S2_A2}

    return (
        C_S,
        C_A,
        df1,
        df2,
        counterfac_df1,
        counterfac_df2,
        counterfactual_approach,
        results,
    )


class DecomposeSegregation:
    """Decompose segregation differences into spatial and attribute components.

    Given two segregation indices of the same type, use Shapley decomposition
    to measure whether the differences between index measures arise from
    differences in spatial structure or population structure.

    Parameters
    ----------
    index1 : segregation.SegIndex class
        First SegIndex class to compare.
    index2 : segregation.SegIndex class
        Second SegIndex class to compare.
    counterfactual_approach : str, one of {"composition", "share", "dual_composition"}
        The technique used to generate the counterfactual population
        distributions.

    Attributes
    ----------
    c_s : float
        Shapley's Spatial Component of the decomposition
    c_a : float
        Shapley's Attribute Component of the decomposition
    indices : dict
        Dictionary of index values for all four combinations of spatial/attribute data

    """

    def __init__(self, index1, index2, counterfactual_approach="composition"):
        """Run the decomposition and store its results on the instance."""
        # ``_decompose_segregation`` returns an 8-tuple; unpack it by position.
        (
            self.c_s,
            self.c_a,
            self._df1,
            self._df2,
            self._counterfac_df1,
            self._counterfac_df2,
            self._counterfactual_approach,
            self.indices,
        ) = _decompose_segregation(index1, index2, counterfactual_approach)

    def plot(
        self,
        plot_type="cdfs",
        figsize=None,
        city_a=None,
        city_b=None,
        cmap="OrRd",
        scheme="equalinterval",
        k=10,
        suptitle_size=16,
        title_size=12,
        savefig=None,
        dpi=300,
    ):
        """Plot maps or CDFs of urban contexts used in calculating the Decomposition class.

        Parameters
        ----------
        plot_type : str, {'cdfs', 'maps'}
            which type of plot to generate. Options include `cdfs` and `maps`, by default "cdfs"
        figsize : tuple, optional
            figsize parameter passed to matplotlib.pyplot. If None, defaults to
            (10, 10) for `cdfs` and (20, 20) for `maps`
        city_a : str, optional
            Name of the first "city" to be used in plotting. If None, defaults to 'City A'
        city_b : str, optional
            Name of the second "city" to be used in plotting. If None, defaults to 'City B'
        cmap : str, optional
            matplotlib colormap used to shade the map, by default "OrRd"
        scheme : str, optional
            pysal.mapclassify classification scheme used to shade the map, by default "equalinterval"
        k : int, optional
            number of classes in pysal.mapclassify classification scheme, by default 10
        suptitle_size : int, optional
            size parameter passed to `matplotlib.Figure.suptitle`, by default 16
        title_size : int, optional
            size parameter passed to `matplotlib.Axes.set_title`, by default 12
        savefig : str, optional
            Location to save the figure if desired. If None, fig will not be saved
        dpi : int, optional
            dpi parameter passed to matplotlib.pyplot, by default 300

        Returns
        -------
        matplotlib Axes or array of Axes
            For `cdfs`, the single Axes holding the empirical CDFs. For `maps`,
            the 2x2 array of Axes. A new matplotlib.Figure is created in both
            cases and optionally saved to disk.

        Raises
        ------
        ValueError
            If `plot_type` is neither "cdfs" nor "maps".
        """
        # Fail loudly on a typo instead of silently producing no figure.
        if plot_type not in ("cdfs", "maps"):
            raise ValueError(
                f"plot_type must be one of 'cdfs' or 'maps', got {plot_type!r}"
            )

        if not city_a:
            city_a = "City A"
        if not city_b:
            city_b = "City B"

        headline = f"Decomposing differences between\n{city_a} and {city_b}"
        components = f"Spatial Component = {round(self.c_s, 3)}, Attribute Component: {round(self.c_a, 3)}"

        if plot_type == "cdfs":
            fig, ax = plt.subplots(figsize=figsize or (10, 10))
            fig.suptitle(headline, size=suptitle_size)
            ax.set_title(components, size=title_size)

            # Stack both contexts into one long table, labelled by city, so
            # seaborn can split the curves with ``hue``.
            temp_a = self._counterfac_df1.copy()
            temp_a["Location"] = city_a
            temp_b = self._counterfac_df2.copy()
            temp_b["Location"] = city_b
            df = pd.concat([temp_a, temp_b]).reset_index()

            approach = self._counterfactual_approach
            if approach == "composition":
                sns.ecdfplot(data=df, x="group_composition", hue="Location", ax=ax)
            elif approach == "share":
                sns.ecdfplot(data=df, x="share", hue="Location", ax=ax)
            elif approach == "dual_composition":
                # Also draw the complement of the group composition.
                df["compl"] = 1 - df.group_composition
                sns.ecdfplot(data=df, x="group_composition", hue="Location", ax=ax)
                sns.ecdfplot(data=df, x="compl", hue="Location", ax=ax)

            # Save and return on every path: previously the "composition" and
            # "share" branches returned before reaching the save step, and the
            # "dual_composition" branch returned nothing.
            if savefig:
                fig.savefig(savefig, dpi=dpi)
            return ax

        # plot_type == "maps"
        fig, axs = plt.subplots(2, 2, figsize=figsize or (20, 20))
        # The component summary lives in the suptitle here: an axes-level title
        # would land on the bottom-right panel and be replaced by that panel's
        # own title below, so it would never be shown.
        fig.suptitle(f"{headline}\n{components}", size=suptitle_size)

        # (axes, data, column to shade, panel title) for each of the four maps.
        # NOTE(review): ``.plot(column=..., scheme=...)`` presumes the stored
        # frames are GeoDataFrames -- confirm against ``_generate_counterfactual``.
        panels = (
            # Original First Context (Upper Left)
            (
                axs[0, 0],
                self._counterfac_df1,
                "group_composition",
                f"{city_a}\nOriginal Composition",
            ),
            # Counterfactual First Context (Bottom Left)
            (
                axs[1, 0],
                self._counterfac_df1,
                "counterfactual_composition",
                f"{city_a}\nCounterfactual Composition",
            ),
            # Counterfactual Second Context (Upper Right)
            (
                axs[0, 1],
                self._counterfac_df2,
                "counterfactual_composition",
                f"{city_b}\nCounterfactual Composition",
            ),
            # Original Second Context (Bottom Right)
            (
                axs[1, 1],
                self._counterfac_df2,
                "group_composition",
                f"{city_b}\nOriginal Composition",
            ),
        )
        for panel_ax, frame, column, label in panels:
            frame.plot(
                column=column,
                cmap=cmap,
                scheme=scheme,
                k=k,
                legend=True,
                ax=panel_ax,
            )
            panel_ax.set_title(label, fontdict={"fontsize": title_size})
            panel_ax.axis("off")

        if savefig:
            fig.savefig(savefig, dpi=dpi)
        return axs