# Source code for inequality.pen

"""
Pen's Parade and Pengram Visualizations

This module provides functions to create Pen's Parade visualizations and
extend them with choropleth maps to display the spatial distribution of
values. The `pen` function generates a traditional Pen's Parade, which is
a visual representation of income distribution or similar data, typically
used to show inequality. The `pengram` function enhances this by combining
the Pen's Parade with a choropleth map, allowing for a richer analysis of
spatial data distributions.

Author
------
Serge Rey <srey@sdsu.edu>
"""

import math

import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.axes_grid1.inset_locator import inset_axes


def _check_deps(caller="pen"):
    """
    Check for required dependencies.

    Seaborn, mapclassify and pandas are imported lazily so that the module
    itself can be imported without them installed.

    Parameters
    ----------
    caller : str, optional
        Name of the calling function, used as the prefix of the error
        message. Default is "pen".

    Returns
    -------
    tuple
        A tuple containing the imported modules (Seaborn, mapclassify, pandas).

    Raises
    ------
    ImportError
        If any of the three packages cannot be imported. The message names
        the missing package and the conda command that installs it.
    """

    def _missing(display_name, package):
        # Single template so all three messages are formatted identically
        # (exactly one space between the two sentences).
        return (
            f"{caller} requires {display_name}. "
            f"Install it using `conda install -c conda-forge {package}`"
        )

    try:
        import seaborn as sns
    except ImportError as e:
        raise ImportError(_missing("Seaborn", "seaborn")) from e

    try:
        import mapclassify as mc
    except ImportError as e:
        raise ImportError(_missing("mapclassify", "mapclassify")) from e

    try:
        import pandas as pd
    except ImportError as e:
        raise ImportError(_missing("pandas", "pandas")) from e

    return sns, mc, pd


def pen(
    df,
    col,
    x,
    weight=None,
    ascending=True,
    xticks=True,
    total_bars=100,
    figsize=(8, 6),
    ax=None,
):
    """
    Creates the Pen's Parade visualization.

    This function generates a bar plot sorted by a specified column, with
    options to customize the x-axis ticks and figure size. The Pen's Parade
    is a visual representation of income distribution (or similar data),
    typically used to show inequality.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing the data to plot. It is not modified.
    col : str
        The column to plot on the y-axis.
    x : str
        The column to plot on the x-axis.
    weight : str, optional
        A column used to weight the bars in the Pen's Parade. Each
        observation is drawn as ``ceil(share * total_bars)`` adjacent bars,
        where ``share`` is its fraction of the weight total. Default is None.
    ascending : bool, optional
        Whether to sort the bars in ascending order by `col`.
        Default is True.
    xticks : bool, optional
        Whether to show x-axis ticks. Only consulted when `weight` is None.
        Default is True.
    total_bars : int, optional
        Target number of bars for the weighted Pen's Parade. Because each
        observation's count is rounded up, the number of bars actually drawn
        can exceed this value. Default is 100.
    figsize : tuple, optional
        The size of the figure as a tuple (width, height). Used only when
        `ax` is None. Default is (8, 6).
    ax : matplotlib.axes.Axes, optional
        Matplotlib Axes instance to plot on. If None, a new figure and axes
        will be created. Default is None.

    Returns
    -------
    matplotlib.axes.Axes
        A Matplotlib Axes object with the Pen's Parade plot.
    """
    sns, _, _ = _check_deps()

    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=figsize)

    if weight is None:
        dbfs = df.sort_values(col, ascending=ascending).reset_index(drop=True)
        sns.barplot(x=x, y=col, data=dbfs, ax=ax)
        ax.set_ylabel(col)
        ax.set_xlabel(x)
        # Rotation is applied through `ax` itself; pyplot-level calls would
        # act on the *current* axes, which need not be `ax`.
        ax.set_xticks(dbfs.index)
        ax.set_xticklabels(dbfs[x], rotation=90)
        if not xticks:
            ax.set(xticks=[])
            ax.set(xlabel="")
        return ax

    name = x

    # Number of bars per observation, kept in a local array so the caller's
    # DataFrame is left untouched.
    shares = df[weight] / df[weight].sum()
    num_bars = (
        (shares * total_bars).apply(math.ceil).astype(int).clip(lower=0).to_numpy()
    )

    # Positional repetition avoids per-row Series objects and the dtype
    # upcasting of `iterrows` (which can turn the integer count into a float).
    row_positions = np.repeat(np.arange(len(df)), num_bars)
    # A stable sort keeps the bars of one observation contiguous when several
    # observations share the same value of `col`.
    df_sorted = (
        df.iloc[row_positions]
        .sort_values(by=col, ascending=ascending, kind="stable")
        .reset_index(drop=True)
    )

    unique_obs = df[name].unique()
    colors = plt.get_cmap("tab20", len(unique_obs))
    color_map = {obs: colors(i) for i, obs in enumerate(unique_obs)}

    labels = df_sorted[name]
    bar_colors = labels.map(color_map).tolist()
    bar_positions = np.arange(len(df_sorted))
    bar_heights = df_sorted[col].to_numpy()
    bar_widths = 1  # Equal width for all bars

    ax.bar(
        bar_positions,
        bar_heights,
        width=bar_widths,
        color=bar_colors,
        edgecolor="black",
    )

    tick_width = plt.rcParams["xtick.major.width"]
    # Vertical gap between a bar top and its in-plot label: 5% of the
    # tallest bar, computed once instead of on every iteration.
    label_offset = 0.05 * df_sorted[col].max()

    first_positions = []
    first_labels = []
    current_label = None
    group_index = 0
    last_name = labels.iloc[-1] if len(labels) else None

    for position, (label, height) in enumerate(zip(labels, bar_heights)):
        if label == current_label:
            continue
        # Alternate label placement to limit overlap: even-numbered groups
        # (and the final group) are labelled on the axis, the others are
        # written above their first bar.
        if group_index % 2 == 0 or label == last_name:
            first_positions.append(position)
            first_labels.append(label)
        else:
            text_y_position = height + label_offset
            # The leader line runs from the bar top to the label anchor, so
            # it scales with the data rather than using a fixed offset.
            ax.plot(
                [position, position],
                [height, text_y_position],
                color="black",
                linewidth=tick_width,
            )
            ax.text(
                position,
                text_y_position,
                label,
                ha="center",
                rotation=90,
                fontsize=8,
            )
        current_label = label
        group_index += 1

    ax.set_xticks(first_positions)
    ax.set_xticklabels(first_labels, rotation=90, fontsize=8)
    ax.set_xlabel(name)
    ax.set_ylabel(col)
    ax.set_title(f"Weighted Pen Parade of {name} by {col}")
    plt.tight_layout()
    return ax
def pengram(
    gdf,
    col,
    name,
    figsize=(8, 6),
    k=5,
    scheme="quantiles",
    xticks=True,
    legend=True,
    leg_pos="lower right",
    fmt="{:.2f}",
    query=None,
    ax=None,
    inset_size="30%",
):
    """
    Pen's Parade combined with a choropleth map.

    This function generates a Pen's Parade plot combined with a choropleth
    map. Both plots are placed within the same subplot, with the choropleth
    map as the main plot and the Pen's Parade as an inset.

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        GeoDataFrame containing the data to plot. It is not modified.
    col : str
        The column to plot on the y-axis.
    name : str
        The name of the geographic units (e.g., states, regions).
    figsize : tuple, optional
        The size of the figure as a tuple (width, height). Used only when
        `ax` is None. Default is (8, 6).
    k : int, optional
        Number of classes for the classification scheme. Default is 5.
    scheme : str, optional
        Classification scheme to use (e.g., 'Quantiles').
        Default is 'quantiles'.
    xticks : bool, optional
        Whether to show x-axis ticks on the inset. Default is True.
    legend : bool, optional
        Whether to show the map legend. Default is True.
    leg_pos : str, optional
        The position of the legend on the choropleth map.
        Default is "lower right".
    fmt : str, optional
        Format string for legend labels. Default is "{:.2f}".
    query : list or str, optional
        Specific geographic units to highlight, matched against the `name`
        column. A single string is treated as a one-element list.
        Default is None (nothing is highlighted).
    ax : matplotlib.axes.Axes, optional
        Matplotlib Axes instance to plot on. If None, a new figure and axes
        will be created. Default is None.
    inset_size : str, optional
        Size of the inset plot as a percentage of the main plot.
        Default is "30%".

    Returns
    -------
    tuple of matplotlib.axes.Axes
        ``(ax, inset_ax)``: the Axes holding the choropleth map and the
        inset Axes holding the Pen's Parade.
    """
    # Pass the caller name so a missing dependency is reported against
    # `pengram` rather than the helper's default of `pen`.
    sns, mc, _ = _check_deps("pengram")

    # A bare string would otherwise be iterated character by character.
    if isinstance(query, str):
        query = [query]

    if ax is None:
        _, ax = plt.subplots(figsize=figsize)

    # Main plot: Choropleth map
    gdf.plot(
        column=col,
        scheme=scheme,
        k=k,
        ax=ax,
        legend=legend,
        legend_kwds={"loc": leg_pos, "fmt": fmt},
    )
    ax.axis("off")

    if query:
        highlight = gdf[gdf[name].isin(query)]
        # Skip the outline when none of the requested units are present.
        if not highlight.empty:
            highlight.boundary.plot(ax=ax, edgecolor="red", linewidth=2)

    # Inset plot: Pen's Parade
    inset_ax = inset_axes(ax, width=inset_size, height=inset_size, loc="upper right")

    binned = mc.classify(gdf[col], scheme, k=k)
    # `assign` works on a copy, so the class labels never leak into the
    # caller's GeoDataFrame.
    sgdf = (
        gdf.assign(_bin=binned.yb)
        .sort_values(by=col, ascending=True)
        .reset_index(drop=True)
    )

    sns.barplot(
        x=sgdf.index, y=col, hue="_bin", data=sgdf, palette="viridis", ax=inset_ax
    )
    inset_ax.set_ylabel(col)
    inset_ax.set_xlabel(name)
    inset_ax.set_title("Pen's Parade", fontsize=10)
    # Tick rotation is set on the inset directly rather than through pyplot,
    # which would act on whichever axes happens to be current.
    inset_ax.set_xticks(sgdf.index)
    inset_ax.set_xticklabels(sgdf[name], rotation=90, fontsize=8)

    if not xticks:
        inset_ax.set(xticks=[])
        inset_ax.set(xlabel="")

    if query:
        for obs in query:
            if obs in sgdf[name].values:
                # First matching row; its index equals the bar position
                # because the frame was re-indexed after sorting.
                obs_idx = sgdf[sgdf[name] == obs].index[0]
                rect = patches.Rectangle(
                    (obs_idx - 0.5, 0),
                    1,
                    sgdf.loc[obs_idx, col],
                    linewidth=2,
                    edgecolor="red",
                    facecolor="none",
                )
                inset_ax.add_patch(rect)

    # The hue legend of the inset duplicates the map legend; drop it if one
    # was created.
    inset_legend = inset_ax.get_legend()
    if inset_legend is not None:
        inset_legend.remove()

    return ax, inset_ax