# Source code for inequality.wolfson

"""
Wolfson Bipolarization Index Module

This module provides functions to calculate the Lorenz curve and the Wolfson
Bipolarization Index for a given distribution of income or wealth. The Gini
coefficient used by the Wolfson index is imported from the sibling ``gini``
module.

Author:
Serge Rey <srey@sdsu.edu>
"""

import numpy as np

from .gini import Gini
from .utils import consistent_input

__all__ = ["wolfson", "lorenz_curve"]


@consistent_input
def lorenz_curve(data):
    """
    Compute the points of the Lorenz curve for a distribution.

    The values are sorted in ascending order and accumulated; the running
    totals are then expressed as a fraction of the grand total and paired
    with evenly spaced population shares. Both returned arrays start at 0
    (the origin of the curve) and therefore hold ``len(data) + 1`` points.

    The input can be a sequence, a NumPy array, or a Pandas DataFrame. If a
    DataFrame is provided, the `column` keyword must be used to name the
    column holding the income or wealth values (that keyword is not part of
    this function's own signature; presumably it is consumed by the
    ``consistent_input`` decorator -- confirm against ``utils``).

    Parameters
    ----------
    data : array-like or array
        A sequence or NumPy array representing the income or
        wealth distribution.

    Returns
    -------
    tuple
        Two numpy arrays: the cumulative share of the population, followed
        by the cumulative share of the income/wealth held by that share.

    Example
    -------
    >>> income = [20000, 25000, 27000, 30000, 35000, 45000, 60000, 75000, 80000, 120000]
    >>> population, income_share = lorenz_curve(income)
    >>> print(population[:2], income_share[:2])
    [0.  0.1] [0.         0.03868472]
    """
    # Running totals of the ascending values, with a leading 0 so the curve
    # begins at the origin.
    running_total = np.insert(np.cumsum(np.sort(data)), 0, 0)

    # The last running total is the grand total; dividing by it turns the
    # totals into shares in [0, 1].
    value_share = running_total / running_total[-1]

    # One more point than observations, matching the prepended 0 above.
    population_share = np.linspace(0, 1, len(data) + 1)

    return population_share, value_share



# [docs]  (link marker left over from the rendered documentation page; not code)
@consistent_input
def wolfson(data):
    """
    Compute the Wolfson Bipolarization Index of an income distribution.

    The index is built from the polarization curve, a rotation and
    rescaling of the Lorenz curve by the median income:

    .. math::

       W = (2D_{50} - G)\\frac{\\mu}{m}

    Where :math:`D_{50} =0.5 - L(0.5)`, :math:`L(0.5)` is the value of the
    Lorenz curve at the median, :math:`G` is the Gini index, :math:`\\mu`
    is the mean, and :math:`m` is the median.

    :math:`L(0.5)` is obtained by linear interpolation between the points
    returned by ``lorenz_curve``.

    See: :cite:`wolfson1994WhenInequalities`.

    Parameters
    ----------
    data : array-like or array
        A sequence or NumPy array representing the income or
        wealth distribution. As the second example shows, a Pandas
        DataFrame can also be passed together with a ``column`` keyword
        (presumably handled by the ``consistent_input`` decorator --
        confirm against ``utils``).

    Returns
    -------
    float
        The Wolfson Bipolarization Index value.

    Example
    -------
    >>> import pandas as pd
    >>> income_distribution = [20000, 25000, 27000, 30000, 35000, 45000, 60000,
    ...                        75000, 80000, 120000]
    >>> wolfson_index = wolfson(income_distribution)
    >>> print(f"Wolfson Bipolarization Index: {wolfson_index:.4f}")
    Wolfson Bipolarization Index: 0.2013

    >>> df = pd.DataFrame({'income': [6, 6, 8, 8, 10, 10, 12, 12]})
    >>> wolfson_index = wolfson(df, column='income')
    >>> print(f"Wolfson Bipolarization Index: {wolfson_index:.4f}")
    Wolfson Bipolarization Index: 0.0833
    """
    values = np.array(data)
    median = np.median(values)

    # Lorenz curve as (population share, income share) points.
    population_share, income_share = lorenz_curve(values)

    # D50: distance between the equality line and the Lorenz curve at the
    # 50th population percentile.
    half_gap = 0.5 - np.interp(0.5, population_share, income_share)

    # mu / m: rescales the index by the median.
    mean_to_median = values.mean() / median

    gini = Gini(values).g

    return (2 * half_gap - gini) * mean_to_median