"""
Wolfson Bipolarization Index Module
This module provides functions to calculate the Lorenz curve, Gini coefficient,
and Wolfson Bipolarization Index for a given distribution of income or wealth.
Author:
Serge Rey <srey@sdsu.edu>
"""
import numpy as np
from .gini import Gini
from .utils import consistent_input
# Names exported by a star-import of this module.
__all__ = ["wolfson", "lorenz_curve"]
@consistent_input
def lorenz_curve(data):
    """
    Calculate the Lorenz curve for a given distribution.

    The values are sorted in ascending order, cumulated, and divided by
    their total. A leading zero is prepended so the curve starts at the
    origin ``(0, 0)`` and ends at ``(1, 1)``.

    Parameters
    ----------
    data : array-like
        Income or wealth values. Multi-dimensional input is flattened, so
        every value counts as one observation.
        NOTE(review): DataFrame input selected through a ``column`` keyword
        is presumably resolved by the ``consistent_input`` decorator before
        this body runs -- confirm against ``utils.consistent_input``.

    Returns
    -------
    tuple of numpy.ndarray
        Two arrays of length ``n + 1`` (``n`` being the number of
        observations): the cumulative share of the population and the
        cumulative share of the income/wealth.

    Raises
    ------
    ValueError
        If ``data`` contains no observations, or if the values sum to
        zero (the shares would be 0/0 and therefore undefined).

    Examples
    --------
    >>> income = [20000, 25000, 27000, 30000, 35000, 45000, 60000, 75000, 80000, 120000]
    >>> population, income_share = lorenz_curve(income)
    >>> print(population[:2], income_share[:2])
    [0.  0.1] [0.         0.03868472]
    """
    # Cast to float so integer input cannot overflow while accumulating, and
    # sort the flattened values (axis=None) so the ordering is global rather
    # than per-row for multi-dimensional input.
    sorted_y = np.sort(np.asarray(data, dtype=float), axis=None)
    n_obs = sorted_y.size
    if n_obs == 0:
        raise ValueError("lorenz_curve requires at least one observation.")

    # Prepend 0 so the curve starts at the origin.
    cumulative_y = np.insert(np.cumsum(sorted_y), 0, 0.0)
    total = cumulative_y[-1]
    if total == 0:
        raise ValueError(
            "lorenz_curve is undefined when the values sum to zero."
        )
    cumulative_y = cumulative_y / total

    # Size the population grid from the sorted array so both outputs always
    # have the same length.
    cumulative_population = np.linspace(0, 1, n_obs + 1)
    return cumulative_population, cumulative_y
@consistent_input
def wolfson(data):
    """
    Calculate the Wolfson Bipolarization Index for a given income distribution.

    The index is built from the polarization curve, a rotation and
    rescaling of the Lorenz curve by the median income:

    .. math::

        W = (2D_{50} - G)\\frac{\\mu}{m}

    where :math:`D_{50} = 0.5 - L(0.5)`, :math:`L(0.5)` is the value of the
    Lorenz curve at the median, :math:`G` is the Gini index, :math:`\\mu`
    is the mean, and :math:`m` is the median.

    See: :cite:`wolfson1994WhenInequalities`.

    Parameters
    ----------
    data : array-like or array
        A sequence or NumPy array representing the income or
        wealth distribution.
        NOTE(review): the DataFrame call with ``column=...`` shown in the
        examples is presumably handled by the ``consistent_input``
        decorator -- confirm against ``utils.consistent_input``.

    Returns
    -------
    float
        The Wolfson Bipolarization Index value.

    Examples
    --------
    >>> import pandas as pd
    >>> income_distribution = [20000, 25000, 27000, 30000, 35000, 45000, 60000,
    ...                        75000, 80000, 120000]
    >>> wolfson_index = wolfson(income_distribution)
    >>> print(f"Wolfson Bipolarization Index: {wolfson_index:.4f}")
    Wolfson Bipolarization Index: 0.2013
    >>> df = pd.DataFrame({'income': [6, 6, 8, 8, 10, 10, 12, 12]})
    >>> wolfson_index = wolfson(df, column='income')
    >>> print(f"Wolfson Bipolarization Index: {wolfson_index:.4f}")
    Wolfson Bipolarization Index: 0.0833
    """
    incomes = np.array(data)

    # Lorenz ordinate at the 50th population percentile, read off the
    # piecewise-linear curve by interpolation.
    population_share, income_share = lorenz_curve(incomes)
    lorenz_at_half = np.interp(0.5, population_share, income_share)
    median_gap = 0.5 - lorenz_at_half

    mean_to_median = incomes.mean() / np.median(incomes)
    gini = Gini(incomes).g

    return (2 * median_gap - gini) * mean_to_median