Source code for esda.losh
import warnings
import libpysal as lp
import numpy as np
from scipy import stats
from sklearn.base import BaseEstimator
[docs]
class LOSH(BaseEstimator):
"""Local spatial heteroscedasticity (LOSH)"""
[docs]
def __init__(self, connectivity=None, inference=None):
"""
Initialize a losh estimator
Parameters
----------
connectivity : scipy.sparse matrix object
the connectivity structure describing the
relationships between observed units.
inference : str
describes type of inference to be used. options are
"chi-square" or "permutation" methods.
Attributes
----------
Hi : numpy array
Array of LOSH values for each spatial unit.
ylag : numpy array
Spatially lagged y values.
yresid : numpy array
Spatially lagged residual values.
VarHi : numpy array
Variance of Hi.
pval : numpy array
P-values for inference based on either
"chi-square" or "permutation" methods.
"""
self.connectivity = connectivity
self.inference = inference
[docs]
def fit(self, y, a=2):
"""
Parameters
----------
y : numpy.ndarray
array containing continuous data
a : int
residual multiplier. Default is 2 in order
to generate a variance measure. Users may
use 1 for absolute deviations.
Returns
-------
the fitted estimator.
Notes
-----
Technical details and derivations can be found in :cite:`OrdGetis2012`.
Examples
--------
>>> import libpysal
>>> w = libpysal.io.open(libpysal.examples.get_path("stl.gal")).read()
>>> f = libpysal.io.open(libpysal.examples.get_path("stl_hom.txt"))
>>> y = np.array(f.by_col['HR8893'])
>>> from esda import losh
>>> ls = losh(connectivity=w, inference="chi-square").fit(y)
>>> np.round(ls.Hi[0], 3)
>>> np.round(ls.pval[0], 3)
Boston housing data replicating R spdep::LOSH()
>>> import libpysal
>>> import geopandas as gpd
>>> boston = libpysal.examples.load_example('Bostonhsg')
>>> boston_ds = gpd.read_file(boston.get_path('boston.shp'))
>>> w = libpysal.weights.Queen.from_dataframe(boston_ds)
>>> ls = losh(connectivity=w, inference="chi-square").fit(boston['NOX'])
>>> np.round(ls.Hi[0], 3)
>>> np.round(ls.VarHi[0], 3)
"""
y = np.asarray(y).flatten()
w = self.connectivity
self.Hi, self.ylag, self.yresid, self.VarHi = self._statistic(y, w, a)
if self.inference is None:
return self
elif self.inference == "chi-square":
if a != 2:
warnings.warn(
"Chi-square inference assumes that a=2, but "
f"a={a}. This means the inference will be invalid!"
)
else:
dof = 2 / self.VarHi
Zi = (2 * self.Hi) / self.VarHi
self.pval = 1 - stats.chi2.cdf(Zi, dof)
else:
raise NotImplementedError(
"The requested inference method "
f"({self.inference}) is not currently supported!"
)
return self
@staticmethod
def _statistic(y, w, a):
# Define what type of variance to use
if a is None:
a = 2
else:
a = a
rowsum = np.array(w.sparse.sum(axis=1)).flatten()
# Calculate spatial mean
ylag = lp.weights.lag_spatial(w, y) / rowsum
# Calculate and adjust residuals based on multiplier
yresid = abs(y - ylag) ** a
# Calculate denominator of Hi equation
denom = np.mean(yresid) * np.array(rowsum)
# Carry out final Hi calculation
Hi = lp.weights.lag_spatial(w, yresid) / denom
# Calculate average of residuals
yresid_mean = np.mean(yresid)
# Calculate VarHi
n = len(y)
squared_rowsum = np.asarray(w.sparse.multiply(w.sparse).sum(axis=1)).flatten()
term1 = ((n - 1) ** -1)
term2 = (denom**-2)
term3 = ((np.sum(yresid**2) / n) - yresid_mean**2)
term4 = ((n * squared_rowsum) - (rowsum**2))
VarHi = term1 * term2 * term3 * term4
return (Hi, ylag, yresid, VarHi)