Source code for esda.losh

import warnings

import libpysal as lp
import numpy as np
from scipy import stats
from sklearn.base import BaseEstimator


[docs] class LOSH(BaseEstimator): """Local spatial heteroscedasticity (LOSH)"""
[docs] def __init__(self, connectivity=None, inference=None): """ Initialize a losh estimator Parameters ---------- connectivity : scipy.sparse matrix object the connectivity structure describing the relationships between observed units. inference : str describes type of inference to be used. options are "chi-square" or "permutation" methods. Attributes ---------- Hi : numpy array Array of LOSH values for each spatial unit. ylag : numpy array Spatially lagged y values. yresid : numpy array Spatially lagged residual values. VarHi : numpy array Variance of Hi. pval : numpy array P-values for inference based on either "chi-square" or "permutation" methods. """ self.connectivity = connectivity self.inference = inference
[docs] def fit(self, y, a=2): """ Parameters ---------- y : numpy.ndarray array containing continuous data a : int residual multiplier. Default is 2 in order to generate a variance measure. Users may use 1 for absolute deviations. Returns ------- the fitted estimator. Notes ----- Technical details and derivations can be found in :cite:`OrdGetis2012`. Examples -------- >>> import libpysal >>> w = libpysal.io.open(libpysal.examples.get_path("stl.gal")).read() >>> f = libpysal.io.open(libpysal.examples.get_path("stl_hom.txt")) >>> y = np.array(f.by_col['HR8893']) >>> from esda import losh >>> ls = losh(connectivity=w, inference="chi-square").fit(y) >>> np.round(ls.Hi[0], 3) >>> np.round(ls.pval[0], 3) Boston housing data replicating R spdep::LOSH() >>> import libpysal >>> import geopandas as gpd >>> boston = libpysal.examples.load_example('Bostonhsg') >>> boston_ds = gpd.read_file(boston.get_path('boston.shp')) >>> w = libpysal.weights.Queen.from_dataframe(boston_ds) >>> ls = losh(connectivity=w, inference="chi-square").fit(boston['NOX']) >>> np.round(ls.Hi[0], 3) >>> np.round(ls.VarHi[0], 3) """ y = np.asarray(y).flatten() w = self.connectivity self.Hi, self.ylag, self.yresid, self.VarHi = self._statistic(y, w, a) if self.inference is None: return self elif self.inference == "chi-square": if a != 2: warnings.warn( "Chi-square inference assumes that a=2, but " f"a={a}. This means the inference will be invalid!" ) else: dof = 2 / self.VarHi Zi = (2 * self.Hi) / self.VarHi self.pval = 1 - stats.chi2.cdf(Zi, dof) else: raise NotImplementedError( "The requested inference method " f"({self.inference}) is not currently supported!" ) return self
@staticmethod def _statistic(y, w, a): # Define what type of variance to use if a is None: a = 2 else: a = a rowsum = np.array(w.sparse.sum(axis=1)).flatten() # Calculate spatial mean ylag = lp.weights.lag_spatial(w, y) / rowsum # Calculate and adjust residuals based on multiplier yresid = abs(y - ylag) ** a # Calculate denominator of Hi equation denom = np.mean(yresid) * np.array(rowsum) # Carry out final Hi calculation Hi = lp.weights.lag_spatial(w, yresid) / denom # Calculate average of residuals yresid_mean = np.mean(yresid) # Calculate VarHi n = len(y) squared_rowsum = np.asarray(w.sparse.multiply(w.sparse).sum(axis=1)).flatten() term1 = ((n - 1) ** -1) term2 = (denom**-2) term3 = ((np.sum(yresid**2) / n) - yresid_mean**2) term4 = ((n * squared_rowsum) - (rowsum**2)) VarHi = term1 * term2 * term3 * term4 return (Hi, ylag, yresid, VarHi)