Source code for esda.geary

"""
Geary's C statistic for spatial autocorrelation
"""
__author__ = "Serge Rey <sjsrey@gmail.com> "

import warnings

import numpy as np
import scipy.stats as stats
from libpysal import weights

from .tabular import _univariate_handler

__all__ = ["Geary"]


[docs] class Geary(object): """ Global Geary C Autocorrelation statistic Parameters ---------- y : array (n, 1) attribute vector w : W spatial weights transformation : {'R', 'B', 'D', 'U', 'V'} weights transformation, default is row-standardized. Other options include "B": binary, "D": doubly-standardized, "U": untransformed (general weights), "V": variance-stabilizing. permutations : int number of random permutations for calculation of pseudo-p_values Attributes ---------- y : array original variable w : W spatial weights permutations : int number of permutations C : float value of statistic EC : float expected value VC : float variance of G under normality assumption z_norm : float z-statistic for C under normality assumption z_rand : float z-statistic for C under randomization assumption p_norm : float p-value under normality assumption (one-tailed) p_rand : float p-value under randomization assumption (one-tailed) sim : array (if permutations!=0) vector of I values for permutated samples p_sim : float (if permutations!=0) p-value based on permutations (one-tailed) null: sptial randomness alternative: the observed C is extreme it is either extremely high or extremely low EC_sim : float (if permutations!=0) average value of C from permutations VC_sim : float (if permutations!=0) variance of C from permutations seC_sim : float (if permutations!=0) standard deviation of C under permutations. z_sim : float (if permutations!=0) standardized C based on permutations p_z_sim : float (if permutations!=0) p-value based on standard normal approximation from permutations (one-tailed) Examples -------- >>> import libpysal >>> from esda.geary import Geary >>> w = libpysal.io.open(libpysal.examples.get_path("book.gal")).read() >>> f = libpysal.io.open(libpysal.examples.get_path("book.txt")) >>> y = np.array(f.by_col['y']) >>> c = Geary(y,w,permutations=0) >>> round(c.C,7) 0.3330108 >>> round(c.p_norm,7) 9.2e-05 >>> Notes ----- Technical details and derivations can be found in :cite:`cliff81`. """
[docs] def __init__(self, y, w, transformation="r", permutations=999): if not isinstance(w, weights.W): raise TypeError( f"w must be a pysal weights object, got {type(w)} instead." ) y = np.asarray(y).flatten() self.n = len(y) self.y = y w.transform = transformation self.w = w self._focal_ix, self._neighbor_ix = w.sparse.nonzero() self._weights = w.sparse.data self.permutations = permutations self.__moments() xn = range(len(y)) self.xn = xn self.y2 = y * y yd = y - y.mean() yss = sum(yd * yd) self.den = yss * self.w.s0 * 2.0 self.C = self.__calc(y) de = self.C - 1.0 self.EC = 1.0 self.z_norm = de / self.seC_norm self.z_rand = de / self.seC_rand if de > 0: self.p_norm = stats.norm.sf(self.z_norm) self.p_rand = stats.norm.sf(self.z_rand) else: self.p_norm = stats.norm.cdf(self.z_norm) self.p_rand = stats.norm.cdf(self.z_rand) if permutations: sim = [ self.__calc(np.random.permutation(self.y)) for i in range(permutations) ] self.sim = sim = np.array(sim) above = sim >= self.C larger = sum(above) if (permutations - larger) < larger: larger = permutations - larger self.p_sim = (larger + 1.0) / (permutations + 1.0) self.EC_sim = sum(sim) / permutations self.seC_sim = np.array(sim).std() self.VC_sim = self.seC_sim**2 self.z_sim = (self.C - self.EC_sim) / self.seC_sim self.p_z_sim = stats.norm.sf(np.abs(self.z_sim))
@property def _statistic(self): """a standardized accessor for esda statistics""" return self.C def __moments(self): y = self.y n = self.n w = self.w s0 = w.s0 s1 = w.s1 s2 = w.s2 s02 = s0 * s0 yd = y - y.mean() yd4 = yd**4 yd2 = yd**2 n2 = n * n k = (yd4.sum() / n) / ((yd2.sum() / n) ** 2) A = (n - 1) * s1 * (n2 - 3 * n + 3 - (n - 1) * k) B = (1.0 / 4) * ((n - 1) * s2 * (n2 + 3 * n - 6 - (n2 - n + 2) * k)) C = s02 * (n2 - 3 - (n - 1) ** 2 * k) vc_rand = (A - B + C) / (n * (n - 2) * (n - 3) * s02) vc_norm = (1 / (2 * (n + 1) * s02)) * ((2 * s1 + s2) * (n - 1) - 4 * s02) self.VC_rand = vc_rand self.VC_norm = vc_norm self.seC_rand = vc_rand ** (0.5) self.seC_norm = vc_norm ** (0.5) def __calc(self, y): num = (self._weights * ((y[self._focal_ix] - y[self._neighbor_ix]) ** 2)).sum() a = (self.n - 1) * num return a / self.den
[docs] @classmethod def by_col( cls, df, cols, w=None, inplace=False, pvalue="sim", outvals=None, **stat_kws ): """ Function to compute a Geary statistic on a dataframe Parameters ---------- df : pandas.DataFrame a pandas dataframe with a geometry column cols : string or list of string name or list of names of columns to use to compute the statistic w : pysal weights object a weights object aligned with the dataframe. If not provided, this is searched for in the dataframe's metadata inplace : bool a boolean denoting whether to operate on the dataframe inplace or to return a series contaning the results of the computation. If operating inplace, with default configurations, the derived columns will be named like 'column_geary' and 'column_p_sim' pvalue : string a string denoting which pvalue should be returned. Refer to the the Geary statistic's documentation for available p-values outvals : list of strings list of arbitrary attributes to return as columns from the Geary statistic **stat_kws : dict options to pass to the underlying statistic. For this, see the documentation for the Geary statistic. Returns -------- If inplace, None, and operation is conducted on dataframe in memory. Otherwise, returns a copy of the dataframe with the relevant columns attached. Notes ----- Technical details and derivations can be found in :cite:`cliff81`. """ msg = ( "The `.by_col()` methods are deprecated and will be " "removed in a future version of `esda`." ) warnings.warn(msg, FutureWarning) return _univariate_handler( df, cols, w=w, inplace=inplace, pvalue=pvalue, outvals=outvals, stat=cls, swapname=cls.__name__.lower(), **stat_kws )