Source code for esda.gamma

"""
Gamma index for spatial autocorrelation


"""
__author__ = "Luc Anselin <luc.anselin@asu.edu>, Serge Rey <sjsrey@gmail.com>"

import warnings

import numpy as np
from libpysal.weights.spatial_lag import lag_spatial

from .crand import _prepare_univariate
from .crand import njit as _njit
from .tabular import _univariate_handler

__all__ = ["Gamma"]

PERMUTATIONS = 999


class Gamma(object):
    """Gamma index for spatial autocorrelation.

    Parameters
    ----------
    y : array
        variable measured across n spatial units
    w : W
        spatial weights instance
        can be binary or row-standardized
    operation : {'c', 's', 'a'}
        attribute similarity function where,
        'c' cross product
        's' squared difference
        'a' absolute difference
    standardize : {False, True}
        standardize variables first
        False, keep as is
        True, standardize to mean zero and variance one
    permutations : int
        number of random permutations for calculation of pseudo p-values

    Attributes
    ----------
    y : array
        original variable
    w : W
        original w object
    op : {'c', 's', 'a'}
        attribute similarity function, as per parameters
    stand : {False, True}
        standardization
    permutations : int
        number of permutations
    g : float
        value of Gamma index
    sim_g : array
        (if permutations>0)
        vector of Gamma index values for permuted samples
    p_sim_g : float
        (if permutations>0)
        pseudo p-value based on permutations
        null: spatial randomness
        alternative: the observed Gamma is more extreme than expected
        under randomness; implemented as a two-sided test (the smaller
        tail proportion is reported)
    g_z : float
        (if permutations>0)
        pseudo z-score of the observed Gamma relative to the permutation
        distribution
    mean_g : float
        average of permuted Gamma values
    min_g : float
        minimum of permuted Gamma values
    max_g : float
        maximum of permuted Gamma values

    Examples
    --------
    Use the same example as for join counts to show the similarity.

    >>> import libpysal, numpy as np
    >>> from esda.gamma import Gamma
    >>> w = libpysal.weights.lat2W(4,4)
    >>> y = np.ones(16)
    >>> y[0:8] = 0
    >>> np.random.seed(12345)
    >>> g = Gamma(y,w)
    >>> g.g
    20.0
    >>> round(g.g_z, 3)
    3.188
    >>> round(g.p_sim_g, 3)
    0.003
    >>> g.min_g
    0.0
    >>> g.max_g
    20.0
    >>> g.mean_g
    11.093093093093094
    >>> np.random.seed(12345)
    >>> g1 = Gamma(y,w,operation='s')
    >>> g1.g
    8.0
    >>> round(g1.g_z, 3)
    -3.706
    >>> g1.p_sim_g
    0.001
    >>> g1.min_g
    14.0
    >>> g1.max_g
    48.0
    >>> g1.mean_g
    25.623623623623622
    >>> np.random.seed(12345)
    >>> g2 = Gamma(y,w,operation='a')
    >>> g2.g
    8.0
    >>> round(g2.g_z, 3)
    -3.706
    >>> g2.p_sim_g
    0.001
    >>> g2.min_g
    14.0
    >>> g2.max_g
    48.0
    >>> g2.mean_g
    25.623623623623622
    >>> np.random.seed(12345)
    >>> g3 = Gamma(y,w,standardize=True)
    >>> g3.g
    32.0
    >>> round(g3.g_z, 3)
    3.706
    >>> g3.p_sim_g
    0.001
    >>> g3.min_g
    -48.0
    >>> g3.max_g
    20.0
    >>> g3.mean_g
    -3.2472472472472473
    >>> np.random.seed(12345)
    >>> def func(z,i,j):
    ...     q = z[i]*z[j]
    ...     return q
    ...
    >>> g4 = Gamma(y,w,operation=func)
    >>> g4.g
    20.0
    >>> round(g4.g_z, 3)
    3.188
    >>> round(g4.p_sim_g, 3)
    0.003

    Notes
    -----
    For further technical details see :cite:`Hubert_1981`.

    """

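    # The statistic computed below has the general Gamma form
    #     Gamma = sum_i sum_j w_ij * a_ij,
    # where a_ij is the attribute-similarity term selected by ``operation``:
    # y_i * y_j for 'c', (y_i - y_j)**2 for 's', |y_i - y_j| for 'a', or a
    # user-supplied callable op(z, i, j) for anything else.
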
    def __init__(
        self, y, w, operation="c", standardize=False, permutations=PERMUTATIONS
    ):
        if isinstance(standardize, str):
            standardize = standardize.lower() == "yes"
            warnings.warn(
                "Use True/False for standardize. The option to"
                ' provide "yes"/"no" will be deprecated in PySAL 2.2.'
            )
        y = np.asarray(y).flatten()
        self.w = w
        self.y = y
        self.op = operation
        self.stand = standardize
        self.permutations = permutations
        if self.stand:
            # standardize y to mean zero and unit variance
            ym = np.mean(self.y)
            ysd = np.std(self.y)
            ys = (self.y - ym) / ysd
            self.y = ys
        self.g = self.__calc(self.y, self.op)
        if permutations:
            # reference distribution under spatial randomness:
            # recompute Gamma for randomly permuted copies of y
            sim = [
                self.__calc(np.random.permutation(self.y), self.op)
                for i in range(permutations)
            ]
            self.sim_g = np.array(sim)
            self.min_g = np.min(self.sim_g)
            self.mean_g = np.mean(self.sim_g)
            self.max_g = np.max(self.sim_g)
            p_sim_g = self.__pseudop(self.sim_g, self.g)
            self.p_sim_g = p_sim_g
            self.g_z = (self.g - self.mean_g) / np.std(self.sim_g)

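    # Note on the permutation-based attributes set in __init__: ``g_z`` is a
    # pseudo z-score of the observed Gamma against the permutation
    # distribution, (g - mean_g) / std(sim_g); in the cross-product doctest
    # above this evaluates to the reported 3.188.
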
    @property
    def _statistic(self):
        return self.g

    @property
    def p_sim(self):
        """new name to fit with Moran module"""
        return self.p_sim_g

    def __calc(self, z, op):
        if op == "c":  # cross-product
            zl = lag_spatial(self.w, z)
            g = (z * zl).sum()
        elif op == "s":  # squared difference
            zs = np.zeros(z.shape)
            z2 = z**2
            for i, i0 in enumerate(self.w.id_order):
                neighbors = self.w.neighbor_offsets[i0]
                wijs = self.w.weights[i0]
                zw = list(zip(neighbors, wijs))
                zs[i] = sum(
                    [wij * (z2[i] - 2.0 * z[i] * z[j] + z2[j]) for j, wij in zw]
                )
            g = zs.sum()
        elif op == "a":  # absolute difference
            zs = np.zeros(z.shape)
            for i, i0 in enumerate(self.w.id_order):
                neighbors = self.w.neighbor_offsets[i0]
                wijs = self.w.weights[i0]
                zw = list(zip(neighbors, wijs))
                zs[i] = sum([wij * abs(z[i] - z[j]) for j, wij in zw])
            g = zs.sum()
        else:  # any previously defined function op
            zs = np.zeros(z.shape)
            for i, i0 in enumerate(self.w.id_order):
                neighbors = self.w.neighbor_offsets[i0]
                wijs = self.w.weights[i0]
                zw = list(zip(neighbors, wijs))
                zs[i] = sum([wij * op(z, i, j) for j, wij in zw])
            g = zs.sum()
        return g

    def __pseudop(self, sim, g):
        # one-sided count of permuted values at least as large as the observed
        above = sim >= g
        larger = above.sum()
        psim = (larger + 1.0) / (self.permutations + 1.0)
        if psim > 0.5:
            # observed value sits in the lower tail; report the other tail
            psim = (self.permutations - larger + 1.0) / (self.permutations + 1.0)
        return psim

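    # Worked example for __pseudop: in the squared-difference doctest above,
    # every permuted Gamma (min_g = 14.0) is >= the observed g = 8.0, so
    # larger = 999 and psim = 1000/1000 > 0.5; the folded (other-tail) value
    # (999 - 999 + 1) / 1000 = 0.001 is what p_sim_g reports.
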
    @classmethod
    def by_col(
        cls, df, cols, w=None, inplace=False, pvalue="sim", outvals=None, **stat_kws
    ):
        msg = (
            "The `.by_col()` methods are deprecated and will be "
            "removed in a future version of `esda`."
        )
        warnings.warn(msg, FutureWarning)
        return _univariate_handler(
            df,
            cols,
            w=w,
            inplace=inplace,
            pvalue=pvalue,
            outvals=outvals,
            stat=cls,
            swapname=cls.__name__.lower(),
            **stat_kws
        )

# --------------------------------------------------------------
# Conditional Randomization Function Implementations
# --------------------------------------------------------------


@_njit(fastmath=True)
def _local_gamma_crand(i, z, permuted_ids, weights_i, scaling):
    zi, zrand = _prepare_univariate(i, z, permuted_ids, weights_i)
    return (zi * zrand) @ weights_i * scaling
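

# A minimal, optional self-check sketch mirroring the first doctest above; it
# assumes libpysal is installed and runs only when this module is executed
# directly, not on import.
if __name__ == "__main__":
    import libpysal

    w = libpysal.weights.lat2W(4, 4)
    y = np.ones(16)
    y[0:8] = 0
    np.random.seed(12345)
    g = Gamma(y, w)
    # Expected per the doctest: g.g == 20.0 and round(g.p_sim_g, 3) == 0.003
    print(g.g, round(g.g_z, 3), round(g.p_sim_g, 3))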