Source code for esda.geary_local_mv

import numpy as np
import pandas as pd
from scipy import stats
from sklearn.base import BaseEstimator
from sklearn.utils import check_array


class Geary_Local_MV(BaseEstimator):
    """Local Geary - Multivariate"""
    def __init__(self, connectivity=None, permutations=999, drop_islands=True):
        """
        Initialize a Geary_Local_MV estimator

        Parameters
        ----------
        connectivity : scipy.sparse matrix object
                       the connectivity structure describing
                       the relationships between observed units.
                       Need not be row-standardized.
        permutations : int (default=999)
                       number of random permutations for calculation of
                       pseudo p_values
        drop_islands : bool (default True)
                       Whether or not to preserve islands as entries in the
                       adjacency list. By default, observations with no
                       neighbors do not appear in the adjacency list. If
                       islands are kept, they are coded as self-neighbors
                       with zero weight. See ``libpysal.weights.to_adjlist()``.

        Attributes
        ----------
        localG : numpy array
                 array containing the observed multivariate
                 Local Geary values.
        p_sim : numpy array
                array containing the simulated
                p-values for each unit.
        """
        self.connectivity = connectivity
        self.permutations = permutations
        self.drop_islands = drop_islands
    def fit(self, variables):
        """
        Parameters
        ----------
        variables : numpy.ndarray
                    array containing continuous data

        Returns
        -------
        self : Geary_Local_MV
               the fitted estimator.

        Notes
        -----
        Technical details and derivations can be found in :cite:`Anselin1995`.

        Examples
        --------
        Guerry data replicating the GeoDa tutorial

        >>> import libpysal
        >>> import geopandas as gpd
        >>> guerry = libpysal.examples.load_example('Guerry')
        >>> guerry_ds = gpd.read_file(guerry.get_path('Guerry.shp'))
        >>> w = libpysal.weights.Queen.from_dataframe(guerry_ds)
        >>> x1 = guerry_ds['Donatns']
        >>> x2 = guerry_ds['Suicids']
        >>> lG_mv = Geary_Local_MV(connectivity=w).fit([x1, x2])
        >>> lG_mv.localG[0:5]
        >>> lG_mv.p_sim[0:5]
        """
        self.variables = check_array(
            variables,
            accept_sparse=False,
            dtype="float",
            force_all_finite=True,
            estimator=self,
        )

        w = self.connectivity
        w.transform = "r"

        self.n = len(variables[0])
        self.w = w

        permutations = self.permutations

        # Calculate z-scores for input variables
        # to be used in _statistic and _crand
        zvariables = stats.zscore(variables, axis=1)

        self.localG = self._statistic(variables, zvariables, w, self.drop_islands)

        if permutations:
            self._crand(zvariables)
            sim = np.transpose(self.Gs)
            above = sim >= self.localG
            larger = above.sum(0)
            low_extreme = (permutations - larger) < larger
            larger[low_extreme] = permutations - larger[low_extreme]
            self.p_sim = (larger + 1.0) / (permutations + 1.0)

        return self
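    # Reading guide for the statistic below: with the row-standardized weights
    # w_ij set in ``fit`` and z-scored variables z_{v,i}, the value returned by
    # ``_statistic`` for unit i and k variables can be read as
    #
    #     localG_i = (1 / k) * sum_v sum_j w_ij * (z_{v,i} - z_{v,j}) ** 2
    #
    # i.e. the average over the k variables of the univariate Local Geary
    # terms computed on standardized data.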
    @staticmethod
    def _statistic(variables, zvariables, w, drop_islands):
        # Define denominator adjustment
        k = len(variables)
        # Create focal and neighbor values
        adj_list = w.to_adjlist(remove_symmetric=False, drop_islands=drop_islands)
        zseries = [pd.Series(i, index=w.id_order) for i in zvariables]
        focal = [zseries[i].loc[adj_list.focal].values for i in range(len(variables))]
        neighbor = [
            zseries[i].loc[adj_list.neighbor].values for i in range(len(variables))
        ]
        # Carry out local Geary calculation
        gs = adj_list.weight.values * (np.array(focal) - np.array(neighbor)) ** 2
        # Reorganize data
        temp = pd.DataFrame(gs).T
        temp["ID"] = adj_list.focal.values
        adj_list_gs = temp.groupby(by="ID").sum()
        # Rearrange data based on w id order
        adj_list_gs["w_order"] = w.id_order
        adj_list_gs.sort_values(by="w_order", inplace=True)
        adj_list_gs.drop(columns=["w_order"], inplace=True)

        localG = np.array(adj_list_gs.sum(axis=1, numeric_only=True) / k)

        return localG

    def _crand(self, zvariables):
        """
        conditional randomization

        for observation i with ni neighbors, the candidate set cannot include
        i (we don't want i being a neighbor of i). we have to sample without
        replacement from a set of ids that doesn't include i. numpy doesn't
        directly support sampling without replacement and it is expensive to
        implement this. instead we omit i from the original ids, permute the
        ids and take the first ni elements of the permuted ids as the
        neighbors to i in each randomization.
        """
        nvars = self.variables.shape[0]

        Gs = np.zeros((self.n, self.permutations))

        prange = list(range(self.permutations))

        k = self.w.max_neighbors + 1
        nn = self.n - 1
        rids = np.array([np.random.permutation(nn)[0:k] for i in prange])
        ids = np.arange(self.w.n)
        ido = self.w.id_order

        w = [self.w.weights[ido[i]] for i in ids]
        wc = [self.w.cardinalities[ido[i]] for i in ids]

        for i in range(self.w.n):
            idsi = ids[ids != i]
            np.random.shuffle(idsi)
            vars_rand = []
            for j in range(nvars):
                vars_rand.append(zvariables[j][idsi[rids[:, 0 : wc[i]]]])  # noqa: E203
            # vars rand as tmp
            # Calculate diff
            diff = []
            for z in range(nvars):
                _diff = np.array((zvariables[z][i] - vars_rand[z]) ** 2 * w[i])
                diff.append(_diff.sum(1) / nvars)
            # add up differences
            temp = np.array([sum(x) for x in zip(*diff)])
            # Assign to object to be returned
            Gs[i] = temp

        self.Gs = Gs
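
# A minimal usage sketch, assuming libpysal is installed: the 5x5 rook lattice
# and the random variables below are illustrative stand-ins, not the Guerry
# example from the docstring above.
if __name__ == "__main__":
    from libpysal.weights import lat2W

    w = lat2W(5, 5)  # rook contiguity weights for a 5 x 5 lattice (25 units)
    rng = np.random.default_rng(0)
    x1 = rng.normal(size=w.n)
    x2 = rng.normal(size=w.n)

    # Fit on two variables; permutations kept small for a quick demonstration.
    lg = Geary_Local_MV(connectivity=w, permutations=99).fit([x1, x2])
    print(lg.localG[:5])  # observed multivariate Local Geary values
    print(lg.p_sim[:5])   # pseudo p-values from conditional permutation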