# Source code for libpysal.graph._summary

import numpy as np


class GraphSummary:
    r"""Graph Summary

    An object containing the statistical attributes summarising the Graph and
    its basic properties.

    Attributes
    ----------
    n_nodes : int
        number of Graph nodes
    n_edges : int
        number of Graph edges
    n_components : int
        number of connected components
    n_isolates : int
        number of isolates (nodes with no neighbors)
    nonzero : int
        number of edges with nonzero weight
    pct_nonzero : float
        percentage of nonzero weights
    n_asymmetries : int
        number of intrinsic asymmetries. Only set when the summary is created
        with ``asymmetries=True``.
    cardinalities_mean : float
        mean number of neighbors
    cardinalities_std : float
        standard deviation of number of neighbors
    cardinalities_min : float
        minimal number of neighbors
    cardinalities_25 : float
        25th percentile of number of neighbors
    cardinalities_50 : float
        50th percentile (median) of number of neighbors
    cardinalities_75 : float
        75th percentile of number of neighbors
    cardinalities_max : float
        maximal number of neighbors
    weights_mean : float
        mean edge weight
    weights_std : float
        standard deviation of edge weights
    weights_min : float
        minimal edge weight
    weights_25 : float
        25th percentile of edge weights
    weights_50 : float
        50th percentile (median) of edge weights
    weights_75 : float
        75th percentile of edge weights
    weights_max : float
        maximal edge weight
    s0 : float
        S0 (global) sum of weights

        ``s0`` is defined as

        .. math::

               s0=\sum_i \sum_j w_{i,j}

        :attr:`s0`, :attr:`s1`, and :attr:`s2` reflect interaction between
        observations and are used to compute standard errors for spatial
        autocorrelation estimators.
    s1 : float
        S1 sum of weights

        ``s1`` is defined as

        .. math::

               s1=1/2 \sum_i \sum_j \Big(w_{i,j} + w_{j,i}\Big)^2

        :attr:`s0`, :attr:`s1`, and :attr:`s2` reflect interaction between
        observations and are used to compute standard errors for spatial
        autocorrelation estimators.
    s2 : float
        S2 sum of weights

        ``s2`` is defined as

        .. math::

                s2=\sum_j \Big(\sum_i w_{i,j} + \sum_i w_{j,i}\Big)^2

        :attr:`s0`, :attr:`s1`, and :attr:`s2` reflect interaction between
        observations and are used to compute standard errors for spatial
        autocorrelation estimators.
    diag_g2 : np.ndarray
        diagonal of :math:`GG`
    diag_gtg : np.ndarray
        diagonal of :math:`G^{'}G`
    diag_gtg_gg : np.ndarray
        diagonal of :math:`G^{'}G + GG`
    trace_g2 : float
        trace of :math:`GG`
    trace_gtg : float
        trace of :math:`G^{'}G`
    trace_gtg_gg : float
        trace of :math:`G^{'}G + GG`

    Examples
    --------
    >>> import geopandas as gpd
    >>> from geodatasets import get_path
    >>> from libpysal import graph
    >>> nybb = gpd.read_file(get_path("nybb")).set_index("BoroName")
    >>> nybb
                   BoroCode  ...                                           geometry
    BoroName                 ...
    Staten Island         5  ...  MULTIPOLYGON (((970217.022 145643.332, 970227....
    Queens                4  ...  MULTIPOLYGON (((1029606.077 156073.814, 102957...
    Brooklyn              3  ...  MULTIPOLYGON (((1021176.479 151374.797, 102100...
    Manhattan             1  ...  MULTIPOLYGON (((981219.056 188655.316, 980940....
    Bronx                 2  ...  MULTIPOLYGON (((1012821.806 229228.265, 101278...
    <BLANKLINE>
    [5 rows x 4 columns]

    >>> contiguity = graph.Graph.build_contiguity(nybb)
    >>> contiguity
    <Graph of 5 nodes and 10 nonzero edges indexed by ['Staten Island', 'Queens', 'Brooklyn', 'Manhattan', 'Bronx']>

    >>> summary = contiguity.summary(asymmetries=True)
    >>> summary
    Graph Summary Statistics
    ========================
    Graph indexed by: ['Staten Island', 'Queens', 'Brooklyn', 'Manhattan', 'Bronx']
    ==============================================================
    Number of nodes:                                             5
    Number of edges:                                            10
    Number of connected components:                              2
    Number of isolates:                                          1
    Number of non-zero edges:                                   10
    Percentage of non-zero edges:                           44.00%
    Number of asymmetries:                                       0
    --------------------------------------------------------------
    Cardinalities
    ==============================================================
    Mean:                       2    25%:                        2
    Standard deviation:         1    50%:                        2
    Min:                        0    75%:                        3
    Max:                        3
    --------------------------------------------------------------
    Weights
    ==============================================================
    Mean:                       1    25%:                        1
    Standard deviation:         0    50%:                        1
    Min:                        1    75%:                        1
    Max:                        1
    --------------------------------------------------------------
    Sum of weights
    ==============================================================
    S0:                                                         10
    S1:                                                         20
    S2:                                                        104
    --------------------------------------------------------------
    Traces
    ==============================================================
    GG:                                                         10
    G'G:                                                        10
    G'G + GG:                                                   20

    >>> float(summary.s1)
    20.0
    """

    def __init__(self, graph, asymmetries=False):
        """Create GraphSummary

        All statistics are computed eagerly from ``graph`` and stored as
        plain attributes.

        Parameters
        ----------
        graph : Graph
        asymmetries : bool
            whether to compute ``n_asymmetries``, which is considerably more
            expensive than the other attributes. By default False, in which
            case the ``n_asymmetries`` attribute is not set at all.
        """
        self._graph = graph
        self.asymmetries = asymmetries

        self.n_nodes = self._graph.n_nodes  # number of nodes / observations
        self.n_edges = self._graph.n_edges  # number of edges excluding isolates
        self.n_components = self._graph.n_components
        self.n_isolates = len(self._graph.isolates)

        # nonzero
        self.nonzero = self._graph.nonzero
        self.pct_nonzero = self._graph.pct_nonzero

        # intrinsic asymmetries (opt-in because they are expensive to compute)
        if asymmetries:
            self.n_asymmetries = len(self._graph.asymmetry())

        # statistics of cardinalities
        card_stats = self._graph.cardinalities.describe()
        self.cardinalities_mean = card_stats["mean"]
        self.cardinalities_std = card_stats["std"]
        self.cardinalities_min = card_stats["min"]
        self.cardinalities_25 = card_stats["25%"]
        self.cardinalities_50 = card_stats["50%"]
        self.cardinalities_75 = card_stats["75%"]
        self.cardinalities_max = card_stats["max"]

        # statistics of weights; the entries indexed by isolates are dropped
        # before describing so they do not enter the statistics
        weights_stats = self._graph._adjacency.drop(self._graph.isolates).describe()
        self.weights_mean = weights_stats["mean"]
        self.weights_std = weights_stats["std"]
        self.weights_min = weights_stats["min"]
        self.weights_25 = weights_stats["25%"]
        self.weights_50 = weights_stats["50%"]
        self.weights_75 = weights_stats["75%"]
        self.weights_max = weights_stats["max"]

        # sum of weights
        self.s0 = self._s0()
        self.s1 = self._s1()
        self.s2 = self._s2()

        # diags
        self.diag_g2 = self._diag_g2()
        self.diag_gtg = self._diag_gtg()
        self.diag_gtg_gg = self._diag_gtg_gg()

        # traces are the sums of the corresponding diagonals
        self.trace_g2 = self.diag_g2.sum()
        self.trace_gtg = self.diag_gtg.sum()
        self.trace_gtg_gg = self.diag_gtg_gg.sum()

    def __repr__(self):
        """Return the plain-text summary table.

        ``n_asymmetries`` is rendered as ``NA`` when the summary was created
        with ``asymmetries=False``.
        """
        n_asymmetries = f"{self.n_asymmetries:>12.0f}" if self.asymmetries else "NA"
        return f"""Graph Summary Statistics
{'='*24}
Graph indexed by: {self._graph._get_ids_repr(57)}
{'='*62}
{"Number of nodes:":<50}{self.n_nodes:>12.0f}
{"Number of edges:":<50}{self.n_edges:>12.0f}
{"Number of connected components:":<50}{self.n_components:>12.0f}
{"Number of isolates:":<50}{self.n_isolates:>12.0f}
{"Number of non-zero edges:":<50}{self.nonzero:>12.0f}
{"Percentage of non-zero edges:":<50}{self.pct_nonzero:>11.2f}%
{"Number of asymmetries:":<50}{n_asymmetries}
{'-'*62}
Cardinalities
{'='*62}
{"Mean:":<20}{self.cardinalities_mean:>9.0f}    {"25%:":<20}{self.cardinalities_25:>9.0f}
{"Standard deviation:":<20}{self.cardinalities_std:>9.0f}    {"50%:":<20}{self.cardinalities_50:>9.0f}
{"Min:":<20}{self.cardinalities_min:>9.0f}    {"75%:":<20}{self.cardinalities_75:>9.0f}
{"Max:":<20}{self.cardinalities_max:>9.0f}
{'-'*62}
Weights
{'='*62}
{"Mean:":<20}{self.weights_mean:>9.0f}    {"25%:":<20}{self.weights_25:>9.0f}
{"Standard deviation:":<20}{self.weights_std:>9.0f}    {"50%:":<20}{self.weights_50:>9.0f}
{"Min:":<20}{self.weights_min:>9.0f}    {"75%:":<20}{self.weights_75:>9.0f}
{"Max:":<20}{self.weights_max:>9.0f}
{'-'*62}
Sum of weights
{'='*62}
{"S0:":<50}{self.s0:>12.0f}
{"S1:":<50}{self.s1:>12.0f}
{"S2:":<50}{self.s2:>12.0f}
{'-'*62}
Traces
{'='*62}
{"GG:":<50}{self.trace_g2:>12.0f}
{"G'G:":<50}{self.trace_gtg:>12.0f}
{"G'G + GG:":<50}{self.trace_gtg_gg:>12.0f}
"""  # noqa: E501

    def _repr_html_(self):
        """Return the summary as HTML tables.

        Mirrors :meth:`__repr__`; ``n_asymmetries`` is rendered as ``NA``
        when the summary was created with ``asymmetries=False``.
        """
        n_asymmetries = f"{self.n_asymmetries:12.0f}" if self.asymmetries else "NA"
        return f"""
            <table>
                <caption>Graph Summary Statistics</caption>
                <tr>
                    <td>Number of nodes:</td>
                    <td>{self.n_nodes:12.0f}</td>
                </tr>
                <tr>
                    <td>Number of edges:</td>
                    <td>{self.n_edges:12.0f}</td>
                </tr>
                <tr>
                    <td>Number of connected components:</td>
                    <td>{self.n_components:12.0f}</td>
                </tr>
                <tr>
                    <td>Number of isolates:</td>
                    <td>{self.n_isolates:12.0f}</td>
                </tr>
                <tr>
                    <td>Number of non-zero edges:</td>
                    <td>{self.nonzero:12.0f}</td>
                </tr>
                <tr>
                    <td>Percentage of non-zero edges:</td>
                    <td>{self.pct_nonzero:11.2f}%</td>
                </tr>
                <tr>
                    <td>Number of asymmetries:</td>
                    <td>{n_asymmetries}</td>
                </tr>
            </table>
            <table>
                <caption>Cardinalities</caption>
                <tr>
                    <td>Mean:</td>
                    <td>{self.cardinalities_mean:9.0f}</td>
                    <td>25%:</td>
                    <td>{self.cardinalities_25:9.0f}</td>
                </tr>
                <tr>
                    <td>Standard deviation:</td>
                    <td>{self.cardinalities_std:9.0f}</td>
                    <td>50%:</td>
                    <td>{self.cardinalities_50:9.0f}</td>
                </tr>
                <tr>
                    <td>Min:</td>
                    <td>{self.cardinalities_min:9.0f}</td>
                    <td>75%:</td>
                    <td>{self.cardinalities_75:9.0f}</td>
                </tr>
                <tr>
                    <td>Max:</td>
                    <td>{self.cardinalities_max:9.0f}</td>
                </tr>
            </table>
            <table>
                <caption>Weights</caption>
                <tr>
                    <td>Mean:</td>
                    <td>{self.weights_mean:9.0f}</td>
                    <td>25%:</td>
                    <td>{self.weights_25:9.0f}</td>
                </tr>
                <tr>
                    <td>Standard deviation:</td>
                    <td>{self.weights_std:9.0f}</td>
                    <td>50%:</td>
                    <td>{self.weights_50:9.0f}</td>
                </tr>
                <tr>
                    <td>Min:</td>
                    <td>{self.weights_min:9.0f}</td>
                    <td>75%:</td>
                    <td>{self.weights_75:9.0f}</td>
                </tr>
                <tr>
                    <td>Max:</td>
                    <td>{self.weights_max:9.0f}</td>
                </tr>
            </table>
            <table>
                <caption>Sum of weights and Traces</caption>
                <tr>
                    <td>S0:</td>
                    <td>{self.s0:12.0f}</td>
                    <td>GG:</td>
                    <td>{self.trace_g2:12.0f}</td>
                </tr>
                <tr>
                    <td>S1:</td>
                    <td>{self.s1:12.0f}</td>
                    <td>G'G:</td>
                    <td>{self.trace_gtg:12.0f}</td>
                </tr>
                <tr>
                    <td>S2:</td>
                    <td>{self.s2:12.0f}</td>
                    <td>G'G + GG:</td>
                    <td>{self.trace_gtg_gg:12.0f}</td>
                </tr>
            </table>
            <div>
                Graph indexed by: <code>{self._graph._get_ids_repr(57)}</code>
            </div>
        """

    def _s0(self):
        r"""S0 (global) sum of weights

        ``s0`` is defined as

        .. math::

               s0=\sum_i \sum_j w_{i,j}

        :attr:`s0`, :attr:`s1`, and :attr:`s2` reflect interaction between
        observations and are used to compute standard errors for spatial
        autocorrelation estimators.

        Returns
        -------
        float
            global sum of weights
        """
        return self._graph._adjacency.sum()

    def _s1(self):
        r"""S1 sum of weights

        ``s1`` is defined as

        .. math::

               s1=1/2 \sum_i \sum_j \Big(w_{i,j} + w_{j,i}\Big)^2

        :attr:`s0`, :attr:`s1`, and :attr:`s2` reflect interaction between
        observations and are used to compute standard errors for spatial
        autocorrelation estimators.

        Returns
        -------
        float
            s1 sum of weights
        """
        g = self._graph.sparse
        sym = g.transpose() + g
        # ``multiply`` is element-wise for both scipy sparse arrays and sparse
        # matrices, whereas ``*`` is a matrix product for sparse matrices.
        return sym.multiply(sym).sum() / 2.0

    def _s2(self):
        r"""S2 sum of weights

        ``s2`` is defined as

        .. math::

                s2=\sum_j \Big(\sum_i w_{i,j} + \sum_i w_{j,i}\Big)^2

        :attr:`s0`, :attr:`s1`, and :attr:`s2` reflect interaction between
        observations and are used to compute standard errors for spatial
        autocorrelation estimators.

        Returns
        -------
        float
            s2 sum of weights
        """
        s = self._graph.sparse
        # row sums plus column sums per node, squared and accumulated
        return (np.array(s.sum(1) + s.sum(0).transpose()) ** 2).sum()

    def _diag_g2(self):
        """Diagonal of :math:`GG`.

        Returns
        -------
        np.ndarray
        """
        g = self._graph.sparse
        return (g @ g).diagonal()

    def _diag_gtg(self):
        """Diagonal of :math:`G^{'}G`.

        Returns
        -------
        np.ndarray
        """
        g = self._graph.sparse
        return (g.transpose() @ g).diagonal()

    def _diag_gtg_gg(self):
        """Diagonal of :math:`G^{'}G + GG`.

        Returns
        -------
        np.ndarray
        """
        g = self._graph.sparse
        gt = g.transpose()
        return (gt @ g + g @ g).diagonal()