Source code for libpysal.weights.distance

# ruff: noqa: N802, N803

__all__ = ["KNN", "Kernel", "DistanceBand"]
__author__ = "Sergio J. Rey <srey@asu.edu>, Levi John Wolf <levi.john.wolf@gmail.com>"


import copy
from warnings import warn

import numpy as np
import scipy.sparse as sp
from scipy.spatial import distance_matrix

from ..cg.kdtree import KDTree
from .util import (
    WSP2W,
    get_ids,
    get_points_array,
    get_points_array_from_shapefile,
    isKDTree,
)
from .weights import WSP, W


def knnW(data, k=2, p=2, ids=None, radius=None, distance_metric="euclidean"):
    """
    This is deprecated. Use the pysal.weights.KNN class instead.
    """
    # warn('This function is deprecated. Please use pysal.weights.KNN', UserWarning)
    return KNN(data, k=k, p=p, ids=ids, radius=radius, distance_metric=distance_metric)


[docs] class KNN(W): """ Creates nearest neighbor weights matrix based on k nearest neighbors. Parameters ---------- kdtree : object PySAL KDTree or ArcKDTree where KDtree.data is array (n,k) n observations on k characteristics used to measure distances between the n objects k : int number of nearest neighbors p : float Minkowski p-norm distance metric parameter: 1<=p<=infinity 2: Euclidean distance 1: Manhattan distance Ignored if the KDTree is an ArcKDTree ids : list identifiers to attach to each observation Returns ------- w : W instance Weights object with binary weights Examples -------- >>> import libpysal >>> import numpy as np >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)] >>> kd = libpysal.cg.KDTree(np.array(points)) >>> wnn2 = libpysal.weights.KNN(kd, 2) >>> [1,3] == wnn2.neighbors[0] True >>> wnn2 = KNN(kd,2) >>> wnn2[0] {1: 1.0, 3: 1.0} >>> wnn2[1] {0: 1.0, 3: 1.0} now with 1 rather than 0 offset >>> wnn2 = libpysal.weights.KNN(kd, 2, ids=range(1,7)) >>> wnn2[1] {2: 1.0, 4: 1.0} >>> wnn2[2] {1: 1.0, 4: 1.0} >>> 0 in wnn2.neighbors False Notes ----- Ties between neighbors of equal distance are arbitrarily broken. Further, if many points occupy the same spatial location (i.e. observations are coincident), then you may need to increase k for those observations to acquire neighbors at different spatial locations. For example, if five points are coincident, then their four nearest neighbors will all occupy the same spatial location; only the fifth nearest neighbor will result in those coincident points becoming connected to the graph as a whole. Solutions to this problem include jittering the points (by adding a small random value to each observation's location) or by adding higher-k neighbors only to the coincident points, using the weights.w_sets.w_union() function. See Also -------- :class:`libpysal.weights.weights.W` """
[docs] def __init__( self, data, k=2, p=2, ids=None, radius=None, distance_metric="euclidean", **kwargs, ): if radius is not None: distance_metric = "arc" if isKDTree(data): self.kdtree = data self.data = self.kdtree.data else: self.kdtree = KDTree(data, radius=radius, distance_metric=distance_metric) self.data = self.kdtree.data self.k = k self.p = p # these are both n x k+1 distances, indices = self.kdtree.query(self.data, k=k + 1, p=p) full_indices = np.arange(self.kdtree.n) # if an element in the indices matrix is equal to the corresponding # index for that row, we want to mask that site from its neighbors not_self_mask = indices != full_indices.reshape(-1, 1) # if there are *too many duplicates per site*, then we may get some # rows where the site index is not in the set of k+1 neighbors # So, we need to know where these sites are has_one_too_many = not_self_mask.sum(axis=1) == (k + 1) # if a site has k+1 neighbors, drop its k+1th neighbor not_self_mask[has_one_too_many, -1] &= False not_self_indices = indices[not_self_mask].reshape(self.kdtree.n, -1) if ids is None: ids = list(full_indices) named_indices = not_self_indices else: named_indices = np.asarray(ids)[not_self_indices] neighbors = { idx: list(indices) for idx, indices in zip(ids, named_indices, strict=True) } W.__init__(self, neighbors, id_order=ids, **kwargs)
[docs] @classmethod def from_shapefile(cls, filepath, *args, **kwargs): """ Nearest neighbor weights from a shapefile. Parameters ---------- data : string shapefile containing attribute data. k : int number of nearest neighbors p : float Minkowski p-norm distance metric parameter: 1<=p<=infinity 2: Euclidean distance 1: Manhattan distance ids : list identifiers to attach to each observation radius : float If supplied arc_distances will be calculated based on the given radius. p will be ignored. Returns ------- w : KNN instance; Weights object with binary weights. Examples -------- Polygon shapefile >>> import libpysal >>> from libpysal.weights import KNN >>> wc=KNN.from_shapefile(libpysal.examples.get_path("columbus.shp")) >>> "%.4f"%wc.pct_nonzero '4.0816' >>> set([2,1]) == set(wc.neighbors[0]) True >>> wc3=KNN.from_shapefile(libpysal.examples.get_path("columbus.shp"),k=3) >>> set(wc3.neighbors[0]) == set([2,1,3]) True >>> set(wc3.neighbors[2]) == set([4,3,0]) True Point shapefile >>> w=KNN.from_shapefile(libpysal.examples.get_path("juvenile.shp")) >>> w.pct_nonzero 1.1904761904761905 >>> w1=KNN.from_shapefile(libpysal.examples.get_path("juvenile.shp"),k=1) >>> "%.3f"%w1.pct_nonzero '0.595' Notes ----- Ties between neighbors of equal distance are arbitrarily broken. See Also -------- :class:`libpysal.weights.weights.W` """ return cls(get_points_array_from_shapefile(filepath), *args, **kwargs)
[docs] @classmethod def from_array(cls, array, *args, **kwargs): """ Creates nearest neighbor weights matrix based on k nearest neighbors. Parameters ---------- array : np.ndarray (n, k) array representing n observations on k characteristics used to measure distances between the n objects **kwargs : keyword arguments, see Rook Returns ------- w : W instance Weights object with binary weights Examples -------- >>> from libpysal.weights import KNN >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)] >>> wnn2 = KNN.from_array(points, 2) >>> [1,3] == wnn2.neighbors[0] True >>> wnn2 = KNN.from_array(points,2) >>> wnn2[0] {1: 1.0, 3: 1.0} >>> wnn2[1] {0: 1.0, 3: 1.0} now with 1 rather than 0 offset >>> wnn2 = KNN.from_array(points, 2, ids=range(1,7)) >>> wnn2[1] {2: 1.0, 4: 1.0} >>> wnn2[2] {1: 1.0, 4: 1.0} >>> 0 in wnn2.neighbors False Notes ----- Ties between neighbors of equal distance are arbitrarily broken. See Also -------- :class:`libpysal.weights.weights.W` """ return cls(array, *args, **kwargs)
[docs] @classmethod def from_dataframe( cls, df, geom_col=None, ids=None, use_index=True, *args, **kwargs ): """ Make KNN weights from a dataframe. Parameters ---------- df : pandas.dataframe a dataframe with a geometry column that can be used to construct a W object geom_col : string the name of the column in `df` that contains the geometries. Defaults to active geometry column. ids : list-like, string a list-like of ids to use to index the spatial weights object or the name of the column to use as IDs. If nothing is provided, the dataframe index is used if `use_index=True` or a positional index is used if `use_index=False`. Order of the resulting W is not respected from this list. use_index : bool use index of `df` as `ids` to index the spatial weights object. See Also -------- :class:`libpysal.weights.weights.W` """ if geom_col is None: geom_col = df.geometry.name pts = get_points_array(df[geom_col]) if ids is None and use_index: ids = df.index.tolist() elif isinstance(ids, str): ids = df[ids].tolist() return cls(pts, *args, ids=ids, **kwargs)
[docs] def reweight(self, k=None, p=None, new_data=None, new_ids=None, inplace=True): """ Redo K-Nearest Neighbor weights construction using given parameters Parameters ---------- new_data : np.ndarray an array containing additional data to use in the KNN weight new_ids : list a list aligned with new_data that provides the ids for each new observation inplace : bool a flag denoting whether to modify the KNN object in place or to return a new KNN object k : int number of nearest neighbors p : float Minkowski p-norm distance metric parameter: 1<=p<=infinity 2: Euclidean distance 1: Manhattan distance Ignored if the KDTree is an ArcKDTree Returns ------- A copy of the object using the new parameterization, or None if the object is reweighted in place. """ if new_data is not None: new_data = np.asarray(new_data).reshape(-1, 2) data = np.vstack((self.data, new_data)).reshape(-1, 2) if new_ids is not None: ids = copy.deepcopy(self.id_order) ids.extend(list(new_ids)) else: ids = list(range(data.shape[0])) elif (new_data is None) and (new_ids is None): # If not, we can use the same kdtree we have data = self.kdtree ids = self.id_order elif (new_data is None) and (new_ids is not None): warn("Remapping ids must be done using w.remap_ids", stacklevel=2) if k is None: k = self.k if p is None: p = self.p if inplace: self._reset() self.__init__(data, ids=ids, k=k, p=p) else: return KNN(data, ids=ids, k=k, p=p)
[docs] class Kernel(W): """ Spatial weights based on kernel functions. Parameters ---------- data : array (n,k) or KDTree where KDtree.data is array (n,k) n observations on k characteristics used to measure distances between the n objects bandwidth : float or array-like (optional) the bandwidth :math:`h_i` for the kernel. fixed : binary If true then :math:`h_i=h \\forall i`. If false then bandwidth is adaptive across observations. k : int the number of nearest neighbors to use for determining bandwidth. For fixed bandwidth, :math:`h_i=max(dknn) \\forall i` where :math:`dknn` is a vector of k-nearest neighbor distances (the distance to the kth nearest neighbor for each observation). For adaptive bandwidths, :math:`h_i=dknn_i` diagonal : boolean If true, set diagonal weights = 1.0, if false (default), diagonals weights are set to value according to kernel function. function : {'triangular','uniform','quadratic','quartic','gaussian'} kernel function defined as follows with .. math:: z_{i,j} = d_{i,j}/h_i triangular .. math:: K(z) = (1 - |z|) \\ if |z| \\le 1 uniform .. math:: K(z) = 1/2 \\ if |z| \\le 1 quadratic .. math:: K(z) = (3/4)(1-z^2) \\ if |z| \\le 1 quartic .. math:: K(z) = (15/16)(1-z^2)^2 \\ if |z| \\le 1 gaussian .. math:: K(z) = (2\\pi)^{(-1/2)} exp(-z^2 / 2) eps : float adjustment to ensure knn distance range is closed on the knnth observations Attributes ---------- weights : dict Dictionary keyed by id with a list of weights for each neighbor neighbors : dict of lists of neighbors keyed by observation id bandwidth : array array of bandwidths Examples -------- >>> from libpysal.weights import Kernel >>> points=[(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)] >>> kw=Kernel(points) >>> kw.weights[0] [1.0, 0.500000049999995, 0.4409830615267465] >>> kw.neighbors[0] [0, 1, 3] >>> kw.bandwidth array([[20.000002], [20.000002], [20.000002], [20.000002], [20.000002], [20.000002]]) >>> kw15=Kernel(points,bandwidth=15.0) >>> kw15[0] {0: 1.0, 1: 0.33333333333333337, 3: 0.2546440075000701} >>> kw15.neighbors[0] [0, 1, 3] >>> kw15.bandwidth array([[15.], [15.], [15.], [15.], [15.], [15.]]) Adaptive bandwidths user specified >>> bw=[25.0,15.0,25.0,16.0,14.5,25.0] >>> kwa=Kernel(points,bandwidth=bw) >>> kwa.weights[0] [1.0, 0.6, 0.552786404500042, 0.10557280900008403] >>> kwa.neighbors[0] [0, 1, 3, 4] >>> kwa.bandwidth array([[25. ], [15. ], [25. ], [16. ], [14.5], [25. ]]) Endogenous adaptive bandwidths >>> kwea=Kernel(points,fixed=False) >>> kwea.weights[0] [1.0, 0.10557289844279438, 9.99999900663795e-08] >>> kwea.neighbors[0] [0, 1, 3] >>> kwea.bandwidth array([[11.18034101], [11.18034101], [20.000002 ], [11.18034101], [14.14213704], [18.02775818]]) Endogenous adaptive bandwidths with Gaussian kernel >>> kweag=Kernel(points,fixed=False,function='gaussian') >>> kweag.weights[0] [0.3989422804014327, 0.2674190291577696, 0.2419707487162134] >>> kweag.bandwidth array([[11.18034101], [11.18034101], [20.000002 ], [11.18034101], [14.14213704], [18.02775818]]) Diagonals to 1.0 >>> kq = Kernel(points,function='gaussian') >>> kq.weights {0: [0.3989422804014327, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 0.3989422804014327, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 0.3989422804014327, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 0.3989422804014327, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 0.3989422804014327, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 0.3989422804014327]} >>> kqd = Kernel(points, function='gaussian', diagonal=True) >>> kqd.weights {0: [1.0, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 1.0, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 1.0, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 1.0, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 1.0, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 1.0]} """ # noqa: E501
[docs] def __init__( self, data, bandwidth=None, fixed=True, k=2, function="triangular", eps=1.0000001, ids=None, diagonal=False, distance_metric="euclidean", radius=None, **kwargs, ): if radius is not None: distance_metric = "arc" if isKDTree(data): self.kdtree = data self.data = self.kdtree.data data = self.data else: self.kdtree = KDTree(data, distance_metric=distance_metric, radius=radius) self.data = self.kdtree.data self.k = k + 1 self.function = function.lower() self.fixed = fixed self.eps = eps if bandwidth: try: bandwidth = np.array(bandwidth) bandwidth.shape = (len(bandwidth), 1) except: # noqa: E722 bandwidth = np.ones((len(data), 1), "float") * bandwidth self.bandwidth = bandwidth else: self._set_bw() self._eval_kernel() neighbors, weights = self._k_to_W(ids) if diagonal: for i in neighbors: weights[i][neighbors[i].index(i)] = 1.0 W.__init__(self, neighbors, weights, ids, **kwargs)
[docs] @classmethod def from_shapefile(cls, filepath, idVariable=None, **kwargs): """ Kernel based weights from shapefile Parameters ---------- shapefile : string shapefile name with shp suffix idVariable : string name of column in shapefile's DBF to use for ids Returns ------- Kernel Weights Object See Also -------- :class:`libpysal.weights.weights.W` """ points = get_points_array_from_shapefile(filepath) ids = get_ids(filepath, idVariable) if idVariable is not None else None return cls.from_array(points, ids=ids, **kwargs)
[docs] @classmethod def from_array(cls, array, **kwargs): """ Construct a Kernel weights from an array. Supports all the same options as :class:`libpysal.weights.Kernel` See Also -------- :class:`libpysal.weights.weights.W` """ return cls(array, **kwargs)
[docs] @classmethod def from_dataframe(cls, df, geom_col=None, ids=None, use_index=True, **kwargs): """ Make Kernel weights from a dataframe. Parameters ---------- df : pandas.dataframe a dataframe with a geometry column that can be used to construct a W object geom_col : string the name of the column in `df` that contains the geometries. Defaults to active geometry column. ids : list-like, string a list-like of ids to use to index the spatial weights object or the name of the column to use as IDs. If nothing is provided, the dataframe index is used if `use_index=True` or a positional index is used if `use_index=False`. Order of the resulting W is not respected from this list. use_index : bool use index of `df` as `ids` to index the spatial weights object. See Also -------- :class:`libpysal.weights.weights.W` """ if geom_col is None: geom_col = df.geometry.name pts = get_points_array(df[geom_col]) if ids is None and use_index: ids = df.index.tolist() elif isinstance(ids, str): ids = df[ids].tolist() return cls(pts, ids=ids, **kwargs)
def _k_to_W(self, ids=None): allneighbors = {} weights = {} ids = np.array(ids) if ids else np.arange(len(self.data)) for i, _ in enumerate(self.kernel): if len(self.neigh[i]) == 0: allneighbors[ids[i]] = [] weights[ids[i]] = [] else: allneighbors[ids[i]] = list(ids[self.neigh[i]]) weights[ids[i]] = self.kernel[i].tolist() return allneighbors, weights def _set_bw(self): dmat, neigh = self.kdtree.query(self.data, k=self.k) if self.fixed: # use max knn distance as bandwidth bandwidth = dmat.max() * self.eps n = len(dmat) self.bandwidth = np.ones((n, 1), "float") * bandwidth else: # use local max knn distance self.bandwidth = dmat.max(axis=1) * self.eps self.bandwidth.shape = (self.bandwidth.size, 1) # identify knn neighbors for each point nnq = self.kdtree.query(self.data, k=self.k) self.neigh = nnq[1] def _eval_kernel(self): # get points within bandwidth distance of each point if not hasattr(self, "neigh"): kdtq = self.kdtree.query_ball_point neighbors = [ kdtq(self.data[i], r=bwi[0]) for i, bwi in enumerate(self.bandwidth) ] self.neigh = neighbors # get distances for neighbors bw = self.bandwidth kdtq = self.kdtree.query z = [] for i, nids in enumerate(self.neigh): di, ni = kdtq(self.data[i], k=len(nids)) if not isinstance(di, np.ndarray): di = np.asarray([di] * len(nids)) ni = np.asarray([ni] * len(nids)) zi = ( np.array([dict(list(zip(ni, di, strict=True)))[nid] for nid in nids]) / bw[i] ) z.append(zi) zs = z # functions follow Anselin and Rey (2010) table 5.4 if self.function == "triangular": self.kernel = [1 - zi for zi in zs] elif self.function == "uniform": self.kernel = [np.ones(zi.shape) * 0.5 for zi in zs] elif self.function == "quadratic": self.kernel = [(3.0 / 4) * (1 - zi**2) for zi in zs] elif self.function == "quartic": self.kernel = [(15.0 / 16) * (1 - zi**2) ** 2 for zi in zs] elif self.function == "gaussian": c = np.pi * 2 c = c ** (-0.5) self.kernel = [c * np.exp(-(zi**2) / 2.0) for zi in zs] else: print(("Unsupported kernel function", self.function))
[docs] class DistanceBand(W): """ Spatial weights based on distance band. Parameters ---------- data : array (n,k) or KDTree where KDtree.data is array (n,k) n observations on k characteristics used to measure distances between the n objects threshold : float distance band p : float DEPRECATED: use `distance_metric` Minkowski p-norm distance metric parameter: 1<=p<=infinity 2: Euclidean distance 1: Manhattan distance binary : boolean If true w_{ij}=1 if d_{i,j}<=threshold, otherwise w_{i,j}=0 If false wij=dij^{alpha} alpha : float distance decay parameter for weight (default -1.0) if alpha is positive the weights will not decline with distance. If binary is True, alpha is ignored ids : list values to use for keys of the neighbors and weights dicts build_sp : boolean DEPRECATED True to build sparse distance matrix and false to build dense distance matrix; significant speed gains may be obtained dending on the sparsity of the of distance_matrix and threshold that is applied silent : boolean By default libpysal will print a warning if the dataset contains any disconnected observations or islands. To silence this warning set this parameter to True. Attributes ---------- weights : dict of neighbor weights keyed by observation id neighbors : dict of neighbors keyed by observation id Examples -------- >>> import libpysal >>> points=[(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)] >>> wcheck = libpysal.weights.W( ... {0: [1, 3], 1: [0, 3], 2: [], 3: [0, 1], 4: [5], 5: [4]} ... ) WARNING: there is one disconnected observation (no neighbors) Island id: [2] >>> w=libpysal.weights.DistanceBand(points,threshold=11.2) WARNING: there is one disconnected observation (no neighbors) Island id: [2] >>> libpysal.weights.util.neighbor_equality(w, wcheck) True >>> w=libpysal.weights.DistanceBand(points,threshold=14.2) >>> wcheck = libpysal.weights.W( ... {0: [1, 3], 1: [0, 3, 4], 2: [4], 3: [1, 0], 4: [5, 2, 1], 5: [4]} ... ) >>> libpysal.weights.util.neighbor_equality(w, wcheck) True inverse distance weights >>> w=libpysal.weights.DistanceBand(points,threshold=11.2,binary=False) WARNING: there is one disconnected observation (no neighbors) Island id: [2] >>> w.weights[0] [0.1, 0.08944271909999159] >>> w.neighbors[0].tolist() [1, 3] gravity weights >>> w=libpysal.weights.DistanceBand(points,threshold=11.2,binary=False,alpha=-2.) WARNING: there is one disconnected observation (no neighbors) Island id: [2] >>> w.weights[0] [0.01, 0.007999999999999998] """
[docs] def __init__( self, data, threshold, p=2, alpha=-1.0, binary=True, ids=None, build_sp=True, silence_warnings=False, distance_metric="euclidean", radius=None, ): """Casting to floats is a work around for a bug in scipy.spatial. See detail in pysal issue #126. """ if ids is not None: ids = list(ids) if radius is not None: distance_metric = "arc" self.p = p self.threshold = threshold self.binary = binary self.alpha = alpha self.build_sp = build_sp self.silence_warnings = silence_warnings if isKDTree(data): self.kdtree = data self.data = self.kdtree.data else: if self.build_sp: try: data = np.asarray(data) if data.dtype.kind != "f": data = data.astype(float) self.kdtree = KDTree( data, distance_metric=distance_metric, radius=radius ) self.data = self.kdtree.data except: # noqa: E722 raise ValueError("Could not make array from data") from None else: self.data = data self.kdtree = None self._band() neighbors, weights = self._distance_to_W(ids) W.__init__( self, neighbors, weights, ids, silence_warnings=self.silence_warnings )
[docs] @classmethod def from_shapefile(cls, filepath, threshold, idVariable=None, **kwargs): """ Distance-band based weights from shapefile Parameters ---------- shapefile : string shapefile name with shp suffix idVariable : string name of column in shapefile's DBF to use for ids Returns ------- Kernel Weights Object """ points = get_points_array_from_shapefile(filepath) ids = get_ids(filepath, idVariable) if idVariable is not None else None return cls.from_array(points, threshold, ids=ids, **kwargs)
[docs] @classmethod def from_array(cls, array, threshold, **kwargs): """ Construct a DistanceBand weights from an array. Supports all the same options as :class:`libpysal.weights.DistanceBand` """ return cls(array, threshold, **kwargs)
[docs] @classmethod def from_dataframe( cls, df, threshold, geom_col=None, ids=None, use_index=True, **kwargs ): """ Make DistanceBand weights from a dataframe. Parameters ---------- df : pandas.dataframe a dataframe with a geometry column that can be used to construct a W object geom_col : string the name of the column in `df` that contains the geometries. Defaults to active geometry column. ids : list-like, string a list-like of ids to use to index the spatial weights object or the name of the column to use as IDs. If nothing is provided, the dataframe index is used if `use_index=True` or a positional index is used if `use_index=False`. Order of the resulting W is not respected from this list. use_index : bool use index of `df` as `ids` to index the spatial weights object. """ if geom_col is None: geom_col = df.geometry.name pts = get_points_array(df[geom_col]) if ids is None and use_index: ids = df.index.tolist() elif isinstance(ids, str): ids = df[ids].tolist() return cls(pts, threshold, ids=ids, **kwargs)
def _band(self): """Find all pairs within threshold.""" if self.build_sp: self.dmat = self.kdtree.sparse_distance_matrix( self.kdtree, max_distance=self.threshold, p=self.p ).tocsr() else: if str(self.kdtree).split(".")[-1][0:10] == "Arc_KDTree": raise TypeError( "Unable to calculate dense arc distance matrix;" ' parameter "build_sp" must be set to True for arc' " distance type weight" ) self.dmat = self._spdistance_matrix(self.data, self.data, self.threshold) def _distance_to_W(self, ids=None): if self.binary: self.dmat[self.dmat > 0] = 1 self.dmat.eliminate_zeros() temp_w = WSP2W( WSP(self.dmat, id_order=ids), silence_warnings=self.silence_warnings ) neighbors = temp_w.neighbors weight_keys = list(temp_w.weights.keys()) weight_vals = list(temp_w.weights.values()) weights = dict( list(zip(weight_keys, list(map(list, weight_vals)), strict=True)) ) return neighbors, weights else: weighted = self.dmat.power(self.alpha) weighted[weighted == np.inf] = 0 weighted.eliminate_zeros() temp_w = WSP2W( WSP(weighted, id_order=ids), silence_warnings=self.silence_warnings ) neighbors = temp_w.neighbors weight_keys = list(temp_w.weights.keys()) weight_vals = list(temp_w.weights.values()) weights = dict( list(zip(weight_keys, list(map(list, weight_vals)), strict=True)) ) return neighbors, weights def _spdistance_matrix(self, x, y, threshold=None): dist = distance_matrix(x, y) if threshold is not None: zeros = dist > threshold dist[zeros] = 0 return sp.csr_matrix(dist)
def _test(): import doctest # the following line could be used to define an alternative to the # '<BLANKLINE>' flag doctest.BLANKLINE_MARKER = 'something better than <BLANKLINE>' start_suppress = np.get_printoptions()["suppress"] np.set_printoptions(suppress=True) doctest.testmod() np.set_printoptions(suppress=start_suppress) if __name__ == "__main__": _test()