Source code for libpysal.weights.set_operations

"""
Set-like manipulation of weights matrices.
"""

__author__ = "Sergio J. Rey <srey@asu.edu>, Charles Schmidt <schmidtc@gmail.com>, David Folch <david.folch@asu.edu>, Dani Arribas-Bel <darribas@asu.edu>"

import copy
from .weights import W, WSP
from scipy.sparse import isspmatrix_csr
from numpy import ones

__all__ = ['w_union', 'w_intersection', 'w_difference',
           'w_symmetric_difference', 'w_subset', 'w_clip']


[docs]def w_union(w1, w2, **kwargs): """ Returns a binary weights object, w, that includes all neighbor pairs that exist in either w1 or w2. Parameters ---------- w1 : W object w2 : W object **kwargs : keyword arguments optional arguments for :class:`pysal.weights.W` Returns ------- w : W object Notes ----- ID comparisons are performed using ==, therefore the integer ID 2 is equivalent to the float ID 2.0. Returns a matrix with all the unique IDs from w1 and w2. Examples -------- Construct rook weights matrices for two regions, one is 4x4 (16 areas) and the other is 6x4 (24 areas). A union of these two weights matrices results in the new weights matrix matching the larger one. >>> from libpysal.weights import lat2W, w_union >>> w1 = lat2W(4,4) >>> w2 = lat2W(6,4) >>> w = w_union(w1, w2) >>> w1[0] == w[0] True >>> w1.neighbors[15] [11, 14] >>> w2.neighbors[15] [11, 14, 19] >>> w.neighbors[15] [19, 11, 14] """ neighbors = dict(list(w1.neighbors.items())) for i in w2.neighbors: if i in neighbors: add_neigh = set(neighbors[i]).union(set(w2.neighbors[i])) neighbors[i] = list(add_neigh) else: neighbors[i] = copy.copy(w2.neighbors[i]) return W(neighbors, **kwargs)
[docs]def w_intersection(w1, w2, w_shape='w1', **kwargs): """ Returns a binary weights object, w, that includes only those neighbor pairs that exist in both w1 and w2. Parameters ---------- w1 : W object w2 : W object w_shape : string Defines the shape of the returned weights matrix. 'w1' returns a matrix with the same IDs as w1; 'all' returns a matrix with all the unique IDs from w1 and w2; and 'min' returns a matrix with only the IDs occurring in both w1 and w2. **kwargs : keyword arguments optional arguments for :class:`pysal.weights.W` Returns ------- w : W object Notes ----- ID comparisons are performed using ==, therefore the integer ID 2 is equivalent to the float ID 2.0. Examples -------- Construct rook weights matrices for two regions, one is 4x4 (16 areas) and the other is 6x4 (24 areas). An intersection of these two weights matrices results in the new weights matrix matching the smaller one. >>> from libpysal.weights import lat2W, w_intersection >>> w1 = lat2W(4,4) >>> w2 = lat2W(6,4) >>> w = w_intersection(w1, w2) >>> w1[0] == w[0] True >>> w1.neighbors[15] [11, 14] >>> w2.neighbors[15] [11, 14, 19] >>> w.neighbors[15] [11, 14] """ if w_shape == 'w1': neigh_keys = list(w1.neighbors.keys()) elif w_shape == 'all': neigh_keys = set(w1.neighbors.keys()).union(set(w2.neighbors.keys())) elif w_shape == 'min': neigh_keys = set(w1.neighbors.keys( )).intersection(set(w2.neighbors.keys())) else: raise Exception("invalid string passed to w_shape") neighbors = {} for i in neigh_keys: if i in w1.neighbors and i in w2.neighbors: add_neigh = set(w1.neighbors[i]).intersection(set(w2.neighbors[i])) neighbors[i] = list(add_neigh) else: neighbors[i] = [] return W(neighbors, **kwargs)
[docs]def w_difference(w1, w2, w_shape='w1', constrained=True, **kwargs): """ Returns a binary weights object, w, that includes only neighbor pairs in w1 that are not in w2. The w_shape and constrained parameters determine which pairs in w1 that are not in w2 are returned. Parameters ---------- w1 : W object w2 : W object w_shape : string Defines the shape of the returned weights matrix. 'w1' returns a matrix with the same IDs as w1; 'all' returns a matrix with all the unique IDs from w1 and w2; and 'min' returns a matrix with the IDs occurring in w1 and not in w2. constrained : boolean If False then the full set of neighbor pairs in w1 that are not in w2 are returned. If True then those pairs that would not be possible if w_shape='min' are dropped. Ignored if w_shape is set to 'min'. **kwargs : keyword arguments optional arguments for :class:`pysal.weights.W` Returns ------- w : W object Notes ----- ID comparisons are performed using ==, therefore the integer ID 2 is equivalent to the float ID 2.0. Examples -------- Construct rook (w2) and queen (w1) weights matrices for two 4x4 regions (16 areas). A queen matrix has all the joins a rook matrix does plus joins between areas that share a corner. The new matrix formed by the difference of rook from queen contains only join at corners (typically called a bishop matrix). Note that the difference of queen from rook would result in a weights matrix with no joins. >>> from libpysal.weights import lat2W, w_difference >>> w1 = lat2W(4,4,rook=False) >>> w2 = lat2W(4,4,rook=True) >>> w = w_difference(w1, w2, constrained=False) >>> w1[0] == w[0] False >>> w1.neighbors[15] [10, 11, 14] >>> w2.neighbors[15] [11, 14] >>> w.neighbors[15] [10] """ if w_shape == 'w1': neigh_keys = list(w1.neighbors.keys()) elif w_shape == 'all': neigh_keys = set(w1.neighbors.keys()).union(set(w2.neighbors.keys())) elif w_shape == 'min': neigh_keys = set( w1.neighbors.keys()).difference(set(w2.neighbors.keys())) if not neigh_keys: raise Exception("returned an empty weights matrix") else: raise Exception("invalid string passed to w_shape") neighbors = {} for i in neigh_keys: if i in w1.neighbors: if i in w2.neighbors: add_neigh = set(w1.neighbors[i] ).difference(set(w2.neighbors[i])) neighbors[i] = list(add_neigh) else: neighbors[i] = copy.copy(w1.neighbors[i]) else: neighbors[i] = [] if constrained or w_shape == 'min': constrained_keys = set( w1.neighbors.keys()).difference(set(w2.neighbors.keys())) island_keys = set(neighbors.keys()).difference(constrained_keys) for i in island_keys: neighbors[i] = [] for i in constrained_keys: neighbors[i] = list( set(neighbors[i]).intersection(constrained_keys)) return W(neighbors, **kwargs)
[docs]def w_symmetric_difference(w1, w2, w_shape='all', constrained=True, **kwargs): """ Returns a binary weights object, w, that includes only neighbor pairs that are not shared by w1 and w2. The w_shape and constrained parameters determine which pairs that are not shared by w1 and w2 are returned. Parameters ---------- w1 : W object w2 : W object w_shape : string Defines the shape of the returned weights matrix. 'all' returns a matrix with all the unique IDs from w1 and w2; and 'min' returns a matrix with the IDs not shared by w1 and w2. constrained : boolean If False then the full set of neighbor pairs that are not shared by w1 and w2 are returned. If True then those pairs that would not be possible if w_shape='min' are dropped. Ignored if w_shape is set to 'min'. **kwargs : keyword arguments optional arguments for :class:`pysal.weights.W` Returns ------- w : W object Notes ----- ID comparisons are performed using ==, therefore the integer ID 2 is equivalent to the float ID 2.0. Examples -------- Construct queen weights matrix for a 4x4 (16 areas) region (w1) and a rook matrix for a 6x4 (24 areas) region (w2). The symmetric difference of these two matrices (with w_shape set to 'all' and constrained set to False) contains the corner joins in the overlap area, all the joins in the non-overlap area. >>> from libpysal.weights import lat2W, w_symmetric_difference >>> w1 = lat2W(4,4,rook=False) >>> w2 = lat2W(6,4,rook=True) >>> w = w_symmetric_difference(w1, w2, constrained=False) >>> w1[0] == w[0] False >>> w1.neighbors[15] [10, 11, 14] >>> w2.neighbors[15] [11, 14, 19] >>> set(w.neighbors[15]) == set([10, 19]) True """ if w_shape == 'all': neigh_keys = set(w1.neighbors.keys()).union(set(w2.neighbors.keys())) elif w_shape == 'min': neigh_keys = set(w1.neighbors.keys( )).symmetric_difference(set(w2.neighbors.keys())) else: raise Exception("invalid string passed to w_shape") neighbors = {} for i in neigh_keys: if i in w1.neighbors: if i in w2.neighbors: add_neigh = set(w1.neighbors[i]).symmetric_difference( set(w2.neighbors[i])) neighbors[i] = list(add_neigh) else: neighbors[i] = copy.copy(w1.neighbors[i]) elif i in w2.neighbors: neighbors[i] = copy.copy(w2.neighbors[i]) else: neighbors[i] = [] if constrained or w_shape == 'min': constrained_keys = set( w1.neighbors.keys()).difference(set(w2.neighbors.keys())) island_keys = set(neighbors.keys()).difference(constrained_keys) for i in island_keys: neighbors[i] = [] for i in constrained_keys: neighbors[i] = list( set(neighbors[i]).intersection(constrained_keys)) return W(neighbors, **kwargs)
[docs]def w_subset(w1, ids, **kwargs): """ Returns a binary weights object, w, that includes only those observations in ids. Parameters ---------- w1 : W object ids : list A list containing the IDs to be include in the returned weights object. **kwargs : keyword arguments optional arguments for :class:`pysal.weights.W` Returns ------- w : W object Examples -------- Construct a rook weights matrix for a 6x4 region (24 areas). By default PySAL assigns integer IDs to the areas in a region. By passing in a list of integers from 0 to 15, the first 16 areas are extracted from the previous weights matrix, and only those joins relevant to the new region are retained. >>> from libpysal.weights import lat2W, w_subset >>> w1 = lat2W(6,4) >>> ids = range(16) >>> w = w_subset(w1, ids) >>> w1[0] == w[0] True >>> w1.neighbors[15] [11, 14, 19] >>> w.neighbors[15] [11, 14] """ neighbors = {} ids_set = set(list(ids)) for i in ids: if i in w1.neighbors: neigh_add = ids_set.intersection(set(w1.neighbors[i])) neighbors[i] = list(neigh_add) else: neighbors[i] = [] return W(neighbors, id_order=list(ids), **kwargs)
[docs]def w_clip(w1, w2, outSP=True, **kwargs): ''' Clip a continuous W object (w1) with a different W object (w2) so only cells where w2 has a non-zero value remain with non-zero values in w1. Checks on w1 and w2 are performed to make sure they conform to the appropriate format and, if not, they are converted. Parameters ---------- w1 : W W, scipy.sparse.csr.csr_matrix Potentially continuous weights matrix to be clipped. The clipped matrix wc will have at most the same elements as w1. w2 : W W, scipy.sparse.csr.csr_matrix Weights matrix to use as shell to clip w1. Automatically converted to binary format. Only non-zero elements in w2 will be kept non-zero in wc. NOTE: assumed to be of the same shape as w1 outSP : boolean If True (default) return sparse version of the clipped W, if False, return W object of the clipped matrix **kwargs : keyword arguments optional arguments for :class:`pysal.weights.W` Returns ------- wc : W W, scipy.sparse.csr.csr_matrix Clipped W object (sparse if outSP=Ture). It inherits ``id_order`` from w1. Examples -------- >>> from libpysal.weights import lat2W First create a W object from a lattice using queen contiguity and row-standardize it (note that these weights will stay when we clip the object, but they will not neccesarily represent a row-standardization anymore): >>> w1 = lat2W(3, 2, rook=False) >>> w1.transform = 'R' We will clip that geography assuming observations 0, 2, 3 and 4 belong to one group and 1, 5 belong to another group and we don't want both groups to interact with each other in our weights (i.e. w_ij = 0 if i and j in different groups). For that, we use the following method: >>> import libpysal >>> w2 = libpysal.weights.block_weights(['r1', 'r2', 'r1', 'r1', 'r1', 'r2']) To illustrate that w2 will only be considered as binary even when the object passed is not, we can row-standardize it >>> w2.transform = 'R' The clipped object ``wc`` will contain only the spatial queen relationships that occur within one group ('r1' or 'r2') but will have gotten rid of those that happen across groups >>> wcs = libpysal.weights.w_clip(w1, w2, outSP=True) This will create a sparse object (recommended when n is large). >>> wcs.sparse.toarray() array([[0. , 0. , 0.33333333, 0.33333333, 0. , 0. ], [0. , 0. , 0. , 0. , 0. , 0. ], [0.2 , 0. , 0. , 0.2 , 0.2 , 0. ], [0.2 , 0. , 0.2 , 0. , 0.2 , 0. ], [0. , 0. , 0.33333333, 0.33333333, 0. , 0. ], [0. , 0. , 0. , 0. , 0. , 0. ]]) If we wanted an original W object, we can control that with the argument ``outSP``: >>> wc = libpysal.weights.w_clip(w1, w2, outSP=False) >>> wc.full()[0] array([[0. , 0. , 0.33333333, 0.33333333, 0. , 0. ], [0. , 0. , 0. , 0. , 0. , 0. ], [0.2 , 0. , 0. , 0.2 , 0.2 , 0. ], [0.2 , 0. , 0.2 , 0. , 0.2 , 0. ], [0. , 0. , 0.33333333, 0.33333333, 0. , 0. ], [0. , 0. , 0. , 0. , 0. , 0. ]]) You can check they are actually the same: >>> wcs.sparse.toarray() == wc.full()[0] array([[ True, True, True, True, True, True], [ True, True, True, True, True, True], [ True, True, True, True, True, True], [ True, True, True, True, True, True], [ True, True, True, True, True, True], [ True, True, True, True, True, True]]) ''' from .util import WSP2W if not w1.id_order: w1.id_order = None id_order = w1.id_order if not isspmatrix_csr(w1): w1 = w1.sparse if not isspmatrix_csr(w2): w2 = w2.sparse w2.data = ones(w2.data.shape) wc = w1.multiply(w2) wc = WSP(wc, id_order=id_order) if not outSP: wc = WSP2W(wc, **kwargs) return wc