"""
Set-like manipulation of weights matrices.

Provides union, intersection, difference, symmetric difference, subset and
clip operations on weights objects.
"""
# Module authors, kept as one comma-separated string.
__author__ = (
"Sergio J. Rey <srey@asu.edu>, "
"Charles Schmidt <schmidtc@gmail.com>, "
"David Folch <david.folch@asu.edu>, "
"Dani Arribas-Bel <darribas@asu.edu>"
)
import copy
from numpy import ones
from scipy.sparse import isspmatrix_csr
from .weights import WSP, W
# Public API: the names exported by a star-import of this module.
__all__ = [
"w_union",
"w_intersection",
"w_difference",
"w_symmetric_difference",
"w_subset",
"w_clip",
]
def w_union(w1, w2, **kwargs):
    """
    Returns a binary weights object, w, that includes all neighbor pairs that
    exist in either w1 or w2.

    Parameters
    ----------
    w1 : W
        object
    w2 : W
        object
    **kwargs : keyword arguments
        optional arguments for :class:`pysal.weights.W`

    Returns
    -------
    w : W
        object

    Notes
    -----
    ID comparisons are performed using ==, therefore the integer ID 2 is
    equivalent to the float ID 2.0. Returns a matrix with all the unique IDs
    from w1 and w2.

    The neighbor lists handed to the new weights object are always fresh
    lists; none of them is the same list object as one held by w1 or w2.

    Examples
    --------
    Construct rook weights matrices for two regions, one is 4x4 (16 areas)
    and the other is 6x4 (24 areas). A union of these two weights matrices
    results in the new weights matrix matching the larger one.

    >>> from libpysal.weights import lat2W, w_union
    >>> w1 = lat2W(4,4)
    >>> w2 = lat2W(6,4)
    >>> w = w_union(w1, w2)
    >>> w1[0] == w[0]
    True
    >>> w1.neighbors[15]
    [11, 14]
    >>> w2.neighbors[15]
    [11, 14, 19]
    >>> w.neighbors[15]
    [19, 11, 14]
    """
    # Copy every list taken from w1 so that IDs found only in w1 do not end
    # up sharing a list object with w1 (the w2-only branch already copies).
    neighbors = {i: copy.copy(neigh) for i, neigh in w1.neighbors.items()}
    for i, neigh in w2.neighbors.items():
        if i in neighbors:
            # ID present in both inputs: merge the two neighbor lists.
            neighbors[i] = list(set(neighbors[i]).union(set(neigh)))
        else:
            neighbors[i] = copy.copy(neigh)
    return W(neighbors, **kwargs)
def w_intersection(w1, w2, w_shape="w1", **kwargs):
    """
    Build a binary weights object holding only the neighbor pairs that are
    present in both w1 and w2.

    Parameters
    ----------
    w1 : W
        object
    w2 : W
        object
    w_shape : string
        Selects which IDs appear in the returned weights matrix.
        'w1' keeps the IDs of w1; 'all' keeps every unique ID found in
        w1 or w2; 'min' keeps only the IDs found in both w1 and w2.
    **kwargs : keyword arguments
        optional arguments for :class:`pysal.weights.W`

    Returns
    -------
    w : W
        object

    Raises
    ------
    Exception
        If w_shape is not one of 'w1', 'all' or 'min'.

    Notes
    -----
    ID comparisons are performed using ==, therefore the integer ID 2 is
    equivalent to the float ID 2.0. An ID that is missing from either input
    gets an empty neighbor list.

    Examples
    --------
    Construct rook weights matrices for two regions, one is 4x4 (16 areas)
    and the other is 6x4 (24 areas). An intersection of these two weights
    matrices results in the new weights matrix matching the smaller one.

    >>> from libpysal.weights import lat2W, w_intersection
    >>> w1 = lat2W(4,4)
    >>> w2 = lat2W(6,4)
    >>> w = w_intersection(w1, w2)
    >>> w1[0] == w[0]
    True
    >>> w1.neighbors[15]
    [11, 14]
    >>> w2.neighbors[15]
    [11, 14, 19]
    >>> w.neighbors[15]
    [11, 14]
    """
    if w_shape not in ("w1", "all", "min"):
        raise Exception("invalid string passed to w_shape")

    first, second = w1.neighbors, w2.neighbors
    if w_shape == "w1":
        ids = list(first.keys())
    elif w_shape == "all":
        ids = set(first.keys()).union(set(second.keys()))
    else:
        ids = set(first.keys()).intersection(set(second.keys()))

    shared = {}
    for key in ids:
        if key not in first or key not in second:
            # Nothing can be shared for an ID one of the inputs lacks.
            shared[key] = []
            continue
        common = set(first[key]).intersection(set(second[key]))
        shared[key] = list(common)
    return W(shared, **kwargs)
def w_difference(w1, w2, w_shape="w1", constrained=True, **kwargs):
    """
    Build a binary weights object holding the neighbor pairs of w1 that do
    not occur in w2. Which of those pairs survive is controlled by the
    w_shape and constrained parameters.

    Parameters
    ----------
    w1 : W
        object
    w2 : W
        object
    w_shape : string
        Selects which IDs appear in the returned weights matrix.
        'w1' keeps the IDs of w1; 'all' keeps every unique ID found in
        w1 or w2; 'min' keeps the IDs that are in w1 but not in w2.
    constrained : boolean
        If False, every neighbor pair of w1 that is not in w2 is returned.
        If True, pairs that could not exist under w_shape='min' are
        dropped. Ignored if w_shape is set to 'min'.
    **kwargs : keyword arguments
        optional arguments for :class:`pysal.weights.W`

    Returns
    -------
    w : W
        object

    Raises
    ------
    Exception
        If w_shape is not one of 'w1', 'all' or 'min', or if w_shape is
        'min' and no ID of w1 is absent from w2.

    Notes
    -----
    ID comparisons are performed using ==, therefore the integer ID 2 is
    equivalent to the float ID 2.0.

    Examples
    --------
    Construct rook (w2) and queen (w1) weights matrices for two 4x4 regions
    (16 areas). A queen matrix has all the joins a rook matrix does plus joins
    between areas that share a corner. The new matrix formed by the difference
    of rook from queen contains only join at corners (typically called a
    bishop matrix). Note that the difference of queen from rook would result
    in a weights matrix with no joins.

    >>> from libpysal.weights import lat2W, w_difference
    >>> w1 = lat2W(4,4,rook=False)
    >>> w2 = lat2W(4,4,rook=True)
    >>> w = w_difference(w1, w2, constrained=False)
    >>> w1[0] == w[0]
    False
    >>> w1.neighbors[15]
    [10, 11, 14]
    >>> w2.neighbors[15]
    [11, 14]
    >>> w.neighbors[15]
    [10]
    """
    if w_shape not in ("w1", "all", "min"):
        raise Exception("invalid string passed to w_shape")

    base, removed = w1.neighbors, w2.neighbors
    if w_shape == "w1":
        ids = list(base.keys())
    elif w_shape == "all":
        ids = set(base.keys()).union(set(removed.keys()))
    else:
        ids = set(base.keys()).difference(set(removed.keys()))
        if not ids:
            raise Exception("returned an empty weights matrix")

    remaining = {}
    for key in ids:
        if key not in base:
            # ID contributed by w2 only: it has no joins to keep.
            remaining[key] = []
        elif key in removed:
            leftover = set(base[key]).difference(set(removed[key]))
            remaining[key] = list(leftover)
        else:
            remaining[key] = copy.copy(base[key])

    if constrained or w_shape == "min":
        # Only IDs exclusive to w1 may keep joins, and only with each other.
        only_in_w1 = set(base.keys()).difference(set(removed.keys()))
        for key in set(remaining.keys()).difference(only_in_w1):
            remaining[key] = []
        for key in only_in_w1:
            remaining[key] = list(set(remaining[key]).intersection(only_in_w1))
    return W(remaining, **kwargs)
def w_symmetric_difference(w1, w2, w_shape="all", constrained=True, **kwargs):
    """
    Returns a binary weights object, w, that includes only neighbor pairs
    that are not shared by w1 and w2. The w_shape and constrained parameters
    determine which pairs that are not shared by w1 and w2 are returned.

    Parameters
    ----------
    w1 : W
        object
    w2 : W
        object
    w_shape : string
        Defines the shape of the returned weights matrix.
        'all' returns a matrix with all the unique IDs
        from w1 and w2; and 'min' returns
        a matrix with the IDs not shared by w1 and w2.
    constrained : boolean
        If False then the full set of neighbor pairs that are not
        shared by w1 and w2 are returned. If True then those pairs
        that would not be possible if w_shape='min' are dropped.
        Ignored if w_shape is set to 'min'.
    **kwargs : keyword arguments
        optional arguments for :class:`pysal.weights.W`

    Returns
    -------
    w : W
        object

    Raises
    ------
    Exception
        If w_shape is neither 'all' nor 'min'.

    Notes
    -----
    ID comparisons are performed using ==, therefore the integer ID 2 is
    equivalent to the float ID 2.0.

    When the result is constrained, the IDs allowed to keep joins are those
    found in exactly one of w1 and w2 (the symmetric difference of the two
    ID sets), so IDs that exist only in w2 are treated the same way as IDs
    that exist only in w1.

    Examples
    --------
    Construct queen weights matrix for a 4x4 (16 areas) region (w1) and a rook
    matrix for a 6x4 (24 areas) region (w2). The symmetric difference of these
    two matrices (with w_shape set to 'all' and constrained set to False)
    contains the corner joins in the overlap area, all the joins in the
    non-overlap area.

    >>> from libpysal.weights import lat2W, w_symmetric_difference
    >>> w1 = lat2W(4,4,rook=False)
    >>> w2 = lat2W(6,4,rook=True)
    >>> w = w_symmetric_difference(w1, w2, constrained=False)
    >>> w1[0] == w[0]
    False
    >>> w1.neighbors[15]
    [10, 11, 14]
    >>> w2.neighbors[15]
    [11, 14, 19]
    >>> set(w.neighbors[15]) == set([10, 19])
    True
    """
    if w_shape == "all":
        neigh_keys = set(w1.neighbors.keys()).union(set(w2.neighbors.keys()))
    elif w_shape == "min":
        neigh_keys = set(w1.neighbors.keys()).symmetric_difference(
            set(w2.neighbors.keys())
        )
    else:
        raise Exception("invalid string passed to w_shape")

    neighbors = {}
    for i in neigh_keys:
        in_w1 = i in w1.neighbors
        in_w2 = i in w2.neighbors
        if in_w1 and in_w2:
            add_neigh = set(w1.neighbors[i]).symmetric_difference(
                set(w2.neighbors[i])
            )
            neighbors[i] = list(add_neigh)
        elif in_w1:
            neighbors[i] = copy.copy(w1.neighbors[i])
        else:
            # Every key in neigh_keys comes from w1 or w2, so this is a
            # w2-only ID.
            neighbors[i] = copy.copy(w2.neighbors[i])

    if constrained or w_shape == "min":
        # The 'min' shape for this operation is the set of IDs found in
        # exactly one input, so the constraint must use the symmetric
        # difference of the ID sets. A one-sided difference (w1 minus w2)
        # would wrongly turn every w2-only ID into an island.
        constrained_keys = set(w1.neighbors.keys()).symmetric_difference(
            set(w2.neighbors.keys())
        )
        island_keys = set(neighbors.keys()).difference(constrained_keys)
        for i in island_keys:
            neighbors[i] = []
        for i in constrained_keys:
            neighbors[i] = list(set(neighbors[i]).intersection(constrained_keys))
    return W(neighbors, **kwargs)
def w_subset(w1, ids, **kwargs):
    """
    Returns a binary weights object, w, that includes only those
    observations in ids.

    Parameters
    ----------
    w1 : W
        object
    ids : list
        A list containing the IDs to be included in the returned weights
        object. Any iterable is accepted, including one-shot iterators;
        it is materialized into a list once on entry.
    **kwargs : keyword arguments
        optional arguments for :class:`pysal.weights.W`

    Returns
    -------
    w : W
        object

    Notes
    -----
    The order of ``ids`` is passed on as the ``id_order`` of the result.
    An ID that is not present in w1 gets an empty neighbor list.

    Examples
    --------
    Construct a rook weights matrix for a 6x4 region (24 areas). By default
    PySAL assigns integer IDs to the areas in a region. By passing in a list
    of integers from 0 to 15, the first 16 areas are extracted from the
    previous weights matrix, and only those joins relevant to the new region
    are retained.

    >>> from libpysal.weights import lat2W, w_subset
    >>> w1 = lat2W(6,4)
    >>> ids = range(16)
    >>> w = w_subset(w1, ids)
    >>> w1[0] == w[0]
    True
    >>> w1.neighbors[15]
    [11, 14, 19]
    >>> w.neighbors[15]
    [11, 14]
    """
    # ids is traversed three times below (set, loop, id_order). Materialize
    # it first so a generator or other one-shot iterable is not exhausted by
    # the first pass, which would silently produce an empty result.
    ids = list(ids)
    ids_set = set(ids)
    neighbors = {}
    for i in ids:
        if i in w1.neighbors:
            # Keep only the joins whose other end is also being retained.
            neighbors[i] = list(ids_set.intersection(set(w1.neighbors[i])))
        else:
            neighbors[i] = []
    return W(neighbors, id_order=ids, **kwargs)
def w_clip(w1, w2, outSP=True, **kwargs):  # noqa: N803
    """
    Clip a continuous W object (w1) with a different W object (w2) so only
    cells where w2 has a non-zero value remain with non-zero values in w1.

    Checks on w1 and w2 are performed to make sure they conform to the
    appropriate format and, if not, they are converted.

    Parameters
    ----------
    w1 : W
        W, scipy.sparse.csr.csr_matrix
        Potentially continuous weights matrix to be clipped.
        The clipped matrix wc will have at most the same
        elements as w1.
    w2 : W
        W, scipy.sparse.csr.csr_matrix
        Weights matrix to use as shell to clip w1. Treated as binary:
        only non-zero elements in w2 will be kept non-zero in wc.
        The binarization is done on a private copy, so the matrix passed
        in is not modified.
        NOTE: assumed to be of the same shape as w1
    outSP : boolean
        If True (default) return sparse version of the clipped W,
        if False, return W object of the clipped matrix
    **kwargs : keyword arguments
        optional arguments for :class:`pysal.weights.W`

    Returns
    -------
    wc : W
        W, scipy.sparse.csr.csr_matrix
        Clipped W object (sparse if outSP=True).
        It inherits ``id_order`` from w1 when w1 is a weights object; when
        w1 is a bare sparse matrix there are no IDs to inherit and
        ``id_order=None`` is used.

    Examples
    --------
    >>> from libpysal.weights import lat2W

    First create a W object from a lattice using queen contiguity and
    row-standardize it (note that these weights will stay when we clip the
    object, but they will not necessarily represent a row-standardization
    anymore):

    >>> w1 = lat2W(3, 2, rook=False)
    >>> w1.transform = 'R'

    We will clip that geography assuming observations 0, 2, 3 and 4 belong to
    one group and 1, 5 belong to another group and we don't want both groups
    to interact with each other in our weights (i.e. w_ij = 0 if i and j in
    different groups). For that, we use the following method:

    >>> import libpysal
    >>> w2 = libpysal.weights.block_weights(['r1', 'r2', 'r1', 'r1', 'r1', 'r2'])

    To illustrate that w2 will only be considered as binary even when the
    object passed is not, we can row-standardize it

    >>> w2.transform = 'R'

    The clipped object ``wc`` will contain only the spatial queen
    relationships that occur within one group ('r1' or 'r2') but will have
    gotten rid of those that happen across groups

    >>> wcs = libpysal.weights.w_clip(w1, w2, outSP=True)

    This will create a sparse object (recommended when n is large).

    >>> wcs.sparse.toarray()
    array([[0.        , 0.        , 0.33333333, 0.33333333, 0.        ,
            0.        ],
           [0.        , 0.        , 0.        , 0.        , 0.        ,
            0.        ],
           [0.2       , 0.        , 0.        , 0.2       , 0.2       ,
            0.        ],
           [0.2       , 0.        , 0.2       , 0.        , 0.2       ,
            0.        ],
           [0.        , 0.        , 0.33333333, 0.33333333, 0.        ,
            0.        ],
           [0.        , 0.        , 0.        , 0.        , 0.        ,
            0.        ]])

    If we wanted an original W object, we can control that with the argument
    ``outSP``:

    >>> wc = libpysal.weights.w_clip(w1, w2, outSP=False)
    >>> wc.full()[0]
    array([[0.        , 0.        , 0.33333333, 0.33333333, 0.        ,
            0.        ],
           [0.        , 0.        , 0.        , 0.        , 0.        ,
            0.        ],
           [0.2       , 0.        , 0.        , 0.2       , 0.2       ,
            0.        ],
           [0.2       , 0.        , 0.2       , 0.        , 0.2       ,
            0.        ],
           [0.        , 0.        , 0.33333333, 0.33333333, 0.        ,
            0.        ],
           [0.        , 0.        , 0.        , 0.        , 0.        ,
            0.        ]])

    You can check they are actually the same:

    >>> wcs.sparse.toarray() == wc.full()[0]
    array([[ True,  True,  True,  True,  True,  True],
           [ True,  True,  True,  True,  True,  True],
           [ True,  True,  True,  True,  True,  True],
           [ True,  True,  True,  True,  True,  True],
           [ True,  True,  True,  True,  True,  True],
           [ True,  True,  True,  True,  True,  True]])
    """
    if isspmatrix_csr(w1):
        # A bare csr matrix has no ``id_order`` attribute; reading it would
        # raise AttributeError even though sparse input is documented.
        id_order = None
    else:
        if not w1.id_order:
            w1.id_order = None
        id_order = w1.id_order
        w1 = w1.sparse
    if not isspmatrix_csr(w2):
        w2 = w2.sparse

    # Binarize a private copy of the shell: assigning ``data`` on w2 itself
    # would overwrite the weights of the matrix the caller passed in.
    shell = w2.copy()
    # Drop explicitly stored zeros first so they are not promoted to ones.
    shell.eliminate_zeros()
    shell.data = ones(shell.data.shape)

    wc = WSP(w1.multiply(shell), id_order=id_order)
    if not outSP:
        # Imported here, at the point of use, rather than at module level.
        from .util import WSP2W

        wc = WSP2W(wc, **kwargs)
    return wc