Source code for libpysal.weights.raster

# ruff: noqa: B006, N802

import os
import sys
from warnings import warn

import numpy as np
from scipy import sparse

from .util import lat2SW
from .weights import WSP, W

if os.path.basename(sys.argv[0]) in ("pytest", "py.test"):

    def jit(*dec_args, **dec_kwargs):  # noqa: ARG001
        """
        decorator mimicking numba.jit
        """

        def intercepted_function(f, *f_args, **f_kwargs):  # noqa: ARG001
            return f

        return intercepted_function

else:
    from ..common import jit

__all__ = ["da2W", "da2WSP", "w2da", "wsp2da", "testDataArray"]


def da2W(
    da,
    criterion="queen",
    z_value=None,
    coords_labels={},
    k=1,
    include_nodata=False,
    n_jobs=1,
    **kwargs,
):
    """
    Create a W object from an xarray.DataArray with an additional
    attribute index containing coordinate values of the raster
    in the form of Pandas.Index/MultiIndex.

    Parameters
    ----------
    da : xarray.DataArray
        Input 2D or 3D DataArray with shape=(z, y, x)
    criterion : {"rook", "queen"}
        Type of contiguity. Default is queen.
    z_value : int/string/float
        Select the z_value of a 3D DataArray with multiple layers.
    coords_labels : dictionary
        Pass dimension labels for coordinates and layers if they do not
        belong to default dimensions, which are (band/time, y/lat, x/lon)
        e.g. coords_labels = {"y_label": "latitude",
        "x_label": "longitude", "z_label": "year"}
        Default is {}, the empty dictionary.
    k : int
        Order of contiguity; this will select all neighbors up to the
        kth order. Default is 1.
    include_nodata : boolean
        If True, missing values will be assumed as non-missing when
        selecting higher_order neighbors. Default is False.
    n_jobs : int
        Number of cores to be used in the sparse weight construction.
        If -1, all available cores are used. Default is 1.
    **kwargs : keyword arguments
        Optional arguments for :class:`libpysal.weights.W`

    Returns
    -------
    w : libpysal.weights.W
        instance of spatial weights class W with an index attribute

    Notes
    -----
    1. Lower order contiguities are also selected.
    2. Returned object contains an `index` attribute that includes a
       `Pandas.MultiIndex` object from the DataArray.

    Examples
    --------
    >>> from libpysal.weights.raster import da2W, testDataArray
    >>> da = testDataArray().rename(
    ...     {'band': 'layer', 'x': 'longitude', 'y': 'latitude'})
    >>> da.dims
    ('layer', 'latitude', 'longitude')
    >>> da.shape
    (3, 4, 4)
    >>> da.coords
    Coordinates:
      * layer      (layer) int64 1 2 3
      * latitude   (latitude) float64 90.0 30.0 -30.0 -90.0
      * longitude  (longitude) float64 -180.0 -60.0 60.0 180.0
    >>> da.attrs
    {'nodatavals': (-32768.0,)}
    >>> coords_labels = {
    ...     "z_label": "layer",
    ...     "y_label": "latitude",
    ...     "x_label": "longitude"
    ... }
    >>> w = da2W(da, z_value=2, coords_labels=coords_labels)
    >>> "%.3f" % w.pct_nonzero
    '30.000'
    >>> w[(2, 90.0, 180.0)] == {(2, 90.0, 60.0): 1, (2, 30.0, 180.0): 1}
    True
    >>> len(w.index)
    10
    >>> w.index[:2]
    MultiIndex([(2, 90.0,  60.0),
                (2, 90.0, 180.0)],
               names=['layer', 'latitude', 'longitude'])

    See Also
    --------
    :class:`libpysal.weights.weights.W`
    """
    warn(
        "You are trying to build a full W object from an "
        "xarray.DataArray (raster) object. This computation "
        "can be very slow and does not scale well. It is "
        "recommended, if possible, to instead build a WSP object, "
        "which is more efficient and faster. You can do this by "
        "using the da2WSP method.",
        stacklevel=2,
    )
    wsp = da2WSP(da, criterion, z_value, coords_labels, k, include_nodata, n_jobs)
    w = wsp.to_W(**kwargs)

    # temporary addition of the index attribute
    w.index = wsp.index
    return w
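
# Illustrative sketch (added for exposition, not part of the original
# module): the warning above recommends the sparse route. Assuming xarray
# is installed, the recommended path mirrors what da2W does internally.
def _demo_wsp_route():
    da = testDataArray()          # small synthetic raster
    wsp = da2WSP(da, z_value=2)   # cheap sparse construction
    w = wsp.to_W()                # dense neighbor dicts, only when needed
    w.index = wsp.index           # carry the coordinate index over
    return w
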
def da2WSP(
    da,
    criterion="queen",
    z_value=None,
    coords_labels={},
    k=1,
    include_nodata=False,
    n_jobs=1,
):
    """
    Create a WSP object from an xarray.DataArray with an additional
    attribute index containing coordinate values of the raster
    in the form of Pandas.Index/MultiIndex.

    Parameters
    ----------
    da : xarray.DataArray
        Input 2D or 3D DataArray with shape=(z, y, x)
    criterion : {"rook", "queen"}
        Type of contiguity. Default is queen.
    z_value : int/string/float
        Select the z_value of a 3D DataArray with multiple layers.
    coords_labels : dictionary
        Pass dimension labels for coordinates and layers if they do not
        belong to default dimensions, which are (band/time, y/lat, x/lon)
        e.g. coords_labels = {"y_label": "latitude",
        "x_label": "longitude", "z_label": "year"}
        Default is {}, the empty dictionary.
    k : int
        Order of contiguity; this will select all neighbors up to the
        kth order. Default is 1.
    include_nodata : boolean
        If True, missing values will be assumed as non-missing when
        selecting higher_order neighbors. Default is False.
    n_jobs : int
        Number of cores to be used in the sparse weight construction.
        If -1, all available cores are used. Default is 1.

    Returns
    -------
    wsp : libpysal.weights.WSP
        instance of spatial weights class WSP with an index attribute

    Notes
    -----
    1. Lower order contiguities are also selected.
    2. Returned object contains an `index` attribute that includes a
       `Pandas.MultiIndex` object from the DataArray.

    Examples
    --------
    >>> from libpysal.weights.raster import da2WSP, testDataArray
    >>> da = testDataArray().rename(
    ...     {'band': 'layer', 'x': 'longitude', 'y': 'latitude'})
    >>> da.dims
    ('layer', 'latitude', 'longitude')
    >>> da.shape
    (3, 4, 4)
    >>> da.coords
    Coordinates:
      * layer      (layer) int64 1 2 3
      * latitude   (latitude) float64 90.0 30.0 -30.0 -90.0
      * longitude  (longitude) float64 -180.0 -60.0 60.0 180.0
    >>> da.attrs
    {'nodatavals': (-32768.0,)}
    >>> coords_labels = {
    ...     "z_label": "layer",
    ...     "y_label": "latitude",
    ...     "x_label": "longitude"
    ... }
    >>> wsp = da2WSP(da, z_value=2, coords_labels=coords_labels)
    >>> wsp.n
    10
    >>> pct_sp = wsp.sparse.nnz * 1.0 / wsp.n**2
    >>> "%.3f" % pct_sp
    '0.300'
    >>> print(wsp.sparse[4].todense())
    [[0 0 1 0 0 1 1 1 0 0]]
    >>> wsp.index[:2]
    MultiIndex([(2, 90.0,  60.0),
                (2, 90.0, 180.0)],
               names=['layer', 'latitude', 'longitude'])

    See Also
    --------
    :class:`libpysal.weights.weights.WSP`
    """
    z_id, coords_labels = _da_checker(da, z_value, coords_labels)
    shape = da.shape
    if z_id:
        slice_dict = {}
        slice_dict[coords_labels["z_label"]] = 0
        shape = da[slice_dict].shape
        slice_dict[coords_labels["z_label"]] = slice(z_id - 1, z_id)
        da = da[slice_dict]

    ser = da.to_series()
    # 46340**2 is the largest square that still fits in int32
    dtype = np.int32 if (shape[0] * shape[1]) < 46340**2 else np.int64
    if "nodatavals" in da.attrs and da.attrs["nodatavals"]:
        mask = (ser != da.attrs["nodatavals"][0]).to_numpy()
        ids = np.where(mask)[0]
        id_map = _idmap(ids, mask, dtype)
        ser = ser[ser != da.attrs["nodatavals"][0]]
    else:
        ids = np.arange(len(ser), dtype=dtype)
        id_map = ids.copy()

    n = len(ids)

    try:
        import numba  # noqa: F401
    except (ModuleNotFoundError, ImportError):
        warn(
            "numba cannot be imported, parallel processing "
            "and include_nodata functionality will be disabled. "
            "Falling back to slower method.",
            stacklevel=2,
        )
        include_nodata = False
        # Fallback method to build the sparse matrix
        sw = lat2SW(*shape, criterion)
        if "nodatavals" in da.attrs and da.attrs["nodatavals"]:
            sw = sw[mask]
            sw = sw[:, mask]
    else:
        k_nas = k if include_nodata else 1

        if n_jobs != 1:
            try:
                import joblib  # noqa: F401
            except (ModuleNotFoundError, ImportError):
                warn(
                    f"Parallel processing is requested (n_jobs={n_jobs}),"
                    f" but joblib cannot be imported. n_jobs will be set"
                    f" to 1.",
                    stacklevel=2,
                )
                n_jobs = 1

        if n_jobs == 1:
            sw_tup = _SWbuilder(
                *shape, ids, id_map, criterion, k_nas, dtype
            )  # -> (data, (row, col))
        else:
            if n_jobs == -1:
                n_jobs = os.cpu_count()
            # Parallel implementation
            sw_tup = _parSWbuilder(
                *shape, ids, id_map, criterion, k_nas, dtype, n_jobs
            )  # -> (data, (row, col))

        sw = sparse.csr_matrix(
            sw_tup,
            shape=(n, n),
            dtype=np.int8,
        )

    # Higher-order functionality: this uses the idea from
    # libpysal#313 for adding higher-order neighbors.
    # Since diagonal elements are also added in the result,
    # this method sets the diagonal elements to zero and
    # then eliminates zeros from the data. This changes the
    # sparsity of the csr_matrix!
    if k > 1 and not include_nodata:
        sw = sum(sw**x for x in range(1, k + 1))
        sw.setdiag(0)
        sw.eliminate_zeros()
        sw.data[:] = np.ones_like(sw.data, dtype=np.int8)

    index = ser.index
    wsp = WSP(sw, index=index)
    return wsp
def w2da(data, w, attrs={}, coords=None):
    """
    Creates an xarray.DataArray object from passed data aligned with a
    W object.

    Parameters
    ----------
    data : array/list/pd.Series
        1d array-like data with dimensionality conforming to w
    w : libpysal.weights.W
        Spatial weights object aligned with passed data
    attrs : Dictionary
        Attributes stored in dict related to DataArray, e.g. da.attrs
        Default is {}, the empty dictionary.
    coords : Dictionary/xarray.core.coordinates.DataArrayCoordinates
        Coordinates corresponding to DataArray, e.g. da.coords

    Returns
    -------
    da : xarray.DataArray
        instance of xarray.DataArray

    Examples
    --------
    >>> import numpy as np
    >>> from libpysal.weights.raster import da2W, testDataArray, w2da
    >>> da = testDataArray()
    >>> da.shape
    (3, 4, 4)
    >>> w = da2W(da, z_value=2)
    >>> data = np.random.randint(0, 255, len(w.index))
    >>> da1 = w2da(data, w)
    """
    if not isinstance(w, W):
        raise TypeError("w must be an instance of weights.W")
    if hasattr(w, "index"):
        da = _index2da(data, w.index, attrs, coords)
    else:
        raise AttributeError(
            "This method requires the `w` object to include an `index` "
            "attribute that is built as a `pandas.MultiIndex` object."
        )
    return da
def wsp2da(data, wsp, attrs={}, coords=None):
    """
    Creates an xarray.DataArray object from passed data aligned with a
    WSP object.

    Parameters
    ----------
    data : array/list/pd.Series
        1d array-like data with dimensionality conforming to wsp
    wsp : libpysal.weights.WSP
        Sparse weights object aligned with passed data
    attrs : Dictionary
        Attributes stored in dict related to DataArray, e.g. da.attrs
        Default is {}, the empty dictionary.
    coords : Dictionary/xarray.core.coordinates.DataArrayCoordinates
        Coordinates corresponding to DataArray, e.g. da.coords

    Returns
    -------
    da : xarray.DataArray
        instance of xarray.DataArray

    Examples
    --------
    >>> import numpy as np
    >>> from libpysal.weights.raster import da2WSP, testDataArray, wsp2da
    >>> da = testDataArray()
    >>> da.shape
    (3, 4, 4)
    >>> wsp = da2WSP(da, z_value=2)
    >>> data = np.random.randint(0, 255, len(wsp.index))
    >>> da1 = wsp2da(data, wsp)
    """
    if not isinstance(wsp, WSP):
        raise TypeError("wsp must be an instance of weights.WSP")
    if hasattr(wsp, "index"):
        da = _index2da(data, wsp.index, attrs, coords)
    else:
        raise AttributeError(
            "This method requires the `wsp` object to include an `index` "
            "attribute that is built as a `pandas.MultiIndex` object."
        )
    return da
def testDataArray(shape=(3, 4, 4), time=False, rand=False, missing_vals=True):
    """
    Creates a 2- or 3-dimensional test xarray.DataArray object.

    Parameters
    ----------
    shape : tuple
        Tuple containing the shape of the DataArray, aligned with the
        dimensions (lat, lon) or (layer, lat, lon).
        Default shape = (3, 4, 4)
    time : boolean
        Type of layer; if True then layer=time, else layer=band.
        Default is False.
    rand : boolean
        If True, creates a DataArray filled with unique and random data.
        Default is False (generates seeded random data).
    missing_vals : boolean
        Create a DataArray filled with missing values. Default is True.

    Returns
    -------
    da : xarray.DataArray
        instance of xarray.DataArray
    """
    try:
        from xarray import DataArray
    except ImportError:
        raise ModuleNotFoundError(
            "xarray must be installed to use this functionality"
        ) from None

    if not rand:
        np.random.seed(12345)
    coords = {}
    n = len(shape)
    if n != 2:
        layer = "time" if time else "band"
        dims = (layer, "y", "x")
        if time:
            layers = np.arange(
                np.datetime64("2020-07-30"), shape[0], dtype="datetime64[D]"
            )
        else:
            layers = np.arange(1, shape[0] + 1)
        coords[dims[-3]] = layers
    else:
        dims = ("y", "x")
    coords[dims[-2]] = np.linspace(90, -90, shape[-2])
    coords[dims[-1]] = np.linspace(-180, 180, shape[-1])
    data = np.random.randint(0, 255, shape)
    attrs = {}
    if missing_vals:
        attrs["nodatavals"] = (-32768.0,)
        miss_ids = np.where(np.random.randint(2, size=shape) == 1)
        data[miss_ids] = attrs["nodatavals"][0]
    da = DataArray(data, coords, dims, attrs=attrs)
    return da
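
# Illustrative usage sketch (added for exposition, not part of the
# original module), assuming xarray is installed: the seeded test raster
# carries exactly the pieces the rest of this module consumes, i.e. a
# (band, y, x) layout and a `nodatavals` attribute marking missing cells.
def _demo_test_data_array():
    da = testDataArray((3, 4, 4), time=False, rand=False, missing_vals=True)
    assert da.dims == ("band", "y", "x")
    assert da.attrs["nodatavals"] == (-32768.0,)
    return da
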
def _da_checker(da, z_value, coords_labels):
    """
    xarray.DataArray checker for the raster interface

    Parameters
    ----------
    da : xarray.DataArray
        Input 2D or 3D DataArray with shape=(z, y, x)
    z_value : int/string/float
        Select the z_value of a 3D DataArray with multiple layers.
    coords_labels : dictionary
        Pass dimension labels for coordinates and layers if they do not
        belong to default dimensions, which are (band/time, y/lat, x/lon)
        e.g. coords_labels = {"y_label": "latitude",
        "x_label": "longitude", "z_label": "year"}
        Default is {}, the empty dictionary.

    Returns
    -------
    z_id : int
        Index of the selected layer
    dims : dictionary
        Mapped dimensions of the DataArray
    """
    try:
        from xarray import DataArray
    except ImportError:
        raise ModuleNotFoundError(
            "xarray must be installed to use this functionality"
        ) from None

    if not isinstance(da, DataArray):
        raise TypeError("da must be an instance of xarray.DataArray")
    if da.ndim not in [2, 3]:
        raise ValueError("da must be 2D or 3D")
    if not (
        np.issubdtype(da.values.dtype, np.integer)
        or np.issubdtype(da.values.dtype, np.floating)
    ):
        raise ValueError("da must be an array of integers or floats")

    # default dimensions
    def_labels = {
        "x_label": coords_labels["x_label"]
        if "x_label" in coords_labels
        else ("x" if hasattr(da, "x") else "lon"),
        "y_label": coords_labels["y_label"]
        if "y_label" in coords_labels
        else ("y" if hasattr(da, "y") else "lat"),
    }

    if da.ndim == 3:
        def_labels["z_label"] = (
            coords_labels["z_label"]
            if "z_label" in coords_labels
            else ("band" if hasattr(da, "band") else "time")
        )

        z_id = 1
        if z_value is None:
            if da.sizes[def_labels["z_label"]] != 1:
                warn(
                    "Multiple layers detected. Using first layer as default.",
                    stacklevel=2,
                )
        else:
            z_id += tuple(da[def_labels["z_label"]]).index(z_value)
    else:
        z_id = None

    return z_id, def_labels


def _index2da(data, index, attrs, coords):
    """
    Creates an xarray.DataArray object from passed data

    Parameters
    ----------
    data : array/list/pd.Series
        1d array-like data with dimensionality conforming to index
    index : pd.MultiIndex
        indices of the DataArray when converted to pd.Series
    attrs : Dictionary
        Attributes stored in dict related to DataArray, e.g. da.attrs
    coords : Dictionary/xarray.core.coordinates.DataArrayCoordinates
        coordinates corresponding to DataArray, e.g. da[n-1:n].coords

    Returns
    -------
    da : xarray.DataArray
        instance of xarray.DataArray
    """
    try:
        from xarray import DataArray
    except ImportError:
        raise ModuleNotFoundError(
            "xarray must be installed to use this functionality"
        ) from None

    data = np.array(data).flatten()
    idx = index
    dims = idx.names
    indexer = tuple(idx.codes)
    shape = tuple(lev.size for lev in idx.levels)

    if coords is None:
        missing = np.prod(shape) > idx.shape[0]
        if missing:
            if "nodatavals" in attrs:
                fill_value = attrs["nodatavals"][0]
            else:
                min_data = np.min(data)
                fill_value = min_data - 1 if min_data < 0 else -1
                attrs["nodatavals"] = tuple([fill_value])  # noqa: C409
            data_complete = np.full(shape, fill_value, data.dtype)
        else:
            data_complete = np.empty(shape, data.dtype)
        data_complete[indexer] = data
        coords = {}
        for dim, lev in zip(dims, idx.levels, strict=True):
            coords[dim] = lev.to_numpy()
    else:
        fill = attrs["nodatavals"][0] if "nodatavals" in attrs else 0
        data_complete = np.full(shape, fill, data.dtype)
        data_complete[indexer] = data

    da = DataArray(data_complete, coords=coords, dims=dims, attrs=attrs)
    return da


@jit(nopython=True, fastmath=True)
def _idmap(ids, mask, dtype):
    """
    Utility function that computes the id_map of non-missing raster data

    Parameters
    ----------
    ids : ndarray
        1D array containing ids of non-missing raster data
    mask : ndarray
        1D mask array
    dtype : type
        Data type of the id_map array

    Returns
    -------
    id_map : ndarray
        1D array containing id_maps of non-missing raster data
    """
    id_map = mask * 1
    id_map[ids] = np.arange(len(ids), dtype=dtype)
    return id_map


@jit(nopython=True, fastmath=True)
def _SWbuilder(
    nrows,
    ncols,
    ids,
    id_map,
    criterion,
    k,
    dtype,
):
    """
    Computes data and orders rows, cols, data for a single chunk

    Parameters
    ----------
    nrows : int
        Number of rows in the raster data
    ncols : int
        Number of columns in the raster data
    ids : ndarray
        1D array containing ids of non-missing raster data
    id_map : ndarray
        1D array containing id_maps of non-missing raster data
    criterion : str
        Type of contiguity.
    k : int
        Order of contiguity. Default is 1.
    dtype : type
        Data type of the id_map array

    Returns
    -------
    data : ndarray
        1D ones array containing the weight of each neighbor
    rows : ndarray
        1D array containing the row value of each id in the sparse
        weight object
    cols : ndarray
        1D array containing the column value of each id in the sparse
        weight object
    """
    rows, cols = _compute_chunk(nrows, ncols, ids, id_map, criterion, k, dtype)
    data = np.ones_like(rows, dtype=np.int8)
    return (data, (rows, cols))
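
# Illustrative sketch (added for exposition, not part of the original
# module): _idmap compresses flat raster positions into consecutive ids
# over the non-missing cells, leaving 0 at missing positions, which the
# truthiness checks in _compute_chunk then skip. A tiny worked case:
def _demo_idmap():
    mask = np.array([False, True, True, False, True])
    ids = np.where(mask)[0]               # -> array([1, 2, 4])
    id_map = _idmap(ids, mask, np.int32)  # -> array([0, 0, 1, 0, 2])
    return id_map
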
@jit(nopython=True, fastmath=True, nogil=True)
def _compute_chunk(
    nrows,
    ncols,
    ids,
    id_map,
    criterion,
    k,
    dtype,
):
    """
    Computes rows and cols for a single chunk

    Parameters
    ----------
    nrows : int
        Number of rows in the raster data
    ncols : int
        Number of columns in the raster data
    ids : ndarray
        1D array containing ids of non-missing raster data
    id_map : ndarray
        1D array containing id_maps of non-missing raster data
    criterion : str
        Type of contiguity.
    k : int
        Order of contiguity. Default is 1.
    dtype : type
        Data type of the rows and cols arrays

    Returns
    -------
    rows : ndarray
        1D array containing the row value of each id in the sparse
        weight object
    cols : ndarray
        1D array containing the column value of each id in the sparse
        weight object
    """
    n = len(ids)
    # Setting d, which is used to preallocate the rows and cols arrays
    d = 4 if criterion == "rook" else 8
    if k > 1:
        d = int((k / 2) * (2 * 8 + (k - 1) * 8))
    rows = np.empty(d * n, dtype=dtype)
    cols = np.empty_like(rows)
    ni = 0  # -> pointer into the rows and cols arrays
    for order in range(1, k + 1):
        condition = (
            (order - 1)
            if criterion == "queen"
            else ((k - order) if ((k - order) < order) else (order - 1))
        )
        for i in range(n):
            id_i = ids[i]
            og_id = id_map[id_i]

            if ((id_i + order) % ncols) >= order:
                # east neighbor
                id_neighbor = id_map[id_i + order]
                if id_neighbor:
                    rows[ni], cols[ni] = og_id, id_neighbor
                    ni += 1
                    rows[ni], cols[ni] = id_neighbor, og_id
                    ni += 1
                # north-east to south-east neighbors
                for j in range(condition):
                    if (id_i // ncols) < (nrows - j - 1):
                        id_neighbor = id_map[(id_i + order) + (ncols * (j + 1))]
                        if id_neighbor:
                            rows[ni], cols[ni] = og_id, id_neighbor
                            ni += 1
                            rows[ni], cols[ni] = id_neighbor, og_id
                            ni += 1
                    if (id_i // ncols) >= j + 1:
                        id_neighbor = id_map[(id_i + order) - (ncols * (j + 1))]
                        if id_neighbor:
                            rows[ni], cols[ni] = og_id, id_neighbor
                            ni += 1
                            rows[ni], cols[ni] = id_neighbor, og_id
                            ni += 1

            if (id_i // ncols) < (nrows - order):
                # south neighbor
                id_neighbor = id_map[id_i + (ncols * order)]
                if id_neighbor:
                    rows[ni], cols[ni] = og_id, id_neighbor
                    ni += 1
                    rows[ni], cols[ni] = id_neighbor, og_id
                    ni += 1
                # south-west to south-east neighbors
                for j in range(condition):
                    if (id_i % ncols) >= j + 1:
                        id_neighbor = id_map[id_i + (ncols * order) - j - 1]
                        if id_neighbor:
                            rows[ni], cols[ni] = og_id, id_neighbor
                            ni += 1
                            rows[ni], cols[ni] = id_neighbor, og_id
                            ni += 1
                    if ((id_i + j + 1) % ncols) >= j + 1:
                        id_neighbor = id_map[id_i + (ncols * order) + j + 1]
                        if id_neighbor:
                            rows[ni], cols[ni] = og_id, id_neighbor
                            ni += 1
                            rows[ni], cols[ni] = id_neighbor, og_id
                            ni += 1
                if criterion == "queen" or ((k / order) >= 2.0):
                    if (id_i % ncols) >= order:
                        # south-west neighbor
                        id_neighbor = id_map[id_i + (ncols * order) - order]
                        if id_neighbor:
                            rows[ni], cols[ni] = og_id, id_neighbor
                            ni += 1
                            rows[ni], cols[ni] = id_neighbor, og_id
                            ni += 1
                    if ((id_i + order) % ncols) >= order:
                        # south-east neighbor
                        id_neighbor = id_map[id_i + (ncols * order) + order]
                        if id_neighbor:
                            rows[ni], cols[ni] = og_id, id_neighbor
                            ni += 1
                            rows[ni], cols[ni] = id_neighbor, og_id
                            ni += 1
    return rows[:ni], cols[:ni]
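
# Worked note on the preallocation bound above (added for exposition):
# the queen ring at order o contains at most 8*o cells, so each cell can
# contribute at most sum_{o=1..k} 8*o = 8*k*(k+1)/2 entries; the
# expression (k / 2) * (2*8 + (k - 1)*8) is that same arithmetic series.
# A quick check:
def _demo_prealloc_bound(k=3):
    series_form = int((k / 2) * (2 * 8 + (k - 1) * 8))
    direct_sum = sum(8 * order for order in range(1, k + 1))
    assert series_form == direct_sum  # 48 for k = 3
    return series_form
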
@jit(nopython=True, fastmath=True)
def _chunk_generator(
    n_jobs,
    starts,
    ids,
):
    """
    Construct chunks to iterate over within numba in parallel

    Parameters
    ----------
    n_jobs : int
        Number of cores to be used in the sparse weight construction.
        If -1, all available cores are used.
    starts : ndarray
        (n_chunks+1,) array of positional starts for the ids chunks
    ids : ndarray
        1D array containing ids of non-missing raster data

    Yields
    ------
    ids_chunk : numpy.ndarray
        (n_chunk,) array containing a chunk of ids of non-missing
        raster data
    """
    chunk_size = starts[1] - starts[0]
    for i in range(n_jobs):
        start = starts[i]
        ids_chunk = ids[start : (start + chunk_size)]
        yield (ids_chunk,)


def _parSWbuilder(
    nrows,
    ncols,
    ids,
    id_map,
    criterion,
    k,
    dtype,
    n_jobs,
):
    """
    Computes data and orders rows, cols, data in parallel using numba

    Parameters
    ----------
    nrows : int
        Number of rows in the raster data
    ncols : int
        Number of columns in the raster data
    ids : ndarray
        1D array containing ids of non-missing raster data
    id_map : ndarray
        1D array containing id_maps of non-missing raster data
    criterion : str
        Type of contiguity.
    k : int
        Order of contiguity. Default is 1.
    dtype : type
        Data type of the rows and cols arrays
    n_jobs : int
        Number of cores to be used in the sparse weight construction.
        If -1, all available cores are used.

    Returns
    -------
    data : ndarray
        1D ones array containing the weight of each neighbor
    rows : ndarray
        1D array containing the row value of each id in the sparse
        weight object
    cols : ndarray
        1D array containing the column value of each id in the sparse
        weight object
    """
    from joblib import Parallel, delayed, parallel_backend

    n = len(ids)
    chunk_size = n // n_jobs + 1
    starts = np.arange(n_jobs + 1) * chunk_size
    chunk = _chunk_generator(n_jobs, starts, ids)
    with parallel_backend("threading"):
        # each `ids` below is the 1-tuple yielded by _chunk_generator
        worker_out = Parallel(n_jobs=n_jobs)(
            delayed(_compute_chunk)(nrows, ncols, *ids, id_map, criterion, k, dtype)
            for ids in chunk
        )
    rows, cols = zip(*worker_out, strict=True)
    rows = np.concatenate(rows)
    cols = np.concatenate(cols)
    data = np.ones_like(rows, dtype=np.int8)
    return (data, (rows, cols))
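
# Illustrative sketch (added for exposition, not part of the original
# module): how _parSWbuilder splits work. With n = 10 ids and n_jobs = 3
# the chunk size is 10 // 3 + 1 = 4, so workers receive 4, 4, and 2 ids;
# the trailing slice is simply shorter, and no id is visited twice.
def _demo_chunking():
    ids = np.arange(10)
    n_jobs = 3
    chunk_size = len(ids) // n_jobs + 1
    starts = np.arange(n_jobs + 1) * chunk_size
    chunks = [c[0] for c in _chunk_generator(n_jobs, starts, ids)]
    assert [len(c) for c in chunks] == [4, 4, 2]
    return chunks
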