Source code for tobler.dasymetric.raster_tools

"""tools for working with rasters."""

import ast
import multiprocessing
import warnings

import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio as rio
import rasterstats as rs
from joblib import Parallel, delayed
from rasterio import features
from rasterio.mask import mask
from shapely.geometry import shape

from ..util.util import _check_presence_of_crs


def _chunk_dfs(geoms_to_chunk, n_jobs):
    chunk_size = geoms_to_chunk.shape[0] // n_jobs + 1
    for i in range(n_jobs):
        start = i * chunk_size
        yield geoms_to_chunk.iloc[start : start + chunk_size]


def _parse_geom(geom_str):
    return shape(ast.literal_eval(geom_str))


def _apply_parser(df):
    return df.apply(_parse_geom)


def _fast_append_profile_in_gdf(geodataframe, raster_path, force_crs_match=True):
    """Append categorical zonal statistics (counts by pixel type) as columns to an input geodataframe.

    geodataframe : geopandas.GeoDataFrame
        geodataframe that has overlay with the raster. If some polygon do not overlay the raster,
        consider a preprocessing step using the function subset_gdf_polygons_from_raster.
    raster_path : str
        path to the raster image.
    force_crs_match : bool, Default is True.
        Whether the Coordinate Reference System (CRS) of the polygon will be reprojected to
        the CRS of the raster file. It is recommended to let this argument as True.

    Notes
    -----
    The generated geodataframe will input the value 0 for each Type that is not present in the raster
    for each polygon.
    """

    _check_presence_of_crs(geodataframe)
    if force_crs_match:
        with rio.open(raster_path) as raster:
            geodataframe = geodataframe.to_crs(crs=raster.crs.data)
    else:
        warnings.warn(
            "The GeoDataFrame is not being reprojected. The zonal statistics may be computed on polygons that do not match the raster's coordinate reference system."
        )

    zonal_gjson = rs.zonal_stats(
        geodataframe, raster_path, prefix="Type_", geojson_out=True, categorical=True
    )

    zonal_ppt_gdf = gpd.GeoDataFrame.from_features(zonal_gjson)

    return zonal_ppt_gdf
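
# A minimal usage sketch for the helper above, assuming a polygon layer and a
# categorical raster stored on disk (the file names below are illustrative only):
#
#     tracts = gpd.read_file("tracts.shp")
#     profiled = _fast_append_profile_in_gdf(tracts, "landcover.tif")
#     # `profiled` carries one "Type_<pixel value>" column per raster category,
#     # holding the count of pixels of that category within each polygon.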


def extract_raster_features(
    gdf, raster_path, pixel_values=None, nodata=255, n_jobs=-1, collapse_values=False
):
    """Generate a geodataframe from raster data by polygonizing contiguous pixels
    with the same value using rasterio's features module.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        geodataframe defining the area of interest. The input raster will be
        clipped to the extent of the geodataframe.
    raster_path : str
        path to raster file, such as downloaded from <https://lcviewer.vito.be/download>
    pixel_values : list-like, optional
        subset of pixel values to extract, by default None. If None, this function
        may generate a very large geodataframe.
    nodata : int, optional
        pixel value denoting "no data" in the input raster.
    n_jobs : int, optional (default -1)
        Number of processes to run in parallel. If -1, this is set to the number
        of CPUs available.
    collapse_values : bool, optional
        If True, multiple values passed to the pixel_values argument are treated as
        a single type, i.e. polygons will be generated from any contiguous collection
        of values from pixel_values, instead of unique polygons being generated for
        each value. This can dramatically reduce the complexity of the resulting
        geodataframe, as fewer polygons are required to represent the study area.

    Returns
    -------
    geopandas.GeoDataFrame
        geodataframe whose rows are the zones extracted by the rasterio.features
        module. The geometry of each zone is the boundary of a contiguous group of
        pixels with the same value; the `value` column contains the pixel value of
        each zone.
    """
    if n_jobs == -1:
        n_jobs = multiprocessing.cpu_count()
    with rio.open(raster_path) as src:
        raster_crs = src.crs.data
        gdf = gdf.to_crs(raster_crs)
        geomask = [gdf.unary_union.__geo_interface__]

        out_image, out_transform = mask(
            src, geomask, nodata=nodata, crop=True
        )  # clip to AoI using a vector layer

        if pixel_values:
            if collapse_values:
                out_image = np.where(
                    np.isin(out_image, pixel_values), pixel_values[0], out_image
                )  # replace values to generate fewer polys
            pixel_values = np.isin(
                out_image, pixel_values
            )  # only include requested pixels

        shapes = list(
            features.shapes(out_image, mask=pixel_values, transform=out_transform)
        )  # convert regions to polygons

    res = list(zip(*shapes))
    geoms = pd.Series(res[0], name="geometry").astype(str)
    pieces = _chunk_dfs(geoms, n_jobs)
    geoms = pd.concat(
        Parallel(n_jobs=n_jobs)(delayed(_apply_parser)(i) for i in pieces)
    )
    geoms = gpd.GeoSeries(geoms).buffer(0)  # we sometimes get self-intersecting rings
    vals = pd.Series(res[1], name="value")
    gdf = gpd.GeoDataFrame(vals, geometry=geoms, crs=raster_crs)
    if collapse_values:
        gdf = gdf.drop(columns=["value"])  # the value column is misleading in this case
    return gdf
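
# A minimal usage sketch for extract_raster_features, assuming an area-of-interest
# layer and a land-cover raster on disk (file names and the pixel code are
# illustrative only):
#
#     aoi = gpd.read_file("study_area.gpkg")
#     built_up = extract_raster_features(
#         aoi, "landcover.tif", pixel_values=[50], n_jobs=4
#     )
#     # `built_up` has one polygon per contiguous group of pixels whose value is 50,
#     # plus a "value" column recording that pixel value.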