Source code for tobler.dasymetric.masked_area_interpolate

import geopandas as gpd

from ..area_weighted import area_interpolate
from .raster_tools import extract_raster_features
from warnings import warn


def masked_area_interpolate(
    source_df,
    target_df,
    raster,
    pixel_values,
    extensive_variables=None,
    intensive_variables=None,
    categorical_variables=None,
    allocate_total=True,
    nodata=255,
    n_jobs=-1,
    codes=None,
):
    """Interpolate data between two polygonal datasets using an auxiliary raster to mask out uninhabited land.

    The source geometries are first clipped to the parts of the raster whose
    pixel values are listed in `pixel_values`; the clipped geometries are then
    passed to standard areal interpolation against `target_df`.

    Parameters
    ----------
    source_df : geopandas.GeoDataFrame
        source data to be converted to another geometric representation.
        Its index must not contain duplicate labels.
    target_df : geopandas.GeoDataFrame
        target geometries that will form the new representation of the input data
    raster : str
        path to raster file that contains ancillary data
    pixel_values : list of ints
        list of pixel values that should be considered part of the mask. For example if
        using data from NLCD Land Cover Database <https://www.mrlc.gov/data>, a common
        input might be [21,22,23,24], which match the "developed" land types in that dataset
    extensive_variables : list
        Columns of the input dataframe containing extensive variables to interpolate
    intensive_variables : list
        Columns of the input dataframe containing intensive variables to interpolate
    categorical_variables : list
        [Optional. Default=None] Columns in dataframes for categorical variables
    allocate_total : bool
        whether to allocate the total from the source geometries (the default is True).
    nodata : int
        value in raster that indicates null or missing values. Default is 255
    n_jobs : int
        [Optional. Default=-1] Number of processes to run in parallel to
        generate the area allocation. If -1, this is set to the number of CPUs
        available. The value is forwarded to both the raster feature
        extraction and the areal interpolation steps.
    codes : list of ints
        [Deprecated. Default=None] Former name of `pixel_values`. When a
        non-empty value is given it replaces `pixel_values` and a warning is
        emitted. Use `pixel_values` instead.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with geometries matching the target_df and extensive and intensive
        variables as the columns

    Raises
    ------
    AssertionError
        If the index of `source_df` contains duplicate labels.
    """
    if codes:
        # stacklevel=2 attributes the warning to the caller that passed `codes`.
        warn(
            "The `codes` keyword is deprecated and will be removed shortly. Please use `pixel_values` instead",
            stacklevel=2,
        )
        pixel_values = codes

    # Raised explicitly instead of via `assert` so the check is not stripped
    # under `python -O`; AssertionError is kept so existing callers that catch
    # it keep working. The index labels are used as the dissolve key below, so
    # duplicated labels would be merged into a single row.
    if not source_df.index.is_unique:
        raise AssertionError("The index of the source_df cannot contain duplicates.")

    # work on a copy so the caller's dataframe is never modified
    source_df = source_df.copy()

    # create a vector mask from the raster data
    raster_mask = extract_raster_features(
        source_df, raster, pixel_values, nodata, n_jobs, collapse_values=True
    )

    # create a column in the source_df to dissolve on; prefix the name until
    # it is unused so an existing data column with the same label is not
    # overwritten (and later consumed as the dissolve key)
    idx_name = source_df.index.name or "idx"
    while idx_name in source_df.columns:
        idx_name = f"_{idx_name}"
    source_df[idx_name] = source_df.index

    # clip source_df by its mask (overlay/dissolve is faster than gpd.clip here)
    clipped = gpd.overlay(
        source_df, raster_mask.to_crs(source_df.crs), how="intersection"
    )
    source_df = clipped.dissolve(idx_name)

    # continue with standard areal interpolation using the clipped source
    return area_interpolate(
        source_df,
        target_df.copy(),
        extensive_variables=extensive_variables,
        intensive_variables=intensive_variables,
        n_jobs=n_jobs,
        categorical_variables=categorical_variables,
        allocate_total=allocate_total,
    )