# Source code for tobler.area_weighted.area_join

import numpy as np
import pandas as pd
import warnings

__author__ = "Martin Fleischmann <martin@martinfleischmann.net>"


def _largest_overlap(target_ix, source_ix, areas, n_targets):
    """Pick, for every target position, the source with the largest overlap.

    Parameters
    ----------
    target_ix : numpy.ndarray
        Positional index of the target row for each intersecting pair.
    source_ix : numpy.ndarray
        Positional index of the source row for each intersecting pair.
    areas : array-like
        Intersection area of each pair, aligned with the two index arrays.
    n_targets : int
        Number of target rows; the length of the returned array.

    Returns
    -------
    best : numpy.ndarray
        Float array of length ``n_targets`` holding the chosen source position,
        or ``nan`` where the target takes part in no pair.
    """
    best = np.full(n_targets, np.nan, dtype=float)

    # A stable sort makes each target's pairs contiguous while keeping their
    # original relative order, so ``argmax`` still resolves ties in favour of
    # the first pair, exactly as a per-target boolean mask would.
    order = np.argsort(target_ix, kind="stable")
    sorted_targets = np.asarray(target_ix)[order]
    sorted_sources = np.asarray(source_ix)[order]
    sorted_areas = np.asarray(areas)[order]

    # On sorted input the first occurrence of each value is its group start.
    matched, starts = np.unique(sorted_targets, return_index=True)
    stops = np.append(starts[1:], sorted_targets.size)

    for target, lo, hi in zip(matched, starts, stops):
        best[target] = sorted_sources[lo:hi][np.argmax(sorted_areas[lo:hi])]

    return best


def area_join(source_df, target_df, variables):
    """
    Join variables from source_df based on the largest intersection.

    In case of a tie it picks the first one.

    Parameters
    ----------
    source_df : geopandas.GeoDataFrame
        GeoDataFrame containing source values
    target_df : geopandas.GeoDataFrame
        GeoDataFrame containing the target geometries that receive the values
    variables : string or list-like
        column(s) in source_df dataframe for variable(s) to be joined

    Returns
    -------
    joined : geopandas.GeoDataFrame
         target_df GeoDataFrame with joined variables as additional columns.
         The input ``target_df`` is not modified; a copy is returned.

    Raises
    ------
    ValueError
        If any of ``variables`` is already a column of ``target_df``.

    Warns
    -----
    UserWarning
        If a joined column cannot be cast back to the dtype it has in
        ``source_df``; the column is then left with ``dtype=object``.
    """
    if not pd.api.types.is_list_like(variables):
        variables = [variables]

    # Refuse to overwrite existing target columns before doing any spatial work.
    for v in variables:
        if v in target_df.columns:
            raise ValueError(f"Column '{v}' already present in target_df.")

    target_df = target_df.copy()

    # Pairs of intersecting geometries, unpacked as positions into target_df
    # and source_df respectively.
    target_ix, source_ix = source_df.sindex.query(
        target_df.geometry, predicate="intersects"
    )
    areas = (
        target_df.geometry.values[target_ix]
        .intersection(source_df.geometry.values[source_ix])
        .area
    )

    main = _largest_overlap(target_ix, source_ix, areas, len(target_df))

    # ``nan`` marks targets without any intersecting source geometry.
    mask = ~np.isnan(main)
    matched_sources = main[mask].astype(int)

    for v in variables:
        # Object array so unmatched rows can stay as None until the cast below.
        arr = np.empty(len(main), dtype=object)
        arr[mask] = source_df[v].values[matched_sources]
        try:
            arr = arr.astype(source_df[v].dtype)
        except TypeError:
            warnings.warn(
                f"Cannot preserve dtype of '{v}'. Falling back to `dtype=object`.",
                stacklevel=2,
            )
        target_df[v] = arr

    return target_df