Source code for pointpats.distance_statistics

import warnings
from collections import namedtuple

import geopandas
import numpy
import shapely
from scipy import interpolate, spatial

from .geometry import (
    TREE_TYPES,
)
from .geometry import (
    area as _area,
)
from .geometry import (
    build_best_tree as _build_best_tree,
)
from .geometry import (
    k_neighbors as _k_neighbors,
)
from .geometry import (
    prepare_hull as _prepare_hull,
)
from .random import poisson

__all__ = [
    "f",
    "g",
    "k",
    "j",
    "l",
    "f_test",
    "g_test",
    "k_test",
    "j_test",
    "l_test",
]


def _prepare(coordinates, support, distances, metric, hull, edge_correction):
    """
    prepare the arguments to convert into a standard format
    1. cast the coordinates to a numpy array
    2. precomputed metrics must have distances provided
    3. metrics must be callable or string
    4. warn if distances are specified and metric is not default
    5. make distances a numpy.ndarray
    6. construct the support, accepting:
        - num_steps -> a linspace with len(support) == num_steps
                       from zero to a quarter of the bounding box's smallest side
        - (stop, ) -> a linspace with len(support) == 20
                 from zero to stop
        - (start, stop) -> a linspace with len(support) == 20
                           from start to stop
        - (start, stop, num_steps) -> a linspace with len(support) == num_steps
                                      from start to stop
        - numpy.ndarray -> passed through
    """
    # Throw early if edge correction is requested
    if edge_correction is not None:
        raise NotImplementedError("Edge correction is not currently implemented.")

    if isinstance(coordinates, geopandas.GeoDataFrame | geopandas.GeoSeries):
        coordinates = shapely.get_coordinates(coordinates.geometry)

    # cast to coordinate array
    if isinstance(coordinates, TREE_TYPES):
        tree = coordinates
        coordinates = tree.data
    else:
        coordinates = numpy.asarray(coordinates)
    hull = _prepare_hull(coordinates, hull)

    # evaluate distances
    if (distances is None) and metric == "precomputed":
        raise ValueError(
            "If metric =`precomputed` then distances must"
            " be provided as a (n,n) numpy array."
        )
    if not (isinstance(metric, str) or callable(metric)):
        raise TypeError(
            f"`metric` argument must be callable or a string. Recieved: {metric}"
        )
    if distances is not None and metric != "euclidean":
        warnings.warn(
            "Distances were provided. The specified metric will be ignored."
            " To use precomputed distances with a custom distance metric,"
            " do not specify a `metric` argument.",
            stacklevel=2,
        )
        metric = "euclidean"

    if support is None:
        support = 20

    if isinstance(support, int):  # if just n_steps, use the max nnd
        # this is O(n log n) for kdtrees & balltrees
        tmp_tree = _build_best_tree(coordinates, metric=metric)
        max_dist = _k_neighbors(tmp_tree, coordinates, 1)[0].max()
        support = numpy.linspace(0, max_dist, num=support)
    # otherwise, we need to build it using (start, stop, step) semantics
    elif isinstance(support, tuple):
        if len(support) == 1:  # assuming this is with zero implicit start
            support = numpy.linspace(0, support[0], num=20)  # default support n bins
        elif len(support) == 2:
            support = numpy.linspace(*support, num=20)  # default support n bins
        elif len(support) == 3:
            support = numpy.linspace(support[0], support[1], num=support[2])
    else:  # try to use it as is
        try:
            support = numpy.asarray(support)
        except:
            raise TypeError(
                "`support` must be a tuple (either (start, stop, step), (start, stop) or (stop,)),"
                " an int describing the number of breaks to use to evalute the function,"
                " or an iterable containing the breaks to use to evaluate the function."
                " Recieved object of type {}: {}".format(type(support), support)
            )

    return coordinates, support, distances, metric, hull, edge_correction


# ------------------------------------------------------------#
# Statistical Functions                                       #
# ------------------------------------------------------------#



[docs]
def f(
    coordinates,
    support=None,
    distances=None,
    metric="euclidean",
    hull=None,
    edge_correction=None,
):
    """
    Ripley's F function

    The so-called "empty space" function, this is the cumulative density function of
    the distances from a random set of points to the known points in the pattern.

    Parameters
    ----------
    coordinates : geopandas object | numpy.ndarray of shape (n,2)
        input coordinates to function
    support : tuple of length 1, 2, or 3, int, or numpy.ndarray
        tuple, encoding (stop,), (start, stop), or (start, stop, num)
        int, encoding number of equally-spaced intervals
        numpy.ndarray, used directly within numpy.histogram
    distances: numpy.ndarray, (n, p) or (p,)
        distances from every point in a random point set of size p
        to some point in `coordinates`
    metric: str or callable
        distance metric to use when building search tree
    hull: bounding box, scipy.spatial.ConvexHull, shapely.geometry.Polygon
        the hull used to construct a random sample pattern, if distances is None
    edge_correction: bool or str
        whether or not to conduct edge correction. Not yet implemented.

    Returns
    -------
    a tuple containing the support values used to evalute the function
    and the values of the function at each distance value in the support.
    """
    coordinates, support, distances, metric, hull, _ = _prepare(
        coordinates, support, distances, metric, hull, edge_correction
    )
    if distances is not None:
        n = coordinates.shape[0]
        if distances.ndim == 2:
            k, p = distances.shape
            if k == p == n:
                warnings.warn(
                    f"A full distance matrix is not required for this function, and"
                    f" the intput matrix is a square {n},{n} matrix. Only the"
                    f" distances from p random points to their nearest neighbor within"
                    f" the pattern is required, as an {n},p matrix. Assuming the"
                    f" provided distance matrix has rows pertaining to input"
                    f" pattern and columns pertaining to the output points.",
                    stacklevel=2,
                )
                distances = distances.min(axis=0)
            elif k == n:
                distances = distances.min(axis=0)
            else:
                raise ValueError(
                    f"Distance matrix should have the same rows as the input"
                    f" coordinates with p columns, where n may be equal to p."
                    f" Recieved an {k},{p} distance matrix for {n} coordinates"
                )
        elif distances.ndim == 1:
            p = len(distances)
    else:
        # Do 1000 empties. Users can control this by computing their own
        # empty space distribution.
        n_empty_points = 1000

        randoms = poisson(hull=hull, size=(n_empty_points, 1))
        try:
            tree
        except NameError:
            tree = _build_best_tree(coordinates, metric)
        finally:
            distances, _ = tree.query(randoms, k=1)
            distances = distances.squeeze()

    counts, bins = numpy.histogram(distances, bins=support)
    fracs = numpy.cumsum(counts) / counts.sum()

    return bins, numpy.asarray([0, *fracs])




[docs]
def g(
    coordinates,
    support=None,
    distances=None,
    metric="euclidean",
    edge_correction=None,
):
    """
    Ripley's G function

    The G function is computed from the cumulative density function of the nearest neighbor
    distances between points in the pattern.

    Parameters
    -----------
    coordinates : geopandas object | numpy.ndarray of shape (n,2)
        input coordinates to function
    support : tuple of length 1, 2, or 3, int, or numpy.ndarray
        tuple, encoding (stop,), (start, stop), or (start, stop, num)
        int, encoding number of equally-spaced intervals
        numpy.ndarray, used directly within numpy.histogram
    distances: numpy.ndarray, (n, n) or (n,)
        distances from every point in the point to another point in `coordinates`
    metric: str or callable
        distance metric to use when building search tree
    edge_correction: bool or str
        whether or not to conduct edge correction. Not yet implemented.

    Returns
    -------
    a tuple containing the support values used to evalute the function
    and the values of the function at each distance value in the support.

    """

    coordinates, support, distances, metric, *_ = _prepare(
        coordinates, support, distances, metric, None, edge_correction
    )
    if distances is not None:
        if distances.ndim == 2:
            if distances.shape[0] == distances.shape[1] == coordinates.shape[0]:
                warnings.warn(
                    "The full distance matrix is not required for this function,"
                    " only the distance to the nearest neighbor within the pattern."
                    " Computing this and discarding the rest.",
                    stacklevel=2,
                )
                distances = distances.min(axis=1)
            else:
                k, p = distances.shape
                n = coordinates.shape[0]
                raise ValueError(
                    " Input distance matrix has an invalid shape: {k},{p}."
                    " Distances supplied can either be 2 dimensional"
                    " square matrices with the same number of rows"
                    " as `coordinates` ({n}) or 1 dimensional and contain"
                    " the shortest distance from each point in "
                    " `coordinates` to some other point in coordinates."
                )
        elif distances.ndim == 1:
            if distances.shape[0] != coordinates.shape[0]:
                raise ValueError(
                    f"Distances are not aligned with coordinates! Distance"
                    f" matrix must be (n_coordinates, n_coordinates), but recieved"
                    f" {distances.shape} instead of ({coordinates.shape[0]},)"
                )
        else:
            raise ValueError(
                "Distances supplied can either be 2 dimensional"
                " square matrices with the same number of rows"
                " as `coordinates` or 1 dimensional and contain"
                " the shortest distance from each point in "
                " `coordinates` to some other point in coordinates."
                " Input matrix was {distances.ndim} dimensioanl"
            )
    else:
        try:
            tree
        except NameError:
            tree = _build_best_tree(coordinates, metric)
        finally:
            distances, indices = _k_neighbors(tree, coordinates, k=1)

    counts, bins = numpy.histogram(distances.squeeze(), bins=support)
    fracs = numpy.cumsum(counts) / counts.sum()

    return bins, numpy.asarray([0, *fracs])




[docs]
def j(
    coordinates,
    support=None,
    distances=None,
    metric="euclidean",
    hull=None,
    edge_correction=None,
    truncate=True,
):
    """
    Ripely's J function

    The so-called "spatial hazard" function, this is a function relating the F and G functions.

    Parameters
    -----------
    coordinates : geopandas object | numpy.ndarray, (n,2)
        input coordinates to function
    support : tuple of length 1, 2, or 3, int, or numpy.ndarray
        tuple, encoding (stop,), (start, stop), or (start, stop, num)
        int, encoding number of equally-spaced intervals
        numpy.ndarray, used directly within numpy.histogram
    distances: tuple of numpy.ndarray
        precomputed distances to use to evaluate the j function.
        The first must be of shape (n,n) or (n,) and is used in the g function.
        the second must be of shape (n,p) or (p,) (with p possibly equal to n)
        used in the f function.
    metric: str or callable
        distance metric to use when building search tree
    hull: bounding box, scipy.spatial.ConvexHull, shapely.geometry.Polygon
        the hull used to construct a random sample pattern for the f function.
    edge_correction: bool or str
        whether or not to conduct edge correction. Not yet implemented.
    truncate: bool (default: True)
        whether or not to truncate the results when the F function reaches one. If the
        F function is one but the G function is less than one, this function will return
        numpy.nan values.

    Returns
    -------
    a tuple containing the support values used to evalute the function
    and the values of the function at each distance value in the support.
    """
    if distances is not None:
        g_distances, f_distances = distances
    else:
        g_distances = f_distances = None
    fsupport, fstats = f(
        coordinates,
        support=support,
        distances=f_distances,
        metric=metric,
        hull=hull,
        edge_correction=edge_correction,
    )

    gsupport, gstats = g(
        coordinates,
        support=support,
        distances=g_distances,
        metric=metric,
        edge_correction=edge_correction,
    )

    if isinstance(support, numpy.ndarray):
        if not numpy.allclose(gsupport, support):
            gfunction = interpolate.interp1d(gsupport, gstats, fill_value=1)
            gstats = gfunction(support)
            gsupport = support
    if not (numpy.allclose(gsupport, fsupport)):
        ffunction = interpolate.interp1d(fsupport, fstats, fill_value=1)
        fstats = ffunction(gsupport)
        fsupport = gsupport

    with numpy.errstate(invalid="ignore", divide="ignore"):
        hazard_ratio = (1 - gstats) / (1 - fstats)
    both_zero = (gstats == 1) & (fstats == 1)
    hazard_ratio[both_zero] = numpy.nan
    if truncate:
        result = _truncate(gsupport, hazard_ratio)
        if len(result[1]) != len(hazard_ratio):
            warnings.warn(
                f"requested {support} bins to evaluate the J function, but"
                f" it reaches infinity at d={result[0][-1]:.4f}, meaning only"
                f" {len(result[0])} bins will be used to characterize the J function.",
                stacklevel=2,
            )
        return result
    else:
        return gsupport, hazard_ratio




[docs]
def k(
    coordinates,
    support=None,
    distances=None,
    metric="euclidean",
    edge_correction=None,
):
    """
    Ripley's K function

    This function counts the number of pairs of points that are closer than a given distance.
    As d increases, K approaches the number of point pairs.

    coordinates : geopandas object | numpy.ndarray, (n,2)
        input coordinates to function
    support : tuple of length 1, 2, or 3, int, or numpy.ndarray
        tuple, encoding (stop,), (start, stop), or (start, stop, num)
        int, encoding number of equally-spaced intervals
        numpy.ndarray, used directly within numpy.histogram
    distances: numpy.ndarray, (n, p) or (p,)
        distances from every point in a random point set of size p
        to some point in `coordinates`
    metric: str or callable
        distance metric to use when building search tree
    hull: bounding box, scipy.spatial.ConvexHull, shapely.geometry.Polygon
        the hull used to construct a random sample pattern, if distances is None
    edge_correction: bool or str
        whether or not to conduct edge correction. Not yet implemented.

    Returns
    -------
    a tuple containing the support values used to evalute the function
    and the values of the function at each distance value in the support.
    """
    coordinates, support, distances, metric, hull, edge_correction = _prepare(
        coordinates, support, distances, metric, None, edge_correction
    )
    n = coordinates.shape[0]
    upper_tri_n = n * (n - 1) * 0.5
    if distances is not None:
        if distances.ndim == 1:
            if distances.shape[0] != upper_tri_n:
                raise ValueError(
                    f"Shape of inputted distances is not square, nor is the upper triangular"
                    f" matrix matching the number of input points. The shape of the input matrix"
                    f" is {distances.shape}, but required shape is ({upper_tri_n},) or ({n},{n})"
                )
            upper_tri_distances = distances
        elif distances.shape[0] == distances.shape[1] == n:
            upper_tri_distances = distances[numpy.triu_indices_from(distances, k=1)]
        else:
            raise ValueError(
                f"Shape of inputted distances is not square, nor is the upper triangular"
                f" matrix matching the number of input points. The shape of the input matrix"
                f" is {distances.shape}, but required shape is ({upper_tri_n},) or ({n},{n})"
            )
    else:
        upper_tri_distances = spatial.distance.pdist(coordinates, metric=metric)
    n_pairs_less_than_d = (upper_tri_distances < support.reshape(-1, 1)).sum(axis=1)
    intensity = n / _area(hull)
    k_estimate = ((n_pairs_less_than_d * 2) / n) / intensity
    return support, k_estimate




[docs]
def l(
    coordinates,
    support=None,
    permutations=9999,
    distances=None,
    metric="euclidean",
    edge_correction=None,
    linearized=False,
):
    """
    Ripley's L function

    This is a scaled and shifted version of the K function that accounts for the K function's
    increasing expected value as distances increase. This means that the L function, for a
    completely random pattern, should be close to zero at all distance values in the support.

    Parameters
    ----------
    coordinates : geopandas object | numpy.ndarray, (n,2)
        input coordinates to function
    support : tuple of length 1, 2, or 3, int, or numpy.ndarray
        tuple, encoding (stop,), (start, stop), or (start, stop, num)
        int, encoding number of equally-spaced intervals
        numpy.ndarray, used directly within numpy.histogram
    distances: numpy.ndarray, (n, p) or (p,)
        distances from every point in a random point set of size p
        to some point in `coordinates`
    metric: str or callable
        distance metric to use when building search tree
    hull: bounding box, scipy.spatial.ConvexHull, shapely.geometry.Polygon
        the hull used to construct a random sample pattern, if distances is None
    edge_correction: bool or str
        whether or not to conduct edge correction. Not yet implemented.
    linearized : bool
        whether or not to subtract l from its expected value (support) at each
        distance bin. This centers the l function on zero for all distances.
        Proposed by Besag (1977)

    Returns
    -------
    a tuple containing the support values used to evalute the function
    and the values of the function at each distance value in the support.
    """

    support, k_estimate = k(
        coordinates,
        support=support,
        distances=distances,
        metric=metric,
        edge_correction=edge_correction,
    )

    l = numpy.sqrt(k_estimate / numpy.pi)

    if linearized:
        return support, l - support
    return support, l



# ------------------------------------------------------------#
# Statistical Tests based on Ripley Functions                 #
# ------------------------------------------------------------#

FtestResult = namedtuple(
    "FtestResult", ("support", "statistic", "pvalue", "simulations")
)
GtestResult = namedtuple(
    "GtestResult", ("support", "statistic", "pvalue", "simulations")
)
JtestResult = namedtuple(
    "JtestResult", ("support", "statistic", "pvalue", "simulations")
)
KtestResult = namedtuple(
    "KtestResult", ("support", "statistic", "pvalue", "simulations")
)
LtestResult = namedtuple(
    "LtestResult", ("support", "statistic", "pvalue", "simulations")
)

_ripley_dispatch = {
    "F": (f, FtestResult),
    "G": (g, GtestResult),
    "J": (j, JtestResult),
    "K": (k, KtestResult),
    "L": (l, LtestResult),
}


def _ripley_test(
    calltype,
    coordinates,
    support=None,
    distances=None,
    metric="euclidean",
    hull=None,
    edge_correction=None,
    keep_simulations=False,
    n_simulations=9999,
    **kwargs,
):
    if isinstance(coordinates, geopandas.GeoDataFrame | geopandas.GeoSeries):
        coordinates = shapely.get_coordinates(coordinates.geometry)

    stat_function, result_container = _ripley_dispatch.get(calltype)
    core_kwargs = dict(
        support=support,
        metric=metric,
        edge_correction=edge_correction,
    )
    tree = _build_best_tree(coordinates, metric=metric)
    hull = _prepare_hull(coordinates, hull)
    if calltype in ("F", "J"):  # these require simulations
        core_kwargs["hull"] = hull
        # amortize to avoid doing this every time
        empty_space_points = poisson(coordinates, size=(1000, 1))
        if distances is None:
            empty_space_distances, _ = _k_neighbors(tree, empty_space_points, k=1)
            if calltype == "F":
                distances = empty_space_distances.squeeze()
            else:  # calltype == 'J':
                n_distances, _ = _k_neighbors(tree, coordinates, k=1)
                distances = (n_distances.squeeze(), empty_space_distances.squeeze())
        else:
            pass
    core_kwargs.update(**kwargs)

    observed_support, observed_statistic = stat_function(
        tree, distances=distances, **core_kwargs
    )
    core_kwargs["support"] = observed_support
    n_observations = coordinates.shape[0]

    if keep_simulations:
        simulations = numpy.empty((len(observed_support), n_simulations)).T
    pvalues = numpy.ones_like(observed_support)
    for i_replication in range(n_simulations):
        random_i = poisson(hull, size=n_observations)
        if calltype in ("F", "J"):
            random_tree = _build_best_tree(random_i, metric)
            empty_distances, _ = random_tree.query(empty_space_points, k=1)
            if calltype == "F":
                core_kwargs["distances"] = empty_distances.squeeze()
            else:  # calltype == 'J':
                n_distances, _ = _k_neighbors(random_tree, random_i, k=1)
                core_kwargs["distances"] = (
                    n_distances.squeeze(),
                    empty_distances.squeeze(),
                )
        rep_support, simulations_i = stat_function(random_i, **core_kwargs)
        pvalues += simulations_i >= observed_statistic
        if keep_simulations:
            simulations[i_replication] = simulations_i
    pvalues /= n_simulations + 1
    pvalues = numpy.minimum(pvalues, 1 - pvalues)
    return result_container(
        observed_support,
        observed_statistic,
        pvalues,
        simulations if keep_simulations else None,
    )



[docs]
def f_test(
    coordinates,
    support=None,
    distances=None,
    metric="euclidean",
    hull=None,
    edge_correction=None,
    keep_simulations=False,
    n_simulations=9999,
):
    """
    Ripley's F function

    The so-called "empty space" function, this is the cumulative density function of
    the distances from a random set of points to the known points in the pattern.

    When the estimated statistic is larger than simulated values at a given distance, then
    the pattern is considered "dispersed" or "regular"

    Parameters
    -----------
    coordinates : geopandas object | numpy.ndarray, (n,2)
        input coordinates to function
    support : tuple of length 1, 2, or 3, int, or numpy.ndarray
        tuple, encoding (stop,), (start, stop), or (start, stop, num)
        int, encoding number of equally-spaced intervals
        numpy.ndarray, used directly within numpy.histogram
    distances: numpy.ndarray, (n, p) or (p,)
        distances from every point in a random point set of size p
        to some point in `coordinates`
    metric: str or callable
        distance metric to use when building search tree
    hull: bounding box, scipy.spatial.ConvexHull, shapely.geometry.Polygon
        the hull used to construct a random sample pattern, if distances is None
    edge_correction: bool or str
        whether or not to conduct edge correction. Not yet implemented.
    keep_simulations: bool
        whether or not to keep the simulation envelopes. If so,
        will be returned as the result's simulations attribute
    n_simulations: int
        how many simulations to conduct, assuming that the reference pattern
        has complete spatial randomness.

    Returns
    -------
    a named tuple with properties
    - support, the exact distance values used to evalute the statistic
    - statistic, the values of the statistic at each distance
    - pvalue, the percent of simulations that were as extreme as the observed value
    - simulations, the distribution of simulated statistics (shaped (n_simulations, n_support_points))
        or None if keep_simulations=False (which is the default)
    """

    return _ripley_test(
        "F",
        coordinates,
        support=support,
        distances=distances,
        metric=metric,
        hull=hull,
        edge_correction=edge_correction,
        keep_simulations=keep_simulations,
        n_simulations=n_simulations,
    )




[docs]
def g_test(
    coordinates,
    support=None,
    distances=None,
    metric="euclidean",
    hull=None,
    edge_correction=None,
    keep_simulations=False,
    n_simulations=9999,
):
    """
    Ripley's G function

    The G function is computed from the cumulative density function of the nearest neighbor
    distances between points in the pattern.

    When the G function is below the simulated values, it suggests dispersion.

    Parameters
    ----------
    coordinates : geopandas object | numpy.ndarray, (n,2)
        input coordinates to function
    support : tuple of length 1, 2, or 3, int, or numpy.ndarray
        tuple, encoding (stop,), (start, stop), or (start, stop, num)
        int, encoding number of equally-spaced intervals
        numpy.ndarray, used directly within numpy.histogram
    distances: numpy.ndarray, (n, p) or (p,)
        distances from every point in a random point set of size p
        to some point in `coordinates`
    metric: str or callable
        distance metric to use when building search tree
    hull: bounding box, scipy.spatial.ConvexHull, shapely.geometry.Polygon
        the hull used to construct a random sample pattern, if distances is None
    edge_correction: bool or str
        whether or not to conduct edge correction. Not yet implemented.
    keep_simulations: bool
        whether or not to keep the simulation envelopes. If so,
        will be returned as the result's simulations attribute
    n_simulations: int
        how many simulations to conduct, assuming that the reference pattern
        has complete spatial randomness.

    Returns
    -------
    a named tuple with properties
    - support, the exact distance values used to evalute the statistic
    - statistic, the values of the statistic at each distance
    - pvalue, the percent of simulations that were as extreme as the observed value
    - simulations, the distribution of simulated statistics (shaped (n_simulations, n_support_points))
        or None if keep_simulations=False (which is the default)
    """
    return _ripley_test(
        "G",
        coordinates,
        support=support,
        distances=distances,
        metric=metric,
        hull=hull,
        edge_correction=edge_correction,
        keep_simulations=keep_simulations,
        n_simulations=n_simulations,
    )




[docs]
def j_test(
    coordinates,
    support=None,
    distances=None,
    metric="euclidean",
    hull=None,
    edge_correction=None,
    truncate=True,
    keep_simulations=False,
    n_simulations=9999,
):
    """
    Ripley's J function

    The so-called "spatial hazard" function, this is a function relating the F and G functions.

    When the J function is consistently below 1, then it indicates clustering.
    When consistently above 1, it suggests dispersion.

    coordinates : geopandas object | numpy.ndarray, (n,2)
        input coordinates to function
    support : tuple of length 1, 2, or 3, int, or numpy.ndarray
        tuple, encoding (stop,), (start, stop), or (start, stop, num)
        int, encoding number of equally-spaced intervals
        numpy.ndarray, used directly within numpy.histogram
    distances: numpy.ndarray, (n, p) or (p,)
        distances from every point in a random point set of size p
        to some point in `coordinates`
    metric: str or callable
        distance metric to use when building search tree
    hull: bounding box, scipy.spatial.ConvexHull, shapely.geometry.Polygon
        the hull used to construct a random sample pattern, if distances is None
    edge_correction: bool or str
        whether or not to conduct edge correction. Not yet implemented.
    keep_simulations: bool
        whether or not to keep the simulation envelopes. If so,
        will be returned as the result's simulations attribute
    n_simulations: int
        how many simulations to conduct, assuming that the reference pattern
        has complete spatial randomness.

    Returns
    -------
    a named tuple with properties
    - support, the exact distance values used to evalute the statistic
    - statistic, the values of the statistic at each distance
    - pvalue, the percent of simulations that were as extreme as the observed value
    - simulations, the distribution of simulated statistics (shaped (n_simulations, n_support_points))
        or None if keep_simulations=False (which is the default)
    """
    result = _ripley_test(
        "J",
        coordinates,
        support=support,
        distances=distances,
        metric=metric,
        hull=hull,
        edge_correction=edge_correction,
        keep_simulations=keep_simulations,
        n_simulations=n_simulations,
        truncate=False,
    )
    if truncate:
        result_trunc = _truncate(*result)
        result_trunc = JtestResult(*result_trunc)
        if len(result_trunc.statistic) != len(result.statistic):
            warnings.warn(
                f"requested {support} bins to evaluate the J function, but"
                f" it reaches infinity at d={result[0][-1]:.4f}, meaning only"
                f" {len(result[0])} bins will be used to characterize the J function.",
                stacklevel=2,
            )
            return result_trunc

    else:
        return result




[docs]
def k_test(
    coordinates,
    support=None,
    distances=None,
    metric="euclidean",
    hull=None,
    edge_correction=None,
    keep_simulations=False,
    n_simulations=9999,
):
    """
    Ripley's K function

    This function counts the number of pairs of points that are closer than a given distance.
    As d increases, K approaches the number of point pairs.

    When the K function is below simulated values, it suggests that the pattern is dispersed.

    Parameters
    ----------
    coordinates : geopandas object | numpy.ndarray, (n,2)
        input coordinates to function
    support : tuple of length 1, 2, or 3, int, or numpy.ndarray
        tuple, encoding (stop,), (start, stop), or (start, stop, num)
        int, encoding number of equally-spaced intervals
        numpy.ndarray, used directly within numpy.histogram
    distances: numpy.ndarray, (n, p) or (p,)
        distances from every point in a random point set of size p
        to some point in `coordinates`
    metric: str or callable
        distance metric to use when building search tree
    hull: bounding box, scipy.spatial.ConvexHull, shapely.geometry.Polygon
        the hull used to construct a random sample pattern, if distances is None
    edge_correction: bool or str
        whether or not to conduct edge correction. Not yet implemented.
    keep_simulations: bool
        whether or not to keep the simulation envelopes. If so,
        will be returned as the result's simulations attribute
    n_simulations: int
        how many simulations to conduct, assuming that the reference pattern
        has complete spatial randomness.

    Returns
    -------
    a named tuple with properties
    - support, the exact distance values used to evalute the statistic
    - statistic, the values of the statistic at each distance
    - pvalue, the percent of simulations that were as extreme as the observed value
    - simulations, the distribution of simulated statistics (shaped (n_simulations, n_support_points))
        or None if keep_simulations=False (which is the default)
    """
    return _ripley_test(
        "K",
        coordinates,
        support=support,
        distances=distances,
        metric=metric,
        hull=hull,
        edge_correction=edge_correction,
        keep_simulations=keep_simulations,
        n_simulations=n_simulations,
    )




[docs]
def l_test(
    coordinates,
    support=None,
    distances=None,
    metric="euclidean",
    hull=None,
    edge_correction=None,
    linearized=False,
    keep_simulations=False,
    n_simulations=9999,
):
    """
    Ripley's L function

    This is a scaled and shifted version of the K function that accounts for the K function's
    increasing expected value as distances increase. This means that the L function, for a
    completely random pattern, should be close to zero at all distance values in the support.

    When the L function is negative, this suggests dispersion.

    Parameters
    ----------
    coordinates : geopandas object | numpy.ndarray, (n,2)
        input coordinates to function
    support : tuple of length 1, 2, or 3, int, or numpy.ndarray
        tuple, encoding (stop,), (start, stop), or (start, stop, num)
        int, encoding number of equally-spaced intervals
        numpy.ndarray, used directly within numpy.histogram
    distances: numpy.ndarray, (n, p) or (p,)
        distances from every point in a random point set of size p
        to some point in `coordinates`
    metric: str or callable
        distance metric to use when building search tree
    hull: bounding box, scipy.spatial.ConvexHull, shapely.geometry.Polygon
        the hull used to construct a random sample pattern, if distances is None
    edge_correction: bool or str
        whether or not to conduct edge correction. Not yet implemented.
    keep_simulations: bool
        whether or not to keep the simulation envelopes. If so,
        will be returned as the result's simulations attribute
    n_simulations: int
        how many simulations to conduct, assuming that the reference pattern
        has complete spatial randomness.

    Returns
    -------
    a named tuple with properties
    - support, the exact distance values used to evalute the statistic
    - statistic, the values of the statistic at each distance
    - pvalue, the percent of simulations that were as extreme as the observed value
    - simulations, the distribution of simulated statistics (shaped (n_simulations, n_support_points))
        or None if keep_simulations=False (which is the default)
    """
    return _ripley_test(
        "L",
        coordinates,
        support=support,
        distances=distances,
        metric=metric,
        hull=hull,
        edge_correction=edge_correction,
        linearized=linearized,
        keep_simulations=keep_simulations,
        n_simulations=n_simulations,
    )



def _truncate(support, realizations, *rest):
    is_invalid = numpy.isinf(realizations) | numpy.isnan(realizations)
    first_inv = is_invalid.argmax()
    if not is_invalid.any():
        return support, realizations, *rest
    elif first_inv < len(realizations):
        return (
            support[:first_inv],
            realizations[:first_inv],
            *[r[:first_inv] if r is not None else None for r in rest],
        )