Source code for mapclassify._classify_API

from .classifiers import (
    BoxPlot,
    EqualInterval,
    FisherJenks,
    FisherJenksSampled,
    HeadTailBreaks,
    JenksCaspall,
    JenksCaspallForced,
    JenksCaspallSampled,
    MaximumBreaks,
    MaxP,
    NaturalBreaks,
    Percentiles,
    PrettyBreaks,
    Quantiles,
    StdMean,
    UserDefined,
)

__author__ = "Stefanie Lumnitz <stefanie.lumitz@gmail.com>"


_classifiers = {
    "boxplot": BoxPlot,
    "equalinterval": EqualInterval,
    "fisherjenks": FisherJenks,
    "fisherjenkssampled": FisherJenksSampled,
    "headtailbreaks": HeadTailBreaks,
    "jenkscaspall": JenksCaspall,
    "jenkscaspallforced": JenksCaspallForced,
    "jenkscaspallsampled": JenksCaspallSampled,
    "maxp": MaxP,
    "maximumbreaks": MaximumBreaks,
    "naturalbreaks": NaturalBreaks,
    "quantiles": Quantiles,
    "percentiles": Percentiles,
    "prettybreaks": PrettyBreaks,
    "stdmean": StdMean,
    "userdefined": UserDefined,
}


[docs] def classify( y, scheme, k=5, pct=[1, 10, 50, 90, 99, 100], pct_sampled=0.10, truncate=True, hinge=1.5, multiples=[-2, -1, 1, 2], mindiff=0, initial=100, bins=None, lowest=None, anchor=False, ): """ Classify your data with ``mapclassify.classify``. Input parameters are dependent on classifier used. Parameters ---------- y : numpy.array :math:`(n,1)`, values to classify. scheme : str ``pysal.mapclassify`` classification scheme. k : int (default 5) The number of classes. pct : numpy.array (default [1, 10, 50, 90, 99, 100]) Percentiles used for classification with ``percentiles``. pct_sampled : float default (0.10) The percentage of n that should form the sample (``JenksCaspallSampled``, ``FisherJenksSampled``) If ``pct`` is specified such that ``n*pct > 1000``, then ``pct=1000``. truncate : bool (default True) Truncate ``pct_sampled`` in cases where ``pct * n > 1000``. hinge : float (default 1.5) Multiplier for *IQR* when ``BoxPlot`` classifier used. multiples : numpy.array (default [-2,-1,1,2]) The multiples of the standard deviation to add/subtract from the sample mean to define the bins using ``std_mean``. mindiff : float (default is 0) The minimum difference between class breaks if using ``maximum_breaks`` classifier. initial : int (default 100) Number of initial solutions to generate or number of runs when using ``natural_breaks`` or ``max_p_classifier``. Setting initial to ``0`` will result in the quickest calculation of bins. bins : numpy.array (default None) :math:`(k,1)`, upper bounds of classes (have to be monotically increasing) if using ``user_defined`` classifier. Default is ``None``. For example: ``[20, max(y)]``. lowest : float (default None) Scalar minimum value of lowest class. Default is to set the minimum to ``-inf`` if ``y.min()`` > first upper bound (which will override the default), otherwise minimum is set to ``y.min()``. anchor : bool (default False) Anchor upper bound of one class to the sample mean. Returns ------- classifier : mapclassify.classifiers.MapClassifier Object containing bin ids for each observation (``.yb``), upper bounds of each class (``.bins``), number of classes (``.k``) and number of observations falling in each class (``.counts``). Notes ----- Supported classifiers include: * ``quantiles`` * ``boxplot`` * ``equalinterval`` * ``fisherjenks`` * ``fisherjenkssampled`` * ``headtailbreaks`` * ``jenkscaspall`` * ``jenkscaspallsampled`` * ``jenks_caspallforced`` * ``maxp`` * ``maximumbreaks`` * ``naturalbreaks`` * ``percentiles`` * ``prettybreaks`` * ``stdmean`` * ``userdefined`` Examples -------- >>> import libpysal >>> import geopandas >>> from mapclassify import classify Load example data. >>> link_to_data = libpysal.examples.get_path("columbus.shp") >>> gdf = geopandas.read_file(link_to_data) >>> x = gdf['HOVAL'].values Classify values by quantiles. >>> quantiles = classify(x, "quantiles") Classify values by box_plot and set hinge to ``2``. >>> box_plot = classify(x, 'box_plot', hinge=2) >>> box_plot BoxPlot <BLANKLINE> Interval Count ---------------------- ( -inf, -9.50] | 0 (-9.50, 25.70] | 13 (25.70, 33.50] | 12 (33.50, 43.30] | 12 (43.30, 78.50] | 9 (78.50, 96.40] | 3 """ # reformat scheme_lower = scheme.lower() scheme = scheme_lower.replace("_", "") # check if scheme is a valid scheme if scheme not in _classifiers: raise ValueError( f"Invalid scheme: '{scheme}'\n" f"Scheme must be in the set: {_classifiers.keys()}" ) elif scheme == "boxplot": classifier = _classifiers[scheme](y, hinge) elif scheme == "fisherjenkssampled": classifier = _classifiers[scheme](y, k, pct_sampled, truncate) elif scheme == "headtailbreaks": classifier = _classifiers[scheme](y) elif scheme == "percentiles": classifier = _classifiers[scheme](y, pct) elif scheme == "stdmean": classifier = _classifiers[scheme](y, multiples, anchor) elif scheme == "jenkscaspallsampled": classifier = _classifiers[scheme](y, k, pct_sampled) elif scheme == "maximumbreaks": classifier = _classifiers[scheme](y, k, mindiff) elif scheme in ["naturalbreaks", "maxp"]: classifier = _classifiers[scheme](y, k, initial) elif scheme == "userdefined": classifier = _classifiers[scheme](y, bins, lowest) elif scheme in [ "equalinterval", "fisherjenks", "jenkscaspall", "jenkscaspallforced", "quantiles", "prettybreaks", ]: classifier = _classifiers[scheme](y, k) return classifier