mapclassify
mapclassify: Classification Schemes for Choropleth Maps
>>> import mapclassify
>>> y = mapclassify.load_example()
>>> y.mean()
125.92810344827588
>>> y.min(), y.max()
(0.13, 4111.4499999999998)
Map Classifiers Supported
BoxPlot
>>> mapclassify.BoxPlot(y)
BoxPlot
Interval Count
--------------------------
( -inf, -52.88] | 0
( -52.88, 2.57] | 15
( 2.57, 9.36] | 14
( 9.36, 39.53] | 14
( 39.53, 94.97] | 6
( 94.97, 4111.45] | 9
EqualInterval
>>> mapclassify.EqualInterval(y)
EqualInterval
Interval Count
--------------------------
[ 0.13, 822.39] | 57
( 822.39, 1644.66] | 0
(1644.66, 2466.92] | 0
(2466.92, 3289.19] | 0
(3289.19, 4111.45] | 1
FisherJenks
>>> import numpy as np
>>> np.random.seed(123456)
>>> mapclassify.FisherJenks(y, k=5)
FisherJenks
Interval Count
--------------------------
[ 0.13, 75.29] | 49
( 75.29, 192.05] | 3
( 192.05, 370.50] | 4
( 370.50, 722.85] | 1
( 722.85, 4111.45] | 1
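FisherJenksSampled
For comparison, the same array is classified first with FisherJenks and then with FisherJenksSampled.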
>>> np.random.seed(123456)
>>> x = np.random.exponential(size=(10000,))
>>> mapclassify.FisherJenks(x, k=5)
FisherJenks
Interval Count
----------------------
[ 0.00, 0.64] | 4694
( 0.64, 1.45] | 2922
( 1.45, 2.53] | 1584
( 2.53, 4.14] | 636
( 4.14, 10.61] | 164
>>> mapclassify.FisherJenksSampled(x, k=5)
FisherJenksSampled
Interval Count
----------------------
[ 0.00, 0.70] | 5020
( 0.70, 1.63] | 2952
( 1.63, 2.88] | 1454
( 2.88, 5.32] | 522
( 5.32, 10.61] | 52
HeadTailBreaks
>>> mapclassify.HeadTailBreaks(y)
HeadTailBreaks
Interval Count
--------------------------
[ 0.13, 125.93] | 50
( 125.93, 811.26] | 7
( 811.26, 4111.45] | 1
JenksCaspall
>>> mapclassify.JenksCaspall(y, k=5)
JenksCaspall
Interval Count
--------------------------
[ 0.13, 1.81] | 14
( 1.81, 7.60] | 13
( 7.60, 29.82] | 14
( 29.82, 181.27] | 10
( 181.27, 4111.45] | 7
JenksCaspallForced
>>> mapclassify.JenksCaspallForced(y, k=5)
JenksCaspallForced
Interval Count
--------------------------
[ 0.13, 1.34] | 12
( 1.34, 5.90] | 12
( 5.90, 16.70] | 13
( 16.70, 50.65] | 9
( 50.65, 4111.45] | 12
JenksCaspallSampled
>>> mapclassify.JenksCaspallSampled(y, k=5)
JenksCaspallSampled
Interval Count
--------------------------
[ 0.13, 12.02] | 33
( 12.02, 29.82] | 8
( 29.82, 75.29] | 8
( 75.29, 192.05] | 3
( 192.05, 4111.45] | 6
MaxP
>>> mapclassify.MaxP(y)
MaxP
Interval Count
--------------------------
[ 0.13, 8.70] | 29
( 8.70, 16.70] | 8
( 16.70, 20.47] | 1
( 20.47, 66.26] | 10
( 66.26, 4111.45] | 10
MaximumBreaks
>>> mapclassify.MaximumBreaks(y, k=5)
MaximumBreaks
Interval Count
--------------------------
[ 0.13, 146.00] | 50
( 146.00, 228.49] | 2
( 228.49, 546.67] | 4
( 546.67, 2417.15] | 1
(2417.15, 4111.45] | 1
NaturalBreaks
>>> mapclassify.NaturalBreaks(y, k=5)
NaturalBreaks
Interval Count
--------------------------
[ 0.13, 75.29] | 49
( 75.29, 192.05] | 3
( 192.05, 370.50] | 4
( 370.50, 722.85] | 1
( 722.85, 4111.45] | 1
Quantiles
>>> mapclassify.Quantiles(y, k=5)
Quantiles
Interval Count
--------------------------
[ 0.13, 1.46] | 12
( 1.46, 5.80] | 11
( 5.80, 13.28] | 12
( 13.28, 54.62] | 11
( 54.62, 4111.45] | 12
Percentiles
>>> mapclassify.Percentiles(y, pct=[33, 66, 100])
Percentiles
Interval Count
--------------------------
[ 0.13, 3.36] | 19
( 3.36, 22.86] | 19
( 22.86, 4111.45] | 20
StdMean
>>> mapclassify.StdMean(y)
StdMean
Interval Count
--------------------------
( -inf, -967.36] | 0
(-967.36, -420.72] | 0
(-420.72, 672.57] | 56
( 672.57, 1219.22] | 1
(1219.22, 4111.45] | 1
UserDefined
>>> mapclassify.UserDefined(y, bins=[22, 674, 4112])
UserDefined
Interval Count
--------------------------
[ 0.13, 22.00] | 38
( 22.00, 674.00] | 18
( 674.00, 4112.00] | 2
Use Cases
Creating and using a classification instance
>>> bp = mapclassify.BoxPlot(y)
>>> bp
BoxPlot
Interval Count
--------------------------
( -inf, -52.88] | 0
( -52.88, 2.57] | 15
( 2.57, 9.36] | 14
( 9.36, 39.53] | 14
( 39.53, 94.97] | 6
( 94.97, 4111.45] | 9
>>> bp.bins
array([ -5.28762500e+01, 2.56750000e+00, 9.36500000e+00,
3.95300000e+01, 9.49737500e+01, 4.11145000e+03])
>>> bp.counts
array([ 0, 15, 14, 14, 6, 9])
>>> bp.yb
array([5, 1, 2, 3, 2, 1, 5, 1, 3, 3, 1, 2, 2, 1, 2, 2, 2, 1, 5, 2, 4, 1, 2,
2, 1, 1, 3, 3, 3, 5, 3, 1, 3, 5, 2, 3, 5, 5, 4, 3, 5, 3, 5, 4, 2, 1,
1, 4, 4, 3, 3, 1, 1, 2, 1, 4, 3, 2])
Apply: using a classifier with pandas.DataFrame.apply
>>> import mapclassify
>>> import pandas
>>> from numpy import linspace as lsp
>>> data = [lsp(3,8,num=10), lsp(10, 0, num=10), lsp(-5, 15, num=10)]
>>> data = pandas.DataFrame(data).T
>>> data
0 1 2
0 3.000000 10.000000 -5.000000
1 3.555556 8.888889 -2.777778
2 4.111111 7.777778 -0.555556
3 4.666667 6.666667 1.666667
4 5.222222 5.555556 3.888889
5 5.777778 4.444444 6.111111
6 6.333333 3.333333 8.333333
7 6.888889 2.222222 10.555556
8 7.444444 1.111111 12.777778
9 8.000000 0.000000 15.000000
>>> data.apply(mapclassify.Quantiles.make(rolling=True))
0 1 2
0 0 4 0
1 0 4 0
2 1 4 0
3 1 3 0
4 2 2 1
5 2 1 2
6 3 0 4
7 3 0 4
8 4 0 4
9 4 0 4