Search
Rank_Markov_ergodic_morecomu

Full Rank Markov and Geographic Rank Markov

Author: Wei Kang weikang9009@gmail.com

import libpysal as ps
import libpysal
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import pandas as pd
import geopandas as gpd
from giddy.markov import Spatial_Markov, Markov, FullRank_Markov
from giddy import ergodic, util
import mapclassify as mc
import quantecon as qe
f = ps.io.open(ps.examples.get_path('usjoin.csv'))
pci = np.array([f.by_col[str(y)] for y in range(2000, 2010)])
pci = pci.transpose()
rpci = pci / (pci.mean(axis=0))
discretized = (rpci * 100).astype(int) % 4
w = ps.io.open(ps.examples.get_path("states48.gal")).read()
w.transform = 'r'
pci = np.array([f.by_col[str(y)] for y in range(1929,2010)])
# rpci = pci/(pci.mean(axis=0))
rq = mc.Quantiles(pci.flatten()).yb.reshape(pci.shape)
m = Markov(rq)
The Markov Chain is irreducible and is composed by:
1 Recurrent class (indices):
[0 1 2 3 4]
0 Transient class.
The Markov Chain has 0 absorbing state.
m.steady_state
array([0.1696532 , 0.1853406 , 0.19973599, 0.2178871 , 0.22738311])
q5 = np.array([mc.Quantiles(y).yb for y in pci]).transpose()
m = Markov(q5)
The Markov Chain is reducible and is composed by:
Recurrent classes:
[0 1], [2], [3 4]
Transient classes:
None
m.steady_state
array([[0.50531915, 0.49468085, 0.        , 0.        , 0.        ],
       [0.        , 0.        , 1.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.5       , 0.5       ]])
a = Spatial_Markov(rpci, w)
a = FullRank_Markov(rpci)
The Markov Chain is reducible and is composed by:
Recurrent classes:
[0], [1], [2], [3 4], [ 5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46], [47]
Transient classes:
None
P = np.array([[0,0,0.8,0.2],
             [0,0,0.2,0.8],
             [0.3,0.7,0,0],
             [0.7,0.3,0,0]])
m = qe.MarkovChain(P)
m.recurrent_classes_indices
[array([0, 1, 2, 3])]
m.communication_classes_indices
[array([0, 1, 2, 3])]
m.is_aperiodic
False
m.stationary_distributions
array([[0.25, 0.25, 0.25, 0.25]])
p = np.array([[0.8,0.2],[0.7,0.3]])
list(np.argwhere(p==0.5))
[]
len([])
0
print(*[], sep=", "

from giddy.ergodic import fmpt, steady_state
fmpt(p)
array([[1.28571429, 5.        ],
       [1.42857143, 4.5       ]])
steady_state(p)
array([0.77777778, 0.22222222])
1/steady_state(p)
array([1.28571429, 4.5       ])
P = np.array([[1, 0,0,0,0],
              [0,1,0,0,0],
              [0, 0,0,0.5, 0.5],
             [0,0.5,0, 0, 0.5],
             [0.5,0, 0, 0.5, 0]])
P
array([[1. , 0. , 0. , 0. , 0. ],
       [0. , 1. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0.5, 0.5],
       [0. , 0.5, 0. , 0. , 0.5],
       [0.5, 0. , 0. , 0.5, 0. ]])
np.where?
np.argwhere(P==0.5)
array([[2, 3],
       [2, 4],
       [3, 1],
       [3, 4],
       [4, 0],
       [4, 3]])
np.argwhere(P==1)[:,0]
array([0, 1])
mc = qe.MarkovChain(P)
mc.communication_classes_indices
[array([0]), array([1]), array([3, 4]), array([2])]
mc.recurrent_classes?
mc.num_communication_classes
4
mc.recurrent_classes_indices
[array([0]), array([1])]
c = [['b','a','a'],['c','c','a'],['c','b','c']]
m = Markov(np.array(c))
The Markov Chain is reducible and is composed by:
Recurrent classes:
[0]
Transient classes:
[1 2]
m.p
array([[1.        , 0.        , 0.        ],
       [0.5       , 0.        , 0.5       ],
       [0.33333333, 0.33333333, 0.33333333]])
c = [['b','a','c'],['c','c','a'],['c','b','c']]
c.extend([['a','a','b'], ['a','b','c']])
c = np.array(c)
m = Markov(c)
The Markov Chain is irreducible and is composed by:
Recurrent classes:
[0 1 2]
Transient classes:
None
m.classes
array(['a', 'b', 'c'], dtype='<U1')
m.communication_classes
[array([0, 1, 2])]
m.classes[m.communication_classes[0]]
array(['a', 'b', 'c'], dtype='<U1')
classes
if mc.is_irreducible:
    print("The Markov Chain is irreducible and is composed by:")
else:
    print("The Markov Chain is reducible and is composed by:")
print("{0} Recurrent classes:".format(mc.num_recurrent_classes))
print(mc.recurrent_classes_indices, sep = ", ")
print("Transient classes:")
if self.transient_classes is None:
    print("None")
else:
    print(*self.transient_classes, sep = ", ")
The Markov Chain is reducible and is composed by:
2 Recurrent classes:
[array([0]), array([1])]
Transient classes:
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-17-6c26cc7bf27f> in <module>
      6 print(mc.recurrent_classes_indices, sep = ", ")
      7 print("Transient classes:")
----> 8 if self.transient_classes is None:
      9     print("None")
     10 else:

NameError: name 'self' is not defined
all(np.array([0.5,0.5])<1)
True
any(P.sum(axis=1))<1
False
if not len([1]):
    print(2)
mc.num_recurrent_classes
2
mc.recurrent_classes_indices
[array([0]), array([1])]
set(list(map(list,mc.communication_classes_indices)))
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-31-6b690714732b> in <module>
----> 1 set(list(map(list,mc.communication_classes_indices)))

TypeError: unhashable type: 'list'
mc.communication_classes_indices
[array([0]), array([1]), array([3, 4]), array([2])]
np.array(tuple(np.array([0])))
array([0])
set(list(map(tuple,mc.communication_classes_indices)))
{(0,), (1,), (2,), (3, 4)}
transient = set(list(map(tuple,mc.communication_classes_indices))).difference(
    set(list(map(tuple,mc.recurrent_classes_indices))))
transient = set(list(map(tuple,mc.communication_classes_indices))).difference(
    set(list(map(tuple,mc.recurrent_classes_indices))))
transient
{(2,), (3, 4)}
print(*mc.communication_classes_indices)
[0] [1] [3 4] [2]
print(*[[1],[2]])
[1] [2]
print(*[[1]])
[1]
mc.is_irreducible
print("Markov Chain is composed by:\n"
              "Recurrent classes:")
print(*mc.recurrent_classes_indices, sep = ", ")
print("Transient classes:")
if transient_classes is None:
    print("None")
else:
    print(*mc.communication_classes_indices, sep = ", ")
Markov Chain is composed by:
Recurrent classes:
[0], [1]
Transient classes:
[0], [1], [3 4], [2]
[np.asarray(i) for i in transient]
[array([2]), array([3, 4])]
len(set({1}).difference(set({1})))
0
recurrent_classes = []
transient_classes = []
for cclass in mc.communication_classes_indices:
    rows = cclass[:, np.newaxis]
    p_temp = P[rows, cclass]
    if all(p_temp.sum(axis=1)<1):
        transient_classes.append(cclass)
    else:
        recurrent_classes.append(cclass)
if not len(transient_classes):
    transient_classes = None
transient_classes = None    
print(recurrent_classes, transient_classes)
[array([0]), array([1])] None
P.flatten()
array([1. , 0. , 0. , 0. , 0. , 0. , 1. , 0. , 0. , 0. , 0. , 0. , 0. ,
       0.5, 0.5, 0. , 0.5, 0. , 0. , 0.5, 0.5, 0. , 0. , 0.5, 0. ])
P2 = np.array([[0.2, 0.8,0,0,0],
              [0.4,0.6,0,0,0],
              [0, 0,0,0.5, 0.5],
             [0,0.5,0, 0, 0.5],
             [0.5,0, 0, 0.5, 0]])
P2
array([[0.2, 0.8, 0. , 0. , 0. ],
       [0.4, 0.6, 0. , 0. , 0. ],
       [0. , 0. , 0. , 0.5, 0.5],
       [0. , 0.5, 0. , 0. , 0.5],
       [0.5, 0. , 0. , 0.5, 0. ]])
mc2 = qe.MarkovChain(P2)
mc2.recurrent_classes_indices
[array([0, 1])]
rec_classes = mc2.recurrent_classes_indices
for i, rec_class in enumerate(rec_classes):
    P_rec_class = P2[np.ix_(rec_class, rec_class)]
    print(P_rec_class)
[[0.2 0.8]
 [0.4 0.6]]
mc = qe.MarkovChain(P)
mc_graph = mc.digraph
mc_graph.num_sink_strongly_connected_components
2
mc.recurrent_classes_indices
[array([0]), array([1])]
mc.communication_classes_indices
[array([0]), array([1]), array([3, 4]), array([2])]
rec_classes = mc.recurrent_classes_indices
rec_classes
[array([0]), array([1])]
for i, rec_class in enumerate(rec_classes):
    P_rec_class = P[np.ix_(rec_class, rec_class)]
    print(P_rec_class)
[[1.]]
[[1.]]
recurrent_indices = []
for i in mc.recurrent_classes_indices:
    recurrent_indices.extend(list(i))
recurrent_indices
[0, 1]
k = len(P)
transient_indices = np.array(list(set(range(k)).difference(set(recurrent_indices))))
recurrent_indices = np.array(recurrent_indices)
transient_indices 
array([2, 3, 4])
# rows = 
Q = P[transient_indices[:, np.newaxis], transient_indices]
S = P[recurrent_indices[:, np.newaxis], recurrent_indices]
R = P[transient_indices[:, np.newaxis], recurrent_indices]
print(Q,S,R)
[[0.  0.5 0.5]
 [0.  0.  0.5]
 [0.  0.5 0. ]] [[1. 0.]
 [0. 1.]] [[0.  0. ]
 [0.  0.5]
 [0.5 0. ]]
def expected_steps_fast(Q):
    I = np.identity(Q.shape[0])
    o = np.ones(Q.shape[0])
    return(np.linalg.solve(I-Q, o))
expected_steps_fast(Q)
array([3., 2., 2.])

the fundamental matrix N:

$n_{ij}$:the mean of the total number of times the process is in transient state j given it started in transient state i

# norm_Q = Q/Q.sum(axis=1)
I = np.identity(len(Q))
N = inv(I-Q)
N
array([[1.        , 1.        , 1.        ],
       [0.        , 1.33333333, 0.66666667],
       [0.        , 0.66666667, 1.33333333]])
I-Q
array([[ 1. , -0.5, -0.5],
       [ 0. ,  1. , -0.5],
       [ 0. , -0.5,  1. ]])
Q = np.array([[0, 0.5, 0.5],
             [0, 0, 0.5],
             [0, 0.5, 0]])
Q
array([[0. , 0.5, 0.5],
       [0. , 0. , 0.5],
       [0. , 0.5, 0. ]])
I = np.identity(len(Q))
I
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])
from numpy.linalg import inv
N = inv(I-Q)
N
array([[1.        , 1.        , 1.        ],
       [0.        , 1.33333333, 0.66666667],
       [0.        , 0.66666667, 1.33333333]])
Q = np.array([[0, 0.5],
             [0.5, 0]])
I = np.identity(len(Q))
N = inv(I-Q)
N
array([[1.33333333, 0.66666667],
       [0.66666667, 1.33333333]])
sm = Spatial_Markov(rpci, w, fixed=True, k=5, m=5)
sm.F[0]
array([[  2.29835259,  28.95614035,  46.14285714,  80.80952381,
        279.42857143],
       [ 33.86549708,   3.79459555,  22.57142857,  57.23809524,
        255.85714286],
       [ 43.60233918,   9.73684211,   4.91085714,  34.66666667,
        233.28571429],
       [ 46.62865497,  12.76315789,   6.25714286,  14.61564626,
        198.61904762],
       [ 52.62865497,  18.76315789,  12.25714286,   6.        ,
         34.1031746 ]])
cc = np.array([0.8, 0.9, 1, 1.2])
sm = Spatial_Markov(rpci, w, cutoffs=cc, lag_cutoffs=cc)
sm = Spatial_Markov(rpci, w, cutoffs=cc, lag_cutoffs=cc, fill_diag=True)
sm.S[2]
array([0.03607655, 0.22667277, 0.25883041, 0.43607249, 0.04234777])
a = np.random.random((2,3))
a
array([[0.19044059, 0.29836464, 0.50387978],
       [0.38069295, 0.8113362 , 0.61993731]])
b = np.random.random((3,))
b
array([0.66913524, 0.81527363, 0.80146708])
np.array([a,b])
array([array([[0.19044059, 0.29836464, 0.50387978],
       [0.38069295, 0.8113362 , 0.61993731]]),
       array([0.66913524, 0.81527363, 0.80146708])], dtype=object)
list([a,b])
[array([[0.19044059, 0.29836464, 0.50387978],
        [0.38069295, 0.8113362 , 0.61993731]]),
 array([0.66913524, 0.81527363, 0.80146708])]
a.dtype
dtype('float64')
if np.array(list([a,b])).dtype is np.dtype('O'):
    print(0)
0
if a.dtype is np.dtype('O'):
    print(0)
np.array(list([a,b])).dtype
np.array(list([a,b])).dtype
dtype('O')
sm.P
array([[[0.96703297, 0.03296703, 0.        , 0.        , 0.        ],
        [0.10638298, 0.68085106, 0.21276596, 0.        , 0.        ],
        [0.        , 0.14285714, 0.7755102 , 0.08163265, 0.        ],
        [0.        , 0.        , 0.5       , 0.5       , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 1.        ]],

       [[0.88636364, 0.10606061, 0.00757576, 0.        , 0.        ],
        [0.04402516, 0.89308176, 0.06289308, 0.        , 0.        ],
        [0.        , 0.05882353, 0.8627451 , 0.07843137, 0.        ],
        [0.        , 0.        , 0.13846154, 0.86153846, 0.        ],
        [0.        , 0.        , 0.        , 0.        , 1.        ]],

       [[0.78082192, 0.17808219, 0.02739726, 0.01369863, 0.        ],
        [0.03488372, 0.90406977, 0.05813953, 0.00290698, 0.        ],
        [0.        , 0.05919003, 0.84735202, 0.09034268, 0.00311526],
        [0.        , 0.        , 0.05811623, 0.92985972, 0.01202405],
        [0.        , 0.        , 0.        , 0.14285714, 0.85714286]],

       [[0.82692308, 0.15384615, 0.        , 0.01923077, 0.        ],
        [0.0703125 , 0.7890625 , 0.125     , 0.015625  , 0.        ],
        [0.00295858, 0.06213018, 0.82248521, 0.10946746, 0.00295858],
        [0.        , 0.00185529, 0.07606679, 0.88497217, 0.03710575],
        [0.        , 0.        , 0.        , 0.07803468, 0.92196532]],

       [[1.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 1.        , 0.        , 0.        , 0.        ],
        [0.        , 0.06666667, 0.9       , 0.03333333, 0.        ],
        [0.        , 0.        , 0.05660377, 0.90566038, 0.03773585],
        [0.        , 0.        , 0.        , 0.03932584, 0.96067416]]])
f = ps.io.open(ps.examples.get_path('usjoin.csv'))
pci = np.array([f.by_col[str(y)] for y in range(1929, 2010)])
q5 = np.array([mc.Quantiles(y).yb for y in pci]).transpose()
m = Markov(q5)
m.steady_state
array([0.20774716, 0.18725774, 0.20740537, 0.18821787, 0.20937187])
m.fmpt
array([[  4.81354357,  11.50292712,  29.60921231,  53.38594954,
        103.59816743],
       [ 42.04774505,   5.34023324,  18.74455332,  42.50023268,
         92.71316899],
       [ 69.25849753,  27.21075248,   4.82147603,  25.27184624,
         75.43305672],
       [ 84.90689329,  42.85914824,  17.18082642,   5.31299186,
         51.60953369],
       [ 98.41295543,  56.36521038,  30.66046735,  14.21158356,
          4.77619083]])
m.sojourn_time
array([11.125     ,  4.65806452,  4.73372781,  4.95172414, 13.77586207])
sojourn_time(m.p)
array([11.125     ,  4.65806452,  4.73372781,  4.95172414, 13.77586207])
q5
array([[0, 0, 0, ..., 0, 0, 0],
       [2, 2, 2, ..., 1, 1, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [1, 1, 1, ..., 0, 0, 0],
       [3, 3, 2, ..., 2, 2, 2],
       [3, 3, 3, ..., 4, 4, 4]])
q5[:,10]
array([0, 2, 0, 4, 2, 4, 4, 2, 0, 1, 4, 3, 2, 1, 0, 1, 2, 4, 4, 3, 2, 0,
       2, 3, 1, 4, 3, 4, 1, 4, 0, 0, 3, 1, 3, 3, 4, 0, 0, 0, 1, 2, 2, 1,
       3, 1, 2, 3])
m2 = Markov(q5[:,10:12])
m2.transitions
array([[10.,  0.,  0.,  0.,  0.],
       [ 0.,  9.,  0.,  0.,  0.],
       [ 0.,  0., 10.,  0.,  0.],
       [ 0.,  0.,  0.,  9.,  0.],
       [ 0.,  0.,  0.,  0., 10.]])
m2 = Markov(q5[:,:2])
m2.p
array([[1.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.88888889, 0.11111111, 0.        , 0.        ],
       [0.        , 0.09090909, 0.81818182, 0.09090909, 0.        ],
       [0.        , 0.        , 0.        , 1.        , 0.        ],
       [0.        , 0.        , 0.        , 0.1       , 0.9       ]])
np.inf
inf
p = m2.p
pii = p.diagonal()
pii
array([1.        , 0.88888889, 0.81818182, 1.        , 0.9       ])
np.where(pii==1)[0]
array([0, 3])
np.full(len(pii), np.inf)
array([inf, inf, inf, inf, inf])
def sojourn_time(p):
    """
    Calculate sojourn time based on a given transition probability matrix.

    """
    p = np.asarray(p)
    pii = p.diagonal()

    if not (1 - pii).all():
        absorbing_states = np.where(pii==1)[0]
        non_absorbing_states = np.where(pii!=1)[0]
        st = np.full(len(pii), np.inf)
        print("Sojourn times are infinite for absorbing states! In this Markov Chain, states {} are absorbing states.".format(list(absorbing_states)))
        st[non_absorbing_states] = 1 / (1 - pii[non_absorbing_states])
    else:
        st = 1 / (1 - pii)
    return st
p2 = np.array([[.5, .5, 0], [.3, .7, 0], [0, 0, 1]])
sojourn_time(p2)
Sojourn times are infinite for absorbing states! In this Markov Chain, states [2] are absorbing states.
array([2.        , 3.33333333,        inf])
sojourn_time(p)
Sojourn times are infinite for absorbing states! In this Markov Chain, states [0, 3] are absorbing states.
array([ inf,  9. ,  5.5,  inf, 10. ])
m2.fmpt
array([[1.        ,        inf,        inf,        inf,        inf],
       [       inf, 1.78305684, 8.65447755,        inf,        inf],
       [       inf, 7.393034  , 2.2770465 ,        inf,        inf],
       [       inf,        inf,        inf, 1.        ,        inf],
       [       inf,        inf,        inf,        inf, 1.        ]])
m2.steady_state
array([[1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0.]])
m2.transitions
array([[10.,  0.,  0.,  0.,  0.],
       [ 0.,  8.,  1.,  0.,  0.],
       [ 0.,  1.,  9.,  1.,  0.],
       [ 0.,  0.,  0.,  8.,  0.],
       [ 0.,  0.,  0.,  1.,  9.]])
p = np.array([[.5, .25, .25], [.5, 0, .5], [.25, .25, .5]])
p
array([[0.5 , 0.25, 0.25],
       [0.5 , 0.  , 0.5 ],
       [0.25, 0.25, 0.5 ]])
p2 = np.array([[.5, .5, 0], [.3, .7, 0], [0, 0, 1]])
p2
array([[0.5, 0.5, 0. ],
       [0.3, 0.7, 0. ],
       [0. , 0. , 1. ]])
p3 = np.array([[.5, .5, 0], [.3, .7, 0], [0, 0, 0]])
p3
array([[0.5, 0.5, 0. ],
       [0.3, 0.7, 0. ],
       [0. , 0. , 0. ]])
p23 = np.array([[[.5, .5, 0], [.3, .7, 0], [0, 0, 0]],
                [[0, 0, 0], [.3, .7, 0], [0, 0, 0]]])
p23
array([[[0.5, 0.5, 0. ],
        [0.3, 0.7, 0. ],
        [0. , 0. , 0. ]],

       [[0. , 0. , 0. ],
        [0.3, 0.7, 0. ],
        [0. , 0. , 0. ]]])
util.fill_diag2(p23)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-30-af60868f6222> in <module>
----> 1 util.fill_diag2(p23)

~/Google Drive/python_repos/pysal-refactor/giddy/giddy/util.py in fill_diag2(p)
    102     p_temp = np.asarray(p)
    103     if len(p_temp.shape) == 3:
--> 104         raise ValueError('Please use function `fill_diag3` for '
    105                          'a three-dimensional matrix!')
    106     p0 = (p_temp.sum(axis=1) == 0)

ValueError: Please use function `fill_diag3` for a three-dimensional matrix!
util.fill_diag3(p23)
array([[[0.5, 0.5, 0. ],
        [0.3, 0.7, 0. ],
        [0. , 0. , 1. ]],

       [[1. , 0. , 0. ],
        [0.3, 0.7, 0. ],
        [0. , 0. , 1. ]]])
util.fill_diag2(p3)
array([[0.5, 0.5, 0. ],
       [0.3, 0.7, 0. ],
       [0. , 0. , 1. ]])
ergodic.steady_state_general(p2)
array([[0.375, 0.625, 0.   ],
       [0.   , 0.   , 1.   ]])
ergodic.steady_state(p2)
array([0.5, 0.5, 0. ])
np.inf
inf
ergodic.fmpt_general(p2)
array([[2.66666667, 2.        ,        inf],
       [3.33333333, 1.6       ,        inf],
       [       inf,        inf, 1.        ]])
ergodic.fmpt_general(p)
array([[2.5       , 4.        , 3.33333333],
       [2.66666667, 5.        , 2.66666667],
       [3.33333333, 4.        , 2.5       ]])
f = libpysal.io.open(libpysal.examples.get_path("usjoin.csv"))
pci = np.array([f.by_col[str(y)] for y in range(1929,2010)])
pci = pci.transpose()
rpci = pci/(pci.mean(axis=0))
w = libpysal.io.open(libpysal.examples.get_path("states48.gal")).read()
w.transform = 'r'
cc = np.array([0.8, 0.9, 1, 1.2])
sm = Spatial_Markov(rpci, w, cutoffs=cc, lag_cutoffs=cc, variable_name='rpci')
sm.cutoffs
array([0.8, 0.9, 1. , 1.2])
sm.T
array([[[264.,   9.,   0.,   0.,   0.],
        [  5.,  32.,  10.,   0.,   0.],
        [  0.,   7.,  38.,   4.,   0.],
        [  0.,   0.,   3.,   3.,   0.],
        [  0.,   0.,   0.,   0.,   0.]],

       [[117.,  14.,   1.,   0.,   0.],
        [  7., 142.,  10.,   0.,   0.],
        [  0.,   6.,  88.,   8.,   0.],
        [  0.,   0.,   9.,  56.,   0.],
        [  0.,   0.,   0.,   0.,   4.]],

       [[ 57.,  13.,   2.,   1.,   0.],
        [ 12., 311.,  20.,   1.,   0.],
        [  0.,  19., 272.,  29.,   1.],
        [  0.,   0.,  29., 464.,   6.],
        [  0.,   0.,   0.,   7.,  42.]],

       [[ 43.,   8.,   0.,   1.,   0.],
        [  9., 101.,  16.,   2.,   0.],
        [  1.,  21., 278.,  37.,   1.],
        [  0.,   1.,  41., 477.,  20.],
        [  0.,   0.,   0.,  27., 319.]],

       [[  0.,   0.,   0.,   0.,   0.],
        [  0.,   0.,   0.,   0.,   0.],
        [  0.,   2.,  27.,   1.,   0.],
        [  0.,   0.,   6.,  96.,   4.],
        [  0.,   0.,   0.,   7., 171.]]])
p_adjust = sm.T
(p_adjust.sum(axis=2) == 0).sum()
0
def fill_diagonal2(p):
    """

    :param p:
    :return:
    """
    p = np.asarray(p)
    if len(p.shape) == 3:
        raise ValueError('Please use function `fill_diagonal3` for '
                         'three-dimensional matrix!')
    p0 = (p.sum(axis=1) == 0)
    if not p0.sum():
        row_zero_i = np.where(p0)
        for row in row_zero_i:
            p[row, row] = 1
    return p
def fill_diagonal3(p):
    p = np.asarray(p)
    p0 = (p.sum(axis=2) == 0)
    if not p0.sum():
        rows, cols = np.where(p0)
        row_zero_i = list(zip(rows, cols))
        for row in row_zero_i:
            i, j = row
            p[i, j, j] = 1
    return p
fill_diagonal2(sm.T[4])
array([[  1.,   0.,   0.,   0.,   0.],
       [  0.,   1.,   0.,   0.,   0.],
       [  0.,   2.,  27.,   1.,   0.],
       [  0.,   0.,   6.,  96.,   4.],
       [  0.,   0.,   0.,   7., 171.]])
b=2
a=b
a = 3
b
2
fill_diagonal2(sm.T)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-30-0b8c6969112b> in <module>
----> 1 fill_diagonal2(sm.T)

<ipython-input-29-1d3742ef6fc2> in fill_diagonal2(p)
      7     p = np.asarray(p)
      8     if len(p.shape) == 3:
----> 9         raise ValueError('Please use function `fill_diagonal3` for '
     10                          'three-dimensional matrix!')
     11     p0 = (p.sum(axis=1) == 0)

ValueError: Please use function `fill_diagonal3` for three-dimensional matrix!
rows, cols = np.where(p_adjust.sum(axis=2) == 0)
row_zero_i = list(zip(rows, cols))
row_zero_i
[(0, 4), (4, 0), (4, 1)]
for row in row_zero_i:
    i, j = row
    p_adjust[i, j, j] = 1
p_adjust
array([[[264.,   9.,   0.,   0.,   0.],
        [  5.,  32.,  10.,   0.,   0.],
        [  0.,   7.,  38.,   4.,   0.],
        [  0.,   0.,   3.,   3.,   0.],
        [  0.,   0.,   0.,   0.,   1.]],

       [[117.,  14.,   1.,   0.,   0.],
        [  7., 142.,  10.,   0.,   0.],
        [  0.,   6.,  88.,   8.,   0.],
        [  0.,   0.,   9.,  56.,   0.],
        [  0.,   0.,   0.,   0.,   4.]],

       [[ 57.,  13.,   2.,   1.,   0.],
        [ 12., 311.,  20.,   1.,   0.],
        [  0.,  19., 272.,  29.,   1.],
        [  0.,   0.,  29., 464.,   6.],
        [  0.,   0.,   0.,   7.,  42.]],

       [[ 43.,   8.,   0.,   1.,   0.],
        [  9., 101.,  16.,   2.,   0.],
        [  1.,  21., 278.,  37.,   1.],
        [  0.,   1.,  41., 477.,  20.],
        [  0.,   0.,   0.,  27., 319.]],

       [[  1.,   0.,   0.,   0.,   0.],
        [  0.,   1.,   0.,   0.,   0.],
        [  0.,   2.,  27.,   1.,   0.],
        [  0.,   0.,   6.,  96.,   4.],
        [  0.,   0.,   0.,   7., 171.]]])
np.where(p_adjust.sum(axis=2) == 0)
(array([0, 4, 4]), array([4, 0, 1]))
if 
array([[273.,  47.,  49.,   6.,   0.],
       [132., 159., 102.,  65.,   4.],
       [ 73., 344., 321., 499.,  49.],
       [ 52., 128., 338., 539., 346.],
       [  0.,   0.,  30., 106., 178.]])
if p_adjust.sum(axis=1).all() != 1:
    row_sum = p_adjust.sum(axis=1)
    indexn0 = np.where(row_sum != 0)[0]
    p_diag = np.eye(k)
    p_diag[indexn0] = p_adjust[indexn0]
    p_adjust = p_diag
p=np.array([[.5, .25, .25],[.5,0,.5],[.25,.25,.5]])
p
array([[0.5 , 0.25, 0.25],
       [0.5 , 0.  , 0.5 ],
       [0.25, 0.25, 0.5 ]])
p=np.array([[.5, .5, 0],[.2,0.8,0],[0,0,1]])
p
array([[0.5, 0.5, 0. ],
       [0.2, 0.8, 0. ],
       [0. , 0. , 1. ]])
import quantecon as qe
mc = qe.MarkovChain(p)
mc.num_recurrent_classes
2
mc.n
3
fmpt_general(p)
array([[3.5, 2. , inf],
       [5. , 1.4, inf],
       [inf, inf, 1. ]])
from giddy.ergodic import fmpt
def fmpt_general(P):
    """
    Calculates the matrix of first mean passage times for an ergodic/non-ergodic
    transition probability matrix.

    Parameters
    ----------
    P       : array
             (k, k), an ergodic/non-ergodic Markov transition probability
             matrix.

    Returns
    -------
    fmpt_all: array
              (k, k), elements are the expected value for the number of
              intervals required for a chain starting in state i to first
              enter state j. If i=j then this is the recurrence time.

    Examples
    --------
    >>> import numpy as np
    >>> from giddy.ergodic import fmpt_general

    # irreducible Markov chain
    >>> p = np.array([[.5, .25, .25],[.5,0,.5],[.25,.25,.5]])
    >>> fm = fmpt_general(p)
    >>> fm
    array([[2.5       , 4.        , 3.33333333],
           [2.66666667, 5.        , 2.66666667],
           [3.33333333, 4.        , 2.5       ]])

    Thus, if it is raining today in Oz we can expect a nice day to come
    along in another 4 days, on average, and snow to hit in 3.33 days. We can
    expect another rainy day in 2.5 days. If it is nice today in Oz, we would
    experience a change in the weather (either rain or snow) in 2.67 days from
    today. (That wicked witch can only die once so I reckon that is the
    ultimate absorbing state).

    # reducible Markov chain
    >>> p = np.array([[.5, .25, .25],[.5,0,.5],[.25,.25,.5]])
    >>> fm = fmpt_general(p)
    >>> fm
    array([[2.5       , 4.        , 3.33333333],
           [2.66666667, 5.        , 2.66666667],
           [3.33333333, 4.        , 2.5       ]])


    """

    P = np.asarray(P)
    mc = qe.MarkovChain(P)
    num_classes = mc.num_communication_classes
    if num_classes == 1:
        fmpt_all = fmpt(P)
        return fmpt_all
    else:
        i_cclasses = mc.communication_classes_indices
        fmpt_all = np.full((mc.n, mc.n), np.inf)
        for cclass in i_cclasses:
            rows = cclass[:, np.newaxis]
            p_temp = P[rows, cclass]
            fmpt_all[rows, cclass] = fmpt(p_temp)
        return fmpt_all
mc.stationary_distributions
array([[0.28571429, 0.71428571, 0.        ],
       [0.        , 0.        , 1.        ]])

Full Rank Markov

import libpysal
f = libpysal.io.open(libpysal.examples.get_path("usjoin.csv"))
pci = np.array([f.by_col[str(y)] for y in range(1929,2010)])
q5 = np.array([mc.Quantiles(y).yb for y in pci]).transpose()
m = Markov(q5)
m.p
mc = qe.MarkovChain(m.p)
mc.stationary_distributions[0]
mc.stationary_distributions[0].shape
pci
from giddy.markov import FullRank_Markov, Markov
import mapclassify as mc
income_table = pd.read_csv(ps.examples.get_path("usjoin.csv"))
pci = income_table[list(map(str,range(1929,2010)))].values
# q5 = np.array([mc.Quantiles(y).yb for y in pci.T]).transpose()
# m = Markov(q5)
# m.p
pci
m = Markov(q5)
m.p
income_table.head()

Let's only select per capical incomes in years 1949-1960

pci = income_table[list(map(str,range(1990,2010)))].values
pci
m = FullRank_Markov(pci)
m.ranks
m.transitions

Full rank Markov transition probability matrix

m.p
np.where(m.p == 1)

The probability of staying in rank 48 is $100\%$. Thus, this rank is not communicating with other ranks, meaning that if a US State currently ranks 48th in per capita income, it will always rank 48th in the future. In this case, there is not going to be a unique steady state distribution towards which the Markov Chain converges. Instead, the future distribution will be dependent on whether the system is traped in rank 48. And also because of this, the estimation of the first mean passage time becomes more complicated. Currently, giddy does not have a mechanism to deal with it.

The current implementation for estimating the steady state distribution in giddy ignores the possibility of being trapped in rank 48 and gives the following distribution.

The estimation of the first mean passage times will incur an error shown as follows:

m.fmpt

At the moment (due to the time limit for submitting the paper), I will suggest to use an external package quantecon to check whether the Markov Chain is ergodic. If not, we give some explanation and not calculating or ploting the first mean passage times.

I will refactor giddy in the meantime to incorporate the non-ergodic chains when estimating steady state distributions and first mean passage times.

Use quantecon to check whether the Markov Chain is ergodic and which Markov states are not communicating with others

(1) check whether the Markov Chain is ergodic

mc.is_irreducible and mc.is_aperiodic
qe.MarkovChain?
import quantecon as qe
mc = qe.MarkovChain(m.p)

mc.is_irreducible and mc.is_aperiodic
mc.num_communication_classes?
mc.num_communication_classes
mc.communication_classes_indices
k = len(m.p)
fmpt_k = np.full((k, k), np.inf)
cclass = mc.communication_classes_indices[1]
cclass[:, np.newaxis]
rows = cclass[:, np.newaxis]
m.p[rows,cclass]
p_temp = m.p[cclass,:][:,cclass]
p_temp
fmpt_k
fmpt_k.flat[[1,2]]
fmpt_temp = fmpt(p_temp)
fmpt_temp
np.pad?
np.putmask?
fmpt_k[cclass,:]
fmpt_temp
b = fmpt_k[cclass,:]
b[:,cclass] = fmpt_temp
fmpt_k[cclass,:][:,cclass]
np.put(fmpt_k, cclass, fmpt_temp)
np.put?
fmpt_k[cclass,:][:,cclass]
fmpt_k[cclass,:][:,cclass] = fmpt_temp
fmpt_k
for cclass in mc.communication_classes_indices:
    rows = cclass[:, np.newaxis]
    p_temp = m.p[rows,cclass]
#     p_temp = m.p[cclass,:][:,cclass]
    fmpt_temp = fmpt(p_temp)
    print(cclass, fmpt_temp)
    fmpt_k[rows,cclass] = fmpt_temp
fmpt_k
fmpt_k
mc.communication_classes_indices[1]
m.p[list(mc.communication_classes_indices[1]),:][:,list(mc.communication_classes_indices[1])]
from giddy.ergodic import fmpt
a = np.zeros((2,2))
a[0] = np.Infinity
a
np.full((2, 2), np.inf)
fmpt(m.p[list(mc.communication_classes_indices[1]),:][:,list(mc.communication_classes_indices[1])])
k = len(m.p)
mc.stationary_distributions
mc.is_irreducible
mc.is_aperiodic

(2) check which Markov states are not communicating with others

mc.num_communication_classes
mc.communication_classes

We have two sets of Markov states which are not communicating with each other

mc.num_communication_classes

The first set contains all the Markov states (rank 1 to 47) and the second set contains only 1 Markov state (rank 48)

mc.communication_classes
from giddy.ergodic import steady_state as STEADY_STATE
if mc.num_communication_classes==1:
    STEADY_STATE(mp.)