Rank_Markov_ergodic_morecomu

Full Rank Markov and Geographic Rank Markov

Author: Wei Kang weikang9009@gmail.com

import libpysal as ps
import libpysal
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import pandas as pd
import geopandas as gpd
from giddy.markov import Spatial_Markov, Markov, FullRank_Markov
from giddy import ergodic, util
import mapclassify as mc
import quantecon as qe

f = ps.io.open(ps.examples.get_path('usjoin.csv'))
pci = np.array([f.by_col[str(y)] for y in range(2000, 2010)])
pci = pci.transpose()
rpci = pci / (pci.mean(axis=0))
discretized = (rpci * 100).astype(int) % 4
w = ps.io.open(ps.examples.get_path("states48.gal")).read()
w.transform = 'r'

pci = np.array([f.by_col[str(y)] for y in range(1929,2010)])
# rpci = pci/(pci.mean(axis=0))
rq = mc.Quantiles(pci.flatten()).yb.reshape(pci.shape)
m = Markov(rq)

The Markov Chain is irreducible and is composed by:
1 Recurrent class (indices):
[0 1 2 3 4]
0 Transient class.
The Markov Chain has 0 absorbing state.

m.steady_state

array([0.1696532 , 0.1853406 , 0.19973599, 0.2178871 , 0.22738311])

q5 = np.array([mc.Quantiles(y).yb for y in pci]).transpose()

m = Markov(q5)

The Markov Chain is reducible and is composed by:
Recurrent classes:
[0 1], [2], [3 4]
Transient classes:
None

m.steady_state

array([[0.50531915, 0.49468085, 0.        , 0.        , 0.        ],
       [0.        , 0.        , 1.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.5       , 0.5       ]])

a = Spatial_Markov(rpci, w)

a = FullRank_Markov(rpci)

The Markov Chain is reducible and is composed by:
Recurrent classes:
[0], [1], [2], [3 4], [ 5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46], [47]
Transient classes:
None

P = np.array([[0,0,0.8,0.2],
             [0,0,0.2,0.8],
             [0.3,0.7,0,0],
             [0.7,0.3,0,0]])

m = qe.MarkovChain(P)

m.recurrent_classes_indices

[array([0, 1, 2, 3])]

m.communication_classes_indices

[array([0, 1, 2, 3])]

m.is_aperiodic

False

m.stationary_distributions

array([[0.25, 0.25, 0.25, 0.25]])

p = np.array([[0.8,0.2],[0.7,0.3]])

list(np.argwhere(p==0.5))

[]

len([])

print(*[], sep=", "

from giddy.ergodic import fmpt, steady_state

fmpt(p)

array([[1.28571429, 5.        ],
       [1.42857143, 4.5       ]])

steady_state(p)

array([0.77777778, 0.22222222])

1/steady_state(p)

array([1.28571429, 4.5       ])

P = np.array([[1, 0,0,0,0],
              [0,1,0,0,0],
              [0, 0,0,0.5, 0.5],
             [0,0.5,0, 0, 0.5],
             [0.5,0, 0, 0.5, 0]])
P

array([[1. , 0. , 0. , 0. , 0. ],
       [0. , 1. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0.5, 0.5],
       [0. , 0.5, 0. , 0. , 0.5],
       [0.5, 0. , 0. , 0.5, 0. ]])

np.where?

np.argwhere(P==0.5)

array([[2, 3],
       [2, 4],
       [3, 1],
       [3, 4],
       [4, 0],
       [4, 3]])

np.argwhere(P==1)[:,0]

array([0, 1])

mc = qe.MarkovChain(P)
mc.communication_classes_indices

[array([0]), array([1]), array([3, 4]), array([2])]

mc.recurrent_classes?

mc.num_communication_classes

mc.recurrent_classes_indices

[array([0]), array([1])]

c = [['b','a','a'],['c','c','a'],['c','b','c']]
m = Markov(np.array(c))

The Markov Chain is reducible and is composed by:
Recurrent classes:
[0]
Transient classes:
[1 2]

m.p

array([[1.        , 0.        , 0.        ],
       [0.5       , 0.        , 0.5       ],
       [0.33333333, 0.33333333, 0.33333333]])

c = [['b','a','c'],['c','c','a'],['c','b','c']]
c.extend([['a','a','b'], ['a','b','c']])
c = np.array(c)
m = Markov(c)

The Markov Chain is irreducible and is composed by:
Recurrent classes:
[0 1 2]
Transient classes:
None

m.classes

array(['a', 'b', 'c'], dtype='<U1')

m.communication_classes

[array([0, 1, 2])]

m.classes[m.communication_classes[0]]

array(['a', 'b', 'c'], dtype='<U1')

classes

if mc.is_irreducible:
    print("The Markov Chain is irreducible and is composed by:")
else:
    print("The Markov Chain is reducible and is composed by:")
print("{0} Recurrent classes:".format(mc.num_recurrent_classes))
print(mc.recurrent_classes_indices, sep = ", ")
print("Transient classes:")
if self.transient_classes is None:
    print("None")
else:
    print(*self.transient_classes, sep = ", ")

The Markov Chain is reducible and is composed by:
2 Recurrent classes:
[array([0]), array([1])]
Transient classes:

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-17-6c26cc7bf27f> in <module>
      6 print(mc.recurrent_classes_indices, sep = ", ")
      7 print("Transient classes:")
----> 8 if self.transient_classes is None:
      9     print("None")
     10 else:

NameError: name 'self' is not defined

all(np.array([0.5,0.5])<1)

True

any(P.sum(axis=1))<1

False

if not len([1]):
    print(2)

mc.num_recurrent_classes

mc.recurrent_classes_indices

[array([0]), array([1])]

set(list(map(list,mc.communication_classes_indices)))

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-31-6b690714732b> in <module>
----> 1 set(list(map(list,mc.communication_classes_indices)))

TypeError: unhashable type: 'list'

mc.communication_classes_indices

[array([0]), array([1]), array([3, 4]), array([2])]

np.array(tuple(np.array([0])))

array([0])

set(list(map(tuple,mc.communication_classes_indices)))

{(0,), (1,), (2,), (3, 4)}

transient = set(list(map(tuple,mc.communication_classes_indices))).difference(
    set(list(map(tuple,mc.recurrent_classes_indices))))

transient = set(list(map(tuple,mc.communication_classes_indices))).difference(
    set(list(map(tuple,mc.recurrent_classes_indices))))

transient

{(2,), (3, 4)}

print(*mc.communication_classes_indices)

[0] [1] [3 4] [2]

print(*[[1],[2]])

[1] [2]

print(*[[1]])

[1]

mc.is_irreducible

print("Markov Chain is composed by:\n"
              "Recurrent classes:")
print(*mc.recurrent_classes_indices, sep = ", ")
print("Transient classes:")
if transient_classes is None:
    print("None")
else:
    print(*mc.communication_classes_indices, sep = ", ")

Markov Chain is composed by:
Recurrent classes:
[0], [1]
Transient classes:
[0], [1], [3 4], [2]

[np.asarray(i) for i in transient]

[array([2]), array([3, 4])]

len(set({1}).difference(set({1})))

recurrent_classes = []
transient_classes = []
for cclass in mc.communication_classes_indices:
    rows = cclass[:, np.newaxis]
    p_temp = P[rows, cclass]
    if all(p_temp.sum(axis=1)<1):
        transient_classes.append(cclass)
    else:
        recurrent_classes.append(cclass)
if not len(transient_classes):
    transient_classes = None
transient_classes = None    
print(recurrent_classes, transient_classes)

[array([0]), array([1])] None

P.flatten()

array([1. , 0. , 0. , 0. , 0. , 0. , 1. , 0. , 0. , 0. , 0. , 0. , 0. ,
       0.5, 0.5, 0. , 0.5, 0. , 0. , 0.5, 0.5, 0. , 0. , 0.5, 0. ])

P2 = np.array([[0.2, 0.8,0,0,0],
              [0.4,0.6,0,0,0],
              [0, 0,0,0.5, 0.5],
             [0,0.5,0, 0, 0.5],
             [0.5,0, 0, 0.5, 0]])
P2

array([[0.2, 0.8, 0. , 0. , 0. ],
       [0.4, 0.6, 0. , 0. , 0. ],
       [0. , 0. , 0. , 0.5, 0.5],
       [0. , 0.5, 0. , 0. , 0.5],
       [0.5, 0. , 0. , 0.5, 0. ]])

mc2 = qe.MarkovChain(P2)
mc2.recurrent_classes_indices

[array([0, 1])]

rec_classes = mc2.recurrent_classes_indices
for i, rec_class in enumerate(rec_classes):
    P_rec_class = P2[np.ix_(rec_class, rec_class)]
    print(P_rec_class)

[[0.2 0.8]
 [0.4 0.6]]

mc = qe.MarkovChain(P)

mc_graph = mc.digraph

mc_graph.num_sink_strongly_connected_components

mc.recurrent_classes_indices

[array([0]), array([1])]

mc.communication_classes_indices

[array([0]), array([1]), array([3, 4]), array([2])]

rec_classes = mc.recurrent_classes_indices
rec_classes

[array([0]), array([1])]

for i, rec_class in enumerate(rec_classes):
    P_rec_class = P[np.ix_(rec_class, rec_class)]
    print(P_rec_class)

[[1.]]
[[1.]]

recurrent_indices = []
for i in mc.recurrent_classes_indices:
    recurrent_indices.extend(list(i))
recurrent_indices

[0, 1]

k = len(P)
transient_indices = np.array(list(set(range(k)).difference(set(recurrent_indices))))
recurrent_indices = np.array(recurrent_indices)
transient_indices

array([2, 3, 4])

# rows = 
Q = P[transient_indices[:, np.newaxis], transient_indices]
S = P[recurrent_indices[:, np.newaxis], recurrent_indices]
R = P[transient_indices[:, np.newaxis], recurrent_indices]
print(Q,S,R)

[[0.  0.5 0.5]
 [0.  0.  0.5]
 [0.  0.5 0. ]] [[1. 0.]
 [0. 1.]] [[0.  0. ]
 [0.  0.5]
 [0.5 0. ]]

def expected_steps_fast(Q):
    I = np.identity(Q.shape[0])
    o = np.ones(Q.shape[0])
    return(np.linalg.solve(I-Q, o))

expected_steps_fast(Q)

array([3., 2., 2.])

the fundamental matrix N:

$n_{ij}$:the mean of the total number of times the process is in transient state j given it started in transient state i

# norm_Q = Q/Q.sum(axis=1)
I = np.identity(len(Q))
N = inv(I-Q)
N

array([[1.        , 1.        , 1.        ],
       [0.        , 1.33333333, 0.66666667],
       [0.        , 0.66666667, 1.33333333]])

I-Q

array([[ 1. , -0.5, -0.5],
       [ 0. ,  1. , -0.5],
       [ 0. , -0.5,  1. ]])

Q = np.array([[0, 0.5, 0.5],
             [0, 0, 0.5],
             [0, 0.5, 0]])
Q

array([[0. , 0.5, 0.5],
       [0. , 0. , 0.5],
       [0. , 0.5, 0. ]])

I = np.identity(len(Q))
I

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

from numpy.linalg import inv
N = inv(I-Q)
N

array([[1.        , 1.        , 1.        ],
       [0.        , 1.33333333, 0.66666667],
       [0.        , 0.66666667, 1.33333333]])

Q = np.array([[0, 0.5],
             [0.5, 0]])
I = np.identity(len(Q))
N = inv(I-Q)
N

array([[1.33333333, 0.66666667],
       [0.66666667, 1.33333333]])

sm = Spatial_Markov(rpci, w, fixed=True, k=5, m=5)

sm.F[0]

array([[  2.29835259,  28.95614035,  46.14285714,  80.80952381,
        279.42857143],
       [ 33.86549708,   3.79459555,  22.57142857,  57.23809524,
        255.85714286],
       [ 43.60233918,   9.73684211,   4.91085714,  34.66666667,
        233.28571429],
       [ 46.62865497,  12.76315789,   6.25714286,  14.61564626,
        198.61904762],
       [ 52.62865497,  18.76315789,  12.25714286,   6.        ,
         34.1031746 ]])

cc = np.array([0.8, 0.9, 1, 1.2])
sm = Spatial_Markov(rpci, w, cutoffs=cc, lag_cutoffs=cc)

sm = Spatial_Markov(rpci, w, cutoffs=cc, lag_cutoffs=cc, fill_diag=True)

sm.S[2]

array([0.03607655, 0.22667277, 0.25883041, 0.43607249, 0.04234777])

a = np.random.random((2,3))
a

array([[0.19044059, 0.29836464, 0.50387978],
       [0.38069295, 0.8113362 , 0.61993731]])

b = np.random.random((3,))
b

array([0.66913524, 0.81527363, 0.80146708])

np.array([a,b])

array([array([[0.19044059, 0.29836464, 0.50387978],
       [0.38069295, 0.8113362 , 0.61993731]]),
       array([0.66913524, 0.81527363, 0.80146708])], dtype=object)

list([a,b])

[array([[0.19044059, 0.29836464, 0.50387978],
        [0.38069295, 0.8113362 , 0.61993731]]),
 array([0.66913524, 0.81527363, 0.80146708])]

a.dtype

dtype('float64')

if np.array(list([a,b])).dtype is np.dtype('O'):
    print(0)

if a.dtype is np.dtype('O'):
    print(0)

np.array(list([a,b])).dtype

np.array(list([a,b])).dtype

dtype('O')

sm.P

array([[[0.96703297, 0.03296703, 0.        , 0.        , 0.        ],
        [0.10638298, 0.68085106, 0.21276596, 0.        , 0.        ],
        [0.        , 0.14285714, 0.7755102 , 0.08163265, 0.        ],
        [0.        , 0.        , 0.5       , 0.5       , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 1.        ]],

       [[0.88636364, 0.10606061, 0.00757576, 0.        , 0.        ],
        [0.04402516, 0.89308176, 0.06289308, 0.        , 0.        ],
        [0.        , 0.05882353, 0.8627451 , 0.07843137, 0.        ],
        [0.        , 0.        , 0.13846154, 0.86153846, 0.        ],
        [0.        , 0.        , 0.        , 0.        , 1.        ]],

       [[0.78082192, 0.17808219, 0.02739726, 0.01369863, 0.        ],
        [0.03488372, 0.90406977, 0.05813953, 0.00290698, 0.        ],
        [0.        , 0.05919003, 0.84735202, 0.09034268, 0.00311526],
        [0.        , 0.        , 0.05811623, 0.92985972, 0.01202405],
        [0.        , 0.        , 0.        , 0.14285714, 0.85714286]],

       [[0.82692308, 0.15384615, 0.        , 0.01923077, 0.        ],
        [0.0703125 , 0.7890625 , 0.125     , 0.015625  , 0.        ],
        [0.00295858, 0.06213018, 0.82248521, 0.10946746, 0.00295858],
        [0.        , 0.00185529, 0.07606679, 0.88497217, 0.03710575],
        [0.        , 0.        , 0.        , 0.07803468, 0.92196532]],

       [[1.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 1.        , 0.        , 0.        , 0.        ],
        [0.        , 0.06666667, 0.9       , 0.03333333, 0.        ],
        [0.        , 0.        , 0.05660377, 0.90566038, 0.03773585],
        [0.        , 0.        , 0.        , 0.03932584, 0.96067416]]])

f = ps.io.open(ps.examples.get_path('usjoin.csv'))
pci = np.array([f.by_col[str(y)] for y in range(1929, 2010)])
q5 = np.array([mc.Quantiles(y).yb for y in pci]).transpose()
m = Markov(q5)

m.steady_state

array([0.20774716, 0.18725774, 0.20740537, 0.18821787, 0.20937187])

m.fmpt

array([[  4.81354357,  11.50292712,  29.60921231,  53.38594954,
        103.59816743],
       [ 42.04774505,   5.34023324,  18.74455332,  42.50023268,
         92.71316899],
       [ 69.25849753,  27.21075248,   4.82147603,  25.27184624,
         75.43305672],
       [ 84.90689329,  42.85914824,  17.18082642,   5.31299186,
         51.60953369],
       [ 98.41295543,  56.36521038,  30.66046735,  14.21158356,
          4.77619083]])

m.sojourn_time

array([11.125     ,  4.65806452,  4.73372781,  4.95172414, 13.77586207])

sojourn_time(m.p)

array([11.125     ,  4.65806452,  4.73372781,  4.95172414, 13.77586207])

q5

array([[0, 0, 0, ..., 0, 0, 0],
       [2, 2, 2, ..., 1, 1, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [1, 1, 1, ..., 0, 0, 0],
       [3, 3, 2, ..., 2, 2, 2],
       [3, 3, 3, ..., 4, 4, 4]])

q5[:,10]

array([0, 2, 0, 4, 2, 4, 4, 2, 0, 1, 4, 3, 2, 1, 0, 1, 2, 4, 4, 3, 2, 0,
       2, 3, 1, 4, 3, 4, 1, 4, 0, 0, 3, 1, 3, 3, 4, 0, 0, 0, 1, 2, 2, 1,
       3, 1, 2, 3])

m2 = Markov(q5[:,10:12])
m2.transitions

array([[10.,  0.,  0.,  0.,  0.],
       [ 0.,  9.,  0.,  0.,  0.],
       [ 0.,  0., 10.,  0.,  0.],
       [ 0.,  0.,  0.,  9.,  0.],
       [ 0.,  0.,  0.,  0., 10.]])

m2 = Markov(q5[:,:2])
m2.p

array([[1.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.88888889, 0.11111111, 0.        , 0.        ],
       [0.        , 0.09090909, 0.81818182, 0.09090909, 0.        ],
       [0.        , 0.        , 0.        , 1.        , 0.        ],
       [0.        , 0.        , 0.        , 0.1       , 0.9       ]])

np.inf

inf

p = m2.p
pii = p.diagonal()

pii

array([1.        , 0.88888889, 0.81818182, 1.        , 0.9       ])

np.where(pii==1)[0]

array([0, 3])

np.full(len(pii), np.inf)

array([inf, inf, inf, inf, inf])

def sojourn_time(p):
    """
    Calculate sojourn time based on a given transition probability matrix.

    """
    p = np.asarray(p)
    pii = p.diagonal()

    if not (1 - pii).all():
        absorbing_states = np.where(pii==1)[0]
        non_absorbing_states = np.where(pii!=1)[0]
        st = np.full(len(pii), np.inf)
        print("Sojourn times are infinite for absorbing states! In this Markov Chain, states {} are absorbing states.".format(list(absorbing_states)))
        st[non_absorbing_states] = 1 / (1 - pii[non_absorbing_states])
    else:
        st = 1 / (1 - pii)
    return st

p2 = np.array([[.5, .5, 0], [.3, .7, 0], [0, 0, 1]])

sojourn_time(p2)

Sojourn times are infinite for absorbing states! In this Markov Chain, states [2] are absorbing states.

array([2.        , 3.33333333,        inf])

sojourn_time(p)

Sojourn times are infinite for absorbing states! In this Markov Chain, states [0, 3] are absorbing states.

array([ inf,  9. ,  5.5,  inf, 10. ])

m2.fmpt

array([[1.        ,        inf,        inf,        inf,        inf],
       [       inf, 1.78305684, 8.65447755,        inf,        inf],
       [       inf, 7.393034  , 2.2770465 ,        inf,        inf],
       [       inf,        inf,        inf, 1.        ,        inf],
       [       inf,        inf,        inf,        inf, 1.        ]])

m2.steady_state

array([[1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0.]])

m2.transitions

array([[10.,  0.,  0.,  0.,  0.],
       [ 0.,  8.,  1.,  0.,  0.],
       [ 0.,  1.,  9.,  1.,  0.],
       [ 0.,  0.,  0.,  8.,  0.],
       [ 0.,  0.,  0.,  1.,  9.]])

p = np.array([[.5, .25, .25], [.5, 0, .5], [.25, .25, .5]])
p

array([[0.5 , 0.25, 0.25],
       [0.5 , 0.  , 0.5 ],
       [0.25, 0.25, 0.5 ]])

p2 = np.array([[.5, .5, 0], [.3, .7, 0], [0, 0, 1]])
p2

array([[0.5, 0.5, 0. ],
       [0.3, 0.7, 0. ],
       [0. , 0. , 1. ]])

p3 = np.array([[.5, .5, 0], [.3, .7, 0], [0, 0, 0]])
p3

array([[0.5, 0.5, 0. ],
       [0.3, 0.7, 0. ],
       [0. , 0. , 0. ]])

p23 = np.array([[[.5, .5, 0], [.3, .7, 0], [0, 0, 0]],
                [[0, 0, 0], [.3, .7, 0], [0, 0, 0]]])
p23

array([[[0.5, 0.5, 0. ],
        [0.3, 0.7, 0. ],
        [0. , 0. , 0. ]],

       [[0. , 0. , 0. ],
        [0.3, 0.7, 0. ],
        [0. , 0. , 0. ]]])

util.fill_diag2(p23)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-30-af60868f6222> in <module>
----> 1 util.fill_diag2(p23)

~/Google Drive/python_repos/pysal-refactor/giddy/giddy/util.py in fill_diag2(p)
    102     p_temp = np.asarray(p)
    103     if len(p_temp.shape) == 3:
--> 104         raise ValueError('Please use function `fill_diag3` for '
    105                          'a three-dimensional matrix!')
    106     p0 = (p_temp.sum(axis=1) == 0)

ValueError: Please use function `fill_diag3` for a three-dimensional matrix!

util.fill_diag3(p23)

array([[[0.5, 0.5, 0. ],
        [0.3, 0.7, 0. ],
        [0. , 0. , 1. ]],

       [[1. , 0. , 0. ],
        [0.3, 0.7, 0. ],
        [0. , 0. , 1. ]]])

util.fill_diag2(p3)

array([[0.5, 0.5, 0. ],
       [0.3, 0.7, 0. ],
       [0. , 0. , 1. ]])

ergodic.steady_state_general(p2)

array([[0.375, 0.625, 0.   ],
       [0.   , 0.   , 1.   ]])

ergodic.steady_state(p2)

array([0.5, 0.5, 0. ])

np.inf

inf

ergodic.fmpt_general(p2)

array([[2.66666667, 2.        ,        inf],
       [3.33333333, 1.6       ,        inf],
       [       inf,        inf, 1.        ]])

ergodic.fmpt_general(p)

array([[2.5       , 4.        , 3.33333333],
       [2.66666667, 5.        , 2.66666667],
       [3.33333333, 4.        , 2.5       ]])

f = libpysal.io.open(libpysal.examples.get_path("usjoin.csv"))
pci = np.array([f.by_col[str(y)] for y in range(1929,2010)])
pci = pci.transpose()
rpci = pci/(pci.mean(axis=0))
w = libpysal.io.open(libpysal.examples.get_path("states48.gal")).read()
w.transform = 'r'
cc = np.array([0.8, 0.9, 1, 1.2])
sm = Spatial_Markov(rpci, w, cutoffs=cc, lag_cutoffs=cc, variable_name='rpci')
sm.cutoffs

array([0.8, 0.9, 1. , 1.2])

sm.T

array([[[264.,   9.,   0.,   0.,   0.],
        [  5.,  32.,  10.,   0.,   0.],
        [  0.,   7.,  38.,   4.,   0.],
        [  0.,   0.,   3.,   3.,   0.],
        [  0.,   0.,   0.,   0.,   0.]],

       [[117.,  14.,   1.,   0.,   0.],
        [  7., 142.,  10.,   0.,   0.],
        [  0.,   6.,  88.,   8.,   0.],
        [  0.,   0.,   9.,  56.,   0.],
        [  0.,   0.,   0.,   0.,   4.]],

       [[ 57.,  13.,   2.,   1.,   0.],
        [ 12., 311.,  20.,   1.,   0.],
        [  0.,  19., 272.,  29.,   1.],
        [  0.,   0.,  29., 464.,   6.],
        [  0.,   0.,   0.,   7.,  42.]],

       [[ 43.,   8.,   0.,   1.,   0.],
        [  9., 101.,  16.,   2.,   0.],
        [  1.,  21., 278.,  37.,   1.],
        [  0.,   1.,  41., 477.,  20.],
        [  0.,   0.,   0.,  27., 319.]],

       [[  0.,   0.,   0.,   0.,   0.],
        [  0.,   0.,   0.,   0.,   0.],
        [  0.,   2.,  27.,   1.,   0.],
        [  0.,   0.,   6.,  96.,   4.],
        [  0.,   0.,   0.,   7., 171.]]])

p_adjust = sm.T

(p_adjust.sum(axis=2) == 0).sum()

def fill_diagonal2(p):
    """

    :param p:
    :return:
    """
    p = np.asarray(p)
    if len(p.shape) == 3:
        raise ValueError('Please use function `fill_diagonal3` for '
                         'three-dimensional matrix!')
    p0 = (p.sum(axis=1) == 0)
    if not p0.sum():
        row_zero_i = np.where(p0)
        for row in row_zero_i:
            p[row, row] = 1
    return p

def fill_diagonal3(p):
    p = np.asarray(p)
    p0 = (p.sum(axis=2) == 0)
    if not p0.sum():
        rows, cols = np.where(p0)
        row_zero_i = list(zip(rows, cols))
        for row in row_zero_i:
            i, j = row
            p[i, j, j] = 1
    return p

fill_diagonal2(sm.T[4])

array([[  1.,   0.,   0.,   0.,   0.],
       [  0.,   1.,   0.,   0.,   0.],
       [  0.,   2.,  27.,   1.,   0.],
       [  0.,   0.,   6.,  96.,   4.],
       [  0.,   0.,   0.,   7., 171.]])

b=2
a=b

a = 3

fill_diagonal2(sm.T)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-30-0b8c6969112b> in <module>
----> 1 fill_diagonal2(sm.T)

<ipython-input-29-1d3742ef6fc2> in fill_diagonal2(p)
      7     p = np.asarray(p)
      8     if len(p.shape) == 3:
----> 9         raise ValueError('Please use function `fill_diagonal3` for '
     10                          'three-dimensional matrix!')
     11     p0 = (p.sum(axis=1) == 0)

ValueError: Please use function `fill_diagonal3` for three-dimensional matrix!

rows, cols = np.where(p_adjust.sum(axis=2) == 0)
row_zero_i = list(zip(rows, cols))
row_zero_i

[(0, 4), (4, 0), (4, 1)]

for row in row_zero_i:
    i, j = row
    p_adjust[i, j, j] = 1
p_adjust

array([[[264.,   9.,   0.,   0.,   0.],
        [  5.,  32.,  10.,   0.,   0.],
        [  0.,   7.,  38.,   4.,   0.],
        [  0.,   0.,   3.,   3.,   0.],
        [  0.,   0.,   0.,   0.,   1.]],

       [[117.,  14.,   1.,   0.,   0.],
        [  7., 142.,  10.,   0.,   0.],
        [  0.,   6.,  88.,   8.,   0.],
        [  0.,   0.,   9.,  56.,   0.],
        [  0.,   0.,   0.,   0.,   4.]],

       [[ 57.,  13.,   2.,   1.,   0.],
        [ 12., 311.,  20.,   1.,   0.],
        [  0.,  19., 272.,  29.,   1.],
        [  0.,   0.,  29., 464.,   6.],
        [  0.,   0.,   0.,   7.,  42.]],

       [[ 43.,   8.,   0.,   1.,   0.],
        [  9., 101.,  16.,   2.,   0.],
        [  1.,  21., 278.,  37.,   1.],
        [  0.,   1.,  41., 477.,  20.],
        [  0.,   0.,   0.,  27., 319.]],

       [[  1.,   0.,   0.,   0.,   0.],
        [  0.,   1.,   0.,   0.,   0.],
        [  0.,   2.,  27.,   1.,   0.],
        [  0.,   0.,   6.,  96.,   4.],
        [  0.,   0.,   0.,   7., 171.]]])

np.where(p_adjust.sum(axis=2) == 0)

(array([0, 4, 4]), array([4, 0, 1]))

if

array([[273.,  47.,  49.,   6.,   0.],
       [132., 159., 102.,  65.,   4.],
       [ 73., 344., 321., 499.,  49.],
       [ 52., 128., 338., 539., 346.],
       [  0.,   0.,  30., 106., 178.]])

if p_adjust.sum(axis=1).all() != 1:
    row_sum = p_adjust.sum(axis=1)
    indexn0 = np.where(row_sum != 0)[0]
    p_diag = np.eye(k)
    p_diag[indexn0] = p_adjust[indexn0]
    p_adjust = p_diag

p=np.array([[.5, .25, .25],[.5,0,.5],[.25,.25,.5]])

array([[0.5 , 0.25, 0.25],
       [0.5 , 0.  , 0.5 ],
       [0.25, 0.25, 0.5 ]])

p=np.array([[.5, .5, 0],[.2,0.8,0],[0,0,1]])
p

array([[0.5, 0.5, 0. ],
       [0.2, 0.8, 0. ],
       [0. , 0. , 1. ]])

import quantecon as qe

mc = qe.MarkovChain(p)

mc.num_recurrent_classes

mc.n

fmpt_general(p)

array([[3.5, 2. , inf],
       [5. , 1.4, inf],
       [inf, inf, 1. ]])

from giddy.ergodic import fmpt
def fmpt_general(P):
    """
    Calculates the matrix of first mean passage times for an ergodic/non-ergodic
    transition probability matrix.

    Parameters
    ----------
    P       : array
             (k, k), an ergodic/non-ergodic Markov transition probability
             matrix.

    Returns
    -------
    fmpt_all: array
              (k, k), elements are the expected value for the number of
              intervals required for a chain starting in state i to first
              enter state j. If i=j then this is the recurrence time.

    Examples
    --------
    >>> import numpy as np
    >>> from giddy.ergodic import fmpt_general

    # irreducible Markov chain
    >>> p = np.array([[.5, .25, .25],[.5,0,.5],[.25,.25,.5]])
    >>> fm = fmpt_general(p)
    >>> fm
    array([[2.5       , 4.        , 3.33333333],
           [2.66666667, 5.        , 2.66666667],
           [3.33333333, 4.        , 2.5       ]])

    Thus, if it is raining today in Oz we can expect a nice day to come
    along in another 4 days, on average, and snow to hit in 3.33 days. We can
    expect another rainy day in 2.5 days. If it is nice today in Oz, we would
    experience a change in the weather (either rain or snow) in 2.67 days from
    today. (That wicked witch can only die once so I reckon that is the
    ultimate absorbing state).

    # reducible Markov chain
    >>> p = np.array([[.5, .25, .25],[.5,0,.5],[.25,.25,.5]])
    >>> fm = fmpt_general(p)
    >>> fm
    array([[2.5       , 4.        , 3.33333333],
           [2.66666667, 5.        , 2.66666667],
           [3.33333333, 4.        , 2.5       ]])


    """

    P = np.asarray(P)
    mc = qe.MarkovChain(P)
    num_classes = mc.num_communication_classes
    if num_classes == 1:
        fmpt_all = fmpt(P)
        return fmpt_all
    else:
        i_cclasses = mc.communication_classes_indices
        fmpt_all = np.full((mc.n, mc.n), np.inf)
        for cclass in i_cclasses:
            rows = cclass[:, np.newaxis]
            p_temp = P[rows, cclass]
            fmpt_all[rows, cclass] = fmpt(p_temp)
        return fmpt_all

mc.stationary_distributions

array([[0.28571429, 0.71428571, 0.        ],
       [0.        , 0.        , 1.        ]])

Full Rank Markov

import libpysal
f = libpysal.io.open(libpysal.examples.get_path("usjoin.csv"))
pci = np.array([f.by_col[str(y)] for y in range(1929,2010)])
q5 = np.array([mc.Quantiles(y).yb for y in pci]).transpose()
m = Markov(q5)
m.p

mc = qe.MarkovChain(m.p)

mc.stationary_distributions[0]

mc.stationary_distributions[0].shape

pci

from giddy.markov import FullRank_Markov, Markov
import mapclassify as mc

income_table = pd.read_csv(ps.examples.get_path("usjoin.csv"))
pci = income_table[list(map(str,range(1929,2010)))].values
# q5 = np.array([mc.Quantiles(y).yb for y in pci.T]).transpose()
# m = Markov(q5)
# m.p

pci

m = Markov(q5)
m.p

income_table.head()

Let's only select per capical incomes in years 1949-1960

pci = income_table[list(map(str,range(1990,2010)))].values
pci

m = FullRank_Markov(pci)
m.ranks

m.transitions

Full rank Markov transition probability matrix

m.p

np.where(m.p == 1)

The probability of staying in rank 48 is $100\%$. Thus, this rank is not communicating with other ranks, meaning that if a US State currently ranks 48th in per capita income, it will always rank 48th in the future. In this case, there is not going to be a unique steady state distribution towards which the Markov Chain converges. Instead, the future distribution will be dependent on whether the system is traped in rank 48. And also because of this, the estimation of the first mean passage time becomes more complicated. Currently, giddy does not have a mechanism to deal with it.

The current implementation for estimating the steady state distribution in giddy ignores the possibility of being trapped in rank 48 and gives the following distribution.

The estimation of the first mean passage times will incur an error shown as follows:

m.fmpt

At the moment (due to the time limit for submitting the paper), I will suggest to use an external package `quantecon` to check whether the Markov Chain is ergodic. If not, we give some explanation and not calculating or ploting the first mean passage times.

I will refactor `giddy` in the meantime to incorporate the non-ergodic chains when estimating steady state distributions and first mean passage times.

Use `quantecon` to check whether the Markov Chain is ergodic and which Markov states are not communicating with others

(1) check whether the Markov Chain is ergodic

mc.is_irreducible and mc.is_aperiodic

qe.MarkovChain?

import quantecon as qe
mc = qe.MarkovChain(m.p)

mc.is_irreducible and mc.is_aperiodic

mc.num_communication_classes?

mc.num_communication_classes

mc.communication_classes_indices

k = len(m.p)

fmpt_k = np.full((k, k), np.inf)

cclass = mc.communication_classes_indices[1]

cclass[:, np.newaxis]

rows = cclass[:, np.newaxis]
m.p[rows,cclass]

p_temp = m.p[cclass,:][:,cclass]
p_temp

fmpt_k

fmpt_k.flat[[1,2]]

fmpt_temp = fmpt(p_temp)
fmpt_temp

np.pad?

np.putmask?

fmpt_k[cclass,:]

fmpt_temp

b = fmpt_k[cclass,:]
b[:,cclass] = fmpt_temp

fmpt_k[cclass,:][:,cclass]

np.put(fmpt_k, cclass, fmpt_temp)

np.put?

fmpt_k[cclass,:][:,cclass]

fmpt_k[cclass,:][:,cclass] = fmpt_temp
fmpt_k

for cclass in mc.communication_classes_indices:
    rows = cclass[:, np.newaxis]
    p_temp = m.p[rows,cclass]
#     p_temp = m.p[cclass,:][:,cclass]
    fmpt_temp = fmpt(p_temp)
    print(cclass, fmpt_temp)
    fmpt_k[rows,cclass] = fmpt_temp

fmpt_k

fmpt_k

mc.communication_classes_indices[1]

m.p[list(mc.communication_classes_indices[1]),:][:,list(mc.communication_classes_indices[1])]

from giddy.ergodic import fmpt

a = np.zeros((2,2))
a[0] = np.Infinity
a

np.full((2, 2), np.inf)

fmpt(m.p[list(mc.communication_classes_indices[1]),:][:,list(mc.communication_classes_indices[1])])

k = len(m.p)

mc.stationary_distributions

mc.is_irreducible

mc.is_aperiodic

(2) check which Markov states are not communicating with others

mc.num_communication_classes

mc.communication_classes

We have two sets of Markov states which are not communicating with each other

mc.num_communication_classes

The first set contains all the Markov states (rank 1 to 47) and the second set contains only 1 Markov state (rank 48)

mc.communication_classes

from giddy.ergodic import steady_state as STEADY_STATE
if mc.num_communication_classes==1:
    STEADY_STATE(mp.)