Full Rank Markov and Geographic Rank Markov
Author: Wei Kang weikang9009@gmail.com
import libpysal as ps
import libpysal
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import pandas as pd
import geopandas as gpd
from giddy.markov import Spatial_Markov, Markov, FullRank_Markov
from giddy import ergodic, util
import mapclassify as mc
import quantecon as qe
f = ps.io.open(ps.examples.get_path('usjoin.csv'))
pci = np.array([f.by_col[str(y)] for y in range(2000, 2010)])
pci = pci.transpose()
rpci = pci / (pci.mean(axis=0))
discretized = (rpci * 100).astype(int) % 4
w = ps.io.open(ps.examples.get_path("states48.gal")).read()
w.transform = 'r'
pci = np.array([f.by_col[str(y)] for y in range(1929,2010)])
# rpci = pci/(pci.mean(axis=0))
rq = mc.Quantiles(pci.flatten()).yb.reshape(pci.shape)
m = Markov(rq)
m.steady_state
q5 = np.array([mc.Quantiles(y).yb for y in pci]).transpose()
m = Markov(q5)
m.steady_state
a = Spatial_Markov(rpci, w)
a = FullRank_Markov(rpci)
P = np.array([[0,0,0.8,0.2],
[0,0,0.2,0.8],
[0.3,0.7,0,0],
[0.7,0.3,0,0]])
m = qe.MarkovChain(P)
m.recurrent_classes_indices
m.communication_classes_indices
m.is_aperiodic
m.stationary_distributions
p = np.array([[0.8,0.2],[0.7,0.3]])
list(np.argwhere(p==0.5))
len([])
print(*[], sep=", "
from giddy.ergodic import fmpt, steady_state
fmpt(p)
steady_state(p)
1/steady_state(p)
P = np.array([[1, 0,0,0,0],
[0,1,0,0,0],
[0, 0,0,0.5, 0.5],
[0,0.5,0, 0, 0.5],
[0.5,0, 0, 0.5, 0]])
P
np.where?
np.argwhere(P==0.5)
np.argwhere(P==1)[:,0]
mc = qe.MarkovChain(P)
mc.communication_classes_indices
mc.recurrent_classes?
mc.num_communication_classes
mc.recurrent_classes_indices
c = [['b','a','a'],['c','c','a'],['c','b','c']]
m = Markov(np.array(c))
m.p
c = [['b','a','c'],['c','c','a'],['c','b','c']]
c.extend([['a','a','b'], ['a','b','c']])
c = np.array(c)
m = Markov(c)
m.classes
m.communication_classes
m.classes[m.communication_classes[0]]
classes
if mc.is_irreducible:
print("The Markov Chain is irreducible and is composed by:")
else:
print("The Markov Chain is reducible and is composed by:")
print("{0} Recurrent classes:".format(mc.num_recurrent_classes))
print(mc.recurrent_classes_indices, sep = ", ")
print("Transient classes:")
if self.transient_classes is None:
print("None")
else:
print(*self.transient_classes, sep = ", ")
all(np.array([0.5,0.5])<1)
any(P.sum(axis=1))<1
if not len([1]):
print(2)
mc.num_recurrent_classes
mc.recurrent_classes_indices
set(list(map(list,mc.communication_classes_indices)))
mc.communication_classes_indices
np.array(tuple(np.array([0])))
set(list(map(tuple,mc.communication_classes_indices)))
transient = set(list(map(tuple,mc.communication_classes_indices))).difference(
set(list(map(tuple,mc.recurrent_classes_indices))))
transient = set(list(map(tuple,mc.communication_classes_indices))).difference(
set(list(map(tuple,mc.recurrent_classes_indices))))
transient
print(*mc.communication_classes_indices)
print(*[[1],[2]])
print(*[[1]])
mc.is_irreducible
print("Markov Chain is composed by:\n"
"Recurrent classes:")
print(*mc.recurrent_classes_indices, sep = ", ")
print("Transient classes:")
if transient_classes is None:
print("None")
else:
print(*mc.communication_classes_indices, sep = ", ")
[np.asarray(i) for i in transient]
len(set({1}).difference(set({1})))
recurrent_classes = []
transient_classes = []
for cclass in mc.communication_classes_indices:
rows = cclass[:, np.newaxis]
p_temp = P[rows, cclass]
if all(p_temp.sum(axis=1)<1):
transient_classes.append(cclass)
else:
recurrent_classes.append(cclass)
if not len(transient_classes):
transient_classes = None
transient_classes = None
print(recurrent_classes, transient_classes)
P.flatten()
P2 = np.array([[0.2, 0.8,0,0,0],
[0.4,0.6,0,0,0],
[0, 0,0,0.5, 0.5],
[0,0.5,0, 0, 0.5],
[0.5,0, 0, 0.5, 0]])
P2
mc2 = qe.MarkovChain(P2)
mc2.recurrent_classes_indices
rec_classes = mc2.recurrent_classes_indices
for i, rec_class in enumerate(rec_classes):
P_rec_class = P2[np.ix_(rec_class, rec_class)]
print(P_rec_class)
mc = qe.MarkovChain(P)
mc_graph = mc.digraph
mc_graph.num_sink_strongly_connected_components
mc.recurrent_classes_indices
mc.communication_classes_indices
rec_classes = mc.recurrent_classes_indices
rec_classes
for i, rec_class in enumerate(rec_classes):
P_rec_class = P[np.ix_(rec_class, rec_class)]
print(P_rec_class)
recurrent_indices = []
for i in mc.recurrent_classes_indices:
recurrent_indices.extend(list(i))
recurrent_indices
k = len(P)
transient_indices = np.array(list(set(range(k)).difference(set(recurrent_indices))))
recurrent_indices = np.array(recurrent_indices)
transient_indices
# rows =
Q = P[transient_indices[:, np.newaxis], transient_indices]
S = P[recurrent_indices[:, np.newaxis], recurrent_indices]
R = P[transient_indices[:, np.newaxis], recurrent_indices]
print(Q,S,R)
def expected_steps_fast(Q):
I = np.identity(Q.shape[0])
o = np.ones(Q.shape[0])
return(np.linalg.solve(I-Q, o))
expected_steps_fast(Q)
# norm_Q = Q/Q.sum(axis=1)
I = np.identity(len(Q))
N = inv(I-Q)
N
I-Q
Q = np.array([[0, 0.5, 0.5],
[0, 0, 0.5],
[0, 0.5, 0]])
Q
I = np.identity(len(Q))
I
from numpy.linalg import inv
N = inv(I-Q)
N
Q = np.array([[0, 0.5],
[0.5, 0]])
I = np.identity(len(Q))
N = inv(I-Q)
N
sm = Spatial_Markov(rpci, w, fixed=True, k=5, m=5)
sm.F[0]
cc = np.array([0.8, 0.9, 1, 1.2])
sm = Spatial_Markov(rpci, w, cutoffs=cc, lag_cutoffs=cc)
sm = Spatial_Markov(rpci, w, cutoffs=cc, lag_cutoffs=cc, fill_diag=True)
sm.S[2]
a = np.random.random((2,3))
a
b = np.random.random((3,))
b
np.array([a,b])
list([a,b])
a.dtype
if np.array(list([a,b])).dtype is np.dtype('O'):
print(0)
if a.dtype is np.dtype('O'):
print(0)
np.array(list([a,b])).dtype
np.array(list([a,b])).dtype
sm.P
f = ps.io.open(ps.examples.get_path('usjoin.csv'))
pci = np.array([f.by_col[str(y)] for y in range(1929, 2010)])
q5 = np.array([mc.Quantiles(y).yb for y in pci]).transpose()
m = Markov(q5)
m.steady_state
m.fmpt
m.sojourn_time
sojourn_time(m.p)
q5
q5[:,10]
m2 = Markov(q5[:,10:12])
m2.transitions
m2 = Markov(q5[:,:2])
m2.p
np.inf
p = m2.p
pii = p.diagonal()
pii
np.where(pii==1)[0]
np.full(len(pii), np.inf)
def sojourn_time(p):
"""
Calculate sojourn time based on a given transition probability matrix.
"""
p = np.asarray(p)
pii = p.diagonal()
if not (1 - pii).all():
absorbing_states = np.where(pii==1)[0]
non_absorbing_states = np.where(pii!=1)[0]
st = np.full(len(pii), np.inf)
print("Sojourn times are infinite for absorbing states! In this Markov Chain, states {} are absorbing states.".format(list(absorbing_states)))
st[non_absorbing_states] = 1 / (1 - pii[non_absorbing_states])
else:
st = 1 / (1 - pii)
return st
p2 = np.array([[.5, .5, 0], [.3, .7, 0], [0, 0, 1]])
sojourn_time(p2)
sojourn_time(p)
m2.fmpt
m2.steady_state
m2.transitions
p = np.array([[.5, .25, .25], [.5, 0, .5], [.25, .25, .5]])
p
p2 = np.array([[.5, .5, 0], [.3, .7, 0], [0, 0, 1]])
p2
p3 = np.array([[.5, .5, 0], [.3, .7, 0], [0, 0, 0]])
p3
p23 = np.array([[[.5, .5, 0], [.3, .7, 0], [0, 0, 0]],
[[0, 0, 0], [.3, .7, 0], [0, 0, 0]]])
p23
util.fill_diag2(p23)
util.fill_diag3(p23)
util.fill_diag2(p3)
ergodic.steady_state_general(p2)
ergodic.steady_state(p2)
np.inf
ergodic.fmpt_general(p2)
ergodic.fmpt_general(p)
f = libpysal.io.open(libpysal.examples.get_path("usjoin.csv"))
pci = np.array([f.by_col[str(y)] for y in range(1929,2010)])
pci = pci.transpose()
rpci = pci/(pci.mean(axis=0))
w = libpysal.io.open(libpysal.examples.get_path("states48.gal")).read()
w.transform = 'r'
cc = np.array([0.8, 0.9, 1, 1.2])
sm = Spatial_Markov(rpci, w, cutoffs=cc, lag_cutoffs=cc, variable_name='rpci')
sm.cutoffs
sm.T
p_adjust = sm.T
(p_adjust.sum(axis=2) == 0).sum()
def fill_diagonal2(p):
"""
:param p:
:return:
"""
p = np.asarray(p)
if len(p.shape) == 3:
raise ValueError('Please use function `fill_diagonal3` for '
'three-dimensional matrix!')
p0 = (p.sum(axis=1) == 0)
if not p0.sum():
row_zero_i = np.where(p0)
for row in row_zero_i:
p[row, row] = 1
return p
def fill_diagonal3(p):
p = np.asarray(p)
p0 = (p.sum(axis=2) == 0)
if not p0.sum():
rows, cols = np.where(p0)
row_zero_i = list(zip(rows, cols))
for row in row_zero_i:
i, j = row
p[i, j, j] = 1
return p
fill_diagonal2(sm.T[4])
b=2
a=b
a = 3
b
fill_diagonal2(sm.T)
rows, cols = np.where(p_adjust.sum(axis=2) == 0)
row_zero_i = list(zip(rows, cols))
row_zero_i
for row in row_zero_i:
i, j = row
p_adjust[i, j, j] = 1
p_adjust
np.where(p_adjust.sum(axis=2) == 0)
if
if p_adjust.sum(axis=1).all() != 1:
row_sum = p_adjust.sum(axis=1)
indexn0 = np.where(row_sum != 0)[0]
p_diag = np.eye(k)
p_diag[indexn0] = p_adjust[indexn0]
p_adjust = p_diag
p=np.array([[.5, .25, .25],[.5,0,.5],[.25,.25,.5]])
p
p=np.array([[.5, .5, 0],[.2,0.8,0],[0,0,1]])
p
import quantecon as qe
mc = qe.MarkovChain(p)
mc.num_recurrent_classes
mc.n
fmpt_general(p)
from giddy.ergodic import fmpt
def fmpt_general(P):
"""
Calculates the matrix of first mean passage times for an ergodic/non-ergodic
transition probability matrix.
Parameters
----------
P : array
(k, k), an ergodic/non-ergodic Markov transition probability
matrix.
Returns
-------
fmpt_all: array
(k, k), elements are the expected value for the number of
intervals required for a chain starting in state i to first
enter state j. If i=j then this is the recurrence time.
Examples
--------
>>> import numpy as np
>>> from giddy.ergodic import fmpt_general
# irreducible Markov chain
>>> p = np.array([[.5, .25, .25],[.5,0,.5],[.25,.25,.5]])
>>> fm = fmpt_general(p)
>>> fm
array([[2.5 , 4. , 3.33333333],
[2.66666667, 5. , 2.66666667],
[3.33333333, 4. , 2.5 ]])
Thus, if it is raining today in Oz we can expect a nice day to come
along in another 4 days, on average, and snow to hit in 3.33 days. We can
expect another rainy day in 2.5 days. If it is nice today in Oz, we would
experience a change in the weather (either rain or snow) in 2.67 days from
today. (That wicked witch can only die once so I reckon that is the
ultimate absorbing state).
# reducible Markov chain
>>> p = np.array([[.5, .25, .25],[.5,0,.5],[.25,.25,.5]])
>>> fm = fmpt_general(p)
>>> fm
array([[2.5 , 4. , 3.33333333],
[2.66666667, 5. , 2.66666667],
[3.33333333, 4. , 2.5 ]])
"""
P = np.asarray(P)
mc = qe.MarkovChain(P)
num_classes = mc.num_communication_classes
if num_classes == 1:
fmpt_all = fmpt(P)
return fmpt_all
else:
i_cclasses = mc.communication_classes_indices
fmpt_all = np.full((mc.n, mc.n), np.inf)
for cclass in i_cclasses:
rows = cclass[:, np.newaxis]
p_temp = P[rows, cclass]
fmpt_all[rows, cclass] = fmpt(p_temp)
return fmpt_all
mc.stationary_distributions
import libpysal
f = libpysal.io.open(libpysal.examples.get_path("usjoin.csv"))
pci = np.array([f.by_col[str(y)] for y in range(1929,2010)])
q5 = np.array([mc.Quantiles(y).yb for y in pci]).transpose()
m = Markov(q5)
m.p
mc = qe.MarkovChain(m.p)
mc.stationary_distributions[0]
mc.stationary_distributions[0].shape
pci
from giddy.markov import FullRank_Markov, Markov
import mapclassify as mc
income_table = pd.read_csv(ps.examples.get_path("usjoin.csv"))
pci = income_table[list(map(str,range(1929,2010)))].values
# q5 = np.array([mc.Quantiles(y).yb for y in pci.T]).transpose()
# m = Markov(q5)
# m.p
pci
m = Markov(q5)
m.p
income_table.head()
pci = income_table[list(map(str,range(1990,2010)))].values
pci
m = FullRank_Markov(pci)
m.ranks
m.transitions
Full rank Markov transition probability matrix
m.p
np.where(m.p == 1)
The probability of staying in rank 48 is $100\%$. Thus, this rank is not communicating with other ranks, meaning that if a US State currently ranks 48th in per capita income, it will always rank 48th in the future. In this case, there is not going to be a unique steady state distribution towards which the Markov Chain converges. Instead, the future distribution will be dependent on whether the system is traped in rank 48. And also because of this, the estimation of the first mean passage time becomes more complicated. Currently, giddy
does not have a mechanism to deal with it.
The current implementation for estimating the steady state distribution in giddy
ignores the possibility of being trapped in rank 48 and gives the following distribution.
The estimation of the first mean passage times will incur an error shown as follows:
m.fmpt
At the moment (due to the time limit for submitting the paper), I will suggest to use an external package quantecon
to check whether the Markov Chain is ergodic. If not, we give some explanation and not calculating or ploting the first mean passage times.
I will refactor giddy
in the meantime to incorporate the non-ergodic chains when estimating steady state distributions and first mean passage times.
qe.MarkovChain?
import quantecon as qe
mc = qe.MarkovChain(m.p)
mc.is_irreducible and mc.is_aperiodic
mc.num_communication_classes?
mc.num_communication_classes
mc.communication_classes_indices
k = len(m.p)
fmpt_k = np.full((k, k), np.inf)
cclass = mc.communication_classes_indices[1]
cclass[:, np.newaxis]
rows = cclass[:, np.newaxis]
m.p[rows,cclass]
p_temp = m.p[cclass,:][:,cclass]
p_temp
fmpt_k
fmpt_k.flat[[1,2]]
fmpt_temp = fmpt(p_temp)
fmpt_temp
np.pad?
np.putmask?
fmpt_k[cclass,:]
fmpt_temp
b = fmpt_k[cclass,:]
b[:,cclass] = fmpt_temp
fmpt_k[cclass,:][:,cclass]
np.put(fmpt_k, cclass, fmpt_temp)
np.put?
fmpt_k[cclass,:][:,cclass]
fmpt_k[cclass,:][:,cclass] = fmpt_temp
fmpt_k
for cclass in mc.communication_classes_indices:
rows = cclass[:, np.newaxis]
p_temp = m.p[rows,cclass]
# p_temp = m.p[cclass,:][:,cclass]
fmpt_temp = fmpt(p_temp)
print(cclass, fmpt_temp)
fmpt_k[rows,cclass] = fmpt_temp
fmpt_k
fmpt_k
mc.communication_classes_indices[1]
m.p[list(mc.communication_classes_indices[1]),:][:,list(mc.communication_classes_indices[1])]
from giddy.ergodic import fmpt
a = np.zeros((2,2))
a[0] = np.Infinity
a
np.full((2, 2), np.inf)
fmpt(m.p[list(mc.communication_classes_indices[1]),:][:,list(mc.communication_classes_indices[1])])
k = len(m.p)
mc.stationary_distributions
mc.is_irreducible
mc.is_aperiodic
We have two sets of Markov states which are not communicating with each other
mc.num_communication_classes
The first set contains all the Markov states (rank 1 to 47) and the second set contains only 1 Markov state (rank 48)
mc.communication_classes
from giddy.ergodic import steady_state as STEADY_STATE
if mc.num_communication_classes==1:
STEADY_STATE(mp.)