"""
Spatial Error SUR estimation
"""
__author__ = "Luc Anselin lanselin@gmail.com, \
Pedro V. Amaral pedrovma@gmail.com"
import numpy as np
import numpy.linalg as la
from scipy import stats
stats.chisqprob = stats.chi2.sf
from . import summary_output as SUMMARY
from . import user_output as USER
from . import regimes as REGI
from scipy.sparse.linalg import splu as SuperLU
from scipy.optimize import minimize_scalar, minimize
from scipy import sparse as sp
from .ml_error import err_c_loglik_sp
from .utils import optim_moments
from .sur_utils import (
sur_dictxy,
sur_corr,
sur_dict2mat,
sur_crossprod,
sur_est,
sur_resids,
filter_dict,
check_k,
)
from .sur import BaseSUR, _sur_ols
from .diagnostics_sur import sur_setp, lam_setp, sur_chow
from .regimes import buildR, wald_test
__all__ = ["BaseSURerrorGM", "SURerrorGM", "BaseSURerrorML", "SURerrorML"]
class BaseSURerrorGM:
"""Base class for SUR Error estimation by Generalized Moments
Parameters
----------
bigy : dictionary
with vector for dependent variable by equation
bigX : dictionary
with matrix of explanatory variables by equation
(note, already includes constant term)
w : spatial weights object
Attributes
----------
n_eq : int
number of equations
n : int
number of observations in each cross-section
bigy : dictionary
with vectors of dependent variable, one for
each equation
bigX : dictionary
with matrices of explanatory variables,
one for each equation
bigK : array
n_eq x 1 array with number of explanatory variables
by equation
bigylag : dictionary
spatially lagged dependent variable
bigXlag : dictionary
spatially lagged explanatory variable
lamsur : float
spatial autoregressive coefficient in GM SUR Error
bSUR : array
beta coefficients in GM SUR Error
varb : array
variance of beta coefficients in GM SUR Error
sig : array
error variance-covariance matrix in GM SUR Error
corr : array
error correlation matrix
bigE : array
n by n_eq matrix of vectors of residuals for each equation
"""
def __init__(self, bigy, bigX, w):
self.n = w.n
self.n_eq = len(bigy.keys())
WS = w.sparse
I = sp.identity(self.n)
# variables
self.bigy = bigy
self.bigX = bigX
# number of variables by equation
self.bigK = np.zeros((self.n_eq, 1), dtype=np.int_)
for r in range(self.n_eq):
self.bigK[r] = self.bigX[r].shape[1]
# OLS
self.bigXX, self.bigXy = sur_crossprod(self.bigX, self.bigy)
reg0 = _sur_ols(self)
# Moments
moments = _momentsGM_sur_Error(WS, reg0.olsE)
lam = np.zeros((self.n_eq, 1))
for r in range(self.n_eq):
lam[r] = optim_moments(moments[r])
# spatially lagged variables
self.bigylag = {}
for r in range(self.n_eq):
self.bigylag[r] = WS * self.bigy[r]
# note: unlike WX as instruments, this includes the constant
self.bigXlag = {}
for r in range(self.n_eq):
self.bigXlag[r] = WS * self.bigX[r]
# spatially filtered variables
sply = filter_dict(lam, self.bigy, self.bigylag)
splX = filter_dict(lam, self.bigX, self.bigXlag)
WbigE = WS * reg0.olsE
splbigE = reg0.olsE - WbigE * lam.T
splXX, splXy = sur_crossprod(splX, sply)
b1, varb1, sig1 = sur_est(splXX, splXy, splbigE, self.bigK)
bigE = sur_resids(self.bigy, self.bigX, b1)
self.lamsur = lam
self.bSUR = b1
self.varb = varb1
self.sig = sig1
self.corr = sur_corr(self.sig)
self.bigE = bigE
def _momentsGM_sur_Error(w, u):
n = w.shape[0]
u2 = (u * u).sum(0)
wu = w * u
uwu = (u * wu).sum(0)
wu2 = (wu * wu).sum(0)
wwu = w * wu
uwwu = (u * wwu).sum(0)
wwu2 = (wwu * wwu).sum(0)
wwuwu = (wwu * wu).sum(0)
trWtW = w.multiply(w).sum()
moments = {}
for r in range(u.shape[1]):
g = np.array([[u2[r], wu2[r], uwu[r]]]).T / n
G = (
np.array(
[
[2 * uwu[r], -wu2[r], n],
[2 * wwuwu[r], -wwu2[r], trWtW],
[uwwu[r] + wu2[r], -wwuwu[r], 0.0],
]
)
/ n
)
moments[r] = [G, g]
return moments
[docs]class SURerrorGM(BaseSURerrorGM, REGI.Regimes_Frame):
"""
User class for SUR Error estimation by Generalized Moments
Parameters
----------
bigy : dictionary
with vectors of dependent variable, one for
each equation
bigX : dictionary
with matrices of explanatory variables,
one for each equation
w : spatial weights object
regimes : list
List of n values with the mapping of each
observation to a regime. Assumed to be aligned with 'x'.
nonspat_diag : boolean
flag for non-spatial diagnostics, default = False
spat_diag : boolean
flag for spatial diagnostics, default = False (to be implemented)
vm : boolean
flag for asymptotic variance for lambda and Sigma,
default = False (to be implemented)
name_bigy : dictionary
with name of dependent variable for each equation.
default = None, but should be specified is done when
sur_stackxy is used
name_bigX : dictionary
with names of explanatory variables for each equation.
default = None, but should be specified is done when
sur_stackxy is used
name_ds : string
name for the data set
name_w : string
name for the weights file
name_regimes : string
name of regime variable for use in the output
Attributes
----------
n : int
number of observations in each cross-section
n_eq : int
number of equations
bigy : dictionary
with vectors of dependent variable, one for
each equation
bigX : dictionary
with matrices of explanatory variables,
one for each equation
bigK : array
n_eq x 1 array with number of explanatory variables
by equation
bigylag : dictionary
spatially lagged dependent variable
bigXlag : dictionary
spatially lagged explanatory variable
lamsur : float
spatial autoregressive coefficient in ML SUR Error
bSUR : array
beta coefficients in ML SUR Error
varb : array
variance of beta coefficients in ML SUR Error
sig : array
error variance-covariance matrix in ML SUR Error
bigE : array
n by n_eq matrix of vectors of residuals for each equation
sur_inf : array
inference for regression coefficients, stand. error, t, p
surchow : array
list with tuples for Chow test on regression coefficients.
each tuple contains test value, degrees of freedom, p-value
name_bigy : dictionary
with name of dependent variable for each equation
name_bigX : dictionary
with names of explanatory variables for each
equation
name_ds : string
name for the data set
name_w : string
name for the weights file
name_regimes : string
name of regime variable for use in the output
Examples
--------
First import libpysal to load the spatial analysis tools.
>>> import libpysal
>>> from libpysal.examples import load_example
>>> from libpysal.weights import Queen
>>> import spreg
>>> np.set_printoptions(suppress=True) #prevent scientific format
Open data on NCOVR US County Homicides (3085 areas) using pysal.open().
This is the DBF associated with the NAT shapefile. Note that pysal.open()
also reads data in CSV format.
>>> nat = load_example('Natregimes')
>>> db = libpysal.io.open(nat.get_path('natregimes.dbf'), 'r')
The specification of the model to be estimated can be provided as lists.
Each equation should be listed separately. Equation 1 has HR80 as dependent
variable, and PS80 and UE80 as exogenous regressors.
For equation 2, HR90 is the dependent variable, and PS90 and UE90 the
exogenous regressors.
>>> y_var = ['HR80','HR90']
>>> x_var = [['PS80','UE80'],['PS90','UE90']]
>>> yend_var = [['RD80'],['RD90']]
>>> q_var = [['FP79'],['FP89']]
The SUR method requires data to be provided as dictionaries. PySAL
provides the tool sur_dictxy to create these dictionaries from the
list of variables. The line below will create four dictionaries
containing respectively the dependent variables (bigy), the regressors
(bigX), the dependent variables' names (bigyvars) and regressors' names
(bigXvars). All these will be created from th database (db) and lists
of variables (y_var and x_var) created above.
>>> bigy,bigX,bigyvars,bigXvars = spreg.sur_dictxy(db,y_var,x_var)
To run a spatial error model, we need to specify the spatial weights matrix.
To do that, we can open an already existing gal file or create a new one.
In this example, we will create a new one from NAT.shp and transform it to
row-standardized.
>>> w = Queen.from_shapefile(nat.get_path("natregimes.shp"))
>>> w.transform='r'
We can now run the regression and then have a summary of the output by typing:
print(reg.summary)
Alternatively, we can just check the betas and standard errors, asymptotic t
and p-value of the parameters:
>>> reg = spreg.SURerrorGM(bigy,bigX,w=w,name_bigy=bigyvars,name_bigX=bigXvars,name_ds="NAT",name_w="nat_queen")
>>> reg.bSUR
{0: array([[3.97746866],
[0.89021219],
[0.43050363]]), 1: array([[2.93679119],
[1.11002826],
[0.48761542]])}
>>> reg.sur_inf
{0: array([[ 0.37251476, 10.67734504, 0. ],
[ 0.14224297, 6.25839153, 0. ],
[ 0.04322388, 9.95985608, 0. ]]), 1: array([[ 0.33694902, 8.71583245, 0. ],
[ 0.13413626, 8.27537783, 0. ],
[ 0.04033105, 12.09032288, 0. ]])}
"""
[docs] def __init__(
self,
bigy,
bigX,
w,
regimes=None,
nonspat_diag=True,
spat_diag=False,
vm=False,
name_bigy=None,
name_bigX=None,
name_ds=None,
name_w=None,
name_regimes=None,
):
# check on variable names for listing results
self.name_ds = USER.set_name_ds(name_ds)
self.name_w = USER.set_name_w(name_w, w)
# initialize names - should be generated by sur_stack
self.n_eq = len(bigy.keys())
if name_bigy:
self.name_bigy = name_bigy
else: # need to construct y names
self.name_bigy = {}
for r in range(self.n_eq):
yn = "dep_var_" + str(r)
self.name_bigy[r] = yn
if name_bigX is None:
name_bigX = {}
for r in range(self.n_eq):
k = bigX[r].shape[1] - 1
name_x = ["var_" + str(i + 1) + "_" + str(r + 1) for i in range(k)]
ct = "Constant_" + str(r + 1) # NOTE: constant always included in X
name_x.insert(0, ct)
name_bigX[r] = name_x
if regimes is not None:
self.constant_regi = "many"
self.cols2regi = "all"
self.regime_err_sep = False
self.name_regimes = USER.set_name_ds(name_regimes)
self.regimes_set = REGI._get_regimes_set(regimes)
self.regimes = regimes
cols2regi_dic = {}
self.name_bigX = {}
self.name_x_r = name_bigX
for r in range(self.n_eq):
cols2regi_dic[r] = REGI.check_cols2regi(
self.constant_regi, self.cols2regi, bigX[r], add_cons=False
)
USER.check_regimes(self.regimes_set, bigy[0].shape[0], bigX[r].shape[1])
bigX[r], self.name_bigX[r] = REGI.Regimes_Frame.__init__(
self,
bigX[r],
regimes,
constant_regi=None,
cols2regi=cols2regi_dic[r],
names=name_bigX[r],
)
else:
self.name_bigX = name_bigX
BaseSURerrorGM.__init__(self, bigy=bigy, bigX=bigX, w=w)
# inference
self.sur_inf = sur_setp(self.bSUR, self.varb)
# test on constancy of regression coefficients across equations
if check_k(self.bigK): # only for equal number of variables
self.surchow = sur_chow(self.n_eq, self.bigK, self.bSUR, self.varb)
else:
self.surchow = None
# listing of results
self.title = "SEEMINGLY UNRELATED REGRESSIONS (SUR) - GM SPATIAL ERROR MODEL"
if regimes is not None:
self.title = "SUR - GM SPATIAL ERROR MODEL WITH REGIMES"
self.chow_regimes = {}
varb_counter = 0
for r in range(self.n_eq):
counter_end = varb_counter + self.bSUR[r].shape[0]
self.chow_regimes[r] = REGI._chow_run(
len(cols2regi_dic[r]),
0,
0,
len(self.regimes_set),
self.bSUR[r],
self.varb[varb_counter:counter_end, varb_counter:counter_end],
)
varb_counter = counter_end
regimes = True
SUMMARY.SUR(
reg=self,
nonspat_diag=nonspat_diag,
spat_diag=spat_diag,
lambd=True,
ml=False,
regimes=regimes,
)
class BaseSURerrorML:
"""
Base class for SUR Error estimation by Maximum Likelihood
requires: scipy.optimize.minimize_scalar and scipy.optimize.minimize
Parameters
----------
bigy : dictionary
with vectors of dependent variable, one for
each equation
bigX : dictionary
with matrices of explanatory variables,
one for each equation
w : spatial weights object
epsilon : float
convergence criterion for ML iterations
default 0.0000001
Attributes
----------
n : int
number of observations in each cross-section
n2 : int
n/2
n_eq : int
number of equations
bigy : dictionary
with vectors of dependent variable, one for
each equation
bigX : dictionary
with matrices of explanatory variables,
one for each equation
bigK : array
n_eq x 1 array with number of explanatory variables
by equation
bigylag : dictionary
spatially lagged dependent variable
bigXlag : dictionary
spatially lagged explanatory variable
lamols : array
spatial autoregressive coefficients from equation by
equation ML-Error estimation
clikerr : float
concentrated log-likelihood from equation by equation
ML-Error estimation (no constant)
bSUR0 : array
SUR estimation for betas without spatial autocorrelation
llik : float
log-likelihood for classic SUR estimation (includes constant)
lamsur : float
spatial autoregressive coefficient in ML SUR Error
bSUR : array
beta coefficients in ML SUR Error
varb : array
variance of beta coefficients in ML SUR Error
sig : array
error variance-covariance matrix in ML SUR Error
corr : array
error correlation matrix
bigE : array
n by n_eq matrix of vectors of residuals for each equation
cliksurerr : float
concentrated log-likelihood from ML SUR Error (no constant)
"""
def __init__(self, bigy, bigX, w, epsilon=0.0000001):
# setting up constants
self.n = w.n
self.n2 = self.n / 2.0
self.n_eq = len(bigy.keys())
WS = w.sparse
I = sp.identity(self.n)
# variables
self.bigy = bigy
self.bigX = bigX
# number of variables by equation
self.bigK = np.zeros((self.n_eq, 1), dtype=np.int_)
for r in range(self.n_eq):
self.bigK[r] = self.bigX[r].shape[1]
# spatially lagged variables
self.bigylag = {}
for r in range(self.n_eq):
self.bigylag[r] = WS * self.bigy[r]
# note: unlike WX as instruments, this includes the constant
self.bigXlag = {}
for r in range(self.n_eq):
self.bigXlag[r] = WS * self.bigX[r]
# spatial parameter starting values
lam = np.zeros((self.n_eq, 1)) # initialize as an array
fun0 = 0.0
fun1 = 0.0
for r in range(self.n_eq):
res = minimize_scalar(
err_c_loglik_sp,
0.0,
bounds=(-1.0, 1.0),
args=(
self.n,
self.bigy[r],
self.bigylag[r],
self.bigX[r],
self.bigXlag[r],
I,
WS,
),
method="bounded",
options={"xatol": epsilon},
)
lam[r] = res.x
fun1 += res.fun
self.lamols = lam
self.clikerr = -fun1 # negative because use in min
# SUR starting values
reg0 = BaseSUR(self.bigy, self.bigX, iter=True)
bigE = reg0.bigE
self.bSUR0 = reg0.bSUR
self.llik = reg0.llik # as is, includes constant
# iteration
lambdabounds = [(-1.0, +1.0) for i in range(self.n_eq)]
while abs(fun0 - fun1) > epsilon:
fun0 = fun1
sply = filter_dict(lam, self.bigy, self.bigylag)
splX = filter_dict(lam, self.bigX, self.bigXlag)
WbigE = WS * bigE
splbigE = bigE - WbigE * lam.T
splXX, splXy = sur_crossprod(splX, sply)
b1, varb1, sig1 = sur_est(splXX, splXy, splbigE, self.bigK)
bigE = sur_resids(self.bigy, self.bigX, b1)
res = minimize(
clik,
np.array(lam).flatten(),
args=(self.n, self.n2, self.n_eq, bigE, I, WS),
method="L-BFGS-B",
bounds=lambdabounds,
)
lam = res.x
lam.resize((self.n_eq, 1))
fun1 = res.fun
self.lamsur = lam
self.bSUR = b1
self.varb = varb1
self.sig = sig1
self.corr = sur_corr(self.sig)
self.bigE = bigE
self.cliksurerr = -fun1 # negative because use in min, no constant
[docs]class SURerrorML(BaseSURerrorML, REGI.Regimes_Frame):
"""
User class for SUR Error estimation by Maximum Likelihood
Parameters
----------
bigy : dictionary
with vectors of dependent variable, one for
each equation
bigX : dictionary
with matrices of explanatory variables,
one for each equation
w : spatial weights object
regimes : list
default = None.
List of n values with the mapping of each
observation to a regime. Assumed to be aligned with 'x'.
epsilon : float
convergence criterion for ML iterations.
default 0.0000001
nonspat_diag : boolean
flag for non-spatial diagnostics, default = True
spat_diag : boolean
flag for spatial diagnostics, default = False
vm : boolean
flag for asymptotic variance for lambda and Sigma,
default = False
name_bigy : dictionary
with name of dependent variable for each equation.
default = None, but should be specified is done when
sur_stackxy is used
name_bigX : dictionary
with names of explanatory variables for each equation.
default = None, but should be specified is done when
sur_stackxy is used
name_ds : string
name for the data set
name_w : string
name for the weights file
name_regimes : string
name of regime variable for use in the output
Attributes
----------
n : int
number of observations in each cross-section
n2 : int
n/2
n_eq : int
number of equations
bigy : dictionary
with vectors of dependent variable, one for
each equation
bigX : dictionary
with matrices of explanatory variables,
one for each equation
bigK : array
n_eq x 1 array with number of explanatory variables
by equation
bigylag : dictionary
spatially lagged dependent variable
bigXlag : dictionary
spatially lagged explanatory variable
lamols : array
spatial autoregressive coefficients from equation by
equation ML-Error estimation
clikerr : float
concentrated log-likelihood from equation by equation
ML-Error estimation (no constant)
bSUR0 : array
SUR estimation for betas without spatial autocorrelation
llik : float
log-likelihood for classic SUR estimation (includes constant)
lamsur : float
spatial autoregressive coefficient in ML SUR Error
bSUR : array
beta coefficients in ML SUR Error
varb : array
variance of beta coefficients in ML SUR Error
sig : array
error variance-covariance matrix in ML SUR Error
bigE : array
n by n_eq matrix of vectors of residuals for each equation
cliksurerr : float
concentrated log-likelihood from ML SUR Error (no constant)
sur_inf : array
inference for regression coefficients, stand. error, t, p
errllik : float
log-likelihood for error model without SUR (with constant)
surerrllik : float
log-likelihood for SUR error model (with constant)
lrtest : tuple
likelihood ratio test for off-diagonal Sigma elements
likrlambda : tuple
likelihood ratio test on spatial autoregressive coefficients
vm : array
asymptotic variance matrix for lambda and Sigma (only for vm=True)
lamsetp : array
inference for lambda, stand. error, t, p (only for vm=True)
lamtest : tuple
with test for constancy of lambda across equations
(test value, degrees of freedom, p-value)
joinlam : tuple
with test for joint significance of lambda across
equations (test value, degrees of freedom, p-value)
surchow : list
with tuples for Chow test on regression coefficients.
each tuple contains test value, degrees of freedom, p-value
name_bigy : dictionary
with name of dependent variable for each equation
name_bigX : dictionary
with names of explanatory variables for each
equation
name_ds : string
name for the data set
name_w : string
name for the weights file
name_regimes : string
name of regime variable for use in the output
Examples
--------
First import libpysal to load the spatial analysis tools.
>>> import libpysal
>>> from libpysal.examples import load_example
>>> from libpysal.weights import Queen
>>> import spreg
>>> np.set_printoptions(suppress=True) #prevent scientific format
Open data on NCOVR US County Homicides (3085 areas) using libpysal.io.open().
This is the DBF associated with the NAT shapefile. Note that libpysal.io.open()
also reads data in CSV format.
>>> nat = load_example('Natregimes')
>>> db = libpysal.io.open(nat.get_path('natregimes.dbf'), 'r')
The specification of the model to be estimated can be provided as lists.
Each equation should be listed separately. Equation 1 has HR80 as dependent
variable, and PS80 and UE80 as exogenous regressors.
For equation 2, HR90 is the dependent variable, and PS90 and UE90 the
exogenous regressors.
>>> y_var = ['HR80','HR90']
>>> x_var = [['PS80','UE80'],['PS90','UE90']]
>>> yend_var = [['RD80'],['RD90']]
>>> q_var = [['FP79'],['FP89']]
The SUR method requires data to be provided as dictionaries. PySAL
provides the tool sur_dictxy to create these dictionaries from the
list of variables. The line below will create four dictionaries
containing respectively the dependent variables (bigy), the regressors
(bigX), the dependent variables' names (bigyvars) and regressors' names
(bigXvars). All these will be created from th database (db) and lists
of variables (y_var and x_var) created above.
>>> bigy,bigX,bigyvars,bigXvars = spreg.sur_dictxy(db,y_var,x_var)
To run a spatial error model, we need to specify the spatial weights matrix.
To do that, we can open an already existing gal file or create a new one.
In this example, we will create a new one from NAT.shp and transform it to
row-standardized.
>>> w = Queen.from_shapefile(nat.get_path("natregimes.shp"))
>>> w.transform='r'
We can now run the regression and then have a summary of the output by typing:
print(reg.summary)
Alternatively, we can just check the betas and standard errors, asymptotic t
and p-value of the parameters:
>>> reg = spreg.SURerrorML(bigy,bigX,w=w,name_bigy=bigyvars,name_bigX=bigXvars,name_ds="NAT",name_w="nat_queen")
>>> reg.bSUR
{0: array([[4.02228606],
[0.88489637],
[0.42402845]]), 1: array([[3.04923031],
[1.10972632],
[0.47075678]])}
>>> reg.sur_inf
{0: array([[ 0.36692175, 10.96224484, 0. ],
[ 0.14129077, 6.26294545, 0. ],
[ 0.04267954, 9.93516909, 0. ]]), 1: array([[ 0.33139967, 9.20106629, 0. ],
[ 0.13352591, 8.31094381, 0. ],
[ 0.04004097, 11.75687747, 0. ]])}
"""
[docs] def __init__(
self,
bigy,
bigX,
w,
regimes=None,
nonspat_diag=True,
spat_diag=False,
vm=False,
epsilon=0.0000001,
name_bigy=None,
name_bigX=None,
name_ds=None,
name_w=None,
name_regimes=None,
):
# need checks on match between bigy, bigX dimensions
# check on variable names for listing results
self.name_ds = USER.set_name_ds(name_ds)
self.name_w = USER.set_name_w(name_w, w)
self.n_eq = len(bigy.keys())
# initialize names - should be generated by sur_stack
if name_bigy:
self.name_bigy = name_bigy
else: # need to construct y names
self.name_bigy = {}
for r in range(self.n_eq):
yn = "dep_var_" + str(r)
self.name_bigy[r] = yn
if name_bigX is None:
name_bigX = {}
for r in range(self.n_eq):
k = bigX[r].shape[1] - 1
name_x = ["var_" + str(i + 1) + "_" + str(r + 1) for i in range(k)]
ct = "Constant_" + str(r + 1) # NOTE: constant always included in X
name_x.insert(0, ct)
name_bigX[r] = name_x
if regimes is not None:
self.constant_regi = "many"
self.cols2regi = "all"
self.regime_err_sep = False
self.name_regimes = USER.set_name_ds(name_regimes)
self.regimes_set = REGI._get_regimes_set(regimes)
self.regimes = regimes
self.name_x_r = name_bigX
cols2regi_dic = {}
self.name_bigX = {}
for r in range(self.n_eq):
cols2regi_dic[r] = REGI.check_cols2regi(
self.constant_regi, self.cols2regi, bigX[r], add_cons=False
)
USER.check_regimes(self.regimes_set, bigy[0].shape[0], bigX[r].shape[1])
bigX[r], self.name_bigX[r] = REGI.Regimes_Frame.__init__(
self,
bigX[r],
regimes,
constant_regi=None,
cols2regi=cols2regi_dic[r],
names=name_bigX[r],
)
else:
self.name_bigX = name_bigX
# moved init here
BaseSURerrorML.__init__(self, bigy=bigy, bigX=bigX, w=w, epsilon=epsilon)
# inference
self.sur_inf = sur_setp(self.bSUR, self.varb)
# adjust concentrated log lik for constant
const = -self.n2 * (self.n_eq * (1.0 + np.log(2.0 * np.pi)))
self.errllik = const + self.clikerr
self.surerrllik = const + self.cliksurerr
# LR test on off-diagonal sigma
if nonspat_diag:
M = self.n_eq * (self.n_eq - 1) / 2.0
likrodiag = 2.0 * (self.surerrllik - self.errllik)
plik1 = stats.chisqprob(likrodiag, M)
self.lrtest = (likrodiag, int(M), plik1)
else:
self.lrtest = None
# LR test on spatial autoregressive coefficients
if spat_diag:
liklambda = 2.0 * (self.surerrllik - self.llik)
plik2 = stats.chisqprob(liklambda, self.n_eq)
self.likrlambda = (liklambda, self.n_eq, plik2)
else:
self.likrlambda = None
# asymptotic variance for spatial coefficient
if vm:
self.vm = surerrvm(self.n, self.n_eq, w, self.lamsur, self.sig)
vlam = self.vm[: self.n_eq, : self.n_eq]
self.lamsetp = lam_setp(self.lamsur, vlam)
# test on constancy of lambdas
R = REGI.buildR(kr=1, kf=0, nr=self.n_eq)
w, p = REGI.wald_test(self.lamsur, R, np.zeros((R.shape[0], 1)), vlam)
self.lamtest = (w, R.shape[0], p)
if spat_diag: # test on joint significance of lambdas
Rj = np.identity(self.n_eq)
wj, pj = REGI.wald_test(
self.lamsur, Rj, np.zeros((Rj.shape[0], 1)), vlam
)
self.joinlam = (wj, Rj.shape[0], pj)
else:
self.joinlam = None
else:
self.vm = None
self.lamsetp = None
self.lamtest = None
self.joinlam = None
# test on constancy of regression coefficients across equations
if check_k(self.bigK): # only for equal number of variables
self.surchow = sur_chow(self.n_eq, self.bigK, self.bSUR, self.varb)
else:
self.surchow = None
# listing of results
self.title = "SEEMINGLY UNRELATED REGRESSIONS (SUR) - ML SPATIAL ERROR MODEL"
if regimes is not None:
self.title = "SUR - ML SPATIAL ERROR MODEL - REGIMES"
self.chow_regimes = {}
varb_counter = 0
for r in range(self.n_eq):
counter_end = varb_counter + self.bSUR[r].shape[0]
self.chow_regimes[r] = REGI._chow_run(
len(cols2regi_dic[r]),
0,
0,
len(self.regimes_set),
self.bSUR[r],
self.varb[varb_counter:counter_end, varb_counter:counter_end],
)
varb_counter = counter_end
regimes = True
SUMMARY.SUR(
reg=self,
nonspat_diag=nonspat_diag,
spat_diag=spat_diag,
lambd=True,
regimes=regimes,
)
def jacob(lam, n_eq, I, WS):
"""Log-Jacobian for SUR Error model
Parameters
----------
lam : array
n_eq by 1 array of spatial autoregressive parameters
n_eq : int
number of equations
I : sparse matrix
sparse Identity matrix
WS : sparse matrix
sparse spatial weights matrix
Returns
-------
logjac : float
the log Jacobian
"""
logjac = 0.0
for r in range(n_eq):
lami = lam[r]
lamWS = WS.multiply(lami)
B = (I - lamWS).tocsc()
LU = SuperLU(B)
jj = np.sum(np.log(np.abs(LU.U.diagonal())))
logjac += jj
return logjac
def clik(lam, n, n2, n_eq, bigE, I, WS):
"""
Concentrated (negative) log-likelihood for SUR Error model
Parameters
----------
lam : array
n_eq x 1 array of spatial autoregressive parameters
n : int
number of observations in each cross-section
n2 : int
n/2
n_eq : int
number of equations
bigE : array
n by n_eq matrix with vectors of residuals for
each equation
I : sparse Identity matrix
WS : sparse spatial weights matrix
Returns
-------
-clik : float
negative (for minimize) of the concentrated
log-likelihood function
"""
WbigE = WS * bigE
spfbigE = bigE - WbigE * lam.T
sig = np.dot(spfbigE.T, spfbigE) / n
ldet = la.slogdet(sig)[1]
logjac = jacob(lam, n_eq, I, WS)
clik = -n2 * ldet + logjac
return -clik # negative for minimize
def surerrvm(n, n_eq, w, lam, sig):
"""
Asymptotic variance matrix for lambda and Sigma in
ML SUR Error estimation
Source: Anselin (1988) :cite:`Anselin1988`, Chapter 10.
Parameters
----------
n : int
number of cross-sectional observations
n_eq : int
number of equations
w : spatial weights object
lam : array
n_eq by 1 vector with spatial autoregressive coefficients
sig : array
n_eq by n_eq matrix with cross-equation error covariances
Returns
-------
vm : array
asymptotic variance-covariance matrix for spatial autoregressive
coefficients and the upper triangular elements of Sigma
n_eq + n_eq x (n_eq + 1) / 2 coefficients
"""
# inverse Sigma
sigi = la.inv(sig)
sisi = sigi * sig
# elements of Psi_lam,lam
# trace terms
trDi = np.zeros((n_eq, 1))
trDDi = np.zeros((n_eq, 1))
trDTDi = np.zeros((n_eq, 1))
trDTiDj = np.zeros((n_eq, n_eq))
WS = w.sparse
I = sp.identity(n)
for i in range(n_eq):
lami = lam[i][0]
lamWS = WS.multiply(lami)
B = I - lamWS
bb = B.todense()
Bi = la.inv(bb)
D = WS * Bi
trDi[i] = np.trace(D)
DD = np.dot(D, D)
trDDi[i] = np.trace(DD)
DD = np.dot(D.T, D)
trDTDi[i] = np.trace(DD)
for j in range(i + 1, n_eq):
lamj = lam[j][0]
lamWS = WS.multiply(lamj)
B = I - lamWS
bb = B.todense()
Bi = la.inv(bb)
Dj = WS * Bi
DD = np.dot(D.T, Dj)
trDTiDj[i, j] = np.trace(DD)
trDTiDj[j, i] = trDTiDj[i, j]
np.fill_diagonal(trDTiDj, trDTDi)
sisjT = sisi * trDTiDj
Vll = np.diagflat(trDDi) + sisjT
# elements of Psi_lam_sig
P = int(n_eq * (n_eq + 1) / 2) # force ints to be ints
tlist = [(i, j) for i in range(n_eq) for j in range(i, n_eq)]
zog = sigi * trDi
Vlsig = np.zeros((n_eq, P))
for i in range(n_eq):
for j in range(n_eq):
if i > j:
jj = tlist.index((j, i))
else:
jj = tlist.index((i, j))
Vlsig[i, jj] = zog[i, j]
# top of Psi
vtop = np.hstack((Vll, Vlsig))
# elements of Psi_sig_sig
Vsig = np.zeros((P, P))
for ij in range(P):
i, j = tlist[ij]
for hk in range(P):
h, k = tlist[hk]
if i == j:
if h == k:
Vsig[ij, hk] = 0.5 * (sigi[i, h] ** 2)
else: # h not equal to k
Vsig[ij, hk] = sigi[i, h] * sigi[i, k]
else: # i not equal to j
if h == k:
Vsig[ij, hk] = sigi[i, h] * sigi[j, h]
else: # h not equal to k
Vsig[ij, hk] = sigi[i, h] * sigi[j, k] + sigi[i, k] * sigi[j, h]
Vsig = n * Vsig
# bottom of Psi
vbottom = np.hstack((Vlsig.T, Vsig))
# all of Psi
vbig = np.vstack((vtop, vbottom))
# inverse of Psi
vm = la.inv(vbig)
return vm
def _test():
import doctest
start_suppress = np.get_printoptions()["suppress"]
np.set_printoptions(suppress=True)
doctest.testmod()
np.set_printoptions(suppress=start_suppress)
if __name__ == "__main__":
_test()
import numpy as np
import libpysal
from .sur_utils import sur_dictxy, sur_dictZ
from libpysal.examples import load_example
from libpysal.weights import Queen
nat = load_example("Natregimes")
db = libpysal.io.open(nat.get_path("natregimes.dbf"), "r")
y_var = ["HR80", "HR90"]
x_var = [["PS80", "UE80"], ["PS90", "UE90"]]
w = Queen.from_shapefile(nat.get_path("natregimes.shp"))
w.transform = "r"
bigy0, bigX0, bigyvars0, bigXvars0 = sur_dictxy(db, y_var, x_var)
reg0 = SURerrorML(
bigy0,
bigX0,
w,
#regimes=regimes,
name_bigy=bigyvars0,
name_bigX=bigXvars0,
name_w="natqueen",
name_ds="natregimes",
vm=True,
nonspat_diag=True,
spat_diag=True,
)
# reg0 = SURerrorGM(bigy0,bigX0,w,regimes=regimes,name_bigy=bigyvars0,name_bigX=bigXvars0,\
# name_w="natqueen",name_ds="natregimes",vm=False,nonspat_diag=True,spat_diag=False)
print(reg0.summary)