Source code for spreg.diagnostics_sur

"""
Diagnostics for SUR and 3SLS estimation
"""

__author__ = "Luc Anselin lanselin@gmail.com,    \
             Pedro V. Amaral pedrovma@gmail.com  \
             Tony Aburaad taburaad@uchicago.edu"


import numpy as np
import scipy.stats as stats
import numpy.linalg as la
from .sur_utils import sur_dict2mat, sur_mat2dict, sur_corr, spdot
from .regimes import buildR1var, wald_test


__all__ = ["sur_setp", "sur_lrtest", "sur_lmtest", "lam_setp", "surLMe", "surLMlag"]


[docs]def sur_setp(bigB, varb): """ Utility to compute standard error, t and p-value Parameters ---------- bigB : dictionary of regression coefficient estimates, one vector by equation varb : array variance-covariance matrix of coefficients Returns ------- surinfdict : dictionary with standard error, t-value, and p-value array, one for each equation """ vvb = varb.diagonal() n_eq = len(bigB.keys()) bigK = np.zeros((n_eq, 1), dtype=np.int_) for r in range(n_eq): bigK[r] = bigB[r].shape[0] b = sur_dict2mat(bigB) se = np.sqrt(vvb) se.resize(len(se), 1) t = np.divide(b, se) tp = stats.norm.sf(abs(t)) * 2 surinf = np.hstack((se, t, tp)) surinfdict = sur_mat2dict(surinf, bigK) return surinfdict
[docs]def lam_setp(lam, vm): """ Standard errors, t-test and p-value for lambda in SUR Error ML Parameters ---------- lam : array n_eq x 1 array with ML estimates for spatial error autoregressive coefficient vm : array n_eq x n_eq subset of variance-covariance matrix for lambda and Sigma in SUR Error ML (needs to be subset from full vm) Returns ------- : tuple with arrays for standard error, t-value and p-value (each element in the tuple is an n_eq x 1 array) """ vvb = vm.diagonal() se = np.sqrt(vvb) se.resize(len(se), 1) t = np.divide(lam, se) tp = stats.norm.sf(abs(t)) * 2 return (se, t, tp)
[docs]def sur_lrtest(n, n_eq, ldetS0, ldetS1): """ Likelihood Ratio test on off-diagonal elements of Sigma Parameters ---------- n : int cross-sectional dimension (number of observations for an equation) n_eq : int number of equations ldetS0 : float log determinant of Sigma for OLS case ldetS1 : float log determinant of Sigma for SUR case (should be iterated) Returns ------- (lrtest,M,pvalue) : tuple with value of test statistic (lrtest), degrees of freedom (M, as an integer) p-value """ M = n_eq * (n_eq - 1) / 2.0 lrtest = n * (ldetS0 - ldetS1) pvalue = stats.chi2.sf(lrtest, M) return (lrtest, int(M), pvalue)
[docs]def sur_lmtest(n, n_eq, sig): """ Lagrange Multiplier test on off-diagonal elements of Sigma Parameters ---------- n : int cross-sectional dimension (number of observations for an equation) n_eq : int number of equations sig : array inter-equation covariance matrix for null model (OLS) Returns ------- (lmtest,M,pvalue) : tuple with value of test statistic (lmtest), degrees of freedom (M, as an integer) p-value """ R = sur_corr(sig) tr = np.trace(np.dot(R.T, R)) M = n_eq * (n_eq - 1) / 2.0 lmtest = (n / 2.0) * (tr - n_eq) pvalue = stats.chi2.sf(lmtest, M) return (lmtest, int(M), pvalue)
[docs]def surLMe(n_eq, WS, bigE, sig): """ Lagrange Multiplier test on error spatial autocorrelation in SUR Parameters ---------- n_eq : int number of equations WS : array spatial weights matrix in sparse form bigE : array n x n_eq matrix of residuals by equation sig : array cross-equation error covariance matrix Returns ------- (LMe,n_eq,pvalue) : tuple with value of statistic (LMe), degrees of freedom (n_eq) and p-value """ # spatially lagged residuals WbigE = WS * bigE # score EWE = np.dot(bigE.T, WbigE) sigi = la.inv(sig) SEWE = sigi * EWE # score = SEWE.sum(axis=1) # score.resize(n_eq,1) # note score is column sum of Sig_i * E'WE, a 1 by n_eq row vector # previously stored as column score = SEWE.sum(axis=0) score.resize(1, n_eq) # trace terms WW = WS * WS trWW = np.sum(WW.diagonal()) WTW = WS.T * WS trWtW = np.sum(WTW.diagonal()) # denominator SiS = sigi * sig Tii = trWW * np.identity(n_eq) tSiS = trWtW * SiS denom = Tii + tSiS idenom = la.inv(denom) # test statistic # LMe = np.dot(np.dot(score.T,idenom),score)[0][0] # score is now row vector LMe = np.dot(np.dot(score, idenom), score.T)[0][0] pvalue = stats.chi2.sf(LMe, n_eq) return (LMe, n_eq, pvalue)
[docs]def surLMlag(n_eq, WS, bigy, bigX, bigE, bigYP, sig, varb): """ Lagrange Multiplier test on lag spatial autocorrelation in SUR Parameters ---------- n_eq : int number of equations WS : spatial weights matrix in sparse form bigy : dictionary with y values bigX : dictionary with X values bigE : array n x n_eq matrix of residuals by equation bigYP : array n x n_eq matrix of predicted values by equation sig : array cross-equation error covariance matrix varb : array variance-covariance matrix for b coefficients (inverse of Ibb) Returns ------- (LMlag,n_eq,pvalue) : tuple with value of statistic (LMlag), degrees of freedom (n_eq) and p-value """ # Score Y = np.hstack([bigy[r] for r in range(n_eq)]) WY = WS * Y EWY = np.dot(bigE.T, WY) sigi = la.inv(sig) SEWE = sigi * EWY score = SEWE.sum(axis=0) # column sums score.resize(1, n_eq) # score as a row vector # I(rho,rho) as partitioned inverse, eq 72 # trace terms WW = WS * WS trWW = np.sum(WW.diagonal()) # T1 WTW = WS.T * WS trWtW = np.sum(WTW.diagonal()) # T2 # I(rho,rho) SiS = sigi * sig Tii = trWW * np.identity(n_eq) # T1It tSiS = trWtW * SiS firstHalf = Tii + tSiS WbigYP = WS * bigYP inner = np.dot(WbigYP.T, WbigYP) secondHalf = sigi * inner Ipp = firstHalf + secondHalf # eq. 75 # I(b,b) inverse is varb # I(b,rho) bp = sigi[0,] * spdot( bigX[0].T, WbigYP ) # initialize for r in range(1, n_eq): bpwork = ( sigi[ r, ] * spdot(bigX[r].T, WbigYP) ) bp = np.vstack((bp, bpwork)) # partitioned part i_inner = Ipp - np.dot(np.dot(bp.T, varb), bp) # partitioned inverse of information matrix Ippi = la.inv(i_inner) # test statistic LMlag = np.dot(np.dot(score, Ippi), score.T)[0][0] # p-value pvalue = stats.chi2.sf(LMlag, n_eq) return (LMlag, n_eq, pvalue)
def sur_chow(n_eq, bigK, bSUR, varb): """ test on constancy of regression coefficients across equations in a SUR specification Note: requires a previous check on constancy of number of coefficients across equations; no other checks are carried out, so it is possible that the results are meaningless if the variables are not listed in the same order in each equation. Parameters ---------- n_eq : int number of equations bigK : array with the number of variables by equation (includes constant) bSUR : dictionary with the SUR regression coefficients by equation varb : array the variance-covariance matrix for the SUR regression coefficients Returns ------- test : array a list with for each coefficient (in order) a tuple with the value of the test statistic, the degrees of freedom, and the p-value """ kr = bigK[0][0] test = [] bb = sur_dict2mat(bSUR) kf = 0 nr = n_eq df = n_eq - 1 for i in range(kr): Ri = buildR1var(i, kr, kf, 0, nr) tt, p = wald_test(bb, Ri, np.zeros((df, 1)), varb) test.append((tt, df, p)) return test def sur_joinrho(n_eq, bigK, bSUR, varb): """ Test on joint significance of spatial autoregressive coefficient in SUR Parameters ---------- n_eq : int number of equations bigK : array n_eq x 1 array with number of variables by equation (includes constant term, exogenous and endogeneous and spatial lag) bSUR : dictionary with regression coefficients by equation, with the spatial autoregressive term as last varb : array variance-covariance matrix for regression coefficients Returns ------- : tuple with test statistic, degrees of freedom, p-value """ bb = sur_dict2mat(bSUR) R = np.zeros((n_eq, varb.shape[0])) q = np.zeros((n_eq, 1)) kc = -1 for i in range(n_eq): kc = kc + bigK[i] R[i, kc] = 1 w, p = wald_test(bb, R, q, varb) return (w, n_eq, p)