Source code for spreg.sur_error

"""
Spatial Error SUR estimation
"""

__author__ = "Luc Anselin lanselin@gmail.com,    \
             Pedro V. Amaral pedrovma@gmail.com"


import numpy as np
import numpy.linalg as la
from scipy import stats

stats.chisqprob = stats.chi2.sf
from . import summary_output as SUMMARY
from . import user_output as USER
from . import regimes as REGI
from scipy.sparse.linalg import splu as SuperLU
from scipy.optimize import minimize_scalar, minimize
from scipy import sparse as sp

from .ml_error import err_c_loglik_sp
from .utils import optim_moments
from .sur_utils import (
    sur_dictxy,
    sur_corr,
    sur_dict2mat,
    sur_crossprod,
    sur_est,
    sur_resids,
    filter_dict,
    check_k,
)
from .sur import BaseSUR, _sur_ols
from .diagnostics_sur import sur_setp, lam_setp, sur_chow
from .regimes import buildR, wald_test

__all__ = ["BaseSURerrorGM", "SURerrorGM", "BaseSURerrorML", "SURerrorML"]


class BaseSURerrorGM:
    """Base class for SUR Error estimation by Generalized Moments

    Parameters
    ----------
    bigy       : dictionary
                 with vector for dependent variable by equation
    bigX       : dictionary
                 with matrix of explanatory variables by equation
                 (note, already includes constant term)
    w          : spatial weights object

    Attributes
    ----------
    n_eq       : int
                 number of equations
    n          : int
                 number of observations in each cross-section
    bigy       : dictionary
                 with vectors of dependent variable, one for
                 each equation
    bigX       : dictionary
                 with matrices of explanatory variables,
                 one for each equation
    bigK       : array
                 n_eq x 1 array with number of explanatory variables
                 by equation
    bigylag    : dictionary
                 spatially lagged dependent variable
    bigXlag    : dictionary
                 spatially lagged explanatory variable
    lamsur     : float
                 spatial autoregressive coefficient in GM SUR Error
    bSUR       : array
                 beta coefficients in GM SUR Error
    varb       : array
                 variance of beta coefficients in GM SUR Error
    sig        : array
                 error variance-covariance matrix in GM SUR Error
    corr       : array
                 error correlation matrix
    bigE       : array
                 n by n_eq matrix of vectors of residuals for each equation

    """

    def __init__(self, bigy, bigX, w):
        self.n = w.n
        self.n_eq = len(bigy.keys())
        WS = w.sparse
        I = sp.identity(self.n)
        # variables
        self.bigy = bigy
        self.bigX = bigX
        # number of variables by equation
        self.bigK = np.zeros((self.n_eq, 1), dtype=np.int_)
        for r in range(self.n_eq):
            self.bigK[r] = self.bigX[r].shape[1]

        # OLS
        self.bigXX, self.bigXy = sur_crossprod(self.bigX, self.bigy)
        reg0 = _sur_ols(self)

        # Moments
        moments = _momentsGM_sur_Error(WS, reg0.olsE)
        lam = np.zeros((self.n_eq, 1))
        for r in range(self.n_eq):
            lam[r] = optim_moments(moments[r])

        # spatially lagged variables
        self.bigylag = {}
        for r in range(self.n_eq):
            self.bigylag[r] = WS @ self.bigy[r]
        # note: unlike WX as instruments, this includes the constant
        self.bigXlag = {}
        for r in range(self.n_eq):
            self.bigXlag[r] = WS @ self.bigX[r]

        # spatially filtered variables
        sply = filter_dict(lam, self.bigy, self.bigylag)
        splX = filter_dict(lam, self.bigX, self.bigXlag)
        WbigE = WS @ reg0.olsE
        splbigE = reg0.olsE - WbigE * lam.T
        splXX, splXy = sur_crossprod(splX, sply)
        b1, varb1, sig1 = sur_est(splXX, splXy, splbigE, self.bigK)
        bigE = sur_resids(self.bigy, self.bigX, b1)

        self.lamsur = lam
        self.bSUR = b1
        self.varb = varb1
        self.sig = sig1
        self.corr = sur_corr(self.sig)
        self.bigE = bigE


def _momentsGM_sur_Error(w, u):
    n = w.shape[0]
    u2 = (u * u).sum(0)
    wu = w @ u
    uwu = (u * wu).sum(0)
    wu2 = (wu * wu).sum(0)
    wwu = w @ wu
    uwwu = (u * wwu).sum(0)
    wwu2 = (wwu * wwu).sum(0)
    wwuwu = (wwu * wu).sum(0)
    trWtW = w.multiply(w).sum()
    moments = {}
    for r in range(u.shape[1]):
        g = np.array([[u2[r], wu2[r], uwu[r]]]).T / n
        G = (
            np.array(
                [
                    [2 * uwu[r], -wu2[r], n],
                    [2 * wwuwu[r], -wwu2[r], trWtW],
                    [uwwu[r] + wu2[r], -wwuwu[r], 0.0],
                ]
            )
            / n
        )
        moments[r] = [G, g]
    return moments


[docs] class SURerrorGM(BaseSURerrorGM, REGI.Regimes_Frame): """ User class for SUR Error estimation by Generalized Moments Parameters ---------- bigy : list or dictionary list with the name of the dependent variable for each equation or dictionary with vectors for dependent variable by equation bigX : list or dictionary list of lists the name of the explanatory variables for each equation or dictionary with matrix of explanatory variables by equation (note, already includes constant term) w : spatial weights object db : Pandas DataFrame Optional. Required in case bigy and bigX are lists with names of variables regimes : list List of n values with the mapping of each observation to a regime. Assumed to be aligned with 'x'. nonspat_diag : boolean flag for non-spatial diagnostics, default = False spat_diag : boolean flag for spatial diagnostics, default = False (to be implemented) vm : boolean flag for asymptotic variance for lambda and Sigma, default = False (to be implemented) name_bigy : dictionary with name of dependent variable for each equation. default = None, but should be specified is done when sur_stackxy is used name_bigX : dictionary with names of explanatory variables for each equation. default = None, but should be specified is done when sur_stackxy is used name_ds : string name for the data set name_w : string name for the weights file name_regimes : string name of regime variable for use in the output Attributes ---------- n : int number of observations in each cross-section n_eq : int number of equations bigy : dictionary with vectors of dependent variable, one for each equation bigX : dictionary with matrices of explanatory variables, one for each equation bigK : array n_eq x 1 array with number of explanatory variables by equation bigylag : dictionary spatially lagged dependent variable bigXlag : dictionary spatially lagged explanatory variable lamsur : float spatial autoregressive coefficient in ML SUR Error bSUR : array beta coefficients in ML SUR Error varb : array variance of beta coefficients in ML SUR Error sig : array error variance-covariance matrix in ML SUR Error bigE : array n by n_eq matrix of vectors of residuals for each equation sur_inf : array inference for regression coefficients, stand. error, t, p surchow : array list with tuples for Chow test on regression coefficients. each tuple contains test value, degrees of freedom, p-value name_bigy : dictionary with name of dependent variable for each equation name_bigX : dictionary with names of explanatory variables for each equation name_ds : string name for the data set name_w : string name for the weights file name_regimes : string name of regime variable for use in the output Examples -------- >>> import libpysal >>> import geopandas as gpd >>> from spreg import SURerrorGM Open data on NCOVR US County Homicides (3085 areas) from libpysal examples using geopandas. >>> nat = libpysal.examples.load_example('Natregimes') >>> df = gpd.read_file(nat.get_path("natregimes.shp")) The specification of the model to be estimated can be provided as lists. Each equation should be listed separately. In this example, equation 1 has HR80 as dependent variable and PS80 and UE80 as exogenous regressors. For equation 2, HR90 is the dependent variable, and PS90 and UE90 the exogenous regressors. >>> y_var = ['HR80','HR90'] >>> x_var = [['PS80','UE80'],['PS90','UE90']] To run a spatial error model, we need to specify the spatial weights matrix. To do that, we can open an already existing gal file or create a new one. In this example, we will create a new one from NAT.shp and transform it to row-standardized. >>> w = libpysal.weights.Queen.from_dataframe(df) >>> w.transform='r' We can now run the regression and then have a summary of the output by typing: print(reg.summary) Alternatively, we can just check the betas and standard errors, asymptotic t and p-value of the parameters: >>> reg = SURerrorGM(y_var,x_var,w=w,df=df,name_ds="NAT",name_w="nat_queen") >>> reg.bSUR {0: array([[3.97746866], [0.89021219], [0.43050363]]), 1: array([[2.93679119], [1.11002826], [0.48761542]])} >>> reg.sur_inf {0: array([[ 0.37251476, 10.67734504, 0. ], [ 0.14224297, 6.25839153, 0. ], [ 0.04322388, 9.95985608, 0. ]]), 1: array([[ 0.33694902, 8.71583245, 0. ], [ 0.13413626, 8.27537783, 0. ], [ 0.04033105, 12.09032288, 0. ]])} """
[docs] def __init__( self, bigy, bigX, w, df=None, regimes=None, nonspat_diag=True, spat_diag=False, vm=False, name_bigy=None, name_bigX=None, name_ds=None, name_w=None, name_regimes=None, ): if isinstance(bigy, list) or isinstance(bigX, list): if isinstance(bigy, list) and isinstance(bigX, list): if len(bigy) == len(bigX): if df is not None: bigy,bigX,name_bigy,name_bigX = sur_dictxy(df,bigy,bigX) else: raise Exception("Error: df argument is required if bigy and bigX are lists") else: raise Exception("Error: bigy and bigX must have the same number of elements") else: raise Exception("Error: bigy and bigX must be both lists or both dictionaries") # check on variable names for listing results self.name_ds = USER.set_name_ds(name_ds) self.name_w = USER.set_name_w(name_w, w) # initialize names - should be generated by sur_stack self.n_eq = len(bigy.keys()) if name_bigy: self.name_bigy = name_bigy else: # need to construct y names self.name_bigy = {} for r in range(self.n_eq): yn = "dep_var_" + str(r) self.name_bigy[r] = yn if name_bigX is None: name_bigX = {} for r in range(self.n_eq): k = bigX[r].shape[1] - 1 name_x = ["var_" + str(i + 1) + "_" + str(r + 1) for i in range(k)] ct = "Constant_" + str(r + 1) # NOTE: constant always included in X name_x.insert(0, ct) name_bigX[r] = name_x if regimes is not None: self.constant_regi = "many" self.cols2regi = "all" self.regime_err_sep = False self.name_regimes = USER.set_name_ds(name_regimes) self.regimes_set = REGI._get_regimes_set(regimes) self.regimes = regimes cols2regi_dic = {} self.name_bigX = {} self.name_x_r = name_bigX for r in range(self.n_eq): cols2regi_dic[r] = REGI.check_cols2regi( self.constant_regi, self.cols2regi, bigX[r], add_cons=False ) USER.check_regimes(self.regimes_set, bigy[0].shape[0], bigX[r].shape[1]) bigX[r], self.name_bigX[r], xtype = REGI.Regimes_Frame.__init__( self, bigX[r], regimes, constant_regi=None, cols2regi=cols2regi_dic[r], names=name_bigX[r], ) else: self.name_bigX = name_bigX BaseSURerrorGM.__init__(self, bigy=bigy, bigX=bigX, w=w) # inference self.sur_inf = sur_setp(self.bSUR, self.varb) # test on constancy of regression coefficients across equations if check_k(self.bigK): # only for equal number of variables self.surchow = sur_chow(self.n_eq, self.bigK, self.bSUR, self.varb) else: self.surchow = None # listing of results self.title = "SEEMINGLY UNRELATED REGRESSIONS (SUR) - GM SPATIAL ERROR MODEL" if regimes is not None: self.title = "SUR - GM SPATIAL ERROR MODEL WITH REGIMES" self.chow_regimes = {} varb_counter = 0 for r in range(self.n_eq): counter_end = varb_counter + self.bSUR[r].shape[0] self.chow_regimes[r] = REGI._chow_run( len(cols2regi_dic[r]), 0, 0, len(self.regimes_set), self.bSUR[r], self.varb[varb_counter:counter_end, varb_counter:counter_end], ) varb_counter = counter_end regimes = True SUMMARY.SUR( reg=self, nonspat_diag=nonspat_diag, spat_diag=spat_diag, lambd=True, ml=False, regimes=regimes, )
class BaseSURerrorML: """ Base class for SUR Error estimation by Maximum Likelihood requires: scipy.optimize.minimize_scalar and scipy.optimize.minimize Parameters ---------- bigy : dictionary with vectors of dependent variable, one for each equation bigX : dictionary with matrices of explanatory variables, one for each equation w : spatial weights object epsilon : float convergence criterion for ML iterations default 0.0000001 Attributes ---------- n : int number of observations in each cross-section n2 : int n/2 n_eq : int number of equations bigy : dictionary with vectors of dependent variable, one for each equation bigX : dictionary with matrices of explanatory variables, one for each equation bigK : array n_eq x 1 array with number of explanatory variables by equation bigylag : dictionary spatially lagged dependent variable bigXlag : dictionary spatially lagged explanatory variable lamols : array spatial autoregressive coefficients from equation by equation ML-Error estimation clikerr : float concentrated log-likelihood from equation by equation ML-Error estimation (no constant) bSUR0 : array SUR estimation for betas without spatial autocorrelation llik : float log-likelihood for classic SUR estimation (includes constant) lamsur : float spatial autoregressive coefficient in ML SUR Error bSUR : array beta coefficients in ML SUR Error varb : array variance of beta coefficients in ML SUR Error sig : array error variance-covariance matrix in ML SUR Error corr : array error correlation matrix bigE : array n by n_eq matrix of vectors of residuals for each equation cliksurerr : float concentrated log-likelihood from ML SUR Error (no constant) """ def __init__(self, bigy, bigX, w, epsilon=0.0000001): # setting up constants self.n = w.n self.n2 = self.n / 2.0 self.n_eq = len(bigy.keys()) WS = w.sparse I = sp.identity(self.n) # variables self.bigy = bigy self.bigX = bigX # number of variables by equation self.bigK = np.zeros((self.n_eq, 1), dtype=np.int_) for r in range(self.n_eq): self.bigK[r] = self.bigX[r].shape[1] # spatially lagged variables self.bigylag = {} for r in range(self.n_eq): self.bigylag[r] = WS @ self.bigy[r] # note: unlike WX as instruments, this includes the constant self.bigXlag = {} for r in range(self.n_eq): self.bigXlag[r] = WS @ self.bigX[r] # spatial parameter starting values lam = np.zeros((self.n_eq, 1)) # initialize as an array fun0 = 0.0 fun1 = 0.0 for r in range(self.n_eq): res = minimize_scalar( err_c_loglik_sp, 0.0, bounds=(-1.0, 1.0), args=( self.n, self.bigy[r], self.bigylag[r], self.bigX[r], self.bigXlag[r], I, WS, ), method="bounded", options={"xatol": epsilon}, ) lam[r] = res.x fun1 += res.fun self.lamols = lam self.clikerr = -fun1 # negative because use in min # SUR starting values reg0 = BaseSUR(self.bigy, self.bigX, iter=True) bigE = reg0.bigE self.bSUR0 = reg0.bSUR self.llik = reg0.llik # as is, includes constant # iteration lambdabounds = [(-1.0, +1.0) for i in range(self.n_eq)] while abs(fun0 - fun1) > epsilon: fun0 = fun1 sply = filter_dict(lam, self.bigy, self.bigylag) splX = filter_dict(lam, self.bigX, self.bigXlag) WbigE = WS @ bigE splbigE = bigE - WbigE * lam.T splXX, splXy = sur_crossprod(splX, sply) b1, varb1, sig1 = sur_est(splXX, splXy, splbigE, self.bigK) bigE = sur_resids(self.bigy, self.bigX, b1) res = minimize( clik, np.array(lam).flatten(), args=(self.n, self.n2, self.n_eq, bigE, I, WS), method="L-BFGS-B", bounds=lambdabounds, ) lam = res.x lam.resize((self.n_eq, 1)) fun1 = res.fun self.lamsur = lam self.bSUR = b1 self.varb = varb1 self.sig = sig1 self.corr = sur_corr(self.sig) self.bigE = bigE self.cliksurerr = -fun1 # negative because use in min, no constant
[docs] class SURerrorML(BaseSURerrorML, REGI.Regimes_Frame): """ User class for SUR Error estimation by Maximum Likelihood Parameters ---------- bigy : list or dictionary list with the name of the dependent variable for each equation or dictionary with vectors for dependent variable by equation bigX : list or dictionary list of lists the name of the explanatory variables for each equation or dictionary with matrix of explanatory variables by equation (note, already includes constant term) w : spatial weights object db : Pandas DataFrame Optional. Required in case bigy and bigX are lists with names of variables regimes : list default = None. List of n values with the mapping of each observation to a regime. Assumed to be aligned with 'x'. epsilon : float convergence criterion for ML iterations. default 0.0000001 nonspat_diag : boolean flag for non-spatial diagnostics, default = True spat_diag : boolean flag for spatial diagnostics, default = False vm : boolean flag for asymptotic variance for lambda and Sigma, default = False name_bigy : dictionary with name of dependent variable for each equation. default = None, but should be specified is done when sur_stackxy is used name_bigX : dictionary with names of explanatory variables for each equation. default = None, but should be specified is done when sur_stackxy is used name_ds : string name for the data set name_w : string name for the weights file name_regimes : string name of regime variable for use in the output Attributes ---------- n : int number of observations in each cross-section n2 : int n/2 n_eq : int number of equations bigy : dictionary with vectors of dependent variable, one for each equation bigX : dictionary with matrices of explanatory variables, one for each equation bigK : array n_eq x 1 array with number of explanatory variables by equation bigylag : dictionary spatially lagged dependent variable bigXlag : dictionary spatially lagged explanatory variable lamols : array spatial autoregressive coefficients from equation by equation ML-Error estimation clikerr : float concentrated log-likelihood from equation by equation ML-Error estimation (no constant) bSUR0 : array SUR estimation for betas without spatial autocorrelation llik : float log-likelihood for classic SUR estimation (includes constant) lamsur : float spatial autoregressive coefficient in ML SUR Error bSUR : array beta coefficients in ML SUR Error varb : array variance of beta coefficients in ML SUR Error sig : array error variance-covariance matrix in ML SUR Error bigE : array n by n_eq matrix of vectors of residuals for each equation cliksurerr : float concentrated log-likelihood from ML SUR Error (no constant) sur_inf : array inference for regression coefficients, stand. error, t, p errllik : float log-likelihood for error model without SUR (with constant) surerrllik : float log-likelihood for SUR error model (with constant) lrtest : tuple likelihood ratio test for off-diagonal Sigma elements likrlambda : tuple likelihood ratio test on spatial autoregressive coefficients vm : array asymptotic variance matrix for lambda and Sigma (only for vm=True) lamsetp : array inference for lambda, stand. error, t, p (only for vm=True) lamtest : tuple with test for constancy of lambda across equations (test value, degrees of freedom, p-value) joinlam : tuple with test for joint significance of lambda across equations (test value, degrees of freedom, p-value) surchow : list with tuples for Chow test on regression coefficients. each tuple contains test value, degrees of freedom, p-value name_bigy : dictionary with name of dependent variable for each equation name_bigX : dictionary with names of explanatory variables for each equation name_ds : string name for the data set name_w : string name for the weights file name_regimes : string name of regime variable for use in the output Examples -------- >>> import libpysal >>> import geopandas as gpd >>> from spreg import SURerrorML Open data on NCOVR US County Homicides (3085 areas) from libpysal examples using geopandas. >>> nat = libpysal.examples.load_example('Natregimes') >>> df = gpd.read_file(nat.get_path("natregimes.shp")) The specification of the model to be estimated can be provided as lists. Each equation should be listed separately. In this example, equation 1 has HR80 as dependent variable and PS80 and UE80 as exogenous regressors. For equation 2, HR90 is the dependent variable, and PS90 and UE90 the exogenous regressors. >>> y_var = ['HR80','HR90'] >>> x_var = [['PS80','UE80'],['PS90','UE90']] To run a spatial error model, we need to specify the spatial weights matrix. To do that, we can open an already existing gal file or create a new one. In this example, we will create a new one from NAT.shp and transform it to row-standardized. >>> w = libpysal.weights.Queen.from_dataframe(df) >>> w.transform='r' We can now run the regression and then have a summary of the output by typing: print(reg.summary) Alternatively, we can just check the betas and standard errors, asymptotic t and p-value of the parameters: >>> reg = spreg.SURerrorML(y_var,x_var,w=w,df=df,name_ds="NAT",name_w="nat_queen") >>> reg.bSUR {0: array([[4.02228606], [0.88489637], [0.42402845]]), 1: array([[3.04923031], [1.10972632], [0.47075678]])} >>> reg.sur_inf {0: array([[ 0.36692175, 10.96224484, 0. ], [ 0.14129077, 6.26294545, 0. ], [ 0.04267954, 9.93516909, 0. ]]), 1: array([[ 0.33139967, 9.20106629, 0. ], [ 0.13352591, 8.31094381, 0. ], [ 0.04004097, 11.75687747, 0. ]])} """
[docs] def __init__( self, bigy, bigX, w, df=None, regimes=None, nonspat_diag=True, spat_diag=False, vm=False, epsilon=0.0000001, name_bigy=None, name_bigX=None, name_ds=None, name_w=None, name_regimes=None, ): if isinstance(bigy, list) or isinstance(bigX, list): if isinstance(bigy, list) and isinstance(bigX, list): if len(bigy) == len(bigX): if df is not None: bigy,bigX,name_bigy,name_bigX = sur_dictxy(df,bigy,bigX) else: raise Exception("Error: df argument is required if bigy and bigX are lists") else: raise Exception("Error: bigy and bigX must have the same number of elements") else: raise Exception("Error: bigy and bigX must be both lists or both dictionaries") # check on variable names for listing results self.name_ds = USER.set_name_ds(name_ds) self.name_w = USER.set_name_w(name_w, w) self.n_eq = len(bigy.keys()) # initialize names - should be generated by sur_stack if name_bigy: self.name_bigy = name_bigy else: # need to construct y names self.name_bigy = {} for r in range(self.n_eq): yn = "dep_var_" + str(r) self.name_bigy[r] = yn if name_bigX is None: name_bigX = {} for r in range(self.n_eq): k = bigX[r].shape[1] - 1 name_x = ["var_" + str(i + 1) + "_" + str(r + 1) for i in range(k)] ct = "Constant_" + str(r + 1) # NOTE: constant always included in X name_x.insert(0, ct) name_bigX[r] = name_x if regimes is not None: self.constant_regi = "many" self.cols2regi = "all" self.regime_err_sep = False self.name_regimes = USER.set_name_ds(name_regimes) self.regimes_set = REGI._get_regimes_set(regimes) self.regimes = regimes self.name_x_r = name_bigX cols2regi_dic = {} self.name_bigX = {} for r in range(self.n_eq): cols2regi_dic[r] = REGI.check_cols2regi( self.constant_regi, self.cols2regi, bigX[r], add_cons=False ) USER.check_regimes(self.regimes_set, bigy[0].shape[0], bigX[r].shape[1]) bigX[r], self.name_bigX[r], xtype = REGI.Regimes_Frame.__init__( self, bigX[r], regimes, constant_regi=None, cols2regi=cols2regi_dic[r], names=name_bigX[r], ) else: self.name_bigX = name_bigX # moved init here BaseSURerrorML.__init__(self, bigy=bigy, bigX=bigX, w=w, epsilon=epsilon) # inference self.sur_inf = sur_setp(self.bSUR, self.varb) # adjust concentrated log lik for constant const = -self.n2 * (self.n_eq * (1.0 + np.log(2.0 * np.pi))) self.errllik = const + self.clikerr self.surerrllik = const + self.cliksurerr # LR test on off-diagonal sigma if nonspat_diag: M = self.n_eq * (self.n_eq - 1) / 2.0 likrodiag = 2.0 * (self.surerrllik - self.errllik) plik1 = stats.chisqprob(likrodiag, M) self.lrtest = (likrodiag, int(M), plik1) else: self.lrtest = None # LR test on spatial autoregressive coefficients if spat_diag: liklambda = 2.0 * (self.surerrllik - self.llik) plik2 = stats.chisqprob(liklambda, self.n_eq) self.likrlambda = (liklambda, self.n_eq, plik2) else: self.likrlambda = None # asymptotic variance for spatial coefficient if vm: self.vm = surerrvm(self.n, self.n_eq, w, self.lamsur, self.sig) vlam = self.vm[: self.n_eq, : self.n_eq] self.lamsetp = lam_setp(self.lamsur, vlam) # test on constancy of lambdas R = REGI.buildR(kr=1, kf=0, nr=self.n_eq) w, p = REGI.wald_test(self.lamsur, R, np.zeros((R.shape[0], 1)), vlam) self.lamtest = (w, R.shape[0], p) if spat_diag: # test on joint significance of lambdas Rj = np.identity(self.n_eq) wj, pj = REGI.wald_test( self.lamsur, Rj, np.zeros((Rj.shape[0], 1)), vlam ) self.joinlam = (wj, Rj.shape[0], pj) else: self.joinlam = None else: self.vm = None self.lamsetp = None self.lamtest = None self.joinlam = None # test on constancy of regression coefficients across equations if check_k(self.bigK): # only for equal number of variables self.surchow = sur_chow(self.n_eq, self.bigK, self.bSUR, self.varb) else: self.surchow = None # listing of results self.title = "SEEMINGLY UNRELATED REGRESSIONS (SUR) - ML SPATIAL ERROR MODEL" if regimes is not None: self.title = "SUR - ML SPATIAL ERROR MODEL - REGIMES" self.chow_regimes = {} varb_counter = 0 for r in range(self.n_eq): counter_end = varb_counter + self.bSUR[r].shape[0] self.chow_regimes[r] = REGI._chow_run( len(cols2regi_dic[r]), 0, 0, len(self.regimes_set), self.bSUR[r], self.varb[varb_counter:counter_end, varb_counter:counter_end], ) varb_counter = counter_end regimes = True SUMMARY.SUR( reg=self, nonspat_diag=nonspat_diag, spat_diag=spat_diag, lambd=True, regimes=regimes, )
def jacob(lam, n_eq, I, WS): """Log-Jacobian for SUR Error model Parameters ---------- lam : array n_eq by 1 array of spatial autoregressive parameters n_eq : int number of equations I : sparse matrix sparse Identity matrix WS : sparse matrix sparse spatial weights matrix Returns ------- logjac : float the log Jacobian """ logjac = 0.0 for r in range(n_eq): lami = lam[r] lamWS = WS.multiply(lami) B = (I - lamWS).tocsc() LU = SuperLU(B) jj = np.sum(np.log(np.abs(LU.U.diagonal()))) logjac += jj return logjac def clik(lam, n, n2, n_eq, bigE, I, WS): """ Concentrated (negative) log-likelihood for SUR Error model Parameters ---------- lam : array n_eq x 1 array of spatial autoregressive parameters n : int number of observations in each cross-section n2 : int n/2 n_eq : int number of equations bigE : array n by n_eq matrix with vectors of residuals for each equation I : sparse Identity matrix WS : sparse spatial weights matrix Returns ------- -clik : float negative (for minimize) of the concentrated log-likelihood function """ WbigE = WS @ bigE spfbigE = bigE - WbigE * lam.T sig = np.dot(spfbigE.T, spfbigE) / n ldet = la.slogdet(sig)[1] logjac = jacob(lam, n_eq, I, WS) clik = -n2 * ldet + logjac return -clik # negative for minimize def surerrvm(n, n_eq, w, lam, sig): """ Asymptotic variance matrix for lambda and Sigma in ML SUR Error estimation Source: Anselin (1988) :cite:`Anselin1988`, Chapter 10. Parameters ---------- n : int number of cross-sectional observations n_eq : int number of equations w : spatial weights object lam : array n_eq by 1 vector with spatial autoregressive coefficients sig : array n_eq by n_eq matrix with cross-equation error covariances Returns ------- vm : array asymptotic variance-covariance matrix for spatial autoregressive coefficients and the upper triangular elements of Sigma n_eq + n_eq x (n_eq + 1) / 2 coefficients """ # inverse Sigma sigi = la.inv(sig) sisi = sigi * sig # elements of Psi_lam,lam # trace terms trDi = np.zeros((n_eq, 1)) trDDi = np.zeros((n_eq, 1)) trDTDi = np.zeros((n_eq, 1)) trDTiDj = np.zeros((n_eq, n_eq)) WS = w.sparse I = sp.identity(n) for i in range(n_eq): lami = lam[i][0] lamWS = WS.multiply(lami) B = I - lamWS bb = B.todense() Bi = la.inv(bb) D = WS @ Bi trDi[i] = np.trace(D) DD = np.dot(D, D) trDDi[i] = np.trace(DD) DD = np.dot(D.T, D) trDTDi[i] = np.trace(DD) for j in range(i + 1, n_eq): lamj = lam[j][0] lamWS = WS.multiply(lamj) B = I - lamWS bb = B.todense() Bi = la.inv(bb) Dj = WS @ Bi DD = np.dot(D.T, Dj) trDTiDj[i, j] = np.trace(DD) trDTiDj[j, i] = trDTiDj[i, j] np.fill_diagonal(trDTiDj, trDTDi) sisjT = sisi * trDTiDj Vll = np.diagflat(trDDi) + sisjT # elements of Psi_lam_sig P = int(n_eq * (n_eq + 1) / 2) # force ints to be ints tlist = [(i, j) for i in range(n_eq) for j in range(i, n_eq)] zog = sigi * trDi Vlsig = np.zeros((n_eq, P)) for i in range(n_eq): for j in range(n_eq): if i > j: jj = tlist.index((j, i)) else: jj = tlist.index((i, j)) Vlsig[i, jj] = zog[i, j] # top of Psi vtop = np.hstack((Vll, Vlsig)) # elements of Psi_sig_sig Vsig = np.zeros((P, P)) for ij in range(P): i, j = tlist[ij] for hk in range(P): h, k = tlist[hk] if i == j: if h == k: Vsig[ij, hk] = 0.5 * (sigi[i, h] ** 2) else: # h not equal to k Vsig[ij, hk] = sigi[i, h] * sigi[i, k] else: # i not equal to j if h == k: Vsig[ij, hk] = sigi[i, h] * sigi[j, h] else: # h not equal to k Vsig[ij, hk] = sigi[i, h] * sigi[j, k] + sigi[i, k] * sigi[j, h] Vsig = n * Vsig # bottom of Psi vbottom = np.hstack((Vlsig.T, Vsig)) # all of Psi vbig = np.vstack((vtop, vbottom)) # inverse of Psi vm = la.inv(vbig) return vm def _test(): import doctest start_suppress = np.get_printoptions()["suppress"] np.set_printoptions(suppress=True) doctest.testmod() np.set_printoptions(suppress=start_suppress) if __name__ == "__main__": _test() import numpy as np import libpysal from .sur_utils import sur_dictxy, sur_dictZ from libpysal.examples import load_example from libpysal.weights import Queen nat = load_example("Natregimes") db = libpysal.io.open(nat.get_path("natregimes.dbf"), "r") y_var = ["HR80", "HR90"] x_var = [["PS80", "UE80"], ["PS90", "UE90"]] w = Queen.from_shapefile(nat.get_path("natregimes.shp")) w.transform = "r" bigy0, bigX0, bigyvars0, bigXvars0 = sur_dictxy(db, y_var, x_var) reg0 = SURerrorML( bigy0, bigX0, w, #regimes=regimes, name_bigy=bigyvars0, name_bigX=bigXvars0, name_w="natqueen", name_ds="natregimes", vm=True, nonspat_diag=True, spat_diag=True, ) # reg0 = SURerrorGM(bigy0,bigX0,w,regimes=regimes,name_bigy=bigyvars0,name_bigX=bigXvars0,\ # name_w="natqueen",name_ds="natregimes",vm=False,nonspat_diag=True,spat_diag=False) print(reg0.summary)