Source code for spreg.diagnostics_sur

"""
Diagnostics for SUR and 3SLS estimation
"""

# Module authorship metadata. The trailing backslashes continue one single
# string literal across lines, so the leading indentation of the continuation
# lines is part of the string value.
__author__ = "Luc Anselin lanselin@gmail.com,    \
             Pedro V. Amaral pedrovma@gmail.com  \
             Tony Aburaad taburaad@uchicago.edu"


import numpy as np
import scipy.stats as stats
import numpy.linalg as la
from .sur_utils import sur_dict2mat, sur_mat2dict, sur_corr, spdot
from .regimes import buildR1var, wald_test


# Names exported by a star-import of this module.
# NOTE(review): sur_chow and sur_joinrho are defined in this module but are
# not listed here, so they must be imported explicitly by name.
__all__ = ["sur_setp", "sur_lrtest", "sur_lmtest", "lam_setp", "surLMe", "surLMlag"]



[docs]
def sur_setp(bigB, varb):
    """
    Standard errors, t-values and p-values for SUR coefficient estimates

    Parameters
    ----------
    bigB    : dictionary
              of regression coefficient estimates,
              one vector by equation (keys 0 .. n_eq-1)
    varb    : array
              variance-covariance matrix of coefficients

    Returns
    -------
    surinfdict : dictionary
                 with standard error, t-value, and
                 p-value array, one for each equation

    Notes
    -----
    The p-values are two-sided and taken from the standard normal
    distribution. The stacking of the coefficients and the split of the
    result by equation are delegated to sur_dict2mat and sur_mat2dict.

    """
    n_eq = len(bigB)
    # number of coefficients in each equation, as an n_eq x 1 integer column
    bigK = np.array(
        [bigB[eq].shape[0] for eq in range(n_eq)], dtype=np.int_
    ).reshape(n_eq, 1)

    coefs = sur_dict2mat(bigB)
    # standard errors as a column vector so they line up with coefs
    std_err = np.sqrt(varb.diagonal()).reshape(-1, 1)
    t_stat = coefs / std_err
    p_val = 2 * stats.norm.sf(np.abs(t_stat))

    # one row per coefficient: [se, t, p], then split back by equation
    return sur_mat2dict(np.hstack((std_err, t_stat, p_val)), bigK)




[docs]
def lam_setp(lam, vm):
    """
    Inference on the spatial error coefficients (lambda) in SUR Error ML

    Parameters
    ----------
    lam        : array
                 n_eq x 1 array with ML estimates for spatial error
                 autoregressive coefficient
    vm         : array
                 n_eq x n_eq subset of variance-covariance matrix for
                 lambda and Sigma in SUR Error ML
                 (needs to be subset from full vm)

    Returns
    -------
               : tuple
                 with arrays for standard error, t-value and p-value
                 (each element in the tuple is an n_eq x 1 array)

    Notes
    -----
    Only the diagonal of vm is used. The p-values are two-sided and
    based on the standard normal distribution.

    """
    # square roots of the variances, shaped as a column to match lam
    std_err = np.sqrt(vm.diagonal()).reshape(-1, 1)
    t_stat = lam / std_err
    p_val = 2 * stats.norm.sf(np.abs(t_stat))
    return (std_err, t_stat, p_val)




[docs]
def sur_lrtest(n, n_eq, ldetS0, ldetS1):
    """
    Likelihood Ratio test on off-diagonal elements of Sigma

    Parameters
    ----------
    n        : int
               cross-sectional dimension (number of observations for an equation)
    n_eq     : int
               number of equations
    ldetS0   : float
               log determinant of Sigma for OLS case
    ldetS1   : float
               log determinant of Sigma for SUR case (should be iterated)

    Returns
    -------
    (lrtest,M,pvalue) : tuple
                        with value of test statistic (lrtest),
                        degrees of freedom (M, as an integer)
                        p-value

    Notes
    -----
    The statistic is n * (ldetS0 - ldetS1), referred to a chi-squared
    distribution with n_eq * (n_eq - 1) / 2 degrees of freedom.

    """
    # one restriction for every distinct off-diagonal element of Sigma
    dof = 0.5 * n_eq * (n_eq - 1)
    stat = n * (ldetS0 - ldetS1)
    return (stat, int(dof), stats.chi2.sf(stat, dof))




[docs]
def sur_lmtest(n, n_eq, sig):
    """
    Lagrange Multiplier test on off-diagonal elements of Sigma

    Parameters
    ----------
    n        : int
               cross-sectional dimension (number of observations for an equation)
    n_eq     : int
               number of equations
    sig      : array
               inter-equation covariance matrix for null model (OLS)

    Returns
    -------
    (lmtest,M,pvalue) : tuple
                        with value of test statistic (lmtest),
                        degrees of freedom (M, as an integer)
                        p-value

    Notes
    -----
    The statistic is (n / 2) * (tr(R'R) - n_eq), with R the matrix
    returned by sur_corr(sig), referred to a chi-squared distribution
    with n_eq * (n_eq - 1) / 2 degrees of freedom.
    """
    corr = sur_corr(sig)
    # one restriction for every distinct off-diagonal element of Sigma
    dof = 0.5 * n_eq * (n_eq - 1)
    trace_rr = np.trace(corr.T @ corr)
    stat = 0.5 * n * (trace_rr - n_eq)
    return (stat, int(dof), stats.chi2.sf(stat, dof))




[docs]
def surLMe(n_eq, WS, bigE, sig):
    """
    Lagrange Multiplier test on error spatial autocorrelation in SUR

    Parameters
    ----------
    n_eq       : int
                 number of equations
    WS         : array
                 spatial weights matrix in sparse form
    bigE       : array
                 n x n_eq matrix of residuals by equation
    sig        : array
                 cross-equation error covariance matrix

    Returns
    -------
    (LMe,n_eq,pvalue) : tuple
                        with value of statistic (LMe), degrees
                        of freedom (n_eq) and p-value

    Notes
    -----
    With Si the inverse of sig and ``o`` the element-wise product, the
    score is the 1 x n_eq row vector of column sums of Si o (E'WE) and
    the statistic is score [tr(WW) I + tr(W'W) (Si o sig)]^{-1} score'.

    """
    sig_inv = la.inv(sig)

    # score: column sums of Si o E'WE, kept as a 1 x n_eq row vector
    cross = bigE.T @ (WS @ bigE)
    score = (sig_inv * cross).sum(axis=0).reshape(1, n_eq)

    # trace terms of the weights matrix
    tr_ww = (WS @ WS).diagonal().sum()
    tr_wtw = (WS.T @ WS).diagonal().sum()

    # matrix whose inverse weights the score in the quadratic form
    info = tr_ww * np.identity(n_eq) + tr_wtw * (sig_inv * sig)

    # quadratic form yields a 1 x 1 array; extract the scalar
    stat = (score @ la.inv(info) @ score.T)[0][0]
    return (stat, n_eq, stats.chi2.sf(stat, n_eq))




[docs]
def surLMlag(n_eq, WS, bigy, bigX, bigE, bigYP, sig, varb):
    """
    Lagrange Multiplier test on lag spatial autocorrelation in SUR

    Parameters
    ----------
    n_eq       : int
                 number of equations
    WS         : spatial weights matrix in sparse form
    bigy       : dictionary
                 with y values
    bigX       : dictionary
                 with X values
    bigE       : array
                 n x n_eq matrix of residuals by equation
    bigYP      : array
                 n x n_eq matrix of predicted values by equation
    sig        : array
                 cross-equation error covariance matrix
    varb       : array
                 variance-covariance matrix for b coefficients (inverse of Ibb)

    Returns
    -------
    (LMlag,n_eq,pvalue) : tuple
                          with value of statistic (LMlag), degrees
                          of freedom (n_eq) and p-value

    Notes
    -----
    With Si the inverse of sig and ``o`` the element-wise (Hadamard)
    product, the score for the spatial lag coefficient of equation h is
    sum_g Si[g, h] * e_g' W y_h, i.e. the h-th column sum of Si o (E'WY).
    The element-wise product is therefore required here; a matrix
    product Si @ (E'WY) would mix terms across equations.

    """
    sigi = la.inv(sig)

    # Score: column sums of Si o E'WY, kept as a 1 x n_eq row vector
    Y = np.hstack([bigy[r] for r in range(n_eq)])
    EWY = np.dot(bigE.T, WS @ Y)
    score = (sigi * EWY).sum(axis=0).reshape(1, n_eq)

    # I(rho,rho) as partitioned inverse, eq 72
    # trace terms
    trWW = (WS @ WS).diagonal().sum()  # T1
    trWtW = (WS.T @ WS).diagonal().sum()  # T2

    # I(rho,rho), eq. 75
    WbigYP = WS @ bigYP
    Ipp = (
        trWW * np.identity(n_eq)
        + trWtW * (sigi * sig)
        + sigi * np.dot(WbigYP.T, WbigYP)
    )

    # I(b,b) inverse is varb

    # I(b,rho): one block of rows per equation; row r of sigi scales the
    # columns of X_r' W YP. Blocks are stacked in equation order.
    bp = np.vstack(
        [sigi[r] * spdot(bigX[r].T, WbigYP) for r in range(n_eq)]
    )

    # partitioned inverse of information matrix
    Ippi = la.inv(Ipp - np.dot(np.dot(bp.T, varb), bp))

    # test statistic: quadratic form yields a 1 x 1 array
    LMlag = np.dot(np.dot(score, Ippi), score.T)[0][0]
    # p-value
    pvalue = stats.chi2.sf(LMlag, n_eq)
    return (LMlag, n_eq, pvalue)



def sur_chow(n_eq, bigK, bSUR, varb):
    """
    Wald tests on equality of each regression coefficient across the
    equations of a SUR specification

    Note: requires a previous check on constancy of number of coefficients
    across equations; no other checks are carried out, so it is possible
    that the results are meaningless if the variables are not listed in
    the same order in each equation.

    Parameters
    ----------
    n_eq       : int
                 number of equations
    bigK       : array
                 with the number of variables by equation (includes constant);
                 only the entry for the first equation is read
    bSUR       : dictionary
                 with the SUR regression coefficients by equation
    varb       : array
                 the variance-covariance matrix for the SUR regression
                 coefficients

    Returns
    -------
    test       : list
                 with for each coefficient (in order) a tuple with the
                 value of the test statistic, the degrees of freedom
                 (n_eq - 1), and the p-value

    """
    n_coef = bigK[0][0]
    coefs = sur_dict2mat(bSUR)
    dof = n_eq - 1

    def _test_coefficient(pos):
        # restriction matrix for coefficient `pos`, tested against a zero
        # right-hand side with one row per restriction
        restriction = buildR1var(pos, n_coef, 0, 0, n_eq)
        stat, pval = wald_test(coefs, restriction, np.zeros((dof, 1)), varb)
        return (stat, dof, pval)

    return [_test_coefficient(pos) for pos in range(n_coef)]


def sur_joinrho(n_eq, bigK, bSUR, varb):
    """
    Joint Wald test on the spatial autoregressive coefficients in SUR

    Parameters
    ----------
    n_eq       : int
                 number of equations
    bigK       : array
                 n_eq x 1 array with number of variables by equation
                 (includes constant term, exogenous and endogeneous and
                 spatial lag)
    bSUR       : dictionary
                 with regression coefficients by equation, with
                 the spatial autoregressive term as last
    varb       : array
                 variance-covariance matrix for regression coefficients

    Returns
    -------
               : tuple
                 with test statistic, degrees of freedom, p-value

    Notes
    -----
    The restriction matrix has one row per equation, with a single 1 in
    the column of that equation's last coefficient in the stacked
    coefficient vector; the right-hand side of the restrictions is zero.

    """
    coefs = sur_dict2mat(bSUR)

    # stacked position of each equation's last coefficient: running total
    # of the coefficient counts, shifted to a zero-based index
    counts = np.asarray(bigK).ravel()[:n_eq]
    rho_cols = np.cumsum(counts) - 1

    restrictions = np.zeros((n_eq, varb.shape[0]))
    restrictions[np.arange(n_eq), rho_cols] = 1
    rhs = np.zeros((n_eq, 1))

    stat, pval = wald_test(coefs, restrictions, rhs, varb)
    return (stat, n_eq, pval)