"""
ML Estimation of Spatial Lag Model
"""

__author__ = "Luc Anselin lanselin@gmail.com, \
              Pedro V. Amaral pedrovma@gmail.com, \
              Serge Rey srey@asu.edu, \
              Levi Wolf levi.john.wolf@gmail.com"

import numpy as np
import numpy.linalg as la
from scipy import sparse as sp
from scipy.sparse.linalg import splu as SuperLU
from .utils import RegressionPropsY, RegressionPropsVM, inverse_prod, set_warn, get_lags
from .sputils import spdot, spfill_diagonal, spinv
from . import diagnostics as DIAG
from . import user_output as USER
import pandas as pd
from .output import output, _nonspat_top, _spat_diag_out, _spat_pseudo_r2, _summary_impacts
from .w_utils import symmetrize
from libpysal import weights

try:
    from scipy.optimize import minimize_scalar

    minimize_scalar_available = True
except ImportError:
    minimize_scalar_available = False

__all__ = ["ML_Lag"]


class BaseML_Lag(RegressionPropsY, RegressionPropsVM):

    """
    ML estimation of the spatial lag model (note no consistency
    checks, diagnostics or constants added) :cite:`Anselin1988`

    Parameters
    ----------
    y            : array
                   nx1 array for dependent variable
    x            : array
                   Two dimensional array with n rows and one column for each
                   independent (exogenous) variable, excluding the constant
    w            : pysal W object
                   Spatial weights object
    slx_lags     : integer
                   Number of spatial lags of X to include in the model specification.
                   If slx_lags>0, the specification becomes of the Spatial Durbin type.
    method       : string
                   if 'full', brute force calculation (full matrix expressions)
                   if 'ord', Ord eigenvalue method
                   if 'LU', LU sparse matrix decomposition
    epsilon      : float
                   tolerance criterion in minimize_scalar function and inverse_product

    Attributes
    ----------
    betas        : array
                   (k+1)x1 array of estimated coefficients (rho first)
    rho          : float
                   estimate of spatial autoregressive coefficient
    u            : array
                   nx1 array of residuals
    predy        : array
                   nx1 array of predicted y values
    n            : integer
                   Number of observations
    k            : integer
                   Number of variables for which coefficients are estimated
                   (including the constant, excluding the rho)
    y            : array
                   nx1 array for dependent variable
    x            : array
                   Two dimensional array with n rows and one column for each
                   independent (exogenous) variable, including the constant
    method       : string
                   log Jacobian method
                   if 'full': brute force (full matrix computations)
                   if 'ord' : Ord eigenvalue method
                   if 'LU'  : LU sparse matrix decomposition
    epsilon      : float
                   tolerance criterion used in minimize_scalar function and inverse_product
    mean_y       : float
                   Mean of dependent variable
    std_y        : float
                   Standard deviation of dependent variable
    vm           : array
                   Variance covariance matrix (k+1 x k+1)
    vm1          : array
                   Variance covariance matrix (k+2 x k+2) includes sigma2
    sig2         : float
                   Sigma squared used in computations
    logll        : float
                   maximized log-likelihood (including constant terms)
    predy_e      : array
                   predicted values from reduced form
    e_pred       : array
                   prediction errors using reduced form predicted values


    Examples
    --------

    >>> import numpy as np
    >>> import libpysal
    >>> from libpysal.examples import load_example
    >>> import geopandas as gpd
    >>> from libpysal.weights import Queen
    >>> import spreg
    >>> np.set_printoptions(suppress=True) #prevent scientific format
    >>> baltimore = load_example('Baltimore')
    >>> db =  libpysal.io.open(baltimore.get_path("baltim.dbf"),'r')
    >>> df = gpd.read_file(baltimore.get_path("baltim.shp"))
    >>> ds_name = "baltim.dbf"
    >>> y_name = "PRICE"
    >>> y = np.array(db.by_col(y_name)).T
    >>> y.shape = (len(y),1)
    >>> x_names = ["NROOM","NBATH","PATIO","FIREPL","AC","GAR","AGE","LOTSZ","SQFT"]
    >>> x = np.array([db.by_col(var) for var in x_names]).T
    >>> x = np.hstack((np.ones((len(y),1)),x))
    >>> w = Queen.from_dataframe(df)
    >>> w.transform = 'r'
    >>> w_name = "baltim_q.gal"
    >>> mllag = spreg.ml_lag.BaseML_Lag(y,x,w,method='ord') #doctest: +SKIP
    >>> "{0:.6f}".format(mllag.rho) #doctest: +SKIP
    '0.425885'
    >>> np.around(mllag.betas, decimals=4) #doctest: +SKIP
    array([[ 4.3675],
           [ 0.7502],
           [ 5.6116],
           [ 7.0497],
           [ 7.7246],
           [ 6.1231],
           [ 4.6375],
           [-0.1107],
           [ 0.0679],
           [ 0.0794],
           [ 0.4259]])
    >>> "{0:.6f}".format(mllag.mean_y) #doctest: +SKIP
    '44.307180'
    >>> "{0:.6f}".format(mllag.std_y) #doctest: +SKIP
    '23.606077'
    >>> np.around(np.diag(mllag.vm1), decimals=4) #doctest: +SKIP
    array([  23.8716,    1.1222,    3.0593,    7.3416,    5.6695,    5.4698,
              2.8684,    0.0026,    0.0002,    0.0266,    0.0032,  220.1292])
    >>> np.around(np.diag(mllag.vm), decimals=4) #doctest: +SKIP
    array([ 23.8716,   1.1222,   3.0593,   7.3416,   5.6695,   5.4698,
             2.8684,   0.0026,   0.0002,   0.0266,   0.0032])
    >>> "{0:.6f}".format(mllag.sig2) #doctest: +SKIP
    '151.458698'
    >>> "{0:.6f}".format(mllag.logll) #doctest: +SKIP
    '-832.937174'
    >>> mllag = spreg.ml_lag.BaseML_Lag(y,x,w) #doctest: +SKIP
    >>> "{0:.6f}".format(mllag.rho) #doctest: +SKIP
    '0.425885'
    >>> np.around(mllag.betas, decimals=4) #doctest: +SKIP
    array([[ 4.3675],
           [ 0.7502],
           [ 5.6116],
           [ 7.0497],
           [ 7.7246],
           [ 6.1231],
           [ 4.6375],
           [-0.1107],
           [ 0.0679],
           [ 0.0794],
           [ 0.4259]])
    >>> "{0:.6f}".format(mllag.mean_y) #doctest: +SKIP
    '44.307180'
    >>> "{0:.6f}".format(mllag.std_y) #doctest: +SKIP
    '23.606077'
    >>> np.around(np.diag(mllag.vm1), decimals=4) #doctest: +SKIP
    array([  23.8716,    1.1222,    3.0593,    7.3416,    5.6695,    5.4698,
              2.8684,    0.0026,    0.0002,    0.0266,    0.0032,  220.1292])
    >>> np.around(np.diag(mllag.vm), decimals=4) #doctest: +SKIP
    array([ 23.8716,   1.1222,   3.0593,   7.3416,   5.6695,   5.4698,
             2.8684,   0.0026,   0.0002,   0.0266,   0.0032])
    >>> "{0:.6f}".format(mllag.sig2) #doctest: +SKIP
    '151.458698'
    >>> "{0:.6f}".format(mllag.logll) #doctest: +SKIP
    '-832.937174'
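
    The sparse LU variant follows the same pattern; since all three Jacobian
    methods maximize the same likelihood, rho should match the values above up
    to the optimizer tolerance (a sketch, outputs not re-verified here):

    >>> mllag = spreg.ml_lag.BaseML_Lag(y,x,w,method='LU') #doctest: +SKIP
    >>> "{0:.6f}".format(mllag.rho) #doctest: +SKIP
    '0.425885'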


    """

    def __init__(self, y, x, w, slx_lags=0, method="full", epsilon=0.0000001):
        # set up main regression variables and spatial filters
        self.y = y
        self.x = x
        self.method = method
        self.epsilon = epsilon
        # W = w.full()[0]
        # Wsp = w.sparse
        ylag = weights.lag_spatial(w, y)
        # b0, b1, e0 and e1

        # now set in ML_Lag
        # if slx_lags > 0:
        #     self.x = np.hstack((self.x, get_lags(w, self.x[:, 1:], slx_lags)))

        self.n, self.k = self.x.shape
        xtx = spdot(self.x.T, self.x)
        xtxi = la.inv(xtx)
        xty = spdot(self.x.T, self.y)
        xtyl = spdot(self.x.T, ylag)
        b0 = spdot(xtxi, xty)
        b1 = spdot(xtxi, xtyl)
        e0 = self.y - spdot(self.x, b0)
        e1 = ylag - spdot(self.x, b1)
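        # concentrate out b and sig2: for any rho the ML solutions are
        # b(rho) = b0 - rho*b1 and e(rho) = e0 - rho*e1, so the likelihood
        # can be maximized over rho alone (see lag_c_loglik* below)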
        methodML = method.upper()
        # call minimizer using concentrated log-likelihood to get rho
        if methodML in ["FULL", "LU", "ORD"]:
            if methodML == "FULL":
                W = w.full()[0]  # moved here
                res = minimize_scalar(
                    lag_c_loglik,
                    0.0,
                    bounds=(-1.0, 1.0),
                    args=(self.n, e0, e1, W),
                    method="bounded",
                    options={'xatol': epsilon},
                )
            elif methodML == "LU":
                I = sp.identity(w.n)
                Wsp = w.sparse  # moved here
                W = Wsp  # .tocsc()
                res = minimize_scalar(
                    lag_c_loglik_sp,
                    0.0,
                    bounds=(-1.0, 1.0),
                    args=(self.n, e0, e1, I, Wsp),
                    method="bounded",
                    options={'xatol': epsilon},
                )
            elif methodML == "ORD":
                # check on symmetry structure
                if w.asymmetry(intrinsic=False) == []:
                    ww = symmetrize(w)
                    WW = np.array(ww.todense())
                    evals = la.eigvalsh(WW)
                    W = WW
                else:
                    W = w.full()[0]  # moved here
                    evals = la.eigvals(W)
                res = minimize_scalar(
                    lag_c_loglik_ord,
                    0.0,
                    bounds=(-1.0, 1.0),
                    args=(self.n, e0, e1, evals),
                    method="bounded",
                    options={'xatol': epsilon},
                )
        else:
            # unsupported method: fail early with a clear message instead of
            # crashing later on an undefined result
            raise ValueError("{0} is an unsupported method".format(methodML))

        self.rho = res.x[0][0]

        # compute full log-likelihood, including constants
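        # res.fun is the minimized negative concentrated log-likelihood;
        # restoring the constants dropped in the concentration yields
        # logL = -res.fun - (n/2)*ln(2*pi) - n/2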
        ln2pi = np.log(2.0 * np.pi)
        llik = -res.fun - self.n / 2.0 * ln2pi - self.n / 2.0
        self.logll = llik[0][0]

        # b, residuals and predicted values

        b = b0 - self.rho * b1
        self.betas = np.vstack((b, self.rho))  # rho added as last coefficient
        self.u = e0 - self.rho * e1
        self.predy = self.y - self.u

        xb = spdot(self.x, b)

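        # reduced form: predy_e = (I - rho W)^{-1} X b, approximated below
        # through a power expansion of the inverse (inv_method="power_exp")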
        self.predy_e = inverse_prod(
            w.sparse, xb, self.rho, inv_method="power_exp", threshold=epsilon
        )
        self.e_pred = self.y - self.predy_e

        # residual variance
        self._cache = {}
        self.sig2 = self.sig2n  # no allowance for division by n-k

        # information matrix
        # if w should be kept sparse, how can we do the following:
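        # with A = I - rho*W, the traces tr(W A^{-1}), tr((W A^{-1})^2) and
        # tr((W A^{-1})' W A^{-1}) computed below enter the rho block of the
        # information matrix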
        a = -self.rho * W
        spfill_diagonal(a, 1.0)
        ai = spinv(a)
        wai = spdot(W, ai)
        tr1 = wai.diagonal().sum()  # same for sparse and dense

        wai2 = spdot(wai, wai)
        tr2 = wai2.diagonal().sum()

        waiTwai = spdot(wai.T, wai)
        tr3 = waiTwai.diagonal().sum()
        ### to here

        wpredy = weights.lag_spatial(w, self.predy_e)
        wpyTwpy = spdot(wpredy.T, wpredy)
        xTwpy = spdot(self.x.T, wpredy)

        # order of variables is beta, rho, sigma2

        v1 = np.vstack((xtx / self.sig2, xTwpy.T / self.sig2, np.zeros((1, self.k))))
        v2 = np.vstack(
            (xTwpy / self.sig2, tr2 + tr3 + wpyTwpy / self.sig2, tr1 / self.sig2)
        )
        v3 = np.vstack(
            (np.zeros((self.k, 1)), tr1 / self.sig2, self.n / (2.0 * self.sig2 ** 2))
        )

        v = np.hstack((v1, v2, v3))
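        # v stacks the information matrix in (beta, rho, sigma2) order;
        # its inverse is the asymptotic variance-covariance matrix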

        self.vm1 = la.inv(v)  # vm1 includes variance for sigma2
        self.vm = self.vm1[:-1, :-1]  # vm is for coefficients only


class ML_Lag(BaseML_Lag):

    """
    ML estimation of the spatial lag model with all results and diagnostics;
    :cite:`Anselin1988`

    Parameters
    ----------
    y            : numpy.ndarray or pandas.Series
                   nx1 array for dependent variable
    x            : numpy.ndarray or pandas object
                   Two dimensional array with n rows and one column for each
                   independent (exogenous) variable, excluding the constant
    w            : pysal W object
                   Spatial weights object
    slx_lags     : integer
                   Number of spatial lags of X to include in the model specification.
                   If slx_lags>0, the specification becomes of the Spatial Durbin type.
    slx_vars     : either "All" (default) or list of booleans to select x variables
                   to be lagged
    regimes      : list or pandas.Series
                   List of n values with the mapping of each observation to a regime.
                   Assumed to be aligned with 'x'.
                   For other regimes-specific arguments, see ML_Lag_Regimes
    method       : string
                   if 'full', brute force calculation (full matrix expressions)
                   if 'ord', Ord eigenvalue method
                   if 'LU', LU sparse matrix decomposition
    epsilon      : float
                   tolerance criterion in minimize_scalar function and inverse_product
    spat_diag    : boolean
                   If True, then compute Common Factor Hypothesis test when applicable
    spat_impacts : string or list
                   Include average direct impact (ADI), average indirect impact (AII),
                   and average total impact (ATI) in summary results.
                   Options are 'simple', 'full', 'power', 'all' or None.
                   See sputils.spmultiplier for more information.
    vm           : boolean
                   if True, include variance-covariance matrix in summary
                   results
    name_y       : string
                   Name of dependent variable for use in output
    name_x       : list of strings
                   Names of independent variables for use in output
    name_w       : string
                   Name of weights matrix for use in output
    name_ds      : string
                   Name of dataset for use in output
    latex        : boolean
                   Specifies if summary is to be printed in latex format

    Attributes
    ----------
    output       : dataframe
                   regression results pandas dataframe
    betas        : array
                   (k+1)x1 array of estimated coefficients (rho first)
    rho          : float
                   estimate of spatial autoregressive coefficient
    u            : array
                   nx1 array of residuals
    predy        : array
                   nx1 array of predicted y values
    n            : integer
                   Number of observations
    k            : integer
                   Number of variables for which coefficients are estimated
                   (including the constant, excluding the rho)
    y            : array
                   nx1 array for dependent variable
    x            : array
                   Two dimensional array with n rows and one column for each
                   independent (exogenous) variable, including the constant
    method       : string
                   log Jacobian method
                   if 'full': brute force (full matrix computations)
                   if 'ord' : Ord eigenvalue method
                   if 'LU'  : LU sparse matrix decomposition
    epsilon      : float
                   tolerance criterion used in minimize_scalar function and inverse_product
    mean_y       : float
                   Mean of dependent variable
    std_y        : float
                   Standard deviation of dependent variable
    vm           : array
                   Variance covariance matrix (k+1 x k+1), all coefficients
    vm1          : array
                   Variance covariance matrix (k+2 x k+2), includes sig2
    sig2         : float
                   Sigma squared used in computations
    logll        : float
                   maximized log-likelihood (including constant terms)
    aic          : float
                   Akaike information criterion
    schwarz      : float
                   Schwarz criterion
    cfh_test     : tuple
                   Common Factor Hypothesis test; tuple contains the pair (statistic,
                   p-value). Only when it applies (see specific documentation).
    predy_e      : array
                   predicted values from reduced form
    e_pred       : array
                   prediction errors using reduced form predicted values
    pr2          : float
                   Pseudo R squared (squared correlation between y and ypred)
    pr2_e        : float
                   Pseudo R squared (squared correlation between y and ypred_e
                   (using reduced form))
    utu          : float
                   Sum of squared residuals
    std_err      : array
                   1xk array of standard errors of the betas
    z_stat       : list of tuples
                   z statistic; each tuple contains the pair (statistic,
                   p-value), where each is a float
    sp_multipliers: dict
                   Dictionary of spatial multipliers (if spat_impacts is not None)
    name_y       : string
                   Name of dependent variable for use in output
    name_x       : list of strings
                   Names of independent variables for use in output
    name_w       : string
                   Name of weights matrix for use in output
    name_ds      : string
                   Name of dataset for use in output
    title        : string
                   Name of the regression method used

    Examples
    --------

    >>> import numpy as np
    >>> import libpysal
    >>> from libpysal.examples import load_example
    >>> from libpysal.weights import Queen
    >>> import geopandas as gpd
    >>> from spreg import ML_Lag
    >>> np.set_printoptions(suppress=True) #prevent scientific format
    >>> baltimore = load_example('Baltimore')
    >>> db = libpysal.io.open(baltimore.get_path("baltim.dbf"),'r')
    >>> df = gpd.read_file(baltimore.get_path("baltim.shp"))
    >>> ds_name = "baltim.dbf"
    >>> y_name = "PRICE"
    >>> y = np.array(db.by_col(y_name)).T
    >>> y.shape = (len(y),1)
    >>> x_names = ["NROOM","NBATH","PATIO","FIREPL","AC","GAR","AGE","LOTSZ","SQFT"]
    >>> x = np.array([db.by_col(var) for var in x_names]).T
    >>> w = Queen.from_dataframe(df)
    >>> w_name = "baltim_q.gal"
    >>> w.transform = 'r'
    >>> mllag = ML_Lag(y,x,w,name_y=y_name,name_x=x_names,\
               name_w=w_name,name_ds=ds_name) #doctest: +SKIP
    >>> np.around(mllag.betas, decimals=4) #doctest: +SKIP
    array([[ 4.3675],
           [ 0.7502],
           [ 5.6116],
           [ 7.0497],
           [ 7.7246],
           [ 6.1231],
           [ 4.6375],
           [-0.1107],
           [ 0.0679],
           [ 0.0794],
           [ 0.4259]])
    >>> "{0:.6f}".format(mllag.rho) #doctest: +SKIP
    '0.425885'
    >>> "{0:.6f}".format(mllag.mean_y) #doctest: +SKIP
    '44.307180'
    >>> "{0:.6f}".format(mllag.std_y) #doctest: +SKIP
    '23.606077'
    >>> np.around(np.diag(mllag.vm1), decimals=4) #doctest: +SKIP
    array([  23.8716,    1.1222,    3.0593,    7.3416,    5.6695,    5.4698,
              2.8684,    0.0026,    0.0002,    0.0266,    0.0032,  220.1292])
    >>> np.around(np.diag(mllag.vm), decimals=4) #doctest: +SKIP
    array([ 23.8716,   1.1222,   3.0593,   7.3416,   5.6695,   5.4698,
             2.8684,   0.0026,   0.0002,   0.0266,   0.0032])
    >>> "{0:.6f}".format(mllag.sig2) #doctest: +SKIP
    '151.458698'
    >>> "{0:.6f}".format(mllag.logll) #doctest: +SKIP
    '-832.937174'
    >>> "{0:.6f}".format(mllag.aic) #doctest: +SKIP
    '1687.874348'
    >>> "{0:.6f}".format(mllag.schwarz) #doctest: +SKIP
    '1724.744787'
    >>> "{0:.6f}".format(mllag.pr2) #doctest: +SKIP
    '0.727081'
    >>> "{0:.4f}".format(mllag.pr2_e) #doctest: +SKIP
    '0.7062'
    >>> "{0:.4f}".format(mllag.utu) #doctest: +SKIP
    '31957.7853'
    >>> np.around(mllag.std_err, decimals=4) #doctest: +SKIP
    array([ 4.8859,  1.0593,  1.7491,  2.7095,  2.3811,  2.3388,  1.6936,
            0.0508,  0.0146,  0.1631,  0.057 ])
    >>> np.around(mllag.z_stat, decimals=4) #doctest: +SKIP
    array([[ 0.8939,  0.3714],
           [ 0.7082,  0.4788],
           [ 3.2083,  0.0013],
           [ 2.6018,  0.0093],
           [ 3.2442,  0.0012],
           [ 2.6181,  0.0088],
           [ 2.7382,  0.0062],
           [-2.178 ,  0.0294],
           [ 4.6487,  0.    ],
           [ 0.4866,  0.6266],
           [ 7.4775,  0.    ]])
    >>> mllag.name_y #doctest: +SKIP
    'PRICE'
    >>> mllag.name_x #doctest: +SKIP
    ['CONSTANT', 'NROOM', 'NBATH', 'PATIO', 'FIREPL', 'AC', 'GAR', 'AGE', 'LOTSZ', 'SQFT', 'W_PRICE']
    >>> mllag.name_w #doctest: +SKIP
    'baltim_q.gal'
    >>> mllag.name_ds #doctest: +SKIP
    'baltim.dbf'
    >>> mllag.title #doctest: +SKIP
    'MAXIMUM LIKELIHOOD SPATIAL LAG (METHOD = FULL)'
    >>> mllag = ML_Lag(y,x,w,method='ord',name_y=y_name,name_x=x_names,\
               name_w=w_name,name_ds=ds_name) #doctest: +SKIP
    >>> np.around(mllag.betas, decimals=4) #doctest: +SKIP
    array([[ 4.3675],
           [ 0.7502],
           [ 5.6116],
           [ 7.0497],
           [ 7.7246],
           [ 6.1231],
           [ 4.6375],
           [-0.1107],
           [ 0.0679],
           [ 0.0794],
           [ 0.4259]])
    >>> "{0:.6f}".format(mllag.rho) #doctest: +SKIP
    '0.425885'
    >>> "{0:.6f}".format(mllag.mean_y) #doctest: +SKIP
    '44.307180'
    >>> "{0:.6f}".format(mllag.std_y) #doctest: +SKIP
    '23.606077'
    >>> np.around(np.diag(mllag.vm1), decimals=4) #doctest: +SKIP
    array([  23.8716,    1.1222,    3.0593,    7.3416,    5.6695,    5.4698,
              2.8684,    0.0026,    0.0002,    0.0266,    0.0032,  220.1292])
    >>> np.around(np.diag(mllag.vm), decimals=4) #doctest: +SKIP
    array([ 23.8716,   1.1222,   3.0593,   7.3416,   5.6695,   5.4698,
             2.8684,   0.0026,   0.0002,   0.0266,   0.0032])
    >>> "{0:.6f}".format(mllag.sig2) #doctest: +SKIP
    '151.458698'
    >>> "{0:.6f}".format(mllag.logll) #doctest: +SKIP
    '-832.937174'
    >>> "{0:.6f}".format(mllag.aic) #doctest: +SKIP
    '1687.874348'
    >>> "{0:.6f}".format(mllag.schwarz) #doctest: +SKIP
    '1724.744787'
    >>> "{0:.6f}".format(mllag.pr2) #doctest: +SKIP
    '0.727081'
    >>> "{0:.6f}".format(mllag.pr2_e) #doctest: +SKIP
    '0.706198'
    >>> "{0:.4f}".format(mllag.utu) #doctest: +SKIP
    '31957.7853'
    >>> np.around(mllag.std_err, decimals=4) #doctest: +SKIP
    array([ 4.8859,  1.0593,  1.7491,  2.7095,  2.3811,  2.3388,  1.6936,
            0.0508,  0.0146,  0.1631,  0.057 ])
    >>> np.around(mllag.z_stat, decimals=4) #doctest: +SKIP
    array([[ 0.8939,  0.3714],
           [ 0.7082,  0.4788],
           [ 3.2083,  0.0013],
           [ 2.6018,  0.0093],
           [ 3.2442,  0.0012],
           [ 2.6181,  0.0088],
           [ 2.7382,  0.0062],
           [-2.178 ,  0.0294],
           [ 4.6487,  0.    ],
           [ 0.4866,  0.6266],
           [ 7.4775,  0.    ]])
    >>> mllag.name_y #doctest: +SKIP
    'PRICE'
    >>> mllag.name_x #doctest: +SKIP
    ['CONSTANT', 'NROOM', 'NBATH', 'PATIO', 'FIREPL', 'AC', 'GAR', 'AGE', 'LOTSZ', 'SQFT', 'W_PRICE']
    >>> mllag.name_w #doctest: +SKIP
    'baltim_q.gal'
    >>> mllag.name_ds #doctest: +SKIP
    'baltim.dbf'
    >>> mllag.title #doctest: +SKIP
    'MAXIMUM LIKELIHOOD SPATIAL LAG (METHOD = ORD)'
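
    The Spatial Durbin variant with impact summaries follows the same pattern;
    a sketch only, the title string below mirrors the constructor code and the
    fitted values are not reproduced here:

    >>> mllag = ML_Lag(y,x,w,slx_lags=1,spat_impacts='all',name_y=y_name,\
               name_x=x_names,name_w=w_name,name_ds=ds_name) #doctest: +SKIP
    >>> mllag.title #doctest: +SKIP
    'MAXIMUM LIKELIHOOD SPATIAL LAG WITH SLX - SPATIAL DURBIN MODEL (METHOD = FULL)'
    """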

    def __init__(
        self,
        y,
        x,
        w,
        slx_lags=0,
        slx_vars="All",
        regimes=None,
        method="full",
        epsilon=0.0000001,
        spat_impacts="simple",
        vm=False,
        spat_diag=True,
        name_y=None,
        name_x=None,
        name_w=None,
        name_ds=None,
        latex=False,
        **kwargs
    ):
        if regimes is not None:
            from .ml_lag_regimes import ML_Lag_Regimes

            self.__class__ = ML_Lag_Regimes
            self.__init__(
                y=y,
                x=x,
                regimes=regimes,
                w=w,
                slx_lags=slx_lags,
                method=method,
                epsilon=epsilon,
                spat_impacts=spat_impacts,
                vm=vm,
                spat_diag=spat_diag,
                name_y=name_y,
                name_x=name_x,
                name_w=name_w,
                name_ds=name_ds,
                latex=latex,
                **kwargs
            )
        else:
            n = USER.check_arrays(y, x)
            y, name_y = USER.check_y(y, n, name_y)
            w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags)
            x_constant, name_x, warn = USER.check_constant(x, name_x)
            name_x = USER.set_name_x(
                name_x, x_constant
            )  # needs to be initialized for none, now with constant
            set_warn(self, warn)
            method = method.upper()

            # using flex_wx
            kx = len(name_x)
            if slx_lags > 0:
                x_constant, name_x = USER.flex_wx(
                    w,
                    x=x_constant,
                    name_x=name_x,
                    constant=True,
                    slx_lags=slx_lags,
                    slx_vars=slx_vars,
                )
                if isinstance(slx_vars, list):
                    kw = slx_vars.count(True)
                    if kw < kx - 1:
                        spat_diag = False  # no common factor test
                else:
                    kw = kx - 1

            BaseML_Lag.__init__(
                self,
                y=y,
                x=x_constant,
                w=w,
                slx_lags=slx_lags,
                method=method,
                epsilon=epsilon,
            )
            # increase by 1 to have correct aic and sc, include rho in count
            self.k += 1
            if slx_lags > 0:
                # kx = len(name_x)
                # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags)  # exclude constant
                self.title = (
                    "MAXIMUM LIKELIHOOD SPATIAL LAG WITH SLX - SPATIAL DURBIN MODEL"
                    + " (METHOD = "
                    + method
                    + ")"
                )
                var_types = ['o'] + ['x'] * (kx - 1) + ['wx'] * (kw) * slx_lags + ['rho']
            else:
                self.title = "MAXIMUM LIKELIHOOD SPATIAL LAG" + " (METHOD = " + method + ")"
                var_types = ['o'] + ['x'] * (len(name_x) - 1) + ['rho']
            self.slx_lags = slx_lags
            self.slx_vars = slx_vars
            self.name_ds = USER.set_name_ds(name_ds)
            self.name_y = USER.set_name_y(name_y)
            self.name_x = name_x  # already has constant
            name_ylag = USER.set_name_yend_sp(self.name_y)
            self.name_x.append(name_ylag)  # rho changed to last position
            self.name_w = USER.set_name_w(name_w, w)
            self.aic = DIAG.akaike(reg=self)
            self.schwarz = DIAG.schwarz(reg=self)
            self.output = pd.DataFrame(self.name_x, columns=['var_names'])
            self.output['var_type'] = var_types
            self.output['regime'], self.output['equation'] = (0, 0)
            self.other_top = _spat_pseudo_r2(self)
            self.other_top += _nonspat_top(self, ml=True)
            diag_out = None
            if spat_diag and slx_lags == 1:
                diag_out = _spat_diag_out(self, w, 'yend', ml=True)
            if spat_impacts:
                self.sp_multipliers, impacts_str = _summary_impacts(
                    self, w, spat_impacts, slx_lags, slx_vars
                )
                try:
                    diag_out += impacts_str
                except TypeError:
                    diag_out = impacts_str
            output(reg=self, vm=vm, robust=False, other_end=diag_out, latex=latex)

def lag_c_loglik(rho, n, e0, e1, W):
    # concentrated log-lik for lag model, no constants, brute force
    er = e0 - rho * e1
    sig2 = spdot(er.T, er) / n
    nlsig2 = (n / 2.0) * np.log(sig2)
    a = -rho * W
    spfill_diagonal(a, 1.0)
    jacob = np.log(np.linalg.det(a))
    # this is the negative of the concentrated log lik for minimization
    clik = nlsig2 - jacob
    return clik


def lag_c_loglik_sp(rho, n, e0, e1, I, Wsp):
    # concentrated log-lik for lag model, sparse algebra
    if isinstance(rho, np.ndarray):
        if rho.shape == (1, 1):
            rho = rho[0][0]  # why does the interior value change?
    er = e0 - rho * e1
    sig2 = spdot(er.T, er) / n
    nlsig2 = (n / 2.0) * np.log(sig2)
    a = I - rho * Wsp
    LU = SuperLU(a.tocsc())
    jacob = np.sum(np.log(np.abs(LU.U.diagonal())))
    clik = nlsig2 - jacob
    return clik


def lag_c_loglik_ord(rho, n, e0, e1, evals):
    # concentrated log-lik for lag model, no constants, Ord eigenvalue method
    er = e0 - rho * e1
    sig2 = spdot(er.T, er) / n
    nlsig2 = (n / 2.0) * np.log(sig2)
    revals = rho * evals
    jacob = np.log(1 - revals).sum()
    if isinstance(jacob, complex):
        jacob = jacob.real
    # this is the negative of the concentrated log lik for minimization
    clik = nlsig2 - jacob
    return clik


def _test():
    import doctest

    start_suppress = np.get_printoptions()["suppress"]
    np.set_printoptions(suppress=True)
    doctest.testmod()
    np.set_printoptions(suppress=start_suppress)


if __name__ == "__main__":
    _test()

    import numpy as np
    import libpysal

    db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r")
    y_var = "CRIME"
    y = np.array([db.by_col(y_var)]).reshape(49, 1)
    x_var = ["INC"]
    x = np.array([db.by_col(name) for name in x_var]).T
    w = libpysal.weights.Rook.from_shapefile(
        libpysal.examples.get_path("columbus.shp")
    )
    w.transform = "r"
    model = ML_Lag(
        y,
        x,
        w=w,
        vm=False,
        name_y=y_var,
        name_x=x_var,
        name_ds="columbus",
        name_w="columbus.gal",
    )
    print(model.output)
    print(model.summary)
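

if __name__ == "__main__":
    # minimal sketch: all three Jacobian methods maximize the same likelihood,
    # so the estimated rho should agree up to the optimizer tolerance
    # (reuses the Columbus y, x and w built in the block above)
    for jac_method in ("full", "ord", "LU"):
        reg = ML_Lag(y, x, w=w, method=jac_method, name_y=y_var, name_x=x_var)
        print(jac_method, "rho = {0:.6f}".format(reg.rho))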