"""
ML Estimation of Spatial Lag Model
"""

__author__ = "Luc Anselin lanselin@gmail.com, \
              Pedro V. Amaral pedrovma@gmail.com, \
              Serge Rey srey@asu.edu, \
              Levi Wolf levi.john.wolf@gmail.com"

import numpy as np
import numpy.linalg as la
from scipy import sparse as sp
from scipy.sparse.linalg import splu as SuperLU
from .utils import RegressionPropsY, RegressionPropsVM, inverse_prod, set_warn, get_lags
from .sputils import spdot, spfill_diagonal, spinv
from . import diagnostics as DIAG
from . import user_output as USER
import pandas as pd
from .output import output, _nonspat_top, _spat_diag_out, _spat_pseudo_r2, _summary_impacts
from .w_utils import symmetrize
from libpysal import weights

try:
    from scipy.optimize import minimize_scalar

    minimize_scalar_available = True
except ImportError:
    minimize_scalar_available = False

__all__ = ["ML_Lag"]


class BaseML_Lag(RegressionPropsY, RegressionPropsVM):

    """
    ML estimation of the spatial lag model (note no consistency
    checks, diagnostics or constants added) :cite:`Anselin1988`

    Parameters
    ----------
    y            : array
                   nx1 array for dependent variable
    x            : array
                   Two dimensional array with n rows and one column for each
                   independent (exogenous) variable, excluding the constant
    w            : pysal W object
                   Spatial weights object
    slx_lags     : integer
                   Number of spatial lags of X to include in the model specification.
                   If slx_lags>0, the specification becomes of the Spatial Durbin type.
    method       : string
                   if 'full', brute force calculation (full matrix expressions)
                   if 'ord', Ord eigenvalue method
                   if 'LU', LU sparse matrix decomposition
    epsilon      : float
                   tolerance criterion in minimize_scalar function and inverse_product

    Attributes
    ----------
    betas        : array
                   (k+1)x1 array of estimated coefficients (rho first)
    rho          : float
                   estimate of spatial autoregressive coefficient
    u            : array
                   nx1 array of residuals
    predy        : array
                   nx1 array of predicted y values
    n            : integer
                   Number of observations
    k            : integer
                   Number of variables for which coefficients are estimated
                   (including the constant, excluding the rho)
    y            : array
                   nx1 array for dependent variable
    x            : array
                   Two dimensional array with n rows and one column for each
                   independent (exogenous) variable, including the constant
    method       : string
                   log Jacobian method
                   if 'full': brute force (full matrix computations)
                   if 'ord' : Ord eigenvalue method
                   if 'LU'  : LU sparse matrix decomposition
    epsilon      : float
                   tolerance criterion used in minimize_scalar function and inverse_product
    mean_y       : float
                   Mean of dependent variable
    std_y        : float
                   Standard deviation of dependent variable
    vm           : array
                   Variance covariance matrix (k+1 x k+1)
    vm1          : array
                   Variance covariance matrix (k+2 x k+2) includes sigma2
    sig2         : float
                   Sigma squared used in computations
    logll        : float
                   maximized log-likelihood (including constant terms)
    predy_e      : array
                   predicted values from reduced form
    e_pred       : array
                   prediction errors using reduced form predicted values


    Examples
    --------

    >>> import numpy as np
    >>> import libpysal
    >>> from libpysal.examples import load_example
    >>> import geopandas as gpd
    >>> from libpysal.weights import Queen
    >>> import spreg
    >>> np.set_printoptions(suppress=True) #prevent scientific format
    >>> baltimore = load_example('Baltimore')
    >>> db =  libpysal.io.open(baltimore.get_path("baltim.dbf"),'r')
    >>> df = gpd.read_file(baltimore.get_path("baltim.shp"))
    >>> ds_name = "baltim.dbf"
    >>> y_name = "PRICE"
    >>> y = np.array(db.by_col(y_name)).T
    >>> y.shape = (len(y),1)
    >>> x_names = ["NROOM","NBATH","PATIO","FIREPL","AC","GAR","AGE","LOTSZ","SQFT"]
    >>> x = np.array([db.by_col(var) for var in x_names]).T
    >>> x = np.hstack((np.ones((len(y),1)),x))
    >>> w = Queen.from_dataframe(df)
    >>> w.transform = 'r'
    >>> w_name = "baltim_q.gal"
    >>> mllag = spreg.ml_lag.BaseML_Lag(y,x,w,method='ord') #doctest: +SKIP
    >>> "{0:.6f}".format(mllag.rho) #doctest: +SKIP
    '0.425885'
    >>> np.around(mllag.betas, decimals=4) #doctest: +SKIP
    array([[ 4.3675],
           [ 0.7502],
           [ 5.6116],
           [ 7.0497],
           [ 7.7246],
           [ 6.1231],
           [ 4.6375],
           [-0.1107],
           [ 0.0679],
           [ 0.0794],
           [ 0.4259]])
    >>> "{0:.6f}".format(mllag.mean_y) #doctest: +SKIP
    '44.307180'
    >>> "{0:.6f}".format(mllag.std_y) #doctest: +SKIP
    '23.606077'
    >>> np.around(np.diag(mllag.vm1), decimals=4) #doctest: +SKIP
    array([  23.8716,    1.1222,    3.0593,    7.3416,    5.6695,    5.4698,
              2.8684,    0.0026,    0.0002,    0.0266,    0.0032,  220.1292])
    >>> np.around(np.diag(mllag.vm), decimals=4) #doctest: +SKIP
    array([ 23.8716,   1.1222,   3.0593,   7.3416,   5.6695,   5.4698,
             2.8684,   0.0026,   0.0002,   0.0266,   0.0032])
    >>> "{0:.6f}".format(mllag.sig2) #doctest: +SKIP
    '151.458698'
    >>> "{0:.6f}".format(mllag.logll) #doctest: +SKIP
    '-832.937174'
    >>> mllag = spreg.ml_lag.BaseML_Lag(y,x,w) #doctest: +SKIP
    >>> "{0:.6f}".format(mllag.rho) #doctest: +SKIP
    '0.425885'
    >>> np.around(mllag.betas, decimals=4) #doctest: +SKIP
    array([[ 4.3675],
           [ 0.7502],
           [ 5.6116],
           [ 7.0497],
           [ 7.7246],
           [ 6.1231],
           [ 4.6375],
           [-0.1107],
           [ 0.0679],
           [ 0.0794],
           [ 0.4259]])
    >>> "{0:.6f}".format(mllag.mean_y) #doctest: +SKIP
    '44.307180'
    >>> "{0:.6f}".format(mllag.std_y) #doctest: +SKIP
    '23.606077'
    >>> np.around(np.diag(mllag.vm1), decimals=4) #doctest: +SKIP
    array([  23.8716,    1.1222,    3.0593,    7.3416,    5.6695,    5.4698,
              2.8684,    0.0026,    0.0002,    0.0266,    0.0032,  220.1292])
    >>> np.around(np.diag(mllag.vm), decimals=4) #doctest: +SKIP
    array([ 23.8716,   1.1222,   3.0593,   7.3416,   5.6695,   5.4698,
             2.8684,   0.0026,   0.0002,   0.0266,   0.0032])
    >>> "{0:.6f}".format(mllag.sig2) #doctest: +SKIP
    '151.458698'
    >>> "{0:.6f}".format(mllag.logll) #doctest: +SKIP
    '-832.937174'
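
    The sparse LU variant follows the same pattern; since all three Jacobian
    methods maximize the same likelihood, rho should match the values above up
    to the optimizer tolerance (a sketch, outputs not re-verified here):

    >>> mllag = spreg.ml_lag.BaseML_Lag(y,x,w,method='LU') #doctest: +SKIP
    >>> "{0:.6f}".format(mllag.rho) #doctest: +SKIP
    '0.425885'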


    """

    def __init__(self, y, x, w, slx_lags=0, method="full", epsilon=0.0000001):
        # set up main regression variables and spatial filters
        self.y = y
        self.x = x
        self.method = method
        self.epsilon = epsilon
        # W = w.full()[0]
        # Wsp = w.sparse
        ylag = weights.lag_spatial(w, y)
        # b0, b1, e0 and e1

        # now set in ML_Lag
        # if slx_lags > 0:
        #     self.x = np.hstack((self.x, get_lags(w, self.x[:, 1:], slx_lags)))

        self.n, self.k = self.x.shape
        xtx = spdot(self.x.T, self.x)
        xtxi = la.inv(xtx)
        xty = spdot(self.x.T, self.y)
        xtyl = spdot(self.x.T, ylag)
        b0 = spdot(xtxi, xty)
        b1 = spdot(xtxi, xtyl)
        e0 = self.y - spdot(self.x, b0)
        e1 = ylag - spdot(self.x, b1)
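        # concentrate out b and sig2: for any rho the ML solutions are
        # b(rho) = b0 - rho*b1 and e(rho) = e0 - rho*e1, so the likelihood
        # can be maximized over rho alone (see lag_c_loglik* below)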
        methodML = method.upper()
        # call minimizer using concentrated log-likelihood to get rho
        if methodML in ["FULL", "LU", "ORD"]:
            if methodML == "FULL":
                W = w.full()[0]  # moved here
                res = minimize_scalar(
                    lag_c_loglik,
                    0.0,
                    bounds=(-1.0, 1.0),
                    args=(self.n, e0, e1, W),
                    method="bounded",
                    options={'xatol': epsilon},
                )
            elif methodML == "LU":
                I = sp.identity(w.n)
                Wsp = w.sparse  # moved here
                W = Wsp  # .tocsc()
                res = minimize_scalar(
                    lag_c_loglik_sp,
                    0.0,
                    bounds=(-1.0, 1.0),
                    args=(self.n, e0, e1, I, Wsp),
                    method="bounded",
                    options={'xatol': epsilon},
                )
            elif methodML == "ORD":
                # check on symmetry structure
                if w.asymmetry(intrinsic=False) == []:
                    ww = symmetrize(w)
                    WW = np.array(ww.todense())
                    evals = la.eigvalsh(WW)
                    W = WW
                else:
                    W = w.full()[0]  # moved here
                    evals = la.eigvals(W)
                res = minimize_scalar(
                    lag_c_loglik_ord,
                    0.0,
                    bounds=(-1.0, 1.0),
                    args=(self.n, e0, e1, evals),
                    method="bounded",
                    options={'xatol': epsilon},
                )
        else:
            # unsupported method: fail early with a clear message instead of
            # crashing later on an undefined result
            raise ValueError("{0} is an unsupported method".format(methodML))

        self.rho = res.x[0][0]

        # compute full log-likelihood, including constants
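        # res.fun is the minimized negative concentrated log-likelihood;
        # restoring the constants dropped in the concentration yields
        # logL = -res.fun - (n/2)*ln(2*pi) - n/2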
        ln2pi = np.log(2.0 * np.pi)
        llik = -res.fun - self.n / 2.0 * ln2pi - self.n / 2.0
        self.logll = llik[0][0]

        # b, residuals and predicted values

        b = b0 - self.rho * b1
        self.betas = np.vstack((b, self.rho))  # rho added as last coefficient
        self.u = e0 - self.rho * e1
        self.predy = self.y - self.u

        xb = spdot(self.x, b)

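        # reduced form: predy_e = (I - rho W)^{-1} X b, approximated below
        # through a power expansion of the inverse (inv_method="power_exp")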
        self.predy_e = inverse_prod(
            w.sparse, xb, self.rho, inv_method="power_exp", threshold=epsilon
        )
        self.e_pred = self.y - self.predy_e

        # residual variance
        self._cache = {}
        self.sig2 = self.sig2n  # no allowance for division by n-k

        # information matrix
        # if w should be kept sparse, how can we do the following:
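        # with A = I - rho*W, the traces tr(W A^{-1}), tr((W A^{-1})^2) and
        # tr((W A^{-1})' W A^{-1}) computed below enter the rho block of the
        # information matrix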
        a = -self.rho * W
        spfill_diagonal(a, 1.0)
        ai = spinv(a)
        wai = spdot(W, ai)
        tr1 = wai.diagonal().sum()  # same for sparse and dense

        wai2 = spdot(wai, wai)
        tr2 = wai2.diagonal().sum()

        waiTwai = spdot(wai.T, wai)
        tr3 = waiTwai.diagonal().sum()
        ### to here

        wpredy = weights.lag_spatial(w, self.predy_e)
        wpyTwpy = spdot(wpredy.T, wpredy)
        xTwpy = spdot(self.x.T, wpredy)

        # order of variables is beta, rho, sigma2

        v1 = np.vstack((xtx / self.sig2, xTwpy.T / self.sig2, np.zeros((1, self.k))))
        v2 = np.vstack(
            (xTwpy / self.sig2, tr2 + tr3 + wpyTwpy / self.sig2, tr1 / self.sig2)
        )
        v3 = np.vstack(
            (np.zeros((self.k, 1)), tr1 / self.sig2, self.n / (2.0 * self.sig2 ** 2))
        )

        v = np.hstack((v1, v2, v3))
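        # v stacks the information matrix in (beta, rho, sigma2) order;
        # its inverse is the asymptotic variance-covariance matrix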

        self.vm1 = la.inv(v)  # vm1 includes variance for sigma2
        self.vm = self.vm1[:-1, :-1]  # vm is for coefficients only


class ML_Lag(BaseML_Lag):

    """
    ML estimation of the spatial lag model with all results and diagnostics;
    :cite:`Anselin1988`

    Parameters
    ----------
    y            : numpy.ndarray or pandas.Series
                   nx1 array for dependent variable
    x            : numpy.ndarray or pandas object
                   Two dimensional array with n rows and one column for each
                   independent (exogenous) variable, excluding the constant
    w            : pysal W object
                   Spatial weights object
    slx_lags     : integer
                   Number of spatial lags of X to include in the model specification.
                   If slx_lags>0, the specification becomes of the Spatial Durbin type.
    slx_vars     : either "All" (default) or list of booleans to select x variables
                   to be lagged
    regimes      : list or pandas.Series
                   List of n values with the mapping of each observation to a regime.
                   Assumed to be aligned with 'x'.
                   For other regimes-specific arguments, see ML_Lag_Regimes
    method       : string
                   if 'full', brute force calculation (full matrix expressions)
                   if 'ord', Ord eigenvalue method
                   if 'LU', LU sparse matrix decomposition
    epsilon      : float
                   tolerance criterion in minimize_scalar function and inverse_product
    spat_diag    : boolean
                   If True, then compute Common Factor Hypothesis test when applicable
    spat_impacts : string or list
                   Include average direct impact (ADI), average indirect impact (AII),
                   and average total impact (ATI) in summary results.
                   Options are 'simple', 'full', 'power', 'all' or None.
                   See sputils.spmultiplier for more information.
    vm           : boolean
                   if True, include variance-covariance matrix in summary
                   results
    name_y       : string
                   Name of dependent variable for use in output
    name_x       : list of strings
                   Names of independent variables for use in output
    name_w       : string
                   Name of weights matrix for use in output
    name_ds      : string
                   Name of dataset for use in output
    latex        : boolean
                   Specifies if summary is to be printed in latex format

    Attributes
    ----------
    output       : dataframe
                   regression results pandas dataframe
    betas        : array
                   (k+1)x1 array of estimated coefficients (rho first)
    rho          : float
                   estimate of spatial autoregressive coefficient
    u            : array
                   nx1 array of residuals
    predy        : array
                   nx1 array of predicted y values
    n            : integer
                   Number of observations
    k            : integer
                   Number of variables for which coefficients are estimated
                   (including the constant, excluding the rho)
    y            : array
                   nx1 array for dependent variable
    x            : array
                   Two dimensional array with n rows and one column for each
                   independent (exogenous) variable, including the constant
    method       : string
                   log Jacobian method
                   if 'full': brute force (full matrix computations)
                   if 'ord' : Ord eigenvalue method
                   if 'LU'  : LU sparse matrix decomposition
    epsilon      : float
                   tolerance criterion used in minimize_scalar function and inverse_product
    mean_y       : float
                   Mean of dependent variable
    std_y        : float
                   Standard deviation of dependent variable
    vm           : array
                   Variance covariance matrix (k+1 x k+1), all coefficients
    vm1          : array
                   Variance covariance matrix (k+2 x k+2), includes sig2
    sig2         : float
                   Sigma squared used in computations
    logll        : float
                   maximized log-likelihood (including constant terms)
    aic          : float
                   Akaike information criterion
    schwarz      : float
                   Schwarz criterion
    cfh_test     : tuple
                   Common Factor Hypothesis test; tuple contains the pair (statistic,
                   p-value). Only when it applies (see specific documentation).
    predy_e      : array
                   predicted values from reduced form
    e_pred       : array
                   prediction errors using reduced form predicted values
    pr2          : float
                   Pseudo R squared (squared correlation between y and ypred)
    pr2_e        : float
                   Pseudo R squared (squared correlation between y and ypred_e
                   (using reduced form))
    utu          : float
                   Sum of squared residuals
    std_err      : array
                   1xk array of standard errors of the betas
    z_stat       : list of tuples
                   z statistic; each tuple contains the pair (statistic,
                   p-value), where each is a float
    sp_multipliers: dict
                   Dictionary of spatial multipliers (if spat_impacts is not None)
    name_y       : string
                   Name of dependent variable for use in output
    name_x       : list of strings
                   Names of independent variables for use in output
    name_w       : string
                   Name of weights matrix for use in output
    name_ds      : string
                   Name of dataset for use in output
    title        : string
                   Name of the regression method used

    Examples
    --------

    >>> import numpy as np
    >>> import libpysal
    >>> from libpysal.examples import load_example
    >>> from libpysal.weights import Queen
    >>> import geopandas as gpd
    >>> from spreg import ML_Lag
    >>> np.set_printoptions(suppress=True) #prevent scientific format
    >>> baltimore = load_example('Baltimore')
    >>> db = libpysal.io.open(baltimore.get_path("baltim.dbf"),'r')
    >>> df = gpd.read_file(baltimore.get_path("baltim.shp"))
    >>> ds_name = "baltim.dbf"
    >>> y_name = "PRICE"
    >>> y = np.array(db.by_col(y_name)).T
    >>> y.shape = (len(y),1)
    >>> x_names = ["NROOM","NBATH","PATIO","FIREPL","AC","GAR","AGE","LOTSZ","SQFT"]
    >>> x = np.array([db.by_col(var) for var in x_names]).T
    >>> w = Queen.from_dataframe(df)
    >>> w_name = "baltim_q.gal"
    >>> w.transform = 'r'
    >>> mllag = ML_Lag(y,x,w,name_y=y_name,name_x=x_names,\
               name_w=w_name,name_ds=ds_name) #doctest: +SKIP
    >>> np.around(mllag.betas, decimals=4) #doctest: +SKIP
    array([[ 4.3675],
           [ 0.7502],
           [ 5.6116],
           [ 7.0497],
           [ 7.7246],
           [ 6.1231],
           [ 4.6375],
           [-0.1107],
           [ 0.0679],
           [ 0.0794],
           [ 0.4259]])
    >>> "{0:.6f}".format(mllag.rho) #doctest: +SKIP
    '0.425885'
    >>> "{0:.6f}".format(mllag.mean_y) #doctest: +SKIP
    '44.307180'
    >>> "{0:.6f}".format(mllag.std_y) #doctest: +SKIP
    '23.606077'
    >>> np.around(np.diag(mllag.vm1), decimals=4) #doctest: +SKIP
    array([  23.8716,    1.1222,    3.0593,    7.3416,    5.6695,    5.4698,
              2.8684,    0.0026,    0.0002,    0.0266,    0.0032,  220.1292])
    >>> np.around(np.diag(mllag.vm), decimals=4) #doctest: +SKIP
    array([ 23.8716,   1.1222,   3.0593,   7.3416,   5.6695,   5.4698,
             2.8684,   0.0026,   0.0002,   0.0266,   0.0032])
    >>> "{0:.6f}".format(mllag.sig2) #doctest: +SKIP
    '151.458698'
    >>> "{0:.6f}".format(mllag.logll) #doctest: +SKIP
    '-832.937174'
    >>> "{0:.6f}".format(mllag.aic) #doctest: +SKIP
    '1687.874348'
    >>> "{0:.6f}".format(mllag.schwarz) #doctest: +SKIP
    '1724.744787'
    >>> "{0:.6f}".format(mllag.pr2) #doctest: +SKIP
    '0.727081'
    >>> "{0:.4f}".format(mllag.pr2_e) #doctest: +SKIP
    '0.7062'
    >>> "{0:.4f}".format(mllag.utu) #doctest: +SKIP
    '31957.7853'
    >>> np.around(mllag.std_err, decimals=4) #doctest: +SKIP
    array([ 4.8859,  1.0593,  1.7491,  2.7095,  2.3811,  2.3388,  1.6936,
            0.0508,  0.0146,  0.1631,  0.057 ])
    >>> np.around(mllag.z_stat, decimals=4) #doctest: +SKIP
    array([[ 0.8939,  0.3714],
           [ 0.7082,  0.4788],
           [ 3.2083,  0.0013],
           [ 2.6018,  0.0093],
           [ 3.2442,  0.0012],
           [ 2.6181,  0.0088],
           [ 2.7382,  0.0062],
           [-2.178 ,  0.0294],
           [ 4.6487,  0.    ],
           [ 0.4866,  0.6266],
           [ 7.4775,  0.    ]])
    >>> mllag.name_y #doctest: +SKIP
    'PRICE'
    >>> mllag.name_x #doctest: +SKIP
    ['CONSTANT', 'NROOM', 'NBATH', 'PATIO', 'FIREPL', 'AC', 'GAR', 'AGE', 'LOTSZ', 'SQFT', 'W_PRICE']
    >>> mllag.name_w #doctest: +SKIP
    'baltim_q.gal'
    >>> mllag.name_ds #doctest: +SKIP
    'baltim.dbf'
    >>> mllag.title #doctest: +SKIP
    'MAXIMUM LIKELIHOOD SPATIAL LAG (METHOD = FULL)'
    >>> mllag = ML_Lag(y,x,w,method='ord',name_y=y_name,name_x=x_names,\
               name_w=w_name,name_ds=ds_name) #doctest: +SKIP
    >>> np.around(mllag.betas, decimals=4) #doctest: +SKIP
    array([[ 4.3675],
           [ 0.7502],
           [ 5.6116],
           [ 7.0497],
           [ 7.7246],
           [ 6.1231],
           [ 4.6375],
           [-0.1107],
           [ 0.0679],
           [ 0.0794],
           [ 0.4259]])
    >>> "{0:.6f}".format(mllag.rho) #doctest: +SKIP
    '0.425885'
    >>> "{0:.6f}".format(mllag.mean_y) #doctest: +SKIP
    '44.307180'
    >>> "{0:.6f}".format(mllag.std_y) #doctest: +SKIP
    '23.606077'
    >>> np.around(np.diag(mllag.vm1), decimals=4) #doctest: +SKIP
    array([  23.8716,    1.1222,    3.0593,    7.3416,    5.6695,    5.4698,
              2.8684,    0.0026,    0.0002,    0.0266,    0.0032,  220.1292])
    >>> np.around(np.diag(mllag.vm), decimals=4) #doctest: +SKIP
    array([ 23.8716,   1.1222,   3.0593,   7.3416,   5.6695,   5.4698,
             2.8684,   0.0026,   0.0002,   0.0266,   0.0032])
    >>> "{0:.6f}".format(mllag.sig2) #doctest: +SKIP
    '151.458698'
    >>> "{0:.6f}".format(mllag.logll) #doctest: +SKIP
    '-832.937174'
    >>> "{0:.6f}".format(mllag.aic) #doctest: +SKIP
    '1687.874348'
    >>> "{0:.6f}".format(mllag.schwarz) #doctest: +SKIP
    '1724.744787'
    >>> "{0:.6f}".format(mllag.pr2) #doctest: +SKIP
    '0.727081'
    >>> "{0:.6f}".format(mllag.pr2_e) #doctest: +SKIP
    '0.706198'
    >>> "{0:.4f}".format(mllag.utu) #doctest: +SKIP
    '31957.7853'
    >>> np.around(mllag.std_err, decimals=4) #doctest: +SKIP
    array([ 4.8859,  1.0593,  1.7491,  2.7095,  2.3811,  2.3388,  1.6936,
            0.0508,  0.0146,  0.1631,  0.057 ])
    >>> np.around(mllag.z_stat, decimals=4) #doctest: +SKIP
    array([[ 0.8939,  0.3714],
           [ 0.7082,  0.4788],
           [ 3.2083,  0.0013],
           [ 2.6018,  0.0093],
           [ 3.2442,  0.0012],
           [ 2.6181,  0.0088],
           [ 2.7382,  0.0062],
           [-2.178 ,  0.0294],
           [ 4.6487,  0.    ],
           [ 0.4866,  0.6266],
           [ 7.4775,  0.    ]])
    >>> mllag.name_y #doctest: +SKIP
    'PRICE'
    >>> mllag.name_x #doctest: +SKIP
    ['CONSTANT', 'NROOM', 'NBATH', 'PATIO', 'FIREPL', 'AC', 'GAR', 'AGE', 'LOTSZ', 'SQFT', 'W_PRICE']
    >>> mllag.name_w #doctest: +SKIP
    'baltim_q.gal'
    >>> mllag.name_ds #doctest: +SKIP
    'baltim.dbf'
    >>> mllag.title #doctest: +SKIP
    'MAXIMUM LIKELIHOOD SPATIAL LAG (METHOD = ORD)'
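
    The Spatial Durbin variant with impact summaries follows the same pattern;
    a sketch only, the title string below mirrors the constructor code and the
    fitted values are not reproduced here:

    >>> mllag = ML_Lag(y,x,w,slx_lags=1,spat_impacts='all',name_y=y_name,\
               name_x=x_names,name_w=w_name,name_ds=ds_name) #doctest: +SKIP
    >>> mllag.title #doctest: +SKIP
    'MAXIMUM LIKELIHOOD SPATIAL LAG WITH SLX - SPATIAL DURBIN MODEL (METHOD = FULL)'
    """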

    def __init__(
        self,
        y,
        x,
        w,
        slx_lags=0,
        slx_vars="All",
        regimes=None,
        method="full",
        epsilon=0.0000001,
        spat_impacts="simple",
        vm=False,
        spat_diag=True,
        name_y=None,
        name_x=None,
        name_w=None,
        name_ds=None,
        latex=False,
        **kwargs
    ):
        if regimes is not None:
            from .ml_lag_regimes import ML_Lag_Regimes

            self.__class__ = ML_Lag_Regimes
            self.__init__(
                y=y,
                x=x,
                regimes=regimes,
                w=w,
                slx_lags=slx_lags,
                method=method,
                epsilon=epsilon,
                spat_impacts=spat_impacts,
                vm=vm,
                spat_diag=spat_diag,
                name_y=name_y,
                name_x=name_x,
                name_w=name_w,
                name_ds=name_ds,
                latex=latex,
                **kwargs
            )
        else:
            n = USER.check_arrays(y, x)
            y, name_y = USER.check_y(y, n, name_y)
            w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags)
            x_constant, name_x, warn = USER.check_constant(x, name_x)
            name_x = USER.set_name_x(
                name_x, x_constant
            )  # needs to be initialized for none, now with constant
            set_warn(self, warn)
            method = method.upper()

            # using flex_wx
            kx = len(name_x)
            if slx_lags > 0:
                x_constant, name_x = USER.flex_wx(
                    w,
                    x=x_constant,
                    name_x=name_x,
                    constant=True,
                    slx_lags=slx_lags,
                    slx_vars=slx_vars,
                )
                if isinstance(slx_vars, list):
                    kw = slx_vars.count(True)
                    if kw < kx - 1:
                        spat_diag = False  # no common factor test
                else:
                    kw = kx - 1

            BaseML_Lag.__init__(
                self,
                y=y,
                x=x_constant,
                w=w,
                slx_lags=slx_lags,
                method=method,
                epsilon=epsilon,
            )
            # increase by 1 to have correct aic and sc, include rho in count
            self.k += 1
            if slx_lags > 0:
                # kx = len(name_x)
                # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags)  # exclude constant
                self.title = (
                    "MAXIMUM LIKELIHOOD SPATIAL LAG WITH SLX - SPATIAL DURBIN MODEL"
                    + " (METHOD = "
                    + method
                    + ")"
                )
                var_types = ['o'] + ['x'] * (kx - 1) + ['wx'] * (kw) * slx_lags + ['rho']
            else:
                self.title = "MAXIMUM LIKELIHOOD SPATIAL LAG" + " (METHOD = " + method + ")"
                var_types = ['o'] + ['x'] * (len(name_x) - 1) + ['rho']
            self.slx_lags = slx_lags
            self.slx_vars = slx_vars
            self.name_ds = USER.set_name_ds(name_ds)
            self.name_y = USER.set_name_y(name_y)
            self.name_x = name_x  # already has constant
            name_ylag = USER.set_name_yend_sp(self.name_y)
            self.name_x.append(name_ylag)  # rho changed to last position
            self.name_w = USER.set_name_w(name_w, w)
            self.aic = DIAG.akaike(reg=self)
            self.schwarz = DIAG.schwarz(reg=self)
            self.output = pd.DataFrame(self.name_x, columns=['var_names'])
            self.output['var_type'] = var_types
            self.output['regime'], self.output['equation'] = (0, 0)
            self.other_top = _spat_pseudo_r2(self)
            self.other_top += _nonspat_top(self, ml=True)
            diag_out = None
            if spat_diag and slx_lags == 1:
                diag_out = _spat_diag_out(self, w, 'yend', ml=True)
            if spat_impacts:
                self.sp_multipliers, impacts_str = _summary_impacts(
                    self, w, spat_impacts, slx_lags, slx_vars
                )
                try:
                    diag_out += impacts_str
                except TypeError:
                    diag_out = impacts_str
            output(reg=self, vm=vm, robust=False, other_end=diag_out, latex=latex)

def lag_c_loglik(rho, n, e0, e1, W):
    # concentrated log-lik for lag model, no constants, brute force
    er = e0 - rho * e1
    sig2 = spdot(er.T, er) / n
    nlsig2 = (n / 2.0) * np.log(sig2)
    a = -rho * W
    spfill_diagonal(a, 1.0)
    jacob = np.log(np.linalg.det(a))
    # this is the negative of the concentrated log lik for minimization
    clik = nlsig2 - jacob
    return clik


def lag_c_loglik_sp(rho, n, e0, e1, I, Wsp):
    # concentrated log-lik for lag model, sparse algebra
    if isinstance(rho, np.ndarray):
        if rho.shape == (1, 1):
            rho = rho[0][0]  # why does the interior value change?
    er = e0 - rho * e1
    sig2 = spdot(er.T, er) / n
    nlsig2 = (n / 2.0) * np.log(sig2)
    a = I - rho * Wsp
    LU = SuperLU(a.tocsc())
    jacob = np.sum(np.log(np.abs(LU.U.diagonal())))
    clik = nlsig2 - jacob
    return clik


def lag_c_loglik_ord(rho, n, e0, e1, evals):
    # concentrated log-lik for lag model, no constants, Ord eigenvalue method
    er = e0 - rho * e1
    sig2 = spdot(er.T, er) / n
    nlsig2 = (n / 2.0) * np.log(sig2)
    revals = rho * evals
    jacob = np.log(1 - revals).sum()
    if isinstance(jacob, complex):
        jacob = jacob.real
    # this is the negative of the concentrated log lik for minimization
    clik = nlsig2 - jacob
    return clik


def _test():
    import doctest

    start_suppress = np.get_printoptions()["suppress"]
    np.set_printoptions(suppress=True)
    doctest.testmod()
    np.set_printoptions(suppress=start_suppress)


if __name__ == "__main__":
    _test()

    import numpy as np
    import libpysal

    db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r")
    y_var = "CRIME"
    y = np.array([db.by_col(y_var)]).reshape(49, 1)
    x_var = ["INC"]
    x = np.array([db.by_col(name) for name in x_var]).T
    w = libpysal.weights.Rook.from_shapefile(
        libpysal.examples.get_path("columbus.shp")
    )
    w.transform = "r"
    model = ML_Lag(
        y,
        x,
        w=w,
        vm=False,
        name_y=y_var,
        name_x=x_var,
        name_ds="columbus",
        name_w="columbus.gal",
    )
    print(model.output)
    print(model.summary)
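

if __name__ == "__main__":
    # minimal sketch: all three Jacobian methods maximize the same likelihood,
    # so the estimated rho should agree up to the optimizer tolerance
    # (reuses the Columbus y, x and w built in the block above)
    for jac_method in ("full", "ord", "LU"):
        reg = ML_Lag(y, x, w=w, method=jac_method, name_y=y_var, name_x=x_var)
        print(jac_method, "rho = {0:.6f}".format(reg.rho))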