
Commit 7e9f37d

Improve estimator
1 parent 5ce1633 commit 7e9f37d

2 files changed: +93 -28 lines changed

arch/covariance/var.py

Lines changed: 59 additions & 21 deletions
@@ -1,7 +1,7 @@
-from typing import Dict, NamedTuple, Optional, Tuple
+from typing import Dict, NamedTuple, Optional, Tuple, Type
 
 import numpy as np
-from numpy import ones, zeros
+from numpy import zeros
 from numpy.linalg import lstsq
 import pandas as pd
 from statsmodels.tools import add_constant

@@ -25,7 +25,7 @@ def _normalize_name(name: str) -> str:
     return name
 
 
-KERNELS = {}
+KERNELS: Dict[str, Type[CovarianceEstimator]] = {}
 for name in kernel.__all__:
     estimator = getattr(kernel, name)
     if issubclass(estimator, kernel.CovarianceEstimator):

@@ -77,7 +77,7 @@ def __init__(
         super().__init__(
             x, bandwidth=bandwidth, df_adjust=df_adjust, center=center, weights=weights
         )
-        self._kernel = kernel
+        self._kernel_name = kernel
         self._lags = 0
         self._diagonal_lags = (0,) * self._x.shape[0]
         self._method = method

@@ -100,6 +100,7 @@ def __init__(
                 f"are:\n\n{available_val}"
             )
         self._kernel = KERNELS[kernel]
+        self._kernel_instance: Optional[CovarianceEstimator] = None
 
         # Attach for testing only
         self._ics: Dict[Tuple[int, int], float] = {}

@@ -162,8 +163,13 @@ def _ic_from_vars(
         c = int(self._center)
         nobs, nvar = lhs.shape
         _rhs = rhs[:, : (c + full_order * nvar)]
-        params = lstsq(_rhs, lhs, rcond=None)[0]
-        resids0 = lhs - _rhs @ params
+        if _rhs.shape[1] > 0 and lhs.shape[1] > 0:
+            params = lstsq(_rhs, lhs, rcond=None)[0]
+            resids0 = lhs - _rhs @ params
+        else:
+            # Branch is a workaround of NumPy 1.15
+            # TODO: Remove after NumPy 1.15 dropped
+            resids0 = lhs
         sigma = resids0.T @ resids0 / nobs
         nparam = (c + full_order * nvar) * nvar
         ics: Dict[Tuple[int, int], float] = {
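
The `shape[1] > 0` guards introduced in this and the following hunks all address the same issue: per the TODO comments, `lstsq` under NumPy 1.15 cannot handle a design matrix with zero columns, which occurs when the candidate order is zero and no constant is included. A minimal self-contained sketch of the guarded pattern; the array sizes are illustrative only, not taken from the estimator:

import numpy as np

# A zero-column design matrix: there is nothing to project out.
lhs = np.random.standard_normal((100, 3))
_rhs = np.empty((100, 0))

if _rhs.shape[1] > 0 and lhs.shape[1] > 0:
    # Purge the common lags by ordinary least squares.
    params = np.linalg.lstsq(_rhs, lhs, rcond=None)[0]
    resids0 = lhs - _rhs @ params
else:
    # No regressors: the residuals are the data themselves, which is
    # exactly what the workaround branch in the diff returns.
    resids0 = lhs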
@@ -175,8 +181,14 @@ def _ic_from_vars(
         purged_indiv_lags = np.empty((nvar, nobs, max_lag - full_order))
         for i in range(nvar):
             single = indiv_lags[i, :, full_order:]
-            params = lstsq(_rhs, single, rcond=None)[0]
-            purged_indiv_lags[i] = single - _rhs @ params
+            if single.shape[1] > 0 and _rhs.shape[1] > 0:
+                params = lstsq(_rhs, single, rcond=None)[0]
+                purged_indiv_lags[i] = single - _rhs @ params
+            else:
+                # Branch is a workaround of NumPy 1.15
+                # TODO: Remove after NumPy 1.15 dropped
+                purged_indiv_lags[i] = single
+
         for diag_lag in range(1, max_lag - full_order + 1):
             resids = self._fit_diagonal(resids0.copy(), diag_lag, purged_indiv_lags)
             sigma = resids.T @ resids / nobs

@@ -227,11 +239,17 @@ def _estimate_var(self, full_order: int, diag_order: int) -> VARModel:
         ncommon = rhs.shape[1]
         for i in range(nvar):
             full_rhs = np.hstack([rhs, extra_lags[i]])
-            single_params = lstsq(full_rhs, lhs[:, i], rcond=None)[0]
-            params[i, :ncommon] = single_params[:ncommon]
-            locs = ncommon + i + nvar * np.arange(extra_lags[i].shape[1])
-            params[i, locs] = single_params[ncommon:]
-            resids[:, i] = lhs[:, i] - full_rhs @ single_params
+            if full_rhs.shape[1] > 0:
+                single_params = lstsq(full_rhs, lhs[:, i], rcond=None)[0]
+                params[i, :ncommon] = single_params[:ncommon]
+                locs = ncommon + i + nvar * np.arange(extra_lags[i].shape[1])
+                params[i, locs] = single_params[ncommon:]
+                resids[:, i] = lhs[:, i] - full_rhs @ single_params
+            else:
+                # Branch is a workaround of NumPy 1.15
+                # TODO: Remove after NumPy 1.15 dropped
+                resids[:, i] = lhs[:, i]
+
         return VARModel(resids, params, max_lag, self._center)
 
     def _estimate_sample_cov(self, nvar: int, nlag: int) -> NDArray:

@@ -290,17 +308,23 @@ def _companion_form(
 
     @property
     def cov(self) -> CovarianceEstimate:
-        x = self._x
         common, individual = self._select_lags()
         self._order = (common, individual)
         var_mod = self._estimate_var(common, individual)
         resids = var_mod.resids
         nobs, nvar = resids.shape
-        short_run = resids.T @ resids / nobs
+        self._kernel_instance = self._kernel(
+            resids, self._bandwidth, 0, False, self._x_weights, self._force_int
+        )
+        kern_cov = self._kernel_instance.cov
+        short_run = kern_cov.short_run
+        x_orig = self._x_orig
+        columns = x_orig.columns if isinstance(x_orig, pd.DataFrame) else None
         if var_mod.var_order == 0:
             # Special case VAR(0)
             # TODO: Docs should reflect different DoF adjustment
-            return CovarianceEstimate(short_run, np.zeros((nvar, nvar)))
+            oss = kern_cov.one_sided_strict
+            return CovarianceEstimate(short_run, oss, columns)
         comp_coefs, comp_var_cov = self._companion_form(var_mod, short_run)
         max_eig = np.abs(np.linalg.eigvals(comp_coefs)).max()
         if max_eig >= 1:
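
The substantive change in `cov` is that the short-run covariance of the pre-whitened residuals now comes from the selected kernel estimator rather than the raw outer product `resids.T @ resids / nobs`. With a bandwidth of zero every lag weight beyond lag zero vanishes, so the kernel estimate collapses back to that outer product, which is why the updated tests below pin `bandwidth=0.0`. A hedged sketch of the equivalence, assuming the kernel classes in `arch.covariance.kernel` accept the keyword form of the positional arguments used in the diff:

import numpy as np
from arch.covariance.kernel import Bartlett

resids = np.random.standard_normal((500, 2))
direct = resids.T @ resids / resids.shape[0]  # the old short_run

# center=False mirrors the positional False passed in the diff.
kern = Bartlett(resids, bandwidth=0.0, center=False)
np.testing.assert_allclose(np.asarray(kern.cov.short_run), direct)
# With zero bandwidth no lagged autocovariances enter, so the
# long-run estimate matches the lag-0 term as well.
np.testing.assert_allclose(np.asarray(kern.cov.long_run), direct)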
@@ -328,7 +352,6 @@ def cov(self) -> CovarianceEstimate:
 
         one_sided = one_sided[:nvar, :nvar]
         one_sided_strict = one_sided_strict[:nvar, :nvar]
-        columns = x.columns if isinstance(x, pd.DataFrame) else None
 
         return CovarianceEstimate(
             short_run,

@@ -338,14 +361,29 @@ def cov(self) -> CovarianceEstimate:
             one_sided=one_sided,
         )
 
+    def _ensure_kernel_instantized(self) -> None:
+        if self._kernel_instance is None:
+            self.cov
+
+    @property
     def bandwidth_scale(self) -> float:
-        return 1.0
+        self._ensure_kernel_instantized()
+        assert self._kernel_instance is not None
+        return self._kernel_instance.bandwidth_scale
 
+    @property
     def kernel_const(self) -> float:
-        return 1.0
+        self._ensure_kernel_instantized()
+        assert self._kernel_instance is not None
+        return self._kernel_instance.kernel_const
 
     def _weights(self) -> NDArray:
-        return ones(0)
+        self._ensure_kernel_instantized()
+        assert self._kernel_instance is not None
+        return self._kernel_instance._weights()
 
+    @property
     def rate(self) -> float:
-        return 2 / 9
+        self._ensure_kernel_instantized()
+        assert self._kernel_instance is not None
+        return self._kernel_instance.rate
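
The four delegating members above share a lazy-instantiation pattern: the kernel instance only exists as a side effect of computing `cov`, so each accessor first forces that computation via `_ensure_kernel_instantized` and then forwards to the instance; the `assert` lines narrow the `Optional` type for static checkers rather than guard at runtime. A stripped-down sketch of the pattern, with hypothetical names:

from typing import Optional


class Inner:
    # Stand-in for the kernel covariance estimator.
    rate = 2 / 9


class LazyDelegate:
    def __init__(self) -> None:
        self._inner: Optional[Inner] = None

    @property
    def cov(self) -> float:
        # The expensive computation also caches the helper object.
        self._inner = Inner()
        return 0.0

    def _ensure_inner(self) -> None:
        if self._inner is None:
            self.cov  # evaluated only for its caching side effect

    @property
    def rate(self) -> float:
        self._ensure_inner()
        assert self._inner is not None  # narrows Optional for mypy
        return self._inner.rate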

arch/tests/covariance/test_var.py

Lines changed: 34 additions & 7 deletions
@@ -11,6 +11,25 @@
 
 DATA_PARAMS = list(product([1, 3], [True, False], [0]))  # , 1, 3]))
 DATA_IDS = [f"dim: {d}, pandas: {p}, order: {o}" for d, p, o in DATA_PARAMS]
+KERNELS = [
+    "Bartlett",
+    "Parzen",
+    "ParzenCauchy",
+    "ParzenGeometric",
+    "ParzenRiesz",
+    "TukeyHamming",
+    "TukeyHanning",
+    "TukeyParzen",
+    "QuadraticSpectral",
+    "Andrews",
+    "Gallant",
+    "NeweyWest",
+]
+
+
+@pytest.fixture(params=KERNELS)
+def kernel(request):
+    return request.param
 
 
 @pytest.fixture(scope="module", params=DATA_PARAMS, ids=DATA_IDS)
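
The new `kernel` fixture exposes every kernel name as a test parameter. None of the hunks in this commit consume it yet; a consumer would look like the following hypothetical test (name and assertion illustrative only, relying on the `kernel=` argument handled in `__init__` above):

def test_kernel_name_accepted(data, kernel):
    # Runs once per name in KERNELS; the constructor resolves the
    # string to a kernel class through its KERNELS registry.
    pwrc = PreWhitenRecoloredCovariance(data, kernel=kernel, bandwidth=0.0)
    assert np.asarray(pwrc.cov.short_run).ndim == 2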
@@ -82,9 +101,14 @@ def direct_var(
         if diag_order > full_order:
             locs[diag_start:] = c + i + nvar * np.arange(full_order, diag_order)
         _rhs = rhs[:, locs]
-        p = np.linalg.lstsq(_rhs, lhs[:, i : i + 1], rcond=None)[0]
-        params[i : i + 1, locs] = p.T
-        resids[:, i : i + 1] = lhs[:, i : i + 1] - _rhs @ p
+        if _rhs.shape[1] > 0:
+            p = np.linalg.lstsq(_rhs, lhs[:, i : i + 1], rcond=None)[0]
+            params[i : i + 1, locs] = p.T
+            resids[:, i : i + 1] = lhs[:, i : i + 1] - _rhs @ p
+        else:
+            # Branch is a workaround of NumPy 1.15
+            # TODO: Remove after NumPy 1.15 dropped
+            resids[:, i : i + 1] = lhs[:, i : i + 1]
     return params, resids
 
 

@@ -125,10 +149,11 @@ def test_direct_var(data, const, full_order, diag_order, max_order, ic):
 @pytest.mark.parametrize("method", ["aic", "bic", "hqc"])
 def test_ic(data, center, diagonal, method):
     pwrc = PreWhitenRecoloredCovariance(
-        data, center=center, diagonal=diagonal, method=method
+        data, center=center, diagonal=diagonal, method=method, bandwidth=0.0,
     )
     cov = pwrc.cov
-    assert isinstance(cov.short_run, np.ndarray)
+    expected_type = np.ndarray if isinstance(data, np.ndarray) else pd.DataFrame
+    assert isinstance(cov.short_run, expected_type)
     expected_max_lag = int(data.shape[0] ** (1 / 3))
     assert pwrc._max_lag == expected_max_lag
     expected_ics = {}

@@ -152,7 +177,7 @@ def test_ic(data, center, diagonal, method):
 @pytest.mark.parametrize("lags", [0, 1, 3])
 def test_short_long_run(data, center, diagonal, method, lags):
     pwrc = PreWhitenRecoloredCovariance(
-        data, center=center, diagonal=diagonal, method=method, lags=lags
+        data, center=center, diagonal=diagonal, method=method, lags=lags, bandwidth=0.0,
     )
     cov = pwrc.cov
     full_order, diag_order = pwrc._order

@@ -172,7 +197,9 @@ def test_short_long_run(data, center, diagonal, method, lags):
 
 @pytest.mark.parametrize("sample_autocov", [True, False])
 def test_data(data, sample_autocov):
-    pwrc = PreWhitenRecoloredCovariance(data, sample_autocov=sample_autocov)
+    pwrc = PreWhitenRecoloredCovariance(
+        data, sample_autocov=sample_autocov, bandwidth=0.0
+    )
     pwrc.cov
 
 
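For context, a minimal end-to-end sketch of the estimator these tests exercise; the import path follows the file under test, the data is synthetic, and `bandwidth=0.0` reproduces the pre-change short-run computation as noted above:

import numpy as np
from arch.covariance.var import PreWhitenRecoloredCovariance

x = np.random.standard_normal((250, 3))

pwrc = PreWhitenRecoloredCovariance(x, bandwidth=0.0)
est = pwrc.cov
print(est.short_run)  # short-run covariance of the series
print(est.long_run)   # recolored long-run covariance
print(pwrc.rate)      # now delegated to the kernel instance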