Skip to content

Commit 4ac194c

Browse files
authored
Merge pull request #220 from bashtage/improve-autolag
ENH: Speed up autolag in ADF
2 parents 88e1e70 + c08aa1f commit 4ac194c

File tree

8 files changed

+60
-100
lines changed

8 files changed

+60
-100
lines changed

arch/compat/python.py

Lines changed: 2 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -9,49 +9,27 @@
99
# always writeable
1010
from StringIO import StringIO
1111

12-
BytesIO = StringIO
13-
import cPickle
14-
pickle = cPickle
15-
import urllib2
16-
import urlparse
1712
except ImportError:
1813
import builtins
19-
from io import StringIO, BytesIO
14+
from io import StringIO
2015

2116
cStringIO = StringIO
22-
import pickle as cPickle
23-
pickle = cPickle
24-
import urllib.request
25-
import urllib.parse
26-
from urllib.request import HTTPError, urlretrieve
17+
2718

2819
PY3 = sys.version_info[0] == 3
2920

3021
if PY3:
3122
range = range
3223
long = int
33-
string_types = str,
3424

3525
def lmap(*args, **kwargs):
3626
return list(map(*args, **kwargs))
3727
else:
38-
string_types = basestring,
3928
range = xrange
4029
long = long
4130
lmap = builtins.map
4231

4332

44-
def with_metaclass(meta, *bases):
45-
"""Create a base class with a metaclass."""
46-
# This requires a bit of explanation: the basic idea is to make a dummy
47-
# metaclass for one level of class instantiation that replaces itself with
48-
# the actual metaclass.
49-
class metaclass(meta):
50-
def __new__(cls, name, this_bases, d):
51-
return meta(name, bases, d)
52-
return type.__new__(metaclass, 'temporary_class', (), {})
53-
54-
5533
def add_metaclass(metaclass):
5634
"""Class decorator for creating a class with a metaclass."""
5735
def wrapper(cls):
@@ -80,13 +58,6 @@ def iteritems(obj, **kwargs):
8058
return func(**kwargs)
8159

8260

83-
def iterkeys(obj, **kwargs):
84-
func = getattr(obj, "iterkeys", None)
85-
if not func:
86-
func = obj.keys
87-
return func(**kwargs)
88-
89-
9061
def itervalues(obj, **kwargs):
9162
func = getattr(obj, "itervalues", None)
9263
if not func:

arch/unitroot/critical_values/simulation/adf_z_critical_values_simulation_joblib.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@
1111

1212
from statsmodels.tools.parallel import parallel_func
1313
import datetime
14-
from numpy import array, savez, percentile, nan
15-
from numpy import ones, vstack, arange, cumsum, sum, dot, zeros
14+
from numpy import array, savez, percentile, nan, ones, vstack, arange, cumsum, sum, dot, zeros
1615
from numpy.random import RandomState
1716
from numpy.linalg import pinv
1817

arch/unitroot/critical_values/simulation/dfgls_critical_values_simulation.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import datetime
99

1010
import numpy as np
11-
from numpy import ones, vstack, arange, diff, cumsum, sqrt, sum
1211
from numpy.linalg import pinv
1312
from numpy.random import RandomState
1413
from statsmodels.compat import range
@@ -57,36 +56,36 @@ def dfgsl_simulation(n, trend, b, rng=None):
5756
nobs = n
5857
if trend == 'c':
5958
c = -7.0
60-
z = ones((nobs, 1))
59+
z = np.ones((nobs, 1))
6160
else:
6261
c = -13.5
63-
z = vstack((ones(nobs), arange(1, nobs + 1))).T
62+
z = np.vstack((np.ones(nobs), np.arange(1, nobs + 1))).T
6463

6564
ct = c / nobs
6665

6766
delta_z = np.copy(z)
6867
delta_z[1:, :] = delta_z[1:, :] - (1 + ct) * delta_z[:-1, :]
6968
delta_z_inv = pinv(delta_z)
7069
y = standard_normal((n + 50, b))
71-
y = cumsum(y, axis=0)
70+
y = np.cumsum(y, axis=0)
7271
y = y[50:, :]
7372
delta_y = y.copy()
7473
delta_y[1:, :] = delta_y[1:, :] - (1 + ct) * delta_y[:-1, :]
7574
detrend_coef = delta_z_inv.dot(delta_y)
7675
y_detrended = y - z.dot(detrend_coef)
7776

78-
delta_y_detrended = diff(y_detrended, axis=0)
77+
delta_y_detrended = np.diff(y_detrended, axis=0)
7978
rhs = y_detrended[:-1, :]
8079
lhs = delta_y_detrended
8180

82-
xpy = sum(rhs * lhs, 0)
83-
xpx = sum(rhs ** 2.0, 0)
81+
xpy = np.sum(rhs * lhs, 0)
82+
xpx = np.sum(rhs ** 2.0, 0)
8483
gamma = xpy / xpx
8584
e = lhs - rhs * gamma
86-
sigma2 = sum(e ** 2.0, axis=0) / (n - 1) # DOF correction?
85+
sigma2 = np.sum(e ** 2.0, axis=0) / (n - 1) # DOF correction?
8786
gamma_var = sigma2 / xpx
8887

89-
stat = gamma / sqrt(gamma_var)
88+
stat = gamma / np.sqrt(gamma_var)
9089
return stat
9190

9291

arch/unitroot/unitroot.py

Lines changed: 24 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,32 @@
11
from __future__ import absolute_import, division
2-
from arch.compat.python import add_metaclass, range, lmap, long
32

43
import warnings
54

65
from numpy import (diff, ceil, power, sqrt, sum, cumsum, int32, int64, interp, pi,
76
array, inf, abs, log, sort, polyval, empty, argwhere, arange, squeeze)
8-
from numpy.linalg import pinv
9-
from scipy.stats import norm
7+
from numpy.linalg import pinv, qr, inv, solve
108
from pandas import DataFrame
11-
12-
from statsmodels.regression.linear_model import OLS
13-
from statsmodels.tsa.tsatools import lagmat
14-
from statsmodels.tsa.stattools import _autolag
9+
from scipy.stats import norm
1510
from statsmodels.iolib.summary import Summary
1611
from statsmodels.iolib.table import SimpleTable
12+
from statsmodels.regression.linear_model import OLS
13+
from statsmodels.tsa.stattools import _autolag
14+
from statsmodels.tsa.tsatools import lagmat
1715

18-
from arch.utility import cov_nw
19-
from arch.utility.exceptions import InvalidLengthWarning, invalid_length_doc
16+
from arch.compat.python import add_metaclass, range, lmap, long
17+
from arch.unitroot.critical_values.dfgls import (dfgls_large_p, dfgls_small_p,
18+
dfgls_tau_max, dfgls_tau_min,
19+
dfgls_tau_star, dfgls_cv_approx)
2020
from arch.unitroot.critical_values.dickey_fuller import (adf_z_cv_approx, adf_z_large_p, adf_z_max,
2121
adf_z_min, adf_z_small_p, adf_z_star,
2222
tau_2010, tau_large_p, tau_max,
2323
tau_min, tau_small_p, tau_star)
2424
from arch.unitroot.critical_values.kpss import kpss_critical_values
25-
from arch.unitroot.critical_values.dfgls import (dfgls_large_p, dfgls_small_p,
26-
dfgls_tau_max, dfgls_tau_min,
27-
dfgls_tau_star, dfgls_cv_approx)
25+
from arch.utility import cov_nw
2826
from arch.utility.array import ensure1d, DocStringInheritor
27+
from arch.utility.exceptions import InvalidLengthWarning, invalid_length_doc
2928
from arch.utility.timeseries import add_trend
3029

31-
3230
__all__ = ['ADF', 'DFGLS', 'PhillipsPerron', 'KPSS', 'VarianceRatio',
3331
'kpss_crit', 'mackinnoncrit', 'mackinnonp']
3432

@@ -78,27 +76,23 @@ def _autolag_ols(endog, exog, startlag, maxlag, method, modargs=(), regresults=F
7876
if regresults:
7977
return _autolag(OLS, endog, exog, startlag, maxlag, method, regresults=regresults)
8078

81-
resid = squeeze(endog.copy())
82-
x = exog[:, startlag:].copy()
79+
q, r = qr(exog)
80+
qpy = q.T.dot(endog)
81+
ypy = endog.T.dot(endog)
82+
xpx = exog.T.dot(exog)
83+
8384
sigma2 = empty(maxlag + 1)
8485
tstat = empty(maxlag + 1)
85-
if len(exog) > 0 and startlag > 0:
86-
_x = exog[:, :startlag]
87-
resid -= _x.dot(pinv(_x).dot(resid))
88-
x -= _x.dot(pinv(_x).dot(x))
89-
sigma2[0] = (resid ** 2).mean()
86+
nobs = float(endog.shape[0])
9087
tstat[0] = inf
88+
for i in range(startlag, startlag + maxlag + 1):
89+
b = solve(r[:i, :i], qpy[:i])
90+
sigma2[i - startlag] = (ypy - b.T.dot(xpx[:i, :i]).dot(b)) / nobs
91+
if method == 't-stat' and i > startlag:
92+
xpxi = inv(xpx[:i, :i])
93+
stderr = sqrt(sigma2[i - startlag] * xpxi[-1, -1])
94+
tstat[i - startlag] = b[-1] / stderr
9195

92-
for i in range(maxlag):
93-
_x = x[:, i:i + 1]
94-
xpx = _x.T.dot(_x)
95-
beta = squeeze(_x.T.dot(resid) / xpx)
96-
resid -= squeeze(beta * _x)
97-
x[:, i + 1:] -= _x.dot(_x.T.dot(x[:, i + 1:]) / xpx)
98-
sigma2[i + 1] = (resid ** 2).mean()
99-
tstat[i + 1] = beta / sqrt(sigma2[i + 1] / xpx)
100-
101-
nobs = float(resid.shape[0])
10296
llf = -nobs / 2.0 * (log(2 * pi) + log(sigma2) + 1)
10397

10498
if method == 'aic':

arch/univariate/base.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99
import warnings
1010

1111
import numpy as np
12-
from numpy.linalg import matrix_rank
13-
from numpy import ones, zeros, sqrt, diag, empty, ceil
1412
import scipy.stats as stats
1513
import pandas as pd
1614
from statsmodels.tools.decorators import cache_readonly, resettable_cache
@@ -97,7 +95,7 @@ def format_float_fixed(x, max_digits=10, decimal=4):
9795
if x == 0:
9896
return ('{:0.' + str(decimal) + 'f}').format(0.0)
9997
scale = np.log10(np.abs(x))
100-
scale = np.sign(scale) * ceil(np.abs(scale))
98+
scale = np.sign(scale) * np.ceil(np.abs(scale))
10199
if scale > (max_digits - 2 - decimal) or scale < -(decimal - 2):
102100
formatted = (
103101
'{0:' + str(max_digits) + '.' + str(decimal) + 'e}').format(x)
@@ -123,7 +121,7 @@ def implicit_constant(x):
123121
the array has a set of columns that adds to a constant value
124122
"""
125123
nobs = x.shape[0]
126-
rank = matrix_rank(np.hstack((ones((nobs, 1)), x)))
124+
rank = np.linalg.matrix_rank(np.hstack((np.ones((nobs, 1)), x)))
127125
return rank == x.shape[1]
128126

129127

@@ -149,7 +147,7 @@ def __init__(self, y=None, volatility=None, distribution=None,
149147
if y is not None:
150148
self._y_series = ensure1d(y, 'y', series=True)
151149
else:
152-
self._y_series = ensure1d(empty((0,)), 'y', series=True)
150+
self._y_series = ensure1d(np.empty((0,)), 'y', series=True)
153151

154152
self._y = np.asarray(self._y_series)
155153
self._y_original = y
@@ -187,7 +185,7 @@ def constraints(self):
187185
-----
188186
Parameters satisfy a.dot(parameters) - b >= 0
189187
"""
190-
return empty((0, self.num_params)), empty(0)
188+
return np.empty((0, self.num_params)), np.empty(0)
191189

192190
def bounds(self):
193191
"""
@@ -444,7 +442,7 @@ def fit(self, update_freq=1, disp='final', starting_values=None,
444442
sv_volatility = v.starting_values(resids)
445443
self._var_bounds = var_bounds = v.variance_bounds(resids)
446444
v.compute_variance(sv_volatility, resids, sigma2, backcast, var_bounds)
447-
std_resids = resids / sqrt(sigma2)
445+
std_resids = resids / np.sqrt(sigma2)
448446

449447
# 2. Construct constraint matrices from all models and distribution
450448
constraints = (self.constraints(),
@@ -454,8 +452,8 @@ def fit(self, update_freq=1, disp='final', starting_values=None,
454452
num_constraints = [c[0].shape[0] for c in constraints]
455453
num_constraints = np.array(num_constraints)
456454
num_params = offsets.sum()
457-
a = zeros((num_constraints.sum(), num_params))
458-
b = zeros(num_constraints.sum())
455+
a = np.zeros((num_constraints.sum(), num_params))
456+
b = np.zeros(num_constraints.sum())
459457

460458
for i, c in enumerate(constraints):
461459
r_en = num_constraints[:i + 1].sum()
@@ -1506,7 +1504,7 @@ def std_err(self):
15061504
"""
15071505
Parameter standard error
15081506
"""
1509-
return pd.Series(sqrt(diag(self.param_cov)),
1507+
return pd.Series(np.sqrt(np.diag(self.param_cov)),
15101508
index=self._names, name='std_err')
15111509

15121510
@cache_readonly

arch/univariate/mean.py

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from collections import OrderedDict
99

1010
import numpy as np
11-
from numpy import zeros, empty, ones, isscalar, log
1211
from pandas import DataFrame
1312
from scipy.optimize import OptimizeResult
1413
from statsmodels.tools.decorators import cache_readonly
@@ -191,8 +190,8 @@ def _static_gaussian_loglikelihood(resids):
191190
nobs = resids.shape[0]
192191
sigma2 = resids.dot(resids) / nobs
193192

194-
loglikelihood = -0.5 * nobs * log(2 * np.pi)
195-
loglikelihood -= 0.5 * nobs * log(sigma2)
193+
loglikelihood = -0.5 * nobs * np.log(2 * np.pi)
194+
loglikelihood -= 0.5 * nobs * np.log(sigma2)
196195
loglikelihood -= 0.5 * nobs
197196

198197
return loglikelihood
@@ -346,10 +345,10 @@ def simulate(self, params, nobs, burn=500, initial_value=None, x=None,
346345
vol = np.sqrt(sim_data[1])
347346

348347
max_lag = np.max(self._lags)
349-
y = zeros(nobs + burn)
348+
y = np.zeros(nobs + burn)
350349
if initial_value is None:
351350
initial_value = 0.0
352-
elif not isscalar(initial_value):
351+
elif not np.isscalar(initial_value):
353352
initial_value = ensure1d(initial_value, 'initial_value')
354353
if initial_value.shape[0] != max_lag:
355354
raise ValueError('initial_value has the wrong shape')
@@ -442,7 +441,7 @@ def _reformat_lags(self):
442441

443442
ind = np.lexsort(np.flipud(lags))
444443
lags = lags[:, ind]
445-
test_mat = zeros((lags.shape[1], np.max(lags)))
444+
test_mat = np.zeros((lags.shape[1], np.max(lags)))
446445
for i in range(lags.shape[1]):
447446
test_mat[i, lags[0, i]:lags[1, i]] = 1.0
448447
rank = np.linalg.matrix_rank(test_mat)
@@ -476,24 +475,24 @@ def _init_model(self):
476475

477476
nobs_orig = self._y.shape[0]
478477
if self.constant:
479-
reg_constant = ones((nobs_orig, 1), dtype=np.float64)
478+
reg_constant = np.ones((nobs_orig, 1), dtype=np.float64)
480479
else:
481-
reg_constant = ones((nobs_orig, 0), dtype=np.float64)
480+
reg_constant = np.ones((nobs_orig, 0), dtype=np.float64)
482481

483482
if self.lags is not None and nobs_orig > 0:
484483
maxlag = np.max(self.lags)
485484
lag_array = lagmat(self._y, maxlag)
486-
reg_lags = empty((nobs_orig, self._lags.shape[1]),
487-
dtype=np.float64)
485+
reg_lags = np.empty((nobs_orig, self._lags.shape[1]),
486+
dtype=np.float64)
488487
for i, lags in enumerate(self._lags.T):
489488
reg_lags[:, i] = np.mean(lag_array[:, lags[0]:lags[1]], 1)
490489
else:
491-
reg_lags = empty((nobs_orig, 0), dtype=np.float64)
490+
reg_lags = np.empty((nobs_orig, 0), dtype=np.float64)
492491

493492
if self._x is not None:
494493
reg_x = self._x
495494
else:
496-
reg_x = empty((nobs_orig, 0), dtype=np.float64)
495+
reg_x = np.empty((nobs_orig, 0), dtype=np.float64)
497496

498497
self.regressors = np.hstack((reg_constant, reg_lags, reg_x))
499498

@@ -597,7 +596,7 @@ def _fit_no_arch_normal_errors(self, cov_type='robust'):
597596
param_cov /= nobs
598597
cov_type = COV_TYPES['classic_ols']
599598
elif cov_type in ('robust',):
600-
scores = zeros((nobs, self.num_params + 1))
599+
scores = np.zeros((nobs, self.num_params + 1))
601600
scores[:, :self.num_params] = x * e[:, None]
602601
scores[:, -1] = e ** 2.0 - sigma2
603602
score_cov = scores.T.dot(scores) / nobs

0 commit comments

Comments
 (0)