From d7a5b4f5df194c94fb36b5351d8b9d0bb2e38a5f Mon Sep 17 00:00:00 2001 From: Malte Londschien Date: Thu, 22 Aug 2024 10:57:45 +0200 Subject: [PATCH 1/5] Raise if mx < k. Handle mx = k separately. --- ivmodels/models/kclass.py | 39 ++++++++++++++++++------------ ivmodels/tests/likelihood_ratio.py | 36 ++++++++++++++++++--------- 2 files changed, 48 insertions(+), 27 deletions(-) diff --git a/ivmodels/models/kclass.py b/ivmodels/models/kclass.py index ce30a7f..61dc565 100644 --- a/ivmodels/models/kclass.py +++ b/ivmodels/models/kclass.py @@ -366,6 +366,11 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs): n, k = Z.shape mx, mc = X.shape[1], C.shape[1] + if k < mx: + raise ValueError( + f"Need at least as many instruments {k} as endogenous regressors {mx}." + ) + # Including an intercept is equivalent to replacing y <- M_1 y, X <- M_1 X, # C <- M_1 C, Z <- M_1 Z, where M_1 = Id - 1/n 1 1^T is the projection # orthogonal to np.ones(n). I.e., center the columns of all design matrices. @@ -418,21 +423,25 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs): self.kappa_ = 0 else: self.fuller_alpha_ = self._fuller_alpha(self.kappa) - # If C!=None, we compute the ar_min as if we removed C from all design - # matrices. I.e., we replace Z <- M_C Z, X <- M_C X, y <- M_C y. - # We also exclude the columns in X coming from C. - X_proj_C, y_proj_C = proj(C, X[:, :mx], y) - # Here ar_min = lambdamin ( - # (X y)^T M_{[Z, C]} (X y)^{-1} (X y)^T P_{M_C Z} (X y) - # ). - # Thus X_proj <- P_[M_C Z] X = P_[Z, C] X - P_C X = X_proj - X_proj_C - # and X <- M_C X = X - X_proj_C. Some for y. - self.ar_min_ = self.ar_min( - X[:, :mx] - X_proj_C, - y - y_proj_C, - X_proj=X_proj[:, :mx] - X_proj_C, - y_proj=y_proj - y_proj_C, - ) + + if mx == k: + self.ar_min_ = 0 + else: + # If C!=None, we compute the ar_min as if we removed C from all design + # matrices. I.e., we replace Z <- M_C Z, X <- M_C X, y <- M_C y. + # We also exclude the columns in X coming from C. 
+ X_proj_C, y_proj_C = proj(C, X[:, :mx], y) + # Here ar_min = lambdamin ( + # (X y)^T M_{[Z, C]} (X y)^{-1} (X y)^T P_{M_C Z} (X y) + # ). + # Thus X_proj <- P_[M_C Z] X = P_[Z, C] X - P_C X = X_proj - X_proj_C + # and X <- M_C X = X - X_proj_C. Same for y. + self.ar_min_ = self.ar_min( + X[:, :mx] - X_proj_C, + y - y_proj_C, + X_proj=X_proj[:, :mx] - X_proj_C, + y_proj=y_proj - y_proj_C, + ) self.kappa_liml_ = 1 + self.ar_min_ self.kappa_ = self.kappa_liml_ - self.fuller_alpha_ / ( n - k - mc - self.fit_intercept diff --git a/ivmodels/tests/likelihood_ratio.py b/ivmodels/tests/likelihood_ratio.py index a0eebde..03fb9ed 100644 --- a/ivmodels/tests/likelihood_ratio.py +++ b/ivmodels/tests/likelihood_ratio.py @@ -76,6 +76,12 @@ def likelihood_ratio_test(Z, X, y, beta, W=None, C=None, D=None, fit_intercept=T n, k = Z.shape mx, mw, mc, md = X.shape[1], W.shape[1], C.shape[1], D.shape[1] + if k < mx + mw: + raise ValueError( + "The number of instruments must be at least the number of endogenous " + "regressors."
+ ) + if fit_intercept: C = np.hstack([np.ones((n, 1)), C]) @@ -91,11 +97,14 @@ def likelihood_ratio_test(Z, X, y, beta, W=None, C=None, D=None, fit_intercept=T XWy_proj = np.hstack([X_proj, W_proj, y_proj.reshape(-1, 1)]) - ar_min = _characteristic_roots( - a=oproj(D, XWy).T @ XWy_proj, - b=XWy.T @ (XWy - XWy_proj), - subset_by_index=[0, 0], - )[0] + if k == mx + mw: + ar_min = 0 + else: + ar_min = _characteristic_roots( + a=oproj(D, XWy).T @ XWy_proj, + b=XWy.T @ (XWy - XWy_proj), + subset_by_index=[0, 0], + )[0] if md > 0: X = np.hstack([X, D]) @@ -189,13 +198,16 @@ def inverse_likelihood_ratio_test( XWy_proj = np.concatenate([XW_proj, y_proj.reshape(-1, 1)], axis=1) XWy = np.concatenate([XW, y.reshape(-1, 1)], axis=1) - kappa_liml = np.real( - _characteristic_roots( - a=oproj(D, XWy).T @ XWy_proj, - b=XWy.T @ (XWy - XWy_proj), - subset_by_index=[0, 0], - )[0] - ) + if k == mx + mw: + kappa_liml = 0 + else: + kappa_liml = np.real( + _characteristic_roots( + a=oproj(D, XWy).T @ XWy_proj, + b=XWy.T @ (XWy - XWy_proj), + subset_by_index=[0, 0], + )[0] + ) dfd = n - k - mc - md - fit_intercept quantile = scipy.stats.chi2.ppf(1 - alpha, df=mx + md) + dfd * kappa_liml From e8c218964ee91b9cb08af2a32e308b28bc909fe4 Mon Sep 17 00:00:00 2001 From: Malte Londschien Date: Thu, 22 Aug 2024 10:59:27 +0200 Subject: [PATCH 2/5] allow k= 1 and k < mx: raise ValueError( - f"Need at least as many instruments {k} as endogenous regressors {mx}." + f"Need at least as many instruments (got {k}) as endogenous regressors " + f"(got {mx})." ) # Including an intercept is equivalent to replacing y <- M_1 y, X <- M_1 X, From 3d4e04ffe42f907df6546d732fd56760ae891ca6 Mon Sep 17 00:00:00 2001 From: Malte Londschien Date: Thu, 22 Aug 2024 16:41:52 +0200 Subject: [PATCH 3/5] Fix. 
--- ivmodels/models/kclass.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ivmodels/models/kclass.py b/ivmodels/models/kclass.py index 3b41c0b..27c1e8e 100644 --- a/ivmodels/models/kclass.py +++ b/ivmodels/models/kclass.py @@ -366,7 +366,13 @@ def fit(self, X, y, Z=None, C=None, *args, **kwargs): n, k = Z.shape mx, mc = X.shape[1], C.shape[1] - if self.kappa_ >= 1 and k < mx: + if ( + ( + isinstance(self.kappa, str) + and self.kappa.lower() in {"tsls", "2sls", "liml"} + ) + or (isinstance(self.kappa, (int, float)) and self.kappa >= 1) + ) and k < mx: raise ValueError( f"Need at least as many instruments (got {k}) as endogenous regressors " f"(got {mx})." From c108179c32cfef0033e3163b6a4187dfe2fd54dd Mon Sep 17 00:00:00 2001 From: Malte Londschien Date: Fri, 23 Aug 2024 09:38:53 +0200 Subject: [PATCH 4/5] Fix test --- tests/tests/test_anderson_rubin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests/test_anderson_rubin.py b/tests/tests/test_anderson_rubin.py index 9eb4d93..a5c777b 100644 --- a/tests/tests/test_anderson_rubin.py +++ b/tests/tests/test_anderson_rubin.py @@ -70,7 +70,7 @@ def test_more_powerful_sAR_critical_value_function_integrates_to_one(k, hat_kapp @pytest.mark.parametrize("alpha", [0.1, 0.05, 0.01]) @pytest.mark.parametrize( - "n, k, mx, mw", [(100, 2, 1, 0), (100, 2, 2, 1), (100, 20, 5, 5)] + "n, k, mx, mw", [(100, 2, 2, 0), (100, 3, 2, 1), (100, 20, 5, 5)] ) def test_inverse_anderson_rubin_confidence_set_alternative_formulation( alpha, n, k, mx, mw From 984672953659d08655006069798cb24074b71ee8 Mon Sep 17 00:00:00 2001 From: Malte Londschien Date: Fri, 23 Aug 2024 09:40:42 +0200 Subject: [PATCH 5/5] Changelog. 
--- CHANGELOG.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c69bb44..c6f1c33 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -15,6 +15,10 @@ Changelog :func:`~ivmodels.tests.lagrange_multiplier.inverse_lagrange_multiplier_test` via the `kwargs`. +- :class:`~ivmodels.models.kclass.KClass` now raises if `kappa >= 1` (as for the LIML + and TSLS estimators) and the number of instruments is less than the number of + endogenous regressors. + 0.4.0 - 2024-08-08 ------------------