New util function: characteristic_roots

mlondschien · Jul 26, 2024 · ca51410 · ca51410
1 parent 6851286
commit ca51410
Show file tree

Hide file tree

Showing 6 changed files with 146 additions and 39 deletions.
diff --git a/ivmodels/models/kclass.py b/ivmodels/models/kclass.py
@@ -2,11 +2,10 @@
 import re
 
 import numpy as np
-import scipy
 from glum import GeneralizedLinearRegressor
 
 from ivmodels.summary import Summary
-from ivmodels.utils import _check_inputs, oproj, proj, to_numpy
+from ivmodels.utils import _characteristic_roots, _check_inputs, oproj, proj, to_numpy
 
 try:
     import pandas as pd
@@ -274,7 +273,7 @@ def _spectrum(X, y, Z=None, X_proj=None, y_proj=None, subset_by_index=None):
         Xy_proj = np.concatenate([X_proj, y_proj.reshape(-1, 1)], axis=1)
         # Here possibly Xy_proj.T @ Xy != Xy_proj.T @ Xy_proj, if called from
         # KClass.fit() with C!=None.
-        return scipy.linalg.eigvalsh(
+        return _characteristic_roots(
             a=Xy.T @ Xy_proj, b=(Xy - Xy_proj).T @ Xy, subset_by_index=subset_by_index
         )
 

diff --git a/ivmodels/tests/conditional_likelihood_ratio.py b/ivmodels/tests/conditional_likelihood_ratio.py
@@ -4,7 +4,13 @@
 from ivmodels.confidence_set import ConfidenceSet
 from ivmodels.models.kclass import KClass
 from ivmodels.quadric import Quadric
-from ivmodels.utils import _check_inputs, _find_roots, oproj, proj
+from ivmodels.utils import (
+    _characteristic_roots,
+    _check_inputs,
+    _find_roots,
+    oproj,
+    proj,
+)
 
 
 def conditional_likelihood_ratio_critical_value_function(
@@ -346,7 +352,7 @@ def conditional_likelihood_ratio_test(
         Xt_orth = Xt - Xt_proj
 
         s_min = np.real(
-            scipy.linalg.eigvalsh(
+            _characteristic_roots(
                 a=Xt_proj.T @ Xt_proj, b=Xt_orth.T @ Xt_orth, subset_by_index=[0, 0]
             )[0]
         ) * (n - k - mc - md - fit_intercept)
@@ -355,7 +361,7 @@ def conditional_likelihood_ratio_test(
         Xy_proj = np.hstack([X_proj, y_proj.reshape(-1, 1)])
 
         ar_min = (
-            scipy.linalg.eigvalsh(
+            _characteristic_roots(
                 a=Xy.T @ Xy,
                 b=(Xy - Xy_proj).T @ Xy,
                 subset_by_index=[0, 0],
@@ -374,7 +380,7 @@ def conditional_likelihood_ratio_test(
         XWy_eigenvals = (
             np.sort(
                 np.real(
-                    scipy.linalg.eigvalsh(
+                    _characteristic_roots(
                         a=XWy.T @ XWy,
                         b=(XWy - XWy_proj).T @ XWy,
                         subset_by_index=[0, 1],
@@ -429,7 +435,7 @@ def inverse_conditional_likelihood_ratio_test(
     Sy_orth = np.concatenate([S_orth, y_orth.reshape(-1, 1)], axis=1)
 
     Sy_eigvals = np.real(
-        scipy.linalg.eigvalsh(
+        _characteristic_roots(
             a=Sy_proj.T @ Sy_proj, b=Sy_orth.T @ Sy_orth, subset_by_index=[0, 1]
         )
     )
@@ -465,7 +471,7 @@ def f(x):
         Wy_orth_[:, -1:] -= S_orth[:, :mx] * x
 
         eigval = np.real(
-            scipy.linalg.eigvalsh(
+            _characteristic_roots(
                 a=Wy_proj_.T @ Wy_proj_, b=Wy_orth_.T @ Wy_orth_, subset_by_index=[0, 0]
             )
         )

diff --git a/ivmodels/tests/lagrange_multiplier.py b/ivmodels/tests/lagrange_multiplier.py
@@ -6,7 +6,13 @@
 
 from ivmodels.confidence_set import ConfidenceSet
 from ivmodels.models.kclass import KClass
-from ivmodels.utils import _check_inputs, _find_roots, oproj, proj
+from ivmodels.utils import (
+    _characteristic_roots,
+    _check_inputs,
+    _find_roots,
+    oproj,
+    proj,
+)
 
 
 # https://stackoverflow.com/a/68608349/10586763
@@ -125,19 +131,31 @@ def liml(self, beta=None):
             one_beta_id[1 : (1 + self.mx), 0] = -beta[:, 0]
             one_beta_id[(1 + self.mx) :, 1:] = np.diag(np.ones(self.mw))
 
-            X_proj_X = one_beta_id.T @ self.yS_proj_at_yS @ one_beta_id
-            X_orth_X = one_beta_id.T @ self.yS_orth_at_yS @ one_beta_id
+            resX_proj_resX = one_beta_id.T @ self.yS_proj_at_yS @ one_beta_id
+            resX_orth_resX = one_beta_id.T @ self.yS_orth_at_yS @ one_beta_id
         else:
-            X_proj_X = self.yS_proj_at_yS
-            X_orth_X = self.yS_orth_at_yS
-
-        eigval = scipy.linalg.eigh(
-            a=X_proj_X,
-            b=X_orth_X,
-            subset_by_index=[0, 0],
-        )[1]
+            resX_proj_resX = self.yS_proj_at_yS
+            resX_orth_resX = self.yS_orth_at_yS
+
+        cond = np.linalg.cond(resX_orth_resX)
+        if cond < 0.5 / np.finfo(resX_orth_resX.dtype).eps:
+            eigval = scipy.linalg.eigh(
+                a=resX_proj_resX,
+                b=resX_orth_resX,
+                subset_by_index=[0, 0],
+            )[1]
+
+            liml = -eigval[1:, 0] / eigval[0, 0]
+        else:
+            kappa_liml = _characteristic_roots(
+                resX_proj_resX, resX_orth_resX, subset_by_index=[0, 0]
+            )[0]
+            liml = np.linalg.solve(
+                resX_proj_resX[1:, 1:] - kappa_liml * resX_orth_resX[1:, 1:],
+                resX_proj_resX[1:, 0] - kappa_liml * resX_orth_resX[1:, 0],
+            )
 
-        return -eigval[1:, 0] / eigval[0, 0]
+        return liml
 
     def derivative(self, beta, gamma=None, jac=True, hess=True):
         """Return LM and derivative of LM at beta, gamma w.r.t. (beta, gamma)."""
@@ -284,12 +302,12 @@ def _derivative(gamma):
                 )
             )
 
-        res = min(results, key=lambda r: r.fun)
+        minimizer = min(results, key=lambda r: r.fun).x
+        statistic = self.derivative(beta, minimizer, jac=False, hess=False)[0]
 
         if return_minimizer:
-            return res.fun, res.x
-        else:
-            return res.fun
+            return statistic, minimizer
+        return statistic
 
 
 def lagrange_multiplier_test(
@@ -369,7 +387,8 @@ def lagrange_multiplier_test(
 
     dof = n - k - mc - md - fit_intercept
     if mw > 0:
-        statistic = _LM(X=X, y=y, W=W, Z=Z, dof=dof, **kwargs).lm(beta)
+        lm = _LM(X=X, y=y, W=W, Z=Z, dof=dof, **kwargs)
+        statistic = lm.lm(beta)
 
         p_value = 1 - scipy.stats.chi2.cdf(statistic, df=mx + md)
 

diff --git a/ivmodels/tests/likelihood_ratio.py b/ivmodels/tests/likelihood_ratio.py
@@ -2,7 +2,7 @@
 import scipy
 
 from ivmodels.quadric import Quadric
-from ivmodels.utils import _check_inputs, oproj, proj
+from ivmodels.utils import _characteristic_roots, _check_inputs, oproj, proj
 
 
 def likelihood_ratio_test(Z, X, y, beta, W=None, C=None, D=None, fit_intercept=True):
@@ -91,13 +91,11 @@ def likelihood_ratio_test(Z, X, y, beta, W=None, C=None, D=None, fit_intercept=T
 
     XWy_proj = np.hstack([X_proj, W_proj, y_proj.reshape(-1, 1)])
 
-    ar_min = np.real(
-        scipy.linalg.eigvalsh(
-            a=oproj(D, XWy).T @ XWy_proj,
-            b=XWy.T @ (XWy - XWy_proj),
-            subset_by_index=[0, 0],
-        )[0]
-    )
+    ar_min = _characteristic_roots(
+        a=oproj(D, XWy).T @ XWy_proj,
+        b=XWy.T @ (XWy - XWy_proj),
+        subset_by_index=[0, 0],
+    )[0]
 
     if md > 0:
         X = np.hstack([X, D])
@@ -118,7 +116,7 @@ def likelihood_ratio_test(Z, X, y, beta, W=None, C=None, D=None, fit_intercept=T
 
         statistic = (
             np.real(
-                scipy.linalg.eigvalsh(
+                _characteristic_roots(
                     a=Wy_proj.T @ Wy_proj,
                     b=Wy.T @ (Wy - Wy_proj),
                     subset_by_index=[0, 0],
@@ -192,7 +190,7 @@ def inverse_likelihood_ratio_test(
     XWy = np.concatenate([XW, y.reshape(-1, 1)], axis=1)
 
     kappa_liml = np.real(
-        scipy.linalg.eigvalsh(
+        _characteristic_roots(
             a=oproj(D, XWy).T @ XWy_proj,
             b=XWy.T @ (XWy - XWy_proj),
             subset_by_index=[0, 0],

diff --git a/ivmodels/tests/rank.py b/ivmodels/tests/rank.py
@@ -1,7 +1,7 @@
 import numpy as np
 import scipy
 
-from ivmodels.utils import _check_inputs, oproj, proj
+from ivmodels.utils import _characteristic_roots, _check_inputs, oproj, proj
 
 
 def rank_test(Z, X, C=None, fit_intercept=True):
@@ -60,7 +60,7 @@ def rank_test(Z, X, C=None, fit_intercept=True):
     X_proj = proj(Z, X)
 
     statistic = (n - k - C.shape[1]) * np.real(
-        scipy.linalg.eigvalsh(
+        _characteristic_roots(
             a=X.T @ X_proj, b=X.T @ (X - X_proj), subset_by_index=[0, 0]
         )[0]
     )

diff --git a/ivmodels/utils.py b/ivmodels/utils.py
@@ -55,7 +55,6 @@ def proj(Z, *args):
         return (*(np.zeros_like(f) for f in args),)
 
     fs = np.dot(Z, scipy.linalg.lstsq(Z, fs, cond=None, lapack_driver="gelsy")[0])
-
     return (
         *(fs[:, i:j].reshape(f.shape) for i, j, f in zip(csum[:-1], csum[1:], args)),
     )
@@ -264,3 +263,89 @@ def _find_roots(f, a, b, tol, max_value, max_eval, n_points=50):
         max_value=None,
         max_eval=max_eval - n_points,
     )
+
+
+def _characteristic_roots(a, b, subset_by_index=None, symmetric=True):
+    """
+    Compute the solutions to det(A - B * lambda) = 0.
+
+    Parameters
+    ----------
+    a: np.ndarray of dimension (n, n)
+        The matrix A.
+    b: np.ndarray of dimension (n, n)
+        The matrix B.
+    subset_by_index: List[int] or None, optional, default=None
+        The subset of roots to return. If None, returns all roots.
+
+    Returns
+    -------
+    np.ndarray of dimension (n,)
+        The characteristic roots of the generalized eigenvalue problem.
+    """
+    cond = np.linalg.cond(b)
+
+    if cond < 0.5 / np.finfo(b.dtype).eps:
+        if symmetric:
+            return scipy.linalg.eigvalsh(a=a, b=b, subset_by_index=subset_by_index)
+        else:
+            return scipy.linalg.eigvals(a=a, b=b, subset_by_index=subset_by_index)
+
+    if not symmetric:
+        raise ValueError
+
+    if subset_by_index is not None:
+        subset_by_index = sorted([b.shape[0] - 1 - i for i in subset_by_index])
+
+    return 1 / np.real(scipy.linalg.eigvalsh(a=b, b=a, subset_by_index=subset_by_index))
+
+    # if symmetric:
+    #     eigvals, eigvecs = scipy.linalg.eigh(a=b)
+    # else:
+    #     eigvals, eigvecs = scipy.linalg.eig(a=b)
+
+    # mask = eigvals < np.sqrt(np.finfo(b.dtype).eps)
+    # eigvals[mask] = 0
+    # # eigvals, eigvecs = eigvals[~mask], eigvecs[:, ~mask]
+
+    # D = eigvecs * np.sqrt(eigvals)
+
+    # eigvals_inverse = 1 / eigvals
+    # eigvals_inverse[mask] = 0
+
+    # pinv = scipy.linalg.pinv(b)
+    # chol = scipy.linalg.cholesky(pinv)
+    # eigvals, eigvecs = scipy.linalg.eig(chol @ a @ chol.T)
+
+    # # D = scipy.linalg.cholesky(pinv)
+    # # eigvals, eigvecs = scipy.linalg.eigh(D.T @ a @ D)
+
+    # for eigval in eigvals:
+    #     print("")
+    #     print(eigval)
+    #     print(np.linalg.det(a - eigval * b))
+
+    # # assert np.allclose(eigvecs @ np.diag(eigvals) @ eigvecs.T, b)
+    # pinv = (eigvecs * np.sqrt(eigvals_inverse)).T
+    # breakpoint()
+    # eigvals, eigvecs = scipy.linalg.eig(pinv @ a @ pinv.T)
+    # eigvecs = pinv.T @ eigvecs
+
+    # breakpoint()
+    # # assert np.allclose(a @ eigvecs, b @ eigvecs * eigvals)
+
+    # return scipy.linalg.eigvalsh(pinv @ a @ pinv.T, subset_by_index=subset_by_index)
+    # breakpoint()
+
+    # U, S, Vh = scipy.linalg.svd(b)
+    # mask = S < np.sqrt(np.finfo(b.dtype).eps)
+    # U, S, Vh = U[:, ~mask], S[~mask], Vh[~mask, :]
+    # breakpoint()
+    # res = scipy.linalg.eigvals(a=np.linalg.solve(b, a))
+
+    # x = np.linspace(0, 0.03, 100)
+    # y = np.zeros_like(x)
+    # for i, x_ in enumerate(x):
+    #     y[i] = np.linalg.det(b * x_ - a)
+
+    # breakpoint()