Commits
2844fdd
Add LASSO without structure to LinearRegression notebook
Blunde1 Dec 27, 2023
b0552e0
Transport observation samples, not deterministic responses
Blunde1 Jan 3, 2024
36c1f5c
Add specified observation noise, not just standard normal
Blunde1 Jan 3, 2024
c71b6c6
Run black
Blunde1 Jan 3, 2024
6f130a6
Run ruff
Blunde1 Jan 3, 2024
40b1e0e
Copy linear_l1_regression from graphite-maps
Blunde1 Jan 3, 2024
d2810b1
Rename parameters in linear_l1_regression for easier understanding
Blunde1 Jan 3, 2024
6a3b199
add sklearn and tqdm to docs
Blunde1 Jan 3, 2024
1474d54
Tidy up text in notebook
Blunde1 Jan 5, 2024
6310ad2
First draft and most important thoughts on objective in kalman-type m…
Blunde1 Jan 12, 2024
3fbc8eb
Strong opinions on language for perturbations added
Blunde1 Jan 15, 2024
7da4ea2
Be more consistent in mathematical notation
Blunde1 Jan 15, 2024
2f5ce09
Strong opinions on nll vs ls objectives even when equivalent estimators
Blunde1 Jan 15, 2024
b3c446a
TLDR and more strong opinions
Blunde1 Jan 17, 2024
e68a30c
Add explicit how to evaluate different methods
Blunde1 Jan 17, 2024
9614d63
Test to see if github renders block-level math
Blunde1 Jan 17, 2024
f042a0c
add newlines to make github recognize block-math format
Blunde1 Jan 17, 2024
a35aa19
Format equations in minimize model information-loss
Blunde1 Jan 17, 2024
a5a89b3
Use ^\ast instead of ^*
Blunde1 Jan 17, 2024
44fd48d
Brackets to clearly separate expressions
Blunde1 Jan 17, 2024
5d7ee69
Replace dollarsigns with mathblocks some places
Blunde1 Jan 17, 2024
50a4ced
Use github formatting for inline math
Blunde1 Jan 17, 2024
941c529
Comments on lasso without structure
Blunde1 Jan 17, 2024
5110a98
Comments on human understandable for enif
Blunde1 Jan 17, 2024
6b159e7
Make sure we condition on the correct d
Blunde1 Jan 17, 2024
a47ff3d
Be very clear on evaluation on test-data
Blunde1 Jan 17, 2024
d408c49
Fix typo
Blunde1 Jan 18, 2024
bc86d68
follows --> follow
Blunde1 Jan 22, 2024
e3142da
is -> are
Blunde1 Jan 22, 2024
3b99956
Fix typo "empirical"
Blunde1 Jan 22, 2024
d365049
Fix typo works->work
Blunde1 Jan 22, 2024
5775872
Fix typo yields->yield
Blunde1 Jan 22, 2024
142 changes: 137 additions & 5 deletions docs/source/LinearRegression.py
@@ -6,7 +6,7 @@
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.15.2
# jupytext_version: 1.16.0
# kernelspec:
# display_name: Python 3 (ipykernel)
# language: python
@@ -34,6 +34,9 @@
# %%
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

from iterative_ensemble_smoother import ESMDA

@@ -49,9 +49,10 @@

# %%
num_parameters = 25
num_observations = 100
num_ensemble = 30
num_observations = 50
num_ensemble = 100
prior_std = 1
obs_sd = 1.0

# %%
rng = np.random.default_rng(42)
@@ -67,14 +67,14 @@ def g(X):

# Create observations: obs = g(x) + N(0, obs_sd**2)
x_true = np.linspace(-1, 1, num=num_parameters)
observation_noise = rng.standard_normal(size=num_observations)
observation_noise = obs_sd * rng.standard_normal(size=num_observations)
observations = g(x_true) + observation_noise

# Initial ensemble X ~ N(0, prior_std**2)
X = rng.normal(size=(num_parameters, num_ensemble)) * prior_std

# Covariance matches the noise added to observations above
covariance = np.ones(num_observations)
covariance = np.ones(num_observations) * obs_sd**2

# %% [markdown]
# ## Solve the maximum likelihood problem
@@ -170,3 +174,131 @@ def g(X):
plt.grid(True, ls="--", zorder=0, alpha=0.33)
plt.legend()
plt.show()


# %% [markdown]
# ## Solve using LASSO without structure
#
# The Kalman gain can be estimated through multiple linear regression of
# $x$ onto $d$.
# This view has some implications.
# - Modern linear regression routines (LASSO, ridge, and others) can be used
#   to solve for $K$. This is particularly useful for $p \gg n$ problems,
#   which are typical for ensemble methods.
# - We lose the ability to specify that the randomness entering $d$ through
#   $x$ and through $\epsilon$ is independent.
# - We also lose the ability to specify structure in the prior through the
#   covariance.
#
# Below we showcase how the LASSO algorithm can be run once per parameter to
# estimate the Kalman gain $K$.
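
# %% [markdown]
# Before that, a small aside (a minimal, self-contained sketch on toy data,
# with illustrative variable names that are not used elsewhere in this
# notebook): for jointly sampled ensembles of $x$ and $d$, the empirical
# Kalman gain $\hat{C}_{xd} \hat{C}_{dd}^{-1}$ equals the ordinary
# least-squares coefficients of regressing $x$ onto $d$, which is the identity
# the regression view rests on.

# %%
# Toy ensembles: 3 parameters, 4 responses, 1000 realizations
rng_toy = np.random.default_rng(0)
x_toy = rng_toy.normal(size=(3, 1000))
d_toy = rng_toy.normal(size=(4, 3)) @ x_toy + 0.1 * rng_toy.normal(size=(4, 1000))

# Empirical Kalman gain from the joint covariance of (x, d)
C_joint = np.cov(x_toy, d_toy)
K_cov = C_joint[:3, 3:] @ np.linalg.inv(C_joint[3:, 3:])

# Ordinary least-squares coefficients of x regressed onto d (centered data)
x_c = x_toy - x_toy.mean(axis=1, keepdims=True)
d_c = d_toy - d_toy.mean(axis=1, keepdims=True)
K_ols = np.linalg.lstsq(d_c.T, x_c.T, rcond=None)[0].T

# The two estimates agree up to numerical precision
print(np.max(np.abs(K_cov - K_ols)))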


# %%
def linear_l1_regression(D, X):
    """
    Performs LASSO regression for each response in X against predictors in D,
    constructing a sparse matrix of regression coefficients.

    The function scales features in D using standard scaling before applying
    LASSO, then re-scales the coefficients back to the original scales of D
    and X. This extracts the effect of each feature in D on each response in
    X, ignoring intercepts and constant terms.

    Parameters
    ----------
    D : np.ndarray
        2D array of predictors with shape (n, p).
    X : np.ndarray
        2D array of responses with shape (n, m).

    Returns
    -------
    H : np.ndarray
        2D array with shape (m, p) of re-scaled LASSO regression coefficients,
        one row per response in X.

    Raises
    ------
    AssertionError
        If the number of samples in D and X do not match, or if the shape of
        H is not (m, p).
    """
    n, p = D.shape  # p: number of features
    n_y, m = X.shape  # m: number of y responses

    # Assert that the first dimension of D and X are the same
    assert n == n_y, "Number of samples in D and X must be the same"

    scaler_d = StandardScaler()
    D_scaled = scaler_d.fit_transform(D)

    scaler_x = StandardScaler()
    X_scaled = scaler_x.fit_transform(X)

    # Loop over responses
    H = np.zeros((m, p))
    for j in tqdm(range(m), desc="Learning sparse linear map for each response"):
        x_j = X_scaled[:, j]

        # Learn individual regularization strength and fit
        eps = 1e-3
        max_iter = 10000
        model_cv = LassoCV(cv=10, fit_intercept=False, max_iter=max_iter, eps=eps)
        model_cv.fit(D_scaled, x_j)

        # Extract coefficients and map them back to the original scale
        for non_zero_ind in model_cv.coef_.nonzero()[0]:
            H[j, non_zero_ind] = (
                scaler_x.scale_[j]
                * model_cv.coef_[non_zero_ind]
                / scaler_d.scale_[non_zero_ind]
            )

    # Assert shape of H
    assert H.shape == (m, p), "Shape of H must be (m, p)"

    return H
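

# %% [markdown]
# A note on the re-scaling inside `linear_l1_regression` (our reading of the
# code above, stated explicitly for clarity): with predictors and responses
# standardized by their empirical standard deviations $\sigma_{d_i}$ and
# $\sigma_{x_j}$, a LASSO coefficient $\tilde{\beta}_{ji}$ fitted on the
# scaled data maps back to the original scale as
#
# $$\beta_{ji} = \frac{\sigma_{x_j}}{\sigma_{d_i}} \tilde{\beta}_{ji},$$
#
# which is what the coefficient extraction above computes.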


# %%
# Learn Kalman gain
X_prior = np.copy(X)
Y = g(X_prior)
D = Y + obs_sd * rng.standard_normal(size=Y.shape)
K = linear_l1_regression(D=D.T, X=X_prior.T)

# %%
# Use Kalman gain in update equation
X_posterior = X_prior + K @ (observations - D.T).T
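
# %% [markdown]
# Written out per ensemble member $i$, the update above reads
#
# $$x_i^{\mathrm{posterior}} = x_i^{\mathrm{prior}} + K \left( y - d_i \right),$$
#
# where $y$ is the observation vector and $d_i = g(x_i^{\mathrm{prior}}) + \epsilon_i$
# is the corresponding perturbed prediction (column $i$ of $D$).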

# %%
plt.figure(figsize=(8, 3))
plt.scatter(np.arange(len(x_true)), x_true, label="True parameter values")
plt.scatter(np.arange(len(x_true)), x_ml, label="ML estimate (no prior)")
plt.scatter(
    np.arange(len(x_true)), np.mean(X_posterior, axis=1), label="Posterior mean"
)

# Loop over every ensemble member and plot it
for j in range(num_ensemble):
    # Jitter along the x-axis a little bit
    x_jitter = np.arange(len(x_true)) + rng.normal(loc=0, scale=0.1, size=len(x_true))

    # Plot this ensemble member
    plt.scatter(
        x_jitter,
        X_posterior[:, j],
        label=("Posterior values" if j == 0 else None),
        color="black",
        alpha=0.2,
        s=5,
        zorder=0,
    )
plt.xlabel("Parameter index")
plt.ylabel("Parameter value")
plt.grid(True, ls="--", zorder=0, alpha=0.33)
plt.legend()
plt.show()

# %%