Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 39 additions & 4 deletions synthlearners/mcnnm.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import numpy as np
import mlpack
import scipy
from synthlearners.crossvalidation import PanelCrossValidator, cross_validate


Expand All @@ -25,7 +27,7 @@ class MatrixCompletionEstimator:

"""

def __init__(self, max_iter=500, tol=1e-4, verbose=False):
def __init__(self, max_iter=500, tol=1e-4, verbose=False, svd_method="numpy"):
"""
Parameters:
lambda_param: regularization strength (the weight on the nuclear norm penalty)
Expand All @@ -37,7 +39,40 @@ def __init__(self, max_iter=500, tol=1e-4, verbose=False):
self.tol = tol
self.verbose = verbose
self.completed_matrix_ = None
self.svd_method = svd_method # Options: "numpy", "mlpack", "scipy"

def internal_svd(self, A, compute_uv=True, full_matrices=True):
"""
Compute the Singular Value Decomposition (SVD) of a matrix.

Parameters:
A: Input matrix to decompose.
compute_uv: Whether to compute the full U and Vt matrices.
full_matrices:

Returns:
U: Left singular vectors.
s: Singular values.
Vt: Right singular vectors (transposed).
"""
if compute_uv is True:
if self.svd_method == "numpy":
U, s, Vt = np.linalg.svd(A, compute_uv=compute_uv, full_matrices=full_matrices)
#if self.svd_method == "mlpack":
#U, s, Vt = mlpack.svd(A, compute_uv=compute_uv, full_matrices=full_matrices)
if self.svd_method == "scipy":
U, s, Vt = scipy.linalg.svd(A, compute_uv=compute_uv, full_matrices=full_matrices)
return U, s, Vt
else:
if self.svd_method == "numpy":
s = np.linalg.svd(A, compute_uv=compute_uv, full_matrices=full_matrices)
#if self.svd_method == "mlpack":
#s = mlpack.svd(A, compute_uv=compute_uv, full_matrices=full_matrices)
if self.svd_method == "scipy":
s = scipy.linalg.svd(A, compute_uv=compute_uv, full_matrices=full_matrices)
return s


def shrink_lambda(self, A, threshold):
"""
Apply singular value thresholding (soft-thresholding of singular values).
Expand All @@ -49,7 +84,7 @@ def shrink_lambda(self, A, threshold):
Returns:
The matrix after applying soft-thresholding to its singular values.
"""
U, s, Vt = np.linalg.svd(A, full_matrices=False)
U, s, Vt = self.internal_svd(A, full_matrices=False)
# Soft-threshold the singular values.
s_thresholded = np.maximum(s - threshold, 0)
return U @ np.diag(s_thresholded) @ Vt, s_thresholded
Expand Down Expand Up @@ -192,7 +227,7 @@ def initialize_uv(self, M, mask, to_estimate_u=True, to_estimate_v=True):
P_omega = (M - E) * mask

# Compute SVD and find lambda_L_max
singular_values = np.linalg.svd(P_omega, compute_uv=False)
singular_values = self.internal_svd(P_omega, compute_uv=False)
lambda_L_max = 2.0 * np.max(singular_values) / np.sum(mask)

return {"u": u, "v": v, "lambda_L_max": lambda_L_max}
Expand Down Expand Up @@ -254,7 +289,7 @@ def NNM_fit(self, M, mask, lambda_L, to_estimate_u=True, to_estimate_v=True):

M, mask, L, u, v = map(np.asarray, (M, mask, L_init, u_init, v_init))

sing = np.linalg.svd(L, compute_uv=False) # Compute singular values
sing = self.internal_svd(L, compute_uv=False) # Compute singular values
sum_sigma = np.sum(sing)
obj_val = self.compute_objval(M, mask, L, u, v, sum_sigma, lambda_L)
term_iter = 0
Expand Down
94 changes: 94 additions & 0 deletions tests/test_mcnnm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import pytest
import numpy as np
import mlpack
import scipy
from synthlearners.mcnnm import MatrixCompletionEstimator

@pytest.fixture(scope="module")
def svd_methods():
#svd_methods = ["numpy", "scipy","mlpack"]
svd_methods = ["numpy", "scipy"]
return svd_methods

@pytest.fixture
def sample_data():
np.random.seed(42)
n, t = 10, 8
true_matrix = np.random.randn(n, t)
mask = np.random.binomial(1, 0.8, size=(n,t))
observed = true_matrix * mask
return true_matrix, observed, mask

def test_svd_methods(sample_data,svd_methods):
true_matrix, observed, mask = sample_data

for method in svd_methods:
mc = MatrixCompletionEstimator(svd_method=method)
completed = mc.fit(observed, mask)

# Check output dimensions
assert completed.shape == true_matrix.shape

# Check if observed entries are preserved (approximately)
np.testing.assert_allclose(
completed[mask == 1],
observed[mask == 1],
rtol=1e-1
)

# Check if completion produces finite values
assert np.all(np.isfinite(completed))

# Check error metric
mse = mc.score(true_matrix, completed, mask)
assert mse >= 0
assert np.isfinite(mse)
print(f"SVD Method: {method} works, MSE: {mse}")

def test_fixed_effects_with_svd_methods(sample_data,svd_methods):
true_matrix, observed, mask = sample_data

for method in svd_methods:
# Test with unit fixed effects
mc = MatrixCompletionEstimator(svd_method=method)
completed = mc.fit(observed, mask, unit_intercept=True)
assert completed.shape == true_matrix.shape

# Test with time fixed effects
mc = MatrixCompletionEstimator(svd_method=method)
completed = mc.fit(observed, mask, time_intercept=True)
assert completed.shape == true_matrix.shape

# Test with both fixed effects
mc = MatrixCompletionEstimator(svd_method=method)
completed = mc.fit(observed, mask, unit_intercept=True, time_intercept=True)
assert completed.shape == true_matrix.shape

def test_convergence_with_svd_methods(sample_data,svd_methods):
true_matrix, observed, mask = sample_data

for method in svd_methods:
# Test with strict tolerance
mc = MatrixCompletionEstimator(svd_method=method, tol=1e-6, max_iter=1000)
completed = mc.fit(observed, mask)
assert completed.shape == true_matrix.shape

# Test with loose tolerance
mc = MatrixCompletionEstimator(svd_method=method, tol=1e-2, max_iter=50)
completed = mc.fit(observed, mask)
assert completed.shape == true_matrix.shape

# Test the run time by svd method
def test_runtime_with_svd_methods(sample_data,svd_methods):
import time
true_matrix, observed, mask = sample_data
runtimes = {}
for method in svd_methods:
start_time = time.time()
mc = MatrixCompletionEstimator(svd_method=method)
completed = mc.fit(observed, mask)
end_time = time.time()
runtimes[method] = end_time - start_time
assert completed.shape == true_matrix.shape
print("Runtimes by SVD method:", runtimes)
#assert 0==1 # Force to see runtimes