diff --git a/synthlearners/mcnnm.py b/synthlearners/mcnnm.py index 8e6acc5..283cacd 100644 --- a/synthlearners/mcnnm.py +++ b/synthlearners/mcnnm.py @@ -1,4 +1,6 @@ import numpy as np +import mlpack +import scipy from synthlearners.crossvalidation import PanelCrossValidator, cross_validate @@ -25,7 +27,7 @@ class MatrixCompletionEstimator: """ - def __init__(self, max_iter=500, tol=1e-4, verbose=False): + def __init__(self, max_iter=500, tol=1e-4, verbose=False, svd_method="numpy"): """ Parameters: lambda_param: regularization strength (the weight on the nuclear norm penalty) @@ -37,7 +39,40 @@ def __init__(self, max_iter=500, tol=1e-4, verbose=False): self.tol = tol self.verbose = verbose self.completed_matrix_ = None + self.svd_method = svd_method # Options: "numpy", "mlpack", "scipy" + def internal_svd(self, A, compute_uv=True, full_matrices=True): + """ + Compute the Singular Value Decomposition (SVD) of a matrix. + + Parameters: + A: Input matrix to decompose. + compute_uv: Whether to compute the full U and Vt matrices. + full_matrices: + + Returns: + U: Left singular vectors. + s: Singular values. + Vt: Right singular vectors (transposed). + """ + if compute_uv is True: + if self.svd_method == "numpy": + U, s, Vt = np.linalg.svd(A, compute_uv=compute_uv, full_matrices=full_matrices) + #if self.svd_method == "mlpack": + #U, s, Vt = mlpack.svd(A, compute_uv=compute_uv, full_matrices=full_matrices) + if self.svd_method == "scipy": + U, s, Vt = scipy.linalg.svd(A, compute_uv=compute_uv, full_matrices=full_matrices) + return U, s, Vt + else: + if self.svd_method == "numpy": + s = np.linalg.svd(A, compute_uv=compute_uv, full_matrices=full_matrices) + #if self.svd_method == "mlpack": + #s = mlpack.svd(A, compute_uv=compute_uv, full_matrices=full_matrices) + if self.svd_method == "scipy": + s = scipy.linalg.svd(A, compute_uv=compute_uv, full_matrices=full_matrices) + return s + + def shrink_lambda(self, A, threshold): """ Apply singular value thresholding (soft-thresholding of singular values). @@ -49,7 +84,7 @@ def shrink_lambda(self, A, threshold): Returns: The matrix after applying soft-thresholding to its singular values. """ - U, s, Vt = np.linalg.svd(A, full_matrices=False) + U, s, Vt = self.internal_svd(A, full_matrices=False) # Soft-threshold the singular values. s_thresholded = np.maximum(s - threshold, 0) return U @ np.diag(s_thresholded) @ Vt, s_thresholded @@ -192,7 +227,7 @@ def initialize_uv(self, M, mask, to_estimate_u=True, to_estimate_v=True): P_omega = (M - E) * mask # Compute SVD and find lambda_L_max - singular_values = np.linalg.svd(P_omega, compute_uv=False) + singular_values = self.internal_svd(P_omega, compute_uv=False) lambda_L_max = 2.0 * np.max(singular_values) / np.sum(mask) return {"u": u, "v": v, "lambda_L_max": lambda_L_max} @@ -254,7 +289,7 @@ def NNM_fit(self, M, mask, lambda_L, to_estimate_u=True, to_estimate_v=True): M, mask, L, u, v = map(np.asarray, (M, mask, L_init, u_init, v_init)) - sing = np.linalg.svd(L, compute_uv=False) # Compute singular values + sing = self.internal_svd(L, compute_uv=False) # Compute singular values sum_sigma = np.sum(sing) obj_val = self.compute_objval(M, mask, L, u, v, sum_sigma, lambda_L) term_iter = 0 diff --git a/tests/test_mcnnm.py b/tests/test_mcnnm.py new file mode 100644 index 0000000..cfeb458 --- /dev/null +++ b/tests/test_mcnnm.py @@ -0,0 +1,94 @@ +import pytest +import numpy as np +import mlpack +import scipy +from synthlearners.mcnnm import MatrixCompletionEstimator + +@pytest.fixture(scope="module") +def svd_methods(): + #svd_methods = ["numpy", "scipy","mlpack"] + svd_methods = ["numpy", "scipy"] + return svd_methods + +@pytest.fixture +def sample_data(): + np.random.seed(42) + n, t = 10, 8 + true_matrix = np.random.randn(n, t) + mask = np.random.binomial(1, 0.8, size=(n,t)) + observed = true_matrix * mask + return true_matrix, observed, mask + +def test_svd_methods(sample_data,svd_methods): + true_matrix, observed, mask = sample_data + + for method in svd_methods: + mc = MatrixCompletionEstimator(svd_method=method) + completed = mc.fit(observed, mask) + + # Check output dimensions + assert completed.shape == true_matrix.shape + + # Check if observed entries are preserved (approximately) + np.testing.assert_allclose( + completed[mask == 1], + observed[mask == 1], + rtol=1e-1 + ) + + # Check if completion produces finite values + assert np.all(np.isfinite(completed)) + + # Check error metric + mse = mc.score(true_matrix, completed, mask) + assert mse >= 0 + assert np.isfinite(mse) + print(f"SVD Method: {method} works, MSE: {mse}") + +def test_fixed_effects_with_svd_methods(sample_data,svd_methods): + true_matrix, observed, mask = sample_data + + for method in svd_methods: + # Test with unit fixed effects + mc = MatrixCompletionEstimator(svd_method=method) + completed = mc.fit(observed, mask, unit_intercept=True) + assert completed.shape == true_matrix.shape + + # Test with time fixed effects + mc = MatrixCompletionEstimator(svd_method=method) + completed = mc.fit(observed, mask, time_intercept=True) + assert completed.shape == true_matrix.shape + + # Test with both fixed effects + mc = MatrixCompletionEstimator(svd_method=method) + completed = mc.fit(observed, mask, unit_intercept=True, time_intercept=True) + assert completed.shape == true_matrix.shape + +def test_convergence_with_svd_methods(sample_data,svd_methods): + true_matrix, observed, mask = sample_data + + for method in svd_methods: + # Test with strict tolerance + mc = MatrixCompletionEstimator(svd_method=method, tol=1e-6, max_iter=1000) + completed = mc.fit(observed, mask) + assert completed.shape == true_matrix.shape + + # Test with loose tolerance + mc = MatrixCompletionEstimator(svd_method=method, tol=1e-2, max_iter=50) + completed = mc.fit(observed, mask) + assert completed.shape == true_matrix.shape + + # Test the run time by svd method +def test_runtime_with_svd_methods(sample_data,svd_methods): + import time + true_matrix, observed, mask = sample_data + runtimes = {} + for method in svd_methods: + start_time = time.time() + mc = MatrixCompletionEstimator(svd_method=method) + completed = mc.fit(observed, mask) + end_time = time.time() + runtimes[method] = end_time - start_time + assert completed.shape == true_matrix.shape + print("Runtimes by SVD method:", runtimes) + #assert 0==1 # Force to see runtimes \ No newline at end of file