
Commit 1689114

Author: Richard Michael (committed)
added HK testing and wHK testing and implementation
1 parent 18388a1 commit 1689114

File tree

7 files changed: +235 −38 lines changed


mlruns/0/meta.yaml (+6)

@@ -0,0 +1,6 @@
+artifact_location: file:///Users/rcml/corel/mlruns/0
+creation_time: 1696424246894
+experiment_id: '0'
+last_update_time: 1696424246894
+lifecycle_stage: active
+name: Default

src/corel/kernel/__init__.py (+1)

@@ -3,3 +3,4 @@
 from .hellinger import _k
 from .hellinger import _hellinger_distance
 from .hellinger_reference import HellingerReference
+from .hellinger import Hellinger

src/corel/kernel/hellinger_reference.py (+13 −8)

@@ -1,6 +1,6 @@
 from typing import Optional
 import tensorflow as tf
-
+import numpy as np
 import gpflow
 from gpflow.kernels import Kernel
 from gpflow.utilities import positive
@@ -18,7 +18,12 @@ def __init__(self, L:int, AA:int, lengthscale: float=1.0, noise: float=0.1, acti
     def restore(self, ps: tf.Tensor) -> tf.Tensor:
         ps = tf.squeeze(ps)
         N = 1 if len(ps.shape) == 1 else ps.shape[0]
-        return tf.reshape(ps, shape=(N, ps.shape[-1] // self.AA, self.AA))
+        if ps.shape[-1] != self.AA:
+            return tf.reshape(ps, shape=(N, ps.shape[-1] // self.AA, self.AA))
+        elif ps.shape[0] == N and ps.shape[1] == self.L and ps.shape[-1] == self.AA:
+            return ps
+        else:
+            raise ValueError(f"Vector p shape incorrect! {ps.shape}")

     def K(self, X, X2=None) -> tf.Tensor:
         if X2 is None:
@@ -32,18 +37,18 @@ def K_diag(self, X) -> tf.Tensor:
         return tf.ones(X.shape[0])

     def _assert_X_values(self, X: tf.Tensor, tol: float) -> bool:
-        return tf.all(tf.abs(tf.math.reduce_sum(X, axis=-1) - 1.) < tol)
+        return (tf.abs(tf.math.reduce_sum(X, axis=-1) - 1.) < tol).numpy().all()

     def _hellinger2(self, X: tf.Tensor, X2: tf.Tensor, tol: float=1e-5):
-        M = tf.zeros([X.shape[0], X2.shape[0]])
+        M = np.zeros([X.shape[0], X2.shape[0]], dtype=np.float64)
         X = self.restore(X)
         X2 = self.restore(X2)
         assert self._assert_X_values(X, tol)
         assert self._assert_X_values(X2, tol)
-        _tmp = tf.zeros(self.L)
+        _tmp = np.zeros(self.L, dtype=np.float64)
         for x_idx in range(X.shape[0]):
             for y_idx in range(X2.shape[0]):
                 for l_idx in range(self.L):
-                    _tmp[l_idx] = tf.math.reduce_sum(tf.math.sqrt(X[x_idx, l_idx, :] * X2[y_idx, l_idx, :]))
-                M[x_idx, y_idx] = 1 - tf.reduce_prod(_tmp)
-        return M
+                    _tmp[l_idx] = tf.math.reduce_sum(tf.math.sqrt(X[x_idx, l_idx, :] * X2[y_idx, l_idx, :])).numpy()  # NOTE: item assignment is supported by numpy arrays, not by TF tensors
+                M[x_idx, y_idx] = (1 - tf.reduce_prod(_tmp)).numpy()  # NOTE: build the np matrix by assignment
+        return tf.convert_to_tensor(M)
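
For orientation (an editorial note, not part of the commit): entry-wise, _hellinger2 computes the squared Hellinger distance between two sequence distributions p and q that factorize over positions,

    d^2(p, q) = 1 - \prod_{l=1}^{L} \sum_{a \in AA} \sqrt{p_{l,a} \, q_{l,a}}

with the inner sum over the alphabet at position l and the outer product over the L positions.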

src/corel/kernel/weighted_hellinger.py (+21 −11)

@@ -9,14 +9,13 @@


 class WeightedHellinger(Hellinger):
-    def __init__(self, z: tf.Tensor, L: int, AA: int, lengthscale: float=1.0, noise: float=0.1, active_dims: Optional[int] = None, name: Optional[str] = None) -> None:
+    def __init__(self, w: tf.Tensor, L: int, AA: int, lengthscale: float=1.0, noise: float=0.1, active_dims: Optional[int] = None, name: Optional[str] = None) -> None:
         super().__init__(L=L, AA=AA, active_dims=active_dims, name=name)
-        self.z = z
-        # TODO assert p in [0,1]
+        self.w = w  # weighting density vector
         self.lengthscale = gpflow.Parameter(lengthscale, transform=positive())  # TODO: log transform here?
         self.noise = gpflow.Parameter(noise, transform=positive())  # TODO: check against Kernel interface

-    def K(self, X, X2=None) -> tf.Tensor:
+    def K(self, X: tf.Tensor, X2: Optional[tf.Tensor] = None) -> tf.Tensor:
         """
         X input is P(X)
         """
@@ -40,15 +39,26 @@ def K(self, X, X2=None) -> tf.Tensor:
         M = tf.reshape(M, shape=(1, M.shape[0], 1, M.shape[1]))  # adhere to [batch..., N1, batch..., N2]
         return M

-    def _H(self, X: tf.Tensor, X2: tf.Tensor):
-        raise NotImplementedError("TODO: implement weighting by expected value")
+    def _get_inner_product(self, X: tf.Tensor, X2: tf.Tensor) -> tf.Tensor:
+        """
+        Compute the RHS of the weighted HK equation: the weighting times sqrt(p[a_l, l] * q[a_l, l])
+        """
+        # M = tf.math.reduce_sum(self.w * tf.sqrt(X[None, ...] * X2[:, None, ...]), axis=-1)
+        # NOTE: the einsum and reduce_sum products should be equivalent
+        M = tf.einsum('ali,bli->abl', tf.sqrt(tf.pow(self.w, 2) * X), tf.sqrt(X2))
+        return tf.math.reduce_prod(M, axis=-1)  # product over L: positions factorize
+
+    def _compute_lhs(self, X: tf.Tensor, X2: tf.Tensor) -> tf.Tensor:
+        w_p = tf.math.reduce_sum(self.w * X[None, ...], axis=-1) / 2
+        w_q = tf.math.reduce_sum(self.w * X2[:, None, ...], axis=-1) / 2
+        return tf.math.reduce_prod(w_p + w_q, axis=-1)
+
+    def _H(self, X: tf.Tensor, X2: tf.Tensor) -> tf.Tensor:
         M = self._get_inner_product(X, X2)
-        # TODO: correctly compute the z vector!
-        # z = tf.reduce_sum(tf.squeeze(self.z), -1)[None:]
-        M = z @ tf.transpose(z) - M
-        # M[M < 0.] = 0.
+        # NOTE: the LHS is the expectation with equal weights; it could use a different weighting
+        weighted_E = self._compute_lhs(X, X2)
+        M = weighted_E - M
         M = tf.where(M < 0., tf.zeros_like(M), M)
-
         M = tf.where(M == 0., tf.zeros_like(M), M)  # fix gradients
         M = tf.exp(-tf.sqrt(M) / tf.square(self.lengthscale))
         return M
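
In the same notation (an editorial sketch grounded in _get_inner_product, _compute_lhs, and _H above), the weighted kernel computes

    d_w^2(p, q) = \prod_{l=1}^{L} \sum_{a \in AA} \frac{w_{l,a} (p_{l,a} + q_{l,a})}{2} - \prod_{l=1}^{L} \sum_{a \in AA} w_{l,a} \sqrt{p_{l,a} \, q_{l,a}}

    k(p, q) = \exp\left( -\sqrt{d_w^2(p, q)} / \lambda^2 \right)

The einsum relies on the identity \sqrt{w^2 p} \cdot \sqrt{q} = w \sqrt{p q} for w \ge 0. Note that the naive test kernel in the new test file below is parameterized as \theta \exp(-\lambda d) instead; the two forms coincide at \theta = \lambda = 1, which are the values the tests fix.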
(new test file, +194 — file path not shown in this view)

@@ -0,0 +1,194 @@
+from typing import Callable
+import pytest
+import inspect
+import numpy as np
+from corel.kernel.hellinger import get_mean_and_amplitude
+from corel.kernel.hellinger import _hellinger_distance
+from corel.kernel.hellinger import _k
+from corel.kernel import HellingerReference
+from corel.kernel import Hellinger
+from corel.kernel import WeightedHellinger
+import tensorflow as tf
+import matplotlib.pyplot as plt
+
+# define test sequences, a test alphabet, and test weighting distributions
+SEED = 12
+N = 20
+L = 15
+AA = 3
+np.random.seed(SEED)
+
+simulated_decoding_distributions = np.stack([
+    np.random.dirichlet(np.ones(AA), L) for _ in range(N)
+])
+
+simulated_weighting_vec = np.random.dirichlet(np.ones(AA), L)
+
+
+@pytest.mark.parametrize("dist", [simulated_decoding_distributions])  # , simulated_weighting_vec])
+def test_simulated_dist_is_probabilities(dist):
+    summed_dist = np.sum(dist, axis=-1)
+    np.testing.assert_almost_equal(summed_dist, np.ones((N, L)))
+    np.testing.assert_array_less(dist, np.ones_like(dist))
+    np.testing.assert_array_less(np.zeros_like(dist), dist)
+
+
+def test_simulated_w_vec_is_probabilities():
+    summed_dist = np.sum(simulated_weighting_vec, axis=-1)
+    np.testing.assert_almost_equal(summed_dist, np.ones_like(summed_dist))
+
+
+def really_naive_r(p_x: np.ndarray, q_y: np.ndarray):
+    # NOTE: unused; sums across positions instead of taking the per-position product,
+    # so for L > 1 the argument of the square root can become negative
+    assert p_x.shape[0] == q_y.shape[0] and p_x.shape[1] == q_y.shape[1], "Input distributions inconsistent"
+    L = p_x.shape[0]
+    AA = p_x.shape[1]
+    dist_prod_sum_across_sequence = 0.
+    for l in range(L):
+        for a in range(AA):
+            dist_prod_sum_across_sequence += np.sqrt(p_x[l, a] * q_y[l, a])
+    return np.sqrt(1 - dist_prod_sum_across_sequence)
+
+
+def naive_r(p_x: np.ndarray, q_y: np.ndarray):
+    """
+    p, q are probability distributions (i.e. decoder distributions)
+    """
+    # assumption: sequences x, y are of shape (L, |AA|), with L the sequence length and |AA| the alphabet size
+    assert p_x.shape[0] == q_y.shape[0] and p_x.shape[1] == q_y.shape[1], "Input distributions inconsistent"
+    if np.all(p_x == q_y):
+        # the Hellinger distance between equal distributions is 0, but numerically this could fail:
+        # summed_pq_vals_across_sequence can become slightly larger than 1, yielding NaNs when taking the square root
+        return 0.
+    L = p_x.shape[0]
+    AA = p_x.shape[1]
+    summed_pq_vals_across_sequence = []
+    for l in range(L):
+        alphabet_prod_vals = []
+        for a in range(AA):
+            pq_sqrt_prod = np.sqrt(p_x[l, a] * q_y[l, a])  # TODO: for weighting, add the weighting dist here
+            alphabet_prod_vals.append(pq_sqrt_prod)
+        summed_alphabet_vals = np.sum(alphabet_prod_vals)
+        summed_pq_vals_across_sequence.append(summed_alphabet_vals)
+    dist_prod_sum_across_sequence = np.prod(summed_pq_vals_across_sequence)
+    assert dist_prod_sum_across_sequence <= 1
+    return np.sqrt(1 - dist_prod_sum_across_sequence)
+
+
+def naive_r_w(p_x: np.ndarray, q_y: np.ndarray, w: np.ndarray):
+    """
+    p, q are probability distributions (i.e. decoder distributions),
+    w is the weighting distribution (i.e. decoder output)
+    """
+    # assumption: sequences x, y are of shape (L, |AA|), with L the sequence length and |AA| the alphabet size
+    assert p_x.shape[0] == q_y.shape[0] and p_x.shape[1] == q_y.shape[1] and p_x.shape[0] == w.shape[0], "Input distributions inconsistent"
+    if np.all(p_x == q_y):
+        # the Hellinger distance between equal distributions is 0, but numerically this could fail:
+        # summed_pq_vals_across_sequence can become slightly larger than 1, yielding NaNs when taking the square root
+        return 0.
+    L = p_x.shape[0]
+    AA = p_x.shape[1]
+    summed_pq_vals_across_sequence = []
+    for l in range(L):
+        alphabet_prod_vals = []
+        for a in range(AA):
+            pq_sqrt_prod = w[l, a] * np.sqrt(p_x[l, a] * q_y[l, a])
+            alphabet_prod_vals.append(pq_sqrt_prod)
+        summed_alphabet_vals = np.sum(alphabet_prod_vals)
+        summed_pq_vals_across_sequence.append(summed_alphabet_vals)
+    dist_prod_sum_across_sequence = np.prod(summed_pq_vals_across_sequence)
+    assert dist_prod_sum_across_sequence <= 1
+    lhs_weighted_pq_values = []
+    for l in range(L):
+        alphabet_prod_vals = []
+        for a in range(AA):
+            weighted_pq_sum = 1/2 * w[l, a] * p_x[l, a] + 1/2 * w[l, a] * q_y[l, a]
+            alphabet_prod_vals.append(weighted_pq_sum)
+        summed_alphabet_vals = np.sum(alphabet_prod_vals)
+        lhs_weighted_pq_values.append(summed_alphabet_vals)
+    lhs_expectation = np.prod(lhs_weighted_pq_values)
+    return np.sqrt(lhs_expectation - dist_prod_sum_across_sequence)
+
+
+# naive Hellinger kernel implementation
+def naive_kernel(p: np.ndarray, q: np.ndarray, theta: float, lam: float) -> float:
+    """
+    Naive Hellinger distance and covariance computation.
+    inputs: p, q distribution vectors;
+            theta, lam covariance function parameters
+    returns: kernel value
+    """
+    distance_mat = np.zeros((p.shape[0], q.shape[0]))
+    for i in range(distance_mat.shape[0]):
+        for j in range(distance_mat.shape[1]):
+            distance_mat[i, j] = naive_r(p[i], q[j])
+            if not np.isfinite(distance_mat[i, j]):
+                print("Introduced NaN here!")
+    return theta * np.exp(-lam * distance_mat)
+
+
+def naive_weighted_kernel(p: np.ndarray, q: np.ndarray, w: np.ndarray, theta: float, lam: float):
+    distance_mat = np.zeros((p.shape[0], q.shape[0]))
+    for i in range(distance_mat.shape[0]):
+        for j in range(distance_mat.shape[1]):
+            distance_mat[i, j] = naive_r_w(p[i], q[j], w)
+            if not np.isfinite(distance_mat[i, j]):
+                print("NaN here!")
+    return theta * np.exp(-lam * distance_mat)
+
+
+# Simon's implementation assumes p_0 is [N, 1]
+
+# def test_kernel_functions_distance_against_naive():  # TODO: not comparable as-is: _hellinger_distance expects a 1d vector, not one that is |AA| elements deep!
+#     p_0 = simulated_decoding_distributions[:, :, 0]  # not a weighted p-vec; for comparison against [N, 1], make a ones vector of that
+#     dist_kernelmodule_function = _hellinger_distance(p_0)
+#     dist_naive = naive_r(p_0, p_0)
+#     np.testing.assert_almost_equal(dist_kernelmodule_function, dist_naive)
+
+
+# def test_kernel_functions_k_against_naive_k():  # TODO: requires understanding of HD
+#     lam = 0.5
+#     noise = 0.01
+#     module_dist = _hellinger_distance(simulated_decoding_distributions)
+#     # NOTE: Simon's _k is defined over atomic distributions for numerical efficiency, therefore not comparable here;
+#     # the test works only on one-hot vectors
+#     module_k = _k(module_dist, lengthscale=np.log(lam), log_noise=np.log(noise))
+#     naive_k = naive_kernel(simulated_decoding_distributions, simulated_decoding_distributions, theta=1, lam=lam)
+#     np.testing.assert_almost_equal(module_k, naive_k)
+
+
+def test_kernel_implementation_naive():
+    """
+    test the naive sum-product reference implementation against the GPflow reference implementation
+    """
+    theta = 1.
+    lam = 1.
+    naive_k_matrix = naive_kernel(simulated_decoding_distributions, simulated_decoding_distributions, theta=theta, lam=lam)
+    hk = Hellinger(L=L, AA=AA, lengthscale=lam)
+    hk_matrix = hk.K(simulated_decoding_distributions, simulated_decoding_distributions)[0].numpy()
+    np.testing.assert_allclose(hk_matrix, naive_k_matrix, rtol=1e-6)
+
+
+def test_weighted_kernel_implementation_naive():
+    theta = 1.
+    lam = 1.
+    whk = WeightedHellinger(w=tf.convert_to_tensor(simulated_weighting_vec), L=L, AA=AA, lengthscale=lam)
+    whk_matrix = whk.K(simulated_decoding_distributions, simulated_decoding_distributions)
+    naive_whk_matrix = naive_weighted_kernel(simulated_decoding_distributions, simulated_decoding_distributions, simulated_weighting_vec,
+                                             lam=lam, theta=theta)
+    np.testing.assert_allclose(naive_whk_matrix, whk_matrix[0], 5)  # TODO: cov. matrix shape from WHK
+
+
+# def test_kernel_functions_k():
+#     # TODO: test the k function in the hellinger module
+#     assert False
+
+# TODO: test the src/corel/kernel GPflow weighted implementation
+
+# def test_kernel_functions_hd():
+#     # TODO: test the distance function in the hellinger module
+#     assert False
+
+
+if __name__ == "__main__":  # NOTE: added so the debugger works!
+    test_kernel_implementation_naive()
+    test_weighted_kernel_implementation_naive()
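
The NOTE in WeightedHellinger._get_inner_product claims the commented-out reduce_sum form and the einsum form are equivalent. Below is a minimal standalone check of that identity (an editorial sketch, not part of the commit; shapes and variable names are illustrative), using Dirichlet inputs as in the tests above:

import numpy as np
import tensorflow as tf

rng = np.random.default_rng(12)
N1, N2, L, AA = 4, 3, 5, 3
X = tf.constant(np.stack([rng.dirichlet(np.ones(AA), L) for _ in range(N1)]))   # (N1, L, AA)
X2 = tf.constant(np.stack([rng.dirichlet(np.ones(AA), L) for _ in range(N2)]))  # (N2, L, AA)
w = tf.constant(rng.dirichlet(np.ones(AA), L))                                  # (L, AA)

# broadcast/reduce_sum form: sum_a w[l,a] * sqrt(p[l,a] * q[l,a]); output shape (N2, N1, L)
M_sum = tf.math.reduce_sum(w * tf.sqrt(X[None, ...] * X2[:, None, ...]), axis=-1)

# einsum form from the commit: sqrt(w^2 * p) . sqrt(q) over the alphabet axis; output shape (N1, N2, L)
M_einsum = tf.einsum('ali,bli->abl', tf.sqrt(tf.pow(w, 2) * X), tf.sqrt(X2))

# since w >= 0, sqrt(w^2 * p) * sqrt(q) == w * sqrt(p * q) elementwise
np.testing.assert_allclose(M_sum.numpy(), tf.transpose(M_einsum, perm=[1, 0, 2]).numpy())

One caveat the NOTE glosses over: the two forms index X and X2 in opposite order, so they agree only up to transposing the first two output axes, as the final assertion shows.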

src/corel/test/test_kernel/test_hellinger_reference.py (−19)

This file was deleted.
