Changes from all commits
19 commits
7272cee
implemented class StackedRON for trying stacking Ron, plus some adjus…
yumetsuro Dec 10, 2024
284b007
Commented code + adjusted name to DeepRON and added concat to the las…
yumetsuro Dec 15, 2024
d85d280
Finishing to implement memory capacity task
yumetsuro Dec 15, 2024
18be430
still implementing memory capacity need to further debug it
yumetsuro Dec 16, 2024
85c4756
added weights and biases logging support
yumetsuro Dec 20, 2024
51c1e79
finished implementing memory capacity, and tested on ESN single layer…
yumetsuro Dec 21, 2024
b83c711
Memory capacity implemented with some testing and logging
yumetsuro Dec 22, 2024
53e746b
adjusted plots
yumetsuro Dec 22, 2024
ddcfd1a
fixed bug where memory capacity was counted two times
yumetsuro Dec 22, 2024
ce578e3
fixed problems with compatibility and precalculated activation for MC…
yumetsuro Feb 6, 2025
e86cdcf
Added conversion for double in esn, adjusted inter input scaling in r…
yumetsuro Feb 10, 2025
9ad8697
Fix deepron reservoir scaler, init of reservoir to -1,+1 and implemen…
yumetsuro Feb 13, 2025
14617b4
added seeding for reproducibility
yumetsuro Feb 15, 2025
88e2cf4
add wandb logging to ray tune and tweaked some parameters
yumetsuro Feb 17, 2025
fa50c3a
adjusted parameters for perturbation
yumetsuro Feb 17, 2025
fa286af
fixed h2h matrix initialization
yumetsuro Feb 18, 2025
127767e
added requirements
yumetsuro Feb 18, 2025
4c75c5d
init ron recurrent back to -2 +2 as in the paper, adapter adiac for d…
yumetsuro Feb 25, 2025
b4b9eab
add some flags to memcap
yumetsuro Feb 26, 2025
5 changes: 5 additions & 0 deletions .gitignore
@@ -1,3 +1,8 @@
# My directory files
MNIST/
thesis_venv/
wandb/

# Byte-compiled / optimized / DLL files
__pycache__/
.idea/
4 changes: 2 additions & 2 deletions acds/archetypes/__init__.py
@@ -1,4 +1,4 @@
from .ron import RandomizedOscillatorsNetwork
from .ron import RandomizedOscillatorsNetwork, DeepRandomizedOscillatorsNetwork
from .pron import (PhysicallyImplementableRandomizedOscillatorsNetwork,
MultistablePhysicallyImplementableRandomizedOscillatorsNetwork)
from .trainable_pron import TrainedPhysicallyImplementableRandomizedOscillatorsNetwork
@@ -7,7 +7,7 @@
from .rnn import LSTM, RNN_DFA, GRU_DFA
from .utils import *

__all__ = ["RandomizedOscillatorsNetwork", "DeepReservoir", "LSTM", "RNN_DFA", "GRU_DFA",
__all__ = ["RandomizedOscillatorsNetwork", "DeepRandomizedOscillatorsNetwork", "DeepReservoir", "LSTM", "RNN_DFA", "GRU_DFA",
"PhysicallyImplementableRandomizedOscillatorsNetwork",
"MultistablePhysicallyImplementableRandomizedOscillatorsNetwork",
"TrainedPhysicallyImplementableRandomizedOscillatorsNetwork",
19 changes: 11 additions & 8 deletions acds/archetypes/esn.py
@@ -108,10 +108,10 @@ def forward(self, xt: torch.Tensor, h_prev: torch.Tensor):
torch.Tensor: hidden state tensor shaped as (batch, time, state_dim).
torch.Tensor: hidden state tensor shaped as (batch, time, state_dim).
"""
input_part = torch.mm(xt, self.kernel)
state_part = torch.mm(h_prev, self.recurrent_kernel)
input_part = torch.mm(xt, self.kernel.to(dtype=xt.dtype))
state_part = torch.mm(h_prev.to(dtype=xt.dtype), self.recurrent_kernel.to(dtype=xt.dtype))

output = torch.tanh(input_part + self.bias + state_part)
output = torch.tanh(input_part + self.bias.to(dtype=xt.dtype) + state_part.to(dtype=xt.dtype))
leaky_output = h_prev * (1 - self.leaky) + output * self.leaky
return leaky_output, leaky_output

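The added casts let the reservoir run on double-precision inputs (cf. the "Added conversion for double in esn" commit). A minimal sketch of the same leaky update outside the class, with hypothetical parameter names:

```python
import torch

def leaky_esn_step(xt, h_prev, kernel, recurrent_kernel, bias, leaky=1.0):
    # Cast parameters to the input dtype so float64 inputs also work.
    output = torch.tanh(
        xt @ kernel.to(xt.dtype)
        + bias.to(xt.dtype)
        + h_prev.to(xt.dtype) @ recurrent_kernel.to(xt.dtype)
    )
    # Leaky integration: convex combination of previous and new state.
    return h_prev * (1 - leaky) + output * leaky
```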
@@ -216,6 +216,7 @@ def __init__(
connectivity_recurrent: int = 10,
connectivity_input: int = 10,
connectivity_inter: int = 10,
all: bool = False,
):
"""Initializes the DeepReservoir.

@@ -246,13 +247,12 @@ def __init__(
self.tot_units = tot_units
self.concat = concat
self.batch_first = True # DeepReservoir only supports batch_first

# in case in which all the reservoir layers are concatenated, each level
# contains units/layers neurons. This is done to keep the number of
# state variables projected to the next layer fixed,
# i.e., the number of trainable parameters does not depend on concat
if concat:
self.layers_units = np.int(tot_units / n_layers)
if concat or True:
self.layers_units = int(tot_units / n_layers)
else:
self.layers_units = tot_units

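For intuition, the per-layer width arithmetic with illustrative numbers (not from the diff):

```python
# With concatenation, the total state size seen by the readout stays fixed:
tot_units, n_layers = 100, 4
layers_units = int(tot_units / n_layers)  # 25 units per layer
# concat=True -> readout sees n_layers * layers_units = 100 state variables,
# so the number of trainable readout parameters does not depend on concat.
```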
@@ -309,9 +309,12 @@ def forward(self, X: torch.Tensor):
[X, h_last] = res_layer(X)
states.append(X)
states_last.append(h_last)


states_uncat = states

if self.concat:
states = torch.cat(states, dim=2)
else:
states = states[-1]
return states, states_last

return states, states_last, states_uncat
151 changes: 150 additions & 1 deletion acds/archetypes/ron.py
@@ -120,9 +120,13 @@ def cell(
hy (torch.Tensor): Current hidden state.
hz (torch.Tensor): Current hidden state derivative.
"""
# convert to same type of x
hz = hz.to(x.dtype)
hy = hy.to(x.dtype)

hz = hz + self.dt * (
torch.tanh(
torch.matmul(x, self.x2h) + torch.matmul(hy, self.h2h - self.diffusive_matrix) + self.bias
torch.matmul(x, self.x2h.to(dtype=x.dtype)) + torch.matmul(hy, self.h2h.to(dtype=x.dtype) - self.diffusive_matrix.to(dtype=x.dtype)) + self.bias.to(dtype=x.dtype)
)
- self.gamma * hy
- self.epsilon * hz
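For reference, a self-contained sketch of one explicit-Euler step of the dynamics shown above; the diffusive matrix is folded into h2h here, and the position update hy = hy + dt * hz is an assumption based on the ODE in the class docstring:

```python
import torch

def ron_euler_step(x, hy, hz, x2h, h2h, bias, dt, gamma, epsilon):
    # Velocity update: tanh-coupled oscillator with damping and friction.
    hz = hz + dt * (
        torch.tanh(x @ x2h + hy @ h2h + bias)
        - gamma * hy      # damping (position) term
        - epsilon * hz    # friction (velocity) term
    )
    # Position update (assumed companion step).
    hy = hy + dt * hz
    return hy, hz
```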
@@ -151,3 +155,148 @@ def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, List[torch.Tensor]]:
return torch.stack(all_states, dim=1), [
hy
] # list to be compatible with ESN implementation


class DeepRandomizedOscillatorsNetwork(nn.Module):
"""
Deep Randomized Oscillators Network: a stack of RON models, one on top of the other, developing depth over the reservoir.

A recurrent deep neural network model with
oscillatory dynamics stacked in layers. The model is defined by the following ordinary
differential equation:

.. math::
\\dot{h} = -\\gamma h - \\epsilon \\dot{h} + \\tanh(W_{in} x + W_{rec} h + b)

where:
- :math:`h` is the hidden state,
- :math:`\\dot{h}` is the derivative of the hidden state,
- :math:`\\gamma` is the damping factor,
- :math:`\\epsilon` is the stiffness factor,
- :math:`W_{in}` is the input-to-hidden weight matrix,
- :math:`W_{rec}` is the hidden-to-hidden weight matrix,
- :math:`b` is the bias vector.

The model is trained by minimizing the mean squared error between the output of the
model and the target time-series.
"""

def __init__(
self,
n_inp: int,
total_units: int,
dt: float,
gamma: Union[float, Tuple[float, float]],
epsilon: Union[float, Tuple[float, float]],
n_layers: int = 1,
diffusive_gamma=0.0,
rho: float = 0.99,
input_scaling: float = 1.0,
inter_scaling: float = 1.0,
topology: Literal[
"full", "lower", "orthogonal", "band", "ring", "toeplitz", "antisymmetric"
] = "full",
reservoir_scaler=0.0,
sparsity=0.0,
device="cuda",
concat: bool = True,
# TODO implement sparse connectivity later...
connectivity_input: int = 10,
connectivity_inter: int = 10,
):
"""Initialize the DeepRON model.

Args:
n_inp (int): Number of input units.
total_units (int): Total number of neurons in the RON stack.
dt (float): Time step.
gamma (float or tuple): Damping factor (or range).
epsilon (float or tuple): Stiffness factor (or range).
n_layers (int): Number of layers in the network.
concat (bool): If True, the states of all layers are concatenated. If False, only the states of the last layer are returned.
"""
super().__init__()
self.inter_scaling = inter_scaling
self.n_layers = n_layers
self.total_units = total_units
self.reservoir_scaler = reservoir_scaler
# if True, then the input and output tensors are provided as (batch, seq, feature)
#self.batch_first = True

self.layers = nn.ModuleList()

self.concat = concat

if concat:
self.layer_units = int(total_units / n_layers)
else:
self.layer_units = total_units

input_scaling_others = inter_scaling
connectivity_input_1 = connectivity_input
connectivity_input_others = connectivity_inter

deepron_layers = [
RandomizedOscillatorsNetwork(
n_inp=n_inp, n_hid=self.layer_units + total_units % n_layers,
input_scaling=input_scaling_others,
dt=dt,
gamma=gamma,
epsilon=epsilon,
reservoir_scaler=self.reservoir_scaler
#TODO still sparse connectivity to implement
#connectivity_input=connectivity_input_1,
#connectivity_recurrent=connectivity_input_others,
)
]

last_h_size = self.layer_units + total_units % n_layers

for _ in range(n_layers - 1):
deepron_layers.append(
RandomizedOscillatorsNetwork(
n_inp=last_h_size, n_hid=self.layer_units,
input_scaling=input_scaling_others,
dt=dt,
gamma=gamma,
epsilon=epsilon,
#connectivity_input=connectivity_input_others,
#connectivity_recurrent=connectivity_recurrent,
)
)
last_h_size = self.layer_units
self.ron_reservoir = nn.ModuleList(deepron_layers)


def forward(self, hy: torch.Tensor) -> Tuple[torch.Tensor, List[torch.Tensor], List[torch.Tensor]]:
"""Forward pass of the DeepRON over a given input time-series.

Args:
hy (torch.Tensor): Input time-series shaped as (batch, time, input_dim).

Returns:
torch.Tensor: Hidden states of the network shaped as (batch, time, n_hid).
list: List containing the last hidden state of each layer.
list: List of per-layer hidden-state sequences (before concatenation).
"""
# list to store the last state of each layer
layer_states = []
# list to store the hidden states of each layer
states = []

for ron_layer in self.ron_reservoir:
[hy, last_state] = ron_layer(hy)
states.append(hy)
layer_states.append(last_state[0])

states_uncat = states

if self.concat:
# check what dim we need to concat
hy = torch.cat(states, dim=2)
else:
# if not concat, return only the last layer
hy = states[-1]

# TODO: decide whether the states from all layers should always be returned

return hy, layer_states, states_uncat

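A usage sketch of the new class, based only on the signatures in this diff; the dt/gamma/epsilon values are placeholders, not recommended settings:

```python
import torch
from acds.archetypes import DeepRandomizedOscillatorsNetwork

model = DeepRandomizedOscillatorsNetwork(
    n_inp=1, total_units=100, dt=0.05, gamma=2.5, epsilon=4.0,
    n_layers=4, concat=True,
)
x = torch.rand(8, 50, 1)  # (batch, time, input_dim)
states, last_states, states_uncat = model(x)
# With concat=True and 100 % 4 == 0, states is (8, 50, 100).
print(states.shape, len(last_states), len(states_uncat))
```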
7 changes: 4 additions & 3 deletions acds/archetypes/utils.py
@@ -66,7 +66,7 @@ def sparse_tensor_init(M: int, N: int, C: int = 1) -> torch.FloatTensor:
indices[k, 0] = i
indices[k, 1] = idx[j]
k = k + 1
values = 2 * (2 * np.random.rand(M * C).astype("f") - 1)
values = (2 * np.random.rand(M * C).astype("f") - 1)
values = torch.from_numpy(values)
return torch.sparse_coo_tensor(indices.T, values, dense_shape).to_dense().float()

@@ -95,7 +95,7 @@ def sparse_recurrent_tensor_init(M: int, C: int = 1) -> torch.FloatTensor:
indices[k, 0] = idx[j]
indices[k, 1] = i
k = k + 1
values = 2 * (2 * np.random.rand(M * C).astype("f") - 1)
values = (2 * np.random.rand(M * C).astype("f") - 1)
values = torch.from_numpy(values)
return torch.sparse_coo_tensor(indices.T, values, dense_shape).to_dense().float()

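Both hunks drop the leading factor of 2, so values go from uniform in [-2, 2) to uniform in [-1, 1), matching the "init of reservoir to -1,+1" commit. A quick sanity check of the new expression:

```python
import numpy as np

vals = 2 * np.random.rand(1000).astype("f") - 1  # new init, no leading 2 *
assert vals.min() >= -1.0 and vals.max() < 1.0   # uniform in [-1, 1)
```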
@@ -156,7 +156,8 @@ def get_sparsity(A):
assert sparsity >= 0 and sparsity < 1, "Sparsity must be in [0,1)"

if topology == "full":
h2h = 2 * (2 * torch.rand(n_hid, n_hid) - 1)
# TODO: should this be scaled with reservoir_scaler?
h2h = 2 * (2 * torch.rand(n_hid, n_hid) - 1)
elif topology == "lower":
h2h = torch.tril(2 * torch.rand(n_hid, n_hid) - 1)
if sparsity > 0:
3 changes: 2 additions & 1 deletion acds/benchmarks/__init__.py
Expand Up @@ -4,5 +4,6 @@
from .mallat import get_mallat_data
from .trace import get_trace_data
from .libras import get_libras_data
from .memory_capacity import get_memory_capacity

__all__ = ["get_adiac_data", "get_mackey_glass", "get_mnist_data", "get_mackey_glass_windows", "get_mallat_data", "get_trace_data", "get_libras_data"]
__all__ = ["get_adiac_data", "get_mackey_glass", "get_mnist_data", "get_mackey_glass_windows", "get_mallat_data", "get_trace_data", "get_libras_data", "get_memory_capacity"]
105 changes: 105 additions & 0 deletions acds/benchmarks/memory_capacity.py
@@ -0,0 +1,105 @@
import numpy as np
import matplotlib.pyplot as plt
import torch
import os
from torch.utils.data import DataLoader, TensorDataset


def generate_memory_capacity_dataset(delay, signal_length=6000, train_length=5000, test_length=1000):
"""
Generates a dataset for the memory capacity task of a recurrent network.

Parameters:
- delay: Delay to consider for the task.
- signal_length: Total length of the input signal.
- train_length: Length of the training set.
- test_length: Length of the test set.

Returns:
- (u_train, y_train): Training input and delayed target.
- (u_test, y_test): Test input and delayed target.
"""
# The seed could be omitted here: we are not learning, only measuring memory capacity
u = np.random.uniform(low= -0.8, high= 0.8, size = (signal_length + delay, 1))

u_input = u[delay:]
u_target = u[:-delay]

u_train = u_input[:train_length]
y_train = u_target[:train_length]

u_test = u_input[train_length:]
y_test = u_target[train_length:]

return (u_train, y_train), (u_test, y_test)


def get_memory_capacity(delay, train_ratio: float = 0.8, test_size: int = 1000):
"""
Returns the memory capacity dataset as torch tensors, in the following format:
- u_train: Training input signal.
- y_train: Training target signal.
- u_test: Test input signal.
- y_test: Test target signal.

Note: washout should be applied to the training set by the caller.

Returns: Tuple[Tuple[torch.Tensor, torch.Tensor], Tuple[torch.Tensor, torch.Tensor]]
"""

(u_train, y_train), (u_test, y_test) = generate_memory_capacity_dataset(delay)

assert len(u_train) == len(y_train), "Input and target signals must have the same length."

return (torch.from_numpy(u_train).float(), torch.from_numpy(y_train).float()), (torch.from_numpy(u_test).float(), torch.from_numpy(y_test).float())
# ---- For validation later ----
# test start after the training set
#test_start_idx = len(u_train) - test_size

#u_test, y_test = u_train[test_start_idx:], y_train[test_start_idx:]

#u_train_valid, y_train_valid = u_train[:test_start_idx], y_train[:test_start_idx]

# For now no hyperparameter search is required, so return train-test
#train_size = int(train_ratio * len(u_train_valid))
#u_train, y_train = u_train_valid[:train_size], y_train_valid[:train_size]
#u_val, y_val = u_train_valid[train_size:], y_train_valid[train_size:]

# as numpy arrays
#return (torch.from_numpy(u_train).float(), torch.from_numpy(y_train).float()), (torch.from_numpy(u_val).float(), torch.from_numpy(y_val).float()), (torch.from_numpy(u_test).float(), torch.from_numpy(y_test).float())

if __name__ == "__main__":

debug = True

if debug:

# get_memory_capacity returns (train, test) only; the validation split is commented out above
(u_train, y_train), (u_test, y_test) = get_memory_capacity(1, train_ratio=0.8, test_size=1000)
# make them into numpy arrays
u_train, y_train = u_train.numpy(), y_train.numpy()
u_test, y_test = u_test.numpy(), y_test.numpy()

# plot the dataset
plt.figure(figsize=(12, 6))
# original signal
y = np.concatenate([y_train, y_test])
plt.plot(y, label="Target signal", alpha=0.7)
plt.plot(range(len(y_train), len(y)), y_test, label="Test signal", alpha=0.7)
plt.legend()

plt.savefig("memory_capacity_dataset.png")

# visualize the delayed signal against the input signal
plt.figure(figsize=(12, 6))
plt.plot(u_train, label="Input signal", alpha=0.7)
plt.plot(y_train, label="Target signal", alpha=0.7)
plt.legend()

plt.savefig("memory_capacity_input_target.png")


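This file only generates the delayed-signal data. For context, total memory capacity is conventionally scored as the sum over delays k of the squared correlation between the readout's prediction and the delayed input u(t - k); a minimal sketch of that score, not part of this diff:

```python
import numpy as np

def mc_k(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Squared Pearson correlation between target u(t - k) and prediction."""
    c = np.cov(y_true.ravel(), y_pred.ravel())
    return float(c[0, 1] ** 2 / (c[0, 0] * c[1, 1]))

# Total memory capacity: train one readout per delay k, then
# MC = sum(mc_k(y_k, y_k_pred) for k in range(1, K + 1))
```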