[SYSTEMDS-3835] Scuro window aggregation operator #2225

Merged: 20 commits, May 14, 2025

Changes from all commits

5 changes: 3 additions & 2 deletions .github/workflows/python.yml
@@ -116,8 +116,9 @@ jobs:
h5py \
gensim \
black \
-            opt-einsum
+            opt-einsum \
+            nltk

- name: Build Python Package
run: |
cd src/main/python
15 changes: 5 additions & 10 deletions src/main/python/systemds/scuro/dataloader/audio_loader.py
@@ -22,23 +22,18 @@

import librosa
from systemds.scuro.dataloader.base_loader import BaseLoader
-from systemds.scuro.utils.schema_helpers import create_timestamps
+from systemds.scuro.modality.type import ModalityType


class AudioLoader(BaseLoader):
def __init__(
-        self,
-        source_path: str,
-        indices: List[str],
-        chunk_size: Optional[int] = None,
+        self, source_path: str, indices: List[str], chunk_size: Optional[int] = None
):
-        super().__init__(source_path, indices, chunk_size)
+        super().__init__(source_path, indices, chunk_size, ModalityType.AUDIO)

def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
self.file_sanity_check(file)
audio, sr = librosa.load(file)
-        self.metadata[file] = {"sample_rate": sr, "length": audio.shape[0]}
-        self.metadata[file]["timestamp"] = create_timestamps(
-            self.metadata[file]["sample_rate"], self.metadata[file]["length"]
-        )
+        self.metadata[file] = self.modality_type.create_audio_metadata(sr, audio)

self.data.append(audio)
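
For context on the metadata change above: the audio loader no longer assembles its metadata dict inline and calls create_timestamps itself, it delegates to the ModalityType factory. A minimal sketch of the equivalent call, using a synthetic signal in place of librosa.load() output; the exact contents of the returned metadata are an assumption based on what the old inline dict stored:

import numpy as np

from systemds.scuro.modality.type import ModalityType

# One second of silence standing in for (audio, sr) from librosa.load(file).
sr = 22050
audio = np.zeros(sr, dtype=np.float32)

# Same call the refactored AudioLoader.extract() makes; presumably the result still
# carries the sample rate, length, and timestamps previously built by create_timestamps.
audio_md = ModalityType.AUDIO.create_audio_metadata(sr, audio)
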
12 changes: 11 additions & 1 deletion src/main/python/systemds/scuro/dataloader/base_loader.py
@@ -25,7 +25,11 @@

class BaseLoader(ABC):
def __init__(
-        self, source_path: str, indices: List[str], chunk_size: Optional[int] = None
+        self,
+        source_path: str,
+        indices: List[str],
+        chunk_size: Optional[int] = None,
+        modality_type=None,
):
"""
Base class to load raw data for a given list of indices and stores them in the data object
@@ -40,6 +44,7 @@ def __init__(
) # TODO: check what the index should be for storing the metadata (file_name, counter, ...)
self.source_path = source_path
self.indices = indices
+        self.modality_type = modality_type
self._next_chunk = 0
self._num_chunks = 1
self._chunk_size = None
@@ -64,6 +69,11 @@ def num_chunks(self):
def next_chunk(self):
return self._next_chunk

+    def reset(self):
+        self._next_chunk = 0
+        self.data = []
+        self.metadata = {}

def load(self):
"""
Takes care of loading the raw data either chunk wise (if chunk size is defined) or all at once
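
Taken together, the BaseLoader changes add an optional modality_type that subclasses pass up from their constructors, plus a reset() that clears data, metadata, and the chunk pointer so a loader instance can be reused. A minimal sketch of a subclass against the new signature; the loader class, file layout, and paths here are hypothetical, only the constructor contract, extract(), and reset() come from the code above:

from typing import List, Optional, Union

from systemds.scuro.dataloader.base_loader import BaseLoader
from systemds.scuro.modality.type import ModalityType


class PlainTextLoader(BaseLoader):
    # Hypothetical loader: forwards its modality type to BaseLoader the same way the
    # audio/video/text loaders in this PR do.
    def __init__(self, source_path: str, indices: List[str], chunk_size: Optional[int] = None):
        super().__init__(source_path, indices, chunk_size, ModalityType.TEXT)

    def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
        with open(file) as f:
            text = f.read().strip()
        self.metadata[file] = self.modality_type.create_text_metadata(len(text.split()), text)
        self.data.append(text)


# loader = PlainTextLoader("data/text/", ["0", "1"], chunk_size=1)
# loader.load()   # loads the data (chunk-wise when chunk_size is set)
# loader.reset()  # clears data/metadata and rewinds the chunk pointer for another pass
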
9 changes: 7 additions & 2 deletions src/main/python/systemds/scuro/dataloader/json_loader.py
@@ -20,6 +20,7 @@
# -------------------------------------------------------------
import json

+from systemds.scuro.modality.type import ModalityType
from systemds.scuro.dataloader.base_loader import BaseLoader
from typing import Optional, List, Union

@@ -32,12 +33,16 @@ def __init__(
field: str,
chunk_size: Optional[int] = None,
):
-        super().__init__(source_path, indices, chunk_size)
+        super().__init__(source_path, indices, chunk_size, ModalityType.TEXT)
self.field = field

def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
self.file_sanity_check(file)
with open(file) as f:
json_file = json.load(f)
for idx in index:
-                self.data.append(json_file[idx][self.field])
+                sentence = json_file[idx][self.field]
+                self.data.append(sentence)
+                self.metadata[idx] = self.modality_type.create_text_metadata(
+                    len(sentence), sentence
+                )
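
The JSON loader now records per-index metadata alongside the extracted sentences. A usage sketch; the class name JSONLoader is inferred from the file name, and the JSON layout (one object per index with a "text" field) is made up for illustration:

# data/instances.json (hypothetical):
#   {"0": {"text": "hello world"}, "1": {"text": "window aggregation for scuro"}}
from systemds.scuro.dataloader.json_loader import JSONLoader

loader = JSONLoader(
    source_path="data/instances.json",
    indices=["0", "1"],
    field="text",
)
loader.load()
# loader.data now holds the extracted sentences; loader.metadata is keyed by index and
# holds the text metadata (token count plus the raw sentence). TextLoader in the next
# diff does the same per file.
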
7 changes: 5 additions & 2 deletions src/main/python/systemds/scuro/dataloader/text_loader.py
@@ -20,6 +20,7 @@
# -------------------------------------------------------------
from systemds.scuro.dataloader.base_loader import BaseLoader
from typing import Optional, Pattern, List, Union
+from systemds.scuro.modality.type import ModalityType
import re


@@ -31,7 +32,7 @@ def __init__(
chunk_size: Optional[int] = None,
prefix: Optional[Pattern[str]] = None,
):
-        super().__init__(source_path, indices, chunk_size)
+        super().__init__(source_path, indices, chunk_size, ModalityType.TEXT)
self.prefix = prefix

def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
@@ -41,5 +42,7 @@ def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
if self.prefix:
line = re.sub(self.prefix, "", line)
line = line.replace("\n", "")
-            self.metadata[file] = {"length": len(line.split())}
+            self.metadata[file] = self.modality_type.create_text_metadata(
+                len(line.split()), line
+            )
self.data.append(line)
20 changes: 9 additions & 11 deletions src/main/python/systemds/scuro/dataloader/video_loader.py
@@ -23,8 +23,8 @@
import numpy as np

from systemds.scuro.dataloader.base_loader import BaseLoader
-from systemds.scuro.utils.schema_helpers import create_timestamps
import cv2
+from systemds.scuro.modality.type import ModalityType


class VideoLoader(BaseLoader):
@@ -34,7 +34,7 @@ def __init__(
indices: List[str],
chunk_size: Optional[int] = None,
):
-        super().__init__(source_path, indices, chunk_size)
+        super().__init__(source_path, indices, chunk_size, ModalityType.VIDEO)

def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
self.file_sanity_check(file)
@@ -43,16 +43,14 @@ def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
if not cap.isOpened():
raise f"Could not read video at path: {file}"

-        self.metadata[file] = {
-            "fps": cap.get(cv2.CAP_PROP_FPS),
-            "length": int(cap.get(cv2.CAP_PROP_FRAME_COUNT)),
-            "width": int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
-            "height": int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
-            "num_channels": 3,
-        }
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        num_channels = 3

-        self.metadata[file]["timestamp"] = create_timestamps(
-            self.metadata[file]["fps"], self.metadata[file]["length"]
+        self.metadata[file] = self.modality_type.create_video_metadata(
+            fps, length, width, height, num_channels
)

frames = []
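
Same pattern as the audio loader: the five capture properties are read into locals and handed to the ModalityType factory instead of being stored in an inline dict. A sketch of the equivalent call with made-up capture values (no video file involved); the exact contents of the returned metadata are assumed to match what the old dict plus create_timestamps produced:

from systemds.scuro.modality.type import ModalityType

# Values cv2.VideoCapture would normally report (made up here).
fps, length, width, height, num_channels = 30.0, 900, 1280, 720, 3

# Same call the new VideoLoader.extract() makes.
video_md = ModalityType.VIDEO.create_video_metadata(fps, length, width, height, num_channels)
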
33 changes: 16 additions & 17 deletions src/main/python/systemds/scuro/modality/joined.py
@@ -104,7 +104,7 @@ def execute(self, starting_idx=0):
self.joined_right.data[i - starting_idx].append([])
right = np.array([])
if self.condition.join_type == "<":
-                    while c < len(idx_2) and idx_2[c] < nextIdx[j]:
+                    while c < len(idx_2) - 1 and idx_2[c] < nextIdx[j]:
if right.size == 0:
right = self.right_modality.data[i][c]
if right.ndim == 1:
@@ -125,7 +125,7 @@
)
c = c + 1
else:
-                    while c < len(idx_2) and idx_2[c] <= idx_1[j]:
+                    while c < len(idx_2) - 1 and idx_2[c] <= idx_1[j]:
if idx_2[c] == idx_1[j]:
right.append(self.right_modality.data[i][c])
c = c + 1
@@ -141,18 +141,17 @@

self.joined_right.data[i - starting_idx][j] = right

-    def apply_representation(self, representation, aggregation):
+    def apply_representation(self, representation, aggregation=None):
self.aggregation = aggregation
if self.chunked_execution:
return self._handle_chunked_execution(representation)
-        elif self.left_type.__name__.__contains__("Unimodal"):
-            self.left_modality.extract_raw_data()
-            if self.left_type == self.right_type:
-                self.right_modality.extract_raw_data()
-        elif self.right_type.__name__.__contains__("Unimodal"):
-            self.right_modality.extract_raw_data()
+        # elif self.left_type.__name__.__contains__("Unimodal"):
+        #     self.left_modality.extract_raw_data()
+        #     if self.left_type == self.right_type:
+        #         self.right_modality.extract_raw_data()
+        # elif self.right_type.__name__.__contains__("Unimodal") and not self.right_modality.has_data():
+        #     self.right_modality.extract_raw_data()

self.execute()
left_transformed = self._apply_representation(
self.left_modality, representation
)
@@ -263,12 +262,12 @@ def _apply_representation_chunked(

def _apply_representation(self, modality, representation):
transformed = representation.transform(modality)
-        if self.aggregation:
-            aggregated_data_left = self.aggregation.window(transformed)
-            transformed = Modality(
-                transformed.modality_type,
-                transformed.metadata,
-            )
-            transformed.data = aggregated_data_left
+        # if self.aggregation:
+        #     aggregated_data_left = self.aggregation.execute(transformed)
+        #     transformed = Modality(
+        #         transformed.modality_type,
+        #         transformed.metadata,
+        #     )
+        #     transformed.data = aggregated_data_left

return transformed
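
Two kinds of changes in this file: the raw-data extraction and the post-representation aggregation are commented out of apply_representation (aggregation becomes an optional argument, with windowing now exposed separately via the window() method added below), and the join cursors stop one element before the end of idx_2. A stripped-down sketch of the "<" join window logic with made-up index arrays, just to illustrate what the tightened bound controls; this is not the operator itself:

import numpy as np

idx_1 = np.array([0, 10, 20])        # left-modality sample indices
nextIdx = np.array([10, 20, 30])     # index at which the next left-hand sample starts
idx_2 = np.array([0, 4, 8, 12, 16])  # right-modality sample indices

c = 0
for j in range(len(idx_1)):
    window = []
    # Same bound as the updated operator: the cursor stops one element before the end of idx_2.
    while c < len(idx_2) - 1 and idx_2[c] < nextIdx[j]:
        window.append(int(idx_2[c]))
        c += 1
    print(j, window)  # 0 [0, 4, 8] / 1 [12] / 2 []
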
7 changes: 7 additions & 0 deletions src/main/python/systemds/scuro/modality/joined_transformed.py
@@ -25,6 +25,7 @@

from systemds.scuro.modality.modality import Modality
from systemds.scuro.representations.utils import pad_sequences
+from systemds.scuro.representations.window import WindowAggregation


class JoinedTransformedModality(Modality):
@@ -68,3 +69,9 @@ def combine(self, fusion_method):
self.data[i] = np.array(r)
self.data = pad_sequences(self.data)
return self

+    def window(self, window_size, aggregation):
+        w = WindowAggregation(window_size, aggregation)
+        self.left_modality.data = w.execute(self.left_modality)
+        self.right_modality.data = w.execute(self.right_modality)
+        return self
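
The window() method above is the user-facing hook for the new WindowAggregation operator: both joined modalities are aggregated in place with the same window size and aggregation function. A rough call-pattern sketch; the window size is an arbitrary example, and the PR does not show what kind of value the aggregation argument is (a string name, a function, or an aggregation object), so that part is left as a placeholder:

from systemds.scuro.representations.window import WindowAggregation

window_size = 10   # arbitrary example value
aggregation = ...  # placeholder: the expected type of this argument is not shown in this diff

# What JoinedTransformedModality.window(window_size, aggregation) does internally:
w = WindowAggregation(window_size, aggregation)
# left.data = w.execute(left)    # left/right are the joined modalities held by the transformed
# right.data = w.execute(right)  # object; shown as comments since no data is set up here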