apache · christinadionysio · Jun 2, 2026 · Jun 2, 2026 · Jun 2, 2026 · Jun 2, 2026
diff --git a/src/main/python/systemds/scuro/dataloader/timeseries_loader.py b/src/main/python/systemds/scuro/dataloader/timeseries_loader.py
@@ -64,15 +64,7 @@ def __init__(
     def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
         self.file_sanity_check(file)
 
-        if self.file_format == "npy":
-            data = self._load_npy(file)
-        elif self.file_format in ["txt", "csv"]:
-            with open(file, "r") as f:
-                first_line = f.readline()
-            if any(name in first_line for name in self.signal_names):
-                data = self._load_csv_with_header(file)
-            else:
-                data = self._load_txt(file)
+        data = self._load_data(file)
 
         if data.ndim > 1 and len(self.signal_names) == 1:
             data = data.flatten()
@@ -96,6 +88,18 @@ def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
                 )
                 self.data.append(data[i])
 
+    def _load_data(self, file: str) -> np.ndarray:
+        if self.file_format == "npy":
+            data = self._load_npy(file)
+        elif self.file_format in ["txt", "csv"]:
+            with open(file, "r") as f:
+                first_line = f.readline()
+            if any(name in first_line for name in self.signal_names):
+                data = self._load_csv_with_header(file)
+            else:
+                data = self._load_txt(file)
+        return data
+
     def _normalize_signals(self, data: np.ndarray) -> np.ndarray:
         if data.ndim == 1:
             mean = np.mean(data)
@@ -145,14 +149,16 @@ def _load_csv_with_header(self, file: str, delimiter: str = None) -> np.ndarray:
         return data
 
     def get_stats(self, source_path: str):
-        pass  # TODO: Implement this
-        # self.file_sanity_check(source_path)
-        # max_length = 0
-        # num_instances = 0
-        # num_signals = 0
-        # for file in os.listdir(source_path):
-        #     data = self._load_npy(source_path + file)
-        #     max_length = max(max_length, data.shape[0])
-        #     num_instances += 1
-        #     num_signals = max(num_signals, data.shape[1])
-        # return TimeseriesStats(max_length, num_instances, num_signals)
+        self.file_sanity_check(source_path)
+        max_length = 0
+        num_instances = 0
+        num_signals = 0
+        for file_name in self.indices:
+            file = source_path + file_name + "." + self.file_format
+            data = self._load_data(file)
+            max_length = max(max_length, data.shape[0])
+            num_instances += 1
+            num_signals = max(num_signals, data.shape[1])
+        return TimeseriesStats(
+            max_length, num_instances, num_signals, (max_length,), True
+        )
diff --git a/src/main/python/systemds/scuro/representations/aggregate.py b/src/main/python/systemds/scuro/representations/aggregate.py
@@ -126,8 +126,8 @@ def execute(self, modality, aggregate_dim=(0,)):
     def transform(self, modality):
         return self.execute(modality)
 
-    def compute_feature(self, instance):
-        return self._aggregation_func(instance)
+    def compute_feature(self, instance, axis=0):
+        return self._aggregation_func(instance, axis)
 
     def get_aggregation_functions(self):
         return list(self._aggregation_function.keys())
diff --git a/src/main/python/systemds/scuro/representations/average.py b/src/main/python/systemds/scuro/representations/average.py
@@ -32,7 +32,7 @@
 
 @register_fusion_operator()
 class Average(Fusion):
-    def __init__(self):
+    def __init__(self, params=None):
         """
         Combines modalities using averaging
         """
@@ -41,10 +41,10 @@ def __init__(self):
         self.associative = True
         self.commutative = True
 
-    def execute(self, modalities: List[Modality]):
-        data = copy.deepcopy(modalities[0].data)
+    def execute(self, modalities: List[Modality], labels=None):
+        data = np.asarray(copy.deepcopy(modalities[0].data), dtype=float)
         for i in range(1, len(modalities)):
-            data += modalities[i].data
+            data += np.asarray(modalities[i].data, dtype=float)
 
         data /= len(modalities)
 

diff --git a/src/main/python/systemds/scuro/representations/concatenation.py b/src/main/python/systemds/scuro/representations/concatenation.py
@@ -79,23 +79,27 @@ def get_output_stats(self, input_stats_list) -> RepresentationStats:
             return RepresentationStats(0, (0,))
 
         num_instances = stats_list[0].num_instances
-        rank = len(stats_list[0].output_shape)
-
-        if rank == 1:
-            total_dim = sum(s.output_shape[0] for s in stats_list)
-            output_shape = (total_dim,)
-        elif rank == 2:
-            time_dim = stats_list[0].output_shape[0]
-            total_dim = sum(s.output_shape[1] for s in stats_list)
-            output_shape = (time_dim, total_dim)
-        else:
-            output_shape = stats_list[0].output_shape
+        total_dim = sum(s.output_shape[-1] for s in stats_list)
+        output_shape = (total_dim,)
 
         return RepresentationStats(num_instances, output_shape)
 
-    def estimate_peak_memory_bytes(self, input_stats) -> dict:
-        # TODO
-        return {
-            "cpu_peak_bytes": 0,
-            "gpu_peak_bytes": 0,
-        }
+    def estimate_peak_memory_bytes(self, input_stats_list) -> dict:
+        elem_size = np.dtype(np.float32).itemsize
+
+        def stats_bytes(s: RepresentationStats) -> int:
+            numel = int(np.prod(s.output_shape)) if len(s.output_shape) > 0 else 1
+            return int(s.num_instances * numel * elem_size)
+
+        current_output = 0
+        peak = 0
+        for s in input_stats_list:
+            chunk = stats_bytes(s)
+            new_output = current_output + chunk
+
+            step_peak = current_output + chunk + new_output + chunk
+            peak = max(peak, step_peak)
+            current_output = new_output
+
+        cpu_peak = int(peak * 1.15 + 16 * 1024 * 1024)
+        return {"cpu_peak_bytes": cpu_peak, "gpu_peak_bytes": 0}
diff --git a/src/main/python/systemds/scuro/representations/hadamard.py b/src/main/python/systemds/scuro/representations/hadamard.py
@@ -55,18 +55,22 @@ def get_output_stats(self, input_stats_list) -> RepresentationStats:
         if not stats_list:
             return RepresentationStats(0, (0,))
 
-        def num_elements(stats: RepresentationStats) -> int:
-            n = 1
-            for d in stats.output_shape:
-                n *= d
-            return n
+        max_dim = max([stats.output_shape[-1] for stats in stats_list])
+        return RepresentationStats(stats_list[0].num_instances, (max_dim,))
 
-        largest = max(stats_list, key=num_elements)
-        return RepresentationStats(largest.num_instances, largest.output_shape)
+    def estimate_peak_memory_bytes(self, input_stats_list) -> dict:
+        elem_size = np.dtype(np.float64).itemsize
 
-    def estimate_peak_memory_bytes(self, input_stats) -> dict:
-        # TODO
-        return {
-            "cpu_peak_bytes": 0,
-            "gpu_peak_bytes": 0,
-        }
+        def stats_payload_bytes(s: RepresentationStats) -> int:
+            numel = int(np.prod(s.output_shape)) if len(s.output_shape) > 0 else 1
+            return int(s.num_instances * numel * elem_size)
+
+        stacked_input_bytes = sum(stats_payload_bytes(s) for s in input_stats_list)
+        out_stats = self.get_output_stats(input_stats_list)
+        output_bytes = stats_payload_bytes(out_stats)
+        reduction_workspace_bytes = output_bytes
+        cpu_peak = int(
+            (stacked_input_bytes + output_bytes + reduction_workspace_bytes) * 1.15
+            + 8 * 1024 * 1024
+        )
+        return {"cpu_peak_bytes": cpu_peak, "gpu_peak_bytes": 0}
diff --git a/src/main/python/systemds/scuro/representations/lstm.py b/src/main/python/systemds/scuro/representations/lstm.py
@@ -233,7 +233,7 @@ def execute(self, modalities: List[Modality], labels: np.ndarray = None):
                 TensorDataset(X_tensor), batch_size=self.batch_size, shuffle=False
             )
             for (batch_X,) in inference_dataloader:
-                batch_X = batch_X.to(device)
+                batch_X = batch_X.to(self.device)
                 features, _ = self.model(batch_X)
                 all_features.append(features.cpu())
 

diff --git a/src/main/python/systemds/scuro/representations/max.py b/src/main/python/systemds/scuro/representations/max.py
@@ -30,7 +30,7 @@
 
 @register_fusion_operator()
 class RowMax(Fusion):
-    def __init__(self):
+    def __init__(self, params=None):
         """
         Combines modalities by computing the outer product of a modality combination and
         taking the row max

diff --git a/src/main/python/systemds/scuro/representations/mlp_averaging.py b/src/main/python/systemds/scuro/representations/mlp_averaging.py
@@ -54,8 +54,12 @@ def __init__(self, output_dim=512, batch_size=32, params=None):
             "batch_size": [8, 16, 32, 64, 128],
         }
         super().__init__("MLPAveraging", parameters)
-        self.output_dim = output_dim
-        self.batch_size = batch_size
+        if params is not None:
+            self.output_dim = params.get("output_dim", output_dim)
+            self.batch_size = params.get("batch_size", batch_size)
+        else:
+            self.output_dim = output_dim
+            self.batch_size = batch_size
         self.device = None
         self.data_type = np.float32
         self.gpu_id = None
@@ -70,7 +74,10 @@ def gpu_id(self, gpu_id):
         self.device = get_device(gpu_id)
 
     def get_output_stats(self, input_stats: RepresentationStats) -> RepresentationStats:
-        if len(input_stats.output_shape) > 1:
+        if (
+            len(input_stats.output_shape) > 1
+            and np.prod(input_stats.output_shape) > self.output_dim
+        ):
             return RepresentationStats(
                 input_stats.num_instances,
                 (self.output_dim,),
@@ -87,7 +94,13 @@ def get_output_stats(self, input_stats: RepresentationStats) -> RepresentationSt
             )
         return RepresentationStats(
             input_stats.num_instances,
-            (self.output_dim,),
+            (
+                (
+                    np.prod(input_stats.output_shape)
+                    if np.prod(input_stats.output_shape) < self.output_dim
+                    else self.output_dim
+                ),
+            ),
             output_shape_is_known=input_stats.output_shape_is_known,
         )
 

diff --git a/src/main/python/systemds/scuro/representations/multimodal_attention_fusion.py b/src/main/python/systemds/scuro/representations/multimodal_attention_fusion.py
@@ -42,6 +42,7 @@ def __init__(
         batch_size=32,
         num_epochs=20,
         learning_rate=0.001,
+        params=None,
     ):
         parameters = {
             "hidden_dim": [32, 128, 256, 384, 512, 768],

diff --git a/src/main/python/systemds/scuro/representations/spectrogram.py b/src/main/python/systemds/scuro/representations/spectrogram.py
@@ -60,10 +60,14 @@ def transform(self, modality, aggregation=None):
         return transformed_modality
 
     def compute_feature(self, instance):
+        data = np.array(instance)
         spectrogram = librosa.stft(
-            y=np.array(np.abs(instance)), hop_length=self.hop_length, n_fft=self.n_fft
+            y=np.abs(data), hop_length=self.hop_length, n_fft=self.n_fft
         )
-        return librosa.amplitude_to_db(np.abs(spectrogram)).T
+        if data.ndim == 1:
+            return librosa.amplitude_to_db(np.abs(spectrogram)).T
+
+        return librosa.amplitude_to_db(np.abs(spectrogram)).transpose(0, 2, 1)
 
     def estimate_peak_memory_bytes(self, input_stats) -> dict:
         # TODO: validate this function
@@ -74,7 +78,7 @@ def estimate_peak_memory_bytes(self, input_stats) -> dict:
         if signal_length < self.n_fft:
             num_frames = 1
         else:
-            num_frames = 1 + (signal_length - self.n_fft) // self.hop_length
+            num_frames = 1 + signal_length // self.hop_length
         num_frames = max(int(num_frames), 1)
         num_freq_bins = 1 + self.n_fft // 2
 
@@ -121,7 +125,7 @@ def get_output_stats(self, input_stats) -> RepresentationStats:
             if signal_length < self.n_fft:
                 num_frames = 1
             else:
-                num_frames = 1 + (signal_length - self.n_fft) // self.hop_length
+                num_frames = 1 + signal_length // self.hop_length
             num_frames = max(int(num_frames), 1)
 
         num_freq_bins = 1 + self.n_fft // 2

diff --git a/src/main/python/systemds/scuro/representations/sum.py b/src/main/python/systemds/scuro/representations/sum.py
@@ -61,18 +61,24 @@ def get_output_stats(self, input_stats_list) -> RepresentationStats:
         if not stats_list:
             return RepresentationStats(0, (0,))
 
-        def num_elements(stats: RepresentationStats) -> int:
-            n = 1
-            for d in stats.output_shape:
-                n *= d
-            return n
+        max_dim = max([stats.output_shape[-1] for stats in stats_list])
+        return RepresentationStats(stats_list[0].num_instances, (max_dim,))
 
-        largest = max(stats_list, key=num_elements)
-        return RepresentationStats(largest.num_instances, largest.output_shape)
+    def estimate_peak_memory_bytes(self, input_stats_list) -> dict:
+        elem_size = np.dtype(np.float64).itemsize
 
-    def estimate_peak_memory_bytes(self, input_stats) -> dict:
-        # TODO
-        return {
-            "cpu_peak_bytes": 0,
-            "gpu_peak_bytes": 0,
-        }
+        def stats_payload_bytes(s: RepresentationStats) -> int:
+            numel = int(np.prod(s.output_shape)) if len(s.output_shape) > 0 else 1
+            return int(s.num_instances * numel * elem_size)
+
+        first_bytes = stats_payload_bytes(input_stats_list[0])
+        max_other_bytes = 0
+        if len(input_stats_list) > 1:
+            max_other_bytes = max(stats_payload_bytes(s) for s in input_stats_list[1:])
+
+        ufunc_workspace_bytes = int(0.1 * max(first_bytes, max_other_bytes))
+        cpu_peak = int(
+            (first_bytes + max_other_bytes + ufunc_workspace_bytes) * 1.15
+            + 8 * 1024 * 1024
+        )
+        return {"cpu_peak_bytes": cpu_peak, "gpu_peak_bytes": 0}