diff --git a/src/main/python/systemds/scuro/dataloader/timeseries_loader.py b/src/main/python/systemds/scuro/dataloader/timeseries_loader.py index 7131b55db11..6b697e6a7ad 100644 --- a/src/main/python/systemds/scuro/dataloader/timeseries_loader.py +++ b/src/main/python/systemds/scuro/dataloader/timeseries_loader.py @@ -64,15 +64,7 @@ def __init__( def extract(self, file: str, index: Optional[Union[str, List[str]]] = None): self.file_sanity_check(file) - if self.file_format == "npy": - data = self._load_npy(file) - elif self.file_format in ["txt", "csv"]: - with open(file, "r") as f: - first_line = f.readline() - if any(name in first_line for name in self.signal_names): - data = self._load_csv_with_header(file) - else: - data = self._load_txt(file) + data = self._load_data(file) if data.ndim > 1 and len(self.signal_names) == 1: data = data.flatten() @@ -96,6 +88,18 @@ def extract(self, file: str, index: Optional[Union[str, List[str]]] = None): ) self.data.append(data[i]) + def _load_data(self, file: str) -> np.ndarray: + if self.file_format == "npy": + data = self._load_npy(file) + elif self.file_format in ["txt", "csv"]: + with open(file, "r") as f: + first_line = f.readline() + if any(name in first_line for name in self.signal_names): + data = self._load_csv_with_header(file) + else: + data = self._load_txt(file) + return data + def _normalize_signals(self, data: np.ndarray) -> np.ndarray: if data.ndim == 1: mean = np.mean(data) @@ -145,14 +149,16 @@ def _load_csv_with_header(self, file: str, delimiter: str = None) -> np.ndarray: return data def get_stats(self, source_path: str): - pass # TODO: Implement this - # self.file_sanity_check(source_path) - # max_length = 0 - # num_instances = 0 - # num_signals = 0 - # for file in os.listdir(source_path): - # data = self._load_npy(source_path + file) - # max_length = max(max_length, data.shape[0]) - # num_instances += 1 - # num_signals = max(num_signals, data.shape[1]) - # return TimeseriesStats(max_length, num_instances, num_signals) + self.file_sanity_check(source_path) + max_length = 0 + num_instances = 0 + num_signals = 0 + for file_name in self.indices: + file = source_path + file_name + "." + self.file_format + data = self._load_data(file) + max_length = max(max_length, data.shape[0]) + num_instances += 1 + num_signals = max(num_signals, data.shape[1]) + return TimeseriesStats( + max_length, num_instances, num_signals, (max_length,), True + ) diff --git a/src/main/python/systemds/scuro/representations/aggregate.py b/src/main/python/systemds/scuro/representations/aggregate.py index cf2c371676f..e8a44faa34c 100644 --- a/src/main/python/systemds/scuro/representations/aggregate.py +++ b/src/main/python/systemds/scuro/representations/aggregate.py @@ -126,8 +126,8 @@ def execute(self, modality, aggregate_dim=(0,)): def transform(self, modality): return self.execute(modality) - def compute_feature(self, instance): - return self._aggregation_func(instance) + def compute_feature(self, instance, axis=0): + return self._aggregation_func(instance, axis) def get_aggregation_functions(self): return list(self._aggregation_function.keys()) diff --git a/src/main/python/systemds/scuro/representations/average.py b/src/main/python/systemds/scuro/representations/average.py index ac51f5d1e8d..f58ba0b6802 100644 --- a/src/main/python/systemds/scuro/representations/average.py +++ b/src/main/python/systemds/scuro/representations/average.py @@ -32,7 +32,7 @@ @register_fusion_operator() class Average(Fusion): - def __init__(self): + def __init__(self, params=None): """ Combines modalities using averaging """ @@ -41,10 +41,10 @@ def __init__(self): self.associative = True self.commutative = True - def execute(self, modalities: List[Modality]): - data = copy.deepcopy(modalities[0].data) + def execute(self, modalities: List[Modality], labels=None): + data = np.asarray(copy.deepcopy(modalities[0].data), dtype=float) for i in range(1, len(modalities)): - data += modalities[i].data + data += np.asarray(modalities[i].data, dtype=float) data /= len(modalities) diff --git a/src/main/python/systemds/scuro/representations/concatenation.py b/src/main/python/systemds/scuro/representations/concatenation.py index 5d53690317e..3bdfdb28b1f 100644 --- a/src/main/python/systemds/scuro/representations/concatenation.py +++ b/src/main/python/systemds/scuro/representations/concatenation.py @@ -79,23 +79,27 @@ def get_output_stats(self, input_stats_list) -> RepresentationStats: return RepresentationStats(0, (0,)) num_instances = stats_list[0].num_instances - rank = len(stats_list[0].output_shape) - - if rank == 1: - total_dim = sum(s.output_shape[0] for s in stats_list) - output_shape = (total_dim,) - elif rank == 2: - time_dim = stats_list[0].output_shape[0] - total_dim = sum(s.output_shape[1] for s in stats_list) - output_shape = (time_dim, total_dim) - else: - output_shape = stats_list[0].output_shape + total_dim = sum(s.output_shape[-1] for s in stats_list) + output_shape = (total_dim,) return RepresentationStats(num_instances, output_shape) - def estimate_peak_memory_bytes(self, input_stats) -> dict: - # TODO - return { - "cpu_peak_bytes": 0, - "gpu_peak_bytes": 0, - } + def estimate_peak_memory_bytes(self, input_stats_list) -> dict: + elem_size = np.dtype(np.float32).itemsize + + def stats_bytes(s: RepresentationStats) -> int: + numel = int(np.prod(s.output_shape)) if len(s.output_shape) > 0 else 1 + return int(s.num_instances * numel * elem_size) + + current_output = 0 + peak = 0 + for s in input_stats_list: + chunk = stats_bytes(s) + new_output = current_output + chunk + + step_peak = current_output + chunk + new_output + chunk + peak = max(peak, step_peak) + current_output = new_output + + cpu_peak = int(peak * 1.15 + 16 * 1024 * 1024) + return {"cpu_peak_bytes": cpu_peak, "gpu_peak_bytes": 0} diff --git a/src/main/python/systemds/scuro/representations/hadamard.py b/src/main/python/systemds/scuro/representations/hadamard.py index a29f63e2b24..fc053f9c6dc 100644 --- a/src/main/python/systemds/scuro/representations/hadamard.py +++ b/src/main/python/systemds/scuro/representations/hadamard.py @@ -55,18 +55,22 @@ def get_output_stats(self, input_stats_list) -> RepresentationStats: if not stats_list: return RepresentationStats(0, (0,)) - def num_elements(stats: RepresentationStats) -> int: - n = 1 - for d in stats.output_shape: - n *= d - return n + max_dim = max([stats.output_shape[-1] for stats in stats_list]) + return RepresentationStats(stats_list[0].num_instances, (max_dim,)) - largest = max(stats_list, key=num_elements) - return RepresentationStats(largest.num_instances, largest.output_shape) + def estimate_peak_memory_bytes(self, input_stats_list) -> dict: + elem_size = np.dtype(np.float64).itemsize - def estimate_peak_memory_bytes(self, input_stats) -> dict: - # TODO - return { - "cpu_peak_bytes": 0, - "gpu_peak_bytes": 0, - } + def stats_payload_bytes(s: RepresentationStats) -> int: + numel = int(np.prod(s.output_shape)) if len(s.output_shape) > 0 else 1 + return int(s.num_instances * numel * elem_size) + + stacked_input_bytes = sum(stats_payload_bytes(s) for s in input_stats_list) + out_stats = self.get_output_stats(input_stats_list) + output_bytes = stats_payload_bytes(out_stats) + reduction_workspace_bytes = output_bytes + cpu_peak = int( + (stacked_input_bytes + output_bytes + reduction_workspace_bytes) * 1.15 + + 8 * 1024 * 1024 + ) + return {"cpu_peak_bytes": cpu_peak, "gpu_peak_bytes": 0} diff --git a/src/main/python/systemds/scuro/representations/lstm.py b/src/main/python/systemds/scuro/representations/lstm.py index 104f2727e69..7243b65966a 100644 --- a/src/main/python/systemds/scuro/representations/lstm.py +++ b/src/main/python/systemds/scuro/representations/lstm.py @@ -233,7 +233,7 @@ def execute(self, modalities: List[Modality], labels: np.ndarray = None): TensorDataset(X_tensor), batch_size=self.batch_size, shuffle=False ) for (batch_X,) in inference_dataloader: - batch_X = batch_X.to(device) + batch_X = batch_X.to(self.device) features, _ = self.model(batch_X) all_features.append(features.cpu()) diff --git a/src/main/python/systemds/scuro/representations/max.py b/src/main/python/systemds/scuro/representations/max.py index 39f5069c2b5..2dadc497c78 100644 --- a/src/main/python/systemds/scuro/representations/max.py +++ b/src/main/python/systemds/scuro/representations/max.py @@ -30,7 +30,7 @@ @register_fusion_operator() class RowMax(Fusion): - def __init__(self): + def __init__(self, params=None): """ Combines modalities by computing the outer product of a modality combination and taking the row max diff --git a/src/main/python/systemds/scuro/representations/mlp_averaging.py b/src/main/python/systemds/scuro/representations/mlp_averaging.py index 8c8d67a06ec..46fe04899aa 100644 --- a/src/main/python/systemds/scuro/representations/mlp_averaging.py +++ b/src/main/python/systemds/scuro/representations/mlp_averaging.py @@ -54,8 +54,12 @@ def __init__(self, output_dim=512, batch_size=32, params=None): "batch_size": [8, 16, 32, 64, 128], } super().__init__("MLPAveraging", parameters) - self.output_dim = output_dim - self.batch_size = batch_size + if params is not None: + self.output_dim = params.get("output_dim", output_dim) + self.batch_size = params.get("batch_size", batch_size) + else: + self.output_dim = output_dim + self.batch_size = batch_size self.device = None self.data_type = np.float32 self.gpu_id = None @@ -70,7 +74,10 @@ def gpu_id(self, gpu_id): self.device = get_device(gpu_id) def get_output_stats(self, input_stats: RepresentationStats) -> RepresentationStats: - if len(input_stats.output_shape) > 1: + if ( + len(input_stats.output_shape) > 1 + and np.prod(input_stats.output_shape) > self.output_dim + ): return RepresentationStats( input_stats.num_instances, (self.output_dim,), @@ -87,7 +94,13 @@ def get_output_stats(self, input_stats: RepresentationStats) -> RepresentationSt ) return RepresentationStats( input_stats.num_instances, - (self.output_dim,), + ( + ( + np.prod(input_stats.output_shape) + if np.prod(input_stats.output_shape) < self.output_dim + else self.output_dim + ), + ), output_shape_is_known=input_stats.output_shape_is_known, ) diff --git a/src/main/python/systemds/scuro/representations/multimodal_attention_fusion.py b/src/main/python/systemds/scuro/representations/multimodal_attention_fusion.py index a295eaa267a..066f3432159 100644 --- a/src/main/python/systemds/scuro/representations/multimodal_attention_fusion.py +++ b/src/main/python/systemds/scuro/representations/multimodal_attention_fusion.py @@ -42,6 +42,7 @@ def __init__( batch_size=32, num_epochs=20, learning_rate=0.001, + params=None, ): parameters = { "hidden_dim": [32, 128, 256, 384, 512, 768], diff --git a/src/main/python/systemds/scuro/representations/spectrogram.py b/src/main/python/systemds/scuro/representations/spectrogram.py index ba455cb9808..ab0c8a6c649 100644 --- a/src/main/python/systemds/scuro/representations/spectrogram.py +++ b/src/main/python/systemds/scuro/representations/spectrogram.py @@ -60,10 +60,14 @@ def transform(self, modality, aggregation=None): return transformed_modality def compute_feature(self, instance): + data = np.array(instance) spectrogram = librosa.stft( - y=np.array(np.abs(instance)), hop_length=self.hop_length, n_fft=self.n_fft + y=np.abs(data), hop_length=self.hop_length, n_fft=self.n_fft ) - return librosa.amplitude_to_db(np.abs(spectrogram)).T + if data.ndim == 1: + return librosa.amplitude_to_db(np.abs(spectrogram)).T + + return librosa.amplitude_to_db(np.abs(spectrogram)).transpose(0, 2, 1) def estimate_peak_memory_bytes(self, input_stats) -> dict: # TODO: validate this function @@ -74,7 +78,7 @@ def estimate_peak_memory_bytes(self, input_stats) -> dict: if signal_length < self.n_fft: num_frames = 1 else: - num_frames = 1 + (signal_length - self.n_fft) // self.hop_length + num_frames = 1 + signal_length // self.hop_length num_frames = max(int(num_frames), 1) num_freq_bins = 1 + self.n_fft // 2 @@ -121,7 +125,7 @@ def get_output_stats(self, input_stats) -> RepresentationStats: if signal_length < self.n_fft: num_frames = 1 else: - num_frames = 1 + (signal_length - self.n_fft) // self.hop_length + num_frames = 1 + signal_length // self.hop_length num_frames = max(int(num_frames), 1) num_freq_bins = 1 + self.n_fft // 2 diff --git a/src/main/python/systemds/scuro/representations/sum.py b/src/main/python/systemds/scuro/representations/sum.py index d6c4fe659b2..4f658020f1e 100644 --- a/src/main/python/systemds/scuro/representations/sum.py +++ b/src/main/python/systemds/scuro/representations/sum.py @@ -61,18 +61,24 @@ def get_output_stats(self, input_stats_list) -> RepresentationStats: if not stats_list: return RepresentationStats(0, (0,)) - def num_elements(stats: RepresentationStats) -> int: - n = 1 - for d in stats.output_shape: - n *= d - return n + max_dim = max([stats.output_shape[-1] for stats in stats_list]) + return RepresentationStats(stats_list[0].num_instances, (max_dim,)) - largest = max(stats_list, key=num_elements) - return RepresentationStats(largest.num_instances, largest.output_shape) + def estimate_peak_memory_bytes(self, input_stats_list) -> dict: + elem_size = np.dtype(np.float64).itemsize - def estimate_peak_memory_bytes(self, input_stats) -> dict: - # TODO - return { - "cpu_peak_bytes": 0, - "gpu_peak_bytes": 0, - } + def stats_payload_bytes(s: RepresentationStats) -> int: + numel = int(np.prod(s.output_shape)) if len(s.output_shape) > 0 else 1 + return int(s.num_instances * numel * elem_size) + + first_bytes = stats_payload_bytes(input_stats_list[0]) + max_other_bytes = 0 + if len(input_stats_list) > 1: + max_other_bytes = max(stats_payload_bytes(s) for s in input_stats_list[1:]) + + ufunc_workspace_bytes = int(0.1 * max(first_bytes, max_other_bytes)) + cpu_peak = int( + (first_bytes + max_other_bytes + ufunc_workspace_bytes) * 1.15 + + 8 * 1024 * 1024 + ) + return {"cpu_peak_bytes": cpu_peak, "gpu_peak_bytes": 0} diff --git a/src/main/python/systemds/scuro/representations/timeseries_representations.py b/src/main/python/systemds/scuro/representations/timeseries_representations.py index 80f6880a0b8..26c8c1a8d98 100644 --- a/src/main/python/systemds/scuro/representations/timeseries_representations.py +++ b/src/main/python/systemds/scuro/representations/timeseries_representations.py @@ -77,8 +77,8 @@ class Mean(TimeSeriesRepresentation): def __init__(self, params=None): super().__init__("Mean") - def compute_feature(self, signal): - return np.array(np.mean(signal)) + def compute_feature(self, signal, axis=-1): + return np.array(np.mean(signal, axis=axis)) @register_representation([ModalityType.TIMESERIES]) @@ -87,8 +87,8 @@ class Min(TimeSeriesRepresentation): def __init__(self, params=None): super().__init__("Min") - def compute_feature(self, signal): - return np.array(np.min(signal)) + def compute_feature(self, signal, axis=-1): + return np.array(np.min(signal, axis=axis)) @register_representation([ModalityType.TIMESERIES]) @@ -97,8 +97,8 @@ class Max(TimeSeriesRepresentation): def __init__(self, params=None): super().__init__("Max") - def compute_feature(self, signal): - return np.array(np.max(signal)) + def compute_feature(self, signal, axis=-1): + return np.array(np.max(signal, axis=axis)) @register_representation([ModalityType.TIMESERIES]) @@ -107,8 +107,8 @@ class Sum(TimeSeriesRepresentation): def __init__(self, params=None): super().__init__("Sum") - def compute_feature(self, signal): - return np.array(np.sum(signal)) + def compute_feature(self, signal, axis=-1): + return np.array(np.sum(signal, axis=axis)) @register_representation([ModalityType.TIMESERIES]) @@ -117,8 +117,8 @@ class Std(TimeSeriesRepresentation): def __init__(self, params=None): super().__init__("Std") - def compute_feature(self, signal): - return np.array(np.std(signal)) + def compute_feature(self, signal, axis=-1): + return np.array(np.std(signal, axis=axis)) @register_representation([ModalityType.TIMESERIES]) @@ -127,8 +127,8 @@ class Skew(TimeSeriesRepresentation): def __init__(self, params=None): super().__init__("Skew") - def compute_feature(self, signal): - return np.array(stats.skew(signal)) + def compute_feature(self, signal, axis=-1): + return np.array(stats.skew(signal, axis=axis)) @register_representation([ModalityType.TIMESERIES]) @@ -140,8 +140,8 @@ def __init__(self, quantile=0.9, params=None): ) self.quantile = quantile - def compute_feature(self, signal): - return np.array(np.quantile(signal, self.quantile)) + def compute_feature(self, signal, axis=-1): + return np.array(np.quantile(signal, self.quantile, axis=axis)) @register_representation([ModalityType.TIMESERIES]) @@ -150,8 +150,8 @@ class Kurtosis(TimeSeriesRepresentation): def __init__(self, params=None): super().__init__("Kurtosis") - def compute_feature(self, signal): - return np.array(stats.kurtosis(signal, fisher=True, bias=False)) + def compute_feature(self, signal, axis=-1): + return np.array(stats.kurtosis(signal, fisher=True, bias=True, axis=axis)) @register_representation([ModalityType.TIMESERIES]) @@ -160,8 +160,8 @@ class RMS(TimeSeriesRepresentation): def __init__(self, params=None): super().__init__("RMS") - def compute_feature(self, signal): - return np.array(np.sqrt(np.mean(np.square(signal)))) + def compute_feature(self, signal, axis=-1): + return np.array(np.sqrt(np.mean(np.square(signal), axis=axis))) @register_representation([ModalityType.TIMESERIES]) @@ -170,8 +170,8 @@ class ZeroCrossingRate(TimeSeriesRepresentation): def __init__(self, params=None): super().__init__("ZeroCrossingRate") - def compute_feature(self, signal): - return np.array(np.sum(np.diff(np.signbit(signal)) != 0)) + def compute_feature(self, signal, axis=-1): + return np.array(np.sum(np.diff(np.signbit(signal), axis=axis) != 0, axis=axis)) @register_representation([ModalityType.TIMESERIES]) @@ -181,16 +181,23 @@ def __init__(self, k=1, params=None): super().__init__("ACF", {"k": [1, 2, 5, 10, 20, 25, 50, 100, 200, 500]}) self.k = k - def compute_feature(self, signal): - x = np.asarray(signal) - np.mean(signal) + def compute_feature(self, signal, axis=-1): + x = np.asarray(signal, dtype=np.float64) + x = x - np.mean(x, axis=axis, keepdims=True) k = int(self.k) - if k <= 0 or k >= len(x): - return np.array(0.0) - den = np.dot(x, x) - if not np.isfinite(den) or np.isclose(den, 0.0): - return np.array(0.0) - corr = np.correlate(x[:-k], x[k:])[0] - return np.array(corr / den) + n = x.shape[axis] + if k <= 0 or k >= n: + out_shape = list(x.shape) + del out_shape[axis] + return np.zeros(out_shape) if out_shape else np.array(0.0) + den = np.sum(x * x, axis=axis) + xm = np.moveaxis(x, axis, -1) + corr = np.sum(xm[..., :-k] * xm[..., k:], axis=-1) + with np.errstate(invalid="ignore", divide="ignore"): + out = corr / den + bad = ~np.isfinite(den) | np.isclose(den, 0.0) + out = np.where(bad, 0.0, out) + return np.asarray(out) def get_k_values(self, max_length, percent=0.2, num=10, log=False): # TODO: Probably would be useful to invoke this function while tuning the hyperparameters depending on the max length of the singal @@ -205,11 +212,11 @@ def get_k_values(self, max_length, percent=0.2, num=10, log=False): @register_representation([ModalityType.TIMESERIES]) @register_context_representation_operator(ModalityType.TIMESERIES) class FrequencyMagnitude(TimeSeriesRepresentation): - def __init__(self): + def __init__(self, params=None): super().__init__("FrequencyMagnitude") - def compute_feature(self, signal): - return np.array(np.abs(np.fft.rfft(signal))) + def compute_feature(self, signal, axis=-1): + return np.array(np.abs(np.fft.rfft(signal, axis=axis))) @register_representation([ModalityType.TIMESERIES]) @@ -219,11 +226,17 @@ def __init__(self, fs=1.0, params=None): super().__init__("SpectralCentroid", parameters={"fs": [0.5, 1.0, 2.0]}) self.fs = fs - def compute_feature(self, signal): - frequency_magnitude = FrequencyMagnitude().compute_feature(signal) - freqencies = np.fft.rfftfreq(len(signal), d=1.0 / self.fs) - num = np.sum(freqencies * frequency_magnitude) - den = np.sum(frequency_magnitude) + 1e-12 + def compute_feature(self, signal, axis=-1): + signal = np.asarray(signal, dtype=np.float64) + n = signal.shape[axis] + frequency_magnitude = FrequencyMagnitude().compute_feature(signal, axis=axis) + frequencies = np.fft.rfftfreq(n, d=1.0 / self.fs) + ax = axis if axis >= 0 else frequency_magnitude.ndim + axis + freq_shape = [1] * frequency_magnitude.ndim + freq_shape[ax] = frequencies.size + frequencies = frequencies.reshape(freq_shape) + num = np.sum(frequencies * frequency_magnitude, axis=axis) + den = np.sum(frequency_magnitude, axis=axis) + 1e-12 return np.array(num / den) @@ -239,11 +252,17 @@ def __init__(self, fs=1.0, f1=0.0, f2=0.5, params=None): self.f1 = f1 self.f2 = f2 - def compute_feature( - self, - signal, - ): - frequency_magnitude = FrequencyMagnitude().compute_feature(signal) - freqencies = np.fft.rfftfreq(len(signal), d=1.0 / self.fs) - m = (freqencies >= self.f1) & (freqencies < self.f2) - return np.array(np.sum(frequency_magnitude[m] ** 2)) + def compute_feature(self, signal, axis=-1): + signal = np.asarray(signal, dtype=np.float64) + n = signal.shape[axis] + + frequency_magnitude = FrequencyMagnitude().compute_feature(signal, axis=axis) + frequencies = np.fft.rfftfreq(n, d=1.0 / self.fs) + + ax = axis if axis >= 0 else frequency_magnitude.ndim + axis + freq_shape = [1] * frequency_magnitude.ndim + freq_shape[ax] = frequencies.size + frequencies = frequencies.reshape(freq_shape) + + in_band = (frequencies >= self.f1) & (frequencies < self.f2) + return np.array(np.sum((frequency_magnitude**2) * in_band, axis=axis)) diff --git a/src/main/python/systemds/scuro/representations/window_aggregation.py b/src/main/python/systemds/scuro/representations/window_aggregation.py index 5d63e4790dd..36e12c0cd5c 100644 --- a/src/main/python/systemds/scuro/representations/window_aggregation.py +++ b/src/main/python/systemds/scuro/representations/window_aggregation.py @@ -19,9 +19,7 @@ # # ------------------------------------------------------------- -from concurrent.futures import ThreadPoolExecutor import inspect -import os import numpy as np import math @@ -236,7 +234,6 @@ def estimate_peak_memory_bytes(self, input_stats: RepresentationStats) -> dict: effective_seq_len = in_shape[0] in_numel = effective_seq_len * self._rest_numel(in_shape) - output_bytes = self.estimate_output_memory_bytes(input_stats) one_instance_bytes = in_numel * np.dtype(self.data_type).itemsize input_bytes = one_instance_bytes * input_stats.num_instances @@ -272,36 +269,28 @@ def execute(self, modality): ) original_lengths.append(windowed_instance.shape[0]) windowed_data.append(windowed_instance) - if self.pad and not isinstance(windowed_data, np.ndarray): target_length = max(original_lengths) - sample_shape = windowed_data[0].shape - padded_features = [] + padded_features = np.zeros( + (len(windowed_data), target_length, *windowed_data[0].shape[1:]) + ) for i, features in enumerate(windowed_data): - current_len = original_lengths[i] - - if current_len < target_length: - padding_needed = target_length - current_len - - pad_shape = (padding_needed,) + features.shape[1:] - padding = np.zeros(pad_shape) - padded = np.concatenate([features, padding], axis=0) - - padded_features.append(padded) + if padded_features.ndim == 3: + padded_features[i, : features.shape[0], :] = features else: - padded_features.append(features) + padded_features[i, : features.shape[0]] = features - attention_masks = np.zeros((len(windowed_data), target_length)) - for i, length in enumerate(original_lengths): - actual_length = min(length, target_length) - attention_masks[i, :actual_length] = 1 + # attention_masks = np.zeros((len(windowed_data), target_length)) + # for i, length in enumerate(original_lengths): + # actual_length = min(length, target_length) + # attention_masks[i, :actual_length] = 1 - ModalityType(modality.modality_type).add_field_for_instances( - modality.metadata, "attention_masks", attention_masks - ) + # ModalityType(modality.modality_type).add_field_for_instances( + # modality.metadata, "attention_masks", attention_masks + # ) - windowed_data = np.array(padded_features) + windowed_data = padded_features data_type = modality.metadata[0]["data_layout"]["type"] if data_type != "str": windowed_data = windowed_data.astype(data_type) @@ -313,17 +302,28 @@ def window_aggregate_single_level(self, instance, new_length): if isinstance(instance, str): return instance - result = [] - for i in range(0, new_length): - result.append( - self.aggregation_function.compute_feature( - instance[ - i * self.window_size : i * self.window_size + self.window_size - ] - ) - ) + arr = np.asarray(instance) + cut_length = (new_length - 1) * self.window_size - return np.array(result) + full_batches = arr[:cut_length].reshape( + new_length - 1, self.window_size, *arr.shape[1:] + ) + tail = arr[cut_length:] + + sig = inspect.signature(self.aggregation_function.compute_feature) + if "axis" in sig.parameters: + full_result = self.aggregation_function.compute_feature( + full_batches, axis=1 + ) + if tail.size: + tail_result = self.aggregation_function.compute_feature(tail) + full_result = np.concatenate([full_result, np.array([tail_result])]) + else: + full_result = self.aggregation_function.compute_feature(full_batches) + if tail.size: + tail_result = self.aggregation_function.compute_feature(tail) + full_result = np.concatenate([full_result, tail_result[None, :]]) + return full_result def window_aggregate_nested_level(self, instance, new_length): result = [[] for _ in range(0, new_length)] @@ -339,10 +339,12 @@ def window_aggregate_nested_level(self, instance, new_length): [ModalityType.TIMESERIES, ModalityType.AUDIO, ModalityType.EMBEDDING] ) class StaticWindow(Window): - # TODO def __init__(self, aggregation_function="mean", num_windows=100, params=None): super().__init__("StaticWindow", aggregation_function) - self.parameters["num_windows"] = [10, num_windows] + if params is not None: + num_windows = params.get("num_windows", 100) + + self.parameters["num_windows"] = (5, num_windows) self.num_windows = int(num_windows) def get_output_stats(self, input_stats: RepresentationStats) -> tuple: @@ -387,28 +389,26 @@ def estimate_peak_memory_bytes(self, input_stats: RepresentationStats) -> dict: def execute(self, modality): windowed_data = [] - for instance in modality.data: - window_size = len(instance) // self.num_windows - remainder = len(instance) % self.num_windows - output = [] - start = 0 - for i in range(0, self.num_windows): - extra = 1 if i < remainder else 0 - end = start + window_size + extra - window = instance[start:end] - window.setflags(write=False) - val = ( - self.aggregation_function.compute_feature(window) - if len(window) > 0 - else np.zeros_like(output[i - 1]) - ) - output.append(val) - start = end + window_size = int(np.ceil(len(instance) / self.num_windows)) + padding_size = int(window_size * self.num_windows - len(instance)) + pad_width = [(0, 0)] * instance.ndim + pad_width[0] = (0, padding_size) + instance = np.pad( + instance, pad_width=pad_width, mode="constant", constant_values=0 + ) + full_batches = instance.reshape( + self.num_windows, window_size, *instance.shape[1:] + ) - windowed_data.append(output) + sig = inspect.signature(self.aggregation_function.compute_feature) + if "axis" in sig.parameters: + f = self.aggregation_function.compute_feature(full_batches, axis=1) + else: + f = self.aggregation_function.compute_feature(full_batches) + + windowed_data.append(f) windowed_data = np.array(windowed_data) - self.assert_output_stats(windowed_data) return windowed_data @@ -418,7 +418,9 @@ def execute(self, modality): class DynamicWindow(Window): def __init__(self, aggregation_function="mean", num_windows=100, params=None): super().__init__("DynamicWindow", aggregation_function) - self.parameters["num_windows"] = [10, num_windows] + if params is not None: + num_windows = params.get("num_windows", 100) + self.parameters["num_windows"] = (5, num_windows) self.num_windows = int(num_windows) def get_output_stats(self, input_stats: RepresentationStats) -> tuple: diff --git a/src/main/python/tests/scuro/test_unimodal_representations.py b/src/main/python/tests/scuro/test_unimodal_representations.py index a4e18743090..2f474be7fd9 100644 --- a/src/main/python/tests/scuro/test_unimodal_representations.py +++ b/src/main/python/tests/scuro/test_unimodal_representations.py @@ -86,7 +86,7 @@ class TestUnimodalRepresentations(unittest.TestCase): @classmethod def setUpClass(cls): - cls.num_instances = 100 + cls.num_instances = 2 cls.indices = np.array(range(cls.num_instances)) def _create_audio_modality(self, signal_length=1000): @@ -118,7 +118,6 @@ def test_audio_representation_transform_output_shapes(self): for representation, expected_shape_signature in audio_representations: with self.subTest(representation=representation.name): transformed_modality = representation.transform(audio) - print(representation.name) self.assertIsNotNone(transformed_modality.data) self.assertEqual(len(transformed_modality.data), self.num_instances)