From fdec8ff4ab69258738957a73ddcaf6cb5a85a0cb Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Wed, 13 May 2026 17:32:56 +0200 Subject: [PATCH 1/9] add Butterworth dataclass --- src/osekit/utils/audio.py | 98 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/src/osekit/utils/audio.py b/src/osekit/utils/audio.py index b63d6357..35804a66 100644 --- a/src/osekit/utils/audio.py +++ b/src/osekit/utils/audio.py @@ -1,11 +1,14 @@ from __future__ import annotations +import dataclasses import enum +from collections.abc import Iterable from typing import Literal, Self import numpy as np import soxr from pandas import Timedelta +from scipy import signal from osekit.config import ( resample_quality_settings, @@ -203,3 +206,98 @@ def normalize( if Normalization.ZSCORE in normalization: values = normalize_zscore(values=values, mean=mean, std=std) return values + + +@dataclasses.dataclass +class Butterworth: + """Class that represent a Butterworth sos filter. + + Parameters + ---------- + N: int + The order of the filter. + For "bandpass" and "bandstop" filters, the resulting order of the final + second-order sections ("sos") matrix is ``2*N``, + with ``N`` the number of biquad sections of the desired system. + Wn: Iterable | int | float + The critical frequency or frequencies. + For lowpass and highpass filters, ``Wn`` is a scalar. + For bandpass and bandstop filters, ``Wn`` is a length-2 sequence. + For a Butterworth filter, this is the point at which the gain + drops to ``1/sqrt(2)`` that of the passband (the “-3 dB point”). + For digital filters, if ``fs`` is not specified, + ``Wn`` units are normalized from ``0`` to ``1``, + where ``1`` is the Nyquist frequency + (``Wn`` is thus in half cycles / sample and defined as + ``2*critical frequencies / fs``). + If ``fs`` is specified, ``Wn`` is in the same units as ``fs``. + For analog filters, ``Wn`` is an angular frequency (e.g. ``rad/s``). + btype: Literal["lowpass", "highpass", "bandpass", "bandstop"] + The type of filter. Default is "lowpass". + + """ + + N: int + Wn: Iterable | int | float + btype: Literal["lowpass", "highpass", "bandpass", "bandstop"] = "lowpass" + + def to_dict(self) -> dict: + """Serialize a Butterworth sos filter to a dictionary. + + Returns + ------- + dict: + Serialized Butterworth sos filter. + + """ + return { + "N": self.N, + "Wn": self.Wn, + "btype": self.btype, + } + + @classmethod + def from_dict(cls, data: dict) -> Butterworth: + """Deserialize a Butterworth sos filter from a dictionary. + + Parameters + ---------- + data: dict + Serialized Butterworth sos filter. + + Returns + ------- + Butterworth: + The Butterworth sos filter. + + """ + return cls( + N=data["N"], + Wn=data["Wn"], + btype=data["btype"], + ) + + def filter(self, sig: np.typing.NDArray, fs: float) -> np.typing.NDArray: + """Filter an input signal with the Butterworth sos filter. + + Parameters + ---------- + sig: np.typing.NDArray + Input signal + fs: float + Sampling frequency of the signal + + Returns + ------- + np.typing.NDArray + Filtered signal + + """ + sos = signal.butter( + N=self.N, + Wn=self.Wn, + btype=self.btype, + fs=fs, + output="sos", + ) + return signal.sosfilt(sos=sos, x=sig, axis=0) From 6cca0045a6cbaf3a4395f565607445509954901c Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Wed, 13 May 2026 17:34:13 +0200 Subject: [PATCH 2/9] add butterworth filtering in AudioData.get_value() --- src/osekit/core/audio_data.py | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/src/osekit/core/audio_data.py b/src/osekit/core/audio_data.py index d592ae28..96ccee16 100644 --- a/src/osekit/core/audio_data.py +++ b/src/osekit/core/audio_data.py @@ -20,7 +20,7 @@ from osekit.core.audio_item import AudioItem from osekit.core.base_data import BaseData from osekit.core.instrument import Instrument -from osekit.utils.audio import Normalization, normalize +from osekit.utils.audio import Butterworth, Normalization, normalize if TYPE_CHECKING: from pathlib import Path @@ -45,6 +45,7 @@ def __init__( instrument: Instrument | None = None, normalization: Normalization = Normalization.RAW, normalization_values: dict | None = None, + butter: Butterworth | None = None, ) -> None: """Initialize an ``AudioData`` from a list of ``AudioItems``. @@ -67,6 +68,8 @@ def __init__( the wav audio data. normalization: Normalization The type of normalization to apply to the audio data. + butter: Butterworth | None + Butterworth filter to apply to the audio data. """ super().__init__(items=items, begin=begin, end=end, name=name) @@ -74,6 +77,7 @@ def __init__( self.instrument = instrument self.normalization = normalization self.normalization_values = normalization_values + self.butter = butter @property def nb_channels(self) -> int: @@ -123,6 +127,15 @@ def normalization_values(self, value: dict | None) -> None: } ) + @property + def butter(self) -> Butterworth: + """The Butterworth filter to apply to the audio data.""" + return self._butter + + @butter.setter + def butter(self, value: Butterworth) -> None: + self._butter = value + @classmethod def _make_item( cls, @@ -220,7 +233,12 @@ def get_raw_value(self) -> np.ndarray: The value of the audio data. """ - return np.vstack(list(self.stream())) + output = np.vstack(list(self.stream())) + return ( + output + if self.butter is None + else self.butter.filter(sig=output, fs=self.sample_rate) + ) @staticmethod def _flush( @@ -547,9 +565,13 @@ def to_dict(self) -> dict: None if self.instrument is None else self.instrument.to_dict() ), } + butter_dict = { + "butter": (None if self.butter is None else self.butter.to_dict()), + } return ( base_dict | instrument_dict + | butter_dict | { "sample_rate": self.sample_rate, "normalization": self.normalization.value, @@ -595,6 +617,11 @@ def _from_base_dict( if dictionary["instrument"] is None else Instrument.from_dict(dictionary["instrument"]) ) + butter = ( + None + if "butter" not in dictionary or dictionary["butter"] is None + else Butterworth.from_dict(dictionary["butter"]) + ) return cls.from_files( files=files, begin=begin, @@ -603,6 +630,7 @@ def _from_base_dict( sample_rate=dictionary["sample_rate"], normalization=Normalization(dictionary["normalization"]), normalization_values=dictionary["normalization_values"], + butter=butter, ) @classmethod @@ -641,6 +669,9 @@ def from_files( normalization: Normalization The type of normalization to apply to the audio data. + butter: Butterworth + Butterworth filter to apply to the audio data. + Returns ------- Self: From 4c09d295ac3d912c4f7f18bf34e2b00815bf084e Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Wed, 13 May 2026 17:54:23 +0200 Subject: [PATCH 3/9] add AudioDataset.butter property --- src/osekit/core/audio_dataset.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/osekit/core/audio_dataset.py b/src/osekit/core/audio_dataset.py index 32fe8c72..7eff0a63 100644 --- a/src/osekit/core/audio_dataset.py +++ b/src/osekit/core/audio_dataset.py @@ -13,7 +13,7 @@ from osekit.core.audio_file import AudioFile from osekit.core.base_dataset import BaseDataset from osekit.core.json_serializer import deserialize_json -from osekit.utils.audio import Normalization +from osekit.utils.audio import Butterworth, Normalization from osekit.utils.multiprocess import multiprocess if TYPE_CHECKING: @@ -89,6 +89,17 @@ def normalization(self, normalization: Normalization) -> None: for data in self.data: data.normalization = normalization + @property + def butter(self) -> Butterworth: + """Return the most frequent Butterworth filter among those of this dataset data.""" + butters = [data.butter for data in self.data] + return max(set(butters), key=butters.count) + + @butter.setter + def butter(self, butter: Butterworth) -> None: + for data in self.data: + data.butter = butter + @property def instrument(self) -> Instrument | None: """Instrument that can be used to get acoustic pressure from wav audio data.""" @@ -187,6 +198,7 @@ def from_folder( # noqa: PLR0913 name: str | None = None, instrument: Instrument | None = None, normalization: Normalization = Normalization.RAW, + butter: Butterworth | None = None, **kwargs, # noqa: ANN003 ) -> Self: """Return an ``AudioDataset`` from a folder containing the audio files. @@ -240,6 +252,8 @@ def from_folder( # noqa: PLR0913 the wav audio data. normalization: Normalization The type of normalization to apply to the audio data. + butter: Butterworth | None + Butterworth filter to apply to the audio data. kwargs: any Keyword arguments passed to the ``BaseDataset.from_folder()`` classmethod. @@ -262,6 +276,7 @@ def from_folder( # noqa: PLR0913 name=name, instrument=instrument, normalization=normalization, + butter=butter, ) @classmethod @@ -277,6 +292,7 @@ def from_files( # noqa: PLR0913 sample_rate: float | None = None, instrument: Instrument | None = None, normalization: Normalization = Normalization.RAW, + butter: Butterworth | None = None, ) -> AudioDataset: """Return an AudioDataset object from a list of AudioFiles. @@ -317,6 +333,8 @@ def from_files( # noqa: PLR0913 the wav audio data. normalization: Normalization The type of normalization to apply to the audio data. + butter: Butterworth | None + Butterworth filter to apply to the audio data. Returns ------- @@ -335,6 +353,7 @@ def from_files( # noqa: PLR0913 mode=mode, overlap=overlap, data_duration=data_duration, + butter=butter, ) @classmethod From ca090c6adba403f8c27626efa85c2e005d69290d Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Mon, 18 May 2026 11:19:54 +0200 Subject: [PATCH 4/9] add butter parameter in Transform constructor --- src/osekit/public/project.py | 1 + src/osekit/public/transform.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/osekit/public/project.py b/src/osekit/public/project.py index a9235ee4..e3cdd5e9 100644 --- a/src/osekit/public/project.py +++ b/src/osekit/public/project.py @@ -294,6 +294,7 @@ def prepare_audio(self, transform: Transform) -> AudioDataset: mode=transform.mode, overlap=transform.overlap, normalization=transform.normalization, + butter=transform.butter, name=transform.name, instrument=self.instrument, ) diff --git a/src/osekit/public/transform.py b/src/osekit/public/transform.py index 4e42eb22..895cd01c 100644 --- a/src/osekit/public/transform.py +++ b/src/osekit/public/transform.py @@ -3,7 +3,7 @@ from enum import Flag, auto from typing import TYPE_CHECKING, Literal -from osekit.utils.audio import Normalization +from osekit.utils.audio import Butterworth, Normalization if TYPE_CHECKING: from pandas import Timedelta, Timestamp @@ -73,6 +73,7 @@ def __init__( overlap: float = 0.0, sample_rate: float | None = None, normalization: Normalization = Normalization.RAW, + butter: Butterworth | None = None, name: str | None = None, subtype: str | None = None, fft: ShortTimeFFT | None = None, @@ -118,6 +119,8 @@ def __init__( will be set to the one of the original dataset. normalization: Normalization The type of normalization to apply to the audio data. + butter: Butterworth | None + Butterworth filter to apply to the audio data. name: str | None Name of the transform dataset. Defaulted as the begin timestamp of the transform dataset. @@ -160,6 +163,7 @@ def __init__( self.sample_rate = sample_rate self.name = name self.normalization = normalization + self.butter = butter self.subtype = subtype self.v_lim = v_lim self.colormap = colormap From 20b5bc72ef352ae5e718fe0b6f1fa420988e930f Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Tue, 19 May 2026 12:22:50 +0200 Subject: [PATCH 5/9] move filtering to a new AudioData.get_filtered_value() method --- src/osekit/core/audio_data.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/osekit/core/audio_data.py b/src/osekit/core/audio_data.py index 96ccee16..70124d80 100644 --- a/src/osekit/core/audio_data.py +++ b/src/osekit/core/audio_data.py @@ -191,7 +191,7 @@ def get_normalization_values(self) -> dict: "std": standard deviation used for z-score normalization """ - values = np.array(self.get_raw_value()) + values = np.array(self.get_filtered_value()) self.normalization_values = { "mean": values.mean(), "peak": values.max(), @@ -233,7 +233,18 @@ def get_raw_value(self) -> np.ndarray: The value of the audio data. """ - output = np.vstack(list(self.stream())) + return np.vstack(list(self.stream())) + + def get_filtered_value(self) -> np.ndarray: + """Return the value of the audio data after filtering. + + Returns + ------- + np.ndarray: + The value of the audio data filtered by the ``self.butter`` Butterworth filter. + + """ + output = self.get_raw_value() return ( output if self.butter is None @@ -338,7 +349,7 @@ def get_value(self) -> np.ndarray: """ return normalize( - values=self.get_raw_value(), + values=self.get_filtered_value(), normalization=self.normalization, **self.normalization_values, ) From 999e2416c0a7e9eb0c00bd2a2e056f1b97c57f98 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Tue, 19 May 2026 12:23:05 +0200 Subject: [PATCH 6/9] add butter filtering test --- tests/test_audio.py | 47 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/tests/test_audio.py b/tests/test_audio.py index 37374847..0267591a 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -3,13 +3,14 @@ import importlib import logging from pathlib import Path -from typing import Literal +from typing import Any, Literal import numpy as np import pandas as pd import pytest import soundfile as sf from pandas import Timedelta, Timestamp +from scipy import signal import osekit from osekit.config import ( @@ -25,7 +26,12 @@ from osekit.core.audio_item import AudioItem from osekit.core.instrument import Instrument from osekit.utils import audio -from osekit.utils.audio import Normalization, generate_sample_audio, normalize +from osekit.utils.audio import ( + Butterworth, + Normalization, + generate_sample_audio, + normalize, +) from tests.helpers.audio import MockedAudioData @@ -2166,3 +2172,40 @@ def test_resampling_from_different_origin_frequencies(tmp_path: Path) -> None: vs = ad.get_value() assert vs.size == int(ad.duration.total_seconds()) * ad.sample_rate + + +def test_butter(monkeypatch: pytest.MonkeyPatch) -> None: + butter_calls = {} + + sig_values = [1] * 10 + butter = Butterworth(N=2, Wn=[1000, 2000], btype="bandpass") + + def butter_patch(*args: Any, **kwargs: Any) -> list[int]: + butter_calls.update(kwargs) + return sig_values + + from osekit.utils.audio import signal as audio_signal + + monkeypatch.setattr(audio_signal, "sosfilt", butter_patch) + + ad = MockedAudioData( + mocked_value=sig_values, + butter=butter, + ) + ad.get_value() + + assert np.array_equal( + butter_calls["sos"], + signal.butter( + N=butter.N, + Wn=butter.Wn, + btype=butter.btype, + output="sos", + fs=ad.sample_rate, + ), + ) + + assert np.array_equal( + butter_calls["x"], + ad.get_raw_value(), + ) From d592a6f4adfa01803e1f56fe6d6aa31de6511462 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Tue, 19 May 2026 14:34:23 +0200 Subject: [PATCH 7/9] add Butterworth serialization test --- tests/test_utils.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 6f46d55e..196675f2 100755 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -12,7 +12,7 @@ from osekit.core.ltas_dataset import LTASDataset from osekit.core.spectro_dataset import SpectroDataset -from osekit.utils.audio import Normalization, normalize +from osekit.utils.audio import Butterworth, Normalization, normalize from osekit.utils.core import ( file_indexes_per_batch, get_closest_value_index, @@ -574,3 +574,15 @@ def raise_key_error(*args, **kwargs) -> None: # noqa: ANN002, ANN003 monkeypatch.setattr(LTASDataset, "from_json", raise_key_error) assert deserialize_spectro_or_ltas_dataset(path=Path()) == "SpectroDataset" + + +def test_butter_serialization() -> None: + N = 4 + Wn = [120.0, 200.0] + btype = "bandpass" + butter = Butterworth(N=N, Wn=Wn, btype=btype) + butter2 = Butterworth.from_dict(butter.to_dict()) + + assert butter.N == butter2.N + assert butter.Wn == butter2.Wn + assert butter.btype == butter2.btype From 8774d331a9bc45fc887fea4467510f9b63bda741 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Tue, 19 May 2026 14:42:26 +0200 Subject: [PATCH 8/9] add __hash__() method to Butterworth class --- src/osekit/utils/audio.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/osekit/utils/audio.py b/src/osekit/utils/audio.py index 35804a66..38d2e633 100644 --- a/src/osekit/utils/audio.py +++ b/src/osekit/utils/audio.py @@ -301,3 +301,6 @@ def filter(self, sig: np.typing.NDArray, fs: float) -> np.typing.NDArray: output="sos", ) return signal.sosfilt(sos=sos, x=sig, axis=0) + + def __hash__(self) -> int: + return hash((self.N, self.Wn, self.btype)) From fcbd8ffc90f4a37417145e905d15349dc2085bd7 Mon Sep 17 00:00:00 2001 From: Gautzilla Date: Tue, 19 May 2026 14:42:52 +0200 Subject: [PATCH 9/9] add AudioDataset butter tests --- tests/test_audio.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_audio.py b/tests/test_audio.py index 0267591a..4dee313c 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -2209,3 +2209,21 @@ def butter_patch(*args: Any, **kwargs: Any) -> list[int]: butter_calls["x"], ad.get_raw_value(), ) + + +def test_butter_audiodataset() -> None: + ads = AudioDataset([MockedAudioData(mocked_value=[]) for _ in range(2)]) + + assert all(ad.butter is None for ad in ads.data) + + butter = Butterworth(N=2, Wn=[1000, 2000], btype="bandpass") + butter2 = Butterworth(N=4, Wn=100, btype="lowpass") + + # AudioDataset to AudioData + ads.butter = butter + assert all(ad.butter == butter for ad in ads.data) + + # AudioData to AudioDataset + for ad in ads.data: + ad.butter = butter2 + assert ads.butter == butter2