From fdec8ff4ab69258738957a73ddcaf6cb5a85a0cb Mon Sep 17 00:00:00 2001
From: Gautzilla <gauthier.berthomieu@gmail.com>
Date: Wed, 13 May 2026 17:32:56 +0200
Subject: [PATCH 1/9] add Butterworth dataclass

---
 src/osekit/utils/audio.py | 98 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/src/osekit/utils/audio.py b/src/osekit/utils/audio.py
index b63d6357..35804a66 100644
--- a/src/osekit/utils/audio.py
+++ b/src/osekit/utils/audio.py
@@ -1,11 +1,14 @@
 from __future__ import annotations
 
+import dataclasses
 import enum
+from collections.abc import Iterable
 from typing import Literal, Self
 
 import numpy as np
 import soxr
 from pandas import Timedelta
+from scipy import signal
 
 from osekit.config import (
     resample_quality_settings,
@@ -203,3 +206,98 @@ def normalize(
     if Normalization.ZSCORE in normalization:
         values = normalize_zscore(values=values, mean=mean, std=std)
     return values
+
+
+@dataclasses.dataclass
+class Butterworth:
+    """Class that represents a Butterworth sos filter.
+
+    Parameters
+    ----------
+    N: int
+        The order of the filter.
+        For "bandpass" and "bandstop" filters, the resulting order of the final
+        second-order sections ("sos") matrix is ``2*N``,
+        with ``N`` the number of biquad sections of the desired system.
+    Wn: Iterable | int | float
+        The critical frequency or frequencies.
+        For lowpass and highpass filters, ``Wn`` is a scalar.
+        For bandpass and bandstop filters, ``Wn`` is a length-2 sequence.
+        For a Butterworth filter, this is the point at which the gain
+        drops to ``1/sqrt(2)`` that of the passband (the “-3 dB point”).
+        ``filter()`` always passes the sampling frequency ``fs`` of the
+        signal to ``scipy.signal.butter``, so ``Wn`` is in the same units
+        as ``fs`` (e.g. ``Hz``) rather than normalized to the Nyquist
+        frequency (the ``0`` to ``1`` convention scipy uses without ``fs``).
+        The filter is designed as a digital filter (``analog`` is left to
+        its default), so the angular-frequency convention of analog
+        filters does not apply either.
+    btype: Literal["lowpass", "highpass", "bandpass", "bandstop"]
+        The type of filter. Default is "lowpass".
+
+    """
+
+    N: int
+    Wn: Iterable | int | float
+    btype: Literal["lowpass", "highpass", "bandpass", "bandstop"] = "lowpass"
+
+    def to_dict(self) -> dict:
+        """Serialize a Butterworth sos filter to a dictionary.
+
+        Returns
+        -------
+        dict:
+            Serialized Butterworth sos filter (keys ``N``, ``Wn``, ``btype``).
+
+        """
+        return {
+            "N": self.N,
+            "Wn": self.Wn,
+            "btype": self.btype,
+        }
+
+    @classmethod
+    def from_dict(cls, data: dict) -> Butterworth:
+        """Deserialize a Butterworth sos filter from a dictionary.
+
+        Parameters
+        ----------
+        data: dict
+            Serialized Butterworth sos filter, as returned by ``to_dict()``.
+
+        Returns
+        -------
+        Butterworth:
+            The Butterworth sos filter.
+
+        """
+        return cls(
+            N=data["N"],
+            Wn=data["Wn"],
+            btype=data["btype"],
+        )
+
+    def filter(self, sig: np.typing.NDArray, fs: float) -> np.typing.NDArray:
+        """Filter an input signal with the Butterworth sos filter.
+
+        Parameters
+        ----------
+        sig: np.typing.NDArray
+            Input signal. It is filtered along its first axis (``axis=0``).
+        fs: float
+            Sampling frequency of the signal, in the same units as ``Wn``.
+
+        Returns
+        -------
+        np.typing.NDArray
+            Filtered signal, as returned by ``scipy.signal.sosfilt``.
+
+        """
+        sos = signal.butter(
+            N=self.N,
+            Wn=self.Wn,
+            btype=self.btype,
+            fs=fs,
+            output="sos",
+        )
+        return signal.sosfilt(sos=sos, x=sig, axis=0)  # forward filtering, axis 0

From 6cca0045a6cbaf3a4395f565607445509954901c Mon Sep 17 00:00:00 2001
From: Gautzilla <gauthier.berthomieu@gmail.com>
Date: Wed, 13 May 2026 17:34:13 +0200
Subject: [PATCH 2/9] add butterworth filtering in AudioData.get_value()

---
 src/osekit/core/audio_data.py | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/src/osekit/core/audio_data.py b/src/osekit/core/audio_data.py
index d592ae28..96ccee16 100644
--- a/src/osekit/core/audio_data.py
+++ b/src/osekit/core/audio_data.py
@@ -20,7 +20,7 @@
 from osekit.core.audio_item import AudioItem
 from osekit.core.base_data import BaseData
 from osekit.core.instrument import Instrument
-from osekit.utils.audio import Normalization, normalize
+from osekit.utils.audio import Butterworth, Normalization, normalize
 
 if TYPE_CHECKING:
     from pathlib import Path
@@ -45,6 +45,7 @@ def __init__(
         instrument: Instrument | None = None,
         normalization: Normalization = Normalization.RAW,
         normalization_values: dict | None = None,
+        butter: Butterworth | None = None,
     ) -> None:
         """Initialize an ``AudioData`` from a list of ``AudioItems``.
 
@@ -67,6 +68,8 @@ def __init__(
             the wav audio data.
         normalization: Normalization
             The type of normalization to apply to the audio data.
+        butter: Butterworth | None
+            Butterworth filter to apply to the audio data.
 
         """
         super().__init__(items=items, begin=begin, end=end, name=name)
@@ -74,6 +77,7 @@ def __init__(
         self.instrument = instrument
         self.normalization = normalization
         self.normalization_values = normalization_values
+        self.butter = butter
 
     @property
     def nb_channels(self) -> int:
@@ -123,6 +127,15 @@ def normalization_values(self, value: dict | None) -> None:
             }
         )
 
+    @property
+    def butter(self) -> Butterworth | None:
+        """Butterworth filter applied to the audio data (``None``: no filtering)."""
+        return self._butter
+
+    @butter.setter
+    def butter(self, value: Butterworth | None) -> None:
+        self._butter = value
+
     @classmethod
     def _make_item(
         cls,
@@ -220,7 +233,12 @@ def get_raw_value(self) -> np.ndarray:
             The value of the audio data.
 
         """
-        return np.vstack(list(self.stream()))
+        output = np.vstack(list(self.stream()))
+        return (
+            output
+            if self.butter is None
+            else self.butter.filter(sig=output, fs=self.sample_rate)
+        )
 
     @staticmethod
     def _flush(
@@ -547,9 +565,13 @@ def to_dict(self) -> dict:
                 None if self.instrument is None else self.instrument.to_dict()
             ),
         }
+        butter_dict = {
+            "butter": (None if self.butter is None else self.butter.to_dict()),
+        }
         return (
             base_dict
             | instrument_dict
+            | butter_dict
             | {
                 "sample_rate": self.sample_rate,
                 "normalization": self.normalization.value,
@@ -595,6 +617,11 @@ def _from_base_dict(
             if dictionary["instrument"] is None
             else Instrument.from_dict(dictionary["instrument"])
         )
+        butter = (
+            None
+            if "butter" not in dictionary or dictionary["butter"] is None
+            else Butterworth.from_dict(dictionary["butter"])
+        )
         return cls.from_files(
             files=files,
             begin=begin,
@@ -603,6 +630,7 @@ def _from_base_dict(
             sample_rate=dictionary["sample_rate"],
             normalization=Normalization(dictionary["normalization"]),
             normalization_values=dictionary["normalization_values"],
+            butter=butter,
         )
 
     @classmethod
@@ -641,6 +669,9 @@ def from_files(
             normalization: Normalization
             The type of normalization to apply to the audio data.
 
+            butter: Butterworth | None
+            Butterworth filter to apply to the audio data.
+
         Returns
         -------
         Self:

From 4c09d295ac3d912c4f7f18bf34e2b00815bf084e Mon Sep 17 00:00:00 2001
From: Gautzilla <gauthier.berthomieu@gmail.com>
Date: Wed, 13 May 2026 17:54:23 +0200
Subject: [PATCH 3/9] add AudioDataset.butter property

---
 src/osekit/core/audio_dataset.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/osekit/core/audio_dataset.py b/src/osekit/core/audio_dataset.py
index 32fe8c72..7eff0a63 100644
--- a/src/osekit/core/audio_dataset.py
+++ b/src/osekit/core/audio_dataset.py
@@ -13,7 +13,7 @@
 from osekit.core.audio_file import AudioFile
 from osekit.core.base_dataset import BaseDataset
 from osekit.core.json_serializer import deserialize_json
-from osekit.utils.audio import Normalization
+from osekit.utils.audio import Butterworth, Normalization
 from osekit.utils.multiprocess import multiprocess
 
 if TYPE_CHECKING:
@@ -89,6 +89,17 @@ def normalization(self, normalization: Normalization) -> None:
         for data in self.data:
             data.normalization = normalization
 
+    @property
+    def butter(self) -> Butterworth | None:
+        """Return the most frequent Butterworth filter (or ``None``) of the dataset data."""
+        butters = [data.butter for data in self.data]  # may contain None (no filter)
+        return max(butters, key=butters.count, default=None)  # ==-based: no hashing
+
+    @butter.setter
+    def butter(self, butter: Butterworth | None) -> None:
+        for data in self.data:  # the same filter object is shared by every data
+            data.butter = butter
+
     @property
     def instrument(self) -> Instrument | None:
         """Instrument that can be used to get acoustic pressure from wav audio data."""
@@ -187,6 +198,7 @@ def from_folder(  # noqa: PLR0913
         name: str | None = None,
         instrument: Instrument | None = None,
         normalization: Normalization = Normalization.RAW,
+        butter: Butterworth | None = None,
         **kwargs,  # noqa: ANN003
     ) -> Self:
         """Return an ``AudioDataset`` from a folder containing the audio files.
@@ -240,6 +252,8 @@ def from_folder(  # noqa: PLR0913
             the wav audio data.
         normalization: Normalization
             The type of normalization to apply to the audio data.
+        butter: Butterworth | None
+            Butterworth filter to apply to the audio data.
         kwargs: any
             Keyword arguments passed to the ``BaseDataset.from_folder()`` classmethod.
 
@@ -262,6 +276,7 @@ def from_folder(  # noqa: PLR0913
             name=name,
             instrument=instrument,
             normalization=normalization,
+            butter=butter,
         )
 
     @classmethod
@@ -277,6 +292,7 @@ def from_files(  # noqa: PLR0913
         sample_rate: float | None = None,
         instrument: Instrument | None = None,
         normalization: Normalization = Normalization.RAW,
+        butter: Butterworth | None = None,
     ) -> AudioDataset:
         """Return an AudioDataset object from a list of AudioFiles.
 
@@ -317,6 +333,8 @@ def from_files(  # noqa: PLR0913
             the wav audio data.
         normalization: Normalization
             The type of normalization to apply to the audio data.
+        butter: Butterworth | None
+            Butterworth filter to apply to the audio data.
 
         Returns
         -------
@@ -335,6 +353,7 @@ def from_files(  # noqa: PLR0913
             mode=mode,
             overlap=overlap,
             data_duration=data_duration,
+            butter=butter,
         )
 
     @classmethod

From ca090c6adba403f8c27626efa85c2e005d69290d Mon Sep 17 00:00:00 2001
From: Gautzilla <gauthier.berthomieu@gmail.com>
Date: Mon, 18 May 2026 11:19:54 +0200
Subject: [PATCH 4/9] add butter parameter in Transform constructor

---
 src/osekit/public/project.py   | 1 +
 src/osekit/public/transform.py | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/osekit/public/project.py b/src/osekit/public/project.py
index a9235ee4..e3cdd5e9 100644
--- a/src/osekit/public/project.py
+++ b/src/osekit/public/project.py
@@ -294,6 +294,7 @@ def prepare_audio(self, transform: Transform) -> AudioDataset:
             mode=transform.mode,
             overlap=transform.overlap,
             normalization=transform.normalization,
+            butter=transform.butter,
             name=transform.name,
             instrument=self.instrument,
         )
diff --git a/src/osekit/public/transform.py b/src/osekit/public/transform.py
index 4e42eb22..895cd01c 100644
--- a/src/osekit/public/transform.py
+++ b/src/osekit/public/transform.py
@@ -3,7 +3,7 @@
 from enum import Flag, auto
 from typing import TYPE_CHECKING, Literal
 
-from osekit.utils.audio import Normalization
+from osekit.utils.audio import Butterworth, Normalization
 
 if TYPE_CHECKING:
     from pandas import Timedelta, Timestamp
@@ -73,6 +73,7 @@ def __init__(
         overlap: float = 0.0,
         sample_rate: float | None = None,
         normalization: Normalization = Normalization.RAW,
+        butter: Butterworth | None = None,
         name: str | None = None,
         subtype: str | None = None,
         fft: ShortTimeFFT | None = None,
@@ -118,6 +119,8 @@ def __init__(
             will be set to the one of the original dataset.
         normalization: Normalization
             The type of normalization to apply to the audio data.
+        butter: Butterworth | None
+            Butterworth filter to apply to the audio data.
         name: str | None
             Name of the transform dataset.
             Defaulted as the begin timestamp of the transform dataset.
@@ -160,6 +163,7 @@ def __init__(
         self.sample_rate = sample_rate
         self.name = name
         self.normalization = normalization
+        self.butter = butter
         self.subtype = subtype
         self.v_lim = v_lim
         self.colormap = colormap

From 20b5bc72ef352ae5e718fe0b6f1fa420988e930f Mon Sep 17 00:00:00 2001
From: Gautzilla <gauthier.berthomieu@gmail.com>
Date: Tue, 19 May 2026 12:22:50 +0200
Subject: [PATCH 5/9] move filtering to a new AudioData.get_filtered_value()
 method

---
 src/osekit/core/audio_data.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/osekit/core/audio_data.py b/src/osekit/core/audio_data.py
index 96ccee16..70124d80 100644
--- a/src/osekit/core/audio_data.py
+++ b/src/osekit/core/audio_data.py
@@ -191,7 +191,7 @@ def get_normalization_values(self) -> dict:
             "std": standard deviation used for z-score normalization
 
         """
-        values = np.array(self.get_raw_value())
+        values = np.array(self.get_filtered_value())
         self.normalization_values = {
             "mean": values.mean(),
             "peak": values.max(),
@@ -233,7 +233,18 @@ def get_raw_value(self) -> np.ndarray:
             The value of the audio data.
 
         """
-        output = np.vstack(list(self.stream()))
+        return np.vstack(list(self.stream()))
+
+    def get_filtered_value(self) -> np.ndarray:
+        """Return the value of the audio data after filtering.
+
+        Returns
+        -------
+        np.ndarray:
+            The value of the audio data filtered by the ``self.butter`` Butterworth filter.
+
+        """
+        output = self.get_raw_value()
         return (
             output
             if self.butter is None
@@ -338,7 +349,7 @@ def get_value(self) -> np.ndarray:
 
         """
         return normalize(
-            values=self.get_raw_value(),
+            values=self.get_filtered_value(),
             normalization=self.normalization,
             **self.normalization_values,
         )

From 999e2416c0a7e9eb0c00bd2a2e056f1b97c57f98 Mon Sep 17 00:00:00 2001
From: Gautzilla <gauthier.berthomieu@gmail.com>
Date: Tue, 19 May 2026 12:23:05 +0200
Subject: [PATCH 6/9] add butter filtering test

---
 tests/test_audio.py | 47 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/tests/test_audio.py b/tests/test_audio.py
index 37374847..0267591a 100644
--- a/tests/test_audio.py
+++ b/tests/test_audio.py
@@ -3,13 +3,14 @@
 import importlib
 import logging
 from pathlib import Path
-from typing import Literal
+from typing import Any, Literal
 
 import numpy as np
 import pandas as pd
 import pytest
 import soundfile as sf
 from pandas import Timedelta, Timestamp
+from scipy import signal
 
 import osekit
 from osekit.config import (
@@ -25,7 +26,12 @@
 from osekit.core.audio_item import AudioItem
 from osekit.core.instrument import Instrument
 from osekit.utils import audio
-from osekit.utils.audio import Normalization, generate_sample_audio, normalize
+from osekit.utils.audio import (
+    Butterworth,
+    Normalization,
+    generate_sample_audio,
+    normalize,
+)
 from tests.helpers.audio import MockedAudioData
 
 
@@ -2166,3 +2172,40 @@ def test_resampling_from_different_origin_frequencies(tmp_path: Path) -> None:
 
     vs = ad.get_value()
     assert vs.size == int(ad.duration.total_seconds()) * ad.sample_rate
+
+
+def test_butter(monkeypatch: pytest.MonkeyPatch) -> None:
+    butter_calls: dict[str, Any] = {}  # keyword arguments seen by the patched sosfilt
+
+    sig_values = [1] * 10
+    butter = Butterworth(N=2, Wn=[1000, 2000], btype="bandpass")
+
+    def butter_patch(*args: Any, **kwargs: Any) -> list[int]:
+        butter_calls.update(kwargs)
+        return sig_values
+
+    from osekit.utils.audio import signal as audio_signal  # scipy.signal, as imported there
+
+    monkeypatch.setattr(audio_signal, "sosfilt", butter_patch)
+
+    ad = MockedAudioData(
+        mocked_value=sig_values,
+        butter=butter,
+    )
+    ad.get_value()  # expected to run the Butterworth filtering, hence the patch
+
+    assert np.array_equal(
+        butter_calls["sos"],  # coefficients must match a direct scipy design
+        signal.butter(
+            N=butter.N,
+            Wn=butter.Wn,
+            btype=butter.btype,
+            output="sos",
+            fs=ad.sample_rate,
+        ),
+    )
+
+    assert np.array_equal(
+        butter_calls["x"],  # the filter must receive the unfiltered raw value
+        ad.get_raw_value(),
+    )

From d592a6f4adfa01803e1f56fe6d6aa31de6511462 Mon Sep 17 00:00:00 2001
From: Gautzilla <gauthier.berthomieu@gmail.com>
Date: Tue, 19 May 2026 14:34:23 +0200
Subject: [PATCH 7/9] add Butterworth serialization test

---
 tests/test_utils.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index 6f46d55e..196675f2 100755
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -12,7 +12,7 @@
 
 from osekit.core.ltas_dataset import LTASDataset
 from osekit.core.spectro_dataset import SpectroDataset
-from osekit.utils.audio import Normalization, normalize
+from osekit.utils.audio import Butterworth, Normalization, normalize
 from osekit.utils.core import (
     file_indexes_per_batch,
     get_closest_value_index,
@@ -574,3 +574,15 @@ def raise_key_error(*args, **kwargs) -> None:  # noqa: ANN002, ANN003
     monkeypatch.setattr(LTASDataset, "from_json", raise_key_error)
 
     assert deserialize_spectro_or_ltas_dataset(path=Path()) == "SpectroDataset"
+
+
+def test_butter_serialization() -> None:
+    """A Butterworth filter survives a to_dict() / from_dict() round trip."""
+    original = Butterworth(N=4, Wn=[120.0, 200.0], btype="bandpass")
+    serialized = original.to_dict()
+    restored = Butterworth.from_dict(serialized)
+
+    # Compare field by field so that a failure names the offending attribute.
+    assert restored.N == original.N
+    assert restored.Wn == original.Wn
+    assert restored.btype == original.btype

From 8774d331a9bc45fc887fea4467510f9b63bda741 Mon Sep 17 00:00:00 2001
From: Gautzilla <gauthier.berthomieu@gmail.com>
Date: Tue, 19 May 2026 14:42:26 +0200
Subject: [PATCH 8/9] add __hash__() method to Butterworth class

---
 src/osekit/utils/audio.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/osekit/utils/audio.py b/src/osekit/utils/audio.py
index 35804a66..38d2e633 100644
--- a/src/osekit/utils/audio.py
+++ b/src/osekit/utils/audio.py
@@ -301,3 +301,6 @@ def filter(self, sig: np.typing.NDArray, fs: float) -> np.typing.NDArray:
             output="sos",
         )
         return signal.sosfilt(sos=sos, x=sig, axis=0)
+
+    def __hash__(self) -> int:  # Wn is turned into a tuple: a list Wn is unhashable
+        return hash((self.N, tuple(np.atleast_1d(self.Wn).tolist()), self.btype))

From fcbd8ffc90f4a37417145e905d15349dc2085bd7 Mon Sep 17 00:00:00 2001
From: Gautzilla <gauthier.berthomieu@gmail.com>
Date: Tue, 19 May 2026 14:42:52 +0200
Subject: [PATCH 9/9] add AudioDataset butter tests

---
 tests/test_audio.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tests/test_audio.py b/tests/test_audio.py
index 0267591a..4dee313c 100644
--- a/tests/test_audio.py
+++ b/tests/test_audio.py
@@ -2209,3 +2209,21 @@ def butter_patch(*args: Any, **kwargs: Any) -> list[int]:
         butter_calls["x"],
         ad.get_raw_value(),
     )
+
+
+def test_butter_audiodataset() -> None:
+    ads = AudioDataset([MockedAudioData(mocked_value=[]) for _ in range(2)])
+
+    assert all(ad.butter is None for ad in ads.data)  # no filter by default
+
+    butter = Butterworth(N=2, Wn=[1000, 2000], btype="bandpass")
+    butter2 = Butterworth(N=4, Wn=100, btype="lowpass")
+
+    # Setting the dataset-level filter propagates it to every AudioData.
+    ads.butter = butter
+    assert all(ad.butter == butter for ad in ads.data)
+
+    # The dataset-level getter reflects the filter set on its AudioData.
+    for ad in ads.data:
+        ad.butter = butter2
+    assert ads.butter == butter2