MDCT-sound-compression/decompression.py at master · 8Altair/MDCT-sound-compression · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
from __future__ import annotations

import struct
from pathlib import Path
from typing import Tuple

import numpy as np

from logging_configuration import decompression_logger as logger
from compression import generate_mdct_cosine_matrix, generate_sine_window


class BitReader:
    """
        Sequential reader that extracts unsigned bit fields of arbitrary width from a byte buffer.

        Bits are consumed most-significant-bit first within each byte, and a field may
        span byte boundaries. The reader only moves forward; there is no seek or rewind.
    """

    __slots__ = ("_data", "_byte_index", "_buffer", "_bits_remaining")

    def __init__(self, data: bytes):
        # Source buffer and the index of the next byte that has not been loaded yet.
        self._data = data
        self._byte_index = 0
        # Byte currently being consumed and the number of its bits still unread.
        self._buffer = 0
        self._bits_remaining = 0

    def _load_next_byte(self) -> None:
        """
            Load the next source byte into the working buffer.

            Raises:
                EOFError: if every byte of the source has already been consumed.
        """
        if self._byte_index >= len(self._data):
            raise EOFError("Attempted to read past end of bitstream.")
        self._buffer = self._data[self._byte_index]
        self._byte_index += 1
        self._bits_remaining = 8

    def read_bits(self, bit_count: int) -> int:
        """
            Read the next `bit_count` bits and return them as an unsigned integer.

            The first bit read becomes the most significant bit of the result.

            Raises:
                ValueError: if `bit_count` is zero or negative.
                EOFError: if the buffer runs out before `bit_count` bits were read.
        """
        if bit_count <= 0:
            raise ValueError("bit_count must be positive.")

        accumulated = 0
        for _ in range(bit_count):
            if not self._bits_remaining:
                self._load_next_byte()

            # Step down to the next unread bit position, then shift that bit into the result.
            self._bits_remaining -= 1
            accumulated <<= 1
            accumulated |= (self._buffer >> self._bits_remaining) & 0x1

        return accumulated


def _decode_signed_coefficient(total_bits: int, encoded_value: int) -> int:
    """
        Decode sign-magnitude integer from bit-field representation.
    """
    if total_bits <= 0:
        raise ValueError("total_bits must be positive.")

    magnitude_bits = total_bits - 1
    sign_bit = (encoded_value >> magnitude_bits) & 0x1
    magnitude_mask = (1 << magnitude_bits) - 1
    magnitude = encoded_value & magnitude_mask
    return -magnitude if sign_bit else magnitude


def load_compressed_mdct(compressed_file: Path) -> Tuple[int, int, int, int, np.ndarray, np.ndarray]:
    """
        Load MDCT coefficients for Mid and Side channels from a binary file produced by save_compressed_mdct.

        Layout, as parsed by this function:
            * Header (little-endian "<IHIH"): total_samples (uint32), block_size (uint16),
              sample_rate (uint32), compression_factor (uint16).
            * Payload: one bitstream holding the complete Mid matrix followed by the complete
              Side matrix. Each matrix has (total_samples // block_size) + 1 blocks, and each
              block stores (block_size - compression_factor) coefficients. Every coefficient is
              a 6-bit length field followed by that many bits of sign-magnitude data.

        Coefficients that were not stored (the trailing compression_factor columns of
        each block) are left at zero in the returned matrices.

        Args:
            compressed_file: path of the compressed file to read.

        Returns:
            (total_samples, block_size, sample_rate, compression_factor,
             mid_mdct_matrix, side_mdct_matrix), where both matrices are int32 arrays of
            shape (number_of_blocks, block_size).

        Raises:
            ValueError: if the header is truncated, block_size is zero, compression_factor
                exceeds block_size, or a coefficient declares a zero-bit length.
            EOFError: if the payload ends before all coefficients have been read.
    """
    function_tag = "[load_compressed_mdct]"
    try:
        with compressed_file.open("rb") as binary_stream:
            header_format = "<IHIH"
            header_size = struct.calcsize(header_format)
            header_bytes = binary_stream.read(header_size)
            if len(header_bytes) != header_size:
                raise ValueError(f"{function_tag} Compressed file header is incomplete.")

            total_samples, block_size, sample_rate, compression_factor = struct.unpack(header_format, header_bytes)
            payload = binary_stream.read()

        # A zero block size would otherwise surface as a bare ZeroDivisionError below;
        # report it as a malformed header like the other header problems.
        if int(block_size) <= 0:
            raise ValueError(f"{function_tag} Invalid block size in header ({block_size}); it must be positive.")

        retained_coefficients = int(block_size) - int(compression_factor)
        if retained_coefficients < 0:
            raise ValueError(f"{function_tag} Invalid retained coefficient count derived from header.")

        # Header fields are unsigned and block_size is positive here, so this is always >= 1.
        number_of_blocks = (int(total_samples) // int(block_size)) + 1

        bit_reader = BitReader(payload)
        read_bits = bit_reader.read_bits
        mid_mdct_matrix = np.zeros((number_of_blocks, block_size), dtype=np.int32)
        side_mdct_matrix = np.zeros((number_of_blocks, block_size), dtype=np.int32)

        # The Mid matrix is stored in full before the Side matrix begins.
        for matrix_label, coefficient_matrix in (("Mid", mid_mdct_matrix), ("Side", side_mdct_matrix)):
            for block_index in range(number_of_blocks):
                block_row = coefficient_matrix[block_index]  # view: writes land in the matrix
                for coeff_index in range(retained_coefficients):
                    length_bits = read_bits(6)
                    encoded_value = read_bits(length_bits)
                    block_row[coeff_index] = _decode_signed_coefficient(length_bits, encoded_value)
            logger.debug(f"{function_tag} Deserialized {matrix_label} matrix ({coefficient_matrix.shape[0]} blocks).")

        logger.info(f"{function_tag} Loaded compressed MDCT data from '{compressed_file.as_posix()}'.")
        return int(total_samples), int(block_size), int(sample_rate), int(compression_factor), mid_mdct_matrix, side_mdct_matrix

    except Exception as error:
        logger.critical(f"{function_tag} Failed to load compressed MDCT data: {error}\n")
        raise


def apply_imdct_transform_to_blocks(mdct_matrix: np.ndarray, block_size: int) -> np.ndarray:
    """
        Apply IMDCT to each block and multiply the result by the synthesis window.

        The integer coefficients are first divided by the quantization scale (32768),
        then multiplied by the transposed MDCT cosine matrix and scaled by 2 / block_size,
        and finally multiplied element-wise by the sine window.

        Args:
            mdct_matrix: 2D array with one row per block and exactly `block_size` columns.
            block_size: number of MDCT coefficients per block (N); must be positive.

        Returns:
            float32 array of windowed time-domain blocks, one row per input block.
            NOTE(review): the row length is set by generate_mdct_cosine_matrix /
            generate_sine_window (presumably 2 * N, which is what
            reconstruct_signal_from_blocks requires) - confirm against compression.py.

        Raises:
            ValueError: if `mdct_matrix` is not a 2D array, `block_size` is not positive,
                or the column count differs from `block_size`.
    """
    function_tag = "[apply_imdct_transform_to_blocks]"
    MDCT_QUANTIZATION_SCALE = 32768.0

    try:
        # Read ndim once with a default so inputs without the attribute (None, lists, ...)
        # are rejected with the intended ValueError instead of an AttributeError.
        matrix_ndim = getattr(mdct_matrix, "ndim", None)
        if matrix_ndim != 2:
            logger.error(f"{function_tag} mdct_matrix must be 2D. Got ndim="
                         f"{matrix_ndim}.\n")
            raise ValueError(f"{function_tag} mdct_matrix must be 2D (number_of_blocks x N).")

        if block_size is None or int(block_size) <= 0:
            logger.error(f"{function_tag} Invalid block_size={block_size}.\n")
            raise ValueError(f"{function_tag} block_size must be a positive integer.")

        block_size_N = int(block_size)
        expected_coefficients = block_size_N

        if mdct_matrix.shape[1] != expected_coefficients:
            logger.error(f"{function_tag} Coefficient count mismatch: "
                         f"expected {expected_coefficients}, got {mdct_matrix.shape[1]}.\n")
            raise ValueError(f"{function_tag} Each block must have exactly N MDCT coefficients.")

        cosine_matrix = generate_mdct_cosine_matrix(block_size_N)
        sine_window = generate_sine_window(block_size_N)

        # Undo quantization scale
        mdct_float_matrix = mdct_matrix.astype(np.float32, copy=False) / MDCT_QUANTIZATION_SCALE

        imdct_blocks = ((2.0 / float(block_size_N)) * (mdct_float_matrix @ cosine_matrix.T))

        # Broadcast the window across all blocks.
        windowed_blocks = imdct_blocks * sine_window.reshape(1, -1)

        logger.info(f"{function_tag} IMDCT applied: blocks shape={windowed_blocks.shape}")
        # Only preview the first block when one exists; a zero-block matrix is valid
        # input and must not fail inside a log statement.
        if windowed_blocks.shape[0] > 0:
            logger.debug(f"{function_tag} Example block head: "
                         f"{windowed_blocks[0, :min(6, windowed_blocks.shape[1])]}\n")

        return windowed_blocks.astype(np.float32, copy=False)

    except Exception as error:
        logger.critical(f"{function_tag} Error during IMDCT processing: {error}\n")
        raise


def reconstruct_signal_from_blocks(block_matrix: np.ndarray, block_size: int, total_samples: int) -> np.ndarray:
    """
        Overlap-add windowed blocks to reconstruct the time-domain signal and trim padding.

        Each block has length 2 * block_size and consecutive blocks are placed block_size
        samples apart, so neighbouring blocks overlap by half. After summation the first
        block_size samples are discarded and the next `total_samples` samples are returned.

        Args:
            block_matrix: 2D array with one row per block and 2 * block_size columns.
            block_size: hop size N between blocks; must be positive.
            total_samples: number of samples to keep after the leading block_size samples;
                must be non-negative.

        Returns:
            1D float32 array of length `total_samples` (a view into the overlap-add buffer).

        Raises:
            ValueError: if `block_matrix` is not a 2D array, `block_size` is not positive,
                `total_samples` is negative, the block length is not 2 * block_size, or the
                requested range extends past the reconstructed signal.
    """
    function_tag = "[reconstruct_signal_from_blocks]"
    try:
        # Read ndim once with a default so inputs without the attribute (None, lists, ...)
        # are rejected with the intended ValueError instead of an AttributeError.
        matrix_ndim = getattr(block_matrix, "ndim", None)
        if matrix_ndim != 2:
            logger.error(f"{function_tag} block_matrix must be 2D. Got ndim="
                         f"{matrix_ndim}.\n")
            raise ValueError(f"{function_tag} block_matrix must be 2D.")

        if block_size is None or int(block_size) <= 0:
            logger.error(f"{function_tag} Invalid block_size={block_size}. It must be a positive integer.\n")
            raise ValueError(f"{function_tag} block_size must be a positive integer.")

        # A negative count would become a negative slice bound and silently return
        # samples counted from the end of the buffer.
        sample_count = int(total_samples)
        if sample_count < 0:
            logger.error(f"{function_tag} Invalid total_samples={total_samples}. It must be non-negative.\n")
            raise ValueError(f"{function_tag} total_samples must be non-negative.")

        block_size_N = int(block_size)
        window_length = 2 * block_size_N
        hop_size = block_size_N
        number_of_blocks = int(block_matrix.shape[0])

        if int(block_matrix.shape[1]) != window_length:
            logger.error(f"{function_tag} Block length mismatch: expected {window_length}, got {block_matrix.shape[1]}.\n")
            raise ValueError(f"{function_tag} Each block must have length 2*N for reconstruction.")

        output_length = (number_of_blocks - 1) * hop_size + window_length
        reconstructed = np.zeros(output_length, dtype=np.float32)

        # Overlap-add: block k covers samples [k*N, k*N + 2N).
        for block_index in range(number_of_blocks):
            start = block_index * hop_size
            reconstructed[start:start + window_length] += block_matrix[block_index]

        start_trim = block_size_N
        end_trim = start_trim + sample_count
        if end_trim > reconstructed.shape[0]:
            logger.error(f"{function_tag} Attempted to trim beyond reconstructed length (end_trim={end_trim}, length={reconstructed.shape[0]}).\n")
            raise ValueError(f"{function_tag} Reconstruction trimming exceeds signal length.")

        trimmed_signal = reconstructed[start_trim:end_trim]
        logger.info(f"{function_tag} Reconstructed signal length={trimmed_signal.shape[0]}")
        return trimmed_signal

    except Exception as error:
        logger.critical(f"{function_tag} Error during signal reconstruction: {error}\n")
        raise


def sound_decompression(compressed_file_path: str | Path) -> Tuple[np.ndarray, np.ndarray, int]:
    """
        Decode a compressed MDCT binary file back into stereo Left/Right signals.

        Steps: load the header and the Mid/Side coefficient matrices, run the IMDCT on
        both matrices, overlap-add each into a time-domain signal, then combine them as
        Left = Mid + Side and Right = Mid - Side.

        Returns:
            tuple containing (left_channel, right_channel, sample_rate).
            Left and Right channels are float32 arrays limited to the range [-1.0, 1.0].

        Raises:
            FileNotFoundError: if the given path does not exist.
            ValueError: if the given path exists but is not a regular file.
            Any exception raised by the loading, IMDCT or reconstruction steps is
            logged and re-raised unchanged.
    """
    function_tag = "[sound_decompression]"

    def _limit_to_unit_range(channel: np.ndarray) -> np.ndarray:
        # Hard-limit samples to [-1.0, 1.0] and return them as float32.
        return np.clip(channel, -1.0, 1.0).astype(np.float32, copy=False)

    try:
        compressed_file_path = Path(compressed_file_path)

        # Reject missing paths and non-file paths before attempting to parse anything.
        if not compressed_file_path.exists():
            logger.error(f"{function_tag} Compressed file not found: '{compressed_file_path}'\n")
            raise FileNotFoundError(f"{function_tag} File not found: {compressed_file_path}")
        if not compressed_file_path.is_file():
            logger.error(f"{function_tag} Provided path is not a file: '{compressed_file_path}'\n")
            raise ValueError(f"{function_tag} compressed_file_path must point to a file.")

        decoded_contents = load_compressed_mdct(compressed_file_path)
        (total_samples, block_size_N, sample_rate, compression_factor_M,
         mid_mdct_matrix, side_mdct_matrix) = decoded_contents

        logger.info(
            f"{function_tag} Loaded metadata: samples={total_samples}, "
            f"N={block_size_N}, sr={sample_rate}, M={compression_factor_M}")

        # Frequency domain -> windowed time-domain blocks, Mid first and then Side.
        logger.info(f"{function_tag} Applying IMDCT to Mid blocks...")
        mid_blocks = apply_imdct_transform_to_blocks(mid_mdct_matrix, block_size_N)

        logger.info(f"{function_tag} Applying IMDCT to Side blocks...")
        side_blocks = apply_imdct_transform_to_blocks(side_mdct_matrix, block_size_N)

        # Windowed blocks -> continuous signals, again Mid first and then Side.
        logger.info(f"{function_tag} Reconstructing Mid signal via overlap-add...")
        mid_channel = reconstruct_signal_from_blocks(mid_blocks, block_size_N, total_samples)

        logger.info(f"{function_tag} Reconstructing Side signal via overlap-add...")
        side_channel = reconstruct_signal_from_blocks(side_blocks, block_size_N, total_samples)

        logger.info(f"{function_tag} Converting Mid/Side back to Left/Right...")

        # Mid/Side -> Left/Right in the float domain, then limit both to the unit range.
        left_channel = _limit_to_unit_range(np.add(mid_channel, side_channel))
        right_channel = _limit_to_unit_range(np.subtract(mid_channel, side_channel))

        logger.info(f"{function_tag} Decompression completed successfully.\n")
        return left_channel, right_channel, sample_rate

    except Exception as error:
        logger.critical(f"{function_tag} Unhandled error in sound_decompression: {error}\n")
        raise