From d91e30e034132ce5399e344901786878c8ca193d Mon Sep 17 00:00:00 2001 From: WyattBlue Date: Sat, 13 Jun 2026 22:14:29 -0400 Subject: [PATCH 1/2] Extract codec extradata in add_mux_stream for Matroska Muxing packets into Matroska failed in avformat_write_header because the muxer needs CodecPrivate (the codec extradata) before the first packet. For codec-less mux streams whose format embeds a global header, buffer packets and run the extract_extradata bitstream filter to recover the parameter sets from the bitstream, then set them on codecpar before the header is written. Streams give up after their first packet, so already length-prefixed packets fall back to the muxer's default behavior with no regression. closes #2198 --- CHANGELOG.rst | 2 +- av/container/core.pxd | 2 +- av/container/output.pxd | 6 ++ av/container/output.py | 122 ++++++++++++++++++++++++++++++++++++++++ include/avcodec.pxd | 1 + scripts/build-deps | 2 +- tests/test_remux.py | 42 ++++++++++++++ 7 files changed, 174 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 67e265cf6..3efcf11f0 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -42,7 +42,7 @@ Features: Fixes: -- Nothing (yet) +- Fix ``add_mux_stream`` producing unwritable Matroska files by extracting codec extradata from the bitstream before the header is written by :gh-user:`WyattBlue` (:issue:`2198`). v17.1.0 ------- diff --git a/av/container/core.pxd b/av/container/core.pxd index 7cba26407..badd84abb 100644 --- a/av/container/core.pxd +++ b/av/container/core.pxd @@ -37,7 +37,7 @@ cdef class Container: cdef readonly dict metadata # Private API. - cdef uint8_t _myflag # enum: writeable, input_was_opened, started, done + cdef uint8_t _myflag # enum: writeable, input_was_opened, started, done, extradata_planned cdef _assert_open(self) cdef int err_check(self, int value) except -1 diff --git a/av/container/output.pxd b/av/container/output.pxd index 71edc3bf2..2ef93206f 100644 --- a/av/container/output.pxd +++ b/av/container/output.pxd @@ -1,9 +1,15 @@ cimport libav as lib from av.container.core cimport Container +from av.packet cimport Packet from av.stream cimport Stream cdef class OutputContainer(Container): cdef lib.AVPacket *packet_ptr + cdef dict _extradata_bsfs + cdef list _buffered_packets + cdef _mux_one(self, Packet packet) + cdef _buffer_for_extradata(self, Packet packet) + cdef _try_extract_extradata(self, Packet packet) cpdef start_encoding(self) diff --git a/av/container/output.py b/av/container/output.py index 1b8f846e9..8c56ef080 100644 --- a/av/container/output.py +++ b/av/container/output.py @@ -2,6 +2,7 @@ import cython from cython.cimports import libav as lib +from cython.cimports.av.bitstream import BitStreamFilterContext from cython.cimports.av.codec.codec import Codec from cython.cimports.av.codec.context import CodecContext, wrap_codec_context from cython.cimports.av.container.streams import StreamContainer @@ -10,10 +11,39 @@ from cython.cimports.av.packet import Packet from cython.cimports.av.stream import Stream, wrap_stream from cython.cimports.av.utils import dict_to_avdict, to_avrational +from cython.cimports.libc.stdint import uint8_t +from cython.cimports.libc.string import memcpy, memset + + +@cython.cfunc +def _set_codecpar_extradata( + stream: cython.pointer[lib.AVStream], + data: cython.pointer[uint8_t], + size: cython.int, +): + buf: cython.p_uchar = cython.cast( + cython.p_uchar, lib.av_malloc(size + lib.AV_INPUT_BUFFER_PADDING_SIZE) + ) + if buf == cython.NULL: + raise MemoryError("Could not allocate extradata") + + memcpy(buf, data, size) + memset(buf + size, 0, lib.AV_INPUT_BUFFER_PADDING_SIZE) + + lib.av_freep(cython.address(stream.codecpar.extradata)) + stream.codecpar.extradata = buf + stream.codecpar.extradata_size = size @cython.cfunc def close_output(self: OutputContainer): + if self.packet_ptr != cython.NULL and self._buffered_packets: + buffered: list = self._buffered_packets + self._buffered_packets = [] + packet: Packet + for packet in buffered: + self._mux_one(packet) + self.streams = StreamContainer() if self._myflag & 12 == 4: # enum.started and not enum.done # If the underlying Python IO file was already closed (e.g. during GC @@ -38,6 +68,8 @@ class OutputContainer(Container): def __cinit__(self, *args, **kwargs): self.streams = StreamContainer() self.metadata = {} + self._extradata_bsfs = {} + self._buffered_packets = [] with cython.nogil: self.packet_ptr = lib.av_packet_alloc() @@ -558,6 +590,13 @@ def mux(self, packets): self.mux_one(packet) def mux_one(self, packet: Packet): + if not (self._myflag & 4) and self._buffer_for_extradata(packet): + return + + self._mux_one(packet) + + @cython.cfunc + def _mux_one(self, packet: Packet): self.start_encoding() # Assert the packet is in stream time. @@ -577,3 +616,86 @@ def mux_one(self, packet: Packet): with cython.nogil: ret: cython.int = lib.av_interleaved_write_frame(self.ptr, self.packet_ptr) self.err_check(ret) + + @cython.cfunc + def _buffer_for_extradata(self, packet: Packet): + """Buffer ``packet`` until extradata is known for all mux streams that + need it. Returns True if the packet was buffered (caller should stop).""" + if not (self._myflag & 16): # extradata_planned + self._myflag |= 16 + if self.ptr.oformat.flags & lib.AVFMT_GLOBALHEADER: + stream: Stream + for stream in self.streams: + if ( + stream.codec_context is not None + or stream.ptr.codecpar.extradata != cython.NULL + ): + continue + try: + bsf = BitStreamFilterContext( + "extract_extradata", in_stream=stream + ) + except Exception: + continue # Codec does not support extradata extraction. + self._extradata_bsfs[stream.ptr.index] = bsf + + if not self._extradata_bsfs: + return False # Nothing to wait for; mux normally. + + self._try_extract_extradata(packet) + self._buffered_packets.append(packet) + if self._extradata_bsfs: + return True # Still waiting on some stream's extradata. + + # All extradata is resolved: write the header and flush buffered packets. + buffered: list = self._buffered_packets + self._buffered_packets = [] + buffered_packet: Packet + for buffered_packet in buffered: + self._mux_one(buffered_packet) + return True + + @cython.cfunc + def _try_extract_extradata(self, packet: Packet): + idx: cython.int = packet.ptr.stream_index + if idx not in self._extradata_bsfs: + return + + bsf_wrapper: BitStreamFilterContext = self._extradata_bsfs[idx] + bsf: cython.pointer[lib.AVBSFContext] = bsf_wrapper.ptr + + tmp: cython.pointer[lib.AVPacket] = lib.av_packet_alloc() + if tmp == cython.NULL: + raise MemoryError("Could not allocate packet") + + size: cython.size_t = 0 + sd: cython.pointer[uint8_t] + try: + # Clone the packet so the filter does not consume the caller's data. + if lib.av_packet_ref(tmp, packet.ptr) < 0: + return + + if lib.av_bsf_send_packet(bsf, tmp) < 0: + lib.av_packet_unref(tmp) # send failed; we still own the ref + return + + # The filter rejects packets that are already length-prefixed rather + # than annex-b, returning an error here; treat that and EOF/EAGAIN + # alike as "no in-band extradata". + while lib.av_bsf_receive_packet(bsf, tmp) == 0: + sd = lib.av_packet_get_side_data( + tmp, lib.AV_PKT_DATA_NEW_EXTRADATA, cython.address(size) + ) + if sd != cython.NULL and size > 0: + _set_codecpar_extradata( + self.ptr.streams[idx], sd, cython.cast(cython.int, size) + ) + lib.av_packet_unref(tmp) + break + lib.av_packet_unref(tmp) + finally: + lib.av_packet_free(cython.address(tmp)) + # A stream's first packet is the only reliable place to find in-band + # parameter sets, so stop waiting on this stream regardless of the + # result, falling back to the muxer's default behavior. + del self._extradata_bsfs[idx] diff --git a/include/avcodec.pxd b/include/avcodec.pxd index 5d13abed1..534f1ae1b 100644 --- a/include/avcodec.pxd +++ b/include/avcodec.pxd @@ -298,6 +298,7 @@ cdef extern from "libavcodec/avcodec.h" nogil: cdef char* avcodec_get_name(AVCodecID id) cdef int avcodec_open2(AVCodecContext *ctx, const AVCodec *codec, AVDictionary **options) cdef enum AVPacketSideDataType: + AV_PKT_DATA_NEW_EXTRADATA AV_PKT_DATA_DISPLAYMATRIX cdef struct AVPacketSideData: uint8_t *data diff --git a/scripts/build-deps b/scripts/build-deps index d89b1e406..ee90471c1 100755 --- a/scripts/build-deps +++ b/scripts/build-deps @@ -67,7 +67,7 @@ echo ./configure --enable-debug=3 \ --enable-libx264 \ --disable-bsfs \ - --enable-bsf=chomp,h264_mp4toannexb,setts \ + --enable-bsf=chomp,extract_extradata,h264_mp4toannexb,setts \ --enable-sse \ --enable-avx \ --enable-avx2 \ diff --git a/tests/test_remux.py b/tests/test_remux.py index 85f00563b..c5045e979 100644 --- a/tests/test_remux.py +++ b/tests/test_remux.py @@ -1,4 +1,5 @@ import io +from fractions import Fraction import numpy as np import pytest @@ -87,6 +88,47 @@ def test_add_mux_stream_no_codec_context() -> None: assert "audio/" in repr(audio_stream) +def test_add_mux_stream_matroska_extradata() -> None: + """Regression test for #2198. + + Muxing pre-encoded H.264 packets into Matroska used to fail in + ``avformat_write_header`` because the muxer needs ``CodecPrivate`` (the codec + extradata) before the first packet is written. When the packets are annex-b + with in-band parameter sets but the stream has no extradata, + ``add_mux_stream`` extracts it from the bitstream so the file is written and + stays decodable. + """ + if av.codec.Codec("h264", "w").name != "libx264": + pytest.skip("requires libx264") + + # Encode without a global header, so the packets carry annex-b parameter + # sets in-band and the codec context exposes no extradata to copy. + cc = av.CodecContext.create("libx264", "w") + cc.width, cc.height, cc.pix_fmt = 320, 240, "yuv420p" + cc.time_base = Fraction(1, 24) + cc.framerate = Fraction(24, 1) + + packets = [] + for i in range(24): + frame = av.VideoFrame.from_ndarray( + np.zeros((240, 320, 3), dtype="uint8"), format="rgb24" + ) + frame.pts = i + frame.time_base = Fraction(1, 24) + packets.extend(cc.encode(frame)) + packets.extend(cc.encode(None)) + assert cc.extradata is None # nothing to copy onto the stream + + buf = io.BytesIO() + with av.open(buf, "w", format="matroska") as output: + out_stream = output.add_mux_stream("libx264", rate=24, width=320, height=240) + for packet in packets: + packet.stream = out_stream + output.mux(packet) + + assert _decoded_frame_count(buf) == 24 + + def test_add_stream_from_template_copies_time_base() -> None: """add_stream_from_template must propagate the source stream's time_base. From b0be14e03c04780eb8e0b67149d992d9ae3a9586 Mon Sep 17 00:00:00 2001 From: WyattBlue Date: Sat, 13 Jun 2026 23:01:27 -0400 Subject: [PATCH 2/2] Build native-arch extension in macOS smoke job actions/setup-python provides the python.org universal2 CPython, so setuptools compiled the extensions for arm64 and x86_64. Also stop installing libvorbis. --- .github/workflows/smoke.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml index 067947b31..17c24fded 100644 --- a/.github/workflows/smoke.yml +++ b/.github/workflows/smoke.yml @@ -67,7 +67,6 @@ jobs: cmake \ libjpeg-dev \ libtool \ - libvorbis-dev \ libx264-dev \ nasm \ pkg-config \ @@ -77,7 +76,7 @@ jobs: fi ;; macos-14) - brew install automake libtool libpng libvorbis libvpx opus x264 + brew install automake libtool libpng libvpx opus x264 ;; esac @@ -88,6 +87,7 @@ jobs: - name: Build run: | + [ "${{ runner.os }}" = "macOS" ] && export ARCHFLAGS="-arch $(uname -m)" . scripts/activate.sh ffmpeg-${{ matrix.config.ffmpeg }} scripts/build