diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml index 067947b31..17c24fded 100644 --- a/.github/workflows/smoke.yml +++ b/.github/workflows/smoke.yml @@ -67,7 +67,6 @@ jobs: cmake \ libjpeg-dev \ libtool \ - libvorbis-dev \ libx264-dev \ nasm \ pkg-config \ @@ -77,7 +76,7 @@ jobs: fi ;; macos-14) - brew install automake libtool libpng libvorbis libvpx opus x264 + brew install automake libtool libpng libvpx opus x264 ;; esac @@ -88,6 +87,7 @@ jobs: - name: Build run: | + [ "${{ runner.os }}" = "macOS" ] && export ARCHFLAGS="-arch $(uname -m)" . scripts/activate.sh ffmpeg-${{ matrix.config.ffmpeg }} scripts/build diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 67e265cf6..3efcf11f0 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -42,7 +42,7 @@ Features: Fixes: -- Nothing (yet) +- Fix ``add_mux_stream`` producing unwritable Matroska files by extracting codec extradata from the bitstream before the header is written by :gh-user:`WyattBlue` (:issue:`2198`). v17.1.0 ------- diff --git a/av/container/core.pxd b/av/container/core.pxd index 7cba26407..badd84abb 100644 --- a/av/container/core.pxd +++ b/av/container/core.pxd @@ -37,7 +37,7 @@ cdef class Container: cdef readonly dict metadata # Private API. - cdef uint8_t _myflag # enum: writeable, input_was_opened, started, done + cdef uint8_t _myflag # enum: writeable, input_was_opened, started, done, extradata_planned cdef _assert_open(self) cdef int err_check(self, int value) except -1 diff --git a/av/container/output.pxd b/av/container/output.pxd index 71edc3bf2..2ef93206f 100644 --- a/av/container/output.pxd +++ b/av/container/output.pxd @@ -1,9 +1,15 @@ cimport libav as lib from av.container.core cimport Container +from av.packet cimport Packet from av.stream cimport Stream cdef class OutputContainer(Container): cdef lib.AVPacket *packet_ptr + cdef dict _extradata_bsfs + cdef list _buffered_packets + cdef _mux_one(self, Packet packet) + cdef _buffer_for_extradata(self, Packet packet) + cdef _try_extract_extradata(self, Packet packet) cpdef start_encoding(self) diff --git a/av/container/output.py b/av/container/output.py index 1b8f846e9..8c56ef080 100644 --- a/av/container/output.py +++ b/av/container/output.py @@ -2,6 +2,7 @@ import cython from cython.cimports import libav as lib +from cython.cimports.av.bitstream import BitStreamFilterContext from cython.cimports.av.codec.codec import Codec from cython.cimports.av.codec.context import CodecContext, wrap_codec_context from cython.cimports.av.container.streams import StreamContainer @@ -10,10 +11,39 @@ from cython.cimports.av.packet import Packet from cython.cimports.av.stream import Stream, wrap_stream from cython.cimports.av.utils import dict_to_avdict, to_avrational +from cython.cimports.libc.stdint import uint8_t +from cython.cimports.libc.string import memcpy, memset + + +@cython.cfunc +def _set_codecpar_extradata( + stream: cython.pointer[lib.AVStream], + data: cython.pointer[uint8_t], + size: cython.int, +): + buf: cython.p_uchar = cython.cast( + cython.p_uchar, lib.av_malloc(size + lib.AV_INPUT_BUFFER_PADDING_SIZE) + ) + if buf == cython.NULL: + raise MemoryError("Could not allocate extradata") + + memcpy(buf, data, size) + memset(buf + size, 0, lib.AV_INPUT_BUFFER_PADDING_SIZE) + + lib.av_freep(cython.address(stream.codecpar.extradata)) + stream.codecpar.extradata = buf + stream.codecpar.extradata_size = size @cython.cfunc def close_output(self: OutputContainer): + if self.packet_ptr != cython.NULL and self._buffered_packets: + buffered: list = self._buffered_packets + self._buffered_packets = [] + packet: Packet + for packet in buffered: + self._mux_one(packet) + self.streams = StreamContainer() if self._myflag & 12 == 4: # enum.started and not enum.done # If the underlying Python IO file was already closed (e.g. during GC @@ -38,6 +68,8 @@ class OutputContainer(Container): def __cinit__(self, *args, **kwargs): self.streams = StreamContainer() self.metadata = {} + self._extradata_bsfs = {} + self._buffered_packets = [] with cython.nogil: self.packet_ptr = lib.av_packet_alloc() @@ -558,6 +590,13 @@ def mux(self, packets): self.mux_one(packet) def mux_one(self, packet: Packet): + if not (self._myflag & 4) and self._buffer_for_extradata(packet): + return + + self._mux_one(packet) + + @cython.cfunc + def _mux_one(self, packet: Packet): self.start_encoding() # Assert the packet is in stream time. @@ -577,3 +616,86 @@ def mux_one(self, packet: Packet): with cython.nogil: ret: cython.int = lib.av_interleaved_write_frame(self.ptr, self.packet_ptr) self.err_check(ret) + + @cython.cfunc + def _buffer_for_extradata(self, packet: Packet): + """Buffer ``packet`` until extradata is known for all mux streams that + need it. Returns True if the packet was buffered (caller should stop).""" + if not (self._myflag & 16): # extradata_planned + self._myflag |= 16 + if self.ptr.oformat.flags & lib.AVFMT_GLOBALHEADER: + stream: Stream + for stream in self.streams: + if ( + stream.codec_context is not None + or stream.ptr.codecpar.extradata != cython.NULL + ): + continue + try: + bsf = BitStreamFilterContext( + "extract_extradata", in_stream=stream + ) + except Exception: + continue # Codec does not support extradata extraction. + self._extradata_bsfs[stream.ptr.index] = bsf + + if not self._extradata_bsfs: + return False # Nothing to wait for; mux normally. + + self._try_extract_extradata(packet) + self._buffered_packets.append(packet) + if self._extradata_bsfs: + return True # Still waiting on some stream's extradata. + + # All extradata is resolved: write the header and flush buffered packets. + buffered: list = self._buffered_packets + self._buffered_packets = [] + buffered_packet: Packet + for buffered_packet in buffered: + self._mux_one(buffered_packet) + return True + + @cython.cfunc + def _try_extract_extradata(self, packet: Packet): + idx: cython.int = packet.ptr.stream_index + if idx not in self._extradata_bsfs: + return + + bsf_wrapper: BitStreamFilterContext = self._extradata_bsfs[idx] + bsf: cython.pointer[lib.AVBSFContext] = bsf_wrapper.ptr + + tmp: cython.pointer[lib.AVPacket] = lib.av_packet_alloc() + if tmp == cython.NULL: + raise MemoryError("Could not allocate packet") + + size: cython.size_t = 0 + sd: cython.pointer[uint8_t] + try: + # Clone the packet so the filter does not consume the caller's data. + if lib.av_packet_ref(tmp, packet.ptr) < 0: + return + + if lib.av_bsf_send_packet(bsf, tmp) < 0: + lib.av_packet_unref(tmp) # send failed; we still own the ref + return + + # The filter rejects packets that are already length-prefixed rather + # than annex-b, returning an error here; treat that and EOF/EAGAIN + # alike as "no in-band extradata". + while lib.av_bsf_receive_packet(bsf, tmp) == 0: + sd = lib.av_packet_get_side_data( + tmp, lib.AV_PKT_DATA_NEW_EXTRADATA, cython.address(size) + ) + if sd != cython.NULL and size > 0: + _set_codecpar_extradata( + self.ptr.streams[idx], sd, cython.cast(cython.int, size) + ) + lib.av_packet_unref(tmp) + break + lib.av_packet_unref(tmp) + finally: + lib.av_packet_free(cython.address(tmp)) + # A stream's first packet is the only reliable place to find in-band + # parameter sets, so stop waiting on this stream regardless of the + # result, falling back to the muxer's default behavior. + del self._extradata_bsfs[idx] diff --git a/include/avcodec.pxd b/include/avcodec.pxd index 5d13abed1..534f1ae1b 100644 --- a/include/avcodec.pxd +++ b/include/avcodec.pxd @@ -298,6 +298,7 @@ cdef extern from "libavcodec/avcodec.h" nogil: cdef char* avcodec_get_name(AVCodecID id) cdef int avcodec_open2(AVCodecContext *ctx, const AVCodec *codec, AVDictionary **options) cdef enum AVPacketSideDataType: + AV_PKT_DATA_NEW_EXTRADATA AV_PKT_DATA_DISPLAYMATRIX cdef struct AVPacketSideData: uint8_t *data diff --git a/scripts/build-deps b/scripts/build-deps index d89b1e406..ee90471c1 100755 --- a/scripts/build-deps +++ b/scripts/build-deps @@ -67,7 +67,7 @@ echo ./configure --enable-debug=3 \ --enable-libx264 \ --disable-bsfs \ - --enable-bsf=chomp,h264_mp4toannexb,setts \ + --enable-bsf=chomp,extract_extradata,h264_mp4toannexb,setts \ --enable-sse \ --enable-avx \ --enable-avx2 \ diff --git a/tests/test_remux.py b/tests/test_remux.py index 85f00563b..c5045e979 100644 --- a/tests/test_remux.py +++ b/tests/test_remux.py @@ -1,4 +1,5 @@ import io +from fractions import Fraction import numpy as np import pytest @@ -87,6 +88,47 @@ def test_add_mux_stream_no_codec_context() -> None: assert "audio/" in repr(audio_stream) +def test_add_mux_stream_matroska_extradata() -> None: + """Regression test for #2198. + + Muxing pre-encoded H.264 packets into Matroska used to fail in + ``avformat_write_header`` because the muxer needs ``CodecPrivate`` (the codec + extradata) before the first packet is written. When the packets are annex-b + with in-band parameter sets but the stream has no extradata, + ``add_mux_stream`` extracts it from the bitstream so the file is written and + stays decodable. + """ + if av.codec.Codec("h264", "w").name != "libx264": + pytest.skip("requires libx264") + + # Encode without a global header, so the packets carry annex-b parameter + # sets in-band and the codec context exposes no extradata to copy. + cc = av.CodecContext.create("libx264", "w") + cc.width, cc.height, cc.pix_fmt = 320, 240, "yuv420p" + cc.time_base = Fraction(1, 24) + cc.framerate = Fraction(24, 1) + + packets = [] + for i in range(24): + frame = av.VideoFrame.from_ndarray( + np.zeros((240, 320, 3), dtype="uint8"), format="rgb24" + ) + frame.pts = i + frame.time_base = Fraction(1, 24) + packets.extend(cc.encode(frame)) + packets.extend(cc.encode(None)) + assert cc.extradata is None # nothing to copy onto the stream + + buf = io.BytesIO() + with av.open(buf, "w", format="matroska") as output: + out_stream = output.add_mux_stream("libx264", rate=24, width=320, height=240) + for packet in packets: + packet.stream = out_stream + output.mux(packet) + + assert _decoded_frame_count(buf) == 24 + + def test_add_stream_from_template_copies_time_base() -> None: """add_stream_from_template must propagate the source stream's time_base.