Skip to content
Merged

Patch #2304

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 157 additions & 6 deletions av/video/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1459,18 +1459,20 @@ def from_dlpack(
if not isinstance(planes, (tuple, list)):
planes = (planes,)

if len(planes) != 2:
raise ValueError(
"from_dlpack currently supports 2-plane formats only (nv12/p010le/p016le)"
)

sw_fmt: lib.AVPixelFormat = get_pix_fmt(format)
nv12 = get_pix_fmt(b"nv12")
p010le = get_pix_fmt(b"p010le")
p016le = get_pix_fmt(b"p016le")

if sw_fmt not in (nv12, p010le, p016le):
raise NotImplementedError("from_dlpack supports nv12, p010le, p016le only")
return VideoFrame._from_dlpack_planar(
planes, sw_fmt, format, width, height, stream, device_id
)

if len(planes) != 2:
raise ValueError(
"from_dlpack currently supports 2-plane formats only (nv12/p010le/p016le)"
)

expected_bits = 8 if sw_fmt == nv12 else 16
itemsize = 1 if expected_bits == 8 else 2
Expand Down Expand Up @@ -1656,3 +1658,152 @@ def from_dlpack(
if m1 != cython.NULL:
m1.deleter(m1)
raise

@staticmethod
def _from_dlpack_planar(planes, sw_fmt, format, width, height, stream, device_id):
    """Build a VideoFrame that aliases CPU DLPack tensors, one tensor per plane.

    Handles pixel formats in which every plane holds exactly one component:
    yuv420p/yuv422p/yuv444p, gray, gbrp, and their 16-bit little-endian
    variants. nv12/p010le/p016le keep their dedicated 2-plane path in
    from_dlpack().

    Args:
        planes: Sequence of DLPack-capable objects, one per plane, in plane
            order. Each is handed to ``_consume_dlpack``.
        sw_fmt: The ``AVPixelFormat`` value the frame should carry.
        format: The caller's format spelling; used only in error messages.
        width: Frame width in pixels, or 0 (together with ``height``) to take
            the size from the shape of plane 0.
        height: Frame height in pixels, or 0 (together with ``width``).
        stream: Forwarded unchanged to ``_consume_dlpack``.
        device_id: Must be ``None`` or 0; only CPU tensors are accepted here.

    Returns:
        A VideoFrame whose ``data``/``linesize`` entries point directly at the
        tensors' memory (no copy). Each plane's AVBuffer is created with
        ``_dlpack_avbuffer_free`` and the managed tensor as its opaque
        argument, which presumably releases the tensor when the buffer is
        freed.

    Raises:
        NotImplementedError: Unknown, bitstream, palette or Bayer format; a
            plane carrying more than one component; a component size other
            than 1 or 2 bytes; a big-endian 16-bit format; a non-CPU tensor.
        ValueError: Wrong number of planes, bad ``device_id``, a plane whose
            rank, shape or strides do not fit the format, or an odd
            width/height for a chroma-subsampled format.
        TypeError: A plane whose dtype is not an unsigned integer of the
            expected width with a single lane.
        MemoryError: ``av_buffer_create`` returned NULL.
    """
    desc: cython.pointer[cython.const[lib.AVPixFmtDescriptor]] = (
        lib.av_pix_fmt_desc_get(sw_fmt)
    )
    if desc == cython.NULL:
        raise NotImplementedError(f"unknown pixel format {format!r}")
    if desc.flags & (
        lib.AV_PIX_FMT_FLAG_BITSTREAM
        | lib.AV_PIX_FMT_FLAG_PAL
        | lib.AV_PIX_FMT_FLAG_BAYER
    ):
        raise NotImplementedError(
            f"from_dlpack does not support bitstream, palette, or Bayer "
            f"formats ({format!r})"
        )

    # The number of planes is one more than the highest plane index that any
    # component refers to.
    i: cython.int
    nb_planes: cython.int = 0
    for i in range(desc.nb_components):
        if cython.cast(cython.int, desc.comp[i].plane) + 1 > nb_planes:
            nb_planes = desc.comp[i].plane + 1

    # Map each plane to the single component stored in it; a plane shared by
    # several components (or used by none) is outside this code path.
    p: cython.int
    count: cython.int
    comp_of_plane = [0] * nb_planes
    for p in range(nb_planes):
        count = 0
        for i in range(desc.nb_components):
            if cython.cast(cython.int, desc.comp[i].plane) == p:
                comp_of_plane[p] = i
                count += 1
        if count != 1:
            raise NotImplementedError(
                "from_dlpack supports nv12/p010le/p016le and planar "
                f"single-component formats; {format!r} is not supported"
            )

    if len(planes) != nb_planes:
        raise ValueError(
            f"{format!r} requires {nb_planes} plane(s), got {len(planes)}"
        )

    # With one component per plane, the component step is the element size in
    # bytes. Only component 0 is inspected; every plane is then required to
    # use the matching dtype.
    itemsize: cython.int = desc.comp[0].step
    if itemsize != 1 and itemsize != 2:
        raise NotImplementedError(
            "only 8- and 16-bit components are supported for DLPack import"
        )
    if itemsize == 2 and desc.flags & lib.AV_PIX_FMT_FLAG_BE:
        raise NotImplementedError(
            "big-endian formats are not supported for DLPack import"
        )
    expected_bits: cython.int = itemsize * 8

    if device_id not in (None, 0):
        raise ValueError("device_id must be 0 for CPU tensors")

    log2_w: cython.int = desc.log2_chroma_w
    log2_h: cython.int = desc.log2_chroma_h

    frame: VideoFrame = None
    # `m` is the managed tensor this function still owns. It is reset to NULL
    # as soon as an AVBuffer has taken it over, so the error path below only
    # ever deletes a tensor that no buffer references.
    m: cython.pointer[DLManagedTensor] = cython.NULL
    try:
        frame = alloc_video_frame()
        frame.ptr.format = sw_fmt

        for p in range(nb_planes):
            m = _consume_dlpack(planes[p], stream)

            if m.dl_tensor.device_type != kCPU:
                raise NotImplementedError(
                    "only CPU DLPack tensors are supported for this format"
                )
            if m.dl_tensor.device_id != 0:
                raise ValueError("CPU DLPack tensors must have device_id == 0")

            # DLPack dtype code 1 is the unsigned-integer type code.
            if (
                m.dl_tensor.dtype.code != 1
                or m.dl_tensor.dtype.bits != expected_bits
                or m.dl_tensor.dtype.lanes != 1
            ):
                raise TypeError(f"unexpected dtype for plane {p}")

            if m.dl_tensor.ndim != 2:
                raise ValueError(f"plane {p} must be 2D (H, W)")

            ph = cython.cast(int64_t, m.dl_tensor.shape[0])
            pw = cython.cast(int64_t, m.dl_tensor.shape[1])

            if p == 0:
                # Plane 0 is full resolution: it either defines the frame
                # size or must agree with the size the caller supplied.
                if width == 0 and height == 0:
                    width = cython.cast(int, pw)
                    height = cython.cast(int, ph)
                elif width == 0 or height == 0:
                    raise ValueError("either specify both width/height or neither")
                elif pw != width or ph != height:
                    raise ValueError("plane 0 shape does not match width/height")
                if (log2_w and width % 2) or (log2_h and height % 2):
                    raise ValueError(f"width/height must be even for {format!r}")
                frame.ptr.width = width
                frame.ptr.height = height

            # Components 1 and 2 are treated as the subsampled (chroma)
            # planes; their size is the frame size divided by
            # 2**log2_chroma, rounded up (-((-x) >> n) is a ceiling shift).
            comp_idx = comp_of_plane[p]
            is_chroma = (comp_idx == 1 or comp_idx == 2) and (log2_w or log2_h)
            exp_w = (-((-width) >> log2_w)) if is_chroma else width
            exp_h = (-((-height) >> log2_h)) if is_chroma else height

            if pw != exp_w or ph != exp_h:
                raise ValueError(f"plane {p} must have shape ({exp_h}, {exp_w})")

            if m.dl_tensor.strides != cython.NULL:
                if m.dl_tensor.strides[1] != 1:
                    raise ValueError(
                        f"plane {p} must be contiguous in the last dimension"
                    )
                pitch_elems = cython.cast(int64_t, m.dl_tensor.strides[0])
                if pitch_elems < exp_w:
                    # The buffer size below is linesize * rows, which only
                    # covers the plane when every row starts at least one
                    # full row after the previous one. Zero, negative or
                    # overlapping row strides would hand av_buffer_create a
                    # zero, negative or undersized length.
                    if exp_h > 1:
                        raise ValueError(
                            f"plane {p} has row stride {pitch_elems}, which is "
                            f"smaller than its width {exp_w}"
                        )
                    # A single-row plane never steps by its row stride, so
                    # whatever the exporter reported is irrelevant: use the
                    # packed width.
                    pitch_elems = cython.cast(int64_t, exp_w)
            else:
                # NULL strides mean a compact row-major tensor.
                pitch_elems = cython.cast(int64_t, exp_w)

            linesize = cython.cast(int, pitch_elems * itemsize)
            size = cython.cast(int, linesize * exp_h)

            ptr = cython.cast(
                cython.pointer[uint8_t], m.dl_tensor.data
            ) + cython.cast(cython.size_t, m.dl_tensor.byte_offset)

            frame.ptr.buf[p] = lib.av_buffer_create(
                ptr, size, _dlpack_avbuffer_free, cython.cast(cython.p_void, m), 0
            )
            if frame.ptr.buf[p] == cython.NULL:
                raise MemoryError(f"av_buffer_create failed for plane {p}")
            frame.ptr.data[p] = ptr
            frame.ptr.linesize[p] = linesize
            # Ownership of the tensor now rests with the AVBuffer.
            m = cython.NULL

        frame._init_user_attributes()
        return frame

    except Exception:
        # Unref the frame to drop the buffers already attached to it, then
        # delete the tensor that was consumed but never attached, if any.
        if frame is not None:
            lib.av_frame_unref(frame.ptr)
        if m != cython.NULL:
            m.deleter(m)
        raise
70 changes: 69 additions & 1 deletion av/video/plane.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,8 +186,76 @@ def __dlpack__(self, *, stream: int | None = None):
st2 = 1
else:
raise ValueError("invalid plane index for P010/P016")
else:
elif device_type != kCPU:
raise NotImplementedError("unsupported sw_format for DLPack export")
else:
# Generic CPU export. Describe the plane straight from its
# pixel-format descriptor: planes holding a single component (the
# Y/U/V planes of planar YUV, gray, ...) become 2D (H, W), while
# planes that interleave several components (packed RGB, the chroma
# plane of NV12, ...) become 3D (H, W, C).
desc: cython.pointer[cython.const[lib.AVPixFmtDescriptor]] = (
lib.av_pix_fmt_desc_get(sw_fmt)
)
if desc == cython.NULL:
raise NotImplementedError("unknown pixel format for DLPack export")
if desc.flags & (
lib.AV_PIX_FMT_FLAG_BITSTREAM
| lib.AV_PIX_FMT_FLAG_PAL
| lib.AV_PIX_FMT_FLAG_BAYER
):
raise NotImplementedError(
"bitstream, palette, and Bayer formats are not supported for "
"DLPack export"
)

step_bytes: cython.int = 0
ncomp: cython.int = 0
i: cython.int
for i in range(desc.nb_components):
if desc.comp[i].plane != self.index:
continue
if ncomp == 0:
step_bytes = desc.comp[i].step
elif cython.cast(cython.int, desc.comp[i].step) != step_bytes:
raise NotImplementedError(
"mixed component step is not supported for DLPack export"
)
ncomp += 1

if ncomp == 0:
raise ValueError(f"plane {self.index} has no components")
if step_bytes % ncomp:
raise NotImplementedError(
"unsupported component packing for DLPack export"
)
itemsize = step_bytes // ncomp
if itemsize != 1 and itemsize != 2:
raise NotImplementedError(
"only 8- and 16-bit components are supported for DLPack export"
)
if itemsize == 2 and desc.flags & lib.AV_PIX_FMT_FLAG_BE:
raise NotImplementedError(
"big-endian formats are not supported for DLPack export"
)
bits = itemsize * 8
if line_size % itemsize:
raise ValueError("linesize is not aligned to dtype")

if ncomp == 1:
ndim = 2
s0 = self.height
s1 = self.width
st0 = line_size // itemsize
st1 = 1
else:
ndim = 3
s0 = self.height
s1 = self.width
s2 = ncomp
st0 = line_size // itemsize
st1 = ncomp
st2 = 1

frame_ref: cython.pointer[lib.AVFrame] = lib.av_frame_alloc()
if frame_ref == cython.NULL:
Expand Down
Loading
Loading