From 8769d13fccebc825d576ddb11fa625d48eb20ec8 Mon Sep 17 00:00:00 2001 From: Benedikt Lorch Date: Tue, 6 Jan 2026 10:45:05 +0100 Subject: [PATCH 1/6] Wrap VideoEncParams and VideoBlockParams --- av/sidedata/encparams.pxd | 11 ++ av/sidedata/encparams.pyi | 27 ++++ av/sidedata/encparams.pyx | 165 +++++++++++++++++++++++++ av/sidedata/sidedata.pyx | 3 + include/libav.pxd | 1 + include/libavutil/video_enc_params.pxd | 25 ++++ 6 files changed, 232 insertions(+) create mode 100644 av/sidedata/encparams.pxd create mode 100644 av/sidedata/encparams.pyi create mode 100644 av/sidedata/encparams.pyx create mode 100644 include/libavutil/video_enc_params.pxd diff --git a/av/sidedata/encparams.pxd b/av/sidedata/encparams.pxd new file mode 100644 index 000000000..031b59a42 --- /dev/null +++ b/av/sidedata/encparams.pxd @@ -0,0 +1,11 @@ +cimport libav as lib + +from av.sidedata.sidedata cimport SideData + + +cdef class VideoEncParams(SideData): + pass + + +cdef class VideoBlockParams: + cdef lib.AVVideoBlockParams *ptr diff --git a/av/sidedata/encparams.pyi b/av/sidedata/encparams.pyi new file mode 100644 index 000000000..40762a66c --- /dev/null +++ b/av/sidedata/encparams.pyi @@ -0,0 +1,27 @@ +from enum import IntEnum +from typing import cast + +import numpy as np + +class VideoEncParamsType(IntEnum): + NONE = cast(int, ...) + VP9 = cast(int, ...) + H264 = cast(int, ...) + MPEG2 = cast(int, ...) + +class VideoEncParams: + nb_blocks: int + blocks_offset: int + block_size: int + codec_type: VideoEncParamsType + qp: int + delta_qp: int + def block_params(self, idx: int) -> VideoBlockParams: ... + def qp_map(self) -> np.ndarray[int, int]: ... + +class VideoBlockParams: + src_x: int + src_y: int + w: int + h: int + delta_qp: int diff --git a/av/sidedata/encparams.pyx b/av/sidedata/encparams.pyx new file mode 100644 index 000000000..984ca4960 --- /dev/null +++ b/av/sidedata/encparams.pyx @@ -0,0 +1,165 @@ +cimport libav as lib +from libc.stdint cimport uint8_t, int32_t + +from enum import IntEnum +import numpy as np + + +VideoEncParamsType = IntEnum( + "AVVideoEncParamsType", + { + "NONE": lib.AV_VIDEO_ENC_PARAMS_NONE, + "VP9": lib.AV_VIDEO_ENC_PARAMS_VP9, + "H264": lib.AV_VIDEO_ENC_PARAMS_H264, + "MPEG2": lib.AV_VIDEO_ENC_PARAMS_MPEG2, + }, +) + +cdef class VideoEncParams(SideData): + def __repr__(self): + return f"" + + @property + def nb_blocks(self): + """ + Number of blocks in the array + May be 0, in which case no per-block information is present. In this case + the values of blocks_offset / block_size are unspecified and should not + be accessed. + """ + return ( self.ptr.data).nb_blocks + + @property + def blocks_offset(self): + """ + Offset in bytes from the beginning of this structure at which the array of blocks starts. + """ + return ( self.ptr.data).blocks_offset + + @property + def block_size(self): + """ + Size of each block in bytes. May not match sizeof(AVVideoBlockParams). + """ + return ( self.ptr.data).block_size + + @property + def codec_type(self): + """ + Type of the parameters (the codec they are used with). + """ + cdef lib.AVVideoEncParamsType t = ( self.ptr.data).type + return VideoEncParamsType(t) + + @property + def qp(self): + """ + Base quantisation parameter for the frame. The final quantiser for a + given block in a given plane is obtained from this value, possibly + combined with `delta_qp` and the per-block delta in a manner + documented for each type. + """ + return ( self.ptr.data).qp + + @property + def delta_qp(self): + """ + Quantisation parameter offset from the base (per-frame) qp for a given + plane (first index) and AC/DC coefficients (second index). + """ + cdef lib.AVVideoEncParams *p = self.ptr.data + return [[p.delta_qp[i][j] for j in range(2)] for i in range(4)] + + def block_params(self, idx): + """ + Get the encoding parameters for a given block + """ + # Validate given index + if idx < 0 or idx >= self.nb_blocks: + raise ValueError("Expected idx in range [0, nb_blocks)") + + return VideoBlockParams(self, idx) + + def qp_map(self): + """ + Convenience method that creates a 2-D map with the quantization parameters per macroblock. + Only for MPEG2 and H264 encoded videos. + """ + cdef int mb_h = (self.frame.ptr.height + 15) // 16 + cdef int mb_w = (self.frame.ptr.width + 15) // 16 + cdef int nb_mb = mb_h * mb_w + cdef int block_idx + cdef int y + cdef int x + cdef VideoBlockParams block + + # Validate number of blocks + if self.nb_blocks != nb_mb: + raise RuntimeError("Expected frame size to match number of blocks in side data") + + # Validate type + cdef lib.AVVideoEncParamsType type = ( self.ptr.data).type + if type != lib.AVVideoEncParamsType.AV_VIDEO_ENC_PARAMS_MPEG2 and type != lib.AVVideoEncParamsType.AV_VIDEO_ENC_PARAMS_H264: + raise ValueError("Expected MPEG2 or H264") + + # Create a 2-D map with the number of macroblocks + cdef int32_t[:, ::1] map = np.empty((mb_h, mb_w), dtype=np.int32) + + # Fill map with quantization parameter per macroblock + for block_idx in range(nb_mb): + block = VideoBlockParams(self, block_idx) + y = block.src_y // 16 + x = block.src_x // 16 + map[y, x] = self.qp + block.delta_qp + + return np.asarray(map) + + +cdef class VideoBlockParams: + def __init__(self, VideoEncParams video_enc_params, int idx) -> None: + cdef uint8_t* base = video_enc_params.ptr.data + cdef Py_ssize_t offset = video_enc_params.blocks_offset + idx * video_enc_params.block_size + self.ptr = (base + offset) + + def __repr__(self): + return f"" + + @property + def src_x(self): + """ + Horizontal distance in luma pixels from the top-left corner of the visible frame + to the top-left corner of the block. + Can be negative if top/right padding is present on the coded frame. + """ + return self.ptr.src_x + + @property + def src_y(self): + """ + Vertical distance in luma pixels from the top-left corner of the visible frame + to the top-left corner of the block. + Can be negative if top/right padding is present on the coded frame. + """ + return self.ptr.src_y + + @property + def w(self): + """ + Width of the block in luma pixels + """ + return self.ptr.w + + @property + def h(self): + """ + Height of the block in luma pixels + """ + return self.ptr.h + + @property + def delta_qp(self): + """ + Difference between this block's final quantization parameter and the + corresponding per-frame value. + """ + return self.ptr.delta_qp diff --git a/av/sidedata/sidedata.pyx b/av/sidedata/sidedata.pyx index 65b1387f0..a8a209893 100644 --- a/av/sidedata/sidedata.pyx +++ b/av/sidedata/sidedata.pyx @@ -4,6 +4,7 @@ from collections.abc import Mapping from enum import Enum from av.sidedata.motionvectors import MotionVectors +from av.sidedata.encparams import VideoEncParams cdef object _cinit_bypass_sentinel = object() @@ -49,6 +50,8 @@ class Type(Enum): cdef SideData wrap_side_data(Frame frame, int index): if frame.ptr.side_data[index].type == lib.AV_FRAME_DATA_MOTION_VECTORS: return MotionVectors(_cinit_bypass_sentinel, frame, index) + elif frame.ptr.side_data[index].type == lib.AV_FRAME_DATA_VIDEO_ENC_PARAMS: + return VideoEncParams(_cinit_bypass_sentinel, frame, index) else: return SideData(_cinit_bypass_sentinel, frame, index) diff --git a/include/libav.pxd b/include/libav.pxd index 568913208..5e074a1b8 100644 --- a/include/libav.pxd +++ b/include/libav.pxd @@ -7,6 +7,7 @@ include "libavutil/frame.pxd" include "libavutil/hwcontext.pxd" include "libavutil/samplefmt.pxd" include "libavutil/motion_vector.pxd" +include "libavutil/video_enc_params.pxd" include "libavcodec/avcodec.pxd" include "libavcodec/bsf.pxd" diff --git a/include/libavutil/video_enc_params.pxd b/include/libavutil/video_enc_params.pxd new file mode 100644 index 000000000..aec452666 --- /dev/null +++ b/include/libavutil/video_enc_params.pxd @@ -0,0 +1,25 @@ +from libc.stdint cimport uint32_t, int32_t +from libc.stddef cimport size_t + + +cdef extern from "libavutil/video_enc_params.h" nogil: + cdef enum AVVideoEncParamsType: + AV_VIDEO_ENC_PARAMS_NONE + AV_VIDEO_ENC_PARAMS_VP9 + AV_VIDEO_ENC_PARAMS_H264 + AV_VIDEO_ENC_PARAMS_MPEG2 + + cdef struct AVVideoEncParams: + uint32_t nb_blocks + size_t blocks_offset + size_t block_size + AVVideoEncParamsType type + int32_t qp + int32_t delta_qp[4][2] + + cdef struct AVVideoBlockParams: + int32_t src_x + int32_t src_y + int32_t w + int32_t h + int32_t delta_qp \ No newline at end of file From 6c229feb5c9e1ea4d91cdc32d0639cf375d9393e Mon Sep 17 00:00:00 2001 From: Benedikt Lorch Date: Wed, 7 Jan 2026 11:15:02 +0100 Subject: [PATCH 2/6] Lazily import numpy inside the qp_map method, just like in the motion vectors' to_ndarray method --- av/sidedata/encparams.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/av/sidedata/encparams.pyx b/av/sidedata/encparams.pyx index 984ca4960..d89a79f05 100644 --- a/av/sidedata/encparams.pyx +++ b/av/sidedata/encparams.pyx @@ -2,7 +2,6 @@ cimport libav as lib from libc.stdint cimport uint8_t, int32_t from enum import IntEnum -import numpy as np VideoEncParamsType = IntEnum( @@ -85,6 +84,8 @@ cdef class VideoEncParams(SideData): Convenience method that creates a 2-D map with the quantization parameters per macroblock. Only for MPEG2 and H264 encoded videos. """ + import numpy as np + cdef int mb_h = (self.frame.ptr.height + 15) // 16 cdef int mb_w = (self.frame.ptr.width + 15) // 16 cdef int nb_mb = mb_h * mb_w From 3ec5ec72139f7387698f82230817ff9a38036541 Mon Sep 17 00:00:00 2001 From: Benedikt Lorch Date: Wed, 7 Jan 2026 11:42:25 +0100 Subject: [PATCH 3/6] Add two simple tests --- tests/test_decode.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/test_decode.py b/tests/test_decode.py index 7ad722bfd..2d64029f2 100644 --- a/tests/test_decode.py +++ b/tests/test_decode.py @@ -137,6 +137,29 @@ def test_decoded_motion_vectors_no_flag(self) -> None: if not frame.key_frame: assert vectors is None return + + def test_decoded_video_enc_params(self) -> None: + container = av.open(fate_suite("h264/interlaced_crop.mp4")) + stream = container.streams.video[0] + stream.codec_context.options = {"export_side_data": "venc_params"} + + for frame in container.decode(stream): + video_enc_params = frame.side_data.get(av.sidedata.sidedata.Type.VIDEO_ENC_PARAMS) + assert video_enc_params is not None + assert video_enc_params.nb_blocks == 40 * 24 + + first_block = video_enc_params.block_params(0) + assert video_enc_params.qp + first_block.delta_qp == 29 + return + + def test_decoded_video_enc_params_no_flag(self) -> None: + container = av.open(fate_suite("h264/interlaced_crop.mp4")) + stream = container.streams.video[0] + # When no additional flag is given, there should be no side data with the video encoding params + + for frame in container.decode(stream): + video_enc_params = frame.side_data.get(av.sidedata.sidedata.Type.VIDEO_ENC_PARAMS) + assert video_enc_params is None def test_decode_video_corrupt(self) -> None: # write an empty file From 841d82b2b6a3daefe400ad3e7c5a1e7e8e0969d8 Mon Sep 17 00:00:00 2001 From: Benedikt Lorch Date: Wed, 7 Jan 2026 11:47:53 +0100 Subject: [PATCH 4/6] Make ruff happy --- av/sidedata/encparams.pyx | 4 ++-- av/sidedata/sidedata.pyx | 2 +- tests/test_decode.py | 12 ++++++++---- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/av/sidedata/encparams.pyx b/av/sidedata/encparams.pyx index d89a79f05..507dc4cdc 100644 --- a/av/sidedata/encparams.pyx +++ b/av/sidedata/encparams.pyx @@ -1,5 +1,5 @@ cimport libav as lib -from libc.stdint cimport uint8_t, int32_t +from libc.stdint cimport int32_t, uint8_t from enum import IntEnum @@ -85,7 +85,7 @@ cdef class VideoEncParams(SideData): Only for MPEG2 and H264 encoded videos. """ import numpy as np - + cdef int mb_h = (self.frame.ptr.height + 15) // 16 cdef int mb_w = (self.frame.ptr.width + 15) // 16 cdef int nb_mb = mb_h * mb_w diff --git a/av/sidedata/sidedata.pyx b/av/sidedata/sidedata.pyx index a8a209893..96c3d72f5 100644 --- a/av/sidedata/sidedata.pyx +++ b/av/sidedata/sidedata.pyx @@ -3,8 +3,8 @@ from libc.stdint cimport int32_t from collections.abc import Mapping from enum import Enum -from av.sidedata.motionvectors import MotionVectors from av.sidedata.encparams import VideoEncParams +from av.sidedata.motionvectors import MotionVectors cdef object _cinit_bypass_sentinel = object() diff --git a/tests/test_decode.py b/tests/test_decode.py index 2d64029f2..0c57d6cc2 100644 --- a/tests/test_decode.py +++ b/tests/test_decode.py @@ -137,28 +137,32 @@ def test_decoded_motion_vectors_no_flag(self) -> None: if not frame.key_frame: assert vectors is None return - + def test_decoded_video_enc_params(self) -> None: container = av.open(fate_suite("h264/interlaced_crop.mp4")) stream = container.streams.video[0] stream.codec_context.options = {"export_side_data": "venc_params"} for frame in container.decode(stream): - video_enc_params = frame.side_data.get(av.sidedata.sidedata.Type.VIDEO_ENC_PARAMS) + video_enc_params = frame.side_data.get( + av.sidedata.sidedata.Type.VIDEO_ENC_PARAMS + ) assert video_enc_params is not None assert video_enc_params.nb_blocks == 40 * 24 first_block = video_enc_params.block_params(0) assert video_enc_params.qp + first_block.delta_qp == 29 return - + def test_decoded_video_enc_params_no_flag(self) -> None: container = av.open(fate_suite("h264/interlaced_crop.mp4")) stream = container.streams.video[0] # When no additional flag is given, there should be no side data with the video encoding params for frame in container.decode(stream): - video_enc_params = frame.side_data.get(av.sidedata.sidedata.Type.VIDEO_ENC_PARAMS) + video_enc_params = frame.side_data.get( + av.sidedata.sidedata.Type.VIDEO_ENC_PARAMS + ) assert video_enc_params is None def test_decode_video_corrupt(self) -> None: From f3b6598469654b0bae7054de4373cb348a86e552 Mon Sep 17 00:00:00 2001 From: Benedikt Lorch Date: Wed, 7 Jan 2026 16:40:35 +0100 Subject: [PATCH 5/6] Remove an empty line that ruff dislikes --- av/sidedata/encparams.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/av/sidedata/encparams.pyx b/av/sidedata/encparams.pyx index 507dc4cdc..380dc53d4 100644 --- a/av/sidedata/encparams.pyx +++ b/av/sidedata/encparams.pyx @@ -3,7 +3,6 @@ from libc.stdint cimport int32_t, uint8_t from enum import IntEnum - VideoEncParamsType = IntEnum( "AVVideoEncParamsType", { From fdbff11bb869a5d5c024bfad8d847ecf78263b34 Mon Sep 17 00:00:00 2001 From: Benedikt Lorch Date: Wed, 7 Jan 2026 18:41:18 +0100 Subject: [PATCH 6/6] Make mypy pass --- av/sidedata/encparams.pyi | 4 ++-- tests/test_decode.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/av/sidedata/encparams.pyi b/av/sidedata/encparams.pyi index 40762a66c..de962ba51 100644 --- a/av/sidedata/encparams.pyi +++ b/av/sidedata/encparams.pyi @@ -1,5 +1,5 @@ from enum import IntEnum -from typing import cast +from typing import Any, cast import numpy as np @@ -17,7 +17,7 @@ class VideoEncParams: qp: int delta_qp: int def block_params(self, idx: int) -> VideoBlockParams: ... - def qp_map(self) -> np.ndarray[int, int]: ... + def qp_map(self) -> np.ndarray[Any, Any]: ... class VideoBlockParams: src_x: int diff --git a/tests/test_decode.py b/tests/test_decode.py index 0c57d6cc2..685b743f5 100644 --- a/tests/test_decode.py +++ b/tests/test_decode.py @@ -2,6 +2,7 @@ import os import pathlib from fractions import Fraction +from typing import cast import numpy as np import pytest @@ -144,8 +145,9 @@ def test_decoded_video_enc_params(self) -> None: stream.codec_context.options = {"export_side_data": "venc_params"} for frame in container.decode(stream): - video_enc_params = frame.side_data.get( - av.sidedata.sidedata.Type.VIDEO_ENC_PARAMS + video_enc_params = cast( + av.sidedata.encparams.VideoEncParams, + frame.side_data.get("VIDEO_ENC_PARAMS"), ) assert video_enc_params is not None assert video_enc_params.nb_blocks == 40 * 24 @@ -160,9 +162,7 @@ def test_decoded_video_enc_params_no_flag(self) -> None: # When no additional flag is given, there should be no side data with the video encoding params for frame in container.decode(stream): - video_enc_params = frame.side_data.get( - av.sidedata.sidedata.Type.VIDEO_ENC_PARAMS - ) + video_enc_params = frame.side_data.get("VIDEO_ENC_PARAMS") assert video_enc_params is None def test_decode_video_corrupt(self) -> None: