From 3743bef4a575cfccf702ef436932b985435e02ca Mon Sep 17 00:00:00 2001
From: Oleg Okunevych
Date: Fri, 5 Dec 2025 17:48:34 +0200
Subject: [PATCH] Detect AV1 keyframes logic.

---
 lib/ex_webrtc/rtp/av1.ex        | 138 ++++++++++++++++++++
 lib/ex_webrtc/rtp/munger.ex     |   7 +-
 test/ex_webrtc/rtp/av1_test.exs | 223 ++++++++++++++++++++++++++++++++
 3 files changed, 367 insertions(+), 1 deletion(-)
 create mode 100644 lib/ex_webrtc/rtp/av1.ex
 create mode 100644 test/ex_webrtc/rtp/av1_test.exs

diff --git a/lib/ex_webrtc/rtp/av1.ex b/lib/ex_webrtc/rtp/av1.ex
new file mode 100644
index 00000000..79edf07e
--- /dev/null
+++ b/lib/ex_webrtc/rtp/av1.ex
@@ -0,0 +1,138 @@
+defmodule ExWebRTC.RTP.AV1 do
+  @moduledoc """
+  Utilities for RTP packets carrying AV1 encoded payload.
+  """
+
+  alias ExRTP.Packet
+  alias ExWebRTC.RTP.AV1
+
+  # `obu_type` of a frame OBU (OBU_FRAME).
+  @obu_frame 6
+
+  @doc """
+  Checks whether RTP payload contains an AV1 keyframe.
+
+  According to the [AV1 RTP spec](https://aomediacodec.github.io/av1-rtp-spec/v1.0.0.html) §4.4,
+  the RTP aggregation header's N bit marks the start of a new coded video sequence (CVS).
+  A CVS must contain a sequence header and the first frame must be a KEY_FRAME as defined
+  by the AV1 Bitstream & Decoding Process Specification §6.8:
+  - `show_existing_frame` = 0 (a new frame, not a reference reuse)
+  - `frame_type` = KEY_FRAME (0)
+  - `show_frame` = 1 (displayed frame)
+
+  A packet is reported as a keyframe when its N bit is set, or when it does not start
+  with a continuation fragment (Z = 0) and one of its frame OBUs begins with the three
+  fields above. Packets with N = 0 and Z = 1, and payloads that fail to parse, are
+  reported as non-keyframes.
+
+  Some encoders repeat sequence headers in non-key frames, therefore the
+  presence of a sequence header alone is not considered sufficient for keyframe
+  detection.
+  """
+  @spec keyframe?(Packet.t()) :: boolean()
+  def keyframe?(%Packet{payload: rtp_payload}) do
+    case AV1.Payload.parse(rtp_payload) do
+      {:ok, av1_payload} ->
+        # N = 1 announces a new coded video sequence, so no OBU inspection is needed.
+        # With Z = 1 the payload starts in the middle of an OBU, so there is no OBU
+        # header to inspect; only payloads with Z = 0 are scanned.
+        av1_payload.n == 1 or
+          (av1_payload.z == 0 and has_keyframe?(av1_payload.payload))
+
+      {:error, _reason} ->
+        false
+    end
+  end
+
+  # Walks the OBUs in `obu_data` looking for a frame OBU that starts a displayed
+  # KEY_FRAME. OBUs of any other type, sequence headers included, are skipped.
+  defp has_keyframe?(<<>>), do: false
+
+  defp has_keyframe?(obu_data) do
+    case AV1.OBU.parse(obu_data) do
+      {:ok, %AV1.OBU{type: @obu_frame, payload: payload}, rest} ->
+        keyframe_frame_payload?(payload) or has_keyframe?(rest)
+
+      {:ok, _other_obu, rest} ->
+        has_keyframe?(rest)
+
+      {:error, _reason} ->
+        # Try partial frame header check as last resort
+        check_partial_frame_header(obu_data)
+    end
+  end
+
+  # Matches the leading frame header bits: show_existing_frame = 0,
+  # frame_type = KEY_FRAME (0), show_frame = 1.
+  defp keyframe_frame_payload?(<<0::1, 0::2, 1::1, _rest::bitstring>>), do: true
+  defp keyframe_frame_payload?(_payload), do: false
+
+  # Fallback for data `AV1.OBU.parse/1` rejected: checks the data as-is and, when it
+  # starts with a LEB128 value, also the data that follows that value.
+  defp check_partial_frame_header(obu_data) do
+    obu_data
+    |> candidate_partial_obus()
+    |> Enum.any?(&keyframe_from_partial_obu?/1)
+  end
+
+  defp candidate_partial_obus(obu_data) do
+    [obu_data | maybe_strip_length_prefix(obu_data)]
+  end
+
+  defp maybe_strip_length_prefix(obu_data) do
+    case AV1.LEB128.read(obu_data) do
+      {:ok, leb_size, _value} when byte_size(obu_data) > leb_size ->
+        [binary_part(obu_data, leb_size, byte_size(obu_data) - leb_size)]
+
+      _ ->
+        []
+    end
+  rescue
+    ArgumentError ->
+      []
+  end
+
+  # Reads the OBU header directly so that a frame OBU whose payload is not fully
+  # contained in `obu_data` can still be classified.
+  defp keyframe_from_partial_obu?(<<0::1, type::4, x::1, s::1, 0::1, rest::binary>>)
+       when type == @obu_frame do
+    with {:ok, payload_with_metadata} <- drop_extension(rest, x),
+         {:ok, payload} <- slice_payload(payload_with_metadata, s) do
+      keyframe_frame_payload?(payload)
+    else
+      _ -> false
+    end
+  end
+
+  defp keyframe_from_partial_obu?(_), do: false
+
+  # Skips the one-byte OBU extension header when the extension flag is set.
+  defp drop_extension(rest, 0), do: {:ok, rest}
+
+  defp drop_extension(rest, 1) do
+    case rest do
+      <<_tid::3, _sid::2, 0::3, tail::binary>> -> {:ok, tail}
+      _ -> :error
+    end
+  end
+
+  # Drops the LEB128 size field when the size flag is set and returns the payload bytes.
+  defp slice_payload(rest, 0), do: {:ok, rest}
+
+  defp slice_payload(rest, 1) do
+    case AV1.LEB128.read(rest) do
+      {:ok, leb_size, payload_size} when byte_size(rest) >= leb_size ->
+        available = byte_size(rest) - leb_size
+
+        # The declared size may exceed the bytes present (e.g. a fragmented OBU),
+        # so take no more than what is actually available.
+        {:ok, binary_part(rest, leb_size, min(payload_size, available))}
+
+      _ ->
+        :error
+    end
+  rescue
+    ArgumentError ->
+      :error
+  end
+end
diff --git a/lib/ex_webrtc/rtp/munger.ex b/lib/ex_webrtc/rtp/munger.ex
index 82176928..1a1a021e 100644
--- a/lib/ex_webrtc/rtp/munger.ex
+++ b/lib/ex_webrtc/rtp/munger.ex
@@ -78,7 +78,7 @@ defmodule ExWebRTC.RTP.Munger do
 
   `clock_rate` is the clock rate of the codec carried in munged RTP packets.
   """
-  @spec new(:opus | :h264 | :vp8 | RTPCodecParameters.t(), non_neg_integer()) :: t()
+  @spec new(:opus | :h264 | :vp8 | :av1 | RTPCodecParameters.t(), non_neg_integer()) :: t()
   def new(:opus, clock_rate) do
     %__MODULE__{clock_rate: clock_rate}
   end
@@ -91,11 +91,16 @@ defmodule ExWebRTC.RTP.Munger do
     %__MODULE__{clock_rate: clock_rate, vp8_munger: VP8.Munger.new()}
   end
 
+  def new(:av1, clock_rate) do
+    %__MODULE__{clock_rate: clock_rate}
+  end
+
   def new(%RTPCodecParameters{} = codec_params) do
     case codec_params.mime_type do
       "audio/opus" -> new(:opus, codec_params.clock_rate)
       "video/H264" -> new(:h264, codec_params.clock_rate)
       "video/VP8" -> new(:vp8, codec_params.clock_rate)
+      "video/AV1" -> new(:av1, codec_params.clock_rate)
     end
   end
 
diff --git a/test/ex_webrtc/rtp/av1_test.exs b/test/ex_webrtc/rtp/av1_test.exs
new file mode 100644
index 00000000..0d66e241
--- /dev/null
+++ b/test/ex_webrtc/rtp/av1_test.exs
@@ -0,0 +1,223 @@
+defmodule ExWebRTC.RTP.AV1Test do
+  use ExUnit.Case, async: true
+
+  alias ExRTP.Packet
+  alias ExWebRTC.RTP.AV1
+
+  @obu_temporal_delimiter 2
+  @obu_frame 6
+
+  describe "keyframe?/1" do
+    test "detects keyframe from single complete OBU" do
+      # Create a frame OBU with KEY_FRAME type (0)
+      # OBU header: forbidden=0, type=6 (frame), extension=0, has_size=1, reserved=0
+      # Frame header: show_existing_frame=0, frame_type=0 (KEY_FRAME), show_frame=1
+      frame_payload = <<0::1, 0::2, 1::1, 0::4>>
+
+      # Create complete OBU with size
+      obu_header = <<0::1, @obu_frame::4, 0::1, 1::1, 0::1>>
+      obu_size = <<byte_size(frame_payload)>>
+      complete_obu = obu_header <> obu_size <> frame_payload
+
+      # Create AV1 RTP payload with Z=0, Y=0 (single complete OBU)
+      rtp_payload = <<0::1, 0::1, 1::2, 0::1, 0::3, complete_obu::binary>>
+
+      packet = %Packet{
+        payload_type: 45,
+        sequence_number: 1,
+        timestamp: 1000,
+        ssrc: 12345,
+        payload: rtp_payload
+      }
+
+      assert AV1.keyframe?(packet)
+    end
+
+    test "detects non-keyframe from single complete OBU" do
+      # Create a frame OBU with INTER_FRAME type (1)
+      # Frame header: show_existing_frame=0, frame_type=1 (INTER_FRAME)
+      frame_payload = <<0::1, 1::2, 1::1, 0::4>>
+
+      # Create complete OBU with size
+      obu_header = <<0::1, @obu_frame::4, 0::1, 1::1, 0::1>>
+      obu_size = <<byte_size(frame_payload)>>
+      complete_obu = obu_header <> obu_size <> frame_payload
+
+      # Create AV1 RTP payload with Z=0, Y=0 (single complete OBU)
+      rtp_payload = <<0::1, 0::1, 1::2, 0::1, 0::3, complete_obu::binary>>
+
+      packet = %Packet{
+        payload_type: 45,
+        sequence_number: 1,
+        timestamp: 1000,
+        ssrc: 12345,
+        payload: rtp_payload
+      }
+
+      refute AV1.keyframe?(packet)
+    end
+
+    test "returns false for non-frame OBU" do
+      # Create a temporal delimiter OBU (not a frame)
+      obu_header = <<0::1, @obu_temporal_delimiter::4, 0::1, 1::1, 0::1>>
+      obu_size = <<0>>
+      complete_obu = obu_header <> obu_size
+
+      # Create AV1 RTP payload
+      rtp_payload = <<0::1, 0::1, 1::2, 0::1, 0::3, complete_obu::binary>>
+
+      packet = %Packet{
+        payload_type: 45,
+        sequence_number: 1,
+        timestamp: 1000,
+        ssrc: 12345,
+        payload: rtp_payload
+      }
+
+      refute AV1.keyframe?(packet)
+    end
+
+    test "returns false when only sequence header is present" do
+      sequence_payload = <<0xAA>>
+      obu_header = <<0::1, 1::4, 0::1, 1::1, 0::1>>
+      obu_size = <<byte_size(sequence_payload)>>
+      complete_obu = obu_header <> obu_size <> sequence_payload
+
+      rtp_payload = <<0::1, 0::1, 1::2, 0::1, 0::3, complete_obu::binary>>
+
+      packet = %Packet{
+        payload_type: 45,
+        sequence_number: 1,
+        timestamp: 1000,
+        ssrc: 12345,
+        payload: rtp_payload
+      }
+
+      refute AV1.keyframe?(packet)
+    end
+
+    test "returns false when frame is not displayed" do
+      frame_payload = <<0::1, 0::2, 0::1, 0::4>>
+      obu_header = <<0::1, @obu_frame::4, 0::1, 1::1, 0::1>>
+      obu_size = <<byte_size(frame_payload)>>
+      complete_obu = obu_header <> obu_size <> frame_payload
+
+      rtp_payload = <<0::1, 0::1, 1::2, 0::1, 0::3, complete_obu::binary>>
+
+      packet = %Packet{
+        payload_type: 45,
+        sequence_number: 1,
+        timestamp: 1000,
+        ssrc: 12345,
+        payload: rtp_payload
+      }
+
+      refute AV1.keyframe?(packet)
+    end
+
+    test "returns false for fragmented OBU (last fragment)" do
+      # Z=1, Y=0: last fragment
+      frame_payload = <<0::1, 0::2, 0::5>>
+      rtp_payload = <<1::1, 0::1, 1::2, 0::1, 0::3, frame_payload::binary>>
+
+      packet = %Packet{
+        payload_type: 45,
+        sequence_number: 2,
+        timestamp: 1000,
+        ssrc: 12345,
+        payload: rtp_payload
+      }
+
+      refute AV1.keyframe?(packet)
+    end
+
+    test "returns false for fragmented OBU (middle fragment)" do
+      # Z=1, Y=1: middle fragment
+      frame_payload = <<0::1, 0::2, 0::5>>
+      rtp_payload = <<1::1, 1::1, 1::2, 0::1, 0::3, frame_payload::binary>>
+
+      packet = %Packet{
+        payload_type: 45,
+        sequence_number: 2,
+        timestamp: 1000,
+        ssrc: 12345,
+        payload: rtp_payload
+      }
+
+      refute AV1.keyframe?(packet)
+    end
+
+    test "detects keyframe from N bit (new coded video sequence)" do
+      # N bit = 1 indicates new coded video sequence (keyframe with sequence header)
+      # This is the primary way to detect keyframes per the AV1 RTP spec
+      # Create any payload - the N bit is what matters
+      obu_header = <<0::1, @obu_temporal_delimiter::4, 0::1, 1::1, 0::1>>
+      obu_size = <<0>>
+      complete_obu = obu_header <> obu_size
+
+      # Create AV1 RTP payload with N=1 (new coded video sequence)
+      rtp_payload = <<0::1, 1::1, 0::2, 1::1, 0::3, complete_obu::binary>>
+
+      packet = %Packet{
+        payload_type: 45,
+        sequence_number: 1,
+        timestamp: 1000,
+        ssrc: 12345,
+        payload: rtp_payload
+      }
+
+      assert AV1.keyframe?(packet)
+    end
+
+    test "returns false for invalid payload" do
+      packet = %Packet{
+        payload_type: 45,
+        sequence_number: 1,
+        timestamp: 1000,
+        ssrc: 12345,
+        payload: <<0xFF, 0xFF>>
+      }
+
+      refute AV1.keyframe?(packet)
+    end
+
+    test "detects keyframe when RTP payload uses length prefix" do
+      frame_payload = <<0::1, 0::2, 1::1, 0::4>> <> :binary.copy(<<0>>, 130)
+      obu_header = <<0::1, @obu_frame::4, 0::1, 0::1, 0::1>>
+      complete_obu = obu_header <> frame_payload
+      leb_prefix = AV1.LEB128.encode(byte_size(complete_obu))
+
+      rtp_payload = <<0::1, 0::1, 0::2, 0::1, 0::3, leb_prefix::binary, complete_obu::binary>>
+
+      packet = %Packet{
+        payload_type: 45,
+        sequence_number: 3,
+        timestamp: 2000,
+        ssrc: 6789,
+        payload: rtp_payload
+      }
+
+      assert AV1.keyframe?(packet)
+    end
+
+    test "detects keyframe when OBU size exceeds fragment" do
+      declared_size = 200
+      frame_payload = <<0::1, 0::2, 1::1, 0::4>>
+      obu_header = <<0::1, @obu_frame::4, 0::1, 1::1, 0::1>>
+      obu_size = AV1.LEB128.encode(declared_size)
+      truncated_obu = obu_header <> obu_size <> frame_payload
+
+      rtp_payload = <<0::1, 0::1, 1::2, 0::1, 0::3, truncated_obu::binary>>
+
+      packet = %Packet{
+        payload_type: 45,
+        sequence_number: 4,
+        timestamp: 3000,
+        ssrc: 6789,
+        payload: rtp_payload
+      }
+
+      assert AV1.keyframe?(packet)
+    end
+  end
+end