diff --git a/src/converter/mod.rs b/src/converter/mod.rs index 3b0c7fc..b0d6b8f 100644 --- a/src/converter/mod.rs +++ b/src/converter/mod.rs @@ -14,6 +14,16 @@ use crate::vulkan::VideoContext; use ash::vk; use tracing::debug; +/// Color space for RGB→YUV conversion matrix selection. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum ColorSpace { + /// BT.709 (standard SDR). Used for all HD/UHD SDR content. + #[default] + Bt709, + /// BT.2020 (HDR / wide color gamut). + Bt2020, +} + /// Supported input pixel formats for color conversion. #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[allow(clippy::upper_case_acronyms)] @@ -26,12 +36,38 @@ pub enum InputFormat { BGRA, /// RGBA (32-bit, red first, alpha last). RGBA, + /// ABGR2101010 (packed 10-bit per channel, 2-bit alpha). + /// Maps to DRM_FORMAT_ABGR2101010 / VK_FORMAT_A2B10G10R10_UNORM_PACK32. + ABGR2101010, + /// RGBA16F (64-bit, 16-bit float per channel). + /// Maps to DRM_FORMAT_ABGR16161616F / VK_FORMAT_R16G16B16A16_SFLOAT. + /// + /// Expected input is linear-light scRGB where 1.0 = 80 nits. + /// The converter applies the PQ (ST 2084) transfer function internally. + RGBA16F, } impl InputFormat { /// Bytes per pixel for this format. pub fn bytes_per_pixel(&self) -> usize { - 4 // All current formats are 32-bit + match self { + InputFormat::BGRx + | InputFormat::RGBx + | InputFormat::BGRA + | InputFormat::RGBA + | InputFormat::ABGR2101010 => 4, + InputFormat::RGBA16F => 8, + } + } + + /// Vulkan format for creating image views of this input format. + pub fn vk_format(&self) -> vk::Format { + match self { + InputFormat::BGRx | InputFormat::BGRA => vk::Format::B8G8R8A8_UNORM, + InputFormat::RGBx | InputFormat::RGBA => vk::Format::R8G8B8A8_UNORM, + InputFormat::ABGR2101010 => vk::Format::A2B10G10R10_UNORM_PACK32, + InputFormat::RGBA16F => vk::Format::R16G16B16A16_SFLOAT, + } } } @@ -91,6 +127,7 @@ impl OutputFormat { /// Configuration for the color converter. 
#[derive(Clone, Debug)] +#[non_exhaustive] pub struct ColorConverterConfig { /// Input frame width. pub width: u32, @@ -100,6 +137,31 @@ pub struct ColorConverterConfig { pub input_format: InputFormat, /// Output YUV format. pub output_format: OutputFormat, + /// Color space for the RGB→YUV matrix. + /// Use Bt709 for SDR, Bt2020 for HDR. + pub color_space: ColorSpace, + /// Full range (0-255 luma) or limited/studio range (16-235 luma). + /// Must match the `full_range` flag in `ColorDescription` for correct playback. + pub full_range: bool, +} + +impl ColorConverterConfig { + /// Create a new configuration with BT.709 color space and full range. + pub fn new( + width: u32, + height: u32, + input_format: InputFormat, + output_format: OutputFormat, + ) -> Self { + Self { + width, + height, + input_format, + output_format, + color_space: ColorSpace::Bt709, + full_range: true, + } + } } /// GPU-based color format converter. @@ -551,12 +613,14 @@ impl ColorConverter { &[], ); - // Push constants: width, height, input_format, output_format. - let push_constants: [u32; 4] = [ + // Push constants: width, height, input_format, output_format, color_space, full_range. 
+ let push_constants: [u32; 6] = [ self.config.width, self.config.height, self.config.input_format as u32, self.config.output_format as u32, + self.config.color_space as u32, + self.config.full_range as u32, ]; let push_constants_bytes: &[u8] = std::slice::from_raw_parts( push_constants.as_ptr() as *const u8, @@ -718,7 +782,7 @@ impl ColorConverter { let view_info = vk::ImageViewCreateInfo::default() .image(src_image) .view_type(vk::ImageViewType::TYPE_2D) - .format(vk::Format::B8G8R8A8_UNORM) + .format(self.config.input_format.vk_format()) .subresource_range(vk::ImageSubresourceRange { aspect_mask: vk::ImageAspectFlags::COLOR, base_mip_level: 0, @@ -779,6 +843,8 @@ mod tests { assert_eq!(InputFormat::RGBx.bytes_per_pixel(), 4); assert_eq!(InputFormat::BGRA.bytes_per_pixel(), 4); assert_eq!(InputFormat::RGBA.bytes_per_pixel(), 4); + assert_eq!(InputFormat::ABGR2101010.bytes_per_pixel(), 4); + assert_eq!(InputFormat::RGBA16F.bytes_per_pixel(), 8); } #[test] @@ -788,6 +854,24 @@ mod tests { assert_eq!(InputFormat::RGBx as u32, 1); assert_eq!(InputFormat::BGRA as u32, 2); assert_eq!(InputFormat::RGBA as u32, 3); + assert_eq!(InputFormat::ABGR2101010 as u32, 4); + assert_eq!(InputFormat::RGBA16F as u32, 5); + } + + #[test] + fn test_input_format_vk_format() { + assert_eq!(InputFormat::BGRx.vk_format(), vk::Format::B8G8R8A8_UNORM); + assert_eq!(InputFormat::BGRA.vk_format(), vk::Format::B8G8R8A8_UNORM); + assert_eq!(InputFormat::RGBx.vk_format(), vk::Format::R8G8B8A8_UNORM); + assert_eq!(InputFormat::RGBA.vk_format(), vk::Format::R8G8B8A8_UNORM); + assert_eq!( + InputFormat::ABGR2101010.vk_format(), + vk::Format::A2B10G10R10_UNORM_PACK32 + ); + assert_eq!( + InputFormat::RGBA16F.vk_format(), + vk::Format::R16G16B16A16_SFLOAT + ); } // ======================== @@ -875,6 +959,8 @@ mod tests { height: 1080, input_format: InputFormat::BGRx, output_format: OutputFormat::NV12, + color_space: ColorSpace::Bt709, + full_range: true, }; let cloned = config.clone(); @@ -882,6 
+968,8 @@ mod tests { assert_eq!(cloned.height, 1080); assert_eq!(cloned.input_format, InputFormat::BGRx); assert_eq!(cloned.output_format, OutputFormat::NV12); + assert_eq!(cloned.color_space, ColorSpace::Bt709); + assert!(cloned.full_range); } #[test] @@ -891,6 +979,8 @@ mod tests { height: 480, input_format: InputFormat::RGBA, output_format: OutputFormat::I420, + color_space: ColorSpace::Bt709, + full_range: true, }; let debug_str = format!("{:?}", config); @@ -930,6 +1020,8 @@ mod tests { height: 64, input_format: InputFormat::BGRx, output_format: OutputFormat::NV12, + color_space: ColorSpace::Bt709, + full_range: true, }; let result = ColorConverter::new(context, config); @@ -962,6 +1054,8 @@ mod tests { height: 32, input_format: *input_format, output_format: *output_format, + color_space: ColorSpace::Bt709, + full_range: true, }; let result = ColorConverter::new(context.clone(), config); diff --git a/src/converter/pipeline.rs b/src/converter/pipeline.rs index bea8a5d..58ab1be 100644 --- a/src/converter/pipeline.rs +++ b/src/converter/pipeline.rs @@ -38,7 +38,7 @@ pub fn create_converter( let push_constant_range = vk::PushConstantRange::default() .stage_flags(vk::ShaderStageFlags::COMPUTE) .offset(0) - .size(16); // 4 x u32: width, height, input_format, output_format + .size(24); // 6 x u32: width, height, input_format, output_format, color_space, full_range let pipeline_layout_info = vk::PipelineLayoutCreateInfo::default() .set_layouts(std::slice::from_ref(&descriptor_set_layout)) @@ -48,7 +48,7 @@ pub fn create_converter( .map_err(|e| PixelForgeError::ResourceCreation(e.to_string()))?; // Create compute shader module. 
- let shader_code = super::shader::get_spirv_code(); + let shader_code = super::shader::get_spirv_code()?; let shader_info = vk::ShaderModuleCreateInfo::default().code(&shader_code); let shader_module = unsafe { device.create_shader_module(&shader_info, None) } diff --git a/src/converter/shader.rs b/src/converter/shader.rs index 0d76712..0b9f3a4 100644 --- a/src/converter/shader.rs +++ b/src/converter/shader.rs @@ -1,149 +1,35 @@ //! Compute shader for color format conversion. //! //! This module contains the SPIR-V bytecode for the color conversion compute shader. -//! The shader converts RGB/BGR formats to various YUV formats using BT.601 coefficients. +//! The shader converts RGB/BGR formats to various YUV formats using BT.709 (SDR) +//! or BT.2020 (HDR) coefficients, selected via push constants. + +use crate::error::{PixelForgeError, Result}; +use std::sync::OnceLock; + +/// Cached compiled SPIR-V bytecode. +static SPIRV_CACHE: OnceLock<Vec<u32>> = OnceLock::new(); /// Get the SPIR-V bytecode for the color conversion shader. /// /// The shader expects: -/// - Push constants: width (u32), height (u32), input_format (u32), output_format (u32) -/// - Binding 0: Input buffer (RGB/BGR data) +/// - Push constants: width, height, input_format, output_format, color_space, full_range (6 × u32) +/// - Binding 0: Input image (sampler2D) /// - Binding 1: Output buffer (YUV data) /// /// Workgroup size: 8x8x1. -pub fn get_spirv_code() -> Vec<u32> { - // SPIR-V bytecode generated from the GLSL compute shader below.
- // - // #version 450 - // - // layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - // - // layout(push_constant) uniform PushConstants { - // uint width; - // uint height; - // uint input_format; // 0=BGRx, 1=RGBx, 2=BGRA, 3=RGBA - // uint output_format; // 0=NV12, 1=I420, 2=YUV444 - // } params; - // - // layout(std430, binding = 0) readonly buffer InputBuffer { - // uint input_data[]; - // }; - // - // layout(std430, binding = 1) writeonly buffer OutputBuffer { - // uint output_data[]; - // }; - // - // // BT.601 conversion coefficients - // const float Y_R = 0.299; - // const float Y_G = 0.587; - // const float Y_B = 0.114; - // const float U_R = -0.169; - // const float U_G = -0.331; - // const float U_B = 0.500; - // const float V_R = 0.500; - // const float V_G = -0.419; - // const float V_B = -0.081; - // - // vec3 extract_rgb(uint pixel, uint format) { - // uint b0 = (pixel >> 0) & 0xFF; - // uint b1 = (pixel >> 8) & 0xFF; - // uint b2 = (pixel >> 16) & 0xFF; - // - // if (format == 0 || format == 2) { - // // BGRx or BGRA - // return vec3(float(b2), float(b1), float(b0)); - // } else { - // // RGBx or RGBA - // return vec3(float(b0), float(b1), float(b2)); - // } - // } - // - // vec3 rgb_to_yuv(vec3 rgb) { - // float y = Y_R * rgb.r + Y_G * rgb.g + Y_B * rgb.b; - // float u = 128.0 + U_R * rgb.r + U_G * rgb.g + U_B * rgb.b; - // float v = 128.0 + V_R * rgb.r + V_G * rgb.g + V_B * rgb.b; - // return vec3(clamp(y, 0.0, 255.0), clamp(u, 0.0, 255.0), clamp(v, 0.0, 255.0)); - // } - // - // void main() { - // uint x = gl_GlobalInvocationID.x; - // uint y = gl_GlobalInvocationID.y; - // - // if (x >= params.width || y >= params.height) return; - // - // uint pixel_idx = y * params.width + x; - // uint pixel = input_data[pixel_idx]; - // vec3 rgb = extract_rgb(pixel, params.input_format); - // vec3 yuv = rgb_to_yuv(rgb); - // - // uint pixel_count = params.width * params.height; - // - // if (params.output_format == 2) { - // // YUV444: Full 
resolution Y, U, V planes - // output_data[pixel_idx] = uint(yuv.x); - // output_data[pixel_count + pixel_idx] = uint(yuv.y); - // output_data[2 * pixel_count + pixel_idx] = uint(yuv.z); - // } else { - // // YUV420: Write Y for every pixel - // // Write packed Y values (4 pixels per uint) - // uint y_byte_idx = pixel_idx; - // uint y_word_idx = y_byte_idx / 4; - // uint y_byte_offset = y_byte_idx % 4; - // - // atomicOr(output_data[y_word_idx], uint(yuv.x) << (y_byte_offset * 8)); - // - // // Only process UV for top-left pixel of each 2x2 block - // if ((x % 2 == 0) && (y % 2 == 0)) { - // uint uv_x = x / 2; - // uint uv_y = y / 2; - // uint uv_width = params.width / 2; - // uint uv_idx = uv_y * uv_width + uv_x; - // - // // Average UV from 2x2 block for better quality - // vec3 yuv00 = yuv; - // vec3 yuv10 = rgb_to_yuv(extract_rgb(input_data[pixel_idx + 1], params.input_format)); - // vec3 yuv01 = rgb_to_yuv(extract_rgb(input_data[pixel_idx + params.width], params.input_format)); - // vec3 yuv11 = rgb_to_yuv(extract_rgb(input_data[pixel_idx + params.width + 1], params.input_format)); - // - // float avg_u = (yuv00.y + yuv10.y + yuv01.y + yuv11.y) / 4.0; - // float avg_v = (yuv00.z + yuv10.z + yuv01.z + yuv11.z) / 4.0; - // - // if (params.output_format == 0) { - // // NV12: Interleaved UV after Y plane - // uint uv_base = pixel_count; - // uint uv_byte_idx = uv_idx * 2; - // uint uv_word_idx = uv_byte_idx / 4; - // uint uv_byte_offset = uv_byte_idx % 4; - // - // uint uv_packed = (uint(avg_v) << 8) | uint(avg_u); - // atomicOr(output_data[uv_base/4 + uv_word_idx], uv_packed << (uv_byte_offset * 8)); - // } else { - // // I420: Separate U and V planes - // uint u_base = pixel_count; - // uint v_base = pixel_count + pixel_count / 4; - // - // uint u_byte_idx = u_base + uv_idx; - // uint u_word_idx = u_byte_idx / 4; - // uint u_byte_offset = u_byte_idx % 4; - // - // uint v_byte_idx = v_base + uv_idx; - // uint v_word_idx = v_byte_idx / 4; - // uint v_byte_offset = 
v_byte_idx % 4; - // - // atomicOr(output_data[u_word_idx], uint(avg_u) << (u_byte_offset * 8)); - // atomicOr(output_data[v_word_idx], uint(avg_v) << (v_byte_offset * 8)); - // } - // } - // } - // } - - // For now, return a placeholder - we'll compile the actual shader. - // This needs to be replaced with the actual SPIR-V bytecode. - compile_glsl_to_spirv() +/// +/// The compiled SPIR-V is cached after the first successful compilation. +pub fn get_spirv_code() -> Result<Vec<u32>> { + if let Some(cached) = SPIRV_CACHE.get() { + return Ok(cached.clone()); + } + let spirv = compile_glsl_to_spirv()?; + Ok(SPIRV_CACHE.get_or_init(|| spirv).clone()) } /// Compile GLSL to SPIR-V at runtime using shaderc. -fn compile_glsl_to_spirv() -> Vec<u32> { +fn compile_glsl_to_spirv() -> Result<Vec<u32>> { const SHADER_SOURCE: &str = r#" #version 450 @@ -152,8 +38,10 @@ layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; layout(push_constant) uniform PushConstants { uint width; uint height; - uint input_format; // 0=BGRx, 1=RGBx, 2=BGRA, 3=RGBA (unused with texelFetch swizzle) + uint input_format; // 0=BGRx, 1=RGBx, 2=BGRA, 3=RGBA, 4=ABGR2101010, 5=RGBA16F uint output_format; // 0=NV12, 1=I420, 2=YUV444, 3=P010, 4=YUV444P10 + uint color_space; // 0=BT.709, 1=BT.2020 + uint full_range; // 0=limited/studio range, 1=full range } params; // Source image sampled directly — eliminates the image-to-buffer copy. @@ -163,35 +51,107 @@ layout(std430, binding = 1) buffer OutputBuffer { uint output_data[]; }; -// BT.601 conversion coefficients. -const float Y_R = 0.299; -const float Y_G = 0.587; -const float Y_B = 0.114; -const float U_R = -0.169; -const float U_G = -0.331; -const float U_B = 0.500; -const float V_R = 0.500; -const float V_G = -0.419; -const float V_B = -0.081; - -// Read RGB from source image via texelFetch (Vulkan format handles BGRA→RGBA swizzle). +// BT.709 conversion coefficients (SDR).
+const float BT709_Y_R = 0.2126; +const float BT709_Y_G = 0.7152; +const float BT709_Y_B = 0.0722; +const float BT709_U_R = -0.1146; +const float BT709_U_G = -0.3854; +const float BT709_U_B = 0.5000; +const float BT709_V_R = 0.5000; +const float BT709_V_G = -0.4542; +const float BT709_V_B = -0.0458; + +// BT.2020 conversion coefficients (HDR). +const float BT2020_Y_R = 0.2627; +const float BT2020_Y_G = 0.6780; +const float BT2020_Y_B = 0.0593; +const float BT2020_U_R = -0.1396; +const float BT2020_U_G = -0.3604; +const float BT2020_U_B = 0.5000; +const float BT2020_V_R = 0.5000; +const float BT2020_V_G = -0.4598; +const float BT2020_V_B = -0.0402; + +// PQ (ST 2084) constants for inverse EOTF. +const float PQ_M1 = 0.1593017578125; +const float PQ_M2 = 78.84375; +const float PQ_C1 = 0.8359375; +const float PQ_C2 = 18.8515625; +const float PQ_C3 = 18.6875; + +// Apply PQ inverse EOTF: linear light [0,1] → PQ signal [0,1]. +// Input should be normalized to [0,1] where 1.0 = 10,000 nits. +vec3 linear_to_pq(vec3 L) { + L = max(L, vec3(0.0)); + vec3 Lm1 = pow(L, vec3(PQ_M1)); + vec3 N = pow((PQ_C1 + PQ_C2 * Lm1) / (1.0 + PQ_C3 * Lm1), vec3(PQ_M2)); + return N; +} + +// Read normalized RGB from source image via texelFetch. +// Returns values in [0, 1] range for all formats. +// For RGBA16F (HDR), applies PQ transfer function to map linear-light to [0, 1]. vec3 read_rgb(ivec2 coord) { vec4 rgba = texelFetch(inputImage, coord, 0); - return rgba.rgb * 255.0; + if (params.input_format == 5u) { + // RGBA16F: linear-light floats in scene-referred scRGB scale + // where 1.0 = 80 nits (the sRGB / scRGB reference white). + // PQ EOTF input must be absolute luminance normalized to [0, 1] + // where 1.0 = 10 000 nits, hence the factor 10000 / 80 = 125. + return linear_to_pq(rgba.rgb / 125.0); + } + // UNORM formats (8-bit and 10-bit): texelFetch returns [0.0, 1.0]. + return rgba.rgb; } +// Convert normalized RGB [0,1] to YUV. 
+// Returns Y in [0, 1], U and V in [0, 1] centered at 0.5. vec3 rgb_to_yuv(vec3 rgb) { - float y = Y_R * rgb.r + Y_G * rgb.g + Y_B * rgb.b; - float u = 128.0 + U_R * rgb.r + U_G * rgb.g + U_B * rgb.b; - float v = 128.0 + V_R * rgb.r + V_G * rgb.g + V_B * rgb.b; - return vec3(clamp(y, 0.0, 255.0), clamp(u, 0.0, 255.0), clamp(v, 0.0, 255.0)); + float yr, yg, yb, ur, ug, ub, vr, vg, vb; + if (params.color_space == 1u) { + // BT.2020 + yr = BT2020_Y_R; yg = BT2020_Y_G; yb = BT2020_Y_B; + ur = BT2020_U_R; ug = BT2020_U_G; ub = BT2020_U_B; + vr = BT2020_V_R; vg = BT2020_V_G; vb = BT2020_V_B; + } else { + // BT.709 (default) + yr = BT709_Y_R; yg = BT709_Y_G; yb = BT709_Y_B; + ur = BT709_U_R; ug = BT709_U_G; ub = BT709_U_B; + vr = BT709_V_R; vg = BT709_V_G; vb = BT709_V_B; + } + float y = yr * rgb.r + yg * rgb.g + yb * rgb.b; + float u = 0.5 + ur * rgb.r + ug * rgb.g + ub * rgb.b; + float v = 0.5 + vr * rgb.r + vg * rgb.g + vb * rgb.b; + return vec3(clamp(y, 0.0, 1.0), clamp(u, 0.0, 1.0), clamp(v, 0.0, 1.0)); +} + +// --- 8-bit quantization --- + +uint q8_y(float y) { + if (params.full_range == 0u) return uint(clamp(y * 219.0 + 16.0, 0.0, 255.0)); + return uint(clamp(y * 255.0, 0.0, 255.0)); +} + +uint q8_c(float c) { + if (params.full_range == 0u) return uint(clamp((c - 0.5) * 224.0 + 128.0, 0.0, 255.0)); + return uint(clamp(c * 255.0, 0.0, 255.0)); +} + +// --- 10-bit quantization (P010 layout: value in upper 10 bits of 16-bit word) --- + +uint q10_y(float y) { + uint val; + if (params.full_range == 0u) val = uint(clamp(y * 876.0 + 64.0, 0.0, 1023.0)); + else val = uint(clamp(y * 1023.0, 0.0, 1023.0)); + return (val << 6u) & 0xFFC0u; } -// Convert 8-bit value to 10-bit in 16-bit word (value in upper 10 bits). -uint to_10bit(float val) { - // Scale from 0-255 to 0-1023 (10-bit range), then shift left by 6. - uint val10 = uint(val * 4.0); // 0-255 -> 0-1020, close to 0-1023. - return (val10 << 6u) & 0xFFC0u; // Mask to ensure upper 10 bits only. 
+uint q10_c(float c) { + uint val; + if (params.full_range == 0u) val = uint(clamp((c - 0.5) * 896.0 + 512.0, 0.0, 1023.0)); + else val = uint(clamp(c * 1023.0, 0.0, 1023.0)); + return (val << 6u) & 0xFFC0u; } void main() { @@ -208,38 +168,37 @@ void main() { if (params.output_format == 2u) { // YUV444 8-bit: Full resolution, byte-packed into uints. - // Each pixel writes one byte to Y, U, and V planes. uint y_byte_idx = pixel_idx; uint y_word_idx = y_byte_idx / 4u; uint y_byte_offset = y_byte_idx % 4u; - atomicOr(output_data[y_word_idx], uint(yuv.x) << (y_byte_offset * 8u)); + atomicOr(output_data[y_word_idx], q8_y(yuv.x) << (y_byte_offset * 8u)); uint u_base = pixel_count; uint u_byte_idx = u_base + pixel_idx; uint u_word_idx = u_byte_idx / 4u; uint u_byte_offset = u_byte_idx % 4u; - atomicOr(output_data[u_word_idx], uint(yuv.y) << (u_byte_offset * 8u)); + atomicOr(output_data[u_word_idx], q8_c(yuv.y) << (u_byte_offset * 8u)); uint v_base = 2u * pixel_count; uint v_byte_idx = v_base + pixel_idx; uint v_word_idx = v_byte_idx / 4u; uint v_byte_offset = v_byte_idx % 4u; - atomicOr(output_data[v_word_idx], uint(yuv.z) << (v_byte_offset * 8u)); + atomicOr(output_data[v_word_idx], q8_c(yuv.z) << (v_byte_offset * 8u)); } else if (params.output_format == 4u) { // YUV444P10 (10-bit): 2-plane semi-planar format. uint y_half_offset = pixel_idx % 2u; uint y_packed_idx = pixel_idx / 2u; - atomicOr(output_data[y_packed_idx], to_10bit(yuv.x) << (y_half_offset * 16u)); + atomicOr(output_data[y_packed_idx], q10_y(yuv.x) << (y_half_offset * 16u)); uint uv_base_words = pixel_count / 2u; uint uv_word_idx = uv_base_words + pixel_idx; - uint uv_packed = to_10bit(yuv.y) | (to_10bit(yuv.z) << 16u); + uint uv_packed = q10_c(yuv.y) | (q10_c(yuv.z) << 16u); output_data[uv_word_idx] = uv_packed; } else if (params.output_format == 3u) { // P010 (10-bit NV12): 2-plane semi-planar, 4:2:0 subsampling. 
uint y_half_offset = pixel_idx % 2u; uint y_packed_idx = pixel_idx / 2u; - atomicOr(output_data[y_packed_idx], to_10bit(yuv.x) << (y_half_offset * 16u)); + atomicOr(output_data[y_packed_idx], q10_y(yuv.x) << (y_half_offset * 16u)); if ((x % 2u == 0u) && (y % 2u == 0u)) { uint uv_x = x / 2u; @@ -260,7 +219,7 @@ void main() { uint uv_base_words = pixel_count / 2u; uint uv_word_idx = uv_base_words + uv_idx; - uint uv_packed = to_10bit(avg_u) | (to_10bit(avg_v) << 16u); + uint uv_packed = q10_c(avg_u) | (q10_c(avg_v) << 16u); output_data[uv_word_idx] = uv_packed; } } else { @@ -268,7 +227,7 @@ void main() { uint y_byte_idx = pixel_idx; uint y_word_idx = y_byte_idx / 4u; uint y_byte_offset = y_byte_idx % 4u; - atomicOr(output_data[y_word_idx], uint(yuv.x) << (y_byte_offset * 8u)); + atomicOr(output_data[y_word_idx], q8_y(yuv.x) << (y_byte_offset * 8u)); if ((x % 2u == 0u) && (y % 2u == 0u)) { uint uv_x = x / 2u; @@ -295,11 +254,11 @@ void main() { uint uv_byte_offset = uv_byte_idx % 4u; if (uv_byte_offset <= 2u) { - uint uv_packed = (uint(avg_v) << 8u) | uint(avg_u); + uint uv_packed = (q8_c(avg_v) << 8u) | q8_c(avg_u); atomicOr(output_data[uv_word_idx], uv_packed << (uv_byte_offset * 8u)); } else { - atomicOr(output_data[uv_word_idx], uint(avg_u) << 24u); - atomicOr(output_data[uv_word_idx + 1u], uint(avg_v)); + atomicOr(output_data[uv_word_idx], q8_c(avg_u) << 24u); + atomicOr(output_data[uv_word_idx + 1u], q8_c(avg_v)); } } else { // I420: Separate U and V planes. 
@@ -315,16 +274,20 @@ void main() { uint v_word_idx = v_byte_idx / 4u; uint v_byte_offset = v_byte_idx % 4u; - atomicOr(output_data[u_word_idx], uint(avg_u) << (u_byte_offset * 8u)); - atomicOr(output_data[v_word_idx], uint(avg_v) << (v_byte_offset * 8u)); + atomicOr(output_data[u_word_idx], q8_c(avg_u) << (u_byte_offset * 8u)); + atomicOr(output_data[v_word_idx], q8_c(avg_v) << (v_byte_offset * 8u)); } } } } "#; - let compiler = shaderc::Compiler::new().expect("Failed to create shaderc compiler"); - let options = shaderc::CompileOptions::new().expect("Failed to create compile options"); + let compiler = shaderc::Compiler::new().ok_or_else(|| { + PixelForgeError::ShaderCompilation("failed to create shaderc compiler".into()) + })?; + let options = shaderc::CompileOptions::new().ok_or_else(|| { + PixelForgeError::ShaderCompilation("failed to create compile options".into()) + })?; let artifact = compiler .compile_into_spirv( @@ -334,7 +297,7 @@ void main() { "main", Some(&options), ) - .expect("Failed to compile shader"); + .map_err(|e| PixelForgeError::ShaderCompilation(e.to_string()))?; - artifact.as_binary().to_vec() + Ok(artifact.as_binary().to_vec()) } diff --git a/src/encoder/av1/init.rs b/src/encoder/av1/init.rs index 1e12b90..3c43171 100644 --- a/src/encoder/av1/init.rs +++ b/src/encoder/av1/init.rs @@ -285,14 +285,45 @@ impl AV1Encoder { let frame_height_bits = 32 - (height - 1).leading_zeros(); // AV1 color configuration. + // Map ColorDescription to AV1 enum constants, defaulting to BT.709. 
+ let (av1_color_primaries, av1_transfer, av1_matrix, av1_full_range) = if let Some(cd) = + &config.color_description + { + let primaries = match cd.color_primaries { + 9 => { + ash::vk::native::StdVideoAV1ColorPrimaries_STD_VIDEO_AV1_COLOR_PRIMARIES_BT_2020 + } + _ => { + ash::vk::native::StdVideoAV1ColorPrimaries_STD_VIDEO_AV1_COLOR_PRIMARIES_BT_709 + } + }; + let transfer = match cd.transfer_characteristics { + 16 => ash::vk::native::StdVideoAV1TransferCharacteristics_STD_VIDEO_AV1_TRANSFER_CHARACTERISTICS_SMPTE_2084, + _ => ash::vk::native::StdVideoAV1TransferCharacteristics_STD_VIDEO_AV1_TRANSFER_CHARACTERISTICS_BT_709, + }; + let matrix = match cd.matrix_coefficients { + 9 => ash::vk::native::StdVideoAV1MatrixCoefficients_STD_VIDEO_AV1_MATRIX_COEFFICIENTS_BT_2020_NCL, + _ => ash::vk::native::StdVideoAV1MatrixCoefficients_STD_VIDEO_AV1_MATRIX_COEFFICIENTS_BT_709, + }; + let full_range = if cd.full_range { 1 } else { 0 }; + (primaries, transfer, matrix, full_range) + } else { + ( + ash::vk::native::StdVideoAV1ColorPrimaries_STD_VIDEO_AV1_COLOR_PRIMARIES_BT_709, + ash::vk::native::StdVideoAV1TransferCharacteristics_STD_VIDEO_AV1_TRANSFER_CHARACTERISTICS_BT_709, + ash::vk::native::StdVideoAV1MatrixCoefficients_STD_VIDEO_AV1_MATRIX_COEFFICIENTS_BT_709, + 1, // full range for SDR + ) + }; + let color_config_flags = ash::vk::native::StdVideoAV1ColorConfigFlags { _bitfield_align_1: [], _bitfield_1: ash::vk::native::StdVideoAV1ColorConfigFlags::new_bitfield_1( - 0, // mono_chrome - 1, // color_range (full range) - 0, // separate_uv_delta_q - 1, // color_description_present_flag (we provide color primaries/transfer/matrix) - 0, // reserved + 0, // mono_chrome + av1_full_range, // color_range + 0, // separate_uv_delta_q + 1, // color_description_present_flag + 0, // reserved ), }; @@ -315,9 +346,9 @@ impl AV1Encoder { subsampling_x, subsampling_y, reserved1: 0, - color_primaries: ash::vk::native::StdVideoAV1ColorPrimaries_STD_VIDEO_AV1_COLOR_PRIMARIES_BT_709, - 
transfer_characteristics: ash::vk::native::StdVideoAV1TransferCharacteristics_STD_VIDEO_AV1_TRANSFER_CHARACTERISTICS_BT_709, - matrix_coefficients: ash::vk::native::StdVideoAV1MatrixCoefficients_STD_VIDEO_AV1_MATRIX_COEFFICIENTS_BT_709, + color_primaries: av1_color_primaries, + transfer_characteristics: av1_transfer, + matrix_coefficients: av1_matrix, chroma_sample_position: ash::vk::native::StdVideoAV1ChromaSamplePosition_STD_VIDEO_AV1_CHROMA_SAMPLE_POSITION_UNKNOWN, }; diff --git a/src/encoder/h264/init.rs b/src/encoder/h264/init.rs index ef351a9..5fdd5aa 100644 --- a/src/encoder/h264/init.rs +++ b/src/encoder/h264/init.rs @@ -8,6 +8,7 @@ use crate::encoder::resources::{ map_bitstream_buffer, query_supported_video_formats, ClearImageParams, MIN_BITSTREAM_BUFFER_SIZE, }; +use crate::encoder::ColorDescription; use crate::encoder::PixelFormat; use crate::error::{PixelForgeError, Result}; use crate::vulkan::VideoContext; @@ -435,7 +436,10 @@ impl H264Encoder { let mut vui_flags: ash::vk::native::StdVideoH264SpsVuiFlags = unsafe { std::mem::zeroed() }; vui_flags.set_aspect_ratio_info_present_flag(1); vui_flags.set_video_signal_type_present_flag(1); - vui_flags.set_video_full_range_flag(1); + let color_desc = config + .color_description + .unwrap_or(ColorDescription::bt709()); + vui_flags.set_video_full_range_flag(if color_desc.full_range { 1 } else { 0 }); vui_flags.set_color_description_present_flag(1); // Do not set HRD parameters when rate control is disabled/CQP. // HRD with zeroed bitrate values causes device loss on some drivers (AMD). 
@@ -449,9 +453,9 @@ impl H264Encoder { sar_width: 0, sar_height: 0, video_format: 5, - colour_primaries: 1, - transfer_characteristics: 1, - matrix_coefficients: 1, + colour_primaries: color_desc.color_primaries, + transfer_characteristics: color_desc.transfer_characteristics, + matrix_coefficients: color_desc.matrix_coefficients, num_units_in_tick: 0, time_scale: 0, max_num_reorder_frames: if config.b_frame_count > 0 { 1 } else { 0 }, diff --git a/src/encoder/h265/init.rs b/src/encoder/h265/init.rs index 636b171..7241c19 100644 --- a/src/encoder/h265/init.rs +++ b/src/encoder/h265/init.rs @@ -8,7 +8,7 @@ use crate::encoder::resources::{ make_codec_name, map_bitstream_buffer, query_supported_video_formats, ClearImageParams, MIN_BITSTREAM_BUFFER_SIZE, }; -use crate::encoder::{BitDepth, PixelFormat}; +use crate::encoder::{BitDepth, ColorDescription, PixelFormat}; use crate::error::{PixelForgeError, Result}; use crate::vulkan::VideoContext; use ash::vk; @@ -395,7 +395,7 @@ impl H265Encoder { 0, // long_term_ref_pics_present_flag 0, // sps_temporal_mvp_enabled_flag 0, // strong_intra_smoothing_enabled_flag - 0, // vui_parameters_present_flag + 1, // vui_parameters_present_flag 0, // sps_extension_present_flag 0, // sps_range_extension_flag 0, // transform_skip_rotation_enabled_flag @@ -415,6 +415,65 @@ impl H265Encoder { ), }; + // Build VUI structure. Defaults to BT.709 when no color description is set. 
+ let color_desc = config + .color_description + .unwrap_or(ColorDescription::bt709()); + let vui_flags = ash::vk::native::StdVideoH265SpsVuiFlags { + _bitfield_align_1: [], + _bitfield_1: ash::vk::native::StdVideoH265SpsVuiFlags::new_bitfield_1( + 1, // aspect_ratio_info_present_flag + 0, // overscan_info_present_flag + 0, // overscan_appropriate_flag + 1, // video_signal_type_present_flag + if color_desc.full_range { 1 } else { 0 }, // video_full_range_flag + 1, // colour_description_present_flag + 0, // chroma_loc_info_present_flag + 0, // neutral_chroma_indication_flag + 0, // field_seq_flag + 0, // frame_field_info_present_flag + 0, // default_display_window_flag + 0, // vui_timing_info_present_flag + 0, // vui_poc_proportional_to_timing_flag + 0, // vui_hrd_parameters_present_flag + 0, // bitstream_restriction_flag + 0, // tiles_fixed_structure_flag + 0, // motion_vectors_over_pic_boundaries_flag + 0, // restricted_ref_pic_lists_flag + ), + __bindgen_padding_0: 0, + }; + + let vui = ash::vk::native::StdVideoH265SequenceParameterSetVui { + flags: vui_flags, + aspect_ratio_idc: + ash::vk::native::StdVideoH265AspectRatioIdc_STD_VIDEO_H265_ASPECT_RATIO_IDC_SQUARE, + sar_width: 0, + sar_height: 0, + video_format: 5, + colour_primaries: color_desc.color_primaries, + transfer_characteristics: color_desc.transfer_characteristics, + matrix_coeffs: color_desc.matrix_coefficients, + chroma_sample_loc_type_top_field: 0, + chroma_sample_loc_type_bottom_field: 0, + reserved1: 0, + reserved2: 0, + def_disp_win_left_offset: 0, + def_disp_win_right_offset: 0, + def_disp_win_top_offset: 0, + def_disp_win_bottom_offset: 0, + vui_num_units_in_tick: 0, + vui_time_scale: 0, + vui_num_ticks_poc_diff_one_minus1: 0, + min_spatial_segmentation_idc: 0, + reserved3: 0, + max_bytes_per_pic_denom: 0, + max_bits_per_min_cu_denom: 0, + log2_max_mv_length_horizontal: 0, + log2_max_mv_length_vertical: 0, + pHrdParameters: ptr::null(), + }; + // Calculate bit depth minus 8 values for SPS (0 
for 8-bit, 2 for 10-bit) let bit_depth_minus8: u8 = match config.bit_depth { BitDepth::Eight => 0, @@ -477,7 +536,7 @@ impl H265Encoder { pScalingLists: ptr::null(), pShortTermRefPicSet: ptr::null(), pLongTermRefPicsSps: ptr::null(), - pSequenceParameterSetVui: ptr::null(), + pSequenceParameterSetVui: &vui, pPredictorPaletteEntries: ptr::null(), }; diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs index 06e211d..f925539 100644 --- a/src/encoder/mod.rs +++ b/src/encoder/mod.rs @@ -142,6 +142,44 @@ pub struct Dimensions { pub height: u32, } +/// Video signal color description for VUI parameters. +/// +/// Describes how color is encoded in the video stream, allowing decoders +/// to correctly interpret the color space. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ColorDescription { + /// Color primaries (1=BT.709, 9=BT.2020). + pub color_primaries: u8, + /// Transfer characteristics (1=BT.709, 16=ST2084/PQ). + pub transfer_characteristics: u8, + /// Matrix coefficients (1=BT.709, 9=BT.2020 NCL). + pub matrix_coefficients: u8, + /// Full range (true) or limited/TV range (false). + pub full_range: bool, +} + +impl ColorDescription { + /// BT.709 color description (standard SDR). + pub fn bt709() -> Self { + Self { + color_primaries: 1, + transfer_characteristics: 1, + matrix_coefficients: 1, + full_range: true, + } + } + + /// BT.2020 with PQ transfer function (HDR10). + pub fn bt2020_pq() -> Self { + Self { + color_primaries: 9, + transfer_characteristics: 16, + matrix_coefficients: 9, + full_range: false, + } + } +} + /// Encode configuration. #[derive(Debug, Clone)] #[must_use] @@ -183,6 +221,9 @@ pub struct EncodeConfig { /// P-frames. Setting it equal to `virtual_buffer_size_ms` gives /// IDR frames maximum headroom. pub initial_virtual_buffer_size_ms: u32, + /// Color description for VUI signaling. + /// Defaults to BT.709 (full-range) when `None`. 
+ pub color_description: Option<ColorDescription>, } impl EncodeConfig { @@ -207,6 +248,7 @@ max_reference_frames: DEFAULT_MAX_REFERENCE_FRAMES, virtual_buffer_size_ms: 1000, initial_virtual_buffer_size_ms: 1000, + color_description: None, } @@ -231,6 +273,7 @@ max_reference_frames: DEFAULT_MAX_REFERENCE_FRAMES, virtual_buffer_size_ms: 1000, initial_virtual_buffer_size_ms: 1000, + color_description: None, } @@ -255,6 +298,7 @@ max_reference_frames: DEFAULT_MAX_REFERENCE_FRAMES, virtual_buffer_size_ms: 1000, initial_virtual_buffer_size_ms: 1000, + color_description: None, } @@ -333,6 +377,12 @@ self.initial_virtual_buffer_size_ms = ms; self } + + /// Set the color description for VUI signaling. + pub fn with_color_description(mut self, desc: ColorDescription) -> Self { + self.color_description = Some(desc); + self + } } /// Encoded video packet. @@ -748,4 +798,27 @@ assert_ne!(Codec::H265, Codec::AV1); } } + + // ColorDescription tests. + mod color_description_tests { + use super::*; + + #[test] + fn test_bt709() { + let cd = ColorDescription::bt709(); + assert_eq!(cd.color_primaries, 1); + assert_eq!(cd.transfer_characteristics, 1); + assert_eq!(cd.matrix_coefficients, 1); + assert!(cd.full_range); + } + + #[test] + fn test_bt2020_pq() { + let cd = ColorDescription::bt2020_pq(); + assert_eq!(cd.color_primaries, 9); + assert_eq!(cd.transfer_characteristics, 16); + assert_eq!(cd.matrix_coefficients, 9); + assert!(!cd.full_range); + } + } } diff --git a/src/error.rs b/src/error.rs index 9aa7a73..82d012e 100644 --- a/src/error.rs +++ b/src/error.rs @@ -56,6 +56,10 @@ pub enum PixelForgeError { /// Generic Vulkan error. #[error("Vulkan error: {0}")] Vulkan(ash::vk::Result), + + /// Shader compilation failed.
+ #[error("Shader compilation failed: {0}")] + ShaderCompilation(String), } impl From<ash::vk::Result> for PixelForgeError { diff --git a/src/lib.rs b/src/lib.rs index 5fc76bf..e84e038 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -152,10 +152,10 @@ pub mod error; pub mod image; pub mod vulkan; -pub use converter::{ColorConverter, ColorConverterConfig, InputFormat, OutputFormat}; +pub use converter::{ColorConverter, ColorConverterConfig, ColorSpace, InputFormat, OutputFormat}; pub use encoder::{ - BitDepth as EncodeBitDepth, Codec, EncodeConfig, EncodedPacket, Encoder, FrameType, - PixelFormat, RateControlMode, DEFAULT_FRAME_RATE, DEFAULT_GOP_SIZE, DEFAULT_H264_QP, + BitDepth as EncodeBitDepth, Codec, ColorDescription, EncodeConfig, EncodedPacket, Encoder, + FrameType, PixelFormat, RateControlMode, DEFAULT_FRAME_RATE, DEFAULT_GOP_SIZE, DEFAULT_H264_QP, DEFAULT_H265_QP, DEFAULT_MAX_BITRATE, DEFAULT_MAX_REFERENCE_FRAMES, DEFAULT_TARGET_BITRATE, }; pub use error::PixelForgeError; diff --git a/src/vulkan.rs b/src/vulkan.rs index 1e195b4..96df177 100644 --- a/src/vulkan.rs +++ b/src/vulkan.rs @@ -350,12 +350,27 @@ impl VideoContext { supported_encode_codecs = encode_codecs; info!("Selected device: {}", device_name); break; + } else { + warn!( + "Device {} skipped: video_support={}, encode_supported={}, compute_support={}", + device_name, has_video_support, encode_supported, has_compute_support + ); + if !has_video_support { + warn!(" - No queue with VIDEO_ENCODE_KHR flag found"); + } + if !encode_supported { + warn!( + " - Required codecs not supported: {:?}", + builder.required_encode_codecs + ); + warn!(" - Available codecs: {:?}", encode_codecs); + } } } let physical_device = selected_device.ok_or_else(|| { PixelForgeError::NoSuitableDevice( - "No device with required video support found".to_string(), + "No device with required video support found. Ensure your GPU drivers support Vulkan Video extensions (VK_KHR_video_queue, VK_KHR_video_encode_queue, etc.).".to_string(), ) })?;