diff --git a/Cargo.lock b/Cargo.lock index 42a02c2..a11fcc1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1890,6 +1890,7 @@ dependencies = [ "md-5", "pdf-font", "pdf-object", + "pdf-object-collection", "pdf-page", "pdf-parser", "thiserror 2.0.12", @@ -1948,6 +1949,15 @@ dependencies = [ "image", "jpeg2k", "num-traits", + "thiserror 2.0.12", +] + +[[package]] +name = "pdf-object-collection" +version = "0.1.0" +dependencies = [ + "num-traits", + "pdf-object", "serde", "serde_json", "thiserror 2.0.12", @@ -1964,6 +1974,7 @@ dependencies = [ "pdf-font", "pdf-graphics", "pdf-object", + "pdf-object-collection", "pdf-postscript", "thiserror 2.0.12", ] diff --git a/crates/pdf-canvas/src/canvas_external_object_ops.rs b/crates/pdf-canvas/src/canvas_external_object_ops.rs index bbcb3ea..5de65c4 100644 --- a/crates/pdf-canvas/src/canvas_external_object_ops.rs +++ b/crates/pdf-canvas/src/canvas_external_object_ops.rs @@ -228,7 +228,7 @@ impl XObjectOps for PdfCanvas<'_, B> { .resources .ok_or(PdfCanvasError::MissingPageResources)?; - match resources.xobjects.get(xobject_name) { + match resources.xobject(xobject_name) { Some(XObject::Image(image)) => self.render_image_xobject(image), Some(XObject::Form(form)) => self.render_content_stream( &form.content_stream.operations, diff --git a/crates/pdf-canvas/src/canvas_graphics_state_ops.rs b/crates/pdf-canvas/src/canvas_graphics_state_ops.rs index 942ad6a..1b96394 100644 --- a/crates/pdf-canvas/src/canvas_graphics_state_ops.rs +++ b/crates/pdf-canvas/src/canvas_graphics_state_ops.rs @@ -72,8 +72,7 @@ impl GraphicsStateOps for PdfCanvas<'_, B> { .ok_or(PdfCanvasError::MissingPageResources)?; let states = resources - .external_graphics_states - .get(dict_name) + .external_graphics_state(dict_name) .ok_or_else(|| PdfCanvasError::GraphicsStateNotFound(dict_name.to_string()))?; for state in &states.params { diff --git a/crates/pdf-canvas/src/canvas_text_ops.rs b/crates/pdf-canvas/src/canvas_text_ops.rs index a3ee412..d7ca8bf 100644 --- a/crates/pdf-canvas/src/canvas_text_ops.rs +++ b/crates/pdf-canvas/src/canvas_text_ops.rs @@ -102,7 +102,7 @@ impl TextStateOps for PdfCanvas<'_, B> { self.current_state_mut()?.text_state.font_size = size; if let Some(resources) = self.current_state()?.resources - && let Some(font) = resources.fonts.get(font_name) + && let Some(font) = resources.font(font_name) { self.current_state_mut()?.text_state.font = Some(font); return Ok(()); diff --git a/crates/pdf-canvas/src/pdf_canvas.rs b/crates/pdf-canvas/src/pdf_canvas.rs index 44dbb20..ca4db12 100644 --- a/crates/pdf-canvas/src/pdf_canvas.rs +++ b/crates/pdf-canvas/src/pdf_canvas.rs @@ -462,7 +462,7 @@ impl<'a, B: CanvasBackend> PdfCanvas<'a, B> { let Some(pattern) = self .current_state()? .resources - .and_then(|r| r.patterns.get(pattern_name)) + .and_then(|r| r.pattern(pattern_name)) else { return Err(PdfCanvasError::PatternNotFound(pattern_name.to_string())); }; @@ -483,7 +483,7 @@ impl<'a, B: CanvasBackend> PdfCanvas<'a, B> { let Some(pattern) = self .current_state()? .resources - .and_then(|r| r.patterns.get(pattern_name)) + .and_then(|r| r.pattern(pattern_name)) else { return Err(PdfCanvasError::PatternNotFound(pattern_name.to_string())); }; diff --git a/crates/pdf-canvas/src/shading.rs b/crates/pdf-canvas/src/shading.rs index 9bf6e11..848ab2f 100644 --- a/crates/pdf-canvas/src/shading.rs +++ b/crates/pdf-canvas/src/shading.rs @@ -8,7 +8,7 @@ impl ShadingOps for PdfCanvas<'_, B> { fn paint_shading(&mut self, shading_name: &str) -> Result<(), Self::ErrorType> { let state = self.current_state()?; - let Some(shading) = state.resources.and_then(|r| r.shadings.get(shading_name)) else { + let Some(shading) = state.resources.and_then(|r| r.shading(shading_name)) else { return Err(PdfCanvasError::PatternNotFound(shading_name.to_string())); }; diff --git a/crates/pdf-content-stream/src/pdf_operator/operands.rs b/crates/pdf-content-stream/src/pdf_operator/operands.rs index 4631e95..b44ff11 100644 --- a/crates/pdf-content-stream/src/pdf_operator/operands.rs +++ b/crates/pdf-content-stream/src/pdf_operator/operands.rs @@ -1,6 +1,7 @@ -use std::{borrow::Cow, rc::Rc}; - -use pdf_object::{ObjectVariant, dictionary::Dictionary, object_resolver::UnimplementedResolver}; +use pdf_object::{ + dictionary::Dictionary, object_resolver::UnimplementedResolver, object_variant::ObjectVariant, +}; +use std::borrow::Cow; use crate::{TextElement, error::PdfOperatorError}; @@ -65,9 +66,9 @@ impl<'a> Operands<'a> { }) } - pub fn get_dictionary(&mut self) -> Result, PdfOperatorError> { + pub fn get_dictionary(&mut self) -> Result, PdfOperatorError> { self.take_and_map("Dictionary", |value| match value { - ObjectVariant::Dictionary(dict) => Ok(std::rc::Rc::clone(dict)), + ObjectVariant::Dictionary(dict) => Ok(dict.clone()), _ => Err(PdfOperatorError::InvalidOperandType { expected_type: "Dictionary", found_type: value.name(), diff --git a/crates/pdf-content-stream/src/pdf_operator/variants.rs b/crates/pdf-content-stream/src/pdf_operator/variants.rs index e8a0f62..eef673e 100644 --- a/crates/pdf-content-stream/src/pdf_operator/variants.rs +++ b/crates/pdf-content-stream/src/pdf_operator/variants.rs @@ -1,4 +1,5 @@ use pdf_object::object_resolver::UnimplementedResolver; +use pdf_object::object_variant::ObjectVariant; use pdf_parser::{parser::PdfParser, traits::CommentParser}; use pdf_tokenizer::PdfToken; @@ -162,7 +163,7 @@ impl PdfOperatorVariant { /// before parsing. fn parse_operator( name: &str, - operands: &mut Vec, + operands: &mut Vec, ) -> Result { let Some(descriptor) = get_operation_descriptor(name) else { return Err(PdfOperatorError::UnknownOperator(name.to_string())); diff --git a/crates/pdf-document/Cargo.toml b/crates/pdf-document/Cargo.toml index eca9270..be01c0d 100644 --- a/crates/pdf-document/Cargo.toml +++ b/crates/pdf-document/Cargo.toml @@ -4,6 +4,7 @@ version = "0.1.0" edition = "2024" [dependencies] +pdf-object-collection = { path = "../pdf-object-collection" } pdf-object = { path = "../pdf-object" } pdf-parser = { path = "../pdf-parser" } pdf-page = { path = "../pdf-page" } diff --git a/crates/pdf-document/src/document.rs b/crates/pdf-document/src/document.rs index 2628dc6..75a9c14 100644 --- a/crates/pdf-document/src/document.rs +++ b/crates/pdf-document/src/document.rs @@ -1,10 +1,7 @@ -use pdf_object::object_collection::ObjectCollection; use pdf_page::page::PdfPage; /// Represents a PDF document. pub struct PdfDocument { - /// The collection of all objects in the PDF document. - pub objects: ObjectCollection, /// The pages in the PDF document. pub pages: Vec, } diff --git a/crates/pdf-document/src/encryption.rs b/crates/pdf-document/src/encryption.rs index f7fb06c..cc41ad9 100644 --- a/crates/pdf-document/src/encryption.rs +++ b/crates/pdf-document/src/encryption.rs @@ -216,14 +216,15 @@ impl EncryptDictionary { mod tests { use super::*; use pdf_object::{ - ObjectVariant, dictionary::Dictionary, object_resolver::UnimplementedResolver, + dictionary::Dictionary, object_resolver::UnimplementedResolver, + object_variant::ObjectVariant, }; use std::collections::BTreeMap; fn make_dictionary(entries: Vec<(&str, ObjectVariant)>) -> Dictionary { let mut map = BTreeMap::new(); for (key, value) in entries { - map.insert(key.to_string(), Box::new(value)); + map.insert(key.to_string(), value); } Dictionary::new(map) } diff --git a/crates/pdf-document/src/reader.rs b/crates/pdf-document/src/reader.rs index ee4ca22..152c5fb 100644 --- a/crates/pdf-document/src/reader.rs +++ b/crates/pdf-document/src/reader.rs @@ -1,18 +1,25 @@ -use std::collections::{BTreeMap, HashSet}; +use std::collections::{BTreeMap, HashMap, HashSet}; use crate::decryption::{DecryptionError, DocumentDecryptor}; use crate::document::PdfDocument; +use pdf_object::indirect_object::IndirectObject; use pdf_object::object_resolver::{ObjectResolver, UnimplementedResolver}; use pdf_object::{ - ObjectVariant, cross_reference_table::{CrossReferenceEntry, CrossReferenceStatus, CrossReferenceTable}, + dictionary::Dictionary, error::ObjectError, - object_collection::ObjectCollection, + object_variant::ObjectVariant, stream::StreamObject, trailer::Trailer, - traits::FromDictionary, }; +use pdf_object_collection::object_collection::ObjectCollection; +use pdf_page::content_stream::ContentStream; +use pdf_page::media_box::MediaBox; +use pdf_page::page::PdfPage; use pdf_page::pages::{PdfPages, PdfPagesError}; +use pdf_page::resource::Resource; +use pdf_page::resource_cache::ResourceCache; +use pdf_page::resources::Resources; use pdf_parser::{ error::ParserError, header::HeaderError, parser::PdfParser, traits::HeaderParser, }; @@ -60,7 +67,7 @@ impl PdfReader { /// 4. Loads all objects referenced in the xref table /// 5. Extracts the document catalog and page tree /// - /// # Arguments + /// # Parameters /// /// - `input`: Raw PDF file bytes /// - `password`: The document password (user or owner password) @@ -94,10 +101,13 @@ impl PdfReader { } // Build the cross-reference index - let CrossReferenceTable { entries, trailer } = build_xref_index(&mut parser)?; + let CrossReferenceTable { + entries, + mut trailer, + } = build_xref_index(&mut parser)?; // Check for encryption and handle it before loading other objects. - let decryptor = if let Some(encrypt_ref) = trailer.dictionary.get("Encrypt") { + let decryptor = if let Some(encrypt_ref) = trailer.dictionary.take("Encrypt") { // Load the encryption object first (it's unencrypted per PDF spec). let encryption = load_encrypt_dictionary(encrypt_ref, &entries, &mut parser)?; @@ -119,15 +129,12 @@ impl PdfReader { }; // Load all objects from the xref table, decrypting streams if needed - let objects = load_objects_with_decryption(&entries, &mut parser, decryptor.as_ref())?; + let mut objects = load_objects_with_decryption(&entries, &mut parser, decryptor.as_ref())?; // Extract catalog and page tree - let pages = extract_page_tree(&trailer, &objects)?; + let pages = extract_page_tree(&trailer, &mut objects)?; - Ok(PdfDocument { - objects, - pages: pages.pages, - }) + Ok(PdfDocument { pages }) } } @@ -231,17 +238,29 @@ fn merge_xref_chain( Ok(CrossReferenceTable::new(entries, trailer)) } -/// Extracts the page tree from the document catalog. -/// -/// Follows the chain: Trailer → /Root (Catalog) → /Pages (Page Tree) -/// -/// # Returns +#[derive(Default)] +struct ResourceCacheWrapper { + cache: HashMap, +} + +impl ResourceCache for ResourceCacheWrapper { + fn get(&self, obj_num: &usize) -> Option<&Resource> { + self.cache.get(obj_num) + } + + fn insert(&mut self, obj_num: usize, resource: Resource) { + self.cache.insert(obj_num, resource); + } +} +/// Extracts the page tree from the document catalog using a shared resource cache. /// -/// Returns a `PdfPages` structure containing the document's page hierarchy. +/// Follows the chain: Trailer → /Root (Catalog) → /Pages (Page Tree). +/// A `ResourceCache` is threaded through the traversal so that resources referenced +/// by the same PDF object number are parsed once and shared via `Rc`. fn extract_page_tree( trailer: &Trailer, - objects: &dyn ObjectResolver, -) -> Result { + objects: &mut dyn ObjectResolver, +) -> Result, PdfReaderError> { // Get the document catalog via the /Root entry in the trailer let catalog = trailer .dictionary @@ -251,8 +270,48 @@ fn extract_page_tree( // Get the page tree via the /Pages entry in the catalog let pages_dict = catalog.get_or_err("Pages")?.try_dictionary(objects)?; - // Parse the page tree structure - PdfPages::from_dictionary(pages_dict, objects).map_err(Into::into) + let mut cache = ResourceCacheWrapper::default(); + flatten_page_tree(pages_dict, objects, &mut cache).map_err(Into::into) +} + +/// Recursively traverses the PDF page tree, constructing `PdfPage` objects +/// with shared resources via the provided `ResourceCache`. +fn flatten_page_tree( + dictionary: &Dictionary, + objects: &dyn ObjectResolver, + cache: &mut dyn ResourceCache, +) -> Result, PdfPagesError> { + let kids_array = dictionary.get_or_err("Kids")?.try_array(objects)?; + + let mut pages = vec![]; + + for value in kids_array { + let dictionary = value.try_dictionary(objects)?; + + match dictionary.get_or_err("Type")?.try_str(objects)?.as_ref() { + PdfPage::KEY => { + let contents = ContentStream::from_dictionary(dictionary, objects)?; + let media_box = MediaBox::from_dictionary(dictionary, objects)?; + let resources = Resources::read(dictionary, objects, cache)?; + + pages.push(PdfPage { + contents, + media_box, + resources, + }); + } + PdfPages::KEY => { + pages.extend(flatten_page_tree(dictionary, objects, cache)?); + } + obj_type => { + return Err(PdfPagesError::UnexpectedObjectTypeInKids { + found_type: obj_type.to_string(), + }); + } + } + } + + Ok(pages) } /// Loads and parses the encryption dictionary from the PDF. @@ -265,7 +324,7 @@ fn extract_page_tree( /// to locate and parse that object first before we can understand how to decrypt /// other objects. /// -/// # Arguments +/// # Parameters /// /// - `encrypt_ref`: The `/Encrypt` entry from the trailer (usually an indirect reference). /// - `entries`: The cross-reference table entries for locating objects. @@ -275,7 +334,7 @@ fn extract_page_tree( /// /// Returns an `EncryptDictionary` containing the encryption parameters. fn load_encrypt_dictionary( - encrypt_ref: &ObjectVariant, + encrypt_ref: ObjectVariant, entries: &BTreeMap, parser: &mut PdfParser, ) -> Result { @@ -284,11 +343,11 @@ fn load_encrypt_dictionary( ObjectVariant::Reference(obj_num) => { // Look up the object in the xref table let entry = entries - .get(obj_num) - .ok_or(ObjectError::FailedResolveObjectReference { obj_num: *obj_num })?; + .get(&obj_num) + .ok_or(ObjectError::FailedResolveObjectReference { obj_num })?; if entry.status != CrossReferenceStatus::Normal { - return Err(ObjectError::FailedResolveObjectReference { obj_num: *obj_num }.into()); + return Err(ObjectError::FailedResolveObjectReference { obj_num }.into()); } // Parse the encryption object at the specified offset @@ -297,8 +356,8 @@ fn load_encrypt_dictionary( // Extract the dictionary from the parsed object match object { ObjectVariant::Dictionary(dict) => dict, - ObjectVariant::IndirectObject(indirect) => match indirect.object.as_ref() { - Some(ObjectVariant::Dictionary(dict)) => std::rc::Rc::clone(dict), + ObjectVariant::IndirectObject(indirect) => match indirect.object { + Some(ObjectVariant::Dictionary(dict)) => dict, _ => { return Err(ObjectError::FailedResolveDictionaryObject { resolved_type: "IndirectObject", @@ -314,7 +373,7 @@ fn load_encrypt_dictionary( } } } - ObjectVariant::Dictionary(dict) => std::rc::Rc::clone(dict), + ObjectVariant::Dictionary(dict) => dict, other => { return Err(ObjectError::FailedResolveDictionaryObject { resolved_type: other.name(), @@ -332,7 +391,7 @@ fn load_encrypt_dictionary( /// The /ID entry is an array of two byte strings that uniquely identify the document. /// The first element is used for encryption key derivation. /// -/// # Arguments +/// # Parameters /// /// - `trailer`: The PDF trailer containing the /ID entry. /// @@ -357,7 +416,7 @@ fn extract_document_id(trailer: &Trailer) -> Result, PdfReaderError> { /// is provided. Only streams are decrypted; strings within dictionaries are decrypted /// separately during object resolution. /// -/// # Arguments +/// # Parameters /// /// - `entries`: The cross-reference table entries. /// - `parser`: The PDF parser for reading object data. @@ -413,13 +472,13 @@ fn decrypt_object( match object { ObjectVariant::IndirectObject(indirect) => { // Check if the inner object is a stream - if let Some(ObjectVariant::Stream(stream)) = &indirect.object { - let decrypted_stream = decrypt_stream_object(stream, decryptor)?; + if let Some(ObjectVariant::Stream(stream)) = indirect.object { + let decrypted_stream = decrypt_stream_object(&stream, decryptor)?; // Create a new IndirectObject with the decrypted stream - let new_indirect = pdf_object::indirect_object::IndirectObject::new( + let new_indirect = IndirectObject::new( indirect.object_number, indirect.generation_number, - Some(ObjectVariant::Stream(std::rc::Rc::new(decrypted_stream))), + Some(ObjectVariant::Stream(decrypted_stream)), ); return Ok(ObjectVariant::IndirectObject(Box::new(new_indirect))); } @@ -428,10 +487,9 @@ fn decrypt_object( } ObjectVariant::Stream(stream) => { let decrypted = decrypt_stream_object(&stream, decryptor)?; - Ok(ObjectVariant::Stream(std::rc::Rc::new(decrypted))) + Ok(ObjectVariant::Stream(decrypted)) } - // Other objects pass through unchanged - // (string decryption would be handled separately during resolution) + // Other objects pass through unchanged. other => Ok(other), } } @@ -455,7 +513,7 @@ fn decrypt_stream_object( Ok(StreamObject::new( stream.object_number, stream.generation_number, - std::rc::Rc::clone(&stream.dictionary), + stream.dictionary.clone(), decrypted_data, stream.filters().cloned(), )) diff --git a/crates/pdf-font/src/encoding.rs b/crates/pdf-font/src/encoding.rs index 8583989..d4aad67 100644 --- a/crates/pdf-font/src/encoding.rs +++ b/crates/pdf-font/src/encoding.rs @@ -1,6 +1,6 @@ use std::borrow::Cow; -use pdf_object::{ObjectVariant, object_resolver::ObjectResolver, traits::FromDictionary}; +use pdf_object::{object_resolver::ObjectResolver, object_variant::ObjectVariant}; use thiserror::Error; /// Represents the base encoding of a font. @@ -103,16 +103,11 @@ impl Encoding { } } -impl FromDictionary for Encoding { - const KEY: &'static str = "Encoding"; - type ResultType = Self; - - type ErrorType = EncodingReadError; - - fn from_dictionary( +impl Encoding { + pub fn from_dictionary( dictionary: &pdf_object::dictionary::Dictionary, objects: &dyn ObjectResolver, - ) -> Result { + ) -> Result { let mut encoding = match dictionary.get("BaseEncoding") { Some(base) => { let base_encoding = FontEncoding::from(base.try_str(objects)?); diff --git a/crates/pdf-font/src/font.rs b/crates/pdf-font/src/font.rs index 9a4fdf9..9b4f0d7 100644 --- a/crates/pdf-font/src/font.rs +++ b/crates/pdf-font/src/font.rs @@ -1,9 +1,6 @@ use std::borrow::Cow; -use pdf_object::{ - dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, - traits::FromDictionary, -}; +use pdf_object::{dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver}; use thiserror::Error; use crate::{ @@ -44,15 +41,13 @@ pub enum Font { TrueType(TrueTypeFont), } -impl FromDictionary for Font { - const KEY: &'static str = "Font"; - type ResultType = Self; - type ErrorType = FontError; +impl Font { + pub const KEY: &'static str = "Font"; - fn from_dictionary( + pub fn from_dictionary( dictionary: &Dictionary, objects: &dyn ObjectResolver, - ) -> Result { + ) -> Result { // Determine the font subtype from the dictionary. let subtype = dictionary.get_or_err("Subtype")?.try_str(objects)?; diff --git a/crates/pdf-font/src/glyph_widths_map.rs b/crates/pdf-font/src/glyph_widths_map.rs index 5c4ff85..8f1d039 100644 --- a/crates/pdf-font/src/glyph_widths_map.rs +++ b/crates/pdf-font/src/glyph_widths_map.rs @@ -1,4 +1,6 @@ -use pdf_object::{ObjectVariant, error::ObjectError, object_resolver::ObjectResolver}; +use pdf_object::{ + error::ObjectError, object_resolver::ObjectResolver, object_variant::ObjectVariant, +}; use std::collections::BTreeMap; use thiserror::Error; @@ -242,7 +244,7 @@ impl GlyphWidthsMap { #[allow(clippy::unwrap_used, clippy::expect_used)] mod tests { use super::*; - use pdf_object::{ObjectVariant, object_resolver::UnimplementedResolver}; + use pdf_object::{object_resolver::UnimplementedResolver, object_variant::ObjectVariant}; // Helper to create a pdf_object::Value::Number for i64 fn num_i64(n: i64) -> ObjectVariant { diff --git a/crates/pdf-font/src/simple_font_glyph_map.rs b/crates/pdf-font/src/simple_font_glyph_map.rs index 95b0eb0..4daf0ca 100644 --- a/crates/pdf-font/src/simple_font_glyph_map.rs +++ b/crates/pdf-font/src/simple_font_glyph_map.rs @@ -1,20 +1,18 @@ use std::collections::HashMap; -use pdf_object::{dictionary::Dictionary, object_resolver::ObjectResolver, traits::FromDictionary}; +use pdf_object::{dictionary::Dictionary, object_resolver::ObjectResolver}; use crate::font::FontError; pub struct SimpleFontGlyphWidthsMap; -impl FromDictionary for SimpleFontGlyphWidthsMap { +impl SimpleFontGlyphWidthsMap { const KEY: &'static str = "Widths"; - type ResultType = Option>; - type ErrorType = FontError; - fn from_dictionary( + pub fn from_dictionary( dictionary: &Dictionary, objects: &dyn ObjectResolver, - ) -> Result { + ) -> Result>, FontError> { // Read required fields /FirstChar entry. let first_char = dictionary .get_or_err("FirstChar")? diff --git a/crates/pdf-font/src/true_type_font.rs b/crates/pdf-font/src/true_type_font.rs index 6d9ffbf..b0fcf3a 100644 --- a/crates/pdf-font/src/true_type_font.rs +++ b/crates/pdf-font/src/true_type_font.rs @@ -1,9 +1,6 @@ use std::{borrow::Cow, collections::HashMap}; -use pdf_object::{ - dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, - traits::FromDictionary, -}; +use pdf_object::{dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver}; use crate::{flags::FontFlags, font::FontError, simple_font_glyph_map::SimpleFontGlyphWidthsMap}; @@ -14,15 +11,11 @@ pub struct TrueTypeFont { pub widths: Option>, } -impl FromDictionary for TrueTypeFont { - const KEY: &'static str = "Font"; - type ResultType = Self; - type ErrorType = FontError; - - fn from_dictionary( +impl TrueTypeFont { + pub fn from_dictionary( dictionary: &Dictionary, objects: &dyn ObjectResolver, - ) -> Result { + ) -> Result { // Read embedded font file. let font_file = Self::read_font_file(dictionary, objects)?.to_vec(); // Read the `/Widths` entry. diff --git a/crates/pdf-font/src/type0_font.rs b/crates/pdf-font/src/type0_font.rs index 79d013b..e398de1 100644 --- a/crates/pdf-font/src/type0_font.rs +++ b/crates/pdf-font/src/type0_font.rs @@ -1,7 +1,4 @@ -use pdf_object::{ - dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, - traits::FromDictionary, -}; +use pdf_object::{dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver}; use crate::{ encoding::FontEncoding, @@ -56,16 +53,11 @@ pub enum Type0FontError { InvalidDescendantFonts(&'static str), } -impl FromDictionary for Type0Font { - const KEY: &'static str = "Font"; - - type ResultType = Self; - type ErrorType = FontError; - - fn from_dictionary( +impl Type0Font { + pub fn from_dictionary( dictionary: &Dictionary, objects: &dyn ObjectResolver, - ) -> Result { + ) -> Result { // Extract the optional `/Encoding` entry which specifies the CMap used to map // character codes to CIDs. Common values include "Identity-H" and "Identity-V". let encoding = dictionary diff --git a/crates/pdf-font/src/type1_font.rs b/crates/pdf-font/src/type1_font.rs index de9d57c..c6d2f00 100644 --- a/crates/pdf-font/src/type1_font.rs +++ b/crates/pdf-font/src/type1_font.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use pdf_object::{ - ObjectVariant, dictionary::Dictionary, object_resolver::ObjectResolver, traits::FromDictionary, + dictionary::Dictionary, object_resolver::ObjectResolver, object_variant::ObjectVariant, }; use crate::{ @@ -24,15 +24,11 @@ pub struct Type1Font { pub encoding: Encoding, } -impl FromDictionary for Type1Font { - const KEY: &'static str = "Font"; - type ResultType = Self; - type ErrorType = FontError; - - fn from_dictionary( +impl Type1Font { + pub fn from_dictionary( dictionary: &Dictionary, objects: &dyn ObjectResolver, - ) -> Result { + ) -> Result { // Read embedded font file. let font_file = Self::read_font_file(dictionary, objects)?; diff --git a/crates/pdf-font/src/type3_font.rs b/crates/pdf-font/src/type3_font.rs index 56625d4..0bf093a 100644 --- a/crates/pdf-font/src/type3_font.rs +++ b/crates/pdf-font/src/type3_font.rs @@ -2,8 +2,8 @@ use std::collections::HashMap; use pdf_content_stream::{error::PdfOperatorError, pdf_operator::PdfOperatorVariant}; use pdf_object::{ - ObjectVariant, dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, - traits::FromDictionary, + dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, + object_variant::ObjectVariant, }; use thiserror::Error; @@ -40,15 +40,11 @@ pub enum Type3FontError { EncodingReadError(#[from] EncodingReadError), } -impl FromDictionary for Type3Font { - const KEY: &'static str = "Font"; - type ResultType = Self; - type ErrorType = Type3FontError; - - fn from_dictionary( +impl Type3Font { + pub fn from_dictionary( dictionary: &Dictionary, objects: &dyn ObjectResolver, - ) -> Result { + ) -> Result { let font_matrix = dictionary .get_or_err("FontMatrix")? .try_array_of::(objects)?; diff --git a/crates/pdf-object-collection/Cargo.toml b/crates/pdf-object-collection/Cargo.toml new file mode 100644 index 0000000..0a616d5 --- /dev/null +++ b/crates/pdf-object-collection/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "pdf-object-collection" +version = "0.1.0" +edition = "2024" + +# Inherit all lint configurations from the workspace root +[lints] +workspace = true + +[features] +default = [] +json = ["serde", "serde_json"] + +[dependencies] +pdf-object = { path = "../pdf-object" } +num-traits = "0.2.19" +thiserror = "2.0.12" +serde = { version = "1.0", features = ["derive"], optional = true } +serde_json = { version = "1.0", optional = true } diff --git a/crates/pdf-object-collection/src/lib.rs b/crates/pdf-object-collection/src/lib.rs new file mode 100644 index 0000000..18538f2 --- /dev/null +++ b/crates/pdf-object-collection/src/lib.rs @@ -0,0 +1 @@ +pub mod object_collection; diff --git a/crates/pdf-object/src/object_collection.rs b/crates/pdf-object-collection/src/object_collection.rs similarity index 97% rename from crates/pdf-object/src/object_collection.rs rename to crates/pdf-object-collection/src/object_collection.rs index a6968cd..b995a63 100644 --- a/crates/pdf-object/src/object_collection.rs +++ b/crates/pdf-object-collection/src/object_collection.rs @@ -1,7 +1,6 @@ -use crate::{ - ObjectVariant, error::ObjectError, indirect_object::IndirectObject, - object_resolver::ObjectResolver, -}; +use pdf_object::indirect_object::IndirectObject; +use pdf_object::object_resolver::ObjectResolver; +use pdf_object::{error::ObjectError, object_variant::ObjectVariant}; use std::collections::HashMap; #[cfg(feature = "json")] @@ -225,7 +224,7 @@ impl ObjectCollection { /// Converts a `Dictionary` to a `serde_json::Value`. #[cfg(feature = "json")] - fn dictionary_to_json(dict: &crate::dictionary::Dictionary) -> JsonValue { + fn dictionary_to_json(dict: &pdf_object::dictionary::Dictionary) -> JsonValue { let mut map = serde_json::Map::new(); for (key, value) in &dict.dictionary { map.insert(key.clone(), Self::object_variant_to_json(value.as_ref())); diff --git a/crates/pdf-object/Cargo.toml b/crates/pdf-object/Cargo.toml index 03fddd6..890ed98 100644 --- a/crates/pdf-object/Cargo.toml +++ b/crates/pdf-object/Cargo.toml @@ -9,13 +9,10 @@ workspace = true [features] default = [] -json = ["serde", "serde_json"] [dependencies] num-traits = "0.2.19" thiserror = "2.0.12" flate2 = "1.1.1" jpeg2k = "0.10.1" -image = { version = "0.25.9", default-features = false, features = ["jpeg"] } -serde = { version = "1.0", features = ["derive"], optional = true } -serde_json = { version = "1.0", optional = true } \ No newline at end of file +image = { version = "0.25.9", default-features = false, features = ["jpeg"] } \ No newline at end of file diff --git a/crates/pdf-object/src/dictionary.rs b/crates/pdf-object/src/dictionary.rs index e4db4a7..f35658d 100644 --- a/crates/pdf-object/src/dictionary.rs +++ b/crates/pdf-object/src/dictionary.rs @@ -1,33 +1,56 @@ use std::collections::BTreeMap; -use crate::{ObjectVariant, error::ObjectError}; +use crate::{error::ObjectError, object_variant::ObjectVariant}; #[derive(Debug, PartialEq, Clone)] pub struct Dictionary { - pub dictionary: BTreeMap>, + pub dictionary: BTreeMap, + pub object_number: usize, } impl Dictionary { - pub fn new(dictionary: BTreeMap>) -> Self { - Dictionary { dictionary } + pub fn new(dictionary: BTreeMap) -> Self { + Dictionary { + dictionary, + object_number: 0, + } } /// Returns a reference to the value associated with the given key, if present. /// - /// Parameters: - /// - `key`: The dictionary entry name to look up. + /// # Parameters: + /// + /// - `key`: The dictionary entry name to look up. + /// + /// # Returns /// - /// Returns `Some(&ObjectVariant)` when the key exists, or `None` if it does not. + /// Returns an optional reference to [`ObjectVariant`] when the key exists, or `None` if it does not. pub fn get(&self, key: &str) -> Option<&ObjectVariant> { - self.dictionary.get(key).map(|b| b.as_ref()) + self.dictionary.get(key) + } + + /// Removes and returns the value associated with the given key, if present. + /// + /// # Parameters + /// + /// - `key`: The dictionary entry name to remove and return. + /// + /// # Returns + /// + /// Returns an `Option` containing the [`ObjectVariant`] if the key exists, or `None` if it does not. + pub fn take(&mut self, key: &str) -> Option { + self.dictionary.remove(key) } - /// Returns a reference to the value for `key`, or an error if the key is missing. + /// Returns a reference to the value associated with the given key, or an error if the key is missing. + /// + /// # Parameters + /// + /// - `key`: The dictionary entry name to look up. /// - /// This is a convenience for required entries where absence should be treated as an error. + /// # Returns /// - /// Errors - /// - `ObjectError::MissingRequiredKey` if the key is not found in the dictionary. + /// Returns `Ok(&ObjectVariant)` if the key exists, or an [`ObjectError::MissingRequiredKey`] if it does not. pub fn get_or_err(&self, key: &str) -> Result<&ObjectVariant, ObjectError> { self.get(key) .ok_or_else(|| ObjectError::MissingRequiredKey { diff --git a/crates/pdf-object/src/filter.rs b/crates/pdf-object/src/filter.rs index 84e6ead..c801318 100644 --- a/crates/pdf-object/src/filter.rs +++ b/crates/pdf-object/src/filter.rs @@ -1,8 +1,8 @@ use std::borrow::Cow; use crate::{ - ObjectVariant, dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, - traits::FromDictionary, + dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, + object_variant::ObjectVariant, }; /// Represents the compression filter applied to a stream or image in a PDF. @@ -60,16 +60,13 @@ impl From<&str> for Filter { /// /// This corresponds to the `/Filter` entry in a PDF Image XObject's dictionary. /// The filter specifies the algorithm used to decompress the raw image data. -impl FromDictionary for Filter { +impl Filter { const KEY: &'static str = "Filter"; - type ResultType = Option>; - type ErrorType = ObjectError; - - fn from_dictionary( + pub fn from_dictionary( dictionary: &Dictionary, objects: &dyn ObjectResolver, - ) -> Result { + ) -> Result>, ObjectError> { let Some(filter_obj) = dictionary.get(Self::KEY) else { return Ok(None); }; diff --git a/crates/pdf-object/src/indirect_object.rs b/crates/pdf-object/src/indirect_object.rs index 9f5aac1..8589642 100644 --- a/crates/pdf-object/src/indirect_object.rs +++ b/crates/pdf-object/src/indirect_object.rs @@ -1,4 +1,4 @@ -use crate::ObjectVariant; +use crate::object_variant::ObjectVariant; /// Represents an indirect object in a PDF file. /// An indirect object is a data structure that can be referenced by other objects. diff --git a/crates/pdf-object/src/lib.rs b/crates/pdf-object/src/lib.rs index f884b6b..b45d2c7 100644 --- a/crates/pdf-object/src/lib.rs +++ b/crates/pdf-object/src/lib.rs @@ -3,12 +3,8 @@ pub mod dictionary; pub mod error; pub mod filter; pub mod indirect_object; -pub mod object_collection; pub mod object_resolver; pub mod object_variant; pub mod stream; pub mod trailer; -pub mod traits; pub mod version; - -pub use object_variant::ObjectVariant; diff --git a/crates/pdf-object/src/object_resolver.rs b/crates/pdf-object/src/object_resolver.rs index 79a5c32..04b6cee 100644 --- a/crates/pdf-object/src/object_resolver.rs +++ b/crates/pdf-object/src/object_resolver.rs @@ -1,4 +1,4 @@ -use crate::{ObjectVariant, error::ObjectError}; +use crate::{error::ObjectError, object_variant::ObjectVariant}; pub trait ObjectResolver { /// Resolves an object reference to its underlying object. diff --git a/crates/pdf-object/src/object_variant.rs b/crates/pdf-object/src/object_variant.rs index 74b93bb..dd44571 100644 --- a/crates/pdf-object/src/object_variant.rs +++ b/crates/pdf-object/src/object_variant.rs @@ -1,4 +1,4 @@ -use std::{borrow::Cow, rc::Rc}; +use std::borrow::Cow; use num_traits::FromPrimitive; @@ -17,7 +17,7 @@ use crate::trailer::Trailer; #[derive(Debug, PartialEq, Clone)] pub enum ObjectVariant { /// A PDF dictionary object. - Dictionary(Rc), + Dictionary(Box), /// A PDF array of objects. Array(Vec), /// A literal string (enclosed in parentheses in PDF syntax). @@ -47,7 +47,7 @@ pub enum ObjectVariant { /// An indirect reference pointing to an object number. Reference(usize), /// A stream object, which may have associated dictionary and data. - Stream(Rc), + Stream(StreamObject), } impl ObjectVariant { @@ -58,7 +58,7 @@ impl ObjectVariant { /// /// # Parameters /// - /// - `objects`: A reference to the `ObjectCollection` used for resolving references. + /// - `objects`: A reference to the `ObjectResolver` used for resolving references. /// /// # Returns /// @@ -88,7 +88,7 @@ impl ObjectVariant { /// /// # Parameters /// - /// - `objects`: A reference to the `ObjectCollection` used for resolving references. + /// - `objects`: A reference to the `ObjectResolver` used for resolving references. /// /// # Returns /// @@ -105,7 +105,7 @@ impl ObjectVariant { }; match object { - ObjectVariant::Stream(s) => Ok(s.as_ref()), + ObjectVariant::Stream(s) => Ok(s), _ => Err(ObjectError::TypeMismatch("Stream", object.name())), } } @@ -117,7 +117,7 @@ impl ObjectVariant { /// /// # Parameters /// - /// - `objects`: A reference to the `ObjectCollection` used for resolving references. + /// - `objects`: A reference to the `ObjectResolver` used for resolving references. /// /// # Returns /// @@ -146,7 +146,7 @@ impl ObjectVariant { /// /// # Parameters /// - /// - `objects`: A reference to the `ObjectCollection` used for resolving references. + /// - `objects`: A reference to the `ObjectResolver` used for resolving references. /// /// # Returns /// @@ -212,7 +212,7 @@ impl ObjectVariant { /// /// # Parameters /// - /// - `objects`: A reference to the `ObjectCollection` used for resolving references. + /// - `objects`: A reference to the `ObjectResolver` used for resolving references. /// /// # Returns /// @@ -262,7 +262,7 @@ impl ObjectVariant { /// /// # Parameters /// - /// - `objects`: A reference to the `ObjectCollection` used for resolving references. + /// - `objects`: A reference to the `ObjectResolver` used for resolving references. /// /// # Returns /// @@ -356,10 +356,15 @@ impl ObjectVariant { } /// Returns the object number if this is a `Reference`. - pub fn try_reference(&self) -> Result { + pub fn try_object_number(&self) -> Result { match self { ObjectVariant::Reference(value) => Ok(*value), - _ => Err(ObjectError::TypeMismatch("Reference", self.name())), + ObjectVariant::Dictionary(value) => Ok(value.object_number), + ObjectVariant::Stream(value) => Ok(value.object_number), + _ => Err(ObjectError::TypeMismatch( + "Reference or Object with number", + self.name(), + )), } } diff --git a/crates/pdf-object/src/stream.rs b/crates/pdf-object/src/stream.rs index 135822d..70d2fb9 100644 --- a/crates/pdf-object/src/stream.rs +++ b/crates/pdf-object/src/stream.rs @@ -1,7 +1,6 @@ -use std::{borrow::Cow, rc::Rc}; - use crate::error::ObjectError; use crate::{dictionary::Dictionary, filter::Filter}; +use std::borrow::Cow; /// Represents a PDF stream object. /// @@ -16,7 +15,7 @@ pub struct StreamObject { /// The generation number, used for PDF incremental updates. pub generation_number: usize, /// The dictionary associated with this stream. - pub dictionary: Rc, + pub dictionary: Box, /// The raw, uncompressed, byte data of the stream. data: Vec, /// The filters applied to the stream data. @@ -28,7 +27,7 @@ impl StreamObject { pub fn new( object_number: usize, generation_number: usize, - dictionary: Rc, + dictionary: Box, data: Vec, filters: Option>, ) -> Self { diff --git a/crates/pdf-object/src/trailer.rs b/crates/pdf-object/src/trailer.rs index a57d95c..31b0f42 100644 --- a/crates/pdf-object/src/trailer.rs +++ b/crates/pdf-object/src/trailer.rs @@ -1,5 +1,3 @@ -use std::rc::Rc; - use crate::dictionary::Dictionary; /// Represents the trailer of a PDF document. @@ -10,7 +8,7 @@ use crate::dictionary::Dictionary; #[derive(Debug, PartialEq, Clone)] pub struct Trailer { /// The dictionary object containing the trailer information. - pub dictionary: Rc, + pub dictionary: Box, /// The byte offset from the beginning of the file to the start of /// the cross-reference table (`xref` section), used for locating /// objects within the PDF. @@ -18,7 +16,7 @@ pub struct Trailer { } impl Trailer { - pub fn new(dictionary: Rc, offset: usize) -> Self { + pub fn new(dictionary: Box, offset: usize) -> Self { Trailer { dictionary, offset } } } diff --git a/crates/pdf-object/src/traits.rs b/crates/pdf-object/src/traits.rs deleted file mode 100644 index f19ed99..0000000 --- a/crates/pdf-object/src/traits.rs +++ /dev/null @@ -1,34 +0,0 @@ -use std::str; - -use crate::{dictionary::Dictionary, object_resolver::ObjectResolver}; - -/// A trait for types that can be constructed from a PDF [`Dictionary`]. -/// -/// This trait is used to abstract the process of extracting and parsing -/// specific entries from a PDF dictionary, potentially resolving indirect -/// objects using an [`ObjectCollection`]. -pub trait FromDictionary { - /// The key in the PDF dictionary that this type is responsible for parsing. - const KEY: &'static str; - - /// The type that will be produced after successfully parsing the dictionary entry. - type ResultType; - - /// The type of error that can occur during parsing. - type ErrorType; - - /// Attempts to construct an instance of `Self::ResultType` from the given PDF dictionary. - /// - /// # Parameters - /// - /// - `dictionary`: A reference to the PDF [`Dictionary`] to parse. - /// - `objects`: A reference to the [`ObjectCollection`] used to resolve any indirect objects. - /// - /// # Returns - /// - /// A `Result` containing the parsed `Self::ResultType` on success, or a [`ErrorType`] on failure. - fn from_dictionary( - dictionary: &Dictionary, - objects: &dyn ObjectResolver, - ) -> Result; -} diff --git a/crates/pdf-page/Cargo.toml b/crates/pdf-page/Cargo.toml index 8dcf550..4e6601f 100644 --- a/crates/pdf-page/Cargo.toml +++ b/crates/pdf-page/Cargo.toml @@ -9,6 +9,7 @@ workspace = true [dependencies] pdf-object = { path = "../pdf-object" } +pdf-object-collection = { path = "../pdf-object-collection" } pdf-content-stream = { path = "../pdf-content-stream" } pdf-font = { path = "../pdf-font" } pdf-graphics = { path = "../pdf-graphics" } diff --git a/crates/pdf-page/src/color_space.rs b/crates/pdf-page/src/color_space.rs index 052ec3d..8ba4a1c 100644 --- a/crates/pdf-page/src/color_space.rs +++ b/crates/pdf-page/src/color_space.rs @@ -1,6 +1,6 @@ use pdf_object::{ - ObjectVariant, dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, - traits::FromDictionary, + dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, + object_variant::ObjectVariant, }; use thiserror::Error; @@ -112,15 +112,13 @@ impl ColorSpace { } } -impl FromDictionary for ColorSpace { +impl ColorSpace { const KEY: &'static str = "ColorSpace"; - type ResultType = Option; - type ErrorType = ColorSpaceError; - fn from_dictionary( + pub fn from_dictionary( dictionary: &Dictionary, objects: &dyn ObjectResolver, - ) -> Result { + ) -> Result, ColorSpaceError> { let Some(color_space_obj) = dictionary.get(Self::KEY) else { return Ok(None); }; diff --git a/crates/pdf-page/src/content_stream.rs b/crates/pdf-page/src/content_stream.rs index 8ce31f9..bccbd08 100644 --- a/crates/pdf-page/src/content_stream.rs +++ b/crates/pdf-page/src/content_stream.rs @@ -1,19 +1,8 @@ use pdf_content_stream::{error::PdfOperatorError, pdf_operator::PdfOperatorVariant}; use pdf_object::{ - ObjectVariant, dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, - traits::FromDictionary, + dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, + object_variant::ObjectVariant, }; -use thiserror::Error; - -#[derive(Debug, Error)] -pub enum ContentStreamReadError { - #[error("Unsupported entry type for Content Stream: '{found_type}'")] - UnsupportedEntryType { found_type: &'static str }, - #[error("Error parsing content stream operators: {0}")] - ContentStreamError(#[from] PdfOperatorError), - #[error("{0}")] - ObjectError(#[from] ObjectError), -} /// Represents the content stream of a PDF page, containing a sequence /// of drawing operators. @@ -26,7 +15,7 @@ pub struct ContentStream { fn process_content_stream_array( array: &[ObjectVariant], objects: &dyn ObjectResolver, -) -> Result, ContentStreamReadError> { +) -> Result, PdfOperatorError> { let mut concatenated_ops = Vec::new(); for value_in_array in array.iter() { let data = value_in_array.try_stream(objects)?.data()?; @@ -36,26 +25,21 @@ fn process_content_stream_array( Ok(concatenated_ops) } -impl FromDictionary for ContentStream { - const KEY: &'static str = "Contents"; - type ResultType = Option; - type ErrorType = ContentStreamReadError; - - fn from_dictionary( +impl ContentStream { + pub fn from_dictionary( dictionary: &Dictionary, objects: &dyn ObjectResolver, - ) -> Result { + ) -> Result, PdfOperatorError> { + const KEY: &str = "Contents"; + // Get the optional `/Contents` entry from the page dictionary. - let Some(contents) = dictionary.get(Self::KEY) else { + let Some(contents) = dictionary.get(KEY) else { return Ok(None); }; - // Resolve the /Contents entry if it's an indirect reference. - let contents = objects.resolve_object(contents)?; - // Process the resolved /Contents object. // It should be a Stream or an Array whose payload is one of these. - let operations = match &contents { + let operations = match objects.resolve_object(contents)? { ObjectVariant::Stream(stream) => { let data = stream.data()?; PdfOperatorVariant::from(&data)? @@ -65,9 +49,7 @@ impl FromDictionary for ContentStream { process_content_stream_array(array_obj, objects)? } other => { - return Err(ContentStreamReadError::UnsupportedEntryType { - found_type: other.name(), - }); + return Err(ObjectError::TypeMismatch("Stream or Array", other.name()).into()); } }; diff --git a/crates/pdf-page/src/external_graphics_state.rs b/crates/pdf-page/src/external_graphics_state.rs index c03ac27..96b4898 100644 --- a/crates/pdf-page/src/external_graphics_state.rs +++ b/crates/pdf-page/src/external_graphics_state.rs @@ -1,13 +1,16 @@ use std::borrow::Cow; use pdf_object::{ - ObjectVariant, dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, - traits::FromDictionary, + dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, + object_variant::ObjectVariant, }; use thiserror::Error; -use crate::xobject::{XObject, XObjectError, XObjectReader}; +use crate::{ + resource_cache::ResourceCache, + xobject::{XObject, XObjectError}, +}; use num_traits::FromPrimitive; use pdf_graphics::{BlendMode, LineCap, LineJoin, MaskMode}; @@ -113,21 +116,16 @@ pub struct ExternalGraphicsState { pub params: Vec, } -impl FromDictionary for ExternalGraphicsState { - const KEY: &'static str = "ExtGState"; - - type ResultType = Self; - - type ErrorType = ExternalGraphicsStateError; - +impl ExternalGraphicsState { /// Parse an ExtGState dictionary into a strongly-typed `ExternalGraphicsState`. /// /// This delegates each key's parsing to small helpers to keep control flow /// and error handling readable. Unknown keys are logged and skipped. - fn from_dictionary( + pub fn from_dictionary( dictionary: &Dictionary, objects: &dyn ObjectResolver, - ) -> Result { + cache: &mut dyn ResourceCache, + ) -> Result { let mut params: Vec = Vec::new(); for (name, value) in &dictionary.dictionary { @@ -137,11 +135,11 @@ impl FromDictionary for ExternalGraphicsState { continue; } // Resolve reference (if any). - let resolved = match value.as_ref() { + let resolved = match value { ObjectVariant::Reference(_) => objects.resolve_object(value)?, _ => value, }; - if let Some(param) = parse_entry(name, resolved, objects)? { + if let Some(param) = parse_entry(name, resolved, objects, cache)? { params.push(param); } } @@ -198,7 +196,7 @@ fn parse_font( actual_desc: format!("array with {} elements", arr.len()), }); }; - let font_ref = font_ref.try_reference()?; + let font_ref = font_ref.try_object_number()?; let font_size = font_size.try_number::(objects)?; Ok(ExternalGraphicsStateKey::Font(font_ref, font_size)) } @@ -252,6 +250,7 @@ fn parse_soft_mask( key_name: &str, value: &ObjectVariant, objects: &dyn ObjectResolver, + cache: &mut dyn ResourceCache, ) -> Result { let smask = match value { ObjectVariant::Dictionary(dict) => { @@ -260,7 +259,7 @@ fn parse_soft_mask( // Parse the "G" key for the `XObject` let stream = dict.get_or_err("G")?.try_stream(objects)?; - let shape = XObject::read_xobject(&stream.dictionary, stream, objects)?; + let shape = XObject::read_xobject(&stream.dictionary, stream, objects, cache)?; Some(SoftMask { mask_type, shape }) } @@ -286,6 +285,7 @@ fn parse_entry( name: &str, value: &ObjectVariant, objects: &dyn ObjectResolver, + cache: &mut dyn ResourceCache, ) -> Result, ExternalGraphicsStateError> { let parsed = match name { "TR" => ExternalGraphicsStateKey::TransferFunction, @@ -320,7 +320,7 @@ fn parse_entry( "OPM" => ExternalGraphicsStateKey::OverprintMode(value.try_number::(objects)?), "Font" => parse_font(name, value, objects)?, "BM" => parse_blend_mode(value, objects)?, - "SMask" => parse_soft_mask(name, value, objects)?, + "SMask" => parse_soft_mask(name, value, objects, cache)?, "CA" => ExternalGraphicsStateKey::StrokingAlpha(value.try_number::(objects)?), "ca" => ExternalGraphicsStateKey::NonStrokingAlpha(value.try_number::(objects)?), "SA" => ExternalGraphicsStateKey::StrokeAdjustment(value.try_boolean(objects)?), diff --git a/crates/pdf-page/src/form.rs b/crates/pdf-page/src/form.rs index c94b13f..692c714 100644 --- a/crates/pdf-page/src/form.rs +++ b/crates/pdf-page/src/form.rs @@ -3,13 +3,13 @@ use pdf_graphics::rect::Rect; use pdf_graphics::transform::Transform; use pdf_object::error::ObjectError; use pdf_object::stream::StreamObject; -use pdf_object::{dictionary::Dictionary, object_resolver::ObjectResolver, traits::FromDictionary}; +use pdf_object::{dictionary::Dictionary, object_resolver::ObjectResolver}; use thiserror::Error; use crate::content_stream::ContentStream; use crate::matrix::Matrix; +use crate::resource_cache::ResourceCache; use crate::resources::{Resources, ResourcesError}; -use crate::xobject::XObjectReader; /// Errors that can occur during parsing of a Form XObject. #[derive(Debug, Error)] @@ -34,14 +34,13 @@ pub struct FormXObject { pub content_stream: ContentStream, } -impl XObjectReader for FormXObject { - type ErrorType = FormXObjectError; - +impl FormXObject { /// Parses a Form XObject from its dictionary and stream data. - fn read_xobject( + pub fn read_xobject( dictionary: &Dictionary, stream_data: &StreamObject, objects: &dyn ObjectResolver, + cache: &mut dyn ResourceCache, ) -> Result { // Retrieve the `/BBox` entry. let bbox = Rect::from( @@ -54,7 +53,7 @@ impl XObjectReader for FormXObject { let matrix = Matrix::from_dictionary(dictionary, objects)?; // Parse the `/Resources` entry if present, mapping any errors. - let resources = Resources::from_dictionary(dictionary, objects).map_err(|err| { + let resources = Resources::read(dictionary, objects, cache).map_err(|err| { FormXObjectError::ResourcesError { source: Box::new(err), } diff --git a/crates/pdf-page/src/functions/exponential_interpolation.rs b/crates/pdf-page/src/functions/exponential_interpolation.rs index f1cd8d4..1229cef 100644 --- a/crates/pdf-page/src/functions/exponential_interpolation.rs +++ b/crates/pdf-page/src/functions/exponential_interpolation.rs @@ -1,4 +1,4 @@ -use pdf_object::{ObjectVariant, object_resolver::ObjectResolver}; +use pdf_object::{object_resolver::ObjectResolver, object_variant::ObjectVariant}; use crate::functions::{ Function, FunctionImpl, FunctionInterpolationError, FunctionReadError, clamp_and_normalize, diff --git a/crates/pdf-page/src/functions/mod.rs b/crates/pdf-page/src/functions/mod.rs index a13c01c..a7bafa2 100644 --- a/crates/pdf-page/src/functions/mod.rs +++ b/crates/pdf-page/src/functions/mod.rs @@ -6,7 +6,9 @@ //! - Type 3: Stitching functions (combining multiple functions) //! - Type 4: PostScript Calculator functions -use pdf_object::{ObjectVariant, error::ObjectError, object_resolver::ObjectResolver}; +use pdf_object::{ + error::ObjectError, object_resolver::ObjectResolver, object_variant::ObjectVariant, +}; use pdf_postscript::calculator::CalcError; use thiserror::Error; diff --git a/crates/pdf-page/src/functions/postscript_calculator.rs b/crates/pdf-page/src/functions/postscript_calculator.rs index f1690df..43eef11 100644 --- a/crates/pdf-page/src/functions/postscript_calculator.rs +++ b/crates/pdf-page/src/functions/postscript_calculator.rs @@ -1,5 +1,5 @@ use num_traits::ToPrimitive; -use pdf_object::{ObjectVariant, object_resolver::ObjectResolver}; +use pdf_object::{object_resolver::ObjectResolver, object_variant::ObjectVariant}; use pdf_postscript::operator::Operator; use crate::functions::{ diff --git a/crates/pdf-page/src/functions/sampled.rs b/crates/pdf-page/src/functions/sampled.rs index d2f8109..e0d64a2 100644 --- a/crates/pdf-page/src/functions/sampled.rs +++ b/crates/pdf-page/src/functions/sampled.rs @@ -1,6 +1,6 @@ use num_derive::FromPrimitive; use num_traits::{FromPrimitive, ToPrimitive}; -use pdf_object::{ObjectVariant, object_resolver::ObjectResolver}; +use pdf_object::{object_resolver::ObjectResolver, object_variant::ObjectVariant}; use crate::functions::{ Function, FunctionImpl, FunctionInterpolationError, FunctionReadError, ensure_stream_len, diff --git a/crates/pdf-page/src/functions/stitching.rs b/crates/pdf-page/src/functions/stitching.rs index c911213..c38e8ac 100644 --- a/crates/pdf-page/src/functions/stitching.rs +++ b/crates/pdf-page/src/functions/stitching.rs @@ -1,6 +1,6 @@ use std::cmp::Ordering; -use pdf_object::{ObjectVariant, object_resolver::ObjectResolver}; +use pdf_object::{object_resolver::ObjectResolver, object_variant::ObjectVariant}; use crate::functions::{ Function, FunctionImpl, FunctionInterpolationError, FunctionReadError, get_pair, diff --git a/crates/pdf-page/src/image.rs b/crates/pdf-page/src/image.rs index 43a9466..8f6d50c 100644 --- a/crates/pdf-page/src/image.rs +++ b/crates/pdf-page/src/image.rs @@ -14,13 +14,14 @@ use pdf_graphics::PixelFormat; use pdf_object::{ dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, - stream::StreamObject, traits::FromDictionary, + stream::StreamObject, }; use thiserror::Error; use crate::{ color_space::{ColorSpace, ColorSpaceError}, - xobject::{XObject, XObjectError, XObjectReader}, + resource_cache::ResourceCache, + xobject::{XObject, XObjectError}, }; /// Errors that can occur when parsing or processing PDF Image XObjects. @@ -92,9 +93,7 @@ pub struct ImageXObject { pub color_space: Option, } -impl XObjectReader for ImageXObject { - type ErrorType = ImageXObjectError; - +impl ImageXObject { /// Parses an Image XObject from a PDF stream dictionary and data. /// /// This method extracts all required and optional image properties from the @@ -117,11 +116,12 @@ impl XObjectReader for ImageXObject { /// - An unsupported filter or filter combination is encountered /// - The color space cannot be parsed /// - The soft mask is not a valid Image XObject - fn read_xobject( + pub fn read_xobject( dictionary: &Dictionary, stream_data: &StreamObject, objects: &dyn ObjectResolver, - ) -> Result { + cache: &mut dyn ResourceCache, + ) -> Result { // Extract required image properties from the dictionary. let width = dictionary .get_or_err("Width")? @@ -147,7 +147,7 @@ impl XObjectReader for ImageXObject { .ok_or(ImageXObjectError::InvalidImageDimensions { width, height })?; // Parse the optional `/SMask` entry and convert to RGBA if needed. - let smask = Self::parse_smask(dictionary, objects)?; + let smask = Self::parse_smask(dictionary, objects, cache)?; let (data, pixel_format) = if smask.is_some() || num_color_components == 3 { ( @@ -183,6 +183,7 @@ impl ImageXObject { fn parse_smask( dictionary: &Dictionary, objects: &dyn ObjectResolver, + cache: &mut dyn ResourceCache, ) -> Result>, ImageXObjectError> { let Some(smask_obj) = dictionary.get("SMask") else { return Ok(None); @@ -192,11 +193,9 @@ impl ImageXObject { let stream = smask_obj.try_stream(objects)?; // Recursively parse the SMask as an XObject. - let smask_xobject = - XObject::read_xobject(&stream.dictionary, stream, objects).map_err(|e| { - ImageXObjectError::SMaskReadError { - source: Box::new(e), - } + let smask_xobject = XObject::read_xobject(&stream.dictionary, stream, objects, cache) + .map_err(|e| ImageXObjectError::SMaskReadError { + source: Box::new(e), })?; // Ensure the SMask is actually an Image XObject. match smask_xobject { diff --git a/crates/pdf-page/src/lib.rs b/crates/pdf-page/src/lib.rs index 4048dab..5403558 100644 --- a/crates/pdf-page/src/lib.rs +++ b/crates/pdf-page/src/lib.rs @@ -14,3 +14,5 @@ pub mod shading; pub mod xobject; pub mod functions; +pub mod resource; +pub mod resource_cache; diff --git a/crates/pdf-page/src/matrix.rs b/crates/pdf-page/src/matrix.rs index 1f6e705..ab2e6f6 100644 --- a/crates/pdf-page/src/matrix.rs +++ b/crates/pdf-page/src/matrix.rs @@ -1,21 +1,16 @@ use pdf_graphics::transform::Transform; -use pdf_object::{ - dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, - traits::FromDictionary, -}; +use pdf_object::{dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver}; pub struct Matrix; -impl FromDictionary for Matrix { +impl Matrix { const KEY: &'static str = "Matrix"; - type ResultType = Option; - type ErrorType = ObjectError; - fn from_dictionary( + pub fn from_dictionary( dictionary: &Dictionary, objects: &dyn ObjectResolver, - ) -> Result { - let Some(matrix_obj) = dictionary.get("Matrix") else { + ) -> Result, ObjectError> { + let Some(matrix_obj) = dictionary.get(Self::KEY) else { return Ok(None); }; diff --git a/crates/pdf-page/src/media_box.rs b/crates/pdf-page/src/media_box.rs index 8e039da..87c2935 100644 --- a/crates/pdf-page/src/media_box.rs +++ b/crates/pdf-page/src/media_box.rs @@ -1,5 +1,5 @@ use pdf_object::error::ObjectError; -use pdf_object::{dictionary::Dictionary, object_resolver::ObjectResolver, traits::FromDictionary}; +use pdf_object::{dictionary::Dictionary, object_resolver::ObjectResolver}; /// Defines the page boundaries within a PDF document. /// @@ -28,15 +28,13 @@ impl MediaBox { } } -impl FromDictionary for MediaBox { +impl MediaBox { const KEY: &'static str = "MediaBox"; - type ResultType = Option; - type ErrorType = ObjectError; - fn from_dictionary( + pub fn from_dictionary( dictionary: &Dictionary, objects: &dyn ObjectResolver, - ) -> Result { + ) -> Result, ObjectError> { let Some(media_box_obj) = dictionary.get(Self::KEY) else { return Ok(None); }; diff --git a/crates/pdf-page/src/page.rs b/crates/pdf-page/src/page.rs index ccae22a..44e054d 100644 --- a/crates/pdf-page/src/page.rs +++ b/crates/pdf-page/src/page.rs @@ -1,7 +1,8 @@ use crate::{ - content_stream::ContentStream, media_box::MediaBox, pages::PdfPagesError, resources::Resources, + content_stream::ContentStream, media_box::MediaBox, pages::PdfPagesError, + resource_cache::ResourceCache, resources::Resources, }; -use pdf_object::{dictionary::Dictionary, object_resolver::ObjectResolver, traits::FromDictionary}; +use pdf_object::{dictionary::Dictionary, object_resolver::ObjectResolver}; /// Represents a single page in a PDF document. /// @@ -18,19 +19,17 @@ pub struct PdfPage { pub resources: Option, } -impl FromDictionary for PdfPage { - const KEY: &'static str = "Page"; +impl PdfPage { + pub const KEY: &'static str = "Page"; - type ResultType = Self; - type ErrorType = PdfPagesError; - - fn from_dictionary( + pub fn from_dictionary( dictionary: &Dictionary, objects: &dyn ObjectResolver, - ) -> Result { + cache: &mut dyn ResourceCache, + ) -> Result { let contents = ContentStream::from_dictionary(dictionary, objects)?; let media_box = MediaBox::from_dictionary(dictionary, objects)?; - let resources = Resources::from_dictionary(dictionary, objects)?; + let resources = Resources::read(dictionary, objects, cache)?; Ok(Self { contents, diff --git a/crates/pdf-page/src/pages.rs b/crates/pdf-page/src/pages.rs index ff564de..359b0fa 100644 --- a/crates/pdf-page/src/pages.rs +++ b/crates/pdf-page/src/pages.rs @@ -1,8 +1,6 @@ -use crate::{content_stream::ContentStreamReadError, page::PdfPage, resources::ResourcesError}; -use pdf_object::{ - dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, - traits::FromDictionary, -}; +use crate::{page::PdfPage, resource_cache::ResourceCache, resources::ResourcesError}; +use pdf_content_stream::error::PdfOperatorError; +use pdf_object::{dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver}; use thiserror::Error; @@ -16,25 +14,21 @@ pub enum PdfPagesError { #[error("{0}")] ObjectError(#[from] ObjectError), #[error("Failed to parse content stream for page: {0}")] - ContentStreamParse(#[from] ContentStreamReadError), + ContentStreamParse(#[from] PdfOperatorError), #[error("Failed to parse resources for page: {0}")] ResourcesParse(#[from] ResourcesError), } -pub struct PdfPages { - pub pages: Vec, -} - -impl FromDictionary for PdfPages { - const KEY: &'static str = "Pages"; +pub struct PdfPages; - type ResultType = Self; - type ErrorType = PdfPagesError; +impl PdfPages { + pub const KEY: &'static str = "Pages"; - fn from_dictionary( + pub fn from_dictionary( dictionary: &Dictionary, objects: &dyn ObjectResolver, - ) -> Result { + cache: &mut dyn ResourceCache, + ) -> Result, PdfPagesError> { // The `/Kids` array is a required entry in a Pages dictionary. It contains // indirect references to child objects, which can be either other Pages nodes // or leaf Page nodes. @@ -46,9 +40,6 @@ impl FromDictionary for PdfPages { // Iterate over each entry in the `/Kids` array. for value in kids_array { - // Each entry must be an indirect reference. We extract its object number - // for use in error messages. - // Resolve the indirect reference to get the child's dictionary. let dictionary = value.try_dictionary(objects)?; @@ -56,14 +47,13 @@ impl FromDictionary for PdfPages { match dictionary.get_or_err("Type")?.try_str(objects)?.as_ref() { PdfPage::KEY => { // If the child is a leaf node (`/Type /Page`), parse it as a `PdfPage`. - let page = PdfPage::from_dictionary(dictionary, objects)?; + let page = PdfPage::from_dictionary(dictionary, objects, cache)?; pages.push(page); } PdfPages::KEY => { // If the child is another branch node (`/Type /Pages`), recursively call this // function to process its children and extend our list of pages. - let pages_obj = PdfPages::from_dictionary(dictionary, objects)?; - pages.extend(pages_obj.pages); + pages.extend(PdfPages::from_dictionary(dictionary, objects, cache)?); } obj_type => { // If the child has an unexpected type, return an error. @@ -74,6 +64,6 @@ impl FromDictionary for PdfPages { } } - Ok(Self { pages }) + Ok(pages) } } diff --git a/crates/pdf-page/src/pattern.rs b/crates/pdf-page/src/pattern.rs index c3a15fa..87da74a 100644 --- a/crates/pdf-page/src/pattern.rs +++ b/crates/pdf-page/src/pattern.rs @@ -1,37 +1,21 @@ use pdf_graphics::{rect::Rect, transform::Transform}; -use pdf_object::{ - dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, - stream::StreamObject, traits::FromDictionary, -}; +use pdf_object::{object_resolver::ObjectResolver, object_variant::ObjectVariant}; use thiserror::Error; use crate::{ content_stream::ContentStream, - external_graphics_state::{ExternalGraphicsState, ExternalGraphicsStateError}, + external_graphics_state::ExternalGraphicsState, matrix::Matrix, + resource_cache::ResourceCache, resources::{Resources, ResourcesError}, - shading::{Shading, ShadingError}, + shading::Shading, }; /// Defines errors that can occur while parsing a Pattern. #[derive(Debug, Error)] pub enum PatternError { - #[error("Missing required entry in Pattern: /{0}")] - MissingRequiredEntry(&'static str), - #[error("Invalid integer value for /PatternType value: {0}")] - InvalidPatternType(i32), #[error("Invalid value for key '{key}': {value}")] - InvalidValue { key: &'static str, value: String }, - #[error("Failed to parse resources for page: {err}")] - ResourcesParse { err: Box }, - #[error("External Graphics State parsing error: {0}")] - ExternalGraphicsStateError(#[from] ExternalGraphicsStateError), - #[error("Shading parsing error: {0}")] - ShadingError(#[from] ShadingError), - #[error("Error parsing content stream: {0}")] - ContentStreamError(#[from] pdf_content_stream::error::PdfOperatorError), - #[error("{0}")] - ObjectError(#[from] ObjectError), + InvalidValue { key: &'static str, value: i32 }, } /// PaintType for tiling patterns. @@ -43,14 +27,17 @@ pub enum PaintType { Uncolored = 2, } -impl PaintType { - /// Attempts to create a `PaintType` from an integer value, returning `None` if the - /// value is not a valid paint type. - pub fn from_i32(val: i32) -> Option { - match val { - 1 => Some(PaintType::Colored), - 2 => Some(PaintType::Uncolored), - _ => None, +impl TryFrom for PaintType { + type Error = PatternError; + + fn try_from(value: i32) -> Result { + match value { + 1 => Ok(PaintType::Colored), + 2 => Ok(PaintType::Uncolored), + _ => Err(PatternError::InvalidValue { + key: "PaintType", + value, + }), } } } @@ -64,14 +51,17 @@ pub enum PatternType { Shading = 2, } -impl PatternType { - /// Attempts to create a `PatternType` from an integer value, returning `None` if the - /// value is not a valid pattern type. - pub fn from_i32(val: i32) -> Option { - match val { - 1 => Some(PatternType::Tiling), - 2 => Some(PatternType::Shading), - _ => None, +impl TryFrom for PatternType { + type Error = PatternError; + + fn try_from(value: i32) -> Result { + match value { + 1 => Ok(PatternType::Tiling), + 2 => Ok(PatternType::Shading), + _ => Err(PatternError::InvalidValue { + key: "PatternType", + value, + }), } } } @@ -88,15 +78,18 @@ pub enum TilingType { ConstantSpacingFast = 3, } -impl TilingType { - /// Attempts to create a `TilingType` from an integer value, returning `None` if the - /// value is not a valid tiling type. - pub fn from_i32(val: i32) -> Option { - match val { - 1 => Some(TilingType::ConstantSpacing), - 2 => Some(TilingType::NoDistortion), - 3 => Some(TilingType::ConstantSpacingFast), - _ => None, +impl TryFrom for TilingType { + type Error = PatternError; + + fn try_from(value: i32) -> Result { + match value { + 1 => Ok(TilingType::ConstantSpacing), + 2 => Ok(TilingType::NoDistortion), + 3 => Ok(TilingType::ConstantSpacingFast), + _ => Err(PatternError::InvalidValue { + key: "TilingType", + value, + }), } } } @@ -105,6 +98,7 @@ impl TilingType { /// /// Patterns are used as "colors" for filling or stroking paths, allowing for repeating /// graphical figures or smooth color transitions (gradients) to be used. +#[allow(clippy::large_enum_variant)] pub enum Pattern { /// A tiling pattern, which consists of a small graphical figure (a "pattern cell") /// that is replicated at fixed intervals to fill an area. @@ -138,11 +132,28 @@ pub enum Pattern { } impl Pattern { - pub(crate) fn from_dictionary( - dictionary: &Dictionary, + /// Reads and constructs a `Pattern` from a PDF object. + /// + /// This function parses a PDF pattern object, which can be either a tiling pattern or a shading pattern, + /// from the provided `object` using the given `objects` resolver and `cache` for resource management. + /// It extracts all required fields and sub-objects, handling both pattern types as defined by the PDF specification. + /// + /// # Parameters + /// + /// - `object`: The PDF object variant representing the pattern to parse. + /// - `objects`: The object resolver used to resolve indirect references within the PDF. + /// - `cache`: A mutable reference to the resource cache for resolving and storing resources. + /// + /// # Returns + /// + /// Returns a `Result` containing the constructed `Pattern` on success, or a `ResourcesError` if parsing fails. + pub(crate) fn read( + object: &ObjectVariant, objects: &dyn ObjectResolver, - stream: Option<&StreamObject>, - ) -> Result { + cache: &mut dyn ResourceCache, + ) -> Result { + let dictionary = object.try_dictionary(objects)?; + let pattern_type = dictionary .get_or_err("PatternType")? .try_number::(objects)?; @@ -150,30 +161,20 @@ impl Pattern { // Read the transformation matrix for the pattern. Defaults to identity. let matrix = Matrix::from_dictionary(dictionary, objects)?; - match PatternType::from_i32(pattern_type) { - Some(PatternType::Tiling) => { + match PatternType::try_from(pattern_type)? { + PatternType::Tiling => { // Read the `/PaintType` entry. let paint_type_int = dictionary .get_or_err("PaintType")? .try_number::(objects)?; - let paint_type = PaintType::from_i32(paint_type_int).ok_or_else(|| { - PatternError::InvalidValue { - key: "PaintType", - value: paint_type_int.to_string(), - } - })?; + let paint_type = PaintType::try_from(paint_type_int)?; // Read the `/TilingType` entry. let tiling_type_int = dictionary .get_or_err("TilingType")? .try_number::(objects)?; - let tiling_type = TilingType::from_i32(tiling_type_int).ok_or_else(|| { - PatternError::InvalidValue { - key: "TilingType", - value: tiling_type_int.to_string(), - } - })?; + let tiling_type = TilingType::try_from(tiling_type_int)?; // Read the `/BBox` entry. let bbox = dictionary @@ -188,15 +189,9 @@ impl Pattern { let y_step = dictionary.get_or_err("YStep")?.try_number::(objects)?; // Read the `/Resources` entry. Needed by the pattern's content stream. - let resources = Resources::from_dictionary(dictionary, objects) - .map_err(|err| PatternError::ResourcesParse { err: Box::new(err) })? - .ok_or(PatternError::MissingRequiredEntry("Resources"))?; - - let stream_data = stream.ok_or(PatternError::MissingRequiredEntry( - "Stream data for Tiling Pattern", - ))?; + let resources = Resources::read(dictionary, objects, cache)?.unwrap_or_default(); - let stream_data = stream_data.data()?; + let stream_data = object.try_stream(objects)?.data()?; let content_stream = ContentStream { operations: pdf_content_stream::pdf_operator::PdfOperatorVariant::from( @@ -214,7 +209,7 @@ impl Pattern { content_stream, }) } - Some(PatternType::Shading) => { + PatternType::Shading => { let shading_object = dictionary.get_or_err("Shading")?; // Read the shading object that defines the gradient fill. let shading = Shading::from_dictionary(shading_object, objects)?; @@ -224,7 +219,7 @@ impl Pattern { .get("ExtGState") .map(|obj| obj.try_dictionary(objects)) .transpose()? - .map(|ext| ExternalGraphicsState::from_dictionary(ext, objects)) + .map(|ext| ExternalGraphicsState::from_dictionary(ext, objects, cache)) .transpose()?; Ok(Pattern::Shading { @@ -233,7 +228,6 @@ impl Pattern { ext_g_state, }) } - _ => Err(PatternError::InvalidPatternType(pattern_type)), } } } diff --git a/crates/pdf-page/src/resource.rs b/crates/pdf-page/src/resource.rs new file mode 100644 index 0000000..1cf87b7 --- /dev/null +++ b/crates/pdf-page/src/resource.rs @@ -0,0 +1,22 @@ +use crate::{ + external_graphics_state::ExternalGraphicsState, pattern::Pattern, shading::Shading, + xobject::XObject, +}; +use pdf_font::font::Font; +use std::rc::Rc; + +/// Represents a PDF resource used on a page, such as fonts, +/// graphics states, XObjects, patterns, or shadings. +#[derive(Clone)] +pub enum Resource { + /// A font resource used for text rendering. + Font(Rc), + /// An external graphics state resource. + ExternalGraphicsState(Rc), + /// An XObject resource, such as an image or form object. + XObject(Rc), + /// A pattern resource, used for tiling or shading fills. + Pattern(Rc), + /// A shading resource, used for gradient fills and complex color transitions. + Shading(Rc), +} diff --git a/crates/pdf-page/src/resource_cache.rs b/crates/pdf-page/src/resource_cache.rs new file mode 100644 index 0000000..a9a46d6 --- /dev/null +++ b/crates/pdf-page/src/resource_cache.rs @@ -0,0 +1,31 @@ +use crate::resource::Resource; + +/// A trait for managing cached PDF resources by object number. +/// +/// [`ResourceCache`] provides an interface for storing and retrieving PDF +/// resources (such as fonts, images, etc.) associated with their object numbers. +/// This allows efficient reuse and lookup of resources during PDF page processing. +/// Implementors of this trait can define custom caching strategies for resource +/// management. +pub trait ResourceCache { + /// Retrieves a reference to a `Resource` associated with the given object number, + /// if it exists. + /// + /// # Parameters + /// + /// - `obj_num`: The object number used as the key to look up the resource. + /// + /// # Returns + /// + /// An `Option` containing a reference to the `Resource` if found, or `None` if + /// not present. + fn get(&self, obj_num: &usize) -> Option<&Resource>; + + /// Inserts a `Resource` into the cache, associating it with the given object number. + /// + /// # Parameters + /// + /// - `obj_num`: The object number to associate with the resource. + /// - `resource`: The `Resource` to insert into the cache. + fn insert(&mut self, obj_num: usize, resource: Resource); +} diff --git a/crates/pdf-page/src/resources.rs b/crates/pdf-page/src/resources.rs index c97e20c..131ba4c 100644 --- a/crates/pdf-page/src/resources.rs +++ b/crates/pdf-page/src/resources.rs @@ -5,240 +5,324 @@ //! and shadings. use std::collections::HashMap; +use std::rc::Rc; use pdf_font::font::{Font, FontError}; -use pdf_object::{ - ObjectVariant, dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, - traits::FromDictionary, -}; +use pdf_object::{dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver}; use thiserror::Error; use crate::{ external_graphics_state::{ExternalGraphicsState, ExternalGraphicsStateError}, pattern::{Pattern, PatternError}, + resource::Resource, + resource_cache::ResourceCache, shading::{Shading, ShadingError}, - xobject::{XObject, XObjectError, XObjectReader}, + xobject::{XObject, XObjectError}, }; /// Contains all resources referenced by a PDF content stream. /// -/// The `Resources` struct holds collections of various PDF objects that can be +/// The `Resources` struct holds a unified collection of PDF objects that can be /// referenced by name within content streams, including fonts, graphics states, /// XObjects (images/forms), patterns, and shadings. -/// -/// # PDF Reference -/// See PDF 32000-1:2008 Section 7.8.3 "Resource Dictionaries" #[derive(Default)] -pub struct Resources { - /// Named font resources (key: `/Font`) - pub fonts: HashMap, - /// Named external graphics state resources (key: `/ExtGState`) - pub external_graphics_states: HashMap, - /// Named XObject resources such as images and forms (key: `/XObject`) - pub xobjects: HashMap, - /// Named pattern resources (key: `/Pattern`) - pub patterns: HashMap, - /// Named shading resources (key: `/Shading`) - pub shadings: HashMap, -} +pub struct Resources(HashMap); /// Errors that can occur while parsing a PDF Resources dictionary. #[derive(Debug, Error)] pub enum ResourcesError { - /// Error occurred while parsing a font resource. #[error("Error processing font: {0}")] FontError(#[from] FontError), - /// Error occurred while parsing an external graphics state. #[error("External Graphics State parsing error: {0}")] ExternalGraphicsStateError(#[from] ExternalGraphicsStateError), - /// Error occurred while parsing an XObject. #[error("XObject parsing error: {0}")] XObjectError(#[from] XObjectError), - /// Error occurred while parsing a pattern. #[error("Pattern parsing error: {0}")] PatternError(#[from] PatternError), - /// General PDF object error. #[error("{0}")] ObjectError(#[from] ObjectError), - /// Error occurred while parsing a shading. #[error("Shading parsing error: {0}")] ShadingError(#[from] ShadingError), - /// A resource entry had an unexpected type. - #[error("Invalid type for entry '{entry_name}': expected {expected_type}, found {found_type}")] - InvalidEntryType { - entry_name: &'static str, - expected_type: &'static str, - found_type: &'static str, - }, + #[error("Error parsing content stream: {0}")] + ContentStreamError(#[from] pdf_content_stream::error::PdfOperatorError), } -impl Resources { - /// Attempts to retrieve a sub-dictionary from the resources dictionary. - /// - /// Returns `Ok(None)` if the key doesn't exist, `Ok(Some(dict))` if found, - /// or an error if the value exists but isn't a valid dictionary. - fn get_sub_dictionary<'a>( - resources: &'a Dictionary, - key: &str, - objects: &'a dyn ObjectResolver, - ) -> Result, ResourcesError> { - resources - .get(key) - .map(|entry| entry.try_dictionary(objects)) - .transpose() - .map_err(Into::into) +/// Attempts to retrieve a sub-dictionary from the resources dictionary. +/// +/// # Parameters +/// +/// - `resources`: The main resources dictionary to search within. +/// - `key`: The key of the sub-dictionary to retrieve (e.g., "Font", "Pattern"). +/// - `objects`: The object resolver to resolve indirect references if necessary. +/// +/// # Returns +/// +/// Returns `Ok(None)` if the key doesn't exist, `Ok(Some(dict))` if found, +/// or an error if the value exists but isn't a valid dictionary. +fn get_sub_dictionary<'a>( + resources: &'a Dictionary, + key: &str, + objects: &'a dyn ObjectResolver, +) -> Result, ResourcesError> { + resources + .get(key) + .map(|entry| entry.try_dictionary(objects)) + .transpose() + .map_err(Into::into) +} + +/// Parses all font resources from the `/Font` sub-dictionary. +fn read_fonts( + resources: &Dictionary, + objects: &dyn ObjectResolver, + cache: &mut dyn ResourceCache, +) -> Result, ResourcesError> { + let Some(font_dict) = get_sub_dictionary(resources, Font::KEY, objects)? else { + return Ok(HashMap::new()); + }; + + let mut result = HashMap::new(); + for (name, value) in &font_dict.dictionary { + let dict = value.try_dictionary(objects)?; + if let Some(cached) = cache.get(&dict.object_number) { + result.insert(name.clone(), cached.clone()); + continue; + } + + let resource = Resource::Font(Rc::new(Font::from_dictionary(dict, objects)?)); + cache.insert(dict.object_number, resource.clone()); + result.insert(name.clone(), resource); } + Ok(result) +} - /// Parses all font resources from the `/Font` sub-dictionary. - fn parse_fonts( - resources: &Dictionary, - objects: &dyn ObjectResolver, - ) -> Result, ResourcesError> { - let Some(font_dict) = Self::get_sub_dictionary(resources, Font::KEY, objects)? else { - return Ok(HashMap::new()); - }; +/// Parses all external graphics state resources from the `/ExtGState` sub-dictionary. +fn read_external_graphics_states( + resources: &Dictionary, + objects: &dyn ObjectResolver, + cache: &mut dyn ResourceCache, +) -> Result, ResourcesError> { + let Some(ext_gstate_dict) = get_sub_dictionary(resources, "ExtGState", objects)? else { + return Ok(HashMap::new()); + }; + + let mut result = HashMap::new(); + for (name, value) in &ext_gstate_dict.dictionary { + let dict = value.try_dictionary(objects)?; + if let Some(cached) = cache.get(&dict.object_number) { + result.insert(name.clone(), cached.clone()); + continue; + } + + let resource = Resource::ExternalGraphicsState(Rc::new( + ExternalGraphicsState::from_dictionary(dict, objects, cache)?, + )); + cache.insert(dict.object_number, resource.clone()); + result.insert(name.clone(), resource); + } + Ok(result) +} + +/// Parses all pattern resources from the `/Pattern` sub-dictionary. +fn read_patterns( + resources: &Dictionary, + objects: &dyn ObjectResolver, + cache: &mut dyn ResourceCache, +) -> Result, ResourcesError> { + let Some(pattern_dict) = get_sub_dictionary(resources, "Pattern", objects)? else { + return Ok(HashMap::new()); + }; - font_dict - .dictionary - .iter() - .map(|(name, value)| { - let dict = value.try_dictionary(objects)?; - let font = Font::from_dictionary(dict, objects)?; - Ok((name.clone(), font)) - }) - .collect() + let mut result = HashMap::new(); + for (name, value) in &pattern_dict.dictionary { + let object_number = value.try_object_number()?; + if let Some(cached) = cache.get(&object_number) { + result.insert(name.clone(), cached.clone()); + continue; + } + let pattern = Pattern::read(value, objects, cache)?; + let resource = Resource::Pattern(Rc::new(pattern)); + cache.insert(object_number, resource.clone()); + + result.insert(name.clone(), resource); } + Ok(result) +} - /// Parses all external graphics state resources from the `/ExtGState` sub-dictionary. - fn parse_external_graphics_states( - resources: &Dictionary, - objects: &dyn ObjectResolver, - ) -> Result, ResourcesError> { - let Some(ext_gstate_dict) = Self::get_sub_dictionary(resources, "ExtGState", objects)? - else { - return Ok(HashMap::new()); - }; +/// Parses all XObject resources from the `/XObject` sub-dictionary. +fn read_xobjects( + resources: &Dictionary, + objects: &dyn ObjectResolver, + cache: &mut dyn ResourceCache, +) -> Result, ResourcesError> { + let Some(xobject_dict) = get_sub_dictionary(resources, "XObject", objects)? else { + return Ok(HashMap::new()); + }; - ext_gstate_dict - .dictionary - .iter() - .map(|(name, value)| { - let dict = value.try_dictionary(objects)?; - let state = ExternalGraphicsState::from_dictionary(dict, objects)?; - Ok((name.clone(), state)) - }) - .collect() + let mut result = HashMap::new(); + for (name, value) in &xobject_dict.dictionary { + let stream = value.try_stream(objects)?; + if let Some(cached) = cache.get(&stream.object_number) { + result.insert(name.clone(), cached.clone()); + continue; + } + + let resource = Resource::XObject(Rc::new(XObject::read_xobject( + &stream.dictionary, + stream, + objects, + cache, + )?)); + cache.insert(stream.object_number, resource.clone()); + result.insert(name.clone(), resource); } + Ok(result) +} - /// Parses all pattern resources from the `/Pattern` sub-dictionary. - /// - /// Patterns can be either dictionaries (Type 2 shading patterns) or - /// streams (Type 1 tiling patterns), so both cases are handled. - fn parse_patterns( - resources: &Dictionary, - objects: &dyn ObjectResolver, - ) -> Result, ResourcesError> { - let Some(pattern_dict) = Self::get_sub_dictionary(resources, "Pattern", objects)? else { - return Ok(HashMap::new()); - }; +/// Parses all shading resources from the `/Shading` sub-dictionary. +fn read_shadings( + resources: &Dictionary, + objects: &dyn ObjectResolver, + cache: &mut dyn ResourceCache, +) -> Result, ResourcesError> { + let Some(shading_dict) = get_sub_dictionary(resources, "Shading", objects)? else { + return Ok(HashMap::new()); + }; - pattern_dict - .dictionary - .iter() - .map(|(name, value)| { - let pattern = match objects.resolve_object(value)? { - ObjectVariant::Dictionary(dict) => { - Pattern::from_dictionary(dict, objects, None)? - } - ObjectVariant::Stream(stream) => { - Pattern::from_dictionary(&stream.dictionary, objects, Some(stream))? - } - other => { - return Err(ResourcesError::InvalidEntryType { - entry_name: "Pattern", - expected_type: "Dictionary or Stream", - found_type: other.name(), - }); - } - }; - Ok((name.clone(), pattern)) - }) - .collect() + let mut result = HashMap::new(); + for (name, value) in &shading_dict.dictionary { + let object_number = value.try_object_number()?; + if let Some(cached) = cache.get(&object_number) { + result.insert(name.clone(), cached.clone()); + continue; + } + let resource = Resource::Shading(Rc::new(Shading::from_dictionary(value, objects)?)); + cache.insert(object_number, resource.clone()); + result.insert(name.clone(), resource); } + Ok(result) +} - /// Parses all XObject resources from the `/XObject` sub-dictionary. +impl Resources { + /// Returns a reference to a font resource by name, if it exists. /// - /// XObjects are always streams containing either images or form content. - fn parse_xobjects( - resources: &Dictionary, - objects: &dyn ObjectResolver, - ) -> Result, ResourcesError> { - let Some(xobject_dict) = Self::get_sub_dictionary(resources, "XObject", objects)? else { - return Ok(HashMap::new()); - }; + /// # Parameters + /// + /// - `name`: The resource name as referenced in the PDF content stream. + /// + /// # Returns + /// + /// An `Option` containing a reference to the [`Font`] if found, or `None` if not present or not a font. + pub fn font(&self, name: &str) -> Option<&Font> { + match self.0.get(name)? { + Resource::Font(font) => Some(font), + _ => None, + } + } - xobject_dict - .dictionary - .iter() - .map(|(name, value)| { - let stream = value.try_stream(objects)?; - let xobject = XObject::read_xobject(&stream.dictionary, stream, objects)?; - Ok((name.clone(), xobject)) - }) - .collect() + /// Returns a reference to an external graphics state resource by name, if it exists. + /// + /// # Parameters + /// + /// - `name`: The resource name as referenced in the PDF content stream. + /// + /// # Returns + /// + /// An `Option` containing a reference to the [`ExternalGraphicsState`] if found, or `None` if not present or not an external graphics state. + pub fn external_graphics_state(&self, name: &str) -> Option<&ExternalGraphicsState> { + match self.0.get(name)? { + Resource::ExternalGraphicsState(state) => Some(state), + _ => None, + } } - /// Parses all shading resources from the `/Shading` sub-dictionary. - fn parse_shadings( - resources: &Dictionary, - objects: &dyn ObjectResolver, - ) -> Result, ResourcesError> { - let Some(shading_dict) = Self::get_sub_dictionary(resources, "Shading", objects)? else { - return Ok(HashMap::new()); - }; + /// Returns a reference to an XObject resource by name, if it exists. + /// + /// # Parameters + /// + /// - `name`: The resource name as referenced in the PDF content stream. + /// + /// # Returns + /// + /// An `Option` containing a reference to the [`XObject`] if found, or `None` if not present or not an XObject. + pub fn xobject(&self, name: &str) -> Option<&XObject> { + match self.0.get(name)? { + Resource::XObject(xobject) => Some(xobject), + _ => None, + } + } - shading_dict - .dictionary - .iter() - .map(|(name, value)| { - let shading = Shading::from_dictionary(value, objects)?; - Ok((name.clone(), shading)) - }) - .collect() + /// Returns a reference to a pattern resource by name, if it exists. + /// + /// # Parameters + /// + /// - `name`: The resource name as referenced in the PDF content stream. + /// + /// # Returns + /// + /// An `Option` containing a reference to the [`Pattern`] if found, or `None` if not present or not a pattern. + pub fn pattern(&self, name: &str) -> Option<&Pattern> { + match self.0.get(name)? { + Resource::Pattern(pattern) => Some(pattern), + _ => None, + } } -} -impl FromDictionary for Resources { - const KEY: &'static str = "Resources"; - type ResultType = Option; - type ErrorType = ResourcesError; + /// Returns a reference to a shading resource by name, if it exists. + /// + /// # Parameters + /// + /// - `name`: The resource name as referenced in the PDF content stream. + /// + /// # Returns + /// + /// An `Option` containing a reference to the [`Shading`] if found, or `None` if not present or not a shading. + pub fn shading(&self, name: &str) -> Option<&Shading> { + match self.0.get(name)? { + Resource::Shading(shading) => Some(shading), + _ => None, + } + } - fn from_dictionary( + /// Reads the `/Resources` dictionary. + /// + /// This function extracts all resource types (fonts, external graphics states, patterns, + /// XObjects, and shadings) referenced in the provided `dictionary`. + /// + /// # Parameters + /// + /// - `dictionary`: The PDF dictionary potentially containing a `/Resources` entry. + /// - `objects`: An object resolver for resolving indirect PDF object references. + /// - `cache`: A mutable resource cache for storing and retrieving parsed resources. + /// + /// # Returns + /// + /// Returns `Ok(Some(Resources))` if resources are found and parsed successfully, `Ok(None)` + /// if no `/Resources` entry exists, or an error if parsing fails for any resource type. + /// + /// # Errors + /// + /// Returns a [`ResourcesError`] if any resource fails to parse or resolve. + pub fn read( dictionary: &Dictionary, objects: &dyn ObjectResolver, - ) -> Result { - let Some(resources_entry) = dictionary.get(Self::KEY) else { + cache: &mut dyn ResourceCache, + ) -> Result, ResourcesError> { + const KEY: &str = "Resources"; + + let Some(resources_entry) = dictionary.get(KEY) else { return Ok(None); }; - // Resolve the `/Resources` dictionary (may be a direct dict or indirect reference). let resources = resources_entry.try_dictionary(objects)?; - // Parse each resource category independently. - // Using separate methods improves readability and allows for easier - // error tracking when debugging resource loading issues. - let fonts = Self::parse_fonts(resources, objects)?; - let external_graphics_states = Self::parse_external_graphics_states(resources, objects)?; - let patterns = Self::parse_patterns(resources, objects)?; - let xobjects = Self::parse_xobjects(resources, objects)?; - let shadings = Self::parse_shadings(resources, objects)?; - - Ok(Some(Self { - fonts, - external_graphics_states, - xobjects, - patterns, - shadings, - })) + let mut map = HashMap::new(); + map.extend(read_fonts(resources, objects, cache)?); + map.extend(read_external_graphics_states(resources, objects, cache)?); + map.extend(read_patterns(resources, objects, cache)?); + map.extend(read_xobjects(resources, objects, cache)?); + map.extend(read_shadings(resources, objects, cache)?); + + Ok(Some(Self(map))) } } diff --git a/crates/pdf-page/src/shading.rs b/crates/pdf-page/src/shading.rs index b30e2f4..d44eef7 100644 --- a/crates/pdf-page/src/shading.rs +++ b/crates/pdf-page/src/shading.rs @@ -13,8 +13,8 @@ use pdf_graphics::rect::Rect; use pdf_object::{ - ObjectVariant, dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, - traits::FromDictionary, + dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, + object_variant::ObjectVariant, }; use thiserror::Error; diff --git a/crates/pdf-page/src/xobject.rs b/crates/pdf-page/src/xobject.rs index 6fc185e..8ec61e8 100644 --- a/crates/pdf-page/src/xobject.rs +++ b/crates/pdf-page/src/xobject.rs @@ -1,6 +1,7 @@ use crate::{ form::{FormXObject, FormXObjectError}, image::{ImageXObject, ImageXObjectError}, + resource_cache::ResourceCache, }; use pdf_object::{ dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver, @@ -33,53 +34,24 @@ pub enum XObjectError { ObjectError(#[from] ObjectError), } -/// A trait for parsing specific types of XObjects from their dictionary and stream data. -/// -/// This internal trait provides a common interface for different XObject parsers -/// (like `ImageXObject` or `FormXObject`) to be constructed from the raw components -/// of a PDF stream object. -pub(crate) trait XObjectReader { - type ErrorType; - - /// Parses an XObject from its dictionary and associated stream data. - /// - /// # Parameters - /// - /// - `dictionary`: The dictionary part of the XObject stream. - /// - `stream_data`: The raw byte data of the XObject stream. - /// - `objects`: A collection of all PDF objects in the document, used to resolve - /// any indirect references within the XObject's dictionary. - /// - /// # Returns - /// - /// A `Result` containing the parsed XObject of type `Self` on success, - /// or an error of type `Self::ErrorType` on failure. - fn read_xobject( - dictionary: &Dictionary, - stream_data: &StreamObject, - objects: &dyn ObjectResolver, - ) -> Result - where - Self: Sized; -} - -impl XObjectReader for XObject { - type ErrorType = XObjectError; - - fn read_xobject( +impl XObject { + pub fn read_xobject( dictionary: &Dictionary, stream_data: &StreamObject, objects: &dyn ObjectResolver, - ) -> Result { + cache: &mut dyn ResourceCache, + ) -> Result { let subtype = dictionary.get_or_err("Subtype")?.try_str(objects)?; match subtype.as_ref() { "Image" => { - let image_xobject = ImageXObject::read_xobject(dictionary, stream_data, objects)?; + let image_xobject = + ImageXObject::read_xobject(dictionary, stream_data, objects, cache)?; Ok(XObject::Image(image_xobject)) } "Form" => { - let form_xobject = FormXObject::read_xobject(dictionary, stream_data, objects)?; + let form_xobject = + FormXObject::read_xobject(dictionary, stream_data, objects, cache)?; Ok(XObject::Form(Box::new(form_xobject))) } other => Err(XObjectError::UnsupportedXObjectType { diff --git a/crates/pdf-parser/src/array.rs b/crates/pdf-parser/src/array.rs index 5532dea..0090388 100644 --- a/crates/pdf-parser/src/array.rs +++ b/crates/pdf-parser/src/array.rs @@ -1,4 +1,4 @@ -use pdf_object::{ObjectVariant, object_resolver::ObjectResolver}; +use pdf_object::{object_resolver::ObjectResolver, object_variant::ObjectVariant}; use pdf_tokenizer::PdfToken; use crate::{error::ParserError, parser::PdfParser, traits::ArrayParser}; diff --git a/crates/pdf-parser/src/dictionary.rs b/crates/pdf-parser/src/dictionary.rs index ba3814d..453ad3b 100644 --- a/crates/pdf-parser/src/dictionary.rs +++ b/crates/pdf-parser/src/dictionary.rs @@ -67,10 +67,10 @@ impl DictionaryParser for PdfParser<'_> { self.skip_whitespace(); - // Parse value. - let value = self.parse_object(objects)?; + // Parse object. + let object = self.parse_object(objects)?; - dictionary.insert(key, Box::new(value)); + dictionary.insert(key, object); self.skip_whitespace(); } diff --git a/crates/pdf-parser/src/indirect_object.rs b/crates/pdf-parser/src/indirect_object.rs index e4bc533..938d470 100644 --- a/crates/pdf-parser/src/indirect_object.rs +++ b/crates/pdf-parser/src/indirect_object.rs @@ -1,8 +1,6 @@ -use std::rc::Rc; - use pdf_object::{ - ObjectVariant, filter::Filter, indirect_object::IndirectObject, - object_resolver::ObjectResolver, stream::StreamObject, traits::FromDictionary, + filter::Filter, indirect_object::IndirectObject, object_resolver::ObjectResolver, + object_variant::ObjectVariant, stream::StreamObject, }; use pdf_tokenizer::PdfToken; use thiserror::Error; @@ -95,7 +93,10 @@ impl IndirectObjectParser for PdfParser<'_> { }; // Parse the object. - let object = self.parse_object(objects)?; + let mut object = self.parse_object(objects)?; + if let ObjectVariant::Dictionary(ref mut d) = object { + d.object_number = object_number; + } self.skip_whitespace(); @@ -110,13 +111,13 @@ impl IndirectObjectParser for PdfParser<'_> { let filters = Filter::from_dictionary(&dictionary, objects)?; - return Ok(Some(ObjectVariant::Stream(Rc::new(StreamObject::new( + return Ok(Some(ObjectVariant::Stream(StreamObject::new( object_number, generation_number, dictionary, stream, filters, - ))))); + )))); } // Read the keyword `endobj`. @@ -131,7 +132,7 @@ impl IndirectObjectParser for PdfParser<'_> { #[cfg(test)] #[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)] mod tests { - use pdf_object::{ObjectVariant, object_resolver::UnimplementedResolver}; + use pdf_object::{object_resolver::UnimplementedResolver, object_variant::ObjectVariant}; use super::*; diff --git a/crates/pdf-parser/src/number.rs b/crates/pdf-parser/src/number.rs index 457b918..c024483 100644 --- a/crates/pdf-parser/src/number.rs +++ b/crates/pdf-parser/src/number.rs @@ -1,4 +1,4 @@ -use pdf_object::ObjectVariant; +use pdf_object::object_variant::ObjectVariant; use pdf_tokenizer::PdfToken; use thiserror::Error; diff --git a/crates/pdf-parser/src/parser.rs b/crates/pdf-parser/src/parser.rs index 428ad10..2100ab3 100644 --- a/crates/pdf-parser/src/parser.rs +++ b/crates/pdf-parser/src/parser.rs @@ -1,7 +1,7 @@ -use std::{rc::Rc, str::FromStr}; +use std::str::FromStr; use crate::error::ParserError; -use pdf_object::{ObjectVariant, object_resolver::ObjectResolver}; +use pdf_object::{object_resolver::ObjectResolver, object_variant::ObjectVariant}; use pdf_tokenizer::{PdfToken, Tokenizer}; use crate::traits::{ @@ -243,7 +243,7 @@ impl PdfParser<'_> { } } PdfToken::DoubleLeftAngleBracket => { - ObjectVariant::Dictionary(Rc::new(self.parse_dictionary(objects)?)) + ObjectVariant::Dictionary(Box::new(self.parse_dictionary(objects)?)) } PdfToken::LeftAngleBracket => ObjectVariant::HexString(self.parse_hex_string()?), PdfToken::Solidus => ObjectVariant::Name(self.parse_name()?), diff --git a/crates/pdf-parser/src/stream.rs b/crates/pdf-parser/src/stream.rs index 89b0bc8..130bb31 100644 --- a/crates/pdf-parser/src/stream.rs +++ b/crates/pdf-parser/src/stream.rs @@ -110,14 +110,14 @@ impl StreamParser for PdfParser<'_> { mod tests { use std::collections::BTreeMap; - use pdf_object::{ObjectVariant, object_resolver::UnimplementedResolver}; + use pdf_object::{object_resolver::UnimplementedResolver, object_variant::ObjectVariant}; use super::*; #[test] fn test_parse_stream_missing_stream_keyword() { let dictionary = Dictionary::new( - vec![("Length".to_string(), Box::new(ObjectVariant::Integer(11)))] + vec![("Length".to_string(), ObjectVariant::Integer(11))] .into_iter() .collect(), ); @@ -132,7 +132,7 @@ mod tests { #[test] fn test_parse_stream_missing_endstream_keyword() { let dictionary = Dictionary::new( - vec![("Length".to_string(), Box::new(ObjectVariant::Integer(11)))] + vec![("Length".to_string(), ObjectVariant::Integer(11))] .into_iter() .collect(), ); @@ -158,7 +158,7 @@ mod tests { #[test] fn test_parse_stream_incorrect_length() { let dictionary = Dictionary::new( - vec![("Length".to_string(), Box::new(ObjectVariant::Integer(5)))] // Incorrect length + vec![("Length".to_string(), ObjectVariant::Integer(5))] // Incorrect length .into_iter() .collect(), ); @@ -173,7 +173,7 @@ mod tests { #[test] fn test_parse_stream_with_extra_whitespace() { let dictionary = Dictionary::new( - vec![("Length".to_string(), Box::new(ObjectVariant::Integer(11)))] + vec![("Length".to_string(), ObjectVariant::Integer(11))] .into_iter() .collect(), ); diff --git a/crates/pdf-parser/src/trailer.rs b/crates/pdf-parser/src/trailer.rs index 3571313..46a9e41 100644 --- a/crates/pdf-parser/src/trailer.rs +++ b/crates/pdf-parser/src/trailer.rs @@ -1,6 +1,7 @@ use crate::{error::ParserError, parser::PdfParser, traits::TrailerParser}; use pdf_object::{ - ObjectVariant, error::ObjectError, object_resolver::ObjectResolver, trailer::Trailer, + error::ObjectError, object_resolver::ObjectResolver, object_variant::ObjectVariant, + trailer::Trailer, }; impl TrailerParser for PdfParser<'_> { @@ -78,7 +79,7 @@ impl TrailerParser for PdfParser<'_> { #[cfg(test)] #[allow(clippy::unwrap_used, clippy::expect_used)] mod tests { - use pdf_object::{ObjectVariant, object_resolver::UnimplementedResolver}; + use pdf_object::{object_resolver::UnimplementedResolver, object_variant::ObjectVariant}; use super::*; diff --git a/crates/pdf-parser/src/traits.rs b/crates/pdf-parser/src/traits.rs index d0a62ac..a9d012f 100644 --- a/crates/pdf-parser/src/traits.rs +++ b/crates/pdf-parser/src/traits.rs @@ -1,6 +1,7 @@ use pdf_object::{ - ObjectVariant, cross_reference_table::CrossReferenceTable, dictionary::Dictionary, - object_resolver::ObjectResolver, trailer::Trailer, version::Version, + cross_reference_table::CrossReferenceTable, dictionary::Dictionary, + object_resolver::ObjectResolver, object_variant::ObjectVariant, trailer::Trailer, + version::Version, }; pub trait ArrayParser {