Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crates/pdf-canvas/src/canvas_external_object_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ impl<B: CanvasBackend> XObjectOps for PdfCanvas<'_, B> {
match resources.xobject(xobject_name) {
Some(XObject::Image(image)) => self.render_image_xobject(image),
Some(XObject::Form(form)) => self.render_content_stream(
&form.content_stream.operations,
&form.content_stream,
form.matrix,
Some(&form.bbox),
form.resources.as_ref(),
Expand Down
2 changes: 1 addition & 1 deletion crates/pdf-canvas/src/canvas_graphics_state_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ impl<B: CanvasBackend> GraphicsStateOps for PdfCanvas<'_, B> {
// Render the form's content stream into the mask canvas.
self.record_content_stream(
&mut recording_canvas,
&form.content_stream.operations,
&form.content_stream,
form.matrix,
&form.bbox,
form.resources.as_ref(),
Expand Down
16 changes: 8 additions & 8 deletions crates/pdf-canvas/src/pdf_canvas.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::borrow::Cow;
use std::sync::Arc;

use pdf_content_stream::pdf_operator::PdfOperatorVariant;
use pdf_content_stream::{content_stream::ContentStream, pdf_operator::PdfOperatorVariant};
use pdf_graphics::{
MaskMode, PaintMode, PathFillType, pdf_path::PdfPath, rect::Rect, transform::Transform,
};
Expand Down Expand Up @@ -129,7 +129,7 @@ impl<'a, B: CanvasBackend> PdfCanvas<'a, B> {
/// # Parameters
///
/// - `recording_canvas`: Target offscreen canvas to record into.
/// - `operations`: Parsed operator list to execute.
/// - `content_stream`: The content stream containing the PDF operators to execute.
/// - `mat`: Optional additional matrix (applied like a PDF `cm` / XObject `/Matrix`).
/// - `bbox`: The content-space bounding box to map to the recording surface.
/// - `resources`: Optional resource dictionary for resolving fonts, patterns, etc.
Expand All @@ -142,7 +142,7 @@ impl<'a, B: CanvasBackend> PdfCanvas<'a, B> {
pub(crate) fn record_content_stream(
&self,
recording_canvas: &mut RecordingCanvas,
operations: &[PdfOperatorVariant],
content_stream: &ContentStream,
mat: Option<Transform>,
bbox: &Rect,
resources: Option<&'a Resources>,
Expand Down Expand Up @@ -177,7 +177,7 @@ impl<'a, B: CanvasBackend> PdfCanvas<'a, B> {
};

// Render the form's content stream into the mask canvas.
other.render_content_stream(operations, mat, Some(bbox), resources, filter)
other.render_content_stream(content_stream, mat, Some(bbox), resources, filter)
}

/// Returns a reference to the current graphics state on the stack.
Expand Down Expand Up @@ -338,7 +338,7 @@ impl<'a, B: CanvasBackend> PdfCanvas<'a, B> {
// Render the tiling content into a temporary canvas.
self.record_content_stream(
&mut recording_canvas,
&content_stream.operations,
content_stream,
None,
&bbox,
Some(resources),
Expand Down Expand Up @@ -495,7 +495,7 @@ impl<'a, B: CanvasBackend> PdfCanvas<'a, B> {
///
/// # Parameters
///
/// - `operations`: The list of PDF operators to execute.
/// - `content_stream`: The content stream containing the PDF operators to execute.
/// - `mat`: Optional transformation matrix to apply.
/// - `bbox`: Optional bounding box to clip the rendering.
/// - `resources`: Optional resource dictionary to use for rendering.
Expand All @@ -506,7 +506,7 @@ impl<'a, B: CanvasBackend> PdfCanvas<'a, B> {
/// Returns an error if any operation fails or if the graphics state is invalid.
pub fn render_content_stream(
&mut self,
operations: &[PdfOperatorVariant],
content_stream: &ContentStream,
mat: Option<Transform>,
bbox: Option<&Rect>,
resources: Option<&'a Resources>,
Expand Down Expand Up @@ -538,7 +538,7 @@ impl<'a, B: CanvasBackend> PdfCanvas<'a, B> {
self.current_state_mut()?.resources = Some(resources);
}

for op in operations {
for op in &content_stream.0 {
if filter.is_some_and(|filter| filter(op)) {
continue;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,24 @@
use pdf_content_stream::{error::PdfOperatorError, pdf_operator::PdfOperatorVariant};
use crate::{error::PdfOperatorError, pdf_operator::PdfOperatorVariant};
use pdf_object::{
dictionary::Dictionary, error::ObjectError, object_resolver::ObjectResolver,
object_variant::ObjectVariant,
object_variant::ObjectVariant, stream::StreamObject,
};

/// Represents the content stream of a PDF page, containing a sequence
/// of drawing operators.
pub struct ContentStream {
/// Flat, ordered list of all PDF content stream operators that belong to a page.
pub operations: Vec<PdfOperatorVariant>,
}
/// The single tuple field is the flat, ordered list of operators held by this
/// content stream; callers reach it as `.0`.
pub struct ContentStream(pub Vec<PdfOperatorVariant>);

// Helper function to process an array whose elements should be streams or references to streams
/// Processes an array of PDF objects, each expected to be a stream or reference to a stream,
/// and concatenates their content stream operators into a single vector.
///
/// # Parameters
///
/// - `array`: Slice of PDF objects representing streams or references to streams.
/// - `objects`: Resolver for indirect PDF objects.
///
/// # Returns
///
/// Concatenated list of operators or error.
fn process_content_stream_array(
array: &[ObjectVariant],
objects: &dyn ObjectResolver,
Expand All @@ -26,6 +33,19 @@ fn process_content_stream_array(
}

impl ContentStream {
/// Constructs a [`ContentStream`] from a PDF page dictionary by resolving the `/Contents` entry.
///
/// The `/Contents` entry may be a stream or an array of streams. This function resolves the entry,
/// parses the content stream operators, and returns a [`ContentStream`] containing all operators.
///
/// # Parameters
///
/// - `dictionary`: The page dictionary containing the `/Contents` entry.
/// - `objects`: Resolver for indirect PDF objects.
///
/// # Returns
///
/// The parsed content stream or None if missing.
pub fn from_dictionary(
dictionary: &Dictionary,
objects: &dyn ObjectResolver,
Expand Down Expand Up @@ -53,6 +73,12 @@ impl ContentStream {
}
};

Ok(Some(ContentStream { operations }))
Ok(Some(ContentStream(operations)))
}

pub fn from_stream(stream: &StreamObject) -> Result<Self, PdfOperatorError> {
let data = stream.data()?;
let operations = PdfOperatorVariant::from(&data)?;
Ok(ContentStream(operations))
}
}
1 change: 1 addition & 0 deletions crates/pdf-content-stream/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
pub mod clipping_path_operators;
pub mod color_operators;
pub mod compatibility_operators;
pub mod content_stream;
pub mod error;
pub mod graphics_state_operators;
pub mod marked_content_operators;
Expand Down
1 change: 1 addition & 0 deletions crates/pdf-document/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ edition = "2024"
[dependencies]
pdf-object-collection = { path = "../pdf-object-collection" }
pdf-object = { path = "../pdf-object" }
pdf-content-stream = { path = "../pdf-content-stream" }
pdf-parser = { path = "../pdf-parser" }
pdf-page = { path = "../pdf-page" }
pdf-font = { path = "../pdf-font" }
Expand Down
45 changes: 1 addition & 44 deletions crates/pdf-document/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,16 @@ use pdf_object::indirect_object::IndirectObject;
use pdf_object::object_resolver::{ObjectResolver, UnimplementedResolver};
use pdf_object::{
cross_reference_table::{CrossReferenceEntry, CrossReferenceStatus, CrossReferenceTable},
dictionary::Dictionary,
error::ObjectError,
object_variant::ObjectVariant,
stream::StreamObject,
trailer::Trailer,
};
use pdf_object_collection::object_collection::ObjectCollection;
use pdf_page::content_stream::ContentStream;
use pdf_page::media_box::MediaBox;
use pdf_page::page::PdfPage;
use pdf_page::pages::{PdfPages, PdfPagesError};
use pdf_page::resource::Resource;
use pdf_page::resource_cache::ResourceCache;
use pdf_page::resources::Resources;
use pdf_parser::{
error::ParserError, header::HeaderError, parser::PdfParser, traits::HeaderParser,
};
Expand Down Expand Up @@ -271,46 +267,7 @@ fn extract_page_tree(
let pages_dict = catalog.get_or_err("Pages")?.try_dictionary(objects)?;

let mut cache = ResourceCacheWrapper::default();
flatten_page_tree(pages_dict, objects, &mut cache).map_err(Into::into)
}

/// Recursively traverses the PDF page tree, constructing `PdfPage` objects
/// with shared resources via the provided `ResourceCache`.
fn flatten_page_tree(
dictionary: &Dictionary,
objects: &dyn ObjectResolver,
cache: &mut dyn ResourceCache,
) -> Result<Vec<PdfPage>, PdfPagesError> {
let kids_array = dictionary.get_or_err("Kids")?.try_array(objects)?;

let mut pages = vec![];

for value in kids_array {
let dictionary = value.try_dictionary(objects)?;

match dictionary.get_or_err("Type")?.try_str(objects)?.as_ref() {
PdfPage::KEY => {
let contents = ContentStream::from_dictionary(dictionary, objects)?;
let media_box = MediaBox::from_dictionary(dictionary, objects)?;
let resources = Resources::read(dictionary, objects, cache)?;

pages.push(PdfPage {
contents,
media_box,
resources,
});
}
PdfPages::KEY => {
pages.extend(flatten_page_tree(dictionary, objects, cache)?);
}
obj_type => {
return Err(PdfPagesError::UnexpectedObjectTypeInKids {
found_type: obj_type.to_string(),
});
}
}
}

let pages = PdfPages::from_dictionary(pages_dict, objects, &mut cache)?;
Ok(pages)
}

Expand Down
2 changes: 1 addition & 1 deletion crates/pdf-object-collection/src/object_collection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ impl ObjectCollection {
fn dictionary_to_json(dict: &pdf_object::dictionary::Dictionary) -> JsonValue {
let mut map = serde_json::Map::new();
for (key, value) in &dict.dictionary {
map.insert(key.clone(), Self::object_variant_to_json(value.as_ref()));
map.insert(key.clone(), Self::object_variant_to_json(value));
}
json!({
"type": "Dictionary",
Expand Down
7 changes: 2 additions & 5 deletions crates/pdf-page/src/form.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use pdf_content_stream::content_stream::ContentStream;
use pdf_content_stream::error::PdfOperatorError;
use pdf_graphics::rect::Rect;
use pdf_graphics::transform::Transform;
Expand All @@ -6,7 +7,6 @@ use pdf_object::stream::StreamObject;
use pdf_object::{dictionary::Dictionary, object_resolver::ObjectResolver};
use thiserror::Error;

use crate::content_stream::ContentStream;
use crate::matrix::Matrix;
use crate::resource_cache::ResourceCache;
use crate::resources::{Resources, ResourcesError};
Expand Down Expand Up @@ -59,11 +59,8 @@ impl FormXObject {
}
})?;

let stream_data = stream_data.data()?;
// Parse the content stream data.
let content_stream = ContentStream {
operations: pdf_content_stream::pdf_operator::PdfOperatorVariant::from(&stream_data)?,
};
let content_stream = ContentStream::from_stream(stream_data)?;

Ok(FormXObject {
bbox,
Expand Down
1 change: 0 additions & 1 deletion crates/pdf-page/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
pub mod color_space;
pub mod color_stops;
pub mod content_stream;
pub mod external_graphics_state;
pub mod form;
pub mod image;
Expand Down
4 changes: 2 additions & 2 deletions crates/pdf-page/src/page.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{
content_stream::ContentStream, media_box::MediaBox, pages::PdfPagesError,
resource_cache::ResourceCache, resources::Resources,
media_box::MediaBox, pages::PdfPagesError, resource_cache::ResourceCache, resources::Resources,
};
use pdf_content_stream::content_stream::ContentStream;
use pdf_object::{dictionary::Dictionary, object_resolver::ObjectResolver};

/// Represents a single page in a PDF document.
Expand Down
11 changes: 11 additions & 0 deletions crates/pdf-page/src/pages.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,17 @@ pub struct PdfPages;
impl PdfPages {
pub const KEY: &'static str = "Pages";

/// Recursively parses a PDF Pages dictionary and returns a flattened list of all leaf `PdfPage` objects.
///
/// # Parameters
///
/// - `dictionary`: The Pages dictionary to parse.
/// - `objects`: Resolver for indirect PDF objects.
/// - `cache`: Resource cache for page resources.
///
/// # Returns
///
/// Vector of parsed pages or error.
pub fn from_dictionary(
dictionary: &Dictionary,
objects: &dyn ObjectResolver,
Expand Down
11 changes: 4 additions & 7 deletions crates/pdf-page/src/pattern.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use pdf_content_stream::content_stream::ContentStream;
use pdf_graphics::{rect::Rect, transform::Transform};
use pdf_object::{object_resolver::ObjectResolver, object_variant::ObjectVariant};
use thiserror::Error;

use crate::{
content_stream::ContentStream,
external_graphics_state::ExternalGraphicsState,
matrix::Matrix,
resource_cache::ResourceCache,
Expand Down Expand Up @@ -191,13 +191,10 @@ impl Pattern {
// Read the `/Resources` entry. Needed by the pattern's content stream.
let resources = Resources::read(dictionary, objects, cache)?.unwrap_or_default();

let stream_data = object.try_stream(objects)?.data()?;
let stream_data = object.try_stream(objects)?;

let content_stream = ContentStream::from_stream(stream_data)?;

let content_stream = ContentStream {
operations: pdf_content_stream::pdf_operator::PdfOperatorVariant::from(
&stream_data,
)?,
};
Ok(Pattern::Tiling {
paint_type,
tiling_type,
Expand Down
16 changes: 2 additions & 14 deletions crates/pdf-renderer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,7 @@ impl<'a, 'b, B: CanvasBackend> PdfRenderer<'a, 'b, B> {
};
let mut canvas = PdfCanvas::new(self.canvas, page, None)?;
if let Some(cs) = &page.contents {
canvas.render_content_stream(
&cs.operations,
None,
None,
page.resources.as_ref(),
None,
)?;
canvas.render_content_stream(cs, None, None, page.resources.as_ref(), None)?;
}
Ok(())
}
Expand Down Expand Up @@ -94,13 +88,7 @@ pub fn render_page_to_recording(
{
let mut canvas = PdfCanvas::new(&mut recording, page, None)?;
if let Some(cs) = &page.contents {
canvas.render_content_stream(
&cs.operations,
None,
None,
page.resources.as_ref(),
None,
)?;
canvas.render_content_stream(cs, None, None, page.resources.as_ref(), None)?;
}
}

Expand Down