Skip to content

Commit 6aecd83

Browse files
committed
Enhance DOCX to PDF conversion by adding support for last rendered page breaks and adjusting line height calculations; update handling of empty paragraphs and improve proactive page break logic.
1 parent 86d1f8a commit 6aecd83

4 files changed

Lines changed: 43 additions & 13 deletions

File tree

src/MiniPdf/DocxReader.cs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,7 @@ internal static DocxDocument Read(Stream stream)
447447
bool isNumberedList = false;
448448
bool pageBreakBefore = false;
449449
bool pageBreakAfter = false;
450+
bool hasLastRenderedPageBreak = false;
450451
bool snapToGrid = true;
451452
int listLevel = 0;
452453
string? listText = null;
@@ -778,6 +779,16 @@ internal static DocxDocument Read(Stream stream)
778779
continue;
779780
}
780781

782+
// Detect lastRenderedPageBreak: Word's hint that a page break occurred
783+
// at this position in the last rendering pass. Only honour it when the
784+
// marker appears before any visible content in the paragraph (i.e. a
785+
// paragraph-level page break, not a mid-paragraph page break).
786+
if (!hasLastRenderedPageBreak && runs.Count == 0 && images.Count == 0
787+
&& child.Element(W + "lastRenderedPageBreak") != null)
788+
{
789+
hasLastRenderedPageBreak = true;
790+
}
791+
781792
var run = ReadRun(child, bold, italic, fontSize, color, caps, charSpacing, paragraphFontName, defaultLatinFontName, defaultEastAsiaFontName, styles);
782793
if (run != null)
783794
{
@@ -852,7 +863,8 @@ internal static DocxDocument Read(Stream stream)
852863
ParagraphFontName: paragraphFontName,
853864
KeepNext: keepNext,
854865
AutoSpaceDE: autoSpaceDE,
855-
AutoSpaceDN: autoSpaceDN);
866+
AutoSpaceDN: autoSpaceDN,
867+
HasLastRenderedPageBreak: hasLastRenderedPageBreak);
856868
}
857869

858870
/// <summary>
@@ -3507,7 +3519,8 @@ internal sealed record DocxParagraph(
35073519
string? ParagraphFontName = null,
35083520
bool KeepNext = false,
35093521
bool AutoSpaceDE = true,
3510-
bool AutoSpaceDN = true
3522+
bool AutoSpaceDN = true,
3523+
bool HasLastRenderedPageBreak = false
35113524
) : DocxElement;
35123525

35133526
/// <summary>Represents a single border edge.</summary>

src/MiniPdf/DocxToPdfConverter.cs

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -807,14 +807,22 @@ private static void RenderParagraph(RenderState state, DocxParagraph paragraph)
807807
var gridPitch = options.GridLinePitch;
808808
if (paragraph.LineSpacing == 0)
809809
{
810-
// Auto-spaced: use max run font size for grid-snapped height.
810+
// Auto-spaced: for fonts that fit in a single grid cell, snap
811+
// the line height to the grid pitch. For larger fonts spanning
812+
// multiple cells, keep the raw auto-spacing value (fontSize ×
813+
// metricsFactor) — Word advances by the natural line height,
814+
// not the full n×gridPitch allocation, so subsequent text
815+
// resumes closer to the heading.
811816
var maxFs = fontSize;
812817
foreach (var run in paragraph.Runs)
813818
{
814819
var runFs = run.FontSize > 0 ? run.FontSize : fontSize;
815820
if (runFs > maxFs) maxFs = runFs;
816821
}
817-
lineHeight = Math.Max(gridPitch, Compat.Ceiling(maxFs / gridPitch) * gridPitch);
822+
if (Compat.Ceiling(maxFs / gridPitch) > 1)
823+
lineHeight = Math.Max(gridPitch, lineHeight);
824+
else
825+
lineHeight = gridPitch;
818826
}
819827
else if (paragraph.LineSpacingAbsolute && !paragraph.LineSpacingExact)
820828
{
@@ -838,6 +846,15 @@ private static void RenderParagraph(RenderState state, DocxParagraph paragraph)
838846
}
839847
}
840848

849+
// Force a page break when the paragraph carries Word's lastRenderedPageBreak
850+
// hint, indicating that Word placed a break before this paragraph's content.
851+
// This must precede the empty-paragraph block so it fires for non-empty
852+
// paragraphs that would render visible text.
853+
if (paragraph.HasLastRenderedPageBreak && state.CurrentPage != null && !state.IsTopOfPage)
854+
{
855+
state.ForceNewPage();
856+
}
857+
841858
// Handle empty paragraphs before EnsurePage — they don't produce visible content
842859
// and should not force a new page (avoids spurious trailing pages).
843860
if (paragraph.Runs.Count == 0 && paragraph.Images.Count == 0 && paragraph.Shading == null
@@ -1226,15 +1243,11 @@ private static void RenderParagraph(RenderState state, DocxParagraph paragraph)
12261243

12271244
for (var i = 0; i < lines.Count; i++)
12281245
{
1229-
// Proactive page break: break only when the text's descenders
1230-
// would clip below the bottom margin. Text is rendered at the
1231-
// current Y (baseline); only the descent extends downward.
1232-
// Using the full lineHeight (which includes inter-line spacing /
1233-
// grid pitch) is over-conservative and causes premature breaks
1234-
// that don't match Word/LibreOffice behavior.
1235-
var pageBreakDescent = runFontSize * (GetFontMetricsFactor(runFontName) - 1f);
1246+
// Proactive page break: break when the text's descenders would
1247+
// clip below the bottom margin.
1248+
var pageBreakThreshold = runFontSize * (GetFontMetricsFactor(runFontName) - 1f);
12361249
if (state.CurrentPage != null && !state.IsTopOfPage
1237-
&& state.CurrentY - pageBreakDescent < state.Options.MarginBottom)
1250+
&& state.CurrentY - pageBreakThreshold < state.Options.MarginBottom)
12381251
{
12391252
state.ForceNewPage();
12401253
}
@@ -3836,7 +3849,11 @@ private static string AddInterScriptSpacing(string text, bool autoSpaceDE = true
38363849
{
38373850
if (autoSpaceDE && ShouldInsertInterScriptSpace(text, i))
38383851
{
3839-
sb.Append(' ');
3852+
// Use THIN SPACE so the inter-script gap is not treated as a word
3853+
// boundary by WordWrap (Split(' ')) and is not expanded by justify
3854+
// word spacing. Word's autoSpaceDE is visual spacing only – it does
3855+
// NOT create a line-break opportunity.
3856+
sb.Append('\u2009');
38403857
}
38413858
else if (autoSpaceDN && ShouldInsertDigitCjkSpace(text, i))
38423859
{
1.47 KB
Loading
10.8 KB
Loading

0 commit comments

Comments
 (0)