diff --git a/OpenXmlPowerTools.Tests/DocumentAssemblerTests.cs b/OpenXmlPowerTools.Tests/DocumentAssemblerTests.cs index 5432cd35..cdbdb3bb 100644 --- a/OpenXmlPowerTools.Tests/DocumentAssemblerTests.cs +++ b/OpenXmlPowerTools.Tests/DocumentAssemblerTests.cs @@ -176,6 +176,7 @@ public void DA102_Throws(string name, string data) [Theory] [InlineData("DA025-TemplateDocument.docx", "DA-Data.xml", false)] + [InlineData("DA-lastRenderedPageBreak.docx", "DA-lastRenderedPageBreak.xml", false)] public void DA103_UseXmlDocument(string name, string data, bool err) { var sourceDir = new DirectoryInfo("../../../../TestFiles/"); diff --git a/OpenXmlPowerTools.Tests/OpenXmlRegexTests.cs b/OpenXmlPowerTools.Tests/OpenXmlRegexTests.cs index 334277b4..810d3009 100644 --- a/OpenXmlPowerTools.Tests/OpenXmlRegexTests.cs +++ b/OpenXmlPowerTools.Tests/OpenXmlRegexTests.cs @@ -197,6 +197,21 @@ public class OpenXmlRegexTests "; + private const string LastRenderedPageBreakXmlString = +@" + + + + ThisIsAParagraphContainingNoNaturalLi + + + + neBreaksSoTheLineBreakIsForced. + + + +"; + private static string InnerText(XContainer e) { return e.Descendants(W.r) @@ -365,5 +380,30 @@ public void CanReplaceTextWithFields() Assert.Equal(1, count); Assert.Equal("As stated in Article {__1} and this Section {__1.1}, this is described in Exhibit 4.", innerText); } + + [Fact] + public void CanMatchDespiteInvisibleLayoutMarkers() + { + XDocument partDocument = XDocument.Parse(LastRenderedPageBreakXmlString); + XElement p = partDocument.Descendants(W.p).Last(); + + using (var stream = new MemoryStream()) + using (WordprocessingDocument wordDocument = WordprocessingDocument.Create(stream, DocumentType)) + { + MainDocumentPart part = wordDocument.AddMainDocumentPart(); + part.PutXDocument(partDocument); + + var content = partDocument.Descendants(W.p); + var regex = new Regex(@"LineBreak"); + int count = OpenXmlRegex.Replace(content, regex, "LB", null); + + p = partDocument.Descendants(W.p).Last(); + string innerText = InnerText(p); + + Assert.Equal(2, count); + Assert.Equal("ThisIsAParagraphContainingNoNaturalLBsSoTheLBIsForced.", innerText); + } + } + } } \ No newline at end of file diff --git a/OpenXmlPowerTools.Tests/UnicodeMapperTests.cs b/OpenXmlPowerTools.Tests/UnicodeMapperTests.cs index b54e8f69..667695d2 100644 --- a/OpenXmlPowerTools.Tests/UnicodeMapperTests.cs +++ b/OpenXmlPowerTools.Tests/UnicodeMapperTests.cs @@ -124,5 +124,34 @@ public void CanStringifySymbols() Assert.Equal(symFromChar1.ToString(SaveOptions.None), symFromChar2.ToString(SaveOptions.None)); Assert.Equal(symFromChar1.ToString(SaveOptions.None), symFromChar3.ToString(SaveOptions.None)); } + + private const string LastRenderedPageBreakXmlString = +@" + + + + ThisIsAParagraphContainingNoNaturalLi + + + + neBreaksSoTheLineBreakIsForced. + + + +"; + + [Fact] + public void IgnoresTemporaryLayoutMarkers() + { + XDocument partDocument = XDocument.Parse(LastRenderedPageBreakXmlString); + XElement p = partDocument.Descendants(W.p).Last(); + string actual = p.Descendants(W.r) + .Select(UnicodeMapper.RunToString) + .StringConcatenate(); + // p.Value is "the concatenated text content of this element", which + // (in THIS test case, which does not feature any symbols or special + // characters) should exactly match the output of UnicodeMapper: + Assert.Equal(p.Value, actual); + } } } \ No newline at end of file diff --git a/OpenXmlPowerTools/OpenXmlRegex.cs b/OpenXmlPowerTools/OpenXmlRegex.cs index 6e930992..34cf4420 100644 --- a/OpenXmlPowerTools/OpenXmlRegex.cs +++ b/OpenXmlPowerTools/OpenXmlRegex.cs @@ -446,7 +446,7 @@ private static object WmlSearchAndReplaceTransform(XNode node, Regex regex, stri if (element.Name == W.r) { return element.Elements() - .Where(e => e.Name != W.rPr) + .Where(e => e.Name != W.rPr && e.Name != W.lastRenderedPageBreak) .Select(e => e.Name == W.t ? ((string)e).Select(c => new XElement(W.r, diff --git a/OpenXmlPowerTools/UnicodeMapper.cs b/OpenXmlPowerTools/UnicodeMapper.cs index 141e4358..ffc42716 100644 --- a/OpenXmlPowerTools/UnicodeMapper.cs +++ b/OpenXmlPowerTools/UnicodeMapper.cs @@ -99,6 +99,11 @@ public static string RunToString(XElement element) { return HorizontalTabulation.ToString(); } + // Ignore temporary layout markers that are not actual document content + if (element.Name == W.lastRenderedPageBreak) + { + return string.Empty; + } if (element.Name == W.fldChar) { diff --git a/TestFiles/DA-lastRenderedPageBreak.docx b/TestFiles/DA-lastRenderedPageBreak.docx new file mode 100644 index 00000000..8baae736 Binary files /dev/null and b/TestFiles/DA-lastRenderedPageBreak.docx differ diff --git a/TestFiles/DA-lastRenderedPageBreak.xml b/TestFiles/DA-lastRenderedPageBreak.xml new file mode 100644 index 00000000..667715df --- /dev/null +++ b/TestFiles/DA-lastRenderedPageBreak.xml @@ -0,0 +1,4 @@ + + test + emergency + \ No newline at end of file