Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -290,11 +290,11 @@ describe('Inplace bookmark semantic regression coverage (Allure)', () => {
});
});

await allureStep('Then reconstruction falls back to rebuild with premerge enabled', async () => {
// With premergeRuns: true (default), ILPA falls back to rebuild.
// See GitHub issue #TBD (premerge-enabled inplace safety check failure).
expect(ilpa.reconstructionModeUsed).toBe('rebuild');
expect(ilpa.fallbackReason).toBeDefined();
await allureStep('Then reconstruction succeeds in inplace mode', async () => {
// Issue #35 fixed: setLeafText now correctly syncs both `data` and `nodeValue`
// on xmldom text nodes, so premergeRuns: true no longer causes round-trip failure.
expect(ilpa.reconstructionModeUsed).toBe('inplace');
expect(ilpa.fallbackReason).toBeUndefined();
});

await allureStep('And read_text parity holds after accept all', async () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,10 @@ describe('Reconstruction metadata', () => {

expect(result.engine).toBe('atomizer');
expect(result.reconstructionModeRequested).toBe('inplace');
// With premergeRuns: true (default), ILPA falls back to rebuild due to
// round-trip safety check failure. See GitHub issue #35 (premerge-enabled
// inplace safety check failure).
expect(result.reconstructionModeUsed).toBe('rebuild');
expect(result.fallbackReason).toBeDefined();
// Issue #35 fixed: setLeafText now correctly syncs both `data` and `nodeValue`
// on xmldom text nodes, so premergeRuns: true no longer causes round-trip failure.
expect(result.reconstructionModeUsed).toBe('inplace');
expect(result.fallbackReason).toBeUndefined();
},
180000
);
Expand Down
18 changes: 9 additions & 9 deletions packages/docx-core/src/integration/stability-invariants.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,9 @@ describe('Stability invariants', () => {
readFile(ILPA_REVISED_DOC),
]);

// premergeRuns defaults to true — do not override. ILPA falls back to rebuild
// with premerge enabled. See GitHub issue #35 (premerge-enabled inplace safety
// check failure).
// premergeRuns defaults to true — do not override.
// Issue #35 fixed: setLeafText now syncs both `data` and `nodeValue` on xmldom
// text nodes, so ILPA no longer falls back to rebuild with premerge enabled.
const runs = await Promise.all([
runAndSnapshot(original, revised, 'inplace'),
runAndSnapshot(original, revised, 'inplace'),
Expand All @@ -211,12 +211,12 @@ describe('Stability invariants', () => {
assertNormalizedEqual(first.semantic.accepted, second.semantic.accepted, 'determinism/ilpa/accepted');
assertNormalizedEqual(first.semantic.rejected, second.semantic.rejected, 'determinism/ilpa/rejected');

// With premergeRuns: true (default), ILPA falls back to rebuild due to
// round-trip safety check failure. Determinism still holds across runs.
expect(first.reconstructionModeUsed).toBe('rebuild');
expect(second.reconstructionModeUsed).toBe('rebuild');
expect(first.fallbackReason).toBeDefined();
expect(second.fallbackReason).toBeDefined();
expect(first.reconstructionModeUsed).toBe('inplace');
expect(second.reconstructionModeUsed).toBe('inplace');
expect(first.fallbackReason).toBeUndefined();
expect(second.fallbackReason).toBeUndefined();
expect(first.failedChecks).toEqual([]);
expect(second.failedChecks).toEqual([]);
expect(first.failedChecks).toEqual(second.failedChecks);
},
180000
Expand Down
11 changes: 10 additions & 1 deletion packages/docx-core/src/primitives/dom-helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,21 @@ export function getLeafText(el: Element): string | undefined {
/**
* Set the direct text content of a leaf element. Replaces or creates
* the first text child node.
*
* NOTE: We must update BOTH `data` and `nodeValue` on the text node.
* In @xmldom/xmldom, `CharacterData` stores text in two separate plain
* properties: `data` (read by XMLSerializer) and `nodeValue` (read by
* `textContent` getter). All CharacterData mutation methods keep them
* in sync, but a direct `child.nodeValue = text` assignment only updates
* `nodeValue`, leaving the stale original value in `data`. This causes
* XMLSerializer to output the old text. Using `replaceData` (which sets
* both atomically) is the correct W3C-compliant approach.
*/
export function setLeafText(el: Element, text: string): void {
for (let i = 0; i < el.childNodes.length; i++) {
const child = el.childNodes[i]!;
if (child.nodeType === NODE_TYPE.TEXT) {
child.nodeValue = text;
(child as CharacterData).replaceData(0, (child as CharacterData).length, text);
return;
}
}
Expand Down
83 changes: 83 additions & 0 deletions packages/docx-core/src/xmldom-characterdata-nodevalue.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/**
* Upstream bug report: @xmldom/xmldom CharacterData nodeValue/data desync
*
* In @xmldom/xmldom, `CharacterData` stores text in two separate plain
* properties: `data` (read by XMLSerializer) and `nodeValue` (read by the
* `textContent` getter). All built-in mutation methods (appendData,
* replaceData, splitText, textContent setter) keep them in sync via:
* `this.nodeValue = this.data = text`
*
* However, a direct `node.nodeValue = text` assignment is NOT intercepted —
* it only updates the instance property, leaving `data` stale. Since
* XMLSerializer reads `node.data`, mutations via direct nodeValue assignment
* are silently lost in serialized output.
*
* WHATWG DOM Living Standard §4.10: for CharacterData nodes, `nodeValue`
* getter/setter must be equivalent to `data`.
*
* This caused a silent data-loss bug in our DOCX comparison engine (Issue #35).
* The fix was to use `replaceData()` instead of direct `nodeValue` assignment
* in `setLeafText()` (packages/docx-core/src/primitives/dom-helpers.ts).
*
* These tests document the bug for filing upstream at:
* https://github.com/xmldom/xmldom/issues
*
* Filed as companion to our merged PR #960 (ParentNode.children getter).
*/

import { describe, expect, it } from 'vitest';

Check failure on line 28 in packages/docx-core/src/xmldom-characterdata-nodevalue.test.ts

View workflow job for this annotation

GitHub Actions / workspace-lint

'it' import from 'vitest' is restricted. Import itAllure/testAllure from the package-local allure-test.js helper. Keep describe/expect from vitest
import { DOMParser, XMLSerializer } from '@xmldom/xmldom';

describe('xmldom CharacterData nodeValue/data sync', () => {
it('replaceData keeps nodeValue and data in sync', () => {
const doc = new DOMParser().parseFromString('<r/>', 'text/xml');
const text = doc.createTextNode('original');
doc.documentElement!.appendChild(text);

text.replaceData(0, text.length, 'updated');

expect(text.nodeValue).toBe('updated');
expect(text.data).toBe('updated');
expect(new XMLSerializer().serializeToString(doc)).toContain('updated');
});

// This test is marked it.fails() to document the upstream bug in @xmldom/xmldom 0.8.x.
// Direct nodeValue assignment only updates the instance property — `data` stays stale,
// so XMLSerializer silently outputs the old text.
//
// Once the library implements nodeValue as a getter/setter on CharacterData.prototype
// (or via Node.prototype dispatch), change this to a normal it() with the same assertions.
//
// Fix direction:
// Object.defineProperty(CharacterData.prototype, 'nodeValue', {
// get() { return this.data; },
// set(v) { const s = v == null ? '' : String(v); this.data = s; this.length = s.length; }
// });
it.fails('direct nodeValue assignment updates nodeValue but NOT data or XMLSerializer output', () => {
const doc = new DOMParser().parseFromString('<r/>', 'text/xml');
const text = doc.createTextNode('original');
doc.documentElement!.appendChild(text);

text.nodeValue = 'updated';

// nodeValue appears correct — the instance property was shadowed
expect(text.nodeValue).toBe('updated');
// data is stale — these fail in xmldom 0.8.x
expect(text.data).toBe('updated');
expect(new XMLSerializer().serializeToString(doc)).toContain('updated');
});

// Real-world impact: simulates the setLeafText path in our DOCX comparison engine
// before the fix (Issue #35). The merged atom appeared correct in DOM traversal
// (nodeValue read back 'hello world') but XMLSerializer silently wrote stale text.
it.fails('merging atom text via nodeValue loses data on serialization', () => {
const doc = new DOMParser().parseFromString('<w:t>hello </w:t>', 'text/xml');
const textNode = doc.documentElement!.firstChild as CharacterData;

textNode.nodeValue = 'hello world';

// These fail — data and serialized output remain 'hello '
expect(textNode.data).toBe('hello world');
expect(new XMLSerializer().serializeToString(doc)).toContain('hello world');
});
});
Loading