From 8484b9b7fab303a44fe9a94474b1d348dfcf942e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 27 Apr 2026 00:33:36 +0000 Subject: [PATCH 1/2] Initial plan From 1dc38327bd192ec80464f5dd146b7e8b43e5fe52 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 27 Apr 2026 01:20:44 +0000 Subject: [PATCH 2/2] fix: detect and repair Myers diff long-distance same-block matching for duplicate functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a function is copied verbatim in the new file (e.g. old getNewPlan + new modified getNewPlan + old getNewPlan copy), the Myers algorithm matches the old content to the identical later copy (0 edit distance), leaving the modified earlier version as all-added. Two complementary post-processing passes are applied after diffLines(): 1. cleanupLongDistanceSame — handles the simple [ADDED(A), SAME(S)] pattern by finding S's first significant line inside A, splitting A at that point, and rearranging to [ADDED(before), REMOVED(S), ADDED(from), ADDED(S)], so the old content S is paired with the nearby modified version while the verbatim copy shows as added. 2. cleanupSplitMatches — handles the interleaved [ADDED,SAME,ADDED,SAME…] pattern by detecting consecutive SAME blocks whose new-file positions jump far more than their old-file positions (ratio > 3×), then consolidating to [ADDED(before), REMOVED(all-SAMEs-old), ADDED(new-content-from-first-SAME)]. Three new tests cover: the interleaved Myers variant, the simple variant, and a regression guard that prepending content to a file does not falsely trigger the fix. 
Agent-Logs-Url: https://github.com/Aeolun/react-diff-viewer-continued/sessions/400ed340-4aa8-4e09-9c98-d583dcbfb7d1 Co-authored-by: Aeolun <1116482+Aeolun@users.noreply.github.com> --- src/compute-lines.ts | 248 +++++++++++++++++++++++++++++++++++++ test/compute-lines.test.ts | 123 +++++++++++++++++- 2 files changed, 370 insertions(+), 1 deletion(-) diff --git a/src/compute-lines.ts b/src/compute-lines.ts index b3451c9..9a4e8a4 100644 --- a/src/compute-lines.ts +++ b/src/compute-lines.ts @@ -390,6 +390,247 @@ const constructLines = (value: string): string[] => { return lines; }; +/** + * Fixes the "duplicate function" diff problem where the Myers algorithm matches + * old content to a far-away identical copy in the new file, instead of a nearby + * modified version. + * + * Myers minimises total edit distance. When a function body is copied verbatim + * elsewhere in the new file, matching old→copy costs 0 edits, which Myers + * prefers — even though a modified version of the function appears earlier. + * The result: the modified version looks "all-added" and the old function looks + * "unchanged", which is confusing. + * + * Pattern detected: [..., ADDED(A), SAME(S)] where + * - no REMOVED blocks precede SAME(S) in the diff, and + * - S's first significant line also appears somewhere inside A. + * + * When this pattern is found it means Myers matched old content S to a LATER + * identical copy, while an EARLIER (possibly modified) version lives inside A. + * + * Transformation applied: + * [..., ADDED(A), SAME(S)] + * → [..., ADDED(A[0..k-1]), REMOVED(S), ADDED(A[k..]), ADDED(S)] + * + * where k is the index in A at which S's first significant line first appears. 
+ * + * Effect in the viewer: + * • ADDED(A[0..k-1]) — new content before the modified function (green) + * • REMOVED(S) paired with ADDED(A[k..]) — old function shown as modified + * against the nearby earlier version (highlighted changes) ✓ + * • ADDED(S) — the identical copy further in the new file (green) ✓ + */ +function cleanupLongDistanceSame(changes: Change[]): Change[] { + if (changes.length === 0) return changes; + + // Only apply when no REMOVED blocks exist yet (pure-addition scenario). + // This avoids interfering with diffs that already have explicit removals. + if (changes.some(c => c.removed)) return changes; + + // Minimum trimmed length for a line to be considered "significant". + // Short lines like "}", "};", or "{" are too common to be reliable anchors. + const MIN_SIG_LINE_LENGTH = 8; + + for (let i = 0; i < changes.length; i++) { + const chunk = changes[i]; + if (chunk.added || chunk.removed) continue; + + // ── SAME block ────────────────────────────────────────────────────────── + const sameLines = constructLines(chunk.value); + + // Only consider blocks with multiple lines (single-line matches are rarely + // the source of this problem). + if (sameLines.length < 2) continue; + + // Find the first significant line of this SAME block. + const firstSigLine = sameLines.find(l => l.trim().length > MIN_SIG_LINE_LENGTH); + if (!firstSigLine) continue; + + // Search preceding ADDED chunks for the earliest occurrence of firstSigLine. 
+ let targetAddedChunkIdx = -1; + let targetLineInChunk = -1; + + for (let j = 0; j < i; j++) { + if (!changes[j].added) continue; + const addedLines = constructLines(changes[j].value); + const lineIdx = addedLines.indexOf(firstSigLine); + if (lineIdx !== -1) { + targetAddedChunkIdx = j; + targetLineInChunk = lineIdx; + break; // use the first (earliest) occurrence + } + } + + if (targetAddedChunkIdx === -1) continue; + + // ── Apply the transformation ──────────────────────────────────────────── + const targetLines = constructLines(changes[targetAddedChunkIdx].value); + const beforeLines = targetLines.slice(0, targetLineInChunk); + const fromLines = targetLines.slice(targetLineInChunk); + + const newChanges: Change[] = []; + + for (let j = 0; j < changes.length; j++) { + if (j === targetAddedChunkIdx) { + // Split the target ADDED chunk at the found line. + if (beforeLines.length > 0) { + newChanges.push({ value: beforeLines.join('\n') + '\n', added: true }); + } + // Old content (S) is now explicitly removed. + newChanges.push({ value: chunk.value, removed: true }); + // Remaining part of the original ADDED chunk follows immediately so + // that compute-lines.ts pairs REMOVED(S) with this ADDED section. + if (fromLines.length > 0) { + newChanges.push({ value: fromLines.join('\n') + '\n', added: true }); + } + } else if (j === i) { + // Replace SAME(S) with ADDED(S): the new-file copy is shown as added. + newChanges.push({ value: chunk.value, added: true }); + } else { + newChanges.push(changes[j]); + } + } + + return newChanges; + } + + return changes; +} + +/** + * Fixes the interleaved variant of the "duplicate function" diff problem. + * + * Myers sometimes produces an interleaved pattern for this bug: + * [ADDED(A1), SAME(S1), ADDED(A2), SAME(S2), ...] + * where SAME blocks are contiguous in the *old* file but non-contiguous in the + * *new* file — i.e. S1 is matched to one occurrence and S2+ to another. 
+ * + * This is detected by a large gap between consecutive SAME blocks' new-file + * positions relative to their old-file gap (e.g. 6 new lines skipped but + * only 0 old lines between them). + * + * When detected, the entire run of "split-matched" SAME blocks is consolidated: + * [...ADDED, SAME(S1), ...ADDED, SAME(S2), ...] + * → [...ADDED(before_S1), REMOVED(S1+S2+…), ADDED(new_content_from_S1), ...] + * + * Effect in the viewer: + * • ADDED(before) — new content before the function (green) + * • REMOVED(old) paired with ADDED(nearby_new) — old function shown as + * modified against the nearby modified version (highlighted changes) ✓ + * • Remaining content continues normally + */ +function cleanupSplitMatches(changes: Change[]): Change[] { + if (changes.length === 0) return changes; + + // Don't interfere if there are already explicit REMOVED blocks. + if (changes.some(c => c.removed)) return changes; + + // Compute cumulative old / new positions for every chunk. + interface SegInfo { + chunkIdx: number; + isSame: boolean; + oldStart: number; + oldEnd: number; + newStart: number; + newEnd: number; + } + + const segs: SegInfo[] = []; + let op = 0; + let np = 0; + + for (let i = 0; i < changes.length; i++) { + const c = changes[i]; + const n = constructLines(c.value).length; + const isSame = !c.added && !c.removed; + segs.push({ + chunkIdx: i, + isSame, + oldStart: op, + oldEnd: c.added ? op : op + n, + newStart: np, + newEnd: c.removed ? np : np + n, + }); + if (!c.added) op += n; + if (!c.removed) np += n; + } + + // Collect SAME segments only. + const sameSegs = segs.filter(s => s.isSame); + if (sameSegs.length < 2) return changes; + + // Find the first pair of consecutive SAMEs where the new-file gap is much + // larger than the old-file gap — a clear "split match" signal. + // Threshold: new_gap > 3 × max(old_gap, 1) AND new_gap > 3 lines. 
+ const THRESHOLD_RATIO = 3; + const THRESHOLD_ABS = 3; + + let splitIdx = -1; // index into sameSegs[] + + for (let i = 1; i < sameSegs.length; i++) { + const prev = sameSegs[i - 1]; + const curr = sameSegs[i]; + const oldGap = curr.oldStart - prev.oldEnd; + const newGap = curr.newStart - prev.newEnd; + + if ( + newGap > THRESHOLD_RATIO * Math.max(oldGap, 1) && + newGap > THRESHOLD_ABS + ) { + splitIdx = i - 1; // split starts at prev + break; + } + } + + if (splitIdx === -1) return changes; + + // Collect the contiguous-in-old run of SAMEs starting from sameSegs[splitIdx]. + const groupSames: SegInfo[] = [sameSegs[splitIdx]]; + for (let i = splitIdx + 1; i < sameSegs.length; i++) { + const last = groupSames[groupSames.length - 1]; + const next = sameSegs[i]; + // "contiguous in old" means the next SAME starts exactly where the last ended. + if (next.oldStart === last.oldEnd) { + groupSames.push(next); + } else { + break; + } + } + + const firstSameChunkIdx = groupSames[0].chunkIdx; + const lastSameChunkIdx = groupSames[groupSames.length - 1].chunkIdx; + + // Build the transformed diff array. + // Everything before the first SAME in the group stays unchanged. + const newChanges: Change[] = changes.slice(0, firstSameChunkIdx); + + // Collect the old content of all SAMEs in the group → REMOVED block. + let removedValue = ''; + for (const s of groupSames) { + removedValue += changes[s.chunkIdx].value; + } + + // Collect the new-file content (ADDED + SAME_new) from the first SAME to the + // last SAME in the group → ADDED block that immediately follows REMOVED. + let addedFromStart = ''; + for (let j = firstSameChunkIdx; j <= lastSameChunkIdx; j++) { + if (changes[j].added || !changes[j].removed) { + // ADDED chunks and SAME chunks both contribute to the new-file content. 
+ addedFromStart += changes[j].value; + } + } + + newChanges.push({ value: removedValue, removed: true }); + newChanges.push({ value: addedFromStart, added: true }); + + // Append everything after the last SAME in the group unchanged. + for (let j = lastSameChunkIdx + 1; j < changes.length; j++) { + newChanges.push(changes[j]); + } + + return newChanges; +} + /** * Computes word diff information in the line. * [TODO]: Consider adding options argument for JsDiff text block comparison @@ -474,6 +715,13 @@ const computeLineInformation = ( diffArray = diff.diffLines(oldString, newString, { newlineIsToken: false, }); + // Post-process to fix semantic issues with duplicate functions/blocks. + // Myers can match old content to a far-away identical copy, making a nearby + // modified version appear as all-added. Two complementary passes handle the + // simple [ADDED, SAME] pattern and the interleaved [ADDED,SAME,ADDED,SAME] + // pattern respectively. + diffArray = cleanupLongDistanceSame(diffArray); + diffArray = cleanupSplitMatches(diffArray); } } else { // Use our fast structural JSON diff instead of diff.diffJson diff --git a/test/compute-lines.test.ts b/test/compute-lines.test.ts index 461e744..9dabf89 100644 --- a/test/compute-lines.test.ts +++ b/test/compute-lines.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it, beforeAll } from "vitest"; -import { DiffMethod, computeLineInformation } from "../src/compute-lines"; +import { DiffMethod, DiffType, computeLineInformation } from "../src/compute-lines"; // Import the actual example JSON files import oldJson from "../examples/src/diff/json/old.json"; @@ -926,3 +926,124 @@ describe("Performance tests", (): void => { expect(duration).toBeLessThan(5000); // Should complete in under 5 seconds }); }); + +// --------------------------------------------------------------------------- +// Tests for the duplicate-function diff bug fix +// --------------------------------------------------------------------------- +// Scenario: 
old code has `getNewPlan` with body A. New code introduces a +// modified `getNewPlan` with body B *and* keeps the original copy with body A +// verbatim. Myers (minimum edit-distance) matches old→body-A-copy, making the +// modified version look all-added. After the fix the old function must be shown +// as REMOVED (i.e. highlighted as changed) rather than as DEFAULT context. +// --------------------------------------------------------------------------- + +/** Shared body-A content used across several tests below. */ +const DUPLICATE_FUNC_BODY_A = [ + " setNewPlanLoading(true);", + " try {", + " const result = await generateNewAlgorithmSchema({", + " businessLineId: selectBusiness.id,", + " day: selectMonth?.format('YYYYMMDD') || '',", + " });", + " if (result.generateNewAlgorithmSchema) {", + " setModalVisible(true);", + " setNewPlanLoading(false);", + " }", + " } catch (error) {", + " setNewPlanLoading(false);", + " message.error((error as GraphQLError).message);", + " }", +].join("\n"); + +describe("Duplicate-function diff fix (issue: same-name function matching wrong occurrence)", (): void => { + // ── sub-scenario 1: body B shares one line with body A (produces interleaved Myers pattern) ── + it("Should show old function as modified when new file adds a modified version then an identical copy (shared body line)", (): void => { + const bodyB = [ + " setActionType('new');", + " setNewPlanLoading(true);", // ← this line is also in body A + " await fetchCustomParams();", + " setCustomParamModalVisible(true);", + ].join("\n"); + + const oldCode = `const getNewPlan = async () => {\n${DUPLICATE_FUNC_BODY_A}\n};`; + + const newCode = [ + "const fetchCustomParams = async () => {", + " try {", + " const res = await queryList({ businessLineId: selectBusiness.id });", + " } catch (error) { message.error(error.message); }", + "};", + "", + `const getNewPlan = async () => {\n${bodyB}\n};`, + "", + // Verbatim copy of the old function — Myers without fix would match 
old→here + `const getNewPlan = async () => {\n${DUPLICATE_FUNC_BODY_A}\n};`, + ].join("\n"); + + const result = computeLineInformation(oldCode, newCode, true); + + // At least several old function lines must be marked as REMOVED — proving the + // diff now treats the old function as modified, not silently unchanged. + const removedLeftLines = result.lineInformation.filter( + (info) => info.left.type === DiffType.REMOVED, + ); + expect(removedLeftLines.length).toBeGreaterThan(2); + + // The diff must still contain right-side additions (the new function versions). + const addedRightLines = result.lineInformation.filter( + (info) => info.right.type === DiffType.ADDED, + ); + expect(addedRightLines.length).toBeGreaterThan(0); + }); + + // ── sub-scenario 2: body B has zero lines in common with body A (simple Myers pattern) ── + it("Should show old function as modified when body B shares no lines with body A", (): void => { + const bodyBNoShared = [ + " openConfirmDialog();", + " prepareExportData();", + " triggerValidation('plan');", + ].join("\n"); + + const oldCode = `const getNewPlan = async () => {\n${DUPLICATE_FUNC_BODY_A}\n};`; + + const newCode = [ + "const fetchCustomParams = async () => {", + " const res = await queryAlgorithmList({ id: business.id });", + " setCustomParamList(res.list);", + "};", + "", + `const getNewPlan = async () => {\n${bodyBNoShared}\n};`, + "", + `const getNewPlan = async () => {\n${DUPLICATE_FUNC_BODY_A}\n};`, + ].join("\n"); + + const result = computeLineInformation(oldCode, newCode, true); + + const removedLeftLines = result.lineInformation.filter( + (info) => info.left.type === DiffType.REMOVED, + ); + expect(removedLeftLines.length).toBeGreaterThan(2); + + const addedRightLines = result.lineInformation.filter( + (info) => info.right.type === DiffType.ADDED, + ); + expect(addedRightLines.length).toBeGreaterThan(0); + }); + + // ── sanity: adding content at the top of a file should NOT falsely trigger the fix ── + it("Should NOT 
convert unchanged context lines when new content is prepended to file", (): void => { + const oldCode = "const A = 1;\nconst B = 2;\nconst C = 3;\n"; + const newCode = "const X = 0;\nconst Y = 0;\nconst A = 1;\nconst B = 2;\nconst C = 3;\n"; + + const result = computeLineInformation(oldCode, newCode, true); + + // Lines A/B/C are truly unchanged — they must remain DEFAULT on both sides. + const leftLines = result.lineInformation + .filter((info) => info.left.lineNumber !== undefined) + .map((info) => info.left); + + const removedLines = leftLines.filter((l) => l.type === DiffType.REMOVED); + // Nothing from the old file should be removed in this scenario + expect(removedLines.length).toBe(0); + }); +});