From 432f58798187624449a20ad42b0b5c6e73ae354f Mon Sep 17 00:00:00 2001 From: Maxwell Elliott Date: Sat, 16 May 2026 05:10:34 -0400 Subject: [PATCH 1/2] Reproducer for #335 fix #2: parse-asymmetry fallback to simple hash diff When the two module-graph JSON payloads passed to get-impacted-targets are not byte-equal but one of them fails to parse, findChangedModules(emptyMap, fullMap) reports every module in the populated graph as "added". The impacted set then explodes -- either fanning out into one rdeps subprocess per matched repo (the multi-hour fan-out in #335) or, when no BazelQueryService is bound, falling through to allTargets.keys so every hashed label is reported as impacted. Both outcomes are far worse than a per-target hash diff. PR #336 made the parser tolerant of stderr-polluted JSON, which covers the historical 18.0.x base graph case, but the asymmetry remains a real failure mode for genuinely unparseable input (corrupted base graphs, truncation, future serialization format changes). The reproducer adds two @Ignore'd tests in CalculateImpactedTargetsInteractorTest covering both code paths that branch on changedModules.isNotEmpty(): execute() and executeWithDistances(). Both feed a non-JSON fromGraph and a parseable toGraph and assert that only the actually-hash-changed target appears in the impacted set. Today both tests fail (every target is reported); the @Ignore'd state keeps CI green until fix #2 lands. Generated with Claude Code (https://claude.com/claude-code) --- .../CalculateImpactedTargetsInteractorTest.kt | 149 ++++++++++++++++++ 1 file changed, 149 insertions(+) diff --git a/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorTest.kt b/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorTest.kt index 5a647e2..ca32617 100644 --- a/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorTest.kt +++ b/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorTest.kt @@ -758,4 +758,153 @@ class CalculateImpactedTargetsInteractorTest : KoinTest { assertThat(queryExpression).contains("@@aspect_bazel_lib~2.23.0//...") assertThat(queryExpression).contains(" + ") } + + // ------------------------------------------------------------------------ + // Reproducer for https://github.com/Tinder/bazel-diff/issues/335 fix #2 + // ------------------------------------------------------------------------ + // When the two module-graph JSON payloads passed to get-impacted-targets are not + // byte-equal but one of them fails to parse, `findChangedModules(emptyMap, fullMap)` + // reports every module in the successfully-parsed graph as "added" and the impacted + // set explodes: + // + // * With a BazelQueryService bound, every "added" module fans out into an rdeps + // query against its canonical repo(s). On the workspace in #335 that produced + // ~5,000 serial subprocesses and the run took multiple hours. + // * With no BazelQueryService bound (or one that errors), the failure-tolerant + // fallback path returns `allTargets.keys` -- every hashed label is reported as + // impacted, which on a large workspace defeats the point of running bazel-diff. + // + // Both outcomes are far worse than the per-target hash diff that would have run if + // bazel-diff hadn't been told there was a module graph at all. + // + // Today this asymmetry was caused by the stderr-pollution shape #336 fixed (an + // 18.0.x base graph fed into 18.1.0+ would parse to empty before that PR). It can + // still happen for genuinely unparseable input: a truncated/corrupted base graph + // pulled out of object storage, a future bazel-mod-graph serialisation change, or + // a base graph captured before bazel itself learned to emit `--output=json`. + // + // Fix #2 from the issue: when one parsed map is empty and the other is not, while + // `fromModuleGraphJson != toModuleGraphJson`, fall back to `computeSimpleImpactedTargets` + // instead of treating every module in the populated graph as "added". A per-target + // hash diff is bounded by the size of the hash set; the module-fan-out path is not. + // + // The reproducer is `@Ignore`d so CI stays green. Drop the annotation once the + // asymmetry-detection fallback lands and confirm this test passes: only the target + // whose hash actually changed should appear in the impacted set. + @Test + @org.junit.Ignore( + "Reproducer for https://github.com/Tinder/bazel-diff/issues/335 fix #2 " + + "(parse-asymmetry should fall back to computeSimpleImpactedTargets). " + + "Today an unparseable base graph + parseable head graph causes every module " + + "in the head graph to be treated as 'added', and -- with no BazelQueryService " + + "bound to handle the rdeps fan-out -- every target ends up reported as " + + "impacted. The desired behaviour is to detect the asymmetry and fall back " + + "to a per-target hash diff. Drop @Ignore once the fallback lands.") + fun execute_parseAsymmetryFallsBackToSimpleHashDiff_reproducerForIssue335Fix2() { + // Three targets, only //:changed has actually changed. + val startHashes = + mapOf( + "//:changed" to TargetHash("Rule", "h1-old", "h1-old"), + "//:unchanged_a" to TargetHash("Rule", "h2", "h2"), + "//:unchanged_b" to TargetHash("Rule", "h3", "h3"), + ) + val endHashes = + mapOf( + "//:changed" to TargetHash("Rule", "h1-new", "h1-new"), + "//:unchanged_a" to TargetHash("Rule", "h2", "h2"), + "//:unchanged_b" to TargetHash("Rule", "h3", "h3"), + ) + + // fromGraph has no '{' at all, so ModuleGraphParser.parseModuleGraph falls through + // to `if (start < 0) return emptyMap()`. Mirrors a corrupted base-graph stored from + // an old bazel-diff run or a totally different serialisation format. + val fromModuleGraph = "garbage-non-json-payload" + // toGraph parses cleanly to two modules; without the fix those two modules look + // like "added" relative to the empty fromGraph. + val toModuleGraph = + """ + { + "key": "root", "name": "root", "version": "", "apparentName": "root", + "dependencies": [ + {"key": "modA@1.0", "name": "modA", "version": "1.0", "apparentName": "modA"}, + {"key": "modB@1.0", "name": "modB", "version": "1.0", "apparentName": "modB"} + ] + } + """ + .trimIndent() + + val outputWriter = StringWriter() + CalculateImpactedTargetsInteractor() + .execute( + from = startHashes, + to = endHashes, + outputWriter = outputWriter, + targetTypes = null, + fromModuleGraphJson = fromModuleGraph, + toModuleGraphJson = toModuleGraph, + ) + + val impacted = outputWriter.toString().trimEnd('\n').split("\n").filter { it.isNotEmpty() }.toSet() + // With the fallback: only the actually-changed target is in the impacted set. + // Without the fallback (today): every hashed target is reported because + // queryTargetsDependingOnModules returns `allTargets.keys` when no BazelQueryService + // is bound -- i.e. {//:changed, //:unchanged_a, //:unchanged_b}. + assertThat(impacted).containsOnly("//:changed") + } + + // Same asymmetry case as above, but routed through executeWithDistances (the path + // taken when --depsFile is passed to get-impacted-targets). The fix must cover both + // execute() and executeWithDistances() -- both branch on `changedModules.isNotEmpty()` + // and call queryTargetsDependingOnModules independently. + @Test + @org.junit.Ignore( + "Reproducer for https://github.com/Tinder/bazel-diff/issues/335 fix #2 " + + "via the executeWithDistances() path. Drop @Ignore once the asymmetry " + + "fallback lands and confirm only the hash-diff target is reported.") + fun executeWithDistances_parseAsymmetryFallsBackToSimpleHashDiff_reproducerForIssue335Fix2() { + val startHashes = + mapOf( + "//:changed" to TargetHash("Rule", "h1-old", "h1-old"), + "//:unchanged_a" to TargetHash("Rule", "h2", "h2"), + "//:unchanged_b" to TargetHash("Rule", "h3", "h3"), + ) + val endHashes = + mapOf( + "//:changed" to TargetHash("Rule", "h1-new", "h1-new"), + "//:unchanged_a" to TargetHash("Rule", "h2", "h2"), + "//:unchanged_b" to TargetHash("Rule", "h3", "h3"), + ) + + val fromModuleGraph = "garbage-non-json-payload" + val toModuleGraph = + """ + { + "key": "root", "name": "root", "version": "", "apparentName": "root", + "dependencies": [ + {"key": "modA@1.0", "name": "modA", "version": "1.0", "apparentName": "modA"} + ] + } + """ + .trimIndent() + + val outputWriter = StringWriter() + CalculateImpactedTargetsInteractor() + .executeWithDistances( + from = startHashes, + to = endHashes, + depEdges = emptyMap(), + outputWriter = outputWriter, + targetTypes = null, + fromModuleGraphJson = fromModuleGraph, + toModuleGraphJson = toModuleGraph, + ) + + val output = outputWriter.toString() + // With the fallback: only //:changed appears in the distance-metrics JSON. + // Without the fallback: every target is reported, all at distance 0 (because the + // "module-impacted" branch in executeWithDistances forces distance 0). + assertThat(output).contains("//:changed") + assertThat(output).doesNotContain("//:unchanged_a") + assertThat(output).doesNotContain("//:unchanged_b") + } } From 1bcec7dd436f6a66d69876e4086160c23d13acc9 Mon Sep 17 00:00:00 2001 From: Maxwell Elliott Date: Mon, 18 May 2026 11:02:45 -0400 Subject: [PATCH 2/2] Fix #335 fix #2: fall back to per-target hash diff on parse asymmetry `bazel mod graph --output=json` always emits at least the root module, so a parsed module-graph map that is empty really does mean parse failure (a truncated/corrupted base graph from object storage, a future serialisation change, an old stderr-polluted 18.0.x capture from before #336). When the two JSON payloads are not byte-equal but exactly one side parses to empty, the historical behaviour was for `findChangedModules(emptyMap, fullMap)` to report every module on the parseable side as "added" and the impacted set exploded: * With a `BazelQueryService` bound, every "added" module fanned out into an `rdeps` query against its canonical repo(s). On the workspace in #335 that produced ~5,000 serial subprocesses and the run took multiple hours. * With no `BazelQueryService` bound (or one that errored), the failure-tolerant fallback path returned `allTargets.keys` -- every hashed label was reported as impacted, which on a large workspace defeats the point of running bazel-diff. The fix detects this asymmetry in `detectChangedModules` and returns an empty changed-modules set, so callers fall through to `computeSimpleImpactedTargets` (the per-target hash diff that would have run if no module graph had been supplied). A per-target hash diff is bounded by the size of the hash set; the module-fan-out path is not. The two regression tests added in the parent reproducer commit have their `@Ignore` annotations dropped and now pass: 21/21 tests in `CalculateImpactedTargetsInteractorTest` (19 pre-existing + 2 new). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../CalculateImpactedTargetsInteractor.kt | 18 ++++++ .../CalculateImpactedTargetsInteractorTest.kt | 56 ++++++------------- 2 files changed, 36 insertions(+), 38 deletions(-) diff --git a/cli/src/main/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractor.kt b/cli/src/main/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractor.kt index d1f5762..08b95fb 100644 --- a/cli/src/main/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractor.kt +++ b/cli/src/main/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractor.kt @@ -284,6 +284,24 @@ class CalculateImpactedTargetsInteractor : KoinComponent { val fromGraph = moduleGraphParser.parseModuleGraph(fromModuleGraphJson) val toGraph = moduleGraphParser.parseModuleGraph(toModuleGraphJson) + // Parse-asymmetry guard for https://github.com/Tinder/bazel-diff/issues/335. + // The JSON strings differ (otherwise we wouldn't be here), but exactly one side parsed + // to a non-empty graph. `bazel mod graph --output=json` always emits at least the root + // module, so an empty parse means the JSON was unparseable -- truncated, corrupted, a + // future serialisation change, or an older stderr-polluted capture from before #336. + // Treating every module on the parseable side as "added" would explode the impacted + // set: ~5k serial rdeps subprocesses with a real BazelQueryService, or `allTargets.keys` + // with none bound. Return empty so callers fall back to the per-target hash diff, which + // is bounded and correct for this case. + if (fromGraph.isEmpty() != toGraph.isEmpty()) { + logger.w { + "Module graph parse asymmetry detected (one side parsed to ${fromGraph.size} modules, " + + "the other to ${toGraph.size}). Falling back to per-target hash diff. " + + "See https://github.com/Tinder/bazel-diff/issues/335" + } + return emptySet() + } + // Find changed modules val changedModules = moduleGraphParser.findChangedModules(fromGraph, toGraph) diff --git a/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorTest.kt b/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorTest.kt index ca32617..b4b4d3a 100644 --- a/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorTest.kt +++ b/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorTest.kt @@ -760,47 +760,31 @@ class CalculateImpactedTargetsInteractorTest : KoinTest { } // ------------------------------------------------------------------------ - // Reproducer for https://github.com/Tinder/bazel-diff/issues/335 fix #2 + // Regression coverage for https://github.com/Tinder/bazel-diff/issues/335 fix #2 // ------------------------------------------------------------------------ // When the two module-graph JSON payloads passed to get-impacted-targets are not - // byte-equal but one of them fails to parse, `findChangedModules(emptyMap, fullMap)` - // reports every module in the successfully-parsed graph as "added" and the impacted - // set explodes: + // byte-equal but one of them fails to parse, the historical behaviour was for + // `findChangedModules(emptyMap, fullMap)` to report every module in the successfully- + // parsed graph as "added" and the impacted set exploded: // - // * With a BazelQueryService bound, every "added" module fans out into an rdeps + // * With a BazelQueryService bound, every "added" module fanned out into an rdeps // query against its canonical repo(s). On the workspace in #335 that produced // ~5,000 serial subprocesses and the run took multiple hours. - // * With no BazelQueryService bound (or one that errors), the failure-tolerant - // fallback path returns `allTargets.keys` -- every hashed label is reported as - // impacted, which on a large workspace defeats the point of running bazel-diff. + // * With no BazelQueryService bound (or one that errored), the failure-tolerant + // fallback path returned `allTargets.keys` -- every hashed label was reported as + // impacted, which on a large workspace defeated the point of running bazel-diff. // - // Both outcomes are far worse than the per-target hash diff that would have run if + // Both outcomes were far worse than the per-target hash diff that would have run if // bazel-diff hadn't been told there was a module graph at all. // - // Today this asymmetry was caused by the stderr-pollution shape #336 fixed (an - // 18.0.x base graph fed into 18.1.0+ would parse to empty before that PR). It can - // still happen for genuinely unparseable input: a truncated/corrupted base graph - // pulled out of object storage, a future bazel-mod-graph serialisation change, or - // a base graph captured before bazel itself learned to emit `--output=json`. - // - // Fix #2 from the issue: when one parsed map is empty and the other is not, while - // `fromModuleGraphJson != toModuleGraphJson`, fall back to `computeSimpleImpactedTargets` - // instead of treating every module in the populated graph as "added". A per-target - // hash diff is bounded by the size of the hash set; the module-fan-out path is not. - // - // The reproducer is `@Ignore`d so CI stays green. Drop the annotation once the - // asymmetry-detection fallback lands and confirm this test passes: only the target - // whose hash actually changed should appear in the impacted set. + // The fix in `detectChangedModules` detects parse asymmetry (one side parses to a + // non-empty graph, the other parses to empty while the JSONs were not byte-equal) and + // returns an empty changed-modules set so callers fall back to + // `computeSimpleImpactedTargets`. `bazel mod graph --output=json` always emits at + // least the root module, so an empty parse really does mean parse failure -- not "no + // modules". @Test - @org.junit.Ignore( - "Reproducer for https://github.com/Tinder/bazel-diff/issues/335 fix #2 " + - "(parse-asymmetry should fall back to computeSimpleImpactedTargets). " + - "Today an unparseable base graph + parseable head graph causes every module " + - "in the head graph to be treated as 'added', and -- with no BazelQueryService " + - "bound to handle the rdeps fan-out -- every target ends up reported as " + - "impacted. The desired behaviour is to detect the asymmetry and fall back " + - "to a per-target hash diff. Drop @Ignore once the fallback lands.") - fun execute_parseAsymmetryFallsBackToSimpleHashDiff_reproducerForIssue335Fix2() { + fun execute_parseAsymmetryFallsBackToSimpleHashDiff_regressionForIssue335Fix2() { // Three targets, only //:changed has actually changed. val startHashes = mapOf( @@ -846,7 +830,7 @@ class CalculateImpactedTargetsInteractorTest : KoinTest { val impacted = outputWriter.toString().trimEnd('\n').split("\n").filter { it.isNotEmpty() }.toSet() // With the fallback: only the actually-changed target is in the impacted set. - // Without the fallback (today): every hashed target is reported because + // Without the fallback: every hashed target would be reported because // queryTargetsDependingOnModules returns `allTargets.keys` when no BazelQueryService // is bound -- i.e. {//:changed, //:unchanged_a, //:unchanged_b}. assertThat(impacted).containsOnly("//:changed") @@ -857,11 +841,7 @@ class CalculateImpactedTargetsInteractorTest : KoinTest { // execute() and executeWithDistances() -- both branch on `changedModules.isNotEmpty()` // and call queryTargetsDependingOnModules independently. @Test - @org.junit.Ignore( - "Reproducer for https://github.com/Tinder/bazel-diff/issues/335 fix #2 " + - "via the executeWithDistances() path. Drop @Ignore once the asymmetry " + - "fallback lands and confirm only the hash-diff target is reported.") - fun executeWithDistances_parseAsymmetryFallsBackToSimpleHashDiff_reproducerForIssue335Fix2() { + fun executeWithDistances_parseAsymmetryFallsBackToSimpleHashDiff_regressionForIssue335Fix2() { val startHashes = mapOf( "//:changed" to TargetHash("Rule", "h1-old", "h1-old"),