diff --git a/cli/src/main/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractor.kt b/cli/src/main/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractor.kt index d1f5762..08b95fb 100644 --- a/cli/src/main/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractor.kt +++ b/cli/src/main/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractor.kt @@ -284,6 +284,24 @@ class CalculateImpactedTargetsInteractor : KoinComponent { val fromGraph = moduleGraphParser.parseModuleGraph(fromModuleGraphJson) val toGraph = moduleGraphParser.parseModuleGraph(toModuleGraphJson) + // Parse-asymmetry guard for https://github.com/Tinder/bazel-diff/issues/335. + // The JSON strings differ (otherwise we wouldn't be here), but exactly one side parsed + // to a non-empty graph. `bazel mod graph --output=json` always emits at least the root + // module, so an empty parse means the JSON was unparseable -- truncated, corrupted, a + // future serialisation change, or an older stderr-polluted capture from before #336. + // Treating every module on the parseable side as "added" would explode the impacted + // set: ~5k serial rdeps subprocesses with a real BazelQueryService, or `allTargets.keys` + // with none bound. Return empty so callers fall back to the per-target hash diff, which + // is bounded and correct for this case. + if (fromGraph.isEmpty() != toGraph.isEmpty()) { + logger.w { + "Module graph parse asymmetry detected (one side parsed to ${fromGraph.size} modules, " + + "the other to ${toGraph.size}). Falling back to per-target hash diff. " + + "See https://github.com/Tinder/bazel-diff/issues/335" + } + return emptySet() + } + // Find changed modules val changedModules = moduleGraphParser.findChangedModules(fromGraph, toGraph) diff --git a/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorTest.kt b/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorTest.kt index 5a647e2..b4b4d3a 100644 --- a/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorTest.kt +++ b/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorTest.kt @@ -758,4 +758,133 @@ class CalculateImpactedTargetsInteractorTest : KoinTest { assertThat(queryExpression).contains("@@aspect_bazel_lib~2.23.0//...") assertThat(queryExpression).contains(" + ") } + + // ------------------------------------------------------------------------ + // Regression coverage for https://github.com/Tinder/bazel-diff/issues/335 fix #2 + // ------------------------------------------------------------------------ + // When the two module-graph JSON payloads passed to get-impacted-targets are not + // byte-equal but one of them fails to parse, the historical behaviour was for + // `findChangedModules(emptyMap, fullMap)` to report every module in the successfully- + // parsed graph as "added" and the impacted set exploded: + // + // * With a BazelQueryService bound, every "added" module fanned out into an rdeps + // query against its canonical repo(s). On the workspace in #335 that produced + // ~5,000 serial subprocesses and the run took multiple hours. + // * With no BazelQueryService bound (or one that errored), the failure-tolerant + // fallback path returned `allTargets.keys` -- every hashed label was reported as + // impacted, which on a large workspace defeated the point of running bazel-diff. + // + // Both outcomes were far worse than the per-target hash diff that would have run if + // bazel-diff hadn't been told there was a module graph at all. + // + // The fix in `detectChangedModules` detects parse asymmetry (one side parses to a + // non-empty graph, the other parses to empty while the JSONs were not byte-equal) and + // returns an empty changed-modules set so callers fall back to + // `computeSimpleImpactedTargets`. `bazel mod graph --output=json` always emits at + // least the root module, so an empty parse really does mean parse failure -- not "no + // modules". + @Test + fun execute_parseAsymmetryFallsBackToSimpleHashDiff_regressionForIssue335Fix2() { + // Three targets, only //:changed has actually changed. + val startHashes = + mapOf( + "//:changed" to TargetHash("Rule", "h1-old", "h1-old"), + "//:unchanged_a" to TargetHash("Rule", "h2", "h2"), + "//:unchanged_b" to TargetHash("Rule", "h3", "h3"), + ) + val endHashes = + mapOf( + "//:changed" to TargetHash("Rule", "h1-new", "h1-new"), + "//:unchanged_a" to TargetHash("Rule", "h2", "h2"), + "//:unchanged_b" to TargetHash("Rule", "h3", "h3"), + ) + + // fromGraph has no '{' at all, so ModuleGraphParser.parseModuleGraph falls through + // to `if (start < 0) return emptyMap()`. Mirrors a corrupted base-graph stored from + // an old bazel-diff run or a totally different serialisation format. + val fromModuleGraph = "garbage-non-json-payload" + // toGraph parses cleanly to two modules; without the fix those two modules look + // like "added" relative to the empty fromGraph. + val toModuleGraph = + """ + { + "key": "root", "name": "root", "version": "", "apparentName": "root", + "dependencies": [ + {"key": "modA@1.0", "name": "modA", "version": "1.0", "apparentName": "modA"}, + {"key": "modB@1.0", "name": "modB", "version": "1.0", "apparentName": "modB"} + ] + } + """ + .trimIndent() + + val outputWriter = StringWriter() + CalculateImpactedTargetsInteractor() + .execute( + from = startHashes, + to = endHashes, + outputWriter = outputWriter, + targetTypes = null, + fromModuleGraphJson = fromModuleGraph, + toModuleGraphJson = toModuleGraph, + ) + + val impacted = outputWriter.toString().trimEnd('\n').split("\n").filter { it.isNotEmpty() }.toSet() + // With the fallback: only the actually-changed target is in the impacted set. + // Without the fallback: every hashed target would be reported because + // queryTargetsDependingOnModules returns `allTargets.keys` when no BazelQueryService + // is bound -- i.e. {//:changed, //:unchanged_a, //:unchanged_b}. + assertThat(impacted).containsOnly("//:changed") + } + + // Same asymmetry case as above, but routed through executeWithDistances (the path + // taken when --depsFile is passed to get-impacted-targets). The fix must cover both + // execute() and executeWithDistances() -- both branch on `changedModules.isNotEmpty()` + // and call queryTargetsDependingOnModules independently. + @Test + fun executeWithDistances_parseAsymmetryFallsBackToSimpleHashDiff_regressionForIssue335Fix2() { + val startHashes = + mapOf( + "//:changed" to TargetHash("Rule", "h1-old", "h1-old"), + "//:unchanged_a" to TargetHash("Rule", "h2", "h2"), + "//:unchanged_b" to TargetHash("Rule", "h3", "h3"), + ) + val endHashes = + mapOf( + "//:changed" to TargetHash("Rule", "h1-new", "h1-new"), + "//:unchanged_a" to TargetHash("Rule", "h2", "h2"), + "//:unchanged_b" to TargetHash("Rule", "h3", "h3"), + ) + + val fromModuleGraph = "garbage-non-json-payload" + val toModuleGraph = + """ + { + "key": "root", "name": "root", "version": "", "apparentName": "root", + "dependencies": [ + {"key": "modA@1.0", "name": "modA", "version": "1.0", "apparentName": "modA"} + ] + } + """ + .trimIndent() + + val outputWriter = StringWriter() + CalculateImpactedTargetsInteractor() + .executeWithDistances( + from = startHashes, + to = endHashes, + depEdges = emptyMap(), + outputWriter = outputWriter, + targetTypes = null, + fromModuleGraphJson = fromModuleGraph, + toModuleGraphJson = toModuleGraph, + ) + + val output = outputWriter.toString() + // With the fallback: only //:changed appears in the distance-metrics JSON. + // Without the fallback: every target is reported, all at distance 0 (because the + // "module-impacted" branch in executeWithDistances forces distance 0). + assertThat(output).contains("//:changed") + assertThat(output).doesNotContain("//:unchanged_a") + assertThat(output).doesNotContain("//:unchanged_b") + } }