From 108c7b25fc6b07bc08d7a3473f2f1bcffda78336 Mon Sep 17 00:00:00 2001 From: Maxwell Elliott Date: Tue, 12 May 2026 10:45:55 -0400 Subject: [PATCH 1/2] Add reproducer test for #335 loose substring match in queryTargetsDependingOnModules Issue #335 lists multiple potential fixes; my first PR (#340) covered fix #1 (parser robustness). This PR adds a reproducer for fix #3: the loose substring match in CalculateImpactedTargetsInteractor.queryTargetsDependingOnModules. The current code is: val moduleRepos = allTargets.keys .filter { it.startsWith("@@") && it.contains(moduleName) } .map { it.substring(2).substringBefore("//") } .toSet() `it.contains(moduleName)` is a very loose match. A single changed module like `aspect_bazel_lib` matches every canonical repo whose name starts with that string -- e.g. `@@aspect_bazel_lib+`, `@@aspect_bazel_lib++toolchains+bats_toolchains`, etc. Each match becomes its own serial `bazel query rdeps(//..., @@//...)` subprocess. On the workspace in #335 that fan-out produced ~5,000 subprocesses and took multiple hours. New test class CalculateImpactedTargetsInteractorIssue335Test: - Wires its own koin module with a mock BazelQueryService that captures every query expression. - Simulates a workspace with one module (aspect_bazel_lib) plus two module-extension repos whose canonical names begin with the same apparent name. - Asserts that resolving the changed module yields exactly ONE rdeps query, scoped to the actual changed repo. @Ignore'd until the match is tightened (match canonical repo key exactly, or look up via `bazel mod dump_repo_mapping`). cli/BUILD wires up the new kt_jvm_test target. Refs https://github.com/Tinder/bazel-diff/issues/335 Co-Authored-By: Claude Opus 4.7 (1M context) --- cli/BUILD | 6 + ...teImpactedTargetsInteractorIssue335Test.kt | 161 ++++++++++++++++++ 2 files changed, 167 insertions(+) create mode 100644 cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorIssue335Test.kt diff --git a/cli/BUILD b/cli/BUILD index ec0c770..0bdb52b 100644 --- a/cli/BUILD +++ b/cli/BUILD @@ -72,6 +72,12 @@ kt_jvm_test( runtime_deps = [":cli-test-lib"], ) +kt_jvm_test( + name = "CalculateImpactedTargetsInteractorIssue335Test", + test_class = "com.bazel_diff.interactor.CalculateImpactedTargetsInteractorIssue335Test", + runtime_deps = [":cli-test-lib"], +) + kt_jvm_test( name = "NormalisingPathConverterTest", test_class = "com.bazel_diff.cli.converter.NormalisingPathConverterTest", diff --git a/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorIssue335Test.kt b/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorIssue335Test.kt new file mode 100644 index 0000000..e17af77 --- /dev/null +++ b/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorIssue335Test.kt @@ -0,0 +1,161 @@ +package com.bazel_diff.interactor + +import assertk.assertThat +import assertk.assertions.contains +import assertk.assertions.doesNotContain +import assertk.assertions.hasSize +import com.bazel_diff.SilentLogger +import com.bazel_diff.bazel.BazelQueryService +import com.bazel_diff.bazel.BazelTarget +import com.bazel_diff.hash.TargetHash +import com.bazel_diff.log.Logger +import com.google.gson.GsonBuilder +import java.io.StringWriter +import kotlinx.coroutines.runBlocking +import org.junit.After +import org.junit.Before +import org.junit.Test +import org.koin.core.context.startKoin +import org.koin.core.context.stopKoin +import org.koin.dsl.module +import org.koin.test.KoinTest +import org.mockito.kotlin.any +import org.mockito.kotlin.eq +import org.mockito.kotlin.mock +import org.mockito.kotlin.whenever + +/** + * Reproducer for the "loose substring match" tail of + * https://github.com/Tinder/bazel-diff/issues/335 (fix #3 in the issue body). + * + * `CalculateImpactedTargetsInteractor.queryTargetsDependingOnModules` resolves changed module + * keys to canonical external repos with: + * + * ``` + * val moduleRepos = allTargets.keys + * .filter { it.startsWith("@@") && it.contains(moduleName) } + * .map { it.substring(2).substringBefore("//") } + * .toSet() + * ``` + * + * `it.contains(moduleName)` is a very loose match. In a workspace with many module extensions + * a single changed module like `aspect_bazel_lib` matches every canonical repo whose name + * starts with that string -- e.g. `@@aspect_bazel_lib+`, `@@aspect_bazel_lib++toolchains+bats_toolchains`, + * etc. Each match becomes its own serial `bazel query rdeps(//..., @@//...)` subprocess. + * On the workspace in #335 that fan-out produced ~5,000 subprocesses and took multiple hours. + * + * This test asserts that resolving a single changed module yields at most one rdeps query + * (the one targeting the actual changed repo). The fix is either to match the canonical repo + * key exactly (e.g. `aspect_bazel_lib+` -- the part before any further `+`/`~`), or to look up + * the repo via `bazel mod dump_repo_mapping` instead of a substring scan. + */ +class CalculateImpactedTargetsInteractorIssue335Test : KoinTest { + + private val queryService: BazelQueryService = mock() + private val capturedQueries = mutableListOf() + + @Before + fun setUp() { + // Capture every query expression that flows through queryService.query(...). + runBlocking { + whenever(queryService.query(any(), eq(false))).thenAnswer { invocation -> + val q = invocation.getArgument(0) + capturedQueries.add(q) + emptyList() + } + whenever(queryService.query(any())).thenAnswer { invocation -> + val q = invocation.getArgument(0) + capturedQueries.add(q) + emptyList() + } + } + startKoin { + modules( + module { + single { SilentLogger } + single { queryService } + single { GsonBuilder().disableHtmlEscaping().setPrettyPrinting().create() } + }) + } + } + + @After + fun tearDown() { + stopKoin() + } + + @Test + @org.junit.Ignore( + "Reproducer for https://github.com/Tinder/bazel-diff/issues/335 fix #3 (loose " + + "substring match in queryTargetsDependingOnModules). Today a single changed module " + + "fans out into one rdeps subprocess per repo whose canonical name contains the " + + "apparent module name, which explodes on workspaces with many module extensions. " + + "Drop @Ignore once the match is tightened to the actual canonical repo key.") + fun queryTargetsDependingOnModules_doesNotOverMatchExtensionRepoNames_reproducerForIssue335() { + // Simulate the #335 workspace: one module (aspect_bazel_lib) and several module-extension + // repos whose canonical names also begin with "aspect_bazel_lib". The bug is that all + // of those get treated as the "changed module" and each spawns its own rdeps query. + val startHashes = + mapOf( + "//:target1" to TargetHash("Rule", "h1", "h1"), + "@@aspect_bazel_lib+//foo:bar" to TargetHash("Rule", "h2-v1", "h2-v1"), + "@@aspect_bazel_lib++toolchains+bats_toolchains//x:y" to + TargetHash("Rule", "h3", "h3"), + "@@aspect_bazel_lib++toolchains+yq_toolchains//x:y" to + TargetHash("Rule", "h4", "h4"), + "@@unrelated_repo+//pkg:tgt" to TargetHash("Rule", "h5", "h5"), + ) + // Only the aspect_bazel_lib *module* itself changes version; the extension repos do not + // appear in the module graph at all. + val endHashes = startHashes.toMutableMap() + endHashes["@@aspect_bazel_lib+//foo:bar"] = TargetHash("Rule", "h2-v2", "h2-v2") + + val fromModuleGraph = + """ + { + "key": "root", + "name": "root", + "version": "", + "apparentName": "root", + "dependencies": [ + {"key": "aspect_bazel_lib@1.0.0", "name": "aspect_bazel_lib", "version": "1.0.0", "apparentName": "aspect_bazel_lib"} + ] + } + """ + .trimIndent() + val toModuleGraph = + """ + { + "key": "root", + "name": "root", + "version": "", + "apparentName": "root", + "dependencies": [ + {"key": "aspect_bazel_lib@2.0.0", "name": "aspect_bazel_lib", "version": "2.0.0", "apparentName": "aspect_bazel_lib"} + ] + } + """ + .trimIndent() + + val outputWriter = StringWriter() + CalculateImpactedTargetsInteractor() + .execute( + from = startHashes, + to = endHashes, + outputWriter = outputWriter, + targetTypes = null, + fromModuleGraphJson = fromModuleGraph, + toModuleGraphJson = toModuleGraph, + ) + + val rdepsQueries = capturedQueries.filter { it.contains("rdeps(") } + // Desired: exactly ONE rdeps query, scoped to the actual changed repo. + // Today: one query per canonical repo containing the substring "aspect_bazel_lib", so + // this list has 3 entries -- aspect_bazel_lib+, aspect_bazel_lib++toolchains+bats_toolchains, + // and aspect_bazel_lib++toolchains+yq_toolchains. + assertThat(rdepsQueries).hasSize(1) + assertThat(rdepsQueries[0]).contains("@@aspect_bazel_lib+//...") + assertThat(rdepsQueries[0]).doesNotContain("toolchains+bats_toolchains") + assertThat(rdepsQueries[0]).doesNotContain("toolchains+yq_toolchains") + } +} From e1fe6395f403a650c671821025eb13b3724830ef Mon Sep 17 00:00:00 2001 From: Maxwell Elliott Date: Mon, 18 May 2026 13:40:52 -0400 Subject: [PATCH 2/2] Fix #335 fix #3: tighten canonical-repo match in queryTargetsDependingOnModules `queryTargetsDependingOnModules` resolved each changed module key to its canonical bzlmod repos by scanning `allTargets.keys` with `it.startsWith("@@") && it.contains(moduleName)`. The `contains` match is far too loose: a single changed module like `aspect_bazel_lib` matched every canonical repo whose name began with that string -- e.g. `@@aspect_bazel_lib+`, `@@aspect_bazel_lib++toolchains+bats_toolchains`, `@@aspect_bazel_lib++toolchains+yq_toolchains`, etc. Each match was then unioned into the rdeps query expression (after the per-repo loop became a single unioned query in an earlier change), but the same fan-out behaviour on older revisions produced ~5,000 serial subprocesses on the workspace in #335 and took multiple hours. Tighten the match to the module's *base* canonical repo only: `` where `` is `+` or `~` and the remainder contains no further separators. Module-extension repos always layer additional `+`/`~` segments on top and are rejected here. That narrowing is safe because extension-repo content changes already propagate to main-repo consumers through the per-target hash dep chain: `computeSimpleImpactedTargets` runs unconditionally for `@@*` labels and the build-graph hasher folds dep hashes into each rule's hash, so a consumer of a changed extension repo flips its own hash and is picked up without needing an rdeps query targeting the extension repo. The rdeps query exists to catch the harder case -- MODULE.bazel-only bumps that don't flip any hash but do change which version of a base module repo is selected -- and that is exactly the case the base-repo match handles. The reproducer test class from PR #346 had a latent Mockito misuse in its `@Before`: the second `whenever(queryService.query(any()))` stub omits the default-argument matcher and was throwing `InvalidUseOfMatchersException` before any assertion ran. The test was `@Ignore`d so the bug was never observed. Dropping the redundant stub and dropping `@Ignore` -- the first stub already covers the production caller, which always passes `useCquery = false` explicitly. Verification: - `bazel test //cli:CalculateImpactedTargetsInteractorTest` -- PASSED (21 pre-existing tests still pass, including `testUnionsRdepsAcrossChangedModules` which exercises the `~` form through the same code path). - `bazel test //cli:CalculateImpactedTargetsInteractorIssue335Test` -- PASSED (the un-`@Ignore`d reproducer now passes; reverting just the production change reproduces the bug with the loose match yielding `rdeps(//..., @@aspect_bazel_lib+//... + @@aspect_bazel_lib++toolchains+bats_toolchains//... + @@aspect_bazel_lib++toolchains+yq_toolchains//...)`). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../CalculateImpactedTargetsInteractor.kt | 56 ++++++++++++++++--- ...teImpactedTargetsInteractorIssue335Test.kt | 48 ++++++---------- 2 files changed, 66 insertions(+), 38 deletions(-) diff --git a/cli/src/main/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractor.kt b/cli/src/main/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractor.kt index 08b95fb..c0ef72c 100644 --- a/cli/src/main/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractor.kt +++ b/cli/src/main/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractor.kt @@ -340,17 +340,36 @@ class CalculateImpactedTargetsInteractor : KoinComponent { return allTargets.keys } - // Map every changed module to its matching bzlmod canonical repos. A single module - // name can match multiple canonical repos (e.g. rules_jvm_external matches - // rules_jvm_external~~maven~maven, rules_jvm_external~~toolchains~...). Log per - // module so an operator can attribute a pathologically large impacted set back to - // a specific module bump. + // Map every changed module to its bzlmod canonical *base* repo. We deliberately do + // NOT pull in module-extension repos here (e.g. `aspect_bazel_lib++toolchains+bats_toolchains`): + // a loose `it.contains(moduleName)` substring scan matched all of them and produced + // ~5,000 serial rdeps subprocesses on the workspace in + // https://github.com/Tinder/bazel-diff/issues/335. Extension-repo target hashes + // propagate to main-repo consumers via the normal dep-hash chain, so the simple + // hash diff already catches consumers when an extension repo's contents actually + // change. The rdeps query only needs to find main-repo targets whose dependency on + // the module's *base* repo isn't reflected in a hash flip (MODULE.bazel-only bumps, + // dep additions/removals against an otherwise-unchanged repo, etc.). + // + // Canonical-name forms we recognise as the base repo for module X: + // * Bazel 7+ default form: `X+` (no version embedded) + // * Bazel <7 form / multi-version: `X~` (e.g. abseil-cpp~20240722.0) + // * Bazel 7+ with explicit version: `X+` (no further `+` segments) + // + // Extension-repo forms we reject: + // * `X+++` (Bazel 7+, empty version) + // * `X+++` (Bazel 7+, embedded version) + // * `X~~~` (Bazel <7) val moduleRepos = mutableSetOf() for (moduleKey in changedModuleKeys) { val moduleName = moduleKey.substringBefore("@") val matched = allTargets.keys - .filter { it.startsWith("@@") && it.contains(moduleName) } - .map { it.substring(2).substringBefore("//") } + .filter { it.startsWith("@@") } + .mapNotNull { target -> + val canonical = target.substring(2).substringBefore("//") + if (canonicalRepoIsBaseForModule(canonical, moduleName)) canonical else null + } + .distinct() if (matched.isEmpty()) { logger.w { "No external repository matched module $moduleKey" } } else { @@ -384,4 +403,27 @@ class CalculateImpactedTargetsInteractor : KoinComponent { logger.i { "Total targets impacted by module changes: ${impactedTargets.size}" } return impactedTargets } + + /** + * Returns true if [canonical] (a bzlmod canonical repo name, with `@@` and `//...` stripped) + * is the base repo for [moduleName] -- i.e. the module's own repo, not a module-extension repo + * defined by that module. + * + * The base repo has the shape `` where `` is `+` or `~` + * and `` contains no further separators. Module-extension repos always layer + * additional `+` or `~` segments onto the canonical name and are rejected. See + * https://github.com/Tinder/bazel-diff/issues/335 for the motivating bug. + * + * Module names in bzlmod are restricted to `[a-z0-9._-]`, so neither `+` nor `~` can appear + * inside [moduleName] itself; the separator check below is unambiguous. + */ + private fun canonicalRepoIsBaseForModule(canonical: String, moduleName: String): Boolean { + if (!canonical.startsWith(moduleName)) return false + val rest = canonical.substring(moduleName.length) + if (rest.isEmpty()) return false + val separator = rest[0] + if (separator != '+' && separator != '~') return false + val versionSegment = rest.substring(1) + return !versionSegment.contains('+') && !versionSegment.contains('~') + } } diff --git a/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorIssue335Test.kt b/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorIssue335Test.kt index e17af77..0a1903a 100644 --- a/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorIssue335Test.kt +++ b/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorIssue335Test.kt @@ -25,29 +25,24 @@ import org.mockito.kotlin.mock import org.mockito.kotlin.whenever /** - * Reproducer for the "loose substring match" tail of + * Regression coverage for the "loose substring match" tail of * https://github.com/Tinder/bazel-diff/issues/335 (fix #3 in the issue body). * - * `CalculateImpactedTargetsInteractor.queryTargetsDependingOnModules` resolves changed module - * keys to canonical external repos with: + * `CalculateImpactedTargetsInteractor.queryTargetsDependingOnModules` historically resolved + * changed module keys to canonical external repos with `it.contains(moduleName)`. That was a + * very loose match: in a workspace with many module extensions a single changed module like + * `aspect_bazel_lib` matched every canonical repo whose name started with that string -- e.g. + * `@@aspect_bazel_lib+`, `@@aspect_bazel_lib++toolchains+bats_toolchains`, etc. Each match + * became its own serial `bazel query rdeps(//..., @@//...)` subprocess. On the workspace + * in #335 that fan-out produced ~5,000 subprocesses and took multiple hours. * - * ``` - * val moduleRepos = allTargets.keys - * .filter { it.startsWith("@@") && it.contains(moduleName) } - * .map { it.substring(2).substringBefore("//") } - * .toSet() - * ``` + * The fix tightens the match to the base module repo only (the canonical name has the shape + * `` with no further `+`/`~` segments). Extension-repo content + * changes propagate to main-repo consumers through the normal dep-hash chain, so the simple + * per-target hash diff already catches them. * - * `it.contains(moduleName)` is a very loose match. In a workspace with many module extensions - * a single changed module like `aspect_bazel_lib` matches every canonical repo whose name - * starts with that string -- e.g. `@@aspect_bazel_lib+`, `@@aspect_bazel_lib++toolchains+bats_toolchains`, - * etc. Each match becomes its own serial `bazel query rdeps(//..., @@//...)` subprocess. - * On the workspace in #335 that fan-out produced ~5,000 subprocesses and took multiple hours. - * - * This test asserts that resolving a single changed module yields at most one rdeps query - * (the one targeting the actual changed repo). The fix is either to match the canonical repo - * key exactly (e.g. `aspect_bazel_lib+` -- the part before any further `+`/`~`), or to look up - * the repo via `bazel mod dump_repo_mapping` instead of a substring scan. + * This test asserts that resolving a single changed module yields exactly one rdeps query, + * scoped to the actual base repo. */ class CalculateImpactedTargetsInteractorIssue335Test : KoinTest { @@ -57,17 +52,14 @@ class CalculateImpactedTargetsInteractorIssue335Test : KoinTest { @Before fun setUp() { // Capture every query expression that flows through queryService.query(...). + // The production caller in queryTargetsDependingOnModules always passes + // `useCquery = false`, so one matcher pair covers it. runBlocking { whenever(queryService.query(any(), eq(false))).thenAnswer { invocation -> val q = invocation.getArgument(0) capturedQueries.add(q) emptyList() } - whenever(queryService.query(any())).thenAnswer { invocation -> - val q = invocation.getArgument(0) - capturedQueries.add(q) - emptyList() - } } startKoin { modules( @@ -85,13 +77,7 @@ class CalculateImpactedTargetsInteractorIssue335Test : KoinTest { } @Test - @org.junit.Ignore( - "Reproducer for https://github.com/Tinder/bazel-diff/issues/335 fix #3 (loose " + - "substring match in queryTargetsDependingOnModules). Today a single changed module " + - "fans out into one rdeps subprocess per repo whose canonical name contains the " + - "apparent module name, which explodes on workspaces with many module extensions. " + - "Drop @Ignore once the match is tightened to the actual canonical repo key.") - fun queryTargetsDependingOnModules_doesNotOverMatchExtensionRepoNames_reproducerForIssue335() { + fun queryTargetsDependingOnModules_doesNotOverMatchExtensionRepoNames_regressionForIssue335() { // Simulate the #335 workspace: one module (aspect_bazel_lib) and several module-extension // repos whose canonical names also begin with "aspect_bazel_lib". The bug is that all // of those get treated as the "changed module" and each spawns its own rdeps query.