Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions cli/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ kt_jvm_test(
runtime_deps = [":cli-test-lib"],
)

kt_jvm_test(
name = "CalculateImpactedTargetsInteractorIssue335Test",
test_class = "com.bazel_diff.interactor.CalculateImpactedTargetsInteractorIssue335Test",
runtime_deps = [":cli-test-lib"],
)

kt_jvm_test(
name = "NormalisingPathConverterTest",
test_class = "com.bazel_diff.cli.converter.NormalisingPathConverterTest",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -340,17 +340,36 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
return allTargets.keys
}

// Map every changed module to its matching bzlmod canonical repos. A single module
// name can match multiple canonical repos (e.g. rules_jvm_external matches
// rules_jvm_external~~maven~maven, rules_jvm_external~~toolchains~...). Log per
// module so an operator can attribute a pathologically large impacted set back to
// a specific module bump.
// Map every changed module to its bzlmod canonical *base* repo. We deliberately do
// NOT pull in module-extension repos here (e.g. `aspect_bazel_lib++toolchains+bats_toolchains`):
// a loose `it.contains(moduleName)` substring scan matched all of them and produced
// ~5,000 serial rdeps subprocesses on the workspace in
// https://github.com/Tinder/bazel-diff/issues/335. Extension-repo target hashes
// propagate to main-repo consumers via the normal dep-hash chain, so the simple
// hash diff already catches consumers when an extension repo's contents actually
// change. The rdeps query only needs to find main-repo targets whose dependency on
// the module's *base* repo isn't reflected in a hash flip (MODULE.bazel-only bumps,
// dep additions/removals against an otherwise-unchanged repo, etc.).
//
// Canonical-name forms we recognise as the base repo for module X:
// * Bazel 7+ default form: `X+` (no version embedded)
// * Bazel <7 form / multi-version: `X~<version>` (e.g. abseil-cpp~20240722.0)
// * Bazel 7+ with explicit version: `X+<version>` (no further `+` segments)
//
// Extension-repo forms we reject:
// * `X++<ext>+<repo>` (Bazel 7+, empty version)
// * `X+<version>+<ext>+<repo>` (Bazel 7+, embedded version)
// * `X~<version>~<ext>~<repo>` (Bazel <7)
val moduleRepos = mutableSetOf<String>()
for (moduleKey in changedModuleKeys) {
val moduleName = moduleKey.substringBefore("@")
val matched = allTargets.keys
.filter { it.startsWith("@@") && it.contains(moduleName) }
.map { it.substring(2).substringBefore("//") }
.filter { it.startsWith("@@") }
.mapNotNull { target ->
val canonical = target.substring(2).substringBefore("//")
if (canonicalRepoIsBaseForModule(canonical, moduleName)) canonical else null
}
.distinct()
if (matched.isEmpty()) {
logger.w { "No external repository matched module $moduleKey" }
} else {
Expand Down Expand Up @@ -384,4 +403,27 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
logger.i { "Total targets impacted by module changes: ${impactedTargets.size}" }
return impactedTargets
}

/**
* Returns true if [canonical] (a bzlmod canonical repo name, with `@@` and `//...` stripped)
* is the base repo for [moduleName] -- i.e. the module's own repo, not a module-extension repo
* defined by that module.
*
* The base repo has the shape `<moduleName><sep><versionSegment>` where `<sep>` is `+` or `~`
* and `<versionSegment>` contains no further separators. Module-extension repos always layer
* additional `+` or `~` segments onto the canonical name and are rejected. See
* https://github.com/Tinder/bazel-diff/issues/335 for the motivating bug.
*
* Module names in bzlmod are restricted to `[a-z0-9._-]`, so neither `+` nor `~` can appear
* inside [moduleName] itself; the separator check below is unambiguous.
*/
private fun canonicalRepoIsBaseForModule(canonical: String, moduleName: String): Boolean {
if (!canonical.startsWith(moduleName)) return false
val rest = canonical.substring(moduleName.length)
if (rest.isEmpty()) return false
val separator = rest[0]
if (separator != '+' && separator != '~') return false
val versionSegment = rest.substring(1)
return !versionSegment.contains('+') && !versionSegment.contains('~')
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
package com.bazel_diff.interactor

import assertk.assertThat
import assertk.assertions.contains
import assertk.assertions.doesNotContain
import assertk.assertions.hasSize
import com.bazel_diff.SilentLogger
import com.bazel_diff.bazel.BazelQueryService
import com.bazel_diff.bazel.BazelTarget
import com.bazel_diff.hash.TargetHash
import com.bazel_diff.log.Logger
import com.google.gson.GsonBuilder
import java.io.StringWriter
import kotlinx.coroutines.runBlocking
import org.junit.After
import org.junit.Before
import org.junit.Test
import org.koin.core.context.startKoin
import org.koin.core.context.stopKoin
import org.koin.dsl.module
import org.koin.test.KoinTest
import org.mockito.kotlin.any
import org.mockito.kotlin.eq
import org.mockito.kotlin.mock
import org.mockito.kotlin.whenever

/**
* Regression coverage for the "loose substring match" tail of
* https://github.com/Tinder/bazel-diff/issues/335 (fix #3 in the issue body).
*
* `CalculateImpactedTargetsInteractor.queryTargetsDependingOnModules` historically resolved
* changed module keys to canonical external repos with `it.contains(moduleName)`. That was a
* very loose match: in a workspace with many module extensions a single changed module like
* `aspect_bazel_lib` matched every canonical repo whose name started with that string -- e.g.
* `@@aspect_bazel_lib+`, `@@aspect_bazel_lib++toolchains+bats_toolchains`, etc. Each match
* became its own serial `bazel query rdeps(//..., @@<repo>//...)` subprocess. On the workspace
* in #335 that fan-out produced ~5,000 subprocesses and took multiple hours.
*
* The fix tightens the match to the base module repo only (the canonical name has the shape
* `<moduleName><sep><versionSegment>` with no further `+`/`~` segments). Extension-repo content
* changes propagate to main-repo consumers through the normal dep-hash chain, so the simple
* per-target hash diff already catches them.
*
* This test asserts that resolving a single changed module yields exactly one rdeps query,
* scoped to the actual base repo.
*/
class CalculateImpactedTargetsInteractorIssue335Test : KoinTest {

private val queryService: BazelQueryService = mock()
private val capturedQueries = mutableListOf<String>()

@Before
fun setUp() {
// Capture every query expression that flows through queryService.query(...).
// The production caller in queryTargetsDependingOnModules always passes
// `useCquery = false`, so one matcher pair covers it.
runBlocking {
whenever(queryService.query(any<String>(), eq(false))).thenAnswer { invocation ->
val q = invocation.getArgument<String>(0)
capturedQueries.add(q)
emptyList<BazelTarget>()
}
}
startKoin {
modules(
module {
single<Logger> { SilentLogger }
single { queryService }
single { GsonBuilder().disableHtmlEscaping().setPrettyPrinting().create() }
})
}
}

@After
fun tearDown() {
stopKoin()
}

@Test
fun queryTargetsDependingOnModules_doesNotOverMatchExtensionRepoNames_regressionForIssue335() {
// Simulate the #335 workspace: one module (aspect_bazel_lib) and several module-extension
// repos whose canonical names also begin with "aspect_bazel_lib". The bug is that all
// of those get treated as the "changed module" and each spawns its own rdeps query.
val startHashes =
mapOf(
"//:target1" to TargetHash("Rule", "h1", "h1"),
"@@aspect_bazel_lib+//foo:bar" to TargetHash("Rule", "h2-v1", "h2-v1"),
"@@aspect_bazel_lib++toolchains+bats_toolchains//x:y" to
TargetHash("Rule", "h3", "h3"),
"@@aspect_bazel_lib++toolchains+yq_toolchains//x:y" to
TargetHash("Rule", "h4", "h4"),
"@@unrelated_repo+//pkg:tgt" to TargetHash("Rule", "h5", "h5"),
)
// Only the aspect_bazel_lib *module* itself changes version; the extension repos do not
// appear in the module graph at all.
val endHashes = startHashes.toMutableMap()
endHashes["@@aspect_bazel_lib+//foo:bar"] = TargetHash("Rule", "h2-v2", "h2-v2")

val fromModuleGraph =
"""
{
"key": "root",
"name": "root",
"version": "",
"apparentName": "root",
"dependencies": [
{"key": "aspect_bazel_lib@1.0.0", "name": "aspect_bazel_lib", "version": "1.0.0", "apparentName": "aspect_bazel_lib"}
]
}
"""
.trimIndent()
val toModuleGraph =
"""
{
"key": "root",
"name": "root",
"version": "",
"apparentName": "root",
"dependencies": [
{"key": "aspect_bazel_lib@2.0.0", "name": "aspect_bazel_lib", "version": "2.0.0", "apparentName": "aspect_bazel_lib"}
]
}
"""
.trimIndent()

val outputWriter = StringWriter()
CalculateImpactedTargetsInteractor()
.execute(
from = startHashes,
to = endHashes,
outputWriter = outputWriter,
targetTypes = null,
fromModuleGraphJson = fromModuleGraph,
toModuleGraphJson = toModuleGraph,
)

val rdepsQueries = capturedQueries.filter { it.contains("rdeps(") }
// Desired: exactly ONE rdeps query, scoped to the actual changed repo.
// Today: one query per canonical repo containing the substring "aspect_bazel_lib", so
// this list has 3 entries -- aspect_bazel_lib+, aspect_bazel_lib++toolchains+bats_toolchains,
// and aspect_bazel_lib++toolchains+yq_toolchains.
assertThat(rdepsQueries).hasSize(1)
assertThat(rdepsQueries[0]).contains("@@aspect_bazel_lib+//...")
assertThat(rdepsQueries[0]).doesNotContain("toolchains+bats_toolchains")
assertThat(rdepsQueries[0]).doesNotContain("toolchains+yq_toolchains")
}
}
Loading