From 21c8a328852762ec2f0d79b8c1862a14d9e0facc Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Fri, 17 Apr 2026 16:02:22 +0100 Subject: [PATCH 1/4] Add --partition-seed option for shuffled partition assignment The default --partition M/N divides invocations into contiguous chunks in workspace-member order, which leaves heavy members biased toward the same partition across CI runs. --partition-seed hashes the seed string with FNV-1a and uses it to seed a SplitMix64-driven Fisher-Yates shuffle of the assignment, balancing load across partitions while keeping each partition exactly the same size as the unseeded version. --- README.md | 6 ++++++ src/cli.rs | 12 +++++++++++ src/main.rs | 50 ++++++++++++++++++++++++++++++++++++++++++-- tests/long-help.txt | 6 ++++++ tests/short-help.txt | 2 ++ 5 files changed, 74 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3cbbe908..bd29a897 100644 --- a/README.md +++ b/README.md @@ -225,6 +225,12 @@ OPTIONS: --partition Partition runs and execute only its subset according to M/N. + --partition-seed + Seed string to shuffle partition assignment for load balancing. + + Requires --partition. Any string is accepted (e.g. a git commit hash); the same seed + produces the same assignment across all M/N runs. + --log-group Log grouping: none, github-actions. diff --git a/src/cli.rs b/src/cli.rs index ff05a968..9b97379d 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -55,6 +55,8 @@ pub(crate) struct Args { pub(crate) keep_going: bool, /// --partition pub(crate) partition: Option, + /// --partition-seed (hashed via FNV-1a so the user can pass any string, e.g. 
a git short hash) + pub(crate) partition_seed: Option<u64>, /// --print-command-list pub(crate) print_command_list: bool, /// --version-range/--rust-version @@ -159,6 +161,7 @@ impl Args { let mut clean_per_version = false; let mut keep_going = false; let mut partition = None; + let mut partition_seed: Option<String> = None; let mut print_command_list = false; let mut no_manifest_path = false; let mut locked = false; @@ -313,6 +316,7 @@ impl Args { Long("clean-per-version") => parse_flag!(clean_per_version), Long("keep-going") => parse_flag!(keep_going), Long("partition") => parse_opt!(partition, false), + Long("partition-seed") => parse_opt!(partition_seed, false), Long("print-command-list") => parse_flag!(print_command_list), Long("no-manifest-path") => parse_flag!(no_manifest_path), Long("locked") => parse_flag!(locked), @@ -574,6 +578,10 @@ impl Args { }; let partition = partition.as_deref().map(str::parse).transpose()?; + let partition_seed = partition_seed.as_deref().map(crate::fnv1a_64); + if partition_seed.is_some() && partition.is_none() { + bail!("--partition-seed can only be used together with --partition"); + } if no_dev_deps || no_private { let flag = if no_dev_deps && no_private { @@ -625,6 +633,7 @@ impl Args { clean_per_version, keep_going, partition, + partition_seed, print_command_list, no_manifest_path, include_features: include_features.into_iter().map(Into::into).collect(), @@ -844,6 +853,9 @@ const HELP: &[HelpText<'_>] = &[ ("", "--keep-going", "", "Keep going on failure", &[]), ("", "--partition", "", "Partition runs and execute only its subset according to M/N", &[ ]), + ("", "--partition-seed", "", "Seed string to shuffle partition assignment for load balancing", &[ + "Requires --partition. Any string is accepted (e.g. 
a git commit hash); the same seed produces the same assignment across all M/N runs.", + ]), ("", "--log-group", "", "Log grouping: none, github-actions", &[ "If this option is not used, the environment will be automatically detected.", ]), diff --git a/src/main.rs b/src/main.rs index fcbe1023..85541794 100644 --- a/src/main.rs +++ b/src/main.rs @@ -125,6 +125,7 @@ fn try_main() -> Result<()> { } } } + progress.ensure_permutation(cx.partition_seed); // First, generate the lockfile using the oldest cargo specified. // https://github.com/taiki-e/cargo-hack/issues/105 @@ -151,6 +152,7 @@ fn try_main() -> Result<()> { } else { let total = packages.iter().map(|p| p.feature_count).sum(); progress.total = total; + progress.ensure_permutation(cx.partition_seed); default_cargo_exec_on_packages(cx, &packages, &mut progress, &mut keep_going)?; } if keep_going.count > 0 { @@ -165,12 +167,56 @@ fn try_main() -> Result<()> { struct Progress { total: usize, count: usize, + /// Permutation of `0..total` controlling partition assignment when `--partition-seed` is set. + /// `None` preserves the original contiguous-chunk behavior. + permutation: Option<Vec<usize>>, } impl Progress { + fn ensure_permutation(&mut self, seed: Option<u64>) { + let Some(seed) = seed else { return }; + if self.permutation.is_some() { + return; + } + let mut perm: Vec<usize> = (0..self.total).collect(); + let mut rng = SplitMix64(seed); + for i in (1..perm.len()).rev() { + // `rng.next() % bound` is in `0..=i`, which always fits in usize. 
+ #[allow(clippy::cast_possible_truncation)] + let j = (rng.next() % (i as u64 + 1)) as usize; + perm.swap(i, j); + } + self.permutation = Some(perm); + } + fn in_partition(&self, partition: &Partition) -> bool { - let current_index = self.count / self.total.div_ceil(partition.count); - current_index == partition.index + let pos = match &self.permutation { + Some(p) => p[self.count], + None => self.count, + }; + pos / self.total.div_ceil(partition.count) == partition.index + } +} + +/// FNV-1a 64-bit. Stable across platforms and Rust versions, unlike `std::hash::DefaultHasher`. +pub(crate) fn fnv1a_64(s: &str) -> u64 { + let mut h: u64 = 0xcbf2_9ce4_8422_2325; + for &b in s.as_bytes() { + h ^= u64::from(b); + h = h.wrapping_mul(0x0000_0100_0000_01b3); + } + h +} + +/// SplitMix64 PRNG. Used to drive a deterministic Fisher-Yates shuffle from a single u64 seed. +struct SplitMix64(u64); +impl SplitMix64 { + fn next(&mut self) -> u64 { + self.0 = self.0.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut z = self.0; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + z ^ (z >> 31) } } diff --git a/tests/long-help.txt b/tests/long-help.txt index 53023f27..f3f32624 100644 --- a/tests/long-help.txt +++ b/tests/long-help.txt @@ -195,6 +195,12 @@ OPTIONS: --partition Partition runs and execute only its subset according to M/N. + --partition-seed + Seed string to shuffle partition assignment for load balancing. + + Requires --partition. Any string is accepted (e.g. a git commit hash); the same seed + produces the same assignment across all M/N runs. + --log-group Log grouping: none, github-actions. 
diff --git a/tests/short-help.txt b/tests/short-help.txt index 5ed21b18..4c08e325 100644 --- a/tests/short-help.txt +++ b/tests/short-help.txt @@ -49,6 +49,8 @@ OPTIONS: --keep-going Keep going on failure --partition Partition runs and execute only its subset according to M/N + --partition-seed Seed string to shuffle partition assignment for load + balancing --log-group Log grouping: none, github-actions --print-command-list Print commands without run (Unstable) --no-manifest-path Do not pass --manifest-path option to cargo (Unstable) From e6c1be594342c01a540a78a2f51037ad36ba49de Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Fri, 17 Apr 2026 16:04:52 +0100 Subject: [PATCH 2/4] Test --partition-seed determinism and coverage partition_seeded verifies that the same seed produces identical partition decisions across runs and that the union of "running" commands across all M/N partitions covers every invocation exactly once with the unseeded chunk sizes (6, 6, 5). partition_bad gains a case for using --partition-seed without --partition. --- tests/test.rs | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/tests/test.rs b/tests/test.rs index 725ae263..eb480aff 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -2015,6 +2015,89 @@ fn partition_bad() { .stderr_contains( "The argument '--partition' was provided more than once, but cannot be used multiple times", ); + + cargo_hack(["check", "--each-feature", "--partition-seed", "abc"]) + .assert_failure("real") + .stderr_contains("--partition-seed can only be used together with --partition"); +} + +#[test] +fn partition_seeded() { + /// Captured stderr is collapsed onto one line by the test helper. Scan the joined string + /// for `(running|skipping) `<cmd>`` markers and return them sorted, so we can compare + /// without depending on per-run noise (tempdir paths, cargo build output, etc). 
+ fn extract_decisions(stderr: &str) -> Vec<String> { + let mut out = vec![]; + for marker in ["running `", "skipping `"] { + let mut rest = stderr; + while let Some(idx) = rest.find(marker) { + let after = &rest[idx..]; + let end = after.find(')').expect("decision line ends with `(N/M)`"); + out.push(after[..=end].to_owned()); + rest = &after[marker.len()..]; + } + } + out.sort(); + out + } + + fn extract_running(stderr: &str) -> Vec<String> { + extract_decisions(stderr) + .into_iter() + .filter(|l| l.starts_with("running `")) + .collect() + } + + let seed = "abc1234"; + + // Same seed -> identical partition decisions (determinism). + let out1 = cargo_hack([ + "check", + "--feature-powerset", + "--partition", + "1/3", + "--partition-seed", + seed, + ]) + .assert_success("real"); + let out2 = cargo_hack([ + "check", + "--feature-powerset", + "--partition", + "1/3", + "--partition-seed", + seed, + ]) + .assert_success("real"); + assert_eq!( + extract_decisions(&out1.0.as_ref().unwrap().stderr), + extract_decisions(&out2.0.as_ref().unwrap().stderr), + ); + + // Each partition runs the expected unseeded chunk size, and the union of "running" commands + // across all partitions covers exactly the 17 invocations with no duplicates. 
+ let mut running = vec![]; + let mut sizes = vec![]; + for m in 1..=3 { + let out = cargo_hack([ + "check", + "--feature-powerset", + "--partition", + &format!("{m}/3"), + "--partition-seed", + seed, + ]) + .assert_success("real"); + let lines = extract_running(&out.0.as_ref().unwrap().stderr); + sizes.push(lines.len()); + running.extend(lines); + } + assert_eq!(sizes, vec![6, 6, 5], "partition sizes should match the unseeded chunking"); + assert_eq!(running.len(), 17); + let mut unique = running.clone(); + unique.sort(); + unique.dedup(); + assert_eq!(unique.len(), 17, "no invocation should run in more than one partition"); } #[test] From 9d8f6551d1c487b0930d12aab56026f1d21b5107 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Fri, 17 Apr 2026 16:05:13 +0100 Subject: [PATCH 3/4] Document --partition-seed in changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f0c1d09..bf46da76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ Note: In this file, do not use the hard wrap in the middle of a sentence for com ## [Unreleased] +- Add `--partition-seed ` option for deterministic shuffling of `--partition` assignment. Accepts any string (e.g. a git short hash), hashed stably with FNV-1a; the same seed produces the same assignment across all `M/N` runs, balancing load when some workspace members are heavier than others. + ## [0.6.44] - 2026-03-20 - Publish [artifact attestations](https://docs.github.com/en/actions/concepts/security/artifact-attestations). From 7dd3a1f39b5780defce4aafc6df8a60be3bda4e4 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Fri, 17 Apr 2026 18:01:23 +0100 Subject: [PATCH 4/4] Fix tidy: apply rustfmt and allow PRNG in cspell dict - Reformat with nightly rustfmt: uppercase hex literals in fnv1a_64 (hex_literal_case = "Upper") and collapse a few multi-line blocks (use_small_heuristics = "Max"). - Add PRNG to project-dictionary.txt; it appears in a SplitMix64 doc comment. 
--- .github/.cspell/project-dictionary.txt | 1 + src/cli.rs | 12 ++++++++--- src/main.rs | 4 ++-- tests/test.rs | 29 +++++++------------------- 4 files changed, 19 insertions(+), 27 deletions(-) diff --git a/.github/.cspell/project-dictionary.txt b/.github/.cspell/project-dictionary.txt index 05f5c529..fe7fcce8 100644 --- a/.github/.cspell/project-dictionary.txt +++ b/.github/.cspell/project-dictionary.txt @@ -1,5 +1,6 @@ binstall objc +PRNG qpmember subcrate vvpmember diff --git a/src/cli.rs b/src/cli.rs index 9b97379d..9872f450 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -853,9 +853,15 @@ const HELP: &[HelpText<'_>] = &[ ("", "--keep-going", "", "Keep going on failure", &[]), ("", "--partition", "", "Partition runs and execute only its subset according to M/N", &[ ]), - ("", "--partition-seed", "", "Seed string to shuffle partition assignment for load balancing", &[ - "Requires --partition. Any string is accepted (e.g. a git commit hash); the same seed produces the same assignment across all M/N runs.", - ]), + ( + "", + "--partition-seed", + "", + "Seed string to shuffle partition assignment for load balancing", + &[ + "Requires --partition. Any string is accepted (e.g. a git commit hash); the same seed produces the same assignment across all M/N runs.", + ], + ), ("", "--log-group", "", "Log grouping: none, github-actions", &[ "If this option is not used, the environment will be automatically detected.", ]), diff --git a/src/main.rs b/src/main.rs index 85541794..fd0e054c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -200,10 +200,10 @@ impl Progress { /// FNV-1a 64-bit. Stable across platforms and Rust versions, unlike `std::hash::DefaultHasher`. 
pub(crate) fn fnv1a_64(s: &str) -> u64 { - let mut h: u64 = 0xcbf2_9ce4_8422_2325; + let mut h: u64 = 0xCBF2_9CE4_8422_2325; for &b in s.as_bytes() { h ^= u64::from(b); - h = h.wrapping_mul(0x0000_0100_0000_01b3); + h = h.wrapping_mul(0x0000_0100_0000_01B3); } h } diff --git a/tests/test.rs b/tests/test.rs index eb480aff..af491bd1 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -2042,33 +2042,18 @@ fn partition_seeded() { } fn extract_running(stderr: &str) -> Vec<String> { - extract_decisions(stderr) - .into_iter() - .filter(|l| l.starts_with("running `")) - .collect() + extract_decisions(stderr).into_iter().filter(|l| l.starts_with("running `")).collect() } let seed = "abc1234"; // Same seed -> identical partition decisions (determinism). - let out1 = cargo_hack([ - "check", - "--feature-powerset", - "--partition", - "1/3", - "--partition-seed", - seed, - ]) - .assert_success("real"); - let out2 = cargo_hack([ - "check", - "--feature-powerset", - "--partition", - "1/3", - "--partition-seed", - seed, - ]) - .assert_success("real"); + let out1 = + cargo_hack(["check", "--feature-powerset", "--partition", "1/3", "--partition-seed", seed]) + .assert_success("real"); + let out2 = + cargo_hack(["check", "--feature-powerset", "--partition", "1/3", "--partition-seed", seed]) + .assert_success("real"); assert_eq!( extract_decisions(&out1.0.as_ref().unwrap().stderr), extract_decisions(&out2.0.as_ref().unwrap().stderr),