From dc30accfafea9af14c2a59167cee7efb9983495d Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 18 Dec 2025 22:05:02 +0100 Subject: [PATCH 1/9] Add SCCACHE_BASEDIR support --- README.md | 24 ++++++- docs/Configuration.md | 9 +++ src/cache/cache.rs | 5 ++ src/cache/disk.rs | 6 ++ src/compiler/c.rs | 106 +++++++++++++++++++++------- src/compiler/compiler.rs | 5 ++ src/config.rs | 81 ++++++++++++++++++++- src/test/tests.rs | 1 + src/util.rs | 147 +++++++++++++++++++++++++++++++++++++++ tests/harness/mod.rs | 1 + tests/oauth.rs | 1 + 11 files changed, 357 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index ed0a56b80..51ff5c52f 100644 --- a/README.md +++ b/README.md @@ -278,12 +278,34 @@ This is most useful when using sccache for Rust compilation, as rustc supports u --- +Normalizing Paths with `SCCACHE_BASEDIR` +----------------------------------------- + +By default, sccache requires absolute paths to match for cache hits. To enable cache sharing across different build directories, you can set `SCCACHE_BASEDIR` to strip a base directory from paths before hashing: + +```bash +export SCCACHE_BASEDIR=/home/user/project +``` + +This is similar to ccache's `CCACHE_BASEDIR` and helps when: +* Building the same project from different directories +* Sharing cache between CI jobs with different checkout paths +* Multiple developers working with different username paths + +You can also configure this in the sccache config file: + +```toml +basedir = "/home/user/project" +``` + +--- + Known Caveats ------------- ### General -* Absolute paths to files must match to get a cache hit. This means that even if you are using a shared cache, everyone will have to build at the same absolute path (i.e. not in `$HOME`) in order to benefit each other. In Rust this includes the source for third party crates which are stored in `$HOME/.cargo/registry/cache` by default. +* By default, absolute paths to files must match to get a cache hit. 
To work around this, use `SCCACHE_BASEDIR` (see above) to normalize paths before hashing. ### Rust diff --git a/docs/Configuration.md b/docs/Configuration.md index c6c43c646..32e1ec8cc 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -6,6 +6,14 @@ # If specified, wait this long for the server to start up. server_startup_timeout_ms = 10000 +# Base directory to strip from paths for cache key computation. +# Similar to ccache's CCACHE_BASEDIR. This enables cache hits across +# different absolute paths when compiling the same source code. +# For example, if basedir is "/home/user/project", then paths like +# "/home/user/project/src/main.c" will be normalized to "./src/main.c" +# for caching purposes. +basedir = "/home/user/project" + [dist] # where to find the scheduler scheduler_url = "http://1.2.3.4:10600" @@ -134,6 +142,7 @@ Note that some env variables may need sccache server restart to take effect. * `SCCACHE_ALLOW_CORE_DUMPS` to enable core dumps by the server * `SCCACHE_CONF` configuration file path +* `SCCACHE_BASEDIR` base directory to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Environment variable takes precedence over file configuration. * `SCCACHE_CACHED_CONF` * `SCCACHE_IDLE_TIMEOUT` how long the local daemon process waits for more client requests before exiting, in seconds. 
Set to `0` to run sccache permanently * `SCCACHE_STARTUP_NOTIFY` specify a path to a socket which will be used for server completion notification diff --git a/src/cache/cache.rs b/src/cache/cache.rs index 4e9d52800..86886a043 100644 --- a/src/cache/cache.rs +++ b/src/cache/cache.rs @@ -381,6 +381,10 @@ pub trait Storage: Send + Sync { // Enable by default, only in local mode PreprocessorCacheModeConfig::default() } + /// Return the base directory for path normalization if configured + fn basedir(&self) -> Option<&Path> { + None + } /// Return the preprocessor cache entry for a given preprocessor key, /// if it exists. /// Only applicable when using preprocessor cache mode. @@ -742,6 +746,7 @@ pub fn storage_from_config( pool, preprocessor_cache_mode_config, rw_mode, + config.basedir.clone(), ))) } diff --git a/src/cache/disk.rs b/src/cache/disk.rs index c4f3491e9..2a5b17ef5 100644 --- a/src/cache/disk.rs +++ b/src/cache/disk.rs @@ -74,6 +74,7 @@ pub struct DiskCache { preprocessor_cache_mode_config: PreprocessorCacheModeConfig, preprocessor_cache: Arc>, rw_mode: CacheMode, + basedir: Option, } impl DiskCache { @@ -84,6 +85,7 @@ impl DiskCache { pool: &tokio::runtime::Handle, preprocessor_cache_mode_config: PreprocessorCacheModeConfig, rw_mode: CacheMode, + basedir: Option, ) -> DiskCache { DiskCache { lru: Arc::new(Mutex::new(LazyDiskCache::Uninit { @@ -99,6 +101,7 @@ impl DiskCache { max_size, })), rw_mode, + basedir, } } } @@ -181,6 +184,9 @@ impl Storage for DiskCache { fn preprocessor_cache_mode_config(&self) -> PreprocessorCacheModeConfig { self.preprocessor_cache_mode_config } + fn basedir(&self) -> Option<&Path> { + self.basedir.as_deref() + } async fn get_preprocessor_cache_entry(&self, key: &str) -> Result>> { let key = normalize_key(key); Ok(self diff --git a/src/compiler/c.rs b/src/compiler/c.rs index 8db84d265..ceea3c0c8 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -613,6 +613,7 @@ where &env_vars, &preprocessor_result.stdout, 
self.compiler.plusplus(), + storage.basedir(), ) }; @@ -1421,7 +1422,7 @@ impl pkg::ToolchainPackager for CToolchainPackager { } /// The cache is versioned by the inputs to `hash_key`. -pub const CACHE_VERSION: &[u8] = b"11"; +pub const CACHE_VERSION: &[u8] = b"12"; /// Environment variables that are factored into the cache key. static CACHED_ENV_VARS: LazyLock> = LazyLock::new(|| { @@ -1452,6 +1453,7 @@ pub fn hash_key( env_vars: &[(OsString, OsString)], preprocessor_output: &[u8], plusplus: bool, + basedir: Option<&Path>, ) -> String { // If you change any of the inputs to the hash, you should change `CACHE_VERSION`. let mut m = Digest::new(); @@ -1475,7 +1477,16 @@ pub fn hash_key( val.hash(&mut HashToDigest { digest: &mut m }); } } - m.update(preprocessor_output); + + // Strip basedir from preprocessor output if configured + let preprocessor_output_to_hash = if let Some(base) = basedir { + use crate::util::strip_basedir; + Cow::Owned(strip_basedir(preprocessor_output, base)) + } else { + Cow::Borrowed(preprocessor_output) + }; + + m.update(&preprocessor_output_to_hash); m.finish() } @@ -1490,8 +1501,8 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_eq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false), - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false) + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None), + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None) ); } @@ -1500,8 +1511,8 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false), - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, true) + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None), + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, true, None) ); } @@ -1510,7 +1521,7 @@ mod test { let args = ovec!["a", 
"b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false), + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None), hash_key( "abcd", Language::CHeader, @@ -1518,7 +1529,8 @@ mod test { &[], &[], PREPROCESSED, - false + false, + None ) ); } @@ -1528,7 +1540,7 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::Cxx, &args, &[], &[], PREPROCESSED, true), + hash_key("abcd", Language::Cxx, &args, &[], &[], PREPROCESSED, true, None), hash_key( "abcd", Language::CxxHeader, @@ -1536,7 +1548,8 @@ mod test { &[], &[], PREPROCESSED, - true + true, + None ) ); } @@ -1546,8 +1559,8 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false), - hash_key("wxyz", Language::C, &args, &[], &[], PREPROCESSED, false) + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None), + hash_key("wxyz", Language::C, &args, &[], &[], PREPROCESSED, false, None) ); } @@ -1560,18 +1573,18 @@ mod test { let a = ovec!["a"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false), - hash_key(digest, Language::C, &xyz, &[], &[], PREPROCESSED, false) + hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, None), + hash_key(digest, Language::C, &xyz, &[], &[], PREPROCESSED, false, None) ); assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false), - hash_key(digest, Language::C, &ab, &[], &[], PREPROCESSED, false) + hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, None), + hash_key(digest, Language::C, &ab, &[], &[], PREPROCESSED, false, None) ); assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false), - hash_key(digest, Language::C, &a, &[], &[], 
PREPROCESSED, false) + hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, None), + hash_key(digest, Language::C, &a, &[], &[], PREPROCESSED, false, None) ); } @@ -1586,9 +1599,10 @@ mod test { &[], &[], &b"hello world"[..], - false + false, + None ), - hash_key("abcd", Language::C, &args, &[], &[], &b"goodbye"[..], false) + hash_key("abcd", Language::C, &args, &[], &[], &b"goodbye"[..], false, None) ); } @@ -1598,11 +1612,11 @@ mod test { let digest = "abcd"; const PREPROCESSED: &[u8] = b"hello world"; for var in CACHED_ENV_VARS.iter() { - let h1 = hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false); + let h1 = hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false, None); let vars = vec![(OsString::from(var), OsString::from("something"))]; - let h2 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false); + let h2 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false, None); let vars = vec![(OsString::from(var), OsString::from("something else"))]; - let h3 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false); + let h3 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false, None); assert_neq!(h1, h2); assert_neq!(h2, h3); } @@ -1623,12 +1637,54 @@ mod test { &extra_data, &[], PREPROCESSED, - false + false, + None ), - hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false) + hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false, None) ); } + #[test] + fn test_hash_key_basedir() { + use std::path::Path; + + let args = ovec!["a", "b", "c"]; + let digest = "abcd"; + + // Test 1: Same hash with different absolute paths when basedir is used + let preprocessed1 = b"# 1 \"/home/user1/project/src/main.c\"\nint main() { return 0; }"; + let preprocessed2 = b"# 1 \"/home/user2/project/src/main.c\"\nint main() { return 0; }"; + + let basedir1 = Path::new("/home/user1/project"); + let basedir2 = Path::new("/home/user2/project"); + + let h1 = 
hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, Some(basedir1)); + let h2 = hash_key(digest, Language::C, &args, &[], &[], preprocessed2, false, Some(basedir2)); + + assert_eq!(h1, h2); + + // Test 2: Different hashes without basedir + let h1_no_base = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, None); + let h2_no_base = hash_key(digest, Language::C, &args, &[], &[], preprocessed2, false, None); + + assert_neq!(h1_no_base, h2_no_base); + + // Test 3: Works for C++ files too + let preprocessed_cpp1 = b"# 1 \"/home/user1/project/src/main.cpp\"\nint main() { return 0; }"; + let preprocessed_cpp2 = b"# 1 \"/home/user2/project/src/main.cpp\"\nint main() { return 0; }"; + + let h_cpp1 = hash_key(digest, Language::Cxx, &args, &[], &[], preprocessed_cpp1, true, Some(basedir1)); + let h_cpp2 = hash_key(digest, Language::Cxx, &args, &[], &[], preprocessed_cpp2, true, Some(basedir2)); + + assert_eq!(h_cpp1, h_cpp2); + + // Test 4: Works with trailing slashes + let basedir_slash = Path::new("/home/user1/project/"); + let h_slash = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, Some(basedir_slash)); + + assert_eq!(h1, h_slash); + } + #[test] fn test_language_from_file_name() { fn t(extension: &str, expected: Language) { diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index bdab84e1e..430e3d124 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -2314,6 +2314,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, + None, ); // Write a dummy input file so the preprocessor cache mode can work std::fs::write(f.tempdir.path().join("foo.c"), "whatever").unwrap(); @@ -2444,6 +2445,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, + None, ); // Write a dummy input file so the preprocessor cache mode can work std::fs::write(f.tempdir.path().join("foo.c"), "whatever").unwrap(); @@ -2747,6 +2749,7 @@ LLVM version: 6.0", ..Default::default() }, 
CacheMode::ReadWrite, + None, ); let storage = Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage.clone(), pool.clone()); @@ -2876,6 +2879,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, + None, ); let storage = Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage.clone(), pool.clone()); @@ -2974,6 +2978,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, + None, ); let storage = Arc::new(storage); // Pretend to be GCC. diff --git a/src/config.rs b/src/config.rs index 8edaeb099..018bccf8c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -19,8 +19,8 @@ use fs_err as fs; #[cfg(any(feature = "dist-client", feature = "dist-server"))] use serde::ser::Serializer; use serde::{ - Deserialize, Serialize, de::{self, DeserializeOwned, Deserializer}, + Deserialize, Serialize, }; #[cfg(test)] use serial_test::serial; @@ -584,6 +584,8 @@ pub struct FileConfig { pub cache: CacheConfigs, pub dist: DistConfig, pub server_startup_timeout_ms: Option, + /// Base directory to strip from paths for cache key computation. + pub basedir: Option, } // If the file doesn't exist or we can't read it, log the issue and proceed. If the @@ -621,6 +623,7 @@ pub fn try_read_config_file(path: &Path) -> Result, } fn key_prefix_from_env_var(env_var_name: &str) -> String { @@ -946,7 +949,10 @@ fn config_from_env() -> Result { oss, }; - Ok(EnvConfig { cache }) + // ======= Base directory ======= + let basedir = env::var_os("SCCACHE_BASEDIR").map(PathBuf::from); + + Ok(EnvConfig { cache, basedir }) } // The directories crate changed the location of `config_dir` on macos in version 3, @@ -978,6 +984,9 @@ pub struct Config { pub fallback_cache: DiskCacheConfig, pub dist: DistConfig, pub server_startup_timeout: Option, + /// Base directory to strip from paths for cache key computation. + /// Similar to ccache's CCACHE_BASEDIR. 
+ pub basedir: Option, } impl Config { @@ -999,21 +1008,29 @@ impl Config { cache, dist, server_startup_timeout_ms, + basedir: file_basedir, } = file_conf; conf_caches.merge(cache); let server_startup_timeout = server_startup_timeout_ms.map(std::time::Duration::from_millis); - let EnvConfig { cache } = env_conf; + let EnvConfig { + cache, + basedir: env_basedir, + } = env_conf; conf_caches.merge(cache); + // Environment variable takes precedence over file config + let basedir = env_basedir.or(file_basedir); + let (caches, fallback_cache) = conf_caches.into_fallback(); Self { cache: caches, fallback_cache, dist, server_startup_timeout, + basedir, } } } @@ -1287,6 +1304,7 @@ fn config_overrides() { }), ..Default::default() }, + basedir: None, }; let file_conf = FileConfig { @@ -1313,6 +1331,7 @@ fn config_overrides() { }, dist: Default::default(), server_startup_timeout_ms: None, + basedir: None, }; assert_eq!( @@ -1335,10 +1354,64 @@ fn config_overrides() { }, dist: Default::default(), server_startup_timeout: None, + basedir: None, } ); } +#[test] +fn config_basedir_overrides() { + use std::path::PathBuf; + + // Test that env variable takes precedence over file config + let env_conf = EnvConfig { + cache: Default::default(), + basedir: Some(PathBuf::from("/env/basedir")), + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedir: Some(PathBuf::from("/file/basedir")), + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf); + assert_eq!(config.basedir, Some(PathBuf::from("/env/basedir"))); + + // Test that file config is used when env is None + let env_conf = EnvConfig { + cache: Default::default(), + basedir: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedir: Some(PathBuf::from("/file/basedir")), + }; + + let config = Config::from_env_and_file_configs(env_conf, 
file_conf); + assert_eq!(config.basedir, Some(PathBuf::from("/file/basedir"))); + + // Test that both None results in None + let env_conf = EnvConfig { + cache: Default::default(), + basedir: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedir: None, + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf); + assert_eq!(config.basedir, None); +} + #[test] #[serial] #[cfg(feature = "s3")] @@ -1644,6 +1717,7 @@ no_credentials = true rewrite_includes_only: false, }, server_startup_timeout_ms: Some(10000), + basedir: None, } ) } @@ -1736,6 +1810,7 @@ size = "7g" ..Default::default() }, server_startup_timeout_ms: None, + basedir: None, } ); } diff --git a/src/test/tests.rs b/src/test/tests.rs index ee41f6c2e..7ed2c7e8f 100644 --- a/src/test/tests.rs +++ b/src/test/tests.rs @@ -85,6 +85,7 @@ where runtime.handle(), PreprocessorCacheModeConfig::default(), CacheMode::ReadWrite, + None, )); let client = Client::new(); diff --git a/src/util.rs b/src/util.rs index 18329b42b..be268bb1f 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1015,6 +1015,54 @@ pub fn num_cpus() -> usize { std::thread::available_parallelism().map_or(1, std::num::NonZeroUsize::get) } +/// Strip the base directory from the preprocessor output to enable cache hits +/// across different absolute paths. +/// +/// This function searches for the basedir path in the preprocessor output and +/// replaces it with a relative path marker. +/// +/// The function handles both Unix-style (`/`) and Windows-style (`\`) path separators, +/// and normalizes trailing slashes. 
+pub fn strip_basedir(preprocessor_output: &[u8], basedir: &Path) -> Vec { + // Normalize the basedir by removing trailing slashes + let basedir_normalized = basedir.to_string_lossy(); + let basedir_str = basedir_normalized.trim_end_matches('/').trim_end_matches('\\'); + let basedir_bytes = basedir_str.as_bytes(); + + // If basedir is empty or preprocessor output is empty, return as-is + if basedir_bytes.is_empty() || preprocessor_output.is_empty() { + return preprocessor_output.to_vec(); + } + + let mut result = Vec::with_capacity(preprocessor_output.len()); + let mut i = 0; + + while i < preprocessor_output.len() { + // Check if we have a match for basedir at current position + if i + basedir_bytes.len() <= preprocessor_output.len() + && &preprocessor_output[i..i + basedir_bytes.len()] == basedir_bytes + { + // Check if this is actually a path boundary (preceded by whitespace, quote, or start) + let is_boundary = i == 0 + || preprocessor_output[i - 1].is_ascii_whitespace() + || preprocessor_output[i - 1] == b'"' + || preprocessor_output[i - 1] == b'<'; + + if is_boundary { + // Replace basedir with "." 
+ result.push(b'.'); + i += basedir_bytes.len(); + continue; + } + } + + result.push(preprocessor_output[i]); + i += 1; + } + + result +} + #[cfg(test)] mod tests { use super::{OsStrExt, TimeMacroFinder}; @@ -1167,4 +1215,103 @@ mod tests { let empty_result = super::ascii_unescape_default(&[]).unwrap(); assert!(empty_result.is_empty(), "{:?}", empty_result); } + + #[test] + fn test_strip_basedir_simple() { + use std::path::Path; + + // Simple cases + let basedir = Path::new("/home/user/project"); + let input = b"# 1 \"/home/user/project/src/main.c\"\nint main() { return 0; }"; + let output = super::strip_basedir(input, basedir); + let expected = b"# 1 \"./src/main.c\"\nint main() { return 0; }"; + assert_eq!(output, expected); + + // Multiple occurrences + let input = b"# 1 \"/home/user/project/src/main.c\"\n# 2 \"/home/user/project/include/header.h\""; + let output = super::strip_basedir(input, basedir); + let expected = b"# 1 \"./src/main.c\"\n# 2 \"./include/header.h\""; + assert_eq!(output, expected); + + // No occurrences + let input = b"# 1 \"/other/path/main.c\"\nint main() { return 0; }"; + let output = super::strip_basedir(input, basedir); + assert_eq!(output, input); + } + + #[test] + fn test_strip_basedir_empty() { + use std::path::Path; + + // Empty basedir + let basedir = Path::new(""); + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = super::strip_basedir(input, basedir); + assert_eq!(output, input); + + // Empty input + let basedir = Path::new("/home/user/project"); + let input = b""; + let output = super::strip_basedir(input, basedir); + assert_eq!(output, input); + } + + #[test] + fn test_strip_basedir_not_at_boundary() { + use std::path::Path; + + // basedir should only match at word boundaries + let basedir = Path::new("/home/user"); + let input = b"text/home/user/file.c and \"/home/user/other.c\""; + let output = super::strip_basedir(input, basedir); + // Should only replace the second occurrence (after quote) + let expected 
= b"text/home/user/file.c and \"./other.c\""; + assert_eq!(output, expected); + } + + #[test] + fn test_strip_basedir_trailing_slashes() { + use std::path::Path; + + // Without trailing slash + let basedir = Path::new("/home/user/project"); + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = super::strip_basedir(input, basedir); + let expected = b"# 1 \"./src/main.c\""; + assert_eq!(output, expected); + + // With single trailing slash + let basedir = Path::new("/home/user/project/"); + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = super::strip_basedir(input, basedir); + let expected = b"# 1 \"./src/main.c\""; + assert_eq!(output, expected); + + // With multiple trailing slashes + let basedir = Path::new("/home/user/project////"); + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = super::strip_basedir(input, basedir); + let expected = b"# 1 \"./src/main.c\""; + assert_eq!(output, expected); + } + + #[cfg(target_os = "windows")] + #[test] + fn test_strip_basedir_windows_backslashes() { + use std::path::Path; + + // Without trailing backslash + let basedir = Path::new("C:\\Users\\test\\project"); + let input = b"# 1 \"C:\\Users\\test\\project\\src\\main.c\""; + let output = super::strip_basedir(input, basedir); + let expected = b"# 1 \".\\src\\main.c\""; + assert_eq!(output, expected); + + // With multiple trailing backslashes + let basedir = Path::new("C:\\Users\\test\\project\\\\\\"); + let input = b"# 1 \"C:\\Users\\test\\project\\src\\main.c\""; + let output = super::strip_basedir(input, basedir); + let expected = b"# 1 \".\\src\\main.c\""; + assert_eq!(output, expected); + } } diff --git a/tests/harness/mod.rs b/tests/harness/mod.rs index 219c08c29..73b10fa99 100644 --- a/tests/harness/mod.rs +++ b/tests/harness/mod.rs @@ -190,6 +190,7 @@ pub fn sccache_client_cfg( rewrite_includes_only: false, // TODO }, server_startup_timeout_ms: None, + basedir: None, } } diff --git a/tests/oauth.rs b/tests/oauth.rs 
index 066bcc2bd..7d3eb9cd9 100644 --- a/tests/oauth.rs +++ b/tests/oauth.rs @@ -60,6 +60,7 @@ fn config_with_dist_auth( rewrite_includes_only: true, }, server_startup_timeout_ms: None, + basedir: None, } } From ace9c0ebb1512ce81be707b5c054a83e44155e49 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 18 Dec 2025 23:46:40 +0100 Subject: [PATCH 2/9] Fix clippy, introduce multiple basedirs logic --- README.md | 13 +++ docs/Configuration.md | 8 +- src/cache/cache.rs | 25 +++- src/cache/disk.rs | 10 +- src/compiler/c.rs | 93 ++++++++------- src/compiler/compiler.rs | 10 +- src/config.rs | 238 +++++++++++++++++++++++++++++++++++---- src/test/tests.rs | 4 +- src/util.rs | 158 ++++++++++++++++---------- tests/harness/mod.rs | 80 ++++++------- tests/oauth.rs | 2 +- 11 files changed, 455 insertions(+), 186 deletions(-) diff --git a/README.md b/README.md index 51ff5c52f..5e093cca6 100644 --- a/README.md +++ b/README.md @@ -287,15 +287,28 @@ By default, sccache requires absolute paths to match for cache hits. To enable c export SCCACHE_BASEDIR=/home/user/project ``` +You can also specify multiple base directories by separating them with `|` (pipe character). When multiple directories are provided, the longest matching prefix is used: + +```bash +export SCCACHE_BASEDIR="/home/user/project|/home/user/workspace" +``` + This is similar to ccache's `CCACHE_BASEDIR` and helps when: * Building the same project from different directories * Sharing cache between CI jobs with different checkout paths * Multiple developers working with different username paths +* Working with multiple project checkouts simultaneously + +**Note:** Only absolute paths are supported. Relative paths will be ignored with a warning. 
You can also configure this in the sccache config file: ```toml +# Single directory basedir = "/home/user/project" + +# Or multiple directories +basedir = ["/home/user/project", "/home/user/workspace"] ``` --- diff --git a/docs/Configuration.md b/docs/Configuration.md index 32e1ec8cc..5371c3582 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -6,13 +6,17 @@ # If specified, wait this long for the server to start up. server_startup_timeout_ms = 10000 -# Base directory to strip from paths for cache key computation. +# Base directory (or directories) to strip from paths for cache key computation. # Similar to ccache's CCACHE_BASEDIR. This enables cache hits across # different absolute paths when compiling the same source code. +# Can be a single path or an array of paths. When multiple paths are provided, +# the longest matching prefix is used. # For example, if basedir is "/home/user/project", then paths like # "/home/user/project/src/main.c" will be normalized to "./src/main.c" # for caching purposes. basedir = "/home/user/project" +# Or multiple directories: +# basedir = ["/home/user/project", "/home/user/workspace"] [dist] # where to find the scheduler @@ -142,7 +146,7 @@ Note that some env variables may need sccache server restart to take effect. * `SCCACHE_ALLOW_CORE_DUMPS` to enable core dumps by the server * `SCCACHE_CONF` configuration file path -* `SCCACHE_BASEDIR` base directory to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Environment variable takes precedence over file configuration. +* `SCCACHE_BASEDIR` base directory (or directories) to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Multiple directories can be separated by `|` (pipe character). 
When multiple directories are specified, the longest matching prefix is used. Environment variable takes precedence over file configuration. Only absolute paths are supported; relative paths will be ignored with a warning. * `SCCACHE_CACHED_CONF` * `SCCACHE_IDLE_TIMEOUT` how long the local daemon process waits for more client requests before exiting, in seconds. Set to `0` to run sccache permanently * `SCCACHE_STARTUP_NOTIFY` specify a path to a socket which will be used for server completion notification diff --git a/src/cache/cache.rs b/src/cache/cache.rs index 86886a043..cfe8e7c20 100644 --- a/src/cache/cache.rs +++ b/src/cache/cache.rs @@ -381,9 +381,9 @@ pub trait Storage: Send + Sync { // Enable by default, only in local mode PreprocessorCacheModeConfig::default() } - /// Return the base directory for path normalization if configured - fn basedir(&self) -> Option<&Path> { - None + /// Return the base directories for path normalization if configured + fn basedirs(&self) -> &[PathBuf] { + &[] } /// Return the preprocessor cache entry for a given preprocessor key, /// if it exists. @@ -740,13 +740,30 @@ pub fn storage_from_config( let preprocessor_cache_mode_config = config.fallback_cache.preprocessor_cache_mode; let rw_mode = config.fallback_cache.rw_mode.into(); debug!("Init disk cache with dir {:?}, size {}", dir, size); + + // Validate that all basedirs are absolute paths + let basedirs: Vec = config.basedir.iter() + .filter_map(|p| { + if p.is_absolute() { + Some(p.clone()) + } else { + warn!("Ignoring relative basedir path: {:?}. 
Only absolute paths are supported.", p); + None + } + }) + .collect(); + + if !basedirs.is_empty() { + debug!("Using basedirs for path normalization: {:?}", basedirs); + } + Ok(Arc::new(DiskCache::new( dir, size, pool, preprocessor_cache_mode_config, rw_mode, - config.basedir.clone(), + basedirs, ))) } diff --git a/src/cache/disk.rs b/src/cache/disk.rs index 2a5b17ef5..515c3f2ca 100644 --- a/src/cache/disk.rs +++ b/src/cache/disk.rs @@ -74,7 +74,7 @@ pub struct DiskCache { preprocessor_cache_mode_config: PreprocessorCacheModeConfig, preprocessor_cache: Arc>, rw_mode: CacheMode, - basedir: Option, + basedirs: Vec, } impl DiskCache { @@ -85,7 +85,7 @@ impl DiskCache { pool: &tokio::runtime::Handle, preprocessor_cache_mode_config: PreprocessorCacheModeConfig, rw_mode: CacheMode, - basedir: Option, + basedirs: Vec, ) -> DiskCache { DiskCache { lru: Arc::new(Mutex::new(LazyDiskCache::Uninit { @@ -101,7 +101,7 @@ impl DiskCache { max_size, })), rw_mode, - basedir, + basedirs, } } } @@ -184,8 +184,8 @@ impl Storage for DiskCache { fn preprocessor_cache_mode_config(&self) -> PreprocessorCacheModeConfig { self.preprocessor_cache_mode_config } - fn basedir(&self) -> Option<&Path> { - self.basedir.as_deref() + fn basedirs(&self) -> &[PathBuf] { + &self.basedirs } async fn get_preprocessor_cache_entry(&self, key: &str) -> Result>> { let key = normalize_key(key); diff --git a/src/compiler/c.rs b/src/compiler/c.rs index ceea3c0c8..c902376ae 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -613,7 +613,7 @@ where &env_vars, &preprocessor_result.stdout, self.compiler.plusplus(), - storage.basedir(), + storage.basedirs(), ) }; @@ -1445,6 +1445,11 @@ static CACHED_ENV_VARS: LazyLock> = LazyLock::new(|| { }); /// Compute the hash key of `compiler` compiling `preprocessor_output` with `args`. +/// +/// If `basedirs` are provided, paths in the preprocessor output will be normalized by +/// stripping the longest matching basedir prefix. 
This enables cache hits across different +/// absolute paths (similar to ccache's CCACHE_BASEDIR). +#[allow(clippy::too_many_arguments)] pub fn hash_key( compiler_digest: &str, language: Language, @@ -1453,7 +1458,7 @@ pub fn hash_key( env_vars: &[(OsString, OsString)], preprocessor_output: &[u8], plusplus: bool, - basedir: Option<&Path>, + basedirs: &[PathBuf], ) -> String { // If you change any of the inputs to the hash, you should change `CACHE_VERSION`. let mut m = Digest::new(); @@ -1478,10 +1483,10 @@ pub fn hash_key( } } - // Strip basedir from preprocessor output if configured - let preprocessor_output_to_hash = if let Some(base) = basedir { - use crate::util::strip_basedir; - Cow::Owned(strip_basedir(preprocessor_output, base)) + // Strip basedirs from preprocessor output if configured + let preprocessor_output_to_hash = if !basedirs.is_empty() { + use crate::util::strip_basedirs; + Cow::Owned(strip_basedirs(preprocessor_output, basedirs)) } else { Cow::Borrowed(preprocessor_output) }; @@ -1501,8 +1506,8 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_eq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None), - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None) + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, &[]), + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, &[]) ); } @@ -1511,8 +1516,8 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None), - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, true, None) + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, &[]), + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, true, &[]) ); } @@ -1521,7 +1526,7 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - 
hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None), + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, &[]), hash_key( "abcd", Language::CHeader, @@ -1530,7 +1535,7 @@ mod test { &[], PREPROCESSED, false, - None + &[] ) ); } @@ -1540,7 +1545,7 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::Cxx, &args, &[], &[], PREPROCESSED, true, None), + hash_key("abcd", Language::Cxx, &args, &[], &[], PREPROCESSED, true, &[]), hash_key( "abcd", Language::CxxHeader, @@ -1549,7 +1554,7 @@ mod test { &[], PREPROCESSED, true, - None + &[] ) ); } @@ -1559,8 +1564,8 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None), - hash_key("wxyz", Language::C, &args, &[], &[], PREPROCESSED, false, None) + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, &[]), + hash_key("wxyz", Language::C, &args, &[], &[], PREPROCESSED, false, &[]) ); } @@ -1573,18 +1578,18 @@ mod test { let a = ovec!["a"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, None), - hash_key(digest, Language::C, &xyz, &[], &[], PREPROCESSED, false, None) + hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, &[]), + hash_key(digest, Language::C, &xyz, &[], &[], PREPROCESSED, false, &[]) ); assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, None), - hash_key(digest, Language::C, &ab, &[], &[], PREPROCESSED, false, None) + hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, &[]), + hash_key(digest, Language::C, &ab, &[], &[], PREPROCESSED, false, &[]) ); assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, None), - hash_key(digest, Language::C, &a, &[], &[], PREPROCESSED, false, None) + 
hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, &[]), + hash_key(digest, Language::C, &a, &[], &[], PREPROCESSED, false, &[]) ); } @@ -1600,9 +1605,9 @@ mod test { &[], &b"hello world"[..], false, - None + &[] ), - hash_key("abcd", Language::C, &args, &[], &[], &b"goodbye"[..], false, None) + hash_key("abcd", Language::C, &args, &[], &[], &b"goodbye"[..], false, &[]) ); } @@ -1612,11 +1617,11 @@ mod test { let digest = "abcd"; const PREPROCESSED: &[u8] = b"hello world"; for var in CACHED_ENV_VARS.iter() { - let h1 = hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false, None); + let h1 = hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false, &[]); let vars = vec![(OsString::from(var), OsString::from("something"))]; - let h2 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false, None); + let h2 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false, &[]); let vars = vec![(OsString::from(var), OsString::from("something else"))]; - let h3 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false, None); + let h3 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false, &[]); assert_neq!(h1, h2); assert_neq!(h2, h3); } @@ -1638,15 +1643,15 @@ mod test { &[], PREPROCESSED, false, - None + &[] ), - hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false, None) + hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false, &[]) ); } #[test] fn test_hash_key_basedir() { - use std::path::Path; + use std::path::PathBuf; let args = ovec!["a", "b", "c"]; let digest = "abcd"; @@ -1655,17 +1660,17 @@ mod test { let preprocessed1 = b"# 1 \"/home/user1/project/src/main.c\"\nint main() { return 0; }"; let preprocessed2 = b"# 1 \"/home/user2/project/src/main.c\"\nint main() { return 0; }"; - let basedir1 = Path::new("/home/user1/project"); - let basedir2 = Path::new("/home/user2/project"); + let basedir1 = PathBuf::from("/home/user1/project"); + let basedir2 = 
PathBuf::from("/home/user2/project"); - let h1 = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, Some(basedir1)); - let h2 = hash_key(digest, Language::C, &args, &[], &[], preprocessed2, false, Some(basedir2)); + let h1 = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, std::slice::from_ref(&basedir1)); + let h2 = hash_key(digest, Language::C, &args, &[], &[], preprocessed2, false, std::slice::from_ref(&basedir2)); assert_eq!(h1, h2); // Test 2: Different hashes without basedir - let h1_no_base = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, None); - let h2_no_base = hash_key(digest, Language::C, &args, &[], &[], preprocessed2, false, None); + let h1_no_base = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, &[]); + let h2_no_base = hash_key(digest, Language::C, &args, &[], &[], preprocessed2, false, &[]); assert_neq!(h1_no_base, h2_no_base); @@ -1673,16 +1678,24 @@ mod test { let preprocessed_cpp1 = b"# 1 \"/home/user1/project/src/main.cpp\"\nint main() { return 0; }"; let preprocessed_cpp2 = b"# 1 \"/home/user2/project/src/main.cpp\"\nint main() { return 0; }"; - let h_cpp1 = hash_key(digest, Language::Cxx, &args, &[], &[], preprocessed_cpp1, true, Some(basedir1)); - let h_cpp2 = hash_key(digest, Language::Cxx, &args, &[], &[], preprocessed_cpp2, true, Some(basedir2)); + let h_cpp1 = hash_key(digest, Language::Cxx, &args, &[], &[], preprocessed_cpp1, true, std::slice::from_ref(&basedir1)); + let h_cpp2 = hash_key(digest, Language::Cxx, &args, &[], &[], preprocessed_cpp2, true, std::slice::from_ref(&basedir2)); assert_eq!(h_cpp1, h_cpp2); // Test 4: Works with trailing slashes - let basedir_slash = Path::new("/home/user1/project/"); - let h_slash = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, Some(basedir_slash)); + let basedir_slash = PathBuf::from("/home/user1/project/"); + let h_slash = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, 
std::slice::from_ref(&basedir_slash)); assert_eq!(h1, h_slash); + + // Test 5: Multiple basedirs - longest match wins + let basedirs = vec![ + PathBuf::from("/home/user1"), + PathBuf::from("/home/user1/project"), // This should match (longest) + ]; + let h_multi = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, &basedirs); + assert_eq!(h1, h_multi); } #[test] diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 430e3d124..95bb6dbbf 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -2314,7 +2314,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, - None, + vec![], ); // Write a dummy input file so the preprocessor cache mode can work std::fs::write(f.tempdir.path().join("foo.c"), "whatever").unwrap(); @@ -2445,7 +2445,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, - None, + vec![], ); // Write a dummy input file so the preprocessor cache mode can work std::fs::write(f.tempdir.path().join("foo.c"), "whatever").unwrap(); @@ -2749,7 +2749,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, - None, + vec![], ); let storage = Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage.clone(), pool.clone()); @@ -2879,7 +2879,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, - None, + vec![], ); let storage = Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage.clone(), pool.clone()); @@ -2978,7 +2978,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, - None, + vec![], ); let storage = Arc::new(storage); // Pretend to be GCC. diff --git a/src/config.rs b/src/config.rs index 018bccf8c..1c9af6542 100644 --- a/src/config.rs +++ b/src/config.rs @@ -584,8 +584,29 @@ pub struct FileConfig { pub cache: CacheConfigs, pub dist: DistConfig, pub server_startup_timeout_ms: Option, - /// Base directory to strip from paths for cache key computation. 
- pub basedir: Option, + /// Base directory (or directories) to strip from paths for cache key computation. + /// Can be a single path or an array of paths. + #[serde(default, deserialize_with = "deserialize_basedir")] + pub basedir: Vec, +} + +fn deserialize_basedir<'de, D>(deserializer: D) -> std::result::Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + use serde::Deserialize; + + #[derive(Deserialize)] + #[serde(untagged)] + enum StringOrVec { + String(PathBuf), + Vec(Vec), + } + + match StringOrVec::deserialize(deserializer)? { + StringOrVec::String(s) => Ok(vec![s]), + StringOrVec::Vec(v) => Ok(v), + } } // If the file doesn't exist or we can't read it, log the issue and proceed. If the @@ -623,7 +644,7 @@ pub fn try_read_config_file(path: &Path) -> Result, + basedir: Vec, } fn key_prefix_from_env_var(env_var_name: &str) -> String { @@ -950,7 +971,16 @@ fn config_from_env() -> Result { }; // ======= Base directory ======= - let basedir = env::var_os("SCCACHE_BASEDIR").map(PathBuf::from); + // Support multiple paths separated by '|' (a character forbidden in paths) + let basedir = env::var_os("SCCACHE_BASEDIR") + .map(|s| { + s.to_string_lossy() + .split('|') + .map(|p| PathBuf::from(p.trim())) + .filter(|p| !p.as_os_str().is_empty()) + .collect() + }) + .unwrap_or_default(); Ok(EnvConfig { cache, basedir }) } @@ -984,9 +1014,10 @@ pub struct Config { pub fallback_cache: DiskCacheConfig, pub dist: DistConfig, pub server_startup_timeout: Option, - /// Base directory to strip from paths for cache key computation. + /// Base directory (or directories) to strip from paths for cache key computation. /// Similar to ccache's CCACHE_BASEDIR. - pub basedir: Option, + /// Currently only the first directory is used if multiple are specified. 
+ pub basedir: Vec, } impl Config { @@ -1022,7 +1053,11 @@ impl Config { conf_caches.merge(cache); // Environment variable takes precedence over file config - let basedir = env_basedir.or(file_basedir); + let basedir = if !env_basedir.is_empty() { + env_basedir + } else { + file_basedir + }; let (caches, fallback_cache) = conf_caches.into_fallback(); Self { @@ -1304,7 +1339,7 @@ fn config_overrides() { }), ..Default::default() }, - basedir: None, + basedir: vec![], }; let file_conf = FileConfig { @@ -1331,7 +1366,7 @@ fn config_overrides() { }, dist: Default::default(), server_startup_timeout_ms: None, - basedir: None, + basedir: vec![], }; assert_eq!( @@ -1354,7 +1389,7 @@ fn config_overrides() { }, dist: Default::default(), server_startup_timeout: None, - basedir: None, + basedir: vec![], } ); } @@ -1366,50 +1401,207 @@ fn config_basedir_overrides() { // Test that env variable takes precedence over file config let env_conf = EnvConfig { cache: Default::default(), - basedir: Some(PathBuf::from("/env/basedir")), + basedir: vec![PathBuf::from("/env/basedir")], }; let file_conf = FileConfig { cache: Default::default(), dist: Default::default(), server_startup_timeout_ms: None, - basedir: Some(PathBuf::from("/file/basedir")), + basedir: vec![PathBuf::from("/file/basedir")], }; let config = Config::from_env_and_file_configs(env_conf, file_conf); - assert_eq!(config.basedir, Some(PathBuf::from("/env/basedir"))); + assert_eq!(config.basedir, vec![PathBuf::from("/env/basedir")]); - // Test that file config is used when env is None + // Test that file config is used when env is empty let env_conf = EnvConfig { cache: Default::default(), - basedir: None, + basedir: vec![], }; let file_conf = FileConfig { cache: Default::default(), dist: Default::default(), server_startup_timeout_ms: None, - basedir: Some(PathBuf::from("/file/basedir")), + basedir: vec![PathBuf::from("/file/basedir")], }; let config = Config::from_env_and_file_configs(env_conf, file_conf); - 
assert_eq!(config.basedir, Some(PathBuf::from("/file/basedir"))); + assert_eq!(config.basedir, vec![PathBuf::from("/file/basedir")]); - // Test that both None results in None + // Test that both empty results in empty let env_conf = EnvConfig { cache: Default::default(), - basedir: None, + basedir: vec![], }; let file_conf = FileConfig { cache: Default::default(), dist: Default::default(), server_startup_timeout_ms: None, - basedir: None, + basedir: vec![], }; let config = Config::from_env_and_file_configs(env_conf, file_conf); - assert_eq!(config.basedir, None); + assert_eq!(config.basedir, Vec::::new()); +} + +#[test] +fn test_deserialize_basedir_single() { + use std::path::PathBuf; + + // Test single string value + let toml = r#" + basedir = "/home/user/project" + + [cache.disk] + dir = "/tmp/cache" + size = 1073741824 + + [dist] + "#; + + let config: FileConfig = toml::from_str(toml).unwrap(); + assert_eq!(config.basedir, vec![PathBuf::from("/home/user/project")]); +} + +#[test] +fn test_deserialize_basedir_multiple() { + use std::path::PathBuf; + + // Test array of paths + let toml = r#" + basedir = ["/home/user/project", "/home/user/workspace"] + + [cache.disk] + dir = "/tmp/cache" + size = 1073741824 + + [dist] + "#; + + let config: FileConfig = toml::from_str(toml).unwrap(); + assert_eq!( + config.basedir, + vec![ + PathBuf::from("/home/user/project"), + PathBuf::from("/home/user/workspace") + ] + ); +} + +#[test] +fn test_deserialize_basedir_missing() { + use std::path::PathBuf; + + // Test no basedir specified (should default to empty vec) + let toml = r#" + [cache.disk] + dir = "/tmp/cache" + size = 1073741824 + + [dist] + "#; + + let config: FileConfig = toml::from_str(toml).unwrap(); + assert_eq!(config.basedir, Vec::::new()); +} + +#[test] +#[serial] +fn test_env_basedir_single() { + use std::path::PathBuf; + + unsafe { + std::env::set_var("SCCACHE_BASEDIR", "/home/user/project"); + } + let config = config_from_env().unwrap(); + 
assert_eq!(config.basedir, vec![PathBuf::from("/home/user/project")]); + unsafe { + std::env::remove_var("SCCACHE_BASEDIR"); + } +} + +#[test] +#[serial] +fn test_env_basedir_multiple() { + use std::path::PathBuf; + + unsafe { + std::env::set_var("SCCACHE_BASEDIR", "/home/user/project|/home/user/workspace"); + } + let config = config_from_env().unwrap(); + assert_eq!( + config.basedir, + vec![ + PathBuf::from("/home/user/project"), + PathBuf::from("/home/user/workspace") + ] + ); + unsafe { + std::env::remove_var("SCCACHE_BASEDIR"); + } +} + +#[test] +#[serial] +fn test_env_basedir_with_spaces() { + use std::path::PathBuf; + + // Test that spaces around paths are trimmed + unsafe { + std::env::set_var( + "SCCACHE_BASEDIR", + " /home/user/project | /home/user/workspace ", + ); + } + let config = config_from_env().unwrap(); + assert_eq!( + config.basedir, + vec![ + PathBuf::from("/home/user/project"), + PathBuf::from("/home/user/workspace") + ] + ); + unsafe { + std::env::remove_var("SCCACHE_BASEDIR"); + } +} + +#[test] +#[serial] +fn test_env_basedir_empty_entries() { + use std::path::PathBuf; + + // Test that empty entries are filtered out + unsafe { + std::env::set_var( + "SCCACHE_BASEDIR", + "/home/user/project||/home/user/workspace", + ); + } + let config = config_from_env().unwrap(); + assert_eq!( + config.basedir, + vec![ + PathBuf::from("/home/user/project"), + PathBuf::from("/home/user/workspace") + ] + ); + unsafe { + std::env::remove_var("SCCACHE_BASEDIR"); + } +} + +#[test] +#[serial] +fn test_env_basedir_not_set() { + unsafe { + std::env::remove_var("SCCACHE_BASEDIR"); + } + let config = config_from_env().unwrap(); + assert_eq!(config.basedir, Vec::::new()); } #[test] @@ -1717,7 +1909,7 @@ no_credentials = true rewrite_includes_only: false, }, server_startup_timeout_ms: Some(10000), - basedir: None, + basedir: vec![], } ) } @@ -1810,7 +2002,7 @@ size = "7g" ..Default::default() }, server_startup_timeout_ms: None, - basedir: None, + basedir: vec![], } ); 
} diff --git a/src/test/tests.rs b/src/test/tests.rs index 7ed2c7e8f..17e284ce5 100644 --- a/src/test/tests.rs +++ b/src/test/tests.rs @@ -31,7 +31,7 @@ use std::net::TcpListener; use std::path::Path; #[cfg(not(target_os = "macos"))] use std::process::Command; -use std::sync::{Arc, Mutex, mpsc}; +use std::sync::{mpsc, Arc, Mutex}; use std::thread; use std::time::Duration; use tokio::runtime::Runtime; @@ -85,7 +85,7 @@ where runtime.handle(), PreprocessorCacheModeConfig::default(), CacheMode::ReadWrite, - None, + vec![], )); let client = Client::new(); diff --git a/src/util.rs b/src/util.rs index be268bb1f..15d54c3f6 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1015,49 +1015,69 @@ pub fn num_cpus() -> usize { std::thread::available_parallelism().map_or(1, std::num::NonZeroUsize::get) } -/// Strip the base directory from the preprocessor output to enable cache hits -/// across different absolute paths. +/// Strip base directories from absolute paths in preprocessor output. /// -/// This function searches for the basedir path in the preprocessor output and -/// replaces it with a relative path marker. +/// This function searches for basedir paths in the preprocessor output and +/// replaces them with relative path markers. When multiple basedirs are provided, +/// the longest matching prefix is used. This is similar to ccache's CCACHE_BASEDIR. /// -/// The function handles both Unix-style (`/`) and Windows-style (`\`) path separators, -/// and normalizes trailing slashes. 
-pub fn strip_basedir(preprocessor_output: &[u8], basedir: &Path) -> Vec { - // Normalize the basedir by removing trailing slashes - let basedir_normalized = basedir.to_string_lossy(); - let basedir_str = basedir_normalized.trim_end_matches('/').trim_end_matches('\\'); - let basedir_bytes = basedir_str.as_bytes(); - - // If basedir is empty or preprocessor output is empty, return as-is - if basedir_bytes.is_empty() || preprocessor_output.is_empty() { +/// Only paths that start with one of the basedirs are modified. The paths are expected to be +/// in the format found in preprocessor output (e.g., `# 1 "/path/to/file"`). +pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec { + if basedirs.is_empty() || preprocessor_output.is_empty() { return preprocessor_output.to_vec(); } + // Prepare normalized basedirs sorted by length (longest first) to match longest prefix first + let mut basedir_data: Vec<_> = basedirs + .iter() + .map(|basedir| { + let normalized = basedir.to_string_lossy(); + let trimmed = normalized.trim_end_matches('/').trim_end_matches('\\'); + (trimmed.as_bytes().to_vec(), trimmed.len()) + }) + .filter(|(bytes, _)| !bytes.is_empty()) + .collect(); + + if basedir_data.is_empty() { + return preprocessor_output.to_vec(); + } + + // Sort by length descending (longest first) + basedir_data.sort_by(|a, b| b.1.cmp(&a.1)); + let mut result = Vec::with_capacity(preprocessor_output.len()); let mut i = 0; while i < preprocessor_output.len() { - // Check if we have a match for basedir at current position - if i + basedir_bytes.len() <= preprocessor_output.len() - && &preprocessor_output[i..i + basedir_bytes.len()] == basedir_bytes - { - // Check if this is actually a path boundary (preceded by whitespace, quote, or start) - let is_boundary = i == 0 - || preprocessor_output[i - 1].is_ascii_whitespace() - || preprocessor_output[i - 1] == b'"' - || preprocessor_output[i - 1] == b'<'; - - if is_boundary { - // Replace basedir with "." 
- result.push(b'.'); - i += basedir_bytes.len(); - continue; + let mut matched = false; + + // Try to match each basedir (longest first) + for (basedir_bytes, basedir_len) in &basedir_data { + // Check if we have a match for this basedir at current position + if i + basedir_len <= preprocessor_output.len() + && preprocessor_output[i..i + basedir_len] == basedir_bytes[..] + { + // Check if this is actually a path boundary (preceded by whitespace, quote, or start) + let is_boundary = i == 0 + || preprocessor_output[i - 1].is_ascii_whitespace() + || preprocessor_output[i - 1] == b'"' + || preprocessor_output[i - 1] == b'<'; + + if is_boundary { + // Replace basedir with "." + result.push(b'.'); + i += basedir_len; + matched = true; + break; + } } } - result.push(preprocessor_output[i]); - i += 1; + if !matched { + result.push(preprocessor_output[i]); + i += 1; + } } result @@ -1218,52 +1238,51 @@ mod tests { #[test] fn test_strip_basedir_simple() { - use std::path::Path; + use std::path::PathBuf; // Simple cases - let basedir = Path::new("/home/user/project"); + let basedir = PathBuf::from("/home/user/project"); let input = b"# 1 \"/home/user/project/src/main.c\"\nint main() { return 0; }"; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); let expected = b"# 1 \"./src/main.c\"\nint main() { return 0; }"; assert_eq!(output, expected); // Multiple occurrences let input = b"# 1 \"/home/user/project/src/main.c\"\n# 2 \"/home/user/project/include/header.h\""; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); let expected = b"# 1 \"./src/main.c\"\n# 2 \"./include/header.h\""; assert_eq!(output, expected); // No occurrences let input = b"# 1 \"/other/path/main.c\"\nint main() { return 0; }"; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, 
std::slice::from_ref(&basedir)); assert_eq!(output, input); } #[test] fn test_strip_basedir_empty() { - use std::path::Path; + use std::path::PathBuf; - // Empty basedir - let basedir = Path::new(""); + // Empty basedir slice let input = b"# 1 \"/home/user/project/src/main.c\""; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, &[]); assert_eq!(output, input); // Empty input - let basedir = Path::new("/home/user/project"); + let basedir = PathBuf::from("/home/user/project"); let input = b""; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); assert_eq!(output, input); } #[test] fn test_strip_basedir_not_at_boundary() { - use std::path::Path; + use std::path::PathBuf; // basedir should only match at word boundaries - let basedir = Path::new("/home/user"); + let basedir = PathBuf::from("/home/user"); let input = b"text/home/user/file.c and \"/home/user/other.c\""; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); // Should only replace the second occurrence (after quote) let expected = b"text/home/user/file.c and \"./other.c\""; assert_eq!(output, expected); @@ -1271,26 +1290,51 @@ mod tests { #[test] fn test_strip_basedir_trailing_slashes() { - use std::path::Path; + use std::path::PathBuf; // Without trailing slash - let basedir = Path::new("/home/user/project"); + let basedir = PathBuf::from("/home/user/project"); let input = b"# 1 \"/home/user/project/src/main.c\""; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); let expected = b"# 1 \"./src/main.c\""; assert_eq!(output, expected); // With single trailing slash - let basedir = Path::new("/home/user/project/"); + let basedir = PathBuf::from("/home/user/project/"); let input = b"# 1 \"/home/user/project/src/main.c\""; - let output = 
super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); let expected = b"# 1 \"./src/main.c\""; assert_eq!(output, expected); // With multiple trailing slashes - let basedir = Path::new("/home/user/project////"); + let basedir = PathBuf::from("/home/user/project////"); + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + let expected = b"# 1 \"./src/main.c\""; + assert_eq!(output, expected); + } + + #[test] + fn test_strip_basedirs_multiple() { + use std::path::PathBuf; + + // Multiple basedirs - should match longest first + let basedirs = vec![ + PathBuf::from("/home/user1/project"), + PathBuf::from("/home/user2/workspace"), + ]; + let input = b"# 1 \"/home/user1/project/src/main.c\"\n# 2 \"/home/user2/workspace/lib/util.c\""; + let output = super::strip_basedirs(input, &basedirs); + let expected = b"# 1 \"./src/main.c\"\n# 2 \"./lib/util.c\""; + assert_eq!(output, expected); + + // Longest prefix wins + let basedirs = vec![ + PathBuf::from("/home/user"), + PathBuf::from("/home/user/project"), // This should match first (longest) + ]; let input = b"# 1 \"/home/user/project/src/main.c\""; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, &basedirs); let expected = b"# 1 \"./src/main.c\""; assert_eq!(output, expected); } @@ -1298,19 +1342,19 @@ mod tests { #[cfg(target_os = "windows")] #[test] fn test_strip_basedir_windows_backslashes() { - use std::path::Path; + use std::path::PathBuf; // Without trailing backslash - let basedir = Path::new("C:\\Users\\test\\project"); + let basedir = PathBuf::from("C:\\Users\\test\\project"); let input = b"# 1 \"C:\\Users\\test\\project\\src\\main.c\""; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); let expected = b"# 1 \".\\src\\main.c\""; assert_eq!(output, 
expected); // With multiple trailing backslashes - let basedir = Path::new("C:\\Users\\test\\project\\\\\\"); + let basedir = PathBuf::from("C:\\Users\\test\\project\\\\\\"); let input = b"# 1 \"C:\\Users\\test\\project\\src\\main.c\""; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); let expected = b"# 1 \".\\src\\main.c\""; assert_eq!(output, expected); } diff --git a/tests/harness/mod.rs b/tests/harness/mod.rs index 73b10fa99..877f3ccfa 100644 --- a/tests/harness/mod.rs +++ b/tests/harness/mod.rs @@ -190,7 +190,7 @@ pub fn sccache_client_cfg( rewrite_includes_only: false, // TODO }, server_startup_timeout_ms: None, - basedir: None, + basedir: vec![], } } @@ -577,44 +577,32 @@ impl Drop for DistSystem { let mut exits = vec![]; if let Some(scheduler_name) = self.scheduler_name.as_ref() { - droperr!( - Command::new("docker") - .args(["logs", scheduler_name]) - .output() - .map(|o| logs.push((scheduler_name, o))) - ); - droperr!( - Command::new("docker") - .args(["kill", scheduler_name]) - .output() - .map(|o| outputs.push((scheduler_name, o))) - ); - droperr!( - Command::new("docker") - .args(["rm", "-f", scheduler_name]) - .output() - .map(|o| outputs.push((scheduler_name, o))) - ); + droperr!(Command::new("docker") + .args(["logs", scheduler_name]) + .output() + .map(|o| logs.push((scheduler_name, o)))); + droperr!(Command::new("docker") + .args(["kill", scheduler_name]) + .output() + .map(|o| outputs.push((scheduler_name, o)))); + droperr!(Command::new("docker") + .args(["rm", "-f", scheduler_name]) + .output() + .map(|o| outputs.push((scheduler_name, o)))); } for server_name in self.server_names.iter() { - droperr!( - Command::new("docker") - .args(["logs", server_name]) - .output() - .map(|o| logs.push((server_name, o))) - ); - droperr!( - Command::new("docker") - .args(["kill", server_name]) - .output() - .map(|o| outputs.push((server_name, o))) - ); - droperr!( - 
Command::new("docker") - .args(["rm", "-f", server_name]) - .output() - .map(|o| outputs.push((server_name, o))) - ); + droperr!(Command::new("docker") + .args(["logs", server_name]) + .output() + .map(|o| logs.push((server_name, o)))); + droperr!(Command::new("docker") + .args(["kill", server_name]) + .output() + .map(|o| outputs.push((server_name, o)))); + droperr!(Command::new("docker") + .args(["rm", "-f", server_name]) + .output() + .map(|o| outputs.push((server_name, o)))); } for &pid in self.server_pids.iter() { droperr!(nix::sys::signal::kill(pid, Signal::SIGINT)); @@ -631,16 +619,14 @@ impl Drop for DistSystem { if killagain { eprintln!("SIGINT didn't kill process, trying SIGKILL"); droperr!(nix::sys::signal::kill(pid, Signal::SIGKILL)); - droperr!( - nix::sys::wait::waitpid(pid, Some(WaitPidFlag::WNOHANG)) - .map_err(|e| e.to_string()) - .and_then(|ws| if ws == WaitStatus::StillAlive { - Err("process alive after sigkill".to_owned()) - } else { - exits.push(ws); - Ok(()) - }) - ); + droperr!(nix::sys::wait::waitpid(pid, Some(WaitPidFlag::WNOHANG)) + .map_err(|e| e.to_string()) + .and_then(|ws| if ws == WaitStatus::StillAlive { + Err("process alive after sigkill".to_owned()) + } else { + exits.push(ws); + Ok(()) + })); } } diff --git a/tests/oauth.rs b/tests/oauth.rs index 7d3eb9cd9..6798d9939 100644 --- a/tests/oauth.rs +++ b/tests/oauth.rs @@ -60,7 +60,7 @@ fn config_with_dist_auth( rewrite_includes_only: true, }, server_startup_timeout_ms: None, - basedir: None, + basedir: vec![], } } From dc589c019b2b3a1e082592a554bf70a45c5d5b4e Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Fri, 19 Dec 2025 00:27:42 +0100 Subject: [PATCH 3/9] Make codecov happy --- tests/oauth.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/oauth.rs b/tests/oauth.rs index 6798d9939..7096e084a 100644 --- a/tests/oauth.rs +++ b/tests/oauth.rs @@ -226,6 +226,7 @@ fn test_auth_with_config(dist_auth: sccache::config::DistAuth) { .tempdir() .unwrap(); let sccache_config = config_with_dist_auth(conf_dir.path(), dist_auth); + assert!(sccache_config.basedir.is_empty()); let sccache_config_path = conf_dir.path().join("sccache-config.json"); fs::File::create(&sccache_config_path) .unwrap() From 9eb3241b6e7ad5d95c167e4e82678ae70d1b3f6f Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 19 Dec 2025 00:35:48 +0100 Subject: [PATCH 4/9] Apply fmt to all files --- src/cache/cache.rs | 9 +- src/compiler/c.rs | 287 ++++++++++++++++++++++++++++++++++++++----- src/config.rs | 2 +- src/test/tests.rs | 2 +- src/util.rs | 8 +- tests/harness/mod.rs | 78 +++++++----- 6 files changed, 317 insertions(+), 69 deletions(-) diff --git a/src/cache/cache.rs b/src/cache/cache.rs index cfe8e7c20..b4e71b788 100644 --- a/src/cache/cache.rs +++ b/src/cache/cache.rs @@ -742,12 +742,17 @@ pub fn storage_from_config( debug!("Init disk cache with dir {:?}, size {}", dir, size); // Validate that all basedirs are absolute paths - let basedirs: Vec = config.basedir.iter() + let basedirs: Vec = config + .basedir + .iter() .filter_map(|p| { if p.is_absolute() { Some(p.clone()) } else { - warn!("Ignoring relative basedir path: {:?}. Only absolute paths are supported.", p); + warn!( + "Ignoring relative basedir path: {:?}. Only absolute paths are supported.", + p + ); None } }) diff --git a/src/compiler/c.rs b/src/compiler/c.rs index c902376ae..18f54387c 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -1445,7 +1445,7 @@ static CACHED_ENV_VARS: LazyLock> = LazyLock::new(|| { }); /// Compute the hash key of `compiler` compiling `preprocessor_output` with `args`. 
-/// +/// /// If `basedirs` are provided, paths in the preprocessor output will be normalized by /// stripping the longest matching basedir prefix. This enables cache hits across different /// absolute paths (similar to ccache's CCACHE_BASEDIR). @@ -1506,8 +1506,26 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_eq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, &[]), - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, &[]) + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ) ); } @@ -1516,8 +1534,26 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, &[]), - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, true, &[]) + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + true, + &[] + ) ); } @@ -1526,7 +1562,16 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, &[]), + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ), hash_key( "abcd", Language::CHeader, @@ -1545,7 +1590,16 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::Cxx, &args, &[], &[], PREPROCESSED, true, &[]), + hash_key( + "abcd", + Language::Cxx, + &args, + &[], + &[], + PREPROCESSED, + true, + &[] + ), hash_key( "abcd", Language::CxxHeader, @@ -1564,8 +1618,26 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", 
Language::C, &args, &[], &[], PREPROCESSED, false, &[]), - hash_key("wxyz", Language::C, &args, &[], &[], PREPROCESSED, false, &[]) + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key( + "wxyz", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ) ); } @@ -1578,17 +1650,53 @@ mod test { let a = ovec!["a"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, &[]), - hash_key(digest, Language::C, &xyz, &[], &[], PREPROCESSED, false, &[]) + hash_key( + digest, + Language::C, + &abc, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key( + digest, + Language::C, + &xyz, + &[], + &[], + PREPROCESSED, + false, + &[] + ) ); assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, &[]), + hash_key( + digest, + Language::C, + &abc, + &[], + &[], + PREPROCESSED, + false, + &[] + ), hash_key(digest, Language::C, &ab, &[], &[], PREPROCESSED, false, &[]) ); assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, &[]), + hash_key( + digest, + Language::C, + &abc, + &[], + &[], + PREPROCESSED, + false, + &[] + ), hash_key(digest, Language::C, &a, &[], &[], PREPROCESSED, false, &[]) ); } @@ -1607,7 +1715,16 @@ mod test { false, &[] ), - hash_key("abcd", Language::C, &args, &[], &[], &b"goodbye"[..], false, &[]) + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + &b"goodbye"[..], + false, + &[] + ) ); } @@ -1617,11 +1734,38 @@ mod test { let digest = "abcd"; const PREPROCESSED: &[u8] = b"hello world"; for var in CACHED_ENV_VARS.iter() { - let h1 = hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false, &[]); + let h1 = hash_key( + digest, + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[], + ); let vars = vec![(OsString::from(var), OsString::from("something"))]; - let h2 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, 
false, &[]); + let h2 = hash_key( + digest, + Language::C, + &args, + &[], + &vars, + PREPROCESSED, + false, + &[], + ); let vars = vec![(OsString::from(var), OsString::from("something else"))]; - let h3 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false, &[]); + let h3 = hash_key( + digest, + Language::C, + &args, + &[], + &vars, + PREPROCESSED, + false, + &[], + ); assert_neq!(h1, h2); assert_neq!(h2, h3); } @@ -1645,7 +1789,16 @@ mod test { false, &[] ), - hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false, &[]) + hash_key( + digest, + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ) ); } @@ -1663,38 +1816,112 @@ mod test { let basedir1 = PathBuf::from("/home/user1/project"); let basedir2 = PathBuf::from("/home/user2/project"); - let h1 = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, std::slice::from_ref(&basedir1)); - let h2 = hash_key(digest, Language::C, &args, &[], &[], preprocessed2, false, std::slice::from_ref(&basedir2)); + let h1 = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed1, + false, + std::slice::from_ref(&basedir1), + ); + let h2 = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed2, + false, + std::slice::from_ref(&basedir2), + ); assert_eq!(h1, h2); // Test 2: Different hashes without basedir - let h1_no_base = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, &[]); - let h2_no_base = hash_key(digest, Language::C, &args, &[], &[], preprocessed2, false, &[]); + let h1_no_base = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed1, + false, + &[], + ); + let h2_no_base = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed2, + false, + &[], + ); assert_neq!(h1_no_base, h2_no_base); // Test 3: Works for C++ files too - let preprocessed_cpp1 = b"# 1 \"/home/user1/project/src/main.cpp\"\nint main() { return 0; }"; - let preprocessed_cpp2 = b"# 1 
\"/home/user2/project/src/main.cpp\"\nint main() { return 0; }"; - - let h_cpp1 = hash_key(digest, Language::Cxx, &args, &[], &[], preprocessed_cpp1, true, std::slice::from_ref(&basedir1)); - let h_cpp2 = hash_key(digest, Language::Cxx, &args, &[], &[], preprocessed_cpp2, true, std::slice::from_ref(&basedir2)); + let preprocessed_cpp1 = + b"# 1 \"/home/user1/project/src/main.cpp\"\nint main() { return 0; }"; + let preprocessed_cpp2 = + b"# 1 \"/home/user2/project/src/main.cpp\"\nint main() { return 0; }"; + + let h_cpp1 = hash_key( + digest, + Language::Cxx, + &args, + &[], + &[], + preprocessed_cpp1, + true, + std::slice::from_ref(&basedir1), + ); + let h_cpp2 = hash_key( + digest, + Language::Cxx, + &args, + &[], + &[], + preprocessed_cpp2, + true, + std::slice::from_ref(&basedir2), + ); assert_eq!(h_cpp1, h_cpp2); // Test 4: Works with trailing slashes let basedir_slash = PathBuf::from("/home/user1/project/"); - let h_slash = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, std::slice::from_ref(&basedir_slash)); + let h_slash = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed1, + false, + std::slice::from_ref(&basedir_slash), + ); assert_eq!(h1, h_slash); // Test 5: Multiple basedirs - longest match wins let basedirs = vec![ PathBuf::from("/home/user1"), - PathBuf::from("/home/user1/project"), // This should match (longest) + PathBuf::from("/home/user1/project"), // This should match (longest) ]; - let h_multi = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, &basedirs); + let h_multi = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed1, + false, + &basedirs, + ); assert_eq!(h1, h_multi); } diff --git a/src/config.rs b/src/config.rs index 1c9af6542..4e29fa5ac 100644 --- a/src/config.rs +++ b/src/config.rs @@ -19,8 +19,8 @@ use fs_err as fs; #[cfg(any(feature = "dist-client", feature = "dist-server"))] use serde::ser::Serializer; use serde::{ - de::{self, DeserializeOwned, 
Deserializer}, Deserialize, Serialize, + de::{self, DeserializeOwned, Deserializer}, }; #[cfg(test)] use serial_test::serial; diff --git a/src/test/tests.rs b/src/test/tests.rs index 17e284ce5..cfa5f11f3 100644 --- a/src/test/tests.rs +++ b/src/test/tests.rs @@ -31,7 +31,7 @@ use std::net::TcpListener; use std::path::Path; #[cfg(not(target_os = "macos"))] use std::process::Command; -use std::sync::{mpsc, Arc, Mutex}; +use std::sync::{Arc, Mutex, mpsc}; use std::thread; use std::time::Duration; use tokio::runtime::Runtime; diff --git a/src/util.rs b/src/util.rs index 15d54c3f6..94686e0d6 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1248,7 +1248,8 @@ mod tests { assert_eq!(output, expected); // Multiple occurrences - let input = b"# 1 \"/home/user/project/src/main.c\"\n# 2 \"/home/user/project/include/header.h\""; + let input = + b"# 1 \"/home/user/project/src/main.c\"\n# 2 \"/home/user/project/include/header.h\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); let expected = b"# 1 \"./src/main.c\"\n# 2 \"./include/header.h\""; assert_eq!(output, expected); @@ -1323,7 +1324,8 @@ mod tests { PathBuf::from("/home/user1/project"), PathBuf::from("/home/user2/workspace"), ]; - let input = b"# 1 \"/home/user1/project/src/main.c\"\n# 2 \"/home/user2/workspace/lib/util.c\""; + let input = + b"# 1 \"/home/user1/project/src/main.c\"\n# 2 \"/home/user2/workspace/lib/util.c\""; let output = super::strip_basedirs(input, &basedirs); let expected = b"# 1 \"./src/main.c\"\n# 2 \"./lib/util.c\""; assert_eq!(output, expected); @@ -1331,7 +1333,7 @@ mod tests { // Longest prefix wins let basedirs = vec![ PathBuf::from("/home/user"), - PathBuf::from("/home/user/project"), // This should match first (longest) + PathBuf::from("/home/user/project"), // This should match first (longest) ]; let input = b"# 1 \"/home/user/project/src/main.c\""; let output = super::strip_basedirs(input, &basedirs); diff --git a/tests/harness/mod.rs b/tests/harness/mod.rs index 
877f3ccfa..74302e660 100644 --- a/tests/harness/mod.rs +++ b/tests/harness/mod.rs @@ -577,32 +577,44 @@ impl Drop for DistSystem { let mut exits = vec![]; if let Some(scheduler_name) = self.scheduler_name.as_ref() { - droperr!(Command::new("docker") - .args(["logs", scheduler_name]) - .output() - .map(|o| logs.push((scheduler_name, o)))); - droperr!(Command::new("docker") - .args(["kill", scheduler_name]) - .output() - .map(|o| outputs.push((scheduler_name, o)))); - droperr!(Command::new("docker") - .args(["rm", "-f", scheduler_name]) - .output() - .map(|o| outputs.push((scheduler_name, o)))); + droperr!( + Command::new("docker") + .args(["logs", scheduler_name]) + .output() + .map(|o| logs.push((scheduler_name, o))) + ); + droperr!( + Command::new("docker") + .args(["kill", scheduler_name]) + .output() + .map(|o| outputs.push((scheduler_name, o))) + ); + droperr!( + Command::new("docker") + .args(["rm", "-f", scheduler_name]) + .output() + .map(|o| outputs.push((scheduler_name, o))) + ); } for server_name in self.server_names.iter() { - droperr!(Command::new("docker") - .args(["logs", server_name]) - .output() - .map(|o| logs.push((server_name, o)))); - droperr!(Command::new("docker") - .args(["kill", server_name]) - .output() - .map(|o| outputs.push((server_name, o)))); - droperr!(Command::new("docker") - .args(["rm", "-f", server_name]) - .output() - .map(|o| outputs.push((server_name, o)))); + droperr!( + Command::new("docker") + .args(["logs", server_name]) + .output() + .map(|o| logs.push((server_name, o))) + ); + droperr!( + Command::new("docker") + .args(["kill", server_name]) + .output() + .map(|o| outputs.push((server_name, o))) + ); + droperr!( + Command::new("docker") + .args(["rm", "-f", server_name]) + .output() + .map(|o| outputs.push((server_name, o))) + ); } for &pid in self.server_pids.iter() { droperr!(nix::sys::signal::kill(pid, Signal::SIGINT)); @@ -619,14 +631,16 @@ impl Drop for DistSystem { if killagain { eprintln!("SIGINT didn't kill 
process, trying SIGKILL"); droperr!(nix::sys::signal::kill(pid, Signal::SIGKILL)); - droperr!(nix::sys::wait::waitpid(pid, Some(WaitPidFlag::WNOHANG)) - .map_err(|e| e.to_string()) - .and_then(|ws| if ws == WaitStatus::StillAlive { - Err("process alive after sigkill".to_owned()) - } else { - exits.push(ws); - Ok(()) - })); + droperr!( + nix::sys::wait::waitpid(pid, Some(WaitPidFlag::WNOHANG)) + .map_err(|e| e.to_string()) + .and_then(|ws| if ws == WaitStatus::StillAlive { + Err("process alive after sigkill".to_owned()) + } else { + exits.push(ws); + Ok(()) + }) + ); } } From d2e6eddb2be1cb4c1dd7d2e3b2a1fa9117734c1e Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 22 Dec 2025 11:43:35 +0100 Subject: [PATCH 5/9] Clean out `basedir` intermediate step, use `basedirs` --- README.md | 14 ++-- docs/Configuration.md | 8 +-- src/cache/cache.rs | 2 +- src/compiler/c.rs | 16 +++-- src/config.rs | 145 ++++++++++++++++-------------------------- src/util.rs | 8 +-- tests/harness/mod.rs | 2 +- tests/oauth.rs | 4 +- 8 files changed, 82 insertions(+), 117 deletions(-) diff --git a/README.md b/README.md index 5e093cca6..5134b6646 100644 --- a/README.md +++ b/README.md @@ -278,19 +278,19 @@ This is most useful when using sccache for Rust compilation, as rustc supports u --- -Normalizing Paths with `SCCACHE_BASEDIR` +Normalizing Paths with `SCCACHE_BASEDIRS` ----------------------------------------- -By default, sccache requires absolute paths to match for cache hits. To enable cache sharing across different build directories, you can set `SCCACHE_BASEDIR` to strip a base directory from paths before hashing: +By default, sccache requires absolute paths to match for cache hits. 
To enable cache sharing across different build directories, you can set `SCCACHE_BASEDIRS` to strip a base directory from paths before hashing: ```bash -export SCCACHE_BASEDIR=/home/user/project +export SCCACHE_BASEDIRS=/home/user/project ``` You can also specify multiple base directories by separating them with `|` (pipe character). When multiple directories are provided, the longest matching prefix is used: ```bash -export SCCACHE_BASEDIR="/home/user/project|/home/user/workspace" +export SCCACHE_BASEDIRS="/home/user/project|/home/user/workspace" ``` This is similar to ccache's `CCACHE_BASEDIR` and helps when: @@ -305,10 +305,10 @@ You can also configure this in the sccache config file: ```toml # Single directory -basedir = "/home/user/project" +basedirs = ["/home/user/project"] # Or multiple directories -basedir = ["/home/user/project", "/home/user/workspace"] +basedirs = ["/home/user/project", "/home/user/workspace"] ``` --- @@ -318,7 +318,7 @@ Known Caveats ### General -* By default, absolute paths to files must match to get a cache hit. To work around this, use `SCCACHE_BASEDIR` (see above) to normalize paths before hashing. +* By default, absolute paths to files must match to get a cache hit. To work around this, use `SCCACHE_BASEDIRS` (see above) to normalize paths before hashing. ### Rust diff --git a/docs/Configuration.md b/docs/Configuration.md index 5371c3582..822e4322e 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -9,14 +9,14 @@ server_startup_timeout_ms = 10000 # Base directory (or directories) to strip from paths for cache key computation. # Similar to ccache's CCACHE_BASEDIR. This enables cache hits across # different absolute paths when compiling the same source code. -# Can be a single path or an array of paths. When multiple paths are provided, +# Can be an array of paths. When multiple paths are provided, # the longest matching prefix is used. 
# For example, if basedir is "/home/user/project", then paths like # "/home/user/project/src/main.c" will be normalized to "./src/main.c" # for caching purposes. -basedir = "/home/user/project" +basedirs = ["/home/user/project"] # Or multiple directories: -# basedir = ["/home/user/project", "/home/user/workspace"] +# basedirs = ["/home/user/project", "/home/user/workspace"] [dist] # where to find the scheduler @@ -146,7 +146,7 @@ Note that some env variables may need sccache server restart to take effect. * `SCCACHE_ALLOW_CORE_DUMPS` to enable core dumps by the server * `SCCACHE_CONF` configuration file path -* `SCCACHE_BASEDIR` base directory (or directories) to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Multiple directories can be separated by `|` (pipe character). When multiple directories are specified, the longest matching prefix is used. Environment variable takes precedence over file configuration. Only absolute paths are supported; relative paths will be ignored with a warning. +* `SCCACHE_BASEDIRS` base directory (or directories) to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Multiple directories can be separated by `|` (pipe character). When multiple directories are specified, the longest matching prefix is used. Environment variable takes precedence over file configuration. Only absolute paths are supported; relative paths will be ignored with a warning. * `SCCACHE_CACHED_CONF` * `SCCACHE_IDLE_TIMEOUT` how long the local daemon process waits for more client requests before exiting, in seconds. 
Set to `0` to run sccache permanently * `SCCACHE_STARTUP_NOTIFY` specify a path to a socket which will be used for server completion notification diff --git a/src/cache/cache.rs b/src/cache/cache.rs index b4e71b788..eed89a460 100644 --- a/src/cache/cache.rs +++ b/src/cache/cache.rs @@ -743,7 +743,7 @@ pub fn storage_from_config( // Validate that all basedirs are absolute paths let basedirs: Vec = config - .basedir + .basedirs .iter() .filter_map(|p| { if p.is_absolute() { diff --git a/src/compiler/c.rs b/src/compiler/c.rs index 18f54387c..efd17cc7b 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -1803,7 +1803,7 @@ mod test { } #[test] - fn test_hash_key_basedir() { + fn test_hash_key_basedirs() { use std::path::PathBuf; let args = ovec!["a", "b", "c"]; @@ -1813,8 +1813,10 @@ mod test { let preprocessed1 = b"# 1 \"/home/user1/project/src/main.c\"\nint main() { return 0; }"; let preprocessed2 = b"# 1 \"/home/user2/project/src/main.c\"\nint main() { return 0; }"; - let basedir1 = PathBuf::from("/home/user1/project"); - let basedir2 = PathBuf::from("/home/user2/project"); + let basedirs = [ + PathBuf::from("/home/user1/project"), + PathBuf::from("/home/user2/project"), + ]; let h1 = hash_key( digest, @@ -1824,7 +1826,7 @@ mod test { &[], preprocessed1, false, - std::slice::from_ref(&basedir1), + &basedirs, ); let h2 = hash_key( digest, @@ -1834,7 +1836,7 @@ mod test { &[], preprocessed2, false, - std::slice::from_ref(&basedir2), + &basedirs, ); assert_eq!(h1, h2); @@ -1877,7 +1879,7 @@ mod test { &[], preprocessed_cpp1, true, - std::slice::from_ref(&basedir1), + &basedirs, ); let h_cpp2 = hash_key( digest, @@ -1887,7 +1889,7 @@ mod test { &[], preprocessed_cpp2, true, - std::slice::from_ref(&basedir2), + &basedirs, ); assert_eq!(h_cpp1, h_cpp2); diff --git a/src/config.rs b/src/config.rs index 4e29fa5ac..ea6ec12e2 100644 --- a/src/config.rs +++ b/src/config.rs @@ -586,27 +586,7 @@ pub struct FileConfig { pub server_startup_timeout_ms: Option, /// Base 
directory (or directories) to strip from paths for cache key computation. /// Can be a single path or an array of paths. - #[serde(default, deserialize_with = "deserialize_basedir")] - pub basedir: Vec, -} - -fn deserialize_basedir<'de, D>(deserializer: D) -> std::result::Result, D::Error> -where - D: serde::Deserializer<'de>, -{ - use serde::Deserialize; - - #[derive(Deserialize)] - #[serde(untagged)] - enum StringOrVec { - String(PathBuf), - Vec(Vec), - } - - match StringOrVec::deserialize(deserializer)? { - StringOrVec::String(s) => Ok(vec![s]), - StringOrVec::Vec(v) => Ok(v), - } + pub basedirs: Vec, } // If the file doesn't exist or we can't read it, log the issue and proceed. If the @@ -644,7 +624,7 @@ pub fn try_read_config_file(path: &Path) -> Result, + basedirs: Vec, } fn key_prefix_from_env_var(env_var_name: &str) -> String { @@ -972,7 +952,7 @@ fn config_from_env() -> Result { // ======= Base directory ======= // Support multiple paths separated by '|' (a character forbidden in paths) - let basedir = env::var_os("SCCACHE_BASEDIR") + let basedirs = env::var_os("SCCACHE_BASEDIRS") .map(|s| { s.to_string_lossy() .split('|') @@ -982,7 +962,7 @@ fn config_from_env() -> Result { }) .unwrap_or_default(); - Ok(EnvConfig { cache, basedir }) + Ok(EnvConfig { cache, basedirs }) } // The directories crate changed the location of `config_dir` on macos in version 3, @@ -1016,8 +996,7 @@ pub struct Config { pub server_startup_timeout: Option, /// Base directory (or directories) to strip from paths for cache key computation. /// Similar to ccache's CCACHE_BASEDIR. - /// Currently only the first directory is used if multiple are specified. 
- pub basedir: Vec, + pub basedirs: Vec, } impl Config { @@ -1039,7 +1018,7 @@ impl Config { cache, dist, server_startup_timeout_ms, - basedir: file_basedir, + basedirs: file_basedirs, } = file_conf; conf_caches.merge(cache); @@ -1048,15 +1027,15 @@ impl Config { let EnvConfig { cache, - basedir: env_basedir, + basedirs: env_basedirs, } = env_conf; conf_caches.merge(cache); // Environment variable takes precedence over file config - let basedir = if !env_basedir.is_empty() { - env_basedir + let basedirs = if !env_basedirs.is_empty() { + env_basedirs } else { - file_basedir + file_basedirs }; let (caches, fallback_cache) = conf_caches.into_fallback(); @@ -1065,7 +1044,7 @@ impl Config { fallback_cache, dist, server_startup_timeout, - basedir, + basedirs, } } } @@ -1339,7 +1318,7 @@ fn config_overrides() { }), ..Default::default() }, - basedir: vec![], + basedirs: vec![], }; let file_conf = FileConfig { @@ -1366,7 +1345,7 @@ fn config_overrides() { }, dist: Default::default(), server_startup_timeout_ms: None, - basedir: vec![], + basedirs: vec![], }; assert_eq!( @@ -1389,90 +1368,71 @@ fn config_overrides() { }, dist: Default::default(), server_startup_timeout: None, - basedir: vec![], + basedirs: vec![], } ); } #[test] -fn config_basedir_overrides() { +fn config_basedirs_overrides() { use std::path::PathBuf; // Test that env variable takes precedence over file config let env_conf = EnvConfig { cache: Default::default(), - basedir: vec![PathBuf::from("/env/basedir")], + basedirs: vec![PathBuf::from("/env/basedir")], }; let file_conf = FileConfig { cache: Default::default(), dist: Default::default(), server_startup_timeout_ms: None, - basedir: vec![PathBuf::from("/file/basedir")], + basedirs: vec![PathBuf::from("/file/basedir")], }; let config = Config::from_env_and_file_configs(env_conf, file_conf); - assert_eq!(config.basedir, vec![PathBuf::from("/env/basedir")]); + assert_eq!(config.basedirs, vec![PathBuf::from("/env/basedir")]); // Test that file config is used 
when env is empty let env_conf = EnvConfig { cache: Default::default(), - basedir: vec![], + basedirs: vec![], }; let file_conf = FileConfig { cache: Default::default(), dist: Default::default(), server_startup_timeout_ms: None, - basedir: vec![PathBuf::from("/file/basedir")], + basedirs: vec![PathBuf::from("/file/basedir")], }; let config = Config::from_env_and_file_configs(env_conf, file_conf); - assert_eq!(config.basedir, vec![PathBuf::from("/file/basedir")]); + assert_eq!(config.basedirs, vec![PathBuf::from("/file/basedir")]); // Test that both empty results in empty let env_conf = EnvConfig { cache: Default::default(), - basedir: vec![], + basedirs: vec![], }; let file_conf = FileConfig { cache: Default::default(), dist: Default::default(), server_startup_timeout_ms: None, - basedir: vec![], + basedirs: vec![], }; let config = Config::from_env_and_file_configs(env_conf, file_conf); - assert_eq!(config.basedir, Vec::::new()); + assert_eq!(config.basedirs, Vec::::new()); } #[test] -fn test_deserialize_basedir_single() { - use std::path::PathBuf; - - // Test single string value - let toml = r#" - basedir = "/home/user/project" - - [cache.disk] - dir = "/tmp/cache" - size = 1073741824 - - [dist] - "#; - - let config: FileConfig = toml::from_str(toml).unwrap(); - assert_eq!(config.basedir, vec![PathBuf::from("/home/user/project")]); -} - -#[test] -fn test_deserialize_basedir_multiple() { +fn test_deserialize_basedirs() { use std::path::PathBuf; // Test array of paths let toml = r#" - basedir = ["/home/user/project", "/home/user/workspace"] + basedirs = ["/home/user/project", "/home/user/workspace"] [cache.disk] dir = "/tmp/cache" @@ -1483,7 +1443,7 @@ fn test_deserialize_basedir_multiple() { let config: FileConfig = toml::from_str(toml).unwrap(); assert_eq!( - config.basedir, + config.basedirs, vec![ PathBuf::from("/home/user/project"), PathBuf::from("/home/user/workspace") @@ -1492,10 +1452,10 @@ fn test_deserialize_basedir_multiple() { } #[test] -fn 
test_deserialize_basedir_missing() { +fn test_deserialize_basedirs_missing() { use std::path::PathBuf; - // Test no basedir specified (should default to empty vec) + // Test no basedirs specified (should default to empty vec) let toml = r#" [cache.disk] dir = "/tmp/cache" @@ -1505,103 +1465,106 @@ fn test_deserialize_basedir_missing() { "#; let config: FileConfig = toml::from_str(toml).unwrap(); - assert_eq!(config.basedir, Vec::::new()); + assert_eq!(config.basedirs, Vec::::new()); } #[test] #[serial] -fn test_env_basedir_single() { +fn test_env_basedirs_single() { use std::path::PathBuf; unsafe { - std::env::set_var("SCCACHE_BASEDIR", "/home/user/project"); + std::env::set_var("SCCACHE_BASEDIRS", "/home/user/project"); } let config = config_from_env().unwrap(); - assert_eq!(config.basedir, vec![PathBuf::from("/home/user/project")]); + assert_eq!(config.basedirs, vec![PathBuf::from("/home/user/project")]); unsafe { - std::env::remove_var("SCCACHE_BASEDIR"); + std::env::remove_var("SCCACHE_BASEDIRS"); } } #[test] #[serial] -fn test_env_basedir_multiple() { +fn test_env_basedirs_multiple() { use std::path::PathBuf; unsafe { - std::env::set_var("SCCACHE_BASEDIR", "/home/user/project|/home/user/workspace"); + std::env::set_var( + "SCCACHE_BASEDIRS", + "/home/user/project|/home/user/workspace", + ); } let config = config_from_env().unwrap(); assert_eq!( - config.basedir, + config.basedirs, vec![ PathBuf::from("/home/user/project"), PathBuf::from("/home/user/workspace") ] ); unsafe { - std::env::remove_var("SCCACHE_BASEDIR"); + std::env::remove_var("SCCACHE_BASEDIRS"); } } #[test] #[serial] -fn test_env_basedir_with_spaces() { +fn test_env_basedirs_with_spaces() { use std::path::PathBuf; // Test that spaces around paths are trimmed unsafe { std::env::set_var( - "SCCACHE_BASEDIR", + "SCCACHE_BASEDIRS", " /home/user/project | /home/user/workspace ", ); } let config = config_from_env().unwrap(); assert_eq!( - config.basedir, + config.basedirs, vec![ 
PathBuf::from("/home/user/project"), PathBuf::from("/home/user/workspace") ] ); unsafe { - std::env::remove_var("SCCACHE_BASEDIR"); + std::env::remove_var("SCCACHE_BASEDIRS"); } } #[test] #[serial] -fn test_env_basedir_empty_entries() { +fn test_env_basedirs_empty_entries() { use std::path::PathBuf; // Test that empty entries are filtered out unsafe { std::env::set_var( - "SCCACHE_BASEDIR", + "SCCACHE_BASEDIRS", "/home/user/project||/home/user/workspace", ); } let config = config_from_env().unwrap(); assert_eq!( - config.basedir, + config.basedirs, vec![ PathBuf::from("/home/user/project"), PathBuf::from("/home/user/workspace") ] ); unsafe { - std::env::remove_var("SCCACHE_BASEDIR"); + std::env::remove_var("SCCACHE_BASEDIRS"); } } #[test] #[serial] -fn test_env_basedir_not_set() { +fn test_env_basedirs_not_set() { unsafe { - std::env::remove_var("SCCACHE_BASEDIR"); + std::env::remove_var("SCCACHE_BASEDIRS"); } let config = config_from_env().unwrap(); - assert_eq!(config.basedir, Vec::::new()); + assert_eq!(config.basedirs, Vec::::new()); } #[test] @@ -1909,7 +1872,7 @@ no_credentials = true rewrite_includes_only: false, }, server_startup_timeout_ms: Some(10000), - basedir: vec![], + basedirs: vec![], } ) } @@ -2002,7 +1965,7 @@ size = "7g" ..Default::default() }, server_startup_timeout_ms: None, - basedir: vec![], + basedirs: vec![], } ); } diff --git a/src/util.rs b/src/util.rs index 94686e0d6..c31296b9a 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1029,7 +1029,7 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec = basedirs + let mut basedirs_data: Vec<_> = basedirs .iter() .map(|basedir| { let normalized = basedir.to_string_lossy(); @@ -1039,12 +1039,12 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec Vec Date: Tue, 23 Dec 2025 14:38:48 +0100 Subject: [PATCH 6/9] Cover Windows case for mixed slashes in paths --- src/util.rs | 78 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 
63 insertions(+), 15 deletions(-) diff --git a/src/util.rs b/src/util.rs index c31296b9a..4fdeedebc 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1021,6 +1021,9 @@ pub fn num_cpus() -> usize { /// replaces them with relative path markers. When multiple basedirs are provided, /// the longest matching prefix is used. This is similar to ccache's CCACHE_BASEDIR. /// +/// On Windows, this function handles paths with mixed forward and backward slashes, +/// which can occur when different build tools produce preprocessor output. +/// /// Only paths that start with one of the basedirs are modified. The paths are expected to be /// in the format found in preprocessor output (e.g., `# 1 "/path/to/file"`). pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec { @@ -1055,21 +1058,33 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec Vec Vec { + path.iter() + .map(|&b| if b == b'\\' { b'/' } else { b }) + .collect() +} + +#[cfg(not(target_os = "windows"))] +fn normalize_path_slashes(path: &[u8]) -> Vec { + path.to_vec() +} + #[cfg(test)] mod tests { use super::{OsStrExt, TimeMacroFinder}; @@ -1360,4 +1389,23 @@ mod tests { let expected = b"# 1 \".\\src\\main.c\""; assert_eq!(output, expected); } + + #[cfg(target_os = "windows")] + #[test] + fn test_strip_basedir_windows_mixed_slashes() { + use std::path::PathBuf; + + // Mixed forward and backslashes in input (common from certain build systems) + let basedir = PathBuf::from("C:\\Users\\test\\project"); + let input = b"# 1 \"C:/Users\\test\\project\\src/main.c\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + let expected = b"# 1 \".\\src/main.c\""; + assert_eq!(output, expected, "Failed to strip mixed slash path"); + + // Also test the reverse case + let input = b"# 1 \"C:\\Users/test/project/src\\main.c\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + let expected = b"# 1 \"./src\\main.c\""; + 
assert_eq!(output, expected, "Failed to strip reverse mixed slash path"); + } } From 6f47f36c333248b0c81e8e6356d6bd9a4093eac5 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 23 Dec 2025 14:57:22 +0100 Subject: [PATCH 7/9] Add `Base directories` to `--show-stats` output --- src/server.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/server.rs b/src/server.rs index e44c00bed..6561225be 100644 --- a/src/server.rs +++ b/src/server.rs @@ -1636,6 +1636,7 @@ pub struct ServerInfo { pub max_cache_size: Option, pub use_preprocessor_cache_mode: bool, pub version: String, + pub basedirs: Vec, } /// Status of the dist client. @@ -1932,6 +1933,7 @@ impl ServerInfo { let use_preprocessor_cache_mode; let cache_size; let max_cache_size; + let basedirs; if let Some(storage) = storage { cache_location = storage.location(); use_preprocessor_cache_mode = storage @@ -1939,11 +1941,17 @@ impl ServerInfo { .use_preprocessor_cache_mode; (cache_size, max_cache_size) = futures::try_join!(storage.current_size(), storage.max_size())?; + basedirs = storage + .basedirs() + .iter() + .map(|p| p.to_string_lossy().to_string()) + .collect(); } else { cache_location = String::new(); use_preprocessor_cache_mode = false; cache_size = None; max_cache_size = None; + basedirs = Vec::new(); } let version = env!("CARGO_PKG_VERSION").to_string(); Ok(ServerInfo { @@ -1953,6 +1961,7 @@ impl ServerInfo { max_cache_size, use_preprocessor_cache_mode, version, + basedirs, }) } @@ -1965,6 +1974,16 @@ impl ServerInfo { self.cache_location, name_width = name_width ); + println!( + "{: Date: Tue, 23 Dec 2025 21:11:40 +0100 Subject: [PATCH 8/9] Make `basedirs` match case insensitive on Windows --- README.md | 2 ++ docs/Configuration.md | 3 ++- src/util.rs | 62 ++++++++++++++++++++++++++++++++----------- 3 files changed, 50 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 5134b6646..b80077ca8 100644 --- a/README.md +++ b/README.md @@ -293,6 +293,8 @@ 
You can also specify multiple base directories by separating them with `|` (pipe export SCCACHE_BASEDIRS="/home/user/project|/home/user/workspace" ``` +Path matching is **case-insensitive** on Windows and **case-sensitive** on other operating systems. + This is similar to ccache's `CCACHE_BASEDIR` and helps when: * Building the same project from different directories * Sharing cache between CI jobs with different checkout paths diff --git a/docs/Configuration.md b/docs/Configuration.md index 822e4322e..0da157ebf 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -11,6 +11,7 @@ server_startup_timeout_ms = 10000 # different absolute paths when compiling the same source code. # Can be an array of paths. When multiple paths are provided, # the longest matching prefix is used. +# Path matching is case-insensitive on Windows and case-sensitive on other OSes. # For example, if basedir is "/home/user/project", then paths like # "/home/user/project/src/main.c" will be normalized to "./src/main.c" # for caching purposes. @@ -146,7 +147,7 @@ Note that some env variables may need sccache server restart to take effect. * `SCCACHE_ALLOW_CORE_DUMPS` to enable core dumps by the server * `SCCACHE_CONF` configuration file path -* `SCCACHE_BASEDIRS` base directory (or directories) to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Multiple directories can be separated by `|` (pipe character). When multiple directories are specified, the longest matching prefix is used. Environment variable takes precedence over file configuration. Only absolute paths are supported; relative paths will be ignored with a warning. +* `SCCACHE_BASEDIRS` base directory (or directories) to strip from paths for cache key computation. 
This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Multiple directories can be separated by `|` (pipe character). When multiple directories are specified, the longest matching prefix is used. Path matching is **case-insensitive** on Windows and **case-sensitive** on other operating systems. Environment variable takes precedence over file configuration. Only absolute paths are supported; relative paths will be ignored with a warning. * `SCCACHE_CACHED_CONF` * `SCCACHE_IDLE_TIMEOUT` how long the local daemon process waits for more client requests before exiting, in seconds. Set to `0` to run sccache permanently * `SCCACHE_STARTUP_NOTIFY` specify a path to a socket which will be used for server completion notification diff --git a/src/util.rs b/src/util.rs index 4fdeedebc..0f04e6126 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1021,8 +1021,10 @@ pub fn num_cpus() -> usize { /// replaces them with relative path markers. When multiple basedirs are provided, /// the longest matching prefix is used. This is similar to ccache's CCACHE_BASEDIR. /// -/// On Windows, this function handles paths with mixed forward and backward slashes, -/// which can occur when different build tools produce preprocessor output. +/// Path matching is case-insensitive on Windows and case-sensitive on other operating systems +/// (see `normalize_path`). On Windows, this function also handles +/// paths with mixed forward and backward slashes, which can occur when different build tools +/// produce preprocessor output. /// /// Only paths that start with one of the basedirs are modified. The paths are expected to be /// in the format found in preprocessor output (e.g., `# 1 \"/path/to/file\"`).
@@ -1059,19 +1061,19 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec Vec Vec { +fn normalize_path(path: &[u8]) -> Vec { path.iter() - .map(|&b| if b == b'\\' { b'/' } else { b }) + .map(|&b| match b { + b'A'..=b'Z' => b + 32, + b'\\' => b'/', + _ => b, + }) .collect() } #[cfg(not(target_os = "windows"))] -fn normalize_path_slashes(path: &[u8]) -> Vec { +fn normalize_path(path: &[u8]) -> Vec { path.to_vec() } @@ -1370,6 +1377,29 @@ mod tests { assert_eq!(output, expected); } + #[test] + fn test_strip_basedir_case_insensitive() { + use std::path::PathBuf; + + // Case insensitive matching - basedir in lowercase, input in uppercase + let basedir = PathBuf::from("/home/user/project"); + let input = b"# 1 \"/HOME/USER/PROJECT/src/main.c\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + let expected = b"# 1 \"./src/main.c\""; + assert_eq!(output, expected); + + // Mixed case in both + let input = b"# 1 \"/Home/User/Project/src/main.c\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + assert_eq!(output, expected); + + // Basedir in uppercase, input in lowercase + let basedir = PathBuf::from("/HOME/USER/PROJECT"); + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + assert_eq!(output, expected); + } + #[cfg(target_os = "windows")] #[test] fn test_strip_basedir_windows_backslashes() { From 99dde65cddd16f5a22d3f5d3d2479ebd5d12ea67 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 24 Dec 2025 00:04:36 +0100 Subject: [PATCH 9/9] Add trace logs to strip_basedirs, fix remainings --- src/compiler/c.rs | 1 + src/compiler/preprocessor_cache.rs | 99 +++++++++++++++++++++++++++++- src/util.rs | 10 +++ 3 files changed, 109 insertions(+), 1 deletion(-) diff --git a/src/compiler/c.rs b/src/compiler/c.rs index efd17cc7b..354b7ec93 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -444,6 +444,7 @@ where &absolute_input_path, self.compiler.plusplus(), preprocessor_cache_mode_config, + storage.basedirs(), )? } else { None diff --git a/src/compiler/preprocessor_cache.rs b/src/compiler/preprocessor_cache.rs index d03cd6e18..788fe662c 100644 --- a/src/compiler/preprocessor_cache.rs +++ b/src/compiler/preprocessor_cache.rs @@ -381,6 +381,7 @@ pub fn preprocessor_cache_entry_hash_key( input_file: &Path, plusplus: bool, config: PreprocessorCacheModeConfig, + basedirs: &[PathBuf], ) -> anyhow::Result> { // If you change any of the inputs to the hash, you should change `FORMAT_VERSION`. let mut m = Digest::new(); @@ -414,7 +415,15 @@ pub fn preprocessor_cache_entry_hash_key( // share preprocessor cache entries and a/r.h exists. 
let mut buf = vec![]; encode_path(&mut buf, input_file)?; - m.update(&buf); + + // Strip basedirs from the input file path if configured + let buf_to_hash = if !basedirs.is_empty() { + use crate::util::strip_basedirs; + strip_basedirs(&buf, basedirs) + } else { + buf.clone() + }; + m.update(&buf_to_hash); let reader = std::fs::File::open(input_file) .with_context(|| format!("while hashing the input file '{}'", input_file.display()))?; @@ -634,4 +643,92 @@ mod test { assert!(!finder.found_timestamp()); assert!(!finder.found_date()); } + + #[test] + fn test_preprocessor_cache_entry_hash_key_basedirs() { + use std::fs; + use tempfile::TempDir; + + // Create two different base directories + let dir1 = TempDir::new().unwrap(); + let dir2 = TempDir::new().unwrap(); + + // Create identical files with the same relative path in each directory + let file1_path = dir1.path().join("test.c"); + let file2_path = dir2.path().join("test.c"); + + let content = b"int main() { return 0; }"; + fs::write(&file1_path, content).unwrap(); + fs::write(&file2_path, content).unwrap(); + + let config = PreprocessorCacheModeConfig::activated(); + + // Test 1: With basedirs, hashes should be the same + let hash1_with_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], + &file1_path, + false, + config, + &[dir1.path().to_path_buf(), dir2.path().to_path_buf()], + ) + .unwrap() + .unwrap(); + + let hash2_with_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], + &file2_path, + false, + config, + &[dir1.path().to_path_buf(), dir2.path().to_path_buf()], + ) + .unwrap() + .unwrap(); + + assert_eq!( + hash1_with_basedirs, hash2_with_basedirs, + "Hashes should be equal when using basedirs with identical files in different directories" + ); + + // Test 2: Without basedirs, hashes should be different + let hash1_no_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], 
+ &file1_path, + false, + config, + &[], + ) + .unwrap() + .unwrap(); + + let hash2_no_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], + &file2_path, + false, + config, + &[], + ) + .unwrap() + .unwrap(); + + assert_ne!( + hash1_no_basedirs, hash2_no_basedirs, + "Hashes should be different without basedirs for files in different directories" + ); + } } diff --git a/src/util.rs b/src/util.rs index 0f04e6126..bbd8a8966 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1051,6 +1051,12 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec Vec