diff --git a/README.md b/README.md index ed0a56b80..b80077ca8 100644 --- a/README.md +++ b/README.md @@ -278,12 +278,49 @@ This is most useful when using sccache for Rust compilation, as rustc supports u --- +Normalizing Paths with `SCCACHE_BASEDIRS` +----------------------------------------- + +By default, sccache requires absolute paths to match for cache hits. To enable cache sharing across different build directories, you can set `SCCACHE_BASEDIRS` to strip a base directory from paths before hashing: + +```bash +export SCCACHE_BASEDIRS=/home/user/project +``` + +You can also specify multiple base directories by separating them with `|` (pipe character). When multiple directories are provided, the longest matching prefix is used: + +```bash +export SCCACHE_BASEDIRS="/home/user/project|/home/user/workspace" +``` + +Path matching is **case-insensitive** on Windows and **case-sensitive** on other operating systems. + +This is similar to ccache's `CCACHE_BASEDIR` and helps when: +* Building the same project from different directories +* Sharing cache between CI jobs with different checkout paths +* Multiple developers working with different username paths +* Working with multiple project checkouts simultaneously + +**Note:** Only absolute paths are supported. Relative paths will be ignored with a warning. + +You can also configure this in the sccache config file: + +```toml +# Single directory +basedirs = ["/home/user/project"] + +# Or multiple directories +basedirs = ["/home/user/project", "/home/user/workspace"] +``` + +--- + Known Caveats ------------- ### General -* Absolute paths to files must match to get a cache hit. This means that even if you are using a shared cache, everyone will have to build at the same absolute path (i.e. not in `$HOME`) in order to benefit each other. In Rust this includes the source for third party crates which are stored in `$HOME/.cargo/registry/cache` by default. 
+* By default, absolute paths to files must match to get a cache hit. To work around this, use `SCCACHE_BASEDIRS` (see above) to normalize paths before hashing. ### Rust diff --git a/docs/Configuration.md b/docs/Configuration.md index c6c43c646..0da157ebf 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -6,6 +6,19 @@ # If specified, wait this long for the server to start up. server_startup_timeout_ms = 10000 +# Base directory (or directories) to strip from paths for cache key computation. +# Similar to ccache's CCACHE_BASEDIR. This enables cache hits across +# different absolute paths when compiling the same source code. +# Can be an array of paths. When multiple paths are provided, +# the longest matching prefix is used. +# Path matching is case-insensitive on Windows and case-sensitive on other OSes. +# For example, if basedir is "/home/user/project", then paths like +# "/home/user/project/src/main.c" will be normalized to "./src/main.c" +# for caching purposes. +basedirs = ["/home/user/project"] +# Or multiple directories: +# basedirs = ["/home/user/project", "/home/user/workspace"] + [dist] # where to find the scheduler scheduler_url = "http://1.2.3.4:10600" @@ -134,6 +147,7 @@ Note that some env variables may need sccache server restart to take effect. * `SCCACHE_ALLOW_CORE_DUMPS` to enable core dumps by the server * `SCCACHE_CONF` configuration file path +* `SCCACHE_BASEDIRS` base directory (or directories) to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Multiple directories can be separated by `|` (pipe character). When multiple directories are specified, the longest matching prefix is used. Path matching is **case-insensitive** on Windows and **case-sensitive** on other operating systems. Environment variable takes precedence over file configuration. 
Only absolute paths are supported; relative paths will be ignored with a warning. * `SCCACHE_CACHED_CONF` * `SCCACHE_IDLE_TIMEOUT` how long the local daemon process waits for more client requests before exiting, in seconds. Set to `0` to run sccache permanently * `SCCACHE_STARTUP_NOTIFY` specify a path to a socket which will be used for server completion notification diff --git a/src/cache/cache.rs b/src/cache/cache.rs index 4e9d52800..eed89a460 100644 --- a/src/cache/cache.rs +++ b/src/cache/cache.rs @@ -381,6 +381,10 @@ pub trait Storage: Send + Sync { // Enable by default, only in local mode PreprocessorCacheModeConfig::default() } + /// Return the base directories for path normalization if configured + fn basedirs(&self) -> &[PathBuf] { + &[] + } /// Return the preprocessor cache entry for a given preprocessor key, /// if it exists. /// Only applicable when using preprocessor cache mode. @@ -736,12 +740,35 @@ pub fn storage_from_config( let preprocessor_cache_mode_config = config.fallback_cache.preprocessor_cache_mode; let rw_mode = config.fallback_cache.rw_mode.into(); debug!("Init disk cache with dir {:?}, size {}", dir, size); + + // Validate that all basedirs are absolute paths + let basedirs: Vec = config + .basedirs + .iter() + .filter_map(|p| { + if p.is_absolute() { + Some(p.clone()) + } else { + warn!( + "Ignoring relative basedir path: {:?}. 
Only absolute paths are supported.", + p + ); + None + } + }) + .collect(); + + if !basedirs.is_empty() { + debug!("Using basedirs for path normalization: {:?}", basedirs); + } + Ok(Arc::new(DiskCache::new( dir, size, pool, preprocessor_cache_mode_config, rw_mode, + basedirs, ))) } diff --git a/src/cache/disk.rs b/src/cache/disk.rs index c4f3491e9..515c3f2ca 100644 --- a/src/cache/disk.rs +++ b/src/cache/disk.rs @@ -74,6 +74,7 @@ pub struct DiskCache { preprocessor_cache_mode_config: PreprocessorCacheModeConfig, preprocessor_cache: Arc>, rw_mode: CacheMode, + basedirs: Vec, } impl DiskCache { @@ -84,6 +85,7 @@ impl DiskCache { pool: &tokio::runtime::Handle, preprocessor_cache_mode_config: PreprocessorCacheModeConfig, rw_mode: CacheMode, + basedirs: Vec, ) -> DiskCache { DiskCache { lru: Arc::new(Mutex::new(LazyDiskCache::Uninit { @@ -99,6 +101,7 @@ impl DiskCache { max_size, })), rw_mode, + basedirs, } } } @@ -181,6 +184,9 @@ impl Storage for DiskCache { fn preprocessor_cache_mode_config(&self) -> PreprocessorCacheModeConfig { self.preprocessor_cache_mode_config } + fn basedirs(&self) -> &[PathBuf] { + &self.basedirs + } async fn get_preprocessor_cache_entry(&self, key: &str) -> Result>> { let key = normalize_key(key); Ok(self diff --git a/src/compiler/c.rs b/src/compiler/c.rs index 8db84d265..354b7ec93 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -444,6 +444,7 @@ where &absolute_input_path, self.compiler.plusplus(), preprocessor_cache_mode_config, + storage.basedirs(), )? } else { None @@ -613,6 +614,7 @@ where &env_vars, &preprocessor_result.stdout, self.compiler.plusplus(), + storage.basedirs(), ) }; @@ -1421,7 +1423,7 @@ impl pkg::ToolchainPackager for CToolchainPackager { } /// The cache is versioned by the inputs to `hash_key`. -pub const CACHE_VERSION: &[u8] = b"11"; +pub const CACHE_VERSION: &[u8] = b"12"; /// Environment variables that are factored into the cache key. 
static CACHED_ENV_VARS: LazyLock> = LazyLock::new(|| { @@ -1444,6 +1446,11 @@ static CACHED_ENV_VARS: LazyLock> = LazyLock::new(|| { }); /// Compute the hash key of `compiler` compiling `preprocessor_output` with `args`. +/// +/// If `basedirs` are provided, paths in the preprocessor output will be normalized by +/// stripping the longest matching basedir prefix. This enables cache hits across different +/// absolute paths (similar to ccache's CCACHE_BASEDIR). +#[allow(clippy::too_many_arguments)] pub fn hash_key( compiler_digest: &str, language: Language, @@ -1452,6 +1459,7 @@ pub fn hash_key( env_vars: &[(OsString, OsString)], preprocessor_output: &[u8], plusplus: bool, + basedirs: &[PathBuf], ) -> String { // If you change any of the inputs to the hash, you should change `CACHE_VERSION`. let mut m = Digest::new(); @@ -1475,7 +1483,16 @@ pub fn hash_key( val.hash(&mut HashToDigest { digest: &mut m }); } } - m.update(preprocessor_output); + + // Strip basedirs from preprocessor output if configured + let preprocessor_output_to_hash = if !basedirs.is_empty() { + use crate::util::strip_basedirs; + Cow::Owned(strip_basedirs(preprocessor_output, basedirs)) + } else { + Cow::Borrowed(preprocessor_output) + }; + + m.update(&preprocessor_output_to_hash); m.finish() } @@ -1490,8 +1507,26 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_eq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false), - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false) + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ) ); } @@ -1500,8 +1535,26 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false), - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, 
true) + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + true, + &[] + ) ); } @@ -1510,7 +1563,16 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false), + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ), hash_key( "abcd", Language::CHeader, @@ -1518,7 +1580,8 @@ mod test { &[], &[], PREPROCESSED, - false + false, + &[] ) ); } @@ -1528,7 +1591,16 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::Cxx, &args, &[], &[], PREPROCESSED, true), + hash_key( + "abcd", + Language::Cxx, + &args, + &[], + &[], + PREPROCESSED, + true, + &[] + ), hash_key( "abcd", Language::CxxHeader, @@ -1536,7 +1608,8 @@ mod test { &[], &[], PREPROCESSED, - true + true, + &[] ) ); } @@ -1546,8 +1619,26 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false), - hash_key("wxyz", Language::C, &args, &[], &[], PREPROCESSED, false) + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key( + "wxyz", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ) ); } @@ -1560,18 +1651,54 @@ mod test { let a = ovec!["a"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false), - hash_key(digest, Language::C, &xyz, &[], &[], PREPROCESSED, false) + hash_key( + digest, + Language::C, + &abc, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key( + digest, + Language::C, + &xyz, + &[], + &[], + PREPROCESSED, + false, + &[] + ) ); assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], 
PREPROCESSED, false), - hash_key(digest, Language::C, &ab, &[], &[], PREPROCESSED, false) + hash_key( + digest, + Language::C, + &abc, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key(digest, Language::C, &ab, &[], &[], PREPROCESSED, false, &[]) ); assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false), - hash_key(digest, Language::C, &a, &[], &[], PREPROCESSED, false) + hash_key( + digest, + Language::C, + &abc, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key(digest, Language::C, &a, &[], &[], PREPROCESSED, false, &[]) ); } @@ -1586,9 +1713,19 @@ mod test { &[], &[], &b"hello world"[..], - false + false, + &[] ), - hash_key("abcd", Language::C, &args, &[], &[], &b"goodbye"[..], false) + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + &b"goodbye"[..], + false, + &[] + ) ); } @@ -1598,11 +1735,38 @@ mod test { let digest = "abcd"; const PREPROCESSED: &[u8] = b"hello world"; for var in CACHED_ENV_VARS.iter() { - let h1 = hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false); + let h1 = hash_key( + digest, + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[], + ); let vars = vec![(OsString::from(var), OsString::from("something"))]; - let h2 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false); + let h2 = hash_key( + digest, + Language::C, + &args, + &[], + &vars, + PREPROCESSED, + false, + &[], + ); let vars = vec![(OsString::from(var), OsString::from("something else"))]; - let h3 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false); + let h3 = hash_key( + digest, + Language::C, + &args, + &[], + &vars, + PREPROCESSED, + false, + &[], + ); assert_neq!(h1, h2); assert_neq!(h2, h3); } @@ -1623,10 +1787,145 @@ mod test { &extra_data, &[], PREPROCESSED, - false + false, + &[] ), - hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false) + hash_key( + digest, + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ) + 
); + } + + #[test] + fn test_hash_key_basedirs() { + use std::path::PathBuf; + + let args = ovec!["a", "b", "c"]; + let digest = "abcd"; + + // Test 1: Same hash with different absolute paths when basedir is used + let preprocessed1 = b"# 1 \"/home/user1/project/src/main.c\"\nint main() { return 0; }"; + let preprocessed2 = b"# 1 \"/home/user2/project/src/main.c\"\nint main() { return 0; }"; + + let basedirs = [ + PathBuf::from("/home/user1/project"), + PathBuf::from("/home/user2/project"), + ]; + + let h1 = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed1, + false, + &basedirs, + ); + let h2 = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed2, + false, + &basedirs, + ); + + assert_eq!(h1, h2); + + // Test 2: Different hashes without basedir + let h1_no_base = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed1, + false, + &[], + ); + let h2_no_base = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed2, + false, + &[], + ); + + assert_neq!(h1_no_base, h2_no_base); + + // Test 3: Works for C++ files too + let preprocessed_cpp1 = + b"# 1 \"/home/user1/project/src/main.cpp\"\nint main() { return 0; }"; + let preprocessed_cpp2 = + b"# 1 \"/home/user2/project/src/main.cpp\"\nint main() { return 0; }"; + + let h_cpp1 = hash_key( + digest, + Language::Cxx, + &args, + &[], + &[], + preprocessed_cpp1, + true, + &basedirs, + ); + let h_cpp2 = hash_key( + digest, + Language::Cxx, + &args, + &[], + &[], + preprocessed_cpp2, + true, + &basedirs, + ); + + assert_eq!(h_cpp1, h_cpp2); + + // Test 4: Works with trailing slashes + let basedir_slash = PathBuf::from("/home/user1/project/"); + let h_slash = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed1, + false, + std::slice::from_ref(&basedir_slash), + ); + + assert_eq!(h1, h_slash); + + // Test 5: Multiple basedirs - longest match wins + let basedirs = vec![ + PathBuf::from("/home/user1"), + 
PathBuf::from("/home/user1/project"), // This should match (longest) + ]; + let h_multi = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed1, + false, + &basedirs, ); + assert_eq!(h1, h_multi); } #[test] diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index bdab84e1e..95bb6dbbf 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -2314,6 +2314,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, + vec![], ); // Write a dummy input file so the preprocessor cache mode can work std::fs::write(f.tempdir.path().join("foo.c"), "whatever").unwrap(); @@ -2444,6 +2445,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, + vec![], ); // Write a dummy input file so the preprocessor cache mode can work std::fs::write(f.tempdir.path().join("foo.c"), "whatever").unwrap(); @@ -2747,6 +2749,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, + vec![], ); let storage = Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage.clone(), pool.clone()); @@ -2876,6 +2879,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, + vec![], ); let storage = Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage.clone(), pool.clone()); @@ -2974,6 +2978,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, + vec![], ); let storage = Arc::new(storage); // Pretend to be GCC. diff --git a/src/compiler/preprocessor_cache.rs b/src/compiler/preprocessor_cache.rs index d03cd6e18..788fe662c 100644 --- a/src/compiler/preprocessor_cache.rs +++ b/src/compiler/preprocessor_cache.rs @@ -381,6 +381,7 @@ pub fn preprocessor_cache_entry_hash_key( input_file: &Path, plusplus: bool, config: PreprocessorCacheModeConfig, + basedirs: &[PathBuf], ) -> anyhow::Result> { // If you change any of the inputs to the hash, you should change `FORMAT_VERSION`. 
let mut m = Digest::new(); @@ -414,7 +415,15 @@ pub fn preprocessor_cache_entry_hash_key( // share preprocessor cache entries and a/r.h exists. let mut buf = vec![]; encode_path(&mut buf, input_file)?; - m.update(&buf); + + // Strip basedirs from the input file path if configured + let buf_to_hash = if !basedirs.is_empty() { + use crate::util::strip_basedirs; + strip_basedirs(&buf, basedirs) + } else { + buf.clone() + }; + m.update(&buf_to_hash); let reader = std::fs::File::open(input_file) .with_context(|| format!("while hashing the input file '{}'", input_file.display()))?; @@ -634,4 +643,92 @@ mod test { assert!(!finder.found_timestamp()); assert!(!finder.found_date()); } + + #[test] + fn test_preprocessor_cache_entry_hash_key_basedirs() { + use std::fs; + use tempfile::TempDir; + + // Create two different base directories + let dir1 = TempDir::new().unwrap(); + let dir2 = TempDir::new().unwrap(); + + // Create identical files with the same relative path in each directory + let file1_path = dir1.path().join("test.c"); + let file2_path = dir2.path().join("test.c"); + + let content = b"int main() { return 0; }"; + fs::write(&file1_path, content).unwrap(); + fs::write(&file2_path, content).unwrap(); + + let config = PreprocessorCacheModeConfig::activated(); + + // Test 1: With basedirs, hashes should be the same + let hash1_with_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], + &file1_path, + false, + config, + &[dir1.path().to_path_buf(), dir2.path().to_path_buf()], + ) + .unwrap() + .unwrap(); + + let hash2_with_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], + &file2_path, + false, + config, + &[dir1.path().to_path_buf(), dir2.path().to_path_buf()], + ) + .unwrap() + .unwrap(); + + assert_eq!( + hash1_with_basedirs, hash2_with_basedirs, + "Hashes should be equal when using basedirs with identical files in different directories" + ); + + // Test 2: Without 
basedirs, hashes should be different + let hash1_no_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], + &file1_path, + false, + config, + &[], + ) + .unwrap() + .unwrap(); + + let hash2_no_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], + &file2_path, + false, + config, + &[], + ) + .unwrap() + .unwrap(); + + assert_ne!( + hash1_no_basedirs, hash2_no_basedirs, + "Hashes should be different without basedirs for files in different directories" + ); + } } diff --git a/src/config.rs b/src/config.rs index 8edaeb099..ea6ec12e2 100644 --- a/src/config.rs +++ b/src/config.rs @@ -584,6 +584,9 @@ pub struct FileConfig { pub cache: CacheConfigs, pub dist: DistConfig, pub server_startup_timeout_ms: Option, + /// Base directory (or directories) to strip from paths for cache key computation. + /// Specified as an array of paths; a single directory is written as a one-element array. + pub basedirs: Vec, } // If the file doesn't exist or we can't read it, log the issue and proceed. If the @@ -621,6 +624,7 @@ pub fn try_read_config_file(path: &Path) -> Result, } fn key_prefix_from_env_var(env_var_name: &str) -> String { @@ -946,7 +950,19 @@ fn config_from_env() -> Result { oss, }; - Ok(EnvConfig { cache }) + // ======= Base directory ======= + // Support multiple paths separated by '|' (a character forbidden in Windows paths and rarely used in paths elsewhere) + let basedirs = env::var_os("SCCACHE_BASEDIRS") + .map(|s| { + s.to_string_lossy() + .split('|') + .map(|p| PathBuf::from(p.trim())) + .filter(|p| !p.as_os_str().is_empty()) + .collect() + }) + .unwrap_or_default(); + + Ok(EnvConfig { cache, basedirs }) } // The directories crate changed the location of `config_dir` on macos in version 3, @@ -978,6 +994,9 @@ pub struct Config { pub fallback_cache: DiskCacheConfig, pub dist: DistConfig, pub server_startup_timeout: Option, + /// Base directory (or directories) to strip from paths for cache key computation. + /// Similar to ccache's CCACHE_BASEDIR. 
+ pub basedirs: Vec, } impl Config { @@ -999,21 +1018,33 @@ impl Config { cache, dist, server_startup_timeout_ms, + basedirs: file_basedirs, } = file_conf; conf_caches.merge(cache); let server_startup_timeout = server_startup_timeout_ms.map(std::time::Duration::from_millis); - let EnvConfig { cache } = env_conf; + let EnvConfig { + cache, + basedirs: env_basedirs, + } = env_conf; conf_caches.merge(cache); + // Environment variable takes precedence over file config + let basedirs = if !env_basedirs.is_empty() { + env_basedirs + } else { + file_basedirs + }; + let (caches, fallback_cache) = conf_caches.into_fallback(); Self { cache: caches, fallback_cache, dist, server_startup_timeout, + basedirs, } } } @@ -1287,6 +1318,7 @@ fn config_overrides() { }), ..Default::default() }, + basedirs: vec![], }; let file_conf = FileConfig { @@ -1313,6 +1345,7 @@ fn config_overrides() { }, dist: Default::default(), server_startup_timeout_ms: None, + basedirs: vec![], }; assert_eq!( @@ -1335,10 +1368,205 @@ fn config_overrides() { }, dist: Default::default(), server_startup_timeout: None, + basedirs: vec![], } ); } +#[test] +fn config_basedirs_overrides() { + use std::path::PathBuf; + + // Test that env variable takes precedence over file config + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: vec![PathBuf::from("/env/basedir")], + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec![PathBuf::from("/file/basedir")], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf); + assert_eq!(config.basedirs, vec![PathBuf::from("/env/basedir")]); + + // Test that file config is used when env is empty + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: vec![], + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec![PathBuf::from("/file/basedir")], + }; 
+ + let config = Config::from_env_and_file_configs(env_conf, file_conf); + assert_eq!(config.basedirs, vec![PathBuf::from("/file/basedir")]); + + // Test that both empty results in empty + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: vec![], + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec![], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf); + assert_eq!(config.basedirs, Vec::::new()); +} + +#[test] +fn test_deserialize_basedirs() { + use std::path::PathBuf; + + // Test array of paths + let toml = r#" + basedirs = ["/home/user/project", "/home/user/workspace"] + + [cache.disk] + dir = "/tmp/cache" + size = 1073741824 + + [dist] + "#; + + let config: FileConfig = toml::from_str(toml).unwrap(); + assert_eq!( + config.basedirs, + vec![ + PathBuf::from("/home/user/project"), + PathBuf::from("/home/user/workspace") + ] + ); +} + +#[test] +fn test_deserialize_basedirs_missing() { + use std::path::PathBuf; + + // Test no basedirs specified (should default to empty vec) + let toml = r#" + [cache.disk] + dir = "/tmp/cache" + size = 1073741824 + + [dist] + "#; + + let config: FileConfig = toml::from_str(toml).unwrap(); + assert_eq!(config.basedirs, Vec::::new()); +} + +#[test] +#[serial] +fn test_env_basedirs_single() { + use std::path::PathBuf; + + unsafe { + std::env::set_var("SCCACHE_BASEDIRS", "/home/user/project"); + } + let config = config_from_env().unwrap(); + assert_eq!(config.basedirs, vec![PathBuf::from("/home/user/project")]); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } +} + +#[test] +#[serial] +fn test_env_basedirs_multiple() { + use std::path::PathBuf; + + unsafe { + std::env::set_var( + "SCCACHE_BASEDIRS", + "/home/user/project|/home/user/workspace", + ); + } + let config = config_from_env().unwrap(); + assert_eq!( + config.basedirs, + vec![ + PathBuf::from("/home/user/project"), + 
PathBuf::from("/home/user/workspace") + ] + ); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } +} + +#[test] +#[serial] +fn test_env_basedirs_with_spaces() { + use std::path::PathBuf; + + // Test that spaces around paths are trimmed + unsafe { + std::env::set_var( + "SCCACHE_BASEDIRS", + " /home/user/project | /home/user/workspace ", + ); + } + let config = config_from_env().unwrap(); + assert_eq!( + config.basedirs, + vec![ + PathBuf::from("/home/user/project"), + PathBuf::from("/home/user/workspace") + ] + ); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } +} + +#[test] +#[serial] +fn test_env_basedirs_empty_entries() { + use std::path::PathBuf; + + // Test that empty entries are filtered out + unsafe { + std::env::set_var( + "SCCACHE_BASEDIRS", + "/home/user/project||/home/user/workspace", + ); + } + let config = config_from_env().unwrap(); + assert_eq!( + config.basedirs, + vec![ + PathBuf::from("/home/user/project"), + PathBuf::from("/home/user/workspace") + ] + ); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } +} + +#[test] +#[serial] +fn test_env_basedirs_not_set() { + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } + let config = config_from_env().unwrap(); + assert_eq!(config.basedirs, Vec::::new()); +} + #[test] #[serial] #[cfg(feature = "s3")] @@ -1644,6 +1872,7 @@ no_credentials = true rewrite_includes_only: false, }, server_startup_timeout_ms: Some(10000), + basedirs: vec![], } ) } @@ -1736,6 +1965,7 @@ size = "7g" ..Default::default() }, server_startup_timeout_ms: None, + basedirs: vec![], } ); } diff --git a/src/server.rs b/src/server.rs index e44c00bed..6561225be 100644 --- a/src/server.rs +++ b/src/server.rs @@ -1636,6 +1636,7 @@ pub struct ServerInfo { pub max_cache_size: Option, pub use_preprocessor_cache_mode: bool, pub version: String, + pub basedirs: Vec, } /// Status of the dist client. 
@@ -1932,6 +1933,7 @@ impl ServerInfo { let use_preprocessor_cache_mode; let cache_size; let max_cache_size; + let basedirs; if let Some(storage) = storage { cache_location = storage.location(); use_preprocessor_cache_mode = storage @@ -1939,11 +1941,17 @@ impl ServerInfo { .use_preprocessor_cache_mode; (cache_size, max_cache_size) = futures::try_join!(storage.current_size(), storage.max_size())?; + basedirs = storage + .basedirs() + .iter() + .map(|p| p.to_string_lossy().to_string()) + .collect(); } else { cache_location = String::new(); use_preprocessor_cache_mode = false; cache_size = None; max_cache_size = None; + basedirs = Vec::new(); } let version = env!("CARGO_PKG_VERSION").to_string(); Ok(ServerInfo { @@ -1953,6 +1961,7 @@ impl ServerInfo { max_cache_size, use_preprocessor_cache_mode, version, + basedirs, }) } @@ -1965,6 +1974,16 @@ impl ServerInfo { self.cache_location, name_width = name_width ); + println!( + "{: usize { std::thread::available_parallelism().map_or(1, std::num::NonZeroUsize::get) } +/// Strip base directories from absolute paths in preprocessor output. +/// +/// This function searches for basedir paths in the preprocessor output and +/// replaces them with relative path markers. When multiple basedirs are provided, +/// the longest matching prefix is used. This is similar to ccache's CCACHE_BASEDIR. +/// +/// Path matching is case-insensitive on Windows and case-sensitive on all other operating systems, +/// because `normalize_path` only folds case on Windows. On Windows, this function also handles +/// paths with mixed forward and backward slashes, which can occur when different build tools +/// produce preprocessor output. +/// +/// Only paths that start with one of the basedirs are modified. The paths are expected to be +/// in the format found in preprocessor output (e.g., `# 1 "/path/to/file"`). 
+pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec { + if basedirs.is_empty() || preprocessor_output.is_empty() { + return preprocessor_output.to_vec(); + } + + // Prepare normalized basedirs sorted by length (longest first) to match longest prefix first + let mut basedirs_data: Vec<_> = basedirs + .iter() + .map(|basedir| { + let normalized = basedir.to_string_lossy(); + let trimmed = normalized.trim_end_matches('/').trim_end_matches('\\'); + (trimmed.as_bytes().to_vec(), trimmed.len()) + }) + .filter(|(bytes, _)| !bytes.is_empty()) + .collect(); + + if basedirs_data.is_empty() { + return preprocessor_output.to_vec(); + } + + // Sort by length descending (longest first) + basedirs_data.sort_by(|a, b| b.1.cmp(&a.1)); + + trace!( + "Stripping basedirs from preprocessor output with length {}: {:?}", + preprocessor_output.len(), + basedirs + ); + + let mut result = Vec::with_capacity(preprocessor_output.len()); + let mut i = 0; + + while i < preprocessor_output.len() { + let mut matched = false; + + // Try to match each basedir (longest first) + for (basedir_bytes, basedir_len) in &basedirs_data { + // Check if we have a match for this basedir at current position + if i + basedir_len <= preprocessor_output.len() { + let candidate = &preprocessor_output[i..i + basedir_len]; + + // Try exact match first + let exact_match = candidate == basedir_bytes; + + // Try case-insensitive match + let normalized_match = if !exact_match { + normalize_path(candidate) == normalize_path(basedir_bytes) + } else { + false + }; + + if exact_match || normalized_match { + // Check if this is actually a path boundary (preceded by whitespace, quote, or start) + let is_boundary = i == 0 + || preprocessor_output[i - 1].is_ascii_whitespace() + || preprocessor_output[i - 1] == b'"' + || preprocessor_output[i - 1] == b'<'; + + if is_boundary { + // Replace basedir with "." 
+ result.push(b'.'); + i += basedir_len; + matched = true; + trace!( + "Stripped basedir: {}", + String::from_utf8_lossy(basedir_bytes) + ); + break; + } + } + } + } + + if !matched { + result.push(preprocessor_output[i]); + i += 1; + } + } + + result +} + +/// Normalize path for case-insensitive comparison. +/// On Windows: converts all backslashes to forward slashes; +/// lowercases ASCII characters for consistency. +#[cfg(target_os = "windows")] +fn normalize_path(path: &[u8]) -> Vec { + path.iter() + .map(|&b| match b { + b'A'..=b'Z' => b + 32, + b'\\' => b'/', + _ => b, + }) + .collect() +} + +#[cfg(not(target_os = "windows"))] +fn normalize_path(path: &[u8]) -> Vec { + path.to_vec() +} + #[cfg(test)] mod tests { use super::{OsStrExt, TimeMacroFinder}; @@ -1167,4 +1281,171 @@ mod tests { let empty_result = super::ascii_unescape_default(&[]).unwrap(); assert!(empty_result.is_empty(), "{:?}", empty_result); } + + #[test] + fn test_strip_basedir_simple() { + use std::path::PathBuf; + + // Simple cases + let basedir = PathBuf::from("/home/user/project"); + let input = b"# 1 \"/home/user/project/src/main.c\"\nint main() { return 0; }"; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + let expected = b"# 1 \"./src/main.c\"\nint main() { return 0; }"; + assert_eq!(output, expected); + + // Multiple occurrences + let input = + b"# 1 \"/home/user/project/src/main.c\"\n# 2 \"/home/user/project/include/header.h\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + let expected = b"# 1 \"./src/main.c\"\n# 2 \"./include/header.h\""; + assert_eq!(output, expected); + + // No occurrences + let input = b"# 1 \"/other/path/main.c\"\nint main() { return 0; }"; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + assert_eq!(output, input); + } + + #[test] + fn test_strip_basedir_empty() { + use std::path::PathBuf; + + // Empty basedir slice + let input = b"# 1 \"/home/user/project/src/main.c\""; 
/// A basedir must be stripped identically whether it is configured with no
/// trailing slash, one trailing slash, or several.
#[test]
fn test_strip_basedir_trailing_slashes() {
    use std::path::PathBuf;

    let input = b"# 1 \"/home/user/project/src/main.c\"";
    let expected = b"# 1 \"./src/main.c\"";

    // Same preprocessor line, three spellings of the same base directory.
    let spellings = [
        "/home/user/project",
        "/home/user/project/",
        "/home/user/project////",
    ];

    for spelling in spellings.iter() {
        let basedir = PathBuf::from(spelling);
        let output = super::strip_basedirs(input, std::slice::from_ref(&basedir));
        assert_eq!(output, expected);
    }
}
/// Basedir matching ignores ASCII case on Windows.
///
/// Gated to Windows like the other Windows-only tests: the non-Windows
/// `normalize_path` returns its input unchanged and matching is documented
/// as case-sensitive there, so these assertions are only expected to hold
/// on Windows.
#[cfg(target_os = "windows")]
#[test]
fn test_strip_basedir_case_insensitive() {
    use std::path::PathBuf;

    let expected = b"# 1 \"./src/main.c\"";

    // Basedir in lowercase, path in the input in uppercase.
    let basedir = PathBuf::from("/home/user/project");
    let input = b"# 1 \"/HOME/USER/PROJECT/src/main.c\"";
    let output = super::strip_basedirs(input, std::slice::from_ref(&basedir));
    assert_eq!(output, expected);

    // Mixed case in the input, same lowercase basedir.
    let input = b"# 1 \"/Home/User/Project/src/main.c\"";
    let output = super::strip_basedirs(input, std::slice::from_ref(&basedir));
    assert_eq!(output, expected);

    // Basedir in uppercase, path in the input in lowercase.
    let basedir = PathBuf::from("/HOME/USER/PROJECT");
    let input = b"# 1 \"/home/user/project/src/main.c\"";
    let output = super::strip_basedirs(input, std::slice::from_ref(&basedir));
    assert_eq!(output, expected);
}
/// Windows only: a backslash-style basedir is still stripped when the path
/// in the preprocessor output mixes `/` and `\` separators. The separator
/// that follows the stripped prefix is kept exactly as it appeared.
#[cfg(target_os = "windows")]
#[test]
fn test_strip_basedir_windows_mixed_slashes() {
    use std::path::PathBuf;

    let project_root = PathBuf::from("C:\\Users\\test\\project");
    let roots = std::slice::from_ref(&project_root);

    // Forward slash after the drive, backslashes through the rest of the prefix.
    let forward_first = b"# 1 \"C:/Users\\test\\project\\src/main.c\"";
    let stripped = super::strip_basedirs(forward_first, roots);
    let want = b"# 1 \".\\src/main.c\"";
    assert_eq!(stripped, want, "Failed to strip mixed slash path");

    // The reverse mix: backslash after the drive, forward slashes afterwards.
    let backslash_first = b"# 1 \"C:\\Users/test/project/src\\main.c\"";
    let stripped = super::strip_basedirs(backslash_first, roots);
    let want = b"# 1 \"./src\\main.c\"";
    assert_eq!(stripped, want, "Failed to strip reverse mixed slash path");
}