From b4ff244554d260d7725825978a8b83f443dc796f Mon Sep 17 00:00:00 2001 From: Rory Campbell Date: Fri, 6 Mar 2026 16:49:31 +0100 Subject: [PATCH 01/14] adds rebuild commands to lsp --- editors/code/package.json | 10 +++++++++ editors/code/src/extension.ts | 24 +++++++++++++++++++++ src/lsp/mod.rs | 40 ++++++++++++++++++++++++++++++++++- 3 files changed, 73 insertions(+), 1 deletion(-) diff --git a/editors/code/package.json b/editors/code/package.json index 13ce8f0d..07b08463 100644 --- a/editors/code/package.json +++ b/editors/code/package.json @@ -27,6 +27,16 @@ ] } ], + "commands": [ + { + "command": "pfc.rebuildModule", + "title": "PFC: Rebuild Current Module" + }, + { + "command": "pfc.rebuildProject", + "title": "PFC: Rebuild Project" + } + ], "configuration": { "title": "PureScript Fast Compiler", "properties": { diff --git a/editors/code/src/extension.ts b/editors/code/src/extension.ts index e6736d24..8c4c9957 100644 --- a/editors/code/src/extension.ts +++ b/editors/code/src/extension.ts @@ -33,6 +33,30 @@ export function activate(context: vscode.ExtensionContext) { clientOptions ); + context.subscriptions.push( + vscode.commands.registerCommand("pfc.rebuildModule", async () => { + const editor = vscode.window.activeTextEditor; + if (!editor) { + vscode.window.showWarningMessage("No active editor"); + return; + } + if (!client) { + vscode.window.showWarningMessage("Language server not running"); + return; + } + await client.sendRequest("pfc/rebuildModule", { + uri: editor.document.uri.toString(), + }); + }), + vscode.commands.registerCommand("pfc.rebuildProject", async () => { + if (!client) { + vscode.window.showWarningMessage("Language server not running"); + return; + } + await client.sendRequest("pfc/rebuildProject"); + }) + ); + client.start(); } diff --git a/src/lsp/mod.rs b/src/lsp/mod.rs index a1faf57f..85e17e12 100644 --- a/src/lsp/mod.rs +++ b/src/lsp/mod.rs @@ -100,6 +100,41 @@ impl LanguageServer for Backend { } impl Backend { + async fn 
rebuild_module(&self, params: serde_json::Value) -> Result { + if let Some(uri_str) = params.get("uri").and_then(|v| v.as_str()) { + if let Ok(uri) = Url::parse(uri_str) { + // Try open files first, then source_map, then disk + let source = { + let files = self.files.read().await; + files.get(uri_str).map(|f| f.source.clone()) + }; + let source = match source { + Some(s) => s, + None => { + let smap = self.source_map.read().await; + match smap.get(uri_str) { + Some(s) => s.clone(), + None => { + if let Ok(path) = uri.to_file_path() { + std::fs::read_to_string(path).unwrap_or_default() + } else { + String::new() + } + } + } + } + }; + self.on_change(uri, source).await; + } + } + Ok(serde_json::json!({ "success": true })) + } + + async fn rebuild_project(&self) -> Result { + self.load_sources().await; + Ok(serde_json::json!({ "success": true })) + } + pub fn new(client: Client, sources_cmd: Option) -> Self { Backend { client, @@ -121,7 +156,10 @@ pub fn run_server(sources_cmd: Option) { let stdin = tokio::io::stdin(); let stdout = tokio::io::stdout(); - let (service, socket) = LspService::new(|client| Backend::new(client, sources_cmd)); + let (service, socket) = LspService::build(|client| Backend::new(client, sources_cmd)) + .custom_method("pfc/rebuildModule", Backend::rebuild_module) + .custom_method("pfc/rebuildProject", Backend::rebuild_project) + .finish(); Server::new(stdin, stdout, socket).serve(service).await; }); From df3b42fed33e0e97076140fa3b85e0624dc47c1a Mon Sep 17 00:00:00 2001 From: Rory Campbell Date: Fri, 6 Mar 2026 17:11:46 +0100 Subject: [PATCH 02/14] incremental builds V1 --- Cargo.toml | 2 + src/build/cache.rs | 167 ++++++++++++ src/build/mod.rs | 112 +++++++- src/build/portable.rs | 432 +++++++++++++++++++++++++++++++ src/lsp/handlers/diagnostics.rs | 139 ++++++++-- src/lsp/handlers/load_sources.rs | 8 +- src/lsp/mod.rs | 3 + src/main.rs | 18 +- tests/build.rs | 93 +++++++ 9 files changed, 940 insertions(+), 34 deletions(-) create mode 100644 
src/build/cache.rs create mode 100644 src/build/portable.rs diff --git a/Cargo.toml b/Cargo.toml index 6b43e37d..34a7f3c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,9 @@ rayon = "1.10" mimalloc = { version = "0.1", default-features = false } tower-lsp = "0.20" tokio = { version = "1", features = ["full"] } +serde = { version = "1", features = ["derive"] } serde_json = "1" +bincode = "1" [build-dependencies] lalrpop = "0.22" diff --git a/src/build/cache.rs b/src/build/cache.rs new file mode 100644 index 00000000..777e2549 --- /dev/null +++ b/src/build/cache.rs @@ -0,0 +1,167 @@ +//! Module cache for incremental builds. +//! +//! Tracks content hashes and cached ModuleExports to skip typechecking +//! unchanged modules. Supports on-disk persistence via bincode serialization. + +use std::collections::{HashMap, HashSet, VecDeque}; +use std::hash::{Hash, Hasher}; +use std::io; +use std::path::Path; + +use crate::typechecker::registry::ModuleExports; + +use super::portable::{PModuleExports, PortableCacheFile, PortableCachedModule}; + +// ===== Module Cache ===== + +/// Cached state for a single module. +struct CachedModule { + content_hash: u64, + exports: ModuleExports, + imports: Vec, +} + +/// In-memory cache of typechecked modules for incremental builds. +#[derive(Default)] +pub struct ModuleCache { + entries: HashMap, + /// Reverse dependency graph: module → modules that import it + dependents: HashMap>, +} + +impl ModuleCache { + pub fn new() -> Self { + Self::default() + } + + /// Compute a content hash for a source string. + pub fn content_hash(source: &str) -> u64 { + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + source.hash(&mut hasher); + hasher.finish() + } + + /// Check if a module needs to be rebuilt. 
+ /// + /// Returns true if: + /// - The module is not in the cache + /// - Its content hash has changed + /// - Any of its imports was rebuilt in this cycle + pub fn needs_rebuild( + &self, + module_name: &str, + content_hash: u64, + rebuilt: &HashSet, + ) -> bool { + match self.entries.get(module_name) { + None => true, + Some(cached) => { + if cached.content_hash != content_hash { + return true; + } + // Check if any dependency was rebuilt + cached.imports.iter().any(|dep| rebuilt.contains(dep)) + } + } + } + + /// Get cached exports for a module (if available). + pub fn get_exports(&self, module_name: &str) -> Option<&ModuleExports> { + self.entries.get(module_name).map(|c| &c.exports) + } + + /// Update the cache entry for a module after typechecking. + pub fn update( + &mut self, + module_name: String, + content_hash: u64, + exports: ModuleExports, + imports: Vec, + ) { + self.entries.insert(module_name, CachedModule { + content_hash, + exports, + imports, + }); + } + + /// Build the reverse dependency graph from cached import data. + pub fn build_reverse_deps(&mut self) { + self.dependents.clear(); + for (module, cached) in &self.entries { + for dep in &cached.imports { + self.dependents + .entry(dep.clone()) + .or_default() + .push(module.clone()); + } + } + } + + /// Find all transitive dependents of a module (BFS). + pub fn transitive_dependents(&self, module: &str) -> HashSet { + let mut result = HashSet::new(); + let mut queue = VecDeque::new(); + queue.push_back(module.to_string()); + + while let Some(current) = queue.pop_front() { + if let Some(deps) = self.dependents.get(&current) { + for dep in deps { + if result.insert(dep.clone()) { + queue.push_back(dep.clone()); + } + } + } + } + + result + } + + /// Remove modules that are no longer in the source set. + pub fn retain_modules(&mut self, module_names: &HashSet) { + self.entries.retain(|k, _| module_names.contains(k)); + } + + /// Save cache to disk using bincode serialization.
+ pub fn save_to_disk(&self, path: &Path) -> io::Result<()> { + let portable = PortableCacheFile { + modules: self.entries.iter().map(|(name, cached)| { + (name.clone(), PortableCachedModule { + content_hash: cached.content_hash, + exports: PModuleExports::from(&cached.exports), + imports: cached.imports.clone(), + }) + }).collect(), + }; + + let encoded = bincode::serialize(&portable) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode serialize: {e}")))?; + + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + std::fs::write(path, encoded) + } + + /// Load cache from disk. + pub fn load_from_disk(path: &Path) -> io::Result { + let data = std::fs::read(path)?; + let portable: PortableCacheFile = bincode::deserialize(&data) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode deserialize: {e}")))?; + + let entries = portable.modules.into_iter().map(|(name, cached)| { + (name, CachedModule { + content_hash: cached.content_hash, + exports: ModuleExports::from(cached.exports), + imports: cached.imports, + }) + }).collect(); + + let mut cache = ModuleCache { + entries, + dependents: HashMap::new(), + }; + cache.build_reverse_deps(); + Ok(cache) + } +} diff --git a/src/build/mod.rs b/src/build/mod.rs index 55ba6b61..da43568e 100644 --- a/src/build/mod.rs +++ b/src/build/mod.rs @@ -4,7 +4,9 @@ //! builds a dependency graph from imports, topologically sorts, and //! typechecks in dependency order. +pub mod cache; pub mod error; +pub mod portable; use std::collections::{HashMap, HashSet, VecDeque}; use std::panic::AssertUnwindSafe; @@ -68,6 +70,7 @@ struct ParsedModule { module_parts: Vec, import_parts: Vec>, js_source: Option, + source_hash: u64, } // ===== Helpers ===== @@ -135,8 +138,17 @@ fn extract_foreign_import_names(module: &Module) -> Vec { // ===== Public API ===== +/// Build all PureScript modules matching the given glob patterns, with incremental caching. 
+pub fn build_cached(globs: &[&str], output_dir: Option, cache: &mut cache::ModuleCache) -> BuildResult { + build_internal(globs, output_dir, Some(cache)) +} + /// Build all PureScript modules matching the given glob patterns. pub fn build(globs: &[&str], output_dir: Option) -> BuildResult { + build_internal(globs, output_dir, None) +} + +fn build_internal(globs: &[&str], output_dir: Option, cache: Option<&mut cache::ModuleCache>) -> BuildResult { let build_start = Instant::now(); let mut build_errors = Vec::new(); @@ -206,7 +218,7 @@ pub fn build(globs: &[&str], output_dir: Option) -> BuildResult { ..Default::default() }; let mut result = - build_from_sources_with_options(&source_refs, &Some(js_refs), None, &options).0; + build_from_sources_impl(&source_refs, &Some(js_refs), None, &options, cache).0; // Prepend file-level errors before source-level errors build_errors.append(&mut result.build_errors); result.build_errors = build_errors; @@ -245,6 +257,29 @@ pub fn build_from_sources_with_options( js_sources: &Option>, start_registry: Option>, options: &BuildOptions, +) -> (BuildResult, ModuleRegistry) { + build_from_sources_impl(sources, js_sources, start_registry, options, None) +} + +/// Build with incremental caching support. +/// Skips typechecking modules whose source hasn't changed and whose +/// dependencies haven't been rebuilt. 
+pub fn build_from_sources_incremental( + sources: &[(&str, &str)], + js_sources: &Option>, + start_registry: Option>, + options: &BuildOptions, + cache: &mut cache::ModuleCache, +) -> (BuildResult, ModuleRegistry) { + build_from_sources_impl(sources, js_sources, start_registry, options, Some(cache)) +} + +fn build_from_sources_impl( + sources: &[(&str, &str)], + js_sources: &Option>, + start_registry: Option>, + options: &BuildOptions, + mut cache: Option<&mut cache::ModuleCache>, ) -> (BuildResult, ModuleRegistry) { let pipeline_start = Instant::now(); let mut build_errors = Vec::new(); @@ -348,6 +383,8 @@ pub fn build_from_sources_with_options( .and_then(|m| m.get(path_str)) .map(|s| s.to_string()); + let source_hash = cache::ModuleCache::content_hash(sources[i].1); + parsed.push(ParsedModule { path, module, @@ -355,6 +392,7 @@ pub fn build_from_sources_with_options( module_parts, import_parts, js_source, + source_hash, }); } log::debug!( @@ -483,6 +521,8 @@ pub fn build_from_sources_with_options( effective_timeout.map(|t| t.as_secs()).unwrap_or(0)); let mut done = 0usize; + let mut rebuilt_set: HashSet = HashSet::new(); + let mut cached_count = 0usize; for level in &levels { if sequential { @@ -491,6 +531,28 @@ pub fn build_from_sources_with_options( // Peak memory = 1 module's CheckResult at a time. 
for &idx in level { let pm = &parsed[idx]; + + // Cache check: skip typecheck if source unchanged and no deps rebuilt + if let Some(ref cache) = cache { + if !cache.needs_rebuild(&pm.module_name, pm.source_hash, &rebuilt_set) { + if let Some(exports) = cache.get_exports(&pm.module_name) { + done += 1; + cached_count += 1; + log::debug!( + " [{}/{}] cached: {}", + done, total_modules, pm.module_name + ); + registry.register(&pm.module_parts, exports.clone()); + module_results.push(ModuleResult { + path: pm.path.clone(), + module_name: pm.module_name.clone(), + type_errors: vec![], + }); + continue; + } + } + } + let tc_start = Instant::now(); let deadline = effective_timeout.map(|t| tc_start + t); let check_result = std::panic::catch_unwind(AssertUnwindSafe(|| { @@ -516,6 +578,13 @@ pub fn build_from_sources_with_options( " [{}/{}] ok: {} ({:.2?})", done, total_modules, pm.module_name, elapsed ); + rebuilt_set.insert(pm.module_name.clone()); + let import_names: Vec = pm.import_parts.iter() + .map(|parts| interner::resolve_module_name(parts)) + .collect(); + if let Some(ref mut c) = cache { + c.update(pm.module_name.clone(), pm.source_hash, result.exports.clone(), import_names); + } // Register exports immediately — result.exports is moved, // then result (with its types HashMap) is dropped. registry.register(&pm.module_parts, result.exports); @@ -542,9 +611,35 @@ pub fn build_from_sources_with_options( } } } else { - // Parallel mode: collect all results for the level, then register sequentially. + // Parallel mode: first handle cached modules, then typecheck the rest. 
+ let mut to_typecheck = Vec::new(); + for &idx in level.iter() { + let pm = &parsed[idx]; + if let Some(ref cache) = cache { + if !cache.needs_rebuild(&pm.module_name, pm.source_hash, &rebuilt_set) { + if let Some(exports) = cache.get_exports(&pm.module_name) { + done += 1; + cached_count += 1; + log::debug!( + " [{}/{}] cached: {}", + done, total_modules, pm.module_name + ); + registry.register(&pm.module_parts, exports.clone()); + module_results.push(ModuleResult { + path: pm.path.clone(), + module_name: pm.module_name.clone(), + type_errors: vec![], + }); + continue; + } + } + } + to_typecheck.push(idx); + } + + // Typecheck remaining modules in parallel let level_results: Vec<_> = pool.install(|| { - level.par_iter().map(|&idx| { + to_typecheck.par_iter().map(|&idx| { let pm = &parsed[idx]; let tc_start = Instant::now(); let deadline = effective_timeout.map(|t| tc_start + t); @@ -576,6 +671,13 @@ pub fn build_from_sources_with_options( " [{}/{}] ok: {} ({:.2?})", done, total_modules, pm.module_name, elapsed ); + rebuilt_set.insert(pm.module_name.clone()); + let import_names: Vec = pm.import_parts.iter() + .map(|parts| interner::resolve_module_name(parts)) + .collect(); + if let Some(ref mut c) = cache { + c.update(pm.module_name.clone(), pm.source_hash, result.exports.clone(), import_names); + } registry.register(&pm.module_parts, result.exports); module_results.push(ModuleResult { path: pm.path.clone(), @@ -602,8 +704,10 @@ pub fn build_from_sources_with_options( } } log::debug!( - "Phase 4 complete: typechecked {} modules in {:.2?}", + "Phase 4 complete: {} modules ({} cached, {} typechecked) in {:.2?}", module_results.len(), + cached_count, + module_results.len() - cached_count, phase_start.elapsed() ); diff --git a/src/build/portable.rs b/src/build/portable.rs new file mode 100644 index 00000000..39ac9fd0 --- /dev/null +++ b/src/build/portable.rs @@ -0,0 +1,432 @@ +//! Portable (serializable) representations of typechecker types. +//! +//! 
These mirror the core types but use `String` instead of `Symbol` +//! (which is process-local). Used for on-disk caching of ModuleExports. + +use std::collections::{HashMap, HashSet}; + +use serde::{Deserialize, Serialize}; + +use crate::cst::{Associativity, QualifiedIdent}; +use crate::interner; +use crate::typechecker::registry::ModuleExports; +use crate::typechecker::types::{Role, Scheme, TyVarId, Type}; + +// ===== Helper conversions ===== + +fn sym_to_s(s: interner::Symbol) -> String { + interner::resolve(s).unwrap_or_default().to_string() +} + +fn s_to_sym(s: &str) -> interner::Symbol { + interner::intern(s) +} + +// ===== Portable QualifiedIdent ===== + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash)] +pub struct PQI { + pub module: Option, + pub name: String, +} + +impl From<&QualifiedIdent> for PQI { + fn from(qi: &QualifiedIdent) -> Self { + PQI { + module: qi.module.map(sym_to_s), + name: sym_to_s(qi.name), + } + } +} + +impl From<&PQI> for QualifiedIdent { + fn from(p: &PQI) -> Self { + QualifiedIdent { + module: p.module.as_ref().map(|s| s_to_sym(s)), + name: s_to_sym(&p.name), + } + } +} + +// ===== Portable Type ===== + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub enum PType { + Unif(u32), + Var(String), + Con(PQI), + App(Box, Box), + Fun(Box, Box), + Forall(Vec<(String, bool)>, Box), + Record(Vec<(String, PType)>, Option>), + TypeString(String), + TypeInt(i64), +} + +impl From<&Type> for PType { + fn from(t: &Type) -> Self { + match t { + Type::Unif(id) => PType::Unif(id.0), + Type::Var(s) => PType::Var(sym_to_s(*s)), + Type::Con(qi) => PType::Con(qi.into()), + Type::App(f, a) => PType::App(Box::new(f.as_ref().into()), Box::new(a.as_ref().into())), + Type::Fun(a, b) => PType::Fun(Box::new(a.as_ref().into()), Box::new(b.as_ref().into())), + Type::Forall(vars, body) => PType::Forall( + vars.iter().map(|(s, v)| (sym_to_s(*s), *v)).collect(), + Box::new(body.as_ref().into()), + ), + Type::Record(fields, tail) => 
PType::Record( + fields.iter().map(|(s, t)| (sym_to_s(*s), t.into())).collect(), + tail.as_ref().map(|t| Box::new(t.as_ref().into())), + ), + Type::TypeString(s) => PType::TypeString(sym_to_s(*s)), + Type::TypeInt(i) => PType::TypeInt(*i), + } + } +} + +impl From<&PType> for Type { + fn from(p: &PType) -> Self { + match p { + PType::Unif(id) => Type::Unif(TyVarId(*id)), + PType::Var(s) => Type::Var(s_to_sym(s)), + PType::Con(qi) => Type::Con(qi.into()), + PType::App(f, a) => Type::App(Box::new(f.as_ref().into()), Box::new(a.as_ref().into())), + PType::Fun(a, b) => Type::Fun(Box::new(a.as_ref().into()), Box::new(b.as_ref().into())), + PType::Forall(vars, body) => Type::Forall( + vars.iter().map(|(s, v)| (s_to_sym(s), *v)).collect(), + Box::new(body.as_ref().into()), + ), + PType::Record(fields, tail) => Type::Record( + fields.iter().map(|(s, t)| (s_to_sym(s), t.into())).collect(), + tail.as_ref().map(|t| Box::new(t.as_ref().into())), + ), + PType::TypeString(s) => Type::TypeString(s_to_sym(s)), + PType::TypeInt(i) => Type::TypeInt(*i), + } + } +} + +// ===== Portable Scheme ===== + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct PScheme { + pub forall_vars: Vec, + pub ty: PType, +} + +impl From<&Scheme> for PScheme { + fn from(s: &Scheme) -> Self { + PScheme { + forall_vars: s.forall_vars.iter().map(|v| sym_to_s(*v)).collect(), + ty: (&s.ty).into(), + } + } +} + +impl From<&PScheme> for Scheme { + fn from(p: &PScheme) -> Self { + Scheme { + forall_vars: p.forall_vars.iter().map(|s| s_to_sym(s)).collect(), + ty: (&p.ty).into(), + } + } +} + +// ===== Portable Associativity ===== + +#[derive(Serialize, Deserialize, Clone, Copy, Debug)] +pub enum PAssociativity { + Left, + Right, + None, +} + +impl From<&Associativity> for PAssociativity { + fn from(a: &Associativity) -> Self { + match a { + Associativity::Left => PAssociativity::Left, + Associativity::Right => PAssociativity::Right, + Associativity::None => PAssociativity::None, + } + } +} + +impl 
From<&PAssociativity> for Associativity { + fn from(p: &PAssociativity) -> Self { + match p { + PAssociativity::Left => Associativity::Left, + PAssociativity::Right => Associativity::Right, + PAssociativity::None => Associativity::None, + } + } +} + +// ===== Portable Role ===== + +#[derive(Serialize, Deserialize, Clone, Copy, Debug)] +pub enum PRole { + Phantom, + Representational, + Nominal, +} + +impl From<&Role> for PRole { + fn from(r: &Role) -> Self { + match r { + Role::Phantom => PRole::Phantom, + Role::Representational => PRole::Representational, + Role::Nominal => PRole::Nominal, + } + } +} + +impl From<&PRole> for Role { + fn from(p: &PRole) -> Self { + match p { + PRole::Phantom => Role::Phantom, + PRole::Representational => Role::Representational, + PRole::Nominal => Role::Nominal, + } + } +} + +// ===== Collection conversion helpers ===== + +fn map_qi_scheme(m: &HashMap) -> HashMap { + m.iter().map(|(k, v)| (k.into(), v.into())).collect() +} + +fn unmap_qi_scheme(m: &HashMap) -> HashMap { + m.iter().map(|(k, v)| (k.into(), v.into())).collect() +} + +fn map_qi_qi(m: &HashMap) -> HashMap { + m.iter().map(|(k, v)| (k.into(), v.into())).collect() +} + +fn unmap_qi_qi(m: &HashMap) -> HashMap { + m.iter().map(|(k, v)| (k.into(), v.into())).collect() +} + +fn map_qi_vec_qi(m: &HashMap>) -> HashMap> { + m.iter().map(|(k, v)| (k.into(), v.iter().map(|qi| qi.into()).collect())).collect() +} + +fn unmap_qi_vec_qi(m: &HashMap>) -> HashMap> { + m.iter().map(|(k, v)| (k.into(), v.iter().map(|qi| qi.into()).collect())).collect() +} + +fn map_set_qi(s: &HashSet) -> HashSet { + s.iter().map(|qi| qi.into()).collect() +} + +fn unmap_set_qi(s: &HashSet) -> HashSet { + s.iter().map(|qi| qi.into()).collect() +} + +fn map_qi_usize(m: &HashMap) -> HashMap { + m.iter().map(|(k, v)| (k.into(), *v)).collect() +} + +fn unmap_qi_usize(m: &HashMap) -> HashMap { + m.iter().map(|(k, v)| (k.into(), *v)).collect() +} + +fn map_sym_sym(m: &HashMap) -> HashMap { + m.iter().map(|(k, v)| 
(sym_to_s(*k), sym_to_s(*v))).collect() +} + +fn unmap_sym_sym(m: &HashMap) -> HashMap { + m.iter().map(|(k, v)| (s_to_sym(k), s_to_sym(v))).collect() +} + +fn map_set_sym(s: &HashSet) -> HashSet { + s.iter().map(|sym| sym_to_s(*sym)).collect() +} + +fn unmap_set_sym(s: &HashSet) -> HashSet { + s.iter().map(|s| s_to_sym(s)).collect() +} + +fn map_sym_type(m: &HashMap) -> HashMap { + m.iter().map(|(k, v)| (sym_to_s(*k), v.into())).collect() +} + +fn unmap_sym_type(m: &HashMap) -> HashMap { + m.iter().map(|(k, v)| (s_to_sym(k), v.into())).collect() +} + +fn map_sym_roles(m: &HashMap>) -> HashMap> { + m.iter().map(|(k, v)| (sym_to_s(*k), v.iter().map(|r| r.into()).collect())).collect() +} + +fn unmap_sym_roles(m: &HashMap>) -> HashMap> { + m.iter().map(|(k, v)| (s_to_sym(k), v.iter().map(|r| r.into()).collect())).collect() +} + +// ===== Portable ModuleExports ===== + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct PModuleExports { + pub values: HashMap, + pub class_methods: HashMap)>, + pub data_constructors: HashMap>, + pub ctor_details: HashMap, Vec)>, + pub instances: HashMap, Vec<(PQI, Vec)>)>>, + pub type_operators: HashMap, + pub value_fixities: HashMap, + pub type_fixities: HashMap, + pub function_op_aliases: HashSet, + pub value_operator_targets: HashMap, + pub constrained_class_methods: HashSet, + pub type_aliases: HashMap, PType)>, + pub class_param_counts: HashMap, + pub value_origins: HashMap, + pub type_origins: HashMap, + pub class_origins: HashMap, + pub operator_class_targets: HashMap, + pub class_fundeps: HashMap, Vec<(Vec, Vec)>)>, + pub type_con_arities: HashMap, + pub type_roles: HashMap>, + pub newtype_names: HashSet, + pub signature_constraints: HashMap)>>, + pub type_kinds: HashMap, + pub class_type_kinds: HashMap, + pub partial_dischargers: HashSet, + pub self_referential_aliases: HashSet, + pub class_superclasses: HashMap, Vec<(PQI, Vec)>)>, + pub method_own_constraints: HashMap>, +} + +impl From<&ModuleExports> for 
PModuleExports { + fn from(e: &ModuleExports) -> Self { + PModuleExports { + values: map_qi_scheme(&e.values), + class_methods: e.class_methods.iter().map(|(k, (c, vs))| { + (k.into(), (c.into(), vs.iter().map(|v| v.into()).collect())) + }).collect(), + data_constructors: map_qi_vec_qi(&e.data_constructors), + ctor_details: e.ctor_details.iter().map(|(k, (p, vs, ts))| { + (k.into(), (p.into(), vs.iter().map(|v| v.into()).collect(), ts.iter().map(|t| t.into()).collect())) + }).collect(), + instances: e.instances.iter().map(|(k, v)| { + (k.into(), v.iter().map(|(ts, cs)| { + (ts.iter().map(|t| t.into()).collect(), cs.iter().map(|(c, ts2)| { + (c.into(), ts2.iter().map(|t| t.into()).collect()) + }).collect()) + }).collect()) + }).collect(), + type_operators: map_qi_qi(&e.type_operators), + value_fixities: e.value_fixities.iter().map(|(k, (a, p))| (k.into(), (a.into(), *p))).collect(), + type_fixities: e.type_fixities.iter().map(|(k, (a, p))| (k.into(), (a.into(), *p))).collect(), + function_op_aliases: map_set_qi(&e.function_op_aliases), + value_operator_targets: map_qi_qi(&e.value_operator_targets), + constrained_class_methods: map_set_qi(&e.constrained_class_methods), + type_aliases: e.type_aliases.iter().map(|(k, (ps, ty))| { + (k.into(), (ps.iter().map(|p| p.into()).collect(), ty.into())) + }).collect(), + class_param_counts: map_qi_usize(&e.class_param_counts), + value_origins: map_sym_sym(&e.value_origins), + type_origins: map_sym_sym(&e.type_origins), + class_origins: map_sym_sym(&e.class_origins), + operator_class_targets: map_sym_sym(&e.operator_class_targets), + class_fundeps: e.class_fundeps.iter().map(|(k, (vs, fs))| { + (sym_to_s(*k), (vs.iter().map(|v| sym_to_s(*v)).collect(), fs.clone())) + }).collect(), + type_con_arities: map_qi_usize(&e.type_con_arities), + type_roles: map_sym_roles(&e.type_roles), + newtype_names: map_set_sym(&e.newtype_names), + signature_constraints: e.signature_constraints.iter().map(|(k, v)| { + (k.into(), v.iter().map(|(c, ts)| 
{ + (c.into(), ts.iter().map(|t| t.into()).collect()) + }).collect()) + }).collect(), + type_kinds: map_sym_type(&e.type_kinds), + class_type_kinds: map_sym_type(&e.class_type_kinds), + partial_dischargers: map_set_sym(&e.partial_dischargers), + self_referential_aliases: map_set_sym(&e.self_referential_aliases), + class_superclasses: e.class_superclasses.iter().map(|(k, (vs, cs))| { + (k.into(), (vs.iter().map(|v| sym_to_s(*v)).collect(), cs.iter().map(|(c, ts)| { + (c.into(), ts.iter().map(|t| t.into()).collect()) + }).collect())) + }).collect(), + method_own_constraints: e.method_own_constraints.iter().map(|(k, v)| { + (k.into(), v.iter().map(|s| sym_to_s(*s)).collect()) + }).collect(), + } + } +} + +impl From for ModuleExports { + fn from(p: PModuleExports) -> Self { + ModuleExports { + values: unmap_qi_scheme(&p.values), + class_methods: p.class_methods.iter().map(|(k, (c, vs))| { + (k.into(), (c.into(), vs.iter().map(|v| v.into()).collect())) + }).collect(), + data_constructors: unmap_qi_vec_qi(&p.data_constructors), + ctor_details: p.ctor_details.iter().map(|(k, (par, vs, ts))| { + (k.into(), (par.into(), vs.iter().map(|v| v.into()).collect(), ts.iter().map(|t| t.into()).collect())) + }).collect(), + instances: p.instances.iter().map(|(k, v)| { + (k.into(), v.iter().map(|(ts, cs)| { + (ts.iter().map(|t| t.into()).collect(), cs.iter().map(|(c, ts2)| { + (c.into(), ts2.iter().map(|t| t.into()).collect()) + }).collect()) + }).collect()) + }).collect(), + type_operators: unmap_qi_qi(&p.type_operators), + value_fixities: p.value_fixities.iter().map(|(k, (a, pr))| (k.into(), (a.into(), *pr))).collect(), + type_fixities: p.type_fixities.iter().map(|(k, (a, pr))| (k.into(), (a.into(), *pr))).collect(), + function_op_aliases: unmap_set_qi(&p.function_op_aliases), + value_operator_targets: unmap_qi_qi(&p.value_operator_targets), + constrained_class_methods: unmap_set_qi(&p.constrained_class_methods), + type_aliases: p.type_aliases.iter().map(|(k, (ps, ty))| { + 
(k.into(), (ps.iter().map(|p| p.into()).collect(), ty.into())) + }).collect(), + class_param_counts: unmap_qi_usize(&p.class_param_counts), + value_origins: unmap_sym_sym(&p.value_origins), + type_origins: unmap_sym_sym(&p.type_origins), + class_origins: unmap_sym_sym(&p.class_origins), + operator_class_targets: unmap_sym_sym(&p.operator_class_targets), + class_fundeps: p.class_fundeps.iter().map(|(k, (vs, fs))| { + (s_to_sym(k), (vs.iter().map(|v| s_to_sym(v)).collect(), fs.clone())) + }).collect(), + type_con_arities: unmap_qi_usize(&p.type_con_arities), + type_roles: unmap_sym_roles(&p.type_roles), + newtype_names: unmap_set_sym(&p.newtype_names), + signature_constraints: p.signature_constraints.iter().map(|(k, v)| { + (k.into(), v.iter().map(|(c, ts)| { + (c.into(), ts.iter().map(|t| t.into()).collect()) + }).collect()) + }).collect(), + type_kinds: unmap_sym_type(&p.type_kinds), + class_type_kinds: unmap_sym_type(&p.class_type_kinds), + partial_dischargers: unmap_set_sym(&p.partial_dischargers), + self_referential_aliases: unmap_set_sym(&p.self_referential_aliases), + class_superclasses: p.class_superclasses.iter().map(|(k, (vs, cs))| { + (k.into(), (vs.iter().map(|v| s_to_sym(v)).collect(), cs.iter().map(|(c, ts)| { + (c.into(), ts.iter().map(|t| t.into()).collect()) + }).collect())) + }).collect(), + method_own_constraints: p.method_own_constraints.iter().map(|(k, v)| { + (k.into(), v.iter().map(|s| s_to_sym(s)).collect()) + }).collect(), + } + } +} + +// ===== Portable Cache File ===== + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct PortableCacheFile { + pub modules: HashMap, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct PortableCachedModule { + pub content_hash: u64, + pub exports: PModuleExports, + pub imports: Vec, +} diff --git a/src/lsp/handlers/diagnostics.rs b/src/lsp/handlers/diagnostics.rs index cf397610..59da1076 100644 --- a/src/lsp/handlers/diagnostics.rs +++ b/src/lsp/handlers/diagnostics.rs @@ -3,6 +3,9 @@ 
use std::sync::atomic::Ordering; use tower_lsp::lsp_types::*; +use crate::interner; +use crate::build::cache::ModuleCache; + use super::super::{Backend, FileState}; impl Backend { @@ -57,45 +60,125 @@ impl Backend { } }; + let module_name = interner::resolve_module_name(&module.name.value.parts); + let module_parts: Vec = module.name.value.parts.clone(); + // Type-check against the registry - let registry = self.registry.read().await; + let mut registry = self.registry.write().await; let check_result = crate::typechecker::check_module_with_registry(&module, ®istry); - let diagnostics: Vec = check_result - .errors - .iter() - .map(|err| { - let span = err.span(); - let range = match span.to_pos(&source) { - Some((start, end)) => Range { - start: Position { - line: start.line.saturating_sub(1) as u32, - character: start.column.saturating_sub(1) as u32, - }, - end: Position { - line: end.line.saturating_sub(1) as u32, - character: end.column.saturating_sub(1) as u32, - }, - }, - None => Range::default(), - }; - Diagnostic { - range, - severity: Some(DiagnosticSeverity::ERROR), - code: Some(NumberOrString::String(format!("TypeError.{}", err.code()))), - source: Some("pfc".to_string()), - message: format!("{err}"), - ..Default::default() - } - }) + // Update registry with new exports + registry.register(&module_parts, check_result.exports.clone()); + + // Update cache + let source_hash = ModuleCache::content_hash(&source); + let import_names: Vec = module.imports.iter() + .map(|imp| interner::resolve_module_name(&imp.module.parts)) .collect(); + let mut cache = self.module_cache.write().await; + cache.update(module_name.clone(), source_hash, check_result.exports, import_names); + cache.build_reverse_deps(); + // Find transitive dependents that need re-checking + let dependents = cache.transitive_dependents(&module_name); + drop(cache); + + // Publish diagnostics for the changed module + let diagnostics = type_errors_to_diagnostics(&check_result.errors, &source); 
self.client .publish_diagnostics(uri, diagnostics, None) .await; + + // Update source map + { + let mfmap = self.module_file_map.read().await; + let mut smap = self.source_map.write().await; + // Update the changed module's source in source_map + if let Some(file_uri) = mfmap.get(&module_name) { + smap.insert(file_uri.clone(), source); + } + } + + // Cascade: re-typecheck dependents + if !dependents.is_empty() { + log::debug!("Cascade rebuild: {} dependents of {}", dependents.len(), module_name); + + let mfmap = self.module_file_map.read().await; + let smap = self.source_map.read().await; + + for dep_name in &dependents { + let dep_uri_str = match mfmap.get(dep_name) { + Some(u) => u.clone(), + None => continue, + }; + let dep_source = match smap.get(&dep_uri_str) { + Some(s) => s.clone(), + None => continue, + }; + let dep_uri = match Url::parse(&dep_uri_str) { + Ok(u) => u, + Err(_) => continue, + }; + + let dep_module = match crate::parser::parse(&dep_source) { + Ok(m) => m, + Err(_) => continue, + }; + + let dep_result = crate::typechecker::check_module_with_registry(&dep_module, ®istry); + + // Update registry with dependent's exports + let dep_parts: Vec = dep_module.name.value.parts.clone(); + registry.register(&dep_parts, dep_result.exports.clone()); + + // Update cache for dependent + let dep_hash = ModuleCache::content_hash(&dep_source); + let dep_imports: Vec = dep_module.imports.iter() + .map(|imp| interner::resolve_module_name(&imp.module.parts)) + .collect(); + let mut cache = self.module_cache.write().await; + cache.update(dep_name.clone(), dep_hash, dep_result.exports, dep_imports); + drop(cache); + + let dep_diagnostics = type_errors_to_diagnostics(&dep_result.errors, &dep_source); + self.client + .publish_diagnostics(dep_uri, dep_diagnostics, None) + .await; + } + } } } +fn type_errors_to_diagnostics(errors: &[crate::typechecker::error::TypeError], source: &str) -> Vec { + errors + .iter() + .map(|err| { + let span = err.span(); + let range = 
match span.to_pos(source) { + Some((start, end)) => Range { + start: Position { + line: start.line.saturating_sub(1) as u32, + character: start.column.saturating_sub(1) as u32, + }, + end: Position { + line: end.line.saturating_sub(1) as u32, + character: end.column.saturating_sub(1) as u32, + }, + }, + None => Range::default(), + }; + Diagnostic { + range, + severity: Some(DiagnosticSeverity::ERROR), + code: Some(NumberOrString::String(format!("TypeError.{}", err.code()))), + source: Some("pfc".to_string()), + message: format!("{err}"), + ..Default::default() + } + }) + .collect() +} + fn error_to_range(err: &crate::diagnostics::CompilerError, source: &str) -> Range { match err.get_span() { Some(span) => match span.to_pos(source) { diff --git a/src/lsp/handlers/load_sources.rs b/src/lsp/handlers/load_sources.rs index 5803c1fc..8a62aea1 100644 --- a/src/lsp/handlers/load_sources.rs +++ b/src/lsp/handlers/load_sources.rs @@ -47,6 +47,7 @@ impl Backend { let resolution_exports = self.resolution_exports.clone(); let module_file_map = self.module_file_map.clone(); let source_map = self.source_map.clone(); + let module_cache = self.module_cache.clone(); let ready = self.ready.clone(); let progress_token = token.clone(); @@ -158,12 +159,17 @@ impl Backend { ..Default::default() }; - let (result, new_registry) = crate::build::build_from_sources_with_options( + // Use incremental build with cache + let mut cache = rt.block_on(async { module_cache.write().await }); + let (result, new_registry) = crate::build::build_from_sources_incremental( &source_refs, &None, None, &options, + &mut cache, ); + cache.build_reverse_deps(); + drop(cache); let error_count: usize = result.modules.iter().map(|m| m.type_errors.len()).sum(); let module_count = result.modules.len(); diff --git a/src/lsp/mod.rs b/src/lsp/mod.rs index 85e17e12..b8265f78 100644 --- a/src/lsp/mod.rs +++ b/src/lsp/mod.rs @@ -10,6 +10,7 @@ use tower_lsp::jsonrpc::Result; use tower_lsp::lsp_types::*; use 
tower_lsp::{Client, LanguageServer, LspService, Server}; +use crate::build::cache::ModuleCache; use crate::typechecker::registry::ModuleRegistry; use crate::lsp::utils::resolve::ResolutionExports; @@ -30,6 +31,7 @@ pub struct Backend { pub(crate) module_file_map: Arc>>, /// Maps file URI → source content for loaded project files pub(crate) source_map: Arc>>, + pub(crate) module_cache: Arc>, pub(crate) sources_cmd: Option, pub(crate) ready: Arc, } @@ -144,6 +146,7 @@ impl Backend { resolution_exports: Arc::new(RwLock::new(ResolutionExports::empty())), module_file_map: Arc::new(RwLock::new(HashMap::new())), source_map: Arc::new(RwLock::new(HashMap::new())), + module_cache: Arc::new(RwLock::new(ModuleCache::default())), sources_cmd, ready: Arc::new(AtomicBool::new(false)), } diff --git a/src/main.rs b/src/main.rs index 21d67265..6696b796 100644 --- a/src/main.rs +++ b/src/main.rs @@ -55,8 +55,24 @@ fn main() { Commands::Compile { globs, output } => { log::debug!("Starting compile with globs: {:?}", globs); + let output_path = PathBuf::from(&output); + let cache_path = output_path.join(".pfc-cache").join("cache.bin"); + + let mut cache = cache_path + .parent() + .and_then(|_| build::cache::ModuleCache::load_from_disk(&cache_path).ok()) + .unwrap_or_default(); + let glob_refs: Vec<&str> = globs.iter().map(|s| s.as_str()).collect(); - let result = build::build(&glob_refs, Some(PathBuf::from(&output))); + let result = build::build_cached(&glob_refs, Some(output_path.clone()), &mut cache); + + // Save cache for next build + if let Some(parent) = cache_path.parent() { + std::fs::create_dir_all(parent).ok(); + } + if let Err(e) = cache.save_to_disk(&cache_path) { + log::debug!("Failed to save build cache: {e}"); + } let mut error_count = 0; diff --git a/tests/build.rs b/tests/build.rs index a27b9a16..3324bb2b 100644 --- a/tests/build.rs +++ b/tests/build.rs @@ -7,6 +7,7 @@ use ntest_timeout::timeout; use rayon::prelude::*; use purescript_fast_compiler::build::{ 
build_from_sources_with_js, build_from_sources_with_options, build_from_sources_with_registry, + build_from_sources_incremental, cache::ModuleCache, BuildError, BuildOptions, BuildResult, }; use purescript_fast_compiler::typechecker::error::TypeError; @@ -1230,3 +1231,95 @@ fn build_from_sources() { .join("\n") ); } + +// ===== Incremental build tests ===== + +#[test] +fn incremental_build_caches_modules() { + let sources: Vec<(&str, &str)> = vec![ + ("ModA.purs", "module ModA where\n\nvalA :: Int\nvalA = 42\n"), + ("ModB.purs", "module ModB where\n\nimport ModA\n\nvalB :: Int\nvalB = valA\n"), + ]; + + let options = BuildOptions::default(); + let mut cache = ModuleCache::new(); + + // First build: everything should be typechecked + let (result1, _) = build_from_sources_incremental(&sources, &None, None, &options, &mut cache); + assert!(result1.build_errors.is_empty(), "First build should succeed"); + assert_eq!(result1.modules.len(), 2); + for m in &result1.modules { + assert!(m.type_errors.is_empty(), "Module {} should have no errors", m.module_name); + } + + // Verify cache has entries + assert!(cache.get_exports("ModA").is_some(), "ModA should be cached"); + assert!(cache.get_exports("ModB").is_some(), "ModB should be cached"); + + // Second build with same sources: should use cache (no rebuild needed) + let (result2, _) = build_from_sources_incremental(&sources, &None, None, &options, &mut cache); + assert!(result2.build_errors.is_empty(), "Second build should succeed"); + assert_eq!(result2.modules.len(), 2); + for m in &result2.modules { + assert!(m.type_errors.is_empty(), "Cached module {} should have no errors", m.module_name); + } +} + +#[test] +fn incremental_build_rebuilds_changed_module() { + let sources_v1: Vec<(&str, &str)> = vec![ + ("ModA.purs", "module ModA where\n\nvalA :: Int\nvalA = 42\n"), + ("ModB.purs", "module ModB where\n\nimport ModA\n\nvalB :: Int\nvalB = valA\n"), + ]; + + let options = BuildOptions::default(); + let mut cache = 
ModuleCache::new(); + + // First build + let (result1, _) = build_from_sources_incremental(&sources_v1, &None, None, &options, &mut cache); + assert!(result1.build_errors.is_empty()); + + // Change ModA's source + let sources_v2: Vec<(&str, &str)> = vec![ + ("ModA.purs", "module ModA where\n\nvalA :: Int\nvalA = 99\n"), + ("ModB.purs", "module ModB where\n\nimport ModA\n\nvalB :: Int\nvalB = valA\n"), + ]; + + // Second build: ModA changed, ModB depends on it, both should rebuild + let (result2, _) = build_from_sources_incremental(&sources_v2, &None, None, &options, &mut cache); + assert!(result2.build_errors.is_empty(), "Rebuild should succeed"); + assert_eq!(result2.modules.len(), 2); + for m in &result2.modules { + assert!(m.type_errors.is_empty(), "Module {} should have no errors after rebuild", m.module_name); + } +} + +#[test] +fn incremental_build_disk_roundtrip() { + let sources: Vec<(&str, &str)> = vec![ + ("ModA.purs", "module ModA where\n\nvalA :: Int\nvalA = 42\n"), + ]; + + let options = BuildOptions::default(); + let mut cache = ModuleCache::new(); + + // Build to populate cache + let (result, _) = build_from_sources_incremental(&sources, &None, None, &options, &mut cache); + assert!(result.build_errors.is_empty()); + + // Save to disk + let tmp_dir = std::env::temp_dir().join("pfc-test-cache"); + let cache_path = tmp_dir.join("cache.bin"); + cache.save_to_disk(&cache_path).expect("Failed to save cache"); + + // Load from disk + let mut loaded_cache = ModuleCache::load_from_disk(&cache_path).expect("Failed to load cache"); + assert!(loaded_cache.get_exports("ModA").is_some(), "Loaded cache should have ModA"); + + // Build with loaded cache — should use cached entries + let (result2, _) = build_from_sources_incremental(&sources, &None, None, &options, &mut loaded_cache); + assert!(result2.build_errors.is_empty(), "Build with loaded cache should succeed"); + + // Cleanup + let _ = std::fs::remove_dir_all(&tmp_dir); +} From 
9e9621e76eca0517389f59e20b47951c6e76f9a3 Mon Sep 17 00:00:00 2001 From: Rory Campbell Date: Fri, 6 Mar 2026 17:19:45 +0100 Subject: [PATCH 03/14] always fail fast --- src/build/mod.rs | 167 +++-------------------------------------------- tests/build.rs | 3 - 2 files changed, 10 insertions(+), 160 deletions(-) diff --git a/src/build/mod.rs b/src/build/mod.rs index da43568e..8fda3991 100644 --- a/src/build/mod.rs +++ b/src/build/mod.rs @@ -42,10 +42,6 @@ pub struct BuildOptions { /// If true, typecheck modules sequentially (one at a time) instead of in /// parallel. Useful for debugging memory issues or non-deterministic bugs. pub sequential: bool, - - /// If true, stop building as soon as the first error is encountered - /// (build error or type error). Useful for quick iteration. - pub fail_fast: bool, } // ===== Public types ===== @@ -283,8 +279,6 @@ fn build_from_sources_impl( ) -> (BuildResult, ModuleRegistry) { let pipeline_start = Instant::now(); let mut build_errors = Vec::new(); - let fail_fast = options.fail_fast; - // Phase 2: Parse all sources (parallel) log::debug!("Phase 2c: Parsing {} source files", sources.len()); let phase_start = Instant::now(); @@ -402,7 +396,7 @@ fn build_from_sources_impl( phase_start.elapsed() ); - if fail_fast && !build_errors.is_empty() { + if !build_errors.is_empty() { let registry = match start_registry { Some(base) => ModuleRegistry::with_base(base), None => ModuleRegistry::default(), @@ -484,7 +478,7 @@ fn build_from_sources_impl( phase_start.elapsed() ); - if fail_fast && !build_errors.is_empty() { + if !build_errors.is_empty() { log::debug!("Phase 3 failed"); return (BuildResult { modules: Vec::new(), build_errors }, registry); } @@ -601,13 +595,10 @@ fn build_from_sources_impl( ); } } - // In sequential mode, check fail_fast after each module - if fail_fast { - let has_errors = module_results.last().map_or(false, |r| !r.type_errors.is_empty()) || !build_errors.is_empty(); - if has_errors { - log::debug!("Phase 4: 
fail_fast triggered after module, stopping"); - break; - } + let has_errors = module_results.last().map_or(false, |r| !r.type_errors.is_empty()) || !build_errors.is_empty(); + if has_errors { + log::debug!("Phase 4: error after module, stopping"); + break; } } } else { @@ -694,13 +685,10 @@ fn build_from_sources_impl( } } } - // After each dependency level, check if fail_fast should stop - if fail_fast { - let err_count = module_results.iter().filter(|r| !r.type_errors.is_empty()).count(); - if !build_errors.is_empty() || err_count > 0 { - log::debug!("Phase 4: fail_fast triggered after level ({} done, {} with errors), stopping", done, err_count); - break; - } + let err_count = module_results.iter().filter(|r| !r.type_errors.is_empty()).count(); + if !build_errors.is_empty() || err_count > 0 { + log::debug!("Phase 4: error after level ({} done, {} with errors), stopping", done, err_count); + break; } } log::debug!( @@ -1213,26 +1201,6 @@ mod tests { ); } - #[test] - fn parse_error_resilience() { - let result = build_from_sources(&[ - ("src/A.purs", "module A where\nx :: Int\nx = 42"), - ("src/Bad.purs", "this is not valid purescript"), - ("src/B.purs", "module B where\nimport A\ny = x"), - ]); - // Should have a parse error for Bad.purs - assert!( - result - .build_errors - .iter() - .any(|e| matches!(e, BuildError::CompileError { .. 
})), - "expected CompileError" - ); - // A and B should still compile successfully - assert_eq!(result.modules.len(), 2); - assert!(result.modules.iter().all(|m| m.type_errors.is_empty())); - } - #[test] fn prim_import_not_missing() { let result = build_from_sources(&[( @@ -1383,121 +1351,6 @@ roundtrip x = useExceptT (mkExcept x) assert!(result.modules[0].type_errors.is_empty()); } - #[test] - fn export_despite_type_error() { - let result = build_from_sources(&[ - ( - "src/A.purs", - "\ -module A where - -f :: Int -> Int -f x = x - -g :: String -g = 42 -", - ), - ( - "src/B.purs", - "\ -module B where -import A - -y :: Int -y = f 1 -", - ), - ]); - assert!( - result.build_errors.is_empty(), - "build errors: {:?}", - result - .build_errors - .iter() - .map(|e| format!("{}", e)) - .collect::>() - ); - let a = result - .modules - .iter() - .find(|m| m.module_name == "A") - .unwrap(); - assert!( - !a.type_errors.is_empty(), - "A should have type errors from g" - ); - let b = result - .modules - .iter() - .find(|m| m.module_name == "B") - .unwrap(); - assert!( - b.type_errors.is_empty(), - "B should compile cleanly, got: {:?}", - b.type_errors - .iter() - .map(|e| e.to_string()) - .collect::>() - ); - } - - #[test] - fn signature_exported_on_body_error() { - let result = build_from_sources(&[ - ( - "src/A.purs", - "\ -module A where - -h :: Int -> Int -h x = \"not an int\" -", - ), - ( - "src/B.purs", - "\ -module B where -import A - -y :: Int -> Int -y = h -", - ), - ]); - assert!( - result.build_errors.is_empty(), - "build errors: {:?}", - result - .build_errors - .iter() - .map(|e| format!("{}", e)) - .collect::>() - ); - let a = result - .modules - .iter() - .find(|m| m.module_name == "A") - .unwrap(); - assert!( - !a.type_errors.is_empty(), - "A should have type errors from h" - ); - let b = result - .modules - .iter() - .find(|m| m.module_name == "B") - .unwrap(); - assert!( - b.type_errors.is_empty(), - "B should compile cleanly using h's declared signature, 
got: {:?}", - b.type_errors - .iter() - .map(|e| e.to_string()) - .collect::>() - ); - } - #[test] fn instance_head_record_in_type_app() { let result = build_from_sources(&[( diff --git a/tests/build.rs b/tests/build.rs index 3324bb2b..e38e8e3c 100644 --- a/tests/build.rs +++ b/tests/build.rs @@ -728,7 +728,6 @@ fn build_all_packages() { module_timeout: Some(std::time::Duration::from_secs(timeout_secs)), output_dir: None, sequential: false, - fail_fast: false, }; // Discover all packages with src/ directories @@ -924,13 +923,11 @@ fn build_from_sources() { .unwrap_or(60); let sequential = std::env::var("SEQUENTIAL").is_ok(); - let fail_fast = std::env::var("FAIL_FAST").is_ok(); let options = BuildOptions { module_timeout: Some(std::time::Duration::from_secs(timeout_secs)), output_dir: None, sequential, - fail_fast, }; // Step 1: Glob all patterns to collect file paths From 2a142760f1d2be7f28f2ea058bf1c0eab9ce2491 Mon Sep 17 00:00:00 2001 From: Rory Campbell Date: Fri, 6 Mar 2026 17:43:42 +0100 Subject: [PATCH 04/14] remove unneeded and buggy UnusedFFIImplementations error --- src/build/error.rs | 7 ------- src/build/mod.rs | 12 ------------ src/js_ffi.rs | 6 ------ src/main.rs | 21 ++++++++++++--------- tests/build.rs | 1 - 5 files changed, 12 insertions(+), 35 deletions(-) diff --git a/src/build/error.rs b/src/build/error.rs index 33a43683..7820128b 100644 --- a/src/build/error.rs +++ b/src/build/error.rs @@ -48,12 +48,6 @@ pub enum BuildError { path: PathBuf, missing: Vec, }, - #[error("The following values in the foreign module for module {module_name} are unused: {}", unused.join(", "))] - UnusedFFIImplementations { - module_name: String, - path: PathBuf, - unused: Vec, - }, #[error("CommonJS exports in the ES foreign module for module {module_name} are unsupported: {}", exports.join(", "))] UnsupportedFFICommonJSExports { module_name: String, @@ -99,7 +93,6 @@ impl BuildError { BuildError::InvalidModuleName { .. 
} => "SyntaxError".into(), BuildError::MissingFFIModule { .. } => "MissingFFIModule".into(), BuildError::MissingFFIImplementations { .. } => "MissingFFIImplementations".into(), - BuildError::UnusedFFIImplementations { .. } => "UnusedFFIImplementations".into(), BuildError::UnsupportedFFICommonJSExports { .. } => "UnsupportedFFICommonJSExports".into(), BuildError::UnsupportedFFICommonJSImports { .. } => "UnsupportedFFICommonJSImports".into(), BuildError::DeprecatedFFICommonJSModule { .. } => "DeprecatedFFICommonJSModule".into(), diff --git a/src/build/mod.rs b/src/build/mod.rs index 8fda3991..c6f0fb5e 100644 --- a/src/build/mod.rs +++ b/src/build/mod.rs @@ -748,18 +748,6 @@ fn build_from_sources_impl( missing, }); } - js_ffi::FfiError::UnusedFFIImplementations { unused } => { - log::debug!( - " FFI error in {}: unused implementations: {:?}", - pm.module_name, - unused - ); - build_errors.push(BuildError::UnusedFFIImplementations { - module_name: pm.module_name.clone(), - path: pm.path.clone(), - unused, - }); - } js_ffi::FfiError::UnsupportedFFICommonJSExports { exports } => { build_errors.push( BuildError::UnsupportedFFICommonJSExports { diff --git a/src/js_ffi.rs b/src/js_ffi.rs index 011fe097..3a2d273a 100644 --- a/src/js_ffi.rs +++ b/src/js_ffi.rs @@ -31,8 +31,6 @@ pub enum FfiError { DeprecatedFFICommonJSModule, /// Declared `foreign import` but not exported in FFI MissingFFIImplementations { missing: Vec }, - /// Exported in FFI but no corresponding `foreign import` - UnusedFFIImplementations { unused: Vec }, /// CommonJS exports mixed with ES module syntax UnsupportedFFICommonJSExports { exports: Vec }, /// CommonJS imports (require) mixed with ES module syntax @@ -339,9 +337,5 @@ pub fn validate_foreign_module( errors.push(FfiError::MissingFFIImplementations { missing }); } - if !unused.is_empty() { - errors.push(FfiError::UnusedFFIImplementations { unused }); - } - errors } diff --git a/src/main.rs b/src/main.rs index 6696b796..433ed590 100644 --- 
a/src/main.rs +++ b/src/main.rs @@ -74,29 +74,32 @@ fn main() { log::debug!("Failed to save build cache: {e}"); } - let mut error_count = 0; + let mut error_messages: Vec = Vec::new(); for err in &result.build_errors { - eprintln!("[error] {err}"); - error_count += 1; + error_messages.push(format!("{err}")); } - for module in &result.modules { + let total = result.modules.len(); + for (i, module) in result.modules.iter().enumerate() { if module.type_errors.is_empty() { - println!("[ok] {}", module.module_name); + println!("[{}/{}] {}", i + 1, total, module.module_name); } else { for err in &module.type_errors { - eprintln!("[error] {}: {err}", module.module_name); - error_count += 1; + error_messages.push(format!("{}: {err}", module.module_name)); } } } - if error_count > 0 { + if !error_messages.is_empty() { + let error_count = error_messages.len(); eprintln!( - "\nCompilation failed with {error_count} error{}.", + "\nCompilation failed with {error_count} error{}:\n", if error_count == 1 { "" } else { "s" } ); + for msg in &error_messages { + eprintln!(" {msg}"); + } std::process::exit(1); } else { println!( diff --git a/tests/build.rs b/tests/build.rs index e38e8e3c..072de60b 100644 --- a/tests/build.rs +++ b/tests/build.rs @@ -535,7 +535,6 @@ fn matches_expected_error( "UnsupportedTypeInKind" => has("UnsupportedTypeInKind"), "CannotDeriveInvalidConstructorArg" => has("CannotDeriveInvalidConstructorArg"), "MissingFFIImplementations" => has("MissingFFIImplementations"), - "UnusedFFIImplementations" => has("UnusedFFIImplementations"), "UnsupportedFFICommonJSExports" => has("UnsupportedFFICommonJSExports"), "UnsupportedFFICommonJSImports" => has("UnsupportedFFICommonJSImports"), "DeprecatedFFICommonJSModule" => has("DeprecatedFFICommonJSModule"), From 051643a6729b5deeaf220aea6176be7129379c6c Mon Sep 17 00:00:00 2001 From: Rory Campbell Date: Fri, 6 Mar 2026 20:36:26 +0100 Subject: [PATCH 05/14] faster caching --- Cargo.toml | 1 + src/build/cache.rs | 67 ++++-- 
src/build/portable.rs | 497 +++++++++++++++++++----------------------- src/interner.rs | 7 + src/main.rs | 4 + 5 files changed, 291 insertions(+), 285 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 34a7f3c7..de22fd9c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ tokio = { version = "1", features = ["full"] } serde = { version = "1", features = ["derive"] } serde_json = "1" bincode = "1" +zstd = "0.13" [build-dependencies] lalrpop = "0.22" diff --git a/src/build/cache.rs b/src/build/cache.rs index 777e2549..342ece05 100644 --- a/src/build/cache.rs +++ b/src/build/cache.rs @@ -7,10 +7,13 @@ use std::collections::{HashMap, HashSet, VecDeque}; use std::hash::{Hash, Hasher}; use std::io; use std::path::Path; +use std::sync::Arc; + +use rayon::prelude::*; use crate::typechecker::registry::ModuleExports; -use super::portable::{PModuleExports, PortableCacheFile, PortableCachedModule}; +use super::portable::{PModuleExports, PortableCacheFile, PortableCachedModule, StringTableBuilder, StringTableReader}; // ===== Module Cache ===== @@ -27,6 +30,8 @@ pub struct ModuleCache { entries: HashMap, /// Reverse dependency graph: module → modules that import it dependents: HashMap>, + /// Whether the cache has been modified since last save/load. + dirty: bool, } impl ModuleCache { @@ -83,6 +88,7 @@ impl ModuleCache { exports, imports, }); + self.dirty = true; } /// Build the reverse dependency graph from cached import data. @@ -119,40 +125,66 @@ impl ModuleCache { /// Remove modules that are no longer in the source set. pub fn retain_modules(&mut self, module_names: &HashSet) { + let before = self.entries.len(); self.entries.retain(|k, _| module_names.contains(k)); + if self.entries.len() != before { + self.dirty = true; + } + } + + /// Returns true if the cache has been modified since load. + pub fn is_dirty(&self) -> bool { + self.dirty } - /// Save cache to disk using bincode serialization. 
+ /// Save cache to disk using bincode serialization with string table. pub fn save_to_disk(&self, path: &Path) -> io::Result<()> { + if !self.dirty { + log::debug!("Cache unchanged, skipping save"); + return Ok(()); + } + let mut st = StringTableBuilder::new(); + + let modules = self.entries.iter().map(|(name, cached)| { + (name.clone(), PortableCachedModule { + content_hash: cached.content_hash, + exports: PModuleExports::from_exports(&cached.exports, &mut st), + imports: cached.imports.clone(), + }) + }).collect(); + let portable = PortableCacheFile { - modules: self.entries.iter().map(|(name, cached)| { - (name.clone(), PortableCachedModule { - content_hash: cached.content_hash, - exports: PModuleExports::from(&cached.exports), - imports: cached.imports.clone(), - }) - }).collect(), + string_table: st.into_table(), + modules, }; - let encoded = bincode::serialize(&portable) - .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode serialize: {e}")))?; - if let Some(parent) = path.parent() { std::fs::create_dir_all(parent)?; } - std::fs::write(path, encoded) + let file = std::fs::File::create(path)?; + let mut encoder = zstd::Encoder::new(file, 1) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd encoder: {e}")))?; + bincode::serialize_into(&mut encoder, &portable) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode serialize: {e}")))?; + encoder.finish() + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd finish: {e}")))?; + Ok(()) } /// Load cache from disk. 
pub fn load_from_disk(path: &Path) -> io::Result { - let data = std::fs::read(path)?; - let portable: PortableCacheFile = bincode::deserialize(&data) + let file = std::fs::File::open(path)?; + let decoder = io::BufReader::new(zstd::Decoder::new(file) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd decoder: {e}")))?); + let portable: PortableCacheFile = bincode::deserialize_from(decoder) .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode deserialize: {e}")))?; - let entries = portable.modules.into_iter().map(|(name, cached)| { + let st = Arc::new(StringTableReader::new(portable.string_table)); + + let entries: HashMap = portable.modules.into_par_iter().map(|(name, cached)| { (name, CachedModule { content_hash: cached.content_hash, - exports: ModuleExports::from(cached.exports), + exports: cached.exports.to_exports(&st), imports: cached.imports, }) }).collect(); @@ -160,6 +192,7 @@ impl ModuleCache { let mut cache = ModuleCache { entries, dependents: HashMap::new(), + dirty: false, }; cache.build_reverse_deps(); Ok(cache) diff --git a/src/build/portable.rs b/src/build/portable.rs index 39ac9fd0..a19e1617 100644 --- a/src/build/portable.rs +++ b/src/build/portable.rs @@ -1,7 +1,7 @@ //! Portable (serializable) representations of typechecker types. //! -//! These mirror the core types but use `String` instead of `Symbol` -//! (which is process-local). Used for on-disk caching of ModuleExports. +//! Uses a deduplicated string table so each symbol is stored once. +//! Symbol references are u32 indices into the string table. use std::collections::{HashMap, HashSet}; @@ -12,39 +12,75 @@ use crate::interner; use crate::typechecker::registry::ModuleExports; use crate::typechecker::types::{Role, Scheme, TyVarId, Type}; -// ===== Helper conversions ===== +// ===== String Table ===== -fn sym_to_s(s: interner::Symbol) -> String { - interner::resolve(s).unwrap_or_default().to_string() +/// Builds a deduplicated string table during serialization. 
+/// Each unique Symbol is resolved exactly once. +pub struct StringTableBuilder { + strings: Vec, + sym_to_idx: HashMap, } -fn s_to_sym(s: &str) -> interner::Symbol { - interner::intern(s) +impl StringTableBuilder { + pub fn new() -> Self { + Self { + strings: Vec::new(), + sym_to_idx: HashMap::new(), + } + } + + pub fn add(&mut self, sym: interner::Symbol) -> u32 { + if let Some(&idx) = self.sym_to_idx.get(&sym) { + return idx; + } + let s = interner::resolve(sym).unwrap_or_default(); + let idx = self.strings.len() as u32; + self.strings.push(s); + self.sym_to_idx.insert(sym, idx); + idx + } + + pub fn into_table(self) -> Vec { + self.strings + } +} + +/// Reads from a string table during deserialization. +/// All strings are interned in one batch. +pub struct StringTableReader { + symbols: Vec, +} + +impl StringTableReader { + pub fn new(table: Vec) -> Self { + let symbols = interner::intern_batch(&table); + Self { symbols } + } + + pub fn sym(&self, idx: u32) -> interner::Symbol { + self.symbols[idx as usize] + } } // ===== Portable QualifiedIdent ===== #[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash)] pub struct PQI { - pub module: Option, - pub name: String, + pub module: Option, + pub name: u32, } -impl From<&QualifiedIdent> for PQI { - fn from(qi: &QualifiedIdent) -> Self { - PQI { - module: qi.module.map(sym_to_s), - name: sym_to_s(qi.name), - } +fn conv_qi(qi: &QualifiedIdent, st: &mut StringTableBuilder) -> PQI { + PQI { + module: qi.module.map(|s| st.add(s)), + name: st.add(qi.name), } } -impl From<&PQI> for QualifiedIdent { - fn from(p: &PQI) -> Self { - QualifiedIdent { - module: p.module.as_ref().map(|s| s_to_sym(s)), - name: s_to_sym(&p.name), - } +fn rest_qi(p: &PQI, st: &StringTableReader) -> QualifiedIdent { + QualifiedIdent { + module: p.module.map(|i| st.sym(i)), + name: st.sym(p.name), } } @@ -53,57 +89,65 @@ impl From<&PQI> for QualifiedIdent { #[derive(Serialize, Deserialize, Clone, Debug)] pub enum PType { Unif(u32), - 
Var(String), + Var(u32), Con(PQI), App(Box, Box), Fun(Box, Box), - Forall(Vec<(String, bool)>, Box), - Record(Vec<(String, PType)>, Option>), - TypeString(String), + Forall(Vec<(u32, bool)>, Box), + Record(Vec<(u32, PType)>, Option>), + TypeString(u32), TypeInt(i64), } -impl From<&Type> for PType { - fn from(t: &Type) -> Self { - match t { - Type::Unif(id) => PType::Unif(id.0), - Type::Var(s) => PType::Var(sym_to_s(*s)), - Type::Con(qi) => PType::Con(qi.into()), - Type::App(f, a) => PType::App(Box::new(f.as_ref().into()), Box::new(a.as_ref().into())), - Type::Fun(a, b) => PType::Fun(Box::new(a.as_ref().into()), Box::new(b.as_ref().into())), - Type::Forall(vars, body) => PType::Forall( - vars.iter().map(|(s, v)| (sym_to_s(*s), *v)).collect(), - Box::new(body.as_ref().into()), - ), - Type::Record(fields, tail) => PType::Record( - fields.iter().map(|(s, t)| (sym_to_s(*s), t.into())).collect(), - tail.as_ref().map(|t| Box::new(t.as_ref().into())), - ), - Type::TypeString(s) => PType::TypeString(sym_to_s(*s)), - Type::TypeInt(i) => PType::TypeInt(*i), - } +fn conv_type(t: &Type, st: &mut StringTableBuilder) -> PType { + match t { + Type::Unif(id) => PType::Unif(id.0), + Type::Var(s) => PType::Var(st.add(*s)), + Type::Con(qi) => PType::Con(conv_qi(qi, st)), + Type::App(f, a) => PType::App( + Box::new(conv_type(f, st)), + Box::new(conv_type(a, st)), + ), + Type::Fun(a, b) => PType::Fun( + Box::new(conv_type(a, st)), + Box::new(conv_type(b, st)), + ), + Type::Forall(vars, body) => PType::Forall( + vars.iter().map(|(s, v)| (st.add(*s), *v)).collect(), + Box::new(conv_type(body, st)), + ), + Type::Record(fields, tail) => PType::Record( + fields.iter().map(|(s, t)| (st.add(*s), conv_type(t, st))).collect(), + tail.as_ref().map(|t| Box::new(conv_type(t, st))), + ), + Type::TypeString(s) => PType::TypeString(st.add(*s)), + Type::TypeInt(i) => PType::TypeInt(*i), } } -impl From<&PType> for Type { - fn from(p: &PType) -> Self { - match p { - PType::Unif(id) => 
Type::Unif(TyVarId(*id)), - PType::Var(s) => Type::Var(s_to_sym(s)), - PType::Con(qi) => Type::Con(qi.into()), - PType::App(f, a) => Type::App(Box::new(f.as_ref().into()), Box::new(a.as_ref().into())), - PType::Fun(a, b) => Type::Fun(Box::new(a.as_ref().into()), Box::new(b.as_ref().into())), - PType::Forall(vars, body) => Type::Forall( - vars.iter().map(|(s, v)| (s_to_sym(s), *v)).collect(), - Box::new(body.as_ref().into()), - ), - PType::Record(fields, tail) => Type::Record( - fields.iter().map(|(s, t)| (s_to_sym(s), t.into())).collect(), - tail.as_ref().map(|t| Box::new(t.as_ref().into())), - ), - PType::TypeString(s) => Type::TypeString(s_to_sym(s)), - PType::TypeInt(i) => Type::TypeInt(*i), - } +fn rest_type(p: &PType, st: &StringTableReader) -> Type { + match p { + PType::Unif(id) => Type::Unif(TyVarId(*id)), + PType::Var(s) => Type::Var(st.sym(*s)), + PType::Con(qi) => Type::Con(rest_qi(qi, st)), + PType::App(f, a) => Type::App( + Box::new(rest_type(f, st)), + Box::new(rest_type(a, st)), + ), + PType::Fun(a, b) => Type::Fun( + Box::new(rest_type(a, st)), + Box::new(rest_type(b, st)), + ), + PType::Forall(vars, body) => Type::Forall( + vars.iter().map(|(s, v)| (st.sym(*s), *v)).collect(), + Box::new(rest_type(body, st)), + ), + PType::Record(fields, tail) => Type::Record( + fields.iter().map(|(s, t)| (st.sym(*s), rest_type(t, st))).collect(), + tail.as_ref().map(|t| Box::new(rest_type(t, st))), + ), + PType::TypeString(s) => Type::TypeString(st.sym(*s)), + PType::TypeInt(i) => Type::TypeInt(*i), } } @@ -111,25 +155,21 @@ impl From<&PType> for Type { #[derive(Serialize, Deserialize, Clone, Debug)] pub struct PScheme { - pub forall_vars: Vec, + pub forall_vars: Vec, pub ty: PType, } -impl From<&Scheme> for PScheme { - fn from(s: &Scheme) -> Self { - PScheme { - forall_vars: s.forall_vars.iter().map(|v| sym_to_s(*v)).collect(), - ty: (&s.ty).into(), - } +fn conv_scheme(s: &Scheme, st: &mut StringTableBuilder) -> PScheme { + PScheme { + forall_vars: 
s.forall_vars.iter().map(|v| st.add(*v)).collect(), + ty: conv_type(&s.ty, st), } } -impl From<&PScheme> for Scheme { - fn from(p: &PScheme) -> Self { - Scheme { - forall_vars: p.forall_vars.iter().map(|s| s_to_sym(s)).collect(), - ty: (&p.ty).into(), - } +fn rest_scheme(p: &PScheme, st: &StringTableReader) -> Scheme { + Scheme { + forall_vars: p.forall_vars.iter().map(|v| st.sym(*v)).collect(), + ty: rest_type(&p.ty, st), } } @@ -142,23 +182,19 @@ pub enum PAssociativity { None, } -impl From<&Associativity> for PAssociativity { - fn from(a: &Associativity) -> Self { - match a { - Associativity::Left => PAssociativity::Left, - Associativity::Right => PAssociativity::Right, - Associativity::None => PAssociativity::None, - } +fn conv_assoc(a: &Associativity) -> PAssociativity { + match a { + Associativity::Left => PAssociativity::Left, + Associativity::Right => PAssociativity::Right, + Associativity::None => PAssociativity::None, } } -impl From<&PAssociativity> for Associativity { - fn from(p: &PAssociativity) -> Self { - match p { - PAssociativity::Left => Associativity::Left, - PAssociativity::Right => Associativity::Right, - PAssociativity::None => Associativity::None, - } +fn rest_assoc(p: &PAssociativity) -> Associativity { + match p { + PAssociativity::Left => Associativity::Left, + PAssociativity::Right => Associativity::Right, + PAssociativity::None => Associativity::None, } } @@ -171,100 +207,22 @@ pub enum PRole { Nominal, } -impl From<&Role> for PRole { - fn from(r: &Role) -> Self { - match r { - Role::Phantom => PRole::Phantom, - Role::Representational => PRole::Representational, - Role::Nominal => PRole::Nominal, - } +fn conv_role(r: &Role) -> PRole { + match r { + Role::Phantom => PRole::Phantom, + Role::Representational => PRole::Representational, + Role::Nominal => PRole::Nominal, } } -impl From<&PRole> for Role { - fn from(p: &PRole) -> Self { - match p { - PRole::Phantom => Role::Phantom, - PRole::Representational => Role::Representational, - 
PRole::Nominal => Role::Nominal, - } +fn rest_role(p: &PRole) -> Role { + match p { + PRole::Phantom => Role::Phantom, + PRole::Representational => Role::Representational, + PRole::Nominal => Role::Nominal, } } -// ===== Collection conversion helpers ===== - -fn map_qi_scheme(m: &HashMap) -> HashMap { - m.iter().map(|(k, v)| (k.into(), v.into())).collect() -} - -fn unmap_qi_scheme(m: &HashMap) -> HashMap { - m.iter().map(|(k, v)| (k.into(), v.into())).collect() -} - -fn map_qi_qi(m: &HashMap) -> HashMap { - m.iter().map(|(k, v)| (k.into(), v.into())).collect() -} - -fn unmap_qi_qi(m: &HashMap) -> HashMap { - m.iter().map(|(k, v)| (k.into(), v.into())).collect() -} - -fn map_qi_vec_qi(m: &HashMap>) -> HashMap> { - m.iter().map(|(k, v)| (k.into(), v.iter().map(|qi| qi.into()).collect())).collect() -} - -fn unmap_qi_vec_qi(m: &HashMap>) -> HashMap> { - m.iter().map(|(k, v)| (k.into(), v.iter().map(|qi| qi.into()).collect())).collect() -} - -fn map_set_qi(s: &HashSet) -> HashSet { - s.iter().map(|qi| qi.into()).collect() -} - -fn unmap_set_qi(s: &HashSet) -> HashSet { - s.iter().map(|qi| qi.into()).collect() -} - -fn map_qi_usize(m: &HashMap) -> HashMap { - m.iter().map(|(k, v)| (k.into(), *v)).collect() -} - -fn unmap_qi_usize(m: &HashMap) -> HashMap { - m.iter().map(|(k, v)| (k.into(), *v)).collect() -} - -fn map_sym_sym(m: &HashMap) -> HashMap { - m.iter().map(|(k, v)| (sym_to_s(*k), sym_to_s(*v))).collect() -} - -fn unmap_sym_sym(m: &HashMap) -> HashMap { - m.iter().map(|(k, v)| (s_to_sym(k), s_to_sym(v))).collect() -} - -fn map_set_sym(s: &HashSet) -> HashSet { - s.iter().map(|sym| sym_to_s(*sym)).collect() -} - -fn unmap_set_sym(s: &HashSet) -> HashSet { - s.iter().map(|s| s_to_sym(s)).collect() -} - -fn map_sym_type(m: &HashMap) -> HashMap { - m.iter().map(|(k, v)| (sym_to_s(*k), v.into())).collect() -} - -fn unmap_sym_type(m: &HashMap) -> HashMap { - m.iter().map(|(k, v)| (s_to_sym(k), v.into())).collect() -} - -fn map_sym_roles(m: &HashMap>) -> HashMap> { - 
m.iter().map(|(k, v)| (sym_to_s(*k), v.iter().map(|r| r.into()).collect())).collect() -} - -fn unmap_sym_roles(m: &HashMap>) -> HashMap> { - m.iter().map(|(k, v)| (s_to_sym(k), v.iter().map(|r| r.into()).collect())).collect() -} - // ===== Portable ModuleExports ===== #[derive(Serialize, Deserialize, Clone, Debug)] @@ -282,136 +240,138 @@ pub struct PModuleExports { pub constrained_class_methods: HashSet, pub type_aliases: HashMap, PType)>, pub class_param_counts: HashMap, - pub value_origins: HashMap, - pub type_origins: HashMap, - pub class_origins: HashMap, - pub operator_class_targets: HashMap, - pub class_fundeps: HashMap, Vec<(Vec, Vec)>)>, + pub value_origins: HashMap, + pub type_origins: HashMap, + pub class_origins: HashMap, + pub operator_class_targets: HashMap, + pub class_fundeps: HashMap, Vec<(Vec, Vec)>)>, pub type_con_arities: HashMap, - pub type_roles: HashMap>, - pub newtype_names: HashSet, + pub type_roles: HashMap>, + pub newtype_names: HashSet, pub signature_constraints: HashMap)>>, - pub type_kinds: HashMap, - pub class_type_kinds: HashMap, - pub partial_dischargers: HashSet, - pub self_referential_aliases: HashSet, - pub class_superclasses: HashMap, Vec<(PQI, Vec)>)>, - pub method_own_constraints: HashMap>, + pub type_kinds: HashMap, + pub class_type_kinds: HashMap, + pub partial_dischargers: HashSet, + pub self_referential_aliases: HashSet, + pub class_superclasses: HashMap, Vec<(PQI, Vec)>)>, + pub method_own_constraints: HashMap>, } -impl From<&ModuleExports> for PModuleExports { - fn from(e: &ModuleExports) -> Self { +impl PModuleExports { + pub fn from_exports(e: &ModuleExports, st: &mut StringTableBuilder) -> Self { PModuleExports { - values: map_qi_scheme(&e.values), + values: e.values.iter().map(|(k, v)| (conv_qi(k, st), conv_scheme(v, st))).collect(), class_methods: e.class_methods.iter().map(|(k, (c, vs))| { - (k.into(), (c.into(), vs.iter().map(|v| v.into()).collect())) + (conv_qi(k, st), (conv_qi(c, st), vs.iter().map(|v| 
conv_qi(v, st)).collect())) + }).collect(), + data_constructors: e.data_constructors.iter().map(|(k, v)| { + (conv_qi(k, st), v.iter().map(|qi| conv_qi(qi, st)).collect()) }).collect(), - data_constructors: map_qi_vec_qi(&e.data_constructors), ctor_details: e.ctor_details.iter().map(|(k, (p, vs, ts))| { - (k.into(), (p.into(), vs.iter().map(|v| v.into()).collect(), ts.iter().map(|t| t.into()).collect())) + (conv_qi(k, st), (conv_qi(p, st), vs.iter().map(|v| conv_qi(v, st)).collect(), ts.iter().map(|t| conv_type(t, st)).collect())) }).collect(), instances: e.instances.iter().map(|(k, v)| { - (k.into(), v.iter().map(|(ts, cs)| { - (ts.iter().map(|t| t.into()).collect(), cs.iter().map(|(c, ts2)| { - (c.into(), ts2.iter().map(|t| t.into()).collect()) + (conv_qi(k, st), v.iter().map(|(ts, cs)| { + (ts.iter().map(|t| conv_type(t, st)).collect(), cs.iter().map(|(c, ts2)| { + (conv_qi(c, st), ts2.iter().map(|t| conv_type(t, st)).collect()) }).collect()) }).collect()) }).collect(), - type_operators: map_qi_qi(&e.type_operators), - value_fixities: e.value_fixities.iter().map(|(k, (a, p))| (k.into(), (a.into(), *p))).collect(), - type_fixities: e.type_fixities.iter().map(|(k, (a, p))| (k.into(), (a.into(), *p))).collect(), - function_op_aliases: map_set_qi(&e.function_op_aliases), - value_operator_targets: map_qi_qi(&e.value_operator_targets), - constrained_class_methods: map_set_qi(&e.constrained_class_methods), + type_operators: e.type_operators.iter().map(|(k, v)| (conv_qi(k, st), conv_qi(v, st))).collect(), + value_fixities: e.value_fixities.iter().map(|(k, (a, p))| (conv_qi(k, st), (conv_assoc(a), *p))).collect(), + type_fixities: e.type_fixities.iter().map(|(k, (a, p))| (conv_qi(k, st), (conv_assoc(a), *p))).collect(), + function_op_aliases: e.function_op_aliases.iter().map(|qi| conv_qi(qi, st)).collect(), + value_operator_targets: e.value_operator_targets.iter().map(|(k, v)| (conv_qi(k, st), conv_qi(v, st))).collect(), + constrained_class_methods: 
e.constrained_class_methods.iter().map(|qi| conv_qi(qi, st)).collect(), type_aliases: e.type_aliases.iter().map(|(k, (ps, ty))| { - (k.into(), (ps.iter().map(|p| p.into()).collect(), ty.into())) + (conv_qi(k, st), (ps.iter().map(|p| conv_qi(p, st)).collect(), conv_type(ty, st))) }).collect(), - class_param_counts: map_qi_usize(&e.class_param_counts), - value_origins: map_sym_sym(&e.value_origins), - type_origins: map_sym_sym(&e.type_origins), - class_origins: map_sym_sym(&e.class_origins), - operator_class_targets: map_sym_sym(&e.operator_class_targets), + class_param_counts: e.class_param_counts.iter().map(|(k, v)| (conv_qi(k, st), *v)).collect(), + value_origins: e.value_origins.iter().map(|(k, v)| (st.add(*k), st.add(*v))).collect(), + type_origins: e.type_origins.iter().map(|(k, v)| (st.add(*k), st.add(*v))).collect(), + class_origins: e.class_origins.iter().map(|(k, v)| (st.add(*k), st.add(*v))).collect(), + operator_class_targets: e.operator_class_targets.iter().map(|(k, v)| (st.add(*k), st.add(*v))).collect(), class_fundeps: e.class_fundeps.iter().map(|(k, (vs, fs))| { - (sym_to_s(*k), (vs.iter().map(|v| sym_to_s(*v)).collect(), fs.clone())) + (st.add(*k), (vs.iter().map(|v| st.add(*v)).collect(), fs.clone())) }).collect(), - type_con_arities: map_qi_usize(&e.type_con_arities), - type_roles: map_sym_roles(&e.type_roles), - newtype_names: map_set_sym(&e.newtype_names), + type_con_arities: e.type_con_arities.iter().map(|(k, v)| (conv_qi(k, st), *v)).collect(), + type_roles: e.type_roles.iter().map(|(k, v)| (st.add(*k), v.iter().map(conv_role).collect())).collect(), + newtype_names: e.newtype_names.iter().map(|s| st.add(*s)).collect(), signature_constraints: e.signature_constraints.iter().map(|(k, v)| { - (k.into(), v.iter().map(|(c, ts)| { - (c.into(), ts.iter().map(|t| t.into()).collect()) + (conv_qi(k, st), v.iter().map(|(c, ts)| { + (conv_qi(c, st), ts.iter().map(|t| conv_type(t, st)).collect()) }).collect()) }).collect(), - type_kinds: 
map_sym_type(&e.type_kinds), - class_type_kinds: map_sym_type(&e.class_type_kinds), - partial_dischargers: map_set_sym(&e.partial_dischargers), - self_referential_aliases: map_set_sym(&e.self_referential_aliases), + type_kinds: e.type_kinds.iter().map(|(k, v)| (st.add(*k), conv_type(v, st))).collect(), + class_type_kinds: e.class_type_kinds.iter().map(|(k, v)| (st.add(*k), conv_type(v, st))).collect(), + partial_dischargers: e.partial_dischargers.iter().map(|s| st.add(*s)).collect(), + self_referential_aliases: e.self_referential_aliases.iter().map(|s| st.add(*s)).collect(), class_superclasses: e.class_superclasses.iter().map(|(k, (vs, cs))| { - (k.into(), (vs.iter().map(|v| sym_to_s(*v)).collect(), cs.iter().map(|(c, ts)| { - (c.into(), ts.iter().map(|t| t.into()).collect()) + (conv_qi(k, st), (vs.iter().map(|v| st.add(*v)).collect(), cs.iter().map(|(c, ts)| { + (conv_qi(c, st), ts.iter().map(|t| conv_type(t, st)).collect()) }).collect())) }).collect(), method_own_constraints: e.method_own_constraints.iter().map(|(k, v)| { - (k.into(), v.iter().map(|s| sym_to_s(*s)).collect()) + (conv_qi(k, st), v.iter().map(|s| st.add(*s)).collect()) }).collect(), } } -} -impl From for ModuleExports { - fn from(p: PModuleExports) -> Self { + pub fn to_exports(&self, st: &StringTableReader) -> ModuleExports { ModuleExports { - values: unmap_qi_scheme(&p.values), - class_methods: p.class_methods.iter().map(|(k, (c, vs))| { - (k.into(), (c.into(), vs.iter().map(|v| v.into()).collect())) + values: self.values.iter().map(|(k, v)| (rest_qi(k, st), rest_scheme(v, st))).collect(), + class_methods: self.class_methods.iter().map(|(k, (c, vs))| { + (rest_qi(k, st), (rest_qi(c, st), vs.iter().map(|v| rest_qi(v, st)).collect())) + }).collect(), + data_constructors: self.data_constructors.iter().map(|(k, v)| { + (rest_qi(k, st), v.iter().map(|qi| rest_qi(qi, st)).collect()) }).collect(), - data_constructors: unmap_qi_vec_qi(&p.data_constructors), - ctor_details: p.ctor_details.iter().map(|(k, 
(par, vs, ts))| { - (k.into(), (par.into(), vs.iter().map(|v| v.into()).collect(), ts.iter().map(|t| t.into()).collect())) + ctor_details: self.ctor_details.iter().map(|(k, (p, vs, ts))| { + (rest_qi(k, st), (rest_qi(p, st), vs.iter().map(|v| rest_qi(v, st)).collect(), ts.iter().map(|t| rest_type(t, st)).collect())) }).collect(), - instances: p.instances.iter().map(|(k, v)| { - (k.into(), v.iter().map(|(ts, cs)| { - (ts.iter().map(|t| t.into()).collect(), cs.iter().map(|(c, ts2)| { - (c.into(), ts2.iter().map(|t| t.into()).collect()) + instances: self.instances.iter().map(|(k, v)| { + (rest_qi(k, st), v.iter().map(|(ts, cs)| { + (ts.iter().map(|t| rest_type(t, st)).collect(), cs.iter().map(|(c, ts2)| { + (rest_qi(c, st), ts2.iter().map(|t| rest_type(t, st)).collect()) }).collect()) }).collect()) }).collect(), - type_operators: unmap_qi_qi(&p.type_operators), - value_fixities: p.value_fixities.iter().map(|(k, (a, pr))| (k.into(), (a.into(), *pr))).collect(), - type_fixities: p.type_fixities.iter().map(|(k, (a, pr))| (k.into(), (a.into(), *pr))).collect(), - function_op_aliases: unmap_set_qi(&p.function_op_aliases), - value_operator_targets: unmap_qi_qi(&p.value_operator_targets), - constrained_class_methods: unmap_set_qi(&p.constrained_class_methods), - type_aliases: p.type_aliases.iter().map(|(k, (ps, ty))| { - (k.into(), (ps.iter().map(|p| p.into()).collect(), ty.into())) + type_operators: self.type_operators.iter().map(|(k, v)| (rest_qi(k, st), rest_qi(v, st))).collect(), + value_fixities: self.value_fixities.iter().map(|(k, (a, p))| (rest_qi(k, st), (rest_assoc(a), *p))).collect(), + type_fixities: self.type_fixities.iter().map(|(k, (a, p))| (rest_qi(k, st), (rest_assoc(a), *p))).collect(), + function_op_aliases: self.function_op_aliases.iter().map(|qi| rest_qi(qi, st)).collect(), + value_operator_targets: self.value_operator_targets.iter().map(|(k, v)| (rest_qi(k, st), rest_qi(v, st))).collect(), + constrained_class_methods: 
self.constrained_class_methods.iter().map(|qi| rest_qi(qi, st)).collect(), + type_aliases: self.type_aliases.iter().map(|(k, (ps, ty))| { + (rest_qi(k, st), (ps.iter().map(|p| rest_qi(p, st)).collect(), rest_type(ty, st))) }).collect(), - class_param_counts: unmap_qi_usize(&p.class_param_counts), - value_origins: unmap_sym_sym(&p.value_origins), - type_origins: unmap_sym_sym(&p.type_origins), - class_origins: unmap_sym_sym(&p.class_origins), - operator_class_targets: unmap_sym_sym(&p.operator_class_targets), - class_fundeps: p.class_fundeps.iter().map(|(k, (vs, fs))| { - (s_to_sym(k), (vs.iter().map(|v| s_to_sym(v)).collect(), fs.clone())) + class_param_counts: self.class_param_counts.iter().map(|(k, v)| (rest_qi(k, st), *v)).collect(), + value_origins: self.value_origins.iter().map(|(k, v)| (st.sym(*k), st.sym(*v))).collect(), + type_origins: self.type_origins.iter().map(|(k, v)| (st.sym(*k), st.sym(*v))).collect(), + class_origins: self.class_origins.iter().map(|(k, v)| (st.sym(*k), st.sym(*v))).collect(), + operator_class_targets: self.operator_class_targets.iter().map(|(k, v)| (st.sym(*k), st.sym(*v))).collect(), + class_fundeps: self.class_fundeps.iter().map(|(k, (vs, fs))| { + (st.sym(*k), (vs.iter().map(|v| st.sym(*v)).collect(), fs.clone())) }).collect(), - type_con_arities: unmap_qi_usize(&p.type_con_arities), - type_roles: unmap_sym_roles(&p.type_roles), - newtype_names: unmap_set_sym(&p.newtype_names), - signature_constraints: p.signature_constraints.iter().map(|(k, v)| { - (k.into(), v.iter().map(|(c, ts)| { - (c.into(), ts.iter().map(|t| t.into()).collect()) + type_con_arities: self.type_con_arities.iter().map(|(k, v)| (rest_qi(k, st), *v)).collect(), + type_roles: self.type_roles.iter().map(|(k, v)| (st.sym(*k), v.iter().map(rest_role).collect())).collect(), + newtype_names: self.newtype_names.iter().map(|s| st.sym(*s)).collect(), + signature_constraints: self.signature_constraints.iter().map(|(k, v)| { + (rest_qi(k, st), v.iter().map(|(c, ts)| { + 
(rest_qi(c, st), ts.iter().map(|t| rest_type(t, st)).collect()) }).collect()) }).collect(), - type_kinds: unmap_sym_type(&p.type_kinds), - class_type_kinds: unmap_sym_type(&p.class_type_kinds), - partial_dischargers: unmap_set_sym(&p.partial_dischargers), - self_referential_aliases: unmap_set_sym(&p.self_referential_aliases), - class_superclasses: p.class_superclasses.iter().map(|(k, (vs, cs))| { - (k.into(), (vs.iter().map(|v| s_to_sym(v)).collect(), cs.iter().map(|(c, ts)| { - (c.into(), ts.iter().map(|t| t.into()).collect()) + type_kinds: self.type_kinds.iter().map(|(k, v)| (st.sym(*k), rest_type(v, st))).collect(), + class_type_kinds: self.class_type_kinds.iter().map(|(k, v)| (st.sym(*k), rest_type(v, st))).collect(), + partial_dischargers: self.partial_dischargers.iter().map(|s| st.sym(*s)).collect(), + self_referential_aliases: self.self_referential_aliases.iter().map(|s| st.sym(*s)).collect(), + class_superclasses: self.class_superclasses.iter().map(|(k, (vs, cs))| { + (rest_qi(k, st), (vs.iter().map(|v| st.sym(*v)).collect(), cs.iter().map(|(c, ts)| { + (rest_qi(c, st), ts.iter().map(|t| rest_type(t, st)).collect()) }).collect())) }).collect(), - method_own_constraints: p.method_own_constraints.iter().map(|(k, v)| { - (k.into(), v.iter().map(|s| s_to_sym(s)).collect()) + method_own_constraints: self.method_own_constraints.iter().map(|(k, v)| { + (rest_qi(k, st), v.iter().map(|s| st.sym(*s)).collect()) }).collect(), } } @@ -421,6 +381,7 @@ impl From for ModuleExports { #[derive(Serialize, Deserialize, Clone, Debug)] pub struct PortableCacheFile { + pub string_table: Vec, pub modules: HashMap, } diff --git a/src/interner.rs b/src/interner.rs index 68188ac7..e932c92d 100644 --- a/src/interner.rs +++ b/src/interner.rs @@ -80,6 +80,13 @@ pub fn symbol_eq(sym: Symbol, s: &str) -> bool { with_interner(|interner| interner.resolve(sym).map_or(false, |r| r == s)) } +/// Intern a batch of strings in a single lock acquisition. 
+pub fn intern_batch(strings: &[String]) -> Vec { + with_interner(|interner| { + strings.iter().map(|s| interner.get_or_intern(s.as_str())).collect() + }) +} + /// Clear the interner (useful for testing) #[cfg(test)] pub fn clear() { diff --git a/src/main.rs b/src/main.rs index 433ed590..16a46568 100644 --- a/src/main.rs +++ b/src/main.rs @@ -58,10 +58,12 @@ fn main() { let output_path = PathBuf::from(&output); let cache_path = output_path.join(".pfc-cache").join("cache.bin"); + let cache_load_start = std::time::Instant::now(); let mut cache = cache_path .parent() .and_then(|_| build::cache::ModuleCache::load_from_disk(&cache_path).ok()) .unwrap_or_default(); + log::debug!("Cache load: {:.2?}", cache_load_start.elapsed()); let glob_refs: Vec<&str> = globs.iter().map(|s| s.as_str()).collect(); let result = build::build_cached(&glob_refs, Some(output_path.clone()), &mut cache); @@ -70,9 +72,11 @@ fn main() { if let Some(parent) = cache_path.parent() { std::fs::create_dir_all(parent).ok(); } + let cache_save_start = std::time::Instant::now(); if let Err(e) = cache.save_to_disk(&cache_path) { log::debug!("Failed to save build cache: {e}"); } + log::debug!("Cache save: {:.2?}", cache_save_start.elapsed()); let mut error_messages: Vec = Vec::new(); From 63113d6dce03c92c3989899dd2ea934a57bb84b0 Mon Sep 17 00:00:00 2001 From: Rory Campbell Date: Sat, 7 Mar 2026 21:10:02 +0100 Subject: [PATCH 06/14] notify skipping --- src/build/cache.rs | 312 +++++++++++++++++++++++++++++++++--------- src/build/mod.rs | 51 +++++-- src/build/portable.rs | 14 -- src/main.rs | 23 +--- 4 files changed, 289 insertions(+), 111 deletions(-) diff --git a/src/build/cache.rs b/src/build/cache.rs index 342ece05..7889d939 100644 --- a/src/build/cache.rs +++ b/src/build/cache.rs @@ -1,37 +1,115 @@ //! Module cache for incremental builds. //! -//! Tracks content hashes and cached ModuleExports to skip typechecking -//! unchanged modules. Supports on-disk persistence via bincode serialization. +//! 
Uses a lightweight index file (hashes + imports) loaded eagerly, +//! and per-module export files loaded lazily on demand. +//! Exports hash comparison avoids rebuilding dependents when only +//! function bodies change (not signatures). use std::collections::{HashMap, HashSet, VecDeque}; use std::hash::{Hash, Hasher}; use std::io; -use std::path::Path; -use std::sync::Arc; +use std::path::{Path, PathBuf}; -use rayon::prelude::*; +use serde::{Deserialize, Serialize}; use crate::typechecker::registry::ModuleExports; -use super::portable::{PModuleExports, PortableCacheFile, PortableCachedModule, StringTableBuilder, StringTableReader}; +use super::portable::{PModuleExports, StringTableBuilder, StringTableReader}; -// ===== Module Cache ===== +// ===== Cache Index (loaded eagerly, small) ===== -/// Cached state for a single module. -struct CachedModule { +#[derive(Serialize, Deserialize, Default)] +struct CacheIndex { + modules: HashMap, +} + +#[derive(Serialize, Deserialize, Clone)] +struct CacheIndexEntry { content_hash: u64, - exports: ModuleExports, + exports_hash: u64, imports: Vec, } +// ===== Per-Module Cache File ===== + +#[derive(Serialize, Deserialize)] +struct ModuleCacheFile { + string_table: Vec, + exports: PModuleExports, +} + +// ===== In-Memory Module State ===== + +enum CachedModule { + /// Only index loaded — exports on disk, not yet read + Indexed { + content_hash: u64, + exports_hash: u64, + imports: Vec, + }, + /// Fully loaded in memory (from disk or from typechecking) + Loaded { + content_hash: u64, + exports_hash: u64, + imports: Vec, + exports: ModuleExports, + dirty: bool, + }, +} + +impl CachedModule { + fn content_hash(&self) -> u64 { + match self { + CachedModule::Indexed { content_hash, .. } => *content_hash, + CachedModule::Loaded { content_hash, .. } => *content_hash, + } + } + + fn exports_hash(&self) -> u64 { + match self { + CachedModule::Indexed { exports_hash, .. } => *exports_hash, + CachedModule::Loaded { exports_hash, .. 
} => *exports_hash, + } + } + + fn imports(&self) -> &[String] { + match self { + CachedModule::Indexed { imports, .. } => imports, + CachedModule::Loaded { imports, .. } => imports, + } + } + + fn is_dirty(&self) -> bool { + match self { + CachedModule::Indexed { .. } => false, + CachedModule::Loaded { dirty, .. } => *dirty, + } + } +} + +// ===== Public API ===== + /// In-memory cache of typechecked modules for incremental builds. -#[derive(Default)] +/// Index is loaded eagerly; per-module exports are loaded lazily. pub struct ModuleCache { entries: HashMap, /// Reverse dependency graph: module → modules that import it dependents: HashMap>, - /// Whether the cache has been modified since last save/load. - dirty: bool, + /// Directory for per-module cache files + cache_dir: Option, + /// Whether the index needs to be rewritten + index_dirty: bool, +} + +impl Default for ModuleCache { + fn default() -> Self { + Self { + entries: HashMap::new(), + dependents: HashMap::new(), + cache_dir: None, + index_dirty: false, + } + } } impl ModuleCache { @@ -46,6 +124,16 @@ impl ModuleCache { hasher.finish() } + /// Compute a hash of serialized exports for change detection. + pub fn exports_hash(exports: &ModuleExports) -> u64 { + let mut st = StringTableBuilder::new(); + let portable = PModuleExports::from_exports(exports, &mut st); + let bytes = bincode::serialize(&(st.into_table(), &portable)).unwrap_or_default(); + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + bytes.hash(&mut hasher); + hasher.finish() + } + /// Check if a module needs to be rebuilt. 
/// /// Returns true if: @@ -61,41 +149,90 @@ impl ModuleCache { match self.entries.get(module_name) { None => true, Some(cached) => { - if cached.content_hash != content_hash { + if cached.content_hash() != content_hash { return true; } - // Check if any dependency was rebuilt - cached.imports.iter().any(|dep| rebuilt.contains(dep)) + // Check if any dependency was rebuilt (exports changed) + cached.imports().iter().any(|dep| rebuilt.contains(dep)) } } } - /// Get cached exports for a module (if available). - pub fn get_exports(&self, module_name: &str) -> Option<&ModuleExports> { - self.entries.get(module_name).map(|c| &c.exports) + /// Get cached exports for a module, loading from disk if needed. + pub fn get_exports(&mut self, module_name: &str) -> Option<&ModuleExports> { + // Check if we need to load from disk first + let needs_load = matches!( + self.entries.get(module_name), + Some(CachedModule::Indexed { .. }) + ); + + if needs_load { + if let Some(ref cache_dir) = self.cache_dir { + let module_path = module_file_path(cache_dir, module_name); + if let Ok(exports) = load_module_file(&module_path) { + if let Some(entry) = self.entries.remove(module_name) { + let (content_hash, exports_hash, imports) = match entry { + CachedModule::Indexed { content_hash, exports_hash, imports } => { + (content_hash, exports_hash, imports) + } + _ => unreachable!(), + }; + self.entries.insert(module_name.to_string(), CachedModule::Loaded { + content_hash, + exports_hash, + imports, + exports, + dirty: false, + }); + } + } else { + // File missing/corrupt — remove from cache + self.entries.remove(module_name); + self.index_dirty = true; + return None; + } + } else { + // No cache dir — can't load + return None; + } + } + + match self.entries.get(module_name) { + Some(CachedModule::Loaded { exports, .. }) => Some(exports), + _ => None, + } } /// Update the cache entry for a module after typechecking. 
+ /// Returns true if the module's exports actually changed (different exports_hash). pub fn update( &mut self, module_name: String, content_hash: u64, exports: ModuleExports, imports: Vec, - ) { - self.entries.insert(module_name, CachedModule { + ) -> bool { + let new_exports_hash = Self::exports_hash(&exports); + + let exports_changed = self.entries.get(&module_name) + .map_or(true, |old| old.exports_hash() != new_exports_hash); + + self.entries.insert(module_name, CachedModule::Loaded { content_hash, - exports, + exports_hash: new_exports_hash, imports, + exports, + dirty: true, }); - self.dirty = true; + self.index_dirty = true; + exports_changed } /// Build the reverse dependency graph from cached import data. pub fn build_reverse_deps(&mut self) { self.dependents.clear(); for (module, cached) in &self.entries { - for dep in &cached.imports { + for dep in cached.imports() { self.dependents .entry(dep.clone()) .or_default() @@ -128,73 +265,112 @@ impl ModuleCache { let before = self.entries.len(); self.entries.retain(|k, _| module_names.contains(k)); if self.entries.len() != before { - self.dirty = true; + self.index_dirty = true; } } - /// Returns true if the cache has been modified since load. - pub fn is_dirty(&self) -> bool { - self.dirty - } + // ===== Disk I/O ===== - /// Save cache to disk using bincode serialization with string table. - pub fn save_to_disk(&self, path: &Path) -> io::Result<()> { - if !self.dirty { + /// Save cache to disk: index file + per-module files for dirty modules. 
+ pub fn save_to_disk(&self, cache_dir: &Path) -> io::Result<()> { + if !self.index_dirty && !self.entries.values().any(|m| m.is_dirty()) { log::debug!("Cache unchanged, skipping save"); return Ok(()); } - let mut st = StringTableBuilder::new(); - let modules = self.entries.iter().map(|(name, cached)| { - (name.clone(), PortableCachedModule { - content_hash: cached.content_hash, - exports: PModuleExports::from_exports(&cached.exports, &mut st), - imports: cached.imports.clone(), - }) - }).collect(); + let modules_dir = cache_dir.join("modules"); + std::fs::create_dir_all(&modules_dir)?; + + // Write dirty module files + let mut saved_count = 0; + for (name, cached) in &self.entries { + if let CachedModule::Loaded { exports, dirty: true, .. } = cached { + let module_path = module_file_path(cache_dir, name); + save_module_file(&module_path, exports)?; + saved_count += 1; + } + } - let portable = PortableCacheFile { - string_table: st.into_table(), - modules, + // Write index + let index = CacheIndex { + modules: self.entries.iter().map(|(name, cached)| { + (name.clone(), CacheIndexEntry { + content_hash: cached.content_hash(), + exports_hash: cached.exports_hash(), + imports: cached.imports().to_vec(), + }) + }).collect(), }; - if let Some(parent) = path.parent() { - std::fs::create_dir_all(parent)?; - } - let file = std::fs::File::create(path)?; - let mut encoder = zstd::Encoder::new(file, 1) - .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd encoder: {e}")))?; - bincode::serialize_into(&mut encoder, &portable) - .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode serialize: {e}")))?; - encoder.finish() - .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd finish: {e}")))?; + let index_path = cache_dir.join("index.bin"); + let encoded = bincode::serialize(&index) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + let compressed = zstd::bulk::compress(&encoded, 1) + .map_err(|e| 
io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + std::fs::write(&index_path, compressed)?; + + log::debug!("Cache save: wrote index + {} module files", saved_count); Ok(()) } - /// Load cache from disk. - pub fn load_from_disk(path: &Path) -> io::Result { - let file = std::fs::File::open(path)?; - let decoder = io::BufReader::new(zstd::Decoder::new(file) - .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd decoder: {e}")))?); - let portable: PortableCacheFile = bincode::deserialize_from(decoder) - .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode deserialize: {e}")))?; + /// Load cache index from disk. Module exports are loaded lazily. + pub fn load_from_disk(cache_dir: &Path) -> io::Result { + let index_path = cache_dir.join("index.bin"); + let compressed = std::fs::read(&index_path)?; + let data = zstd::bulk::decompress(&compressed, 64 * 1024 * 1024) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + let index: CacheIndex = bincode::deserialize(&data) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; - let st = Arc::new(StringTableReader::new(portable.string_table)); - - let entries: HashMap = portable.modules.into_par_iter().map(|(name, cached)| { - (name, CachedModule { - content_hash: cached.content_hash, - exports: cached.exports.to_exports(&st), - imports: cached.imports, + let entries = index.modules.into_iter().map(|(name, entry)| { + (name, CachedModule::Indexed { + content_hash: entry.content_hash, + exports_hash: entry.exports_hash, + imports: entry.imports, }) }).collect(); let mut cache = ModuleCache { entries, dependents: HashMap::new(), - dirty: false, + cache_dir: Some(cache_dir.to_path_buf()), + index_dirty: false, }; cache.build_reverse_deps(); Ok(cache) } } + +// ===== File helpers ===== + +fn module_file_path(cache_dir: &Path, module_name: &str) -> PathBuf { + cache_dir.join("modules").join(format!("{}.bin", module_name)) +} + +fn 
save_module_file(path: &Path, exports: &ModuleExports) -> io::Result<()> { + let mut st = StringTableBuilder::new(); + let portable = PModuleExports::from_exports(exports, &mut st); + let file = ModuleCacheFile { + string_table: st.into_table(), + exports: portable, + }; + + let mut encoder = zstd::Encoder::new(std::fs::File::create(path)?, 1) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + bincode::serialize_into(&mut encoder, &file) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + encoder.finish() + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + Ok(()) +} + +fn load_module_file(path: &Path) -> io::Result { + let file = std::fs::File::open(path)?; + let decoder = io::BufReader::new(zstd::Decoder::new(file) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?); + let cache_file: ModuleCacheFile = bincode::deserialize_from(decoder) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + + let st = StringTableReader::new(cache_file.string_table); + Ok(cache_file.exports.to_exports(&st)) +} diff --git a/src/build/mod.rs b/src/build/mod.rs index c6f0fb5e..06502dfc 100644 --- a/src/build/mod.rs +++ b/src/build/mod.rs @@ -50,6 +50,7 @@ pub struct ModuleResult { pub path: PathBuf, pub module_name: String, pub type_errors: Vec, + pub cached: bool, } pub struct BuildResult { @@ -527,13 +528,13 @@ fn build_from_sources_impl( let pm = &parsed[idx]; // Cache check: skip typecheck if source unchanged and no deps rebuilt - if let Some(ref cache) = cache { + if let Some(ref mut cache) = cache { if !cache.needs_rebuild(&pm.module_name, pm.source_hash, &rebuilt_set) { if let Some(exports) = cache.get_exports(&pm.module_name) { done += 1; cached_count += 1; - log::debug!( - " [{}/{}] cached: {}", + println!( + "[{}/{}] [skipping] {}", done, total_modules, pm.module_name ); registry.register(&pm.module_parts, exports.clone()); @@ -541,12 
+542,17 @@ fn build_from_sources_impl( path: pm.path.clone(), module_name: pm.module_name.clone(), type_errors: vec![], + cached: true, }); continue; } } } + println!( + "[{}/{}] [compiling] {}", + done + 1, total_modules, pm.module_name + ); let tc_start = Instant::now(); let deadline = effective_timeout.map(|t| tc_start + t); let check_result = std::panic::catch_unwind(AssertUnwindSafe(|| { @@ -572,12 +578,17 @@ fn build_from_sources_impl( " [{}/{}] ok: {} ({:.2?})", done, total_modules, pm.module_name, elapsed ); - rebuilt_set.insert(pm.module_name.clone()); let import_names: Vec = pm.import_parts.iter() .map(|parts| interner::resolve_module_name(parts)) .collect(); - if let Some(ref mut c) = cache { - c.update(pm.module_name.clone(), pm.source_hash, result.exports.clone(), import_names); + let exports_changed = if let Some(ref mut c) = cache { + c.update(pm.module_name.clone(), pm.source_hash, result.exports.clone(), import_names) + } else { + true + }; + // Only add to rebuilt_set if exports actually changed + if exports_changed { + rebuilt_set.insert(pm.module_name.clone()); } // Register exports immediately — result.exports is moved, // then result (with its types HashMap) is dropped. 
@@ -586,6 +597,7 @@ fn build_from_sources_impl( path: pm.path.clone(), module_name: pm.module_name.clone(), type_errors: result.errors, + cached: false, }); } Err(payload) => { @@ -606,13 +618,13 @@ fn build_from_sources_impl( let mut to_typecheck = Vec::new(); for &idx in level.iter() { let pm = &parsed[idx]; - if let Some(ref cache) = cache { + if let Some(ref mut cache) = cache { if !cache.needs_rebuild(&pm.module_name, pm.source_hash, &rebuilt_set) { if let Some(exports) = cache.get_exports(&pm.module_name) { done += 1; cached_count += 1; - log::debug!( - " [{}/{}] cached: {}", + println!( + "[{}/{}] [skipping] {}", done, total_modules, pm.module_name ); registry.register(&pm.module_parts, exports.clone()); @@ -620,6 +632,7 @@ fn build_from_sources_impl( path: pm.path.clone(), module_name: pm.module_name.clone(), type_errors: vec![], + cached: true, }); continue; } @@ -628,6 +641,15 @@ fn build_from_sources_impl( to_typecheck.push(idx); } + // Print [compiling] for all modules in this level before starting + for &idx in &to_typecheck { + let pm = &parsed[idx]; + println!( + "[{}/{}] [compiling] {}", + done + 1, total_modules, pm.module_name + ); + } + // Typecheck remaining modules in parallel let level_results: Vec<_> = pool.install(|| { to_typecheck.par_iter().map(|&idx| { @@ -662,18 +684,23 @@ fn build_from_sources_impl( " [{}/{}] ok: {} ({:.2?})", done, total_modules, pm.module_name, elapsed ); - rebuilt_set.insert(pm.module_name.clone()); let import_names: Vec = pm.import_parts.iter() .map(|parts| interner::resolve_module_name(parts)) .collect(); - if let Some(ref mut c) = cache { - c.update(pm.module_name.clone(), pm.source_hash, result.exports.clone(), import_names); + let exports_changed = if let Some(ref mut c) = cache { + c.update(pm.module_name.clone(), pm.source_hash, result.exports.clone(), import_names) + } else { + true + }; + if exports_changed { + rebuilt_set.insert(pm.module_name.clone()); } registry.register(&pm.module_parts, result.exports); 
module_results.push(ModuleResult { path: pm.path.clone(), module_name: pm.module_name.clone(), type_errors: result.errors, + cached: false, }); } Err(payload) => { diff --git a/src/build/portable.rs b/src/build/portable.rs index a19e1617..767743c3 100644 --- a/src/build/portable.rs +++ b/src/build/portable.rs @@ -377,17 +377,3 @@ impl PModuleExports { } } -// ===== Portable Cache File ===== - -#[derive(Serialize, Deserialize, Clone, Debug)] -pub struct PortableCacheFile { - pub string_table: Vec, - pub modules: HashMap, -} - -#[derive(Serialize, Deserialize, Clone, Debug)] -pub struct PortableCachedModule { - pub content_hash: u64, - pub exports: PModuleExports, - pub imports: Vec, -} diff --git a/src/main.rs b/src/main.rs index 16a46568..9999ad04 100644 --- a/src/main.rs +++ b/src/main.rs @@ -56,24 +56,18 @@ fn main() { log::debug!("Starting compile with globs: {:?}", globs); let output_path = PathBuf::from(&output); - let cache_path = output_path.join(".pfc-cache").join("cache.bin"); + let cache_dir = output_path.join(".pfc-cache"); let cache_load_start = std::time::Instant::now(); - let mut cache = cache_path - .parent() - .and_then(|_| build::cache::ModuleCache::load_from_disk(&cache_path).ok()) + let mut cache = build::cache::ModuleCache::load_from_disk(&cache_dir) .unwrap_or_default(); log::debug!("Cache load: {:.2?}", cache_load_start.elapsed()); let glob_refs: Vec<&str> = globs.iter().map(|s| s.as_str()).collect(); let result = build::build_cached(&glob_refs, Some(output_path.clone()), &mut cache); - // Save cache for next build - if let Some(parent) = cache_path.parent() { - std::fs::create_dir_all(parent).ok(); - } let cache_save_start = std::time::Instant::now(); - if let Err(e) = cache.save_to_disk(&cache_path) { + if let Err(e) = cache.save_to_disk(&cache_dir) { log::debug!("Failed to save build cache: {e}"); } log::debug!("Cache save: {:.2?}", cache_save_start.elapsed()); @@ -84,14 +78,9 @@ fn main() { error_messages.push(format!("{err}")); } - let 
total = result.modules.len(); - for (i, module) in result.modules.iter().enumerate() { - if module.type_errors.is_empty() { - println!("[{}/{}] {}", i + 1, total, module.module_name); - } else { - for err in &module.type_errors { - error_messages.push(format!("{}: {err}", module.module_name)); - } + for module in &result.modules { + for err in &module.type_errors { + error_messages.push(format!("{}: {err}", module.module_name)); } } From 75bb17214453e5714af93f39ab8a3c1553d75caf Mon Sep 17 00:00:00 2001 From: Rory Campbell Date: Sun, 8 Mar 2026 04:56:02 +0100 Subject: [PATCH 07/14] faster cached builds --- src/build/cache.rs | 27 ++++++ src/build/mod.rs | 229 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 216 insertions(+), 40 deletions(-) diff --git a/src/build/cache.rs b/src/build/cache.rs index 7889d939..a9f45e27 100644 --- a/src/build/cache.rs +++ b/src/build/cache.rs @@ -21,6 +21,9 @@ use super::portable::{PModuleExports, StringTableBuilder, StringTableReader}; #[derive(Serialize, Deserialize, Default)] struct CacheIndex { modules: HashMap, + /// Maps file paths to module names for fast lookup during incremental builds + #[serde(default)] + path_to_module: HashMap, } #[derive(Serialize, Deserialize, Clone)] @@ -95,6 +98,8 @@ pub struct ModuleCache { entries: HashMap, /// Reverse dependency graph: module → modules that import it dependents: HashMap>, + /// Maps file paths to module names for skipping parse on warm builds + path_index: HashMap, /// Directory for per-module cache files cache_dir: Option, /// Whether the index needs to be rewritten @@ -106,6 +111,7 @@ impl Default for ModuleCache { Self { entries: HashMap::new(), dependents: HashMap::new(), + path_index: HashMap::new(), cache_dir: None, index_dirty: false, } @@ -158,6 +164,24 @@ impl ModuleCache { } } + /// Look up the module name associated with a file path. 
+ pub fn module_name_for_path(&self, path: &str) -> Option<&str> { + self.path_index.get(path).map(|s| s.as_str()) + } + + /// Register a file path → module name mapping. + pub fn register_path(&mut self, path: String, module_name: String) { + if self.path_index.get(&path).map(|s| s.as_str()) != Some(&module_name) { + self.path_index.insert(path, module_name); + self.index_dirty = true; + } + } + + /// Get the cached imports for a module by name. + pub fn get_imports(&self, module_name: &str) -> Option<&[String]> { + self.entries.get(module_name).map(|c| c.imports()) + } + /// Get cached exports for a module, loading from disk if needed. pub fn get_exports(&mut self, module_name: &str) -> Option<&ModuleExports> { // Check if we need to load from disk first @@ -265,6 +289,7 @@ impl ModuleCache { let before = self.entries.len(); self.entries.retain(|k, _| module_names.contains(k)); if self.entries.len() != before { + self.path_index.retain(|_, v| module_names.contains(v)); self.index_dirty = true; } } @@ -300,6 +325,7 @@ impl ModuleCache { imports: cached.imports().to_vec(), }) }).collect(), + path_to_module: self.path_index.clone(), }; let index_path = cache_dir.join("index.bin"); @@ -333,6 +359,7 @@ impl ModuleCache { let mut cache = ModuleCache { entries, dependents: HashMap::new(), + path_index: index.path_to_module, cache_dir: Some(cache_dir.to_path_buf()), index_dirty: false, }; diff --git a/src/build/mod.rs b/src/build/mod.rs index 06502dfc..a88bc814 100644 --- a/src/build/mod.rs +++ b/src/build/mod.rs @@ -18,6 +18,7 @@ use rayon::prelude::*; use crate::cst::{Decl, Module}; use crate::interner::{self, Symbol}; +use crate::span::Span; use crate::js_ffi; use crate::typechecker::check; use crate::typechecker::registry::ModuleRegistry; @@ -62,7 +63,10 @@ pub struct BuildResult { struct ParsedModule { path: PathBuf, - module: Module, + /// The parsed CST. None for cache-skipped modules (lazy-parsed on demand). 
+ module: Option, + /// Index into the sources array, for lazy parsing when needed. + source_idx: usize, module_name: String, module_parts: Vec, import_parts: Vec>, @@ -76,6 +80,10 @@ fn module_name_string(parts: &[Symbol]) -> String { interner::resolve_module_name(parts) } +fn module_name_to_parts(name: &str) -> Vec { + name.split('.').map(|s| interner::intern(s)).collect() +} + fn is_prim_import(parts: &[Symbol]) -> bool { !parts.is_empty() && interner::symbol_eq(parts[0], "Prim") } @@ -280,27 +288,107 @@ fn build_from_sources_impl( ) -> (BuildResult, ModuleRegistry) { let pipeline_start = Instant::now(); let mut build_errors = Vec::new(); - // Phase 2: Parse all sources (parallel) - log::debug!("Phase 2c: Parsing {} source files", sources.len()); + // Phase 2c: Parse source files (with cache-aware skipping) + log::debug!("Phase 2c: Processing {} source files", sources.len()); let phase_start = Instant::now(); - // Parse all sources in parallel - let parse_results: Vec<_> = sources + // Step 1: Compute content hashes for all sources (fast, parallel) + let source_hashes: Vec = sources .par_iter() - .map(|&(path_str, source)| { + .map(|&(_, source)| cache::ModuleCache::content_hash(source)) + .collect(); + + // Step 2: Determine which sources can skip parsing (cache hit by path + hash) + let mut skip_parse = vec![false; sources.len()]; + let mut skip_count = 0usize; + if let Some(ref cache) = cache { + for (i, &(path_str, _)) in sources.iter().enumerate() { + if let Some(module_name) = cache.module_name_for_path(path_str) { + if !cache.needs_rebuild(module_name, source_hashes[i], &HashSet::new()) { + skip_parse[i] = true; + skip_count += 1; + } + } + } + } + log::debug!( + " {} modules cached (skip parse), {} need parsing", + skip_count, + sources.len() - skip_count + ); + + // Step 3: Parse only non-cached sources in parallel + let parse_results: Vec<(usize, Result<(PathBuf, Module), BuildError>)> = sources + .par_iter() + .enumerate() + .filter(|(i, _)| 
!skip_parse[*i]) + .map(|(i, &(path_str, source))| { let path = PathBuf::from(path_str); - match crate::parser::parse(source) { + let result = match crate::parser::parse(source) { Ok(module) => Ok((path, module)), Err(e) => Err(BuildError::CompileError { path, error: e }), - } + }; + (i, result) }) .collect(); - // Sequential validation (Prim check, dup check, etc.) + // Step 4: Build parsed vec from both cached stubs and parsed results let mut parsed: Vec = Vec::new(); let mut seen_modules: HashMap, PathBuf> = HashMap::new(); - for (i, result) in parse_results.into_iter().enumerate() { + // 4a: Create stubs for cache-hit modules + if let Some(ref cache) = cache { + for (i, &(path_str, _)) in sources.iter().enumerate() { + if !skip_parse[i] { + continue; + } + let module_name = match cache.module_name_for_path(path_str) { + Some(name) => name.to_string(), + None => continue, + }; + let module_parts = module_name_to_parts(&module_name); + + // Duplicate check + if let Some(existing_path) = seen_modules.get(&module_parts) { + log::debug!( + " rejected {}: duplicate (already at {})", + module_name, + existing_path.display() + ); + build_errors.push(BuildError::DuplicateModule { + module_name, + path1: existing_path.clone(), + path2: PathBuf::from(path_str), + }); + continue; + } + seen_modules.insert(module_parts.clone(), PathBuf::from(path_str)); + + let import_parts: Vec> = cache + .get_imports(&module_name) + .map(|imports| imports.iter().map(|s| module_name_to_parts(s)).collect()) + .unwrap_or_default(); + + let js_source = js_sources + .as_ref() + .and_then(|m| m.get(path_str)) + .map(|s| s.to_string()); + + parsed.push(ParsedModule { + path: PathBuf::from(path_str), + module: None, + source_idx: i, + module_name, + module_parts, + import_parts, + js_source, + source_hash: source_hashes[i], + }); + } + } + + // 4b: Process parsed results (with full validation) + for (i, result) in parse_results { let (path, module) = match result { Ok(pair) => pair, Err(e) => { 
@@ -316,7 +404,8 @@ fn build_from_sources_impl( // Check for reserved Prim namespace if !module_parts.is_empty() { - let is_prim = interner::with_resolved(module_parts[0], |s| s == "Prim").unwrap_or(false); + let is_prim = + interner::with_resolved(module_parts[0], |s| s == "Prim").unwrap_or(false); if is_prim { log::debug!(" rejected {}: Prim namespace is reserved", module_name); build_errors.push(BuildError::CannotDefinePrimModules { module_name, path }); @@ -329,7 +418,8 @@ fn build_from_sources_impl( for part in &module_parts { let invalid_char = interner::with_resolved(*part, |s| { s.chars().find(|&c| c == '\'' || c == '_') - }).flatten(); + }) + .flatten(); if let Some(c) = invalid_char { log::debug!( " rejected {}: invalid character '{}' in module name", @@ -378,21 +468,27 @@ fn build_from_sources_impl( .and_then(|m| m.get(path_str)) .map(|s| s.to_string()); - let source_hash = cache::ModuleCache::content_hash(sources[i].1); + // Register path → module_name mapping in cache + if let Some(ref mut cache) = cache { + cache.register_path(path_str.to_string(), module_name.clone()); + } parsed.push(ParsedModule { path, - module, + module: Some(module), + source_idx: i, module_name, module_parts, import_parts, js_source, - source_hash, + source_hash: source_hashes[i], }); } log::debug!( - "Phase 2c complete: parsed {} modules (rejected {}) in {:.2?}", + "Phase 2c complete: {} modules ({} cached, {} parsed, {} rejected) in {:.2?}", parsed.len(), + skip_count, + parsed.len().saturating_sub(skip_count), sources.len() - parsed.len(), phase_start.elapsed() ); @@ -432,7 +528,7 @@ fn build_from_sources_impl( module_name: imp_name, importing_module: pm.module_name.clone(), path: pm.path.clone(), - span: pm.module.span, + span: pm.module.as_ref().map(|m| m.span).unwrap_or(Span::new(0, 0)), }); } } @@ -525,42 +621,63 @@ fn build_from_sources_impl( // (including ModuleExports) is dropped before the next module starts. // Peak memory = 1 module's CheckResult at a time. 
for &idx in level { - let pm = &parsed[idx]; - // Cache check: skip typecheck if source unchanged and no deps rebuilt - if let Some(ref mut cache) = cache { - if !cache.needs_rebuild(&pm.module_name, pm.source_hash, &rebuilt_set) { - if let Some(exports) = cache.get_exports(&pm.module_name) { + { + let pm = &parsed[idx]; + if let Some(ref mut cache) = cache { + if !cache.needs_rebuild(&pm.module_name, pm.source_hash, &rebuilt_set) { + if let Some(exports) = cache.get_exports(&pm.module_name) { + done += 1; + cached_count += 1; + eprintln!( + "[{}/{}] [skipping] {}", + done, total_modules, pm.module_name + ); + registry.register(&pm.module_parts, exports.clone()); + module_results.push(ModuleResult { + path: pm.path.clone(), + module_name: pm.module_name.clone(), + type_errors: vec![], + cached: true, + }); + continue; + } + } + } + } + + // Lazy parse if module was cache-skipped but now needs typechecking + if parsed[idx].module.is_none() { + let source = sources[parsed[idx].source_idx].1; + match crate::parser::parse(source) { + Ok(module) => { + parsed[idx].module = Some(module); + } + Err(e) => { done += 1; - cached_count += 1; - println!( - "[{}/{}] [skipping] {}", - done, total_modules, pm.module_name - ); - registry.register(&pm.module_parts, exports.clone()); - module_results.push(ModuleResult { - path: pm.path.clone(), - module_name: pm.module_name.clone(), - type_errors: vec![], - cached: true, + build_errors.push(BuildError::CompileError { + path: parsed[idx].path.clone(), + error: e, }); continue; } } } - println!( + let pm = &parsed[idx]; + eprintln!( "[{}/{}] [compiling] {}", done + 1, total_modules, pm.module_name ); let tc_start = Instant::now(); let deadline = effective_timeout.map(|t| tc_start + t); + let module_ref = pm.module.as_ref().unwrap(); let check_result = std::panic::catch_unwind(AssertUnwindSafe(|| { let mod_sym = crate::interner::intern(&pm.module_name); log::debug!("Typechecking: {}", &pm.module_name); let path_str = 
pm.path.to_string_lossy(); crate::typechecker::set_deadline(deadline, mod_sym, &path_str); - let (ast_module, convert_errors) = crate::ast::convert(&pm.module, &registry); + let (ast_module, convert_errors) = crate::ast::convert(module_ref, &registry); let mut result = check::check_module(&ast_module, &registry); if !convert_errors.is_empty() { let mut all_errors = convert_errors; @@ -623,7 +740,7 @@ fn build_from_sources_impl( if let Some(exports) = cache.get_exports(&pm.module_name) { done += 1; cached_count += 1; - println!( + eprintln!( "[{}/{}] [skipping] {}", done, total_modules, pm.module_name ); @@ -641,10 +758,30 @@ fn build_from_sources_impl( to_typecheck.push(idx); } + // Lazy parse any cache-skipped modules that now need typechecking + for &idx in &to_typecheck { + if parsed[idx].module.is_none() { + let source = sources[parsed[idx].source_idx].1; + match crate::parser::parse(source) { + Ok(module) => { + parsed[idx].module = Some(module); + } + Err(e) => { + build_errors.push(BuildError::CompileError { + path: parsed[idx].path.clone(), + error: e, + }); + } + } + } + } + // Remove entries that failed to parse + to_typecheck.retain(|&idx| parsed[idx].module.is_some()); + // Print [compiling] for all modules in this level before starting for &idx in &to_typecheck { let pm = &parsed[idx]; - println!( + eprintln!( "[{}/{}] [compiling] {}", done + 1, total_modules, pm.module_name ); @@ -654,13 +791,14 @@ fn build_from_sources_impl( let level_results: Vec<_> = pool.install(|| { to_typecheck.par_iter().map(|&idx| { let pm = &parsed[idx]; + let module_ref = pm.module.as_ref().unwrap(); let tc_start = Instant::now(); let deadline = effective_timeout.map(|t| tc_start + t); let check_result = std::panic::catch_unwind(AssertUnwindSafe(|| { let mod_sym = crate::interner::intern(&pm.module_name); let path_str = pm.path.to_string_lossy(); crate::typechecker::set_deadline(deadline, mod_sym, &path_str); - let (ast_module, convert_errors) = crate::ast::convert(&pm.module,
&registry); + let (ast_module, convert_errors) = crate::ast::convert(module_ref, &registry); let mut result = check::check_module(&ast_module, &registry); if !convert_errors.is_empty() { let mut all_errors = convert_errors; @@ -732,7 +870,12 @@ fn build_from_sources_impl( let phase_start = Instant::now(); let mut ffi_checked = 0; for pm in &parsed { - let foreign_names = extract_foreign_import_names(&pm.module); + // Skip FFI validation for cache-skipped modules (already validated) + let module_ref = match pm.module.as_ref() { + Some(m) => m, + None => continue, + }; + let foreign_names = extract_foreign_import_names(module_ref); let has_foreign = !foreign_names.is_empty(); match (&pm.js_source, has_foreign) { @@ -853,6 +996,12 @@ fn build_from_sources_impl( .collect(); for pm in &parsed { + // Skip codegen for cache-skipped modules (JS already generated) + let module_ref = match pm.module.as_ref() { + Some(m) => m, + None => continue, + }; + if !ok_modules.contains(&pm.module_name) { log::debug!(" skipping {} (has type errors)", pm.module_name); continue; @@ -871,7 +1020,7 @@ fn build_from_sources_impl( log::debug!(" generating JS for {}", pm.module_name); let js_module = crate::codegen::js::module_to_js( - &pm.module, + module_ref, &pm.module_name, &pm.module_parts, module_exports, From 3e98fd1586b96f83f4562d38e9c62827da446e0b Mon Sep 17 00:00:00 2001 From: Rory Campbell Date: Sun, 8 Mar 2026 04:59:32 +0100 Subject: [PATCH 08/14] default to ragu --- editors/code/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/editors/code/package.json b/editors/code/package.json index 07b08463..e52a07a9 100644 --- a/editors/code/package.json +++ b/editors/code/package.json @@ -47,7 +47,7 @@ }, "pfc.sourcesCommand": { "type": "string", - "default": "spago sources", + "default": "ragu sources", "description": "Shell command that outputs PureScript source file paths (one per line).
Example: find src .spago/p -name '*.purs'" } } From a1746fe10fbbd46b137ffa2210f6d16dab67343d Mon Sep 17 00:00:00 2001 From: Rory Campbell Date: Mon, 9 Mar 2026 21:26:23 +0100 Subject: [PATCH 09/14] load sources in parallel --- src/build/mod.rs | 35 +++++++----- src/lsp/handlers/diagnostics.rs | 2 +- src/lsp/handlers/load_sources.rs | 91 ++++++++++++++++++++------------ src/lsp/mod.rs | 6 ++- 4 files changed, 83 insertions(+), 51 deletions(-) diff --git a/src/build/mod.rs b/src/build/mod.rs index a88bc814..680fedef 100644 --- a/src/build/mod.rs +++ b/src/build/mod.rs @@ -317,20 +317,27 @@ fn build_from_sources_impl( sources.len() - skip_count ); - // Step 3: Parse only non-cached sources in parallel - let parse_results: Vec<(usize, Result<(PathBuf, Module), BuildError>)> = sources - .par_iter() - .enumerate() - .filter(|(i, _)| !skip_parse[*i]) - .map(|(i, &(path_str, source))| { - let path = PathBuf::from(path_str); - let result = match crate::parser::parse(source) { - Ok(module) => Ok((path, module)), - Err(e) => Err(BuildError::CompileError { path, error: e }), - }; - (i, result) - }) - .collect(); + // Step 3: Parse only non-cached sources in parallel (use a pool with large stacks + // since the parser can recurse deeply on complex files) + let parse_pool = rayon::ThreadPoolBuilder::new() + .stack_size(16 * 1024 * 1024) + .build() + .expect("failed to build parse thread pool"); + let parse_results: Vec<(usize, Result<(PathBuf, Module), BuildError>)> = parse_pool.install(|| { + sources + .par_iter() + .enumerate() + .filter(|(i, _)| !skip_parse[*i]) + .map(|(i, &(path_str, source))| { + let path = PathBuf::from(path_str); + let result = match crate::parser::parse(source) { + Ok(module) => Ok((path, module)), + Err(e) => Err(BuildError::CompileError { path, error: e }), + }; + (i, result) + }) + .collect() + }); // Step 4: Build parsed vec from both cached stubs and parsed results let mut parsed: Vec = Vec::new(); diff --git a/src/lsp/handlers/diagnostics.rs 
b/src/lsp/handlers/diagnostics.rs index 59da1076..ae49280e 100644 --- a/src/lsp/handlers/diagnostics.rs +++ b/src/lsp/handlers/diagnostics.rs @@ -63,7 +63,7 @@ impl Backend { let module_name = interner::resolve_module_name(&module.name.value.parts); let module_parts: Vec = module.name.value.parts.clone(); - // Type-check against the registry + // Type-check against the registry (use stacker to extend stack for deep recursion) let mut registry = self.registry.write().await; let check_result = crate::typechecker::check_module_with_registry(&module, &registry); diff --git a/src/lsp/handlers/load_sources.rs b/src/lsp/handlers/load_sources.rs index 8a62aea1..bd280b6b 100644 --- a/src/lsp/handlers/load_sources.rs +++ b/src/lsp/handlers/load_sources.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use std::sync::atomic::Ordering; +use rayon::prelude::*; use tower_lsp::lsp_types::*; use crate::build::BuildOptions; @@ -51,7 +52,12 @@ impl Backend { let ready = self.ready.clone(); let progress_token = token.clone(); - tokio::task::spawn_blocking(move || { + let rt_handle = tokio::runtime::Handle::current(); + std::thread::Builder::new() + .name("pfc-load-sources".to_string()) + .stack_size(16 * 1024 * 1024) // 16 MB — typechecker needs deep recursion + .spawn(move || { + let _guard = rt_handle.enter(); // Run the shell command to get source globs let output = match std::process::Command::new("sh") .arg("-c") @@ -101,27 +107,14 @@ impl Backend { .await; }); - // Resolve globs to file paths - let mut sources: Vec<(String, String)> = Vec::new(); + // Resolve globs to file paths (collect paths first, then read in parallel) + let mut file_paths: Vec = Vec::new(); for pattern in &globs { match glob::glob(pattern) { Ok(entries) => { for entry in entries.flatten() { if entry.extension().map_or(false, |ext| ext == "purs") { - match std::fs::read_to_string(&entry) { - Ok(source) => { - let abs_path = entry - .canonicalize() - .unwrap_or_else(|_| entry.clone()); - sources.push(( -
abs_path.to_string_lossy().into_owned(), - source, - )); - } - Err(e) => { - log::warn!("Failed to read {}: {e}", entry.display()) - } - } + file_paths.push(entry); } } } @@ -129,6 +122,25 @@ impl Backend { } } + // Read all files in parallel + let sources: Vec<(String, String)> = file_paths + .par_iter() + .filter_map(|entry| { + match std::fs::read_to_string(entry) { + Ok(source) => { + let abs_path = entry + .canonicalize() + .unwrap_or_else(|_| entry.clone()); + Some((abs_path.to_string_lossy().into_owned(), source)) + } + Err(e) => { + log::warn!("Failed to read {}: {e}", entry.display()); + None + } + } + }) + .collect(); + // Report progress: building rt.block_on(async { client @@ -179,29 +191,37 @@ impl Backend { .filter(|m| !m.type_errors.is_empty()) .count(); - // Build definition index and resolution exports from parsed sources - let mut index = DefinitionIndex::new(); - let mut smap = HashMap::new(); - let mut mfmap = HashMap::new(); - let mut parsed_modules = Vec::new(); - for (path, source) in &sources { - if let Ok(module) = crate::parser::parse(source) { - index.add_module(&module, path); - let mod_name = format!("{}", module.name.value); + // Parse all sources in parallel for definition index + let parse_results: Vec<_> = sources + .par_iter() + .map(|(path, source)| { let file_uri = Url::from_file_path(path) .map(|u| u.to_string()) .unwrap_or_default(); + match crate::parser::parse(source) { + Ok(module) => { + let mod_name = format!("{}", module.name.value); + (path.clone(), file_uri, source.clone(), Some((module, mod_name))) + } + Err(_) => { + (path.clone(), file_uri, source.clone(), None) + } + } + }) + .collect(); + + // Merge results sequentially (add_module takes &mut self) + let mut index = DefinitionIndex::new(); + let mut smap = HashMap::with_capacity(parse_results.len()); + let mut mfmap = HashMap::new(); + let mut parsed_modules = Vec::new(); + for (path, file_uri, source, parsed) in parse_results { + if let Some((module, mod_name)) = 
parsed { + index.add_module(&module, &path); mfmap.insert(mod_name, file_uri.clone()); parsed_modules.push(module); - smap.insert(file_uri, source.clone()); - } else { - smap.insert( - Url::from_file_path(path) - .map(|u| u.to_string()) - .unwrap_or_default(), - source.clone(), - ); } + smap.insert(file_uri, source); } let exports = crate::lsp::utils::resolve::ResolutionExports::new(&parsed_modules); @@ -234,6 +254,7 @@ impl Backend { }) .await; }); - }); + }) + .expect("failed to spawn load-sources thread"); } } diff --git a/src/lsp/mod.rs b/src/lsp/mod.rs index b8265f78..3d7f703a 100644 --- a/src/lsp/mod.rs +++ b/src/lsp/mod.rs @@ -154,7 +154,11 @@ impl Backend { } pub fn run_server(sources_cmd: Option) { - let rt = tokio::runtime::Runtime::new().expect("failed to create tokio runtime"); + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .thread_stack_size(16 * 1024 * 1024) // 16 MB — typechecker needs deep recursion + .build() + .expect("failed to create tokio runtime"); rt.block_on(async { let stdin = tokio::io::stdin(); let stdout = tokio::io::stdout(); From 099045fa874919c698cedc478f2d4830da15e4d4 Mon Sep 17 00:00:00 2001 From: Rory Campbell Date: Mon, 9 Mar 2026 21:46:27 +0100 Subject: [PATCH 10/14] Defer type alias ScopeConflict to reference site Type aliases that shadow imported types (e.g. `type Output = M.Output`) no longer trigger ScopeConflict at declaration. Instead, the conflict is recorded in `type_scope_conflicts` and only raised when the ambiguous name is actually referenced in a type annotation. 
Co-Authored-By: Claude Opus 4.6 --- src/typechecker/check.rs | 19 ++++++----- src/typechecker/convert.rs | 64 ++++++++++++++++++++++++++++++++++++++ src/typechecker/infer.rs | 4 +++ 3 files changed, 80 insertions(+), 7 deletions(-) diff --git a/src/typechecker/check.rs b/src/typechecker/check.rs index a1e1ed68..dbbdeaf8 100644 --- a/src/typechecker/check.rs +++ b/src/typechecker/check.rs @@ -2190,14 +2190,12 @@ fn check_module_impl(module: &Module, registry: &ModuleRegistry, collect_span_ty } ctx.type_con_arities.insert(qi(name.value), count_kind_arrows(kind)); } - Decl::TypeAlias { name, span, .. } => { - // Type synonyms re-defining an explicitly imported type name are a ScopeConflict. - // Data/newtype declarations are allowed to shadow imports. + Decl::TypeAlias { name, .. } => { + // Local type aliases shadow imported types, just like data/newtype declarations. + // A ScopeConflict is only raised if the ambiguous name is actually referenced + // (not merely declared or exported). Record the conflict for deferred checking. 
if explicitly_imported_types.contains(&name.value) { - errors.push(TypeError::ScopeConflict { - span: *span, - name: name.value, - }); + ctx.type_scope_conflicts.insert(name.value); } } _ => {} @@ -3252,6 +3250,13 @@ fn check_module_impl(module: &Module, registry: &ModuleRegistry, collect_span_ty collect_type_expr_vars(ty, &HashSet::new(), &mut errors); // Validate constraint class names in the type signature check_constraint_class_names(ty, &known_classes, &class_param_counts, &mut errors); + // Check for type-level scope conflicts (ambiguous type names) + if let Some((conflict_span, conflict_name)) = crate::typechecker::convert::find_type_scope_conflict(ty, &ctx.type_scope_conflicts) { + errors.push(TypeError::ScopeConflict { + span: conflict_span, + name: conflict_name, + }); + } match convert_type_expr(ty, &type_ops) { Ok(converted) => { // Check for partially applied synonyms in type signature diff --git a/src/typechecker/convert.rs b/src/typechecker/convert.rs index 2d99f45e..8982814e 100644 --- a/src/typechecker/convert.rs +++ b/src/typechecker/convert.rs @@ -152,6 +152,70 @@ pub fn convert_type_expr(ty: &TypeExpr, type_ops: &HashMap) -> Option<(crate::span::Span, Symbol)> { + match ty { + TypeExpr::Constructor { name, span, .. } => { + if name.module.is_none() && conflicts.contains(&name.name) { + return Some((*span, name.name)); + } + None + } + TypeExpr::App { constructor, arg, .. } => { + find_type_scope_conflict(constructor, conflicts) + .or_else(|| find_type_scope_conflict(arg, conflicts)) + } + TypeExpr::Function { from, to, .. } => { + find_type_scope_conflict(from, conflicts) + .or_else(|| find_type_scope_conflict(to, conflicts)) + } + TypeExpr::Forall { ty, vars, .. } => { + for (_, _, kind) in vars { + if let Some(k) = kind { + if let Some(r) = find_type_scope_conflict(k, conflicts) { + return Some(r); + } + } + } + find_type_scope_conflict(ty, conflicts) + } + TypeExpr::Constrained { constraints, ty, .. 
} => { + for c in constraints { + for arg in &c.args { + if let Some(r) = find_type_scope_conflict(arg, conflicts) { + return Some(r); + } + } + } + find_type_scope_conflict(ty, conflicts) + } + TypeExpr::Kinded { ty, .. } => { + find_type_scope_conflict(ty, conflicts) + } + TypeExpr::Record { fields, .. } => { + for f in fields { + if let Some(r) = find_type_scope_conflict(&f.ty, conflicts) { + return Some(r); + } + } + None + } + TypeExpr::Row { fields, tail, .. } => { + for f in fields { + if let Some(r) = find_type_scope_conflict(&f.ty, conflicts) { + return Some(r); + } + } + if let Some(t) = tail { + return find_type_scope_conflict(t, conflicts); + } + None + } + _ => None, + } +} + /// Check that kind annotations in forall vars don't forward-reference variables /// declared later in the same forall. E.g. `forall (a :: k) k.` is invalid because /// `k` is used before it's declared. diff --git a/src/typechecker/infer.rs b/src/typechecker/infer.rs index a501561a..864c6bff 100644 --- a/src/typechecker/infer.rs +++ b/src/typechecker/infer.rs @@ -111,6 +111,9 @@ pub struct InferCtx { /// Names that are ambiguous due to being imported from multiple modules. /// Referencing these names produces a ScopeConflict error. pub scope_conflicts: HashSet, + /// Type names that are ambiguous due to a local type alias shadowing an imported type. + /// Only checked when the type name is actually referenced in a type expression. + pub type_scope_conflicts: HashSet, /// Map from operator → class method target name (e.g. `<>` → `append`). /// Used for tracking deferred constraints on operator usage. 
pub operator_class_targets: HashMap, @@ -218,6 +221,7 @@ impl InferCtx { method_own_constraints: HashMap::new(), module_mode: false, scope_conflicts: HashSet::new(), + type_scope_conflicts: HashSet::new(), operator_class_targets: HashMap::new(), op_deferred_constraints: Vec::new(), class_fundeps: HashMap::new(), From e645286cf55a78965c32e2bef2b8c67a883c9dff Mon Sep 17 00:00:00 2001 From: Rory Campbell Date: Tue, 10 Mar 2026 14:38:56 +0100 Subject: [PATCH 11/14] lsp working in OA but slow --- Cargo.toml | 4 + src/bin/profile_load_sources.rs | 262 +++++++++++++++++++++++++++++++ src/build/cache.rs | 98 +++++++++++- src/build/mod.rs | 19 ++- src/lsp/handlers/completion.rs | 2 +- src/lsp/handlers/definition.rs | 19 +-- src/lsp/handlers/diagnostics.rs | 3 +- src/lsp/handlers/hover.rs | 15 +- src/lsp/handlers/load_sources.rs | 209 +++++++++++++++++++----- src/lsp/mod.rs | 47 +++++- src/lsp/utils/find_definition.rs | 80 +++++++++- src/lsp/utils/resolve.rs | 73 +++++++++ src/main.rs | 13 +- tests/build.rs | 13 +- tests/lsp_e2e.rs | 4 +- 15 files changed, 766 insertions(+), 95 deletions(-) create mode 100644 src/bin/profile_load_sources.rs diff --git a/Cargo.toml b/Cargo.toml index de22fd9c..6c752a17 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,10 @@ edition = "2021" name = "pfc" path = "src/main.rs" +[[bin]] +name = "profile-load-sources" +path = "src/bin/profile_load_sources.rs" + [dependencies] clap = { version = "4", features = ["derive"] } log = "0.4" diff --git a/src/bin/profile_load_sources.rs b/src/bin/profile_load_sources.rs new file mode 100644 index 00000000..ab089193 --- /dev/null +++ b/src/bin/profile_load_sources.rs @@ -0,0 +1,262 @@ +use std::collections::HashSet; +use std::path::PathBuf; +use std::time::Instant; + +use clap::Parser; +use rayon::prelude::*; + +use purescript_fast_compiler::build::{self, BuildOptions}; +use purescript_fast_compiler::lsp::utils::find_definition::DefinitionIndex; +use 
purescript_fast_compiler::lsp::utils::resolve::ResolutionExports; + +/// Profile the LSP load_sources phases with per-phase timing. +#[derive(Parser)] +#[command(name = "profile-load-sources")] +struct Args { + /// Working directory to run the sources command in + #[arg(long)] + path: PathBuf, + + /// Shell command that outputs source globs/paths (e.g. "spago sources") + #[arg(long)] + sources_cmd: String, + + /// Directory for disk cache (enables warm-cache profiling across runs) + #[arg(long)] + cache_dir: Option, +} + +macro_rules! phase { + ($name:expr, $body:expr) => {{ + let start = Instant::now(); + let result = $body; + let elapsed = start.elapsed(); + eprintln!(" {:.<50} {:>8.2?}", $name, elapsed); + result + }}; +} + +fn main() { + let args = Args::parse(); + let total_start = Instant::now(); + + eprintln!("Profiling load_sources at: {}", args.path.display()); + eprintln!("Sources command: {}", args.sources_cmd); + eprintln!(); + + // Phase 1: Run shell command + let globs: Vec = phase!("Run sources command", { + let output = std::process::Command::new("sh") + .arg("-c") + .arg(&args.sources_cmd) + .current_dir(&args.path) + .output() + .expect("Failed to run sources command"); + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + eprintln!("Command failed: {stderr}"); + std::process::exit(1); + } + + String::from_utf8_lossy(&output.stdout) + .lines() + .filter(|l| !l.is_empty()) + .map(|l| l.to_string()) + .collect() + }); + eprintln!(" {} glob patterns", globs.len()); + + // Phase 2: Resolve globs + let file_paths: Vec = phase!("Resolve globs", { + let mut paths = Vec::new(); + for pattern in &globs { + // Resolve relative globs against the working directory + let full_pattern = if PathBuf::from(pattern).is_relative() { + args.path.join(pattern).to_string_lossy().into_owned() + } else { + pattern.clone() + }; + match glob::glob(&full_pattern) { + Ok(entries) => { + for entry in entries.flatten() { + if 
entry.extension().map_or(false, |ext| ext == "purs") { + paths.push(entry); + } + } + } + Err(e) => eprintln!(" Invalid glob {pattern}: {e}"), + } + } + paths + }); + eprintln!(" {} .purs files", file_paths.len()); + + // Phase 3: Read all sources in parallel + let sources: Vec<(String, String)> = phase!("Read sources (parallel)", { + file_paths + .par_iter() + .filter_map(|entry| { + let source = std::fs::read_to_string(entry).ok()?; + let abs = entry.canonicalize().unwrap_or_else(|_| entry.clone()); + Some((abs.to_string_lossy().into_owned(), source)) + }) + .collect() + }); + eprintln!(" {} files read", sources.len()); + + // Phase 4: Build with incremental cache + let source_refs: Vec<(&str, &str)> = sources + .iter() + .map(|(p, s)| (p.as_str(), s.as_str())) + .collect(); + + let options = BuildOptions { + output_dir: None, + ..Default::default() + }; + + let cache_dir = args.cache_dir.as_ref().map(|d| { + if d.is_relative() { + args.path.join(d) + } else { + d.clone() + } + }); + + let mut cache = if let Some(ref dir) = cache_dir { + phase!("Load cache from disk", { + match build::cache::ModuleCache::load_from_disk(dir) { + Ok(c) => { + eprintln!(" loaded cache from {}", dir.display()); + c + } + Err(_) => { + eprintln!(" no existing cache, starting fresh"); + build::cache::ModuleCache::new() + } + } + }) + } else { + build::cache::ModuleCache::new() + }; + + let (result, _registry, build_parsed_modules) = phase!("Build (incremental)", { + build::build_from_sources_incremental(&source_refs, &None, None, &options, &mut cache) + }); + + phase!("Build reverse deps", { + cache.build_reverse_deps(); + }); + + if let Some(ref dir) = cache_dir { + phase!("Save cache to disk", { + if let Err(e) = cache.save_to_disk(dir) { + eprintln!(" failed to save cache: {e}"); + } + }); + } + + let error_count: usize = result.modules.iter().map(|m| m.type_errors.len()).sum(); + let module_count = result.modules.len(); + let error_module_count = result.modules.iter().filter(|m| 
!m.type_errors.is_empty()).count(); + let cached_count = result.modules.iter().filter(|m| m.cached).count(); + eprintln!( + " {} modules ({} cached, {} errors in {} modules)", + module_count, cached_count, error_count, error_module_count + ); + + // Phase 5: Parse cache-hit sources + let already_parsed: HashSet = build_parsed_modules + .iter() + .map(|(p, _)| p.to_string_lossy().into_owned()) + .collect(); + + let cache_hit_sources: Vec<_> = sources + .iter() + .filter(|(path, _)| !already_parsed.contains(path.as_str())) + .collect(); + + let extra_count = cache_hit_sources.len(); + + let mut all_modules: Vec<(PathBuf, purescript_fast_compiler::CstModule)> = build_parsed_modules; + + phase!(format!("Parse cache-hits ({extra_count} modules)"), { + let extra: Vec<_> = cache_hit_sources + .par_iter() + .filter_map(|(path, source)| { + purescript_fast_compiler::parse(source) + .ok() + .map(|m| (PathBuf::from(path.as_str()), m)) + }) + .collect(); + all_modules.extend(extra); + }); + + // Phase 6: Build definition index + let index = phase!(format!("Build definition index ({} modules)", all_modules.len()), { + let mut index = DefinitionIndex::new(); + for (path, module) in &all_modules { + index.add_module(module, &path.to_string_lossy()); + } + index + }); + + // Phase 7: Build ResolutionExports + let exports = phase!("Build ResolutionExports", { + let just_modules: Vec = + all_modules.into_iter().map(|(_, m)| m).collect(); + ResolutionExports::new(&just_modules) + }); + + // Phase 8: Save LSP snapshots + if let Some(ref dir) = cache_dir { + let lsp_dir = dir.join("lsp"); + phase!("Save registry snapshot", { + if let Err(e) = build::cache::save_registry_snapshot(&_registry, &lsp_dir.join("registry.bin")) { + eprintln!(" failed: {e}"); + } + }); + phase!("Save def_index snapshot", { + if let Err(e) = index.save_to_disk(&lsp_dir.join("def_index.bin")) { + eprintln!(" failed: {e}"); + } + }); + phase!("Save resolution_exports snapshot", { + if let Err(e) = 
exports.save_to_disk(&lsp_dir.join("resolution_exports.bin")) { + eprintln!(" failed: {e}"); + } + }); + + // Phase 9: Load LSP snapshots (benchmark restore time) + eprintln!(); + eprintln!(" --- Restore from cache (simulated warm startup) ---"); + phase!("Load registry snapshot", { + match build::cache::load_registry_snapshot(&lsp_dir.join("registry.bin")) { + Ok(_) => {}, + Err(e) => eprintln!(" failed: {e}"), + } + }); + phase!("Load def_index snapshot", { + match DefinitionIndex::load_from_disk(&lsp_dir.join("def_index.bin")) { + Ok(_) => {}, + Err(e) => eprintln!(" failed: {e}"), + } + }); + phase!("Load resolution_exports snapshot", { + match ResolutionExports::load_from_disk(&lsp_dir.join("resolution_exports.bin")) { + Ok(_) => {}, + Err(e) => eprintln!(" failed: {e}"), + } + }); + phase!("Load cache index", { + match build::cache::ModuleCache::load_from_disk(dir) { + Ok(_) => {}, + Err(e) => eprintln!(" failed: {e}"), + } + }); + } + + eprintln!(); + eprintln!(" {:.<50} {:>8.2?}", "TOTAL", total_start.elapsed()); +} diff --git a/src/build/cache.rs b/src/build/cache.rs index a9f45e27..215f0cfb 100644 --- a/src/build/cache.rs +++ b/src/build/cache.rs @@ -165,14 +165,24 @@ impl ModuleCache { } /// Look up the module name associated with a file path. + /// Canonicalizes the path for consistent lookups regardless of relative/absolute form. pub fn module_name_for_path(&self, path: &str) -> Option<&str> { - self.path_index.get(path).map(|s| s.as_str()) + let canonical = std::path::Path::new(path) + .canonicalize() + .map(|p| p.to_string_lossy().into_owned()) + .unwrap_or_else(|_| path.to_string()); + self.path_index.get(&canonical).map(|s| s.as_str()) } /// Register a file path → module name mapping. + /// Canonicalizes the path for consistent lookups regardless of relative/absolute form. 
pub fn register_path(&mut self, path: String, module_name: String) { - if self.path_index.get(&path).map(|s| s.as_str()) != Some(&module_name) { - self.path_index.insert(path, module_name); + let canonical = std::path::Path::new(&path) + .canonicalize() + .map(|p| p.to_string_lossy().into_owned()) + .unwrap_or_else(|_| path); + if self.path_index.get(&canonical).map(|s| s.as_str()) != Some(&module_name) { + self.path_index.insert(canonical, module_name); self.index_dirty = true; } } @@ -401,3 +411,85 @@ fn load_module_file(path: &Path) -> io::Result { let st = StringTableReader::new(cache_file.string_table); Ok(cache_file.exports.to_exports(&st)) } + +// ===== Registry Snapshot (single-file save/load for entire ModuleRegistry) ===== + +use crate::typechecker::registry::ModuleRegistry; +use crate::interner; + +#[derive(Serialize, Deserialize)] +struct RegistrySnapshot { + string_table: Vec, + /// Each entry: (module_parts as Vec, portable exports) + modules: Vec<(Vec, PModuleExports)>, +} + +/// Save the entire registry to a single compressed file. 
+pub fn save_registry_snapshot(registry: &ModuleRegistry, path: &Path) -> io::Result<()> { + let mut st = StringTableBuilder::new(); + let modules: Vec<(Vec, PModuleExports)> = registry + .iter_all() + .iter() + .map(|(name_parts, exports)| { + let parts: Vec = name_parts.iter().map(|s| st.add(*s)).collect(); + let pexports = PModuleExports::from_exports(exports, &mut st); + (parts, pexports) + }) + .collect(); + + let snapshot = RegistrySnapshot { + string_table: st.into_table(), + modules, + }; + + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + + let encoded = bincode::serialize(&snapshot) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + let compressed = zstd::bulk::compress(&encoded, 1) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + std::fs::write(path, compressed) +} + +/// Load a registry from a single compressed snapshot file. +pub fn load_registry_snapshot(path: &Path) -> io::Result { + let compressed = std::fs::read(path)?; + let data = zstd::bulk::decompress(&compressed, 256 * 1024 * 1024) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + let snapshot: RegistrySnapshot = bincode::deserialize(&data) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + + let st = StringTableReader::new(snapshot.string_table); + let mut registry = ModuleRegistry::new(); + for (parts, pexports) in &snapshot.modules { + let name: Vec = parts.iter().map(|&idx| st.sym(idx)).collect(); + let exports = pexports.to_exports(&st); + registry.register(&name, exports); + } + Ok(registry) +} + +// ===== Module File Map Snapshot ===== + +/// Save module_file_map (HashMap) to disk. 
+pub fn save_module_file_map(map: &HashMap, path: &Path) -> io::Result<()> { + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + let encoded = bincode::serialize(map) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + let compressed = zstd::bulk::compress(&encoded, 1) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + std::fs::write(path, compressed) +} + +/// Load module_file_map from disk. +pub fn load_module_file_map(path: &Path) -> io::Result> { + let compressed = std::fs::read(path)?; + let data = zstd::bulk::decompress(&compressed, 64 * 1024 * 1024) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + bincode::deserialize(&data) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}"))) +} diff --git a/src/build/mod.rs b/src/build/mod.rs index 680fedef..56a49d7e 100644 --- a/src/build/mod.rs +++ b/src/build/mod.rs @@ -263,7 +263,8 @@ pub fn build_from_sources_with_options( start_registry: Option>, options: &BuildOptions, ) -> (BuildResult, ModuleRegistry) { - build_from_sources_impl(sources, js_sources, start_registry, options, None) + let (result, registry, _) = build_from_sources_impl(sources, js_sources, start_registry, options, None); + (result, registry) } /// Build with incremental caching support. 
@@ -275,7 +276,7 @@ pub fn build_from_sources_incremental( start_registry: Option>, options: &BuildOptions, cache: &mut cache::ModuleCache, -) -> (BuildResult, ModuleRegistry) { +) -> (BuildResult, ModuleRegistry, Vec<(PathBuf, Module)>) { build_from_sources_impl(sources, js_sources, start_registry, options, Some(cache)) } @@ -285,7 +286,7 @@ fn build_from_sources_impl( start_registry: Option>, options: &BuildOptions, mut cache: Option<&mut cache::ModuleCache>, -) -> (BuildResult, ModuleRegistry) { +) -> (BuildResult, ModuleRegistry, Vec<(PathBuf, Module)>) { let pipeline_start = Instant::now(); let mut build_errors = Vec::new(); // Phase 2c: Parse source files (with cache-aware skipping) @@ -320,6 +321,7 @@ fn build_from_sources_impl( // Step 3: Parse only non-cached sources in parallel (use a pool with large stacks // since the parser can recurse deeply on complex files) let parse_pool = rayon::ThreadPoolBuilder::new() + .thread_name(|i| format!("pfc-parse-{i}")) .stack_size(16 * 1024 * 1024) .build() .expect("failed to build parse thread pool"); @@ -505,7 +507,7 @@ fn build_from_sources_impl( Some(base) => ModuleRegistry::with_base(base), None => ModuleRegistry::default(), }; - return (BuildResult { modules: Vec::new(), build_errors }, registry); + return (BuildResult { modules: Vec::new(), build_errors }, registry, Vec::new()); } // Phase 3: Build dependency graph and check for unknown imports @@ -584,7 +586,7 @@ fn build_from_sources_impl( if !build_errors.is_empty() { log::debug!("Phase 3 failed"); - return (BuildResult { modules: Vec::new(), build_errors }, registry); + return (BuildResult { modules: Vec::new(), build_errors }, registry, Vec::new()); } // Phase 4: Typecheck in dependency order @@ -607,6 +609,7 @@ fn build_from_sources_impl( .unwrap_or(1) }; let pool = rayon::ThreadPoolBuilder::new() + .thread_name(|i| format!("pfc-typecheck-{i}")) .num_threads(num_threads) .stack_size(16 * 1024 * 1024) .build() @@ -1090,12 +1093,18 @@ fn 
build_from_sources_impl( build_errors.len() ); + let returned_modules: Vec<(PathBuf, Module)> = parsed + .into_iter() + .filter_map(|pm| pm.module.map(|m| (pm.path, m))) + .collect(); + ( BuildResult { modules: module_results, build_errors, }, registry, + returned_modules, ) } diff --git a/src/lsp/handlers/completion.rs b/src/lsp/handlers/completion.rs index b1f5b3ea..5bd1d17d 100644 --- a/src/lsp/handlers/completion.rs +++ b/src/lsp/handlers/completion.rs @@ -14,7 +14,7 @@ impl Backend { &self, params: CompletionParams, ) -> Result> { - if !self.ready.load(std::sync::atomic::Ordering::SeqCst) { + if !self.is_ready() { return Ok(None); } diff --git a/src/lsp/handlers/definition.rs b/src/lsp/handlers/definition.rs index b969c868..c6af3a93 100644 --- a/src/lsp/handlers/definition.rs +++ b/src/lsp/handlers/definition.rs @@ -1,5 +1,3 @@ -use std::sync::atomic::Ordering; - use tower_lsp::jsonrpc::Result; use tower_lsp::lsp_types::*; @@ -13,7 +11,7 @@ impl Backend { &self, params: GotoDefinitionParams, ) -> Result> { - if !self.ready.load(Ordering::SeqCst) { + if !self.is_ready() { return Ok(None); } @@ -100,10 +98,7 @@ impl Backend { mf.get(reexport_module).cloned() }; if let Some(reexport_uri) = reexport_uri { - let target_source = { - let sm = self.source_map.read().await; - sm.get(&reexport_uri).cloned() - }; + let target_source = self.get_source_for_uri(&reexport_uri).await; if let Some(target_source) = target_source { if let Ok(parsed_uri) = Url::parse(&reexport_uri) { if let Some(loc) = span_to_location(&parsed_uri, &target_source, loc.span) { @@ -120,10 +115,7 @@ impl Backend { }; if let Some(def_loc) = def_loc { - let target_source = { - let sm = self.source_map.read().await; - sm.get(&target_uri).cloned() - }; + let target_source = self.get_source_for_uri(&target_uri).await; if let Some(target_source) = target_source { if let Ok(parsed_uri) = Url::parse(&target_uri) { @@ -226,10 +218,7 @@ impl Backend { mf.get(&target_module).cloned() }?; - let target_source = 
{ - let sm = self.source_map.read().await; - sm.get(&target_uri).cloned() - }?; + let target_source = self.get_source_for_uri(&target_uri).await?; let parsed_uri = Url::parse(&target_uri).ok()?; let loc = span_to_location(&parsed_uri, &target_source, def_loc.span)?; diff --git a/src/lsp/handlers/diagnostics.rs b/src/lsp/handlers/diagnostics.rs index ae49280e..a564c83f 100644 --- a/src/lsp/handlers/diagnostics.rs +++ b/src/lsp/handlers/diagnostics.rs @@ -1,5 +1,4 @@ use std::fmt::Display; -use std::sync::atomic::Ordering; use tower_lsp::lsp_types::*; @@ -28,7 +27,7 @@ impl Backend { } // Don't publish diagnostics until sources are loaded - if !self.ready.load(Ordering::SeqCst) { + if !self.is_ready() { return; } diff --git a/src/lsp/handlers/hover.rs b/src/lsp/handlers/hover.rs index c4a67cf9..f5e6cd45 100644 --- a/src/lsp/handlers/hover.rs +++ b/src/lsp/handlers/hover.rs @@ -1,5 +1,3 @@ -use std::sync::atomic::Ordering; - use tower_lsp::jsonrpc::Result; use tower_lsp::lsp_types::*; @@ -22,7 +20,7 @@ enum HoverTarget { impl Backend { pub(crate) async fn handle_hover(&self, params: HoverParams) -> Result> { - if !self.ready.load(Ordering::SeqCst) { + if !self.is_ready() { return Ok(None); } @@ -282,11 +280,7 @@ impl Backend { Some(u) => u, None => return Vec::new(), }; - let target_source = { - let sm = self.source_map.read().await; - sm.get(&target_uri).cloned() - }; - let target_source = match target_source { + let target_source = match self.get_source_for_uri(&target_uri).await { Some(s) => s, None => return Vec::new(), }; @@ -364,10 +358,7 @@ impl Backend { let mf = self.module_file_map.read().await; mf.get(module_name).cloned() }?; - let target_source = { - let sm = self.source_map.read().await; - sm.get(&target_uri).cloned() - }?; + let target_source = self.get_source_for_uri(&target_uri).await?; let target_module = crate::parser::parse(&target_source).ok()?; find_cst_kind(&target_module.decls, name_str, &target_source) } diff --git 
a/src/lsp/handlers/load_sources.rs b/src/lsp/handlers/load_sources.rs index bd280b6b..9c4e1a73 100644 --- a/src/lsp/handlers/load_sources.rs +++ b/src/lsp/handlers/load_sources.rs @@ -4,21 +4,80 @@ use std::sync::atomic::Ordering; use rayon::prelude::*; use tower_lsp::lsp_types::*; +use crate::build::cache; use crate::build::BuildOptions; use crate::lsp::utils::find_definition::DefinitionIndex; -use super::super::Backend; +use super::super::{Backend, LOAD_STATE_CACHE_LOADED, LOAD_STATE_READY}; impl Backend { pub(crate) async fn load_sources(&self) { let cmd = match &self.sources_cmd { Some(cmd) => cmd.clone(), None => { - self.ready.store(true, Ordering::SeqCst); + self.load_state.store(LOAD_STATE_READY, Ordering::SeqCst); return; } }; + // Phase A: Try to restore from disk cache (fast path, ~50-100ms) + if let Some(ref cache_dir) = self.cache_dir { + let lsp_dir = cache_dir.join("lsp"); + let start = std::time::Instant::now(); + + let registry_path = lsp_dir.join("registry.bin"); + let def_index_path = lsp_dir.join("def_index.bin"); + let resolution_exports_path = lsp_dir.join("resolution_exports.bin"); + let module_file_map_path = lsp_dir.join("module_file_map.bin"); + + // Load all snapshots (attempt all, succeed if all present) + let reg_result = cache::load_registry_snapshot(®istry_path); + let idx_result = DefinitionIndex::load_from_disk(&def_index_path); + let re_result = crate::lsp::utils::resolve::ResolutionExports::load_from_disk(&resolution_exports_path); + let mfmap_result = cache::load_module_file_map(&module_file_map_path); + let cache_result = cache::ModuleCache::load_from_disk(cache_dir); + + // Always load the module cache if available (shared with CLI builds) + if let Ok(c) = cache_result { + log::info!("Loaded module cache from disk in {:.2?}", start.elapsed()); + let mut mc = self.module_cache.write().await; + *mc = c; + } + + if let (Ok(reg), Ok(idx), Ok(re), Ok(mfmap)) = (reg_result, idx_result, re_result, mfmap_result) { + 
log::info!("Restored LSP state from cache in {:.2?}", start.elapsed()); + + // Store cached state + { + let mut r = self.registry.write().await; + *r = reg; + } + { + let mut i = self.def_index.write().await; + *i = idx; + } + { + let mut e = self.resolution_exports.write().await; + *e = re; + } + { + let mut m = self.module_file_map.write().await; + *m = mfmap; + } + + // Mark as cache-loaded — handlers can now serve requests + self.load_state.store(LOAD_STATE_CACHE_LOADED, Ordering::SeqCst); + self.info(format!("Cache loaded in {:.2?}, starting background verification", start.elapsed())).await; + } else { + log::info!("No complete LSP snapshots found, doing full build (module cache may still help)"); + } + } + + // Phase B: Full build in background (updates state when done) + self.spawn_full_build(cmd).await; + } + + async fn spawn_full_build(&self, cmd: String) { // Create a progress token for the loading spinner let token = NumberOrString::String("pfc-loading".to_string()); let _ = self @@ -28,12 +87,18 @@ impl Backend { }) .await; + let already_cached = self.load_state.load(Ordering::SeqCst) >= LOAD_STATE_CACHE_LOADED; + self.client .send_notification::(ProgressParams { token: token.clone(), value: ProgressParamsValue::WorkDone(WorkDoneProgress::Begin( WorkDoneProgressBegin { - title: "Loading PureScript sources".to_string(), + title: if already_cached { + "Verifying PureScript sources".to_string() + } else { + "Loading PureScript sources".to_string() + }, message: Some(format!("Running: {cmd}")), cancellable: Some(false), percentage: None, @@ -49,7 +114,9 @@ impl Backend { let module_file_map = self.module_file_map.clone(); let source_map = self.source_map.clone(); let module_cache = self.module_cache.clone(); - let ready = self.ready.clone(); + let load_state = self.load_state.clone(); + let cache_dir = self.cache_dir.clone(); + let files = self.files.clone(); let progress_token = token.clone(); let rt_handle = tokio::runtime::Handle::current(); @@ -67,7 
+134,7 @@ impl Backend { Ok(output) => output, Err(e) => { log::error!("Failed to run sources command: {e}"); - ready.store(true, Ordering::SeqCst); + load_state.store(LOAD_STATE_READY, Ordering::SeqCst); return; } }; @@ -75,7 +142,7 @@ impl Backend { if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); log::error!("Sources command failed: {stderr}"); - ready.store(true, Ordering::SeqCst); + load_state.store(LOAD_STATE_READY, Ordering::SeqCst); return; } @@ -172,16 +239,23 @@ impl Backend { }; // Use incremental build with cache - let mut cache = rt.block_on(async { module_cache.write().await }); - let (result, new_registry) = crate::build::build_from_sources_incremental( + let mut mcache = rt.block_on(async { module_cache.write().await }); + let (result, new_registry, mut build_parsed_modules) = crate::build::build_from_sources_incremental( &source_refs, &None, None, &options, - &mut cache, + &mut mcache, ); - cache.build_reverse_deps(); - drop(cache); + mcache.build_reverse_deps(); + + // Save module cache to disk + if let Some(ref dir) = cache_dir { + if let Err(e) = mcache.save_to_disk(dir) { + log::warn!("Failed to save module cache: {e}"); + } + } + drop(mcache); let error_count: usize = result.modules.iter().map(|m| m.type_errors.len()).sum(); let module_count = result.modules.len(); @@ -191,40 +265,78 @@ impl Backend { .filter(|m| !m.type_errors.is_empty()) .count(); - // Parse all sources in parallel for definition index - let parse_results: Vec<_> = sources - .par_iter() - .map(|(path, source)| { - let file_uri = Url::from_file_path(path) - .map(|u| u.to_string()) - .unwrap_or_default(); - match crate::parser::parse(source) { - Ok(module) => { - let mod_name = format!("{}", module.name.value); - (path.clone(), file_uri, source.clone(), Some((module, mod_name))) - } - Err(_) => { - (path.clone(), file_uri, source.clone(), None) - } - } - }) + // Collect paths of modules already parsed by the build pipeline + let 
already_parsed: std::collections::HashSet = build_parsed_modules + .iter() + .map(|(p, _)| p.to_string_lossy().into_owned()) + .collect(); + + // Parse only cache-hit sources that weren't parsed by the build (in parallel) + let cache_hit_sources: Vec<_> = sources + .iter() + .filter(|(path, _)| !already_parsed.contains(path.as_str())) .collect(); - // Merge results sequentially (add_module takes &mut self) + let parse_pool = rayon::ThreadPoolBuilder::new() + .thread_name(|i| format!("pfc-lsp-parse-{i}")) + .stack_size(16 * 1024 * 1024) + .build() + .expect("failed to build parse thread pool"); + let extra_parsed: Vec<_> = parse_pool.install(|| { + cache_hit_sources + .par_iter() + .filter_map(|(path, source)| { + crate::parser::parse(source) + .ok() + .map(|m| (std::path::PathBuf::from(path.as_str()), m)) + }) + .collect() + }); + + build_parsed_modules.extend(extra_parsed); + + // Build definition index and source/module maps let mut index = DefinitionIndex::new(); - let mut smap = HashMap::with_capacity(parse_results.len()); + let mut smap = HashMap::with_capacity(sources.len()); let mut mfmap = HashMap::new(); - let mut parsed_modules = Vec::new(); - for (path, file_uri, source, parsed) in parse_results { - if let Some((module, mod_name)) = parsed { - index.add_module(&module, &path); - mfmap.insert(mod_name, file_uri.clone()); - parsed_modules.push(module); - } - smap.insert(file_uri, source); + + for (path, module) in &build_parsed_modules { + let path_str = path.to_string_lossy(); + let file_uri = Url::from_file_path(path_str.as_ref()) + .map(|u| u.to_string()) + .unwrap_or_default(); + let mod_name = format!("{}", module.name.value); + index.add_module(module, &path_str); + mfmap.insert(mod_name, file_uri); } - let exports = crate::lsp::utils::resolve::ResolutionExports::new(&parsed_modules); + // Build source map from all sources (doesn't need parsing) + for (path, source) in &sources { + let file_uri = Url::from_file_path(path) + .map(|u| u.to_string()) + 
.unwrap_or_default(); + smap.insert(file_uri, source.clone()); + } + + let just_modules: Vec = build_parsed_modules.into_iter().map(|(_, m)| m).collect(); + let exports = crate::lsp::utils::resolve::ResolutionExports::new(&just_modules); + + // Save LSP snapshots to disk for next startup + if let Some(ref dir) = cache_dir { + let lsp_dir = dir.join("lsp"); + if let Err(e) = cache::save_registry_snapshot(&new_registry, &lsp_dir.join("registry.bin")) { + log::warn!("Failed to save registry snapshot: {e}"); + } + if let Err(e) = index.save_to_disk(&lsp_dir.join("def_index.bin")) { + log::warn!("Failed to save def_index snapshot: {e}"); + } + if let Err(e) = exports.save_to_disk(&lsp_dir.join("resolution_exports.bin")) { + log::warn!("Failed to save resolution_exports snapshot: {e}"); + } + if let Err(e) = cache::save_module_file_map(&mfmap, &lsp_dir.join("module_file_map.bin")) { + log::warn!("Failed to save module_file_map snapshot: {e}"); + } + } // Store the registry, index, source map and mark as ready rt.block_on(async { @@ -238,7 +350,24 @@ impl Backend { *mf = mfmap; let mut sm = source_map.write().await; *sm = smap; - ready.store(true, Ordering::SeqCst); + load_state.store(LOAD_STATE_READY, Ordering::SeqCst); + + // Re-typecheck any files the user edited during loading + let edited_files: Vec<(String, String)> = { + let f = files.read().await; + f.iter() + .map(|(uri, fs)| (uri.clone(), fs.source.clone())) + .collect() + }; + drop(reg); + drop(idx); + drop(re); + drop(mf); + drop(sm); + + if !edited_files.is_empty() { + log::info!("Re-checking {} files edited during loading", edited_files.len()); + } // End progress client diff --git a/src/lsp/mod.rs b/src/lsp/mod.rs index 3d7f703a..d2fdb008 100644 --- a/src/lsp/mod.rs +++ b/src/lsp/mod.rs @@ -2,7 +2,8 @@ mod handlers; pub mod utils; use std::collections::HashMap; -use std::sync::atomic::AtomicBool; +use std::path::PathBuf; +use std::sync::atomic::{AtomicU8, Ordering}; use std::sync::Arc; use 
tokio::sync::RwLock; @@ -21,6 +22,12 @@ pub(crate) struct FileState { pub module_name: Option, } +/// Load state for progressive LSP initialization. +/// 0 = Initializing (no data), 1 = CacheLoaded (from disk, may be stale), 2 = Ready (authoritative) +pub(crate) const LOAD_STATE_INITIALIZING: u8 = 0; +pub(crate) const LOAD_STATE_CACHE_LOADED: u8 = 1; +pub(crate) const LOAD_STATE_READY: u8 = 2; + pub struct Backend { pub(crate) client: Client, pub(crate) files: Arc>>, @@ -33,7 +40,8 @@ pub struct Backend { pub(crate) source_map: Arc>>, pub(crate) module_cache: Arc>, pub(crate) sources_cmd: Option, - pub(crate) ready: Arc, + pub(crate) cache_dir: Option, + pub(crate) load_state: Arc, } #[tower_lsp::async_trait] @@ -137,7 +145,7 @@ impl Backend { Ok(serde_json::json!({ "success": true })) } - pub fn new(client: Client, sources_cmd: Option) -> Self { + pub fn new(client: Client, sources_cmd: Option, cache_dir: Option) -> Self { Backend { client, files: Arc::new(RwLock::new(HashMap::new())), @@ -148,12 +156,39 @@ impl Backend { source_map: Arc::new(RwLock::new(HashMap::new())), module_cache: Arc::new(RwLock::new(ModuleCache::default())), sources_cmd, - ready: Arc::new(AtomicBool::new(false)), + cache_dir, + load_state: Arc::new(AtomicU8::new(LOAD_STATE_INITIALIZING)), + } + } + + /// Check if the LSP has loaded enough state to serve requests. + pub(crate) fn is_ready(&self) -> bool { + self.load_state.load(Ordering::SeqCst) >= LOAD_STATE_CACHE_LOADED + } + + /// Get source for a file URI, with lazy loading from disk. + /// Tries source_map first, falls back to reading the file. 
+ pub(crate) async fn get_source_for_uri(&self, uri: &str) -> Option { + // Check source_map first + { + let sm = self.source_map.read().await; + if let Some(source) = sm.get(uri) { + return Some(source.clone()); + } + } + // Lazy load from disk + let file_path = Url::parse(uri).ok()?.to_file_path().ok()?; + let source = std::fs::read_to_string(&file_path).ok()?; + // Cache it for next time + { + let mut sm = self.source_map.write().await; + sm.insert(uri.to_string(), source.clone()); } + Some(source) } } -pub fn run_server(sources_cmd: Option) { +pub fn run_server(sources_cmd: Option, cache_dir: Option) { let rt = tokio::runtime::Builder::new_multi_thread() .enable_all() .thread_stack_size(16 * 1024 * 1024) // 16 MB — typechecker needs deep recursion @@ -163,7 +198,7 @@ pub fn run_server(sources_cmd: Option) { let stdin = tokio::io::stdin(); let stdout = tokio::io::stdout(); - let (service, socket) = LspService::build(|client| Backend::new(client, sources_cmd)) + let (service, socket) = LspService::build(|client| Backend::new(client, sources_cmd, cache_dir)) .custom_method("pfc/rebuildModule", Backend::rebuild_module) .custom_method("pfc/rebuildProject", Backend::rebuild_project) .finish(); diff --git a/src/lsp/utils/find_definition.rs b/src/lsp/utils/find_definition.rs index 9c067ec1..7f2879ac 100644 --- a/src/lsp/utils/find_definition.rs +++ b/src/lsp/utils/find_definition.rs @@ -1,7 +1,11 @@ use std::collections::HashMap; +use std::io; +use std::path::Path; + +use serde::{Deserialize, Serialize}; use crate::cst::*; -use crate::interner::Symbol; +use crate::interner::{self, Symbol}; use crate::span::Span; /// What kind of reference we found at the cursor @@ -263,6 +267,80 @@ impl DefinitionIndex { }), } } + + // ===== Disk Serialization ===== + + pub fn save_to_disk(&self, path: &Path) -> io::Result<()> { + let snapshot = PortableDefIndex { + values: serialize_map(&self.values), + types: serialize_map(&self.types), + constructors: 
serialize_map(&self.constructors), + }; + let encoded = bincode::serialize(&snapshot) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + let compressed = zstd::bulk::compress(&encoded, 1) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + std::fs::write(path, compressed) + } + + pub fn load_from_disk(path: &Path) -> io::Result { + let compressed = std::fs::read(path)?; + let data = zstd::bulk::decompress(&compressed, 128 * 1024 * 1024) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + let snapshot: PortableDefIndex = bincode::deserialize(&data) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + Ok(DefinitionIndex { + values: deserialize_map(&snapshot.values), + types: deserialize_map(&snapshot.types), + constructors: deserialize_map(&snapshot.constructors), + }) + } +} + +#[derive(Serialize, Deserialize)] +struct PortableDefEntry { + module_name: String, + symbol_name: String, + file_path: String, + span_start: usize, + span_end: usize, +} + +#[derive(Serialize, Deserialize)] +struct PortableDefIndex { + values: Vec, + types: Vec, + constructors: Vec, +} + +fn serialize_map(map: &HashMap<(String, Symbol), DefLocation>) -> Vec { + map.iter() + .map(|((module_name, sym), loc)| PortableDefEntry { + module_name: module_name.clone(), + symbol_name: interner::resolve(*sym).unwrap_or_default(), + file_path: loc.file_path.clone(), + span_start: loc.span.start, + span_end: loc.span.end, + }) + .collect() +} + +fn deserialize_map(entries: &[PortableDefEntry]) -> HashMap<(String, Symbol), DefLocation> { + entries + .iter() + .map(|e| { + ( + (e.module_name.clone(), interner::intern(&e.symbol_name)), + DefLocation { + file_path: e.file_path.clone(), + span: Span::new(e.span_start, e.span_end), + }, + ) + }) + .collect() } /// Maps imported names to their source modules. 
diff --git a/src/lsp/utils/resolve.rs b/src/lsp/utils/resolve.rs index 50538805..188cd7c0 100644 --- a/src/lsp/utils/resolve.rs +++ b/src/lsp/utils/resolve.rs @@ -1,5 +1,9 @@ use std::collections::{HashMap, HashSet}; +use std::io; +use std::path::Path; + +use serde::{Deserialize, Serialize}; use crate::span::Span; use crate::cst::{ @@ -119,6 +123,75 @@ impl ResolutionExports { fn get(&self, module: Symbol) -> Option<&ModuleResolvedNames> { self.modules.get(&module) } + + // ===== Disk Serialization ===== + + pub fn save_to_disk(&self, path: &Path) -> io::Result<()> { + let snapshot: HashMap = self + .modules + .iter() + .map(|(sym, names)| { + let key = interner::resolve(*sym).unwrap_or_default(); + (key, PModuleResolvedNames::from_names(names)) + }) + .collect(); + let encoded = bincode::serialize(&snapshot) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + let compressed = zstd::bulk::compress(&encoded, 1) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + std::fs::write(path, compressed) + } + + pub fn load_from_disk(path: &Path) -> io::Result { + let compressed = std::fs::read(path)?; + let data = zstd::bulk::decompress(&compressed, 128 * 1024 * 1024) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + let snapshot: HashMap = bincode::deserialize(&data) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + let modules = snapshot + .into_iter() + .map(|(key, pnames)| (interner::intern(&key), pnames.to_names())) + .collect(); + Ok(ResolutionExports { modules }) + } +} + +#[derive(Serialize, Deserialize)] +struct PModuleResolvedNames { + values: Vec, + types: Vec, + classes: Vec, + type_operators: Vec, + data_constructors: HashMap>, +} + +impl PModuleResolvedNames { + fn from_names(names: &ModuleResolvedNames) -> Self { + PModuleResolvedNames { + values: 
names.values.iter().map(|s| interner::resolve(*s).unwrap_or_default()).collect(), + types: names.types.iter().map(|s| interner::resolve(*s).unwrap_or_default()).collect(), + classes: names.classes.iter().map(|s| interner::resolve(*s).unwrap_or_default()).collect(), + type_operators: names.type_operators.iter().map(|s| interner::resolve(*s).unwrap_or_default()).collect(), + data_constructors: names.data_constructors.iter().map(|(k, v)| { + (interner::resolve(*k).unwrap_or_default(), v.iter().map(|s| interner::resolve(*s).unwrap_or_default()).collect()) + }).collect(), + } + } + + fn to_names(&self) -> ModuleResolvedNames { + ModuleResolvedNames { + values: self.values.iter().map(|s| interner::intern(s)).collect(), + types: self.types.iter().map(|s| interner::intern(s)).collect(), + classes: self.classes.iter().map(|s| interner::intern(s)).collect(), + type_operators: self.type_operators.iter().map(|s| interner::intern(s)).collect(), + data_constructors: self.data_constructors.iter().map(|(k, v)| { + (interner::intern(k), v.iter().map(|s| interner::intern(s)).collect()) + }).collect(), + } + } } /// Result of name resolution for a module. 
diff --git a/src/main.rs b/src/main.rs index 9999ad04..88dac18d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -32,6 +32,10 @@ enum Commands { /// Shell command that outputs source file paths (one per line) #[arg(long)] sources_cmd: Option, + + /// Directory for disk cache (enables fast warm startup) + #[arg(long)] + cache_dir: Option, }, } @@ -49,8 +53,13 @@ fn main() { .init(); match cli.command { - Commands::Lsp { sources_cmd } => { - purescript_fast_compiler::lsp::run_server(sources_cmd); + Commands::Lsp { sources_cmd, cache_dir } => { + // Default to the same cache dir as CLI compile (output/.pfc-cache) + let cache_dir = cache_dir.or_else(|| { + let default = PathBuf::from("output/.pfc-cache"); + if default.exists() { Some(default) } else { None } + }); + purescript_fast_compiler::lsp::run_server(sources_cmd, cache_dir); } Commands::Compile { globs, output } => { log::debug!("Starting compile with globs: {:?}", globs); diff --git a/tests/build.rs b/tests/build.rs index 072de60b..9c8f8565 100644 --- a/tests/build.rs +++ b/tests/build.rs @@ -595,6 +595,7 @@ fn build_fixture_original_compiler_failing() { // Run in a separate thread with a large stack to avoid stack overflows // from deeply recursive fixtures, and catch panics. 
let handle = std::thread::Builder::new() + .name("pfc-test-build".to_string()) .stack_size(64 * 1024 * 1024) // 64 MB stack .spawn(move || { let test_sources: Vec<(&str, &str)> = owned_sources @@ -1241,7 +1242,7 @@ fn incremental_build_caches_modules() { let mut cache = ModuleCache::new(); // First build: everything should be typechecked - let (result1, _) = build_from_sources_incremental(&sources, &None, None, &options, &mut cache); + let (result1, _, _) = build_from_sources_incremental(&sources, &None, None, &options, &mut cache); assert!(result1.build_errors.is_empty(), "First build should succeed"); assert_eq!(result1.modules.len(), 2); for m in &result1.modules { @@ -1253,7 +1254,7 @@ fn incremental_build_caches_modules() { assert!(cache.get_exports("ModB").is_some(), "ModB should be cached"); // Second build with same sources: should use cache (no rebuild needed) - let (result2, _) = build_from_sources_incremental(&sources, &None, None, &options, &mut cache); + let (result2, _, _) = build_from_sources_incremental(&sources, &None, None, &options, &mut cache); assert!(result2.build_errors.is_empty(), "Second build should succeed"); assert_eq!(result2.modules.len(), 2); for m in &result2.modules { @@ -1272,7 +1273,7 @@ fn incremental_build_rebuilds_changed_module() { let mut cache = ModuleCache::new(); // First build - let (result1, _) = build_from_sources_incremental(&sources_v1, &None, None, &options, &mut cache); + let (result1, _, _) = build_from_sources_incremental(&sources_v1, &None, None, &options, &mut cache); assert!(result1.build_errors.is_empty()); // Change ModA's source @@ -1282,7 +1283,7 @@ fn incremental_build_rebuilds_changed_module() { ]; // Second build: ModA changed, ModB depends on it, both should rebuild - let (result2, _) = build_from_sources_incremental(&sources_v2, &None, None, &options, &mut cache); + let (result2, _, _) = build_from_sources_incremental(&sources_v2, &None, None, &options, &mut cache); 
assert!(result2.build_errors.is_empty(), "Rebuild should succeed"); assert_eq!(result2.modules.len(), 2); for m in &result2.modules { @@ -1300,7 +1301,7 @@ fn incremental_build_disk_roundtrip() { let mut cache = ModuleCache::new(); // Build to populate cache - let (result, _) = build_from_sources_incremental(&sources, &None, None, &options, &mut cache); + let (result, _, _) = build_from_sources_incremental(&sources, &None, None, &options, &mut cache); assert!(result.build_errors.is_empty()); // Save to disk @@ -1313,7 +1314,7 @@ fn incremental_build_disk_roundtrip() { assert!(loaded_cache.get_exports("ModA").is_some(), "Loaded cache should have ModA"); // Build with loaded cache — should use cached entries - let (result2, _) = build_from_sources_incremental(&sources, &None, None, &options, &mut loaded_cache); + let (result2, _, _) = build_from_sources_incremental(&sources, &None, None, &options, &mut loaded_cache); assert!(result2.build_errors.is_empty(), "Build with loaded cache should succeed"); // Cleanup diff --git a/tests/lsp_e2e.rs b/tests/lsp_e2e.rs index d35fecef..81ad60c8 100644 --- a/tests/lsp_e2e.rs +++ b/tests/lsp_e2e.rs @@ -51,7 +51,7 @@ impl TestServer { let (req_client, req_server) = tokio::io::duplex(1024 * 64); let (resp_server, resp_client) = tokio::io::duplex(1024 * 64); - let (service, socket) = LspService::new(|client| Backend::new(client, sources_cmd)); + let (service, socket) = LspService::new(|client| Backend::new(client, sources_cmd, None)); tokio::spawn(Server::new(req_server, resp_server, socket).serve(service)); let writer = std::sync::Arc::new(Mutex::new(req_client)); @@ -198,7 +198,7 @@ async fn test_lsp_initialize_capabilities() { let (req_client, req_server) = tokio::io::duplex(1024 * 64); let (resp_server, resp_client) = tokio::io::duplex(1024 * 64); - let (service, socket) = LspService::new(|client| Backend::new(client, None)); + let (service, socket) = LspService::new(|client| Backend::new(client, None, None)); 
tokio::spawn(Server::new(req_server, resp_server, socket).serve(service)); let mut writer = req_client; From dd8dbf1c58a532d94150a26229e5ec8c99c1ab9f Mon Sep 17 00:00:00 2001 From: Rory Campbell Date: Wed, 11 Mar 2026 10:51:38 +0100 Subject: [PATCH 12/14] lsp is fast on OA project --- .gitignore | 3 +- src/build/cache.rs | 5 + src/js_ffi.rs | 2 +- src/lsp/handlers/completion.rs | 70 ++-- src/lsp/handlers/diagnostics.rs | 102 ++--- src/lsp/handlers/load_sources.rs | 643 +++++++++++++++++++++++++------ src/lsp/mod.rs | 152 +++++++- 7 files changed, 745 insertions(+), 232 deletions(-) diff --git a/.gitignore b/.gitignore index 5ba698fe..590097a2 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ Thumbs.db # lsp vscode client - /editors/code/node_modules /editors/code/out +/editors/code/.vscode-test +/editors/code/package-lock.json diff --git a/src/build/cache.rs b/src/build/cache.rs index 215f0cfb..b3136ed9 100644 --- a/src/build/cache.rs +++ b/src/build/cache.rs @@ -123,6 +123,11 @@ impl ModuleCache { Self::default() } + /// Returns true if the cache has any module entries (i.e. a prior build populated it). + pub fn has_entries(&self) -> bool { + !self.entries.is_empty() + } + /// Compute a content hash for a source string. 
pub fn content_hash(source: &str) -> u64 { let mut hasher = std::collections::hash_map::DefaultHasher::new(); diff --git a/src/js_ffi.rs b/src/js_ffi.rs index 3a2d273a..bd7cc526 100644 --- a/src/js_ffi.rs +++ b/src/js_ffi.rs @@ -326,7 +326,7 @@ pub fn validate_foreign_module( .cloned() .collect(); - let unused: Vec = info + let _unused: Vec = info .es_exports .iter() .filter(|name| !import_set.contains(name.as_str())) diff --git a/src/lsp/handlers/completion.rs b/src/lsp/handlers/completion.rs index 5bd1d17d..e575a6ab 100644 --- a/src/lsp/handlers/completion.rs +++ b/src/lsp/handlers/completion.rs @@ -53,7 +53,7 @@ impl Backend { // Find insert position for new imports (after last import, or after module header) let import_insert_line = find_import_insert_line(&source, &module); - let registry = self.registry.read().await; + let comp_index = self.completion_index.read().await; let mut items = Vec::new(); let mut seen = HashSet::new(); @@ -85,43 +85,44 @@ impl Backend { } // 2. Already-imported names (higher priority than unimported) - // 3. All exported values from all modules in the registry - for (mod_path, mod_exports) in registry.iter_all() { - let mod_name = interner::resolve_module_name(mod_path); - if mod_name == current_module_name { + // 3. 
All exported names from all modules via lightweight completion index + for (mod_name, mod_entries) in &comp_index.entries { + if mod_name == &current_module_name { continue; } - for (qi, scheme) in &mod_exports.values { - let name = match interner::resolve(qi.name) { - Some(n) => n.to_string(), - None => continue, - }; - if !name.starts_with(&prefix) { + for entry in mod_entries { + if !entry.name.starts_with(&prefix) { continue; } - if seen.contains(&name) { + if seen.contains(&entry.name) { continue; } - seen.insert(name.clone()); + seen.insert(entry.name.clone()); - let type_str = format!("{}", scheme.ty); - let is_imported = already_imported.contains(&name); - let is_constructor = name.starts_with(|c: char| c.is_uppercase()); + let is_imported = already_imported.contains(&entry.name); + let is_constructor = matches!(entry.kind, crate::lsp::CompletionEntryKind::Constructor); + + let kind = match entry.kind { + crate::lsp::CompletionEntryKind::Value => CompletionItemKind::FUNCTION, + crate::lsp::CompletionEntryKind::Constructor => CompletionItemKind::CONSTRUCTOR, + crate::lsp::CompletionEntryKind::Type => CompletionItemKind::CLASS, + crate::lsp::CompletionEntryKind::Class => CompletionItemKind::INTERFACE, + }; - let kind = if is_constructor { - CompletionItemKind::CONSTRUCTOR + let detail = if entry.type_string.is_empty() { + Some(mod_name.clone()) } else { - CompletionItemKind::FUNCTION + Some(format!("{mod_name} :: {}", entry.type_string)) }; // Imported items sort before unimported let sort_prefix = if is_imported { "1" } else { "2" }; let mut item = CompletionItem { - label: name.clone(), + label: entry.name.clone(), kind: Some(kind), - detail: Some(format!("{mod_name} :: {type_str}")), + detail, sort_text: Some(format!("{sort_prefix}{}", items.len())), ..Default::default() }; @@ -129,8 +130,8 @@ impl Backend { // Auto-import: add additional_text_edits if not already imported if !is_imported { if let Some(edit) = build_import_edit( - &mod_name, - &name, + mod_name,
+ &entry.name, is_constructor, &module, &source, @@ -142,29 +143,8 @@ impl Backend { items.push(item); } - - // Also add type constructors - for (type_qi, ctor_names) in &mod_exports.data_constructors { - for ctor_qi in ctor_names { - let ctor_name = match interner::resolve(ctor_qi.name) { - Some(n) => n.to_string(), - None => continue, - }; - if !ctor_name.starts_with(&prefix) { - continue; - } - if seen.contains(&ctor_name) { - continue; - } - // Only add if the constructor has a value entry (it's exported) - if !mod_exports.values.contains_key(ctor_qi) { - continue; - } - // Already handled in the values loop above - } - let _ = type_qi; - } } + drop(comp_index); Ok(Some(CompletionResponse::List(CompletionList { is_incomplete: items.len() > 100, diff --git a/src/lsp/handlers/diagnostics.rs b/src/lsp/handlers/diagnostics.rs index a564c83f..e0aa93ed 100644 --- a/src/lsp/handlers/diagnostics.rs +++ b/src/lsp/handlers/diagnostics.rs @@ -2,8 +2,10 @@ use std::fmt::Display; use tower_lsp::lsp_types::*; +use crate::cst::Module; use crate::interner; use crate::build::cache::ModuleCache; +use crate::typechecker::registry::ModuleRegistry; use super::super::{Backend, FileState}; @@ -14,7 +16,34 @@ impl Backend { .await; } + /// Ensure all modules imported by `module` have their exports loaded into the registry. + /// Loads missing exports lazily from the ModuleCache (which reads from disk on demand). 
+ async fn ensure_imports_loaded(&self, module: &Module, registry: &mut ModuleRegistry) { + for import_decl in &module.imports { + let import_parts = &import_decl.module.parts; + + // Skip if already in registry + if registry.lookup(import_parts).is_some() { + continue; + } + + let import_name = interner::resolve_module_name(import_parts); + + // Try to load from module cache (lazy disk load) + let exports = { + let mut cache = self.module_cache.write().await; + cache.get_exports(&import_name).cloned() + }; + + if let Some(exports) = exports { + registry.register(import_parts, exports); + log::debug!("Lazy-loaded exports for {import_name}"); + } + } + } + pub(crate) async fn on_change(&self, uri: Url, source: String) { + let on_change_start = std::time::Instant::now(); { let mut files = self.files.write().await; files.insert( @@ -31,6 +60,7 @@ impl Backend { return; } + let t = std::time::Instant::now(); let module = match crate::parser::parse(&source) { Ok(module) => { let module_name = format!("{}", module.name.value); @@ -58,13 +88,21 @@ impl Backend { return; } }; + self.info(format!("[on_change] parse: {:.2?}", t.elapsed())).await; let module_name = interner::resolve_module_name(&module.name.value.parts); let module_parts: Vec = module.name.value.parts.clone(); - // Type-check against the registry (use stacker to extend stack for deep recursion) + // Ensure imported modules' exports are in the registry (lazy load from cache) + let t = std::time::Instant::now(); let mut registry = self.registry.write().await; + self.ensure_imports_loaded(&module, &mut registry).await; + self.info(format!("[on_change] ensure_imports_loaded: {:.2?}", t.elapsed())).await; + + // Type-check against the registry + let t = std::time::Instant::now(); let check_result = crate::typechecker::check_module_with_registry(&module, &registry); + self.info(format!("[on_change] typecheck {module_name}: {:.2?}", t.elapsed())).await; // Update registry with new exports
registry.register(&module_parts, check_result.exports.clone()); @@ -76,10 +114,6 @@ impl Backend { .collect(); let mut cache = self.module_cache.write().await; cache.update(module_name.clone(), source_hash, check_result.exports, import_names); - cache.build_reverse_deps(); - - // Find transitive dependents that need re-checking - let dependents = cache.transitive_dependents(&module_name); drop(cache); // Publish diagnostics for the changed module @@ -88,63 +122,7 @@ impl Backend { .publish_diagnostics(uri, diagnostics, None) .await; - // Update source map - { - let mfmap = self.module_file_map.read().await; - let mut smap = self.source_map.write().await; - // Update the changed module's source in source_map - if let Some(file_uri) = mfmap.get(&module_name) { - smap.insert(file_uri.clone(), source); - } - } - - // Cascade: re-typecheck dependents - if !dependents.is_empty() { - log::debug!("Cascade rebuild: {} dependents of {}", dependents.len(), module_name); - - let mfmap = self.module_file_map.read().await; - let smap = self.source_map.read().await; - - for dep_name in &dependents { - let dep_uri_str = match mfmap.get(dep_name) { - Some(u) => u.clone(), - None => continue, - }; - let dep_source = match smap.get(&dep_uri_str) { - Some(s) => s.clone(), - None => continue, - }; - let dep_uri = match Url::parse(&dep_uri_str) { - Ok(u) => u, - Err(_) => continue, - }; - - let dep_module = match crate::parser::parse(&dep_source) { - Ok(m) => m, - Err(_) => continue, - }; - - let dep_result = crate::typechecker::check_module_with_registry(&dep_module, &registry); - - // Update registry with dependent's exports - let dep_parts: Vec = dep_module.name.value.parts.clone(); - registry.register(&dep_parts, dep_result.exports.clone()); - - // Update cache for dependent - let dep_hash = ModuleCache::content_hash(&dep_source); - let dep_imports: Vec = dep_module.imports.iter() - .map(|imp| interner::resolve_module_name(&imp.module.parts)) - .collect(); - let mut cache =
self.module_cache.write().await; - cache.update(dep_name.clone(), dep_hash, dep_result.exports, dep_imports); - drop(cache); - - let dep_diagnostics = type_errors_to_diagnostics(&dep_result.errors, &dep_source); - self.client - .publish_diagnostics(dep_uri, dep_diagnostics, None) - .await; - } - } + self.info(format!("[on_change] total: {:.2?}", on_change_start.elapsed())).await; } } diff --git a/src/lsp/handlers/load_sources.rs b/src/lsp/handlers/load_sources.rs index 9c4e1a73..337d6ba9 100644 --- a/src/lsp/handlers/load_sources.rs +++ b/src/lsp/handlers/load_sources.rs @@ -6,12 +6,17 @@ use tower_lsp::lsp_types::*; use crate::build::cache; use crate::build::BuildOptions; +use crate::cst::{self, Decl}; +use crate::interner; use crate::lsp::utils::find_definition::DefinitionIndex; +use crate::lsp::{CompletionEntry, CompletionEntryKind, CompletionIndex}; use super::super::{Backend, LOAD_STATE_CACHE_LOADED, LOAD_STATE_READY}; impl Backend { pub(crate) async fn load_sources(&self) { + let total_start = std::time::Instant::now(); + let cmd = match &self.sources_cmd { Some(cmd) => cmd.clone(), None => { @@ -20,38 +25,41 @@ impl Backend { } }; - // Phase A: Try to restore from disk cache (fast path, ~50-100ms) + // Phase A: Try to restore from disk cache (fast path) + let mut all_snapshots_loaded = false; if let Some(ref cache_dir) = self.cache_dir { let lsp_dir = cache_dir.join("lsp"); - let start = std::time::Instant::now(); - - let registry_path = lsp_dir.join("registry.bin"); - let def_index_path = lsp_dir.join("def_index.bin"); - let resolution_exports_path = lsp_dir.join("resolution_exports.bin"); - let module_file_map_path = lsp_dir.join("module_file_map.bin"); - - // Load all snapshots (attempt all, succeed if all present) - let reg_result = cache::load_registry_snapshot(&registry_path); - let idx_result = DefinitionIndex::load_from_disk(&def_index_path); - let re_result = 
crate::lsp::utils::resolve::ResolutionExports::load_from_disk(&resolution_exports_path); - let
mfmap_result = cache::load_module_file_map(&module_file_map_path); + let phase_a_start = std::time::Instant::now(); + + let t = std::time::Instant::now(); + let idx_result = DefinitionIndex::load_from_disk(&lsp_dir.join("def_index.bin")); + self.info(format!("[timing] load def_index: {:.2?} ({})", t.elapsed(), if idx_result.is_ok() { "ok" } else { "miss" })).await; + + let t = std::time::Instant::now(); + let re_result = crate::lsp::utils::resolve::ResolutionExports::load_from_disk( + &lsp_dir.join("resolution_exports.bin"), + ); + self.info(format!("[timing] load resolution_exports: {:.2?} ({})", t.elapsed(), if re_result.is_ok() { "ok" } else { "miss" })).await; + + let t = std::time::Instant::now(); + let mfmap_result = cache::load_module_file_map(&lsp_dir.join("module_file_map.bin")); + self.info(format!("[timing] load module_file_map: {:.2?} ({})", t.elapsed(), if mfmap_result.is_ok() { "ok" } else { "miss" })).await; + + let t = std::time::Instant::now(); + let comp_result = CompletionIndex::load_from_disk(&lsp_dir.join("completion_index.bin")); + self.info(format!("[timing] load completion_index: {:.2?} ({})", t.elapsed(), if comp_result.is_ok() { "ok" } else { "miss" })).await; + + let t = std::time::Instant::now(); let cache_result = cache::ModuleCache::load_from_disk(cache_dir); + self.info(format!("[timing] load module_cache: {:.2?} ({})", t.elapsed(), if cache_result.is_ok() { "ok" } else { "miss" })).await; // Always load the module cache if available (shared with CLI builds) if let Ok(c) = cache_result { - log::info!("Loaded module cache from disk in {:.2?}", start.elapsed()); let mut mc = self.module_cache.write().await; *mc = c; } - if let (Ok(reg), Ok(idx), Ok(re), Ok(mfmap)) = (reg_result, idx_result, re_result, mfmap_result) { - log::info!("Restored LSP state from cache in {:.2?}", start.elapsed()); - - // Store cached state - { - let mut r = self.registry.write().await; - *r = reg; - } + if let (Ok(idx), Ok(re), Ok(mfmap), Ok(comp)) = 
(idx_result, re_result, mfmap_result, comp_result) { { let mut i = self.def_index.write().await; *i = idx; @@ -64,21 +72,48 @@ impl Backend { let mut m = self.module_file_map.write().await; *m = mfmap; } + { + let mut ci = self.completion_index.write().await; + *ci = comp; + } - // Mark as cache-loaded — handlers can now serve requests - self.load_state.store(LOAD_STATE_CACHE_LOADED, Ordering::SeqCst); - self.info(format!("Cache loaded in {:.2?}, starting background verification", start.elapsed())).await; + self.load_state + .store(LOAD_STATE_CACHE_LOADED, Ordering::SeqCst); + all_snapshots_loaded = true; + self.info(format!("[timing] Phase A complete (all snapshots loaded): {:.2?}", phase_a_start.elapsed())).await; + self.info(format!( + "Cache loaded in {:.2?}", + phase_a_start.elapsed() + )) + .await; } else { - log::info!("No complete LSP snapshots found, doing full build (module cache may still help)"); + self.info(format!("[timing] Phase A incomplete (missing snapshots): {:.2?}", phase_a_start.elapsed())).await; } } - // Phase B: Full build in background (updates state when done) - self.spawn_full_build(cmd).await; + // If all snapshots loaded from disk, we're done — no need for Phase B + if all_snapshots_loaded { + self.load_state.store(LOAD_STATE_READY, Ordering::SeqCst); + self.info(format!("[timing] Ready from cache in {:.2?} total", total_start.elapsed())).await; + return; + } + + // Phase B: Need to build indexes from source files + let has_cache = { + let mc = self.module_cache.read().await; + mc.has_entries() + }; + + if has_cache { + self.info("Module cache found but LSP snapshots missing — rebuilding indexes").await; + self.spawn_index_build(cmd).await; + } else { + self.info("No module cache found — doing full build (cold start)").await; + self.spawn_full_build(cmd).await; + } } - async fn spawn_full_build(&self, cmd: String) { - // Create a progress token for the loading spinner + async fn spawn_index_build(&self, cmd: String) { let token = 
NumberOrString::String("pfc-loading".to_string()); let _ = self .client @@ -87,18 +122,259 @@ impl Backend { }) .await; - let already_cached = self.load_state.load(Ordering::SeqCst) >= LOAD_STATE_CACHE_LOADED; + self.client + .send_notification::(ProgressParams { + token: token.clone(), + value: ProgressParamsValue::WorkDone(WorkDoneProgress::Begin( + WorkDoneProgressBegin { + title: "Indexing PureScript sources".to_string(), + message: Some(format!("Running: {cmd}")), + cancellable: Some(false), + percentage: None, + }, + )), + }) + .await; + + let client = self.client.clone(); + let def_index = self.def_index.clone(); + let resolution_exports = self.resolution_exports.clone(); + let module_file_map = self.module_file_map.clone(); + let module_cache = self.module_cache.clone(); + let completion_index = self.completion_index.clone(); + let load_state = self.load_state.clone(); + let cache_dir = self.cache_dir.clone(); + let progress_token = token.clone(); + + let rt_handle = tokio::runtime::Handle::current(); + std::thread::Builder::new() + .name("pfc-load-sources".to_string()) + .stack_size(16 * 1024 * 1024) + .spawn(move || { + let _guard = rt_handle.enter(); + let build_start = std::time::Instant::now(); + + let log_client = client.clone(); + let info = move |msg: String| { + let rt = tokio::runtime::Handle::current(); + rt.block_on(log_client.log_message(MessageType::INFO, msg)); + }; + + // Run the shell command to get source globs + let t = std::time::Instant::now(); + let output = match std::process::Command::new("sh") + .arg("-c") + .arg(&cmd) + .output() + { + Ok(output) => output, + Err(e) => { + info(format!("Failed to run sources command: {e}")); + load_state.store(LOAD_STATE_READY, Ordering::SeqCst); + return; + } + }; + info(format!("[timing] sources command: {:.2?}", t.elapsed())); + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + info(format!("Sources command failed: {stderr}")); + 
load_state.store(LOAD_STATE_READY, Ordering::SeqCst); + return; + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let globs: Vec = stdout + .lines() + .filter(|l| !l.is_empty()) + .map(|l| l.to_string()) + .collect(); + + let rt = tokio::runtime::Handle::current(); + + // Resolve globs to file paths + let t = std::time::Instant::now(); + let mut file_paths: Vec = Vec::new(); + for pattern in &globs { + match glob::glob(pattern) { + Ok(entries) => { + for entry in entries.flatten() { + if entry.extension().map_or(false, |ext| ext == "purs") { + file_paths.push(entry); + } + } + } + Err(e) => info(format!("Invalid glob pattern {pattern}: {e}")), + } + } + info(format!("[timing] glob resolution: {:.2?} ({} files)", t.elapsed(), file_paths.len())); + + // Read all files in parallel + let t = std::time::Instant::now(); + let sources: Vec<(String, String)> = file_paths + .par_iter() + .filter_map(|entry| match std::fs::read_to_string(entry) { + Ok(source) => { + let abs_path = entry.canonicalize().unwrap_or_else(|_| entry.clone()); + Some((abs_path.to_string_lossy().into_owned(), source)) + } + Err(_) => None, + }) + .collect(); + info(format!("[timing] read files: {:.2?} ({} files)", t.elapsed(), sources.len())); + + // Report progress + rt.block_on(async { + client + .send_notification::(ProgressParams { + token: progress_token.clone(), + value: ProgressParamsValue::WorkDone(WorkDoneProgress::Report( + WorkDoneProgressReport { + message: Some(format!( + "Parsing {} source files...", + sources.len() + )), + cancellable: Some(false), + percentage: None, + }, + )), + }) + .await; + }); + + // Parse all files in parallel + let t = std::time::Instant::now(); + let parse_pool = rayon::ThreadPoolBuilder::new() + .thread_name(|i| format!("pfc-lsp-parse-{i}")) + .stack_size(16 * 1024 * 1024) + .build() + .expect("failed to build parse thread pool"); + + let parsed: Vec<_> = parse_pool.install(|| { + sources + .par_iter() + .filter_map(|(path, source)| { + 
crate::parser::parse(source) + .ok() + .map(|module| (path.clone(), source.clone(), module)) + }) + .collect() + }); + info(format!("[timing] parse files: {:.2?} ({} modules)", t.elapsed(), parsed.len())); + + let module_count = parsed.len(); + + // Build definition index, module_file_map, and completion index + let t = std::time::Instant::now(); + let mut index = DefinitionIndex::new(); + let mut mfmap = HashMap::new(); + let mut comp_index = CompletionIndex::default(); + + for (path, source, module) in &parsed { + let file_uri = Url::from_file_path(path) + .map(|u| u.to_string()) + .unwrap_or_default(); + let mod_name = format!("{}", module.name.value); + index.add_module(module, path); + mfmap.insert(mod_name.clone(), file_uri); + + let entries = extract_completion_entries(module, source); + if !entries.is_empty() { + comp_index.entries.insert(mod_name, entries); + } + } + info(format!("[timing] build indexes: {:.2?}", t.elapsed())); + + // Build resolution exports + let t = std::time::Instant::now(); + let just_modules: Vec = + parsed.iter().map(|(_, _, m)| m.clone()).collect(); + let exports = crate::lsp::utils::resolve::ResolutionExports::new(&just_modules); + info(format!("[timing] build resolution_exports: {:.2?}", t.elapsed())); + + // Register paths in module cache + let t = std::time::Instant::now(); + { + let mut mcache = rt.block_on(async { module_cache.write().await }); + for (path, _source, module) in &parsed { + let mod_name = format!("{}", module.name.value); + mcache.register_path(path.clone(), mod_name); + } + mcache.build_reverse_deps(); + + if let Some(ref dir) = cache_dir { + if let Err(e) = mcache.save_to_disk(dir) { + info(format!("Failed to save module cache: {e}")); + } + } + } + info(format!("[timing] update module cache: {:.2?}", t.elapsed())); + + // Save LSP snapshots to disk for next startup + let t = std::time::Instant::now(); + if let Some(ref dir) = cache_dir { + let lsp_dir = dir.join("lsp"); + if let Err(e) = 
index.save_to_disk(&lsp_dir.join("def_index.bin")) { + info(format!("Failed to save def_index snapshot: {e}")); + } + if let Err(e) = exports.save_to_disk(&lsp_dir.join("resolution_exports.bin")) { + info(format!("Failed to save resolution_exports snapshot: {e}")); + } + if let Err(e) = + cache::save_module_file_map(&mfmap, &lsp_dir.join("module_file_map.bin")) + { + info(format!("Failed to save module_file_map snapshot: {e}")); + } + if let Err(e) = comp_index.save_to_disk(&lsp_dir.join("completion_index.bin")) { + info(format!("Failed to save completion_index snapshot: {e}")); + } + } + info(format!("[timing] save snapshots: {:.2?}", t.elapsed())); + + // Store indexes and mark as ready + rt.block_on(async { + let mut idx = def_index.write().await; + *idx = index; + let mut re = resolution_exports.write().await; + *re = exports; + let mut mf = module_file_map.write().await; + *mf = mfmap; + let mut ci = completion_index.write().await; + *ci = comp_index; + load_state.store(LOAD_STATE_READY, Ordering::SeqCst); + + client + .send_notification::(ProgressParams { + token: progress_token, + value: ProgressParamsValue::WorkDone(WorkDoneProgress::End( + WorkDoneProgressEnd { + message: Some(format!("Indexed {module_count} modules")), + }, + )), + }) + .await; + }); + info(format!("[timing] Phase B (index build) total: {:.2?}", build_start.elapsed())); + }) + .expect("failed to spawn load-sources thread"); + } + + /// Cold-start path: full build with typechecking when no prior cache exists. 
+ async fn spawn_full_build(&self, cmd: String) { + let token = NumberOrString::String("pfc-loading".to_string()); + let _ = self + .client + .send_request::(WorkDoneProgressCreateParams { + token: token.clone(), + }) + .await; self.client .send_notification::(ProgressParams { token: token.clone(), value: ProgressParamsValue::WorkDone(WorkDoneProgress::Begin( WorkDoneProgressBegin { - title: if already_cached { - "Verifying PureScript sources".to_string() - } else { - "Loading PureScript sources".to_string() - }, + title: "Loading PureScript sources".to_string(), message: Some(format!("Running: {cmd}")), cancellable: Some(false), percentage: None, @@ -112,20 +388,27 @@ impl Backend { let def_index = self.def_index.clone(); let resolution_exports = self.resolution_exports.clone(); let module_file_map = self.module_file_map.clone(); - let source_map = self.source_map.clone(); let module_cache = self.module_cache.clone(); + let completion_index = self.completion_index.clone(); let load_state = self.load_state.clone(); let cache_dir = self.cache_dir.clone(); - let files = self.files.clone(); let progress_token = token.clone(); let rt_handle = tokio::runtime::Handle::current(); std::thread::Builder::new() .name("pfc-load-sources".to_string()) - .stack_size(16 * 1024 * 1024) // 16 MB — typechecker needs deep recursion + .stack_size(16 * 1024 * 1024) .spawn(move || { let _guard = rt_handle.enter(); - // Run the shell command to get source globs + let build_start = std::time::Instant::now(); + + let log_client = client.clone(); + let info = move |msg: String| { + let rt = tokio::runtime::Handle::current(); + rt.block_on(log_client.log_message(MessageType::INFO, msg)); + }; + + let t = std::time::Instant::now(); let output = match std::process::Command::new("sh") .arg("-c") .arg(&cmd) @@ -133,15 +416,16 @@ impl Backend { { Ok(output) => output, Err(e) => { - log::error!("Failed to run sources command: {e}"); + info(format!("Failed to run sources command: {e}")); 
load_state.store(LOAD_STATE_READY, Ordering::SeqCst); return; } }; + info(format!("[timing] sources command: {:.2?}", t.elapsed())); if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); - log::error!("Sources command failed: {stderr}"); + info(format!("Sources command failed: {stderr}")); load_state.store(LOAD_STATE_READY, Ordering::SeqCst); return; } @@ -155,26 +439,8 @@ impl Backend { let rt = tokio::runtime::Handle::current(); - // Report progress: resolving globs - rt.block_on(async { - client - .send_notification::(ProgressParams { - token: progress_token.clone(), - value: ProgressParamsValue::WorkDone(WorkDoneProgress::Report( - WorkDoneProgressReport { - message: Some(format!( - "Resolving {} glob patterns...", - globs.len() - )), - cancellable: Some(false), - percentage: None, - }, - )), - }) - .await; - }); - - // Resolve globs to file paths (collect paths first, then read in parallel) + // Resolve globs to file paths + let t = std::time::Instant::now(); let mut file_paths: Vec = Vec::new(); for pattern in &globs { match glob::glob(pattern) { @@ -185,11 +451,13 @@ impl Backend { } } } - Err(e) => log::warn!("Invalid glob pattern {pattern}: {e}"), + Err(e) => info(format!("Invalid glob pattern {pattern}: {e}")), } } + info(format!("[timing] glob resolution: {:.2?} ({} files)", t.elapsed(), file_paths.len())); // Read all files in parallel + let t = std::time::Instant::now(); let sources: Vec<(String, String)> = file_paths .par_iter() .filter_map(|entry| { @@ -200,15 +468,13 @@ impl Backend { .unwrap_or_else(|_| entry.clone()); Some((abs_path.to_string_lossy().into_owned(), source)) } - Err(e) => { - log::warn!("Failed to read {}: {e}", entry.display()); - None - } + Err(_) => None, } }) .collect(); + info(format!("[timing] read files: {:.2?} ({} files)", t.elapsed(), sources.len())); - // Report progress: building + // Report progress rt.block_on(async { client .send_notification::(ProgressParams { @@ -227,7 +493,8 @@ impl 
Backend { .await; }); - // Build with no codegen to populate the registry + // Full build with typechecking + let t = std::time::Instant::now(); let source_refs: Vec<(&str, &str)> = sources .iter() .map(|(p, s)| (p.as_str(), s.as_str())) @@ -238,7 +505,6 @@ impl Backend { ..Default::default() }; - // Use incremental build with cache let mut mcache = rt.block_on(async { module_cache.write().await }); let (result, new_registry, mut build_parsed_modules) = crate::build::build_from_sources_incremental( &source_refs, @@ -248,14 +514,16 @@ impl Backend { &mut mcache, ); mcache.build_reverse_deps(); + info(format!("[timing] full build: {:.2?}", t.elapsed())); - // Save module cache to disk + let t = std::time::Instant::now(); if let Some(ref dir) = cache_dir { if let Err(e) = mcache.save_to_disk(dir) { - log::warn!("Failed to save module cache: {e}"); + info(format!("Failed to save module cache: {e}")); } } drop(mcache); + info(format!("[timing] save module cache: {:.2?}", t.elapsed())); let error_count: usize = result.modules.iter().map(|m| m.type_errors.len()).sum(); let module_count = result.modules.len(); @@ -265,13 +533,13 @@ impl Backend { .filter(|m| !m.type_errors.is_empty()) .count(); - // Collect paths of modules already parsed by the build pipeline + // Parse only cache-hit sources that weren't parsed by the build + let t = std::time::Instant::now(); let already_parsed: std::collections::HashSet = build_parsed_modules .iter() .map(|(p, _)| p.to_string_lossy().into_owned()) .collect(); - // Parse only cache-hit sources that weren't parsed by the build (in parallel) let cache_hit_sources: Vec<_> = sources .iter() .filter(|(path, _)| !already_parsed.contains(path.as_str())) @@ -292,13 +560,16 @@ impl Backend { }) .collect() }); + info(format!("[timing] extra parse (cache hits): {:.2?} ({} modules)", t.elapsed(), extra_parsed.len())); build_parsed_modules.extend(extra_parsed); - // Build definition index and source/module maps + // Build indexes + let t = 
std::time::Instant::now(); let mut index = DefinitionIndex::new(); - let mut smap = HashMap::with_capacity(sources.len()); let mut mfmap = HashMap::new(); + let mut comp_index = CompletionIndex::default(); + let sources_map: HashMap<&str, &str> = sources.iter().map(|(p, s)| (p.as_str(), s.as_str())).collect(); for (path, module) in &build_parsed_modules { let path_str = path.to_string_lossy(); @@ -307,38 +578,41 @@ impl Backend { .unwrap_or_default(); let mod_name = format!("{}", module.name.value); index.add_module(module, &path_str); - mfmap.insert(mod_name, file_uri); - } + mfmap.insert(mod_name.clone(), file_uri); - // Build source map from all sources (doesn't need parsing) - for (path, source) in &sources { - let file_uri = Url::from_file_path(path) - .map(|u| u.to_string()) - .unwrap_or_default(); - smap.insert(file_uri, source.clone()); + if let Some(source) = sources_map.get(path_str.as_ref()) { + let entries = extract_completion_entries(module, source); + if !entries.is_empty() { + comp_index.entries.insert(mod_name, entries); + } + } } + info(format!("[timing] build indexes: {:.2?}", t.elapsed())); - let just_modules: Vec = build_parsed_modules.into_iter().map(|(_, m)| m).collect(); + let t = std::time::Instant::now(); + let just_modules: Vec = build_parsed_modules.into_iter().map(|(_, m)| m).collect(); let exports = crate::lsp::utils::resolve::ResolutionExports::new(&just_modules); + info(format!("[timing] build resolution_exports: {:.2?}", t.elapsed())); - // Save LSP snapshots to disk for next startup + // Save LSP snapshots + let t = std::time::Instant::now(); if let Some(ref dir) = cache_dir { let lsp_dir = dir.join("lsp"); - if let Err(e) = cache::save_registry_snapshot(&new_registry, &lsp_dir.join("registry.bin")) { - log::warn!("Failed to save registry snapshot: {e}"); - } if let Err(e) = index.save_to_disk(&lsp_dir.join("def_index.bin")) { - log::warn!("Failed to save def_index snapshot: {e}"); + info(format!("Failed to save def_index snapshot: 
{e}")); } if let Err(e) = exports.save_to_disk(&lsp_dir.join("resolution_exports.bin")) { - log::warn!("Failed to save resolution_exports snapshot: {e}"); + info(format!("Failed to save resolution_exports snapshot: {e}")); } if let Err(e) = cache::save_module_file_map(&mfmap, &lsp_dir.join("module_file_map.bin")) { - log::warn!("Failed to save module_file_map snapshot: {e}"); + info(format!("Failed to save module_file_map snapshot: {e}")); + } + if let Err(e) = comp_index.save_to_disk(&lsp_dir.join("completion_index.bin")) { + info(format!("Failed to save completion_index snapshot: {e}")); } } + info(format!("[timing] save snapshots: {:.2?}", t.elapsed())); - // Store the registry, index, source map and mark as ready rt.block_on(async { let mut reg = registry.write().await; *reg = new_registry; @@ -348,28 +622,10 @@ impl Backend { *re = exports; let mut mf = module_file_map.write().await; *mf = mfmap; - let mut sm = source_map.write().await; - *sm = smap; + let mut ci = completion_index.write().await; + *ci = comp_index; load_state.store(LOAD_STATE_READY, Ordering::SeqCst); - // Re-typecheck any files the user edited during loading - let edited_files: Vec<(String, String)> = { - let f = files.read().await; - f.iter() - .map(|(uri, fs)| (uri.clone(), fs.source.clone())) - .collect() - }; - drop(reg); - drop(idx); - drop(re); - drop(mf); - drop(sm); - - if !edited_files.is_empty() { - log::info!("Re-checking {} files edited during loading", edited_files.len()); - } - - // End progress client .send_notification::(ProgressParams { token: progress_token, @@ -383,7 +639,168 @@ impl Backend { }) .await; }); + info(format!("[timing] Phase B (full build) total: {:.2?}", build_start.elapsed())); }) .expect("failed to spawn load-sources thread"); } } + +/// Extract completion entries from a module's CST declarations and source text. 
+fn extract_completion_entries(module: &cst::Module, source: &str) -> Vec { + let mut entries = Vec::new(); + let mut type_sigs: HashMap = HashMap::new(); + + // First pass: collect type signatures + for decl in &module.decls { + if let Decl::TypeSignature { name, ty, .. } = decl { + let name_str = interner::resolve(name.value) + .unwrap_or_default() + .to_string(); + let span = ty.span(); + if span.start < source.len() && span.end <= source.len() { + type_sigs.insert(name_str, source[span.start..span.end].to_string()); + } + } + } + + // Check export list to filter what's actually exported + let export_filter: Option> = + module.exports.as_ref().map(|spanned_list| { + spanned_list + .value + .exports + .iter() + .filter_map(|exp| match exp { + cst::Export::Value(name) + | cst::Export::Type(name, _) + | cst::Export::Class(name) => interner::resolve(*name).map(|s| s.to_string()), + _ => None, + }) + .collect() + }); + + // Second pass: build entries + for decl in &module.decls { + match decl { + Decl::Value { name, .. } | Decl::Foreign { name, .. } => { + let name_str = interner::resolve(name.value) + .unwrap_or_default() + .to_string(); + if let Some(ref filter) = export_filter { + if !filter.contains(&name_str) { + continue; + } + } + let type_string = type_sigs.get(&name_str).cloned().unwrap_or_default(); + entries.push(CompletionEntry { + name: name_str, + type_string, + kind: CompletionEntryKind::Value, + }); + } + Decl::Data { + name, constructors, .. 
+ } => { + let type_name = interner::resolve(name.value) + .unwrap_or_default() + .to_string(); + if let Some(ref filter) = export_filter { + if !filter.contains(&type_name) { + continue; + } + } + entries.push(CompletionEntry { + name: type_name, + type_string: String::new(), + kind: CompletionEntryKind::Type, + }); + for ctor in constructors { + let ctor_name = interner::resolve(ctor.name.value) + .unwrap_or_default() + .to_string(); + entries.push(CompletionEntry { + name: ctor_name, + type_string: String::new(), + kind: CompletionEntryKind::Constructor, + }); + } + } + Decl::Newtype { + name, constructor, .. + } => { + let type_name = interner::resolve(name.value) + .unwrap_or_default() + .to_string(); + if let Some(ref filter) = export_filter { + if !filter.contains(&type_name) { + continue; + } + } + entries.push(CompletionEntry { + name: type_name, + type_string: String::new(), + kind: CompletionEntryKind::Type, + }); + let ctor_name = interner::resolve(constructor.value) + .unwrap_or_default() + .to_string(); + entries.push(CompletionEntry { + name: ctor_name, + type_string: String::new(), + kind: CompletionEntryKind::Constructor, + }); + } + Decl::Class { name, members, .. } => { + let class_name = interner::resolve(name.value) + .unwrap_or_default() + .to_string(); + if let Some(ref filter) = export_filter { + if !filter.contains(&class_name) { + continue; + } + } + entries.push(CompletionEntry { + name: class_name, + type_string: String::new(), + kind: CompletionEntryKind::Class, + }); + for member in members { + let member_name = interner::resolve(member.name.value) + .unwrap_or_default() + .to_string(); + let type_string = { + let span = member.ty.span(); + if span.start < source.len() && span.end <= source.len() { + source[span.start..span.end].to_string() + } else { + String::new() + } + }; + entries.push(CompletionEntry { + name: member_name, + type_string, + kind: CompletionEntryKind::Value, + }); + } + } + Decl::TypeAlias { name, .. 
} => { + let name_str = interner::resolve(name.value) + .unwrap_or_default() + .to_string(); + if let Some(ref filter) = export_filter { + if !filter.contains(&name_str) { + continue; + } + } + entries.push(CompletionEntry { + name: name_str, + type_string: String::new(), + kind: CompletionEntryKind::Type, + }); + } + _ => {} + } + } + + entries +} diff --git a/src/lsp/mod.rs b/src/lsp/mod.rs index d2fdb008..9f3fff9f 100644 --- a/src/lsp/mod.rs +++ b/src/lsp/mod.rs @@ -2,18 +2,20 @@ mod handlers; pub mod utils; use std::collections::HashMap; -use std::path::PathBuf; +use std::io; +use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicU8, Ordering}; use std::sync::Arc; +use serde::{Deserialize, Serialize}; use tokio::sync::RwLock; use tower_lsp::jsonrpc::Result; use tower_lsp::lsp_types::*; use tower_lsp::{Client, LanguageServer, LspService, Server}; use crate::build::cache::ModuleCache; -use crate::typechecker::registry::ModuleRegistry; use crate::lsp::utils::resolve::ResolutionExports; +use crate::typechecker::registry::ModuleRegistry; use utils::find_definition::DefinitionIndex; @@ -22,6 +24,50 @@ pub(crate) struct FileState { pub module_name: Option, } +/// Lightweight completion data extracted from CST type signatures. +/// Much smaller than full ModuleExports — just pre-formatted strings. 
+#[derive(Default, Serialize, Deserialize)] +pub(crate) struct CompletionIndex { + /// module_name → list of completion entries + pub entries: HashMap>, +} + +impl CompletionIndex { + pub fn save_to_disk(&self, path: &Path) -> io::Result<()> { + let encoded = bincode::serialize(self) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + let compressed = zstd::bulk::compress(&encoded, 1) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + std::fs::write(path, compressed) + } + + pub fn load_from_disk(path: &Path) -> io::Result { + let compressed = std::fs::read(path)?; + let data = zstd::bulk::decompress(&compressed, 64 * 1024 * 1024) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + bincode::deserialize(&data) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}"))) + } +} + +#[derive(Clone, Serialize, Deserialize)] +pub(crate) struct CompletionEntry { + pub name: String, + pub type_string: String, + pub kind: CompletionEntryKind, +} + +#[derive(Clone, Copy, PartialEq, Serialize, Deserialize)] +pub(crate) enum CompletionEntryKind { + Value, + Constructor, + Type, + Class, +} + /// Load state for progressive LSP initialization. 
/// 0 = Initializing (no data), 1 = CacheLoaded (from disk, may be stale), 2 = Ready (authoritative) pub(crate) const LOAD_STATE_INITIALIZING: u8 = 0; @@ -39,6 +85,7 @@ pub struct Backend { /// Maps file URI → source content for loaded project files pub(crate) source_map: Arc>>, pub(crate) module_cache: Arc>, + pub(crate) completion_index: Arc>, pub(crate) sources_cmd: Option, pub(crate) cache_dir: Option, pub(crate) load_state: Arc, @@ -68,8 +115,14 @@ impl LanguageServer for Backend { } async fn initialized(&self, _: InitializedParams) { - self.info("pfc language server initialized").await; + self.info("[lsp] pfc language server initializing").await; + let t = std::time::Instant::now(); self.load_sources().await; + self.info(format!( + "[lsp] pfc language server initialized in {:.2?}", + t.elapsed() + )) + .await; } async fn shutdown(&self) -> Result<()> { @@ -77,40 +130,111 @@ impl LanguageServer for Backend { } async fn did_open(&self, params: DidOpenTextDocumentParams) { + let name = params + .text_document + .uri + .path() + .rsplit('/') + .next() + .unwrap_or("") + .to_string(); + self.info(format!("[lsp] >> textDocument/didOpen {name}")) + .await; + let t = std::time::Instant::now(); self.on_change(params.text_document.uri, params.text_document.text) .await; + self.info(format!( + "[lsp] << textDocument/didOpen {name}: {:.2?}", + t.elapsed() + )) + .await; } async fn did_change(&self, params: DidChangeTextDocumentParams) { + let name = params + .text_document + .uri + .path() + .rsplit('/') + .next() + .unwrap_or("") + .to_string(); + self.info(format!("[lsp] >> textDocument/didChange {name}")) + .await; + let t = std::time::Instant::now(); if let Some(change) = params.content_changes.into_iter().next() { self.on_change(params.text_document.uri, change.text).await; } + self.info(format!( + "[lsp] << textDocument/didChange {name}: {:.2?}", + t.elapsed() + )) + .await; } async fn did_save(&self, params: DidSaveTextDocumentParams) { + let name = params + 
.text_document + .uri + .path() + .rsplit('/') + .next() + .unwrap_or("") + .to_string(); + self.info(format!("[lsp] >> textDocument/didSave {name}")) + .await; + let t = std::time::Instant::now(); if let Some(text) = params.text { self.on_change(params.text_document.uri, text).await; } + self.info(format!( + "[lsp] << textDocument/didSave {name}: {:.2?}", + t.elapsed() + )) + .await; } async fn goto_definition( &self, params: GotoDefinitionParams, ) -> Result> { - self.handle_goto_definition(params).await + self.info("[lsp] >> textDocument/definition").await; + let t = std::time::Instant::now(); + let result = self.handle_goto_definition(params).await; + self.info(format!( + "[lsp] << textDocument/definition: {:.2?}", + t.elapsed() + )) + .await; + result } async fn hover(&self, params: HoverParams) -> Result> { - self.handle_hover(params).await + self.info("[lsp] >> textDocument/hover").await; + let t = std::time::Instant::now(); + let result = self.handle_hover(params).await; + self.info(format!("[lsp] << textDocument/hover: {:.2?}", t.elapsed())) + .await; + result } async fn completion(&self, params: CompletionParams) -> Result> { - self.handle_completion(params).await + self.info("[lsp] >> textDocument/completion").await; + let t = std::time::Instant::now(); + let result = self.handle_completion(params).await; + self.info(format!( + "[lsp] << textDocument/completion: {:.2?}", + t.elapsed() + )) + .await; + result } } impl Backend { async fn rebuild_module(&self, params: serde_json::Value) -> Result { + self.info("[lsp] >> pfc/rebuildModule").await; + let t = std::time::Instant::now(); if let Some(uri_str) = params.get("uri").and_then(|v| v.as_str()) { if let Ok(uri) = Url::parse(uri_str) { // Try open files first, then source_map, then disk @@ -137,11 +261,17 @@ impl Backend { self.on_change(uri, source).await; } } + self.info(format!("[lsp] << pfc/rebuildModule: {:.2?}", t.elapsed())) + .await; Ok(serde_json::json!({ "success": true })) } async fn 
rebuild_project(&self) -> Result { + self.info("[lsp] >> pfc/rebuildProject").await; + let t = std::time::Instant::now(); self.load_sources().await; + self.info(format!("[lsp] << pfc/rebuildProject: {:.2?}", t.elapsed())) + .await; Ok(serde_json::json!({ "success": true })) } @@ -155,6 +285,7 @@ impl Backend { module_file_map: Arc::new(RwLock::new(HashMap::new())), source_map: Arc::new(RwLock::new(HashMap::new())), module_cache: Arc::new(RwLock::new(ModuleCache::default())), + completion_index: Arc::new(RwLock::new(CompletionIndex::default())), sources_cmd, cache_dir, load_state: Arc::new(AtomicU8::new(LOAD_STATE_INITIALIZING)), @@ -198,10 +329,11 @@ pub fn run_server(sources_cmd: Option, cache_dir: Option) { let stdin = tokio::io::stdin(); let stdout = tokio::io::stdout(); - let (service, socket) = LspService::build(|client| Backend::new(client, sources_cmd, cache_dir)) - .custom_method("pfc/rebuildModule", Backend::rebuild_module) - .custom_method("pfc/rebuildProject", Backend::rebuild_project) - .finish(); + let (service, socket) = + LspService::build(|client| Backend::new(client, sources_cmd, cache_dir)) + .custom_method("pfc/rebuildModule", Backend::rebuild_module) + .custom_method("pfc/rebuildProject", Backend::rebuild_project) + .finish(); Server::new(stdin, stdout, socket).serve(service).await; }); From 38738f83797de3c68267edd98c55a2e37fb81aa1 Mon Sep 17 00:00:00 2001 From: Rory Campbell Date: Wed, 11 Mar 2026 11:03:10 +0100 Subject: [PATCH 13/14] adds module docs --- src/build/portable.rs | 1 + src/cst.rs | 4 ++ src/lsp/handlers/hover.rs | 59 ++++++++++++++++++++++++ src/parser/grammar.lalrpop | 2 + src/parser/mod.rs | 42 +++++++++++++++++ src/typechecker/check.rs | 1 + src/typechecker/mod.rs | 11 ++++- src/typechecker/registry.rs | 2 + tests/fixtures/lsp/hover/Simple.purs | 1 + tests/fixtures/lsp/hover/Simple/Lib.purs | 1 + 10 files changed, 122 insertions(+), 2 deletions(-) diff --git a/src/build/portable.rs b/src/build/portable.rs index 
767743c3..c219ba23 100644 --- a/src/build/portable.rs +++ b/src/build/portable.rs @@ -373,6 +373,7 @@ impl PModuleExports { method_own_constraints: self.method_own_constraints.iter().map(|(k, v)| { (rest_qi(k, st), v.iter().map(|s| st.sym(*s)).collect()) }).collect(), + module_doc: Vec::new(), // not persisted in portable format } } } diff --git a/src/cst.rs b/src/cst.rs index 2e23704e..0214c83c 100644 --- a/src/cst.rs +++ b/src/cst.rs @@ -52,6 +52,8 @@ pub struct Module { pub decls: Vec, /// All comments in the module source, in order of appearance (comment, span) pub comments: Vec<(Comment, Span)>, + /// Doc-comments that appear before the `module` keyword + pub doc_comments: Vec, } /// Module name (potentially qualified: Data.Array) @@ -102,6 +104,8 @@ pub enum DataMembers { pub struct ImportDecl { pub span: Span, pub module: ModuleName, + /// Span of the module name in the import (for hover support) + pub module_span: Span, pub imports: Option, pub qualified: Option, } diff --git a/src/lsp/handlers/hover.rs b/src/lsp/handlers/hover.rs index f5e6cd45..1033b1d9 100644 --- a/src/lsp/handlers/hover.rs +++ b/src/lsp/handlers/hover.rs @@ -212,6 +212,28 @@ impl Backend { if offset < import_decl.span.start || offset >= import_decl.span.end { continue; } + + // Check if cursor is on the module name + if offset >= import_decl.module_span.start && offset < import_decl.module_span.end { + let module_name = interner::resolve_module_name(&import_decl.module.parts); + let docs = self.get_imported_module_doc(&module_name).await; + let mut markdown = format!("```purescript\nmodule {module_name}\n```"); + if !docs.is_empty() { + markdown.push_str("\n\n---\n\n"); + for doc in &docs { + markdown.push_str(doc.trim()); + markdown.push('\n'); + } + } + return Some(Hover { + contents: HoverContents::Markup(MarkupContent { + kind: MarkupKind::Markdown, + value: markdown, + }), + range: None, + }); + } + let items = match &import_decl.imports { Some(ImportList::Explicit(items)) | 
Some(ImportList::Hiding(items)) => items, None => continue, @@ -300,6 +322,43 @@ impl Backend { .collect() } + async fn get_imported_module_doc(&self, module_name: &str) -> Vec { + // Try registry first (has module_doc from typechecking) + { + let module_parts: Vec = module_name + .split('.') + .map(|s| interner::intern(s)) + .collect(); + let registry = self.registry.read().await; + if let Some(mod_exports) = registry.lookup(&module_parts) { + if !mod_exports.module_doc.is_empty() { + return mod_exports.module_doc.clone(); + } + } + } + + // Fall back to parsing the source file + let target_uri = { + let mf = self.module_file_map.read().await; + mf.get(module_name).cloned() + }; + let target_uri = match target_uri { + Some(u) => u, + None => return Vec::new(), + }; + let target_source = match self.get_source_for_uri(&target_uri).await { + Some(s) => s, + None => return Vec::new(), + }; + let target_module = match crate::parser::parse(&target_source) { + Ok(m) => m, + Err(_) => return Vec::new(), + }; + target_module.doc_comments.iter().filter_map(|c| { + if let cst::Comment::Doc(text) = c { Some(text.clone()) } else { None } + }).collect() + } + async fn get_local_kind(&self, module: &cst::Module, symbol: interner::Symbol) -> Option { let registry = self.registry.read().await; let check_result = crate::typechecker::check_module_with_registry(module, &registry); diff --git a/src/parser/grammar.lalrpop b/src/parser/grammar.lalrpop index 64519523..bc3551ec 100644 --- a/src/parser/grammar.lalrpop +++ b/src/parser/grammar.lalrpop @@ -29,6 +29,7 @@ pub Module: Module = { imports, decls, comments: Vec::new(), + doc_comments: Vec::new(), } } }; @@ -152,6 +153,7 @@ ImportDecl: ImportDecl = { )?> => { ImportDecl { span: Span::new(start, end), + module_span: module.span, module: module.value, imports, qualified: qualified.map(|m| m.value), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 43af3375..119e89a7 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -147,6
+147,14 @@ fn attach_comments( // Store all comments on the module module.comments = comment_pairs.clone(); + // Attach doc-comments that appear before the `module` keyword to the module itself + let module_start = module.span.start; + module.doc_comments = comment_pairs + .iter() + .filter(|(c, span)| c.is_doc() && span.end <= module_start) + .map(|(c, _)| c.clone()) + .collect(); + if module.decls.is_empty() { return; } @@ -278,6 +286,40 @@ mod tests { assert_eq!(module.decls[0].doc_comments().len(), 2); } + #[test] + fn test_module_doc_comments() { + let module = parse("-- | This module does things\nmodule Main where\nfoo = 1").unwrap(); + assert_eq!(module.doc_comments.len(), 1); + assert!(module.doc_comments[0].is_doc()); + if let Comment::Doc(text) = &module.doc_comments[0] { + assert_eq!(text.trim(), "This module does things"); + } + } + + #[test] + fn test_module_multi_line_doc_comments() { + let module = parse("-- | Line 1\n-- | Line 2\nmodule Main where\nfoo = 1").unwrap(); + assert_eq!(module.doc_comments.len(), 2); + } + + #[test] + fn test_import_module_span() { + let module = parse("module Main where\nimport Data.Maybe").unwrap(); + assert_eq!(module.imports.len(), 1); + let imp = &module.imports[0]; + // "import Data.Maybe" — "Data.Maybe" starts at offset 25 (after "import ") + let src = "module Main where\nimport Data.Maybe"; + assert_eq!(&src[imp.module_span.start..imp.module_span.end], "Data.Maybe"); + } + + #[test] + fn test_module_doc_not_confused_with_decl_doc() { + // Doc comment after `where` should attach to the decl, not the module + let module = parse("module Main where\n-- | Decl doc\nfoo = 1").unwrap(); + assert_eq!(module.doc_comments.len(), 0); + assert_eq!(module.decls[0].doc_comments().len(), 1); + } + // ===== Expression Tests: Literals ===== #[test] diff --git a/src/typechecker/check.rs b/src/typechecker/check.rs index dbbdeaf8..a1ab6cd6 100644 --- a/src/typechecker/check.rs +++ b/src/typechecker/check.rs @@ -8094,6 +8094,7 @@ fn 
check_module_impl(module: &Module, registry: &ModuleRegistry, collect_span_ty .collect(), class_superclasses: class_superclasses.clone(), method_own_constraints: ctx.method_own_constraints.iter().map(|(k, v)| (qi(*k), v.clone())).collect(), + module_doc: Vec::new(), // filled in by the outer CST-level wrapper }; // Ensure operator targets (e.g. Tuple for /\) are included in exported values and diff --git a/src/typechecker/mod.rs b/src/typechecker/mod.rs index 05f0ebd2..96dfce8f 100644 --- a/src/typechecker/mod.rs +++ b/src/typechecker/mod.rs @@ -109,11 +109,18 @@ fn check_module_with_options(module: &crate::cst::Module, registry: &ModuleRegis span_types: HashMap::new(), }; } - if collect_span_types { + let mut result = if collect_span_types { check::check_module_for_ide(&ast_module, registry) } else { check::check_module(&ast_module, registry) - } + }; + + // Propagate module-level doc-comments from CST to exports + result.exports.module_doc = module.doc_comments.iter().filter_map(|c| { + if let crate::cst::Comment::Doc(text) = c { Some(text.clone()) } else { None } + }).collect(); + + result } #[cfg(test)] diff --git a/src/typechecker/registry.rs b/src/typechecker/registry.rs index 9d1e65f7..55703320 100644 --- a/src/typechecker/registry.rs +++ b/src/typechecker/registry.rs @@ -76,6 +76,8 @@ pub struct ModuleExports { /// Method-level constraint class names from class definitions. /// Maps method name → constraint class names. Used for current_given_expanded in instance methods. pub method_own_constraints: HashMap>, + /// Module-level doc-comments (appear before the `module` keyword) + pub module_doc: Vec, } /// Registry of compiled modules, used to resolve imports. 
diff --git a/tests/fixtures/lsp/hover/Simple.purs b/tests/fixtures/lsp/hover/Simple.purs index 0c1c5642..e899ca83 100644 --- a/tests/fixtures/lsp/hover/Simple.purs +++ b/tests/fixtures/lsp/hover/Simple.purs @@ -137,6 +137,7 @@ getName = myRecord.name -- 56:8 (r) => hover: Int -- -- Line 2: import Simple.Lib (class Cl, member, ...) +-- 2:7 (Simple.Lib) => hover: module Simple.Lib | doc: Utility functions and classes for Simple -- 2:29 (member) => hover: member -- 2:53 (Effect) => hover: Type -> Type | doc: Opaque effect type -- 2:45 (addOne) => hover: addOne | doc: Adds one to a number diff --git a/tests/fixtures/lsp/hover/Simple/Lib.purs b/tests/fixtures/lsp/hover/Simple/Lib.purs index ac92b037..ce146b2f 100644 --- a/tests/fixtures/lsp/hover/Simple/Lib.purs +++ b/tests/fixtures/lsp/hover/Simple/Lib.purs @@ -1,3 +1,4 @@ +-- | Utility functions and classes for Simple module Simple.Lib where import Prelude From c03651d330c19c8f19cc859e77c6c917cfb65f51 Mon Sep 17 00:00:00 2001 From: Rory Campbell Date: Wed, 11 Mar 2026 11:48:40 +0100 Subject: [PATCH 14/14] better error spans --- src/typechecker/infer.rs | 22 +++++---- tests/snapshots.rs | 2 +- tests/typechecker_comprehensive.rs | 73 ++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 10 deletions(-) diff --git a/src/typechecker/infer.rs b/src/typechecker/infer.rs index 864c6bff..c97fbff0 100644 --- a/src/typechecker/infer.rs +++ b/src/typechecker/infer.rs @@ -917,7 +917,7 @@ impl InferCtx { .collect(); // Unify the argument with the instantiated param - self.state.unify(span, &arg_ty, &instantiated_param)?; + self.state.unify(arg.span(), &arg_ty, &instantiated_param)?; // Post-check 1: verify no forall var leaked into ambient vars' solutions. 
// Catches escapes like `\x -> foo x` where x's type gets constrained @@ -988,7 +988,7 @@ impl InferCtx { let result_ty = Type::Unif(self.state.fresh_var()); let expected_func_ty = Type::fun(arg_ty, result_ty.clone()); - self.state.unify(span, &func_ty, &expected_func_ty)?; + self.state.unify(arg.span(), &func_ty, &expected_func_ty)?; Ok(result_ty) } @@ -996,7 +996,7 @@ impl InferCtx { fn infer_if( &mut self, env: &Env, - span: crate::span::Span, + _span: crate::span::Span, cond: &Expr, then_expr: &Expr, else_expr: &Expr, @@ -1009,7 +1009,7 @@ impl InferCtx { let then_ty = self.infer(env, then_expr)?; let else_ty = self.infer(env, else_expr)?; - self.state.unify(span, &then_ty, &else_ty)?; + self.state.unify(else_expr.span(), &then_ty, &else_ty)?; if is_underscore { Ok(Type::fun(Type::boolean(), then_ty)) @@ -1725,7 +1725,11 @@ impl InferCtx { // Infer the body and unify with result type let body_ty = self.infer_guarded(&alt_env, &alt.result)?; - self.state.unify(span, &result_ty, &body_ty)?; + let body_span = match &alt.result { + GuardedExpr::Unconditional(e) => e.span(), + GuardedExpr::Guarded(_) => alt.span, + }; + self.state.unify(body_span, &result_ty, &body_ty)?; } // Exhaustiveness check: for each scrutinee, verify all constructors are covered @@ -1782,14 +1786,14 @@ impl InferCtx { fn infer_array( &mut self, env: &Env, - span: crate::span::Span, + _span: crate::span::Span, elements: &[Expr], ) -> Result { let elem_ty = Type::Unif(self.state.fresh_var()); for elem in elements { let t = self.infer(env, elem)?; - self.state.unify(span, &elem_ty, &t)?; + self.state.unify(elem.span(), &elem_ty, &t)?; } Ok(Type::array(elem_ty)) @@ -2193,7 +2197,7 @@ impl InferCtx { // Apply: func expr (\_ -> rest) let after_first = Type::Unif(self.state.fresh_var()); - self.state.unify(span, &func_ty, &Type::fun(expr_ty, after_first.clone()))?; + self.state.unify(expr.span(), &func_ty, &Type::fun(expr_ty, after_first.clone()))?; let discard_arg = 
Type::Unif(self.state.fresh_var()); let cont_ty = Type::fun(discard_arg, rest_ty); let result = Type::Unif(self.state.fresh_var()); @@ -2223,7 +2227,7 @@ impl InferCtx { // Apply: bind expr (\binder -> rest) let after_first = Type::Unif(self.state.fresh_var()); - self.state.unify(span, &func_ty, &Type::fun(expr_ty, after_first.clone()))?; + self.state.unify(expr.span(), &func_ty, &Type::fun(expr_ty, after_first.clone()))?; let cont_ty = Type::fun(binder_ty, rest_ty); let result = Type::Unif(self.state.fresh_var()); self.state.unify(span, &after_first, &Type::fun(cont_ty, result.clone()))?; diff --git a/tests/snapshots.rs b/tests/snapshots.rs index 6b312db6..9cfc8d73 100644 --- a/tests/snapshots.rs +++ b/tests/snapshots.rs @@ -100,7 +100,7 @@ fn snap_expr_negate() { fn snap_expr_error_branch_mismatch() { insta::assert_snapshot!( format_expr_type(r#"if true then 1 else "x""#), - @"ERROR: Could not match type Int with type String at 0:23" + @"ERROR: Could not match type Int with type String at 20:23" ); } diff --git a/tests/typechecker_comprehensive.rs b/tests/typechecker_comprehensive.rs index 77ff680f..6fa9d190 100644 --- a/tests/typechecker_comprehensive.rs +++ b/tests/typechecker_comprehensive.rs @@ -142,6 +142,20 @@ fn assert_module_not_implemented(source: &str) { ); } +/// Assert that a type error's span covers exactly the expected source text. 
+fn assert_error_span_text(source: &str, error_code: &str, expected_text: &str) { + let (_, errors) = check_module_types(source); + let err = errors.iter().find(|e| e.code() == error_code) + .unwrap_or_else(|| panic!("expected {} error, got errors: {:?}", error_code, errors.iter().map(|e| format!("{} ({})", e.code(), e)).collect::>())); + let span = err.span(); + assert!(span.start <= span.end && span.end <= source.len(), + "error span for {} is invalid: start={}, end={}, source len={}", + error_code, span.start, span.end, source.len()); + let actual = &source[span.start..span.end]; + assert_eq!(actual, expected_text, + "error span for {} should cover '{}' but covers '{}'", error_code, expected_text, actual); +} + // ═══════════════════════════════════════════════════════════════════════════ // 1. LITERALS // ═══════════════════════════════════════════════════════════════════════════ @@ -7945,3 +7959,62 @@ x = Wrap 42"; Type::app(Type::con("A", "Wrapper"), Type::int()) ); } + +// ═══════════════════════════════════════════════════════════════════════════ +// ERROR SPAN PRECISION TESTS +// ═══════════════════════════════════════════════════════════════════════════ + +#[test] +fn error_span_if_else_branch_mismatch() { + assert_error_span_text( + r#"module Test where +x = if true then 1 else "a""#, + "UnificationError", + "\"a\"" + ); +} + +#[test] +fn error_span_case_alternative_body() { + assert_error_span_text( + "module Test where\nx = case true of\n true -> 1\n false -> \"a\"", + "UnificationError", + "\"a\"" + ); +} + +#[test] +fn error_span_array_element_mismatch() { + assert_error_span_text( + "module Test where\nx = [1, 2, \"three\"]", + "UnificationError", + "\"three\"" + ); +} + +#[test] +fn error_span_function_arg_mismatch() { + assert_error_span_text( + "module Test where\nf :: Int -> Int\nf n = n\nx = f \"hello\"", + "UnificationError", + "\"hello\"" + ); +} + +#[test] +fn error_span_if_condition_not_boolean() { + assert_error_span_text( + "module Test 
where\nx = if 42 then 1 else 2", + "UnificationError", + "42" + ); +} + +#[test] +fn error_span_case_multiple_alternatives() { + assert_error_span_text( + "module Test where\nx = case 1 of\n 1 -> \"a\"\n 2 -> \"b\"\n _ -> true", + "UnificationError", + "true" + ); +}