diff --git a/.gitignore b/.gitignore index 5ba698fe..590097a2 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ Thumbs.db # lsp vscode client - /editors/code/node_modules /editors/code/out +/editors/code/.vscode-test +/editors/code/package-lock.json diff --git a/Cargo.toml b/Cargo.toml index 6b43e37d..6c752a17 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,10 @@ edition = "2021" name = "pfc" path = "src/main.rs" +[[bin]] +name = "profile-load-sources" +path = "src/bin/profile_load_sources.rs" + [dependencies] clap = { version = "4", features = ["derive"] } log = "0.4" @@ -26,7 +30,10 @@ rayon = "1.10" mimalloc = { version = "0.1", default-features = false } tower-lsp = "0.20" tokio = { version = "1", features = ["full"] } +serde = { version = "1", features = ["derive"] } serde_json = "1" +bincode = "1" +zstd = "0.13" [build-dependencies] lalrpop = "0.22" diff --git a/editors/code/package.json b/editors/code/package.json index 13ce8f0d..e52a07a9 100644 --- a/editors/code/package.json +++ b/editors/code/package.json @@ -27,6 +27,16 @@ ] } ], + "commands": [ + { + "command": "pfc.rebuildModule", + "title": "PFC: Rebuild Current Module" + }, + { + "command": "pfc.rebuildProject", + "title": "PFC: Rebuild Project" + } + ], "configuration": { "title": "PureScript Fast Compiler", "properties": { @@ -37,7 +47,7 @@ }, "pfc.sourcesCommand": { "type": "string", - "default": "spago sources", + "default": "ragu sources", "description": "Shell command that outputs PureScript source file paths (one per line). 
Example: find src .spago/p -name '*.purs'" } } diff --git a/editors/code/src/extension.ts b/editors/code/src/extension.ts index e6736d24..8c4c9957 100644 --- a/editors/code/src/extension.ts +++ b/editors/code/src/extension.ts @@ -33,6 +33,30 @@ export function activate(context: vscode.ExtensionContext) { clientOptions ); + context.subscriptions.push( + vscode.commands.registerCommand("pfc.rebuildModule", async () => { + const editor = vscode.window.activeTextEditor; + if (!editor) { + vscode.window.showWarningMessage("No active editor"); + return; + } + if (!client) { + vscode.window.showWarningMessage("Language server not running"); + return; + } + await client.sendRequest("pfc/rebuildModule", { + uri: editor.document.uri.toString(), + }); + }), + vscode.commands.registerCommand("pfc.rebuildProject", async () => { + if (!client) { + vscode.window.showWarningMessage("Language server not running"); + return; + } + await client.sendRequest("pfc/rebuildProject"); + }) + ); + client.start(); } diff --git a/src/bin/profile_load_sources.rs b/src/bin/profile_load_sources.rs new file mode 100644 index 00000000..ab089193 --- /dev/null +++ b/src/bin/profile_load_sources.rs @@ -0,0 +1,262 @@ +use std::collections::HashSet; +use std::path::PathBuf; +use std::time::Instant; + +use clap::Parser; +use rayon::prelude::*; + +use purescript_fast_compiler::build::{self, BuildOptions}; +use purescript_fast_compiler::lsp::utils::find_definition::DefinitionIndex; +use purescript_fast_compiler::lsp::utils::resolve::ResolutionExports; + +/// Profile the LSP load_sources phases with per-phase timing. +#[derive(Parser)] +#[command(name = "profile-load-sources")] +struct Args { + /// Working directory to run the sources command in + #[arg(long)] + path: PathBuf, + + /// Shell command that outputs source globs/paths (e.g. 
"spago sources") + #[arg(long)] + sources_cmd: String, + + /// Directory for disk cache (enables warm-cache profiling across runs) + #[arg(long)] + cache_dir: Option, +} + +macro_rules! phase { + ($name:expr, $body:expr) => {{ + let start = Instant::now(); + let result = $body; + let elapsed = start.elapsed(); + eprintln!(" {:.<50} {:>8.2?}", $name, elapsed); + result + }}; +} + +fn main() { + let args = Args::parse(); + let total_start = Instant::now(); + + eprintln!("Profiling load_sources at: {}", args.path.display()); + eprintln!("Sources command: {}", args.sources_cmd); + eprintln!(); + + // Phase 1: Run shell command + let globs: Vec = phase!("Run sources command", { + let output = std::process::Command::new("sh") + .arg("-c") + .arg(&args.sources_cmd) + .current_dir(&args.path) + .output() + .expect("Failed to run sources command"); + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + eprintln!("Command failed: {stderr}"); + std::process::exit(1); + } + + String::from_utf8_lossy(&output.stdout) + .lines() + .filter(|l| !l.is_empty()) + .map(|l| l.to_string()) + .collect() + }); + eprintln!(" {} glob patterns", globs.len()); + + // Phase 2: Resolve globs + let file_paths: Vec = phase!("Resolve globs", { + let mut paths = Vec::new(); + for pattern in &globs { + // Resolve relative globs against the working directory + let full_pattern = if PathBuf::from(pattern).is_relative() { + args.path.join(pattern).to_string_lossy().into_owned() + } else { + pattern.clone() + }; + match glob::glob(&full_pattern) { + Ok(entries) => { + for entry in entries.flatten() { + if entry.extension().map_or(false, |ext| ext == "purs") { + paths.push(entry); + } + } + } + Err(e) => eprintln!(" Invalid glob {pattern}: {e}"), + } + } + paths + }); + eprintln!(" {} .purs files", file_paths.len()); + + // Phase 3: Read all sources in parallel + let sources: Vec<(String, String)> = phase!("Read sources (parallel)", { + file_paths + .par_iter() + 
.filter_map(|entry| { + let source = std::fs::read_to_string(entry).ok()?; + let abs = entry.canonicalize().unwrap_or_else(|_| entry.clone()); + Some((abs.to_string_lossy().into_owned(), source)) + }) + .collect() + }); + eprintln!(" {} files read", sources.len()); + + // Phase 4: Build with incremental cache + let source_refs: Vec<(&str, &str)> = sources + .iter() + .map(|(p, s)| (p.as_str(), s.as_str())) + .collect(); + + let options = BuildOptions { + output_dir: None, + ..Default::default() + }; + + let cache_dir = args.cache_dir.as_ref().map(|d| { + if d.is_relative() { + args.path.join(d) + } else { + d.clone() + } + }); + + let mut cache = if let Some(ref dir) = cache_dir { + phase!("Load cache from disk", { + match build::cache::ModuleCache::load_from_disk(dir) { + Ok(c) => { + eprintln!(" loaded cache from {}", dir.display()); + c + } + Err(_) => { + eprintln!(" no existing cache, starting fresh"); + build::cache::ModuleCache::new() + } + } + }) + } else { + build::cache::ModuleCache::new() + }; + + let (result, _registry, build_parsed_modules) = phase!("Build (incremental)", { + build::build_from_sources_incremental(&source_refs, &None, None, &options, &mut cache) + }); + + phase!("Build reverse deps", { + cache.build_reverse_deps(); + }); + + if let Some(ref dir) = cache_dir { + phase!("Save cache to disk", { + if let Err(e) = cache.save_to_disk(dir) { + eprintln!(" failed to save cache: {e}"); + } + }); + } + + let error_count: usize = result.modules.iter().map(|m| m.type_errors.len()).sum(); + let module_count = result.modules.len(); + let error_module_count = result.modules.iter().filter(|m| !m.type_errors.is_empty()).count(); + let cached_count = result.modules.iter().filter(|m| m.cached).count(); + eprintln!( + " {} modules ({} cached, {} errors in {} modules)", + module_count, cached_count, error_count, error_module_count + ); + + // Phase 5: Parse cache-hit sources + let already_parsed: HashSet = build_parsed_modules + .iter() + .map(|(p, _)| 
p.to_string_lossy().into_owned()) + .collect(); + + let cache_hit_sources: Vec<_> = sources + .iter() + .filter(|(path, _)| !already_parsed.contains(path.as_str())) + .collect(); + + let extra_count = cache_hit_sources.len(); + + let mut all_modules: Vec<(PathBuf, purescript_fast_compiler::CstModule)> = build_parsed_modules; + + phase!(format!("Parse cache-hits ({extra_count} modules)"), { + let extra: Vec<_> = cache_hit_sources + .par_iter() + .filter_map(|(path, source)| { + purescript_fast_compiler::parse(source) + .ok() + .map(|m| (PathBuf::from(path.as_str()), m)) + }) + .collect(); + all_modules.extend(extra); + }); + + // Phase 6: Build definition index + let index = phase!(format!("Build definition index ({} modules)", all_modules.len()), { + let mut index = DefinitionIndex::new(); + for (path, module) in &all_modules { + index.add_module(module, &path.to_string_lossy()); + } + index + }); + + // Phase 7: Build ResolutionExports + let exports = phase!("Build ResolutionExports", { + let just_modules: Vec = + all_modules.into_iter().map(|(_, m)| m).collect(); + ResolutionExports::new(&just_modules) + }); + + // Phase 8: Save LSP snapshots + if let Some(ref dir) = cache_dir { + let lsp_dir = dir.join("lsp"); + phase!("Save registry snapshot", { + if let Err(e) = build::cache::save_registry_snapshot(&_registry, &lsp_dir.join("registry.bin")) { + eprintln!(" failed: {e}"); + } + }); + phase!("Save def_index snapshot", { + if let Err(e) = index.save_to_disk(&lsp_dir.join("def_index.bin")) { + eprintln!(" failed: {e}"); + } + }); + phase!("Save resolution_exports snapshot", { + if let Err(e) = exports.save_to_disk(&lsp_dir.join("resolution_exports.bin")) { + eprintln!(" failed: {e}"); + } + }); + + // Phase 9: Load LSP snapshots (benchmark restore time) + eprintln!(); + eprintln!(" --- Restore from cache (simulated warm startup) ---"); + phase!("Load registry snapshot", { + match build::cache::load_registry_snapshot(&lsp_dir.join("registry.bin")) { + Ok(_) => {}, 
+ Err(e) => eprintln!(" failed: {e}"), + } + }); + phase!("Load def_index snapshot", { + match DefinitionIndex::load_from_disk(&lsp_dir.join("def_index.bin")) { + Ok(_) => {}, + Err(e) => eprintln!(" failed: {e}"), + } + }); + phase!("Load resolution_exports snapshot", { + match ResolutionExports::load_from_disk(&lsp_dir.join("resolution_exports.bin")) { + Ok(_) => {}, + Err(e) => eprintln!(" failed: {e}"), + } + }); + phase!("Load cache index", { + match build::cache::ModuleCache::load_from_disk(dir) { + Ok(_) => {}, + Err(e) => eprintln!(" failed: {e}"), + } + }); + } + + eprintln!(); + eprintln!(" {:.<50} {:>8.2?}", "TOTAL", total_start.elapsed()); +} diff --git a/src/build/cache.rs b/src/build/cache.rs new file mode 100644 index 00000000..b3136ed9 --- /dev/null +++ b/src/build/cache.rs @@ -0,0 +1,500 @@ +//! Module cache for incremental builds. +//! +//! Uses a lightweight index file (hashes + imports) loaded eagerly, +//! and per-module export files loaded lazily on demand. +//! Exports hash comparison avoids rebuilding dependents when only +//! function bodies change (not signatures). 
+ +use std::collections::{HashMap, HashSet, VecDeque}; +use std::hash::{Hash, Hasher}; +use std::io; +use std::path::{Path, PathBuf}; + +use serde::{Deserialize, Serialize}; + +use crate::typechecker::registry::ModuleExports; + +use super::portable::{PModuleExports, StringTableBuilder, StringTableReader}; + +// ===== Cache Index (loaded eagerly, small) ===== + +#[derive(Serialize, Deserialize, Default)] +struct CacheIndex { + modules: HashMap, + /// Maps file paths to module names for fast lookup during incremental builds + #[serde(default)] + path_to_module: HashMap, +} + +#[derive(Serialize, Deserialize, Clone)] +struct CacheIndexEntry { + content_hash: u64, + exports_hash: u64, + imports: Vec, +} + +// ===== Per-Module Cache File ===== + +#[derive(Serialize, Deserialize)] +struct ModuleCacheFile { + string_table: Vec, + exports: PModuleExports, +} + +// ===== In-Memory Module State ===== + +enum CachedModule { + /// Only index loaded — exports on disk, not yet read + Indexed { + content_hash: u64, + exports_hash: u64, + imports: Vec, + }, + /// Fully loaded in memory (from disk or from typechecking) + Loaded { + content_hash: u64, + exports_hash: u64, + imports: Vec, + exports: ModuleExports, + dirty: bool, + }, +} + +impl CachedModule { + fn content_hash(&self) -> u64 { + match self { + CachedModule::Indexed { content_hash, .. } => *content_hash, + CachedModule::Loaded { content_hash, .. } => *content_hash, + } + } + + fn exports_hash(&self) -> u64 { + match self { + CachedModule::Indexed { exports_hash, .. } => *exports_hash, + CachedModule::Loaded { exports_hash, .. } => *exports_hash, + } + } + + fn imports(&self) -> &[String] { + match self { + CachedModule::Indexed { imports, .. } => imports, + CachedModule::Loaded { imports, .. } => imports, + } + } + + fn is_dirty(&self) -> bool { + match self { + CachedModule::Indexed { .. } => false, + CachedModule::Loaded { dirty, .. 
} => *dirty, + } + } +} + +// ===== Public API ===== + +/// In-memory cache of typechecked modules for incremental builds. +/// Index is loaded eagerly; per-module exports are loaded lazily. +pub struct ModuleCache { + entries: HashMap, + /// Reverse dependency graph: module → modules that import it + dependents: HashMap>, + /// Maps file paths to module names for skipping parse on warm builds + path_index: HashMap, + /// Directory for per-module cache files + cache_dir: Option, + /// Whether the index needs to be rewritten + index_dirty: bool, +} + +impl Default for ModuleCache { + fn default() -> Self { + Self { + entries: HashMap::new(), + dependents: HashMap::new(), + path_index: HashMap::new(), + cache_dir: None, + index_dirty: false, + } + } +} + +impl ModuleCache { + pub fn new() -> Self { + Self::default() + } + + /// Returns true if the cache has any module entries (i.e. a prior build populated it). + pub fn has_entries(&self) -> bool { + !self.entries.is_empty() + } + + /// Compute a content hash for a source string. + pub fn content_hash(source: &str) -> u64 { + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + source.hash(&mut hasher); + hasher.finish() + } + + /// Compute a hash of serialized exports for change detection. + pub fn exports_hash(exports: &ModuleExports) -> u64 { + let mut st = StringTableBuilder::new(); + let portable = PModuleExports::from_exports(exports, &mut st); + let bytes = bincode::serialize(&(st.into_table(), &portable)).unwrap_or_default(); + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + bytes.hash(&mut hasher); + hasher.finish() + } + + /// Check if a module needs to be rebuilt. 
+ /// + /// Returns true if: + /// - The module is not in the cache + /// - Its content hash has changed + /// - Any of its imports was rebuilt in this cycle + pub fn needs_rebuild( + &self, + module_name: &str, + content_hash: u64, + rebuilt: &HashSet, + ) -> bool { + match self.entries.get(module_name) { + None => true, + Some(cached) => { + if cached.content_hash() != content_hash { + return true; + } + // Check if any dependency was rebuilt (exports changed) + cached.imports().iter().any(|dep| rebuilt.contains(dep)) + } + } + } + + /// Look up the module name associated with a file path. + /// Canonicalizes the path for consistent lookups regardless of relative/absolute form. + pub fn module_name_for_path(&self, path: &str) -> Option<&str> { + let canonical = std::path::Path::new(path) + .canonicalize() + .map(|p| p.to_string_lossy().into_owned()) + .unwrap_or_else(|_| path.to_string()); + self.path_index.get(&canonical).map(|s| s.as_str()) + } + + /// Register a file path → module name mapping. + /// Canonicalizes the path for consistent lookups regardless of relative/absolute form. + pub fn register_path(&mut self, path: String, module_name: String) { + let canonical = std::path::Path::new(&path) + .canonicalize() + .map(|p| p.to_string_lossy().into_owned()) + .unwrap_or_else(|_| path); + if self.path_index.get(&canonical).map(|s| s.as_str()) != Some(&module_name) { + self.path_index.insert(canonical, module_name); + self.index_dirty = true; + } + } + + /// Get the cached imports for a module by name. + pub fn get_imports(&self, module_name: &str) -> Option<&[String]> { + self.entries.get(module_name).map(|c| c.imports()) + } + + /// Get cached exports for a module, loading from disk if needed. + pub fn get_exports(&mut self, module_name: &str) -> Option<&ModuleExports> { + // Check if we need to load from disk first + let needs_load = matches!( + self.entries.get(module_name), + Some(CachedModule::Indexed { .. 
}) + ); + + if needs_load { + if let Some(ref cache_dir) = self.cache_dir { + let module_path = module_file_path(cache_dir, module_name); + if let Ok(exports) = load_module_file(&module_path) { + if let Some(entry) = self.entries.remove(module_name) { + let (content_hash, exports_hash, imports) = match entry { + CachedModule::Indexed { content_hash, exports_hash, imports } => { + (content_hash, exports_hash, imports) + } + _ => unreachable!(), + }; + self.entries.insert(module_name.to_string(), CachedModule::Loaded { + content_hash, + exports_hash, + imports, + exports, + dirty: false, + }); + } + } else { + // File missing/corrupt — remove from cache + self.entries.remove(module_name); + self.index_dirty = true; + return None; + } + } else { + // No cache dir — can't load + return None; + } + } + + match self.entries.get(module_name) { + Some(CachedModule::Loaded { exports, .. }) => Some(exports), + _ => None, + } + } + + /// Update the cache entry for a module after typechecking. + /// Returns true if the module's exports actually changed (different exports_hash). + pub fn update( + &mut self, + module_name: String, + content_hash: u64, + exports: ModuleExports, + imports: Vec, + ) -> bool { + let new_exports_hash = Self::exports_hash(&exports); + + let exports_changed = self.entries.get(&module_name) + .map_or(true, |old| old.exports_hash() != new_exports_hash); + + self.entries.insert(module_name, CachedModule::Loaded { + content_hash, + exports_hash: new_exports_hash, + imports, + exports, + dirty: true, + }); + self.index_dirty = true; + exports_changed + } + + /// Build the reverse dependency graph from cached import data. + pub fn build_reverse_deps(&mut self) { + self.dependents.clear(); + for (module, cached) in &self.entries { + for dep in cached.imports() { + self.dependents + .entry(dep.clone()) + .or_default() + .push(module.clone()); + } + } + } + + /// Find all transitive dependents of a module (BFS). 
+ pub fn transitive_dependents(&self, module: &str) -> HashSet<String> { + let mut result = HashSet::new(); + let mut queue = VecDeque::new(); + queue.push_back(module.to_string()); + + while let Some(current) = queue.pop_front() { + if let Some(deps) = self.dependents.get(&current) { + for dep in deps { + if result.insert(dep.clone()) { + queue.push_back(dep.clone()); + } + } + } + } + + result + } + + /// Remove modules that are no longer in the source set. + pub fn retain_modules(&mut self, module_names: &HashSet<String>) { + let before = self.entries.len(); + self.entries.retain(|k, _| module_names.contains(k)); + if self.entries.len() != before { + self.path_index.retain(|_, v| module_names.contains(v)); + self.index_dirty = true; + } + } + + // ===== Disk I/O ===== + + /// Save cache to disk: index file + per-module files for dirty modules. + pub fn save_to_disk(&self, cache_dir: &Path) -> io::Result<()> { + if !self.index_dirty && !self.entries.values().any(|m| m.is_dirty()) { + log::debug!("Cache unchanged, skipping save"); + return Ok(()); + } + + let modules_dir = cache_dir.join("modules"); + std::fs::create_dir_all(&modules_dir)?; + + // Write dirty module files + let mut saved_count = 0; + for (name, cached) in &self.entries { + if let CachedModule::Loaded { exports, dirty: true, .. 
} = cached { + let module_path = module_file_path(cache_dir, name); + save_module_file(&module_path, exports)?; + saved_count += 1; + } + } + + // Write index + let index = CacheIndex { + modules: self.entries.iter().map(|(name, cached)| { + (name.clone(), CacheIndexEntry { + content_hash: cached.content_hash(), + exports_hash: cached.exports_hash(), + imports: cached.imports().to_vec(), + }) + }).collect(), + path_to_module: self.path_index.clone(), + }; + + let index_path = cache_dir.join("index.bin"); + let encoded = bincode::serialize(&index) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + let compressed = zstd::bulk::compress(&encoded, 1) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + std::fs::write(&index_path, compressed)?; + + log::debug!("Cache save: wrote index + {} module files", saved_count); + Ok(()) + } + + /// Load cache index from disk. Module exports are loaded lazily. + pub fn load_from_disk(cache_dir: &Path) -> io::Result { + let index_path = cache_dir.join("index.bin"); + let compressed = std::fs::read(&index_path)?; + let data = zstd::bulk::decompress(&compressed, 64 * 1024 * 1024) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + let index: CacheIndex = bincode::deserialize(&data) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + + let entries = index.modules.into_iter().map(|(name, entry)| { + (name, CachedModule::Indexed { + content_hash: entry.content_hash, + exports_hash: entry.exports_hash, + imports: entry.imports, + }) + }).collect(); + + let mut cache = ModuleCache { + entries, + dependents: HashMap::new(), + path_index: index.path_to_module, + cache_dir: Some(cache_dir.to_path_buf()), + index_dirty: false, + }; + cache.build_reverse_deps(); + Ok(cache) + } +} + +// ===== File helpers ===== + +fn module_file_path(cache_dir: &Path, module_name: &str) -> PathBuf { + 
cache_dir.join("modules").join(format!("{}.bin", module_name)) +} + +fn save_module_file(path: &Path, exports: &ModuleExports) -> io::Result<()> { + let mut st = StringTableBuilder::new(); + let portable = PModuleExports::from_exports(exports, &mut st); + let file = ModuleCacheFile { + string_table: st.into_table(), + exports: portable, + }; + + let mut encoder = zstd::Encoder::new(std::fs::File::create(path)?, 1) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + bincode::serialize_into(&mut encoder, &file) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + encoder.finish() + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + Ok(()) +} + +fn load_module_file(path: &Path) -> io::Result { + let file = std::fs::File::open(path)?; + let decoder = io::BufReader::new(zstd::Decoder::new(file) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?); + let cache_file: ModuleCacheFile = bincode::deserialize_from(decoder) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + + let st = StringTableReader::new(cache_file.string_table); + Ok(cache_file.exports.to_exports(&st)) +} + +// ===== Registry Snapshot (single-file save/load for entire ModuleRegistry) ===== + +use crate::typechecker::registry::ModuleRegistry; +use crate::interner; + +#[derive(Serialize, Deserialize)] +struct RegistrySnapshot { + string_table: Vec, + /// Each entry: (module_parts as Vec, portable exports) + modules: Vec<(Vec, PModuleExports)>, +} + +/// Save the entire registry to a single compressed file. 
+pub fn save_registry_snapshot(registry: &ModuleRegistry, path: &Path) -> io::Result<()> { + let mut st = StringTableBuilder::new(); + let modules: Vec<(Vec, PModuleExports)> = registry + .iter_all() + .iter() + .map(|(name_parts, exports)| { + let parts: Vec = name_parts.iter().map(|s| st.add(*s)).collect(); + let pexports = PModuleExports::from_exports(exports, &mut st); + (parts, pexports) + }) + .collect(); + + let snapshot = RegistrySnapshot { + string_table: st.into_table(), + modules, + }; + + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + + let encoded = bincode::serialize(&snapshot) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + let compressed = zstd::bulk::compress(&encoded, 1) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + std::fs::write(path, compressed) +} + +/// Load a registry from a single compressed snapshot file. +pub fn load_registry_snapshot(path: &Path) -> io::Result { + let compressed = std::fs::read(path)?; + let data = zstd::bulk::decompress(&compressed, 256 * 1024 * 1024) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + let snapshot: RegistrySnapshot = bincode::deserialize(&data) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + + let st = StringTableReader::new(snapshot.string_table); + let mut registry = ModuleRegistry::new(); + for (parts, pexports) in &snapshot.modules { + let name: Vec = parts.iter().map(|&idx| st.sym(idx)).collect(); + let exports = pexports.to_exports(&st); + registry.register(&name, exports); + } + Ok(registry) +} + +// ===== Module File Map Snapshot ===== + +/// Save module_file_map (HashMap) to disk. 
+pub fn save_module_file_map(map: &HashMap, path: &Path) -> io::Result<()> { + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + let encoded = bincode::serialize(map) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + let compressed = zstd::bulk::compress(&encoded, 1) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + std::fs::write(path, compressed) +} + +/// Load module_file_map from disk. +pub fn load_module_file_map(path: &Path) -> io::Result> { + let compressed = std::fs::read(path)?; + let data = zstd::bulk::decompress(&compressed, 64 * 1024 * 1024) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + bincode::deserialize(&data) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}"))) +} diff --git a/src/build/error.rs b/src/build/error.rs index 33a43683..7820128b 100644 --- a/src/build/error.rs +++ b/src/build/error.rs @@ -48,12 +48,6 @@ pub enum BuildError { path: PathBuf, missing: Vec, }, - #[error("The following values in the foreign module for module {module_name} are unused: {}", unused.join(", "))] - UnusedFFIImplementations { - module_name: String, - path: PathBuf, - unused: Vec, - }, #[error("CommonJS exports in the ES foreign module for module {module_name} are unsupported: {}", exports.join(", "))] UnsupportedFFICommonJSExports { module_name: String, @@ -99,7 +93,6 @@ impl BuildError { BuildError::InvalidModuleName { .. } => "SyntaxError".into(), BuildError::MissingFFIModule { .. } => "MissingFFIModule".into(), BuildError::MissingFFIImplementations { .. } => "MissingFFIImplementations".into(), - BuildError::UnusedFFIImplementations { .. } => "UnusedFFIImplementations".into(), BuildError::UnsupportedFFICommonJSExports { .. } => "UnsupportedFFICommonJSExports".into(), BuildError::UnsupportedFFICommonJSImports { .. } => "UnsupportedFFICommonJSImports".into(), BuildError::DeprecatedFFICommonJSModule { .. 
} => "DeprecatedFFICommonJSModule".into(), diff --git a/src/build/mod.rs b/src/build/mod.rs index 55ba6b61..56a49d7e 100644 --- a/src/build/mod.rs +++ b/src/build/mod.rs @@ -4,7 +4,9 @@ //! builds a dependency graph from imports, topologically sorts, and //! typechecks in dependency order. +pub mod cache; pub mod error; +pub mod portable; use std::collections::{HashMap, HashSet, VecDeque}; use std::panic::AssertUnwindSafe; @@ -16,6 +18,7 @@ use rayon::prelude::*; use crate::cst::{Decl, Module}; use crate::interner::{self, Symbol}; +use crate::span::Span; use crate::js_ffi; use crate::typechecker::check; use crate::typechecker::registry::ModuleRegistry; @@ -40,10 +43,6 @@ pub struct BuildOptions { /// If true, typecheck modules sequentially (one at a time) instead of in /// parallel. Useful for debugging memory issues or non-deterministic bugs. pub sequential: bool, - - /// If true, stop building as soon as the first error is encountered - /// (build error or type error). Useful for quick iteration. - pub fail_fast: bool, } // ===== Public types ===== @@ -52,6 +51,7 @@ pub struct ModuleResult { pub path: PathBuf, pub module_name: String, pub type_errors: Vec, + pub cached: bool, } pub struct BuildResult { @@ -63,11 +63,15 @@ pub struct BuildResult { struct ParsedModule { path: PathBuf, - module: Module, + /// The parsed CST. None for cache-skipped modules (lazy-parsed on demand). + module: Option, + /// Index into the sources array, for lazy parsing when needed. 
+ source_idx: usize, module_name: String, module_parts: Vec, import_parts: Vec>, js_source: Option, + source_hash: u64, } // ===== Helpers ===== @@ -76,6 +80,10 @@ fn module_name_string(parts: &[Symbol]) -> String { interner::resolve_module_name(parts) } +fn module_name_to_parts(name: &str) -> Vec { + name.split('.').map(|s| interner::intern(s)).collect() +} + fn is_prim_import(parts: &[Symbol]) -> bool { !parts.is_empty() && interner::symbol_eq(parts[0], "Prim") } @@ -135,8 +143,17 @@ fn extract_foreign_import_names(module: &Module) -> Vec { // ===== Public API ===== +/// Build all PureScript modules matching the given glob patterns, with incremental caching. +pub fn build_cached(globs: &[&str], output_dir: Option, cache: &mut cache::ModuleCache) -> BuildResult { + build_internal(globs, output_dir, Some(cache)) +} + /// Build all PureScript modules matching the given glob patterns. pub fn build(globs: &[&str], output_dir: Option) -> BuildResult { + build_internal(globs, output_dir, None) +} + +fn build_internal(globs: &[&str], output_dir: Option, cache: Option<&mut cache::ModuleCache>) -> BuildResult { let build_start = Instant::now(); let mut build_errors = Vec::new(); @@ -206,7 +223,7 @@ pub fn build(globs: &[&str], output_dir: Option) -> BuildResult { ..Default::default() }; let mut result = - build_from_sources_with_options(&source_refs, &Some(js_refs), None, &options).0; + build_from_sources_impl(&source_refs, &Some(js_refs), None, &options, cache).0; // Prepend file-level errors before source-level errors build_errors.append(&mut result.build_errors); result.build_errors = build_errors; @@ -246,31 +263,141 @@ pub fn build_from_sources_with_options( start_registry: Option>, options: &BuildOptions, ) -> (BuildResult, ModuleRegistry) { + let (result, registry, _) = build_from_sources_impl(sources, js_sources, start_registry, options, None); + (result, registry) +} + +/// Build with incremental caching support. 
+/// Skips typechecking modules whose source hasn't changed and whose +/// dependencies haven't been rebuilt. +pub fn build_from_sources_incremental( + sources: &[(&str, &str)], + js_sources: &Option>, + start_registry: Option>, + options: &BuildOptions, + cache: &mut cache::ModuleCache, +) -> (BuildResult, ModuleRegistry, Vec<(PathBuf, Module)>) { + build_from_sources_impl(sources, js_sources, start_registry, options, Some(cache)) +} + +fn build_from_sources_impl( + sources: &[(&str, &str)], + js_sources: &Option>, + start_registry: Option>, + options: &BuildOptions, + mut cache: Option<&mut cache::ModuleCache>, +) -> (BuildResult, ModuleRegistry, Vec<(PathBuf, Module)>) { let pipeline_start = Instant::now(); let mut build_errors = Vec::new(); - let fail_fast = options.fail_fast; - - // Phase 2: Parse all sources (parallel) - log::debug!("Phase 2c: Parsing {} source files", sources.len()); + // Phase 2c: Parse source files (with cache-aware skipping) + log::debug!("Phase 2c: Processing {} source files", sources.len()); let phase_start = Instant::now(); - // Parse all sources in parallel - let parse_results: Vec<_> = sources + // Step 1: Compute content hashes for all sources (fast, parallel) + let source_hashes: Vec = sources .par_iter() - .map(|&(path_str, source)| { - let path = PathBuf::from(path_str); - match crate::parser::parse(source) { - Ok(module) => Ok((path, module)), - Err(e) => Err(BuildError::CompileError { path, error: e }), - } - }) + .map(|&(_, source)| cache::ModuleCache::content_hash(source)) .collect(); - // Sequential validation (Prim check, dup check, etc.) 
+ // Step 2: Determine which sources can skip parsing (cache hit by path + hash) + let mut skip_parse = vec![false; sources.len()]; + let mut skip_count = 0usize; + if let Some(ref cache) = cache { + for (i, &(path_str, _)) in sources.iter().enumerate() { + if let Some(module_name) = cache.module_name_for_path(path_str) { + if !cache.needs_rebuild(module_name, source_hashes[i], &HashSet::new()) { + skip_parse[i] = true; + skip_count += 1; + } + } + } + } + log::debug!( + " {} modules cached (skip parse), {} need parsing", + skip_count, + sources.len() - skip_count + ); + + // Step 3: Parse only non-cached sources in parallel (use a pool with large stacks + // since the parser can recurse deeply on complex files) + let parse_pool = rayon::ThreadPoolBuilder::new() + .thread_name(|i| format!("pfc-parse-{i}")) + .stack_size(16 * 1024 * 1024) + .build() + .expect("failed to build parse thread pool"); + let parse_results: Vec<(usize, Result<(PathBuf, Module), BuildError>)> = parse_pool.install(|| { + sources + .par_iter() + .enumerate() + .filter(|(i, _)| !skip_parse[*i]) + .map(|(i, &(path_str, source))| { + let path = PathBuf::from(path_str); + let result = match crate::parser::parse(source) { + Ok(module) => Ok((path, module)), + Err(e) => Err(BuildError::CompileError { path, error: e }), + }; + (i, result) + }) + .collect() + }); + + // Step 4: Build parsed vec from both cached stubs and parsed results let mut parsed: Vec = Vec::new(); let mut seen_modules: HashMap, PathBuf> = HashMap::new(); - for (i, result) in parse_results.into_iter().enumerate() { + // 4a: Create stubs for cache-hit modules + if let Some(ref cache) = cache { + for (i, &(path_str, _)) in sources.iter().enumerate() { + if !skip_parse[i] { + continue; + } + let module_name = match cache.module_name_for_path(path_str) { + Some(name) => name.to_string(), + None => continue, + }; + let module_parts = module_name_to_parts(&module_name); + + // Duplicate check + if let Some(existing_path) = 
seen_modules.get(&module_parts) { + log::debug!( + " rejected {}: duplicate (already at {})", + module_name, + existing_path.display() + ); + build_errors.push(BuildError::DuplicateModule { + module_name, + path1: existing_path.clone(), + path2: PathBuf::from(path_str), + }); + continue; + } + seen_modules.insert(module_parts.clone(), PathBuf::from(path_str)); + + let import_parts: Vec> = cache + .get_imports(&module_name) + .map(|imports| imports.iter().map(|s| module_name_to_parts(s)).collect()) + .unwrap_or_default(); + + let js_source = js_sources + .as_ref() + .and_then(|m| m.get(path_str)) + .map(|s| s.to_string()); + + parsed.push(ParsedModule { + path: PathBuf::from(path_str), + module: None, + source_idx: i, + module_name, + module_parts, + import_parts, + js_source, + source_hash: source_hashes[i], + }); + } + } + + // 4b: Process parsed results (with full validation) + for (i, result) in parse_results { let (path, module) = match result { Ok(pair) => pair, Err(e) => { @@ -286,7 +413,8 @@ pub fn build_from_sources_with_options( // Check for reserved Prim namespace if !module_parts.is_empty() { - let is_prim = interner::with_resolved(module_parts[0], |s| s == "Prim").unwrap_or(false); + let is_prim = + interner::with_resolved(module_parts[0], |s| s == "Prim").unwrap_or(false); if is_prim { log::debug!(" rejected {}: Prim namespace is reserved", module_name); build_errors.push(BuildError::CannotDefinePrimModules { module_name, path }); @@ -299,7 +427,8 @@ pub fn build_from_sources_with_options( for part in &module_parts { let invalid_char = interner::with_resolved(*part, |s| { s.chars().find(|&c| c == '\'' || c == '_') - }).flatten(); + }) + .flatten(); if let Some(c) = invalid_char { log::debug!( " rejected {}: invalid character '{}' in module name", @@ -348,28 +477,37 @@ pub fn build_from_sources_with_options( .and_then(|m| m.get(path_str)) .map(|s| s.to_string()); + // Register path → module_name mapping in cache + if let Some(ref mut cache) = cache { + 
cache.register_path(path_str.to_string(), module_name.clone()); + } + parsed.push(ParsedModule { path, - module, + module: Some(module), + source_idx: i, module_name, module_parts, import_parts, js_source, + source_hash: source_hashes[i], }); } log::debug!( - "Phase 2c complete: parsed {} modules (rejected {}) in {:.2?}", + "Phase 2c complete: {} modules ({} cached, {} parsed, {} rejected) in {:.2?}", parsed.len(), + skip_count, + parsed.len().saturating_sub(skip_count), sources.len() - parsed.len(), phase_start.elapsed() ); - if fail_fast && !build_errors.is_empty() { + if !build_errors.is_empty() { let registry = match start_registry { Some(base) => ModuleRegistry::with_base(base), None => ModuleRegistry::default(), }; - return (BuildResult { modules: Vec::new(), build_errors }, registry); + return (BuildResult { modules: Vec::new(), build_errors }, registry, Vec::new()); } // Phase 3: Build dependency graph and check for unknown imports @@ -399,7 +537,7 @@ pub fn build_from_sources_with_options( module_name: imp_name, importing_module: pm.module_name.clone(), path: pm.path.clone(), - span: pm.module.span, + span: pm.module.as_ref().map(|m| m.span).unwrap_or(Span::new(0, 0)), }); } } @@ -446,9 +584,9 @@ pub fn build_from_sources_with_options( phase_start.elapsed() ); - if fail_fast && !build_errors.is_empty() { + if !build_errors.is_empty() { log::debug!("Phase 3 failed"); - return (BuildResult { modules: Vec::new(), build_errors }, registry); + return (BuildResult { modules: Vec::new(), build_errors }, registry, Vec::new()); } // Phase 4: Typecheck in dependency order @@ -471,6 +609,7 @@ pub fn build_from_sources_with_options( .unwrap_or(1) }; let pool = rayon::ThreadPoolBuilder::new() + .thread_name(|i| format!("pfc-typecheck-{i}")) .num_threads(num_threads) .stack_size(16 * 1024 * 1024) .build() @@ -483,6 +622,8 @@ pub fn build_from_sources_with_options( effective_timeout.map(|t| t.as_secs()).unwrap_or(0)); let mut done = 0usize; + let mut rebuilt_set: HashSet 
= HashSet::new(); + let mut cached_count = 0usize; for level in &levels { if sequential { @@ -490,15 +631,63 @@ pub fn build_from_sources_with_options( // (including ModuleExports) is dropped before the next module starts. // Peak memory = 1 module's CheckResult at a time. for &idx in level { + // Cache check: skip typecheck if source unchanged and no deps rebuilt + { + let pm = &parsed[idx]; + if let Some(ref mut cache) = cache { + if !cache.needs_rebuild(&pm.module_name, pm.source_hash, &rebuilt_set) { + if let Some(exports) = cache.get_exports(&pm.module_name) { + done += 1; + cached_count += 1; + eprintln!( + "[{}/{}] [skipping] {}", + done, total_modules, pm.module_name + ); + registry.register(&pm.module_parts, exports.clone()); + module_results.push(ModuleResult { + path: pm.path.clone(), + module_name: pm.module_name.clone(), + type_errors: vec![], + cached: true, + }); + continue; + } + } + } + } + + // Lazy parse if module was cache-skipped but now needs typechecking + if parsed[idx].module.is_none() { + let source = sources[parsed[idx].source_idx].1; + match crate::parser::parse(source) { + Ok(module) => { + parsed[idx].module = Some(module); + } + Err(e) => { + done += 1; + build_errors.push(BuildError::CompileError { + path: parsed[idx].path.clone(), + error: e, + }); + continue; + } + } + } + let pm = &parsed[idx]; + eprintln!( + "[{}/{}] [compiling] {}", + done + 1, total_modules, pm.module_name + ); let tc_start = Instant::now(); let deadline = effective_timeout.map(|t| tc_start + t); + let module_ref = pm.module.as_ref().unwrap(); let check_result = std::panic::catch_unwind(AssertUnwindSafe(|| { let mod_sym = crate::interner::intern(&pm.module_name); log::debug!("Typechecking: {}", &pm.module_name); let path_str = pm.path.to_string_lossy(); crate::typechecker::set_deadline(deadline, mod_sym, &path_str); - let (ast_module, convert_errors) = crate::ast::convert(&pm.module, ®istry); + let (ast_module, convert_errors) = crate::ast::convert(module_ref, 
®istry); let mut result = check::check_module(&ast_module, ®istry); if !convert_errors.is_empty() { let mut all_errors = convert_errors; @@ -516,6 +705,18 @@ pub fn build_from_sources_with_options( " [{}/{}] ok: {} ({:.2?})", done, total_modules, pm.module_name, elapsed ); + let import_names: Vec = pm.import_parts.iter() + .map(|parts| interner::resolve_module_name(parts)) + .collect(); + let exports_changed = if let Some(ref mut c) = cache { + c.update(pm.module_name.clone(), pm.source_hash, result.exports.clone(), import_names) + } else { + true + }; + // Only add to rebuilt_set if exports actually changed + if exports_changed { + rebuilt_set.insert(pm.module_name.clone()); + } // Register exports immediately — result.exports is moved, // then result (with its types HashMap) is dropped. registry.register(&pm.module_parts, result.exports); @@ -523,6 +724,7 @@ pub fn build_from_sources_with_options( path: pm.path.clone(), module_name: pm.module_name.clone(), type_errors: result.errors, + cached: false, }); } Err(payload) => { @@ -532,27 +734,81 @@ pub fn build_from_sources_with_options( ); } } - // In sequential mode, check fail_fast after each module - if fail_fast { - let has_errors = module_results.last().map_or(false, |r| !r.type_errors.is_empty()) || !build_errors.is_empty(); - if has_errors { - log::debug!("Phase 4: fail_fast triggered after module, stopping"); - break; - } + let has_errors = module_results.last().map_or(false, |r| !r.type_errors.is_empty()) || !build_errors.is_empty(); + if has_errors { + log::debug!("Phase 4: error after module, stopping"); + break; } } } else { - // Parallel mode: collect all results for the level, then register sequentially. + // Parallel mode: first handle cached modules, then typecheck the rest. 
+ let mut to_typecheck = Vec::new(); + for &idx in level.iter() { + let pm = &parsed[idx]; + if let Some(ref mut cache) = cache { + if !cache.needs_rebuild(&pm.module_name, pm.source_hash, &rebuilt_set) { + if let Some(exports) = cache.get_exports(&pm.module_name) { + done += 1; + cached_count += 1; + eprintln!( + "[{}/{}] [skipping] {}", + done, total_modules, pm.module_name + ); + registry.register(&pm.module_parts, exports.clone()); + module_results.push(ModuleResult { + path: pm.path.clone(), + module_name: pm.module_name.clone(), + type_errors: vec![], + cached: true, + }); + continue; + } + } + } + to_typecheck.push(idx); + } + + // Lazy parse any cache-skipped modules that now need typechecking + for &idx in &to_typecheck { + if parsed[idx].module.is_none() { + let source = sources[parsed[idx].source_idx].1; + match crate::parser::parse(source) { + Ok(module) => { + parsed[idx].module = Some(module); + } + Err(e) => { + build_errors.push(BuildError::CompileError { + path: parsed[idx].path.clone(), + error: e, + }); + } + } + } + } + // Remove entries that failed to parse + to_typecheck.retain(|&idx| parsed[idx].module.is_some()); + + // Print [compiling] for all modules in this level before starting + for &idx in &to_typecheck { + let pm = &parsed[idx]; + eprintln!( + "[{}/{}] [compiling] {}", + done + 1, total_modules, pm.module_name + ); + } + + // Typecheck remaining modules in parallel let level_results: Vec<_> = pool.install(|| { - level.par_iter().map(|&idx| { + to_typecheck.par_iter().map(|&idx| { let pm = &parsed[idx]; + let module_ref = pm.module.as_ref().unwrap(); let tc_start = Instant::now(); let deadline = effective_timeout.map(|t| tc_start + t); let check_result = std::panic::catch_unwind(AssertUnwindSafe(|| { let mod_sym = crate::interner::intern(&pm.module_name); let path_str = pm.path.to_string_lossy(); crate::typechecker::set_deadline(deadline, mod_sym, &path_str); - let (ast_module, convert_errors) = crate::ast::convert(&pm.module, 
®istry); + let (ast_module, convert_errors) = crate::ast::convert(module_ref, ®istry); let mut result = check::check_module(&ast_module, ®istry); if !convert_errors.is_empty() { let mut all_errors = convert_errors; @@ -576,11 +832,23 @@ pub fn build_from_sources_with_options( " [{}/{}] ok: {} ({:.2?})", done, total_modules, pm.module_name, elapsed ); + let import_names: Vec = pm.import_parts.iter() + .map(|parts| interner::resolve_module_name(parts)) + .collect(); + let exports_changed = if let Some(ref mut c) = cache { + c.update(pm.module_name.clone(), pm.source_hash, result.exports.clone(), import_names) + } else { + true + }; + if exports_changed { + rebuilt_set.insert(pm.module_name.clone()); + } registry.register(&pm.module_parts, result.exports); module_results.push(ModuleResult { path: pm.path.clone(), module_name: pm.module_name.clone(), type_errors: result.errors, + cached: false, }); } Err(payload) => { @@ -592,18 +860,17 @@ pub fn build_from_sources_with_options( } } } - // After each dependency level, check if fail_fast should stop - if fail_fast { - let err_count = module_results.iter().filter(|r| !r.type_errors.is_empty()).count(); - if !build_errors.is_empty() || err_count > 0 { - log::debug!("Phase 4: fail_fast triggered after level ({} done, {} with errors), stopping", done, err_count); - break; - } + let err_count = module_results.iter().filter(|r| !r.type_errors.is_empty()).count(); + if !build_errors.is_empty() || err_count > 0 { + log::debug!("Phase 4: error after level ({} done, {} with errors), stopping", done, err_count); + break; } } log::debug!( - "Phase 4 complete: typechecked {} modules in {:.2?}", + "Phase 4 complete: {} modules ({} cached, {} typechecked) in {:.2?}", module_results.len(), + cached_count, + module_results.len() - cached_count, phase_start.elapsed() ); @@ -613,7 +880,12 @@ pub fn build_from_sources_with_options( let phase_start = Instant::now(); let mut ffi_checked = 0; for pm in &parsed { - let foreign_names = 
extract_foreign_import_names(&pm.module); + // Skip FFI validation for cache-skipped modules (already validated) + let module_ref = match pm.module.as_ref() { + Some(m) => m, + None => continue, + }; + let foreign_names = extract_foreign_import_names(module_ref); let has_foreign = !foreign_names.is_empty(); match (&pm.js_source, has_foreign) { @@ -656,18 +928,6 @@ pub fn build_from_sources_with_options( missing, }); } - js_ffi::FfiError::UnusedFFIImplementations { unused } => { - log::debug!( - " FFI error in {}: unused implementations: {:?}", - pm.module_name, - unused - ); - build_errors.push(BuildError::UnusedFFIImplementations { - module_name: pm.module_name.clone(), - path: pm.path.clone(), - unused, - }); - } js_ffi::FfiError::UnsupportedFFICommonJSExports { exports } => { build_errors.push( BuildError::UnsupportedFFICommonJSExports { @@ -746,6 +1006,12 @@ pub fn build_from_sources_with_options( .collect(); for pm in &parsed { + // Skip codegen for cache-skipped modules (JS already generated) + let module_ref = match pm.module.as_ref() { + Some(m) => m, + None => continue, + }; + if !ok_modules.contains(&pm.module_name) { log::debug!(" skipping {} (has type errors)", pm.module_name); continue; @@ -764,7 +1030,7 @@ pub fn build_from_sources_with_options( log::debug!(" generating JS for {}", pm.module_name); let js_module = crate::codegen::js::module_to_js( - &pm.module, + module_ref, &pm.module_name, &pm.module_parts, module_exports, @@ -827,12 +1093,18 @@ pub fn build_from_sources_with_options( build_errors.len() ); + let returned_modules: Vec<(PathBuf, Module)> = parsed + .into_iter() + .filter_map(|pm| pm.module.map(|m| (pm.path, m))) + .collect(); + ( BuildResult { modules: module_results, build_errors, }, registry, + returned_modules, ) } @@ -1109,26 +1381,6 @@ mod tests { ); } - #[test] - fn parse_error_resilience() { - let result = build_from_sources(&[ - ("src/A.purs", "module A where\nx :: Int\nx = 42"), - ("src/Bad.purs", "this is not valid 
purescript"), - ("src/B.purs", "module B where\nimport A\ny = x"), - ]); - // Should have a parse error for Bad.purs - assert!( - result - .build_errors - .iter() - .any(|e| matches!(e, BuildError::CompileError { .. })), - "expected CompileError" - ); - // A and B should still compile successfully - assert_eq!(result.modules.len(), 2); - assert!(result.modules.iter().all(|m| m.type_errors.is_empty())); - } - #[test] fn prim_import_not_missing() { let result = build_from_sources(&[( @@ -1279,121 +1531,6 @@ roundtrip x = useExceptT (mkExcept x) assert!(result.modules[0].type_errors.is_empty()); } - #[test] - fn export_despite_type_error() { - let result = build_from_sources(&[ - ( - "src/A.purs", - "\ -module A where - -f :: Int -> Int -f x = x - -g :: String -g = 42 -", - ), - ( - "src/B.purs", - "\ -module B where -import A - -y :: Int -y = f 1 -", - ), - ]); - assert!( - result.build_errors.is_empty(), - "build errors: {:?}", - result - .build_errors - .iter() - .map(|e| format!("{}", e)) - .collect::>() - ); - let a = result - .modules - .iter() - .find(|m| m.module_name == "A") - .unwrap(); - assert!( - !a.type_errors.is_empty(), - "A should have type errors from g" - ); - let b = result - .modules - .iter() - .find(|m| m.module_name == "B") - .unwrap(); - assert!( - b.type_errors.is_empty(), - "B should compile cleanly, got: {:?}", - b.type_errors - .iter() - .map(|e| e.to_string()) - .collect::>() - ); - } - - #[test] - fn signature_exported_on_body_error() { - let result = build_from_sources(&[ - ( - "src/A.purs", - "\ -module A where - -h :: Int -> Int -h x = \"not an int\" -", - ), - ( - "src/B.purs", - "\ -module B where -import A - -y :: Int -> Int -y = h -", - ), - ]); - assert!( - result.build_errors.is_empty(), - "build errors: {:?}", - result - .build_errors - .iter() - .map(|e| format!("{}", e)) - .collect::>() - ); - let a = result - .modules - .iter() - .find(|m| m.module_name == "A") - .unwrap(); - assert!( - !a.type_errors.is_empty(), - "A should 
have type errors from h" - ); - let b = result - .modules - .iter() - .find(|m| m.module_name == "B") - .unwrap(); - assert!( - b.type_errors.is_empty(), - "B should compile cleanly using h's declared signature, got: {:?}", - b.type_errors - .iter() - .map(|e| e.to_string()) - .collect::>() - ); - } - #[test] fn instance_head_record_in_type_app() { let result = build_from_sources(&[( diff --git a/src/build/portable.rs b/src/build/portable.rs new file mode 100644 index 00000000..c219ba23 --- /dev/null +++ b/src/build/portable.rs @@ -0,0 +1,380 @@ +//! Portable (serializable) representations of typechecker types. +//! +//! Uses a deduplicated string table so each symbol is stored once. +//! Symbol references are u32 indices into the string table. + +use std::collections::{HashMap, HashSet}; + +use serde::{Deserialize, Serialize}; + +use crate::cst::{Associativity, QualifiedIdent}; +use crate::interner; +use crate::typechecker::registry::ModuleExports; +use crate::typechecker::types::{Role, Scheme, TyVarId, Type}; + +// ===== String Table ===== + +/// Builds a deduplicated string table during serialization. +/// Each unique Symbol is resolved exactly once. +pub struct StringTableBuilder { + strings: Vec, + sym_to_idx: HashMap, +} + +impl StringTableBuilder { + pub fn new() -> Self { + Self { + strings: Vec::new(), + sym_to_idx: HashMap::new(), + } + } + + pub fn add(&mut self, sym: interner::Symbol) -> u32 { + if let Some(&idx) = self.sym_to_idx.get(&sym) { + return idx; + } + let s = interner::resolve(sym).unwrap_or_default(); + let idx = self.strings.len() as u32; + self.strings.push(s); + self.sym_to_idx.insert(sym, idx); + idx + } + + pub fn into_table(self) -> Vec { + self.strings + } +} + +/// Reads from a string table during deserialization. +/// All strings are interned in one batch. 
+pub struct StringTableReader { + symbols: Vec, +} + +impl StringTableReader { + pub fn new(table: Vec) -> Self { + let symbols = interner::intern_batch(&table); + Self { symbols } + } + + pub fn sym(&self, idx: u32) -> interner::Symbol { + self.symbols[idx as usize] + } +} + +// ===== Portable QualifiedIdent ===== + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash)] +pub struct PQI { + pub module: Option, + pub name: u32, +} + +fn conv_qi(qi: &QualifiedIdent, st: &mut StringTableBuilder) -> PQI { + PQI { + module: qi.module.map(|s| st.add(s)), + name: st.add(qi.name), + } +} + +fn rest_qi(p: &PQI, st: &StringTableReader) -> QualifiedIdent { + QualifiedIdent { + module: p.module.map(|i| st.sym(i)), + name: st.sym(p.name), + } +} + +// ===== Portable Type ===== + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub enum PType { + Unif(u32), + Var(u32), + Con(PQI), + App(Box, Box), + Fun(Box, Box), + Forall(Vec<(u32, bool)>, Box), + Record(Vec<(u32, PType)>, Option>), + TypeString(u32), + TypeInt(i64), +} + +fn conv_type(t: &Type, st: &mut StringTableBuilder) -> PType { + match t { + Type::Unif(id) => PType::Unif(id.0), + Type::Var(s) => PType::Var(st.add(*s)), + Type::Con(qi) => PType::Con(conv_qi(qi, st)), + Type::App(f, a) => PType::App( + Box::new(conv_type(f, st)), + Box::new(conv_type(a, st)), + ), + Type::Fun(a, b) => PType::Fun( + Box::new(conv_type(a, st)), + Box::new(conv_type(b, st)), + ), + Type::Forall(vars, body) => PType::Forall( + vars.iter().map(|(s, v)| (st.add(*s), *v)).collect(), + Box::new(conv_type(body, st)), + ), + Type::Record(fields, tail) => PType::Record( + fields.iter().map(|(s, t)| (st.add(*s), conv_type(t, st))).collect(), + tail.as_ref().map(|t| Box::new(conv_type(t, st))), + ), + Type::TypeString(s) => PType::TypeString(st.add(*s)), + Type::TypeInt(i) => PType::TypeInt(*i), + } +} + +fn rest_type(p: &PType, st: &StringTableReader) -> Type { + match p { + PType::Unif(id) => Type::Unif(TyVarId(*id)), + PType::Var(s) 
=> Type::Var(st.sym(*s)), + PType::Con(qi) => Type::Con(rest_qi(qi, st)), + PType::App(f, a) => Type::App( + Box::new(rest_type(f, st)), + Box::new(rest_type(a, st)), + ), + PType::Fun(a, b) => Type::Fun( + Box::new(rest_type(a, st)), + Box::new(rest_type(b, st)), + ), + PType::Forall(vars, body) => Type::Forall( + vars.iter().map(|(s, v)| (st.sym(*s), *v)).collect(), + Box::new(rest_type(body, st)), + ), + PType::Record(fields, tail) => Type::Record( + fields.iter().map(|(s, t)| (st.sym(*s), rest_type(t, st))).collect(), + tail.as_ref().map(|t| Box::new(rest_type(t, st))), + ), + PType::TypeString(s) => Type::TypeString(st.sym(*s)), + PType::TypeInt(i) => Type::TypeInt(*i), + } +} + +// ===== Portable Scheme ===== + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct PScheme { + pub forall_vars: Vec, + pub ty: PType, +} + +fn conv_scheme(s: &Scheme, st: &mut StringTableBuilder) -> PScheme { + PScheme { + forall_vars: s.forall_vars.iter().map(|v| st.add(*v)).collect(), + ty: conv_type(&s.ty, st), + } +} + +fn rest_scheme(p: &PScheme, st: &StringTableReader) -> Scheme { + Scheme { + forall_vars: p.forall_vars.iter().map(|v| st.sym(*v)).collect(), + ty: rest_type(&p.ty, st), + } +} + +// ===== Portable Associativity ===== + +#[derive(Serialize, Deserialize, Clone, Copy, Debug)] +pub enum PAssociativity { + Left, + Right, + None, +} + +fn conv_assoc(a: &Associativity) -> PAssociativity { + match a { + Associativity::Left => PAssociativity::Left, + Associativity::Right => PAssociativity::Right, + Associativity::None => PAssociativity::None, + } +} + +fn rest_assoc(p: &PAssociativity) -> Associativity { + match p { + PAssociativity::Left => Associativity::Left, + PAssociativity::Right => Associativity::Right, + PAssociativity::None => Associativity::None, + } +} + +// ===== Portable Role ===== + +#[derive(Serialize, Deserialize, Clone, Copy, Debug)] +pub enum PRole { + Phantom, + Representational, + Nominal, +} + +fn conv_role(r: &Role) -> PRole { + match r { + 
Role::Phantom => PRole::Phantom, + Role::Representational => PRole::Representational, + Role::Nominal => PRole::Nominal, + } +} + +fn rest_role(p: &PRole) -> Role { + match p { + PRole::Phantom => Role::Phantom, + PRole::Representational => Role::Representational, + PRole::Nominal => Role::Nominal, + } +} + +// ===== Portable ModuleExports ===== + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct PModuleExports { + pub values: HashMap, + pub class_methods: HashMap)>, + pub data_constructors: HashMap>, + pub ctor_details: HashMap, Vec)>, + pub instances: HashMap, Vec<(PQI, Vec)>)>>, + pub type_operators: HashMap, + pub value_fixities: HashMap, + pub type_fixities: HashMap, + pub function_op_aliases: HashSet, + pub value_operator_targets: HashMap, + pub constrained_class_methods: HashSet, + pub type_aliases: HashMap, PType)>, + pub class_param_counts: HashMap, + pub value_origins: HashMap, + pub type_origins: HashMap, + pub class_origins: HashMap, + pub operator_class_targets: HashMap, + pub class_fundeps: HashMap, Vec<(Vec, Vec)>)>, + pub type_con_arities: HashMap, + pub type_roles: HashMap>, + pub newtype_names: HashSet, + pub signature_constraints: HashMap)>>, + pub type_kinds: HashMap, + pub class_type_kinds: HashMap, + pub partial_dischargers: HashSet, + pub self_referential_aliases: HashSet, + pub class_superclasses: HashMap, Vec<(PQI, Vec)>)>, + pub method_own_constraints: HashMap>, +} + +impl PModuleExports { + pub fn from_exports(e: &ModuleExports, st: &mut StringTableBuilder) -> Self { + PModuleExports { + values: e.values.iter().map(|(k, v)| (conv_qi(k, st), conv_scheme(v, st))).collect(), + class_methods: e.class_methods.iter().map(|(k, (c, vs))| { + (conv_qi(k, st), (conv_qi(c, st), vs.iter().map(|v| conv_qi(v, st)).collect())) + }).collect(), + data_constructors: e.data_constructors.iter().map(|(k, v)| { + (conv_qi(k, st), v.iter().map(|qi| conv_qi(qi, st)).collect()) + }).collect(), + ctor_details: e.ctor_details.iter().map(|(k, (p, vs, 
ts))| { + (conv_qi(k, st), (conv_qi(p, st), vs.iter().map(|v| conv_qi(v, st)).collect(), ts.iter().map(|t| conv_type(t, st)).collect())) + }).collect(), + instances: e.instances.iter().map(|(k, v)| { + (conv_qi(k, st), v.iter().map(|(ts, cs)| { + (ts.iter().map(|t| conv_type(t, st)).collect(), cs.iter().map(|(c, ts2)| { + (conv_qi(c, st), ts2.iter().map(|t| conv_type(t, st)).collect()) + }).collect()) + }).collect()) + }).collect(), + type_operators: e.type_operators.iter().map(|(k, v)| (conv_qi(k, st), conv_qi(v, st))).collect(), + value_fixities: e.value_fixities.iter().map(|(k, (a, p))| (conv_qi(k, st), (conv_assoc(a), *p))).collect(), + type_fixities: e.type_fixities.iter().map(|(k, (a, p))| (conv_qi(k, st), (conv_assoc(a), *p))).collect(), + function_op_aliases: e.function_op_aliases.iter().map(|qi| conv_qi(qi, st)).collect(), + value_operator_targets: e.value_operator_targets.iter().map(|(k, v)| (conv_qi(k, st), conv_qi(v, st))).collect(), + constrained_class_methods: e.constrained_class_methods.iter().map(|qi| conv_qi(qi, st)).collect(), + type_aliases: e.type_aliases.iter().map(|(k, (ps, ty))| { + (conv_qi(k, st), (ps.iter().map(|p| conv_qi(p, st)).collect(), conv_type(ty, st))) + }).collect(), + class_param_counts: e.class_param_counts.iter().map(|(k, v)| (conv_qi(k, st), *v)).collect(), + value_origins: e.value_origins.iter().map(|(k, v)| (st.add(*k), st.add(*v))).collect(), + type_origins: e.type_origins.iter().map(|(k, v)| (st.add(*k), st.add(*v))).collect(), + class_origins: e.class_origins.iter().map(|(k, v)| (st.add(*k), st.add(*v))).collect(), + operator_class_targets: e.operator_class_targets.iter().map(|(k, v)| (st.add(*k), st.add(*v))).collect(), + class_fundeps: e.class_fundeps.iter().map(|(k, (vs, fs))| { + (st.add(*k), (vs.iter().map(|v| st.add(*v)).collect(), fs.clone())) + }).collect(), + type_con_arities: e.type_con_arities.iter().map(|(k, v)| (conv_qi(k, st), *v)).collect(), + type_roles: e.type_roles.iter().map(|(k, v)| (st.add(*k), 
v.iter().map(conv_role).collect())).collect(), + newtype_names: e.newtype_names.iter().map(|s| st.add(*s)).collect(), + signature_constraints: e.signature_constraints.iter().map(|(k, v)| { + (conv_qi(k, st), v.iter().map(|(c, ts)| { + (conv_qi(c, st), ts.iter().map(|t| conv_type(t, st)).collect()) + }).collect()) + }).collect(), + type_kinds: e.type_kinds.iter().map(|(k, v)| (st.add(*k), conv_type(v, st))).collect(), + class_type_kinds: e.class_type_kinds.iter().map(|(k, v)| (st.add(*k), conv_type(v, st))).collect(), + partial_dischargers: e.partial_dischargers.iter().map(|s| st.add(*s)).collect(), + self_referential_aliases: e.self_referential_aliases.iter().map(|s| st.add(*s)).collect(), + class_superclasses: e.class_superclasses.iter().map(|(k, (vs, cs))| { + (conv_qi(k, st), (vs.iter().map(|v| st.add(*v)).collect(), cs.iter().map(|(c, ts)| { + (conv_qi(c, st), ts.iter().map(|t| conv_type(t, st)).collect()) + }).collect())) + }).collect(), + method_own_constraints: e.method_own_constraints.iter().map(|(k, v)| { + (conv_qi(k, st), v.iter().map(|s| st.add(*s)).collect()) + }).collect(), + } + } + + pub fn to_exports(&self, st: &StringTableReader) -> ModuleExports { + ModuleExports { + values: self.values.iter().map(|(k, v)| (rest_qi(k, st), rest_scheme(v, st))).collect(), + class_methods: self.class_methods.iter().map(|(k, (c, vs))| { + (rest_qi(k, st), (rest_qi(c, st), vs.iter().map(|v| rest_qi(v, st)).collect())) + }).collect(), + data_constructors: self.data_constructors.iter().map(|(k, v)| { + (rest_qi(k, st), v.iter().map(|qi| rest_qi(qi, st)).collect()) + }).collect(), + ctor_details: self.ctor_details.iter().map(|(k, (p, vs, ts))| { + (rest_qi(k, st), (rest_qi(p, st), vs.iter().map(|v| rest_qi(v, st)).collect(), ts.iter().map(|t| rest_type(t, st)).collect())) + }).collect(), + instances: self.instances.iter().map(|(k, v)| { + (rest_qi(k, st), v.iter().map(|(ts, cs)| { + (ts.iter().map(|t| rest_type(t, st)).collect(), cs.iter().map(|(c, ts2)| { + (rest_qi(c, 
st), ts2.iter().map(|t| rest_type(t, st)).collect()) + }).collect()) + }).collect()) + }).collect(), + type_operators: self.type_operators.iter().map(|(k, v)| (rest_qi(k, st), rest_qi(v, st))).collect(), + value_fixities: self.value_fixities.iter().map(|(k, (a, p))| (rest_qi(k, st), (rest_assoc(a), *p))).collect(), + type_fixities: self.type_fixities.iter().map(|(k, (a, p))| (rest_qi(k, st), (rest_assoc(a), *p))).collect(), + function_op_aliases: self.function_op_aliases.iter().map(|qi| rest_qi(qi, st)).collect(), + value_operator_targets: self.value_operator_targets.iter().map(|(k, v)| (rest_qi(k, st), rest_qi(v, st))).collect(), + constrained_class_methods: self.constrained_class_methods.iter().map(|qi| rest_qi(qi, st)).collect(), + type_aliases: self.type_aliases.iter().map(|(k, (ps, ty))| { + (rest_qi(k, st), (ps.iter().map(|p| rest_qi(p, st)).collect(), rest_type(ty, st))) + }).collect(), + class_param_counts: self.class_param_counts.iter().map(|(k, v)| (rest_qi(k, st), *v)).collect(), + value_origins: self.value_origins.iter().map(|(k, v)| (st.sym(*k), st.sym(*v))).collect(), + type_origins: self.type_origins.iter().map(|(k, v)| (st.sym(*k), st.sym(*v))).collect(), + class_origins: self.class_origins.iter().map(|(k, v)| (st.sym(*k), st.sym(*v))).collect(), + operator_class_targets: self.operator_class_targets.iter().map(|(k, v)| (st.sym(*k), st.sym(*v))).collect(), + class_fundeps: self.class_fundeps.iter().map(|(k, (vs, fs))| { + (st.sym(*k), (vs.iter().map(|v| st.sym(*v)).collect(), fs.clone())) + }).collect(), + type_con_arities: self.type_con_arities.iter().map(|(k, v)| (rest_qi(k, st), *v)).collect(), + type_roles: self.type_roles.iter().map(|(k, v)| (st.sym(*k), v.iter().map(rest_role).collect())).collect(), + newtype_names: self.newtype_names.iter().map(|s| st.sym(*s)).collect(), + signature_constraints: self.signature_constraints.iter().map(|(k, v)| { + (rest_qi(k, st), v.iter().map(|(c, ts)| { + (rest_qi(c, st), ts.iter().map(|t| rest_type(t, 
st)).collect()) + }).collect()) + }).collect(), + type_kinds: self.type_kinds.iter().map(|(k, v)| (st.sym(*k), rest_type(v, st))).collect(), + class_type_kinds: self.class_type_kinds.iter().map(|(k, v)| (st.sym(*k), rest_type(v, st))).collect(), + partial_dischargers: self.partial_dischargers.iter().map(|s| st.sym(*s)).collect(), + self_referential_aliases: self.self_referential_aliases.iter().map(|s| st.sym(*s)).collect(), + class_superclasses: self.class_superclasses.iter().map(|(k, (vs, cs))| { + (rest_qi(k, st), (vs.iter().map(|v| st.sym(*v)).collect(), cs.iter().map(|(c, ts)| { + (rest_qi(c, st), ts.iter().map(|t| rest_type(t, st)).collect()) + }).collect())) + }).collect(), + method_own_constraints: self.method_own_constraints.iter().map(|(k, v)| { + (rest_qi(k, st), v.iter().map(|s| st.sym(*s)).collect()) + }).collect(), + module_doc: Vec::new(), // not persisted in portable format + } + } +} + diff --git a/src/cst.rs b/src/cst.rs index 2e23704e..0214c83c 100644 --- a/src/cst.rs +++ b/src/cst.rs @@ -52,6 +52,8 @@ pub struct Module { pub decls: Vec, /// All comments in the module source, in order of appearance (comment, span) pub comments: Vec<(Comment, Span)>, + /// Doc-comments that appear before the `module` keyword + pub doc_comments: Vec, } /// Module name (potentially qualified: Data.Array) @@ -102,6 +104,8 @@ pub enum DataMembers { pub struct ImportDecl { pub span: Span, pub module: ModuleName, + /// Span of the module name in the import (for hover support) + pub module_span: Span, pub imports: Option, pub qualified: Option, } diff --git a/src/interner.rs b/src/interner.rs index 68188ac7..e932c92d 100644 --- a/src/interner.rs +++ b/src/interner.rs @@ -80,6 +80,13 @@ pub fn symbol_eq(sym: Symbol, s: &str) -> bool { with_interner(|interner| interner.resolve(sym).map_or(false, |r| r == s)) } +/// Intern a batch of strings in a single lock acquisition. 
+pub fn intern_batch(strings: &[String]) -> Vec { + with_interner(|interner| { + strings.iter().map(|s| interner.get_or_intern(s.as_str())).collect() + }) +} + /// Clear the interner (useful for testing) #[cfg(test)] pub fn clear() { diff --git a/src/js_ffi.rs b/src/js_ffi.rs index 011fe097..bd7cc526 100644 --- a/src/js_ffi.rs +++ b/src/js_ffi.rs @@ -31,8 +31,6 @@ pub enum FfiError { DeprecatedFFICommonJSModule, /// Declared `foreign import` but not exported in FFI MissingFFIImplementations { missing: Vec }, - /// Exported in FFI but no corresponding `foreign import` - UnusedFFIImplementations { unused: Vec }, /// CommonJS exports mixed with ES module syntax UnsupportedFFICommonJSExports { exports: Vec }, /// CommonJS imports (require) mixed with ES module syntax @@ -328,7 +326,7 @@ pub fn validate_foreign_module( .cloned() .collect(); - let unused: Vec = info + let _unused: Vec = info .es_exports .iter() .filter(|name| !import_set.contains(name.as_str())) @@ -339,9 +337,5 @@ pub fn validate_foreign_module( errors.push(FfiError::MissingFFIImplementations { missing }); } - if !unused.is_empty() { - errors.push(FfiError::UnusedFFIImplementations { unused }); - } - errors } diff --git a/src/lsp/handlers/completion.rs b/src/lsp/handlers/completion.rs index b1f5b3ea..e575a6ab 100644 --- a/src/lsp/handlers/completion.rs +++ b/src/lsp/handlers/completion.rs @@ -14,7 +14,7 @@ impl Backend { &self, params: CompletionParams, ) -> Result> { - if !self.ready.load(std::sync::atomic::Ordering::SeqCst) { + if !self.is_ready() { return Ok(None); } @@ -53,7 +53,7 @@ impl Backend { // Find insert position for new imports (after last import, or after module header) let import_insert_line = find_import_insert_line(&source, &module); - let registry = self.registry.read().await; + let comp_index = self.completion_index.read().await; let mut items = Vec::new(); let mut seen = HashSet::new(); @@ -85,43 +85,44 @@ impl Backend { } // 2. 
Already-imported names (higher priority than unimported) - // 3. All exported values from all modules in the registry - for (mod_path, mod_exports) in registry.iter_all() { - let mod_name = interner::resolve_module_name(mod_path); - if mod_name == current_module_name { + // 3. All exported names from all modules via lightweight completion index + for (mod_name, mod_entries) in &comp_index.entries { + if mod_name == &current_module_name { continue; } - for (qi, scheme) in &mod_exports.values { - let name = match interner::resolve(qi.name) { - Some(n) => n.to_string(), - None => continue, - }; - if !name.starts_with(&prefix) { + for entry in mod_entries { + if !entry.name.starts_with(&prefix) { continue; } - if seen.contains(&name) { + if seen.contains(&entry.name) { continue; } - seen.insert(name.clone()); + seen.insert(entry.name.clone()); - let type_str = format!("{}", scheme.ty); - let is_imported = already_imported.contains(&name); - let is_constructor = name.starts_with(|c: char| c.is_uppercase()); + let is_imported = already_imported.contains(&entry.name); + let is_constructor = matches!(entry.kind, crate::lsp::CompletionEntryKind::Constructor); + + let kind = match entry.kind { + crate::lsp::CompletionEntryKind::Value => CompletionItemKind::FUNCTION, + crate::lsp::CompletionEntryKind::Constructor => CompletionItemKind::CONSTRUCTOR, + crate::lsp::CompletionEntryKind::Type => CompletionItemKind::CLASS, + crate::lsp::CompletionEntryKind::Class => CompletionItemKind::INTERFACE, + }; - let kind = if is_constructor { - CompletionItemKind::CONSTRUCTOR + let detail = if entry.type_string.is_empty() { + Some(mod_name.clone()) } else { - CompletionItemKind::FUNCTION + Some(format!("{mod_name} :: {}", entry.type_string)) }; // Imported items sort before unimported let sort_prefix = if is_imported { "1" } else { "2" }; let mut item = CompletionItem { - label: name.clone(), + label: entry.name.clone(), kind: Some(kind), - detail: Some(format!("{mod_name} :: {type_str}")), + 
detail, sort_text: Some(format!("{sort_prefix}{}", items.len())), ..Default::default() }; @@ -129,8 +130,8 @@ impl Backend { // Auto-import: add additional_text_edits if not already imported if !is_imported { if let Some(edit) = build_import_edit( - &mod_name, - &name, + mod_name, + &entry.name, is_constructor, &module, &source, @@ -142,29 +143,8 @@ impl Backend { items.push(item); } - - // Also add type constructors - for (type_qi, ctor_names) in &mod_exports.data_constructors { - for ctor_qi in ctor_names { - let ctor_name = match interner::resolve(ctor_qi.name) { - Some(n) => n.to_string(), - None => continue, - }; - if !ctor_name.starts_with(&prefix) { - continue; - } - if seen.contains(&ctor_name) { - continue; - } - // Only add if the constructor has a value entry (it's exported) - if !mod_exports.values.contains_key(ctor_qi) { - continue; - } - // Already handled in the values loop above - } - let _ = type_qi; - } } + drop(comp_index); Ok(Some(CompletionResponse::List(CompletionList { is_incomplete: items.len() > 100, diff --git a/src/lsp/handlers/definition.rs b/src/lsp/handlers/definition.rs index b969c868..c6af3a93 100644 --- a/src/lsp/handlers/definition.rs +++ b/src/lsp/handlers/definition.rs @@ -1,5 +1,3 @@ -use std::sync::atomic::Ordering; - use tower_lsp::jsonrpc::Result; use tower_lsp::lsp_types::*; @@ -13,7 +11,7 @@ impl Backend { &self, params: GotoDefinitionParams, ) -> Result> { - if !self.ready.load(Ordering::SeqCst) { + if !self.is_ready() { return Ok(None); } @@ -100,10 +98,7 @@ impl Backend { mf.get(reexport_module).cloned() }; if let Some(reexport_uri) = reexport_uri { - let target_source = { - let sm = self.source_map.read().await; - sm.get(&reexport_uri).cloned() - }; + let target_source = self.get_source_for_uri(&reexport_uri).await; if let Some(target_source) = target_source { if let Ok(parsed_uri) = Url::parse(&reexport_uri) { if let Some(loc) = span_to_location(&parsed_uri, &target_source, loc.span) { @@ -120,10 +115,7 @@ impl Backend 
{ }; if let Some(def_loc) = def_loc { - let target_source = { - let sm = self.source_map.read().await; - sm.get(&target_uri).cloned() - }; + let target_source = self.get_source_for_uri(&target_uri).await; if let Some(target_source) = target_source { if let Ok(parsed_uri) = Url::parse(&target_uri) { @@ -226,10 +218,7 @@ impl Backend { mf.get(&target_module).cloned() }?; - let target_source = { - let sm = self.source_map.read().await; - sm.get(&target_uri).cloned() - }?; + let target_source = self.get_source_for_uri(&target_uri).await?; let parsed_uri = Url::parse(&target_uri).ok()?; let loc = span_to_location(&parsed_uri, &target_source, def_loc.span)?; diff --git a/src/lsp/handlers/diagnostics.rs b/src/lsp/handlers/diagnostics.rs index cf397610..e0aa93ed 100644 --- a/src/lsp/handlers/diagnostics.rs +++ b/src/lsp/handlers/diagnostics.rs @@ -1,8 +1,12 @@ use std::fmt::Display; -use std::sync::atomic::Ordering; use tower_lsp::lsp_types::*; +use crate::cst::Module; +use crate::interner; +use crate::build::cache::ModuleCache; +use crate::typechecker::registry::ModuleRegistry; + use super::super::{Backend, FileState}; impl Backend { @@ -12,7 +16,34 @@ impl Backend { .await; } + /// Ensure all modules imported by `module` have their exports loaded into the registry. + /// Loads missing exports lazily from the ModuleCache (which reads from disk on demand). 
+ async fn ensure_imports_loaded(&self, module: &Module, registry: &mut ModuleRegistry) { + for import_decl in &module.imports { + let import_parts = &import_decl.module.parts; + + // Skip if already in registry + if registry.lookup(import_parts).is_some() { + continue; + } + + let import_name = interner::resolve_module_name(import_parts); + + // Try to load from module cache (lazy disk load) + let exports = { + let mut cache = self.module_cache.write().await; + cache.get_exports(&import_name).cloned() + }; + + if let Some(exports) = exports { + registry.register(import_parts, exports); + log::debug!("Lazy-loaded exports for {import_name}"); + } + } + } + pub(crate) async fn on_change(&self, uri: Url, source: String) { + let on_change_start = std::time::Instant::now(); { let mut files = self.files.write().await; files.insert( @@ -25,10 +56,11 @@ } // Don't publish diagnostics until sources are loaded - if !self.ready.load(Ordering::SeqCst) { + if !self.is_ready() { return; } + let t = std::time::Instant::now(); let module = match crate::parser::parse(&source) { Ok(module) => { let module_name = format!("{}", module.name.value); @@ -56,46 +88,74 @@ return; } }; + self.info(format!("[on_change] parse: {:.2?}", t.elapsed())).await; + + let module_name = interner::resolve_module_name(&module.name.value.parts); + let module_parts: Vec<interner::Symbol> = module.name.value.parts.clone(); + + // Ensure imported modules' exports are in the registry (lazy load from cache) + let t = std::time::Instant::now(); + let mut registry = self.registry.write().await; + self.ensure_imports_loaded(&module, &mut registry).await; + self.info(format!("[on_change] ensure_imports_loaded: {:.2?}", t.elapsed())).await; // Type-check against the registry - let registry = self.registry.read().await; + let t = std::time::Instant::now(); let check_result = crate::typechecker::check_module_with_registry(&module, &registry); + self.info(format!("[on_change] typecheck {module_name}: 
{:.2?}", t.elapsed())).await; - let diagnostics: Vec = check_result - .errors - .iter() - .map(|err| { - let span = err.span(); - let range = match span.to_pos(&source) { - Some((start, end)) => Range { - start: Position { - line: start.line.saturating_sub(1) as u32, - character: start.column.saturating_sub(1) as u32, - }, - end: Position { - line: end.line.saturating_sub(1) as u32, - character: end.column.saturating_sub(1) as u32, - }, - }, - None => Range::default(), - }; - Diagnostic { - range, - severity: Some(DiagnosticSeverity::ERROR), - code: Some(NumberOrString::String(format!("TypeError.{}", err.code()))), - source: Some("pfc".to_string()), - message: format!("{err}"), - ..Default::default() - } - }) + // Update registry with new exports + registry.register(&module_parts, check_result.exports.clone()); + + // Update cache + let source_hash = ModuleCache::content_hash(&source); + let import_names: Vec = module.imports.iter() + .map(|imp| interner::resolve_module_name(&imp.module.parts)) .collect(); + let mut cache = self.module_cache.write().await; + cache.update(module_name.clone(), source_hash, check_result.exports, import_names); + drop(cache); + // Publish diagnostics for the changed module + let diagnostics = type_errors_to_diagnostics(&check_result.errors, &source); self.client .publish_diagnostics(uri, diagnostics, None) .await; + + self.info(format!("[on_change] total: {:.2?}", on_change_start.elapsed())).await; } } +fn type_errors_to_diagnostics(errors: &[crate::typechecker::error::TypeError], source: &str) -> Vec { + errors + .iter() + .map(|err| { + let span = err.span(); + let range = match span.to_pos(source) { + Some((start, end)) => Range { + start: Position { + line: start.line.saturating_sub(1) as u32, + character: start.column.saturating_sub(1) as u32, + }, + end: Position { + line: end.line.saturating_sub(1) as u32, + character: end.column.saturating_sub(1) as u32, + }, + }, + None => Range::default(), + }; + Diagnostic { + range, + 
severity: Some(DiagnosticSeverity::ERROR), + code: Some(NumberOrString::String(format!("TypeError.{}", err.code()))), + source: Some("pfc".to_string()), + message: format!("{err}"), + ..Default::default() + } + }) + .collect() +} + fn error_to_range(err: &crate::diagnostics::CompilerError, source: &str) -> Range { match err.get_span() { Some(span) => match span.to_pos(source) { diff --git a/src/lsp/handlers/hover.rs b/src/lsp/handlers/hover.rs index c4a67cf9..1033b1d9 100644 --- a/src/lsp/handlers/hover.rs +++ b/src/lsp/handlers/hover.rs @@ -1,5 +1,3 @@ -use std::sync::atomic::Ordering; - use tower_lsp::jsonrpc::Result; use tower_lsp::lsp_types::*; @@ -22,7 +20,7 @@ enum HoverTarget { impl Backend { pub(crate) async fn handle_hover(&self, params: HoverParams) -> Result> { - if !self.ready.load(Ordering::SeqCst) { + if !self.is_ready() { return Ok(None); } @@ -214,6 +212,28 @@ impl Backend { if offset < import_decl.span.start || offset >= import_decl.span.end { continue; } + + // Check if cursor is on the module name + if offset >= import_decl.module_span.start && offset < import_decl.module_span.end { + let module_name = interner::resolve_module_name(&import_decl.module.parts); + let docs = self.get_imported_module_doc(&module_name).await; + let mut markdown = format!("```purescript\nmodule {module_name}\n```"); + if !docs.is_empty() { + markdown.push_str("\n\n---\n\n"); + for doc in &docs { + markdown.push_str(doc.trim()); + markdown.push('\n'); + } + } + return Some(Hover { + contents: HoverContents::Markup(MarkupContent { + kind: MarkupKind::Markdown, + value: markdown, + }), + range: None, + }); + } + let items = match &import_decl.imports { Some(ImportList::Explicit(items)) | Some(ImportList::Hiding(items)) => items, None => continue, @@ -282,11 +302,7 @@ impl Backend { Some(u) => u, None => return Vec::new(), }; - let target_source = { - let sm = self.source_map.read().await; - sm.get(&target_uri).cloned() - }; - let target_source = match target_source { + 
let target_source = match self.get_source_for_uri(&target_uri).await { Some(s) => s, None => return Vec::new(), }; @@ -306,6 +322,43 @@ impl Backend { .collect() } + async fn get_imported_module_doc(&self, module_name: &str) -> Vec<String> { + // Try registry first (has module_doc from typechecking) + { + let module_parts: Vec<interner::Symbol> = module_name + .split('.') + .map(|s| interner::intern(s)) + .collect(); + let registry = self.registry.read().await; + if let Some(mod_exports) = registry.lookup(&module_parts) { + if !mod_exports.module_doc.is_empty() { + return mod_exports.module_doc.clone(); + } + } + } + + // Fall back to parsing the source file + let target_uri = { + let mf = self.module_file_map.read().await; + mf.get(module_name).cloned() + }; + let target_uri = match target_uri { + Some(u) => u, + None => return Vec::new(), + }; + let target_source = match self.get_source_for_uri(&target_uri).await { + Some(s) => s, + None => return Vec::new(), + }; + let target_module = match crate::parser::parse(&target_source) { + Ok(m) => m, + Err(_) => return Vec::new(), + }; + target_module.doc_comments.iter().filter_map(|c| { + if let cst::Comment::Doc(text) = c { Some(text.clone()) } else { None } + }).collect() + } + async fn get_local_kind(&self, module: &cst::Module, symbol: interner::Symbol) -> Option { let registry = self.registry.read().await; let check_result = crate::typechecker::check_module_with_registry(module, &registry); @@ -364,10 +417,7 @@ let mf = self.module_file_map.read().await; mf.get(module_name).cloned() }?; - let target_source = { - let sm = self.source_map.read().await; - sm.get(&target_uri).cloned() - }?; + let target_source = self.get_source_for_uri(&target_uri).await?; let target_module = crate::parser::parse(&target_source).ok()?; find_cst_kind(&target_module.decls, name_str, &target_source) } diff --git a/src/lsp/handlers/load_sources.rs index 5803c1fc..337d6ba9 100644 --- a/src/lsp/handlers/load_sources.rs 
+++ b/src/lsp/handlers/load_sources.rs @@ -1,24 +1,366 @@ use std::collections::HashMap; use std::sync::atomic::Ordering; +use rayon::prelude::*; use tower_lsp::lsp_types::*; +use crate::build::cache; use crate::build::BuildOptions; +use crate::cst::{self, Decl}; +use crate::interner; use crate::lsp::utils::find_definition::DefinitionIndex; +use crate::lsp::{CompletionEntry, CompletionEntryKind, CompletionIndex}; -use super::super::Backend; +use super::super::{Backend, LOAD_STATE_CACHE_LOADED, LOAD_STATE_READY}; impl Backend { pub(crate) async fn load_sources(&self) { + let total_start = std::time::Instant::now(); + let cmd = match &self.sources_cmd { Some(cmd) => cmd.clone(), None => { - self.ready.store(true, Ordering::SeqCst); + self.load_state.store(LOAD_STATE_READY, Ordering::SeqCst); return; } }; - // Create a progress token for the loading spinner + // Phase A: Try to restore from disk cache (fast path) + let mut all_snapshots_loaded = false; + if let Some(ref cache_dir) = self.cache_dir { + let lsp_dir = cache_dir.join("lsp"); + let phase_a_start = std::time::Instant::now(); + + let t = std::time::Instant::now(); + let idx_result = DefinitionIndex::load_from_disk(&lsp_dir.join("def_index.bin")); + self.info(format!("[timing] load def_index: {:.2?} ({})", t.elapsed(), if idx_result.is_ok() { "ok" } else { "miss" })).await; + + let t = std::time::Instant::now(); + let re_result = crate::lsp::utils::resolve::ResolutionExports::load_from_disk( + &lsp_dir.join("resolution_exports.bin"), + ); + self.info(format!("[timing] load resolution_exports: {:.2?} ({})", t.elapsed(), if re_result.is_ok() { "ok" } else { "miss" })).await; + + let t = std::time::Instant::now(); + let mfmap_result = cache::load_module_file_map(&lsp_dir.join("module_file_map.bin")); + self.info(format!("[timing] load module_file_map: {:.2?} ({})", t.elapsed(), if mfmap_result.is_ok() { "ok" } else { "miss" })).await; + + let t = std::time::Instant::now(); + let comp_result = 
CompletionIndex::load_from_disk(&lsp_dir.join("completion_index.bin")); + self.info(format!("[timing] load completion_index: {:.2?} ({})", t.elapsed(), if comp_result.is_ok() { "ok" } else { "miss" })).await; + + let t = std::time::Instant::now(); + let cache_result = cache::ModuleCache::load_from_disk(cache_dir); + self.info(format!("[timing] load module_cache: {:.2?} ({})", t.elapsed(), if cache_result.is_ok() { "ok" } else { "miss" })).await; + + // Always load the module cache if available (shared with CLI builds) + if let Ok(c) = cache_result { + let mut mc = self.module_cache.write().await; + *mc = c; + } + + if let (Ok(idx), Ok(re), Ok(mfmap), Ok(comp)) = (idx_result, re_result, mfmap_result, comp_result) { + { + let mut i = self.def_index.write().await; + *i = idx; + } + { + let mut e = self.resolution_exports.write().await; + *e = re; + } + { + let mut m = self.module_file_map.write().await; + *m = mfmap; + } + { + let mut ci = self.completion_index.write().await; + *ci = comp; + } + + self.load_state + .store(LOAD_STATE_CACHE_LOADED, Ordering::SeqCst); + all_snapshots_loaded = true; + self.info(format!("[timing] Phase A complete (all snapshots loaded): {:.2?}", phase_a_start.elapsed())).await; + self.info(format!( + "Cache loaded in {:.2?}", + phase_a_start.elapsed() + )) + .await; + } else { + self.info(format!("[timing] Phase A incomplete (missing snapshots): {:.2?}", phase_a_start.elapsed())).await; + } + } + + // If all snapshots loaded from disk, we're done — no need for Phase B + if all_snapshots_loaded { + self.load_state.store(LOAD_STATE_READY, Ordering::SeqCst); + self.info(format!("[timing] Ready from cache in {:.2?} total", total_start.elapsed())).await; + return; + } + + // Phase B: Need to build indexes from source files + let has_cache = { + let mc = self.module_cache.read().await; + mc.has_entries() + }; + + if has_cache { + self.info("Module cache found but LSP snapshots missing — rebuilding indexes").await; + 
self.spawn_index_build(cmd).await; + } else { + self.info("No module cache found — doing full build (cold start)").await; + self.spawn_full_build(cmd).await; + } + } + + async fn spawn_index_build(&self, cmd: String) { + let token = NumberOrString::String("pfc-loading".to_string()); + let _ = self + .client + .send_request::(WorkDoneProgressCreateParams { + token: token.clone(), + }) + .await; + + self.client + .send_notification::(ProgressParams { + token: token.clone(), + value: ProgressParamsValue::WorkDone(WorkDoneProgress::Begin( + WorkDoneProgressBegin { + title: "Indexing PureScript sources".to_string(), + message: Some(format!("Running: {cmd}")), + cancellable: Some(false), + percentage: None, + }, + )), + }) + .await; + + let client = self.client.clone(); + let def_index = self.def_index.clone(); + let resolution_exports = self.resolution_exports.clone(); + let module_file_map = self.module_file_map.clone(); + let module_cache = self.module_cache.clone(); + let completion_index = self.completion_index.clone(); + let load_state = self.load_state.clone(); + let cache_dir = self.cache_dir.clone(); + let progress_token = token.clone(); + + let rt_handle = tokio::runtime::Handle::current(); + std::thread::Builder::new() + .name("pfc-load-sources".to_string()) + .stack_size(16 * 1024 * 1024) + .spawn(move || { + let _guard = rt_handle.enter(); + let build_start = std::time::Instant::now(); + + let log_client = client.clone(); + let info = move |msg: String| { + let rt = tokio::runtime::Handle::current(); + rt.block_on(log_client.log_message(MessageType::INFO, msg)); + }; + + // Run the shell command to get source globs + let t = std::time::Instant::now(); + let output = match std::process::Command::new("sh") + .arg("-c") + .arg(&cmd) + .output() + { + Ok(output) => output, + Err(e) => { + info(format!("Failed to run sources command: {e}")); + load_state.store(LOAD_STATE_READY, Ordering::SeqCst); + return; + } + }; + info(format!("[timing] sources command: 
{:.2?}", t.elapsed())); + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + info(format!("Sources command failed: {stderr}")); + load_state.store(LOAD_STATE_READY, Ordering::SeqCst); + return; + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let globs: Vec = stdout + .lines() + .filter(|l| !l.is_empty()) + .map(|l| l.to_string()) + .collect(); + + let rt = tokio::runtime::Handle::current(); + + // Resolve globs to file paths + let t = std::time::Instant::now(); + let mut file_paths: Vec = Vec::new(); + for pattern in &globs { + match glob::glob(pattern) { + Ok(entries) => { + for entry in entries.flatten() { + if entry.extension().map_or(false, |ext| ext == "purs") { + file_paths.push(entry); + } + } + } + Err(e) => info(format!("Invalid glob pattern {pattern}: {e}")), + } + } + info(format!("[timing] glob resolution: {:.2?} ({} files)", t.elapsed(), file_paths.len())); + + // Read all files in parallel + let t = std::time::Instant::now(); + let sources: Vec<(String, String)> = file_paths + .par_iter() + .filter_map(|entry| match std::fs::read_to_string(entry) { + Ok(source) => { + let abs_path = entry.canonicalize().unwrap_or_else(|_| entry.clone()); + Some((abs_path.to_string_lossy().into_owned(), source)) + } + Err(_) => None, + }) + .collect(); + info(format!("[timing] read files: {:.2?} ({} files)", t.elapsed(), sources.len())); + + // Report progress + rt.block_on(async { + client + .send_notification::(ProgressParams { + token: progress_token.clone(), + value: ProgressParamsValue::WorkDone(WorkDoneProgress::Report( + WorkDoneProgressReport { + message: Some(format!( + "Parsing {} source files...", + sources.len() + )), + cancellable: Some(false), + percentage: None, + }, + )), + }) + .await; + }); + + // Parse all files in parallel + let t = std::time::Instant::now(); + let parse_pool = rayon::ThreadPoolBuilder::new() + .thread_name(|i| format!("pfc-lsp-parse-{i}")) + .stack_size(16 * 1024 * 1024) + 
.build() + .expect("failed to build parse thread pool"); + + let parsed: Vec<_> = parse_pool.install(|| { + sources + .par_iter() + .filter_map(|(path, source)| { + crate::parser::parse(source) + .ok() + .map(|module| (path.clone(), source.clone(), module)) + }) + .collect() + }); + info(format!("[timing] parse files: {:.2?} ({} modules)", t.elapsed(), parsed.len())); + + let module_count = parsed.len(); + + // Build definition index, module_file_map, and completion index + let t = std::time::Instant::now(); + let mut index = DefinitionIndex::new(); + let mut mfmap = HashMap::new(); + let mut comp_index = CompletionIndex::default(); + + for (path, source, module) in &parsed { + let file_uri = Url::from_file_path(path) + .map(|u| u.to_string()) + .unwrap_or_default(); + let mod_name = format!("{}", module.name.value); + index.add_module(module, path); + mfmap.insert(mod_name.clone(), file_uri); + + let entries = extract_completion_entries(module, source); + if !entries.is_empty() { + comp_index.entries.insert(mod_name, entries); + } + } + info(format!("[timing] build indexes: {:.2?}", t.elapsed())); + + // Build resolution exports + let t = std::time::Instant::now(); + let just_modules: Vec = + parsed.iter().map(|(_, _, m)| m.clone()).collect(); + let exports = crate::lsp::utils::resolve::ResolutionExports::new(&just_modules); + info(format!("[timing] build resolution_exports: {:.2?}", t.elapsed())); + + // Register paths in module cache + let t = std::time::Instant::now(); + { + let mut mcache = rt.block_on(async { module_cache.write().await }); + for (path, _source, module) in &parsed { + let mod_name = format!("{}", module.name.value); + mcache.register_path(path.clone(), mod_name); + } + mcache.build_reverse_deps(); + + if let Some(ref dir) = cache_dir { + if let Err(e) = mcache.save_to_disk(dir) { + info(format!("Failed to save module cache: {e}")); + } + } + } + info(format!("[timing] update module cache: {:.2?}", t.elapsed())); + + // Save LSP snapshots to 
disk for next startup + let t = std::time::Instant::now(); + if let Some(ref dir) = cache_dir { + let lsp_dir = dir.join("lsp"); + if let Err(e) = index.save_to_disk(&lsp_dir.join("def_index.bin")) { + info(format!("Failed to save def_index snapshot: {e}")); + } + if let Err(e) = exports.save_to_disk(&lsp_dir.join("resolution_exports.bin")) { + info(format!("Failed to save resolution_exports snapshot: {e}")); + } + if let Err(e) = + cache::save_module_file_map(&mfmap, &lsp_dir.join("module_file_map.bin")) + { + info(format!("Failed to save module_file_map snapshot: {e}")); + } + if let Err(e) = comp_index.save_to_disk(&lsp_dir.join("completion_index.bin")) { + info(format!("Failed to save completion_index snapshot: {e}")); + } + } + info(format!("[timing] save snapshots: {:.2?}", t.elapsed())); + + // Store indexes and mark as ready + rt.block_on(async { + let mut idx = def_index.write().await; + *idx = index; + let mut re = resolution_exports.write().await; + *re = exports; + let mut mf = module_file_map.write().await; + *mf = mfmap; + let mut ci = completion_index.write().await; + *ci = comp_index; + load_state.store(LOAD_STATE_READY, Ordering::SeqCst); + + client + .send_notification::(ProgressParams { + token: progress_token, + value: ProgressParamsValue::WorkDone(WorkDoneProgress::End( + WorkDoneProgressEnd { + message: Some(format!("Indexed {module_count} modules")), + }, + )), + }) + .await; + }); + info(format!("[timing] Phase B (index build) total: {:.2?}", build_start.elapsed())); + }) + .expect("failed to spawn load-sources thread"); + } + + /// Cold-start path: full build with typechecking when no prior cache exists. 
+ async fn spawn_full_build(&self, cmd: String) { let token = NumberOrString::String("pfc-loading".to_string()); let _ = self .client @@ -46,12 +388,27 @@ impl Backend { let def_index = self.def_index.clone(); let resolution_exports = self.resolution_exports.clone(); let module_file_map = self.module_file_map.clone(); - let source_map = self.source_map.clone(); - let ready = self.ready.clone(); + let module_cache = self.module_cache.clone(); + let completion_index = self.completion_index.clone(); + let load_state = self.load_state.clone(); + let cache_dir = self.cache_dir.clone(); let progress_token = token.clone(); - tokio::task::spawn_blocking(move || { - // Run the shell command to get source globs + let rt_handle = tokio::runtime::Handle::current(); + std::thread::Builder::new() + .name("pfc-load-sources".to_string()) + .stack_size(16 * 1024 * 1024) + .spawn(move || { + let _guard = rt_handle.enter(); + let build_start = std::time::Instant::now(); + + let log_client = client.clone(); + let info = move |msg: String| { + let rt = tokio::runtime::Handle::current(); + rt.block_on(log_client.log_message(MessageType::INFO, msg)); + }; + + let t = std::time::Instant::now(); let output = match std::process::Command::new("sh") .arg("-c") .arg(&cmd) @@ -59,16 +416,17 @@ impl Backend { { Ok(output) => output, Err(e) => { - log::error!("Failed to run sources command: {e}"); - ready.store(true, Ordering::SeqCst); + info(format!("Failed to run sources command: {e}")); + load_state.store(LOAD_STATE_READY, Ordering::SeqCst); return; } }; + info(format!("[timing] sources command: {:.2?}", t.elapsed())); if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); - log::error!("Sources command failed: {stderr}"); - ready.store(true, Ordering::SeqCst); + info(format!("Sources command failed: {stderr}")); + load_state.store(LOAD_STATE_READY, Ordering::SeqCst); return; } @@ -81,54 +439,42 @@ impl Backend { let rt = tokio::runtime::Handle::current(); - // 
Report progress: resolving globs - rt.block_on(async { - client - .send_notification::(ProgressParams { - token: progress_token.clone(), - value: ProgressParamsValue::WorkDone(WorkDoneProgress::Report( - WorkDoneProgressReport { - message: Some(format!( - "Resolving {} glob patterns...", - globs.len() - )), - cancellable: Some(false), - percentage: None, - }, - )), - }) - .await; - }); - // Resolve globs to file paths - let mut sources: Vec<(String, String)> = Vec::new(); + let t = std::time::Instant::now(); + let mut file_paths: Vec = Vec::new(); for pattern in &globs { match glob::glob(pattern) { Ok(entries) => { for entry in entries.flatten() { if entry.extension().map_or(false, |ext| ext == "purs") { - match std::fs::read_to_string(&entry) { - Ok(source) => { - let abs_path = entry - .canonicalize() - .unwrap_or_else(|_| entry.clone()); - sources.push(( - abs_path.to_string_lossy().into_owned(), - source, - )); - } - Err(e) => { - log::warn!("Failed to read {}: {e}", entry.display()) - } - } + file_paths.push(entry); } } } - Err(e) => log::warn!("Invalid glob pattern {pattern}: {e}"), + Err(e) => info(format!("Invalid glob pattern {pattern}: {e}")), } } + info(format!("[timing] glob resolution: {:.2?} ({} files)", t.elapsed(), file_paths.len())); + + // Read all files in parallel + let t = std::time::Instant::now(); + let sources: Vec<(String, String)> = file_paths + .par_iter() + .filter_map(|entry| { + match std::fs::read_to_string(entry) { + Ok(source) => { + let abs_path = entry + .canonicalize() + .unwrap_or_else(|_| entry.clone()); + Some((abs_path.to_string_lossy().into_owned(), source)) + } + Err(_) => None, + } + }) + .collect(); + info(format!("[timing] read files: {:.2?} ({} files)", t.elapsed(), sources.len())); - // Report progress: building + // Report progress rt.block_on(async { client .send_notification::(ProgressParams { @@ -147,7 +493,8 @@ impl Backend { .await; }); - // Build with no codegen to populate the registry + // Full build with 
typechecking + let t = std::time::Instant::now(); let source_refs: Vec<(&str, &str)> = sources .iter() .map(|(p, s)| (p.as_str(), s.as_str())) @@ -158,12 +505,25 @@ impl Backend { ..Default::default() }; - let (result, new_registry) = crate::build::build_from_sources_with_options( + let mut mcache = rt.block_on(async { module_cache.write().await }); + let (result, new_registry, mut build_parsed_modules) = crate::build::build_from_sources_incremental( &source_refs, &None, None, &options, + &mut mcache, ); + mcache.build_reverse_deps(); + info(format!("[timing] full build: {:.2?}", t.elapsed())); + + let t = std::time::Instant::now(); + if let Some(ref dir) = cache_dir { + if let Err(e) = mcache.save_to_disk(dir) { + info(format!("Failed to save module cache: {e}")); + } + } + drop(mcache); + info(format!("[timing] save module cache: {:.2?}", t.elapsed())); let error_count: usize = result.modules.iter().map(|m| m.type_errors.len()).sum(); let module_count = result.modules.len(); @@ -173,34 +533,86 @@ impl Backend { .filter(|m| !m.type_errors.is_empty()) .count(); - // Build definition index and resolution exports from parsed sources + // Parse only cache-hit sources that weren't parsed by the build + let t = std::time::Instant::now(); + let already_parsed: std::collections::HashSet = build_parsed_modules + .iter() + .map(|(p, _)| p.to_string_lossy().into_owned()) + .collect(); + + let cache_hit_sources: Vec<_> = sources + .iter() + .filter(|(path, _)| !already_parsed.contains(path.as_str())) + .collect(); + + let parse_pool = rayon::ThreadPoolBuilder::new() + .thread_name(|i| format!("pfc-lsp-parse-{i}")) + .stack_size(16 * 1024 * 1024) + .build() + .expect("failed to build parse thread pool"); + let extra_parsed: Vec<_> = parse_pool.install(|| { + cache_hit_sources + .par_iter() + .filter_map(|(path, source)| { + crate::parser::parse(source) + .ok() + .map(|m| (std::path::PathBuf::from(path.as_str()), m)) + }) + .collect() + }); + info(format!("[timing] extra parse 
(cache hits): {:.2?} ({} modules)", t.elapsed(), extra_parsed.len())); + + build_parsed_modules.extend(extra_parsed); + + // Build indexes + let t = std::time::Instant::now(); let mut index = DefinitionIndex::new(); - let mut smap = HashMap::new(); let mut mfmap = HashMap::new(); - let mut parsed_modules = Vec::new(); - for (path, source) in &sources { - if let Ok(module) = crate::parser::parse(source) { - index.add_module(&module, path); - let mod_name = format!("{}", module.name.value); - let file_uri = Url::from_file_path(path) - .map(|u| u.to_string()) - .unwrap_or_default(); - mfmap.insert(mod_name, file_uri.clone()); - parsed_modules.push(module); - smap.insert(file_uri, source.clone()); - } else { - smap.insert( - Url::from_file_path(path) - .map(|u| u.to_string()) - .unwrap_or_default(), - source.clone(), - ); + let mut comp_index = CompletionIndex::default(); + let sources_map: HashMap<&str, &str> = sources.iter().map(|(p, s)| (p.as_str(), s.as_str())).collect(); + + for (path, module) in &build_parsed_modules { + let path_str = path.to_string_lossy(); + let file_uri = Url::from_file_path(path_str.as_ref()) + .map(|u| u.to_string()) + .unwrap_or_default(); + let mod_name = format!("{}", module.name.value); + index.add_module(module, &path_str); + mfmap.insert(mod_name.clone(), file_uri); + + if let Some(source) = sources_map.get(path_str.as_ref()) { + let entries = extract_completion_entries(module, source); + if !entries.is_empty() { + comp_index.entries.insert(mod_name, entries); + } } } + info(format!("[timing] build indexes: {:.2?}", t.elapsed())); - let exports = crate::lsp::utils::resolve::ResolutionExports::new(&parsed_modules); + let t = std::time::Instant::now(); + let just_modules: Vec = build_parsed_modules.into_iter().map(|(_, m)| m).collect(); + let exports = crate::lsp::utils::resolve::ResolutionExports::new(&just_modules); + info(format!("[timing] build resolution_exports: {:.2?}", t.elapsed())); + + // Save LSP snapshots + let t = 
std::time::Instant::now(); + if let Some(ref dir) = cache_dir { + let lsp_dir = dir.join("lsp"); + if let Err(e) = index.save_to_disk(&lsp_dir.join("def_index.bin")) { + info(format!("Failed to save def_index snapshot: {e}")); + } + if let Err(e) = exports.save_to_disk(&lsp_dir.join("resolution_exports.bin")) { + info(format!("Failed to save resolution_exports snapshot: {e}")); + } + if let Err(e) = cache::save_module_file_map(&mfmap, &lsp_dir.join("module_file_map.bin")) { + info(format!("Failed to save module_file_map snapshot: {e}")); + } + if let Err(e) = comp_index.save_to_disk(&lsp_dir.join("completion_index.bin")) { + info(format!("Failed to save completion_index snapshot: {e}")); + } + } + info(format!("[timing] save snapshots: {:.2?}", t.elapsed())); - // Store the registry, index, source map and mark as ready rt.block_on(async { let mut reg = registry.write().await; *reg = new_registry; @@ -210,11 +622,10 @@ impl Backend { *re = exports; let mut mf = module_file_map.write().await; *mf = mfmap; - let mut sm = source_map.write().await; - *sm = smap; - ready.store(true, Ordering::SeqCst); + let mut ci = completion_index.write().await; + *ci = comp_index; + load_state.store(LOAD_STATE_READY, Ordering::SeqCst); - // End progress client .send_notification::(ProgressParams { token: progress_token, @@ -228,6 +639,168 @@ impl Backend { }) .await; }); + info(format!("[timing] Phase B (full build) total: {:.2?}", build_start.elapsed())); + }) + .expect("failed to spawn load-sources thread"); + } +} + +/// Extract completion entries from a module's CST declarations and source text. +fn extract_completion_entries(module: &cst::Module, source: &str) -> Vec { + let mut entries = Vec::new(); + let mut type_sigs: HashMap = HashMap::new(); + + // First pass: collect type signatures + for decl in &module.decls { + if let Decl::TypeSignature { name, ty, .. 
} = decl { + let name_str = interner::resolve(name.value) + .unwrap_or_default() + .to_string(); + let span = ty.span(); + if span.start < source.len() && span.end <= source.len() { + type_sigs.insert(name_str, source[span.start..span.end].to_string()); + } + } + } + + // Check export list to filter what's actually exported + let export_filter: Option> = + module.exports.as_ref().map(|spanned_list| { + spanned_list + .value + .exports + .iter() + .filter_map(|exp| match exp { + cst::Export::Value(name) + | cst::Export::Type(name, _) + | cst::Export::Class(name) => interner::resolve(*name).map(|s| s.to_string()), + _ => None, + }) + .collect() }); + + // Second pass: build entries + for decl in &module.decls { + match decl { + Decl::Value { name, .. } | Decl::Foreign { name, .. } => { + let name_str = interner::resolve(name.value) + .unwrap_or_default() + .to_string(); + if let Some(ref filter) = export_filter { + if !filter.contains(&name_str) { + continue; + } + } + let type_string = type_sigs.get(&name_str).cloned().unwrap_or_default(); + entries.push(CompletionEntry { + name: name_str, + type_string, + kind: CompletionEntryKind::Value, + }); + } + Decl::Data { + name, constructors, .. + } => { + let type_name = interner::resolve(name.value) + .unwrap_or_default() + .to_string(); + if let Some(ref filter) = export_filter { + if !filter.contains(&type_name) { + continue; + } + } + entries.push(CompletionEntry { + name: type_name, + type_string: String::new(), + kind: CompletionEntryKind::Type, + }); + for ctor in constructors { + let ctor_name = interner::resolve(ctor.name.value) + .unwrap_or_default() + .to_string(); + entries.push(CompletionEntry { + name: ctor_name, + type_string: String::new(), + kind: CompletionEntryKind::Constructor, + }); + } + } + Decl::Newtype { + name, constructor, .. 
+ } => { + let type_name = interner::resolve(name.value) + .unwrap_or_default() + .to_string(); + if let Some(ref filter) = export_filter { + if !filter.contains(&type_name) { + continue; + } + } + entries.push(CompletionEntry { + name: type_name, + type_string: String::new(), + kind: CompletionEntryKind::Type, + }); + let ctor_name = interner::resolve(constructor.value) + .unwrap_or_default() + .to_string(); + entries.push(CompletionEntry { + name: ctor_name, + type_string: String::new(), + kind: CompletionEntryKind::Constructor, + }); + } + Decl::Class { name, members, .. } => { + let class_name = interner::resolve(name.value) + .unwrap_or_default() + .to_string(); + if let Some(ref filter) = export_filter { + if !filter.contains(&class_name) { + continue; + } + } + entries.push(CompletionEntry { + name: class_name, + type_string: String::new(), + kind: CompletionEntryKind::Class, + }); + for member in members { + let member_name = interner::resolve(member.name.value) + .unwrap_or_default() + .to_string(); + let type_string = { + let span = member.ty.span(); + if span.start < source.len() && span.end <= source.len() { + source[span.start..span.end].to_string() + } else { + String::new() + } + }; + entries.push(CompletionEntry { + name: member_name, + type_string, + kind: CompletionEntryKind::Value, + }); + } + } + Decl::TypeAlias { name, .. 
} => { + let name_str = interner::resolve(name.value) + .unwrap_or_default() + .to_string(); + if let Some(ref filter) = export_filter { + if !filter.contains(&name_str) { + continue; + } + } + entries.push(CompletionEntry { + name: name_str, + type_string: String::new(), + kind: CompletionEntryKind::Type, + }); + } + _ => {} + } } + + entries } diff --git a/src/lsp/mod.rs b/src/lsp/mod.rs index a1faf57f..9f3fff9f 100644 --- a/src/lsp/mod.rs +++ b/src/lsp/mod.rs @@ -2,16 +2,20 @@ mod handlers; pub mod utils; use std::collections::HashMap; -use std::sync::atomic::AtomicBool; +use std::io; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicU8, Ordering}; use std::sync::Arc; +use serde::{Deserialize, Serialize}; use tokio::sync::RwLock; use tower_lsp::jsonrpc::Result; use tower_lsp::lsp_types::*; use tower_lsp::{Client, LanguageServer, LspService, Server}; -use crate::typechecker::registry::ModuleRegistry; +use crate::build::cache::ModuleCache; use crate::lsp::utils::resolve::ResolutionExports; +use crate::typechecker::registry::ModuleRegistry; use utils::find_definition::DefinitionIndex; @@ -20,6 +24,56 @@ pub(crate) struct FileState { pub module_name: Option, } +/// Lightweight completion data extracted from CST type signatures. +/// Much smaller than full ModuleExports — just pre-formatted strings. 
+#[derive(Default, Serialize, Deserialize)] +pub(crate) struct CompletionIndex { + /// module_name → list of completion entries + pub entries: HashMap>, +} + +impl CompletionIndex { + pub fn save_to_disk(&self, path: &Path) -> io::Result<()> { + let encoded = bincode::serialize(self) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + let compressed = zstd::bulk::compress(&encoded, 1) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + std::fs::write(path, compressed) + } + + pub fn load_from_disk(path: &Path) -> io::Result { + let compressed = std::fs::read(path)?; + let data = zstd::bulk::decompress(&compressed, 64 * 1024 * 1024) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + bincode::deserialize(&data) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}"))) + } +} + +#[derive(Clone, Serialize, Deserialize)] +pub(crate) struct CompletionEntry { + pub name: String, + pub type_string: String, + pub kind: CompletionEntryKind, +} + +#[derive(Clone, Copy, PartialEq, Serialize, Deserialize)] +pub(crate) enum CompletionEntryKind { + Value, + Constructor, + Type, + Class, +} + +/// Load state for progressive LSP initialization. 
+/// 0 = Initializing (no data), 1 = CacheLoaded (from disk, may be stale), 2 = Ready (authoritative) +pub(crate) const LOAD_STATE_INITIALIZING: u8 = 0; +pub(crate) const LOAD_STATE_CACHE_LOADED: u8 = 1; +pub(crate) const LOAD_STATE_READY: u8 = 2; + pub struct Backend { pub(crate) client: Client, pub(crate) files: Arc>>, @@ -30,8 +84,11 @@ pub struct Backend { pub(crate) module_file_map: Arc>>, /// Maps file URI → source content for loaded project files pub(crate) source_map: Arc>>, + pub(crate) module_cache: Arc>, + pub(crate) completion_index: Arc>, pub(crate) sources_cmd: Option, - pub(crate) ready: Arc, + pub(crate) cache_dir: Option, + pub(crate) load_state: Arc, } #[tower_lsp::async_trait] @@ -58,8 +115,14 @@ impl LanguageServer for Backend { } async fn initialized(&self, _: InitializedParams) { - self.info("pfc language server initialized").await; + self.info("[lsp] pfc language server initializing").await; + let t = std::time::Instant::now(); self.load_sources().await; + self.info(format!( + "[lsp] pfc language server initialized in {:.2?}", + t.elapsed() + )) + .await; } async fn shutdown(&self) -> Result<()> { @@ -67,40 +130,152 @@ impl LanguageServer for Backend { } async fn did_open(&self, params: DidOpenTextDocumentParams) { + let name = params + .text_document + .uri + .path() + .rsplit('/') + .next() + .unwrap_or("") + .to_string(); + self.info(format!("[lsp] >> textDocument/didOpen {name}")) + .await; + let t = std::time::Instant::now(); self.on_change(params.text_document.uri, params.text_document.text) .await; + self.info(format!( + "[lsp] << textDocument/didOpen {name}: {:.2?}", + t.elapsed() + )) + .await; } async fn did_change(&self, params: DidChangeTextDocumentParams) { + let name = params + .text_document + .uri + .path() + .rsplit('/') + .next() + .unwrap_or("") + .to_string(); + self.info(format!("[lsp] >> textDocument/didChange {name}")) + .await; + let t = std::time::Instant::now(); if let Some(change) = 
params.content_changes.into_iter().next() { self.on_change(params.text_document.uri, change.text).await; } + self.info(format!( + "[lsp] << textDocument/didChange {name}: {:.2?}", + t.elapsed() + )) + .await; } async fn did_save(&self, params: DidSaveTextDocumentParams) { + let name = params + .text_document + .uri + .path() + .rsplit('/') + .next() + .unwrap_or("") + .to_string(); + self.info(format!("[lsp] >> textDocument/didSave {name}")) + .await; + let t = std::time::Instant::now(); if let Some(text) = params.text { self.on_change(params.text_document.uri, text).await; } + self.info(format!( + "[lsp] << textDocument/didSave {name}: {:.2?}", + t.elapsed() + )) + .await; } async fn goto_definition( &self, params: GotoDefinitionParams, ) -> Result> { - self.handle_goto_definition(params).await + self.info("[lsp] >> textDocument/definition").await; + let t = std::time::Instant::now(); + let result = self.handle_goto_definition(params).await; + self.info(format!( + "[lsp] << textDocument/definition: {:.2?}", + t.elapsed() + )) + .await; + result } async fn hover(&self, params: HoverParams) -> Result> { - self.handle_hover(params).await + self.info("[lsp] >> textDocument/hover").await; + let t = std::time::Instant::now(); + let result = self.handle_hover(params).await; + self.info(format!("[lsp] << textDocument/hover: {:.2?}", t.elapsed())) + .await; + result } async fn completion(&self, params: CompletionParams) -> Result> { - self.handle_completion(params).await + self.info("[lsp] >> textDocument/completion").await; + let t = std::time::Instant::now(); + let result = self.handle_completion(params).await; + self.info(format!( + "[lsp] << textDocument/completion: {:.2?}", + t.elapsed() + )) + .await; + result } } impl Backend { - pub fn new(client: Client, sources_cmd: Option) -> Self { + async fn rebuild_module(&self, params: serde_json::Value) -> Result { + self.info("[lsp] >> pfc/rebuildModule").await; + let t = std::time::Instant::now(); + if let Some(uri_str) = 
params.get("uri").and_then(|v| v.as_str()) { + if let Ok(uri) = Url::parse(uri_str) { + // Try open files first, then source_map, then disk + let source = { + let files = self.files.read().await; + files.get(uri_str).map(|f| f.source.clone()) + }; + let source = match source { + Some(s) => s, + None => { + let smap = self.source_map.read().await; + match smap.get(uri_str) { + Some(s) => s.clone(), + None => { + if let Ok(path) = uri.to_file_path() { + std::fs::read_to_string(path).unwrap_or_default() + } else { + String::new() + } + } + } + } + }; + self.on_change(uri, source).await; + } + } + self.info(format!("[lsp] << pfc/rebuildModule: {:.2?}", t.elapsed())) + .await; + Ok(serde_json::json!({ "success": true })) + } + + async fn rebuild_project(&self) -> Result { + self.info("[lsp] >> pfc/rebuildProject").await; + let t = std::time::Instant::now(); + self.load_sources().await; + self.info(format!("[lsp] << pfc/rebuildProject: {:.2?}", t.elapsed())) + .await; + Ok(serde_json::json!({ "success": true })) + } + + pub fn new(client: Client, sources_cmd: Option, cache_dir: Option) -> Self { Backend { client, files: Arc::new(RwLock::new(HashMap::new())), @@ -109,19 +284,56 @@ impl Backend { resolution_exports: Arc::new(RwLock::new(ResolutionExports::empty())), module_file_map: Arc::new(RwLock::new(HashMap::new())), source_map: Arc::new(RwLock::new(HashMap::new())), + module_cache: Arc::new(RwLock::new(ModuleCache::default())), + completion_index: Arc::new(RwLock::new(CompletionIndex::default())), sources_cmd, - ready: Arc::new(AtomicBool::new(false)), + cache_dir, + load_state: Arc::new(AtomicU8::new(LOAD_STATE_INITIALIZING)), + } + } + + /// Check if the LSP has loaded enough state to serve requests. + pub(crate) fn is_ready(&self) -> bool { + self.load_state.load(Ordering::SeqCst) >= LOAD_STATE_CACHE_LOADED + } + + /// Get source for a file URI, with lazy loading from disk. + /// Tries source_map first, falls back to reading the file. 
+ pub(crate) async fn get_source_for_uri(&self, uri: &str) -> Option { + // Check source_map first + { + let sm = self.source_map.read().await; + if let Some(source) = sm.get(uri) { + return Some(source.clone()); + } + } + // Lazy load from disk + let file_path = Url::parse(uri).ok()?.to_file_path().ok()?; + let source = std::fs::read_to_string(&file_path).ok()?; + // Cache it for next time + { + let mut sm = self.source_map.write().await; + sm.insert(uri.to_string(), source.clone()); } + Some(source) } } -pub fn run_server(sources_cmd: Option) { - let rt = tokio::runtime::Runtime::new().expect("failed to create tokio runtime"); +pub fn run_server(sources_cmd: Option, cache_dir: Option) { + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .thread_stack_size(16 * 1024 * 1024) // 16 MB — typechecker needs deep recursion + .build() + .expect("failed to create tokio runtime"); rt.block_on(async { let stdin = tokio::io::stdin(); let stdout = tokio::io::stdout(); - let (service, socket) = LspService::new(|client| Backend::new(client, sources_cmd)); + let (service, socket) = + LspService::build(|client| Backend::new(client, sources_cmd, cache_dir)) + .custom_method("pfc/rebuildModule", Backend::rebuild_module) + .custom_method("pfc/rebuildProject", Backend::rebuild_project) + .finish(); Server::new(stdin, stdout, socket).serve(service).await; }); diff --git a/src/lsp/utils/find_definition.rs b/src/lsp/utils/find_definition.rs index 9c067ec1..7f2879ac 100644 --- a/src/lsp/utils/find_definition.rs +++ b/src/lsp/utils/find_definition.rs @@ -1,7 +1,11 @@ use std::collections::HashMap; +use std::io; +use std::path::Path; + +use serde::{Deserialize, Serialize}; use crate::cst::*; -use crate::interner::Symbol; +use crate::interner::{self, Symbol}; use crate::span::Span; /// What kind of reference we found at the cursor @@ -263,6 +267,80 @@ impl DefinitionIndex { }), } } + + // ===== Disk Serialization ===== + + pub fn save_to_disk(&self, path: &Path) -> 
io::Result<()> { + let snapshot = PortableDefIndex { + values: serialize_map(&self.values), + types: serialize_map(&self.types), + constructors: serialize_map(&self.constructors), + }; + let encoded = bincode::serialize(&snapshot) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + let compressed = zstd::bulk::compress(&encoded, 1) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + std::fs::write(path, compressed) + } + + pub fn load_from_disk(path: &Path) -> io::Result { + let compressed = std::fs::read(path)?; + let data = zstd::bulk::decompress(&compressed, 128 * 1024 * 1024) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + let snapshot: PortableDefIndex = bincode::deserialize(&data) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + Ok(DefinitionIndex { + values: deserialize_map(&snapshot.values), + types: deserialize_map(&snapshot.types), + constructors: deserialize_map(&snapshot.constructors), + }) + } +} + +#[derive(Serialize, Deserialize)] +struct PortableDefEntry { + module_name: String, + symbol_name: String, + file_path: String, + span_start: usize, + span_end: usize, +} + +#[derive(Serialize, Deserialize)] +struct PortableDefIndex { + values: Vec, + types: Vec, + constructors: Vec, +} + +fn serialize_map(map: &HashMap<(String, Symbol), DefLocation>) -> Vec { + map.iter() + .map(|((module_name, sym), loc)| PortableDefEntry { + module_name: module_name.clone(), + symbol_name: interner::resolve(*sym).unwrap_or_default(), + file_path: loc.file_path.clone(), + span_start: loc.span.start, + span_end: loc.span.end, + }) + .collect() +} + +fn deserialize_map(entries: &[PortableDefEntry]) -> HashMap<(String, Symbol), DefLocation> { + entries + .iter() + .map(|e| { + ( + (e.module_name.clone(), interner::intern(&e.symbol_name)), + DefLocation { + file_path: 
e.file_path.clone(), + span: Span::new(e.span_start, e.span_end), + }, + ) + }) + .collect() } /// Maps imported names to their source modules. diff --git a/src/lsp/utils/resolve.rs b/src/lsp/utils/resolve.rs index 50538805..188cd7c0 100644 --- a/src/lsp/utils/resolve.rs +++ b/src/lsp/utils/resolve.rs @@ -1,5 +1,9 @@ use std::collections::{HashMap, HashSet}; +use std::io; +use std::path::Path; + +use serde::{Deserialize, Serialize}; use crate::span::Span; use crate::cst::{ @@ -119,6 +123,75 @@ impl ResolutionExports { fn get(&self, module: Symbol) -> Option<&ModuleResolvedNames> { self.modules.get(&module) } + + // ===== Disk Serialization ===== + + pub fn save_to_disk(&self, path: &Path) -> io::Result<()> { + let snapshot: HashMap = self + .modules + .iter() + .map(|(sym, names)| { + let key = interner::resolve(*sym).unwrap_or_default(); + (key, PModuleResolvedNames::from_names(names)) + }) + .collect(); + let encoded = bincode::serialize(&snapshot) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + let compressed = zstd::bulk::compress(&encoded, 1) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + std::fs::write(path, compressed) + } + + pub fn load_from_disk(path: &Path) -> io::Result { + let compressed = std::fs::read(path)?; + let data = zstd::bulk::decompress(&compressed, 128 * 1024 * 1024) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("zstd: {e}")))?; + let snapshot: HashMap = bincode::deserialize(&data) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("bincode: {e}")))?; + let modules = snapshot + .into_iter() + .map(|(key, pnames)| (interner::intern(&key), pnames.to_names())) + .collect(); + Ok(ResolutionExports { modules }) + } +} + +#[derive(Serialize, Deserialize)] +struct PModuleResolvedNames { + values: Vec, + types: Vec, + classes: Vec, + type_operators: Vec, + data_constructors: 
HashMap>, +} + +impl PModuleResolvedNames { + fn from_names(names: &ModuleResolvedNames) -> Self { + PModuleResolvedNames { + values: names.values.iter().map(|s| interner::resolve(*s).unwrap_or_default()).collect(), + types: names.types.iter().map(|s| interner::resolve(*s).unwrap_or_default()).collect(), + classes: names.classes.iter().map(|s| interner::resolve(*s).unwrap_or_default()).collect(), + type_operators: names.type_operators.iter().map(|s| interner::resolve(*s).unwrap_or_default()).collect(), + data_constructors: names.data_constructors.iter().map(|(k, v)| { + (interner::resolve(*k).unwrap_or_default(), v.iter().map(|s| interner::resolve(*s).unwrap_or_default()).collect()) + }).collect(), + } + } + + fn to_names(&self) -> ModuleResolvedNames { + ModuleResolvedNames { + values: self.values.iter().map(|s| interner::intern(s)).collect(), + types: self.types.iter().map(|s| interner::intern(s)).collect(), + classes: self.classes.iter().map(|s| interner::intern(s)).collect(), + type_operators: self.type_operators.iter().map(|s| interner::intern(s)).collect(), + data_constructors: self.data_constructors.iter().map(|(k, v)| { + (interner::intern(k), v.iter().map(|s| interner::intern(s)).collect()) + }).collect(), + } + } } /// Result of name resolution for a module. 
diff --git a/src/main.rs b/src/main.rs index 21d67265..88dac18d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -32,6 +32,10 @@ enum Commands { /// Shell command that outputs source file paths (one per line) #[arg(long)] sources_cmd: Option, + + /// Directory for disk cache (enables fast warm startup) + #[arg(long)] + cache_dir: Option, }, } @@ -49,38 +53,55 @@ fn main() { .init(); match cli.command { - Commands::Lsp { sources_cmd } => { - purescript_fast_compiler::lsp::run_server(sources_cmd); + Commands::Lsp { sources_cmd, cache_dir } => { + // Default to the same cache dir as CLI compile (output/.pfc-cache) + let cache_dir = cache_dir.or_else(|| { + let default = PathBuf::from("output/.pfc-cache"); + if default.exists() { Some(default) } else { None } + }); + purescript_fast_compiler::lsp::run_server(sources_cmd, cache_dir); } Commands::Compile { globs, output } => { log::debug!("Starting compile with globs: {:?}", globs); + let output_path = PathBuf::from(&output); + let cache_dir = output_path.join(".pfc-cache"); + + let cache_load_start = std::time::Instant::now(); + let mut cache = build::cache::ModuleCache::load_from_disk(&cache_dir) + .unwrap_or_default(); + log::debug!("Cache load: {:.2?}", cache_load_start.elapsed()); + let glob_refs: Vec<&str> = globs.iter().map(|s| s.as_str()).collect(); - let result = build::build(&glob_refs, Some(PathBuf::from(&output))); + let result = build::build_cached(&glob_refs, Some(output_path.clone()), &mut cache); - let mut error_count = 0; + let cache_save_start = std::time::Instant::now(); + if let Err(e) = cache.save_to_disk(&cache_dir) { + log::debug!("Failed to save build cache: {e}"); + } + log::debug!("Cache save: {:.2?}", cache_save_start.elapsed()); + + let mut error_messages: Vec = Vec::new(); for err in &result.build_errors { - eprintln!("[error] {err}"); - error_count += 1; + error_messages.push(format!("{err}")); } for module in &result.modules { - if module.type_errors.is_empty() { - println!("[ok] {}", 
module.module_name); - } else { - for err in &module.type_errors { - eprintln!("[error] {}: {err}", module.module_name); - error_count += 1; - } + for err in &module.type_errors { + error_messages.push(format!("{}: {err}", module.module_name)); } } - if error_count > 0 { + if !error_messages.is_empty() { + let error_count = error_messages.len(); eprintln!( - "\nCompilation failed with {error_count} error{}.", + "\nCompilation failed with {error_count} error{}:\n", if error_count == 1 { "" } else { "s" } ); + for msg in &error_messages { + eprintln!(" {msg}"); + } std::process::exit(1); } else { println!( diff --git a/src/parser/grammar.lalrpop b/src/parser/grammar.lalrpop index 64519523..bc3551ec 100644 --- a/src/parser/grammar.lalrpop +++ b/src/parser/grammar.lalrpop @@ -29,6 +29,7 @@ pub Module: Module = { imports, decls, comments: Vec::new(), + doc_comments: Vec::new(), } } }; @@ -152,6 +153,7 @@ ImportDecl: ImportDecl = { )?> => { ImportDecl { span: Span::new(start, end), + module_span: module.span, module: module.value, imports, qualified: qualified.map(|m| m.value), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 43af3375..119e89a7 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -147,6 +147,14 @@ fn attach_comments( // Store all comments on the module module.comments = comment_pairs.clone(); + // Attach doc-comments that appear before the `module` keyword to the module itself + let module_start = module.span.start; + module.doc_comments = comment_pairs + .iter() + .filter(|(c, span)| c.is_doc() && span.end <= module_start) + .map(|(c, _)| c.clone()) + .collect(); + if module.decls.is_empty() { return; } @@ -278,6 +286,40 @@ mod tests { assert_eq!(module.decls[0].doc_comments().len(), 2); } + #[test] + fn test_module_doc_comments() { + let module = parse("-- | This module does things\nmodule Main where\nfoo = 1").unwrap(); + assert_eq!(module.doc_comments.len(), 1); + assert!(module.doc_comments[0].is_doc()); + if let Comment::Doc(text) = 
&module.doc_comments[0] { + assert_eq!(text.trim(), "This module does things"); + } + } + + #[test] + fn test_module_multi_line_doc_comments() { + let module = parse("-- | Line 1\n-- | Line 2\nmodule Main where\nfoo = 1").unwrap(); + assert_eq!(module.doc_comments.len(), 2); + } + + #[test] + fn test_import_module_span() { + let module = parse("module Main where\nimport Data.Maybe").unwrap(); + assert_eq!(module.imports.len(), 1); + let imp = &module.imports[0]; + // "import Data.Maybe" — "Data.Maybe" starts at offset 25 (after "import ") + let src = "module Main where\nimport Data.Maybe"; + assert_eq!(&src[imp.module_span.start..imp.module_span.end], "Data.Maybe"); + } + + #[test] + fn test_module_doc_not_confused_with_decl_doc() { + // Doc comment after `where` should attach to the decl, not the module + let module = parse("module Main where\n-- | Decl doc\nfoo = 1").unwrap(); + assert_eq!(module.doc_comments.len(), 0); + assert_eq!(module.decls[0].doc_comments().len(), 1); + } + // ===== Expression Tests: Literals ===== #[test] diff --git a/src/typechecker/check.rs b/src/typechecker/check.rs index a1e1ed68..a1ab6cd6 100644 --- a/src/typechecker/check.rs +++ b/src/typechecker/check.rs @@ -2190,14 +2190,12 @@ fn check_module_impl(module: &Module, registry: &ModuleRegistry, collect_span_ty } ctx.type_con_arities.insert(qi(name.value), count_kind_arrows(kind)); } - Decl::TypeAlias { name, span, .. } => { - // Type synonyms re-defining an explicitly imported type name are a ScopeConflict. - // Data/newtype declarations are allowed to shadow imports. + Decl::TypeAlias { name, .. } => { + // Local type aliases shadow imported types, just like data/newtype declarations. + // A ScopeConflict is only raised if the ambiguous name is actually referenced + // (not merely declared or exported). Record the conflict for deferred checking. 
if explicitly_imported_types.contains(&name.value) { - errors.push(TypeError::ScopeConflict { - span: *span, - name: name.value, - }); + ctx.type_scope_conflicts.insert(name.value); } } _ => {} @@ -3252,6 +3250,13 @@ fn check_module_impl(module: &Module, registry: &ModuleRegistry, collect_span_ty collect_type_expr_vars(ty, &HashSet::new(), &mut errors); // Validate constraint class names in the type signature check_constraint_class_names(ty, &known_classes, &class_param_counts, &mut errors); + // Check for type-level scope conflicts (ambiguous type names) + if let Some((conflict_span, conflict_name)) = crate::typechecker::convert::find_type_scope_conflict(ty, &ctx.type_scope_conflicts) { + errors.push(TypeError::ScopeConflict { + span: conflict_span, + name: conflict_name, + }); + } match convert_type_expr(ty, &type_ops) { Ok(converted) => { // Check for partially applied synonyms in type signature @@ -8089,6 +8094,7 @@ fn check_module_impl(module: &Module, registry: &ModuleRegistry, collect_span_ty .collect(), class_superclasses: class_superclasses.clone(), method_own_constraints: ctx.method_own_constraints.iter().map(|(k, v)| (qi(*k), v.clone())).collect(), + module_doc: Vec::new(), // filled in by the outer CST-level wrapper }; // Ensure operator targets (e.g. Tuple for /\) are included in exported values and diff --git a/src/typechecker/convert.rs b/src/typechecker/convert.rs index 2d99f45e..8982814e 100644 --- a/src/typechecker/convert.rs +++ b/src/typechecker/convert.rs @@ -152,6 +152,70 @@ pub fn convert_type_expr(ty: &TypeExpr, type_ops: &HashMap) -> Option<(crate::span::Span, Symbol)> { + match ty { + TypeExpr::Constructor { name, span, .. } => { + if name.module.is_none() && conflicts.contains(&name.name) { + return Some((*span, name.name)); + } + None + } + TypeExpr::App { constructor, arg, .. } => { + find_type_scope_conflict(constructor, conflicts) + .or_else(|| find_type_scope_conflict(arg, conflicts)) + } + TypeExpr::Function { from, to, .. 
} => { + find_type_scope_conflict(from, conflicts) + .or_else(|| find_type_scope_conflict(to, conflicts)) + } + TypeExpr::Forall { ty, vars, .. } => { + for (_, _, kind) in vars { + if let Some(k) = kind { + if let Some(r) = find_type_scope_conflict(k, conflicts) { + return Some(r); + } + } + } + find_type_scope_conflict(ty, conflicts) + } + TypeExpr::Constrained { constraints, ty, .. } => { + for c in constraints { + for arg in &c.args { + if let Some(r) = find_type_scope_conflict(arg, conflicts) { + return Some(r); + } + } + } + find_type_scope_conflict(ty, conflicts) + } + TypeExpr::Kinded { ty, .. } => { + find_type_scope_conflict(ty, conflicts) + } + TypeExpr::Record { fields, .. } => { + for f in fields { + if let Some(r) = find_type_scope_conflict(&f.ty, conflicts) { + return Some(r); + } + } + None + } + TypeExpr::Row { fields, tail, .. } => { + for f in fields { + if let Some(r) = find_type_scope_conflict(&f.ty, conflicts) { + return Some(r); + } + } + if let Some(t) = tail { + return find_type_scope_conflict(t, conflicts); + } + None + } + _ => None, + } +} + /// Check that kind annotations in forall vars don't forward-reference variables /// declared later in the same forall. E.g. `forall (a :: k) k.` is invalid because /// `k` is used before it's declared. diff --git a/src/typechecker/infer.rs b/src/typechecker/infer.rs index a501561a..c97fbff0 100644 --- a/src/typechecker/infer.rs +++ b/src/typechecker/infer.rs @@ -111,6 +111,9 @@ pub struct InferCtx { /// Names that are ambiguous due to being imported from multiple modules. /// Referencing these names produces a ScopeConflict error. pub scope_conflicts: HashSet, + /// Type names that are ambiguous due to a local type alias shadowing an imported type. + /// Only checked when the type name is actually referenced in a type expression. + pub type_scope_conflicts: HashSet, /// Map from operator → class method target name (e.g. `<>` → `append`). /// Used for tracking deferred constraints on operator usage. 
pub operator_class_targets: HashMap, @@ -218,6 +221,7 @@ impl InferCtx { method_own_constraints: HashMap::new(), module_mode: false, scope_conflicts: HashSet::new(), + type_scope_conflicts: HashSet::new(), operator_class_targets: HashMap::new(), op_deferred_constraints: Vec::new(), class_fundeps: HashMap::new(), @@ -913,7 +917,7 @@ impl InferCtx { .collect(); // Unify the argument with the instantiated param - self.state.unify(span, &arg_ty, &instantiated_param)?; + self.state.unify(arg.span(), &arg_ty, &instantiated_param)?; // Post-check 1: verify no forall var leaked into ambient vars' solutions. // Catches escapes like `\x -> foo x` where x's type gets constrained @@ -984,7 +988,7 @@ impl InferCtx { let result_ty = Type::Unif(self.state.fresh_var()); let expected_func_ty = Type::fun(arg_ty, result_ty.clone()); - self.state.unify(span, &func_ty, &expected_func_ty)?; + self.state.unify(arg.span(), &func_ty, &expected_func_ty)?; Ok(result_ty) } @@ -992,7 +996,7 @@ impl InferCtx { fn infer_if( &mut self, env: &Env, - span: crate::span::Span, + _span: crate::span::Span, cond: &Expr, then_expr: &Expr, else_expr: &Expr, @@ -1005,7 +1009,7 @@ impl InferCtx { let then_ty = self.infer(env, then_expr)?; let else_ty = self.infer(env, else_expr)?; - self.state.unify(span, &then_ty, &else_ty)?; + self.state.unify(else_expr.span(), &then_ty, &else_ty)?; if is_underscore { Ok(Type::fun(Type::boolean(), then_ty)) @@ -1721,7 +1725,11 @@ impl InferCtx { // Infer the body and unify with result type let body_ty = self.infer_guarded(&alt_env, &alt.result)?; - self.state.unify(span, &result_ty, &body_ty)?; + let body_span = match &alt.result { + GuardedExpr::Unconditional(e) => e.span(), + GuardedExpr::Guarded(_) => alt.span, + }; + self.state.unify(body_span, &result_ty, &body_ty)?; } // Exhaustiveness check: for each scrutinee, verify all constructors are covered @@ -1778,14 +1786,14 @@ impl InferCtx { fn infer_array( &mut self, env: &Env, - span: crate::span::Span, + _span: 
crate::span::Span, elements: &[Expr], ) -> Result { let elem_ty = Type::Unif(self.state.fresh_var()); for elem in elements { let t = self.infer(env, elem)?; - self.state.unify(span, &elem_ty, &t)?; + self.state.unify(elem.span(), &elem_ty, &t)?; } Ok(Type::array(elem_ty)) @@ -2189,7 +2197,7 @@ impl InferCtx { // Apply: func expr (\_ -> rest) let after_first = Type::Unif(self.state.fresh_var()); - self.state.unify(span, &func_ty, &Type::fun(expr_ty, after_first.clone()))?; + self.state.unify(expr.span(), &func_ty, &Type::fun(expr_ty, after_first.clone()))?; let discard_arg = Type::Unif(self.state.fresh_var()); let cont_ty = Type::fun(discard_arg, rest_ty); let result = Type::Unif(self.state.fresh_var()); @@ -2219,7 +2227,7 @@ impl InferCtx { // Apply: bind expr (\binder -> rest) let after_first = Type::Unif(self.state.fresh_var()); - self.state.unify(span, &func_ty, &Type::fun(expr_ty, after_first.clone()))?; + self.state.unify(expr.span(), &func_ty, &Type::fun(expr_ty, after_first.clone()))?; let cont_ty = Type::fun(binder_ty, rest_ty); let result = Type::Unif(self.state.fresh_var()); self.state.unify(span, &after_first, &Type::fun(cont_ty, result.clone()))?; diff --git a/src/typechecker/mod.rs b/src/typechecker/mod.rs index 05f0ebd2..96dfce8f 100644 --- a/src/typechecker/mod.rs +++ b/src/typechecker/mod.rs @@ -109,11 +109,18 @@ fn check_module_with_options(module: &crate::cst::Module, registry: &ModuleRegis span_types: HashMap::new(), }; } - if collect_span_types { + let mut result = if collect_span_types { check::check_module_for_ide(&ast_module, registry) } else { check::check_module(&ast_module, registry) - } + }; + + // Propagate module-level doc-comments from CST to exports + result.exports.module_doc = module.doc_comments.iter().filter_map(|c| { + if let crate::cst::Comment::Doc(text) = c { Some(text.clone()) } else { None } + }).collect(); + + result } #[cfg(test)] diff --git a/src/typechecker/registry.rs b/src/typechecker/registry.rs index 
9d1e65f7..55703320 100644 --- a/src/typechecker/registry.rs +++ b/src/typechecker/registry.rs @@ -76,6 +76,8 @@ pub struct ModuleExports { /// Method-level constraint class names from class definitions. /// Maps method name → constraint class names. Used for current_given_expanded in instance methods. pub method_own_constraints: HashMap>, + /// Module-level doc-comments (appear before the `module` keyword) + pub module_doc: Vec, } /// Registry of compiled modules, used to resolve imports. diff --git a/tests/build.rs b/tests/build.rs index a27b9a16..9c8f8565 100644 --- a/tests/build.rs +++ b/tests/build.rs @@ -7,6 +7,7 @@ use ntest_timeout::timeout; use rayon::prelude::*; use purescript_fast_compiler::build::{ build_from_sources_with_js, build_from_sources_with_options, build_from_sources_with_registry, + build_from_sources_incremental, cache::ModuleCache, BuildError, BuildOptions, BuildResult, }; use purescript_fast_compiler::typechecker::error::TypeError; @@ -534,7 +535,6 @@ fn matches_expected_error( "UnsupportedTypeInKind" => has("UnsupportedTypeInKind"), "CannotDeriveInvalidConstructorArg" => has("CannotDeriveInvalidConstructorArg"), "MissingFFIImplementations" => has("MissingFFIImplementations"), - "UnusedFFIImplementations" => has("UnusedFFIImplementations"), "UnsupportedFFICommonJSExports" => has("UnsupportedFFICommonJSExports"), "UnsupportedFFICommonJSImports" => has("UnsupportedFFICommonJSImports"), "DeprecatedFFICommonJSModule" => has("DeprecatedFFICommonJSModule"), @@ -595,6 +595,7 @@ fn build_fixture_original_compiler_failing() { // Run in a separate thread with a large stack to avoid stack overflows // from deeply recursive fixtures, and catch panics. 
// ===== Incremental build tests =====

/// Building twice with an unchanged source set must succeed both times and
/// leave exports for every module in the cache.
#[test]
fn incremental_build_caches_modules() {
    let sources: Vec<(&str, &str)> = vec![
        ("ModA.purs", "module ModA where\n\nvalA :: Int\nvalA = 42\n"),
        ("ModB.purs", "module ModB where\n\nimport ModA\n\nvalB :: Int\nvalB = valA\n"),
    ];

    let options = BuildOptions::default();
    let mut cache = ModuleCache::new();

    // First build: everything is typechecked from scratch.
    let (first, _, _) = build_from_sources_incremental(&sources, &None, None, &options, &mut cache);
    assert!(first.build_errors.is_empty(), "First build should succeed");
    assert_eq!(first.modules.len(), 2);
    first.modules.iter().for_each(|m| {
        assert!(m.type_errors.is_empty(), "Module {} should have no errors", m.module_name);
    });

    // Both modules must now have cached exports.
    assert!(cache.get_exports("ModA").is_some(), "ModA should be cached");
    assert!(cache.get_exports("ModB").is_some(), "ModB should be cached");

    // Second build with identical sources: should be served from the cache.
    let (second, _, _) = build_from_sources_incremental(&sources, &None, None, &options, &mut cache);
    assert!(second.build_errors.is_empty(), "Second build should succeed");
    assert_eq!(second.modules.len(), 2);
    second.modules.iter().for_each(|m| {
        assert!(m.type_errors.is_empty(), "Cached module {} should have no errors", m.module_name);
    });
}

/// Editing a module that others depend on must invalidate and rebuild the
/// dependents as well, not just the edited module.
#[test]
fn incremental_build_rebuilds_changed_module() {
    let options = BuildOptions::default();
    let mut cache = ModuleCache::new();

    // Initial build with the original definition of ModA.
    let sources_v1: Vec<(&str, &str)> = vec![
        ("ModA.purs", "module ModA where\n\nvalA :: Int\nvalA = 42\n"),
        ("ModB.purs", "module ModB where\n\nimport ModA\n\nvalB :: Int\nvalB = valA\n"),
    ];
    let (initial, _, _) = build_from_sources_incremental(&sources_v1, &None, None, &options, &mut cache);
    assert!(initial.build_errors.is_empty());

    // ModA's body changes; ModB imports it, so both must be re-typechecked.
    let sources_v2: Vec<(&str, &str)> = vec![
        ("ModA.purs", "module ModA where\n\nvalA :: Int\nvalA = 99\n"),
        ("ModB.purs", "module ModB where\n\nimport ModA\n\nvalB :: Int\nvalB = valA\n"),
    ];
    let (rebuilt, _, _) = build_from_sources_incremental(&sources_v2, &None, None, &options, &mut cache);
    assert!(rebuilt.build_errors.is_empty(), "Rebuild should succeed");
    assert_eq!(rebuilt.modules.len(), 2);
    rebuilt.modules.iter().for_each(|m| {
        assert!(m.type_errors.is_empty(), "Module {} should have no errors after rebuild", m.module_name);
    });
}

/// A cache saved to disk and loaded back must retain its entries and remain
/// usable for a subsequent build.
#[test]
fn incremental_build_disk_roundtrip() {
    let sources: Vec<(&str, &str)> = vec![
        ("ModA.purs", "module ModA where\n\nvalA :: Int\nvalA = 42\n"),
    ];

    let options = BuildOptions::default();
    let mut cache = ModuleCache::new();

    // Populate the in-memory cache.
    let (result, _, _) = build_from_sources_incremental(&sources, &None, None, &options, &mut cache);
    assert!(result.build_errors.is_empty());

    // Round-trip the cache through a file under the OS temp directory.
    let tmp_dir = std::env::temp_dir().join("pfc-test-cache");
    let cache_path = tmp_dir.join("cache.bin");
    cache.save_to_disk(&cache_path).expect("Failed to save cache");

    let mut loaded_cache = ModuleCache::load_from_disk(&cache_path).expect("Failed to load cache");
    assert!(loaded_cache.get_exports("ModA").is_some(), "Loaded cache should have ModA");

    // The reloaded cache must serve a fresh build.
    let (result2, _, _) = build_from_sources_incremental(&sources, &None, None, &options, &mut loaded_cache);
    assert!(result2.build_errors.is_empty(), "Build with loaded cache should succeed");

    // Best-effort cleanup; failure to remove the temp dir is not a test failure.
    let _ = std::fs::remove_dir_all(&tmp_dir);
}
let writer = std::sync::Arc::new(Mutex::new(req_client)); @@ -198,7 +198,7 @@ async fn test_lsp_initialize_capabilities() { let (req_client, req_server) = tokio::io::duplex(1024 * 64); let (resp_server, resp_client) = tokio::io::duplex(1024 * 64); - let (service, socket) = LspService::new(|client| Backend::new(client, None)); + let (service, socket) = LspService::new(|client| Backend::new(client, None, None)); tokio::spawn(Server::new(req_server, resp_server, socket).serve(service)); let mut writer = req_client; diff --git a/tests/snapshots.rs b/tests/snapshots.rs index 6b312db6..9cfc8d73 100644 --- a/tests/snapshots.rs +++ b/tests/snapshots.rs @@ -100,7 +100,7 @@ fn snap_expr_negate() { fn snap_expr_error_branch_mismatch() { insta::assert_snapshot!( format_expr_type(r#"if true then 1 else "x""#), - @"ERROR: Could not match type Int with type String at 0:23" + @"ERROR: Could not match type Int with type String at 20:23" ); } diff --git a/tests/typechecker_comprehensive.rs b/tests/typechecker_comprehensive.rs index 77ff680f..6fa9d190 100644 --- a/tests/typechecker_comprehensive.rs +++ b/tests/typechecker_comprehensive.rs @@ -142,6 +142,20 @@ fn assert_module_not_implemented(source: &str) { ); } +/// Assert that a type error's span covers exactly the expected source text. 
+fn assert_error_span_text(source: &str, error_code: &str, expected_text: &str) { + let (_, errors) = check_module_types(source); + let err = errors.iter().find(|e| e.code() == error_code) + .unwrap_or_else(|| panic!("expected {} error, got errors: {:?}", error_code, errors.iter().map(|e| format!("{} ({})", e.code(), e)).collect::>())); + let span = err.span(); + assert!(span.start <= span.end && span.end <= source.len(), + "error span for {} is invalid: start={}, end={}, source len={}", + error_code, span.start, span.end, source.len()); + let actual = &source[span.start..span.end]; + assert_eq!(actual, expected_text, + "error span for {} should cover '{}' but covers '{}'", error_code, expected_text, actual); +} + // ═══════════════════════════════════════════════════════════════════════════ // 1. LITERALS // ═══════════════════════════════════════════════════════════════════════════ @@ -7945,3 +7959,62 @@ x = Wrap 42"; Type::app(Type::con("A", "Wrapper"), Type::int()) ); } + +// ═══════════════════════════════════════════════════════════════════════════ +// ERROR SPAN PRECISION TESTS +// ═══════════════════════════════════════════════════════════════════════════ + +#[test] +fn error_span_if_else_branch_mismatch() { + assert_error_span_text( + r#"module Test where +x = if true then 1 else "a""#, + "UnificationError", + "\"a\"" + ); +} + +#[test] +fn error_span_case_alternative_body() { + assert_error_span_text( + "module Test where\nx = case true of\n true -> 1\n false -> \"a\"", + "UnificationError", + "\"a\"" + ); +} + +#[test] +fn error_span_array_element_mismatch() { + assert_error_span_text( + "module Test where\nx = [1, 2, \"three\"]", + "UnificationError", + "\"three\"" + ); +} + +#[test] +fn error_span_function_arg_mismatch() { + assert_error_span_text( + "module Test where\nf :: Int -> Int\nf n = n\nx = f \"hello\"", + "UnificationError", + "\"hello\"" + ); +} + +#[test] +fn error_span_if_condition_not_boolean() { + assert_error_span_text( + "module Test 
where\nx = if 42 then 1 else 2", + "UnificationError", + "42" + ); +} + +#[test] +fn error_span_case_multiple_alternatives() { + assert_error_span_text( + "module Test where\nx = case 1 of\n 1 -> \"a\"\n 2 -> \"b\"\n _ -> true", + "UnificationError", + "true" + ); +}