diff --git a/Cargo.lock b/Cargo.lock index 473ef8b00f..01e1a07c6e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -721,6 +721,15 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "doc-dup-checker" +version = "0.0.0" +dependencies = [ + "cargo_metadata", + "serde", + "serde_json", +] + [[package]] name = "downcast" version = "0.11.0" diff --git a/Cargo.toml b/Cargo.toml index 955b0d0c9d..76eec709df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -341,6 +341,7 @@ members = [ "tests/spanner", "tests/storage", "tools/check-copyright", + "tools/doc-dup-checker", "tools/minimal-version-helper", ] diff --git a/tools/doc-dup-checker/Cargo.toml b/tools/doc-dup-checker/Cargo.toml new file mode 100644 index 0000000000..fbdaff25b7 --- /dev/null +++ b/tools/doc-dup-checker/Cargo.toml @@ -0,0 +1,28 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[package] +name = "doc-dup-checker" +version = "0.0.0" +publish = false +# Inherit other attributes from the workspace. 
+edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true + +[dependencies] +serde.workspace = true +serde_json.workspace = true +cargo_metadata.workspace = true diff --git a/tools/doc-dup-checker/README.md b/tools/doc-dup-checker/README.md new file mode 100644 index 0000000000..2781d2b8a6 --- /dev/null +++ b/tools/doc-dup-checker/README.md @@ -0,0 +1,44 @@ +# Doc Duplicate Checker + +This tool identifies instances of duplicate documentation caused by documented +re-exports (`pub use`) in the Rust workspace. + +## Problem + +When a crate re-exports an item and both the re-export and the target item have +documentation comments, `rustdoc` concatenates them in the generated HTML. This +often leads to redundant or confusing documentation for the user. + +Example: + +```rust +/// Docs for re-export. +pub use target::Item; + +// in target: +/// Docs for item. +pub struct Item; +``` + +The resulting documentation will show both "Docs for re-export." and "Docs for +item." appended together. + +## Solution + +This tool leverages the Rustdoc JSON backend to detect overlapping documentation +between re-exports and their targets. It helps maintain high-quality +documentation standards programmatically. + +## Usage + +For detailed usage instructions and available options, run: + +```bash +cargo run -p doc-dup-checker -- --help +``` + +## Exceptions + +The tool does not flag a re-export when its doc comment ends with an intentional +paragraph break (a trailing blank line, i.e. the docs end with `\n\n`), as this +might be a conscious choice to add context. diff --git a/tools/doc-dup-checker/src/main.rs b/tools/doc-dup-checker/src/main.rs new file mode 100644 index 0000000000..f3fee9cce6 --- /dev/null +++ b/tools/doc-dup-checker/src/main.rs @@ -0,0 +1,214 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use serde_json::Value; +use std::env; +use std::fs::File; +use std::io::BufReader; +use std::process::Command; + +/// Internal structure tracking package metadata retrieved via cargo. +struct PackageInfo { + /// The name of the package. + name: String, + /// Full path to the package's Cargo.toml file. + manifest_path: String, + /// True if the package can be published (publish != []). + publish: bool, +} + + +fn main() -> Result<(), Box> { + let args: Vec = env::args().collect(); + + if args.len() > 1 && (args[1] == "--help" || args[1] == "-h") { + eprintln!("Usage: {} [package_name...]", args[0]); + eprintln!(" Checks for potential duplicate documentation."); + eprintln!(" If no packages are specified, checks all relevant crates in the workspace."); + return Ok(()); + } + + let target_packages: Vec<&str> = args.iter().skip(1).map(|s| s.as_str()).collect(); + let (workspace_packages, target_dir) = get_workspace_packages()?; + + // Validate user-supplied package list, if any. 
+ let invalid_packages: Vec<_> = target_packages + .iter() + .copied() + .filter(|&target| !workspace_packages.iter().any(|p| p.name == target)) + .collect(); + + if !invalid_packages.is_empty() { + for name in invalid_packages { + eprintln!("Error: Package '{}' not found in workspace.", name); + } + std::process::exit(1); + } + + let final_packages: Vec = if !target_packages.is_empty() { + target_packages.iter().map(|&s| s.to_string()).collect() + } else { + workspace_packages + .into_iter() + .filter(|p| { + !p.manifest_path.contains("src/generated") + && !p.manifest_path.contains("tests/") + && p.publish + }) + .map(|p| p.name) + .collect() + }; + + generate_and_validate_docs(&final_packages, target_dir.as_str()) +} + +/// Queries the workspace metadata using the `cargo_metadata` crate. +/// +/// Returns a list of extracted `PackageInfo` definitions and the workspace's canonical target directory. +fn get_workspace_packages() -> Result<(Vec, String), Box> { + let metadata = cargo_metadata::MetadataCommand::new().exec()?; + let target_dir = metadata.target_directory.to_string(); + + let mut results = Vec::new(); + for p in metadata.packages { + if metadata.workspace_members.contains(&p.id) { + let publish = if let Some(pub_val) = &p.publish { + !pub_val.is_empty() + } else { + true + }; + + results.push(PackageInfo { + name: p.name.to_string(), + manifest_path: p.manifest_path.to_string(), + publish, + }); + } + } + + Ok((results, target_dir)) +} + +/// Executes `rustdoc --output-format json` across target packages sequentially and evaluates them. 
+fn generate_and_validate_docs(package_names: &[String], target_dir: &str) -> Result<(), Box> { + if package_names.is_empty() { + println!("No packages to check."); + return Ok(()); + } + + println!("Checking packages: {:?}", package_names); + + let mut total_errors = 0; + let doc_dir = std::path::PathBuf::from(target_dir).join("doc"); + + for name in package_names { + println!("\n=== Processing crate: {} ===", name); + + // Run cargo rustdoc + let status = Command::new("cargo") + .arg("+nightly") + .arg("rustdoc") + .args(&[ + "-p", + name, + "--", + "-Z", + "unstable-options", + "--output-format", + "json", + ]) + .status()?; + + if !status.success() { + return Err(format!("Failed to generate docs for {}", name).into()); + } + + // Load generated JSON + let json_filename = doc_dir.join(format!("{}.json", name.replace("-", "_"))); + let file = File::open(&json_filename).map_err(|e| { + format!("JSON file not found for crate {} at {:?}: {}", name, json_filename, e) + })?; + + let reader = BufReader::new(file); + let doc_v: Value = serde_json::from_reader(reader)?; + let doc_index = doc_v + .get("index") + .and_then(|i| i.as_object()) + .ok_or("No index found in doc JSON")?; + + total_errors += detect_duplicate_reexports(doc_index); + } + + if total_errors > 0 { + eprintln!( + "\nError: Found {} instances of duplicate documentation.", + total_errors + ); + std::process::exit(1); + } + + Ok(()) +} + +/// Analyzes deserialized rustdoc JSON index for document overlaps on crate re-exports (`pub use`). 
+fn detect_duplicate_reexports(index: &serde_json::Map) -> i32 { + let mut error_count = 0; + + for (id, item) in index { + let docs = item.get("docs").and_then(|d| d.as_str()).unwrap_or(""); + if docs.is_empty() { + continue; + } + + // We only care about re-exports + let Some(inner_use) = item.get("inner").and_then(|inr| inr.get("use")) else { + continue; + }; + + let target_id = inner_use + .get("id") + .map(|v| { + v.as_str() + .map(String::from) + .or_else(|| v.as_i64().map(|n| n.to_string())) + .unwrap_or_default() + }) + .unwrap_or_default(); + + if target_id.is_empty() { + continue; + } + + let name = inner_use.get("name").and_then(|n| n.as_str()).unwrap_or("unnamed"); + let source = inner_use.get("source").and_then(|s| s.as_str()).unwrap_or("unknown"); + + println!("\nFound documented re-export: {} (source: {}, ID: {})", name, source, id); + + if let Some(target_item) = index.get(&target_id) { + let target_docs = target_item.get("docs").and_then(|d| d.as_str()).unwrap_or(""); + if !target_docs.is_empty() && !docs.ends_with("\n\n") { + println!(" ERROR: Both re-export and target have documentation, and re-export docs do not end with double newline!"); + println!(" Re-export docs: {}", docs); + println!(" Target docs: {}", target_docs); + error_count += 1; + } + } else if !docs.ends_with("\n\n") { + println!(" WARNING: Documented re-export of external item (target ID {} not in index), and re-export docs do not end with double newline.", target_id); + println!(" Potential duplicate if target has docs."); + println!(" Re-export docs: {}", docs); + } + } + + error_count +}