From 59ae867c71e4d78f1e45ab69d52b9e8b33a61645 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 23 Feb 2026 00:11:43 +0800 Subject: [PATCH 1/3] feat(doclinks): validate and resolve .md links, fix broken doc links Add Pattern 3 to doclinks that validates existing .md links in docs: - Resolves relative .md paths to absolute links - Validates absolute .md links exist on disk - Falls back to doc_index search for broken links with disambiguation - Handles index.md files by searching parent dir name - Scores candidates by directory overlap + filename match Delete duplicate docs/usage/transports.md (identical to transports/index.md). Fixes 5 broken links across docs/ (agents/docs/index.md, capabilities/ navigation/readme.md, usage/lcm.md). --- dimos/utils/docs/doclinks.py | 140 +++++- dimos/utils/docs/test_doclinks.py | 170 +++++++ docs/agents/docs/index.md | 8 +- docs/agents/pr.md | 51 ++ docs/capabilities/navigation/readme.md | 4 +- docs/platforms/quadruped/go2/index.md | 14 +- docs/usage/data_streams/README.md | 10 +- docs/usage/data_streams/advanced_streams.md | 4 +- docs/usage/data_streams/temporal_alignment.md | 4 +- docs/usage/lcm.md | 2 +- docs/usage/sensor_streams/README.md | 10 +- docs/usage/sensor_streams/advanced_streams.md | 4 +- .../sensor_streams/temporal_alignment.md | 4 +- docs/usage/transports.md | 437 ------------------ docs/usage/transports/index.md | 4 +- 15 files changed, 394 insertions(+), 472 deletions(-) create mode 100644 docs/agents/pr.md delete mode 100644 docs/usage/transports.md diff --git a/dimos/utils/docs/doclinks.py b/dimos/utils/docs/doclinks.py index 67d5897b28..7f0665d41d 100644 --- a/dimos/utils/docs/doclinks.py +++ b/dimos/utils/docs/doclinks.py @@ -151,6 +151,35 @@ def extract_other_backticks(line: str, file_ref: str) -> list[str]: return [m for m in matches if m != file_ref and not m.endswith(".py") and "/" not in m] +def score_path_similarity(candidate: Path, original_path: str) -> int: + """Score how well a candidate matches 
the original link's path. + + Counts common directory names plus a bonus for matching filename. + Higher = better match. + """ + orig = Path(original_path) + orig_dirs = set(orig.parent.parts) + cand_dirs = set(candidate.parent.parts) + score = len(orig_dirs & cand_dirs) + if candidate.name == orig.name: + score += 1 + return score + + +def pick_best_candidate(candidates: list[Path], original_path: str) -> Path | None: + """Pick the best candidate by path similarity. Returns None if tied.""" + if not candidates: + return None + if len(candidates) == 1: + return candidates[0] + scored = sorted(candidates, key=lambda c: score_path_similarity(c, original_path), reverse=True) + top = score_path_similarity(scored[0], original_path) + second = score_path_similarity(scored[1], original_path) + if top > second: + return scored[0] + return None # Ambiguous tie + + def generate_link( rel_path: Path, root: Path, @@ -254,6 +283,9 @@ def process_markdown( # Pattern 2: [Text](.md) - doc file links doc_pattern = r"\[([^\]]+)\]\(\.md\)" + # Pattern 3: [Text](path.md) or [Text](path.md#fragment) - existing .md links to verify/resolve + md_link_pattern = r"\[([^\]]+)\]\(([^)]+\.md(?:#[^)]*)?)\)" + def replace_code_match(match: re.Match[str]) -> str: file_ref = match.group(1) current_link = match.group(2) @@ -341,15 +373,120 @@ def replace_doc_match(match: re.Match[str]) -> str: return new_match + def replace_md_link_match(match: re.Match[str]) -> str: + """Verify and resolve relative .md links to absolute paths.""" + link_text = match.group(1) + raw_link = match.group(2) + full_match = match.group(0) + + # Skip backtick-wrapped text (code links handled by code_pattern) + if link_text.startswith("`") and link_text.endswith("`"): + return full_match + + # Skip URLs + if raw_link.startswith(("http://", "https://")): + return full_match + + # Extract fragment if present + fragment = "" + link_path = raw_link + if "#" in raw_link: + link_path, frag = raw_link.split("#", 1) + fragment = 
def replace_md_link_match(match: re.Match[str]) -> str:
    """Verify an existing ``.md`` link and rewrite it when it can be resolved.

    Used by ``process_markdown`` as the ``re.sub`` callback for
    ``md_link_pattern``. It is a closure: ``root``, ``doc_path``,
    ``doc_index``, ``link_mode``, ``github_url`` and ``github_ref`` come from
    the enclosing scope, and results are reported by appending to the
    enclosing ``changes`` and ``errors`` lists.

    Behavior:
        * Backtick-wrapped link text, http(s) URLs and the bare ``.md``
          placeholder are returned untouched (other patterns own them).
        * An absolute link (leading ``/``) that exists under ``root`` is left
          as-is; a relative link that exists is regenerated via
          ``generate_link``.
        * A link whose target is missing is looked up in ``doc_index`` by file
          stem (or by parent directory name for ``index.md``); a unique or
          best-scoring candidate replaces it, otherwise an error is recorded
          and the original text is kept.

    Args:
        match: Match of ``md_link_pattern``; group 1 is the link text and
            group 2 the target, including any ``#fragment``.

    Returns:
        The replacement markdown link, or the original match text.
    """
    link_text = match.group(1)
    raw_link = match.group(2)
    full_match = match.group(0)

    def find_by_name(
        link_path: str, similarity_path: str
    ) -> tuple[Path | None, list[Path], str]:
        """Look up docs named like ``link_path``; return (best, candidates, stem)."""
        stem = Path(link_path).stem.lower()
        if stem == "index":
            # "foo/index.md" is indexed under its directory name.
            stem = Path(link_path).parent.name.lower()
        candidates = doc_index.get(stem, []) if doc_index else []
        # pick_best_candidate already covers the 0- and 1-candidate cases.
        return pick_best_candidate(candidates, similarity_path), candidates, stem

    def ambiguous_error(stem: str, candidates: list[Path]) -> str:
        """Build the error text for a broken link with several equally good docs."""
        return (
            f"Broken link '{raw_link}': multiple docs match '{stem}': "
            f"{[str(c) for c in candidates]}"
        )

    # Skip backtick-wrapped text (code links handled by code_pattern)
    if link_text.startswith("`") and link_text.endswith("`"):
        return full_match

    # Skip URLs
    if raw_link.startswith(("http://", "https://")):
        return full_match

    # Split off the fragment (kept with its leading "#") if present
    link_path, hash_sep, frag = raw_link.partition("#")
    fragment = hash_sep + frag

    # Skip bare .md placeholder (handled by doc_pattern)
    if link_path == ".md":
        return full_match

    # NOTE(review): the leading whitespace of the change messages below is
    # reproduced as shown in the patch text; confirm it matches the indent
    # used by the other patterns' change entries.

    # Already absolute link - validate only
    if link_path.startswith("/"):
        root_relative = link_path.lstrip("/")
        if (root / root_relative).exists():
            return full_match  # Valid, leave as-is

        # Broken absolute link - try search
        resolved_doc, candidates, stem = find_by_name(link_path, root_relative)
        if resolved_doc is not None:
            new_link = generate_link(
                resolved_doc, root, doc_path, link_mode, github_url, github_ref, fragment
            )
            changes.append(f" {link_text}: {raw_link} -> {new_link} (fixed broken link)")
            return f"[{link_text}]({new_link})"
        if len(candidates) > 1:
            errors.append(ambiguous_error(stem, candidates))
        else:
            errors.append(f"Broken link: '{raw_link}' does not exist")
        return full_match

    # Relative link - resolve from doc file's directory
    resolved = (doc_path.parent / link_path).resolve()
    try:
        rel_to_root = resolved.relative_to(root)
    except ValueError:
        errors.append(f"Link '{raw_link}' resolves outside repo root")
        return full_match

    if resolved.exists():
        # File exists - convert to appropriate link format
        new_link = generate_link(
            rel_to_root, root, doc_path, link_mode, github_url, github_ref, fragment
        )
        result = f"[{link_text}]({new_link})"
        if result != full_match:
            changes.append(f" {link_text}: {raw_link} -> {new_link}")
        return result

    # Target doesn't exist - try searching by doc name. Score candidates
    # against the root-relative location the link points at (no "..", no
    # "#fragment"), mirroring what the absolute branch passes.
    resolved_doc, candidates, stem = find_by_name(link_path, rel_to_root.as_posix())
    if resolved_doc is not None:
        new_link = generate_link(
            resolved_doc, root, doc_path, link_mode, github_url, github_ref, fragment
        )
        changes.append(f" {link_text}: {raw_link} -> {new_link} (found by search)")
        return f"[{link_text}]({new_link})"
    if len(candidates) > 1:
        errors.append(ambiguous_error(stem, candidates))
    else:
        errors.append(
            f"Broken link '{raw_link}': target not found, no doc matching '{stem}'"
        )
    return full_match
class TestPathSimilarity:
    """Unit tests for ``score_path_similarity`` and ``pick_best_candidate``."""

    ORIGINAL = "docs/agents/docs_agent/codeblocks.md"

    def test_exact_dir_match(self):
        """Two shared directory names plus the filename bonus score 3."""
        candidate = Path("docs/agents/docs/codeblocks.md")
        score = score_path_similarity(candidate, self.ORIGINAL)
        assert score == 3  # docs, agents + filename match

    def test_partial_match(self):
        """Some shared dirs should give partial score."""
        candidate = Path("docs/other/codeblocks.md")
        score = score_path_similarity(candidate, self.ORIGINAL)
        assert score == 2  # docs dir + filename match

    def test_no_match(self):
        """Unrelated dirs should give filename-only score."""
        candidate = Path("src/lib/codeblocks.md")
        score = score_path_similarity(candidate, self.ORIGINAL)
        assert score == 1  # filename match only, no dir overlap

    def test_pick_best_empty(self):
        """No candidates means nothing to pick."""
        assert pick_best_candidate([], self.ORIGINAL) is None

    def test_pick_best_single(self):
        """Single candidate always wins."""
        candidates = [Path("docs/agents/docs/codeblocks.md")]
        best = pick_best_candidate(candidates, self.ORIGINAL)
        assert best == candidates[0]

    def test_pick_best_disambiguates(self):
        """Should pick candidate with more directory overlap."""
        candidates = [
            Path("docs/other/codeblocks.md"),
            Path("docs/agents/docs/codeblocks.md"),
        ]
        best = pick_best_candidate(candidates, self.ORIGINAL)
        assert best == Path("docs/agents/docs/codeblocks.md")

    def test_pick_best_tie_returns_none(self):
        """Tied scores should return None."""
        candidates = [
            Path("a/x/file.md"),
            Path("b/x/file.md"),
        ]
        best = pick_best_candidate(candidates, "c/x/file.md")
        assert best is None
class TestMdLinkResolution:
    """Tests for Pattern 3 of ``process_markdown``: existing ``.md`` links."""

    def _process(self, content, file_index, doc_index, doc_path=None, link_mode="absolute"):
        """Run ``process_markdown`` on ``content`` as if it lived at ``doc_path``.

        ``doc_path`` defaults to ``docs/usage/test.md`` under ``REPO_ROOT``.
        Returns the ``(new_content, changes, errors)`` tuple unchanged.
        """
        if doc_path is None:
            doc_path = REPO_ROOT / "docs/usage/test.md"
        return process_markdown(
            content,
            REPO_ROOT,
            doc_path,
            file_index,
            link_mode=link_mode,
            github_url=None,
            github_ref="main",
            doc_index=doc_index,
        )

    def test_resolves_relative_md_link(self, file_index, doc_index):
        """Should resolve a valid relative .md link to absolute path."""
        # docs/usage/configuration.md exists — link from docs/usage/test.md
        content = "[Configuration](configuration.md)"
        new_content, changes, errors = self._process(content, file_index, doc_index)

        assert len(errors) == 0
        assert "configuration.md" in new_content

    def test_validates_absolute_md_link(self, file_index, doc_index):
        """Valid absolute .md link should be left unchanged."""
        content = "[Configuration](/docs/usage/configuration.md)"
        new_content, changes, errors = self._process(content, file_index, doc_index)

        assert len(errors) == 0
        assert new_content == content

    def test_reports_broken_absolute_md_link(self, file_index, doc_index):
        """Broken absolute .md link with no match should error."""
        content = "[Foo](/docs/nonexistent/xyzzy_no_match.md)"
        new_content, changes, errors = self._process(content, file_index, doc_index)

        assert len(errors) == 1
        assert "Broken link" in errors[0] or "does not exist" in errors[0]

    def test_searches_broken_relative_link(self, file_index, doc_index):
        """Broken relative .md link should be resolved by name search if unique."""
        # Use a controlled index with exactly one candidate so the outcome does
        # not depend on how many "configuration" docs the repository contains.
        unique_index = {"configuration": [Path("docs/usage/configuration.md")]}
        content = "[Configuration](../nonexistent/configuration.md)"
        new_content, changes, errors = self._process(content, file_index, unique_index)

        assert len(errors) == 0
        assert len(changes) == 1
        assert "found by search" in changes[0]
        assert "usage/configuration.md" in new_content

    def test_disambiguates_by_path_similarity(self, file_index, doc_index):
        """Multiple candidates should be disambiguated by directory overlap."""
        # Build a custom doc_index with multiple candidates
        from collections import defaultdict

        custom_doc_index: dict[str, list[Path]] = defaultdict(list)
        custom_doc_index["testdoc"] = [
            Path("docs/other/testdoc.md"),
            Path("docs/agents/docs/testdoc.md"),
        ]

        content = "[TestDoc](../agents/docs_agent/testdoc.md)"
        new_content, changes, errors = self._process(content, file_index, custom_doc_index)

        # Should pick docs/agents/docs/testdoc.md (shares "docs", "agents")
        assert len(errors) == 0
        assert len(changes) == 1
        assert "agents/docs/testdoc.md" in new_content

    def test_skips_url_md_links(self, file_index, doc_index):
        """HTTP(S) .md links should be left untouched."""
        content = "[External](https://example.com/guide.md)"
        new_content, changes, errors = self._process(content, file_index, doc_index)

        assert len(errors) == 0
        assert len(changes) == 0
        assert new_content == content

    def test_preserves_fragment(self, file_index, doc_index):
        """Fragment (#section) should be preserved in resolved link."""
        content = "[Config](configuration.md#advanced)"
        new_content, changes, errors = self._process(content, file_index, doc_index)

        assert "#advanced" in new_content

    def test_skips_backtick_wrapped(self, file_index, doc_index):
        """Backtick-wrapped .md link text should be skipped by md_link_pattern."""
        content = "[`configuration.md`](configuration.md)"
        new_content, changes, errors = self._process(content, file_index, doc_index)

        # The code_pattern handles backtick links; md_link_pattern sees backticks and skips
        # No double-processing should occur
        assert "configuration.md" in new_content

    def test_md_links_in_ignore_region(self, file_index, doc_index):
        """Links in ignore regions should not be processed."""
        # NOTE(review): as shown here the content contains only blank lines
        # around the broken link, with no visible ignore-region delimiters —
        # confirm the markers are present in the real file, otherwise this
        # test does not exercise an ignore region at all.
        content = (
            "[Configuration](configuration.md)\n"
            "\n"
            "[Configuration](broken_nonexistent.md)\n"
            "\n"
            "[Configuration](configuration.md)"
        )
        new_content, changes, errors = self._process(content, file_index, doc_index)

        # The broken link in ignore region should not produce errors
        assert "broken_nonexistent.md" in new_content  # Preserved as-is
``` -More information on this in [doclinks](/docs/agents/docs_agent/doclinks.md) +More information on this in [doclinks](/docs/agents/docs/doclinks.md) # Pikchr diff --git a/docs/agents/pr.md b/docs/agents/pr.md new file mode 100644 index 0000000000..79522b8bae --- /dev/null +++ b/docs/agents/pr.md @@ -0,0 +1,51 @@ +# Opening PRs + +## Finding the Linear issue + +When opening a PR for a GitHub issue, find the corresponding Linear issue to reference. +Issues are cloned automatically so titles match exactly. + +1. Get the GitHub issue title: + ``` + gh issue view --repo dimensionalOS/dimos --json title --jq .title + ``` +2. Search Linear using the `@tacticlaunch/mcp-linear` MCP server (`linear_searchIssues` tool): + ``` + linear_searchIssues query="" limit=10 + ``` +3. Match by **exact title** (not substring/fuzzy). If a match is found, use its identifier (e.g. `DIM-569`) in the PR description with `Closes DIM-XXX`. +4. If no matching Linear issue is found, write `No matching Linear issue found` instead of `Closes DIM-XXX`. + +--- + +Below follows the PR description template you should use. + +## Problem + + + + +Closes DIM-XXX + +## Solution + + + + +## Breaking Changes + + + + + +## How to Test + + + +## Contributor License Agreement + +- [ ] I have read and approved the [CLA](https://github.com/dimensionalOS/dimos/blob/main/CLA.md). diff --git a/docs/capabilities/navigation/readme.md b/docs/capabilities/navigation/readme.md index af26c07f94..f36d795e62 100644 --- a/docs/capabilities/navigation/readme.md +++ b/docs/capabilities/navigation/readme.md @@ -3,8 +3,8 @@ ## Non-ROS -- [Go2 Navigation](native/index.md) — column-carving voxel mapping + slope-based costmap +- [Go2 Navigation](/docs/capabilities/navigation/native/index.md) — column-carving voxel mapping + slope-based costmap ## ROS -See [ROS Transports](/docs/api/transports.md) for bridging DimOS streams to ROS topics. 
+See [ROS Transports](/docs/usage/transports/index.md) for bridging DimOS streams to ROS topics. diff --git a/docs/platforms/quadruped/go2/index.md b/docs/platforms/quadruped/go2/index.md index 40f32bcdd2..07e3784404 100644 --- a/docs/platforms/quadruped/go2/index.md +++ b/docs/platforms/quadruped/go2/index.md @@ -11,9 +11,9 @@ The Unitree Go2 is DimOS's primary reference platform. Full autonomous navigatio ## Install First, install system dependencies for your platform: -- [Ubuntu](../../../installation/ubuntu.md) -- [macOS](../../../installation/osx.md) -- [Nix](../../../installation/nix.md) +- [Ubuntu](/docs/installation/ubuntu.md) +- [macOS](/docs/installation/osx.md) +- [Nix](/docs/installation/nix.md) Then install DimOS: @@ -106,8 +106,8 @@ The agent subscribes to camera, LiDAR, and spatial memory streams — it sees wh ## Deep Dive -- [Navigation Stack](../../../capabilities/navigation/native/index.md) — column-carving voxel mapping, costmap generation, A* planning -- [Visualization](../../../usage/visualization.md) — Rerun, Foxglove, performance tuning +- [Navigation Stack](/docs/capabilities/navigation/native/index.md) — column-carving voxel mapping, costmap generation, A* planning +- [Visualization](/docs/usage/visualization.md) — Rerun, Foxglove, performance tuning - [Data Streams](../../../usage/data_streams/) — RxPY streams, backpressure, quality filtering -- [Transports](../../../usage/transports/index.md) — LCM, SHM, DDS -- [Blueprints](../../../usage/blueprints.md) — composing modules +- [Transports](/docs/usage/transports/index.md) — LCM, SHM, DDS +- [Blueprints](/docs/usage/blueprints.md) — composing modules diff --git a/docs/usage/data_streams/README.md b/docs/usage/data_streams/README.md index dc2ce6c91d..870c25fb34 100644 --- a/docs/usage/data_streams/README.md +++ b/docs/usage/data_streams/README.md @@ -6,11 +6,11 @@ Dimos uses reactive streams (RxPY) to handle sensor data. 
This approach naturall | Guide | Description | |----------------------------------------------|---------------------------------------------------------------| -| [ReactiveX Fundamentals](reactivex.md) | Observables, subscriptions, and disposables | -| [Advanced Streams](advanced_streams.md) | Backpressure, parallel subscribers, synchronous getters | -| [Quality-Based Filtering](quality_filter.md) | Select highest quality frames when downsampling streams | -| [Temporal Alignment](temporal_alignment.md) | Match messages from multiple sensors by timestamp | -| [Storage & Replay](storage_replay.md) | Record sensor streams to disk and replay with original timing | +| [ReactiveX Fundamentals](/docs/usage/data_streams/reactivex.md) | Observables, subscriptions, and disposables | +| [Advanced Streams](/docs/usage/data_streams/advanced_streams.md) | Backpressure, parallel subscribers, synchronous getters | +| [Quality-Based Filtering](/docs/usage/data_streams/quality_filter.md) | Select highest quality frames when downsampling streams | +| [Temporal Alignment](/docs/usage/data_streams/temporal_alignment.md) | Match messages from multiple sensors by timestamp | +| [Storage & Replay](/docs/usage/data_streams/storage_replay.md) | Record sensor streams to disk and replay with original timing | ## Quick Example diff --git a/docs/usage/data_streams/advanced_streams.md b/docs/usage/data_streams/advanced_streams.md index 187d432af2..e9d9f1d12d 100644 --- a/docs/usage/data_streams/advanced_streams.md +++ b/docs/usage/data_streams/advanced_streams.md @@ -1,6 +1,6 @@ # Advanced Stream Handling -> **Prerequisite:** Read [ReactiveX Fundamentals](reactivex.md) first for Observable basics. +> **Prerequisite:** Read [ReactiveX Fundamentals](/docs/usage/data_streams/reactivex.md) first for Observable basics. 
## Backpressure and Parallel Subscribers to Hardware @@ -126,7 +126,7 @@ class MLModel(Module): Sometimes you don't want a stream, you just want to call a function and get the latest value. -If you are doing this periodically as a part of a processing loop, it is very likely that your code will be much cleaner and safer using actual reactivex pipeline. So bias towards checking our [reactivex quick guide](reactivex.md) and [official docs](https://rxpy.readthedocs.io/) +If you are doing this periodically as a part of a processing loop, it is very likely that your code will be much cleaner and safer using actual reactivex pipeline. So bias towards checking our [reactivex quick guide](/docs/usage/data_streams/reactivex.md) and [official docs](https://rxpy.readthedocs.io/) (TODO we should actually make this example actually executable) diff --git a/docs/usage/data_streams/temporal_alignment.md b/docs/usage/data_streams/temporal_alignment.md index 66230c9d54..c428c04e2e 100644 --- a/docs/usage/data_streams/temporal_alignment.md +++ b/docs/usage/data_streams/temporal_alignment.md @@ -34,7 +34,7 @@ Below we set up replay of real camera and lidar data from the Unitree Go2 robot.
Stream Setup -You can read more about [sensor storage here](storage_replay.md) and [LFS data storage here](/docs/development/large_file_management.md). +You can read more about [sensor storage here](/docs/usage/data_streams/storage_replay.md) and [LFS data storage here](/docs/development/large_file_management.md). ```python session=align no-result from reactivex import Subject @@ -196,7 +196,7 @@ plot_alignment_timeline(video_frames, lidar_scans, aligned_pairs, '{output}') ## Combine Frame Alignment with a Quality Filter -More on [quality filtering here](quality_filter.md). +More on [quality filtering here](/docs/usage/data_streams/quality_filter.md). ```python session=align from dimos.msgs.sensor_msgs.Image import Image, sharpness_barrier diff --git a/docs/usage/lcm.md b/docs/usage/lcm.md index 99437a2458..d089cfcdd3 100644 --- a/docs/usage/lcm.md +++ b/docs/usage/lcm.md @@ -7,7 +7,7 @@ The LCM project provides pubsub clients and code generators for many languages. Our messages are ported from ROS (they are structurally compatible in order to facilitate easy communication to ROS if needed) Repo that hosts our message definitions and autogenerators is at [dimos-lcm](https://github.com/dimensionalOS/dimos-lcm/) -our LCM implementation significantly [outperforms ROS for local communication](/docs/usage/transports.md#benchmarks) +our LCM implementation significantly [outperforms ROS for local communication](/docs/usage/transports/index.md#benchmarks) ## Supported languages diff --git a/docs/usage/sensor_streams/README.md b/docs/usage/sensor_streams/README.md index dc2ce6c91d..0bf61e98ef 100644 --- a/docs/usage/sensor_streams/README.md +++ b/docs/usage/sensor_streams/README.md @@ -6,11 +6,11 @@ Dimos uses reactive streams (RxPY) to handle sensor data. 
This approach naturall | Guide | Description | |----------------------------------------------|---------------------------------------------------------------| -| [ReactiveX Fundamentals](reactivex.md) | Observables, subscriptions, and disposables | -| [Advanced Streams](advanced_streams.md) | Backpressure, parallel subscribers, synchronous getters | -| [Quality-Based Filtering](quality_filter.md) | Select highest quality frames when downsampling streams | -| [Temporal Alignment](temporal_alignment.md) | Match messages from multiple sensors by timestamp | -| [Storage & Replay](storage_replay.md) | Record sensor streams to disk and replay with original timing | +| [ReactiveX Fundamentals](/docs/usage/sensor_streams/reactivex.md) | Observables, subscriptions, and disposables | +| [Advanced Streams](/docs/usage/sensor_streams/advanced_streams.md) | Backpressure, parallel subscribers, synchronous getters | +| [Quality-Based Filtering](/docs/usage/sensor_streams/quality_filter.md) | Select highest quality frames when downsampling streams | +| [Temporal Alignment](/docs/usage/sensor_streams/temporal_alignment.md) | Match messages from multiple sensors by timestamp | +| [Storage & Replay](/docs/usage/sensor_streams/storage_replay.md) | Record sensor streams to disk and replay with original timing | ## Quick Example diff --git a/docs/usage/sensor_streams/advanced_streams.md b/docs/usage/sensor_streams/advanced_streams.md index 187d432af2..c2cd0dbfca 100644 --- a/docs/usage/sensor_streams/advanced_streams.md +++ b/docs/usage/sensor_streams/advanced_streams.md @@ -1,6 +1,6 @@ # Advanced Stream Handling -> **Prerequisite:** Read [ReactiveX Fundamentals](reactivex.md) first for Observable basics. +> **Prerequisite:** Read [ReactiveX Fundamentals](/docs/usage/sensor_streams/reactivex.md) first for Observable basics. 
## Backpressure and Parallel Subscribers to Hardware @@ -126,7 +126,7 @@ class MLModel(Module): Sometimes you don't want a stream, you just want to call a function and get the latest value. -If you are doing this periodically as a part of a processing loop, it is very likely that your code will be much cleaner and safer using actual reactivex pipeline. So bias towards checking our [reactivex quick guide](reactivex.md) and [official docs](https://rxpy.readthedocs.io/) +If you are doing this periodically as a part of a processing loop, it is very likely that your code will be much cleaner and safer using actual reactivex pipeline. So bias towards checking our [reactivex quick guide](/docs/usage/sensor_streams/reactivex.md) and [official docs](https://rxpy.readthedocs.io/) (TODO we should actually make this example actually executable) diff --git a/docs/usage/sensor_streams/temporal_alignment.md b/docs/usage/sensor_streams/temporal_alignment.md index 66230c9d54..7d1ad074f2 100644 --- a/docs/usage/sensor_streams/temporal_alignment.md +++ b/docs/usage/sensor_streams/temporal_alignment.md @@ -34,7 +34,7 @@ Below we set up replay of real camera and lidar data from the Unitree Go2 robot.
Stream Setup -You can read more about [sensor storage here](storage_replay.md) and [LFS data storage here](/docs/development/large_file_management.md). +You can read more about [sensor storage here](/docs/usage/sensor_streams/storage_replay.md) and [LFS data storage here](/docs/development/large_file_management.md). ```python session=align no-result from reactivex import Subject @@ -196,7 +196,7 @@ plot_alignment_timeline(video_frames, lidar_scans, aligned_pairs, '{output}') ## Combine Frame Alignment with a Quality Filter -More on [quality filtering here](quality_filter.md). +More on [quality filtering here](/docs/usage/sensor_streams/quality_filter.md). ```python session=align from dimos.msgs.sensor_msgs.Image import Image, sharpness_barrier diff --git a/docs/usage/transports.md b/docs/usage/transports.md deleted file mode 100644 index 4c80776531..0000000000 --- a/docs/usage/transports.md +++ /dev/null @@ -1,437 +0,0 @@ -# Transports - -Transports connect **module streams** across **process boundaries** and/or **networks**. - -* **Module**: a running component (e.g., camera, mapping, nav). -* **Stream**: a unidirectional flow of messages owned by a module (one broadcaster → many receivers). -* **Topic**: the name/identifier used by a transport or pubsub backend. -* **Message**: payload carried on a stream (often `dimos.msgs.*`, but can be bytes / images / pointclouds / etc.). - -Each edge in the graph is a **transported stream** (potentially different protocols). Each node is a **module**: - -![go2_nav](assets/go2_nav.svg) - -## What the transport layer guarantees (and what it doesn’t) - -Modules **don’t** know or care *how* data moves. They just: - -* emit messages (broadcast) -* subscribe to messages (receive) - -A transport is responsible for the mechanics of delivery (IPC, sockets, Redis, ROS 2, etc.). - -**Important:** delivery semantics depend on the backend: - -* Some are **best-effort** (e.g., UDP multicast / LCM): loss can happen. 
-* Some can be **reliable** (e.g., TCP-backed, Redis, some DDS configs) but may add latency/backpressure. - -So: treat the API as uniform, but pick a backend whose semantics match the task. - ---- - -## Benchmarks - -Quick view on performance of our pubsub backends: - -```sh skip -python -m pytest -svm tool -k "not bytes" dimos/protocol/pubsub/benchmark/test_benchmark.py -``` - -![Benchmark results](assets/pubsub_benchmark.png) - ---- - -## Abstraction layers - -
Pikchr - -```pikchr output=assets/abstraction_layers.svg fold -color = white -fill = none -linewid = 0.5in -boxwid = 1.0in -boxht = 0.4in - -# Boxes with labels -B: box "Blueprints" rad 10px -arrow -M: box "Modules" rad 5px -arrow -T: box "Transports" rad 5px -arrow -P: box "PubSub" rad 5px - -# Descriptions below -text "robot configs" at B.s + (0.1, -0.2in) -text "camera, nav" at M.s + (0, -0.2in) -text "LCM, SHM, ROS" at T.s + (0, -0.2in) -text "pub/sub API" at P.s + (0, -0.2in) -``` - -
- - -![output](assets/abstraction_layers.svg) - -We’ll go through these layers top-down. - ---- - -## Using transports with blueprints - -See [Blueprints](blueprints.md) for the blueprint API. - -From [`unitree/go2/blueprints/__init__.py`](/dimos/robot/unitree/go2/blueprints/__init__.py). - -Example: rebind a few streams from the default `LCMTransport` to `ROSTransport` (defined at [`transport.py`](/dimos/core/transport.py#L226)) so you can visualize in **rviz2**. - -```python skip -nav = autoconnect( - basic, - voxel_mapper(voxel_size=0.1), - cost_mapper(), - replanning_a_star_planner(), - wavefront_frontier_explorer(), -).global_config(n_dask_workers=6, robot_model="unitree_go2") - -ros = nav.transports( - { - ("lidar", PointCloud2): ROSTransport("lidar", PointCloud2), - ("global_map", PointCloud2): ROSTransport("global_map", PointCloud2), - ("odom", PoseStamped): ROSTransport("odom", PoseStamped), - ("color_image", Image): ROSTransport("color_image", Image), - } -) -``` - ---- - -## Using transports with modules - -Each **stream** on a module can use a different transport. Set `.transport` on the stream **before starting** modules. 
- -```python ansi=false -import time - -from dimos.core import In, Module, start -from dimos.core.transport import LCMTransport -from dimos.hardware.sensors.camera.module import CameraModule -from dimos.msgs.sensor_msgs import Image - - -class ImageListener(Module): - image: In[Image] - - def start(self): - super().start() - self.image.subscribe(lambda img: print(f"Received: {img.shape}")) - - -if __name__ == "__main__": - # Start local cluster and deploy modules to separate processes - dimos = start(2) - - camera = dimos.deploy(CameraModule, frequency=2.0) - listener = dimos.deploy(ImageListener) - - # Choose a transport for the stream (example: LCM typed channel) - camera.color_image.transport = LCMTransport("/camera/rgb", Image) - - # Connect listener input to camera output - listener.image.connect(camera.color_image) - - camera.start() - listener.start() - - time.sleep(2) - dimos.stop() -``` - - - -``` -Initialized dimos local cluster with 2 workers, memory limit: auto -2026-01-24T13:17:50.190559Z [info ] Deploying module. [dimos/core/__init__.py] module=CameraModule -2026-01-24T13:17:50.218466Z [info ] Deployed module. [dimos/core/__init__.py] module=CameraModule worker_id=1 -2026-01-24T13:17:50.229474Z [info ] Deploying module. [dimos/core/__init__.py] module=ImageListener -2026-01-24T13:17:50.250199Z [info ] Deployed module. [dimos/core/__init__.py] module=ImageListener worker_id=0 -Received: (480, 640, 3) -Received: (480, 640, 3) -Received: (480, 640, 3) -``` - -See [Modules](modules.md) for more on module architecture. - ---- - -## Inspecting LCM traffic (CLI) - -`lcmspy` shows topic frequency/bandwidth stats: - -![lcmspy](assets/lcmspy.png) - -`dimos topic echo /topic` listens on typed channels like `/topic#pkg.Msg` and decodes automatically: - -```sh skip -Listening on /camera/rgb (inferring from typed LCM channels like '/camera/rgb#pkg.Msg')... 
(Ctrl+C to stop) -Image(shape=(480, 640, 3), format=RGB, dtype=uint8, dev=cpu, ts=2026-01-24 20:28:59) -``` - ---- - -## Implementing a transport - -At the stream layer, a transport is implemented by subclassing `Transport` (see [`core/stream.py`](/dimos/core/stream.py#L83)) and implementing: - -* `broadcast(...)` -* `subscribe(...)` - -Your `Transport.__init__` args can be anything meaningful for your backend: - -* `(ip, port)` -* a shared-memory segment name -* a filesystem path -* a Redis channel - -Encoding is an implementation detail, but we encourage using LCM-compatible message types when possible. - -### Encoding helpers - -Many of our message types provide `lcm_encode` / `lcm_decode` for compact, language-agnostic binary encoding (often faster than pickle). For details, see [LCM](/docs/usage/lcm.md). - ---- - -## PubSub transports - -Even though transport can be anything (TCP connection, unix socket) for now all our transport backends implement the `PubSub` interface. - -* `publish(topic, message)` -* `subscribe(topic, callback) -> unsubscribe` - -```python -from dimos.protocol.pubsub.spec import PubSub -import inspect - -print(inspect.getsource(PubSub.publish)) -print(inspect.getsource(PubSub.subscribe)) -``` - - -```python - @abstractmethod - def publish(self, topic: TopicT, message: MsgT) -> None: - """Publish a message to a topic.""" - ... - - @abstractmethod - def subscribe( - self, topic: TopicT, callback: Callable[[MsgT, TopicT], None] - ) -> Callable[[], None]: - """Subscribe to a topic with a callback. returns unsubscribe function""" - ... -``` - -Topic/message types are flexible: bytes, JSON, or our ROS-compatible [LCM](/docs/usage/lcm.md) types. We also have pickle-based transports for arbitrary Python objects. - -### LCM (UDP multicast) - -LCM is UDP multicast. It’s very fast on a robot LAN, but it’s **best-effort** (packets can drop). 
-For local emission it autoconfigures system in a way in which it's more robust and faster then other more common protocols like ROS, DDS - -```python -from dimos.protocol.pubsub.lcmpubsub import LCM, Topic -from dimos.msgs.geometry_msgs import Vector3 - -lcm = LCM(autoconf=True) -lcm.start() - -received = [] -topic = Topic("/robot/velocity", Vector3) - -lcm.subscribe(topic, lambda msg, t: received.append(msg)) -lcm.publish(topic, Vector3(1.0, 0.0, 0.5)) - -import time -time.sleep(0.1) - -print(f"Received velocity: x={received[0].x}, y={received[0].y}, z={received[0].z}") -lcm.stop() -``` - - -``` -Received velocity: x=1.0, y=0.0, z=0.5 -``` - -### Shared memory (IPC) - -Shared memory is highest performance, but only works on the **same machine**. - -```python -from dimos.protocol.pubsub.shmpubsub import PickleSharedMemory - -shm = PickleSharedMemory(prefer="cpu") -shm.start() - -received = [] -shm.subscribe("test/topic", lambda msg, topic: received.append(msg)) -shm.publish("test/topic", {"data": [1, 2, 3]}) - -import time -time.sleep(0.1) - -print(f"Received: {received}") -shm.stop() -``` - - -``` -Received: [{'data': [1, 2, 3]}] -``` - -### DDS Transport - -For network communication, DDS uses the Data Distribution Service (DDS) protocol: - -```python session=dds_demo ansi=false -from dataclasses import dataclass -from cyclonedds.idl import IdlStruct - -from dimos.protocol.pubsub.impl.ddspubsub import DDS, Topic - -@dataclass -class SensorReading(IdlStruct): - value: float - -dds = DDS() -dds.start() - -received = [] -sensor_topic = Topic(name="sensors/temperature", data_type=SensorReading) - -dds.subscribe(sensor_topic, lambda msg, t: received.append(msg)) -dds.publish(sensor_topic, SensorReading(value=22.5)) - -import time -time.sleep(0.1) - -print(f"Received: {received}") -dds.stop() -``` - - -``` -Received: [SensorReading(value=22.5)] -``` - ---- - -## A minimal transport: `Memory` - -The simplest toy backend is `Memory` (single process). 
Start from there when implementing a new pubsub backend. - -```python -from dimos.protocol.pubsub.memory import Memory - -bus = Memory() -received = [] - -unsubscribe = bus.subscribe("sensor/data", lambda msg, topic: received.append(msg)) - -bus.publish("sensor/data", {"temperature": 22.5}) -bus.publish("sensor/data", {"temperature": 23.0}) - -print(f"Received {len(received)} messages:") -for msg in received: - print(f" {msg}") - -unsubscribe() -``` - - -``` -Received 2 messages: - {'temperature': 22.5} - {'temperature': 23.0} -``` - -See [`memory.py`](/dimos/protocol/pubsub/impl/memory.py) for the complete source. - ---- - -## Encode/decode mixins - -Transports often need to serialize messages before sending and deserialize after receiving. - -`PubSubEncoderMixin` at [`pubsub/spec.py`](/dimos/protocol/pubsub/spec.py#L95) provides a clean way to add encoding/decoding to any pubsub implementation. - -### Available mixins - -| Mixin | Encoding | Use case | -|----------------------|-----------------|------------------------------------| -| `PickleEncoderMixin` | Python pickle | Any Python object, Python-only | -| `LCMEncoderMixin` | LCM binary | Cross-language (C/C++/Python/Go/…) | -| `JpegEncoderMixin` | JPEG compressed | Image data, reduces bandwidth | - -`LCMEncoderMixin` is especially useful: you can use LCM message definitions with *any* transport (not just UDP multicast). See [LCM](/docs/usage/lcm.md) for details. 
- -### Creating a custom mixin - -```python session=jsonencoder no-result -from dimos.protocol.pubsub.spec import PubSubEncoderMixin -import json - -class JsonEncoderMixin(PubSubEncoderMixin[str, dict, bytes]): - def encode(self, msg: dict, topic: str) -> bytes: - return json.dumps(msg).encode("utf-8") - - def decode(self, msg: bytes, topic: str) -> dict: - return json.loads(msg.decode("utf-8")) -``` - -Combine with a pubsub implementation via multiple inheritance: - -```python session=jsonencoder no-result -from dimos.protocol.pubsub.memory import Memory - -class MyJsonPubSub(JsonEncoderMixin, Memory): - pass -``` - -Swap serialization by changing the mixin: - -```python session=jsonencoder no-result -from dimos.protocol.pubsub.spec import PickleEncoderMixin - -class MyPicklePubSub(PickleEncoderMixin, Memory): - pass -``` - ---- - -## Testing and benchmarks - -### Spec tests - -See [`pubsub/test_spec.py`](/dimos/protocol/pubsub/test_spec.py) for the grid tests your new backend should pass. 
- -### Benchmarks - -Add your backend to benchmarks to compare in context: - -```sh skip -python -m pytest -svm tool -k "not bytes" dimos/protocol/pubsub/benchmark/test_benchmark.py -``` - ---- - -# Available transports - -| Transport | Use case | Cross-process | Network | Notes | -|----------------|-------------------------------------|---------------|---------|--------------------------------------| -| `Memory` | Testing only, single process | No | No | Minimal reference impl | -| `SharedMemory` | Multi-process on same machine | Yes | No | Highest throughput (IPC) | -| `LCM` | Robot LAN broadcast (UDP multicast) | Yes | Yes | Best-effort; can drop packets on LAN | -| `Redis` | Network pubsub via Redis server | Yes | Yes | Central broker; adds hop | -| `ROS` | ROS 2 topic communication | Yes | Yes | Integrates with RViz/ROS tools | -| `DDS` | Cyclone DDS without ROS (WIP) | Yes | Yes | WIP | diff --git a/docs/usage/transports/index.md b/docs/usage/transports/index.md index 748cf03aa1..1c8745d117 100644 --- a/docs/usage/transports/index.md +++ b/docs/usage/transports/index.md @@ -79,7 +79,7 @@ We’ll go through these layers top-down. ## Using transports with blueprints -See [Blueprints](blueprints.md) for the blueprint API. +See [Blueprints](/docs/usage/blueprints.md) for the blueprint API. From [`unitree/go2/blueprints/__init__.py`](/dimos/robot/unitree/go2/blueprints/__init__.py). @@ -160,7 +160,7 @@ Received: (480, 640, 3) Received: (480, 640, 3) ``` -See [Modules](modules.md) for more on module architecture. +See [Modules](/docs/usage/modules.md) for more on module architecture. 
--- From 4a6848b94a761ecac9d319794c6a04c1ac8f3f7b Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 23 Feb 2026 00:26:29 +0800 Subject: [PATCH 2/3] refactor --- dimos/utils/docs/doclinks.py | 243 ++++++++++++++------------ dimos/utils/docs/test_doclinks.py | 87 ++++++++- docs/platforms/quadruped/go2/index.md | 2 +- 3 files changed, 221 insertions(+), 111 deletions(-) diff --git a/dimos/utils/docs/doclinks.py b/dimos/utils/docs/doclinks.py index 7f0665d41d..2cf5d1702f 100644 --- a/dimos/utils/docs/doclinks.py +++ b/dimos/utils/docs/doclinks.py @@ -30,6 +30,7 @@ import re import subprocess import sys +import time from typing import Any @@ -78,7 +79,7 @@ def get_git_tracked_files(root: Path) -> list[Path]: return [] -def build_file_index(root: Path) -> dict[str, list[Path]]: +def build_file_index(root: Path, tracked_files: list[Path] | None = None) -> dict[str, list[Path]]: """ Build an index mapping filename suffixes to full paths. @@ -89,7 +90,8 @@ def build_file_index(root: Path) -> dict[str, list[Path]]: - dimos/protocol/service/spec.py """ index: dict[str, list[Path]] = defaultdict(list) - tracked_files = get_git_tracked_files(root) + if tracked_files is None: + tracked_files = get_git_tracked_files(root) for rel_path in tracked_files: parts = rel_path.parts @@ -102,7 +104,7 @@ def build_file_index(root: Path) -> dict[str, list[Path]]: return index -def build_doc_index(root: Path) -> dict[str, list[Path]]: +def build_doc_index(root: Path, tracked_files: list[Path] | None = None) -> dict[str, list[Path]]: """ Build an index mapping lowercase doc names to .md file paths. 
@@ -113,7 +115,8 @@ def build_doc_index(root: Path) -> dict[str, list[Path]]: - "modules" -> [Path("docs/modules/index.md")] (if modules/index.md exists) """ index: dict[str, list[Path]] = defaultdict(list) - tracked_files = get_git_tracked_files(root) + if tracked_files is None: + tracked_files = get_git_tracked_files(root) for rel_path in tracked_files: if rel_path.suffix != ".md": @@ -144,11 +147,40 @@ def find_symbol_line(file_path: Path, symbol: str) -> int | None: return None +# Extensions that indicate a backticked term is a filename, not a symbol +_FILE_EXTENSIONS = frozenset( + ( + ".py", + ".md", + ".ts", + ".js", + ".go", + ".rs", + ".c", + ".h", + ".cpp", + ".hpp", + ".java", + ".rb", + ".yaml", + ".yml", + ".json", + ".toml", + ".sh", + ".lua", + ) +) + + def extract_other_backticks(line: str, file_ref: str) -> list[str]: """Extract other backticked terms from a line, excluding the file reference.""" pattern = r"`([^`]+)`" matches = re.findall(pattern, line) - return [m for m in matches if m != file_ref and not m.endswith(".py") and "/" not in m] + return [ + m + for m in matches + if m != file_ref and "/" not in m and not any(m.endswith(ext) for ext in _FILE_EXTENSIONS) + ] def score_path_similarity(candidate: Path, original_path: str) -> int: @@ -180,6 +212,15 @@ def pick_best_candidate(candidates: list[Path], original_path: str) -> Path | No return None # Ambiguous tie +def resolve_candidates(candidates: list[Path], original_path: str) -> Path | None: + """Resolve candidates to a single path. Returns None if 0 or ambiguous.""" + if len(candidates) == 1: + return candidates[0] + if len(candidates) > 1: + return pick_best_candidate(candidates, original_path) + return None + + def generate_link( rel_path: Path, root: Path, @@ -274,17 +315,32 @@ def process_markdown( Returns (new_content, changes, errors). 
""" - changes = [] - errors = [] + changes: list[str] = [] + errors: list[str] = [] - # Pattern 1: [`filename`](link) - code file links + # Pattern 1: [`filename`](link) - backtick code links with symbol auto-linking code_pattern = r"\[`([^`]+)`\]\(([^)]*)\)" - # Pattern 2: [Text](.md) - doc file links - doc_pattern = r"\[([^\]]+)\]\(\.md\)" + # Pattern 2: [Text](url) - all non-backtick, non-image links + # (? tuple[Path | None, list[Path]]: + """Search for a broken link's target by name in doc_index or file_index.""" + path = Path(link_path) + if path.suffix == ".md": + stem = path.stem.lower() + if stem == "index": + stem = path.parent.name.lower() + candidates = doc_index.get(stem, []) if doc_index else [] + elif path.suffix: + # Has a file extension — search file_index by filename + candidates = file_index.get(path.name, []) + else: + # No extension (likely a directory) — no fallback search + return None, [] + return resolve_candidates(candidates, original_ref), candidates def replace_code_match(match: re.Match[str]) -> str: file_ref = match.group(1) @@ -299,18 +355,19 @@ def replace_code_match(match: re.Match[str]) -> str: if "." 
not in file_ref and "/" not in file_ref: return full_match - # Look up in index + # Look up in index, with disambiguation candidates = file_index.get(file_ref, []) + resolved_path = resolve_candidates(candidates, file_ref) - if len(candidates) == 0: - errors.append(f"No file matching '{file_ref}' found in codebase") - return full_match - elif len(candidates) > 1: - errors.append(f"'{file_ref}' matches multiple files: {[str(c) for c in candidates]}") + if resolved_path is None: + if len(candidates) > 1: + errors.append( + f"'{file_ref}' matches multiple files: {[str(c) for c in candidates]}" + ) + else: + errors.append(f"No file matching '{file_ref}' found in codebase") return full_match - resolved_path = candidates[0] - # Determine line fragment line_fragment = "" @@ -345,46 +402,18 @@ def replace_code_match(match: re.Match[str]) -> str: return new_match - def replace_doc_match(match: re.Match[str]) -> str: - """Replace [Text](.md) with resolved doc path.""" - if doc_index is None: - return match.group(0) - - link_text = match.group(1) - full_match = match.group(0) - lookup_key = link_text.lower() - - # Look up in doc index - candidates = doc_index.get(lookup_key, []) - - if len(candidates) == 0: - errors.append(f"No doc matching '{link_text}' found") - return full_match - elif len(candidates) > 1: - errors.append(f"'{link_text}' matches multiple docs: {[str(c) for c in candidates]}") - return full_match - - resolved_path = candidates[0] - new_link = generate_link(resolved_path, root, doc_path, link_mode, github_url, github_ref) - new_match = f"[{link_text}]({new_link})" - - if new_match != full_match: - changes.append(f" {link_text}: .md -> {new_link}") - - return new_match - - def replace_md_link_match(match: re.Match[str]) -> str: - """Verify and resolve relative .md links to absolute paths.""" + def replace_link_match(match: re.Match[str]) -> str: + """Handle all non-backtick links: doc placeholders, path validation.""" link_text = match.group(1) raw_link = 
match.group(2) full_match = match.group(0) - # Skip backtick-wrapped text (code links handled by code_pattern) - if link_text.startswith("`") and link_text.endswith("`"): + # Skip URLs + if raw_link.startswith(("http://", "https://", "mailto:")): return full_match - # Skip URLs - if raw_link.startswith(("http://", "https://")): + # Skip anchor-only links + if raw_link.startswith("#"): return full_match # Extract fragment if present @@ -394,26 +423,37 @@ def replace_md_link_match(match: re.Match[str]) -> str: link_path, frag = raw_link.split("#", 1) fragment = "#" + frag - # Skip bare .md placeholder (handled by doc_pattern) + # .md placeholder: [Text](.md) → doc_index lookup by link text if link_path == ".md": + if doc_index is None: + return full_match + lookup_key = link_text.lower() + candidates = doc_index.get(lookup_key, []) + resolved = resolve_candidates(candidates, lookup_key) + if resolved is not None: + new_link = generate_link( + resolved, root, doc_path, link_mode, github_url, github_ref, fragment + ) + result = f"[{link_text}]({new_link})" + if result != full_match: + changes.append(f" {link_text}: .md -> {new_link}") + return result + if len(candidates) > 1: + errors.append( + f"'{link_text}' matches multiple docs: {[str(c) for c in candidates]}" + ) + else: + errors.append(f"No doc matching '{link_text}' found") return full_match - # Already absolute link - validate only + # Absolute path if link_path.startswith("/"): target = root / link_path.lstrip("/") if target.exists(): return full_match # Valid, leave as-is - # Broken absolute link - try search - stem = Path(link_path).stem.lower() - if stem == "index": - stem = Path(link_path).parent.name.lower() - candidates = doc_index.get(stem, []) if doc_index else [] - if len(candidates) == 1: - resolved = candidates[0] - elif len(candidates) > 1: - resolved = pick_best_candidate(candidates, link_path.lstrip("/")) - else: - resolved = None + + # Broken — try fallback search + resolved, candidates = 
_search_fallback(link_path, link_path.lstrip("/")) if resolved is not None: new_link = generate_link( resolved, root, doc_path, link_mode, github_url, github_ref, fragment @@ -422,25 +462,24 @@ def replace_md_link_match(match: re.Match[str]) -> str: return f"[{link_text}]({new_link})" if len(candidates) > 1: errors.append( - f"Broken link '{raw_link}': multiple docs match '{stem}': " - f"{[str(c) for c in candidates]}" + f"Broken link '{raw_link}': ambiguous, matches {[str(c) for c in candidates]}" ) else: errors.append(f"Broken link: '{raw_link}' does not exist") return full_match - # Relative link - resolve from doc file's directory + # Relative path — resolve from doc file's directory doc_dir = doc_path.parent - resolved = (doc_dir / link_path).resolve() + resolved_abs = (doc_dir / link_path).resolve() try: - rel_to_root = resolved.relative_to(root) + rel_to_root = resolved_abs.relative_to(root) except ValueError: errors.append(f"Link '{raw_link}' resolves outside repo root") return full_match - if resolved.exists(): - # File exists - convert to appropriate link format + if resolved_abs.exists(): + # File exists — convert to appropriate link format new_link = generate_link( rel_to_root, root, doc_path, link_mode, github_url, github_ref, fragment ) @@ -448,34 +487,22 @@ def replace_md_link_match(match: re.Match[str]) -> str: if result != full_match: changes.append(f" {link_text}: {raw_link} -> {new_link}") return result + + # Target doesn't exist — try fallback search + resolved, candidates = _search_fallback(link_path, raw_link) + if resolved is not None: + new_link = generate_link( + resolved, root, doc_path, link_mode, github_url, github_ref, fragment + ) + changes.append(f" {link_text}: {raw_link} -> {new_link} (found by search)") + return f"[{link_text}]({new_link})" + if len(candidates) > 1: + errors.append( + f"Broken link '{raw_link}': ambiguous, matches {[str(c) for c in candidates]}" + ) else: - # Target doesn't exist - try searching by doc name - stem 
= Path(link_path).stem.lower() - if stem == "index": - stem = Path(link_path).parent.name.lower() - candidates = doc_index.get(stem, []) if doc_index else [] - if len(candidates) == 1: - resolved_doc = candidates[0] - elif len(candidates) > 1: - resolved_doc = pick_best_candidate(candidates, raw_link) - else: - resolved_doc = None - if resolved_doc is not None: - new_link = generate_link( - resolved_doc, root, doc_path, link_mode, github_url, github_ref, fragment - ) - changes.append(f" {link_text}: {raw_link} -> {new_link} (found by search)") - return f"[{link_text}]({new_link})" - if len(candidates) > 1: - errors.append( - f"Broken link '{raw_link}': multiple docs match '{stem}': " - f"{[str(c) for c in candidates]}" - ) - else: - errors.append( - f"Broken link '{raw_link}': target not found, no doc matching '{stem}'" - ) - return full_match + errors.append(f"Broken link '{raw_link}': target not found") + return full_match # Split by ignore regions and only process non-ignored parts regions = split_by_ignore_regions(content) @@ -483,10 +510,9 @@ def replace_md_link_match(match: re.Match[str]) -> str: for region_content, should_process in regions: if should_process: - # Process code links first, then doc links, then .md link verification + # Process code links first, then all other links processed = re.sub(code_pattern, replace_code_match, region_content) - processed = re.sub(doc_pattern, replace_doc_match, processed) - processed = re.sub(md_link_pattern, replace_md_link_match, processed) + processed = re.sub(link_pattern, replace_link_match, processed) result_parts.append(processed) else: result_parts.append(region_content) @@ -514,7 +540,7 @@ def collect_markdown_files(paths: list[str]) -> list[Path]: Also auto-links symbols: `Configurable` on same line adds #L fragment. Supports doc-to-doc linking: [Modules](.md) resolves to modules.md or modules/index.md. -Verifies existing .md links and fixes broken relative/absolute paths by searching docs. 
+Validates all file links and fixes broken relative/absolute paths by searching the index. Usage: doclinks [options] @@ -609,8 +635,9 @@ def main() -> None: sys.exit(1) print(f"Building file index from {root}...") - file_index = build_file_index(root) - doc_index = build_doc_index(root) + tracked_files = get_git_tracked_files(root) + file_index = build_file_index(root, tracked_files) + doc_index = build_doc_index(root, tracked_files) print( f"Indexed {sum(len(v) for v in file_index.values())} file paths, {len(doc_index)} doc names" ) @@ -689,8 +716,6 @@ def on_created(self, event: Any) -> None: observer.start() try: while True: - import time - time.sleep(1) except KeyboardInterrupt: observer.stop() diff --git a/dimos/utils/docs/test_doclinks.py b/dimos/utils/docs/test_doclinks.py index 4cd09e0504..968f465cef 100644 --- a/dimos/utils/docs/test_doclinks.py +++ b/dimos/utils/docs/test_doclinks.py @@ -23,6 +23,7 @@ find_symbol_line, pick_best_candidate, process_markdown, + resolve_candidates, score_path_similarity, split_by_ignore_regions, ) @@ -566,7 +567,28 @@ def test_pick_best_tie_returns_none(self): assert best is None -class TestMdLinkResolution: +class TestResolveCandidates: + def test_single_candidate(self): + candidates = [Path("docs/usage/modules.md")] + assert resolve_candidates(candidates, "modules.md") == candidates[0] + + def test_empty_candidates(self): + assert resolve_candidates([], "modules.md") is None + + def test_disambiguates(self): + candidates = [ + Path("docs/other/codeblocks.md"), + Path("docs/agents/docs/codeblocks.md"), + ] + result = resolve_candidates(candidates, "docs/agents/docs_agent/codeblocks.md") + assert result == Path("docs/agents/docs/codeblocks.md") + + def test_tie_returns_none(self): + candidates = [Path("a/x/file.md"), Path("b/x/file.md")] + assert resolve_candidates(candidates, "c/x/file.md") is None + + +class TestLinkResolution: def _process(self, content, file_index, doc_index, doc_path=None, link_mode="absolute"): if 
doc_path is None: doc_path = REPO_ROOT / "docs/usage/test.md" @@ -689,6 +711,69 @@ def test_md_links_in_ignore_region(self, file_index, doc_index): # The broken link in ignore region should not produce errors assert "broken_nonexistent.md" in new_content # Preserved as-is + def test_validates_absolute_py_link(self, file_index, doc_index): + """Valid absolute .py link (without backticks) should be left unchanged.""" + content = "[spec](/dimos/protocol/service/spec.py)" + new_content, changes, errors = self._process(content, file_index, doc_index) + + assert len(errors) == 0 + assert new_content == content + + def test_broken_py_link_searches_file_index(self, file_index, doc_index): + """Broken .py link should fall back to file_index search.""" + content = "[spec](/nonexistent/path/service/spec.py)" + new_content, changes, errors = self._process(content, file_index, doc_index) + + # service/spec.py is unique in file_index — should resolve + # But spec.py alone is ambiguous, so it depends on disambiguation + # The fallback searches by filename (spec.py) which has multiple matches + # pick_best_candidate should resolve via path similarity + if len(errors) == 0: + assert "fixed broken link" in changes[0] + # If ambiguous, at least we get an error not a silent pass + else: + assert "Broken link" in errors[0] + + def test_validates_directory_link(self, file_index, doc_index): + """Valid directory link should be left unchanged.""" + content = "[examples](/examples/)" + doc_path = REPO_ROOT / "docs/test.md" + new_content, changes, errors = process_markdown( + content, + REPO_ROOT, + doc_path, + file_index, + link_mode="absolute", + github_url=None, + github_ref="main", + doc_index=doc_index, + ) + + if (REPO_ROOT / "examples").exists(): + assert len(errors) == 0 + assert new_content == content + else: + # Directory doesn't exist — should error + assert len(errors) == 1 + + def test_skips_image_links(self, file_index, doc_index): + """Image links ![alt](path) should not be 
processed.""" + content = "![screenshot](assets/screenshot.png)" + new_content, changes, errors = self._process(content, file_index, doc_index) + + assert len(errors) == 0 + assert len(changes) == 0 + assert new_content == content + + def test_skips_mailto_links(self, file_index, doc_index): + """mailto: links should be left untouched.""" + content = "[Email](mailto:test@example.com)" + new_content, changes, errors = self._process(content, file_index, doc_index) + + assert len(errors) == 0 + assert len(changes) == 0 + assert new_content == content + if __name__ == "__main__": pytest.main([__file__, "-v"]) diff --git a/docs/platforms/quadruped/go2/index.md b/docs/platforms/quadruped/go2/index.md index 07e3784404..df14e134fd 100644 --- a/docs/platforms/quadruped/go2/index.md +++ b/docs/platforms/quadruped/go2/index.md @@ -108,6 +108,6 @@ The agent subscribes to camera, LiDAR, and spatial memory streams — it sees wh - [Navigation Stack](/docs/capabilities/navigation/native/index.md) — column-carving voxel mapping, costmap generation, A* planning - [Visualization](/docs/usage/visualization.md) — Rerun, Foxglove, performance tuning -- [Data Streams](../../../usage/data_streams/) — RxPY streams, backpressure, quality filtering +- [Data Streams](/docs/usage/data_streams) — RxPY streams, backpressure, quality filtering - [Transports](/docs/usage/transports/index.md) — LCM, SHM, DDS - [Blueprints](/docs/usage/blueprints.md) — composing modules From 147da5f9a0b44a00dee1d3975a7fb136930daf05 Mon Sep 17 00:00:00 2001 From: Ivan Nikolic Date: Mon, 23 Feb 2026 00:39:22 +0800 Subject: [PATCH 3/3] chore: remove pr.md from branch --- docs/agents/pr.md | 51 ----------------------------------------------- 1 file changed, 51 deletions(-) delete mode 100644 docs/agents/pr.md diff --git a/docs/agents/pr.md b/docs/agents/pr.md deleted file mode 100644 index 79522b8bae..0000000000 --- a/docs/agents/pr.md +++ /dev/null @@ -1,51 +0,0 @@ -# Opening PRs - -## Finding the Linear issue - -When 
opening a PR for a GitHub issue, find the corresponding Linear issue to reference. -Issues are cloned automatically so titles match exactly. - -1. Get the GitHub issue title: - ``` - gh issue view --repo dimensionalOS/dimos --json title --jq .title - ``` -2. Search Linear using the `@tacticlaunch/mcp-linear` MCP server (`linear_searchIssues` tool): - ``` - linear_searchIssues query="" limit=10 - ``` -3. Match by **exact title** (not substring/fuzzy). If a match is found, use its identifier (e.g. `DIM-569`) in the PR description with `Closes DIM-XXX`. -4. If no matching Linear issue is found, write `No matching Linear issue found` instead of `Closes DIM-XXX`. - ---- - -Below follows the PR description template you should use. - -## Problem - - - - -Closes DIM-XXX - -## Solution - - - - -## Breaking Changes - - - - - -## How to Test - - - -## Contributor License Agreement - -- [ ] I have read and approved the [CLA](https://github.com/dimensionalOS/dimos/blob/main/CLA.md).