From 775ca8af373f475c19b136ab44f36e04bab162bf Mon Sep 17 00:00:00 2001 From: Thorrester Date: Fri, 15 May 2026 11:24:36 -0400 Subject: [PATCH 1/2] update contracts --- .claude/skills/opsml-rust-python/SKILL.md | 49 + .../references/agent-harness.md | 55 ++ .../references/architecture.md | 73 ++ .../opsml-rust-python/references/errors.md | 84 ++ .../references/pyo3-boundaries.md | 89 ++ .../references/python-api-and-stubs.md | 59 ++ .../opsml-rust-python/references/rust-core.md | 101 ++ .../references/testing-workflows.md | 64 ++ .claude/skills/opsml-ts-svelte/SKILL.md | 881 ------------------ .claude/skills/opsml-ui/SKILL.md | 1 + .codex/skills/opsml-rust-python/SKILL.md | 49 + .../references/agent-harness.md | 55 ++ .../references/architecture.md | 73 ++ .../opsml-rust-python/references/errors.md | 84 ++ .../references/pyo3-boundaries.md | 89 ++ .../references/python-api-and-stubs.md | 59 ++ .../opsml-rust-python/references/rust-core.md | 101 ++ .../references/testing-workflows.md | 64 ++ .codex/skills/opsml-ui/SKILL.md | 5 + AGENTS.md | 14 + CLAUDE.md | 12 +- Makefile | 232 ----- .../evaluation/AgentEvalDashboard.svelte | 51 +- .../evaluation/AgentEvalRecordTable.svelte | 29 +- .../evaluation/AgentEvalWorkflowTable.svelte | 29 +- .../AgentEvalDashboard.pagination.test.ts | 210 +++++ .../AgentEvalTables.pagination.test.ts | 80 ++ .../agent/observability/AgentsTable.svelte | 33 +- .../__tests__/AgentsTable.test.ts | 32 + .../components/trace/TraceDashboard.svelte | 197 ++-- .../components/trace/TraceDashboard.test.ts | 150 +++ .../lib/components/trace/TraceTable.svelte | 4 +- .../components/trace/TraceWaterfall.svelte | 43 +- .../trace/__tests__/mockData.test.ts | 84 ++ .../src/lib/components/trace/clause.test.ts | 185 ++++ .../src/lib/components/trace/clause.ts | 369 ++++++++ .../lib/components/trace/clauseEvaluator.ts | 46 + .../components/trace/filters/ChipBar.svelte | 10 +- .../trace/filters/FacetSidebar.svelte | 57 +- .../trace/filters/FacetSidebar.test.ts | 
62 ++ .../trace/filters/filterState.svelte.ts | 110 +-- .../trace/filters/filterState.test.ts | 177 ++-- .../trace/genai/__tests__/utils.genai.test.ts | 1 + .../src/lib/components/trace/mockData.ts | 84 +- .../src/lib/components/trace/types.ts | 46 +- .../lib/components/trace/validation.test.ts | 66 ++ .../src/lib/components/trace/validation.ts | 278 ++++++ .../lib/components/trace/waterfall.test.ts | 77 ++ .../src/lib/components/trace/waterfall.ts | 60 ++ .../src/lib/server/trace/facets.test.ts | 80 -- .../opsml_ui/src/lib/server/trace/facets.ts | 20 - .../opsml_ui/src/lib/server/trace/mockData.ts | 1 + .../src/lib/server/trace/utils.facets.test.ts | 54 ++ .../opsml_ui/src/lib/server/trace/utils.ts | 40 + .../scouter/observability/trace/+server.ts | 14 +- .../observability/trace/metrics/+server.ts | 22 +- .../trace/metrics/server.test.ts | 101 ++ .../observability/trace/server.test.ts | 94 ++ .../observability/trace/types.test-d.ts | 15 + .../api/scouter/trace/facets/+server.ts | 49 +- .../api/scouter/trace/facets/server.test.ts | 106 +++ .../[name]/[version]/observability/+page.ts | 1 - .../[name]/[version]/observability/+page.ts | 42 +- .../observability/__tests__/page.test.ts | 115 +++ .../src/routes/opsml/observability/+page.ts | 2 - .../opsml_server/tests/api/scouter/trace.rs | 65 +- mise.toml | 225 ++++- py-opsml/dev/integration/agent/__init__.py | 0 .../dev/integration/agent/agents/__init__.py | 12 + .../dev/integration/agent/agents/callbacks.py | 125 +++ .../dev/integration/agent/agents/pipeline.py | 86 ++ .../dev/integration/agent/agents/responder.py | 136 +++ .../dev/integration/agent/agents/triage.py | 104 +++ .../integration/agent/evaluation/__init__.py | 18 + .../integration/agent/evaluation/evaluate.py | 125 +++ .../agent/evaluation/user_agent.py | 101 ++ py-opsml/dev/integration/agent/main.py | 68 ++ py-opsml/dev/integration/agent/opsmlspec.yaml | 48 + .../integration/agent/prompts/responder.yaml | 100 ++ 
.../dev/integration/agent/prompts/triage.yaml | 88 ++ py-opsml/dev/integration/agent/setup.py | 22 + .../dev/integration/agent/shared/__init__.py | 10 + .../integration/agent/shared/scenarios.jsonl | 2 + .../dev/integration/agent/shared/setup.py | 167 ++++ py-opsml/dev/integration/agent/single.py | 48 + py-opsml/examples/benchmarks/_mlflow/run.py | 5 +- py-opsml/examples/benchmarks/_opsml/run.py | 7 +- py-opsml/examples/docs/overview_genai.py | 2 +- .../examples/docs/overview_traditional.py | 2 +- py-opsml/examples/experiment/advanced.py | 21 +- py-opsml/examples/experiment/agent/agent.py | 26 +- .../examples/experiment/agent/setup/models.py | 3 +- .../examples/experiment/agent/setup/prompt.py | 6 +- .../examples/experiment/agent/setup/tasks.py | 11 +- py-opsml/examples/genai/agent/crewai/agent.py | 4 +- py-opsml/examples/genai/agent/google/agent.py | 4 +- py-opsml/examples/genai/agent/openai/agent.py | 2 +- .../genai/google_adk/app/agent/helper.py | 1 - .../genai/google_adk/app/train/eta.py | 4 +- .../genai/google_adk/app/train/prompt.py | 4 +- .../examples/genai/pydantic/app/train/eta.py | 2 +- .../genai/pydantic/app/train/prompt.py | 4 +- py-opsml/examples/genai/recipe/app/main.py | 10 +- py-opsml/examples/genai/recipe/app/models.py | 3 +- py-opsml/examples/genai/recipe/app/tracing.py | 2 +- .../recipe/app/train/evaluation/tasks.py | 11 +- .../examples/genai/recipe/app/train/prompt.py | 6 +- py-opsml/examples/getting_started.py | 5 +- py-opsml/makefile | 145 --- py-opsml/pyproject.toml | 49 +- py-opsml/python/opsml/__init__.py | 6 +- py-opsml/python/opsml/agent/__init__.py | 8 +- py-opsml/python/opsml/experiment/__init__.py | 7 +- py-opsml/python/opsml/scouter/__init__.py | 4 +- .../python/opsml/scouter/queue/__init__.py | 6 +- .../python/opsml/scouter/tracing/__init__.py | 14 +- py-opsml/tests/cli/test_app.py | 13 +- .../integration/test_adk_agent_harness.py | 181 ++++ py-opsml/uv.lock | 117 --- 119 files changed, 5715 insertions(+), 2149 deletions(-) create 
mode 100644 .claude/skills/opsml-rust-python/SKILL.md create mode 100644 .claude/skills/opsml-rust-python/references/agent-harness.md create mode 100644 .claude/skills/opsml-rust-python/references/architecture.md create mode 100644 .claude/skills/opsml-rust-python/references/errors.md create mode 100644 .claude/skills/opsml-rust-python/references/pyo3-boundaries.md create mode 100644 .claude/skills/opsml-rust-python/references/python-api-and-stubs.md create mode 100644 .claude/skills/opsml-rust-python/references/rust-core.md create mode 100644 .claude/skills/opsml-rust-python/references/testing-workflows.md delete mode 100644 .claude/skills/opsml-ts-svelte/SKILL.md create mode 100644 .codex/skills/opsml-rust-python/SKILL.md create mode 100644 .codex/skills/opsml-rust-python/references/agent-harness.md create mode 100644 .codex/skills/opsml-rust-python/references/architecture.md create mode 100644 .codex/skills/opsml-rust-python/references/errors.md create mode 100644 .codex/skills/opsml-rust-python/references/pyo3-boundaries.md create mode 100644 .codex/skills/opsml-rust-python/references/python-api-and-stubs.md create mode 100644 .codex/skills/opsml-rust-python/references/rust-core.md create mode 100644 .codex/skills/opsml-rust-python/references/testing-workflows.md delete mode 100644 Makefile create mode 100644 crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/__tests__/AgentEvalDashboard.pagination.test.ts create mode 100644 crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/__tests__/AgentEvalTables.pagination.test.ts create mode 100644 crates/opsml_server/opsml_ui/src/lib/components/card/agent/observability/__tests__/AgentsTable.test.ts create mode 100644 crates/opsml_server/opsml_ui/src/lib/components/trace/TraceDashboard.test.ts create mode 100644 crates/opsml_server/opsml_ui/src/lib/components/trace/__tests__/mockData.test.ts create mode 100644 crates/opsml_server/opsml_ui/src/lib/components/trace/clause.test.ts create 
mode 100644 crates/opsml_server/opsml_ui/src/lib/components/trace/clause.ts create mode 100644 crates/opsml_server/opsml_ui/src/lib/components/trace/clauseEvaluator.ts create mode 100644 crates/opsml_server/opsml_ui/src/lib/components/trace/filters/FacetSidebar.test.ts create mode 100644 crates/opsml_server/opsml_ui/src/lib/components/trace/validation.test.ts create mode 100644 crates/opsml_server/opsml_ui/src/lib/components/trace/validation.ts create mode 100644 crates/opsml_server/opsml_ui/src/lib/components/trace/waterfall.test.ts create mode 100644 crates/opsml_server/opsml_ui/src/lib/components/trace/waterfall.ts delete mode 100644 crates/opsml_server/opsml_ui/src/lib/server/trace/facets.test.ts delete mode 100644 crates/opsml_server/opsml_ui/src/lib/server/trace/facets.ts create mode 100644 crates/opsml_server/opsml_ui/src/lib/server/trace/utils.facets.test.ts create mode 100644 crates/opsml_server/opsml_ui/src/routes/api/scouter/observability/trace/metrics/server.test.ts create mode 100644 crates/opsml_server/opsml_ui/src/routes/api/scouter/observability/trace/server.test.ts create mode 100644 crates/opsml_server/opsml_ui/src/routes/api/scouter/observability/trace/types.test-d.ts create mode 100644 crates/opsml_server/opsml_ui/src/routes/api/scouter/trace/facets/server.test.ts create mode 100644 crates/opsml_server/opsml_ui/src/routes/opsml/agent/[registry]/card/[space]/[name]/[version]/observability/__tests__/page.test.ts create mode 100644 py-opsml/dev/integration/agent/__init__.py create mode 100644 py-opsml/dev/integration/agent/agents/__init__.py create mode 100644 py-opsml/dev/integration/agent/agents/callbacks.py create mode 100644 py-opsml/dev/integration/agent/agents/pipeline.py create mode 100644 py-opsml/dev/integration/agent/agents/responder.py create mode 100644 py-opsml/dev/integration/agent/agents/triage.py create mode 100644 py-opsml/dev/integration/agent/evaluation/__init__.py create mode 100644 
py-opsml/dev/integration/agent/evaluation/evaluate.py create mode 100644 py-opsml/dev/integration/agent/evaluation/user_agent.py create mode 100644 py-opsml/dev/integration/agent/main.py create mode 100644 py-opsml/dev/integration/agent/opsmlspec.yaml create mode 100644 py-opsml/dev/integration/agent/prompts/responder.yaml create mode 100644 py-opsml/dev/integration/agent/prompts/triage.yaml create mode 100644 py-opsml/dev/integration/agent/setup.py create mode 100644 py-opsml/dev/integration/agent/shared/__init__.py create mode 100644 py-opsml/dev/integration/agent/shared/scenarios.jsonl create mode 100644 py-opsml/dev/integration/agent/shared/setup.py create mode 100644 py-opsml/dev/integration/agent/single.py delete mode 100644 py-opsml/makefile create mode 100644 py-opsml/tests/integration/test_adk_agent_harness.py diff --git a/.claude/skills/opsml-rust-python/SKILL.md b/.claude/skills/opsml-rust-python/SKILL.md new file mode 100644 index 0000000000..7f6db89ba3 --- /dev/null +++ b/.claude/skills/opsml-rust-python/SKILL.md @@ -0,0 +1,49 @@ +--- +name: opsml-rust-python +description: Repo-local OpsML skill for Rust core, Python bindings, PyO3, maturin, card/registry/server/client logic, Python API exports, generated stubs, and cross-language tests. Use when working in OpsML crates, `py-opsml`, PyO3-exposed types, Rust errors that cross Python, card or registry behavior, server/client contracts, or Python-visible SDK behavior. Do not use for Svelte UI work; use `opsml-ui` instead. +--- + +# OpsML Rust/Python + +Use this skill as the source of truth for OpsML work where Rust core logic is exposed to Python through PyO3. OpsML is not a generic Rust/Python package: cards are the central abstraction, Rust owns the business logic, and Python is a thin ergonomic API over the Rust core. + +Start by locating the layer you are changing: +- Rust core design, traits, ownership, performance, cloning, async, or crate-local API shape: read `references/rust-core.md`. 
+- Rust card, registry, storage, SQL, server, auth, events, or shared contracts: read `references/architecture.md`. +- PyO3 classes, `#[pymethods]`, GIL usage, nested `#[pyclass]` fields, or Python lifetimes: read `references/pyo3-boundaries.md`. +- Error types, `PyErr`, server envelopes, CLI errors, or Python exceptions: read `references/errors.md`. +- Python exports, `__all__`, generated stubs, maturin setup, or Python-visible SDK behavior: read `references/python-api-and-stubs.md`. +- Tests, linting, formatting, or command selection: read `references/testing-workflows.md`. +- Agent-readable APIs, structured errors, validation, lint sensors, or harness work: read `references/agent-harness.md`. + +Follow these repo-specific rules: +- Keep core behavior in Rust. Python should expose a typed, ergonomic API and small helpers, not duplicate card, registry, storage, or validation logic. +- Design Rust APIs around domain-owned data, precise traits, and explicit ownership before thinking about the Python binding. +- Keep Python lifetimes out of Rust-only code. Introduce `Python<'py>`, `Bound<'py, PyAny>`, `Py`, and `PyErr` only where code crosses the Python boundary. +- Do not store `PyErr` in reusable Rust error enums. Convert Python errors into string-backed Rust variants, then convert Rust errors back into Python exceptions at the PyO3 boundary. +- For `#[pyclass]` fields whose type is also `#[pyclass]`, do not use `#[pyo3(get, set)]`. Implement manual `#[getter]` and `#[setter]` methods with `IntoPyObjectExt` and `extract`. +- Prefer zero-cost Rust abstractions: enums with delegated trait impls, static dispatch, concrete types, precise errors, iterators, references, ownership transfer, and `Arc` only where shared state is real. +- Treat speculative `Clone`, broad abstractions, unnecessary allocation, and Python-driven core design as design smells. Add them only for concrete call sites. +- Make new Rust core logic testable without Python whenever possible. 
Add Python tests for Python-visible workflows. +- Write errors and API contracts so humans and coding agents can debug them: stable names, clear fields, concise messages, and actionable hints. +- Use repository workflow tooling from `mise.toml`. Do not invent ad hoc commands when a `mise run ...` task exists. +- Inspect current dependency versions in `Cargo.toml`, `py-opsml/pyproject.toml`, and lockfiles before relying on version-specific behavior. + +When making Rust/PyO3 changes, inspect in this order: +1. The Rust crate that owns the domain behavior. +2. Shared types in `opsml-types/src/contracts/` if the behavior crosses server/client/Python boundaries. +3. PyO3 module registration under `py-opsml/src/`. +4. Python package re-exports under `py-opsml/python/opsml/`. +5. Generated stubs under `py-opsml/python/opsml/_opsml.pyi` and `py-opsml/python/opsml/stubs/`. +6. Rust and Python tests that model the user journey. + +Use these verification commands when relevant: +- Rust formatting: `mise run format` +- Rust linting: `mise run lints` +- Targeted Rust tests: `cargo test -p <crate> -- --nocapture --test-threads=1` +- Full Rust aggregate when justified: `mise run test:unit` +- Rebuild Python bindings after PyO3-exposed Rust changes: `mise run py:setup` +- Python linting: `mise run py:lints` +- Python unit tests: `mise run py:test:unit` + +Prefer narrow, local edits that match existing OpsML patterns. Broaden the architecture only when the current domain boundary is truly wrong for the user workflow. diff --git a/.claude/skills/opsml-rust-python/references/agent-harness.md b/.claude/skills/opsml-rust-python/references/agent-harness.md new file mode 100644 index 0000000000..c42b2171dc --- /dev/null +++ b/.claude/skills/opsml-rust-python/references/agent-harness.md @@ -0,0 +1,55 @@ +# Agent-Friendly OpsML Work + +OpsML is being shaped for both human developers and coding agents. 
The key direction is harness engineering: give agents strong guides before they act and strong sensors after they act. + +## Design For Agents And Humans + +Code should be readable by humans first and stable enough for agents to modify safely: +- Keep names domain-specific and unambiguous. +- Keep functions small enough that invariants are visible. +- Prefer typed contracts over loose dictionaries or stringly-typed conventions. +- Surface failures with stable codes, fields, hints, and docs where the repo supports them. +- Keep side effects at clear boundaries. + +## Same Envelope Principle + +Harness work should converge on the same structured shape across layers: +- HTTP responses. +- PyO3 exceptions. +- CLI output. +- `card.validate()`. +- `opsml lint`. +- Integrity checks. +- Eval results. + +Use fields such as: +- `code` +- `field` +- `hint` or `suggested_action` +- `doc_url` +- `retry` + +Agents should not need to parse paragraphs to understand what field to fix. + +## Validation And Sensors + +When adding governance behavior, think in layers: +- Edit-time or local lint sensors. +- Rust-native validation on core types. +- Registry/server chokepoints. +- Post-hoc integrity checks. +- Behavior evals for prompts and agents. + +The Rust core should own validations that define durable OpsML correctness. Python should expose them ergonomically and test them as user workflows. + +## Documentation Near APIs + +Public Rust and Python APIs should include useful docs when they define: +- User-visible behavior. +- Required invariants. +- Error conditions. +- Security constraints. +- Serialization formats. +- Cross-language boundary behavior. + +Do not add noisy comments that restate simple code. Add concise comments when they preserve hard-won context, such as why a Python lifetime is intentionally kept at the boundary. 
diff --git a/.claude/skills/opsml-rust-python/references/architecture.md b/.claude/skills/opsml-rust-python/references/architecture.md new file mode 100644 index 0000000000..0998b53ee7 --- /dev/null +++ b/.claude/skills/opsml-rust-python/references/architecture.md @@ -0,0 +1,73 @@ +# OpsML Rust/Python Architecture + +OpsML is an AI lifecycle platform organized around cards: versioned, encrypted, registry-tracked records for data, models, experiments, prompts, services, agents, and skills. Python users create and operate cards, but the durable behavior belongs in Rust. + +## Core Rule + +Rust is the source of truth for: +- Card structure and validation. +- Registry behavior. +- Server contracts and route behavior. +- Storage, SQL, encryption, auth, events, and versioning. +- Serialization formats that persist or cross process boundaries. + +Python should provide: +- Ergonomic constructors and usage patterns. +- Thin re-exports from `_opsml`. +- Small Python-only helpers where Python libraries are the natural boundary. +- User-journey tests for public Python behavior. + +Do not implement durable business logic twice in Rust and Python. If Python and Rust disagree, the design is already drifting. + +## Important Crates + +- `opsml-cards`: PyO3 card structs and card-specific behavior. +- `opsml-registry`: Python-facing `CardRegistry`; dispatches to local or server-backed operations. +- `opsml-types`: shared contract types, enums, and request/response shapes. +- `opsml-server`: Axum routes, middleware, API handlers, server errors. +- `opsml-client`: Rust HTTP client used by Python bindings in server mode. +- `opsml-sql`: database abstraction over SQLite, PostgreSQL, and MySQL. +- `opsml-storage`: storage abstraction over local and cloud backends. +- `opsml-crypt`: artifact encryption. +- `opsml-experiment`, `opsml-genai`, `opsml-service`: domain-specific card logic. +- `py-opsml`: Python package and PyO3 extension wiring. 
+ +Read `AGENTS.md` for the full crate map before changing a cross-cutting path. + +## Enum-Based Backends + +OpsML favors enum dispatch for core backends: +- `StorageClientEnum` delegates `StorageClient` methods to local/S3/GCS/Azure variants. +- `SqlClientEnum` delegates SQL/card logic to SQLite/PostgreSQL/MySQL variants. + +When adding a backend or domain variant, follow this pattern before reaching for `Box`. + +## Contracts And Routes + +Shared request/response types belong in `opsml-types/src/contracts/`. + +Server routes live under `/opsml/api` and follow the existing handler shape: +- `State>` for dependencies. +- `Extension` for protected routes, even read-only routes. +- `Query(...)` or `Json(...)` for inputs. +- `Result, (StatusCode, Json)>` or an established local equivalent. + +Use `parse_qs_query::(&uri)` for query strings containing `Vec`. + +## Registry Modes + +`CardRegistry` supports: +- Local mode: direct filesystem/SQLite-backed registry operations. +- Server mode: HTTP proxy through `opsml-client`. + +New behavior should preserve both modes unless the feature is explicitly server-only. A change that only works through the Python package but not through Rust registry/server paths is usually in the wrong layer. + +## Artifact Encryption + +Card artifacts are encrypted before storage. Do not bypass: +- `create_artifact_key()` +- `create_and_store_encrypted_file()` +- `download_artifact()` plus decryption +- `ArtifactKey` as the database source of truth + +Security-sensitive changes need targeted tests around key lookup, upload/download paths, and error behavior. diff --git a/.claude/skills/opsml-rust-python/references/errors.md b/.claude/skills/opsml-rust-python/references/errors.md new file mode 100644 index 0000000000..ed1c7a8858 --- /dev/null +++ b/.claude/skills/opsml-rust-python/references/errors.md @@ -0,0 +1,84 @@ +# Errors Across Rust And Python + +OpsML errors should be clear enough for humans and structured enough for agents. 
They should name what failed, where possible include the affected field/resource, and preserve enough context to fix the issue without parsing vague prose. + +## No Stored `PyErr` + +Do not store `PyErr` inside reusable Rust error enums. `PyErr` can pull Python runtime/lifetime concerns into pure Rust code and cause C linker or GIL-related failures in Rust tests. + +Use the canonical pattern in `crates/opsml_cards/src/skill/error.rs`: +- Rust error variants store Rust-owned data such as `String`. +- `From<PyErr> for SkillError` converts to a string-backed variant. +- `From<SkillError> for PyErr` exists only behind the Python feature and maps to a Python exception at the boundary. + +Preferred shape: + +```rust +#[derive(thiserror::Error, Debug)] +pub enum DomainError { + #[error("{0}")] + Error(String), + + #[error(transparent)] + Io(#[from] std::io::Error), +} + +#[cfg(feature = "python")] +impl From<PyErr> for DomainError { + fn from(err: PyErr) -> Self { + DomainError::Error(err.to_string()) + } +} + +#[cfg(feature = "python")] +impl From<DomainError> for PyErr { + fn from(err: DomainError) -> PyErr { + pyo3::exceptions::PyRuntimeError::new_err(err.to_string()) + } +} +``` + +Avoid this in core errors: + +```rust +#[error(transparent)] +Python(#[from] PyErr) +``` + +Only use direct Python error storage in code that is permanently Python-only and cannot be reached by Rust tests or Rust core logic. That should be rare in OpsML. + +## Transitive Error Chains + +The rule applies transitively. If `CardError` wraps `ModelInterfaceError`, and `ModelInterfaceError` stores `PyErr`, then `CardError` is contaminated too. + +When adding `#[from]` variants, inspect wrapped errors for PyO3 types. Prefer converting upstream errors to string-backed variants at the boundary. + +## Server Error Envelope + +Server handlers should use the existing `OpsmlServerError` helpers and structured fields where available. 
For new agent-facing or validation work, prefer stable data: +- `code` +- `field` +- `suggested_action` or hint +- `doc_url` +- `retry` + +Prefer one stable error shape across HTTP, PyO3, CLI, lint output, validation, and eval. A Python caller, CLI user, UI route, and coding agent should be able to recognize the same failure without parsing unrelated prose. + +## Human And Agent Debuggability + +Error messages should: +- Name the operation that failed. +- Include the resource identifier when safe, such as card UID, space/name/version, file path, or field. +- Avoid generic messages like "invalid input" when the failing field is known. +- Avoid logging secrets, tokens, encryption keys, or provider credentials. +- Keep wording concise and stable enough for tests and agents. + +## Mapping To Python Exceptions + +Choose Python exception types deliberately: +- Invalid user input: `PyValueError`. +- Missing key or field: `PyKeyError` or `PyValueError`, depending on existing local style. +- Filesystem/IO: `PyOSError`. +- Runtime integration failure: `PyRuntimeError`. + +Follow nearby mappings in the same crate before introducing a new exception style. diff --git a/.claude/skills/opsml-rust-python/references/pyo3-boundaries.md b/.claude/skills/opsml-rust-python/references/pyo3-boundaries.md new file mode 100644 index 0000000000..80a90b0705 --- /dev/null +++ b/.claude/skills/opsml-rust-python/references/pyo3-boundaries.md @@ -0,0 +1,89 @@ +# PyO3 Boundary Rules + +PyO3 code is a boundary layer. Keep Python lifetimes, `PyErr`, and object extraction at the edge so Rust core code remains testable with normal Rust tests. + +## Boundary Types + +Use these types only when the code is actually crossing into Python: +- `Python<'py>` +- `Bound<'py, PyAny>` +- `Py` +- `PyErr` +- `#[pyclass]`, `#[pymethods]`, `#[pyfunction]` + +Pure Rust functions should generally accept and return Rust types, not Python-bound objects. 
+ +## Constructors + +For Python constructors, keep extraction and conversion near `#[new]`, then call a Rust-native constructor such as `new_rs`, `from_config`, or a domain-specific builder. + +Preferred shape: + +```rust +#[cfg(feature = "python")] +#[pymethods] +impl SkillCard { + #[new] + pub fn new(skill: &Bound<'_, PyAny>, space: Option<&str>) -> Result { + let skill = skill.extract::()?; + Self::new_rs(skill, space, None, None, None, None, None, None) + } +} +``` + +Keep the Rust-native constructor usable without Python. + +## Nested `#[pyclass]` Fields + +If a `#[pyclass]` struct has a field whose type is itself a `#[pyclass]`, do not put `#[pyo3(get, set)]` on that field. PyO3-generated accessors can leak Python lifetimes into pure Rust call sites and tests. + +Use the canonical pattern in `crates/opsml_cards/src/skill/card.rs`: +- `skill` field getter at `SkillCard::skill`. +- `set_skill` setter using `extract`. +- `dependencies` getter using `IntoPyObjectExt`. +- `set_dependencies` setter using `extract::>()`. + +Preferred shape: + +```rust +#[getter] +pub fn skill<'py>(&self, py: Python<'py>) -> Result, SkillError> { + Ok(self.skill.clone().into_bound_py_any(py)?) +} + +#[setter] +pub fn set_skill(&mut self, skill: &Bound<'_, PyAny>) -> Result<(), SkillError> { + self.skill = skill.extract::()?; + Ok(()) +} +``` + +## Python Objects In Cards + +Some cards hold Python-owned objects, such as model or data interfaces. Keep GIL acquisition scoped tightly: +- Acquire the GIL only where calling Python methods or extracting Python objects. +- Convert Python-side data into Rust metadata before serialization. +- Do not attempt to serialize `Py` directly. +- Reconstruct Python-facing objects only at load/deserialization boundaries where the Python API needs them. + +## Feature Gates + +Respect existing `python` and `server` feature gates. 
If a Rust unit test fails with Python linking or libpython errors, inspect for leaked PyO3 types in core code or transitive error chains. + +Common causes: +- `PyErr` stored in a reusable error enum. +- `#[pyo3(get, set)]` on nested `#[pyclass]` fields. +- Python-only imports not guarded with `#[cfg(feature = "python")]`. + +## Module Registration + +New Python-visible Rust functions/classes must be wired through `py-opsml/src/lib.rs` and the appropriate submodule registration function, such as: +- `card::add_card_module(m)?` +- `data::add_data_module(m)?` +- `model::add_model_module(m)?` +- `experiment::add_experiment_module(m)?` +- `agent::add_agent_module(m)?` +- `service::add_service_module(m)?` +- `types::add_types_module(m)?` + +Do not stop after adding `#[pyclass]`; registration, Python exports, stubs, and tests are part of the public API surface. diff --git a/.claude/skills/opsml-rust-python/references/python-api-and-stubs.md b/.claude/skills/opsml-rust-python/references/python-api-and-stubs.md new file mode 100644 index 0000000000..2244c9a0d0 --- /dev/null +++ b/.claude/skills/opsml-rust-python/references/python-api-and-stubs.md @@ -0,0 +1,59 @@ +# Python API And Stubs + +The Python package is the user-facing SDK, but the Rust extension owns the core behavior. Python additions should be thin, typed, and tested through user workflows. + +## Public API Wiring + +For new Python-visible Rust API, check every layer: +1. Rust type/function exists in the owning crate. +2. PyO3 class/function is registered in the appropriate `py-opsml/src/*` module. +3. The module is attached from `py-opsml/src/lib.rs`. +4. Python package exports are updated under `py-opsml/python/opsml/`. +5. `__all__` is updated where the package uses it. +6. Stubs are regenerated or updated through the repo workflow. +7. Python tests cover the user-visible behavior. + +Do not leave a type reachable only through `_opsml` unless that is already the local convention for the feature. 
+ +## Stub Workflow + +Generated stubs live under: +- `py-opsml/python/opsml/_opsml.pyi` +- `py-opsml/python/opsml/stubs/` + +Use the repo task: +- `mise run py:setup` + +This task runs stub generation, syncs Python dependencies, and builds the Rust extension through maturin. Run it after Rust changes that affect Python-exposed classes, functions, signatures, docs, or enums. + +## Python Package Style + +Python code should: +- Keep durable business behavior in Rust. +- Use typed, ergonomic public APIs. +- Add docstrings to public functions/classes when intent, side effects, errors, or examples are not obvious. +- Avoid loose `Any` unless the Python boundary is truly dynamic. +- Follow existing import, export, and test conventions in `py-opsml/python/opsml/`. + +When behavior is Python-only because it depends on a Python ecosystem object, keep the boundary explicit. Examples include Python model/data interfaces where user subclasses implement `save`, `load`, or prediction behavior. + +## Python Tests + +Python tests should model how a real user interacts with OpsML: +- Construct the Python-facing object. +- Register or load through `CardRegistry` when relevant. +- Assert stable behavior and error messages. +- Use fixtures such as `mock_db`, `pandas_data`, or `random_forest_classifier` where appropriate. + +Do not test generated implementation details when a user workflow test would prove the contract better. + +## Maturin And Versions + +Do not rely on remembered PyO3 or maturin versions. Inspect: +- Root `Cargo.toml`. +- `py-opsml/Cargo.toml`. +- `py-opsml/pyproject.toml`. +- Lockfiles. +- `mise.toml`. + +Version-specific PyO3 patterns should match the versions currently pinned by the repository. 
diff --git a/.claude/skills/opsml-rust-python/references/rust-core.md b/.claude/skills/opsml-rust-python/references/rust-core.md new file mode 100644 index 0000000000..d1268ecd6b --- /dev/null +++ b/.claude/skills/opsml-rust-python/references/rust-core.md @@ -0,0 +1,101 @@ +# Rust Core Practices + +OpsML's Python API is only as good as the Rust core underneath it. Design Rust code first as a clean, testable library; then expose the right boundary to Python. + +## API Shape + +Prefer APIs that make invalid states difficult to represent: +- Use domain types instead of raw strings or loose maps for durable concepts. +- Use enums for closed sets of states, backends, card kinds, operations, and variants. +- Use structs with named fields for meaningful records. +- Use traits for behavior shared across real implementations, not for one call site. +- Keep public functions explicit about inputs, outputs, and error behavior. + +Do not shape core Rust APIs around what is easiest to extract from `PyAny`. Convert Python inputs at the boundary, then call Rust-native functions. + +## Zero-Cost Abstractions + +Prefer abstractions that compile down to direct code: +- Generic functions with trait bounds when the caller can be monomorphized. +- Enum dispatch for known backend variants, matching existing `StorageClientEnum` and `SqlClientEnum` patterns. +- Iterators instead of building intermediate vectors. +- Borrowed data such as `&str`, `&Path`, and `&[T]` when ownership is not needed. +- `Cow<'_, str>` only when both borrowed and owned paths are real and the complexity pays for itself. + +Use `Box` only when runtime extensibility is required and the allocation/dynamic dispatch is an intentional tradeoff. + +## Ownership And Cloning + +Treat `.clone()` as a question, not a reflex: +- Prefer borrowing when the callee does not need ownership. +- Prefer moving values when the current scope no longer needs them. +- Use `Arc` for real shared ownership across tasks, state, or handlers. 
+- Avoid `Arc<Mutex<T>>` as a default; first ask whether ownership, message passing, a narrower lock, or immutable state is enough. +- Do not derive `Clone` speculatively. Derive it only when concrete call sites need it. + +Common acceptable clones: +- Small identifiers or config values at API boundaries where ownership is clearer than lifetimes. +- `Arc::clone` for shared application state. +- Data copied into an owned response type. + +Common suspicious clones: +- Large vectors, maps, schemas, payloads, or serialized values in loops. +- Cloning to work around a borrow caused by overly broad scopes. +- Cloning Python objects or PyO3 wrappers without a clear GIL/boundary reason. + +## Allocation And Strings + +Avoid accidental allocation in hot or repeated paths: +- Use `String::with_capacity` when building a known-size string. +- Use `Vec::with_capacity` when the item count is known or cheaply estimated. +- Use `write!` into an existing `String` instead of repeated `format!` in loops. +- Avoid serializing/deserializing just to move data between Rust layers. +- Keep JSON conversion at API, storage, or Python boundaries unless the core domain truly stores JSON. + +Do not micro-optimize code that is not on a meaningful path. Prefer clear algorithms first, then optimize measured bottlenecks. + +## Traits + +Use traits when they express a stable capability: +- Storage behavior. +- SQL/card persistence behavior. +- Card lifecycle behavior. +- Interface behavior with multiple implementations. + +Keep traits small enough to implement correctly. Avoid kitchen-sink traits that force unrelated implementations to carry meaningless methods. + +Prefer associated types or generics when they simplify call sites. Avoid clever trait machinery when an enum, function, or concrete type would be easier to read and test. + +## Async And Shared State + +Use async for I/O boundaries: HTTP, database, storage, network calls, and server handlers. 
Keep pure computation synchronous unless the caller requires async. + +For shared server state: +- Put heavy shared dependencies in `AppState`. +- Wrap shared backend clients in `Arc` as established by the repo. +- Avoid cloning heavy clients or rebuilding pools per request. +- Keep locks out of request hot paths where possible. + +Use timeouts, cancellation-aware APIs, and bounded concurrency for external calls when the surrounding code already supports those patterns. + +## Error Design + +Rust errors should be precise and useful before they become Python exceptions: +- Use `thiserror` enums for domain errors. +- Include the failing operation and field/resource where possible. +- Preserve source errors with `#[from]` only when the wrapped error is safe for Rust-only code. +- Avoid `anyhow` in library surfaces unless the crate already uses it for an internal boundary. + +For errors crossing Python, follow `references/errors.md`. + +## Testing Rust Core + +New core logic should have Rust tests that do not require Python unless the behavior is inherently Python-facing. + +Good Rust tests: +- Exercise domain behavior through public or crate-visible APIs. +- Cover success, edge cases, and stable failures. +- Use local fixtures and mocks instead of external services. +- Keep SQLite/server tests isolated with `--test-threads=1` where the repo requires it. + +If a test requires Python for non-Python behavior, inspect the design for leaked PyO3 types or a misplaced boundary. diff --git a/.claude/skills/opsml-rust-python/references/testing-workflows.md b/.claude/skills/opsml-rust-python/references/testing-workflows.md new file mode 100644 index 0000000000..38ab3716a1 --- /dev/null +++ b/.claude/skills/opsml-rust-python/references/testing-workflows.md @@ -0,0 +1,64 @@ +# Testing And Workflow Commands + +Use `mise` as the default command surface. The tasks encode environment variables, working directories, feature flags, and server setup. 
+ +## Default Commands + +Rust: +- Format: `mise run format` +- Lint: `mise run lints` +- Full Rust aggregate when justified: `mise run test:unit` +- Targeted crate tests: `cargo test -p <crate> -- --nocapture --test-threads=1` + +Python: +- Build stubs and extension: `mise run py:setup` +- Format: `mise run py:format` +- Lint/type checks: `mise run py:lints` +- CI lint gate: `mise run py:lints-ci` +- Unit tests: `mise run py:test:unit` +- Service tests: `mise run py:test:service` +- Integration tests: `mise run py:test:integration` + +Frontend work belongs to `opsml-ui`, not this skill. + +## Command Selection + +Use targeted tests first: +- Changed a single Rust crate: run that crate's focused tests. +- Changed SQL behavior: use the relevant `test:sql-*` task. +- Changed server handlers: use targeted `opsml-server` tests with `--test-threads=1`. +- Changed PyO3-exposed API: run `mise run py:setup`, then targeted Python tests or `mise run py:test:unit`. +- Changed Python-only public API: run targeted pytest, then `mise run py:lints` if practical. + +Broaden to aggregate tasks when the change crosses boundaries or before final handoff on high-risk work. + +## Server Test Caution + +Do not run all `opsml_server` tests casually. The repo guidance warns that broad failures can leave stale state. Prefer isolated tests and cleanup. Use `TestHelper::new(None)` patterns and `helper.cleanup()` where applicable. + +Canonical Rust server tests: +- Use `TestHelper::new(None)`. +- Send requests through `helper.send_oneshot(request)` so auth headers are added. +- Use `retry_flaky_test!` for transient SQLite contention. +- Run with `--test-threads=1`. +- Mock Scouter and SSO; do not require live external services. + +## PyO3 Change Checklist + +After changing a Python-exposed Rust type: +1. Confirm Rust core behavior is tested without Python where possible. +2. Confirm PyO3 registration is complete. +3. Run `mise run py:setup`. +4. 
Confirm imports work from `opsml`, not only from `_opsml`. +5. Add or update Python tests for the user workflow. +6. Run targeted Python tests or `mise run py:test:unit`. + +## Test Philosophy + +Tests should prove user journeys and stable contracts: +- Card creation, serialization, registration, retrieval, and loading. +- Local and server-backed registry behavior where both apply. +- Structured error behavior for invalid inputs. +- Python-visible behavior from the public `opsml` package. + +Avoid tests that mirror private implementation details. Add regression tests when fixing a bug. diff --git a/.claude/skills/opsml-ts-svelte/SKILL.md b/.claude/skills/opsml-ts-svelte/SKILL.md deleted file mode 100644 index b0ac0dba81..0000000000 --- a/.claude/skills/opsml-ts-svelte/SKILL.md +++ /dev/null @@ -1,881 +0,0 @@ ---- -name: opsml-ts-svelte -description: Use this skill whenever a user asks about TypeScript logic, Svelte 5 components, SvelteKit routing or data loading, state management, reactivity, API integration, type definitions, performance optimization, or any frontend logic in the OpsML project. Triggers include: writing or fixing TypeScript, Svelte runes ($state, $derived, $effect, $props), load functions, form actions, server endpoints, stores, utility functions, type safety, generics, async patterns, error handling, data fetching, filtering, sorting, pagination, or any mention of "how do I implement", "how should this work", "write a function", "fix the logic", or "why is this slow". ---- - -You are an expert frontend engineer specializing in **TypeScript, Svelte 5, SvelteKit, and -Tailwind CSS v4** for the OpsML platform — a developer-facing ML registry and observability -system. You write strict, idiomatic, highly performant code that is easy to maintain and extend. 
- ---- - -## TypeScript Standards - -### Compiler Settings (assume strict mode) - -```jsonc -// tsconfig.json (effective flags) -{ - "strict": true, // enables all strict checks - "noUncheckedIndexedAccess": true, - "exactOptionalPropertyTypes": true, - "noImplicitOverride": true -} -``` - -All code must compile cleanly under these flags. Never use `any` — use `unknown` and narrow -instead. Never suppress errors with `// @ts-ignore` without an explanatory comment. - -### Type Definitions - -**Define types close to where they are used. Co-locate with the module, not in a global barrel -unless shared across 3+ features.** - -```typescript -// ✅ Precise, composable types -type Status = 'active' | 'failed' | 'pending' | 'archived'; - -interface RunSummary { - uid: string; - name: string; - version: string; - status: Status; - createdAt: string; // ISO-8601; parse to Date at boundary, store as string - tags: Record; -} - -// ✅ Discriminated unions for API responses -type ApiResult = - | { ok: true; data: T } - | { ok: false; error: string; code: number }; - -// ✅ Branded types for domain IDs (prevent ID confusion) -type Uid = string & { readonly __brand: 'Uid' }; -type SpaceName = string & { readonly __brand: 'SpaceName' }; - -function toUid(raw: string): Uid { return raw as Uid; } -``` - -### Generics - -Use generics to avoid duplication, not to add abstraction for its own sake. - -```typescript -// ✅ Generic paginated response -interface Page { - items: T[]; - total: number; - page: number; - pageSize: number; -} - -// ✅ Generic sort utility -function sortBy(items: T[], key: keyof T, dir: 'asc' | 'desc' = 'asc'): T[] { - return [...items].sort((a, b) => { - const av = a[key]; - const bv = b[key]; - const cmp = av < bv ? -1 : av > bv ? 1 : 0; - return dir === 'asc' ? 
cmp : -cmp; - }); -} - -// ✅ Generic filter with type predicate -function filterDefined(items: (T | null | undefined)[]): T[] { - return items.filter((x): x is T => x != null); -} -``` - -### Null Safety & Narrowing - -```typescript -// ✅ Narrow unknown API responses at the boundary -function parseRun(raw: unknown): RunSummary { - if (!raw || typeof raw !== 'object') throw new Error('Invalid run payload'); - const r = raw as Record; - if (typeof r['uid'] !== 'string') throw new Error('Missing uid'); - // ...validate each field - return r as RunSummary; -} - -// ✅ Optional chaining + nullish coalescing -const label = run?.tags?.['display_name'] ?? run.name; - -// ✅ Exhaustive switch (TS will error if a variant is unhandled) -function statusColor(s: Status): string { - switch (s) { - case 'active': return 'bg-secondary-300'; - case 'failed': return 'bg-error-600'; - case 'pending': return 'bg-warning-300'; - case 'archived': return 'bg-surface-300'; - } -} -``` - -### Async Patterns - -```typescript -// ✅ Result pattern — never throw across async boundaries silently -async function fetchRun(uid: string): Promise> { - try { - const res = await fetch(`/api/runs/${uid}`); - if (!res.ok) return { ok: false, error: res.statusText, code: res.status }; - const data: unknown = await res.json(); - return { ok: true, data: parseRun(data) }; - } catch (e) { - return { ok: false, error: String(e), code: 0 }; - } -} - -// ✅ Parallel fetches — use Promise.all, not sequential await -const [runs, metrics] = await Promise.all([ - fetchRuns(spaceId), - fetchMetrics(spaceId), -]); - -// ✅ Abort controller for cancellable fetches -function createFetch(url: string, signal: AbortSignal) { - return fetch(url, { signal }); -} -``` - ---- - -## Svelte 5 Runes — Complete Reference - -### $state - -```svelte - -``` - -### $derived - -```svelte - - - -{#each filtered() as run}...{/each} -{counts().failed} failed -``` - -**Rules:** -- `$derived` re-runs only when its tracked dependencies change 
— keep derivations pure and cheap -- Do NOT write side effects inside `$derived`; use `$effect` for that -- Memoization is automatic; no need for manual `useMemo` equivalents - -### $effect - -```svelte - -``` - -**Rules:** -- `$effect` runs after the DOM updates, like `useEffect` in React -- Always return a cleanup function when managing subscriptions, timers, or fetch controllers -- Never write to state that the same effect reads — causes infinite loops -- Prefer `$derived` for computed values; use `$effect` only for actual side effects - -### $props - -```svelte - -``` - -### $bindable - -```svelte - - - -``` - ---- - -## SvelteKit Data Loading - -### +page.server.ts — Server Load Functions - -```typescript -// src/routes/runs/[uid]/+page.server.ts -import type { PageServerLoad } from './$types'; -import { error } from '@sveltejs/kit'; -import { fetchRun } from '$lib/api/runs'; - -export const load: PageServerLoad = async ({ params, fetch, locals }) => { - const result = await fetchRun(params.uid, fetch); - - if (!result.ok) { - throw error(result.code || 500, result.error); - } - - return { - run: result.data, - // Return only serializable data — no class instances, no functions - }; -}; -``` - -### +page.ts — Universal Load (runs on server + client) - -```typescript -// src/routes/runs/+page.ts -import type { PageLoad } from './$types'; - -export const load: PageLoad = async ({ url, fetch }) => { - const q = url.searchParams.get('q') ?? ''; - const page = Number(url.searchParams.get('page') ?? 
'1'); - - const res = await fetch(`/api/runs?q=${encodeURIComponent(q)}&page=${page}`); - if (!res.ok) throw new Error(`Failed to load runs: ${res.status}`); - - const data: Page = await res.json(); - return { runs: data, q, page }; -}; -``` - -### Form Actions - -```typescript -// src/routes/runs/[uid]/+page.server.ts -import type { Actions } from './$types'; -import { fail, redirect } from '@sveltejs/kit'; - -export const actions: Actions = { - deleteRun: async ({ params, fetch }) => { - const res = await fetch(`/api/runs/${params.uid}`, { method: 'DELETE' }); - if (!res.ok) return fail(res.status, { message: 'Delete failed' }); - throw redirect(303, '/runs'); - }, - - updateTags: async ({ params, request, fetch }) => { - const form = await request.formData(); - const tags = Object.fromEntries( - [...form.entries()].filter(([k]) => k.startsWith('tag_')) - .map(([k, v]) => [k.slice(4), String(v)]) - ); - - const res = await fetch(`/api/runs/${params.uid}/tags`, { - method: 'PATCH', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ tags }), - }); - - if (!res.ok) return fail(res.status, { message: 'Update failed' }); - return { success: true }; - }, -}; -``` - -### API Route Handlers (+server.ts) - -```typescript -// src/routes/api/runs/+server.ts -import type { RequestHandler } from './$types'; -import { json, error } from '@sveltejs/kit'; - -export const GET: RequestHandler = async ({ url, locals }) => { - const q = url.searchParams.get('q') ?? ''; - const page = Math.max(1, Number(url.searchParams.get('page') ?? '1')); - const pageSize = 25; - - try { - const runs = await locals.db.runs.search({ q, page, pageSize }); - return json(runs); - } catch (e) { - throw error(500, 'Database error'); - } -}; - -export const POST: RequestHandler = async ({ request, locals }) => { - const body: unknown = await request.json(); - // validate... 
- const run = await locals.db.runs.create(body); - return json(run, { status: 201 }); -}; -``` - ---- - -## State Architecture - -### When to Use What - -| Pattern | Use When | -|---|---| -| `$state` in component | Local UI state — open/closed, selection, form values | -| `$state` in `.svelte.ts` module | Shared state across sibling components without prop drilling | -| SvelteKit `load` return | Server-fetched data that belongs to a route | -| URL search params | Filterable / shareable state (search query, page, sort) | -| Form actions | Mutations — creates, updates, deletes | -| Context API (`setContext`/`getContext`) | Deep component trees; avoid prop drilling for config/services | - -### Shared Reactive State (`.svelte.ts` module) - -```typescript -// src/lib/state/selection.svelte.ts -// ✅ Use for cross-component shared state (replaces writable stores) - -let _selected = $state(null); -let _multiSelected = $state>(new Set()); - -export const selection = { - get current() { return _selected; }, - get multi() { return _multiSelected; }, - select(uid: string) { _selected = uid; }, - toggle(uid: string) { - if (_multiSelected.has(uid)) _multiSelected.delete(uid); - else _multiSelected.add(uid); - }, - clear() { _selected = null; _multiSelected.clear(); }, -}; -``` - -Usage in any component: -```svelte - - - selection.select(run.uid)} -> -``` - ---- - -## Performance Patterns - -### Debouncing Search / Filter Input - -```typescript -// src/lib/utils/debounce.ts -export function debounce void>( - fn: T, - delay: number -): (...args: Parameters) => void { - let timer: ReturnType; - return (...args) => { - clearTimeout(timer); - timer = setTimeout(() => fn(...args), delay); - }; -} -``` - -```svelte - - - - -``` - -### Virtual / Windowed Lists - -For tables with 500+ rows, avoid rendering all at once: - -```svelte - - -
-
-
- {#each visible() as run (run.uid)} - - {/each} -
-
-
-``` - -### Keyed Each Blocks - -Always key `{#each}` blocks on a stable, unique ID to prevent unnecessary DOM reconciliation: - -```svelte - -{#each runs as run (run.uid)} - -{/each} - - -{#each runs as run} - -{/each} -``` - -### Lazy / Async Components - -```svelte - - -{#await TraceWaterfall} -
-{:then { default: TraceWaterfall }} - -{/await} -``` - -### Avoiding $effect Over-runs - -```svelte - -$effect(() => { - doSomethingWith(items); - doSomethingElseWith(items); -}); - - -const snapshot = $derived(() => items.map(i => i.uid).join(',')); -$effect(() => { - const uids = snapshot(); - doSomethingWith(uids); -}); -``` - ---- - -## API Layer Patterns - -### Typed Fetch Wrapper - -```typescript -// src/lib/api/client.ts -const BASE = '/api'; - -async function apiFetch( - path: string, - options?: RequestInit -): Promise> { - try { - const res = await fetch(`${BASE}${path}`, { - headers: { 'Content-Type': 'application/json' }, - ...options, - }); - - if (!res.ok) { - const msg = await res.text().catch(() => res.statusText); - return { ok: false, error: msg, code: res.status }; - } - - const data: T = await res.json(); - return { ok: true, data }; - } catch (e) { - return { ok: false, error: String(e), code: 0 }; - } -} - -export const api = { - get: (path: string) => apiFetch(path), - post: (path: string, body: unknown) => - apiFetch(path, { method: 'POST', body: JSON.stringify(body) }), - patch: (path: string, body: unknown) => - apiFetch(path, { method: 'PATCH', body: JSON.stringify(body) }), - delete: (path: string) => apiFetch(path, { method: 'DELETE' }), -}; -``` - -### Module-Level API Functions - -```typescript -// src/lib/api/runs.ts -import { api } from './client'; -import type { Page, RunSummary, RunDetail } from '$lib/types'; - -export async function listRuns(params: { - space?: string; - q?: string; - page?: number; - status?: Status; -}): Promise>> { - const query = new URLSearchParams(); - if (params.space) query.set('space', params.space); - if (params.q) query.set('q', params.q); - if (params.page) query.set('page', String(params.page)); - if (params.status) query.set('status', params.status); - - return api.get>(`/runs?${query}`); -} - -export async function getRun(uid: string): Promise> { - return api.get(`/runs/${uid}`); -} - -export 
async function deleteRun(uid: string): Promise> { - return api.delete(`/runs/${uid}`); -} -``` - ---- - -## Utility Functions - -### Date Formatting - -```typescript -// src/lib/utils/format.ts - -const relativeFormatter = new Intl.RelativeTimeFormat('en', { numeric: 'auto' }); -const dateFormatter = new Intl.DateTimeFormat('en-US', { - month: 'short', day: 'numeric', year: 'numeric', - hour: '2-digit', minute: '2-digit', -}); - -export function formatRelative(iso: string): string { - const diff = (new Date(iso).getTime() - Date.now()) / 1000; - if (Math.abs(diff) < 60) return relativeFormatter.format(Math.round(diff), 'second'); - if (Math.abs(diff) < 3600) return relativeFormatter.format(Math.round(diff / 60), 'minute'); - if (Math.abs(diff) < 86400) return relativeFormatter.format(Math.round(diff / 3600), 'hour'); - return relativeFormatter.format(Math.round(diff / 86400), 'day'); -} - -export function formatDate(iso: string): string { - return dateFormatter.format(new Date(iso)); -} -``` - -### Duration / Metrics Formatting - -```typescript -export function formatDuration(ms: number): string { - if (ms < 1) return `${(ms * 1000).toFixed(0)}µs`; - if (ms < 1000) return `${ms.toFixed(1)}ms`; - if (ms < 60_000) return `${(ms / 1000).toFixed(2)}s`; - return `${(ms / 60_000).toFixed(1)}m`; -} - -export function formatNumber(n: number, decimals = 2): string { - return new Intl.NumberFormat('en-US', { - maximumFractionDigits: decimals, - minimumFractionDigits: decimals, - }).format(n); -} - -export function formatBytes(bytes: number): string { - const units = ['B', 'KB', 'MB', 'GB', 'TB']; - let i = 0; - let v = bytes; - while (v >= 1024 && i < units.length - 1) { v /= 1024; i++; } - return `${v.toFixed(i === 0 ? 
0 : 1)} ${units[i]}`; -} -``` - -### URL / Search Param Helpers - -```typescript -// src/lib/utils/url.ts -import { goto } from '$app/navigation'; -import { page } from '$app/stores'; -import { get } from 'svelte/store'; - -export function updateSearchParam(key: string, value: string | null): void { - const url = new URL(get(page).url); - if (value === null || value === '') url.searchParams.delete(key); - else url.searchParams.set(key, value); - goto(url, { keepFocus: true, noScroll: true, replaceState: true }); -} - -export function getSearchParam(key: string, fallback: string = ''): string { - return get(page).url.searchParams.get(key) ?? fallback; -} -``` - ---- - -## Tailwind CSS v4 Patterns - -### Dynamic Class Composition - -```typescript -// ✅ Use a helper to compose conditional classes — avoids string interpolation issues -export function cx(...classes: (string | false | null | undefined)[]): string { - return classes.filter(Boolean).join(' '); -} -``` - -```svelte -
-``` - -### Theme-Driven Color Mapping - -```typescript -// ✅ Map domain values to theme classes in TypeScript, not inline -const STATUS_STYLES: Record = { - active: { bg: 'bg-secondary-100', text: 'text-secondary-900', badge: 'bg-secondary-300' }, - failed: { bg: 'bg-error-100', text: 'text-error-900', badge: 'bg-error-600' }, - pending: { bg: 'bg-warning-100', text: 'text-warning-900', badge: 'bg-warning-300' }, - archived: { bg: 'bg-surface-200', text: 'text-black/60', badge: 'bg-surface-400' }, -}; - -// Usage in template -const styles = $derived(() => STATUS_STYLES[run.status]); -``` - -```svelte - - {run.status.toUpperCase()} - -``` - -**Rule:** Never construct partial Tailwind class names dynamically (e.g., `` `bg-${color}-500` ``). -Tailwind's scanner cannot detect them. Always use complete class strings or map from a lookup -object. - ---- - -## Error Handling Patterns - -### Component-Level Error Boundary - -```svelte - - - -
-
-

- Error {$page.status} -

-

{$page.error?.message ?? 'Something went wrong'}

- - Go Home - -
-
-``` - -### Inline Async Error Handling in Components - -```svelte - - -{#if loading} -
-{:else if result?.ok === false} -
- {result.error} -
-{:else if result?.ok} - -{/if} -``` - ---- - -## Code Style Rules - -### Naming - -| Thing | Convention | Example | -|---|---|---| -| Components | PascalCase | `RunTable.svelte` | -| Functions | camelCase | `fetchRuns`, `formatDate` | -| Types / Interfaces | PascalCase | `RunSummary`, `ApiResult` | -| Constants | UPPER_SNAKE | `MAX_PAGE_SIZE`, `DEFAULT_SORT` | -| State modules | camelCase + `.svelte.ts` | `selection.svelte.ts` | -| Route params | kebab-case | `[run-uid]` | -| Utility files | kebab-case | `format.ts`, `debounce.ts` | - -### Import Order - -```typescript -// 1. SvelteKit / Svelte built-ins -import { goto } from '$app/navigation'; -import { page } from '$app/stores'; - -// 2. Third-party -import { ExternalLink } from 'lucide-svelte'; - -// 3. Internal — absolute ($lib/...) -import { api } from '$lib/api/client'; -import type { RunSummary } from '$lib/types'; - -// 4. Internal — relative -import RunRow from './RunRow.svelte'; -``` - -### File Structure - -``` -src/ - lib/ - api/ # typed fetch wrappers (client.ts, runs.ts, ...) - state/ # shared reactive state (*.svelte.ts) - utils/ # pure utilities (format.ts, debounce.ts, url.ts, cx.ts) - types/ # shared type definitions (index.ts) - components/ - ui/ # generic reusable (Badge, Pill, Modal, ...) - runs/ # feature-specific (RunTable, RunRow, RunDetail, ...) - trace/ # trace-specific (TraceWaterfall, SpanRow, ...) 
- routes/ - (app)/ # layout group with shared nav - runs/ - +page.svelte - +page.server.ts - [uid]/ - +page.svelte - +page.server.ts - api/ # API route handlers - runs/ - +server.ts - [uid]/ - +server.ts -``` - ---- - -## Anti-Patterns — Never Do These - -| Violation | Correct Alternative | -|---|---| -| `$:` reactive statements | `$derived(() => ...)` rune | -| `writable()` / `readable()` stores | `$state` in `.svelte.ts` module | -| `on:click=` event directive | `onclick=` attribute | -| `any` type | `unknown` + type narrowing | -| Arbitrary hex in `style=` | opsml-theme CSS variables via Tailwind | -| Dynamic partial class names `` `bg-${x}-500` `` | Full class string lookup map | -| Sequential `await` inside a single effect | `Promise.all` for parallel fetches | -| Un-keyed `{#each}` on mutable lists | `{#each items as item (item.uid)}` | -| Mutating props directly | Emit via callback prop; parent owns state | -| `document.querySelector` in components | Bind element with `bind:this` | -| `localStorage` for server-side-rendered routes | URL params or cookies via `locals` | -| Throwing raw errors from load functions | `throw error(status, message)` from `@sveltejs/kit` | diff --git a/.claude/skills/opsml-ui/SKILL.md b/.claude/skills/opsml-ui/SKILL.md index fc7c30218c..56b3a9b176 100644 --- a/.claude/skills/opsml-ui/SKILL.md +++ b/.claude/skills/opsml-ui/SKILL.md @@ -31,6 +31,7 @@ Follow these repo-specific rules: - Prefer server-backed pagination, filtering, sorting, and aggregation for large remote datasets. - Use `VirtualScroller.svelte` or `@tanstack/svelte-virtual` for large scrollable views. - Do not add new UI libraries unless the user explicitly asks. +- All functions, classes, methods, and components must have JSDoc comments. Use TSDoc tags like `@param`, `@returns`, `@throws`, and `@example` where relevant. When making changes, inspect in this order: 1. 
The route entrypoint (`+page.svelte`, `+page.ts`, `+page.server.ts`, `+layout.*`) diff --git a/.codex/skills/opsml-rust-python/SKILL.md b/.codex/skills/opsml-rust-python/SKILL.md new file mode 100644 index 0000000000..7f6db89ba3 --- /dev/null +++ b/.codex/skills/opsml-rust-python/SKILL.md @@ -0,0 +1,49 @@ +--- +name: opsml-rust-python +description: Repo-local OpsML skill for Rust core, Python bindings, PyO3, maturin, card/registry/server/client logic, Python API exports, generated stubs, and cross-language tests. Use when working in OpsML crates, `py-opsml`, PyO3-exposed types, Rust errors that cross Python, card or registry behavior, server/client contracts, or Python-visible SDK behavior. Do not use for Svelte UI work; use `opsml-ui` instead. +--- + +# OpsML Rust/Python + +Use this skill as the source of truth for OpsML work where Rust core logic is exposed to Python through PyO3. OpsML is not a generic Rust/Python package: cards are the central abstraction, Rust owns the business logic, and Python is a thin ergonomic API over the Rust core. + +Start by locating the layer you are changing: +- Rust core design, traits, ownership, performance, cloning, async, or crate-local API shape: read `references/rust-core.md`. +- Rust card, registry, storage, SQL, server, auth, events, or shared contracts: read `references/architecture.md`. +- PyO3 classes, `#[pymethods]`, GIL usage, nested `#[pyclass]` fields, or Python lifetimes: read `references/pyo3-boundaries.md`. +- Error types, `PyErr`, server envelopes, CLI errors, or Python exceptions: read `references/errors.md`. +- Python exports, `__all__`, generated stubs, maturin setup, or Python-visible SDK behavior: read `references/python-api-and-stubs.md`. +- Tests, linting, formatting, or command selection: read `references/testing-workflows.md`. +- Agent-readable APIs, structured errors, validation, lint sensors, or harness work: read `references/agent-harness.md`. 
+ +Follow these repo-specific rules: +- Keep core behavior in Rust. Python should expose a typed, ergonomic API and small helpers, not duplicate card, registry, storage, or validation logic. +- Design Rust APIs around domain-owned data, precise traits, and explicit ownership before thinking about the Python binding. +- Keep Python lifetimes out of Rust-only code. Introduce `Python<'py>`, `Bound<'py, PyAny>`, `Py<T>`, and `PyErr` only where code crosses the Python boundary. +- Do not store `PyErr` in reusable Rust error enums. Convert Python errors into string-backed Rust variants, then convert Rust errors back into Python exceptions at the PyO3 boundary. +- For `#[pyclass]` fields whose type is also `#[pyclass]`, do not use `#[pyo3(get, set)]`. Implement manual `#[getter]` and `#[setter]` methods with `IntoPyObjectExt` and `extract`. +- Prefer zero-cost Rust abstractions: enums with delegated trait impls, static dispatch, concrete types, precise errors, iterators, references, ownership transfer, and `Arc` only where shared state is real. +- Treat speculative `Clone`, broad abstractions, unnecessary allocation, and Python-driven core design as design smells. Add them only for concrete call sites. +- Make new Rust core logic testable without Python whenever possible. Add Python tests for Python-visible workflows. +- Write errors and API contracts so humans and coding agents can debug them: stable names, clear fields, concise messages, and actionable hints. +- Use repository workflow tooling from `mise.toml`. Do not invent ad hoc commands when a `mise run ...` task exists. +- Inspect current dependency versions in `Cargo.toml`, `py-opsml/pyproject.toml`, and lockfiles before relying on version-specific behavior. + +When making Rust/PyO3 changes, inspect in this order: +1. The Rust crate that owns the domain behavior. +2. Shared types in `opsml-types/src/contracts/` if the behavior crosses server/client/Python boundaries. +3. 
PyO3 module registration under `py-opsml/src/`. +4. Python package re-exports under `py-opsml/python/opsml/`. +5. Generated stubs under `py-opsml/python/opsml/_opsml.pyi` and `py-opsml/python/opsml/stubs/`. +6. Rust and Python tests that model the user journey. + +Use these verification commands when relevant: +- Rust formatting: `mise run format` +- Rust linting: `mise run lints` +- Targeted Rust tests: `cargo test -p <crate> -- --nocapture --test-threads=1` +- Full Rust aggregate when justified: `mise run test:unit` +- Rebuild Python bindings after PyO3-exposed Rust changes: `mise run py:setup` +- Python linting: `mise run py:lints` +- Python unit tests: `mise run py:test:unit` + +Prefer narrow, local edits that match existing OpsML patterns. Broaden the architecture only when the current domain boundary is truly wrong for the user workflow. diff --git a/.codex/skills/opsml-rust-python/references/agent-harness.md b/.codex/skills/opsml-rust-python/references/agent-harness.md new file mode 100644 index 0000000000..c42b2171dc --- /dev/null +++ b/.codex/skills/opsml-rust-python/references/agent-harness.md @@ -0,0 +1,55 @@ +# Agent-Friendly OpsML Work + +OpsML is being shaped for both human developers and coding agents. The key direction is harness engineering: give agents strong guides before they act and strong sensors after they act. + +## Design For Agents And Humans + +Code should be readable by humans first and stable enough for agents to modify safely: +- Keep names domain-specific and unambiguous. +- Keep functions small enough that invariants are visible. +- Prefer typed contracts over loose dictionaries or stringly-typed conventions. +- Surface failures with stable codes, fields, hints, and docs where the repo supports them. +- Keep side effects at clear boundaries. + +## Same Envelope Principle + +Harness work should converge on the same structured shape across layers: +- HTTP responses. +- PyO3 exceptions. +- CLI output. +- `card.validate()`. +- `opsml lint`. 
+- Integrity checks. +- Eval results. + +Use fields such as: +- `code` +- `field` +- `hint` or `suggested_action` +- `doc_url` +- `retry` + +Agents should not need to parse paragraphs to understand what field to fix. + +## Validation And Sensors + +When adding governance behavior, think in layers: +- Edit-time or local lint sensors. +- Rust-native validation on core types. +- Registry/server chokepoints. +- Post-hoc integrity checks. +- Behavior evals for prompts and agents. + +The Rust core should own validations that define durable OpsML correctness. Python should expose them ergonomically and test them as user workflows. + +## Documentation Near APIs + +Public Rust and Python APIs should include useful docs when they define: +- User-visible behavior. +- Required invariants. +- Error conditions. +- Security constraints. +- Serialization formats. +- Cross-language boundary behavior. + +Do not add noisy comments that restate simple code. Add concise comments when they preserve hard-won context, such as why a Python lifetime is intentionally kept at the boundary. diff --git a/.codex/skills/opsml-rust-python/references/architecture.md b/.codex/skills/opsml-rust-python/references/architecture.md new file mode 100644 index 0000000000..0998b53ee7 --- /dev/null +++ b/.codex/skills/opsml-rust-python/references/architecture.md @@ -0,0 +1,73 @@ +# OpsML Rust/Python Architecture + +OpsML is an AI lifecycle platform organized around cards: versioned, encrypted, registry-tracked records for data, models, experiments, prompts, services, agents, and skills. Python users create and operate cards, but the durable behavior belongs in Rust. + +## Core Rule + +Rust is the source of truth for: +- Card structure and validation. +- Registry behavior. +- Server contracts and route behavior. +- Storage, SQL, encryption, auth, events, and versioning. +- Serialization formats that persist or cross process boundaries. + +Python should provide: +- Ergonomic constructors and usage patterns. 
+- Thin re-exports from `_opsml`. +- Small Python-only helpers where Python libraries are the natural boundary. +- User-journey tests for public Python behavior. + +Do not implement durable business logic twice in Rust and Python. If Python and Rust disagree, the design is already drifting. + +## Important Crates + +- `opsml-cards`: PyO3 card structs and card-specific behavior. +- `opsml-registry`: Python-facing `CardRegistry`; dispatches to local or server-backed operations. +- `opsml-types`: shared contract types, enums, and request/response shapes. +- `opsml-server`: Axum routes, middleware, API handlers, server errors. +- `opsml-client`: Rust HTTP client used by Python bindings in server mode. +- `opsml-sql`: database abstraction over SQLite, PostgreSQL, and MySQL. +- `opsml-storage`: storage abstraction over local and cloud backends. +- `opsml-crypt`: artifact encryption. +- `opsml-experiment`, `opsml-genai`, `opsml-service`: domain-specific card logic. +- `py-opsml`: Python package and PyO3 extension wiring. + +Read `AGENTS.md` for the full crate map before changing a cross-cutting path. + +## Enum-Based Backends + +OpsML favors enum dispatch for core backends: +- `StorageClientEnum` delegates `StorageClient` methods to local/S3/GCS/Azure variants. +- `SqlClientEnum` delegates SQL/card logic to SQLite/PostgreSQL/MySQL variants. + +When adding a backend or domain variant, follow this pattern before reaching for `Box<dyn Trait>`. + +## Contracts And Routes + +Shared request/response types belong in `opsml-types/src/contracts/`. + +Server routes live under `/opsml/api` and follow the existing handler shape: +- `State<Arc<AppState>>` for dependencies. +- `Extension<UserPermissions>` for protected routes, even read-only routes. +- `Query(...)` or `Json(...)` for inputs. +- `Result<Json<T>, (StatusCode, Json<OpsmlServerError>)>` or an established local equivalent. + +Use `parse_qs_query::<T>(&uri)` for query strings containing `Vec<T>`.
+ +## Registry Modes + +`CardRegistry` supports: +- Local mode: direct filesystem/SQLite-backed registry operations. +- Server mode: HTTP proxy through `opsml-client`. + +New behavior should preserve both modes unless the feature is explicitly server-only. A change that only works through the Python package but not through Rust registry/server paths is usually in the wrong layer. + +## Artifact Encryption + +Card artifacts are encrypted before storage. Do not bypass: +- `create_artifact_key()` +- `create_and_store_encrypted_file()` +- `download_artifact()` plus decryption +- `ArtifactKey` as the database source of truth + +Security-sensitive changes need targeted tests around key lookup, upload/download paths, and error behavior. diff --git a/.codex/skills/opsml-rust-python/references/errors.md b/.codex/skills/opsml-rust-python/references/errors.md new file mode 100644 index 0000000000..ed1c7a8858 --- /dev/null +++ b/.codex/skills/opsml-rust-python/references/errors.md @@ -0,0 +1,84 @@ +# Errors Across Rust And Python + +OpsML errors should be clear enough for humans and structured enough for agents. They should name what failed, where possible include the affected field/resource, and preserve enough context to fix the issue without parsing vague prose. + +## No Stored `PyErr` + +Do not store `PyErr` inside reusable Rust error enums. `PyErr` can pull Python runtime/lifetime concerns into pure Rust code and cause C linker or GIL-related failures in Rust tests. + +Use the canonical pattern in `crates/opsml_cards/src/skill/error.rs`: +- Rust error variants store Rust-owned data such as `String`. +- `From<PyErr> for SkillError` converts to a string-backed variant. +- `From<SkillError> for PyErr` exists only behind the Python feature and maps to a Python exception at the boundary.
+ +Preferred shape: + +```rust +#[derive(thiserror::Error, Debug)] +pub enum DomainError { + #[error("{0}")] + Error(String), + + #[error(transparent)] + Io(#[from] std::io::Error), +} + +#[cfg(feature = "python")] +impl From<PyErr> for DomainError { + fn from(err: PyErr) -> Self { + DomainError::Error(err.to_string()) + } +} + +#[cfg(feature = "python")] +impl From<DomainError> for PyErr { + fn from(err: DomainError) -> PyErr { + pyo3::exceptions::PyRuntimeError::new_err(err.to_string()) + } +} +``` + +Avoid this in core errors: + +```rust +#[error(transparent)] +Python(#[from] PyErr) +``` + +Only use direct Python error storage in code that is permanently Python-only and cannot be reached by Rust tests or Rust core logic. That should be rare in OpsML. + +## Transitive Error Chains + +The rule applies transitively. If `CardError` wraps `ModelInterfaceError`, and `ModelInterfaceError` stores `PyErr`, then `CardError` is contaminated too. + +When adding `#[from]` variants, inspect wrapped errors for PyO3 types. Prefer converting upstream errors to string-backed variants at the boundary. + +## Server Error Envelope + +Server handlers should use the existing `OpsmlServerError` helpers and structured fields where available. For new agent-facing or validation work, prefer stable data: +- `code` +- `field` +- `suggested_action` or hint +- `doc_url` +- `retry` + +Prefer one stable error shape across HTTP, PyO3, CLI, lint output, validation, and eval. A Python caller, CLI user, UI route, and coding agent should be able to recognize the same failure without parsing unrelated prose. + +## Human And Agent Debuggability + +Error messages should: +- Name the operation that failed. +- Include the resource identifier when safe, such as card UID, space/name/version, file path, or field. +- Avoid generic messages like "invalid input" when the failing field is known. +- Avoid logging secrets, tokens, encryption keys, or provider credentials.
+- Keep wording concise and stable enough for tests and agents. + +## Mapping To Python Exceptions + +Choose Python exception types deliberately: +- Invalid user input: `PyValueError`. +- Missing key or field: `PyKeyError` or `PyValueError`, depending on existing local style. +- Filesystem/IO: `PyOSError`. +- Runtime integration failure: `PyRuntimeError`. + +Follow nearby mappings in the same crate before introducing a new exception style. diff --git a/.codex/skills/opsml-rust-python/references/pyo3-boundaries.md b/.codex/skills/opsml-rust-python/references/pyo3-boundaries.md new file mode 100644 index 0000000000..80a90b0705 --- /dev/null +++ b/.codex/skills/opsml-rust-python/references/pyo3-boundaries.md @@ -0,0 +1,89 @@ +# PyO3 Boundary Rules + +PyO3 code is a boundary layer. Keep Python lifetimes, `PyErr`, and object extraction at the edge so Rust core code remains testable with normal Rust tests. + +## Boundary Types + +Use these types only when the code is actually crossing into Python: +- `Python<'py>` +- `Bound<'py, PyAny>` +- `Py` +- `PyErr` +- `#[pyclass]`, `#[pymethods]`, `#[pyfunction]` + +Pure Rust functions should generally accept and return Rust types, not Python-bound objects. + +## Constructors + +For Python constructors, keep extraction and conversion near `#[new]`, then call a Rust-native constructor such as `new_rs`, `from_config`, or a domain-specific builder. + +Preferred shape: + +```rust +#[cfg(feature = "python")] +#[pymethods] +impl SkillCard { + #[new] + pub fn new(skill: &Bound<'_, PyAny>, space: Option<&str>) -> Result { + let skill = skill.extract::()?; + Self::new_rs(skill, space, None, None, None, None, None, None) + } +} +``` + +Keep the Rust-native constructor usable without Python. + +## Nested `#[pyclass]` Fields + +If a `#[pyclass]` struct has a field whose type is itself a `#[pyclass]`, do not put `#[pyo3(get, set)]` on that field. PyO3-generated accessors can leak Python lifetimes into pure Rust call sites and tests. 
+ +Use the canonical pattern in `crates/opsml_cards/src/skill/card.rs`: +- `skill` field getter at `SkillCard::skill`. +- `set_skill` setter using `extract`. +- `dependencies` getter using `IntoPyObjectExt`. +- `set_dependencies` setter using `extract::>()`. + +Preferred shape: + +```rust +#[getter] +pub fn skill<'py>(&self, py: Python<'py>) -> Result, SkillError> { + Ok(self.skill.clone().into_bound_py_any(py)?) +} + +#[setter] +pub fn set_skill(&mut self, skill: &Bound<'_, PyAny>) -> Result<(), SkillError> { + self.skill = skill.extract::()?; + Ok(()) +} +``` + +## Python Objects In Cards + +Some cards hold Python-owned objects, such as model or data interfaces. Keep GIL acquisition scoped tightly: +- Acquire the GIL only where calling Python methods or extracting Python objects. +- Convert Python-side data into Rust metadata before serialization. +- Do not attempt to serialize `Py` directly. +- Reconstruct Python-facing objects only at load/deserialization boundaries where the Python API needs them. + +## Feature Gates + +Respect existing `python` and `server` feature gates. If a Rust unit test fails with Python linking or libpython errors, inspect for leaked PyO3 types in core code or transitive error chains. + +Common causes: +- `PyErr` stored in a reusable error enum. +- `#[pyo3(get, set)]` on nested `#[pyclass]` fields. +- Python-only imports not guarded with `#[cfg(feature = "python")]`. + +## Module Registration + +New Python-visible Rust functions/classes must be wired through `py-opsml/src/lib.rs` and the appropriate submodule registration function, such as: +- `card::add_card_module(m)?` +- `data::add_data_module(m)?` +- `model::add_model_module(m)?` +- `experiment::add_experiment_module(m)?` +- `agent::add_agent_module(m)?` +- `service::add_service_module(m)?` +- `types::add_types_module(m)?` + +Do not stop after adding `#[pyclass]`; registration, Python exports, stubs, and tests are part of the public API surface. 
diff --git a/.codex/skills/opsml-rust-python/references/python-api-and-stubs.md b/.codex/skills/opsml-rust-python/references/python-api-and-stubs.md new file mode 100644 index 0000000000..2244c9a0d0 --- /dev/null +++ b/.codex/skills/opsml-rust-python/references/python-api-and-stubs.md @@ -0,0 +1,59 @@ +# Python API And Stubs + +The Python package is the user-facing SDK, but the Rust extension owns the core behavior. Python additions should be thin, typed, and tested through user workflows. + +## Public API Wiring + +For new Python-visible Rust API, check every layer: +1. Rust type/function exists in the owning crate. +2. PyO3 class/function is registered in the appropriate `py-opsml/src/*` module. +3. The module is attached from `py-opsml/src/lib.rs`. +4. Python package exports are updated under `py-opsml/python/opsml/`. +5. `__all__` is updated where the package uses it. +6. Stubs are regenerated or updated through the repo workflow. +7. Python tests cover the user-visible behavior. + +Do not leave a type reachable only through `_opsml` unless that is already the local convention for the feature. + +## Stub Workflow + +Generated stubs live under: +- `py-opsml/python/opsml/_opsml.pyi` +- `py-opsml/python/opsml/stubs/` + +Use the repo task: +- `mise run py:setup` + +This task runs stub generation, syncs Python dependencies, and builds the Rust extension through maturin. Run it after Rust changes that affect Python-exposed classes, functions, signatures, docs, or enums. + +## Python Package Style + +Python code should: +- Keep durable business behavior in Rust. +- Use typed, ergonomic public APIs. +- Add docstrings to public functions/classes when intent, side effects, errors, or examples are not obvious. +- Avoid loose `Any` unless the Python boundary is truly dynamic. +- Follow existing import, export, and test conventions in `py-opsml/python/opsml/`. + +When behavior is Python-only because it depends on a Python ecosystem object, keep the boundary explicit. 
Examples include Python model/data interfaces where user subclasses implement `save`, `load`, or prediction behavior. + +## Python Tests + +Python tests should model how a real user interacts with OpsML: +- Construct the Python-facing object. +- Register or load through `CardRegistry` when relevant. +- Assert stable behavior and error messages. +- Use fixtures such as `mock_db`, `pandas_data`, or `random_forest_classifier` where appropriate. + +Do not test generated implementation details when a user workflow test would prove the contract better. + +## Maturin And Versions + +Do not rely on remembered PyO3 or maturin versions. Inspect: +- Root `Cargo.toml`. +- `py-opsml/Cargo.toml`. +- `py-opsml/pyproject.toml`. +- Lockfiles. +- `mise.toml`. + +Version-specific PyO3 patterns should match the versions currently pinned by the repository. diff --git a/.codex/skills/opsml-rust-python/references/rust-core.md b/.codex/skills/opsml-rust-python/references/rust-core.md new file mode 100644 index 0000000000..d1268ecd6b --- /dev/null +++ b/.codex/skills/opsml-rust-python/references/rust-core.md @@ -0,0 +1,101 @@ +# Rust Core Practices + +OpsML's Python API is only as good as the Rust core underneath it. Design Rust code first as a clean, testable library; then expose the right boundary to Python. + +## API Shape + +Prefer APIs that make invalid states difficult to represent: +- Use domain types instead of raw strings or loose maps for durable concepts. +- Use enums for closed sets of states, backends, card kinds, operations, and variants. +- Use structs with named fields for meaningful records. +- Use traits for behavior shared across real implementations, not for one call site. +- Keep public functions explicit about inputs, outputs, and error behavior. + +Do not shape core Rust APIs around what is easiest to extract from `PyAny`. Convert Python inputs at the boundary, then call Rust-native functions. 
+ +## Zero-Cost Abstractions + +Prefer abstractions that compile down to direct code: +- Generic functions with trait bounds when the caller can be monomorphized. +- Enum dispatch for known backend variants, matching existing `StorageClientEnum` and `SqlClientEnum` patterns. +- Iterators instead of building intermediate vectors. +- Borrowed data such as `&str`, `&Path`, and `&[T]` when ownership is not needed. +- `Cow<'_, str>` only when both borrowed and owned paths are real and the complexity pays for itself. + +Use `Box<dyn Trait>` only when runtime extensibility is required and the allocation/dynamic dispatch is an intentional tradeoff. + +## Ownership And Cloning + +Treat `.clone()` as a question, not a reflex: +- Prefer borrowing when the callee does not need ownership. +- Prefer moving values when the current scope no longer needs them. +- Use `Arc` for real shared ownership across tasks, state, or handlers. +- Avoid `Arc<Mutex<T>>` as a default; first ask whether ownership, message passing, a narrower lock, or immutable state is enough. +- Do not derive `Clone` speculatively. Derive it only when concrete call sites need it. + +Common acceptable clones: +- Small identifiers or config values at API boundaries where ownership is clearer than lifetimes. +- `Arc::clone` for shared application state. +- Data copied into an owned response type. + +Common suspicious clones: +- Large vectors, maps, schemas, payloads, or serialized values in loops. +- Cloning to work around a borrow caused by overly broad scopes. +- Cloning Python objects or PyO3 wrappers without a clear GIL/boundary reason. + +## Allocation And Strings + +Avoid accidental allocation in hot or repeated paths: +- Use `String::with_capacity` when building a known-size string. +- Use `Vec::with_capacity` when the item count is known or cheaply estimated. +- Use `write!` into an existing `String` instead of repeated `format!` in loops. +- Avoid serializing/deserializing just to move data between Rust layers.
+- Keep JSON conversion at API, storage, or Python boundaries unless the core domain truly stores JSON. + +Do not micro-optimize code that is not on a meaningful path. Prefer clear algorithms first, then optimize measured bottlenecks. + +## Traits + +Use traits when they express a stable capability: +- Storage behavior. +- SQL/card persistence behavior. +- Card lifecycle behavior. +- Interface behavior with multiple implementations. + +Keep traits small enough to implement correctly. Avoid kitchen-sink traits that force unrelated implementations to carry meaningless methods. + +Prefer associated types or generics when they simplify call sites. Avoid clever trait machinery when an enum, function, or concrete type would be easier to read and test. + +## Async And Shared State + +Use async for I/O boundaries: HTTP, database, storage, network calls, and server handlers. Keep pure computation synchronous unless the caller requires async. + +For shared server state: +- Put heavy shared dependencies in `AppState`. +- Wrap shared backend clients in `Arc` as established by the repo. +- Avoid cloning heavy clients or rebuilding pools per request. +- Keep locks out of request hot paths where possible. + +Use timeouts, cancellation-aware APIs, and bounded concurrency for external calls when the surrounding code already supports those patterns. + +## Error Design + +Rust errors should be precise and useful before they become Python exceptions: +- Use `thiserror` enums for domain errors. +- Include the failing operation and field/resource where possible. +- Preserve source errors with `#[from]` only when the wrapped error is safe for Rust-only code. +- Avoid `anyhow` in library surfaces unless the crate already uses it for an internal boundary. + +For errors crossing Python, follow `references/errors.md`. + +## Testing Rust Core + +New core logic should have Rust tests that do not require Python unless the behavior is inherently Python-facing. 
+ +Good Rust tests: +- Exercise domain behavior through public or crate-visible APIs. +- Cover success, edge cases, and stable failures. +- Use local fixtures and mocks instead of external services. +- Keep SQLite/server tests isolated with `--test-threads=1` where the repo requires it. + +If a test requires Python for non-Python behavior, inspect the design for leaked PyO3 types or a misplaced boundary. diff --git a/.codex/skills/opsml-rust-python/references/testing-workflows.md b/.codex/skills/opsml-rust-python/references/testing-workflows.md new file mode 100644 index 0000000000..38ab3716a1 --- /dev/null +++ b/.codex/skills/opsml-rust-python/references/testing-workflows.md @@ -0,0 +1,64 @@ +# Testing And Workflow Commands + +Use `mise` as the default command surface. The tasks encode environment variables, working directories, feature flags, and server setup. + +## Default Commands + +Rust: +- Format: `mise run format` +- Lint: `mise run lints` +- Full Rust aggregate when justified: `mise run test:unit` +- Targeted crate tests: `cargo test -p -- --nocapture --test-threads=1` + +Python: +- Build stubs and extension: `mise run py:setup` +- Format: `mise run py:format` +- Lint/type checks: `mise run py:lints` +- CI lint gate: `mise run py:lints-ci` +- Unit tests: `mise run py:test:unit` +- Service tests: `mise run py:test:service` +- Integration tests: `mise run py:test:integration` + +Frontend work belongs to `opsml-ui`, not this skill. + +## Command Selection + +Use targeted tests first: +- Changed a single Rust crate: run that crate's focused tests. +- Changed SQL behavior: use the relevant `test:sql-*` task. +- Changed server handlers: use targeted `opsml-server` tests with `--test-threads=1`. +- Changed PyO3-exposed API: run `mise run py:setup`, then targeted Python tests or `mise run py:test:unit`. +- Changed Python-only public API: run targeted pytest, then `mise run py:lints` if practical. 
+ +Broaden to aggregate tasks when the change crosses boundaries or before final handoff on high-risk work. + +## Server Test Caution + +Do not run all `opsml_server` tests casually. The repo guidance warns that broad failures can leave stale state. Prefer isolated tests and cleanup. Use `TestHelper::new(None)` patterns and `helper.cleanup()` where applicable. + +Canonical Rust server tests: +- Use `TestHelper::new(None)`. +- Send requests through `helper.send_oneshot(request)` so auth headers are added. +- Use `retry_flaky_test!` for transient SQLite contention. +- Run with `--test-threads=1`. +- Mock Scouter and SSO; do not require live external services. + +## PyO3 Change Checklist + +After changing a Python-exposed Rust type: +1. Confirm Rust core behavior is tested without Python where possible. +2. Confirm PyO3 registration is complete. +3. Run `mise run py:setup`. +4. Confirm imports work from `opsml`, not only from `_opsml`. +5. Add or update Python tests for the user workflow. +6. Run targeted Python tests or `mise run py:test:unit`. + +## Test Philosophy + +Tests should prove user journeys and stable contracts: +- Card creation, serialization, registration, retrieval, and loading. +- Local and server-backed registry behavior where both apply. +- Structured error behavior for invalid inputs. +- Python-visible behavior from the public `opsml` package. + +Avoid tests that mirror private implementation details. Add regression tests when fixing a bug. diff --git a/.codex/skills/opsml-ui/SKILL.md b/.codex/skills/opsml-ui/SKILL.md index fc7c30218c..fc2e56ac1b 100644 --- a/.codex/skills/opsml-ui/SKILL.md +++ b/.codex/skills/opsml-ui/SKILL.md @@ -31,6 +31,11 @@ Follow these repo-specific rules: - Prefer server-backed pagination, filtering, sorting, and aggregation for large remote datasets. - Use `VirtualScroller.svelte` or `@tanstack/svelte-virtual` for large scrollable views. - Do not add new UI libraries unless the user explicitly asks. 
+- All functions, classes, methods, and components must have JSDoc comments. Use TSDoc tags like `@param`, `@returns`, `@throws`, and `@example` where relevant. + +When debugging, use the browser devtools, Svelte devtools, and Rust logging as your primary tools. Add temporary logging or breakpoints in the Svelte code or Rust backend as needed, but remove them before submitting the PR. + +When testing, prefer automated tests with Vitest and Svelte Testing Library. Add tests for new features and bug fixes, and consider adding tests for complex existing features that lack coverage. Use manual testing and build checks for visual polish, accessibility, performance, and exploratory testing of complex interactions. When making changes, inspect in this order: 1. The route entrypoint (`+page.svelte`, `+page.ts`, `+page.server.ts`, `+layout.*`) diff --git a/AGENTS.md b/AGENTS.md index 66ca56c6b4..2843154251 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -428,6 +428,20 @@ Fixtures in `tests/conftest.py`: Test markers: `@pytest.mark.tensorflow` gates TF-specific tests; `mise run py:test:unit` excludes them by default. +### ADK agent harness + +The offline Google ADK harness lives under `py-opsml/dev/integration/agent/`. +Use it to verify agent tracing and attached eval behavior without a live Scouter: + +```bash +mise run py:setup-adk +mise run dev:e2e:smoke:offline +``` + +For live E2E runs, keep a sibling Scouter checkout at `../scouter` or set +`SCOUTER_DIR=/path/to/scouter`, then run `mise run dev:e2e:start:servers` +before `mise run dev:e2e:agent`. + --- ## Feature Flags diff --git a/CLAUDE.md b/CLAUDE.md index f7fca0953c..9aa330bf2e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -2,14 +2,4 @@ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. 
-@AGENTS.md - -## Claude Code Notes - -- After modifying any Rust type exposed to Python, run the `pyo3-checklist` skill to verify all 6 wiring layers are complete (Rust impl → re-export → PyO3 registration → `__init__.py` → `__all__` → `.pyi` stub). -- Use the `rust-python` skill when working with PyO3 bindings or the Rust↔Python boundary. -- Use the `opsml-ui` skill when building, editing, or designing any UI component, page, layout, or dashboard. -- Use the `opsml-ts-svelte` skill when writing TypeScript logic, Svelte 5 components, or SvelteKit routing/data loading. -- Use the `agentic-architect` skill when modifying GenAI features, LLM client code, tool-calling logic, or anything in `opsml-mcp` / `opsml-genai`. -- Run the `pre-pr` skill before creating any pull request. -- Run the `review` skill for a full parallel code review (security, style, bugs, frontend) before merging. +@AGENTS.md \ No newline at end of file diff --git a/Makefile b/Makefile deleted file mode 100644 index e664b62ec9..0000000000 --- a/Makefile +++ /dev/null @@ -1,232 +0,0 @@ -.PHONY: format -format: - cargo fmt --all - -.PHONY: lints -lints: - cargo clippy --workspace --all-targets -- -D warnings - -lints.pedantic: - cargo clippy --workspace --all-targets -- -D warnings -W clippy::pedantic -A clippy::must_use_candidate -A clippy::missing_errors_doc - -####### TOML tests -.PHONY: test.toml -test.toml: - cargo test -p opsml-toml -- --nocapture --test-threads=1 - -####### CLI tests -.PHONY: test.cli -test.cli: - cargo test -p opsml-cli -- --nocapture --test-threads=1 - -####### SQL tests -.PHONY: test.sql.sqlite -test.sql.sqlite: - cargo test -p opsml-sql test_sqlite -- --nocapture --test-threads=1 - -.PHONY: test.sql.enum -test.sql.enum: - cargo test -p opsml-sql test_enum -- --nocapture --test-threads=1 - -.PHONY: build.postgres -build.postgres: - docker compose down --volumes - docker compose up -d --build postgres --wait - -.PHONY: test.sql.postgres -test.sql.postgres: build.postgres - 
cargo test -p opsml-sql test_postgres -- --nocapture --test-threads=1 - -.PHONY: build.mysql -build.mysql: - docker compose down --volumes - docker compose up -d --build mysql --wait - -.PHONY: test.sql.mysql -test.sql.mysql: build.mysql - cargo test -p opsml-sql test_mysql -- --nocapture --test-threads=1 - docker compose down --volumes - -.PHONY: test.sql -test.sql: test.sql.sqlite test.sql.enum test.sql.postgres test.sql.mysql - -test.service: - cargo test -p opsml-service -- --nocapture --test-threads=1 - -######## Storage tests - -.PHONY: test.storage.local.server -test.storage.local.server: - cargo test -p opsml-storage test_local_storage_server -- --nocapture --test-threads 1 - -######## Collective Unit Tests -##.PHONY: test.unit -##test.unit: test.toml test.cli test.sql test.storage.server test.utils - -######## Server tests - -.PHONY: start.server -start.server: stop.server build.ui - cargo build -p opsml-server - ./target/debug/opsml-server - -start.server.background: stop.server build.ui - cargo build -p opsml-server - ./target/debug/opsml-server & - -.PHONY: stop.server -stop.server: - -lsof -ti:3000 | xargs kill -9 2>/dev/null || true -# rm -f opsml.db || true -# rm -rf opsml_registries || true - -######## Storage tests -.PHONY: test.storage.client -test.storage.client: - cargo test -p opsml-storage test_local_storage_client -- --nocapture - -.PHONY: test.storage.server -test.storage.server: - cargo test -p opsml-storage test_local_storage_server -- --nocapture --test-threads 1 - - -.PHONY: test.utils -test.utils: - cargo test -p opsml-utils -- --nocapture - -.PHONY: test.opsml.server -test.server: - cargo test -p opsml-server test_opsml_server -- --nocapture --test-threads=1 - -.PHONY: test.opsml.registry.client -test.opsml.registry.client: - cargo test --features server -p opsml-registry test_registry_client -- --nocapture --test-threads=1 - -.PHONY: test.version -test.version: - cargo test -p opsml-version -- --nocapture --test-threads=1 - -.PHONY: 
test.unit -test.unit: test.toml test.cli test.sql test.storage.server test.server test.utils test.version - -###### UI ###### -UI_DIR = crates/opsml_server/opsml_ui -PY_DIR = py-opsml - -ui.update.deps: - cd $(UI_DIR) && pnpm update - -.PHONY: ui.install.deps -install.ui.deps: - cd $(UI_DIR) && pnpm install - -.PHONY: ui.install.deps.prod -install.ui.deps.prod: - # remove existing node_modules - rm -rf $(UI_DIR)/node_modules - # install only production dependencies - cd $(UI_DIR) && pnpm install --prod - -.PHONY: build.ui -build.ui: - cd $(UI_DIR) && pnpm install - cd $(UI_DIR) && pnpm build - -ui.dev: - cd $(UI_DIR) && pnpm run dev - -populate.cards: - cd $(PY_DIR) && uv run python -m dev.populate_cards - - -.PHONY: changelog -prepend.changelog: - # get version from Cargo.toml - @VERSION=$(shell grep '^version =' Cargo.toml | cut -d '"' -f 2) && \ - git cliff --unreleased --tag $$VERSION --prepend CHANGELOG.md - - -###### Development & Production - Separate Servers ###### -.PHONY: dev.backend -dev.backend: - cargo build -p opsml-server - OPSML_SERVER_PORT=8080 ./target/debug/opsml-server - -.PHONY: dev.frontend -dev.frontend: - cd $(UI_DIR) && pnpm run dev - -.PHONY: build.backend -build.backend: - cargo build -p opsml-server --target - -.PHONY: start.backend -start.backend: build.backend - OPSML_SERVER_PORT=8080 ./target/release/opsml-server - -.PHONY: start.frontend -start.frontend: build.ui - cd $(UI_DIR) && node build/index.js - -.PHONY: dev.both -dev.both: - @echo "Starting both servers in development mode..." - @echo "Backend API: http://localhost:8080" - @echo "Frontend SSR: http://localhost:3000" - @make -j2 dev.backend dev.frontend - -.PHONY: start.both -start.both: - @echo "Starting both servers in production mode..." 
- @echo "Backend API: http://localhost:8080" - @echo "Frontend SSR: http://localhost:3000" - @make -j2 dev.backend start.frontend - -.PHONY: stop.both -stop.both: - -lsof -ti:3000 | xargs kill -9 2>/dev/null || true - -lsof -ti:8080 | xargs kill -9 2>/dev/null || true - - -###### Scouter Integration -###### Service B (Scouter) is expected to run independently on port 8000. -###### Service A (opsml) exposes its backend on 8090 and frontend on 3000. -###### Stopping Service B will NOT affect Service A — opsml degrades gracefully -###### (Scouter-backed features show as unavailable; core UI keeps working). -###### -###### Usage: -###### Terminal 1: make dev.both.scouter (starts opsml backend + frontend) -###### Terminal 2: cd && make start.server -###### To stop opsml only: make stop.both.scouter - -.PHONY: dev.backend.scouter -dev.backend.scouter: - cargo build -p opsml-server - OPSML_SERVER_PORT=8090 SCOUTER_SERVER_URI=http://localhost:8000 ./target/debug/opsml-server - -.PHONY: dev.both.scouter -dev.both.scouter: stop.both.scouter - @echo "Building opsml backend..." - @cargo build -p opsml-server - @echo "Starting backend on port 8090 (Scouter integration: http://localhost:8000)" - @echo "Starting frontend on port 3000" - @# Run backend as an independent background process (separate process group so - @# Ctrl-C on the frontend does not propagate to it). - @OPSML_SERVER_PORT=8090 SCOUTER_SERVER_URI=http://localhost:8000 \ - nohup ./target/debug/opsml-server > /tmp/opsml-backend.log 2>&1 & \ - echo $$! > /tmp/opsml-backend.pid && \ - echo "Backend PID: $$(cat /tmp/opsml-backend.pid) — logs: /tmp/opsml-backend.log" - @# Frontend runs in the foreground; kill it with Ctrl-C when done. - cd $(UI_DIR) && OPSML_SERVER_PORT=8090 pnpm run dev - -.PHONY: stop.both.scouter -stop.both.scouter: - @# Kill by saved PID first (clean shutdown), then fall back to port scan. 
- -[ -f /tmp/opsml-backend.pid ] && kill $$(cat /tmp/opsml-backend.pid) 2>/dev/null || true - -rm -f /tmp/opsml-backend.pid - -lsof -ti:8090 | xargs kill -9 2>/dev/null || true - -lsof -ti:3000 | xargs kill -9 2>/dev/null || true - -.PHONY: logs.backend.scouter -logs.backend.scouter: - tail -f /tmp/opsml-backend.log \ No newline at end of file diff --git a/crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/AgentEvalDashboard.svelte b/crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/AgentEvalDashboard.svelte index d34b85e71f..2c375f138b 100644 --- a/crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/AgentEvalDashboard.svelte +++ b/crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/AgentEvalDashboard.svelte @@ -37,6 +37,8 @@ Loader2, TrendingUp, KeySquare } from 'lucide-svelte'; + type PageDirection = 'next' | 'previous'; + interface Props { agentName: string; agentVersion: string; @@ -93,55 +95,82 @@ } catch (err) { console.error('[AgentEvalDashboard] Refresh failed:', err); } finally { + evalData = [...evalData]; isRefreshing = false; resetRecordTraceMap(); timeRangeState.endRefresh(); } } - /** Advance the record page for all evals that have a cursor in that direction. */ - async function handleRecordPageChange(direction: string) { + /** + * Clone a successful eval entry after refresh so derived merged pages see + * fresh object identities instead of relying on nested `selectedData` + * mutation inside `refreshAgentMonitoringData`. + */ + function cloneEvalEntry(e: AgentPromptEvalData): AgentPromptEvalData { + if (e.monitoringData.status !== 'success') return e; + return { + ...e, + monitoringData: { + ...e.monitoringData, + selectedData: { ...e.monitoringData.selectedData }, + }, + }; + } + + /** + * Advance the record page for every prompt eval with a cursor in the + * requested direction. Evals without that cursor remain on their current page. 
+ */ + async function handleRecordPageChange(direction: PageDirection) { isRefreshing = true; timeRangeState.beginRefresh(); try { - await Promise.all( + const nextEvalData = await Promise.all( evalData.map(async (e) => { - if (e.monitoringData.status !== 'success') return; + if (e.monitoringData.status !== 'success') return e; const page = e.monitoringData.selectedData.records; const canPage = direction === 'next' ? page?.has_next : page?.has_previous; const cursor = direction === 'next' ? page?.next_cursor : page?.previous_cursor; - if (!canPage || !cursor) return; + if (!canPage || !cursor) return e; await refreshAgentMonitoringData(fetch, e.monitoringData, { recordCursor: { cursor, direction }, }); + return cloneEvalEntry(e); }) ); + evalData = nextEvalData; } catch (err) { console.error('[AgentEvalDashboard] Record page change failed:', err); } finally { isRefreshing = false; - appendToRecordTraceMap(); + resetRecordTraceMap(); timeRangeState.endRefresh(); } } - /** Advance the workflow page for all evals that have a cursor in that direction. */ - async function handleWorkflowPageChange(direction: string) { + /** + * Advance the workflow page for every prompt eval with a cursor in the + * requested direction. Record pagination state is intentionally untouched. + */ + async function handleWorkflowPageChange(direction: PageDirection) { isRefreshing = true; timeRangeState.beginRefresh(); try { - await Promise.all( + const nextEvalData = await Promise.all( evalData.map(async (e) => { - if (e.monitoringData.status !== 'success') return; + if (e.monitoringData.status !== 'success') return e; const page = e.monitoringData.selectedData.workflows; const canPage = direction === 'next' ? page?.has_next : page?.has_previous; const cursor = direction === 'next' ? 
page?.next_cursor : page?.previous_cursor; - if (!canPage || !cursor) return; + if (!canPage || !cursor) return e; await refreshAgentMonitoringData(fetch, e.monitoringData, { workflowCursor: { cursor, direction }, }); + return cloneEvalEntry(e); }) ); + evalData = nextEvalData; } catch (err) { console.error('[AgentEvalDashboard] Workflow page change failed:', err); } finally { diff --git a/crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/AgentEvalRecordTable.svelte b/crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/AgentEvalRecordTable.svelte index 7077004de9..25a637c389 100644 --- a/crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/AgentEvalRecordTable.svelte +++ b/crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/AgentEvalRecordTable.svelte @@ -15,6 +15,8 @@ import { ArrowLeft, ArrowRight } from 'lucide-svelte'; import EvalRecordSideBar from '$lib/components/scouter/agent/record/EvalRecordSideBar.svelte'; + type PageDirection = 'next' | 'previous'; + let { records, hasNext, @@ -25,7 +27,7 @@ records: RecordWithAgent[]; hasNext: boolean; hasPrevious: boolean; - onPageChange: (direction: string) => void; + onPageChange: (direction: PageDirection) => void | Promise; isRefreshing?: boolean; } = $props(); @@ -72,12 +74,23 @@ isSelected = false; } + /** + * Guards the record pagination callback so disabled controls cannot emit + * stale page requests while a refresh is already in flight. + */ + async function requestPage(direction: PageDirection) { + if (isRefreshing) return; + if (direction === 'next' && !hasNext) return; + if (direction === 'previous' && !hasPrevious) return; + await onPageChange(direction); + } + // Prompt column prepended before ID; 1fr on Entity Type consumes whitespace. // min-w-[1050px] = EvalRecordTable's 900px + ~150px for the Prompt column. const gridLayout = "grid-template-columns: 140px 80px 140px 100px 1fr 140px 140px 100px;"; -
+
@@ -176,16 +189,20 @@ {#if records.length > 0}
diff --git a/crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/AgentEvalWorkflowTable.svelte b/crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/AgentEvalWorkflowTable.svelte index 149cc4c94d..ccb9f363c4 100644 --- a/crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/AgentEvalWorkflowTable.svelte +++ b/crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/AgentEvalWorkflowTable.svelte @@ -11,6 +11,8 @@ import { ArrowLeft, ArrowRight, ExternalLink } from 'lucide-svelte'; import AgentEvalWorkflowSideBar from '$lib/components/scouter/agent/workflow/AgentEvalWorkflowSideBar.svelte'; + type PageDirection = 'next' | 'previous'; + let { workflows, hasNext, @@ -21,7 +23,7 @@ workflows: WorkflowWithAgent[]; hasNext: boolean; hasPrevious: boolean; - onPageChange: (direction: string) => void; + onPageChange: (direction: PageDirection) => void | Promise; isRefreshing?: boolean; } = $props(); @@ -61,11 +63,22 @@ isSelected = false; } + /** + * Guards the workflow pagination callback so disabled controls cannot emit + * stale page requests while a refresh is already in flight. + */ + async function requestPage(direction: PageDirection) { + if (isRefreshing) return; + if (direction === 'next' && !hasNext) return; + if (direction === 'previous' && !hasPrevious) return; + await onPageChange(direction); + } + // Prompt + Nav columns added before ID; 1fr on Record UID consumes whitespace const gridLayout = "grid-template-columns: 120px 50px 60px 140px 100px 80px 80px 80px 100px 1fr;"; -
+
@@ -166,16 +179,20 @@ {#if workflows.length > 0}
diff --git a/crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/__tests__/AgentEvalDashboard.pagination.test.ts b/crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/__tests__/AgentEvalDashboard.pagination.test.ts new file mode 100644 index 0000000000..54c8d66ee8 --- /dev/null +++ b/crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/__tests__/AgentEvalDashboard.pagination.test.ts @@ -0,0 +1,210 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { fireEvent, render, screen, waitFor } from "@testing-library/svelte"; +import AgentEvalDashboard from "../AgentEvalDashboard.svelte"; +import type { AgentPromptEvalData } from "../types"; +import type { + AgentMonitoringPageData, + AgentRefreshOptions, +} from "$lib/components/scouter/dashboard/utils"; +import { Status } from "$lib/components/scouter/agent/types"; +import { EntityType } from "$lib/components/scouter/types"; + +type SuccessfulAgentMonitoringPageData = Extract< + AgentMonitoringPageData, + { status: "success" } +>; + +const refreshCalls = vi.hoisted(() => [] as unknown[]); + +vi.mock("chart.js/auto", () => ({ + Chart: class { + static register = vi.fn(); + destroy = vi.fn(); + resetZoom = vi.fn(); + }, +})); +vi.mock("chart.js", () => ({ Filler: {} })); +vi.mock("chartjs-plugin-zoom", () => ({ default: {} })); +vi.mock("chartjs-plugin-annotation", () => ({ default: {} })); +vi.mock("chartjs-adapter-date-fns", () => ({})); + +vi.mock("$lib/components/scouter/dashboard/utils", async () => { + const actual = await vi.importActual( + "$lib/components/scouter/dashboard/utils", + ); + return { + ...actual, + refreshAgentMonitoringData: vi.fn(async ( + _fetch: typeof globalThis.fetch, + monitoringData: SuccessfulAgentMonitoringPageData, + options: AgentRefreshOptions = {}, + ) => { + refreshCalls.push(options); + if (options.recordCursor) { + monitoringData.selectedData = { + ...monitoringData.selectedData, + records: { + items: 
[makeRecord("pagetwo2", 2, "2026-01-02T00:00:00Z")], + has_next: false, + has_previous: true, + previous_cursor: { id: 2, created_at: "2026-01-02T00:00:00Z" }, + }, + }; + } + if (options.workflowCursor) { + monitoringData.selectedData = { + ...monitoringData.selectedData, + workflows: { + items: [makeWorkflow(2, "workflow-record-page-2", "2026-01-02T00:00:00Z")], + has_next: false, + has_previous: true, + previous_cursor: { id: 2, created_at: "2026-01-02T00:00:00Z" }, + }, + }; + } + }), + }; +}); + +/** Builds a minimal evaluation record with cursor fields needed by pagination. */ +function makeRecord(uid: string, id: number, createdAt: string) { + return { + record_id: `record-${id}`, + created_at: createdAt, + uid, + context: {}, + id, + updated_at: null, + processing_started_at: null, + processing_ended_at: null, + processing_duration: null, + entity_id: id, + entity_uid: `entity-${id}`, + status: Status.Processed, + entity_type: EntityType.Agent, + trace_id: `trace-${id}`, + }; +} + +/** Builds a minimal workflow result with cursor fields needed by pagination. */ +function makeWorkflow(id: number, recordUid: string, createdAt: string) { + return { + id, + record_uid: recordUid, + entity_id: id, + entity_uid: `entity-${id}`, + created_at: createdAt, + total_tasks: 4, + passed_tasks: 4, + failed_tasks: 0, + pass_rate: 1, + duration_ms: 1200, + execution_plan: { stages: [], nodes: {} }, + }; +} + +/** Builds dashboard props with one next-page cursor for records and workflows. 
*/ +function makeEvalData(): AgentPromptEvalData[] { + return [ + { + promptCard: { + space: "default", + name: "triage_prompt", + version: "1.0.0", + }, + monitoringData: { + status: "success", + uid: "eval-profile-1", + registryType: "prompt", + selectedTimeRange: { + label: "Past 24 Hours", + value: "24hours", + startTime: "2026-01-01T00:00:00Z", + endTime: "2026-01-02T00:00:00Z", + bucketInterval: "1 hours", + }, + profile: { + config: { + uid: "eval-profile-1", + space: "default", + }, + }, + selectedData: { + metrics: { workflow: { metrics: {} } }, + driftAlerts: { items: [], has_next: false, has_previous: false }, + records: { + items: [makeRecord("pageone1", 1, "2026-01-01T00:00:00Z")], + has_next: true, + next_cursor: { id: 1, created_at: "2026-01-01T00:00:00Z" }, + has_previous: false, + }, + workflows: { + items: [makeWorkflow(1, "workflow-record-page-1", "2026-01-01T00:00:00Z")], + has_next: true, + next_cursor: { id: 1, created_at: "2026-01-01T00:00:00Z" }, + has_previous: false, + }, + }, + }, + } as unknown as AgentPromptEvalData, + ]; +} + +describe("AgentEvalDashboard pagination", () => { + beforeEach(() => { + Object.defineProperty(window, "matchMedia", { + writable: true, + value: vi.fn().mockImplementation((query: string) => ({ + matches: false, + media: query, + onchange: null, + addEventListener: vi.fn(), + removeEventListener: vi.fn(), + addListener: vi.fn(), + removeListener: vi.fn(), + dispatchEvent: vi.fn(), + })), + }); + }); + + it("pages records with recordCursor and replaces visible record rows", async () => { + refreshCalls.length = 0; + render(AgentEvalDashboard, { + props: { + agentName: "support_agent", + agentVersion: "1.0.0", + agentPromptEvals: makeEvalData(), + }, + }); + + expect(screen.getByText("pageone1")).toBeInTheDocument(); + await fireEvent.click(screen.getByRole("button", { name: "Next evaluation records page" })); + + await waitFor(() => expect(screen.getByText("pagetwo2")).toBeInTheDocument()); + 
expect(refreshCalls).toEqual([ + { recordCursor: { cursor: { id: 1, created_at: "2026-01-01T00:00:00Z" }, direction: "next" } }, + ]); + expect(screen.queryByText("pageone1")).not.toBeInTheDocument(); + }); + + it("pages workflows with workflowCursor and does not send a record cursor", async () => { + refreshCalls.length = 0; + render(AgentEvalDashboard, { + props: { + agentName: "support_agent", + agentVersion: "1.0.0", + agentPromptEvals: makeEvalData(), + }, + }); + + expect(screen.getByText("workflow")).toBeInTheDocument(); + await fireEvent.click(screen.getByRole("button", { name: "Next workflow results page" })); + + await waitFor(() => { + expect(refreshCalls).toEqual([ + { workflowCursor: { cursor: { id: 1, created_at: "2026-01-01T00:00:00Z" }, direction: "next" } }, + ]); + }); + expect(refreshCalls[0]).not.toHaveProperty("recordCursor"); + }); +}); diff --git a/crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/__tests__/AgentEvalTables.pagination.test.ts b/crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/__tests__/AgentEvalTables.pagination.test.ts new file mode 100644 index 0000000000..d828921d18 --- /dev/null +++ b/crates/opsml_server/opsml_ui/src/lib/components/card/agent/evaluation/__tests__/AgentEvalTables.pagination.test.ts @@ -0,0 +1,80 @@ +import { fireEvent, render, screen } from "@testing-library/svelte"; +import { describe, expect, it, vi } from "vitest"; +import AgentEvalRecordTable from "../AgentEvalRecordTable.svelte"; +import AgentEvalWorkflowTable from "../AgentEvalWorkflowTable.svelte"; +import type { RecordWithAgent, WorkflowWithAgent } from "../types"; + +const record = { + _agentName: "triage_prompt", + _evalPath: "/opsml/prompt/card/default/triage/1.0.0/evaluation", + record_id: "record-a", + created_at: "2026-01-01T00:00:00Z", + uid: "record-uid-a", + context: {}, + id: 1, + updated_at: null, + processing_started_at: null, + processing_ended_at: null, + processing_duration: null, + entity_id: 
1, + entity_uid: "entity-a", + status: "Processed", + entity_type: "Agent", + trace_id: "trace-a", +} as RecordWithAgent; + +const workflow = { + _agentName: "triage_prompt", + _evalPath: "/opsml/prompt/card/default/triage/1.0.0/evaluation", + _profile: {} as WorkflowWithAgent["_profile"], + _traceId: "trace-a", + id: 1, + record_uid: "record-uid-a", + entity_id: 1, + entity_uid: "entity-a", + created_at: "2026-01-01T00:00:00Z", + total_tasks: 4, + passed_tasks: 4, + failed_tasks: 0, + pass_rate: 1, + duration_ms: 1200, + execution_plan: { stages: [], nodes: {} }, +} as WorkflowWithAgent; + +describe("Agent eval pagination tables", () => { + it("emits record pagination directions only when controls are enabled", async () => { + const onPageChange = vi.fn(); + render(AgentEvalRecordTable, { + props: { + records: [record], + hasNext: true, + hasPrevious: false, + onPageChange, + }, + }); + + await fireEvent.click(screen.getByRole("button", { name: "Next evaluation records page" })); + await fireEvent.click(screen.getByRole("button", { name: "Previous evaluation records page" })); + + expect(onPageChange).toHaveBeenCalledTimes(1); + expect(onPageChange).toHaveBeenCalledWith("next"); + }); + + it("emits workflow pagination directions only when controls are enabled", async () => { + const onPageChange = vi.fn(); + render(AgentEvalWorkflowTable, { + props: { + workflows: [workflow], + hasNext: false, + hasPrevious: true, + onPageChange, + }, + }); + + await fireEvent.click(screen.getByRole("button", { name: "Next workflow results page" })); + await fireEvent.click(screen.getByRole("button", { name: "Previous workflow results page" })); + + expect(onPageChange).toHaveBeenCalledTimes(1); + expect(onPageChange).toHaveBeenCalledWith("previous"); + }); +}); diff --git a/crates/opsml_server/opsml_ui/src/lib/components/card/agent/observability/AgentsTable.svelte b/crates/opsml_server/opsml_ui/src/lib/components/card/agent/observability/AgentsTable.svelte index 
7f01ee1b35..ccd6e4d08f 100644 --- a/crates/opsml_server/opsml_ui/src/lib/components/card/agent/observability/AgentsTable.svelte +++ b/crates/opsml_server/opsml_ui/src/lib/components/card/agent/observability/AgentsTable.svelte @@ -24,6 +24,35 @@ const cmp = av < bv ? -1 : av > bv ? 1 : 0; return sortAsc ? cmp : -cmp; })); + + /** + * Builds the stable part of a row key from business identifiers only. + * Metric counters and sort position are intentionally excluded so refreshed + * activity data does not remount the same logical agent row. + */ + function agentRowKeyBase(row: GenAiAgentActivity): string { + const key = [ + row.agent_id ?? "", + row.conversation_id ?? "", + row.agent_name ?? "", + ].join("|"); + return key.length > 2 ? key : "unknown-agent"; + } + + /** + * Adds a deterministic duplicate suffix only when two rows share the same + * business identity. The suffix is not sort-position based, so reordering the + * same rows keeps each rendered row attached to the same logical agent. + */ + const keyedRows = $derived.by(() => { + const counts: Record = {}; + return sorted.map((row) => { + const base = agentRowKeyBase(row); + const seen = counts[base] ?? 0; + counts[base] = seen + 1; + return { row, key: seen === 0 ? base : `${base}#${seen}` }; + }); + });
@@ -58,12 +87,12 @@ - {#if sorted.length === 0} + {#if keyedRows.length === 0} No data {:else} - {#each sorted as row (row.agent_id ?? row.agent_name)} + {#each keyedRows as { row, key } (key)} {row.agent_name ?? '—'} {fmtCompact(row.span_count)} diff --git a/crates/opsml_server/opsml_ui/src/lib/components/card/agent/observability/__tests__/AgentsTable.test.ts b/crates/opsml_server/opsml_ui/src/lib/components/card/agent/observability/__tests__/AgentsTable.test.ts new file mode 100644 index 0000000000..33fe8d7c8d --- /dev/null +++ b/crates/opsml_server/opsml_ui/src/lib/components/card/agent/observability/__tests__/AgentsTable.test.ts @@ -0,0 +1,32 @@ +import { render } from "@testing-library/svelte"; +import { describe, expect, it } from "vitest"; +import AgentsTable from "../AgentsTable.svelte"; +import type { GenAiAgentActivity } from "../types"; + +describe("AgentsTable", () => { + it("renders multiple rows sharing the same agent_name", () => { + const agents: GenAiAgentActivity[] = [ + { + agent_name: "responder_agent", + agent_id: null, + conversation_id: "conv-1", + span_count: 3, + total_input_tokens: 100, + total_output_tokens: 50, + last_seen: null, + }, + { + agent_name: "responder_agent", + agent_id: null, + conversation_id: "conv-2", + span_count: 7, + total_input_tokens: 200, + total_output_tokens: 90, + last_seen: null, + }, + ]; + + const { getAllByText } = render(AgentsTable, { props: { agents } }); + expect(getAllByText("responder_agent")).toHaveLength(2); + }); +}); diff --git a/crates/opsml_server/opsml_ui/src/lib/components/trace/TraceDashboard.svelte b/crates/opsml_server/opsml_ui/src/lib/components/trace/TraceDashboard.svelte index eeb9287029..86deef1809 100644 --- a/crates/opsml_server/opsml_ui/src/lib/components/trace/TraceDashboard.svelte +++ b/crates/opsml_server/opsml_ui/src/lib/components/trace/TraceDashboard.svelte @@ -1,6 +1,7 @@
@@ -232,7 +213,7 @@
- {#each AXIS_MARKS as mark, i} + {#each AXIS_MARKS as mark, i (mark)} {@const pct = mark * 100}
{#if span.depth > 0} - {#each Array.from({ length: span.depth }) as _, depthIndex} + {#each depthLevels(span.depth) as depthIndex (depthIndex)} {@const shouldDrawLine = shouldDrawVerticalLine(span, depthIndex, visibleSpans, parentChildMap)} {@const isCurrentLevel = depthIndex === span.depth - 1} {@const lineLeft = depthIndex * INDENT_PX + 8} @@ -358,7 +339,7 @@ class="flex-1 overflow-y-auto overflow-x-hidden relative bg-surface-50" > - {#each AXIS_MARKS.slice(1) as mark} + {#each AXIS_MARKS.slice(1) as mark (mark)}
{ + it("filters by AND of service and status code", () => { + const page = getMockTracePage({ + clause: andClause(serviceClause("inference-api"), statusCodeClause(2)), + limit: 100, + }); + expect(page.items.length).toBeGreaterThan(0); + expect(page.items.every((item) => item.service_name === "inference-api" && item.status_code === 2)).toBe(true); + }); + + it("filters by OR of two services", () => { + const page = getMockTracePage({ + clause: orClause(serviceClause("inference-api"), serviceClause("llm-gateway")), + limit: 100, + }); + expect(page.items.length).toBeGreaterThan(0); + expect(page.items.every((item) => ["inference-api", "llm-gateway"].includes(item.service_name))).toBe(true); + }); + + it("filters by NOT has_errors", () => { + const page = getMockTracePage({ + clause: notClause(hasErrorsClause(true)), + limit: 100, + }); + expect(page.items.length).toBeGreaterThan(0); + expect(page.items.every((item) => item.has_errors === false)).toBe(true); + }); + + it("matches service namespace through resource attributes", () => { + const matched = getMockTracePage({ + clause: serviceNamespaceClause("models"), + limit: 100, + }); + const missing = getMockTracePage({ + clause: serviceNamespaceClause("missing"), + limit: 100, + }); + expect(matched.items.length).toBeGreaterThan(0); + expect(missing.items).toHaveLength(0); + }); + + it("matches arbitrary resource attributes", () => { + const page = getMockTracePage({ + clause: attrClause("service.version", "1.0.0"), + limit: 100, + }); + expect(page.items.length).toBeGreaterThan(0); + expect(page.items.every((item) => item.service_name === "llm-gateway")).toBe(true); + }); + + it("bounds duration with min and max clauses", () => { + const page = getMockTracePage({ + clause: andClause(durationMinClause(300), durationMaxClause(1000)), + limit: 100, + }); + expect(page.items.length).toBeGreaterThan(0); + expect(page.items.every((item) => (item.duration_ms ?? 0) >= 300 && (item.duration_ms ?? 
0) <= 1000)).toBe(true); + }); + + it("returns all items when no clause is present", () => { + const withoutClause = getMockTracePage({ limit: 100 }); + const withImpossibleClause = getMockTracePage({ + clause: serviceClause("does-not-exist"), + limit: 100, + }); + expect(withoutClause.items.length).toBeGreaterThan(withImpossibleClause.items.length); + expect(withImpossibleClause.items).toHaveLength(0); + }); +}); diff --git a/crates/opsml_server/opsml_ui/src/lib/components/trace/clause.test.ts b/crates/opsml_server/opsml_ui/src/lib/components/trace/clause.test.ts new file mode 100644 index 0000000000..5247f82539 --- /dev/null +++ b/crates/opsml_server/opsml_ui/src/lib/components/trace/clause.test.ts @@ -0,0 +1,185 @@ +import { describe, expect, it } from "vitest"; +import { + addToClause, + andClause, + attrClause, + clauseToActiveFilters, + durationMaxClause, + durationMinClause, + findClauses, + hasDimension, + hasErrorsClause, + notClause, + orClause, + phraseClause, + removeAttr, + removeClauseDimension, + replaceClauseDimension, + serviceClause, + serviceInstanceIdClause, + serviceNamespaceClause, + serviceVersionClause, + statusCodeClause, +} from "./clause"; + +describe("FilterClause builders and combinators", () => { + it("builds leaf clauses", () => { + expect(serviceClause("checkout")).toEqual({ op: "service", value: "checkout" }); + expect(serviceNamespaceClause("prod")).toEqual({ op: "service_namespace", value: "prod" }); + expect(serviceVersionClause("1.2.3")).toEqual({ op: "service_version", value: "1.2.3" }); + expect(serviceInstanceIdClause("pod-a")).toEqual({ op: "service_instance_id", value: "pod-a" }); + expect(statusCodeClause(500)).toEqual({ op: "status_code", value: 500 }); + expect(hasErrorsClause(true)).toEqual({ op: "has_errors", value: true }); + expect(durationMinClause(100)).toEqual({ op: "duration_min_ms", value: 100 }); + expect(durationMaxClause(500)).toEqual({ op: "duration_max_ms", value: 500 }); + expect(attrClause("env", 
"prod")).toEqual({ op: "attr", value: { key: "env", value: "prod" } }); + expect(phraseClause("timeout")).toEqual({ op: "phrase", value: "timeout" }); + }); + + it("handles empty and single-child AND clauses", () => { + const service = serviceClause("checkout"); + expect(andClause()).toBeUndefined(); + expect(andClause(service)).toEqual(service); + }); + + it("builds and flattens AND clauses", () => { + const a = serviceClause("checkout"); + const b = statusCodeClause(500); + const c = hasErrorsClause(true); + expect(andClause(a, b)).toEqual({ op: "and", value: [a, b] }); + expect(andClause(andClause(a, b), c)).toEqual({ op: "and", value: [a, b, c] }); + }); + + it("handles empty and single-child OR clauses", () => { + const service = serviceClause("checkout"); + expect(orClause()).toBeUndefined(); + expect(orClause(service)).toEqual(service); + }); + + it("builds and flattens OR clauses", () => { + const a = serviceClause("checkout"); + const b = serviceClause("payments"); + const c = serviceClause("catalog"); + expect(orClause(a, b)).toEqual({ op: "or", value: [a, b] }); + expect(orClause(orClause(a, b), c)).toEqual({ op: "or", value: [a, b, c] }); + }); + + it("builds explicit NOT clauses without simplification", () => { + const service = serviceClause("checkout"); + expect(notClause(service)).toEqual({ op: "not", value: service }); + expect(notClause(notClause(service))).toEqual({ + op: "not", + value: { op: "not", value: service }, + }); + }); +}); + +describe("FilterClause mutators", () => { + it("adds a clause to an empty tree", () => { + expect(addToClause(undefined, serviceClause("checkout"))).toEqual(serviceClause("checkout")); + }); + + it("adds a clause by flattening with AND", () => { + const service = serviceClause("checkout"); + const status = statusCodeClause(500); + const errors = hasErrorsClause(true); + expect(addToClause(andClause(service, status), errors)).toEqual({ + op: "and", + value: [service, status, errors], + }); + }); + + it("removes 
one dimension and unwraps the remaining child", () => { + expect(removeClauseDimension(andClause(serviceClause("a"), statusCodeClause(500)), "service")).toEqual( + statusCodeClause(500), + ); + }); + + it("collapses when every matching dimension disappears", () => { + expect(removeClauseDimension(andClause(serviceClause("a"), serviceClause("b")), "service")).toBeUndefined(); + }); + + it("removes duration min and max together", () => { + expect( + removeClauseDimension( + andClause(durationMinClause(100), durationMaxClause(500), statusCodeClause(500)), + "duration", + ), + ).toEqual(statusCodeClause(500)); + }); + + it("collapses NOT when the inner leaf is removed", () => { + expect(removeClauseDimension(notClause(serviceClause("a")), "service")).toBeUndefined(); + }); + + it("removes one matching attribute leaf", () => { + expect(removeAttr(andClause(attrClause("env", "prod"), attrClause("region", "us")), "env", "prod")).toEqual( + attrClause("region", "us"), + ); + }); + + it("replaces an absent dimension by adding it", () => { + expect(replaceClauseDimension(undefined, "service", serviceClause("b"))).toEqual(serviceClause("b")); + }); + + it("replaces an existing dimension in place", () => { + expect( + replaceClauseDimension(andClause(serviceClause("a"), statusCodeClause(500)), "service", serviceClause("b")), + ).toEqual(andClause(serviceClause("b"), statusCodeClause(500))); + }); + + it("finds clauses and dimensions anywhere in the tree", () => { + const clause = andClause(notClause(serviceClause("a")), durationMinClause(100)); + expect(findClauses(clause, "service")).toEqual([serviceClause("a")]); + expect(hasDimension(clause, "duration")).toBe(true); + expect(hasDimension(clause, "status_code")).toBe(false); + }); + + it("removes phrase leaves by dimension", () => { + expect(removeClauseDimension(andClause(phraseClause("timeout"), serviceClause("a")), "phrase")).toEqual( + serviceClause("a"), + ); + }); +}); + +describe("FilterClause projection", () => { + 
/**
 * Filter-clause model for trace search.
 *
 * A clause is a small boolean expression tree: `and` / `or` / `not` nodes
 * combine leaf predicates (service, status code, duration bounds, attributes,
 * free-text phrase, ...). Every helper in this module is non-mutating: inputs
 * are never modified and returned trees are built from fresh objects.
 */
export type FilterClause =
  | { op: "and"; value: FilterClause[] }
  | { op: "or"; value: FilterClause[] }
  | { op: "not"; value: FilterClause }
  | { op: "phrase"; value: string }
  | { op: "service"; value: string }
  | { op: "service_namespace"; value: string }
  | { op: "service_version"; value: string }
  | { op: "service_instance_id"; value: string }
  | { op: "status_code"; value: number }
  | { op: "has_errors"; value: boolean }
  | { op: "duration_min_ms"; value: number }
  | { op: "duration_max_ms"; value: number }
  | { op: "attr"; value: { key: string; value: string } };

/** Discriminant of every clause variant, combinators included. */
export type FilterClauseOp = FilterClause["op"];

/**
 * A group of leaves that can be removed or replaced together.
 *
 * Each leaf op is its own dimension; `"duration"` is a virtual dimension that
 * covers both `duration_min_ms` and `duration_max_ms`.
 */
export type ClauseDimension =
  | "service"
  | "service_namespace"
  | "service_version"
  | "service_instance_id"
  | "status_code"
  | "has_errors"
  | "duration_min_ms"
  | "duration_max_ms"
  | "duration"
  | "attr"
  | "phrase";

/** One removable chip derived from a leaf of the active clause. */
export interface ActiveFilter {
  /** Unique within one `clauseToActiveFilters` result. */
  id: string;
  /** Human-readable text, e.g. `Service: checkout`. */
  label: string;
  /** Returns the given clause with this chip's filter removed. */
  remove: (clause: FilterClause | undefined) => FilterClause | undefined;
}

/** Maps a facet axis name to the clause op that filters on it. */
export const FACET_AXIS_TO_CLAUSE_OP = {
  service_name: "service",
  status_code: "status_code",
} as const satisfies Record<string, FilterClauseOp>;

// ---------------------------------------------------------------------------
// Leaf constructors
// ---------------------------------------------------------------------------

export const serviceClause = (value: string): FilterClause => ({
  op: "service",
  value,
});
export const serviceNamespaceClause = (value: string): FilterClause => ({
  op: "service_namespace",
  value,
});
export const serviceVersionClause = (value: string): FilterClause => ({
  op: "service_version",
  value,
});
export const serviceInstanceIdClause = (value: string): FilterClause => ({
  op: "service_instance_id",
  value,
});
export const statusCodeClause = (value: number): FilterClause => ({
  op: "status_code",
  value,
});
export const hasErrorsClause = (value: boolean): FilterClause => ({
  op: "has_errors",
  value,
});
export const durationMinClause = (value: number): FilterClause => ({
  op: "duration_min_ms",
  value,
});
export const durationMaxClause = (value: number): FilterClause => ({
  op: "duration_max_ms",
  value,
});
export const attrClause = (key: string, value: string): FilterClause => ({
  op: "attr",
  value: { key, value },
});
export const phraseClause = (value: string): FilterClause => ({
  op: "phrase",
  value,
});

// ---------------------------------------------------------------------------
// Combinators
// ---------------------------------------------------------------------------

/**
 * ANDs the given clauses together.
 *
 * `undefined` entries are skipped and nested `and` clauses are flattened.
 * Returns `undefined` when nothing remains and the lone child when exactly one
 * remains.
 */
export function andClause(
  ...children: (FilterClause | undefined)[]
): FilterClause | undefined {
  return combineClause("and", children);
}

/**
 * ORs the given clauses together, with the same skipping, flattening and
 * collapsing rules as {@link andClause}.
 */
export function orClause(
  ...children: (FilterClause | undefined)[]
): FilterClause | undefined {
  return combineClause("or", children);
}

/** Wraps a clause in a negation. The inner clause is referenced, not copied. */
export function notClause(value: FilterClause): FilterClause {
  return { op: "not", value };
}

// ---------------------------------------------------------------------------
// Queries
// ---------------------------------------------------------------------------

/**
 * Yields every leaf of the clause tree in document (depth-first) order.
 *
 * Leaves nested under `not` are yielded too; the negation itself is not
 * reported to the consumer.
 */
export function* iterateLeaves(
  clause: FilterClause | undefined,
): Iterable<FilterClause> {
  if (!clause) return;
  switch (clause.op) {
    case "and":
    case "or":
      for (const child of clause.value) {
        yield* iterateLeaves(child);
      }
      return;
    case "not":
      yield* iterateLeaves(clause.value);
      return;
    default:
      yield clause;
  }
}

/** Returns every leaf whose op equals `op`, narrowed to that variant. */
export function findClauses<Op extends FilterClauseOp>(
  clause: FilterClause | undefined,
  op: Op,
): Extract<FilterClause, { op: Op }>[] {
  return Array.from(iterateLeaves(clause)).filter(
    (leaf): leaf is Extract<FilterClause, { op: Op }> => leaf.op === op,
  );
}

/** Reports whether any leaf of the clause belongs to the dimension. */
export function hasDimension(
  clause: FilterClause | undefined,
  dim: ClauseDimension,
): boolean {
  for (const leaf of iterateLeaves(clause)) {
    if (matchesDimension(leaf, dim)) return true;
  }
  return false;
}

// ---------------------------------------------------------------------------
// Non-mutating edits
// ---------------------------------------------------------------------------

/** ANDs `added` onto `existing`; returns `added` alone when there is nothing to extend. */
export function addToClause(
  existing: FilterClause | undefined,
  added: FilterClause,
): FilterClause {
  return andClause(existing, added) ?? added;
}

/**
 * Removes every leaf belonging to `dim`.
 *
 * Groups that become empty are dropped, groups left with one child collapse to
 * that child, and a `not` whose operand disappears is dropped as well.
 */
export function removeClauseDimension(
  clause: FilterClause | undefined,
  dim: ClauseDimension,
): FilterClause | undefined {
  return pruneLeaves(clause, (leaf) => matchesDimension(leaf, dim));
}

/**
 * Replaces every leaf of `dim` with a single copy of `next`.
 *
 * - `next === undefined` behaves like {@link removeClauseDimension}.
 * - When a matching leaf lives inside an `and`/`or` group, `next` takes the
 *   position of the first matching child of the innermost such group, so
 *   sibling order is preserved. A `not` that only wrapped a matching leaf is
 *   dropped together with the leaf.
 * - When nothing matches, or the match was not inside any group (the clause
 *   was the matching leaf itself, possibly wrapped in `not`), `next` is ANDed
 *   onto whatever remains. Replacing the only filter therefore yields `next`
 *   instead of clearing the clause.
 */
export function replaceClauseDimension(
  clause: FilterClause | undefined,
  dim: ClauseDimension,
  next: FilterClause | undefined,
): FilterClause | undefined {
  if (!next) return removeClauseDimension(clause, dim);

  const state: ReplaceState = { matches: 0, inserted: false };
  const rewritten = replaceDimensionInner(clause, dim, next, state);
  if (state.inserted) return rewritten;
  return addToClause(rewritten, next);
}

/** Removes every `attr` leaf whose key and value both equal the given pair. */
export function removeAttr(
  clause: FilterClause | undefined,
  key: string,
  value: string,
): FilterClause | undefined {
  return pruneLeaves(
    clause,
    (leaf) =>
      leaf.op === "attr" &&
      leaf.value.key === key &&
      leaf.value.value === value,
  );
}

/**
 * Projects the clause into one chip per leaf, in document order.
 *
 * The leaf index is part of each id, so ids stay unique even when the same
 * leaf occurs more than once.
 */
export function clauseToActiveFilters(
  clause: FilterClause | undefined,
): ActiveFilter[] {
  return Array.from(iterateLeaves(clause)).map((leaf, index) => ({
    id: `${index}:${leaf.op}:${leafLabelValue(leaf)}`,
    label: labelForLeaf(leaf),
    remove: removeForLeaf(leaf),
  }));
}

// ---------------------------------------------------------------------------
// Internals
// ---------------------------------------------------------------------------

/** Bookkeeping shared across one `replaceClauseDimension` traversal. */
interface ReplaceState {
  /** Number of matching leaves stripped so far. */
  matches: number;
  /** Whether the replacement has already been placed in the output tree. */
  inserted: boolean;
}

/**
 * Builds an `and`/`or` node from `children`: skips `undefined`, flattens
 * children with the same op, and collapses zero or one survivors.
 */
function combineClause(
  op: "and" | "or",
  children: (FilterClause | undefined)[],
): FilterClause | undefined {
  const flattened: FilterClause[] = [];
  for (const child of children) {
    if (!child) continue;
    if (child.op === op) flattened.push(...child.value.map(cloneClause));
    else flattened.push(cloneClause(child));
  }
  if (flattened.length === 0) return undefined;
  if (flattened.length === 1) return flattened[0];
  return { op, value: flattened };
}

/**
 * Returns a copy of the tree without the leaves for which `shouldDrop` is
 * true, collapsing groups and negations that end up empty.
 */
function pruneLeaves(
  clause: FilterClause | undefined,
  shouldDrop: (leaf: FilterClause) => boolean,
): FilterClause | undefined {
  if (!clause) return undefined;

  switch (clause.op) {
    case "and":
    case "or": {
      const kept: FilterClause[] = [];
      for (const child of clause.value) {
        const pruned = pruneLeaves(child, shouldDrop);
        if (pruned) kept.push(pruned);
      }
      return combineClause(clause.op, kept);
    }
    case "not": {
      const inner = pruneLeaves(clause.value, shouldDrop);
      return inner ? { op: "not", value: inner } : undefined;
    }
    default:
      return shouldDrop(clause) ? undefined : cloneClause(clause);
  }
}

/**
 * Strips the leaves of `dim` and places `next` once, inside the innermost
 * group that contained the first match. Progress is recorded in `state` so the
 * enclosing groups do not insert a second copy.
 */
function replaceDimensionInner(
  clause: FilterClause | undefined,
  dim: ClauseDimension,
  next: FilterClause,
  state: ReplaceState,
): FilterClause | undefined {
  if (!clause) return undefined;

  switch (clause.op) {
    case "and":
    case "or": {
      const children: FilterClause[] = [];
      for (const child of clause.value) {
        const matchesBefore = state.matches;
        const updated = replaceDimensionInner(child, dim, next, state);
        // The child contained a match and no deeper group has placed the
        // replacement yet: put it where that child used to be.
        if (state.matches > matchesBefore && !state.inserted) {
          children.push(cloneClause(next));
          state.inserted = true;
        }
        if (updated) children.push(updated);
      }
      return combineClause(clause.op, children);
    }
    case "not": {
      const inner = replaceDimensionInner(clause.value, dim, next, state);
      return inner ? { op: "not", value: inner } : undefined;
    }
    default:
      if (matchesDimension(clause, dim)) {
        state.matches += 1;
        return undefined;
      }
      return cloneClause(clause);
  }
}

/** `"duration"` matches both duration bounds; every other dimension matches its own op. */
function matchesDimension(clause: FilterClause, dim: ClauseDimension): boolean {
  if (dim === "duration") {
    return clause.op === "duration_min_ms" || clause.op === "duration_max_ms";
  }
  return clause.op === dim;
}

/**
 * Chip text for a leaf.
 *
 * @throws Error when given an `and` / `or` / `not` node.
 */
function labelForLeaf(clause: FilterClause): string {
  switch (clause.op) {
    case "service":
      return `Service: ${clause.value}`;
    case "service_namespace":
      return `Namespace: ${clause.value}`;
    case "service_version":
      return `Version: ${clause.value}`;
    case "service_instance_id":
      return `Instance: ${clause.value}`;
    case "status_code":
      return `Status: ${clause.value}`;
    case "has_errors":
      return `Has errors: ${clause.value}`;
    case "duration_min_ms":
      return `Min duration: ${clause.value}ms`;
    case "duration_max_ms":
      return `Max duration: ${clause.value}ms`;
    case "attr":
      return `Attr: ${clause.value.key}=${clause.value.value}`;
    case "phrase":
      return `Phrase: ${clause.value}`;
    case "and":
    case "or":
    case "not":
      throw new Error(`Cannot label non-leaf clause ${clause.op}`);
  }
}

/** Value part of a chip id: `key=value` for attributes, the stringified value otherwise. */
function leafLabelValue(clause: FilterClause): string {
  return clause.op === "attr"
    ? `${clause.value.key}=${clause.value.value}`
    : String(clause.value);
}

/**
 * Builds the removal closure for a chip. Attribute chips remove only their own
 * key/value pair; every other chip clears its whole dimension.
 */
function removeForLeaf(
  clause: FilterClause,
): (clause: FilterClause | undefined) => FilterClause | undefined {
  if (clause.op === "attr") {
    const { key, value } = clause.value;
    return (current) => removeAttr(current, key, value);
  }
  const dim = dimensionForLeaf(clause);
  return (current) => removeClauseDimension(current, dim);
}

/**
 * Clones a clause without `structuredClone`.
 *
 * Clause objects can be wrapped in Svelte proxies when they come from component
 * state. `structuredClone` rejects those proxies, so the discriminated union is
 * copied manually before returning data to mutators.
 */
function cloneClause(clause: FilterClause): FilterClause {
  switch (clause.op) {
    case "and":
    case "or":
      return { op: clause.op, value: clause.value.map(cloneClause) };
    case "not":
      return { op: "not", value: cloneClause(clause.value) };
    case "attr":
      return {
        op: "attr",
        value: { key: clause.value.key, value: clause.value.value },
      };
    case "phrase":
    case "service":
    case "service_namespace":
    case "service_version":
    case "service_instance_id":
      return { op: clause.op, value: clause.value };
    case "status_code":
    case "duration_min_ms":
    case "duration_max_ms":
      return { op: clause.op, value: clause.value };
    case "has_errors":
      return { op: "has_errors", value: clause.value };
  }
}

/**
 * Dimension a leaf belongs to, which is its own op.
 *
 * @throws Error when given an `and` / `or` / `not` node.
 */
function dimensionForLeaf(clause: FilterClause): ClauseDimension {
  switch (clause.op) {
    case "service":
    case "service_namespace":
    case "service_version":
    case "service_instance_id":
    case "status_code":
    case "has_errors":
    case "duration_min_ms":
    case "duration_max_ms":
    case "attr":
    case "phrase":
      return clause.op;
    case "and":
    case "or":
    case "not":
      throw new Error(`Cannot derive a filter dimension from ${clause.op}`);
  }
}
import type { FilterClause } from "./clause";
import type { Attribute, TraceListItem } from "./types";

/**
 * Finds the first attribute whose key equals `key` and returns its value
 * converted with `String(...)`.
 *
 * @returns `undefined` when `attrs` is missing or holds no such key.
 */
export function resourceAttr(
  attrs: Attribute[] | undefined,
  key: string,
): string | undefined {
  const match = attrs?.find((entry) => entry.key === key);
  if (match === undefined) return undefined;
  return String(match.value);
}

/**
 * Evaluates a filter clause against a single trace list item.
 *
 * `and` / `or` / `not` recurse into their operands; every other op compares
 * one field or resource attribute of `item` with the clause value.
 */
export function evaluateClause(
  item: TraceListItem,
  clause: FilterClause,
): boolean {
  // Shared lookup for the ops that compare a resource attribute to a string.
  const resourceEquals = (key: string, expected: string): boolean =>
    resourceAttr(item.resource_attributes, key) === expected;

  switch (clause.op) {
    case "and": {
      for (const child of clause.value) {
        if (!evaluateClause(item, child)) return false;
      }
      return true;
    }
    case "or": {
      for (const child of clause.value) {
        if (evaluateClause(item, child)) return true;
      }
      return false;
    }
    case "not": {
      const inner = evaluateClause(item, clause.value);
      return !inner;
    }
    case "service":
      return item.service_name === clause.value;
    case "service_namespace":
      return resourceEquals("service.namespace", clause.value);
    case "service_version":
      return resourceEquals("service.version", clause.value);
    case "service_instance_id":
      return resourceEquals("service.instance.id", clause.value);
    case "status_code":
      return item.status_code === clause.value;
    case "has_errors":
      return item.has_errors === clause.value;
    case "duration_min_ms": {
      // A missing duration counts as 0 for the lower bound.
      const duration = item.duration_ms ?? 0;
      return duration >= clause.value;
    }
    case "duration_max_ms": {
      // A missing duration counts as +Infinity for the upper bound.
      const duration = item.duration_ms ?? Number.POSITIVE_INFINITY;
      return duration <= clause.value;
    }
    case "attr":
      return resourceEquals(clause.value.key, clause.value.value);
    case "phrase": {
      // Case-insensitive substring match over root operation and service name.
      const needle = clause.value.toLowerCase();
      const haystack = `${item.root_operation} ${item.service_name}`.toLowerCase();
      return haystack.includes(needle);
    }
  }
}