From 206f34b8c9a727876dfde816eca96d706f68bcb0 Mon Sep 17 00:00:00 2001 From: Dragos Bobolea Date: Tue, 7 Apr 2026 17:50:36 +0300 Subject: [PATCH] feat: add `uipath trace` CLI command for trajectory visualization Add a new `uipath trace` command that reads JSONL trace files (from `uipath run --trace-file` or `uipath eval --trace-file`) and eval verbose JSON output, rendering a Rich span tree showing the agent's execution trajectory. Features: - Span tree with timing, status icons, and smart labels (LLM/tool/generic) - Key attributes displayed by default (input, output, tokens, model name) - `--full` mode shows all attributes with no truncation - `--no-input`/`--no-output` for structural overview - `--name` filter prunes the tree to matching spans + ancestors - `--contains` extracts full subtrees where any descendant matches, useful for finding specific eval runs across a trace file - `--span-type` and `--status` filters - `--eval-id` to pick a specific evaluation from verbose JSON output - Auto-detects JSONL vs eval JSON format Includes 66 tests covering loading, tree building, filtering, rendering, and CLI integration. 
--- README.md | 13 + packages/uipath/README.md | 13 + packages/uipath/docs/cli/index.md | 37 + packages/uipath/src/uipath/_cli/__init__.py | 1 + packages/uipath/src/uipath/_cli/cli_trace.py | 789 ++++++++++++++++++ .../uipath/src/uipath/_resources/AGENTS.md | 2 +- .../src/uipath/_resources/CLI_REFERENCE.md | 60 ++ packages/uipath/tests/cli/trace/__init__.py | 0 .../trace/fixtures/sample_error_trace.jsonl | 2 + .../trace/fixtures/sample_eval_output.json | 71 ++ .../trace/fixtures/sample_eval_trace.jsonl | 10 + .../cli/trace/fixtures/sample_trace.jsonl | 4 + packages/uipath/tests/cli/trace/test_trace.py | 516 ++++++++++++ 13 files changed, 1517 insertions(+), 1 deletion(-) create mode 100644 packages/uipath/src/uipath/_cli/cli_trace.py create mode 100644 packages/uipath/tests/cli/trace/__init__.py create mode 100644 packages/uipath/tests/cli/trace/fixtures/sample_error_trace.jsonl create mode 100644 packages/uipath/tests/cli/trace/fixtures/sample_eval_output.json create mode 100644 packages/uipath/tests/cli/trace/fixtures/sample_eval_trace.jsonl create mode 100644 packages/uipath/tests/cli/trace/fixtures/sample_trace.jsonl create mode 100644 packages/uipath/tests/cli/trace/test_trace.py diff --git a/README.md b/README.md index d90c937fc..c84e8ca64 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ This [quickstart guide](https://uipath.github.io/uipath-python/) walks you throu - [Authentication](#authentication) - [Initialize a Project](#initialize-a-project) - [Debug a Project](#debug-a-project) + - [Visualize Traces](#visualize-traces) - [Package a Project](#package-a-project) - [Publish a Package](#publish-a-package) - [Project Structure](#project-structure) @@ -155,6 +156,18 @@ uipath run ENTRYPOINT [INPUT] Executes a Python script with the provided JSON input arguments. 
+### Visualize Traces + +```bash +# Capture a trace +uipath run main '{"query": "hello"}' --trace-file traces.jsonl + +# Visualize the agent trajectory +uipath trace traces.jsonl +``` + +Renders a tree view of the agent's execution showing tool calls, LLM invocations, inputs/outputs, and timing. Also works with eval traces (`uipath eval --trace-file`) — use `--contains "function_name"` to find specific runs across an eval set. + ### Package a Project ```bash diff --git a/packages/uipath/README.md b/packages/uipath/README.md index d90c937fc..c84e8ca64 100644 --- a/packages/uipath/README.md +++ b/packages/uipath/README.md @@ -26,6 +26,7 @@ This [quickstart guide](https://uipath.github.io/uipath-python/) walks you throu - [Authentication](#authentication) - [Initialize a Project](#initialize-a-project) - [Debug a Project](#debug-a-project) + - [Visualize Traces](#visualize-traces) - [Package a Project](#package-a-project) - [Publish a Package](#publish-a-package) - [Project Structure](#project-structure) @@ -155,6 +156,18 @@ uipath run ENTRYPOINT [INPUT] Executes a Python script with the provided JSON input arguments. +### Visualize Traces + +```bash +# Capture a trace +uipath run main '{"query": "hello"}' --trace-file traces.jsonl + +# Visualize the agent trajectory +uipath trace traces.jsonl +``` + +Renders a tree view of the agent's execution showing tool calls, LLM invocations, inputs/outputs, and timing. Also works with eval traces (`uipath eval --trace-file`) — use `--contains "function_name"` to find specific runs across an eval set. + ### Package a Project ```bash diff --git a/packages/uipath/docs/cli/index.md b/packages/uipath/docs/cli/index.md index deb875353..79e26ae96 100644 --- a/packages/uipath/docs/cli/index.md +++ b/packages/uipath/docs/cli/index.md @@ -143,6 +143,43 @@ uipath run agent '{\"topic\":\"uipath\"}' ``` --- +::: mkdocs-click + :module: uipath._cli + :command: trace + :depth: 1 + :style: table + +Visualize an agent execution trace. 
Reads JSONL trace files produced by `uipath run --trace-file` or `uipath eval --trace-file`, and renders a span tree showing the agent's trajectory. + + + +```shell +> uipath trace traces.jsonl +Trace abcdef12…34567890 +└── agent (12.5s) ✓ + ├── input: {"messages": [{"role": "user", "content": "Book a flight..."}]} + ├── LLM (gpt-4o) (2.2s) ✓ + │ └── tokens: prompt=847, completion=156, total=1003 + ├── 🔧 search_flights (1.7s) ✓ + │ ├── input: {"origin": "SFO", "destination": "NRT"} + │ └── output: {"flights": [...]} + ├── LLM (gpt-4o) (1.8s) ✓ + │ └── tokens: prompt=1456, completion=203, total=1659 + └── 🔧 book_flight (1.2s) ✓ + └── output: {"confirmation": {"booking_ref": "BK-UA837"}} +9 spans total +``` + +/// tip +Use `--contains` to search across eval traces and extract full agent trajectories where a specific function was called: +```console +uipath eval main eval-set.json --trace-file traces.jsonl +uipath trace traces.jsonl --contains "get_random*" +``` +/// + +--- + ::: mkdocs-click :module: uipath._cli :command: pack diff --git a/packages/uipath/src/uipath/_cli/__init__.py b/packages/uipath/src/uipath/_cli/__init__.py index aa6e177e8..249310ac0 100644 --- a/packages/uipath/src/uipath/_cli/__init__.py +++ b/packages/uipath/src/uipath/_cli/__init__.py @@ -45,6 +45,7 @@ "server": "cli_server", "register": "cli_register", "debug": "cli_debug", + "trace": "cli_trace", "assets": "services.cli_assets", "buckets": "services.cli_buckets", "context-grounding": "services.cli_context_grounding", diff --git a/packages/uipath/src/uipath/_cli/cli_trace.py b/packages/uipath/src/uipath/_cli/cli_trace.py new file mode 100644 index 000000000..042ed903d --- /dev/null +++ b/packages/uipath/src/uipath/_cli/cli_trace.py @@ -0,0 +1,789 @@ +"""CLI command for visualizing agent trajectory traces. + +Reads JSONL trace files (from `uipath run --trace-file`) or eval verbose +JSON output (from `uipath eval --verbose --output-file`) and renders +a span tree using Rich. 
+""" + +import json +import os +from datetime import datetime +from fnmatch import fnmatch +from typing import Any + +import click + +from ._utils._console import ConsoleLogger + +console = ConsoleLogger() + + +# --------------------------------------------------------------------------- +# Span loading helpers +# --------------------------------------------------------------------------- + + +def _load_jsonl_spans(path: str) -> list[dict[str, Any]]: + """Load spans from a JSONL trace file (one OTel span JSON per line).""" + spans: list[dict[str, Any]] = [] + with open(path, encoding="utf-8") as f: + for line in f: + line = line.strip() + if line: + spans.append(json.loads(line)) + return spans + + +def _load_eval_output_spans(path: str, eval_id: str | None) -> list[dict[str, Any]]: + """Load spans from an eval verbose JSON output file. + + Returns JSONL-style span dicts (normalised from the + ``TrajectoryEvaluationSpan`` format stored under + ``agentExecutionOutput.trace.spans``). + """ + with open(path, encoding="utf-8") as f: + data = json.load(f) + + results: list[dict[str, Any]] = data.get("evaluationSetResults", []) + + all_spans: list[dict[str, Any]] = [] + for result in results: + name = result.get("evaluationName", "") + if eval_id is not None and name != eval_id: + continue + + exec_output = result.get("agentExecutionOutput") + if exec_output is None: + continue + + trace = exec_output.get("trace", {}) + raw_spans = trace.get("spans", []) + + for sp in raw_spans: + all_spans.append(_normalise_eval_span(sp, eval_name=name)) + + return all_spans + + +def _normalise_eval_span(sp: dict[str, Any], eval_name: str) -> dict[str, Any]: + """Convert a ``TrajectoryEvaluationSpan`` dict into the JSONL-like shape. + + The eval format stores ``parent_name`` instead of ``parent_id``, so we + keep both representations so the tree builder can work with either. 
+ """ + return { + "name": sp.get("name", ""), + "status": {"status_code": (sp.get("status", "unset") or "unset").upper()}, + "attributes": sp.get("attributes", {}), + "events": sp.get("events", []), + # Eval-specific: no span IDs, only parent_name + "parent_name": sp.get("parent_name"), + "_eval_name": eval_name, + } + + +# --------------------------------------------------------------------------- +# File-format detection +# --------------------------------------------------------------------------- + + +def _detect_format(path: str) -> str: + """Return ``'jsonl'`` or ``'eval_json'`` based on file content.""" + if path.endswith(".jsonl"): + return "jsonl" + + with open(path, encoding="utf-8") as f: + first_char = f.read(1) + + if first_char == "[" or first_char == "{": + # Peek inside to see if it looks like eval output + with open(path, encoding="utf-8") as f: + try: + data = json.load(f) + except json.JSONDecodeError: + return "jsonl" + + if isinstance(data, dict) and "evaluationSetResults" in data: + return "eval_json" + + return "jsonl" + + +# --------------------------------------------------------------------------- +# Tree building +# --------------------------------------------------------------------------- + + +class SpanNode: + """A node in the span tree.""" + + def __init__(self, span: dict[str, Any]): + self.span = span + self.children: list["SpanNode"] = [] + + @property + def name(self) -> str: + return self.span.get("name", "") + + @property + def status_code(self) -> str: + status = self.span.get("status", {}) + if isinstance(status, dict): + return (status.get("status_code") or "UNSET").upper() + return "UNSET" + + @property + def status_icon(self) -> str: + code = self.status_code + if code == "OK": + return "[green]✓[/green]" + elif code == "ERROR": + return "[red]✗[/red]" + return "[dim]○[/dim]" + + @property + def attributes(self) -> dict[str, Any]: + return self.span.get("attributes", {}) + + @property + def span_type(self) -> str | None: + 
return self.attributes.get("span_type") + + @property + def duration_ms(self) -> float | None: + """Compute duration in milliseconds from OTel timestamps.""" + start = self.span.get("start_time") + end = self.span.get("end_time") + if start is None or end is None: + return None + try: + t0 = _parse_otel_time(start) + t1 = _parse_otel_time(end) + return (t1 - t0).total_seconds() * 1000 + except Exception: + return None + + @property + def duration_str(self) -> str: + ms = self.duration_ms + if ms is None: + return "" + if ms < 1000: + return f"{ms:.0f}ms" + return f"{ms / 1000:.1f}s" + + @property + def events(self) -> list[dict[str, Any]]: + return self.span.get("events", []) or [] + + +def _parse_otel_time(value: Any) -> datetime: + """Parse an OTel timestamp string to datetime.""" + if isinstance(value, (int, float)): + # Nanoseconds since epoch + return datetime.fromtimestamp(value / 1e9) + s = str(value) + # OTel format: "2024-01-15T10:30:00.000000Z" + for fmt in ( + "%Y-%m-%dT%H:%M:%S.%fZ", + "%Y-%m-%dT%H:%M:%SZ", + "%Y-%m-%dT%H:%M:%S.%f", + "%Y-%m-%dT%H:%M:%S", + ): + try: + return datetime.strptime(s, fmt) + except ValueError: + continue + raise ValueError(f"Cannot parse timestamp: {value}") + + +def _build_tree_from_jsonl(spans: list[dict[str, Any]]) -> list[SpanNode]: + """Build a span tree using ``context.span_id`` / ``parent_id``.""" + nodes_by_id: dict[str, SpanNode] = {} + roots: list[SpanNode] = [] + + # First pass: create nodes + for sp in spans: + ctx = sp.get("context", {}) + span_id = ctx.get("span_id") + if span_id is None: + # Eval-normalised span without IDs — handled separately + continue + nodes_by_id[span_id] = SpanNode(sp) + + # Second pass: link parents + for sp in spans: + ctx = sp.get("context", {}) + span_id = ctx.get("span_id") + parent_id = sp.get("parent_id") + if span_id is None: + continue + node = nodes_by_id[span_id] + if parent_id and parent_id in nodes_by_id: + nodes_by_id[parent_id].children.append(node) + else: + 
roots.append(node) + + return roots + + +def _build_tree_from_eval(spans: list[dict[str, Any]]) -> list[SpanNode]: + """Build a span tree using ``parent_name`` (eval verbose format). + + Since multiple spans may share a name, we use position-based matching: + the first child referencing a parent_name attaches to the first span + with that name that hasn't been used yet as a parent for that same + child name. + """ + nodes = [SpanNode(sp) for sp in spans] + name_to_nodes: dict[str, list[SpanNode]] = {} + for node in nodes: + name_to_nodes.setdefault(node.name, []).append(node) + + roots: list[SpanNode] = [] + for node in nodes: + parent_name = node.span.get("parent_name") + if parent_name and parent_name in name_to_nodes: + # Attach to first matching parent + parent_candidates = name_to_nodes[parent_name] + if parent_candidates: + parent_candidates[0].children.append(node) + else: + roots.append(node) + else: + roots.append(node) + + return roots + + +def _build_tree(spans: list[dict[str, Any]], is_eval: bool) -> list[SpanNode]: + """Build a span tree from loaded spans.""" + if is_eval: + return _build_tree_from_eval(spans) + return _build_tree_from_jsonl(spans) + + +# --------------------------------------------------------------------------- +# Filtering +# --------------------------------------------------------------------------- + + +def _matches_filter( + node: SpanNode, + *, + name_pattern: str | None, + span_type_filter: str | None, + status_filter: str | None, +) -> bool: + """Check if a span matches the filter criteria.""" + if name_pattern: + if not fnmatch(node.name.lower(), name_pattern.lower()): + return False + + if span_type_filter: + st = node.span_type + if st is None or st.lower() != span_type_filter.lower(): + return False + + if status_filter: + if node.status_code.lower() != status_filter.lower(): + return False + + return True + + +def _filter_tree( + roots: list[SpanNode], + *, + name_pattern: str | None = None, + span_type_filter: str | None 
= None, + status_filter: str | None = None, +) -> list[SpanNode]: + """Filter the span tree, keeping ancestors of matching nodes.""" + if not name_pattern and not span_type_filter and not status_filter: + return roots + + def _keep(node: SpanNode) -> SpanNode | None: + # Recurse into children first + kept_children = [] + for child in node.children: + result = _keep(child) + if result is not None: + kept_children.append(result) + + self_matches = _matches_filter( + node, + name_pattern=name_pattern, + span_type_filter=span_type_filter, + status_filter=status_filter, + ) + + if self_matches or kept_children: + new_node = SpanNode(node.span) + new_node.children = kept_children + return new_node + + return None + + filtered = [] + for root in roots: + result = _keep(root) + if result is not None: + filtered.append(result) + return filtered + + +def _subtree_contains(node: SpanNode, pattern: str) -> bool: + """Check if any node in the subtree matches the name pattern.""" + if fnmatch(node.name.lower(), pattern.lower()): + return True + return any(_subtree_contains(child, pattern) for child in node.children) + + +def _filter_contains( + roots: list[SpanNode], + pattern: str, +) -> list[SpanNode]: + """Keep full subtrees where any descendant matches the pattern. + + Walks from the top and finds the shallowest nodes whose subtree + contains a match *and* that are not just the matching leaf itself. + This way you see the full agent trajectory around the matching span, + not just the span alone. + + For eval traces this typically returns the per-evaluation subtrees + (Evaluation → root → main → …) rather than the top-level + Evaluation Set Run. 
+ """ + kept: list[SpanNode] = [] + + def _collect(node: SpanNode) -> None: + if not _subtree_contains(node, pattern): + return + + # Count how many direct children also contain the match + children_with_match = [ + c for c in node.children if _subtree_contains(c, pattern) + ] + + if len(children_with_match) > 1: + # Multiple children match — drill into each separately + # (e.g. Evaluation Set Run with several matching Evaluations) + for child in children_with_match: + _collect(child) + elif len(children_with_match) == 1: + child = children_with_match[0] + # If the only matching child is a leaf that matches the + # pattern itself, keep *this* node so we show context. + # Otherwise drill deeper. + child_is_leaf_match = ( + not child.children + or not any(_subtree_contains(gc, pattern) for gc in child.children) + ) and fnmatch(child.name.lower(), pattern.lower()) + + if child_is_leaf_match: + kept.append(node) + else: + _collect(child) + else: + # This node itself matches (no child does) — keep its parent + # would have been better, but we're already here, so keep it. + kept.append(node) + + for root in roots: + _collect(root) + + return kept + + +# --------------------------------------------------------------------------- +# Rendering +# --------------------------------------------------------------------------- + + +def _safe_parse_json(value: Any) -> Any: + """Try to parse a JSON string; return as-is if it fails.""" + if isinstance(value, str): + try: + return json.loads(value) + except (json.JSONDecodeError, ValueError): + pass + return value + + +def _truncate(value: str, max_len: int = 200) -> str: + if max_len <= 0: + return value + if len(value) <= max_len: + return value + return value[:max_len] + "..." 


def _render_span_node(
    parent_tree: Any,
    node: SpanNode,
    *,
    show_input: bool = True,
    show_output: bool = True,
    show_full: bool = False,
    max_value_length: int = 200,
) -> None:
    """Render a single span node and its children into a Rich Tree.

    Args:
        parent_tree: Rich ``Tree`` (or branch) to attach this span to.
        node: The span to render.
        show_input: Whether to show the ``input.value`` attribute.
        show_output: Whether to show the ``output.value`` attribute.
        show_full: Show all attributes instead of only the key ones.
        max_value_length: Truncation limit for attribute values
            (``0`` disables truncation).
    """
    attrs = node.attributes

    # Build the header line
    duration = node.duration_str
    span_type = node.span_type or ""
    status = node.status_icon

    # Determine display label based on span type
    label = _span_label(node)

    parts = [f"[bold]{label}[/bold]"]
    if duration:
        parts.append(f"[dim]({duration})[/dim]")
    parts.append(status)
    if span_type and show_full:
        parts.append(f"[dim]\\[{span_type}][/dim]")

    header = " ".join(parts)
    branch = parent_tree.add(header)

    # Show key attributes
    if show_full:
        _render_all_attributes(branch, attrs, max_value_length=max_value_length)
    else:
        _render_key_attributes(
            branch,
            attrs,
            node,
            show_input=show_input,
            show_output=show_output,
            max_value_length=max_value_length,
        )

    # Show error events
    for event in node.events:
        event_name = event.get("name", "")
        if event_name == "exception":
            event_attrs = event.get("attributes", {})
            exc_type = event_attrs.get("exception.type", "")
            exc_msg = event_attrs.get("exception.message", "")
            branch.add(
                f"[red]⚠ {exc_type}: {_truncate(str(exc_msg), max_value_length)}[/red]"
            )

    # Recurse into children
    for child in node.children:
        _render_span_node(
            branch,
            child,
            show_input=show_input,
            show_output=show_output,
            show_full=show_full,
            max_value_length=max_value_length,
        )


def _span_label(node: SpanNode) -> str:
    """Build a human-friendly label for a span (LLM / tool / plain name)."""
    attrs = node.attributes
    span_kind = (attrs.get("openinference.span.kind") or "").upper()

    # LLM span
    if span_kind == "LLM" or attrs.get("llm.model_name"):
        model = attrs.get("llm.model_name", "")
        if model:
            return f"LLM ({model})"
        return "LLM call"

    # Tool span
    if span_kind == "TOOL" or (node.span_type or "").upper() == "TOOL":
        tool_name = attrs.get("tool.name", node.name)
        return f"🔧 {tool_name}"

    return node.name


def _render_key_attributes(
    branch: Any,
    attrs: dict[str, Any],
    node: SpanNode,
    *,
    show_input: bool,
    show_output: bool,
    max_value_length: int = 200,
) -> None:
    """Show only the most interesting attributes (input/output/tokens/run_type)."""
    # Input
    if show_input:
        input_val = attrs.get("input.value")
        if input_val:
            parsed = _safe_parse_json(input_val)
            display = _truncate(
                json.dumps(parsed, default=str)
                if isinstance(parsed, (dict, list))
                else str(parsed),
                max_value_length,
            )
            branch.add(f"[cyan]input:[/cyan] {display}")

    # Output
    if show_output:
        output_val = attrs.get("output.value")
        if output_val:
            parsed = _safe_parse_json(output_val)
            display = _truncate(
                json.dumps(parsed, default=str)
                if isinstance(parsed, (dict, list))
                else str(parsed),
                max_value_length,
            )
            branch.add(f"[green]output:[/green] {display}")

    # LLM tokens
    prompt_tokens = attrs.get("llm.token_count.prompt")
    completion_tokens = attrs.get("llm.token_count.completion")
    if prompt_tokens is not None or completion_tokens is not None:
        parts = []
        if prompt_tokens is not None:
            parts.append(f"prompt={prompt_tokens}")
        if completion_tokens is not None:
            parts.append(f"completion={completion_tokens}")
        total = attrs.get("llm.token_count.total")
        if total is not None:
            parts.append(f"total={total}")
        branch.add(f"[yellow]tokens:[/yellow] {', '.join(parts)}")

    # Run type
    run_type = attrs.get("run_type")
    if run_type:
        branch.add(f"[dim]run_type: {run_type}[/dim]")


def _render_all_attributes(
    branch: Any, attrs: dict[str, Any], *, max_value_length: int = 200
) -> None:
    """Show all span attributes under a dedicated sub-branch."""
    if not attrs:
        return
    attr_branch = branch.add("[dim]attributes[/dim]")
    for key, value in sorted(attrs.items()):
        display_value = _safe_parse_json(value) if isinstance(value, str) else value
        if isinstance(display_value, (dict, list)):
            display_str = _truncate(
                json.dumps(display_value, default=str), max_value_length
            )
        else:
            display_str = _truncate(str(display_value), max_value_length)
        attr_branch.add(f"[yellow]{key}[/yellow]: {display_str}")


def _render_tree(
    roots: list[SpanNode],
    *,
    show_input: bool = True,
    show_output: bool = True,
    show_full: bool = False,
    max_value_length: int = 200,
) -> None:
    """Render the full span tree to the console.

    Roots are grouped by ``context.trace_id`` when available; roots
    without a trace id are rendered together under a generic header.
    """
    from rich.console import Console
    from rich.tree import Tree

    rich_console = Console(force_terminal=True)

    if not roots:
        rich_console.print("[yellow]No spans to display.[/yellow]")
        return

    # Group by trace_id if available
    trace_groups: dict[str, list[SpanNode]] = {}
    ungrouped: list[SpanNode] = []
    for root in roots:
        trace_id = root.span.get("context", {}).get("trace_id")
        if trace_id:
            trace_groups.setdefault(trace_id, []).append(root)
        else:
            ungrouped.append(root)

    for trace_id, group_roots in trace_groups.items():
        # Shorten trace_id for display
        display_id = str(trace_id).replace("0x", "")
        if len(display_id) > 16:
            display_id = display_id[:8] + "…" + display_id[-8:]
        tree = Tree(f"[bold magenta]Trace[/bold magenta] [dim]{display_id}[/dim]")
        for root in group_roots:
            _render_span_node(
                tree,
                root,
                show_input=show_input,
                show_output=show_output,
                show_full=show_full,
                max_value_length=max_value_length,
            )
        rich_console.print(tree)
        rich_console.print()

    # Fix: previously, roots without a trace_id were silently dropped
    # whenever at least one root had one (grouped/ungrouped was an
    # either-or). Render them too so the summary count matches the tree.
    if ungrouped:
        tree = Tree("[bold magenta]Trace[/bold magenta]")
        for root in ungrouped:
            _render_span_node(
                tree,
                root,
                show_input=show_input,
                show_output=show_output,
                show_full=show_full,
                max_value_length=max_value_length,
            )
        rich_console.print(tree)
        rich_console.print()

    # Summary
    total_spans = _count_spans(roots)
    rich_console.print(f"[dim]{total_spans} spans total[/dim]")


def _count_spans(roots: list[SpanNode]) -> int:
    """Count all spans in the forest, including every descendant."""
    count = 0
    for root in roots:
        count += 1
        count += _count_spans(root.children)
    return count


# ---------------------------------------------------------------------------
# Click command
# ---------------------------------------------------------------------------


@click.command()
@click.argument("file", type=click.Path(exists=True))
@click.option(
    "--name",
    default=None,
    help="Filter spans by name (glob pattern, e.g. 'agent*')",
)
@click.option(
    "--eval-id",
    default=None,
    help="Show trace for a specific evaluation by name (eval JSON only)",
)
@click.option(
    "--span-type",
    default=None,
    help="Filter by span_type attribute (e.g. 'TOOL', 'function_call_async')",
)
@click.option(
    "--status",
    default=None,
    type=click.Choice(["ok", "error", "unset"], case_sensitive=False),
    help="Filter by span status",
)
@click.option(
    "--contains",
    "contains_pattern",
    default=None,
    help="Show full subtrees containing a span matching this name (glob pattern)",
)
@click.option(
    "--no-input",
    is_flag=True,
    default=False,
    help="Hide input values",
)
@click.option(
    "--no-output",
    is_flag=True,
    default=False,
    help="Hide output values",
)
@click.option(
    "--full",
    is_flag=True,
    default=False,
    help="Show all span attributes (verbose mode)",
)
def trace(
    file: str,
    name: str | None,
    eval_id: str | None,
    span_type: str | None,
    status: str | None,
    contains_pattern: str | None,
    no_input: bool,
    no_output: bool,
    full: bool,
) -> None:
    """Visualize an agent execution trace.

    \b
    Reads trace files produced by:
    - uipath run --trace-file
    - uipath eval --trace-file
    - uipath eval --verbose --output-file

    \b
    Examples:
      uipath trace traces.jsonl
      uipath trace traces.jsonl --name "get_weather"
      uipath trace traces.jsonl --span-type TOOL
      uipath trace traces.jsonl --status error
      uipath trace traces.jsonl --contains "get_random*"
      uipath trace eval_output.json --eval-id "test-case-1"
      uipath trace traces.jsonl --full
    """  # noqa: D301
    # click.Path(exists=True) guarantees existence; this rejects directories.
    if not os.path.isfile(file):
        console.error(f"File not found: {file}")

    fmt = _detect_format(file)

    if fmt == "eval_json":
        spans = _load_eval_output_spans(file, eval_id)
        if not spans:
            if eval_id:
                console.error(
                    f"No traces found for evaluation '{eval_id}'. "
                    "Make sure the eval was run with --verbose."
                )
            else:
                console.error(
                    "No traces found in the eval output. "
                    "Make sure the eval was run with --verbose."
                )
        is_eval = True
    else:
        spans = _load_jsonl_spans(file)
        if not spans:
            console.error("No spans found in the trace file.")
        is_eval = False

    roots = _build_tree(spans, is_eval=is_eval)

    # --contains: keep full subtrees that contain a matching span
    if contains_pattern:
        roots = _filter_contains(roots, contains_pattern)

    # Apply filters
    roots = _filter_tree(
        roots,
        name_pattern=name,
        span_type_filter=span_type,
        status_filter=status,
    )

    if not roots:
        from rich.console import Console as RichConsole

        RichConsole(force_terminal=True).print(
            "[yellow]No spans match the given filters.[/yellow]"
        )
        return

    _render_tree(
        roots,
        show_input=not no_input,
        show_output=not no_output,
        show_full=full,
        max_value_length=0 if full else 200,
    )
documentation is split into multiple files for efficient context loading. L - **Contains:** All SDK services and methods with full signatures and type annotations 3. **@.agent/CLI_REFERENCE.md** - CLI commands documentation - - **When to load:** Working with `uipath init`, `uipath run`, or `uipath eval` commands + - **When to load:** Working with `uipath init`, `uipath run`, `uipath eval`, or `uipath trace` commands - **Contains:** Command syntax, options, usage examples, and workflows diff --git a/packages/uipath/src/uipath/_resources/CLI_REFERENCE.md b/packages/uipath/src/uipath/_resources/CLI_REFERENCE.md index 98524ddfd..aa6efc24e 100644 --- a/packages/uipath/src/uipath/_resources/CLI_REFERENCE.md +++ b/packages/uipath/src/uipath/_resources/CLI_REFERENCE.md @@ -9,6 +9,7 @@ The UiPath Python SDK provides a comprehensive CLI for managing coded agents and | `init` | Initialize agent project | Creating a new agent or updating schema | | `run` | Execute agent | Running agent locally or testing | | `eval` | Evaluate agent | Testing agent performance with evaluation sets | +| `trace` | Visualize execution trace | Inspecting agent trajectory after a run or eval | --- @@ -153,6 +154,61 @@ uv run uipath eval --output-file eval_results.json --- +### `uipath trace` + +**Description:** Visualize an agent execution trace. Reads JSONL trace files produced by `uipath run --trace-file` or `uipath eval --trace-file`, or eval verbose JSON output from `uipath eval --verbose --output-file`. Renders a span tree showing the agent's trajectory with timing, inputs, outputs, and tool calls. + +**Arguments:** + +| Argument | Required | Description | +|----------|----------|-------------| +| `file` | Yes | Path to the trace file (`.jsonl` or `.json`) | + +**Options:** + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--name` | value | `None` | Filter spans by name (glob pattern, e.g. 
`"agent*"`) | +| `--contains` | value | `None` | Show full subtrees containing a span matching this name (glob pattern) | +| `--eval-id` | value | `None` | Show trace for a specific evaluation by name (eval JSON only) | +| `--span-type` | value | `None` | Filter by `span_type` attribute (e.g. `TOOL`, `function_call_async`) | +| `--status` | choice | `None` | Filter by span status: `ok`, `error`, or `unset` | +| `--no-input` | flag | false | Hide input values | +| `--no-output` | flag | false | Hide output values | +| `--full` | flag | false | Show all span attributes with no truncation | + +**Usage Examples:** + +```bash +# Capture traces from a run, then visualize +uv run uipath run main '{"query": "hello"}' --trace-file traces.jsonl +uv run uipath trace traces.jsonl + +# Quick structural overview (no input/output noise) +uv run uipath trace traces.jsonl --no-input --no-output + +# Show only tool calls +uv run uipath trace traces.jsonl --span-type TOOL + +# Show only errored spans +uv run uipath trace traces.jsonl --status error + +# From an eval: find the run where a specific function was called +uv run uipath eval main eval-set.json --trace-file traces.jsonl +uv run uipath trace traces.jsonl --contains "get_random*" + +# Full detail on a specific span (no truncation) +uv run uipath trace traces.jsonl --name "search_flights" --full + +# Visualize eval verbose output +uv run uipath eval main eval-set.json --verbose --output-file results.json +uv run uipath trace results.json --eval-id "test-case-1" +``` + +**When to use:** Run this command to inspect what your agent did during a run — which tools it called, what inputs/outputs it produced, where errors occurred, and how long each step took. Especially useful for debugging failing evaluations by capturing traces with `--trace-file` and then using `--contains` to find specific runs. + +--- + ### Common Workflows **1. 
Creating a New Agent:** @@ -174,6 +230,10 @@ uv run uipath run main.py '{"input": "test"}' --debug # Test with input file uv run uipath run main.py --file test_input.json --output-file test_output.json + +# Run with tracing, then inspect the trajectory +uv run uipath run main.py '{"input": "test"}' --trace-file traces.jsonl +uv run uipath trace traces.jsonl ``` **3. Schema Updates:** diff --git a/packages/uipath/tests/cli/trace/__init__.py b/packages/uipath/tests/cli/trace/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/uipath/tests/cli/trace/fixtures/sample_error_trace.jsonl b/packages/uipath/tests/cli/trace/fixtures/sample_error_trace.jsonl new file mode 100644 index 000000000..9c154715c --- /dev/null +++ b/packages/uipath/tests/cli/trace/fixtures/sample_error_trace.jsonl @@ -0,0 +1,2 @@ +{"name": "agent", "context": {"trace_id": "0xdeadbeefdeadbeefdeadbeefdeadbeef", "span_id": "0xaaaaaaaaaaaaaaaa", "trace_state": "[]"}, "kind": "SpanKind.INTERNAL", "parent_id": null, "start_time": "2024-01-15T10:30:00.000000Z", "end_time": "2024-01-15T10:30:01.000000Z", "status": {"status_code": "ERROR", "description": "Tool call failed"}, "attributes": {"span_type": "function_call_async", "run_type": "agent", "input.mime_type": "application/json", "input.value": "{\"query\": \"Do something\"}"}, "events": [{"name": "exception", "timestamp": "2024-01-15T10:30:00.500000Z", "attributes": {"exception.type": "ValueError", "exception.message": "Invalid input provided"}}], "links": [], "resource": {"attributes": {}}} +{"name": "bad_tool", "context": {"trace_id": "0xdeadbeefdeadbeefdeadbeefdeadbeef", "span_id": "0xbbbbbbbbbbbbbbbb", "trace_state": "[]"}, "kind": "SpanKind.INTERNAL", "parent_id": "0xaaaaaaaaaaaaaaaa", "start_time": "2024-01-15T10:30:00.200000Z", "end_time": "2024-01-15T10:30:00.500000Z", "status": {"status_code": "ERROR"}, "attributes": {"span_type": "TOOL", "openinference.span.kind": "TOOL", "tool.name": "bad_tool"}, "events": 
[{"name": "exception", "timestamp": "2024-01-15T10:30:00.500000Z", "attributes": {"exception.type": "RuntimeError", "exception.message": "Connection refused"}}], "links": [], "resource": {"attributes": {}}} diff --git a/packages/uipath/tests/cli/trace/fixtures/sample_eval_output.json b/packages/uipath/tests/cli/trace/fixtures/sample_eval_output.json new file mode 100644 index 000000000..d7011af4b --- /dev/null +++ b/packages/uipath/tests/cli/trace/fixtures/sample_eval_output.json @@ -0,0 +1,71 @@ +{ + "evaluationSetName": "weather-eval-set", + "evaluationSetResults": [ + { + "evaluationName": "test-weather-query", + "evaluationRunResults": [ + { + "evaluatorName": "exact-match", + "evaluatorId": "eval-001", + "result": { + "score": 100.0, + "evaluationTime": 0.05 + }, + "isLineResult": false + } + ], + "agentExecutionOutput": { + "executionTime": 2.3, + "trace": { + "spans": [ + { + "name": "agent", + "status": "ok", + "parent_name": null, + "attributes": { + "span_type": "function_call_async", + "run_type": "agent", + "input.value": "{\"query\": \"What is the weather?\"}", + "output.value": "{\"answer\": \"15C and cloudy\"}" + }, + "events": [] + }, + { + "name": "get_weather", + "status": "ok", + "parent_name": "agent", + "attributes": { + "span_type": "TOOL", + "openinference.span.kind": "TOOL", + "tool.name": "get_weather", + "input.value": "{\"city\": \"London\"}", + "output.value": "{\"temp\": 15}" + }, + "events": [] + } + ] + }, + "result": { + "output": {"answer": "15C and cloudy"}, + "status": "Successful", + "error": null + } + } + }, + { + "evaluationName": "test-no-trace", + "evaluationRunResults": [ + { + "evaluatorName": "contains", + "evaluatorId": "eval-002", + "result": { + "score": 0.0, + "evaluationTime": 0.01 + }, + "isLineResult": false + } + ], + "agentExecutionOutput": null + } + ] +} diff --git a/packages/uipath/tests/cli/trace/fixtures/sample_eval_trace.jsonl b/packages/uipath/tests/cli/trace/fixtures/sample_eval_trace.jsonl new file mode 
100644 index 000000000..d11c3b1b1 --- /dev/null +++ b/packages/uipath/tests/cli/trace/fixtures/sample_eval_trace.jsonl @@ -0,0 +1,10 @@ +{"name": "Evaluation Set Run", "context": {"trace_id": "0xaaaa000000000000aaaa000000000000", "span_id": "0x0000000000000001", "trace_state": "[]"}, "kind": "SpanKind.INTERNAL", "parent_id": null, "start_time": "2024-01-15T10:30:00.000000Z", "end_time": "2024-01-15T10:30:10.000000Z", "status": {"status_code": "OK"}, "attributes": {"span_type": "eval_set_run"}, "events": [], "links": [], "resource": {"attributes": {}}} +{"name": "Evaluation", "context": {"trace_id": "0xaaaa000000000000aaaa000000000000", "span_id": "0x0000000000000010", "trace_state": "[]"}, "kind": "SpanKind.INTERNAL", "parent_id": "0x0000000000000001", "start_time": "2024-01-15T10:30:00.100000Z", "end_time": "2024-01-15T10:30:05.000000Z", "status": {"status_code": "OK"}, "attributes": {"span_type": "evaluation"}, "events": [], "links": [], "resource": {"attributes": {}}} +{"name": "root", "context": {"trace_id": "0xaaaa000000000000aaaa000000000000", "span_id": "0x0000000000000011", "trace_state": "[]"}, "kind": "SpanKind.INTERNAL", "parent_id": "0x0000000000000010", "start_time": "2024-01-15T10:30:00.200000Z", "end_time": "2024-01-15T10:30:01.000000Z", "status": {"status_code": "UNSET"}, "attributes": {}, "events": [], "links": [], "resource": {"attributes": {}}} +{"name": "main", "context": {"trace_id": "0xaaaa000000000000aaaa000000000000", "span_id": "0x0000000000000012", "trace_state": "[]"}, "kind": "SpanKind.INTERNAL", "parent_id": "0x0000000000000011", "start_time": "2024-01-15T10:30:00.300000Z", "end_time": "2024-01-15T10:30:00.900000Z", "status": {"status_code": "UNSET"}, "attributes": {"span_type": "function_call_async", "input.value": "{\"a\": 2, \"operator\": \"random\"}", "output.value": "{\"result\": 8}"}, "events": [], "links": [], "resource": {"attributes": {}}} +{"name": "get_random_operator", "context": {"trace_id": 
"0xaaaa000000000000aaaa000000000000", "span_id": "0x0000000000000013", "trace_state": "[]"}, "kind": "SpanKind.INTERNAL", "parent_id": "0x0000000000000012", "start_time": "2024-01-15T10:30:00.400000Z", "end_time": "2024-01-15T10:30:00.500000Z", "status": {"status_code": "UNSET"}, "attributes": {"span_type": "function_call_async", "input.value": "{}", "output.value": "{\"result\": \"*\"}"}, "events": [], "links": [], "resource": {"attributes": {}}} +{"name": "apply_operator", "context": {"trace_id": "0xaaaa000000000000aaaa000000000000", "span_id": "0x0000000000000014", "trace_state": "[]"}, "kind": "SpanKind.INTERNAL", "parent_id": "0x0000000000000012", "start_time": "2024-01-15T10:30:00.600000Z", "end_time": "2024-01-15T10:30:00.800000Z", "status": {"status_code": "UNSET"}, "attributes": {"span_type": "TOOL", "openinference.span.kind": "TOOL", "tool.name": "apply_operator", "input.value": "{\"operator\": \"*\", \"a\": 2, \"b\": 4}", "output.value": "{\"result\": 8}"}, "events": [], "links": [], "resource": {"attributes": {}}} +{"name": "Evaluation", "context": {"trace_id": "0xaaaa000000000000aaaa000000000000", "span_id": "0x0000000000000020", "trace_state": "[]"}, "kind": "SpanKind.INTERNAL", "parent_id": "0x0000000000000001", "start_time": "2024-01-15T10:30:05.000000Z", "end_time": "2024-01-15T10:30:10.000000Z", "status": {"status_code": "OK"}, "attributes": {"span_type": "evaluation"}, "events": [], "links": [], "resource": {"attributes": {}}} +{"name": "root", "context": {"trace_id": "0xaaaa000000000000aaaa000000000000", "span_id": "0x0000000000000021", "trace_state": "[]"}, "kind": "SpanKind.INTERNAL", "parent_id": "0x0000000000000020", "start_time": "2024-01-15T10:30:05.100000Z", "end_time": "2024-01-15T10:30:06.000000Z", "status": {"status_code": "UNSET"}, "attributes": {}, "events": [], "links": [], "resource": {"attributes": {}}} +{"name": "main", "context": {"trace_id": "0xaaaa000000000000aaaa000000000000", "span_id": "0x0000000000000022", "trace_state": 
"[]"}, "kind": "SpanKind.INTERNAL", "parent_id": "0x0000000000000021", "start_time": "2024-01-15T10:30:05.200000Z", "end_time": "2024-01-15T10:30:05.900000Z", "status": {"status_code": "UNSET"}, "attributes": {"span_type": "function_call_async", "input.value": "{\"a\": 1, \"operator\": \"+\"}", "output.value": "{\"result\": 5}"}, "events": [], "links": [], "resource": {"attributes": {}}} +{"name": "apply_operator", "context": {"trace_id": "0xaaaa000000000000aaaa000000000000", "span_id": "0x0000000000000023", "trace_state": "[]"}, "kind": "SpanKind.INTERNAL", "parent_id": "0x0000000000000022", "start_time": "2024-01-15T10:30:05.300000Z", "end_time": "2024-01-15T10:30:05.800000Z", "status": {"status_code": "UNSET"}, "attributes": {"span_type": "TOOL", "openinference.span.kind": "TOOL", "tool.name": "apply_operator", "input.value": "{\"operator\": \"+\", \"a\": 1, \"b\": 4}", "output.value": "{\"result\": 5}"}, "events": [], "links": [], "resource": {"attributes": {}}} diff --git a/packages/uipath/tests/cli/trace/fixtures/sample_trace.jsonl b/packages/uipath/tests/cli/trace/fixtures/sample_trace.jsonl new file mode 100644 index 000000000..9c42cb364 --- /dev/null +++ b/packages/uipath/tests/cli/trace/fixtures/sample_trace.jsonl @@ -0,0 +1,4 @@ +{"name": "agent", "context": {"trace_id": "0x1234567890abcdef1234567890abcdef", "span_id": "0x1111111111111111", "trace_state": "[]"}, "kind": "SpanKind.INTERNAL", "parent_id": null, "start_time": "2024-01-15T10:30:00.000000Z", "end_time": "2024-01-15T10:30:02.300000Z", "status": {"status_code": "OK"}, "attributes": {"span_type": "function_call_async", "run_type": "agent", "input.mime_type": "application/json", "input.value": "{\"query\": \"What is the weather?\"}", "output.mime_type": "application/json", "output.value": "{\"answer\": \"It is 15C and cloudy\"}"}, "events": [], "links": [], "resource": {"attributes": {}}} +{"name": "ChatOpenAI", "context": {"trace_id": "0x1234567890abcdef1234567890abcdef", "span_id": 
"0x2222222222222222", "trace_state": "[]"}, "kind": "SpanKind.INTERNAL", "parent_id": "0x1111111111111111", "start_time": "2024-01-15T10:30:00.100000Z", "end_time": "2024-01-15T10:30:00.900000Z", "status": {"status_code": "OK"}, "attributes": {"openinference.span.kind": "LLM", "llm.model_name": "gpt-4o", "llm.token_count.prompt": 150, "llm.token_count.completion": 45, "llm.token_count.total": 195}, "events": [], "links": [], "resource": {"attributes": {}}} +{"name": "get_weather", "context": {"trace_id": "0x1234567890abcdef1234567890abcdef", "span_id": "0x3333333333333333", "trace_state": "[]"}, "kind": "SpanKind.INTERNAL", "parent_id": "0x1111111111111111", "start_time": "2024-01-15T10:30:01.000000Z", "end_time": "2024-01-15T10:30:02.200000Z", "status": {"status_code": "OK"}, "attributes": {"span_type": "TOOL", "openinference.span.kind": "TOOL", "tool.name": "get_weather", "input.mime_type": "application/json", "input.value": "{\"city\": \"London\"}", "output.mime_type": "application/json", "output.value": "{\"temp\": 15, \"condition\": \"cloudy\"}"}, "events": [], "links": [], "resource": {"attributes": {}}} +{"name": "ChatOpenAI", "context": {"trace_id": "0x1234567890abcdef1234567890abcdef", "span_id": "0x4444444444444444", "trace_state": "[]"}, "kind": "SpanKind.INTERNAL", "parent_id": "0x1111111111111111", "start_time": "2024-01-15T10:30:02.200000Z", "end_time": "2024-01-15T10:30:02.300000Z", "status": {"status_code": "OK"}, "attributes": {"openinference.span.kind": "LLM", "llm.model_name": "gpt-4o", "llm.token_count.prompt": 200, "llm.token_count.completion": 30, "llm.token_count.total": 230}, "events": [], "links": [], "resource": {"attributes": {}}} diff --git a/packages/uipath/tests/cli/trace/test_trace.py b/packages/uipath/tests/cli/trace/test_trace.py new file mode 100644 index 000000000..c7df9db57 --- /dev/null +++ b/packages/uipath/tests/cli/trace/test_trace.py @@ -0,0 +1,516 @@ +"""Tests for the ``uipath trace`` CLI command.""" + +from pathlib import 
Path + +import pytest +from click.testing import CliRunner + +from uipath._cli.cli_trace import ( + SpanNode, + _build_tree, + _build_tree_from_eval, + _build_tree_from_jsonl, + _count_spans, + _detect_format, + _filter_contains, + _filter_tree, + _load_eval_output_spans, + _load_jsonl_spans, + _parse_otel_time, + _safe_parse_json, + _span_label, + _truncate, + trace, +) + +FIXTURES = Path(__file__).parent / "fixtures" +SAMPLE_TRACE = FIXTURES / "sample_trace.jsonl" +SAMPLE_ERROR_TRACE = FIXTURES / "sample_error_trace.jsonl" +SAMPLE_EVAL_OUTPUT = FIXTURES / "sample_eval_output.json" +SAMPLE_EVAL_TRACE = FIXTURES / "sample_eval_trace.jsonl" + + +# --------------------------------------------------------------------------- +# Loading +# --------------------------------------------------------------------------- + + +class TestLoadJsonlSpans: + def test_loads_all_spans(self): + spans = _load_jsonl_spans(str(SAMPLE_TRACE)) + assert len(spans) == 4 + + def test_span_has_expected_fields(self): + spans = _load_jsonl_spans(str(SAMPLE_TRACE)) + root = spans[0] + assert root["name"] == "agent" + assert root["context"]["trace_id"] == "0x1234567890abcdef1234567890abcdef" + assert root["parent_id"] is None + + def test_child_span_has_parent_id(self): + spans = _load_jsonl_spans(str(SAMPLE_TRACE)) + llm_span = spans[1] + assert llm_span["parent_id"] == "0x1111111111111111" + + def test_empty_file(self, tmp_path): + empty = tmp_path / "empty.jsonl" + empty.write_text("") + spans = _load_jsonl_spans(str(empty)) + assert spans == [] + + +class TestLoadEvalOutputSpans: + def test_loads_spans_from_verbose_output(self): + spans = _load_eval_output_spans(str(SAMPLE_EVAL_OUTPUT), eval_id=None) + assert len(spans) == 2 + + def test_filters_by_eval_id(self): + spans = _load_eval_output_spans( + str(SAMPLE_EVAL_OUTPUT), eval_id="test-weather-query" + ) + assert len(spans) == 2 + assert all(s["_eval_name"] == "test-weather-query" for s in spans) + + def 
test_missing_eval_id_returns_empty(self): + spans = _load_eval_output_spans(str(SAMPLE_EVAL_OUTPUT), eval_id="nonexistent") + assert spans == [] + + def test_eval_without_agent_output_skipped(self): + # test-no-trace has agentExecutionOutput: null + spans = _load_eval_output_spans( + str(SAMPLE_EVAL_OUTPUT), eval_id="test-no-trace" + ) + assert spans == [] + + def test_normalised_span_has_parent_name(self): + spans = _load_eval_output_spans(str(SAMPLE_EVAL_OUTPUT), eval_id=None) + tool_span = [s for s in spans if s["name"] == "get_weather"][0] + assert tool_span["parent_name"] == "agent" + + +# --------------------------------------------------------------------------- +# Format detection +# --------------------------------------------------------------------------- + + +class TestDetectFormat: + def test_jsonl_by_extension(self, tmp_path): + f = tmp_path / "traces.jsonl" + f.write_text("{}\n") + assert _detect_format(str(f)) == "jsonl" + + def test_eval_json_detected(self): + assert _detect_format(str(SAMPLE_EVAL_OUTPUT)) == "eval_json" + + def test_json_without_eval_keys_is_jsonl(self, tmp_path): + f = tmp_path / "data.json" + f.write_text('{"name": "something"}\n') + assert _detect_format(str(f)) == "jsonl" + + +# --------------------------------------------------------------------------- +# Tree building +# --------------------------------------------------------------------------- + + +class TestBuildTreeJsonl: + def test_builds_correct_hierarchy(self): + spans = _load_jsonl_spans(str(SAMPLE_TRACE)) + roots = _build_tree_from_jsonl(spans) + assert len(roots) == 1 + root = roots[0] + assert root.name == "agent" + assert len(root.children) == 3 # 2 LLM + 1 tool + + def test_root_has_no_parent(self): + spans = _load_jsonl_spans(str(SAMPLE_TRACE)) + roots = _build_tree_from_jsonl(spans) + assert roots[0].span.get("parent_id") is None + + def test_children_are_correct(self): + spans = _load_jsonl_spans(str(SAMPLE_TRACE)) + roots = _build_tree_from_jsonl(spans) + 
child_names = sorted([c.name for c in roots[0].children]) + assert child_names == ["ChatOpenAI", "ChatOpenAI", "get_weather"] + + +class TestBuildTreeEval: + def test_builds_hierarchy_from_parent_name(self): + spans = _load_eval_output_spans(str(SAMPLE_EVAL_OUTPUT), eval_id=None) + roots = _build_tree_from_eval(spans) + assert len(roots) == 1 + root = roots[0] + assert root.name == "agent" + assert len(root.children) == 1 + assert root.children[0].name == "get_weather" + + +class TestBuildTree: + def test_dispatches_to_jsonl(self): + spans = _load_jsonl_spans(str(SAMPLE_TRACE)) + roots = _build_tree(spans, is_eval=False) + assert len(roots) == 1 + + def test_dispatches_to_eval(self): + spans = _load_eval_output_spans(str(SAMPLE_EVAL_OUTPUT), eval_id=None) + roots = _build_tree(spans, is_eval=True) + assert len(roots) == 1 + + +# --------------------------------------------------------------------------- +# SpanNode properties +# --------------------------------------------------------------------------- + + +class TestSpanNode: + def test_status_ok(self): + node = SpanNode({"status": {"status_code": "OK"}}) + assert node.status_code == "OK" + assert "✓" in node.status_icon + + def test_status_error(self): + node = SpanNode({"status": {"status_code": "ERROR"}}) + assert node.status_code == "ERROR" + assert "✗" in node.status_icon + + def test_status_unset(self): + node = SpanNode({"status": {}}) + assert node.status_code == "UNSET" + assert "○" in node.status_icon + + def test_duration_calculation(self): + node = SpanNode( + { + "start_time": "2024-01-15T10:30:00.000000Z", + "end_time": "2024-01-15T10:30:02.300000Z", + } + ) + assert node.duration_ms == pytest.approx(2300, abs=1) + assert node.duration_str == "2.3s" + + def test_duration_ms_range(self): + node = SpanNode( + { + "start_time": "2024-01-15T10:30:00.000000Z", + "end_time": "2024-01-15T10:30:00.500000Z", + } + ) + assert node.duration_str == "500ms" + + def test_duration_no_timestamps(self): + node = 
SpanNode({}) + assert node.duration_ms is None + assert node.duration_str == "" + + def test_attributes(self): + node = SpanNode({"attributes": {"foo": "bar"}}) + assert node.attributes == {"foo": "bar"} + + def test_span_type(self): + node = SpanNode({"attributes": {"span_type": "TOOL"}}) + assert node.span_type == "TOOL" + + def test_events(self): + events = [{"name": "exception", "attributes": {"exception.type": "ValueError"}}] + node = SpanNode({"events": events}) + assert node.events == events + + +# --------------------------------------------------------------------------- +# Span label +# --------------------------------------------------------------------------- + + +class TestSpanLabel: + def test_llm_span_with_model(self): + node = SpanNode( + {"name": "ChatOpenAI", "attributes": {"llm.model_name": "gpt-4o"}} + ) + assert _span_label(node) == "LLM (gpt-4o)" + + def test_llm_span_by_kind(self): + node = SpanNode( + { + "name": "completion", + "attributes": {"openinference.span.kind": "LLM"}, + } + ) + assert _span_label(node) == "LLM call" + + def test_tool_span(self): + node = SpanNode( + { + "name": "get_weather", + "attributes": { + "span_type": "TOOL", + "tool.name": "get_weather", + }, + } + ) + label = _span_label(node) + assert "get_weather" in label + assert "🔧" in label + + def test_generic_span(self): + node = SpanNode({"name": "my_function", "attributes": {}}) + assert _span_label(node) == "my_function" + + +# --------------------------------------------------------------------------- +# Filtering +# --------------------------------------------------------------------------- + + +class TestFilterTree: + def _build_sample_tree(self): + spans = _load_jsonl_spans(str(SAMPLE_TRACE)) + return _build_tree_from_jsonl(spans) + + def test_no_filter_returns_all(self): + roots = self._build_sample_tree() + filtered = _filter_tree(roots) + assert _count_spans(filtered) == 4 + + def test_filter_by_name(self): + roots = self._build_sample_tree() + filtered = 
_filter_tree(roots, name_pattern="get_weather") + # Should keep root (ancestor) + the matching span + assert _count_spans(filtered) >= 1 + # The matching span should be in the tree + names = _collect_names(filtered) + assert "get_weather" in names + + def test_filter_by_name_glob(self): + roots = self._build_sample_tree() + filtered = _filter_tree(roots, name_pattern="Chat*") + names = _collect_names(filtered) + assert "ChatOpenAI" in names + + def test_filter_by_span_type(self): + roots = self._build_sample_tree() + filtered = _filter_tree(roots, span_type_filter="TOOL") + names = _collect_names(filtered) + assert "get_weather" in names + + def test_filter_by_status(self): + spans = _load_jsonl_spans(str(SAMPLE_ERROR_TRACE)) + roots = _build_tree_from_jsonl(spans) + filtered = _filter_tree(roots, status_filter="error") + assert _count_spans(filtered) >= 1 + + def test_filter_no_match(self): + roots = self._build_sample_tree() + filtered = _filter_tree(roots, name_pattern="nonexistent_tool") + assert filtered == [] + + +class TestFilterContains: + """Tests for --contains: keep full subtrees containing a matching span.""" + + def _build_eval_tree(self): + spans = _load_jsonl_spans(str(SAMPLE_EVAL_TRACE)) + return _build_tree_from_jsonl(spans) + + def test_contains_unique_span_returns_one_subtree(self): + roots = self._build_eval_tree() + # get_random_operator only appears in the first eval run + filtered = _filter_contains(roots, "get_random_operator") + assert len(filtered) == 1 + names = _collect_names(filtered) + assert "get_random_operator" in names + assert "main" in names + assert "apply_operator" in names + + def test_contains_common_span_returns_all_subtrees(self): + roots = self._build_eval_tree() + # apply_operator appears in both eval runs + filtered = _filter_contains(roots, "apply_operator") + # Should return both main subtrees + assert len(filtered) >= 2 + names = _collect_names(filtered) + assert "apply_operator" in names + + def 
test_contains_glob_pattern(self): + roots = self._build_eval_tree() + filtered = _filter_contains(roots, "get_random*") + assert len(filtered) == 1 + names = _collect_names(filtered) + assert "get_random_operator" in names + + def test_contains_no_match(self): + roots = self._build_eval_tree() + filtered = _filter_contains(roots, "nonexistent_function") + assert filtered == [] + + def test_contains_preserves_full_subtree(self): + roots = self._build_eval_tree() + filtered = _filter_contains(roots, "get_random_operator") + # The returned subtree should include siblings of the match + names = _collect_names(filtered) + # apply_operator is a sibling of get_random_operator under main + assert "apply_operator" in names + + def test_contains_cli_integration(self): + runner = CliRunner() + result = runner.invoke( + trace, [str(SAMPLE_EVAL_TRACE), "--contains", "get_random*"] + ) + assert result.exit_code == 0 + assert "get_random_operator" in result.output + assert "apply_operator" in result.output + + def test_contains_cli_no_match(self): + runner = CliRunner() + result = runner.invoke( + trace, [str(SAMPLE_EVAL_TRACE), "--contains", "nonexistent"] + ) + assert result.exit_code == 0 + assert "No spans match" in result.output + + +# --------------------------------------------------------------------------- +# Utilities +# --------------------------------------------------------------------------- + + +class TestUtilities: + def test_parse_otel_time_iso(self): + dt = _parse_otel_time("2024-01-15T10:30:00.000000Z") + assert dt.year == 2024 + assert dt.month == 1 + assert dt.day == 15 + + def test_parse_otel_time_invalid(self): + with pytest.raises(ValueError): + _parse_otel_time("not-a-date") + + def test_safe_parse_json_valid(self): + assert _safe_parse_json('{"a": 1}') == {"a": 1} + + def test_safe_parse_json_invalid(self): + assert _safe_parse_json("not json") == "not json" + + def test_safe_parse_json_non_string(self): + assert _safe_parse_json(42) == 42 + + def 
test_truncate_short(self): + assert _truncate("hello", 10) == "hello" + + def test_truncate_long(self): + result = _truncate("a" * 300, 200) + assert len(result) == 203 # 200 + "..." + assert result.endswith("...") + + def test_count_spans(self): + spans = _load_jsonl_spans(str(SAMPLE_TRACE)) + roots = _build_tree_from_jsonl(spans) + assert _count_spans(roots) == 4 + + +# --------------------------------------------------------------------------- +# CLI integration (click runner) +# --------------------------------------------------------------------------- + + +class TestTraceCli: + def test_jsonl_trace(self): + runner = CliRunner() + result = runner.invoke(trace, [str(SAMPLE_TRACE)]) + assert result.exit_code == 0 + assert "agent" in result.output + assert "get_weather" in result.output + + def test_jsonl_trace_full(self): + runner = CliRunner() + result = runner.invoke(trace, [str(SAMPLE_TRACE), "--full"]) + assert result.exit_code == 0 + assert "attributes" in result.output + + def test_jsonl_trace_no_input(self): + runner = CliRunner() + result = runner.invoke(trace, [str(SAMPLE_TRACE), "--no-input"]) + assert result.exit_code == 0 + + def test_jsonl_trace_no_output(self): + runner = CliRunner() + result = runner.invoke(trace, [str(SAMPLE_TRACE), "--no-output"]) + assert result.exit_code == 0 + + def test_jsonl_filter_by_name(self): + runner = CliRunner() + result = runner.invoke(trace, [str(SAMPLE_TRACE), "--name", "get_weather"]) + assert result.exit_code == 0 + assert "get_weather" in result.output + + def test_jsonl_filter_by_span_type(self): + runner = CliRunner() + result = runner.invoke(trace, [str(SAMPLE_TRACE), "--span-type", "TOOL"]) + assert result.exit_code == 0 + assert "get_weather" in result.output + + def test_jsonl_filter_no_match(self): + runner = CliRunner() + result = runner.invoke(trace, [str(SAMPLE_TRACE), "--name", "nonexistent"]) + assert result.exit_code == 0 + assert "No spans match" in result.output + + def 
test_eval_json_trace(self): + runner = CliRunner() + result = runner.invoke(trace, [str(SAMPLE_EVAL_OUTPUT)]) + assert result.exit_code == 0 + assert "agent" in result.output + assert "get_weather" in result.output + + def test_eval_json_filter_eval_id(self): + runner = CliRunner() + result = runner.invoke( + trace, + [str(SAMPLE_EVAL_OUTPUT), "--eval-id", "test-weather-query"], + ) + assert result.exit_code == 0 + assert "agent" in result.output + + def test_eval_json_missing_eval_id(self): + runner = CliRunner() + result = runner.invoke( + trace, [str(SAMPLE_EVAL_OUTPUT), "--eval-id", "nonexistent"] + ) + # Should error about no traces found + assert result.exit_code != 0 + + def test_error_trace_shows_errors(self): + runner = CliRunner() + result = runner.invoke(trace, [str(SAMPLE_ERROR_TRACE)]) + assert result.exit_code == 0 + assert "✗" in result.output or "error" in result.output.lower() + + def test_file_not_found(self): + runner = CliRunner() + result = runner.invoke(trace, ["/nonexistent/path.jsonl"]) + assert result.exit_code != 0 + + def test_empty_file(self, tmp_path): + empty = tmp_path / "empty.jsonl" + empty.write_text("") + runner = CliRunner() + result = runner.invoke(trace, [str(empty)]) + assert result.exit_code != 0 + + def test_status_filter(self): + runner = CliRunner() + result = runner.invoke(trace, [str(SAMPLE_ERROR_TRACE), "--status", "error"]) + assert result.exit_code == 0 + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _collect_names(roots: list[SpanNode]) -> set[str]: + """Collect all span names in a tree.""" + names: set[str] = set() + for root in roots: + names.add(root.name) + names.update(_collect_names(root.children)) + return names