From 821874cf434d31ab75002f27b67a2506596bc5e7 Mon Sep 17 00:00:00 2001 From: venzeles <17001808+venzeles@users.noreply.github.com> Date: Wed, 6 May 2026 15:46:30 +0300 Subject: [PATCH] Add demo seed for local observability --- HACKATHON_SUBMISSION.md | 61 +++++++++ README.md | 19 +++ src/demo_seed.zig | 283 ++++++++++++++++++++++++++++++++++++++++ src/main.zig | 18 +++ 4 files changed, 381 insertions(+) create mode 100644 HACKATHON_SUBMISSION.md create mode 100644 src/demo_seed.zig diff --git a/HACKATHON_SUBMISSION.md b/HACKATHON_SUBMISSION.md new file mode 100644 index 0000000..86638d2 --- /dev/null +++ b/HACKATHON_SUBMISSION.md @@ -0,0 +1,61 @@ +# NullWatch Demo Seed For Agent Flight Recorder + +## Problem Discovered + +NullWatch has useful local observability APIs for spans, evals, run summaries, +OTLP ingest, token usage, cost, and errors. The repository had CLI ingest +examples and an E2E test, but no one-command way to create realistic local demo +data for NullHub or for a new contributor exploring the service. + +## Chosen Solution + +Add `nullwatch demo-seed`, an idempotent local CLI command that creates a small +set of realistic agent observability runs: + +- a passing code-review run +- a failed tool-call run +- a handoff/retry run with checkpoint context + +## Why This Idea Was Chosen + +The command makes NullWatch demoable without API keys, hosted services, or an +external agent runtime. It supports the broader Agent Flight Recorder work in +NullHub while remaining useful by itself for local development and tests. 
+ +## What Was Implemented + +- `demo-seed` CLI routing +- deterministic seed data for spans and evals +- idempotency by skipping existing demo run ids +- a unit test covering seed creation and repeat execution +- README usage and NullHub demo notes + +## Files Changed + +- `src/main.zig` +- `src/demo_seed.zig` +- `README.md` +- `HACKATHON_SUBMISSION.md` + +## How To Test Or Demo + +```bash +zig build test --summary all +zig build run -- demo-seed +zig build run -- summary +zig build run -- run demo-tool-failure +zig build run -- serve --port 7710 +``` + +Then start NullHub with: + +```bash +NULLWATCH_URL=http://127.0.0.1:7710 zig build run -- serve +``` + +## Limitations And Future Improvements + +- The seed data is deterministic and intentionally small. +- Future fixtures could include OpenTelemetry GenAI/OpenInference attributes. +- A later version could support scenario selection, for example + `demo-seed --scenario failures`. diff --git a/README.md b/README.md index e7cdff4..f39f79f 100644 --- a/README.md +++ b/README.md @@ -151,6 +151,14 @@ List evals: zig build run -- evals --dataset prod-shadow --verdict fail ``` +Seed local demo runs: + +```bash +zig build run -- demo-seed +zig build run -- runs --limit 20 +zig build run -- run demo-tool-failure +``` + Ingest a span from the CLI: ```bash @@ -354,6 +362,17 @@ zig build run -- --from-json '{"home":"~/.nullwatch","port":7710,"data_dir":"dat This keeps the service headless while letting `nullhub` own install/setup UI. +For a local NullHub flight-recorder demo: + +```bash +zig build run -- demo-seed +zig build run -- serve --port 7710 +``` + +Start NullHub with `NULLWATCH_URL=http://127.0.0.1:7710` and open the +Observability page to inspect the seeded runs, spans, evals, token usage, cost, +and failure context. + ## CI and releases - `tests/test_e2e.sh` boots a real server and validates auth, ingest, OTLP mapping, and CLI queries. 
diff --git a/src/demo_seed.zig b/src/demo_seed.zig
new file mode 100644
index 0000000..dfb8e23
--- /dev/null
+++ b/src/demo_seed.zig
@@ -0,0 +1,283 @@
+const std = @import("std");
+const domain = @import("domain.zig");
+const Store = @import("store.zig").Store;
+
+pub const SeedSummary = struct { // Counters returned by seed(); every field has a default, so `SeedSummary{}` starts from zero.
+    status: []const u8 = "ok", // Nothing in this file overwrites the default.
+    runs_created: usize = 0, // Bumped once per demo run, after all of its spans and its eval were ingested.
+    runs_skipped: usize = 0, // Bumped when a demo run id is already present in the store.
+    spans_created: usize = 0, // Bumped by addSpan after each successful ingestSpan call.
+    evals_created: usize = 0, // Bumped by addEval after each successful ingestEval call.
+};
+
+const base_ms: i64 = 1_710_000_000_000; // Fixed base timestamp; every seeded *_ms value below is base_ms plus a constant offset, which keeps the data deterministic.
+
+pub fn seed(allocator: std.mem.Allocator, store: *Store) !SeedSummary { // Seeds the three demo runs in order and returns the accumulated counters.
+    var summary = SeedSummary{};
+
+    try seedReviewPass(allocator, store, &summary); // Any error from a seeder is propagated immediately; later scenarios are then not attempted.
+    try seedToolFailure(allocator, store, &summary);
+    try seedHandoffRetry(allocator, store, &summary);
+
+    return summary;
+}
+
+fn runExists(allocator: std.mem.Allocator, store: *Store, run_id: []const u8) !bool { // Reports whether store.getRunDetail returns a non-null result for `run_id`.
+    var arena = std.heap.ArenaAllocator.init(allocator); // Scratch arena: whatever getRunDetail allocates through it is released by the deferred deinit.
+    defer arena.deinit();
+
+    const detail = try store.getRunDetail(arena.allocator(), run_id);
+    return detail != null; // Only presence matters. NOTE(review): a run left partially seeded by an earlier failed attempt would presumably also count as existing — confirm against Store.
+}
+
+fn seedReviewPass(allocator: std.mem.Allocator, store: *Store, summary: *SeedSummary) !void { // Passing code-review run: root agent.run span, a model call, a tracker transition, and one "pass" eval.
+    const run_id = "demo-code-review-pass";
+    if (try runExists(allocator, store, run_id)) { // Idempotency guard: leave an existing run untouched and count it as skipped.
+        summary.runs_skipped += 1;
+        return;
+    }
+
+    try addSpan(store, summary, .{
+        .run_id = run_id,
+        .trace_id = "trace-demo-review-pass",
+        .span_id = "review-pass-root", // Referenced as parent_span_id by the two spans below.
+        .source = "nullclaw",
+        .operation = "agent.run",
+        .status = "ok",
+        .started_at_ms = base_ms,
+        .ended_at_ms = base_ms + 1420,
+        .agent_id = "reviewer-1",
+        .task_id = "ticket-demo-101",
+        .attributes_json = "{\"pipeline_id\":\"code-review\",\"stage\":\"review\"}",
+    });
+    try addSpan(store, summary, .{
+        .run_id = run_id,
+        .trace_id = "trace-demo-review-pass",
+        .span_id = "review-pass-model",
+        .parent_span_id = "review-pass-root",
+        .source = "nullclaw",
+        .operation = "model.call",
+        .status = "ok",
+        .started_at_ms = base_ms + 80,
+        .ended_at_ms = base_ms + 940,
+        .agent_id = "reviewer-1",
+        .model = "gpt-5-mini",
+        .input_tokens = 1280,
+        .output_tokens = 420,
+        .cost_usd = 0.013,
+    });
+    try addSpan(store, summary, .{
+        .run_id = run_id,
+        .trace_id = "trace-demo-review-pass",
+        .span_id = "review-pass-transition",
+        .parent_span_id = "review-pass-root",
+        .source = "nulltickets",
+        .operation = "tracker.transition",
+        .status = "ok",
+        .started_at_ms = base_ms + 1020,
+        .ended_at_ms = base_ms + 1180,
+        .task_id = "ticket-demo-101",
+        .attributes_json = "{\"from\":\"review\",\"to\":\"done\",\"trigger\":\"approve\"}",
+    });
+    try addEval(store, summary, .{
+        .run_id = run_id,
+        .eval_key = "review_quality",
+        .scorer = "demo-rubric",
+        .score = 0.94,
+        .verdict = "pass",
+        .dataset = "flight-recorder-demo",
+        .notes = "Review found the intended issue and approved after tests.",
+        .recorded_at_ms = base_ms + 1500,
+    });
+
+    summary.runs_created += 1; // Reached only if every ingest above succeeded; each `try` returns early on error.
+}
+
+fn seedToolFailure(allocator: std.mem.Allocator, store: *Store, summary: *SeedSummary) !void { // Failed tool-call run: error root span, ok model call, error shell tool call, a run event, and one "fail" eval.
+    const run_id = "demo-tool-failure";
+    if (try runExists(allocator, store, run_id)) { // Idempotency guard: leave an existing run untouched and count it as skipped.
+        summary.runs_skipped += 1;
+        return;
+    }
+
+    try addSpan(store, summary, .{
+        .run_id = run_id,
+        .trace_id = "trace-demo-tool-failure",
+        .span_id = "tool-failure-root", // Referenced as parent_span_id by the three spans below.
+        .source = "nullboiler",
+        .operation = "workflow.step",
+        .status = "error", // First of the two error-status spans in this run.
+        .started_at_ms = base_ms + 10_000,
+        .ended_at_ms = base_ms + 13_840,
+        .agent_id = "coder-1",
+        .task_id = "ticket-demo-202",
+        .error_message = "workflow failed after shell tool error",
+        .attributes_json = "{\"workflow_id\":\"bug-fix\",\"node\":\"run-tests\"}",
+    });
+    try addSpan(store, summary, .{
+        .run_id = run_id,
+        .trace_id = "trace-demo-tool-failure",
+        .span_id = "tool-failure-model",
+        .parent_span_id = "tool-failure-root",
+        .source = "nullclaw",
+        .operation = "model.call",
+        .status = "ok",
+        .started_at_ms = base_ms + 10_120,
+        .ended_at_ms = base_ms + 11_050,
+        .agent_id = "coder-1",
+        .model = "gpt-5-mini",
+        .input_tokens = 2140,
+        .output_tokens = 620,
+        .cost_usd = 0.022,
+    });
+    try addSpan(store, summary, .{
+        .run_id = run_id,
+        .trace_id = "trace-demo-tool-failure",
+        .span_id = "tool-failure-shell",
+        .parent_span_id = "tool-failure-root",
+        .source = "nullclaw",
+        .operation = "tool.call",
+        .status = "error", // Second of the two error-status spans in this run.
+        .started_at_ms = base_ms + 11_100,
+        .ended_at_ms = base_ms + 13_600,
+        .agent_id = "coder-1",
+        .tool_name = "shell",
+        .error_message = "zig build test exited with status 1",
+        .attributes_json = "{\"command\":\"zig build test --summary all\",\"exit_code\":1}",
+    });
+    try addSpan(store, summary, .{
+        .run_id = run_id,
+        .trace_id = "trace-demo-tool-failure",
+        .span_id = "tool-failure-event",
+        .parent_span_id = "tool-failure-root",
+        .source = "nulltickets",
+        .operation = "run.event",
+        .status = "ok",
+        .started_at_ms = base_ms + 13_660,
+        .ended_at_ms = base_ms + 13_760,
+        .task_id = "ticket-demo-202",
+        .attributes_json = "{\"kind\":\"test_failure\",\"artifact\":\"zig-test-output.txt\"}",
+    });
+    try addEval(store, summary, .{
+        .run_id = run_id,
+        .eval_key = "tool_success",
+        .scorer = "demo-rubric",
+        .score = 0.31,
+        .verdict = "fail", // The only "fail" verdict among the seeded evals.
+        .dataset = "flight-recorder-demo",
+        .notes = "The workflow surfaced a failing shell tool call with enough context to debug.",
+        .recorded_at_ms = base_ms + 14_000,
+    });
+
+    summary.runs_created += 1; // Reached only if every ingest above succeeded; each `try` returns early on error.
+}
+
+fn seedHandoffRetry(allocator: std.mem.Allocator, store: *Store, summary: *SeedSummary) !void { // Handoff/retry run: root workflow.run span, two agent.handoff spans, a checkpoint span, and one "pass" eval.
+    const run_id = "demo-handoff-retry";
+    if (try runExists(allocator, store, run_id)) { // Idempotency guard: leave an existing run untouched and count it as skipped.
+        summary.runs_skipped += 1;
+        return;
+    }
+
+    try addSpan(store, summary, .{
+        .run_id = run_id,
+        .trace_id = "trace-demo-handoff-retry",
+        .span_id = "handoff-root", // Referenced as parent_span_id by the three spans below.
+        .source = "nullboiler",
+        .operation = "workflow.run",
+        .status = "ok",
+        .started_at_ms = base_ms + 20_000,
+        .ended_at_ms = base_ms + 24_500,
+        .task_id = "ticket-demo-303",
+        .attributes_json = "{\"workflow_id\":\"feature-dev\",\"checkpoint_count\":3}",
+    });
+    try addSpan(store, summary, .{
+        .run_id = run_id,
+        .trace_id = "trace-demo-handoff-retry",
+        .span_id = "handoff-analyst",
+        .parent_span_id = "handoff-root",
+        .source = "nullclaw",
+        .operation = "agent.handoff",
+        .status = "ok",
+        .started_at_ms = base_ms + 20_100,
+        .ended_at_ms = base_ms + 21_150,
+        .agent_id = "analyst-1",
+        .attributes_json = "{\"to_agent\":\"coder-1\",\"reason\":\"implementation required\"}",
+    });
+    try addSpan(store, summary, .{
+        .run_id = run_id,
+        .trace_id = "trace-demo-handoff-retry",
+        .span_id = "handoff-coder",
+        .parent_span_id = "handoff-root",
+        .source = "nullclaw",
+        .operation = "agent.handoff",
+        .status = "ok",
+        .started_at_ms = base_ms + 21_220,
+        .ended_at_ms = base_ms + 23_800,
+        .agent_id = "coder-1",
+        .attributes_json = "{\"to_agent\":\"reviewer-1\",\"reason\":\"ready for review\",\"retry\":1}", // The retry marker of this scenario lives only in this attributes payload.
+    });
+    try addSpan(store, summary, .{
+        .run_id = run_id,
+        .trace_id = "trace-demo-handoff-retry",
+        .span_id = "handoff-checkpoint",
+        .parent_span_id = "handoff-root",
+        .source = "nullboiler",
+        .operation = "checkpoint.created",
+        .status = "ok",
+        .started_at_ms = base_ms + 23_920,
+        .ended_at_ms = base_ms + 24_020,
+        .attributes_json = "{\"checkpoint_id\":\"cp-demo-303-3\",\"node\":\"review\"}",
+    });
+    try addEval(store, summary, .{
+        .run_id = run_id,
+        .eval_key = "handoff_budget",
+        .scorer = "demo-rubric",
+        .score = 0.78,
+        .verdict = "pass",
+        .dataset = "flight-recorder-demo",
+        .notes = "The handoff chain stayed within the expected retry budget.",
+        .recorded_at_ms = base_ms + 24_800,
+    });
+
+    summary.runs_created += 1; // Reached only if every ingest above succeeded; each `try` returns early on error.
+}
+
+fn addSpan(store: *Store, summary: *SeedSummary, payload: domain.SpanIngest) !void { // Ingests one span and counts it; ingest errors propagate before the counter changes.
+    _ = try store.ingestSpan(payload); // The value returned by ingestSpan is intentionally discarded.
+    summary.spans_created += 1;
+}
+
+fn addEval(store: *Store, summary: *SeedSummary, payload: domain.EvalIngest) !void { // Ingests one eval and counts it; ingest errors propagate before the counter changes.
+    _ = try store.ingestEval(payload); // The value returned by ingestEval is intentionally discarded.
+    summary.evals_created += 1;
+}
+
+test "demo seed creates local observability runs and is idempotent" {
+    var tmp = std.testing.tmpDir(.{}); // Fresh temporary directory so the store starts empty; removed by the deferred cleanup.
+    defer tmp.cleanup();
+
+    const tmp_dir = @import("compat.zig").fs.Dir.wrap(tmp.dir); // NOTE(review): compat.zig is not visible here; presumably a thin wrapper over the std directory handle.
+    const data_dir = try tmp_dir.realpathAlloc(std.testing.allocator, ".");
+    defer std.testing.allocator.free(data_dir);
+
+    var store = try Store.init(std.testing.allocator, data_dir);
+    defer store.deinit();
+
+    const first = try seed(std.testing.allocator, &store); // First pass: everything is created, nothing skipped.
+    try std.testing.expectEqual(@as(usize, 3), first.runs_created);
+    try std.testing.expectEqual(@as(usize, 0), first.runs_skipped);
+    try std.testing.expectEqual(@as(usize, 11), first.spans_created); // 3 + 4 + 4 spans across the three scenarios.
+    try std.testing.expectEqual(@as(usize, 3), first.evals_created); // One eval per scenario.
+
+    const second = try seed(std.testing.allocator, &store); // Second pass: all three runs must be skipped rather than duplicated.
+    try std.testing.expectEqual(@as(usize, 0), second.runs_created);
+    try std.testing.expectEqual(@as(usize, 3), second.runs_skipped);
+
+    const summary = try store.getSystemSummary(std.testing.allocator); // Store totals after both passes must match the first pass alone.
+    try std.testing.expectEqual(@as(usize, 3), summary.run_count);
+    try std.testing.expectEqual(@as(usize, 11), summary.span_count);
+    try std.testing.expectEqual(@as(usize, 3), summary.eval_count);
+    try std.testing.expectEqual(@as(usize, 2), summary.error_count); // Presumably the two status="error" spans of demo-tool-failure — confirm against Store.getSystemSummary.
+    try std.testing.expectEqual(@as(usize, 1), summary.fail_count); // Presumably the single verdict="fail" eval — confirm against Store.getSystemSummary.
+}
diff --git a/src/main.zig b/src/main.zig
index f82a130..701695a 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -2,6 +2,7 @@ const std = @import("std");
 const std_compat = @import("compat.zig");
 const api = @import("api.zig");
 const config = @import("config.zig");
+const demo_seed = @import("demo_seed.zig"); // Demo data seeder behind the `demo-seed` command.
 const domain = @import("domain.zig");
 const Store = @import("store.zig").Store;
 const version = @import("version.zig");
@@ -138,6 +139,13 @@ pub fn main(init: std.process.Init) !void {
         return;
     }
 
+    if (std.mem.eql(u8, command, "demo-seed")) { // Dispatch the `demo-seed` subcommand.
+        var parsed = try parseCommonArgs(allocator, &cursor); // Yields the runtime config handed to the command below.
+        defer parsed.deinit(allocator);
+        try runDemoSeedCommand(allocator, parsed.runtime);
+        return;
+    }
+
     std.debug.print("unknown command: {s}\n\n", .{command});
     printUsage();
     std.process.exit(1);
@@ -328,6 +336,14 @@ fn runEvalIngestCommand(allocator: std.mem.Allocator, runtime: RuntimeConfig, js
     try writeJsonToStdout(allocator, record);
 }
 
+fn runDemoSeedCommand(allocator: std.mem.Allocator, runtime: RuntimeConfig) !void { // Opens the store at runtime.data_dir, runs the demo seeder, and writes the resulting summary via writeJsonToStdout.
+    var store = try Store.init(allocator, runtime.data_dir);
+    defer store.deinit(); // Runs on every exit path, including a failed seed.
+
+    const summary = try demo_seed.seed(allocator, &store);
+    try writeJsonToStdout(allocator, summary);
+}
+
 fn parseServeArgs(allocator: std.mem.Allocator, args: *ArgCursor) !struct { runtime: RuntimeConfig } {
     var overrides = RuntimeOverrides{};
 
@@ -598,6 +614,7 @@ fn printUsage() void {
         \\ nullwatch evals [--run-id ID] [--verdict VERDICT] [--eval-key KEY] [--scorer NAME] [--dataset NAME] [--limit N]
         \\ nullwatch ingest-span --json '' [--data-dir PATH] [--config PATH]
         \\ nullwatch ingest-eval --json '' [--data-dir PATH] [--config PATH]
+        \\ nullwatch demo-seed [--data-dir PATH] [--config PATH]
         \\ nullwatch --export-manifest
         \\ nullwatch --from-json ''
         \\ nullwatch version
@@ -625,6 +642,7 @@ fn printUsage() void {
 test {
     _ = api;
     _ = config;
+    _ = demo_seed; // Referenced here alongside the other modules so the seeder's test block is pulled into the test build.
     _ = domain;
     _ = Store;
     _ = @import("export_manifest.zig");