diff --git a/HACKATHON_SUBMISSION.md b/HACKATHON_SUBMISSION.md
new file mode 100644
index 0000000..3ca0382
--- /dev/null
+++ b/HACKATHON_SUBMISSION.md
@@ -0,0 +1,75 @@
+# WorkflowDef Preflight for NullBoiler
+
+## Problem discovered
+
+NullBoiler consumes file-based tracker/pull-mode workflow definitions through
+`workflow_loader.loadWorkflows`. That loader intentionally keeps permissive
+runtime behavior: missing directories return an empty map, invalid files are
+skipped, and files with empty `pipeline_id` are ignored.
+
+That behavior is useful at runtime, but it makes workflow authoring harder.
+Developers can make a typo in JSON, forget `pipeline_id`, or accidentally reuse a
+pipeline mapping and only discover it after the server starts.
+
+## Chosen solution
+
+Add `nullboiler validate-workflows [PATH]`, a local CLI preflight command for the
+same file-based `WorkflowDef` JSON files consumed by `loadWorkflows`.
+
+The command reports actionable diagnostics before the server starts while
+preserving the existing runtime loader semantics.
+
+## Why this idea was chosen
+
+This idea had the best developer-impact-to-complexity ratio among the options
+found during repository exploration. NullClaw is broad and central, NullHub
+changes often span backend and UI, and NullWatch already has a strong CLI. In
+NullBoiler, workflow files are a core developer touchpoint, and a focused
+preflight command is easy to demo, review, and merge without a large refactor.
+
+## What was implemented
+
+- Added a structured workflow-file validation helper in `src/workflow_loader.zig`.
+- Added `validate-workflows [PATH]` CLI routing in `src/main.zig`.
+- Added help output for the new command.
+- Added human-readable diagnostics with separate errors and warnings.
+- Added unit tests for valid files, malformed JSON, missing or empty
+  `pipeline_id`, duplicate `pipeline_id`, missing directories, and warning-only
+  shapes.
+- Documented the new command in `README.md`.
+
+## Files changed
+
+- `src/main.zig`
+- `src/workflow_loader.zig`
+- `README.md`
+- `HACKATHON_SUBMISSION.md`
+
+## How to test or demo it
+
+```bash
+zig build test --summary all
+zig build run -- validate-workflows
+zig build run -- validate-workflows workflows
+```
+
+To demo errors, create a temporary workflow directory with malformed JSON or two
+files that use the same `pipeline_id`.
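+One way to set it up (a throwaway sketch; the path and file names are
+illustrative):
+
+```bash
+demo=/path/to/temp/workflows
+mkdir -p "$demo"
+printf '{bad json' > "$demo/broken.json"
+printf '{"id":"wf-a","pipeline_id":"dup","claim_roles":["dev"]}' > "$demo/a.json"
+printf '{"id":"wf-b","pipeline_id":"dup","claim_roles":["dev"]}' > "$demo/b.json"
+```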
+
+Then run:
+
+```bash
+zig build run -- validate-workflows /path/to/temp/workflows
+```
+
+Expected behavior:
+
+- no errors: the command exits with status `0`
+- one or more errors: the command exits with status `1`
+- warnings are printed but do not fail the command
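+
+With the demo directory above, the output has roughly this shape (diagnostic
+order follows directory iteration order, so it can vary):
+
+```
+ERROR /path/to/temp/workflows/broken.json: invalid workflow JSON
+ERROR /path/to/temp/workflows/b.json: duplicate pipeline_id 'dup' also used by /path/to/temp/workflows/a.json (pipeline_id)
+OK /path/to/temp/workflows/a.json -> dup
+Checked 3 workflow files: 1 valid, 0 warnings, 2 errors
+```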
+
+## Limitations and future improvements
+
+- The command validates only file-based tracker/pull-mode `WorkflowDef` files,
+  not every workflow format exposed by NullBoiler's HTTP graph workflow API.
+- The validator scans direct `*.json` children of the target directory, matching
+  `loadWorkflows`; it does not recurse into nested example directories.
+- Future work could add machine-readable JSON output for CI integrations.
diff --git a/README.md b/README.md
index 7fda7e1..b413536 100644
--- a/README.md
+++ b/README.md
@@ -92,3 +92,27 @@
 state as `__config`. When `NULLBOILER_HOME` is set, `nullboiler` reads
 `config.json` from that directory and resolves relative paths like `db`, `strategies_dir`,
 `tracker.workflows_dir`, and `tracker.workspace.root` relative to that config file.
+
+## Workflow Preflight
+
+File-based tracker/pull-mode workflows are loaded from JSON files using the
+`WorkflowDef` shape in `src/workflow_loader.zig`. Before starting the server, you
+can check those files locally:
+
+```bash
+zig build run -- validate-workflows
+zig build run -- validate-workflows workflows
+```
+
+The command defaults to `workflows` and scans direct `*.json` files in the
+directory. It reports:
+
+- errors for missing or unreadable directories, unreadable files, malformed JSON,
+  JSON that cannot be parsed as `WorkflowDef`, missing or empty `pipeline_id`,
+  and duplicate `pipeline_id` values
+- warnings for suspicious but currently allowed shapes, including empty `id`,
+  empty `claim_roles`, dispatch workflows without `dispatch.worker_tags`, and
+  directories with no JSON workflow files
+
+Validation errors exit with status `1`. Warnings are shown but do not fail the
+command, matching the existing runtime loader's permissive behavior.
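+
+A minimal file that passes validation looks like this (the values are
+illustrative; see the `WorkflowDef` struct in `src/workflow_loader.zig` for the
+full set of fields):
+
+```json
+{
+  "id": "wf-review",
+  "pipeline_id": "pipeline-review",
+  "claim_roles": ["reviewer"]
+}
+```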
diff --git a/src/main.zig b/src/main.zig
index 8ea8451..7a10fd5 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -47,6 +47,23 @@ pub fn main(init: std.process.Init) !void {
             try @import("from_json.zig").run(allocator, all_args[1..]);
             return;
         }
+        if (std.mem.eql(u8, all_args[0], "help") or
+            std.mem.eql(u8, all_args[0], "--help") or
+            std.mem.eql(u8, all_args[0], "-h"))
+        {
+            printUsage();
+            return;
+        }
+        if (std.mem.eql(u8, all_args[0], "validate-workflows")) {
+            if (all_args.len > 2) {
+                std.debug.print("error: validate-workflows accepts at most one PATH argument\n\n", .{});
+                printUsage();
+                std.process.exit(2);
+            }
+            const workflow_dir = if (all_args.len == 2) all_args[1] else "workflows";
+            try runValidateWorkflows(allocator, workflow_dir);
+            return;
+        }
     }
 
     var host_override: ?[]const u8 = null;
@@ -427,6 +444,64 @@ pub fn main(init: std.process.Init) !void {
     }
 }
 
+fn printUsage() void {
+    std.debug.print(
+        \\nullboiler v{s}
+        \\
+        \\Usage:
+        \\  nullboiler [--host HOST] [--port N] [--db PATH] [--config PATH] [--token TOKEN]
+        \\  nullboiler validate-workflows [PATH]
+        \\  nullboiler --export-manifest
+        \\  nullboiler --from-json '<json>'
+        \\  nullboiler --version
+        \\
+        \\Commands:
+        \\  validate-workflows [PATH]  Preflight file-based tracker/pull-mode WorkflowDef JSON files.
+        \\
+    , .{version});
+}
+
+fn runValidateWorkflows(allocator: std.mem.Allocator, workflow_dir: []const u8) !void {
+    var arena = std.heap.ArenaAllocator.init(allocator);
+    defer arena.deinit();
+
+    const result = try workflow_loader.validateWorkflowFiles(arena.allocator(), workflow_dir);
+
+    for (result.diagnostics) |diag| {
+        const level = switch (diag.severity) {
+            .@"error" => "ERROR",
+            .warning => "WARNING",
+        };
+        if (diag.field) |field| {
+            std.debug.print("{s} {s}: {s} ({s})\n", .{ level, diag.file_path, diag.message, field });
+        } else {
+            std.debug.print("{s} {s}: {s}\n", .{ level, diag.file_path, diag.message });
+        }
+    }
+
+    for (result.files) |file| {
+        if (!file.has_error and file.pipeline_id.len > 0) {
+            std.debug.print("OK {s} -> {s}\n", .{ file.file_path, file.pipeline_id });
+        }
+    }
+
+    std.debug.print(
+        "Checked {d} workflow files: {d} valid, {d} warning{s}, {d} error{s}\n",
+        .{
+            result.checked_files,
+            result.valid_files,
+            result.warning_count,
+            if (result.warning_count == 1) "" else "s",
+            result.error_count,
+            if (result.error_count == 1) "" else "s",
+        },
+    );
+
+    if (result.error_count > 0) {
+        std.process.exit(1);
+    }
+}
+
 fn ensureParentDirForFile(path: []const u8) !void {
     if (path.len == 0 or std.mem.eql(u8, path, ":memory:") or std.mem.startsWith(u8, path, "file:")) return;
diff --git a/src/workflow_loader.zig b/src/workflow_loader.zig
index 24084d4..10ed417 100644
--- a/src/workflow_loader.zig
+++ b/src/workflow_loader.zig
@@ -50,6 +50,54 @@ pub const WorkflowDef = struct {
 
 pub const WorkflowMap = std.StringArrayHashMapUnmanaged(WorkflowDef);
 
+pub const WorkflowDiagnosticSeverity = enum {
+    @"error",
+    warning,
+};
+
+pub const WorkflowDiagnostic = struct {
+    severity: WorkflowDiagnosticSeverity,
+    file_path: []const u8,
+    message: []const u8,
+    field: ?[]const u8 = null,
+};
+
+pub const WorkflowFileStatus = struct {
+    file_path: []const u8,
+    pipeline_id: []const u8,
+    has_error: bool,
+};
+
+pub const WorkflowValidationResult = struct {
+    checked_files: usize = 0,
+    valid_files: usize = 0,
+    error_count: usize = 0,
+    warning_count: usize = 0,
+    diagnostics: []WorkflowDiagnostic = &.{},
+    files: []WorkflowFileStatus = &.{},
+};
+
+fn appendWorkflowDiagnostic(
+    allocator: std.mem.Allocator,
+    diagnostics: *std.ArrayListUnmanaged(WorkflowDiagnostic),
+    result: *WorkflowValidationResult,
+    severity: WorkflowDiagnosticSeverity,
+    file_path: []const u8,
+    field: ?[]const u8,
+    message: []const u8,
+) !void {
+    try diagnostics.append(allocator, .{
+        .severity = severity,
+        .file_path = file_path,
+        .message = message,
+        .field = field,
+    });
+    switch (severity) {
+        .@"error" => result.error_count += 1,
+        .warning => result.warning_count += 1,
+    }
+}
+
 // ── loadWorkflows ─────────────────────────────────────────────────────
 
 pub fn loadWorkflows(allocator: std.mem.Allocator, dir_path: []const u8) WorkflowMap {
@@ -77,6 +125,201 @@ pub fn loadWorkflows(allocator: std.mem.Allocator, dir_path: []const u8) Workflo
     return map;
 }
 
+// ── validateWorkflowFiles ─────────────────────────────────────────────
+
+/// Validate file-based tracker/pull-mode WorkflowDef JSON files without
+/// changing loadWorkflows runtime semantics.
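+/// Returned slices are allocated from `allocator` and owned by the caller, so
+/// an arena (as used by `runValidateWorkflows` in `src/main.zig`) keeps cleanup
+/// simple.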
"claim_roles", + "claim_roles is empty", + ); + } + + if (def.execution == .dispatch and def.dispatch.worker_tags.len == 0) { + try appendWorkflowDiagnostic( + allocator, + &diagnostics, + &result, + .warning, + file_path, + "dispatch.worker_tags", + "dispatch workflow has no worker_tags", + ); + } + + if (!file_has_error) { + result.valid_files += 1; + } + try files.append(allocator, .{ + .file_path = file_path, + .pipeline_id = if (def.pipeline_id.len == 0) "" else try allocator.dupe(u8, def.pipeline_id), + .has_error = file_has_error, + }); + } + + if (result.checked_files == 0) { + const path = try allocator.dupe(u8, dir_path); + try appendWorkflowDiagnostic( + allocator, + &diagnostics, + &result, + .warning, + path, + null, + "directory contains no JSON workflow files", + ); + } + + result.files = try files.toOwnedSlice(allocator); + result.diagnostics = try diagnostics.toOwnedSlice(allocator); + return result; +} + test "loadWorkflows: supports absolute workflow directories" { var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); @@ -400,6 +643,170 @@ test "parse workflow with continuation_prompt" { try std.testing.expectEqualStrings("Continue: attempt #{{attempt}}", parsed.value.subprocess.continuation_prompt.?); } +test "validateWorkflowFiles: valid workflow directory" { + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + + try std_compat.fs.Dir.wrap(tmp.dir).writeFile(.{ + .sub_path = "valid.json", + .data = + \\{ + \\ "id": "wf-valid", + \\ "pipeline_id": "pipeline-valid", + \\ "claim_roles": ["developer"] + \\} + , + }); + + const dir_path = try std_compat.fs.Dir.wrap(tmp.dir).realpathAlloc(std.testing.allocator, "."); + defer std.testing.allocator.free(dir_path); + + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + const result = try validateWorkflowFiles(arena.allocator(), dir_path); + try std.testing.expectEqual(@as(usize, 1), result.checked_files); + try std.testing.expectEqual(@as(usize, 1), result.valid_files); + try std.testing.expectEqual(@as(usize, 0), result.error_count); + try std.testing.expectEqual(@as(usize, 0), result.warning_count); + try std.testing.expectEqual(@as(usize, 1), result.files.len); + try std.testing.expectEqualStrings("pipeline-valid", result.files[0].pipeline_id); +} + +test "validateWorkflowFiles: malformed JSON is an error" { + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + + try std_compat.fs.Dir.wrap(tmp.dir).writeFile(.{ + .sub_path = "broken.json", + .data = "{bad json", + }); + + const dir_path = try std_compat.fs.Dir.wrap(tmp.dir).realpathAlloc(std.testing.allocator, "."); + defer std.testing.allocator.free(dir_path); + + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + const result = try validateWorkflowFiles(arena.allocator(), dir_path); + try std.testing.expectEqual(@as(usize, 1), result.checked_files); + try std.testing.expectEqual(@as(usize, 1), result.error_count); + try std.testing.expectEqualStrings("invalid workflow JSON", result.diagnostics[0].message); +} + +test "validateWorkflowFiles: missing or empty pipeline_id is an error" { + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + + try std_compat.fs.Dir.wrap(tmp.dir).writeFile(.{ + .sub_path = "missing.json", + .data = "{\"id\":\"wf-missing\",\"claim_roles\":[\"dev\"]}", + }); + try std_compat.fs.Dir.wrap(tmp.dir).writeFile(.{ + .sub_path = "empty.json", + .data = "{\"id\":\"wf-empty\",\"pipeline_id\":\"\",\"claim_roles\":[\"dev\"]}", + }); + + const 
+test "validateWorkflowFiles: missing or empty pipeline_id is an error" {
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+
+    try std_compat.fs.Dir.wrap(tmp.dir).writeFile(.{
+        .sub_path = "missing.json",
+        .data = "{\"id\":\"wf-missing\",\"claim_roles\":[\"dev\"]}",
+    });
+    try std_compat.fs.Dir.wrap(tmp.dir).writeFile(.{
+        .sub_path = "empty.json",
+        .data = "{\"id\":\"wf-empty\",\"pipeline_id\":\"\",\"claim_roles\":[\"dev\"]}",
+    });
+
+    const dir_path = try std_compat.fs.Dir.wrap(tmp.dir).realpathAlloc(std.testing.allocator, ".");
+    defer std.testing.allocator.free(dir_path);
+
+    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
+    defer arena.deinit();
+
+    const result = try validateWorkflowFiles(arena.allocator(), dir_path);
+    try std.testing.expectEqual(@as(usize, 2), result.checked_files);
+    try std.testing.expectEqual(@as(usize, 2), result.error_count);
+    for (result.diagnostics) |diag| {
+        if (diag.severity == .@"error") {
+            try std.testing.expectEqualStrings("pipeline_id", diag.field.?);
+        }
+    }
+}
+
+test "validateWorkflowFiles: duplicate pipeline_id is an error" {
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+
+    try std_compat.fs.Dir.wrap(tmp.dir).writeFile(.{
+        .sub_path = "first.json",
+        .data = "{\"id\":\"wf-a\",\"pipeline_id\":\"pipeline-dup\",\"claim_roles\":[\"dev\"]}",
+    });
+    try std_compat.fs.Dir.wrap(tmp.dir).writeFile(.{
+        .sub_path = "second.json",
+        .data = "{\"id\":\"wf-b\",\"pipeline_id\":\"pipeline-dup\",\"claim_roles\":[\"reviewer\"]}",
+    });
+
+    const dir_path = try std_compat.fs.Dir.wrap(tmp.dir).realpathAlloc(std.testing.allocator, ".");
+    defer std.testing.allocator.free(dir_path);
+
+    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
+    defer arena.deinit();
+
+    const result = try validateWorkflowFiles(arena.allocator(), dir_path);
+    try std.testing.expectEqual(@as(usize, 2), result.checked_files);
+    try std.testing.expectEqual(@as(usize, 1), result.error_count);
+    try std.testing.expect(std.mem.indexOf(u8, result.diagnostics[0].message, "duplicate pipeline_id") != null);
+}
+
+test "validateWorkflowFiles: missing directory is an error" {
+    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
+    defer arena.deinit();
+
+    const result = try validateWorkflowFiles(arena.allocator(), "nonexistent_workflow_dir_xyz_999");
+    try std.testing.expectEqual(@as(usize, 0), result.checked_files);
+    try std.testing.expectEqual(@as(usize, 1), result.error_count);
+    try std.testing.expectEqualStrings("workflow directory is missing or unreadable", result.diagnostics[0].message);
+}
+
+test "validateWorkflowFiles: empty claim_roles is a warning" {
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+
+    try std_compat.fs.Dir.wrap(tmp.dir).writeFile(.{
+        .sub_path = "warn.json",
+        .data = "{\"id\":\"wf-warn\",\"pipeline_id\":\"pipeline-warn\"}",
+    });
+
+    const dir_path = try std_compat.fs.Dir.wrap(tmp.dir).realpathAlloc(std.testing.allocator, ".");
+    defer std.testing.allocator.free(dir_path);
+
+    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
+    defer arena.deinit();
+
+    const result = try validateWorkflowFiles(arena.allocator(), dir_path);
+    try std.testing.expectEqual(@as(usize, 0), result.error_count);
+    try std.testing.expectEqual(@as(usize, 1), result.warning_count);
+    try std.testing.expectEqualStrings("claim_roles", result.diagnostics[0].field.?);
+}
+
+test "validateWorkflowFiles: dispatch without worker_tags is a warning" {
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+
+    try std_compat.fs.Dir.wrap(tmp.dir).writeFile(.{
+        .sub_path = "dispatch.json",
+        .data =
+        \\{
+        \\  "id": "wf-dispatch",
+        \\  "pipeline_id": "pipeline-dispatch",
+        \\  "claim_roles": ["dev"],
+        \\  "execution": "dispatch"
+        \\}
+        ,
+    });
+
+    const dir_path = try std_compat.fs.Dir.wrap(tmp.dir).realpathAlloc(std.testing.allocator, ".");
+    defer std.testing.allocator.free(dir_path);
+
+    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
+    defer arena.deinit();
+
+    const result = try validateWorkflowFiles(arena.allocator(), dir_path);
+    try std.testing.expectEqual(@as(usize, 0), result.error_count);
+    try std.testing.expectEqual(@as(usize, 1), result.warning_count);
+    try std.testing.expectEqualStrings("dispatch.worker_tags", result.diagnostics[0].field.?);
+}
+
 test "WorkflowWatcher: detects file changes" {
     const allocator = std.testing.allocator;
     var s = try Store.init(allocator, ":memory:");