|
| 1 | +import { describe, expect, it } from "vitest"; |
| 2 | +import { |
| 3 | + buildMarkdownReport, |
| 4 | + renderDashboardHtml, |
| 5 | +} from "../scripts/bench-format/render.mjs"; |
| 6 | + |
/**
 * Benchmark summary fixture fed to both renderers in the tests below.
 * It describes a single model ("GPT-5 Codex"), a single task, and one
 * metrics entry for each of the four edit modes.
 */
const summary = (() => {
  const modelId = "gpt-5-codex";

  // Per-mode metrics; insertion order here is also the order listed in meta.modes.
  const modes = {
    patch: { accuracyPct: 90, wallMsP50: 1000, tokensTotalP50: 100 },
    replace: { accuracyPct: 85, wallMsP50: 1100, tokensTotalP50: 90 },
    hashline: { accuracyPct: 88, wallMsP50: 1050, tokensTotalP50: 95 },
    hashline_v2: { accuracyPct: 92, wallMsP50: 980, tokensTotalP50: 80 },
  };

  const meta = {
    generatedAt: "2026-03-22T00:00:00.000Z",
    preset: "codex-core",
    models: [modelId],
    tasks: ["task-1"],
    modes: Object.keys(modes),
    runCount: 1,
    warmupCount: 0,
  };

  const rows = [{ modelId, displayName: "GPT-5 Codex", modes }];

  return { meta, rows, failures: [] };
})();
| 31 | + |
// Smoke tests for the bench-format renderers: each test renders the shared
// `summary` fixture and checks that the expected text fragments appear in the output.
describe("bench format renderer", () => {
  it("builds markdown report with leaderboard content", () => {
    // `as never` lets the plain fixture object be passed whatever parameter
    // type the renderer declares.
    const report = buildMarkdownReport(summary as never);
    const expectedFragments = [
      "# Code Edit Format Benchmark",
      "## Leaderboard (Accuracy First)",
      "GPT-5 Codex",
    ];

    for (const fragment of expectedFragments) {
      expect(report).toContain(fragment);
    }
  });

  it("renders dashboard html with embedded model data", () => {
    const page = renderDashboardHtml(summary as never);
    const expectedFragments = [
      "<!doctype html>",
      "Code Edit Format Benchmark",
      "GPT-5 Codex",
      "deltaVsReplaceHashline",
    ];

    for (const fragment of expectedFragments) {
      expect(page).toContain(fragment);
    }
  });
});
0 commit comments