Skip to content

Commit b4af196

Browse files
committed
Merge remote-tracking branch 'origin/test/pr8-benchmark-render-dashboard-smoke' into release/mainbound-20260323
2 parents dafb9bd + 5c5e80d commit b4af196

1 file changed

Lines changed: 204 additions & 0 deletions

File tree

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
import { spawnSync } from "node:child_process";
2+
import { rm } from "node:fs/promises";
3+
import { mkdtempSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
4+
import { tmpdir } from "node:os";
5+
import path from "node:path";
6+
import { afterEach, describe, expect, it } from "vitest";
7+
8+
/**
 * Absolute path of the dashboard renderer script exercised by this suite,
 * resolved against the working directory the tests are started from.
 */
const scriptPath = path.resolve(
  process.cwd(),
  "scripts",
  "benchmark-render-dashboard.mjs",
);

/**
 * Temp directories created by `createTempRoot`; the `afterEach` hook drains
 * this list and deletes each entry.
 */
const tempRoots: string[] = [];
14+
15+
/**
 * Deletes `targetPath` with `rm`, retrying when the removal fails with one of
 * the error codes `ENOTEMPTY`, `EPERM`, or `EBUSY`.
 *
 * At most six attempts are made. After a failed, retryable attempt the
 * function sleeps `attempt * 50` ms (50, 100, ... 250) before the next try.
 *
 * @param targetPath - File or directory to remove.
 * @param options - Passed through unchanged to `rm`.
 * @returns A promise that resolves once a removal attempt succeeds.
 * @throws The error raised by `rm` when it carries no string `code`, when the
 *   code is not in the retryable set, or when the last attempt also fails.
 */
async function removeWithRetry(
  targetPath: string,
  options: { recursive?: boolean; force?: boolean },
): Promise<void> {
  const retryableCodes = new Set(["ENOTEMPTY", "EPERM", "EBUSY"]);
  const maxAttempts = 6;

  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    try {
      await rm(targetPath, options);
      return;
    } catch (error) {
      // `error` is `unknown`; only a string-valued `code` property is trusted.
      const code =
        error &&
        typeof error === "object" &&
        "code" in error &&
        typeof error.code === "string"
          ? error.code
          : undefined;
      if (!code || !retryableCodes.has(code) || attempt === maxAttempts) {
        throw error;
      }
      // Linear backoff: the wait grows by 50 ms with every failed attempt.
      await new Promise<void>((resolve) => setTimeout(resolve, attempt * 50));
    }
  }
}
41+
42+
/**
 * Builds the minimal benchmark summary that the tests serialize to disk as
 * input for the dashboard renderer.
 *
 * The summary holds one model row ("GPT-5 Codex") with metrics for the four
 * modes listed in `meta.modes`, and an empty `failures` list. A new object is
 * created on every call.
 *
 * @returns The summary fixture object.
 */
function createSummaryFixture() {
  return {
    meta: {
      generatedAt: "2026-03-22T00:00:00.000Z",
      preset: "codex-core",
      models: ["gpt-5-codex"],
      tasks: ["task-1"],
      modes: ["patch", "replace", "hashline", "hashline_v2"],
      runCount: 1,
      warmupCount: 0,
    },
    rows: [
      {
        modelId: "gpt-5-codex",
        displayName: "GPT-5 Codex",
        modes: {
          patch: {
            accuracyPct: 90,
            wallMsP50: 1000,
            tokensTotalP50: 100,
          },
          replace: {
            accuracyPct: 85,
            wallMsP50: 1100,
            tokensTotalP50: 90,
          },
          hashline: {
            accuracyPct: 88,
            wallMsP50: 1050,
            tokensTotalP50: 95,
          },
          hashline_v2: {
            accuracyPct: 92,
            wallMsP50: 980,
            tokensTotalP50: 80,
          },
        },
      },
    ],
    failures: [],
  };
}
84+
85+
/**
 * Creates a unique directory under the OS temp directory and records it in
 * `tempRoots` so the `afterEach` hook can delete it.
 *
 * @param suffix - Text inserted into the directory-name prefix
 *   (`bench-render${suffix}-`); defaults to the empty string.
 * @returns The path of the newly created directory.
 */
function createTempRoot(suffix = ""): string {
  const root = mkdtempSync(path.join(tmpdir(), `bench-render${suffix}-`));
  tempRoots.push(root);
  return root;
}
90+
91+
/**
 * Writes the summary fixture to `inputPath` as UTF-8 JSON with two-space
 * indentation.
 *
 * @param inputPath - Destination file for the serialized summary.
 */
function writeSummary(inputPath: string): void {
  writeFileSync(inputPath, JSON.stringify(createSummaryFixture(), null, 2), "utf8");
}
94+
95+
/**
 * Runs the dashboard renderer script synchronously in a child process started
 * with the current Node executable (`process.execPath`).
 *
 * @param args - Command-line arguments appended after the script path.
 * @returns The `spawnSync` result; stdout and stderr are UTF-8 strings, and
 *   the child is given a 10 second timeout.
 */
function runRenderDashboard(args: string[]) {
  return spawnSync(process.execPath, [scriptPath, ...args], {
    encoding: "utf8",
    timeout: 10_000,
  });
}
101+
102+
// After every test, drain `tempRoots` and delete each directory it recorded.
// Directories are removed one at a time, most recently created first.
afterEach(async () => {
  while (tempRoots.length > 0) {
    const root = tempRoots.pop();
    if (root) {
      await removeWithRetry(root, { recursive: true, force: true });
    }
  }
});
110+
111+
// Smoke tests for the dashboard renderer: each case runs the script in a child
// process against files inside a fresh temp directory and inspects the exit
// status, the captured stdout/stderr, and (on success) the written HTML.
describe("benchmark render dashboard script", () => {
  it("renders HTML from a minimal summary file", () => {
    const root = createTempRoot();
    const inputPath = path.join(root, "summary.json");
    const outputPath = path.join(root, "dashboard.html");

    writeSummary(inputPath);

    const result = runRenderDashboard([
      `--input=${inputPath}`,
      `--output=${outputPath}`,
    ]);

    // `result.error` is set when the child could not be spawned or timed out.
    expect(result.error).toBeUndefined();
    expect(result.status).toBe(0);
    expect(result.stderr).toBe("");
    expect(result.stdout).toContain("Dashboard written:");
    const html = readFileSync(outputPath, "utf8");
    expect(html).toContain("Code Edit Format Benchmark");
    expect(html).toContain("GPT-5 Codex");
  });

  it("renders HTML when input and output paths contain spaces", () => {
    // Spaces appear in the temp-root name, the subdirectory, and both file names.
    const root = createTempRoot(" spaces");
    const spacedDir = path.join(root, "with spaces");
    mkdirSync(spacedDir, { recursive: true });
    const inputPath = path.join(spacedDir, "summary file.json");
    const outputPath = path.join(spacedDir, "dashboard output.html");

    writeSummary(inputPath);

    const result = runRenderDashboard([
      `--input=${inputPath}`,
      `--output=${outputPath}`,
    ]);

    expect(result.error).toBeUndefined();
    expect(result.status).toBe(0);
    expect(result.stderr).toBe("");
    expect(result.stdout).toContain("Dashboard written:");
    expect(readFileSync(outputPath, "utf8")).toContain("GPT-5 Codex");
  });

  it("fails with stderr when the input file is missing", () => {
    const root = createTempRoot();
    // The input file is deliberately never written.
    const inputPath = path.join(root, "missing-summary.json");
    const outputPath = path.join(root, "dashboard.html");

    const result = runRenderDashboard([
      `--input=${inputPath}`,
      `--output=${outputPath}`,
    ]);

    expect(result.status).toBe(1);
    expect(result.stderr).toContain("Render failed:");
    expect(result.stderr.toLowerCase()).toContain("no such file");
    expect(result.stdout).not.toContain("Dashboard written:");
  });

  it("fails with stderr when the summary json is malformed", () => {
    const root = createTempRoot();
    const inputPath = path.join(root, "summary.json");
    const outputPath = path.join(root, "dashboard.html");

    writeFileSync(inputPath, "{ not-valid-json", "utf8");

    const result = runRenderDashboard([
      `--input=${inputPath}`,
      `--output=${outputPath}`,
    ]);

    expect(result.status).toBe(1);
    expect(result.stderr).toContain("Render failed:");
    expect(result.stderr.toLowerCase()).toContain("json");
    expect(result.stdout).not.toContain("Dashboard written:");
  });

  it("fails with stderr when the output directory does not exist", () => {
    const root = createTempRoot();
    const inputPath = path.join(root, "summary.json");
    // The "missing" parent directory is never created by the test.
    const outputPath = path.join(root, "missing", "dashboard.html");

    writeSummary(inputPath);

    const result = runRenderDashboard([
      `--input=${inputPath}`,
      `--output=${outputPath}`,
    ]);

    expect(result.status).toBe(1);
    expect(result.stderr).toContain("Render failed:");
    expect(result.stdout).not.toContain("Dashboard written:");
  });
});

0 commit comments

Comments (0)