-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathscaffold_eval_run.py
More file actions
68 lines (54 loc) · 2.32 KB
/
scaffold_eval_run.py
File metadata and controls
68 lines (54 loc) · 2.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import json
from pathlib import Path
def load_manifest(eval_dir: Path) -> dict:
    """Read and parse ``manifest.json`` from *eval_dir*.

    Args:
        eval_dir: Directory expected to contain a ``manifest.json`` file.

    Returns:
        The decoded manifest as a dictionary.

    Raises:
        OSError: If the manifest file cannot be read (for example, it is
            missing).
        json.JSONDecodeError: If the file does not contain valid JSON.
        ValueError: If the top-level JSON value is not an object.
    """
    manifest_path = eval_dir / "manifest.json"
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    # The declared return type is ``dict``; reject anything else here with a
    # clear message rather than letting a later ``.get`` call fail obscurely.
    if not isinstance(manifest, dict):
        raise ValueError(
            f"Manifest at {manifest_path} must be a JSON object, "
            f"got {type(manifest).__name__}"
        )
    return manifest
def build_run(manifest: dict, run_id: str, variant: str, model: str, notes: str) -> dict:
    """Build a run-result skeleton with one pending entry per manifest task.

    Args:
        manifest: Parsed eval manifest. Its optional ``"scoring"`` mapping
            supplies the score names and its optional ``"tasks"`` list
            supplies the tasks; each task must have an ``"id"`` key.
        run_id: Identifier stored under ``"run_id"``.
        variant: Value stored under ``"variant"``.
        model: Value stored under ``"model"``.
        notes: Value stored under ``"notes"``.

    Returns:
        A dictionary holding the run metadata and a ``"tasks"`` list in which
        every task has status ``"pending"`` and every score set to ``None``.

    Raises:
        KeyError: If a task entry has no ``"id"`` key.
    """
    # ``or`` instead of a ``.get`` default so that an explicit JSON ``null``
    # is handled the same way as a missing key.
    scoring = manifest.get("scoring") or {}
    tasks = manifest.get("tasks") or []

    # Fall back to the built-in score names when the manifest defines none.
    score_keys = list(scoring.keys()) or [
        "correctness",
        "completeness",
        "code_quality",
        "verification_quality",
    ]

    return {
        "run_id": run_id,
        "variant": variant,
        "model": model,
        "notes": notes,
        "tasks": [
            {
                "id": task["id"],
                "status": "pending",
                # A fresh dict/list per task so entries never share state.
                "scores": {key: None for key in score_keys},
                "failure_modes": [],
                "summary": "",
                "merge_readiness": "unknown",
            }
            for task in tasks
        ],
    }
def main(argv: list[str] | None = None) -> int:
    """Parse command-line arguments and write a run-result skeleton as JSON.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read the process command line.

    Returns:
        ``0`` after the run file has been written.

    Raises:
        SystemExit: If the output file already exists and ``--force`` was not
            given, if the manifest cannot be read or parsed, or (raised by
            argparse) if the arguments are invalid.
    """
    parser = argparse.ArgumentParser(description="Create a run-result skeleton from a repo eval manifest.")
    parser.add_argument("eval_dir", help="Directory containing manifest.json")
    parser.add_argument("output_file", help="Path to write the run JSON")
    parser.add_argument("--run-id", required=True, help="Run identifier")
    parser.add_argument("--variant", required=True, help="Harness or agent variant name")
    parser.add_argument("--model", required=True, help="Model identifier")
    parser.add_argument("--notes", default="", help="Freeform notes about this variant")
    parser.add_argument("--force", action="store_true", help="Overwrite the output file if it exists")
    args = parser.parse_args(argv)

    eval_dir = Path(args.eval_dir).resolve()
    output_path = Path(args.output_file).resolve()

    # Refuse to clobber an existing run file unless explicitly asked to.
    if output_path.exists() and not args.force:
        raise SystemExit(f"Output file already exists: {output_path}")

    # Report an unreadable or malformed manifest the same way as the check
    # above (a short message) instead of a raw traceback.
    # json.JSONDecodeError is a subclass of ValueError.
    try:
        manifest = load_manifest(eval_dir)
    except (OSError, ValueError) as exc:
        raise SystemExit(f"Could not load manifest from {eval_dir}: {exc}") from exc

    run = build_run(manifest, args.run_id, args.variant, args.model, args.notes)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(run, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
    print(f"Scaffolded run file at {output_path}")
    return 0
if __name__ == "__main__":
    # Run the CLI only when executed as a script; the value returned by
    # main() is handed to SystemExit as the exit status.
    exit_status = main()
    raise SystemExit(exit_status)