-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathverify_real_data.py
More file actions
101 lines (90 loc) · 3.34 KB
/
verify_real_data.py
File metadata and controls
101 lines (90 loc) · 3.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/env python3
"""Comprehensive verification that real data is being used."""
import sys
from pathlib import Path
# Opening banner: the script title framed by two 70-character rules,
# followed by one blank line.
print('=' * 70, 'COMPREHENSIVE REAL DATA VERIFICATION', '=' * 70, sep='\n')
print()
# Test 1: Import and load real data
print('Test 1: Loading real data...')
# The import sits after the heading print, so a failing import surfaces
# underneath the "Test 1" heading in the output.
from openadapt_viewer.viewers.benchmark.real_data_loader import load_real_capture_data

run = load_real_capture_data()
# Report the three headline attributes of the loaded run, in order.
for label, attr in (
    ('Loaded', 'benchmark_name'),
    ('Model', 'model_id'),
    ('Tasks', 'total_tasks'),
):
    print(f' ✓ {label}: {getattr(run, attr)}')
print()
# Test 2: Verify episodes
print('Test 2: Verifying episodes...')
# Expected task instructions, in the order the tasks appear in the run.
expected_instructions = ['Navigate to System Settings', 'Disable Night Shift']
# Raise explicitly instead of using `assert`: assert statements are removed
# under `python -O`, which would turn this verification into a no-op.
if len(run.tasks) != len(expected_instructions):
    raise AssertionError(
        f'Expected {len(expected_instructions)} tasks, got {len(run.tasks)}'
    )
for number, (task, expected) in enumerate(
    zip(run.tasks, expected_instructions), start=1
):
    if task.instruction != expected:
        raise AssertionError(
            f'Episode {number}: expected instruction {expected!r}, '
            f'got {task.instruction!r}'
        )
    print(f' ✓ Episode {number}: {expected}')
print()
# Test 3: Verify executions
print('Test 3: Verifying executions...')
execution_count = len(run.executions)
# Raise explicitly instead of using `assert` so the checks still run under
# `python -O`, and so a failure reports what was actually found.
if execution_count != 2:
    raise AssertionError(f'Expected 2 executions, got {execution_count}')
failed_indices = [
    index
    for index, execution in enumerate(run.executions)
    if not execution.success
]
if failed_indices:
    raise AssertionError(
        f'All executions should succeed; failed at index(es) {failed_indices}'
    )
print(f' ✓ All {execution_count} executions successful')
print()
# Test 4: Verify screenshots
print('Test 4: Verifying screenshots...')
# Substring every non-empty screenshot path is required to contain.
capture_marker = 'capture_31807990_step_'
screenshot_count = 0
for execution in run.executions:
    for step in execution.steps:
        if not step.screenshot_path:
            # Steps without a screenshot path are skipped, not failed.
            continue
        screenshot_count += 1
        # Explicit raise (not `assert`) so the check survives `python -O`
        # and the failure names the offending path.
        if capture_marker not in step.screenshot_path:
            raise AssertionError(
                f'Screenshot path {step.screenshot_path!r} does not contain '
                f'{capture_marker!r}'
            )
print(f' ✓ Found {screenshot_count} screenshot paths')
if screenshot_count:
    print(' ✓ All paths contain real capture IDs')
else:
    # Nothing was checked, so do not claim the paths were verified.
    print(' ⚠ No screenshot paths found; nothing to verify')
print()
# Test 5: Verify metadata
print('Test 5: Verifying metadata...')
# Config entries the loaded run must carry, with their required values.
expected_config = {
    'source': 'real_capture',
    'recording_id': 'turn-off-nightshift',
    'platform': 'darwin',
    'episode_count': 2,
}
for key, expected in expected_config.items():
    actual = run.config[key]  # a missing key still raises KeyError
    # Explicit raise (not `assert`) so the check survives `python -O` and
    # the failure shows which key mismatched and what it held.
    if actual != expected:
        raise AssertionError(
            f'config[{key!r}]: expected {expected!r}, got {actual!r}'
        )
print(' ✓ Source: real_capture')
print(' ✓ Recording: turn-off-nightshift')
print(' ✓ Platform: darwin (macOS)')
print()
# Test 6: Verify generated HTML
print('Test 6: Verifying generated HTML...')
html_path = Path('test_benchmark_refactored.html')
try:
    # Decode explicitly as UTF-8 rather than the platform's locale default,
    # so the substring checks behave the same on every machine.
    html = html_path.read_text(encoding='utf-8')
except FileNotFoundError:
    html = None
# True only when the HTML file was actually read and checked below.
html_verified = html is not None

if html_verified:
    # Each entry maps a human-readable check name to its pass/fail result.
    checks = {
        'Real Capture title': 'Real Capture: Turn Off Night Shift Demo' in html,
        'human_demonstration': 'human_demonstration' in html,
        'episode_001': 'episode_001' in html,
        'episode_002': 'episode_002' in html,
        'Navigate to System Settings': 'Navigate to System Settings' in html,
        'Disable Night Shift': 'Disable Night Shift' in html,
        'Real screenshots': 'capture_31807990_step_' in html,
        'No sample data': 'sample_run' not in html,
        'No synthetic data': 'synthetic' not in html.lower(),
    }
    for check_name, passed in checks.items():
        status = '✓' if passed else '✗'
        print(f' {status} {check_name}')
    if not all(checks.values()):
        print(' ✗ SOME CHECKS FAILED')
        sys.exit(1)
else:
    print(' ⚠ test_benchmark_refactored.html not found')
print()

# Final summary. The HTML lines reflect whether Test 6 really ran, so a
# missing file is reported as skipped instead of as verified.
print('=' * 70)
if html_verified:
    print('ALL TESTS PASSED ✓')
else:
    print('TESTS PASSED ✓ (HTML CHECK SKIPPED)')
print('=' * 70)
print()
print('Summary:')
print(' • Real data loader working')
print(' • 2 episodes loaded from nightshift recording')
print(' • All executions successful')
print(' • Screenshots paths verified')
print(' • Metadata correct')
if html_verified:
    print(' • Generated HTML verified')
else:
    print(' • Generated HTML NOT verified (file not found)')
print()
print('✓ REAL DATA MIGRATION COMPLETE')