openadapt-viewer/verify_real_data.py at main · OpenAdaptAI/openadapt-viewer · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/env python3
"""Comprehensive verification that real data is being used."""

import sys
from pathlib import Path

print('=' * 70)
print('COMPREHENSIVE REAL DATA VERIFICATION')
print('=' * 70)
print()

# Test 1: Import and load real data
print('Test 1: Loading real data...')
from openadapt_viewer.viewers.benchmark.real_data_loader import load_real_capture_data
run = load_real_capture_data()
print(f'  ✓ Loaded: {run.benchmark_name}')
print(f'  ✓ Model: {run.model_id}')
print(f'  ✓ Tasks: {run.total_tasks}')
print()

# Test 2: Verify episodes
print('Test 2: Verifying episodes...')
assert len(run.tasks) == 2, f'Expected 2 tasks, got {len(run.tasks)}'
assert run.tasks[0].instruction == 'Navigate to System Settings'
assert run.tasks[1].instruction == 'Disable Night Shift'
print('  ✓ Episode 1: Navigate to System Settings')
print('  ✓ Episode 2: Disable Night Shift')
print()

# Test 3: Verify executions
print('Test 3: Verifying executions...')
assert len(run.executions) == 2
assert all(e.success for e in run.executions), 'All executions should succeed'
print(f'  ✓ All {len(run.executions)} executions successful')
print()

# Test 4: Verify screenshots
print('Test 4: Verifying screenshots...')
screenshot_count = 0
for execution in run.executions:
    for step in execution.steps:
        if step.screenshot_path:
            screenshot_count += 1
            assert 'capture_31807990_step_' in step.screenshot_path
print(f'  ✓ Found {screenshot_count} screenshot paths')
print(f'  ✓ All paths contain real capture IDs')
print()

# Test 5: Verify metadata
print('Test 5: Verifying metadata...')
assert run.config['source'] == 'real_capture'
assert run.config['recording_id'] == 'turn-off-nightshift'
assert run.config['platform'] == 'darwin'
assert run.config['episode_count'] == 2
print('  ✓ Source: real_capture')
print('  ✓ Recording: turn-off-nightshift')
print('  ✓ Platform: darwin (macOS)')
print()

# Test 6: Verify generated HTML
print('Test 6: Verifying generated HTML...')
html_path = Path('test_benchmark_refactored.html')
if html_path.exists():
    with open(html_path) as f:
        html = f.read()
    checks = {
        'Real Capture title': 'Real Capture: Turn Off Night Shift Demo' in html,
        'human_demonstration': 'human_demonstration' in html,
        'episode_001': 'episode_001' in html,
        'episode_002': 'episode_002' in html,
        'Navigate to System Settings': 'Navigate to System Settings' in html,
        'Disable Night Shift': 'Disable Night Shift' in html,
        'Real screenshots': 'capture_31807990_step_' in html,
        'No sample data': 'sample_run' not in html,
        'No synthetic data': 'synthetic' not in html.lower(),
    }

    for check_name, passed in checks.items():
        status = '✓' if passed else '✗'
        print(f'  {status} {check_name}')

    if not all(checks.values()):
        print('  ✗ SOME CHECKS FAILED')
        sys.exit(1)
else:
    print('  ⚠ test_benchmark_refactored.html not found')
print()

print('=' * 70)
print('ALL TESTS PASSED ✓')
print('=' * 70)
print()
print('Summary:')
print('  • Real data loader working')
print('  • 2 episodes loaded from nightshift recording')
print('  • All executions successful')
print('  • Screenshots paths verified')
print('  • Metadata correct')
print('  • Generated HTML verified')
print()
print('✓ REAL DATA MIGRATION COMPLETE')