@@ -248,6 +248,14 @@ def run_benchmark(args):
248248 model .to (device )
249249 model .eval ()
250250
251+ # ── CRITICAL FIX: Device-mismatch workaround ──────────────────────
252+ # The upstream depth_anything_v2 library hardcodes device selection
253+ # inside image2tensor(): `DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'`.
254+ # This ignores the model's actual device, causing crashes when the
255+ # model is on CPU but CUDA is available. We store the target device
256+ # and correct the tensor placement manually in the inference loop.
257+ _benchmark_device = device
258+
251259 model_load_ms = (time .perf_counter () - t0 ) * 1000
252260 backend = "pytorch"
253261 _log (f"PyTorch model loaded in { model_load_ms :.0f} ms on { device } " )
@@ -280,11 +288,21 @@ def run_benchmark(args):
280288 if depth_map .ndim > 2 :
281289 depth_map = np .squeeze (depth_map )
282290 else :
283- # PyTorch inference
291+ # PyTorch inference — manual device-correct path
292+ # We bypass model.infer_image() because the upstream library's
293+ # image2tensor() hardcodes CUDA device selection, causing crashes
294+ # when model is on CPU. Instead, we call image2tensor ourselves,
295+ # fix the device, then call model.forward() directly.
284296 import torch
297+ import torch .nn .functional as F
285298 rgb = cv2 .cvtColor (image , cv2 .COLOR_BGR2RGB )
286299 with torch .no_grad ():
287- depth_map = model .infer_image (rgb )
300+ img_tensor , (h , w ) = model .image2tensor (rgb , input_size = 518 )
301+ # FIX: Override the library's hardcoded device with the model's device
302+ img_tensor = img_tensor .to (_benchmark_device )
303+ depth = model .forward (img_tensor )
304+ depth = F .interpolate (depth [:, None ], (h , w ), mode = "bilinear" , align_corners = True )[0 , 0 ]
305+ depth_map = depth .cpu ().numpy ()
288306
289307 # Normalize and colorize
290308 d_min , d_max = depth_map .min (), depth_map .max ()
0 commit comments