diff --git a/.gitignore b/.gitignore index 717cf27b54..959bedc43d 100644 --- a/.gitignore +++ b/.gitignore @@ -169,7 +169,7 @@ pufferlib/resources/drive/binaries/ pufferlib/resources/drive/binaries/training/ pufferlib/resources/drive/binaries/validation/ -# But keep map_000.bin for the training test +# Keep map_000.bin for the training test !pufferlib/resources/drive/binaries/map_000.bin !pufferlib/resources/drive/binaries/training/map_000.bin pufferlib/resources/drive/sanity/sanity_binaries/ @@ -178,6 +178,13 @@ pufferlib/resources/drive/sanity/sanity_binaries/ !pufferlib/resources/drive/binaries/carla/** !pufferlib/resources/drive/binaries/carla_2D/** +# Keep driving behaviour eval scenario binaries +!pufferlib/resources/drive/binaries/dense_traffic/** +!pufferlib/resources/drive/binaries/lane_change/** +!pufferlib/resources/drive/binaries/obstacles/** +!pufferlib/resources/drive/binaries/vru_interaction/** +!pufferlib/resources/drive/binaries/lead_vehicle_interaction/** + # Compiled drive binary in root /drive /visualize diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 19dced1b81..edceebb691 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -238,12 +238,16 @@ wosac_sanity_check = False wosac_aggregate_results = True ; Evaluation mode: "policy", "ground_truth" wosac_eval_mode = "policy" +; Evaluating different driving behaviours learned by the policy +driving_behaviours_eval = True +driving_behaviours_eval_config = "pufferlib/config/ocean/driving_behaviours_eval.ini" +driving_behaviours_eval_interval = 1000 [safe_eval] ; If True, periodically run policy with safe/law-abiding reward conditioning and log metrics enabled = True ; How often to run safe eval (in training epochs). Defaults to render_interval. 
-interval = 250 +interval = 1000 render_safe_eval = True ; Number of agents to run in the eval environment num_agents = 50 diff --git a/pufferlib/config/ocean/driving_behaviours_eval.ini b/pufferlib/config/ocean/driving_behaviours_eval.ini new file mode 100644 index 0000000000..8458c2402c --- /dev/null +++ b/pufferlib/config/ocean/driving_behaviours_eval.ini @@ -0,0 +1,58 @@ +; Configuration for driving behaviour evaluation maps and rewards. +; Currently evaluates 5 broad driving behaviours: lead vehicle interaction, lane change, dense traffic, obstacles, vulnerable road user interactions (VRUs). +; Currently uses safe reward conditioning values for evaluation + +[eval_lead_vehicle_interaction] +map_dir = "resources/drive/binaries/lead_vehicle_interaction" +human_replay_eval = True +render_eval = True +episode_length = 91 + +[eval_lane_change] +map_dir = "resources/drive/binaries/lane_change" +human_replay_eval = True +render_eval = True +episode_length = 91 + +[eval_dense_traffic] +map_dir = "resources/drive/binaries/dense_traffic" +human_replay_eval = True +render_eval = True +episode_length = 91 + +[eval_obstacles] +map_dir = "resources/drive/binaries/obstacles" +human_replay_eval = True +render_eval = True +episode_length = 91 + +[eval_vru_interaction] +map_dir = "resources/drive/binaries/vru_interaction" +human_replay_eval = True +render_eval = True +episode_length = 91 + +[eval_driving_rewards] +; Reward conditioning values (min=max to fix the value). +; Names match the env reward_bound_* keys. 
+; High penalties for unsafe behavior +collision = -3.0 +offroad = -3.0 +overspeed = -1.0 +traffic_light = -1.0 +reverse = -0.0075 +comfort = -0.1 + +; Standard driving rewards +goal_radius = 8.0 +lane_align = 0.0025 +lane_center = -0.00075 +velocity = 0.005 +center_bias = 0.0 +vel_align = 1.0 +timestep = -0.00005 + +; Neutral scaling factors +throttle = 1.0 +steer = 1.0 +acc = 1.0 diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 86dd3d0f03..e562fcc5ee 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -1346,3 +1346,126 @@ def log_stats(self, global_step=None): if global_step is not None: payload["train_step"] = global_step self.logger.wandb.log(payload) + + +class DrivingBehavioursEvaluator: + """Evaluates a policy on the 5 driving behaviour classes using live in-process weights.""" + + # Sections in driving_behaviours_eval.ini that describe scenario classes + EVAL_SECTIONS_PREFIX = "eval_" + REWARD_SECTION = "eval_driving_rewards" + + def __init__(self, env_name: str, behaviours_config: Dict, device="cuda", logger=None): + self.env_name = env_name + self.behaviours_config = behaviours_config + if isinstance(device, int): + device = f"cuda:{device}" + self.device = device + self.logger = logger + self.reward_config = behaviours_config.get(self.REWARD_SECTION, {}) + self.classes = [ + (name, cfg) + for name, cfg in behaviours_config.items() + if name.startswith(self.EVAL_SECTIONS_PREFIX) and name != self.REWARD_SECTION + ] + + def _build_class_env_config(self, class_cfg: Dict) -> Dict: + """Build env config for one scenario class with fixed reward conditioning.""" + import re + import sys + from pufferlib.pufferl import load_config + + original_argv = sys.argv + sys.argv = ["pufferl"] + try: + eval_config = load_config(self.env_name) + finally: + sys.argv = original_argv + + eval_config["vec"] = dict(backend="PufferEnv", num_envs=1) + eval_config["train"]["device"] = 
self.device + eval_config["env"]["control_mode"] = "control_sdc_only" + eval_config["env"]["init_mode"] = "create_all_valid" + eval_config["env"]["episode_length"] = 91 + eval_config["env"]["resample_frequency"] = 0 + + map_dir = class_cfg.get("map_dir", "") + if isinstance(map_dir, str): + map_dir = map_dir.strip('"') + eval_config["env"]["map_dir"] = map_dir + # Set num_maps to the number of available bins so we cover all scenarios + available_maps = len([f for f in os.listdir(map_dir) if f.endswith(".bin")]) if os.path.isdir(map_dir) else 1 + eval_config["env"]["num_maps"] = available_maps + + # Discover valid reward bound names + valid_bounds = set() + for key in eval_config["env"]: + m = re.match(r"reward_bound_(.+)_min$", key) + if m: + valid_bounds.add(m.group(1)) + + # Fix reward conditioning to eval_driving_rewards values + for key, val in self.reward_config.items(): + if key not in valid_bounds: + continue + eval_config["env"][f"reward_bound_{key}_min"] = float(val) + eval_config["env"][f"reward_bound_{key}_max"] = float(val) + + return eval_config + + def evaluate_class(self, class_cfg: Dict, policy) -> Dict: + """Run human-replay rollouts on all maps in the class map_dir and return averaged metrics.""" + from collections import defaultdict + from pufferlib.pufferl import load_env + + print(f"Evaluating class") + + eval_config = self._build_class_env_config(class_cfg) + num_maps = eval_config["env"]["num_maps"] + print(f"Built eval config for class with map_dir: {eval_config['env']['map_dir']}") + + vecenv = load_env(self.env_name, eval_config) + print(f"Loaded vecenv") + policy.eval() + print(f"Set policy to eval mode") + rollout_evaluator = HumanReplayEvaluator(eval_config) + all_stats = defaultdict(list) + print(f"Starting rollouts for class with {num_maps} maps") + try: + for _ in range(num_maps): + result = rollout_evaluator.rollout(eval_config, vecenv, policy) or {} + for k, v in result.items(): + try: + all_stats[k].append(float(v)) + except 
(TypeError, ValueError): + pass + # Reset for next map + vecenv.reset() + finally: + vecenv.close() + import gc + + gc.collect() + import torch + + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + return {k: float(np.mean(v)) for k, v in all_stats.items() if v} + + def log_stats(self, all_results: Dict[str, Dict], global_step=None): + """Log per-class metrics to wandb under driving_behaviours//.""" + if not (self.logger and hasattr(self.logger, "wandb") and self.logger.wandb): + return + payload = {} + for class_name, metrics in all_results.items(): + short = class_name[len(self.EVAL_SECTIONS_PREFIX) :] + for k, v in metrics.items(): + try: + payload[f"driving_behaviours/{short}/{k}"] = float(v) + except (TypeError, ValueError): + pass + if global_step is not None: + payload["train_step"] = global_step + if payload: + self.logger.wandb.log(payload) diff --git a/pufferlib/ocean/drive/drive.c b/pufferlib/ocean/drive/drive.c index 57f38623ed..efd67ac318 100644 --- a/pufferlib/ocean/drive/drive.c +++ b/pufferlib/ocean/drive/drive.c @@ -130,7 +130,7 @@ void demo(const char *map_name_arg, const char *policy_name_arg, int view_mode, {conf.reward_bound_steer_min, conf.reward_bound_steer_max}, {conf.reward_bound_acc_min, conf.reward_bound_acc_max}, }, - .map_name = "resources/drive/binaries/carla/carla_3D/map_001.bin", + .map_name = "resources/drive/binaries/carla_2D/map_001.bin", .render_mode = RENDER_WINDOW, .partner_obs_radius = conf.partner_obs_radius, }; diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 99c84d750a..4957e37e61 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -2210,7 +2210,9 @@ void set_active_agents(Drive *env) { static_agent_indices[env->static_agent_count] = i; env->static_agent_count++; env->agents[i].active_agent = 0; - if (env->agents[i].mark_as_expert == 1 || env->active_agent_count == env->num_agents) { + + if (env->control_mode == CONTROL_SDC_ONLY || 
env->agents[i].mark_as_expert == 1 || + env->active_agent_count == env->num_agents) { expert_static_agent_indices[env->expert_static_agent_count] = i; env->expert_static_agent_count++; env->agents[i].mark_as_expert = 1; diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 4c66f205d8..c072368089 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -1071,6 +1071,17 @@ def process_all_maps( if not success: print(f" {name}: {error}") + # Write manifest.json mapping each bin to its source JSON + manifest = {} + for i, map_path, binary_path, *_ in tasks: + _, _, success, _ = results[i] + if success: + manifest[f"map_{i:03d}.bin"] = map_path.name + manifest_path = binary_dir / "manifest.json" + with open(manifest_path, "w") as f: + json.dump(manifest, f, indent=2) + print(f"Wrote manifest to {manifest_path} ({len(manifest)} entries)") + def test_performance(timeout=10, atn_cache=1024, num_agents=1024): import time diff --git a/pufferlib/ocean/drive/visualize.c b/pufferlib/ocean/drive/visualize.c index 22f2706367..5ff6ff1d65 100644 --- a/pufferlib/ocean/drive/visualize.c +++ b/pufferlib/ocean/drive/visualize.c @@ -193,11 +193,12 @@ static int make_gif_from_frames(const char *pattern, int fps, const char *palett int eval_gif(const char *map_name, const char *policy_name, int show_grid, int obs_only, int lasers, int show_human_logs, int frame_skip, const char *view_mode, const char *output_topdown, - const char *output_agent, int num_maps, int zoom_in) { + const char *output_agent, int num_maps, int zoom_in, const char *ini_file) { // Parse configuration from INI file env_init_config conf = {0}; - const char *ini_file = "pufferlib/config/ocean/drive.ini"; + if (ini_file == NULL) + ini_file = "pufferlib/config/ocean/drive.ini"; if (ini_parse(ini_file, handler, &conf) < 0) { fprintf(stderr, "Error: Could not load %s. 
Cannot determine environment configuration.\n", ini_file); return -1; @@ -350,25 +351,29 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o char filename_topdown[256]; char filename_agent[256]; - if (output_topdown != NULL && output_agent != NULL) { - strcpy(filename_topdown, output_topdown); - strcpy(filename_agent, output_agent); - } else { - char policy_base[256]; - strcpy(policy_base, policy_name); - *strrchr(policy_base, '.') = '\0'; + char policy_base[256]; + strcpy(policy_base, policy_name); + *strrchr(policy_base, '.') = '\0'; - char map[256]; - strcpy(map, basename((char *)map_name)); - *strrchr(map, '.') = '\0'; + char map[256]; + strcpy(map, basename((char *)map_name)); + *strrchr(map, '.') = '\0'; - char video_dir[256]; - sprintf(video_dir, "%s/video", policy_base); - char mkdir_cmd[512]; - snprintf(mkdir_cmd, sizeof(mkdir_cmd), "mkdir -p \"%s\"", video_dir); - system(mkdir_cmd); + char video_dir[256]; + sprintf(video_dir, "%s/video", policy_base); + char mkdir_cmd[512]; + snprintf(mkdir_cmd, sizeof(mkdir_cmd), "mkdir -p \"%s\"", video_dir); + system(mkdir_cmd); + if (output_topdown != NULL) { + strcpy(filename_topdown, output_topdown); + } else { sprintf(filename_topdown, "%s/video/%s_topdown.mp4", policy_base, map); + } + + if (output_agent != NULL) { + strcpy(filename_agent, output_agent); + } else { sprintf(filename_agent, "%s/video/%s_agent.mp4", policy_base, map); } @@ -555,6 +560,6 @@ int main(int argc, char *argv[]) { } eval_gif(map_name, policy_name, show_grid, obs_only, lasers, show_human_logs, frame_skip, view_mode, output_topdown, - output_agent, num_maps, zoom_in); + output_agent, num_maps, zoom_in, ini_file); return 0; } diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 58e7a8cfcc..462b8fec20 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -60,6 +60,10 @@ import multiprocessing +import copy +import traceback +import gc + signal.signal(signal.SIGINT, lambda sig, frame: 
os._exit(0)) # Assume advantage kernel has been built if CUDA compiler is available @@ -564,10 +568,20 @@ def train(self): ): self._run_safe_eval() + behaviours_eval_enabled = self.config.get("eval", {}).get("driving_behaviours_eval", False) + behaviours_eval_interval = int( + self.config.get("eval", {}).get("driving_behaviours_eval_interval", self.eval_interval) + ) + if ( + is_main + and behaviours_eval_enabled + and behaviours_eval_interval > 0 + and (self.epoch % behaviours_eval_interval == 0 or done_training) + ): + self._run_driving_behaviours_eval() + def _run_safe_eval(self): - """Run safe eval in-process using SafeEvaluator.""" - import copy - import traceback + """Run safe eval in-process using SafeEvaluator, then render videos.""" vecenv = None try: @@ -609,6 +623,111 @@ def _run_safe_eval(self): except Exception: pass + def _run_driving_behaviours_eval(self): + """Run serial driving behaviours evals across all 5 classes, then render videos.""" + behaviours_config = self.config.get("driving_behaviours_eval") + if not behaviours_config: + print("DrivingBehavioursEval: no config loaded, skipping.") + return + + from pufferlib.ocean.benchmark.evaluator import DrivingBehavioursEvaluator + + env_name = self.config["env"] + evaluator = DrivingBehavioursEvaluator( + env_name=env_name, + behaviours_config=behaviours_config, + device=self.config["device"], + logger=self.logger, + ) + print(f"DrivingBehavioursEval: loaded config for {len(evaluator.classes)} classes") + + # Saving checkpoint even though it's not checkpoint interval because eval in subprocess needs it + self.save_checkpoint() + + all_results = {} + num_ran = 0 + + # Evaluate on all driving behaviour classes via subprocess (load_env in training process causes OOM, need to investigate further) + for class_name, class_cfg in evaluator.classes: + if not class_cfg.get("human_replay_eval", False): + continue + short = class_name[len(DrivingBehavioursEvaluator.EVAL_SECTIONS_PREFIX) :] + self.msg = 
f"Running driving behaviours eval: {short}..." + results = pufferlib.utils.run_driving_behaviour_class_eval_in_subprocess( + config=self.config, + class_name=class_name, + class_cfg=class_cfg, + reward_config=evaluator.reward_config, + logger=self.logger, + global_step=self.global_step, + ) + if results: + all_results[class_name] = results + num_ran += 1 + + # Render a video for each driving behaviour class using the new rollout_loop pipeline + for class_name, class_cfg in evaluator.classes: + if not class_cfg.get("render_eval", False): + continue + short = class_name[len(DrivingBehavioursEvaluator.EVAL_SECTIONS_PREFIX) :] + map_dir = class_cfg.get("map_dir", "") + if isinstance(map_dir, str): + map_dir = map_dir.strip('"') + try: + from pufferlib.ocean.drive.rollout import RenderContext, rollout_loop + from pufferlib.ocean.drive.drive import RenderView + import copy as _copy + + render_cfg = _copy.deepcopy(self.full_args) + render_cfg["env"]["map_dir"] = map_dir + render_cfg["env"]["control_mode"] = "control_sdc_only" + render_cfg["env"]["init_mode"] = "create_all_valid" + episode_length = int(class_cfg.get("episode_length", 91)) + render_cfg["env"]["episode_length"] = episode_length + render_cfg["env"]["resample_frequency"] = 0 + render_cfg["env"]["render_mode"] = 1 + render_cfg["vec"] = {"backend": "PufferEnv", "num_envs": 1} + + render_env = load_env(env_name, render_cfg) + try: + rollout_loop( + policy=self.uncompiled_policy, + env=render_env, + device=self.config["device"], + use_rnn=self.config.get("use_rnn", False), + max_steps=episode_length, + render_ctx=RenderContext( + view_mode=RenderView.FULL_SIM_STATE, + env_id=0, + ), + ) + finally: + render_env.close() + + # Log any produced mp4s to wandb + import glob as _glob + + video_files = _glob.glob("*.mp4") + if hasattr(self.logger, "wandb") and self.logger.wandb and video_files: + import wandb + + for p in video_files: + stem = os.path.splitext(os.path.basename(p))[0] + self.logger.wandb.log( + { + 
f"driving_behaviours/{short}/render": wandb.Video( + p, format="mp4", caption=f"scene_{stem}_epoch_{self.epoch}" + ) + } + ) + for p in video_files: + os.remove(p) + except Exception as e: + print(f"DrivingBehavioursEval: render failed for {short}: {e}") + traceback.print_exc() + + self.msg = f"Driving behaviours eval complete: {num_ran}/{len(evaluator.classes)} classes evaluated" + def mean_and_log(self): config = self.config for k in list(self.stats.keys()): @@ -1106,6 +1225,9 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None): env_config=args.get("env", {}), eval=args.get("eval", {}), safe_eval=args.get("safe_eval", {}), + driving_behaviours_eval=args.get("driving_behaviours_eval"), + package=args.get("package"), + vec_config=args.get("vec", {}), ) if "vec" in args and "num_workers" in args["vec"]: train_config["num_workers"] = args["vec"]["num_workers"] @@ -1200,6 +1322,41 @@ def eval(env_name, args=None, vecenv=None, policy=None): vecenv.close() return results_dict + elif human_replay_enabled: + args["env"]["map_dir"] = args["eval"].get( + "hr_map_dir", args["eval"].get("map_dir", "resources/drive/binaries/training") + ) + dataset_name = args["env"]["map_dir"].split("/")[-1] + print(f"Running human replay evaluation with {dataset_name} dataset.\n") + from pufferlib.ocean.benchmark.evaluator import HumanReplayEvaluator + + backend = args["eval"].get("backend", "PufferEnv") + args["env"]["num_agents"] = args["eval"]["human_replay_num_agents"] + + args["vec"] = dict(backend=backend, num_envs=1) + args["env"]["control_mode"] = args["eval"]["human_replay_control_mode"] + args["env"]["init_mode"] = "create_all_valid" # must spawn all agents so non-SDC can follow expert trajectories + args["env"]["episode_length"] = 91 # WOMD scenario length + + vecenv = vecenv or load_env(env_name, args) + policy = policy or load_policy(args, vecenv, env_name) + + print(f"Effective number of scenarios used: {len(vecenv.driver_env.agent_offsets) - 1}") + + 
evaluator = HumanReplayEvaluator(args) + + # Run rollouts with human replays + results = evaluator.rollout(args, vecenv, policy) + + import json + + print("HUMAN_REPLAY_METRICS_START") + print(json.dumps(results)) + print("HUMAN_REPLAY_METRICS_END") + + vecenv.close() + return results + else: # Standard evaluation: Render backend = args["vec"]["backend"] if backend != "PufferEnv": @@ -1575,6 +1732,22 @@ def puffer_type(value): prev[subkey] = value args["train"]["use_rnn"] = args["rnn_name"] is not None + + # Load driving behaviours eval config if specified + behaviours_config_path = args.get("eval", {}).get("driving_behaviours_eval_config") + if behaviours_config_path: + behaviours_config_path = behaviours_config_path.strip('"') + if os.path.exists(behaviours_config_path): + print(f"Loading driving behaviours eval config from {behaviours_config_path}") + bp = configparser.ConfigParser() + bp.read(behaviours_config_path) + behaviours = {} + for section in bp.sections(): + behaviours[section] = {k: puffer_type(v) for k, v in bp[section].items()} + args["driving_behaviours_eval"] = behaviours + else: + print(f"Warning: driving_behaviours_eval_config not found: {behaviours_config_path}") + return args diff --git a/pufferlib/resources/drive/binaries/dense_traffic/manifest.json b/pufferlib/resources/drive/binaries/dense_traffic/manifest.json new file mode 100644 index 0000000000..857bcbea1d --- /dev/null +++ b/pufferlib/resources/drive/binaries/dense_traffic/manifest.json @@ -0,0 +1,8 @@ +{ + "map_000.bin": "tfrecord-00021-of-00150_215.json", + "map_001.bin": "tfrecord-00021-of-00150_24.json", + "map_002.bin": "tfrecord-00061-of-00150_246.json", + "map_003.bin": "tfrecord-00076-of-00150_289.json", + "map_004.bin": "tfrecord-00090-of-00150_179.json", + "map_005.bin": "tfrecord-00105-of-00150_75.json" +} diff --git a/pufferlib/resources/drive/binaries/dense_traffic/map_000.bin b/pufferlib/resources/drive/binaries/dense_traffic/map_000.bin new file mode 100644 index 
0000000000..3b5c9f556f Binary files /dev/null and b/pufferlib/resources/drive/binaries/dense_traffic/map_000.bin differ diff --git a/pufferlib/resources/drive/binaries/dense_traffic/map_001.bin b/pufferlib/resources/drive/binaries/dense_traffic/map_001.bin new file mode 100644 index 0000000000..2abe7f7ce3 Binary files /dev/null and b/pufferlib/resources/drive/binaries/dense_traffic/map_001.bin differ diff --git a/pufferlib/resources/drive/binaries/dense_traffic/map_002.bin b/pufferlib/resources/drive/binaries/dense_traffic/map_002.bin new file mode 100644 index 0000000000..603d68f3fe Binary files /dev/null and b/pufferlib/resources/drive/binaries/dense_traffic/map_002.bin differ diff --git a/pufferlib/resources/drive/binaries/dense_traffic/map_003.bin b/pufferlib/resources/drive/binaries/dense_traffic/map_003.bin new file mode 100644 index 0000000000..4f8065e0c7 Binary files /dev/null and b/pufferlib/resources/drive/binaries/dense_traffic/map_003.bin differ diff --git a/pufferlib/resources/drive/binaries/dense_traffic/map_004.bin b/pufferlib/resources/drive/binaries/dense_traffic/map_004.bin new file mode 100644 index 0000000000..c8a99730e3 Binary files /dev/null and b/pufferlib/resources/drive/binaries/dense_traffic/map_004.bin differ diff --git a/pufferlib/resources/drive/binaries/dense_traffic/map_005.bin b/pufferlib/resources/drive/binaries/dense_traffic/map_005.bin new file mode 100644 index 0000000000..835a2b4b56 Binary files /dev/null and b/pufferlib/resources/drive/binaries/dense_traffic/map_005.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/manifest.json b/pufferlib/resources/drive/binaries/lane_change/manifest.json new file mode 100644 index 0000000000..78a63716c9 --- /dev/null +++ b/pufferlib/resources/drive/binaries/lane_change/manifest.json @@ -0,0 +1,12 @@ +{ + "map_000.bin": "tfrecord-00001-of-00150_0.json", + "map_001.bin": "tfrecord-00028-of-00150_89.json", + "map_002.bin": "tfrecord-00035-of-00150_299.json", + 
"map_003.bin": "tfrecord-00048-of-00150_72.json", + "map_004.bin": "tfrecord-00063-of-00150_29.json", + "map_005.bin": "tfrecord-00066-of-00150_108.json", + "map_006.bin": "tfrecord-00083-of-00150_149.json", + "map_007.bin": "tfrecord-00090-of-00150_52.json", + "map_008.bin": "tfrecord-00113-of-00150_111.json", + "map_009.bin": "tfrecord-00147-of-00150_42.json" +} diff --git a/pufferlib/resources/drive/binaries/lane_change/map_000.bin b/pufferlib/resources/drive/binaries/lane_change/map_000.bin new file mode 100644 index 0000000000..99193881b6 Binary files /dev/null and b/pufferlib/resources/drive/binaries/lane_change/map_000.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/map_001.bin b/pufferlib/resources/drive/binaries/lane_change/map_001.bin new file mode 100644 index 0000000000..ab92678be1 Binary files /dev/null and b/pufferlib/resources/drive/binaries/lane_change/map_001.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/map_002.bin b/pufferlib/resources/drive/binaries/lane_change/map_002.bin new file mode 100644 index 0000000000..9087b5087a Binary files /dev/null and b/pufferlib/resources/drive/binaries/lane_change/map_002.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/map_003.bin b/pufferlib/resources/drive/binaries/lane_change/map_003.bin new file mode 100644 index 0000000000..570c601d6c Binary files /dev/null and b/pufferlib/resources/drive/binaries/lane_change/map_003.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/map_004.bin b/pufferlib/resources/drive/binaries/lane_change/map_004.bin new file mode 100644 index 0000000000..2902691db3 Binary files /dev/null and b/pufferlib/resources/drive/binaries/lane_change/map_004.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/map_005.bin b/pufferlib/resources/drive/binaries/lane_change/map_005.bin new file mode 100644 index 0000000000..686afee43b Binary files /dev/null and 
b/pufferlib/resources/drive/binaries/lane_change/map_005.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/map_006.bin b/pufferlib/resources/drive/binaries/lane_change/map_006.bin new file mode 100644 index 0000000000..de9ea20536 Binary files /dev/null and b/pufferlib/resources/drive/binaries/lane_change/map_006.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/map_007.bin b/pufferlib/resources/drive/binaries/lane_change/map_007.bin new file mode 100644 index 0000000000..f52e107a9c Binary files /dev/null and b/pufferlib/resources/drive/binaries/lane_change/map_007.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/map_008.bin b/pufferlib/resources/drive/binaries/lane_change/map_008.bin new file mode 100644 index 0000000000..706e13f35e Binary files /dev/null and b/pufferlib/resources/drive/binaries/lane_change/map_008.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/map_009.bin b/pufferlib/resources/drive/binaries/lane_change/map_009.bin new file mode 100644 index 0000000000..0a8b06b1cf Binary files /dev/null and b/pufferlib/resources/drive/binaries/lane_change/map_009.bin differ diff --git a/pufferlib/resources/drive/binaries/lead_vehicle_interaction/manifest.json b/pufferlib/resources/drive/binaries/lead_vehicle_interaction/manifest.json new file mode 100644 index 0000000000..8ab2f91726 --- /dev/null +++ b/pufferlib/resources/drive/binaries/lead_vehicle_interaction/manifest.json @@ -0,0 +1,5 @@ +{ + "map_000.bin": "tfrecord-00002-of-00150_244.json", + "map_001.bin": "tfrecord-00003-of-00150_94.json", + "map_002.bin": "tfrecord-00048-of-00150_72.json" +} diff --git a/pufferlib/resources/drive/binaries/lead_vehicle_interaction/map_000.bin b/pufferlib/resources/drive/binaries/lead_vehicle_interaction/map_000.bin new file mode 100644 index 0000000000..8fa1ae3136 Binary files /dev/null and b/pufferlib/resources/drive/binaries/lead_vehicle_interaction/map_000.bin differ diff --git 
a/pufferlib/resources/drive/binaries/lead_vehicle_interaction/map_001.bin b/pufferlib/resources/drive/binaries/lead_vehicle_interaction/map_001.bin new file mode 100644 index 0000000000..9d4743ef29 Binary files /dev/null and b/pufferlib/resources/drive/binaries/lead_vehicle_interaction/map_001.bin differ diff --git a/pufferlib/resources/drive/binaries/lead_vehicle_interaction/map_002.bin b/pufferlib/resources/drive/binaries/lead_vehicle_interaction/map_002.bin new file mode 100644 index 0000000000..1632628ef0 Binary files /dev/null and b/pufferlib/resources/drive/binaries/lead_vehicle_interaction/map_002.bin differ diff --git a/pufferlib/resources/drive/binaries/obstacles/manifest.json b/pufferlib/resources/drive/binaries/obstacles/manifest.json new file mode 100644 index 0000000000..092fcb9a7c --- /dev/null +++ b/pufferlib/resources/drive/binaries/obstacles/manifest.json @@ -0,0 +1,3 @@ +{ + "map_000.bin": "tfrecord-00042-of-00150_260.json" +} diff --git a/pufferlib/resources/drive/binaries/obstacles/map_000.bin b/pufferlib/resources/drive/binaries/obstacles/map_000.bin new file mode 100644 index 0000000000..c71166378c Binary files /dev/null and b/pufferlib/resources/drive/binaries/obstacles/map_000.bin differ diff --git a/pufferlib/resources/drive/binaries/vru_interaction/manifest.json b/pufferlib/resources/drive/binaries/vru_interaction/manifest.json new file mode 100644 index 0000000000..1c036af9e6 --- /dev/null +++ b/pufferlib/resources/drive/binaries/vru_interaction/manifest.json @@ -0,0 +1,4 @@ +{ + "map_000.bin": "tfrecord-00010-of-00150_84.json", + "map_001.bin": "tfrecord-00147-of-00150_42.json" +} diff --git a/pufferlib/resources/drive/binaries/vru_interaction/map_000.bin b/pufferlib/resources/drive/binaries/vru_interaction/map_000.bin new file mode 100644 index 0000000000..3dbc4e9491 Binary files /dev/null and b/pufferlib/resources/drive/binaries/vru_interaction/map_000.bin differ diff --git 
a/pufferlib/resources/drive/binaries/vru_interaction/map_001.bin b/pufferlib/resources/drive/binaries/vru_interaction/map_001.bin new file mode 100644 index 0000000000..0952bfd79d Binary files /dev/null and b/pufferlib/resources/drive/binaries/vru_interaction/map_001.bin differ diff --git a/pufferlib/utils.py b/pufferlib/utils.py index 9c6c5185ce..82a06f29f3 100644 --- a/pufferlib/utils.py +++ b/pufferlib/utils.py @@ -62,6 +62,102 @@ def generate_safe_eval_ini(safe_eval_config, base_ini_path="pufferlib/config/oce return tmp_path +def run_driving_behaviour_class_eval_in_subprocess(config, class_name, class_cfg, reward_config, logger, global_step): + """ + Run a single driving behaviour class eval in a subprocess via human replay. + Uses the latest checkpoint and passes class-specific map_dir and reward bounds as CLI args. + Logs results to wandb under driving_behaviours//. + """ + EVAL_SECTIONS_PREFIX = "eval_" + try: + run_id = logger.run_id + model_dir = os.path.join(config["data_dir"], f"{config['env']}_{run_id}") + model_files = glob.glob(os.path.join(model_dir, "model_*.pt")) + + if not model_files: + print(f"[DrivingBehavioursEval] No model files found, skipping {class_name}") + return {} + + latest_cpt = max(model_files, key=os.path.getctime) + + map_dir = class_cfg.get("map_dir", "") + if isinstance(map_dir, str): + map_dir = map_dir.strip('"') + + # control_sdc_only needs exactly 1 agent per sub-env (map). + available_maps = len([f for f in os.listdir(map_dir) if f.endswith(".bin")]) if os.path.isdir(map_dir) else 1 + + cmd = [ + sys.executable, + "-m", + "pufferlib.pufferl", + "eval", + config["env"], + "--load-model-path", + latest_cpt, + "--eval.wosac-realism-eval", + "False", + "--eval.human-replay-eval", + "True", + "--eval.human-replay-control-mode", + "control_sdc_only", + "--eval.hr-map-dir", + map_dir, + # Load exactly the maps available in the eval set. 
+ "--env.num-maps", + str(available_maps), + # One agent per map (control_sdc_only) avoids OOM and matches coverage. + "--eval.human-replay-num-agents", + str(available_maps), + "--env.resample-frequency", + "0", + ] + + # Pass safe reward conditioning: set both min and max to the eval value. + # Use --flag=value form so argparse doesn't mistake negative numbers for flags. + for key, val in reward_config.items(): + flag = f"--env.reward-bound-{key.replace('_', '-')}" + cmd += [f"{flag}-min={val}", f"{flag}-max={val}"] + + print(f"[DrivingBehavioursEval] Running eval for {class_name}") + + result = subprocess.run(cmd, capture_output=True, text=True, timeout=600, cwd=os.getcwd()) + + if result.returncode != 0: + print( + f"[DrivingBehavioursEval] Subprocess failed for {class_name} " + f"(exit {result.returncode}):\n{result.stderr}" + ) + return {} + + stdout = result.stdout + if "HUMAN_REPLAY_METRICS_START" not in stdout or "HUMAN_REPLAY_METRICS_END" not in stdout: + print(f"[DrivingBehavioursEval] No metrics found in subprocess output for {class_name}") + return {} + + start = stdout.find("HUMAN_REPLAY_METRICS_START") + len("HUMAN_REPLAY_METRICS_START") + end = stdout.find("HUMAN_REPLAY_METRICS_END") + metrics = json.loads(stdout[start:end].strip()) + + short = class_name[len(EVAL_SECTIONS_PREFIX) :] + print(f"[DrivingBehavioursEval] {short}: {metrics}") + + if hasattr(logger, "wandb") and logger.wandb: + payload = {f"driving_behaviours/{short}/{k}": float(v) for k, v in metrics.items()} + if global_step is not None: + payload["train_step"] = global_step + logger.wandb.log(payload) + + return metrics + + except subprocess.TimeoutExpired: + print(f"[DrivingBehavioursEval] Subprocess timed out for {class_name}") + return {} + except Exception as e: + print(f"[DrivingBehavioursEval] Failed for {class_name}: {e}") + return {} + + def run_wosac_eval_in_subprocess(config, logger, global_step): """ Run WOSAC evaluation in a subprocess and log metrics to wandb. 
def render_videos(
    config,
    run_id,
    wandb_log,
    epoch,
    global_step,
    bin_path,
    render_async,
    render_queue=None,
    wandb_run=None,
    config_path=None,
    wandb_prefix="render",
    num_maps=None,
    map_dir=None,
):
    """Generate and log training videos using C-based rendering.

    Invokes the ./visualize binary (under xvfb) once per selected map, moves
    the produced mp4s into <model_dir>/videos, then either queues them for the
    async render worker or logs them directly to wandb.

    Args:
        config: Run config mapping; needs "data_dir", "env" and optional
            visualization keys (show_grid, obs_only, show_lasers,
            show_human_logs, zoom_in, frame_skip, view_mode, render_map).
        run_id: Logger run id used to locate the model directory.
        wandb_log: If True (and not async), log videos via wandb_run.
        epoch: Training epoch, used in output filenames.
        global_step: Train step for the wandb x-axis.
        bin_path: Path to the exported binary policy weights.
        render_async: If True, put results on render_queue instead of logging.
        render_queue: Queue for async logging; required when render_async.
        wandb_run: wandb run object for synchronous logging.
        config_path: Optional INI path forwarded to ./visualize via --config.
        wandb_prefix: Key prefix for wandb video entries.
        num_maps: Optional --num-maps override for the visualizer.
        map_dir: Directory to pick a random map from when config has no
            explicit render_map.
    """
    if not os.path.exists(bin_path):
        print(f"Binary weights file does not exist: {bin_path}")
        return

    model_dir = os.path.join(config["data_dir"], f"{config['env']}_{run_id}")

    # Now call the C rendering function
    try:
        # Create output directory for videos
        video_output_dir = os.path.join(model_dir, "videos")
        os.makedirs(video_output_dir, exist_ok=True)

        # TODO: Fix memory leaks so that this is not needed
        # Suppress AddressSanitizer exit code (temp)
        env_vars = os.environ.copy()
        env_vars["ASAN_OPTIONS"] = "exitcode=0"

        # Base command with only visualization flags (env config comes from INI)
        base_cmd = ["xvfb-run", "-a", "-s", "-screen 0 1280x720x24", "./visualize"]

        if config_path:
            base_cmd.extend(["--config", config_path])

        # Visualization config flags only
        if config.get("show_grid", False):
            base_cmd.append("--show-grid")
        if config.get("obs_only", False):
            base_cmd.append("--obs-only")
        if config.get("show_lasers", False):
            base_cmd.append("--lasers")
        if config.get("show_human_logs", False):
            base_cmd.append("--log-trajectories")
        if config.get("zoom_in", False):
            base_cmd.append("--zoom-in")

        # Frame skip for rendering performance
        frame_skip = config.get("frame_skip", 1)
        if frame_skip > 1:
            base_cmd.extend(["--frame-skip", str(frame_skip)])

        # View mode
        view_mode = config.get("view_mode", "both")
        base_cmd.extend(["--view", view_mode])

        if num_maps:
            base_cmd.extend(["--num-maps", str(num_maps)])

        base_cmd.extend(["--policy-name", bin_path])

        # Handle single or multiple map rendering
        render_maps = config.get("render_map", None)
        if render_maps is None or render_maps == "none":
            # No explicit map configured: pick a random map from map_dir.
            if map_dir and os.path.isdir(map_dir):
                bin_files = [f for f in os.listdir(map_dir) if f.endswith(".bin")]
                if bin_files:
                    render_maps = [os.path.join(map_dir, random.choice(bin_files))]
                else:
                    print(f"Warning: No .bin files found in {map_dir}, skipping render")
                    return
            else:
                print(f"Warning: map_dir not found or invalid ({map_dir}), skipping render")
                return
        elif isinstance(render_maps, (str, os.PathLike)):
            render_maps = [render_maps]
        else:
            render_maps = list(render_maps)

        generated_videos = {"output_topdown": [], "output_agent": []}
        file_prefix = wandb_prefix.replace("/", "_")
        output_topdown = f"resources/drive/{file_prefix}_output_topdown_{epoch}"
        output_agent = f"resources/drive/{file_prefix}_output_agent_{epoch}"

        for i, map_path in enumerate(render_maps):
            cmd = list(base_cmd)  # copy
            if os.path.exists(map_path):
                cmd.extend(["--map-name", str(map_path)])

            output_topdown_map = output_topdown + (f"_map{i:02d}.mp4" if len(render_maps) > 1 else ".mp4")
            output_agent_map = output_agent + (f"_map{i:02d}.mp4" if len(render_maps) > 1 else ".mp4")

            cmd.extend(["--output-topdown", output_topdown_map])
            cmd.extend(["--output-agent", output_agent_map])

            print(f"Running render: {' '.join(cmd[:6])}...")
            result = subprocess.run(cmd, cwd=os.getcwd(), capture_output=True, text=True, timeout=1200, env=env_vars)

            vids_exist = os.path.exists(output_topdown_map) and os.path.exists(output_agent_map)
            print(f"Render exit code: {result.returncode}, vids_exist: {vids_exist}")
            if result.returncode != 0 and result.stderr:
                print(f"Render stderr: {result.stderr[-500:]}")

            # Exit code 1 with videos present is tolerated (ASAN leak exit).
            if result.returncode == 0 or (result.returncode == 1 and vids_exist):
                videos = [
                    ("output_topdown", output_topdown_map, f"epoch_{epoch:06d}_map{i:02d}_topdown.mp4"),
                    ("output_agent", output_agent_map, f"epoch_{epoch:06d}_map{i:02d}_agent.mp4"),
                ]

                for vid_type, source_vid, target_filename in videos:
                    if os.path.exists(source_vid):
                        target_path = os.path.join(video_output_dir, target_filename)
                        shutil.move(source_vid, target_path)
                        generated_videos[vid_type].append(target_path)
                    else:
                        print(f"Video generation completed but {source_vid} not found")
                        if result.stdout:
                            print(f"StdOUT: {result.stdout}")
                        if result.stderr:
                            print(f"StdERR: {result.stderr}")
            else:
                print(f"C rendering failed (map index {i}) with exit code {result.returncode}: {result.stderr}")

        if render_async:
            # Guard: render_queue defaults to None; crashing here would lose
            # the videos that were already generated.
            if render_queue is None:
                print("render_async=True but no render_queue provided; skipping video logging")
            else:
                render_queue.put(
                    {
                        "videos": generated_videos,
                        "step": global_step,
                        "prefix": wandb_prefix,
                    }
                )
        elif wandb_log and wandb_run:
            import wandb

            payload = {}
            if generated_videos["output_topdown"]:
                payload[f"{wandb_prefix}/world_state"] = [
                    wandb.Video(p, format="mp4") for p in generated_videos["output_topdown"]
                ]
            if generated_videos["output_agent"]:
                payload[f"{wandb_prefix}/agent_view"] = [
                    wandb.Video(p, format="mp4") for p in generated_videos["output_agent"]
                ]
            if payload:
                print(f"Logging {len(payload)} video keys to wandb: {list(payload.keys())}")
                payload["train_step"] = global_step
                wandb_run.log(payload)

    except subprocess.TimeoutExpired:
        print("C rendering timed out")
    except Exception as e:
        # Best-effort: rendering failures must never crash training.
        print(f"Failed to render videos: {e}")
+ """ + try: + render_videos(**render_kwargs) + finally: + for f in cleanup_files or []: + try: + if os.path.exists(f): + os.remove(f) + except OSError: + pass diff --git a/pufferlib/vector.py b/pufferlib/vector.py index 27001df0e3..0b30d00e34 100644 --- a/pufferlib/vector.py +++ b/pufferlib/vector.py @@ -41,14 +41,14 @@ def send_precheck(vecenv, actions): def reset(vecenv, seed=42): vecenv.async_reset(seed) - obs, rewards, terminals, truncations, infos, env_ids, masks = vecenv.recv() + obs, rewards, terminals, truncations, infos, env_ids, masks, is_invalid_step = vecenv.recv() return obs, infos def step(vecenv, actions): actions = np.asarray(actions) vecenv.send(actions) - obs, rewards, terminals, truncations, infos, env_ids, masks = vecenv.recv() + obs, rewards, terminals, truncations, infos, env_ids, masks, is_invalid_step = vecenv.recv() return obs, rewards, terminals, truncations, infos # include env_ids or no? @@ -549,6 +549,8 @@ def close(self): self.driver_env.close() for p in self.processes: p.terminate() + for p in self.processes: + p.join(timeout=5) class Ray: diff --git a/scripts/render_behavior_evals.sh b/scripts/render_behavior_evals.sh new file mode 100755 index 0000000000..b740c23175 --- /dev/null +++ b/scripts/render_behavior_evals.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# Render behavior eval videos locally using the visualize binary. +# +# Usage: +# bash scripts/render_behavior_evals.sh [output_dir] [view_mode] +# +# Example: +# bash scripts/render_behavior_evals.sh resources/drive/seed99_003815.bin behavior_eval_videos both + +set -euo pipefail + +POLICY_NAME="${1:?Usage: $0 [output_dir] [view_mode]}" +OUTPUT_DIR="${2:-behavior_eval_videos}" +VIEW_MODE="${3:-both}" + +BEHAVIOR_INI="pufferlib/config/ocean/driving_behaviours_eval.ini" +BASE_INI="pufferlib/config/ocean/drive.ini" + +if [ ! -f "$POLICY_NAME" ]; then + echo "Error: policy file not found: $POLICY_NAME" + exit 1 +fi +if [ ! -f ./visualize ]; then + echo "Error: visualize binary not found. 
#!/bin/bash
# Render behavior eval videos locally using the visualize binary.
#
# Usage:
#   bash scripts/render_behavior_evals.sh <policy.bin> [output_dir] [view_mode]
#
# Example:
#   bash scripts/render_behavior_evals.sh resources/drive/seed99_003815.bin behavior_eval_videos both

set -euo pipefail

# Usage message now names the required policy argument.
POLICY_NAME="${1:?Usage: $0 <policy.bin> [output_dir] [view_mode]}"
OUTPUT_DIR="${2:-behavior_eval_videos}"
VIEW_MODE="${3:-both}"

BEHAVIOR_INI="pufferlib/config/ocean/driving_behaviours_eval.ini"
BASE_INI="pufferlib/config/ocean/drive.ini"

if [ ! -f "$POLICY_NAME" ]; then
    echo "Error: policy file not found: $POLICY_NAME"
    exit 1
fi
if [ ! -f ./visualize ]; then
    echo "Error: visualize binary not found. Run: bash scripts/build_ocean.sh visualize fast"
    exit 1
fi

mkdir -p "$OUTPUT_DIR"

# Write reward bound sed commands to a file (avoids shell word-splitting issues)
REWARD_SED_FILE="/tmp/behavior_eval_rewards_$$.sed"
.venv/bin/python3 -c "
import configparser, sys
cp = configparser.ConfigParser(comment_prefixes=(';','#'), inline_comment_prefixes=(';','#'))
cp.read('$BEHAVIOR_INI')
if not cp.has_section('eval_driving_rewards'):
    sys.exit(0)
for key, val in cp.items('eval_driving_rewards'):
    print(f's/reward_bound_{key}_min = .*/reward_bound_{key}_min = {val}/')
    print(f's/reward_bound_{key}_max = .*/reward_bound_{key}_max = {val}/')
" > "$REWARD_SED_FILE"

# Build a temp INI from drive.ini with control_sdc_only + create_all_valid + pinned rewards
make_render_ini() {
    local map_dir="$1"
    local tmp_ini="/tmp/behavior_eval_render_$$.ini"

    sed -e 's/control_mode = .*/control_mode = "control_sdc_only"/' \
        -e 's/init_mode = .*/init_mode = "create_all_valid"/' \
        -e "s|map_dir = .*|map_dir = \"${map_dir}\"|" \
        "$BASE_INI" | sed -f "$REWARD_SED_FILE" > "$tmp_ini"

    echo "$tmp_ini"
}

# Extract class names (skip eval_driving_rewards)
CLASSES=$(grep -E '^\[eval_' "$BEHAVIOR_INI" | grep -v 'eval_driving_rewards' | sed 's/\[eval_//;s/\]//')

for CLASS in $CLASSES; do
    MAP_DIR=$(grep -A5 "^\[eval_${CLASS}\]" "$BEHAVIOR_INI" | grep 'map_dir' | sed 's/.*= *//;s/"//g' | tr -d ' ')

    if [ ! -d "$MAP_DIR" ]; then
        echo "Skipping $CLASS: $MAP_DIR not found"
        continue
    fi

    MAPS=$(find "$MAP_DIR" -name '*.bin' | sort)

    # Guard empty result: `echo "" | wc -l` reports 1, which would run the
    # per-map loop once with an empty path and fail.
    if [ -z "$MAPS" ]; then
        echo "Skipping $CLASS: no .bin maps in $MAP_DIR"
        continue
    fi

    NUM_MAPS=$(echo "$MAPS" | wc -l | tr -d ' ')

    echo ""
    echo "=== $CLASS: $NUM_MAPS maps in $MAP_DIR ==="

    RENDER_INI=$(make_render_ini "$MAP_DIR")

    i=0
    for MAP in $MAPS; do
        i=$((i + 1))
        MAP_BASE=$(basename "$MAP" .bin)

        OUT_TD="$OUTPUT_DIR/${CLASS}_${MAP_BASE}_topdown.mp4"
        OUT_AG="$OUTPUT_DIR/${CLASS}_${MAP_BASE}_agent.mp4"

        printf "  [%d/%d] %s... " "$i" "$NUM_MAPS" "$(basename "$MAP")"

        if ./visualize \
            --config "$RENDER_INI" \
            --map-name "$MAP" \
            --policy-name "$POLICY_NAME" \
            --view "$VIEW_MODE" \
            --output-topdown "$OUT_TD" \
            --output-agent "$OUT_AG" \
            > /dev/null 2>&1; then
            echo "OK"
        else
            echo "FAILED"
        fi
    done

    rm -f "$RENDER_INI"
done

rm -f "$REWARD_SED_FILE"

echo ""
echo "Done. Videos in $OUTPUT_DIR/"