From 89cef8de8a0fd2cf6707bca43cc4fe564901f358 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 18 Apr 2026 21:38:16 -0400 Subject: [PATCH 01/11] Clean inline eval: zero perturbations/dropout, enforce red lights MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - build_eval_overrides: always force perturbations to 0 (safe: no obs shape change). Add a clean= kwarg that additionally zeros road-segment dropout and flips traffic_light_behavior to 1 (stop at red). - _swap_policy_obs_counts: context manager that temporarily aligns the live training policy's obs_{lane,boundary}_segment_count with the eval env. The GigaFlow encoder's lane/boundary encoders are shared MLPs + max-pool over segments — weights are count-invariant, only slicing depends on these counts. So the same training policy runs correctly on a clean env with zero dropout (larger obs buffer) once we swap. - eval_multi_scenarios accepts clean= and wraps the forward loop with the swap when clean is True. - Inline eval call site (multi_scenario_eval) reads eval.clean_eval from the config and plumbs clean= all the way through. - drive.ini: add eval.clean_eval=True default. 
Fixes "no validation metrics": the inline render path (which HAD been running on multi_scenario_render_interval) only populates global_infos when an episode completes, but render_max_steps < scenario_length, so episodes rarely complete and global_infos stays empty. --- pufferlib/config/ocean/drive.ini | 6 ++ pufferlib/pufferl.py | 106 +++++++++++++++++++++++++++---- 2 files changed, 100 insertions(+), 12 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index e71db0260..c2bdd0e3e 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -206,6 +206,12 @@ multi_scenario_render_interval = 250 multi_scenario_render_backend = egl ; Frequency of evaluation during training (in epochs) eval_interval = 25 +; When True, inline eval zeroes road-segment dropout + perturbations and +; enforces red-light stops. Metrics then reflect performance under clean +; conditions. The live training policy is re-aligned to the clean env's +; obs shape via _swap_policy_obs_counts — safe because the GigaFlow +; encoder is count-invariant (shared MLP + max-pool over segments). +clean_eval = True num_agents = 512 ; Batch size for eval_multi_scenarios (number of scenarios per batch) ; Path to dataset used for evaluation diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 33d1f438e..d6b06e13d 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -501,13 +501,20 @@ def train(self): num_agents_eval = self.config["eval"]["num_agents"] map_dir = self.config["eval"]["map_dir"] - # Build eval_overrides using helper function + # Inline eval runs "clean" by default — perturbations + dropout off, + # red-light stops enforced — so the logged validation metrics + # track progress under controlled conditions rather than noisy + # training perturbations. The live training policy's road slicing + # is re-aligned to the clean env at eval time via + # _swap_policy_obs_counts inside eval_multi_scenarios. 
+ clean_eval = self.config["eval"].get("clean_eval", True) eval_overrides = build_eval_overrides( simulation_mode=eval_simulation_mode, num_agents=num_agents_eval, num_scenarios=self.config["eval"]["multi_scenario_num_scenarios"], map_dir=map_dir, num_carla_maps=self.config["eval"].get("num_carla_maps", 8), + clean=clean_eval, ) # Build eval args by applying overrides to training config @@ -546,6 +553,7 @@ def train(self): logger=self.logger, # Pass logger for TensorBoard logging metric_prefix="validation", # Use validation_ prefix quiet=True, # Suppress verbose output during inline eval + clean=clean_eval, ) # Multi-scenario render — independent interval so the heavier render @@ -1864,20 +1872,30 @@ def load_eval_multi_scenarios_config(env_name, model_path=None, eval_overrides=N return args -def build_eval_overrides(simulation_mode, num_agents, num_scenarios, map_dir=None, num_carla_maps=8): +def build_eval_overrides(simulation_mode, num_agents, num_scenarios, map_dir=None, num_carla_maps=8, clean=False): """Build evaluation overrides for a given simulation mode. Args: simulation_mode: "gigaflow" or "replay" num_agents: agent slot budget for evaluation map_dir: replay dataset directory, required for replay mode + clean: if True, run a "clean" eval — zero road-segment dropout and + enforce red-light stops. Only safe when the policy is rebuilt + from the eval env (standalone eval / render_scenario.py). Inline + eval during training reuses the live training policy, whose + encoder was built for the training obs shape; zeroing dropout + there changes the obs shape and triggers a CUDA device-side + assert. Perturbation probabilities (partner_blindness, + phantom_braking) are always forced to zero at eval — they're + pure randomness, they don't change the obs shape, and eval + should be deterministic regardless of clean mode. 
""" # Common reward coefficients (same for both modes) common_env = { "eval_mode": 1, "collision_behavior": 1, "offroad_behavior": 1, - "traffic_light_behavior": 0, + "traffic_light_behavior": 1 if clean else 0, "reward_randomization": False, "reward_vehicle_collision": 3.0, "reward_offroad_collision": 3.0, @@ -1889,15 +1907,22 @@ def build_eval_overrides(simulation_mode, num_agents, num_scenarios, map_dir=Non "reward_lane_align": 0.025, "reward_lane_center": 0.0038, "reward_timestep": 0.000025, - # NOTE: do not override lane_segment_dropout, boundary_segment_dropout, - # or max_{lane,boundary}_segment_observations here. All of these change - # the observation vector shape, and the render path reuses the live - # training policy which was built for the training obs sizes. Setting - # dropout to 0.0 here when training uses >0 causes the eval env to - # produce larger observations than the policy expects, triggering a - # CUDA device-side assert (scatter/gather index out of bounds). + # Always zero perturbations at eval. These don't change obs shape so + # it's safe to force even for inline eval, and a deterministic eval + # is what we want for tracking progress. + "partner_blindness_prob": 0.0, + "phantom_braking_prob": 0.0, + "phantom_braking_trigger_prob": 0.0, } + if clean: + # Dropout changes the obs shape. Only safe when the policy is + # rebuilt from the eval env (standalone eval / render_scenario). + # NEVER pass clean=True from an inline-eval call site — the live + # training policy's encoder was built for the training obs shape. + common_env["lane_segment_dropout"] = 0.0 + common_env["boundary_segment_dropout"] = 0.0 + if simulation_mode == "gigaflow": eval_overrides = { "env": { @@ -1938,6 +1963,46 @@ def build_eval_overrides(simulation_mode, num_agents, num_scenarios, map_dir=Non return eval_overrides +@contextlib.contextmanager +def _swap_policy_obs_counts(policy, vecenv): + """Temporarily align the policy's road-segment slicing with the eval env. 
+ + Training uses dropout > 0 → smaller obs_{lane,boundary}_segment_count. + Clean eval uses dropout = 0 → larger counts, larger obs buffer. The + GigaFlow encoder (lane_encoder / boundary_encoder) is a shared MLP + applied per-segment with max-pool — its weights are count-invariant. + Only the obs-buffer slicing in DriveBackbone.forward depends on these + counts, so we can just swap them for the duration of the eval and the + same training policy works on the larger clean obs. + """ + try: + eval_env = vecenv.driver_env + new_lane = int(eval_env.obs_lane_segment_count) + new_boundary = int(eval_env.obs_boundary_segment_count) + except AttributeError: + # If the eval env doesn't expose these (unknown wrapper), skip the + # swap — forward will still work when training and eval obs shapes + # coincide (clean=False or no dropout configured). + yield + return + + targets = [] + for m in policy.modules(): + if hasattr(m, "obs_lane_segment_count") and hasattr(m, "obs_boundary_segment_count"): + targets.append(m) + + saved = [(m.obs_lane_segment_count, m.obs_boundary_segment_count) for m in targets] + try: + for m in targets: + m.obs_lane_segment_count = new_lane + m.obs_boundary_segment_count = new_boundary + yield + finally: + for m, (orig_lane, orig_boundary) in zip(targets, saved): + m.obs_lane_segment_count = orig_lane + m.obs_boundary_segment_count = orig_boundary + + def verify_scenario_coverage(csv_path: str, num_scenarios: int) -> dict: """ Verify that episode_metrics.csv contains all expected scenarios. 
@@ -2087,7 +2152,14 @@ def _log_eval_metrics(logger, avg_infos, args, metric_prefix, quiet): def eval_multi_scenarios( - env_name, args=None, vecenv=None, policy=None, logger=None, metric_prefix="validation", quiet=False + env_name, + args=None, + vecenv=None, + policy=None, + logger=None, + metric_prefix="validation", + quiet=False, + clean=False, ): t0 = time.time() @@ -2097,14 +2169,20 @@ def eval_multi_scenarios( num_agents_eval = tmp_args["eval"]["num_agents"] map_dir = tmp_args["eval"]["map_dir"] + # CLI standalone entry point: read clean_eval from the eval section + # so users can enable it via --eval.clean-eval. Inline callers pass + # clean= directly and come in through the args-provided branch. + clean_from_config = tmp_args["eval"].get("clean_eval", False) eval_overrides = build_eval_overrides( simulation_mode=tmp_args["eval_simulation"], num_agents=num_agents_eval, num_scenarios=tmp_args["num_scenarios"], map_dir=map_dir, num_carla_maps=tmp_args.get("num_carla_maps", 8), + clean=clean_from_config, ) args = load_eval_multi_scenarios_config(env_name, model_path, eval_overrides) + clean = clean or clean_from_config # Reproducibility — same approach as training seed = args["train"]["seed"] or 42 @@ -2183,7 +2261,11 @@ def eval_multi_scenarios( vecenv.async_reset(42) ob, _, _, _, infos, _, _ = vecenv.recv() - with tqdm(total=num_scenarios, desc="Processing scenarios", disable=quiet) as pbar: + # Clean eval may use different road-dropout than training. The shared + # training policy's obs slicing needs to be aligned with this env; see + # _swap_policy_obs_counts. 
+ swap_ctx = _swap_policy_obs_counts(policy, vecenv) if clean else contextlib.nullcontext() + with swap_ctx, tqdm(total=num_scenarios, desc="Processing scenarios", disable=quiet) as pbar: while scenarios_processed < num_scenarios: # Reset LSTM if args["train"]["use_rnn"]: From d50b26cd0c8af5697e75a9965c665574dcc7f3c2 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 18 Apr 2026 21:53:41 -0400 Subject: [PATCH 02/11] Remove stale _sys_instr debug writes from eval_multi_scenarios MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The _sys_instr.stderr.write instrumentation was added as debug tracing for the render path's close_client sequence. It was also dropped into eval_multi_scenarios by mistake — but _sys_instr is only imported inside eval_multi_scenarios_render, so the non-render eval path crashes with NameError whenever it runs to completion. Just remove the instrumentation from the non-render eval function. Co-Authored-By: Claude Opus 4.7 (1M context) --- pufferlib/pufferl.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index d6b06e13d..6581c6b0c 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -2312,12 +2312,8 @@ def eval_multi_scenarios( print(f"\nTotal evaluation time: {time.time() - t0:.2f} seconds for {num_scenarios} scenarios.") _log_eval_metrics(logger, avg_infos, args, metric_prefix, quiet) - _sys_instr.stderr.write("[render-instr] about to call vecenv.close()\n") - _sys_instr.stderr.flush() # Close vectorized environment to avoid file descriptor leaks vecenv.close() - _sys_instr.stderr.write("[render-instr] vecenv.close() returned\n") - _sys_instr.stderr.flush() def eval_multi_scenarios_render( From 3b8487dc48563c15ad397126438c6c9186c64322 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 19 Apr 2026 19:37:57 -0400 Subject: [PATCH 03/11] Add replay eval block (sibling of multi_scenario_eval) MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Runs inline eval on replay scenarios (nuPlan mini train bins) alongside the gigaflow multi_scenario_eval. Policy controls only the SDC (control_sdc_only) while other agents follow logged trajectories. Metrics log under metric_prefix "validation_replay" so they're distinguishable from the gigaflow "validation" eval. - drive.ini: add [eval].replay_eval + replay_map_dir + replay_num_scenarios + replay_scenario_length (201 for nuPlan duration_s=20 bins) + replay_control_mode + replay_init_steps. - pufferl.py: sibling block in _train that overrides the default WOMD replay scenario_length (91) on top of build_eval_overrides and calls eval_multi_scenarios with metric_prefix="validation_replay". Shares the clean= plumbing and _swap_policy_obs_counts — the replay env with clean=True has a different obs shape from training, and the swap keeps the live training policy usable. Co-Authored-By: Claude Opus 4.7 (1M context) --- pufferlib/config/ocean/drive.ini | 15 +++++++++ pufferlib/pufferl.py | 56 ++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index c2bdd0e3e..159d1455f 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -227,6 +227,21 @@ multi_scenario_num_scenarios = 250 ; Cap the render rollout at this many steps. render_max_steps = 200 backend = PufferEnv +; --- Replay eval (sibling of multi_scenario_eval) --- +; When True, fires a second inline eval on replay scenarios (nuPlan, WOMD) +; at the same cadence as eval_interval. Runs alongside the gigaflow +; multi_scenario_eval, logs under metric_prefix "validation_replay". +; Honors clean_eval same as gigaflow eval. +replay_eval = False +replay_map_dir = "/scratch/ev2237/data/nuplan/nuplan_mini_train_bins" +replay_num_scenarios = 16 +; Full trajectory length in the replay bins. 
nuPlan mini with duration_s=20 +; yields 201 steps (20.1s @ 10Hz); WOMD bins are 91. +replay_scenario_length = 201 +replay_control_mode = "control_sdc_only" +; Timestep to start replay from. Leaves a short pre-roll so agents have +; logged state; policy takes over at this step. +replay_init_steps = 10 ; WOSAC (Waymo Open Sim Agents Challenge) evaluation settings ; If True, enables evaluation on realism metrics each time we save a checkpoint wosac_realism_eval = False diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 6581c6b0c..9b5e81f06 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -556,6 +556,62 @@ def train(self): clean=clean_eval, ) + # Replay eval — sibling of multi_scenario_eval. Runs inline eval on + # replay scenarios (e.g. nuPlan mini train bins) with control_sdc_only + # so the policy drives the SDC while other agents follow their logged + # trajectories. Metrics logged under "validation_replay/". + if self.config["eval"].get("replay_eval") and ( + self.epoch % self.config["eval"]["eval_interval"] == 0 or done_training + ): + clean_eval = self.config["eval"].get("clean_eval", True) + replay_num_scenarios = int(self.config["eval"]["replay_num_scenarios"]) + replay_scenario_length = int(self.config["eval"]["replay_scenario_length"]) + replay_overrides = build_eval_overrides( + simulation_mode="replay", + num_agents=self.config["eval"]["num_agents"], + num_scenarios=replay_num_scenarios, + map_dir=self.config["eval"]["replay_map_dir"], + clean=clean_eval, + ) + # Override defaults that build_eval_overrides sets for WOMD replay + # (scenario_length=91) — nuPlan with duration_s=20 needs 201. 
+ replay_overrides["env"]["scenario_length"] = replay_scenario_length + replay_overrides["env"]["resample_frequency"] = replay_scenario_length + replay_overrides["env"]["control_mode"] = self.config["eval"]["replay_control_mode"] + replay_overrides["env"]["init_steps"] = int(self.config["eval"]["replay_init_steps"]) + + replay_args = load_eval_multi_scenarios_config( + env_name=self.config["env"], + model_path=None, + eval_overrides=replay_overrides, + ) + replay_args["global_step"] = self.global_step + replay_args["num_scenarios"] = replay_num_scenarios + replay_args["eval_simulation"] = "replay" + replay_args["inline_eval"] = True + experiment_name = f"{self.config['env']}_{self.logger.run_id}" + replay_args["load_model_path"] = os.path.join( + self.config["data_dir"], experiment_name, "models", f"inline_epoch_{self.epoch}.pt" + ) + replay_args["eval_results_dir"] = os.path.join( + self.config["data_dir"], + experiment_name, + "validation_replay", + f"epoch_{self.epoch}", + "replay", + ) + print(f"\n🔄 Running replay eval at step {self.global_step}...") + eval_multi_scenarios( + env_name=self.config["env"], + args=replay_args, + vecenv=None, + policy=self.uncompiled_policy, + logger=self.logger, + metric_prefix="validation_replay", + quiet=True, + clean=clean_eval, + ) + # Multi-scenario render — independent interval so the heavier render # path doesn't have to fire every eval_interval. Mirrors the block # above but calls eval_multi_scenarios_render with render=True and From 708a8293872ce476d529139431bf8cfbab4f705d Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 19 Apr 2026 19:42:14 -0400 Subject: [PATCH 04/11] render_scenario: add --no-render mode for aggregate metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When --no-render is set, calls eval_multi_scenarios across every .bin in --map-dir instead of rendering a single scenario to mp4. Produces evaluation_summary.csv in --output-dir. 
Replaces the inline dropout/perturbation-zero overrides with build_eval_overrides(clean=True) — same effect, but centralizes the clean-eval logic in one place. For replay metrics we leave offroad_behavior / collision_behavior at the eval default (=1, terminate on infraction) so the SDC is penalized per normal eval rules. The render path still forces them to 0 so the video shows the full trajectory even when the policy is far off. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/render_scenario.py | 74 ++++++++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 19 deletions(-) diff --git a/scripts/render_scenario.py b/scripts/render_scenario.py index b77d75aab..8848cb82b 100644 --- a/scripts/render_scenario.py +++ b/scripts/render_scenario.py @@ -62,6 +62,17 @@ def main(): default=None, help="Override control mode (default: control_vehicles for gigaflow, control_sdc_only for replay)", ) + parser.add_argument( + "--no-render", + action="store_true", + help="Skip video output and run eval_multi_scenarios to aggregate metrics across all bins in --map-dir. Ignores --map when set.", + ) + parser.add_argument( + "--num-scenarios", + type=int, + default=None, + help="Number of scenarios to evaluate in --no-render mode (defaults to every bin in --map-dir)", + ) cli = parser.parse_args() # Suppress argparse pollution from pufferl's load_config after our own parse @@ -89,6 +100,11 @@ def main(): # Set up a single-map directory if not provided if cli.map_dir: map_dir = os.path.abspath(cli.map_dir) + elif cli.no_render: + # No-render mode requires --map-dir explicitly (we aggregate across the + # whole directory rather than symlink a single bin). 
+ print("Error: --no-render requires --map-dir pointing at a directory of .bin files.") + sys.exit(1) else: map_dir = tempfile.mkdtemp(prefix=f"render_{cli.map}_") # Search for the map .bin in known directories @@ -116,6 +132,7 @@ def main(): from pufferlib.pufferl import ( build_eval_overrides, + eval_multi_scenarios, eval_multi_scenarios_render, load_config, load_eval_multi_scenarios_config, @@ -124,30 +141,30 @@ def main(): env_name = "puffer_drive" tmp_args = load_config(env_name) + if cli.no_render: + num_scenarios = cli.num_scenarios or len([f for f in os.listdir(map_dir) if f.endswith(".bin")]) + else: + num_scenarios = 1 + eval_overrides = build_eval_overrides( simulation_mode=cli.simulation_mode, num_agents=cli.num_eval_agents, - num_scenarios=1, + num_scenarios=num_scenarios, map_dir=map_dir, - num_carla_maps=1, + num_carla_maps=num_scenarios if cli.no_render else 1, + clean=True, ) # Override control_mode and init_steps after build_eval_overrides eval_overrides["env"]["control_mode"] = control_mode eval_overrides["env"]["init_steps"] = init_steps - # Disable robustness perturbations for eval renders — we want to see - # the policy's clean behavior, not randomly blinded or phantom-braking. - eval_overrides["env"]["partner_blindness_prob"] = 0.0 - eval_overrides["env"]["phantom_braking_prob"] = 0.0 - eval_overrides["env"]["phantom_braking_trigger_prob"] = 0.0 - # Use all road segments at eval (no dropout). The obs vector is always - # max-sized now, so dropout=0 just fills more slots without changing shape. - eval_overrides["env"]["lane_segment_dropout"] = 0.0 - eval_overrides["env"]["boundary_segment_dropout"] = 0.0 if cli.simulation_mode == "replay": - # Don't stop/remove the SDC for offroad — let it drive freely so - # the video shows the full trajectory even with a mismatched policy. 
- eval_overrides["env"]["offroad_behavior"] = 0 - eval_overrides["env"]["collision_behavior"] = 0 + # For no-render metrics we want offroad/collision to TERMINATE so + # the SDC is penalized per the normal eval rules. For rendering we + # let the SDC keep driving so the video shows the full trajectory + # even when the policy is far off. + if not cli.no_render: + eval_overrides["env"]["offroad_behavior"] = 0 + eval_overrides["env"]["collision_behavior"] = 0 # Match scenario_length to requested steps so the render loop # doesn't cap at the default 91 from build_eval_overrides. eval_overrides["env"]["scenario_length"] = steps @@ -155,11 +172,9 @@ def main(): args = load_eval_multi_scenarios_config(env_name, cli.checkpoint, eval_overrides) args["load_model_path"] = cli.checkpoint - args["num_scenarios"] = 1 - args["num_carla_maps"] = 1 + args["num_scenarios"] = num_scenarios + args["num_carla_maps"] = num_scenarios if cli.no_render else 1 args["eval_simulation"] = cli.simulation_mode - args["render"] = 1 - args["render_obs"] = 0 args["inline_eval"] = True args["eval_results_dir"] = cli.output_dir @@ -172,6 +187,27 @@ def main(): mode_desc = cli.simulation_mode if cli.simulation_mode == "replay": mode_desc += f" (control_mode={control_mode})" + + if cli.no_render: + print(f"No-render eval | mode={mode_desc} | {steps} steps | {num_scenarios} scenarios") + print(f"Map dir: {map_dir}") + print(f"Checkpoint: {cli.checkpoint}") + print(f"Output: {cli.output_dir}/") + eval_multi_scenarios( + env_name=env_name, + args=dict(args), + vecenv=None, + policy=None, + logger=None, + metric_prefix="eval", + quiet=False, + clean=True, + ) + print(f"\nDone. 
Summary CSV: {cli.output_dir}/evaluation_summary.csv") + return + + args["render"] = 1 + args["render_obs"] = 0 print(f"Rendering {cli.map} | mode={mode_desc} | {steps} steps | view={cli.view}") print(f"Checkpoint: {cli.checkpoint}") print(f"Output: {cli.output_dir}/mp4/") From f26aa19a3757186c69597fd2c0cb57ee2d8cf36a Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 19 Apr 2026 19:43:16 -0400 Subject: [PATCH 05/11] render_scenario: cap no-render workers at physical CPU count Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/render_scenario.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/scripts/render_scenario.py b/scripts/render_scenario.py index 8848cb82b..5fe70d2de 100644 --- a/scripts/render_scenario.py +++ b/scripts/render_scenario.py @@ -56,7 +56,9 @@ def main(): choices=["gigaflow", "replay"], help="Simulation mode: gigaflow (random spawn) or replay (log trajectories, policy controls SDC)", ) - parser.add_argument("--init-steps", type=int, default=None, help="Timestep to start from (default: 0 gigaflow, 10 replay)") + parser.add_argument( + "--init-steps", type=int, default=None, help="Timestep to start from (default: 0 gigaflow, 10 replay)" + ) parser.add_argument( "--control-mode", default=None, @@ -189,7 +191,16 @@ def main(): mode_desc += f" (control_mode={control_mode})" if cli.no_render: - print(f"No-render eval | mode={mode_desc} | {steps} steps | {num_scenarios} scenarios") + # Cap vec workers at physical CPU count — pufferlib rejects + # num_workers > cores (emerge2 has 16 physical cores, ini default 20). 
+ import psutil + + cpu_cores = psutil.cpu_count(logical=False) or 8 + cap = min(cpu_cores, num_scenarios) + args["vec"]["num_envs"] = cap + args["vec"]["num_workers"] = cap + args["vec"]["batch_size"] = cap + print(f"No-render eval | mode={mode_desc} | {steps} steps | {num_scenarios} scenarios | {cap} workers") print(f"Map dir: {map_dir}") print(f"Checkpoint: {cli.checkpoint}") print(f"Output: {cli.output_dir}/") From ad13b64058599d58a835c16f1e25094f78388130 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 19 Apr 2026 22:41:04 -0400 Subject: [PATCH 06/11] render.h: draw waypoints in BEV (obs_only=1) too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit draw_scene gated the waypoint draw loop on obs_only==0, so BEV view (which uses obs_only=1) had no visible goals. Drop the gate — the goal trail is the main reason to watch a BEV render. Co-Authored-By: Claude Opus 4.7 (1M context) --- pufferlib/ocean/drive/render.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pufferlib/ocean/drive/render.h b/pufferlib/ocean/drive/render.h index 1323d39f3..c47c802bc 100644 --- a/pufferlib/ocean/drive/render.h +++ b/pufferlib/ocean/drive/render.h @@ -1215,8 +1215,10 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers, if (!is_active_agent || agent->sim_valid == 0) { continue; } - if (!IsKeyDown(KEY_LEFT_CONTROL) && obs_only == 0) { - // Draw all target waypoints: brightest (first) to darkest (last) + if (!IsKeyDown(KEY_LEFT_CONTROL)) { + // Draw all target waypoints: brightest (first) to darkest (last). + // Drawn in BEV too (obs_only=1) since the goal trail is the main + // thing you want to see when labeling scenarios. 
int num_wp = env->num_target_waypoints; if (num_wp > MAX_TARGET_WAYPOINTS) num_wp = MAX_TARGET_WAYPOINTS; From dbecd2597c00c3e50903220edc627d81d79a35c1 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 19 Apr 2026 23:01:55 -0400 Subject: [PATCH 07/11] Clean eval: bump max_partner_observations to 32, swap on policy too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends _swap_policy_obs_counts to also swap max_partner_observations and max_traffic_control_observations — they're both shared-MLP + max-pool encoders, so swapping the count on the live training policy is safe and lets the policy consume a wider obs buffer at eval. build_eval_overrides(clean=True) now sets max_partner_observations=32 (training default 16). In BEV render the extra partner observations show up as more visible vehicles, matching the clean lane behavior. Co-Authored-By: Claude Opus 4.7 (1M context) --- pufferlib/pufferl.py | 55 +++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 9b5e81f06..52ca3f671 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -1973,11 +1973,14 @@ def build_eval_overrides(simulation_mode, num_agents, num_scenarios, map_dir=Non if clean: # Dropout changes the obs shape. Only safe when the policy is - # rebuilt from the eval env (standalone eval / render_scenario). - # NEVER pass clean=True from an inline-eval call site — the live - # training policy's encoder was built for the training obs shape. + # rebuilt from the eval env (standalone eval / render_scenario), + # OR when the inline caller uses _swap_policy_obs_counts. common_env["lane_segment_dropout"] = 0.0 common_env["boundary_segment_dropout"] = 0.0 + # Clean eval gets a bigger partner budget too — mirrors the lane + # story. 
The partner encoder is shared-MLP + max-pool, so the same + # policy weights handle the larger count via _swap_policy_obs_counts. + common_env["max_partner_observations"] = 32 if simulation_mode == "gigaflow": eval_overrides = { @@ -2019,44 +2022,48 @@ def build_eval_overrides(simulation_mode, num_agents, num_scenarios, map_dir=Non return eval_overrides +_SWAPPABLE_OBS_COUNTS = ( + "obs_lane_segment_count", + "obs_boundary_segment_count", + "max_partner_observations", + "max_traffic_control_observations", +) + + @contextlib.contextmanager def _swap_policy_obs_counts(policy, vecenv): - """Temporarily align the policy's road-segment slicing with the eval env. + """Temporarily align the policy's obs slicing with the eval env. - Training uses dropout > 0 → smaller obs_{lane,boundary}_segment_count. - Clean eval uses dropout = 0 → larger counts, larger obs buffer. The - GigaFlow encoder (lane_encoder / boundary_encoder) is a shared MLP - applied per-segment with max-pool — its weights are count-invariant. + Training may use dropout > 0 or tighter partner/traffic-control caps, + giving smaller counts. Clean eval (or an eval with --max-partner-observations + bumped) produces a larger obs buffer. The GigaFlow encoders for lanes, + boundaries, partners, and traffic controls are all shared MLPs with + max-pool over their element dimension — weights are count-invariant. Only the obs-buffer slicing in DriveBackbone.forward depends on these - counts, so we can just swap them for the duration of the eval and the - same training policy works on the larger clean obs. + counts, so we swap them for the duration of the eval and the same + training policy works on the larger clean obs. 
""" try: eval_env = vecenv.driver_env - new_lane = int(eval_env.obs_lane_segment_count) - new_boundary = int(eval_env.obs_boundary_segment_count) + new_counts = {name: int(getattr(eval_env, name)) for name in _SWAPPABLE_OBS_COUNTS} except AttributeError: # If the eval env doesn't expose these (unknown wrapper), skip the # swap — forward will still work when training and eval obs shapes - # coincide (clean=False or no dropout configured). + # coincide (clean=False or no overrides). yield return - targets = [] - for m in policy.modules(): - if hasattr(m, "obs_lane_segment_count") and hasattr(m, "obs_boundary_segment_count"): - targets.append(m) - - saved = [(m.obs_lane_segment_count, m.obs_boundary_segment_count) for m in targets] + targets = [m for m in policy.modules() if all(hasattr(m, n) for n in _SWAPPABLE_OBS_COUNTS)] + saved = [{n: getattr(m, n) for n in _SWAPPABLE_OBS_COUNTS} for m in targets] try: for m in targets: - m.obs_lane_segment_count = new_lane - m.obs_boundary_segment_count = new_boundary + for n, v in new_counts.items(): + setattr(m, n, v) yield finally: - for m, (orig_lane, orig_boundary) in zip(targets, saved): - m.obs_lane_segment_count = orig_lane - m.obs_boundary_segment_count = orig_boundary + for m, orig in zip(targets, saved): + for n, v in orig.items(): + setattr(m, n, v) def verify_scenario_coverage(csv_path: str, num_scenarios: int) -> dict: From f1479cffe4dc234135040b7cb09339bbfd8af099 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Mon, 20 Apr 2026 08:04:41 -0400 Subject: [PATCH 08/11] driving_behaviours_eval: force clean-eval overrides + nuPlan categories MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit utils.py: run_driving_behaviours_eval_in_subprocess now passes eval_mode=1, traffic_light_behavior=1, zero dropout, zero perturbations, and max_partner_observations=32 — matches build_eval_overrides(clean=True). 
Previously the subprocess re-parsed drive.ini and inherited whatever defaults were there, so eval_mode stayed 0 (randomized TL cycle) and training-time CLI overrides quietly dropped. driving_behaviours_eval.ini: rebuilt around the nuPlan mini-train bins labeled under /scratch/ev2237/data/nuplan/categories/{category}/. Eleven sections (hard_stop, highway_straight, lane_change, merge, parked_cars, roundabout, stopped_traffic, traffic_light_{green,stop}, unprotected_{left,right}). Scenario length 201 for nuPlan duration_s=20. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../config/ocean/driving_behaviours_eval.ini | 70 +++++++++++++------ pufferlib/utils.py | 21 ++++++ 2 files changed, 70 insertions(+), 21 deletions(-) diff --git a/pufferlib/config/ocean/driving_behaviours_eval.ini b/pufferlib/config/ocean/driving_behaviours_eval.ini index 0951ad1b8..e82718c90 100644 --- a/pufferlib/config/ocean/driving_behaviours_eval.ini +++ b/pufferlib/config/ocean/driving_behaviours_eval.ini @@ -1,28 +1,56 @@ -; Configuration for driving behaviour evaluation using human-replay (WOMD) scenarios. -; Evaluates 5 broad driving behaviour classes: -; lead vehicle interaction (longitudinal), lane change (lateral), dense traffic, -; obstacles, vulnerable road user interactions (VRUs). -; Each class section sets the map_dir. num_agents is set automatically from the -; number of .bin files in the directory. +; Configuration for driving behaviour evaluation using nuPlan mini-train +; scenarios labeled by scene type. Each section points at a folder of +; .bin files under /scratch/ev2237/data/nuplan/categories/{category}/. +; ; Eval runs in REPLAY mode (simulation_mode=replay, control_mode=control_sdc_only) -; using the same reward weights as training (no reward conditioning). +; using the same reward weights as training (no reward conditioning). Scenario +; length is 201 (nuPlan with duration_s=20 at 10Hz → 20.1s). 
+; +; Categories with an empty folder are omitted — driving_behaviours_eval errors +; if map_dir has no .bin files. Add new categories by labeling more scenes +; (see scripts/render_scenario.py --view bev) and copying them into the +; corresponding /scratch/ev2237/data/nuplan/categories// folder. -[eval_lead_vehicle_interaction] -map_dir = "pufferlib/resources/drive/binaries/longitudinal" -scenario_length = 91 +[eval_hard_stop] +map_dir = "/scratch/ev2237/data/nuplan/categories/hard_stop" +scenario_length = 201 + +[eval_highway_straight] +map_dir = "/scratch/ev2237/data/nuplan/categories/highway_straight" +scenario_length = 201 [eval_lane_change] -map_dir = "pufferlib/resources/drive/binaries/lateral" -scenario_length = 91 +map_dir = "/scratch/ev2237/data/nuplan/categories/lane_change" +scenario_length = 201 + +[eval_merge] +map_dir = "/scratch/ev2237/data/nuplan/categories/merge" +scenario_length = 201 + +[eval_parked_cars] +map_dir = "/scratch/ev2237/data/nuplan/categories/parked_cars" +scenario_length = 201 + +[eval_roundabout] +map_dir = "/scratch/ev2237/data/nuplan/categories/roundabout" +scenario_length = 201 + +[eval_stopped_traffic] +map_dir = "/scratch/ev2237/data/nuplan/categories/stopped_traffic" +scenario_length = 201 + +[eval_traffic_light_green] +map_dir = "/scratch/ev2237/data/nuplan/categories/traffic_light_green" +scenario_length = 201 -[eval_dense_traffic] -map_dir = "pufferlib/resources/drive/binaries/dense" -scenario_length = 91 +[eval_traffic_light_stop] +map_dir = "/scratch/ev2237/data/nuplan/categories/traffic_light_stop" +scenario_length = 201 -[eval_obstacles] -map_dir = "pufferlib/resources/drive/binaries/obstacles" -scenario_length = 91 +[eval_unprotected_left] +map_dir = "/scratch/ev2237/data/nuplan/categories/unprotected_left" +scenario_length = 201 -[eval_vru_interaction] -map_dir = "pufferlib/resources/drive/binaries/vru" -scenario_length = 91 +[eval_unprotected_right] +map_dir = 
"/scratch/ev2237/data/nuplan/categories/unprotected_right" +scenario_length = 201 diff --git a/pufferlib/utils.py b/pufferlib/utils.py index bb4f10b1d..dc6367e78 100644 --- a/pufferlib/utils.py +++ b/pufferlib/utils.py @@ -231,6 +231,27 @@ def run_driving_behaviours_eval_in_subprocess(config, logger, global_step, behav "create_all_valid", "--env.scenario-length", str(scenario_length), + # Clean-eval overrides. Mirrors build_eval_overrides(clean=True): + # deterministic TL cycle (eval_mode=1), red lights enforced, + # no road-segment dropout, no partner blindness or phantom + # braking, wider partner budget. Subprocess re-parses the ini + # so training-time CLI overrides don't leak in here. + "--env.eval-mode", + "1", + "--env.traffic-light-behavior", + "1", + "--env.lane-segment-dropout", + "0.0", + "--env.boundary-segment-dropout", + "0.0", + "--env.partner-blindness-prob", + "0.0", + "--env.phantom-braking-prob", + "0.0", + "--env.phantom-braking-trigger-prob", + "0.0", + "--env.max-partner-observations", + "32", ] print(f"DrivingBehavioursEval: running class '{short}' with map_dir={map_dir}") From 85df54b461ca5e6b2b62d88ada9c4b368bbb116f Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Mon, 20 Apr 2026 08:14:22 -0400 Subject: [PATCH 09/11] Pin dt=0.1 for all eval paths Training can override dt for curriculum/speed experiments; eval needs to stay at 10Hz so replay-env simulation matches the logged trajectory sample rate. Otherwise waypoints drift against the SDC's actual path. - build_eval_overrides (inline + standalone + render_scenario): dt=0.1 added to common_env so it flows through regardless of clean mode. - run_driving_behaviours_eval_in_subprocess: --env.dt 0.1 added to the subprocess cmd so it overrides whatever drive.ini default / CLI override the parent process had. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- pufferlib/pufferl.py | 5 +++++ pufferlib/utils.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 52ca3f671..aa208ca2b 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -1949,6 +1949,11 @@ def build_eval_overrides(simulation_mode, num_agents, num_scenarios, map_dir=Non # Common reward coefficients (same for both modes) common_env = { "eval_mode": 1, + # Eval always runs at dt=0.1 (10Hz), the rate the logged trajectories + # were sampled at. Training may override dt for curriculum / speed + # experiments, but eval metrics need the logged-rate physics or + # replay positions drift against the real waypoints. + "dt": 0.1, "collision_behavior": 1, "offroad_behavior": 1, "traffic_light_behavior": 1 if clean else 0, diff --git a/pufferlib/utils.py b/pufferlib/utils.py index dc6367e78..cd27a36ac 100644 --- a/pufferlib/utils.py +++ b/pufferlib/utils.py @@ -231,6 +231,11 @@ def run_driving_behaviours_eval_in_subprocess(config, logger, global_step, behav "create_all_valid", "--env.scenario-length", str(scenario_length), + # Pin dt to 0.1 (10Hz). Training may override dt for curriculum + # / speed experiments, but eval metrics need the logged-rate + # physics or replay positions drift against the real waypoints. + "--env.dt", + "0.1", # Clean-eval overrides. Mirrors build_eval_overrides(clean=True): # deterministic TL cycle (eval_mode=1), red lights enforced, # no road-segment dropout, no partner blindness or phantom From 02e9c96565489af349fed7065ec862a8437814e4 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Mon, 20 Apr 2026 22:19:14 -0400 Subject: [PATCH 10/11] Render path: honor clean_eval + score >=3 instead of >=4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pufferl.py: - eval_multi_scenarios_render takes a clean= kwarg, wraps the rollout loop with _swap_policy_obs_counts when set. 
Standalone entry now reads eval.clean_eval from the config. - _render_driving_behaviours builds overrides with clean=True and passes clean=True to eval_multi_scenarios_render. Matches the metric-eval subprocess so the mp4s reflect the same clean conditions the wandb scalars do (no more flashing BEVs from inherited dropout). - _train multi_scenario_render block: same — reads eval.clean_eval, plumbs to build_eval_overrides + eval_multi_scenarios_render. drive.h: - add_log score threshold was hardcoded >=4, but num_target_waypoints=3 caps num_goals_reached at 3, so score was always 0. Changed to >=3. Removes the TODO/FIXME comments. Co-Authored-By: Claude Opus 4.7 (1M context) --- pufferlib/ocean/drive/drive.h | 7 ++++--- pufferlib/pufferl.py | 19 ++++++++++++++++++- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index af51ec260..5e56efbb2 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -2599,9 +2599,10 @@ static void add_log(Drive *env) { env->log.num_waypoints_reached += num_waypoints_reached; int num_goals_reached = env->logs[i].num_goals_reached; env->log.num_goals_reached += num_goals_reached; - // TODO: define better scoring criteria ? - // FIXME - if (num_goals_reached >= 4 && !agent->removed && !agent->stopped) { + // Score: 1 per agent that reached all 3 target waypoints without + // being removed/stopped. Was hardcoded to >=4, unreachable given + // num_target_waypoints=3 in the ini, so score was always 0.
+ if (num_goals_reached >= 3 && !agent->removed && !agent->stopped) { env->log.score += 1.0f; } if (!offroad && !collided && !red_light_violations && num_waypoints_reached < 1) { diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index aa208ca2b..a0a3d5414 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -623,6 +623,7 @@ def train(self): render_simulation_mode = self.config["eval"]["multi_scenario_simulation_mode"] num_agents_render = self.config["eval"]["num_agents"] render_map_dir = self.config["eval"]["map_dir"] + clean_render = self.config["eval"].get("clean_eval", True) render_overrides = build_eval_overrides( simulation_mode=render_simulation_mode, @@ -630,6 +631,7 @@ def train(self): num_scenarios=self.config["eval"]["multi_scenario_num_scenarios"], map_dir=render_map_dir, num_carla_maps=self.config["eval"].get("num_carla_maps", 8), + clean=clean_render, ) render_args = load_eval_multi_scenarios_config( @@ -690,6 +692,7 @@ def train(self): # the mystery ~500-c_render-call abort is properly diagnosed. # Set to 0/negative to disable the cap entirely. render_max_steps=(self.config["eval"].get("render_max_steps", 50) or None), + clean=clean_render, ) except Exception as e: import traceback @@ -1027,11 +1030,17 @@ def _render_driving_behaviours(self, behaviours_config): short = class_name[len(EVAL_SECTIONS_PREFIX) :] num_maps = len([f for f in os.listdir(map_dir) if f.endswith(".bin")]) + # Render under clean-eval conditions (zero dropout, zero + # perturbations, enforced red lights) so the mp4s show what + # the policy does under controlled eval, not the noisy + # training-time perturbations. Matches run_driving_behaviours + # _eval_in_subprocess, so the video matches the metric eval. 
render_overrides = build_eval_overrides( simulation_mode="replay", num_agents=1, num_scenarios=1, map_dir=map_dir, + clean=True, ) render_overrides["env"]["control_mode"] = "control_sdc_only" render_overrides["env"]["num_maps"] = num_maps @@ -1073,6 +1082,7 @@ def _render_driving_behaviours(self, behaviours_config): video_suffix=vsuffix, log_view_label=vlabel, render_max_steps=(self.config["eval"].get("render_max_steps", 50) or None), + clean=True, ) except Exception as e: import traceback @@ -2398,6 +2408,7 @@ def eval_multi_scenarios_render( log_view_label="render", render_max_steps=None, render_key_prefix=None, + clean=False, ): # Set fixed seed for reproducible evaluation np.random.seed(42) @@ -2408,14 +2419,17 @@ def eval_multi_scenarios_render( model_path = tmp_args.get("load_model_path") num_agents_eval = tmp_args["eval"]["num_agents"] map_dir = tmp_args["eval"]["map_dir"] + clean_from_config = tmp_args["eval"].get("clean_eval", False) eval_overrides = build_eval_overrides( simulation_mode=tmp_args["eval_simulation"], num_agents=num_agents_eval, num_scenarios=tmp_args["num_scenarios"], map_dir=map_dir, num_carla_maps=tmp_args.get("num_carla_maps", 8), + clean=clean_from_config, ) args = load_eval_multi_scenarios_config(env_name, model_path, eval_overrides) + clean = clean or clean_from_config backend = args["vec"]["backend"] if backend != "PufferEnv": @@ -2511,7 +2525,10 @@ def eval_multi_scenarios_render( # Serial/Multiprocessing: need vecenv.envs[0] to reach the underlying env. target_env = vecenv if not hasattr(vecenv, "envs") else vecenv.envs[0] - with tqdm(total=num_scenarios, desc="Processing scenarios", disable=quiet) as pbar: + # Align the live training policy's obs slicing with the (potentially + # clean) eval env for the render. Same swap as eval_multi_scenarios. 
+ swap_ctx = _swap_policy_obs_counts(policy, vecenv) if clean else contextlib.nullcontext() + with swap_ctx, tqdm(total=num_scenarios, desc="Processing scenarios", disable=quiet) as pbar: while scenarios_processed < num_scenarios: ob, _ = vecenv.reset() From ff4e0df4f0a7d50be208fa1781ae46ad8f2a01fb Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Tue, 21 Apr 2026 23:29:05 -0400 Subject: [PATCH 11/11] driving_behaviours_eval.ini: point at categories_v021 (0.2.1 reconvert) Co-Authored-By: Claude Opus 4.7 (1M context) --- .../config/ocean/driving_behaviours_eval.ini | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/pufferlib/config/ocean/driving_behaviours_eval.ini b/pufferlib/config/ocean/driving_behaviours_eval.ini index e82718c90..651fc27c0 100644 --- a/pufferlib/config/ocean/driving_behaviours_eval.ini +++ b/pufferlib/config/ocean/driving_behaviours_eval.ini @@ -1,6 +1,6 @@ ; Configuration for driving behaviour evaluation using nuPlan mini-train -; scenarios labeled by scene type. Each section points at a folder of -; .bin files under /scratch/ev2237/data/nuplan/categories/. +; scenarios labeled by scene type. Built from py123d 0.2.1 reconvert of +; .bin files under /scratch/ev2237/data/nuplan/categories_v021/. ; ; Eval runs in REPLAY mode (simulation_mode=replay, control_mode=control_sdc_only) ; using the same reward weights as training (no reward conditioning). Scenario @@ -9,48 +9,48 @@ ; Categories with an empty folder are omitted — driving_behaviours_eval errors ; if map_dir has no .bin files. Add new categories by labeling more scenes ; (see scripts/render_scenario.py --view bev) and copying them into the -; corresponding /scratch/ev2237/data/nuplan/categories// folder. +; corresponding /scratch/ev2237/data/nuplan/categories_v021// folder. 
[eval_hard_stop] -map_dir = "/scratch/ev2237/data/nuplan/categories/hard_stop" +map_dir = "/scratch/ev2237/data/nuplan/categories_v021/hard_stop" scenario_length = 201 [eval_highway_straight] -map_dir = "/scratch/ev2237/data/nuplan/categories/highway_straight" +map_dir = "/scratch/ev2237/data/nuplan/categories_v021/highway_straight" scenario_length = 201 [eval_lane_change] -map_dir = "/scratch/ev2237/data/nuplan/categories/lane_change" +map_dir = "/scratch/ev2237/data/nuplan/categories_v021/lane_change" scenario_length = 201 [eval_merge] -map_dir = "/scratch/ev2237/data/nuplan/categories/merge" +map_dir = "/scratch/ev2237/data/nuplan/categories_v021/merge" scenario_length = 201 [eval_parked_cars] -map_dir = "/scratch/ev2237/data/nuplan/categories/parked_cars" +map_dir = "/scratch/ev2237/data/nuplan/categories_v021/parked_cars" scenario_length = 201 [eval_roundabout] -map_dir = "/scratch/ev2237/data/nuplan/categories/roundabout" +map_dir = "/scratch/ev2237/data/nuplan/categories_v021/roundabout" scenario_length = 201 [eval_stopped_traffic] -map_dir = "/scratch/ev2237/data/nuplan/categories/stopped_traffic" +map_dir = "/scratch/ev2237/data/nuplan/categories_v021/stopped_traffic" scenario_length = 201 [eval_traffic_light_green] -map_dir = "/scratch/ev2237/data/nuplan/categories/traffic_light_green" +map_dir = "/scratch/ev2237/data/nuplan/categories_v021/traffic_light_green" scenario_length = 201 [eval_traffic_light_stop] -map_dir = "/scratch/ev2237/data/nuplan/categories/traffic_light_stop" +map_dir = "/scratch/ev2237/data/nuplan/categories_v021/traffic_light_stop" scenario_length = 201 [eval_unprotected_left] -map_dir = "/scratch/ev2237/data/nuplan/categories/unprotected_left" +map_dir = "/scratch/ev2237/data/nuplan/categories_v021/unprotected_left" scenario_length = 201 [eval_unprotected_right] -map_dir = "/scratch/ev2237/data/nuplan/categories/unprotected_right" +map_dir = "/scratch/ev2237/data/nuplan/categories_v021/unprotected_right" scenario_length = 201