diff --git a/.gitignore b/.gitignore index 717cf27b54..959bedc43d 100644 --- a/.gitignore +++ b/.gitignore @@ -169,7 +169,7 @@ pufferlib/resources/drive/binaries/ pufferlib/resources/drive/binaries/training/ pufferlib/resources/drive/binaries/validation/ -# But keep map_000.bin for the training test +# Keep map_000.bin for the training test !pufferlib/resources/drive/binaries/map_000.bin !pufferlib/resources/drive/binaries/training/map_000.bin pufferlib/resources/drive/sanity/sanity_binaries/ @@ -178,6 +178,13 @@ pufferlib/resources/drive/sanity/sanity_binaries/ !pufferlib/resources/drive/binaries/carla/** !pufferlib/resources/drive/binaries/carla_2D/** +# Keep driving behaviour eval scenario binaries +!pufferlib/resources/drive/binaries/dense_traffic/** +!pufferlib/resources/drive/binaries/lane_change/** +!pufferlib/resources/drive/binaries/obstacles/** +!pufferlib/resources/drive/binaries/vru_interaction/** +!pufferlib/resources/drive/binaries/lead_vehicle_interaction/** + # Compiled drive binary in root /drive /visualize diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 19dced1b81..edceebb691 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -238,12 +238,16 @@ wosac_sanity_check = False wosac_aggregate_results = True ; Evaluation mode: "policy", "ground_truth" wosac_eval_mode = "policy" +; Evaluating different driving behaviours learned by the policy +driving_behaviours_eval = True +driving_behaviours_eval_config = "pufferlib/config/ocean/driving_behaviours_eval.ini" +driving_behaviours_eval_interval = 1000 [safe_eval] ; If True, periodically run policy with safe/law-abiding reward conditioning and log metrics enabled = True ; How often to run safe eval (in training epochs). Defaults to render_interval. 
-interval = 250 +interval = 1000 render_safe_eval = True ; Number of agents to run in the eval environment num_agents = 50 diff --git a/pufferlib/config/ocean/driving_behaviours_eval.ini b/pufferlib/config/ocean/driving_behaviours_eval.ini new file mode 100644 index 0000000000..8458c2402c --- /dev/null +++ b/pufferlib/config/ocean/driving_behaviours_eval.ini @@ -0,0 +1,58 @@ +; Configuration for driving behaviour evaluation maps and rewards. +; Currently evaluates 5 broad driving behaviours: lead vehicle interaction, lane change, dense traffic, obstacles, vulnerable road user interactions (VRUs). +; Currently uses safe reward conditioning values for evaluation + +[eval_lead_vehicle_interaction] +map_dir = "resources/drive/binaries/lead_vehicle_interaction" +human_replay_eval = True +render_eval = True +episode_length = 91 + +[eval_lane_change] +map_dir = "resources/drive/binaries/lane_change" +human_replay_eval = True +render_eval = True +episode_length = 91 + +[eval_dense_traffic] +map_dir = "resources/drive/binaries/dense_traffic" +human_replay_eval = True +render_eval = True +episode_length = 91 + +[eval_obstacles] +map_dir = "resources/drive/binaries/obstacles" +human_replay_eval = True +render_eval = True +episode_length = 91 + +[eval_vru_interaction] +map_dir = "resources/drive/binaries/vru_interaction" +human_replay_eval = True +render_eval = True +episode_length = 91 + +[eval_driving_rewards] +; Reward conditioning values (min=max to fix the value). +; Names match the env reward_bound_* keys. 
+; High penalties for unsafe behavior +collision = -3.0 +offroad = -3.0 +overspeed = -1.0 +traffic_light = -1.0 +reverse = -0.0075 +comfort = -0.1 + +; Standard driving rewards +goal_radius = 8.0 +lane_align = 0.0025 +lane_center = -0.00075 +velocity = 0.005 +center_bias = 0.0 +vel_align = 1.0 +timestep = -0.00005 + +; Neutral scaling factors +throttle = 1.0 +steer = 1.0 +acc = 1.0 diff --git a/pufferlib/ocean/benchmark/evaluator.py b/pufferlib/ocean/benchmark/evaluator.py index 86dd3d0f03..e562fcc5ee 100644 --- a/pufferlib/ocean/benchmark/evaluator.py +++ b/pufferlib/ocean/benchmark/evaluator.py @@ -1346,3 +1346,126 @@ def log_stats(self, global_step=None): if global_step is not None: payload["train_step"] = global_step self.logger.wandb.log(payload) + + +class DrivingBehavioursEvaluator: + """Evaluates a policy on the 5 driving behaviour classes using live in-process weights.""" + + # Sections in driving_behaviours_eval.ini that describe scenario classes + EVAL_SECTIONS_PREFIX = "eval_" + REWARD_SECTION = "eval_driving_rewards" + + def __init__(self, env_name: str, behaviours_config: Dict, device="cuda", logger=None): + self.env_name = env_name + self.behaviours_config = behaviours_config + if isinstance(device, int): + device = f"cuda:{device}" + self.device = device + self.logger = logger + self.reward_config = behaviours_config.get(self.REWARD_SECTION, {}) + self.classes = [ + (name, cfg) + for name, cfg in behaviours_config.items() + if name.startswith(self.EVAL_SECTIONS_PREFIX) and name != self.REWARD_SECTION + ] + + def _build_class_env_config(self, class_cfg: Dict) -> Dict: + """Build env config for one scenario class with fixed reward conditioning.""" + import re + import sys + from pufferlib.pufferl import load_config + + original_argv = sys.argv + sys.argv = ["pufferl"] + try: + eval_config = load_config(self.env_name) + finally: + sys.argv = original_argv + + eval_config["vec"] = dict(backend="PufferEnv", num_envs=1) + eval_config["train"]["device"] = 
self.device + eval_config["env"]["control_mode"] = "control_sdc_only" + eval_config["env"]["init_mode"] = "create_all_valid" + eval_config["env"]["episode_length"] = 91 + eval_config["env"]["resample_frequency"] = 0 + + map_dir = class_cfg.get("map_dir", "") + if isinstance(map_dir, str): + map_dir = map_dir.strip('"') + eval_config["env"]["map_dir"] = map_dir + # Set num_maps to the number of available bins so we cover all scenarios + available_maps = len([f for f in os.listdir(map_dir) if f.endswith(".bin")]) if os.path.isdir(map_dir) else 1 + eval_config["env"]["num_maps"] = available_maps + + # Discover valid reward bound names + valid_bounds = set() + for key in eval_config["env"]: + m = re.match(r"reward_bound_(.+)_min$", key) + if m: + valid_bounds.add(m.group(1)) + + # Fix reward conditioning to eval_driving_rewards values + for key, val in self.reward_config.items(): + if key not in valid_bounds: + continue + eval_config["env"][f"reward_bound_{key}_min"] = float(val) + eval_config["env"][f"reward_bound_{key}_max"] = float(val) + + return eval_config + + def evaluate_class(self, class_cfg: Dict, policy) -> Dict: + """Run human-replay rollouts on all maps in the class map_dir and return averaged metrics.""" + from collections import defaultdict + from pufferlib.pufferl import load_env + + print(f"Evaluating class") + + eval_config = self._build_class_env_config(class_cfg) + num_maps = eval_config["env"]["num_maps"] + print(f"Built eval config for class with map_dir: {eval_config['env']['map_dir']}") + + vecenv = load_env(self.env_name, eval_config) + print(f"Loaded vecenv") + policy.eval() + print(f"Set policy to eval mode") + rollout_evaluator = HumanReplayEvaluator(eval_config) + all_stats = defaultdict(list) + print(f"Starting rollouts for class with {num_maps} maps") + try: + for _ in range(num_maps): + result = rollout_evaluator.rollout(eval_config, vecenv, policy) or {} + for k, v in result.items(): + try: + all_stats[k].append(float(v)) + except 
(TypeError, ValueError): + pass + # Reset for next map + vecenv.reset() + finally: + vecenv.close() + import gc + + gc.collect() + import torch + + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + return {k: float(np.mean(v)) for k, v in all_stats.items() if v} + + def log_stats(self, all_results: Dict[str, Dict], global_step=None): + """Log per-class metrics to wandb under driving_behaviours//.""" + if not (self.logger and hasattr(self.logger, "wandb") and self.logger.wandb): + return + payload = {} + for class_name, metrics in all_results.items(): + short = class_name[len(self.EVAL_SECTIONS_PREFIX) :] + for k, v in metrics.items(): + try: + payload[f"driving_behaviours/{short}/{k}"] = float(v) + except (TypeError, ValueError): + pass + if global_step is not None: + payload["train_step"] = global_step + if payload: + self.logger.wandb.log(payload) diff --git a/pufferlib/ocean/drive/drive.c b/pufferlib/ocean/drive/drive.c index 57f38623ed..efd67ac318 100644 --- a/pufferlib/ocean/drive/drive.c +++ b/pufferlib/ocean/drive/drive.c @@ -130,7 +130,7 @@ void demo(const char *map_name_arg, const char *policy_name_arg, int view_mode, {conf.reward_bound_steer_min, conf.reward_bound_steer_max}, {conf.reward_bound_acc_min, conf.reward_bound_acc_max}, }, - .map_name = "resources/drive/binaries/carla/carla_3D/map_001.bin", + .map_name = "resources/drive/binaries/carla_2D/map_001.bin", .render_mode = RENDER_WINDOW, .partner_obs_radius = conf.partner_obs_radius, }; diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 99c84d750a..4957e37e61 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -2210,7 +2210,9 @@ void set_active_agents(Drive *env) { static_agent_indices[env->static_agent_count] = i; env->static_agent_count++; env->agents[i].active_agent = 0; - if (env->agents[i].mark_as_expert == 1 || env->active_agent_count == env->num_agents) { + + if (env->control_mode == CONTROL_SDC_ONLY || 
env->agents[i].mark_as_expert == 1 || + env->active_agent_count == env->num_agents) { expert_static_agent_indices[env->expert_static_agent_count] = i; env->expert_static_agent_count++; env->agents[i].mark_as_expert = 1; diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 4c66f205d8..c072368089 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -1071,6 +1071,17 @@ def process_all_maps( if not success: print(f" {name}: {error}") + # Write manifest.json mapping each bin to its source JSON + manifest = {} + for i, map_path, binary_path, *_ in tasks: + _, _, success, _ = results[i] + if success: + manifest[f"map_{i:03d}.bin"] = map_path.name + manifest_path = binary_dir / "manifest.json" + with open(manifest_path, "w") as f: + json.dump(manifest, f, indent=2) + print(f"Wrote manifest to {manifest_path} ({len(manifest)} entries)") + def test_performance(timeout=10, atn_cache=1024, num_agents=1024): import time diff --git a/pufferlib/ocean/drive/visualize.c b/pufferlib/ocean/drive/visualize.c index 22f2706367..5ff6ff1d65 100644 --- a/pufferlib/ocean/drive/visualize.c +++ b/pufferlib/ocean/drive/visualize.c @@ -193,11 +193,12 @@ static int make_gif_from_frames(const char *pattern, int fps, const char *palett int eval_gif(const char *map_name, const char *policy_name, int show_grid, int obs_only, int lasers, int show_human_logs, int frame_skip, const char *view_mode, const char *output_topdown, - const char *output_agent, int num_maps, int zoom_in) { + const char *output_agent, int num_maps, int zoom_in, const char *ini_file) { // Parse configuration from INI file env_init_config conf = {0}; - const char *ini_file = "pufferlib/config/ocean/drive.ini"; + if (ini_file == NULL) + ini_file = "pufferlib/config/ocean/drive.ini"; if (ini_parse(ini_file, handler, &conf) < 0) { fprintf(stderr, "Error: Could not load %s. 
Cannot determine environment configuration.\n", ini_file); return -1; @@ -350,25 +351,29 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o char filename_topdown[256]; char filename_agent[256]; - if (output_topdown != NULL && output_agent != NULL) { - strcpy(filename_topdown, output_topdown); - strcpy(filename_agent, output_agent); - } else { - char policy_base[256]; - strcpy(policy_base, policy_name); - *strrchr(policy_base, '.') = '\0'; + char policy_base[256]; + strcpy(policy_base, policy_name); + *strrchr(policy_base, '.') = '\0'; - char map[256]; - strcpy(map, basename((char *)map_name)); - *strrchr(map, '.') = '\0'; + char map[256]; + strcpy(map, basename((char *)map_name)); + *strrchr(map, '.') = '\0'; - char video_dir[256]; - sprintf(video_dir, "%s/video", policy_base); - char mkdir_cmd[512]; - snprintf(mkdir_cmd, sizeof(mkdir_cmd), "mkdir -p \"%s\"", video_dir); - system(mkdir_cmd); + char video_dir[256]; + sprintf(video_dir, "%s/video", policy_base); + char mkdir_cmd[512]; + snprintf(mkdir_cmd, sizeof(mkdir_cmd), "mkdir -p \"%s\"", video_dir); + system(mkdir_cmd); + if (output_topdown != NULL) { + strcpy(filename_topdown, output_topdown); + } else { sprintf(filename_topdown, "%s/video/%s_topdown.mp4", policy_base, map); + } + + if (output_agent != NULL) { + strcpy(filename_agent, output_agent); + } else { sprintf(filename_agent, "%s/video/%s_agent.mp4", policy_base, map); } @@ -555,6 +560,6 @@ int main(int argc, char *argv[]) { } eval_gif(map_name, policy_name, show_grid, obs_only, lasers, show_human_logs, frame_skip, view_mode, output_topdown, - output_agent, num_maps, zoom_in); + output_agent, num_maps, zoom_in, ini_file); return 0; } diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 58e7a8cfcc..462b8fec20 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -60,6 +60,10 @@ import multiprocessing +import copy +import traceback +import gc + signal.signal(signal.SIGINT, lambda sig, frame: 
os._exit(0)) # Assume advantage kernel has been built if CUDA compiler is available @@ -564,10 +568,20 @@ def train(self): ): self._run_safe_eval() + behaviours_eval_enabled = self.config.get("eval", {}).get("driving_behaviours_eval", False) + behaviours_eval_interval = int( + self.config.get("eval", {}).get("driving_behaviours_eval_interval", self.eval_interval) + ) + if ( + is_main + and behaviours_eval_enabled + and behaviours_eval_interval > 0 + and (self.epoch % behaviours_eval_interval == 0 or done_training) + ): + self._run_driving_behaviours_eval() + def _run_safe_eval(self): - """Run safe eval in-process using SafeEvaluator.""" - import copy - import traceback + """Run safe eval in-process using SafeEvaluator, then render videos.""" vecenv = None try: @@ -609,6 +623,111 @@ def _run_safe_eval(self): except Exception: pass + def _run_driving_behaviours_eval(self): + """Run serial driving behaviours evals across all 5 classes, then render videos.""" + behaviours_config = self.config.get("driving_behaviours_eval") + if not behaviours_config: + print("DrivingBehavioursEval: no config loaded, skipping.") + return + + from pufferlib.ocean.benchmark.evaluator import DrivingBehavioursEvaluator + + env_name = self.config["env"] + evaluator = DrivingBehavioursEvaluator( + env_name=env_name, + behaviours_config=behaviours_config, + device=self.config["device"], + logger=self.logger, + ) + print(f"DrivingBehavioursEval: loaded config for {len(evaluator.classes)} classes") + + # Saving checkpoint even though it's not checkpoint interval because eval in subprocess needs it + self.save_checkpoint() + + all_results = {} + num_ran = 0 + + # Evaluate on all driving behaviour classes via subprocess (load_env in training process causes OOM, need to investigate further) + for class_name, class_cfg in evaluator.classes: + if not class_cfg.get("human_replay_eval", False): + continue + short = class_name[len(DrivingBehavioursEvaluator.EVAL_SECTIONS_PREFIX) :] + self.msg = 
f"Running driving behaviours eval: {short}..." + results = pufferlib.utils.run_driving_behaviour_class_eval_in_subprocess( + config=self.config, + class_name=class_name, + class_cfg=class_cfg, + reward_config=evaluator.reward_config, + logger=self.logger, + global_step=self.global_step, + ) + if results: + all_results[class_name] = results + num_ran += 1 + + # Render a video for each driving behaviour class using the new rollout_loop pipeline + for class_name, class_cfg in evaluator.classes: + if not class_cfg.get("render_eval", False): + continue + short = class_name[len(DrivingBehavioursEvaluator.EVAL_SECTIONS_PREFIX) :] + map_dir = class_cfg.get("map_dir", "") + if isinstance(map_dir, str): + map_dir = map_dir.strip('"') + try: + from pufferlib.ocean.drive.rollout import RenderContext, rollout_loop + from pufferlib.ocean.drive.drive import RenderView + import copy as _copy + + render_cfg = _copy.deepcopy(self.full_args) + render_cfg["env"]["map_dir"] = map_dir + render_cfg["env"]["control_mode"] = "control_sdc_only" + render_cfg["env"]["init_mode"] = "create_all_valid" + episode_length = int(class_cfg.get("episode_length", 91)) + render_cfg["env"]["episode_length"] = episode_length + render_cfg["env"]["resample_frequency"] = 0 + render_cfg["env"]["render_mode"] = 1 + render_cfg["vec"] = {"backend": "PufferEnv", "num_envs": 1} + + render_env = load_env(env_name, render_cfg) + try: + rollout_loop( + policy=self.uncompiled_policy, + env=render_env, + device=self.config["device"], + use_rnn=self.config.get("use_rnn", False), + max_steps=episode_length, + render_ctx=RenderContext( + view_mode=RenderView.FULL_SIM_STATE, + env_id=0, + ), + ) + finally: + render_env.close() + + # Log any produced mp4s to wandb + import glob as _glob + + video_files = _glob.glob("*.mp4") + if hasattr(self.logger, "wandb") and self.logger.wandb and video_files: + import wandb + + for p in video_files: + stem = os.path.splitext(os.path.basename(p))[0] + self.logger.wandb.log( + { + 
f"driving_behaviours/{short}/render": wandb.Video( + p, format="mp4", caption=f"scene_{stem}_epoch_{self.epoch}" + ) + } + ) + for p in video_files: + os.remove(p) + except Exception as e: + print(f"DrivingBehavioursEval: render failed for {short}: {e}") + traceback.print_exc() + + self.msg = f"Driving behaviours eval complete: {num_ran}/{len(evaluator.classes)} classes evaluated" + def mean_and_log(self): config = self.config for k in list(self.stats.keys()): @@ -1106,6 +1225,9 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None): env_config=args.get("env", {}), eval=args.get("eval", {}), safe_eval=args.get("safe_eval", {}), + driving_behaviours_eval=args.get("driving_behaviours_eval"), + package=args.get("package"), + vec_config=args.get("vec", {}), ) if "vec" in args and "num_workers" in args["vec"]: train_config["num_workers"] = args["vec"]["num_workers"] @@ -1200,6 +1322,41 @@ def eval(env_name, args=None, vecenv=None, policy=None): vecenv.close() return results_dict + elif human_replay_enabled: + args["env"]["map_dir"] = args["eval"].get( + "hr_map_dir", args["eval"].get("map_dir", "resources/drive/binaries/training") + ) + dataset_name = args["env"]["map_dir"].split("/")[-1] + print(f"Running human replay evaluation with {dataset_name} dataset.\n") + from pufferlib.ocean.benchmark.evaluator import HumanReplayEvaluator + + backend = args["eval"].get("backend", "PufferEnv") + args["env"]["num_agents"] = args["eval"]["human_replay_num_agents"] + + args["vec"] = dict(backend=backend, num_envs=1) + args["env"]["control_mode"] = args["eval"]["human_replay_control_mode"] + args["env"]["init_mode"] = "create_all_valid" # must spawn all agents so non-SDC can follow expert trajectories + args["env"]["episode_length"] = 91 # WOMD scenario length + + vecenv = vecenv or load_env(env_name, args) + policy = policy or load_policy(args, vecenv, env_name) + + print(f"Effective number of scenarios used: {len(vecenv.driver_env.agent_offsets) - 1}") + + 
evaluator = HumanReplayEvaluator(args) + + # Run rollouts with human replays + results = evaluator.rollout(args, vecenv, policy) + + import json + + print("HUMAN_REPLAY_METRICS_START") + print(json.dumps(results)) + print("HUMAN_REPLAY_METRICS_END") + + vecenv.close() + return results + else: # Standard evaluation: Render backend = args["vec"]["backend"] if backend != "PufferEnv": @@ -1575,6 +1732,22 @@ def puffer_type(value): prev[subkey] = value args["train"]["use_rnn"] = args["rnn_name"] is not None + + # Load driving behaviours eval config if specified + behaviours_config_path = args.get("eval", {}).get("driving_behaviours_eval_config") + if behaviours_config_path: + behaviours_config_path = behaviours_config_path.strip('"') + if os.path.exists(behaviours_config_path): + print(f"Loading driving behaviours eval config from {behaviours_config_path}") + bp = configparser.ConfigParser() + bp.read(behaviours_config_path) + behaviours = {} + for section in bp.sections(): + behaviours[section] = {k: puffer_type(v) for k, v in bp[section].items()} + args["driving_behaviours_eval"] = behaviours + else: + print(f"Warning: driving_behaviours_eval_config not found: {behaviours_config_path}") + return args diff --git a/pufferlib/resources/drive/binaries/dense_traffic/manifest.json b/pufferlib/resources/drive/binaries/dense_traffic/manifest.json new file mode 100644 index 0000000000..857bcbea1d --- /dev/null +++ b/pufferlib/resources/drive/binaries/dense_traffic/manifest.json @@ -0,0 +1,8 @@ +{ + "map_000.bin": "tfrecord-00021-of-00150_215.json", + "map_001.bin": "tfrecord-00021-of-00150_24.json", + "map_002.bin": "tfrecord-00061-of-00150_246.json", + "map_003.bin": "tfrecord-00076-of-00150_289.json", + "map_004.bin": "tfrecord-00090-of-00150_179.json", + "map_005.bin": "tfrecord-00105-of-00150_75.json" +} diff --git a/pufferlib/resources/drive/binaries/dense_traffic/map_000.bin b/pufferlib/resources/drive/binaries/dense_traffic/map_000.bin new file mode 100644 index 
0000000000..3b5c9f556f Binary files /dev/null and b/pufferlib/resources/drive/binaries/dense_traffic/map_000.bin differ diff --git a/pufferlib/resources/drive/binaries/dense_traffic/map_001.bin b/pufferlib/resources/drive/binaries/dense_traffic/map_001.bin new file mode 100644 index 0000000000..2abe7f7ce3 Binary files /dev/null and b/pufferlib/resources/drive/binaries/dense_traffic/map_001.bin differ diff --git a/pufferlib/resources/drive/binaries/dense_traffic/map_002.bin b/pufferlib/resources/drive/binaries/dense_traffic/map_002.bin new file mode 100644 index 0000000000..603d68f3fe Binary files /dev/null and b/pufferlib/resources/drive/binaries/dense_traffic/map_002.bin differ diff --git a/pufferlib/resources/drive/binaries/dense_traffic/map_003.bin b/pufferlib/resources/drive/binaries/dense_traffic/map_003.bin new file mode 100644 index 0000000000..4f8065e0c7 Binary files /dev/null and b/pufferlib/resources/drive/binaries/dense_traffic/map_003.bin differ diff --git a/pufferlib/resources/drive/binaries/dense_traffic/map_004.bin b/pufferlib/resources/drive/binaries/dense_traffic/map_004.bin new file mode 100644 index 0000000000..c8a99730e3 Binary files /dev/null and b/pufferlib/resources/drive/binaries/dense_traffic/map_004.bin differ diff --git a/pufferlib/resources/drive/binaries/dense_traffic/map_005.bin b/pufferlib/resources/drive/binaries/dense_traffic/map_005.bin new file mode 100644 index 0000000000..835a2b4b56 Binary files /dev/null and b/pufferlib/resources/drive/binaries/dense_traffic/map_005.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/manifest.json b/pufferlib/resources/drive/binaries/lane_change/manifest.json new file mode 100644 index 0000000000..78a63716c9 --- /dev/null +++ b/pufferlib/resources/drive/binaries/lane_change/manifest.json @@ -0,0 +1,12 @@ +{ + "map_000.bin": "tfrecord-00001-of-00150_0.json", + "map_001.bin": "tfrecord-00028-of-00150_89.json", + "map_002.bin": "tfrecord-00035-of-00150_299.json", + 
"map_003.bin": "tfrecord-00048-of-00150_72.json", + "map_004.bin": "tfrecord-00063-of-00150_29.json", + "map_005.bin": "tfrecord-00066-of-00150_108.json", + "map_006.bin": "tfrecord-00083-of-00150_149.json", + "map_007.bin": "tfrecord-00090-of-00150_52.json", + "map_008.bin": "tfrecord-00113-of-00150_111.json", + "map_009.bin": "tfrecord-00147-of-00150_42.json" +} diff --git a/pufferlib/resources/drive/binaries/lane_change/map_000.bin b/pufferlib/resources/drive/binaries/lane_change/map_000.bin new file mode 100644 index 0000000000..99193881b6 Binary files /dev/null and b/pufferlib/resources/drive/binaries/lane_change/map_000.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/map_001.bin b/pufferlib/resources/drive/binaries/lane_change/map_001.bin new file mode 100644 index 0000000000..ab92678be1 Binary files /dev/null and b/pufferlib/resources/drive/binaries/lane_change/map_001.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/map_002.bin b/pufferlib/resources/drive/binaries/lane_change/map_002.bin new file mode 100644 index 0000000000..9087b5087a Binary files /dev/null and b/pufferlib/resources/drive/binaries/lane_change/map_002.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/map_003.bin b/pufferlib/resources/drive/binaries/lane_change/map_003.bin new file mode 100644 index 0000000000..570c601d6c Binary files /dev/null and b/pufferlib/resources/drive/binaries/lane_change/map_003.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/map_004.bin b/pufferlib/resources/drive/binaries/lane_change/map_004.bin new file mode 100644 index 0000000000..2902691db3 Binary files /dev/null and b/pufferlib/resources/drive/binaries/lane_change/map_004.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/map_005.bin b/pufferlib/resources/drive/binaries/lane_change/map_005.bin new file mode 100644 index 0000000000..686afee43b Binary files /dev/null and 
b/pufferlib/resources/drive/binaries/lane_change/map_005.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/map_006.bin b/pufferlib/resources/drive/binaries/lane_change/map_006.bin new file mode 100644 index 0000000000..de9ea20536 Binary files /dev/null and b/pufferlib/resources/drive/binaries/lane_change/map_006.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/map_007.bin b/pufferlib/resources/drive/binaries/lane_change/map_007.bin new file mode 100644 index 0000000000..f52e107a9c Binary files /dev/null and b/pufferlib/resources/drive/binaries/lane_change/map_007.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/map_008.bin b/pufferlib/resources/drive/binaries/lane_change/map_008.bin new file mode 100644 index 0000000000..706e13f35e Binary files /dev/null and b/pufferlib/resources/drive/binaries/lane_change/map_008.bin differ diff --git a/pufferlib/resources/drive/binaries/lane_change/map_009.bin b/pufferlib/resources/drive/binaries/lane_change/map_009.bin new file mode 100644 index 0000000000..0a8b06b1cf Binary files /dev/null and b/pufferlib/resources/drive/binaries/lane_change/map_009.bin differ diff --git a/pufferlib/resources/drive/binaries/lead_vehicle_interaction/manifest.json b/pufferlib/resources/drive/binaries/lead_vehicle_interaction/manifest.json new file mode 100644 index 0000000000..8ab2f91726 --- /dev/null +++ b/pufferlib/resources/drive/binaries/lead_vehicle_interaction/manifest.json @@ -0,0 +1,5 @@ +{ + "map_000.bin": "tfrecord-00002-of-00150_244.json", + "map_001.bin": "tfrecord-00003-of-00150_94.json", + "map_002.bin": "tfrecord-00048-of-00150_72.json" +} diff --git a/pufferlib/resources/drive/binaries/lead_vehicle_interaction/map_000.bin b/pufferlib/resources/drive/binaries/lead_vehicle_interaction/map_000.bin new file mode 100644 index 0000000000..8fa1ae3136 Binary files /dev/null and b/pufferlib/resources/drive/binaries/lead_vehicle_interaction/map_000.bin differ diff --git 
a/pufferlib/resources/drive/binaries/lead_vehicle_interaction/map_001.bin b/pufferlib/resources/drive/binaries/lead_vehicle_interaction/map_001.bin new file mode 100644 index 0000000000..9d4743ef29 Binary files /dev/null and b/pufferlib/resources/drive/binaries/lead_vehicle_interaction/map_001.bin differ diff --git a/pufferlib/resources/drive/binaries/lead_vehicle_interaction/map_002.bin b/pufferlib/resources/drive/binaries/lead_vehicle_interaction/map_002.bin new file mode 100644 index 0000000000..1632628ef0 Binary files /dev/null and b/pufferlib/resources/drive/binaries/lead_vehicle_interaction/map_002.bin differ diff --git a/pufferlib/resources/drive/binaries/obstacles/manifest.json b/pufferlib/resources/drive/binaries/obstacles/manifest.json new file mode 100644 index 0000000000..092fcb9a7c --- /dev/null +++ b/pufferlib/resources/drive/binaries/obstacles/manifest.json @@ -0,0 +1,3 @@ +{ + "map_000.bin": "tfrecord-00042-of-00150_260.json" +} diff --git a/pufferlib/resources/drive/binaries/obstacles/map_000.bin b/pufferlib/resources/drive/binaries/obstacles/map_000.bin new file mode 100644 index 0000000000..c71166378c Binary files /dev/null and b/pufferlib/resources/drive/binaries/obstacles/map_000.bin differ diff --git a/pufferlib/resources/drive/binaries/vru_interaction/manifest.json b/pufferlib/resources/drive/binaries/vru_interaction/manifest.json new file mode 100644 index 0000000000..1c036af9e6 --- /dev/null +++ b/pufferlib/resources/drive/binaries/vru_interaction/manifest.json @@ -0,0 +1,4 @@ +{ + "map_000.bin": "tfrecord-00010-of-00150_84.json", + "map_001.bin": "tfrecord-00147-of-00150_42.json" +} diff --git a/pufferlib/resources/drive/binaries/vru_interaction/map_000.bin b/pufferlib/resources/drive/binaries/vru_interaction/map_000.bin new file mode 100644 index 0000000000..3dbc4e9491 Binary files /dev/null and b/pufferlib/resources/drive/binaries/vru_interaction/map_000.bin differ diff --git 
a/pufferlib/resources/drive/binaries/vru_interaction/map_001.bin b/pufferlib/resources/drive/binaries/vru_interaction/map_001.bin new file mode 100644 index 0000000000..0952bfd79d Binary files /dev/null and b/pufferlib/resources/drive/binaries/vru_interaction/map_001.bin differ diff --git a/pufferlib/utils.py b/pufferlib/utils.py index 9c6c5185ce..82a06f29f3 100644 --- a/pufferlib/utils.py +++ b/pufferlib/utils.py @@ -62,6 +62,102 @@ def generate_safe_eval_ini(safe_eval_config, base_ini_path="pufferlib/config/oce return tmp_path +def run_driving_behaviour_class_eval_in_subprocess(config, class_name, class_cfg, reward_config, logger, global_step): + """ + Run a single driving behaviour class eval in a subprocess via human replay. + Uses the latest checkpoint and passes class-specific map_dir and reward bounds as CLI args. + Logs results to wandb under driving_behaviours//. + """ + EVAL_SECTIONS_PREFIX = "eval_" + try: + run_id = logger.run_id + model_dir = os.path.join(config["data_dir"], f"{config['env']}_{run_id}") + model_files = glob.glob(os.path.join(model_dir, "model_*.pt")) + + if not model_files: + print(f"[DrivingBehavioursEval] No model files found, skipping {class_name}") + return {} + + latest_cpt = max(model_files, key=os.path.getctime) + + map_dir = class_cfg.get("map_dir", "") + if isinstance(map_dir, str): + map_dir = map_dir.strip('"') + + # control_sdc_only needs exactly 1 agent per sub-env (map). + available_maps = len([f for f in os.listdir(map_dir) if f.endswith(".bin")]) if os.path.isdir(map_dir) else 1 + + cmd = [ + sys.executable, + "-m", + "pufferlib.pufferl", + "eval", + config["env"], + "--load-model-path", + latest_cpt, + "--eval.wosac-realism-eval", + "False", + "--eval.human-replay-eval", + "True", + "--eval.human-replay-control-mode", + "control_sdc_only", + "--eval.hr-map-dir", + map_dir, + # Load exactly the maps available in the eval set. 
+ "--env.num-maps", + str(available_maps), + # One agent per map (control_sdc_only) avoids OOM and matches coverage. + "--eval.human-replay-num-agents", + str(available_maps), + "--env.resample-frequency", + "0", + ] + + # Pass safe reward conditioning: set both min and max to the eval value. + # Use --flag=value form so argparse doesn't mistake negative numbers for flags. + for key, val in reward_config.items(): + flag = f"--env.reward-bound-{key.replace('_', '-')}" + cmd += [f"{flag}-min={val}", f"{flag}-max={val}"] + + print(f"[DrivingBehavioursEval] Running eval for {class_name}") + + result = subprocess.run(cmd, capture_output=True, text=True, timeout=600, cwd=os.getcwd()) + + if result.returncode != 0: + print( + f"[DrivingBehavioursEval] Subprocess failed for {class_name} " + f"(exit {result.returncode}):\n{result.stderr}" + ) + return {} + + stdout = result.stdout + if "HUMAN_REPLAY_METRICS_START" not in stdout or "HUMAN_REPLAY_METRICS_END" not in stdout: + print(f"[DrivingBehavioursEval] No metrics found in subprocess output for {class_name}") + return {} + + start = stdout.find("HUMAN_REPLAY_METRICS_START") + len("HUMAN_REPLAY_METRICS_START") + end = stdout.find("HUMAN_REPLAY_METRICS_END") + metrics = json.loads(stdout[start:end].strip()) + + short = class_name[len(EVAL_SECTIONS_PREFIX) :] + print(f"[DrivingBehavioursEval] {short}: {metrics}") + + if hasattr(logger, "wandb") and logger.wandb: + payload = {f"driving_behaviours/{short}/{k}": float(v) for k, v in metrics.items()} + if global_step is not None: + payload["train_step"] = global_step + logger.wandb.log(payload) + + return metrics + + except subprocess.TimeoutExpired: + print(f"[DrivingBehavioursEval] Subprocess timed out for {class_name}") + return {} + except Exception as e: + print(f"[DrivingBehavioursEval] Failed for {class_name}: {e}") + return {} + + def run_wosac_eval_in_subprocess(config, logger, global_step): """ Run WOSAC evaluation in a subprocess and log metrics to wandb. 
def render_videos(
    config,
    run_id,
    wandb_log,
    epoch,
    global_step,
    bin_path,
    render_async,
    render_queue=None,
    wandb_run=None,
    config_path=None,
    wandb_prefix="render",
    num_maps=None,
    map_dir=None,
):
    """Generate and log training videos using C-based rendering.

    Invokes the ./visualize binary (under xvfb) once per selected map, moves
    the produced mp4s into <model_dir>/videos, then either queues them for the
    async render worker or logs them directly to wandb.

    Args:
        config: Run config mapping; needs "data_dir", "env" and optional
            visualization keys (show_grid, obs_only, show_lasers,
            show_human_logs, zoom_in, frame_skip, view_mode, render_map).
        run_id: Logger run id used to locate the model directory.
        wandb_log: If True (and not async), log videos via wandb_run.
        epoch: Training epoch, used in output filenames.
        global_step: Train step for the wandb x-axis.
        bin_path: Path to the exported binary policy weights.
        render_async: If True, put results on render_queue instead of logging.
        render_queue: Queue for async logging; required when render_async.
        wandb_run: wandb run object for synchronous logging.
        config_path: Optional INI path forwarded to ./visualize via --config.
        wandb_prefix: Key prefix for wandb video entries.
        num_maps: Optional --num-maps override for the visualizer.
        map_dir: Directory to pick a random map from when config has no
            explicit render_map.
    """
    if not os.path.exists(bin_path):
        print(f"Binary weights file does not exist: {bin_path}")
        return

    model_dir = os.path.join(config["data_dir"], f"{config['env']}_{run_id}")

    # Now call the C rendering function
    try:
        # Create output directory for videos
        video_output_dir = os.path.join(model_dir, "videos")
        os.makedirs(video_output_dir, exist_ok=True)

        # TODO: Fix memory leaks so that this is not needed
        # Suppress AddressSanitizer exit code (temp)
        env_vars = os.environ.copy()
        env_vars["ASAN_OPTIONS"] = "exitcode=0"

        # Base command with only visualization flags (env config comes from INI)
        base_cmd = ["xvfb-run", "-a", "-s", "-screen 0 1280x720x24", "./visualize"]

        if config_path:
            base_cmd.extend(["--config", config_path])

        # Visualization config flags only
        if config.get("show_grid", False):
            base_cmd.append("--show-grid")
        if config.get("obs_only", False):
            base_cmd.append("--obs-only")
        if config.get("show_lasers", False):
            base_cmd.append("--lasers")
        if config.get("show_human_logs", False):
            base_cmd.append("--log-trajectories")
        if config.get("zoom_in", False):
            base_cmd.append("--zoom-in")

        # Frame skip for rendering performance
        frame_skip = config.get("frame_skip", 1)
        if frame_skip > 1:
            base_cmd.extend(["--frame-skip", str(frame_skip)])

        # View mode
        view_mode = config.get("view_mode", "both")
        base_cmd.extend(["--view", view_mode])

        if num_maps:
            base_cmd.extend(["--num-maps", str(num_maps)])

        base_cmd.extend(["--policy-name", bin_path])

        # Handle single or multiple map rendering
        render_maps = config.get("render_map", None)
        if render_maps is None or render_maps == "none":
            # No explicit map configured: pick a random map from map_dir.
            if map_dir and os.path.isdir(map_dir):
                bin_files = [f for f in os.listdir(map_dir) if f.endswith(".bin")]
                if bin_files:
                    render_maps = [os.path.join(map_dir, random.choice(bin_files))]
                else:
                    print(f"Warning: No .bin files found in {map_dir}, skipping render")
                    return
            else:
                print(f"Warning: map_dir not found or invalid ({map_dir}), skipping render")
                return
        elif isinstance(render_maps, (str, os.PathLike)):
            render_maps = [render_maps]
        else:
            render_maps = list(render_maps)

        generated_videos = {"output_topdown": [], "output_agent": []}
        file_prefix = wandb_prefix.replace("/", "_")
        output_topdown = f"resources/drive/{file_prefix}_output_topdown_{epoch}"
        output_agent = f"resources/drive/{file_prefix}_output_agent_{epoch}"

        for i, map_path in enumerate(render_maps):
            cmd = list(base_cmd)  # copy
            if os.path.exists(map_path):
                cmd.extend(["--map-name", str(map_path)])

            output_topdown_map = output_topdown + (f"_map{i:02d}.mp4" if len(render_maps) > 1 else ".mp4")
            output_agent_map = output_agent + (f"_map{i:02d}.mp4" if len(render_maps) > 1 else ".mp4")

            cmd.extend(["--output-topdown", output_topdown_map])
            cmd.extend(["--output-agent", output_agent_map])

            print(f"Running render: {' '.join(cmd[:6])}...")
            result = subprocess.run(cmd, cwd=os.getcwd(), capture_output=True, text=True, timeout=1200, env=env_vars)

            vids_exist = os.path.exists(output_topdown_map) and os.path.exists(output_agent_map)
            print(f"Render exit code: {result.returncode}, vids_exist: {vids_exist}")
            if result.returncode != 0 and result.stderr:
                print(f"Render stderr: {result.stderr[-500:]}")

            # Exit code 1 with videos present is tolerated (ASAN leak exit).
            if result.returncode == 0 or (result.returncode == 1 and vids_exist):
                videos = [
                    ("output_topdown", output_topdown_map, f"epoch_{epoch:06d}_map{i:02d}_topdown.mp4"),
                    ("output_agent", output_agent_map, f"epoch_{epoch:06d}_map{i:02d}_agent.mp4"),
                ]

                for vid_type, source_vid, target_filename in videos:
                    if os.path.exists(source_vid):
                        target_path = os.path.join(video_output_dir, target_filename)
                        shutil.move(source_vid, target_path)
                        generated_videos[vid_type].append(target_path)
                    else:
                        print(f"Video generation completed but {source_vid} not found")
                        if result.stdout:
                            print(f"StdOUT: {result.stdout}")
                        if result.stderr:
                            print(f"StdERR: {result.stderr}")
            else:
                print(f"C rendering failed (map index {i}) with exit code {result.returncode}: {result.stderr}")

        if render_async:
            # Guard: render_queue defaults to None; crashing here would lose
            # the videos that were already generated.
            if render_queue is None:
                print("render_async=True but no render_queue provided; skipping video logging")
            else:
                render_queue.put(
                    {
                        "videos": generated_videos,
                        "step": global_step,
                        "prefix": wandb_prefix,
                    }
                )
        elif wandb_log and wandb_run:
            import wandb

            payload = {}
            if generated_videos["output_topdown"]:
                payload[f"{wandb_prefix}/world_state"] = [
                    wandb.Video(p, format="mp4") for p in generated_videos["output_topdown"]
                ]
            if generated_videos["output_agent"]:
                payload[f"{wandb_prefix}/agent_view"] = [
                    wandb.Video(p, format="mp4") for p in generated_videos["output_agent"]
                ]
            if payload:
                print(f"Logging {len(payload)} video keys to wandb: {list(payload.keys())}")
                payload["train_step"] = global_step
                wandb_run.log(payload)

    except subprocess.TimeoutExpired:
        print("C rendering timed out")
    except Exception as e:
        # Best-effort: rendering failures must never crash training.
        print(f"Failed to render videos: {e}")
+ """ + try: + render_videos(**render_kwargs) + finally: + for f in cleanup_files or []: + try: + if os.path.exists(f): + os.remove(f) + except OSError: + pass diff --git a/pufferlib/vector.py b/pufferlib/vector.py index 27001df0e3..0b30d00e34 100644 --- a/pufferlib/vector.py +++ b/pufferlib/vector.py @@ -41,14 +41,14 @@ def send_precheck(vecenv, actions): def reset(vecenv, seed=42): vecenv.async_reset(seed) - obs, rewards, terminals, truncations, infos, env_ids, masks = vecenv.recv() + obs, rewards, terminals, truncations, infos, env_ids, masks, is_invalid_step = vecenv.recv() return obs, infos def step(vecenv, actions): actions = np.asarray(actions) vecenv.send(actions) - obs, rewards, terminals, truncations, infos, env_ids, masks = vecenv.recv() + obs, rewards, terminals, truncations, infos, env_ids, masks, is_invalid_step = vecenv.recv() return obs, rewards, terminals, truncations, infos # include env_ids or no? @@ -549,6 +549,8 @@ def close(self): self.driver_env.close() for p in self.processes: p.terminate() + for p in self.processes: + p.join(timeout=5) class Ray: diff --git a/scripts/render_behavior_evals.sh b/scripts/render_behavior_evals.sh new file mode 100755 index 0000000000..b740c23175 --- /dev/null +++ b/scripts/render_behavior_evals.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# Render behavior eval videos locally using the visualize binary. +# +# Usage: +# bash scripts/render_behavior_evals.sh [output_dir] [view_mode] +# +# Example: +# bash scripts/render_behavior_evals.sh resources/drive/seed99_003815.bin behavior_eval_videos both + +set -euo pipefail + +POLICY_NAME="${1:?Usage: $0 [output_dir] [view_mode]}" +OUTPUT_DIR="${2:-behavior_eval_videos}" +VIEW_MODE="${3:-both}" + +BEHAVIOR_INI="pufferlib/config/ocean/driving_behaviours_eval.ini" +BASE_INI="pufferlib/config/ocean/drive.ini" + +if [ ! -f "$POLICY_NAME" ]; then + echo "Error: policy file not found: $POLICY_NAME" + exit 1 +fi +if [ ! -f ./visualize ]; then + echo "Error: visualize binary not found. 
#!/bin/bash
# Render behavior eval videos locally using the visualize binary.
#
# Usage:
#   bash scripts/render_behavior_evals.sh <policy.bin> [output_dir] [view_mode]
#
# Example:
#   bash scripts/render_behavior_evals.sh resources/drive/seed99_003815.bin behavior_eval_videos both

set -euo pipefail

# Usage message now names the required policy argument.
POLICY_NAME="${1:?Usage: $0 <policy.bin> [output_dir] [view_mode]}"
OUTPUT_DIR="${2:-behavior_eval_videos}"
VIEW_MODE="${3:-both}"

BEHAVIOR_INI="pufferlib/config/ocean/driving_behaviours_eval.ini"
BASE_INI="pufferlib/config/ocean/drive.ini"

if [ ! -f "$POLICY_NAME" ]; then
    echo "Error: policy file not found: $POLICY_NAME"
    exit 1
fi
if [ ! -f ./visualize ]; then
    echo "Error: visualize binary not found. Run: bash scripts/build_ocean.sh visualize fast"
    exit 1
fi

mkdir -p "$OUTPUT_DIR"

# Write reward bound sed commands to a file (avoids shell word-splitting issues)
REWARD_SED_FILE="/tmp/behavior_eval_rewards_$$.sed"
.venv/bin/python3 -c "
import configparser, sys
cp = configparser.ConfigParser(comment_prefixes=(';','#'), inline_comment_prefixes=(';','#'))
cp.read('$BEHAVIOR_INI')
if not cp.has_section('eval_driving_rewards'):
    sys.exit(0)
for key, val in cp.items('eval_driving_rewards'):
    print(f's/reward_bound_{key}_min = .*/reward_bound_{key}_min = {val}/')
    print(f's/reward_bound_{key}_max = .*/reward_bound_{key}_max = {val}/')
" > "$REWARD_SED_FILE"

# Build a temp INI from drive.ini with control_sdc_only + create_all_valid + pinned rewards
make_render_ini() {
    local map_dir="$1"
    local tmp_ini="/tmp/behavior_eval_render_$$.ini"

    sed -e 's/control_mode = .*/control_mode = "control_sdc_only"/' \
        -e 's/init_mode = .*/init_mode = "create_all_valid"/' \
        -e "s|map_dir = .*|map_dir = \"${map_dir}\"|" \
        "$BASE_INI" | sed -f "$REWARD_SED_FILE" > "$tmp_ini"

    echo "$tmp_ini"
}

# Extract class names (skip eval_driving_rewards)
CLASSES=$(grep -E '^\[eval_' "$BEHAVIOR_INI" | grep -v 'eval_driving_rewards' | sed 's/\[eval_//;s/\]//')

for CLASS in $CLASSES; do
    MAP_DIR=$(grep -A5 "^\[eval_${CLASS}\]" "$BEHAVIOR_INI" | grep 'map_dir' | sed 's/.*= *//;s/"//g' | tr -d ' ')

    if [ ! -d "$MAP_DIR" ]; then
        echo "Skipping $CLASS: $MAP_DIR not found"
        continue
    fi

    MAPS=$(find "$MAP_DIR" -name '*.bin' | sort)

    # Guard empty result: `echo "" | wc -l` reports 1, which would run the
    # per-map loop once with an empty path and fail.
    if [ -z "$MAPS" ]; then
        echo "Skipping $CLASS: no .bin maps in $MAP_DIR"
        continue
    fi

    NUM_MAPS=$(echo "$MAPS" | wc -l | tr -d ' ')

    echo ""
    echo "=== $CLASS: $NUM_MAPS maps in $MAP_DIR ==="

    RENDER_INI=$(make_render_ini "$MAP_DIR")

    i=0
    for MAP in $MAPS; do
        i=$((i + 1))
        MAP_BASE=$(basename "$MAP" .bin)

        OUT_TD="$OUTPUT_DIR/${CLASS}_${MAP_BASE}_topdown.mp4"
        OUT_AG="$OUTPUT_DIR/${CLASS}_${MAP_BASE}_agent.mp4"

        printf "  [%d/%d] %s... " "$i" "$NUM_MAPS" "$(basename "$MAP")"

        if ./visualize \
            --config "$RENDER_INI" \
            --map-name "$MAP" \
            --policy-name "$POLICY_NAME" \
            --view "$VIEW_MODE" \
            --output-topdown "$OUT_TD" \
            --output-agent "$OUT_AG" \
            > /dev/null 2>&1; then
            echo "OK"
        else
            echo "FAILED"
        fi
    done

    rm -f "$RENDER_INI"
done

rm -f "$REWARD_SED_FILE"

echo ""
echo "Done. Videos in $OUTPUT_DIR/"