diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 1aa203119e..1c10b5f71c 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -188,6 +188,14 @@ wosac_aggregate_results = True human_replay_eval = False ; Control only the self-driving car human_replay_control_mode = "control_sdc_only" +; If True, render evaluation videos at eval_render_interval +eval_render_enabled = False +; How often to render evaluation videos (in epochs) +eval_render_interval = 1000 +; Number of maps to render during evaluation +eval_render_num_maps = 3 +; Show expert trajectories in evaluation renders +eval_render_show_human_logs = True [render] ; Mode to render a bunch of maps with a given policy diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 519bf5cd98..c32d1f046d 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -1518,7 +1518,8 @@ void set_active_agents(Drive *env) { static_agent_indices[env->static_agent_count] = i; env->static_agent_count++; env->agents[i].active_agent = 0; - if (env->agents[i].mark_as_expert == 1 || env->active_agent_count == env->num_agents) { + if (env->control_mode == CONTROL_SDC_ONLY || env->agents[i].mark_as_expert == 1 || + env->active_agent_count == env->num_agents) { expert_static_agent_indices[env->expert_static_agent_count] = i; env->expert_static_agent_count++; env->agents[i].mark_as_expert = 1; diff --git a/pufferlib/ocean/drive/visualize.c b/pufferlib/ocean/drive/visualize.c index b9183b45c1..1e3aaa6e5c 100644 --- a/pufferlib/ocean/drive/visualize.c +++ b/pufferlib/ocean/drive/visualize.c @@ -101,6 +101,32 @@ void renderTopDownView(Drive *env, Client *client, int map_height, int obs, int Vector3 prev_point = {0}; bool has_prev = false; + Agent *agent = &env->agents[idx]; + for (int j = 0; j < agent->trajectory_length; j++) { + float x = agent->log_trajectory_x[j]; + float y = agent->log_trajectory_y[j]; + float valid = 
agent->log_valid[j]; + + if (!valid) { + has_prev = false; + continue; + } + + Vector3 curr_point = {x, y, 0.5f}; + + if (has_prev) { + DrawLine3D(prev_point, curr_point, Fade(LIGHTGREEN, 0.6f)); + } + + prev_point = curr_point; + has_prev = true; + } + } + for (int i = 0; i < env->expert_static_agent_count; i++) { + int idx = env->expert_static_agent_indices[i]; + Vector3 prev_point = {0}; + bool has_prev = false; + Agent *agent = &env->agents[idx]; for (int j = 0; j < agent->trajectory_length; j++) { float x = agent->log_trajectory_x[j]; @@ -193,7 +219,7 @@ static int make_gif_from_frames(const char *pattern, int fps, const char *palett int eval_gif(const char *map_name, const char *policy_name, int show_grid, int obs_only, int lasers, int show_human_logs, int frame_skip, const char *view_mode, const char *output_topdown, - const char *output_agent, int num_maps, int zoom_in) { + const char *output_agent, int num_maps, int zoom_in, const char *control_mode_override) { // Parse configuration from INI file env_init_config conf = {0}; @@ -228,6 +254,22 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o } fclose(policy_file); + // Override control_mode if specified via CLI + int control_mode_int = conf.control_mode; + if (control_mode_override != NULL) { + if (strcmp(control_mode_override, "control_vehicles") == 0) { + control_mode_int = 0; + } else if (strcmp(control_mode_override, "control_agents") == 0) { + control_mode_int = 1; + } else if (strcmp(control_mode_override, "control_wosac") == 0) { + control_mode_int = 2; + } else if (strcmp(control_mode_override, "control_sdc_only") == 0) { + control_mode_int = 3; + } else { + fprintf(stderr, "Warning: Unknown control mode '%s', using config value\n", control_mode_override); + } + } + // Initialize environment with all config values from INI [env] section Drive env = { .action_type = conf.action_type, @@ -253,7 +295,7 @@ int eval_gif(const char *map_name, const char *policy_name, int 
show_grid, int o .offroad_behavior = conf.offroad_behavior, .init_steps = conf.init_steps, .init_mode = conf.init_mode, - .control_mode = conf.control_mode, + .control_mode = control_mode_int, // Use overridden or config value .reward_bounds = { {conf.reward_bound_goal_radius_min, conf.reward_bound_goal_radius_max}, @@ -275,7 +317,7 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o }, .map_name = (char *)map_name, }; - + printf("Control Mode : %.2d\n", control_mode_int); allocate(&env); // Check if map has any active agents @@ -323,6 +365,7 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o Weights *weights = load_weights(policy_name); printf("Active agents in map: %d\n", env.active_agent_count); + printf("Static expert agents in the map :%d\n", env.expert_static_agent_count); DriveNet *net = init_drivenet(weights, env.active_agent_count, env.dynamics_model, env.reward_conditioning); int frame_count = env.episode_length > 0 ? 
env.episode_length : TRAJECTORY_LENGTH_DEFAULT; @@ -446,6 +489,7 @@ int main(int argc, char *argv[]) { int frame_skip = 1; int zoom_in = 0; const char *view_mode = "both"; + const char *control_mode_override = NULL; // File paths and num_maps (not in [env] section) const char *map_name = NULL; @@ -518,10 +562,18 @@ int main(int argc, char *argv[]) { num_maps = atoi(argv[i + 1]); i++; } + } else if (strcmp(argv[i], "--control-mode") == 0) { + if (i + 1 < argc) { + control_mode_override = argv[i + 1]; + i++; + } else { + fprintf(stderr, "Error: --control-mode option requires a value\n"); + return 1; + } } } eval_gif(map_name, policy_name, show_grid, obs_only, lasers, show_human_logs, frame_skip, view_mode, output_topdown, - output_agent, num_maps, zoom_in); + output_agent, num_maps, zoom_in, control_mode_override); return 0; } diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 1c80de553b..15257cd88b 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -587,6 +587,69 @@ def train(self): ): pufferlib.utils.run_human_replay_eval_in_subprocess(self.config, self.logger, self.global_step) + # Render evaluation videos showing policy controlling only SDC with human replays + if self.config["eval"].get("eval_render_enabled", False) and ( + self.epoch % self.config["eval"].get("eval_render_interval", 1000) == 0 or done_training + ): + model_dir = os.path.join(self.config["data_dir"], f"{self.config['env']}_{self.logger.run_id}") + model_files = glob.glob(os.path.join(model_dir, "model_*.pt")) + + if model_files: + # Take the latest checkpoint + latest_cpt = max(model_files, key=os.path.getctime) + bin_path = f"{model_dir}.bin" + + # Export to .bin for rendering with raylib + try: + export_args = {"env_name": self.config["env"], "load_model_path": latest_cpt, **self.config} + + export( + args=export_args, + env_name=self.config["env"], + vecenv=self.vecenv, + policy=self.uncompiled_policy, + path=bin_path, + silent=True, + ) + + bin_path_epoch = 
f"{model_dir}_epoch_{self.epoch:06d}.bin" + shutil.copy2(bin_path, bin_path_epoch) + + env_cfg = getattr(self.vecenv, "driver_env", None) + wandb_log = True if hasattr(self.logger, "wandb") and self.logger.wandb else False + wandb_run = self.logger.wandb if hasattr(self.logger, "wandb") else None + if self.render_async: + render_proc = multiprocessing.Process( + target=pufferlib.utils.render_eval_videos, + args=( + self.config, + env_cfg, + self.logger.run_id, + wandb_log, + self.epoch, + self.global_step, + bin_path, + self.render_async, + self.render_queue, + ), + ) + render_proc.start() + self.render_processes.append(render_proc) + else: + pufferlib.utils.render_eval_videos( + self.config, + env_cfg, + self.logger.run_id, + wandb_log, + self.epoch, + self.global_step, + bin_path, + self.render_async, + wandb_run=wandb_run, + ) + except Exception as e: + print(f"Failed to render evaluation videos: {e}") + def check_render_queue(self): """Check if any async render jobs finished and log them.""" if not self.render_async or not hasattr(self, "render_queue"): @@ -597,21 +660,39 @@ def check_render_queue(self): result = self.render_queue.get_nowait() step = result["step"] videos = result["videos"] + is_eval = result.get("eval", False) # Check if these are eval videos # Log to wandb if available if hasattr(self.logger, "wandb") and self.logger.wandb: import wandb payload = {} - if videos["output_topdown"]: - payload["render/world_state"] = [wandb.Video(p, format="mp4") for p in videos["output_topdown"]] - if videos["output_agent"]: - payload["render/agent_view"] = [wandb.Video(p, format="mp4") for p in videos["output_agent"]] + if is_eval: + # Use eval_render namespace for eval videos + if videos.get("output_topdown"): + payload["eval_render/world_state"] = [ + wandb.Video(p, format="mp4") for p in videos["output_topdown"] + ] + if videos.get("output_agent"): + payload["eval_render/agent_view"] = [ + wandb.Video(p, format="mp4") for p in videos["output_agent"] + ] + 
def _build_eval_render_base_cmd(eval_config, env_cfg, bin_path):
    """Assemble the `./visualize` argument list shared by every eval map."""
    cmd = ["xvfb-run", "-a", "-s", "-screen 0 1280x720x24", "./visualize"]

    # The control mode is passed explicitly on the command line so the eval
    # render does not depend on whatever control mode the INI file holds.
    control_mode = eval_config.get("human_replay_control_mode", "control_sdc_only")
    cmd.extend(["--control-mode", control_mode])

    # (config key, default when the key is absent, CLI flag)
    flag_table = (
        ("show_grid", True, "--show-grid"),
        ("obs_only", True, "--obs-only"),
        ("show_lasers", False, "--lasers"),
        ("eval_render_show_human_logs", True, "--log-trajectories"),
        ("zoom_in", True, "--zoom-in"),
    )
    for key, default, flag in flag_table:
        if eval_config.get(key, default):
            cmd.append(flag)

    frame_skip = eval_config.get("frame_skip", 1)
    if frame_skip > 1:
        cmd.extend(["--frame-skip", str(frame_skip)])

    cmd.extend(["--view", "both"])

    if env_cfg is not None and getattr(env_cfg, "num_maps", None):
        cmd.extend(["--num-maps", str(env_cfg.num_maps)])

    cmd.extend(["--policy-name", bin_path])
    return cmd


def _select_eval_render_maps(eval_config):
    """Return a random sample of `.bin` map paths, or None when none are available."""
    import random

    num_eval_maps = eval_config.get("eval_render_num_maps", 3)
    eval_map_dir = eval_config.get("map_dir", "resources/drive/binaries/training")

    if not os.path.isdir(eval_map_dir):
        print(f"Warning: eval map_dir not found ({eval_map_dir}), skipping eval render")
        return None

    bin_files = [f for f in os.listdir(eval_map_dir) if f.endswith(".bin")]
    if not bin_files:
        print(f"Warning: No .bin files found in {eval_map_dir}, skipping eval render")
        return None

    selected = random.sample(bin_files, min(num_eval_maps, len(bin_files)))
    return [os.path.join(eval_map_dir, f) for f in selected]


def render_eval_videos(
    config, env_cfg, run_id, wandb_log, epoch, global_step, bin_path, render_async, render_queue=None, wandb_run=None
):
    """Render evaluation videos for a few randomly chosen maps and report them.

    For each selected map the `./visualize` binary is run under `xvfb-run`.
    Videos that were produced are moved into
    `<data_dir>/<env>_<run_id>/eval_videos`. They are then either pushed onto
    `render_queue` (async mode) or logged through `wandb_run` (sync mode).

    Args:
        config: Mapping read for `data_dir`, `env` and the optional `eval` section.
        env_cfg: Optional object; only its `num_maps` attribute is read.
        run_id: Identifier used to build the per-run output directory name.
        wandb_log: When true (and not async), videos are logged via `wandb_run`.
        epoch: Integer epoch, embedded in the video file names.
        global_step: Step attached to the queue message / wandb log call.
        bin_path: Exported policy weights passed as `--policy-name`. The file is
            deleted before returning whenever it existed on entry.
        render_async: When true, results are put on `render_queue` instead of
            being logged here.
        render_queue: Object with a `put` method; required when `render_async`.
        wandb_run: Object with a `log(payload, step=...)` method; required for
            synchronous wandb logging.

    Returns:
        None. Failures are reported on stdout rather than raised.
    """
    if not os.path.exists(bin_path):
        print(f"Binary weights file does not exist: {bin_path}")
        return

    model_dir = os.path.join(config["data_dir"], f"{config['env']}_{run_id}")
    eval_config = config.get("eval", {})

    try:
        eval_video_output_dir = os.path.join(model_dir, "eval_videos")
        os.makedirs(eval_video_output_dir, exist_ok=True)

        # Suppress AddressSanitizer exit code (temp)
        env_vars = os.environ.copy()
        env_vars["ASAN_OPTIONS"] = "exitcode=0"

        base_cmd = _build_eval_render_base_cmd(eval_config, env_cfg, bin_path)

        eval_render_maps = _select_eval_render_maps(eval_config)
        if not eval_render_maps:
            return

        generated_videos = {"output_topdown": [], "output_agent": []}

        for i, map_path in enumerate(eval_render_maps):
            # Strip only the extension; a plain replace would also remove
            # any ".bin" occurring inside the map name.
            map_basename = os.path.splitext(os.path.basename(map_path))[0]
            output_topdown_map = f"resources/drive/eval_topdown_{epoch}_map{i:02d}_{map_basename}.mp4"
            output_agent_map = f"resources/drive/eval_agent_{epoch}_map{i:02d}_{map_basename}.mp4"

            cmd = [
                *base_cmd,
                "--map-name",
                str(map_path),
                "--output-topdown",
                output_topdown_map,
                "--output-agent",
                output_agent_map,
            ]

            print(f"Rendering eval video {i + 1}/{len(eval_render_maps)}: {map_basename}")
            try:
                result = subprocess.run(
                    cmd, cwd=os.getcwd(), capture_output=True, text=True, timeout=600, env=env_vars
                )
            except subprocess.TimeoutExpired:
                # Stop rendering further maps, but still report the videos
                # that earlier maps already produced.
                print("C eval rendering timed out")
                break

            vids_exist = os.path.exists(output_topdown_map) and os.path.exists(output_agent_map)

            if result.returncode == 0 or (result.returncode == 1 and vids_exist):
                videos = [
                    (
                        "output_topdown",
                        output_topdown_map,
                        f"eval_epoch_{epoch:06d}_map{i:02d}_{map_basename}_topdown.mp4",
                    ),
                    (
                        "output_agent",
                        output_agent_map,
                        f"eval_epoch_{epoch:06d}_map{i:02d}_{map_basename}_agent.mp4",
                    ),
                ]

                for vid_type, source_vid, target_filename in videos:
                    if os.path.exists(source_vid):
                        target_path = os.path.join(eval_video_output_dir, target_filename)
                        shutil.move(source_vid, target_path)
                        generated_videos[vid_type].append(target_path)
                    else:
                        print(f"Eval video generation completed but {source_vid} not found")
                        if result.stdout:
                            print(f"StdOUT: {result.stdout}")
                        if result.stderr:
                            print(f"StdERR: {result.stderr}")
            else:
                print(f"C eval rendering failed (map index {i}) with exit code {result.returncode}")
                if result.stdout:
                    print(f"StdOUT: {result.stdout}")
                if result.stderr:
                    print(f"StdERR: {result.stderr}")

        if render_async:
            if render_queue is None:
                print("Warning: render_async is set but no render_queue was provided, eval videos not reported")
            else:
                render_queue.put(
                    {
                        "videos": generated_videos,
                        "step": global_step,
                        "eval": True,  # Mark as eval videos
                    }
                )
        elif wandb_log and (generated_videos["output_topdown"] or generated_videos["output_agent"]):
            if wandb_run is None:
                print("Warning: wandb_log is set but no wandb_run was provided, eval videos not logged")
            else:
                import wandb

                # Route by video type, never by file name: the file name embeds
                # the map name, which may itself contain "topdown".
                videos_to_log_world = [wandb.Video(p, format="mp4") for p in generated_videos["output_topdown"]]
                videos_to_log_agent = [wandb.Video(p, format="mp4") for p in generated_videos["output_agent"]]

                payload = {}
                if videos_to_log_world:
                    payload["eval_render/world_state"] = videos_to_log_world
                if videos_to_log_agent:
                    payload["eval_render/agent_view"] = videos_to_log_agent
                wandb_run.log(payload, step=global_step)
                print(
                    f"Logged {len(videos_to_log_world)} topdown and {len(videos_to_log_agent)} agent eval videos to wandb"
                )

    except Exception as e:
        # Best-effort boundary: a failed eval render must not stop the caller.
        print(f"Failed to generate eval videos: {e}")

    finally:
        # Clean up bin weights file
        if os.path.exists(bin_path):
            os.remove(bin_path)