Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 30 additions & 29 deletions config/boxoban.ini
Original file line number Diff line number Diff line change
Expand Up @@ -2,43 +2,44 @@
env_name = boxoban

[vec]
total_agents = 32768
num_buffers = 4
num_threads = 1
total_agents = 4096
num_buffers = 8
num_threads = 8

[env]
num_agents = 1
difficulty = 2
difficulty = 5
int_r_coeff = 0.25
target_loss_pen_coeff = 0
max_steps = 150

[policy]
hidden_size = 512
num_layers = 3.32422
expansion_factor = 1
num_layers = 2
hidden_size = 64

[train]
gpus = 1
seed = 42
total_timesteps = 880580001
learning_rate = 0.00134234
anneal_lr = 1
min_lr_ratio = 0.37872
gamma = 0.989717
gae_lambda = 0.759273
replay_ratio = 1.6234
clip_coef = 0.01
vf_coef = 5
vf_clip_coef = 5
max_grad_norm = 1.20325
ent_coef = 0.000188411
beta1 = 0.995526
beta2 = 0.999536
eps = 1e-14
minibatch_size = 32768
horizon = 64
vtrace_rho_clip = 3.13347
vtrace_c_clip = 2.75328
prio_alpha = 0.453827
prio_beta0 = 0.765589
total_timesteps = 94_000_000
beta1 = 0.7279714073125252
beta2 = 0.9986265112492152
clip_coef = 0.6746497927896418
ent_coef = 0.0033240721522812535
eps = 0.00008339460257113628
gae_lambda = 0.948721675814334
gamma = 0.9721246598992744
learning_rate = 0.1
max_grad_norm = 1.8109182724544075
minibatch_size = 65_536
prio_alpha = 0.1
prio_beta0 = 0.8247156461060179
replay_ratio = 1.4242098997083206
vf_clip_coef = 1.2291681640124468
vf_coef = 1.2195502588297364
vtrace_c_clip = 1.0830442742115065
vtrace_rho_clip = 2.1017317041552603

[sweep]
gpus = 1
max_suggestion_cost = 10800


5 changes: 5 additions & 0 deletions ocean/boxoban/binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ void my_init(Env* env, Dict* kwargs) {
env->max_steps = (int)dict_get(kwargs, "max_steps")->value;
env->int_r_coeff = (float)dict_get(kwargs, "int_r_coeff")->value;
env->target_loss_pen_coeff = (float)dict_get(kwargs, "target_loss_pen_coeff")->value;
env->curriculum_mode = (env->difficulty_id == 5);
env->curriculum_difficulty = 0;
env->largest_solved_difficulty = -1;
env->episode_maps_solved = 0;
init(env);
}

Expand All @@ -26,4 +30,5 @@ void my_log(Log* log, Dict* out) {
dict_set(out, "episode_return", log->episode_return);
dict_set(out, "episode_length", log->episode_length);
dict_set(out, "targets_hit", log->on_targets);
dict_set(out, "final_puzzle_tick", log->puzzle_ticks);
}
87 changes: 74 additions & 13 deletions ocean/boxoban/boxoban.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ typedef struct {
float episode_length; // Recommended metric: number of steps of agent episode
// Any extra fields you add here may be exported to Python in binding.c
float on_targets; // Number of targets currently boxed
float puzzle_ticks; // Steps spent on the final puzzle of the episode
float n; // Required as the last field
} Log;

Expand All @@ -48,6 +49,7 @@ typedef struct {
int size;
int num_agents;
int tick;
int puzzle_tick;
int max_steps;
int agent_x;
int agent_y;
Expand All @@ -62,11 +64,24 @@ typedef struct {
Client* client;
int win;
float episode_return;
int curriculum_mode; // 1 when using incremental difficulty mode
int curriculum_difficulty; // current active difficulty in curriculum mode
int largest_solved_difficulty; // highest difficulty solved this episode, -1 if none
int episode_maps_solved; // number of puzzles solved this episode
} Boxoban;

void ensure_map_loaded(void);

static int boxoban_configure_maps_from_env(Boxoban* env) {
if (env->difficulty_id == BOXOBAN_DIFFICULTY_INCREMENTAL) {
reset_incremental_map_cache();
if (boxoban_load_incremental_bins() != 0) {
fprintf(stderr, "Failed to load incremental Boxoban map bins\n");
return -1;
}
return 0;
}

if (env->difficulty_id == -1) {
return 0;
}
Expand Down Expand Up @@ -107,8 +122,8 @@ static inline unsigned char get_intermediate_reward_status(Boxoban *env, int x,
return env->intermediate_rewards[(y)*env->size + (x)];
}

static inline const uint32_t get_random_puzzle_idx(const Boxoban *env) {
int idx = rand_r(&env->rng) % PUZZLE_COUNT;
static inline const uint32_t get_random_puzzle_idx(const Boxoban *env, size_t puzzle_count) {
int idx = rand_r(&env->rng) % puzzle_count;
return idx;
}

Expand All @@ -130,14 +145,29 @@ void init (Boxoban* env) {


void add_log(Boxoban* env) {
float denom = (float)env->n_boxes;
float num = (float)env->on_target;
float perf = (env->win== 1) ? 1.0f : 0.0f;
float perf;
float score;
float targets_hit = 0.0f;
if (env->n_targets > 0) {
targets_hit = (float)env->on_target / (float)env->n_targets;
}
if (env->curriculum_mode) {
score = 0.0f;
if (env->largest_solved_difficulty >= 0) {
score = (float)(env->largest_solved_difficulty + 1);
}
perf = (score + targets_hit) /
(float)(BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES + 1);
} else {
perf = (env->win == 1) ? 1.0f : 0.0f;
score = perf;
}
env->log.perf += perf;
env->log.score += perf;
env->log.score += score;
env->log.episode_length += env->tick;
env->log.episode_return += env->episode_return;
env->log.on_targets += env->on_target;
env->log.on_targets += targets_hit;
env->log.puzzle_ticks += env->puzzle_tick;
env->log.n++;
}

Expand All @@ -149,10 +179,16 @@ bool clear(Boxoban* env, int x, int y) {
return (get_entity(env, WALLS, x, y) == 0) && (get_entity(env, BOXES, x, y) == 0);
}

// Required function
void c_reset(Boxoban* env) {
const uint32_t i = get_random_puzzle_idx(env);
const uint8_t* puzzle = MAP_BASE + (size_t)i * PUZZLE_SIZE;
static void load_random_puzzle(Boxoban* env) {
const uint8_t* map_base = MAP_BASE;
size_t puzzle_count = PUZZLE_COUNT;
if (env->curriculum_mode) {
map_base = INCREMENTAL_MAP_BASES[env->curriculum_difficulty];
puzzle_count = INCREMENTAL_PUZZLE_COUNTS[env->curriculum_difficulty];
}

const uint32_t i = get_random_puzzle_idx(env, puzzle_count);
const uint8_t* puzzle = map_base + (size_t)i * PUZZLE_SIZE;
memcpy(env->observations, puzzle, PUZZLE_OBS_BYTES);

const uint8_t* meta = puzzle + PUZZLE_OBS_BYTES;
Expand All @@ -164,13 +200,26 @@ void c_reset(Boxoban* env) {

memcpy(env->intermediate_rewards,
env->observations + TARGET * env->size * env->size,env->size * env->size);
env->puzzle_tick = 0;
}

// Required function
// Begins a new episode: zeroes the per-episode counters, rewinds the
// curriculum bookkeeping when curriculum mode is on, and loads a random puzzle.
// On the very first reset of an environment the tick counters are additionally
// seeded with a random offset below max_steps.
void c_reset(Boxoban* env) {
    env->tick = 0;
    env->puzzle_tick = 0;
    env->win = 0;
    env->episode_return = 0;
    if (env->curriculum_mode) {
        // Each episode restarts from the first curriculum slot with no solves.
        env->curriculum_difficulty = 0;
        env->largest_solved_difficulty = -1;
        env->episode_maps_solved = 0;
    }

    load_random_puzzle(env);

    if (!env->initialized) {
        // NOTE(review): the random starting tick presumably staggers episode
        // boundaries across environments -- confirm with the author.
        // A max_steps of 0 would make the modulo a division by zero (undefined
        // behavior), so the counters are left at 0 in that case.
        if (env->max_steps > 0) {
            env->tick = rand_r(&env->rng) % env->max_steps;
            // Must come after load_random_puzzle(), which clears puzzle_tick.
            env->puzzle_tick = env->tick;
        }
        env->initialized = true;
    }
}
Expand Down Expand Up @@ -243,6 +292,7 @@ int take_action(Boxoban* env, int action) {
// Required function
void c_step(Boxoban* env) {
env->tick += 1;
env->puzzle_tick += 1;
env->terminals[0] = 0;
env->rewards[0] = 0.0;

Expand All @@ -260,16 +310,27 @@ void c_step(Boxoban* env) {

//Terminals
if (env->on_target == env->n_targets) {
env->terminals[0] = 1;
env->rewards[0] += 1.0;
env->win = 1;
env->episode_return += env->rewards[0];
if (env->curriculum_mode) {
if (env->curriculum_difficulty > env->largest_solved_difficulty) {
env->largest_solved_difficulty = env->curriculum_difficulty;
}
env->episode_maps_solved += 1;
if (env->curriculum_difficulty < BOXOBAN_INCREMENTAL_MAX_DIFFICULTY) {
env->curriculum_difficulty += 1;
}
load_random_puzzle(env);
return;
}
env->terminals[0] = 1;
add_log(env);
c_reset(env);
return;
}

if (env->tick >= env->max_steps) {
if (env->puzzle_tick >= env->max_steps) {
env->terminals[0] = 1;
env->rewards[0] -= 1.0;
env->episode_return += env->rewards[0];
Expand Down
96 changes: 95 additions & 1 deletion ocean/boxoban/boxoban_maps.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,22 @@ extern size_t PUZZLE_COUNT;
extern size_t PUZZLE_SIZE;
extern size_t PUZZLE_OBS_BYTES;

#define BOXOBAN_DIFFICULTY_BASIC 0
#define BOXOBAN_DIFFICULTY_EASY 1
#define BOXOBAN_DIFFICULTY_MEDIUM 2
#define BOXOBAN_DIFFICULTY_HARD 3
#define BOXOBAN_DIFFICULTY_UNFILTERED 4
#define BOXOBAN_DIFFICULTY_INCREMENTAL 5

#define BOXOBAN_INCREMENTAL_MIN_DIFFICULTY BOXOBAN_DIFFICULTY_BASIC
#define BOXOBAN_INCREMENTAL_MAX_DIFFICULTY BOXOBAN_DIFFICULTY_HARD
#define BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES 4

extern uint8_t *INCREMENTAL_MAP_BASES[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES];
extern size_t INCREMENTAL_MAP_FILESIZES[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES];
extern size_t INCREMENTAL_PUZZLE_COUNTS[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES];

int boxoban_ensure_bin_for_difficulty(const char* difficulty, char* out_path, size_t out_cap);
int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path, size_t out_cap);
int boxoban_set_map_path(const char *path);
int boxoban_difficulty_id_from_name(const char* difficulty_name);
Expand All @@ -38,6 +54,9 @@ size_t MAP_FILESIZE = 0;
size_t PUZZLE_COUNT = 0;
size_t PUZZLE_SIZE = BOXOBAN_PUZZLE_BYTES;
size_t PUZZLE_OBS_BYTES = BOXOBAN_PUZZLE_OBS_BYTES;
uint8_t *INCREMENTAL_MAP_BASES[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = {0};
size_t INCREMENTAL_MAP_FILESIZES[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = {0};
size_t INCREMENTAL_PUZZLE_COUNTS[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = {0};
static char* BOXOBAN_MAP_PATH = NULL;
static const char* BOXOBAN_LEVEL_ROOT = "resources/boxoban/levels";

Expand Down Expand Up @@ -325,7 +344,7 @@ static int boxoban_bin_path(const char* difficulty, char* out_path, size_t out_c
return 0;
}

int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path, size_t out_cap) {
int boxoban_ensure_bin_for_difficulty(const char* difficulty, char* out_path, size_t out_cap) {
if (difficulty == NULL || out_path == NULL) {
return -1;
}
Expand Down Expand Up @@ -356,12 +375,87 @@ int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path,
fprintf(stdout, "[Boxoban] Generated %zu puzzles for '%s' at %s\n", puzzle_count, difficulty, out_path);
}

return 0;
}

/*
 * Makes sure the .bin file for `difficulty` exists and then selects it as the
 * active map file.
 *
 * difficulty: difficulty name forwarded to boxoban_ensure_bin_for_difficulty().
 * out_path:   buffer that receives the .bin path; it is then handed to
 *             boxoban_set_map_path().
 * out_cap:    capacity of out_path in bytes.
 *
 * Returns 0 when both steps succeed, -1 if either one fails.
 */
int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path, size_t out_cap) {
    int status = boxoban_ensure_bin_for_difficulty(difficulty, out_path, out_cap);

    // Only switch the active map once the bin is known to be available.
    if (status == 0) {
        status = boxoban_set_map_path(out_path);
    }

    return (status == 0) ? 0 : -1;
}

/*
 * Maps the .bin file for `difficulty` read-only and publishes it in the
 * incremental tables at index `slot`.
 *
 * slot:       index into INCREMENTAL_MAP_BASES / _FILESIZES / _PUZZLE_COUNTS;
 *             must be in [0, BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES).
 * difficulty: difficulty name passed to boxoban_ensure_bin_for_difficulty().
 *
 * Returns 0 on success. Returns -1 (leaving the slot untouched) when the slot
 * is out of range, the bin cannot be ensured/opened/stat'ed, the file is empty
 * or not a whole number of PUZZLE_SIZE records, or mmap fails.
 */
static int boxoban_load_incremental_bin_slot(int slot, const char* difficulty) {
    char bin_path[512];
    struct stat st;

    if (slot < 0 || slot >= BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES) {
        return -1;
    }
    if (boxoban_ensure_bin_for_difficulty(difficulty, bin_path, sizeof(bin_path)) != 0) {
        return -1;
    }

    int fd = open(bin_path, O_RDONLY);
    if (fd < 0) {
        return -1;
    }

    // Validate the size before touching it: PUZZLE_SIZE == 0 would make the
    // modulo a division by zero, and an empty file would publish a puzzle
    // count of 0 (mmap rejects zero-length mappings anyway).
    if (fstat(fd, &st) != 0 ||
        st.st_size <= 0 ||
        PUZZLE_SIZE == 0 ||
        (size_t)st.st_size % PUZZLE_SIZE != 0) {
        close(fd);
        return -1;
    }

    const size_t file_size = (size_t)st.st_size;
    void* mapping = mmap(NULL, file_size, PROT_READ, MAP_PRIVATE, fd, 0);
    // The mapping stays valid after the descriptor is closed.
    close(fd);
    if (mapping == MAP_FAILED) {
        return -1;
    }

    // Release whatever this slot was holding so a reload does not leak the
    // previous mapping.
    if (INCREMENTAL_MAP_BASES[slot] != NULL &&
        INCREMENTAL_MAP_BASES[slot] != MAP_FAILED &&
        INCREMENTAL_MAP_FILESIZES[slot] > 0) {
        munmap(INCREMENTAL_MAP_BASES[slot], INCREMENTAL_MAP_FILESIZES[slot]);
    }

    INCREMENTAL_MAP_BASES[slot] = mapping;
    INCREMENTAL_MAP_FILESIZES[slot] = file_size;
    INCREMENTAL_PUZZLE_COUNTS[slot] = file_size / PUZZLE_SIZE;
    return 0;
}

/*
 * Loads one map bin per incremental slot, in slot order:
 * "basic", "easy", "medium", "hard".
 *
 * Returns 0 when every slot loaded, or -1 as soon as one slot fails (slots
 * loaded before the failure are left as they are).
 */
static int boxoban_load_incremental_bins(void) {
    const char* slot_names[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = {
        "basic",
        "easy",
        "medium",
        "hard",
    };

    int slot = 0;
    while (slot < BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES) {
        int rc = boxoban_load_incremental_bin_slot(slot, slot_names[slot]);
        if (rc != 0) {
            return -1;
        }
        slot++;
    }

    return 0;
}

/*
 * Drops every incremental map slot: unmaps the slot's region when it holds a
 * real mapping (non-NULL, not MAP_FAILED, non-zero size) and then clears the
 * slot's base pointer, file size and puzzle count.
 */
static void reset_incremental_map_cache(void) {
    for (int slot = 0; slot < BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES; slot++) {
        uint8_t* base = INCREMENTAL_MAP_BASES[slot];
        size_t length = INCREMENTAL_MAP_FILESIZES[slot];

        int has_mapping = (base != NULL) && (base != MAP_FAILED) && (length > 0);
        if (has_mapping) {
            munmap(base, length);
        }

        // Clear the slot whether or not anything was mapped.
        INCREMENTAL_PUZZLE_COUNTS[slot] = 0;
        INCREMENTAL_MAP_FILESIZES[slot] = 0;
        INCREMENTAL_MAP_BASES[slot] = NULL;
    }
}

static void reset_map_cache(void) {
if (MAP_BASE != NULL && MAP_BASE != MAP_FAILED && MAP_FILESIZE > 0) {
munmap(MAP_BASE, MAP_FILESIZE);
Expand Down
Loading
Loading