Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions pufferlib/config/ocean/boxoban.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
[base]
package = ocean
env_name = puffer_boxoban
policy_name = MinGRU
rnn_name = Recurrent

[vec]
total_agents = 16384
num_buffers = 8
num_threads = 8

[policy]
num_layers = 1
hidden_size = 256

[env]
num_agents = 1
# difficulty: 0 = basic, 1 = easy, 2 = medium, 3 = hard, 4 = unfiltered
difficulty = 1
#reward per intermediate target (once per episode)
int_r_coeff = 0.25
# penalty coefficient for moving a box off a target
target_loss_pen_coeff = 0.0
max_steps = 300


[train]
anneal_lr = 1
beta1 = 0.9774372816193448
beta2 = 0.9659403664380584
clip_coef = 0.6046560670053024
ent_coef = 0.00002079831529141607
eps = 0.00000000000001
gae_lambda = 0.9258914518467392
gamma = 0.9772998708784648
gpus = 1
horizon = 64
learning_rate = 0.004480255741933225
max_grad_norm = 1.221684008665154
min_lr_ratio = 0.37872027027338984
minibatch_size = 8192
prio_alpha = 1
prio_beta0 = 0.8789921736378042
replay_ratio = 3.210300031048168
seed = 42
total_timesteps = 55504884
use_rnn = true
vf_clip_coef = 4.339748010438874
vf_coef = 4.240274862679744
vtrace_c_clip = 1.3625779006162615
vtrace_rho_clip = 3.17260199042977


#EASY

[sweep]
metric = perf

29 changes: 29 additions & 0 deletions pufferlib/ocean/boxoban/binding.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#define BOXOBAN_MAPS_IMPLEMENTATION //enables mmap
#include "boxoban.h"
// The macros below are defined before vecenv.h is included; presumably
// vecenv.h reads them to size its buffers -- TODO confirm against vecenv.h.
// 400 = 4 * 10 * 10, i.e. 4 * size * size with the size of 10 set in my_init.
#define OBS_SIZE 400
// Presumably one action component with 5 discrete choices -- verify in vecenv.h.
#define NUM_ATNS 1
#define ACT_SIZES {5}
// NOTE(review): looks like observations are exposed as a byte tensor -- confirm.
#define OBS_TENSOR_T ByteTensor


// Env aliases the Boxoban struct; my_init below fills Boxoban fields through Env*.
#define Env Boxoban
#include "vecenv.h"


/*
 * Fill in a Boxoban environment from the binding's keyword arguments and
 * then hand it to init().
 *
 * Read from kwargs: "difficulty", "max_steps", "int_r_coeff",
 * "target_loss_pen_coeff". The size (10) and num_agents (1) fields are fixed
 * here and are not read from kwargs.
 */
void my_init(Env* env, Dict* kwargs) {
    /* Fixed settings. */
    env->size = 10;
    env->num_agents = 1;

    /* Integer-valued settings. */
    env->difficulty_id = (int)dict_get(kwargs, "difficulty")->value;
    env->max_steps = (int)dict_get(kwargs, "max_steps")->value;

    /* Float-valued reward coefficients. */
    env->int_r_coeff = (float)dict_get(kwargs, "int_r_coeff")->value;
    env->target_loss_pen_coeff = (float)dict_get(kwargs, "target_loss_pen_coeff")->value;

    init(env);
}

// Copies five fields of `log` into the dictionary `out`.
// Every key is spelled like the Log field it comes from, except
// "targets_hit", which is filled from log->on_targets.
void my_log(Log* log, Dict* out) {
// "perf" is also the metric name given in the [sweep] section of boxoban.ini.
dict_set(out, "perf", log->perf);
dict_set(out, "score", log->score);
dict_set(out, "episode_return", log->episode_return);
dict_set(out, "episode_length", log->episode_length);
// Key and field names differ here on purpose or by accident -- NOTE(review): confirm.
dict_set(out, "targets_hit", log->on_targets);
}
194 changes: 194 additions & 0 deletions pufferlib/ocean/boxoban/boxoban.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
/* Pure C demo file for Boxoban. Usage:
* bash scripts/build_ocean.sh boxoban
* ./boxoban [difficulty|path_to_bin]
*
* With no argument the demo uses the "easy" maps. An argument containing a
* '/' is used verbatim as the path to a bin file. One of the known difficulty
* names (basic, easy, medium, hard, unfiltered) is resolved through
* boxoban_prepare_maps_for_difficulty(). Any other argument is substituted
* into pufferlib/ocean/boxoban/boxoban_maps_<arg>.bin.
*/

#define BOXOBAN_MAPS_IMPLEMENTATION
#include "boxoban.h"

/*
 * Returns 1 when `arg` is exactly one of the built-in difficulty names
 * (basic, easy, medium, hard, unfiltered) and 0 otherwise.
 * The comparison is case-sensitive; `arg` must not be NULL.
 */
static int is_named_difficulty(const char* arg) {
    static const char* const known_names[] = {
        "basic", "easy", "medium", "hard", "unfiltered",
    };
    const size_t name_count = sizeof(known_names) / sizeof(known_names[0]);

    for (size_t i = 0; i < name_count; i++) {
        if (strcmp(arg, known_names[i]) == 0) {
            return 1;
        }
    }
    return 0;
}

/*
 * Decide which map file to load and return a pointer to its path.
 *
 * Resolution order:
 *   1. No argument: boxoban_prepare_maps_for_difficulty() is asked for "easy".
 *   2. Argument contains '/': it is returned unchanged as an explicit path.
 *   3. Argument is a known difficulty name:
 *      boxoban_prepare_maps_for_difficulty() is asked for that name.
 *   4. Anything else: the argument is substituted into
 *      pufferlib/ocean/boxoban/boxoban_maps_<arg>.bin.
 *
 * buffer / buf_sz: caller-owned scratch space that receives the path in
 *                  cases 1, 3 and 4.
 *
 * Returns argv[1] (case 2), `buffer` (cases 1, 3, 4), or NULL when map
 * preparation fails or the formatted path does not fit in `buffer`.
 */
static const char* resolve_map_path(int argc, char** argv, char* buffer, size_t buf_sz) {
    const char* arg = argc > 1 ? argv[1] : NULL;

    // An explicit path needs no further processing.
    if (arg != NULL && strchr(arg, '/')) {
        return arg;
    }

    // Both "no argument" and "known difficulty name" go through the same
    // preparation call; only the requested name differs.
    const char* difficulty = NULL;
    if (arg == NULL) {
        difficulty = "easy";
    } else if (is_named_difficulty(arg)) {
        difficulty = arg;
    }
    if (difficulty != NULL) {
        if (boxoban_prepare_maps_for_difficulty(difficulty, buffer, buf_sz) != 0) {
            return NULL;
        }
        return buffer;
    }

    // Unknown bare word: build the conventional file name from it. A result
    // >= buf_sz means the path was truncated; report failure rather than
    // handing back a path that names a different file.
    int written = snprintf(buffer, buf_sz, "pufferlib/ocean/boxoban/boxoban_maps_%s.bin", arg);
    if (written < 0 || (size_t)written >= buf_sz) {
        return NULL;
    }
    return buffer;
}


/*
 * Interactive demo: resolves the map file from the command line, builds one
 * Boxoban environment and renders it until the window is closed.
 *
 * While either Shift key is held the environment only steps when a direction
 * key (arrows or WASD) is down; if several are down, the last one checked
 * wins (UP, DOWN, LEFT, RIGHT in that order). Without Shift a random action
 * in 0..4 is taken every frame.
 *
 * Returns 0 on a normal exit and 1 when the map path cannot be prepared or
 * set, or when the environment buffers cannot be allocated.
 */
int demo(int argc, char** argv) {
    char path_buffer[512];
    const char* chosen_path = resolve_map_path(argc, argv, path_buffer, sizeof(path_buffer));
    if (chosen_path == NULL) {
        fprintf(stderr, "Failed to prepare map path\n");
        return 1;
    }
    if (boxoban_set_map_path(chosen_path) != 0) {
        fprintf(stderr, "Failed to set map path: %s\n", chosen_path);
        return 1;
    }

    Boxoban env = {
        .size = 10,
        .observations = NULL,
        .actions = NULL,
        .rewards = NULL,
        .terminals = NULL,
        .max_steps = 500,
        .int_r_coeff = 0.1f,
        .target_loss_pen_coeff = 0.5f,
        .tick = 0,
        .agent_x = 0,
        .agent_y = 0,
        .intermediate_rewards = NULL,
        .on_target = 0,
        .n_boxes = 0,
        .win = 0,
        .difficulty_id = -1,
        .client = NULL,
        .n_targets = 0,
    };

    size_t obs_count = 4u * (size_t)env.size * (size_t)env.size;
    env.observations = calloc(obs_count, sizeof(unsigned char));
    env.actions = calloc(1, sizeof(int));
    env.rewards = calloc(1, sizeof(float));
    env.terminals = calloc(1, sizeof(unsigned char));

    // Stop before init()/c_step() can dereference a failed allocation.
    // free(NULL) is a no-op, so releasing all four is safe.
    if (env.observations == NULL || env.actions == NULL ||
        env.rewards == NULL || env.terminals == NULL) {
        fprintf(stderr, "Failed to allocate environment buffers\n");
        free(env.observations);
        free(env.actions);
        free(env.rewards);
        free(env.terminals);
        return 1;
    }

    init(&env);
    c_reset(&env);
    c_render(&env);
    while (!WindowShouldClose()) {
        if (IsKeyPressed(KEY_LEFT_SHIFT) || IsKeyPressed(KEY_RIGHT_SHIFT)) {
            TraceLog(LOG_INFO, "Shift key pressed");
        }
        bool manual = IsKeyDown(KEY_LEFT_SHIFT) || IsKeyDown(KEY_RIGHT_SHIFT);
        if (manual) {
            // Manual mode: step only when a direction is held, otherwise the
            // environment stays paused and is just re-rendered.
            int new_action = -1;
            if (IsKeyDown(KEY_UP) || IsKeyDown(KEY_W)) new_action = UP;
            if (IsKeyDown(KEY_DOWN) || IsKeyDown(KEY_S)) new_action = DOWN;
            if (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A)) new_action = LEFT;
            if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)) new_action = RIGHT;

            if (new_action >= 0) {
                env.actions[0] = new_action;
                c_step(&env);
            }
        } else {
            // Autoplay: uniformly random action index in 0..4.
            env.actions[0] = rand() % 5;
            c_step(&env);
        }
        c_render(&env);
    }
    free(env.observations);
    free(env.actions);
    free(env.rewards);
    free(env.terminals);
    c_close(&env);
    return 0;
}

/*
 * Headless throughput test: steps one Boxoban environment with random
 * actions for roughly `timeout` seconds of wall-clock time and prints the
 * measured steps per second.
 *
 * argc/argv: forwarded to resolve_map_path() to pick the map file.
 * timeout:   run time in seconds; with timeout <= 0 no steps are taken and
 *            an SPS of 0 is reported.
 *
 * Prints an error to stderr and returns early when the map path cannot be
 * prepared or set, or when the environment buffers cannot be allocated.
 */
void test_performance(int argc, char** argv, int timeout) {
    char path_buffer[512];
    const char* chosen_path = resolve_map_path(argc, argv, path_buffer, sizeof(path_buffer));
    if (chosen_path == NULL) {
        fprintf(stderr, "Failed to prepare map path\n");
        return;
    }
    if (boxoban_set_map_path(chosen_path) != 0) {
        fprintf(stderr, "Failed to set map path: %s\n", chosen_path);
        return;
    }
    printf("Loaded map: %s\n", chosen_path);

    Boxoban env = {
        .size = 10,
        .observations = NULL,
        .actions = NULL,
        .rewards = NULL,
        .terminals = NULL,
        .max_steps = 500,
        .int_r_coeff = 0.1f,
        .target_loss_pen_coeff = 0.5f,
        .tick = 0,
        .agent_x = 0,
        .agent_y = 0,
        .intermediate_rewards = NULL,
        .on_target = 0,
        .n_boxes = 0,
        .win = 0,
        .difficulty_id = -1,
        .client = NULL,
        .n_targets = 0,
    };

    size_t obs_count = 4u * (size_t)env.size * (size_t)env.size;
    env.observations = calloc(obs_count, sizeof(unsigned char));
    env.actions = calloc(1, sizeof(int));
    env.rewards = calloc(1, sizeof(float));
    env.terminals = calloc(1, sizeof(unsigned char));

    // Stop before init()/c_step() can dereference a failed allocation.
    // free(NULL) is a no-op, so releasing all four is safe.
    if (env.observations == NULL || env.actions == NULL ||
        env.rewards == NULL || env.terminals == NULL) {
        fprintf(stderr, "Failed to allocate environment buffers\n");
        free(env.observations);
        free(env.actions);
        free(env.rewards);
        free(env.terminals);
        return;
    }

    printf("Initializing...\n");
    init(&env);
    printf("Resetting...\n");
    c_reset(&env);
    printf("Starting test...\n");

    // Keep timestamps as time_t (no narrowing to int) and count steps in a
    // wide type so long runs cannot overflow the counter.
    time_t start = time(NULL);
    long long num_steps = 0;
    while (time(NULL) - start < timeout) {
        env.actions[0] = rand() % 5;
        c_step(&env);
        num_steps++;
    }

    time_t end = time(NULL);
    // Divide in floating point so the rate is not truncated, and avoid a
    // division by zero when no whole second elapsed (e.g. timeout <= 0).
    double elapsed = (double)(end - start);
    double sps = elapsed > 0.0 ? (double)num_steps / elapsed : 0.0;
    printf("Test Environment SPS: %f\n", sps);
    free(env.observations);
    free(env.actions);
    free(env.rewards);
    free(env.terminals);
    c_close(&env);
}

/*
 * Program entry point: runs the interactive demo and exits with its status
 * (0 on success, 1 when the demo could not start).
 */
int main(int argc, char** argv) {
    // Buffering must be configured before anything uses stdout, so this has
    // to come before the demo runs.
    setbuf(stdout, NULL);
    fprintf(stderr, "Entered main\n");
    fflush(stderr);
    // To benchmark instead of running the interactive demo, replace the call
    // below with test_performance(argc, argv, 10) followed by return 0.
    return demo(argc, argv);
}
Loading