diff --git a/config/four_rooms.ini b/config/four_rooms.ini
new file mode 100644
index 000000000..0a931a2ff
--- /dev/null
+++ b/config/four_rooms.ini
@@ -0,0 +1,62 @@
+[base]
+env_name = four_rooms
+
+[vec]
+total_agents = 4096
+num_buffers = 2
+num_threads = 8
+
+[env]
+size = 19
+# If 0 or negative, max_steps defaults to 4 * size. Positive values override it.
+max_steps = 0
+
+[policy]
+hidden_size = 128
+num_layers = 2
+expansion_factor = 1
+
+[train]
+total_timesteps = 100_000_000
+gamma = 0.99
+gae_lambda = 0.95
+learning_rate = 0.005
+minibatch_size = 32768
+horizon = 64
+ent_coef = 0.01
+
+[sweep]
+metric = score
+metric_distribution = linear
+goal = maximize
+max_runs = 100
+gpus = 1
+downsample = 5
+sweep_only = hidden_size,num_layers,total_timesteps,learning_rate
+
+[sweep.policy.hidden_size]
+distribution = uniform_pow2
+min = 64
+max = 1024
+mean = 256
+scale = auto
+
+[sweep.policy.num_layers]
+distribution = int_uniform
+min = 1
+max = 4
+mean = 2
+scale = auto
+
+[sweep.train.total_timesteps]
+distribution = log_normal
+min = 20_000_000
+max = 500_000_000
+mean = 100_000_000
+scale = auto
+
+[sweep.train.learning_rate]
+distribution = log_normal
+min = 0.0005
+max = 0.01
+scale = auto
diff --git a/ocean/four_rooms/binding.c b/ocean/four_rooms/binding.c
new file mode 100644
index 000000000..3c9c95c1c
--- /dev/null
+++ b/ocean/four_rooms/binding.c
@@ -0,0 +1,54 @@
+#include "four_rooms.h"
+
+// Settings defined before including vecenv.h, which presumably consumes them.
+#define OBS_SIZE (FOUR_ROOMS_VIEW_SIZE * FOUR_ROOMS_VIEW_SIZE * FOUR_ROOMS_OBS_CHANNELS)
+#define NUM_ATNS 1
+#define ACT_SIZES {FOUR_ROOMS_NUM_ACTIONS}
+#define OBS_TENSOR_T ByteTensor
+
+#define MY_VEC_STEP four_rooms_vec_step
+#define MY_VEC_STEP_RANGE four_rooms_vec_step_range
+#define Env FourRooms
+#include "vecenv.h"
+
+// Steps every environment in the vector once, after zeroing vec->rewards and
+// vec->terminals for all agents.
+void four_rooms_vec_step(StaticVec* vec) {
+    memset(vec->rewards, 0, vec->total_agents * sizeof(float));
+    memset(vec->terminals, 0, vec->total_agents * sizeof(float));
+    FourRooms* envs = (FourRooms*)vec->envs;
+    for (int i = 0; i < vec->size; i++) {
+        c_step(&envs[i]);
+    }
+}
+
+// Steps the environments in [env_start, env_start + env_count).
+// num_workers is accepted but not used.
+void four_rooms_vec_step_range(StaticVec* vec, int env_start, int env_count, int num_workers) {
+    (void)num_workers;
+    FourRooms* envs = (FourRooms*)vec->envs;
+    for (int i = env_start; i < env_start + env_count; i++) {
+        c_step(&envs[i]);
+    }
+}
+
+// Initializes one environment from kwargs ("size", "max_steps") and allocates
+// its size * size grid. A non-positive max_steps becomes 4 * size.
+void my_init(Env* env, Dict* kwargs) {
+    env->num_agents = 1;
+    env->size = (int)dict_get(kwargs, "size")->value;
+    env->max_steps = (int)dict_get(kwargs, "max_steps")->value;
+    if (env->max_steps <= 0) {
+        env->max_steps = 4 * env->size;
+    }
+    env->see_through_walls = 0;
+    env->grid = (unsigned char*)calloc(env->size * env->size, sizeof(unsigned char));
+}
+
+// Copies the accumulated log fields into the output dictionary.
+void my_log(Log* log, Dict* out) {
+    dict_set(out, "perf", log->perf);
+    dict_set(out, "score", log->score);
+    dict_set(out, "episode_return", log->episode_return);
+    dict_set(out, "episode_length", log->episode_length);
+}
diff --git a/ocean/four_rooms/four_rooms.c b/ocean/four_rooms/four_rooms.c
new file mode 100644
index 000000000..b73ad5989
--- /dev/null
+++ b/ocean/four_rooms/four_rooms.c
@@ -0,0 +1,40 @@
+#include "four_rooms.h"
+
+// Interactive demo: hold Left Shift to drive the agent with the arrow keys or
+// W/A/D; otherwise a random LEFT/RIGHT/FORWARD action is taken every frame.
+int main() {
+    FourRooms env = {};
+    env.size = 19;
+    env.max_steps = 0;
+    env.num_agents = 1;
+    env.rng = 0;
+    env.observations = (unsigned char*)calloc(
+        FOUR_ROOMS_VIEW_SIZE * FOUR_ROOMS_VIEW_SIZE * FOUR_ROOMS_OBS_CHANNELS,
+        sizeof(unsigned char)
+    );
+    env.actions = (float*)calloc(1, sizeof(float));
+    env.rewards = (float*)calloc(1, sizeof(float));
+    env.terminals = (float*)calloc(1, sizeof(float));
+    env.grid = (unsigned char*)calloc(env.size * env.size, sizeof(unsigned char));
+
+    c_reset(&env);
+    c_render(&env);
+    while (!WindowShouldClose()) {
+        if (IsKeyDown(KEY_LEFT_SHIFT)) {
+            env.actions[0] = 7; // Invalid action = no-op
+            if (IsKeyDown(KEY_UP) || IsKeyDown(KEY_W)) env.actions[0] = FORWARD;
+            if (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A)) env.actions[0] = LEFT;
+            if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)) env.actions[0] = RIGHT;
+        } else {
+            env.actions[0] = four_rooms_rand(&env, 3); // Only use left, right, forward
+        }
+        c_step(&env);
+        c_render(&env);
+    }
+    free(env.observations);
+    free(env.actions);
+    free(env.rewards);
+    free(env.terminals);
+    c_close(&env);
+    return 0;
+}
diff --git a/ocean/four_rooms/four_rooms.h b/ocean/four_rooms/four_rooms.h
new file mode 100644
index 000000000..98c9e0469
--- /dev/null
+++ b/ocean/four_rooms/four_rooms.h
@@ -0,0 +1,445 @@
+// FourRooms: a single-agent grid world split into four rooms, observed
+// through a 7x7x3 egocentric view.
+#include <stdlib.h>
+#include <string.h>
+#include "raylib.h"
+
+#define FOUR_ROOMS_VIEW_SIZE 7
+#define FOUR_ROOMS_OBS_CHANNELS 3
+#define FOUR_ROOMS_NUM_ACTIONS 7
+
+// Action space. c_step only acts on LEFT, RIGHT and FORWARD; every other
+// value leaves the agent where it is.
+enum {
+    LEFT = 0,
+    RIGHT = 1,
+    FORWARD = 2,
+    PICKUP = 3,
+    DROP = 4,
+    TOGGLE = 5,
+    DONE = 6,
+};
+
+// Observation channel 0: object ids (also stored in the grid cells)
+enum {
+    UNSEEN = 0,
+    EMPTY = 1,
+    WALL = 2,
+    GOAL = 8,
+    AGENT = 10,
+};
+
+// Observation channel 1: color ids
+enum {
+    COLOR_BLACK = 0,
+    COLOR_GREEN = 1,
+    COLOR_GREY = 5,
+};
+
+// PufferLib standard colors for rendering
+static const Color PUFF_RED = (Color){187, 0, 0, 255};
+static const Color PUFF_BACKGROUND = (Color){6, 24, 24, 255};
+static const Color PUFF_BACKGROUND2 = (Color){18, 72, 72, 255};
+
+// Episode statistics summed by add_log; n counts the logged episodes.
+typedef struct {
+    float perf;
+    float score;
+    float episode_return;
+    float episode_length;
+    float n;
+} Log;
+
+// Environment state. The grid and the I/O buffers are allocated by the caller.
+typedef struct {
+    Log log;
+    unsigned char* observations; // VIEW_SIZE * VIEW_SIZE * OBS_CHANNELS bytes
+    float* actions;
+    float* rewards;
+    float* terminals;
+    int num_agents;
+    int size;                    // grid is size x size cells
+    int max_steps;               // episode times out once tick reaches this
+    int tick;
+    float episode_return;
+    int agent_x, agent_y;
+    int agent_dir;               // 0 = +x, 1 = +y, 2 = -x, 3 = -y
+    int goal_x, goal_y;
+    unsigned char* grid;         // size * size object ids, row-major
+    int see_through_walls;
+    unsigned int rng;            // rand_r state
+    int texture_loaded;
+    Texture2D puffers;
+} FourRooms;
+
+// Returns a pseudo-random integer in [0, n) from the env's rand_r state.
+// Returns 0 without drawing when n <= 0, so tiny grids cannot divide by zero.
+static inline int four_rooms_rand(FourRooms* env, int n) {
+    if (n <= 0) {
+        return 0;
+    }
+    return rand_r(&env->rng) % n;
+}
+
+// Row-major index of cell (x, y) in env->grid.
+static inline int four_rooms_grid_idx(FourRooms* env, int x, int y) {
+    return y * env->size + x;
+}
+
+// Adds the finished episode to env->log. perf counts episodes whose final
+// reward was positive.
+void add_log(FourRooms* env) {
+    env->log.perf += (env->rewards[0] > 0) ? 1.0f : 0.0f;
+    env->log.score += env->rewards[0];
+    env->log.episode_length += env->tick;
+    env->log.episode_return += env->episode_return;
+    env->log.n++;
+}
+
+// Maps a grid cell value to its (object, color, state) observation triple.
+// Anything other than WALL or GOAL is reported as EMPTY; state is always 0.
+void encode_cell(unsigned char object, unsigned char* object_idx, unsigned char* color_idx, unsigned char* state) {
+    *state = 0;
+    if (object == WALL) {
+        *object_idx = WALL;
+        *color_idx = COLOR_GREY;
+    } else if (object == GOAL) {
+        *object_idx = GOAL;
+        *color_idx = COLOR_GREEN;
+    } else {
+        *object_idx = EMPTY;
+        *color_idx = COLOR_BLACK;
+    }
+}
+
+// Converts view coordinates to world coordinates. The agent sits at the
+// bottom-center view cell; smaller obs_y means further ahead of the agent.
+void observation_to_world(FourRooms* env, int obs_x, int obs_y, int* world_x, int* world_y) {
+    int forward_x = 0;
+    int forward_y = 0;
+    if (env->agent_dir == 0) forward_x = 1;
+    else if (env->agent_dir == 1) forward_y = 1;
+    else if (env->agent_dir == 2) forward_x = -1;
+    else forward_y = -1;
+
+    int right_x = -forward_y;
+    int right_y = forward_x;
+    int right_offset = obs_x - FOUR_ROOMS_VIEW_SIZE / 2;
+    int forward_offset = FOUR_ROOMS_VIEW_SIZE - 1 - obs_y;
+
+    *world_x = env->agent_x + forward_x * forward_offset + right_x * right_offset;
+    *world_y = env->agent_y + forward_y * forward_offset + right_y * right_offset;
+}
+
+// Fills visible[][] with 1 for every view cell reached by spreading outward
+// from the agent cell. Walls can be marked visible but do not spread further.
+void compute_visibility(unsigned char view[FOUR_ROOMS_VIEW_SIZE][FOUR_ROOMS_VIEW_SIZE],
+        unsigned char visible[FOUR_ROOMS_VIEW_SIZE][FOUR_ROOMS_VIEW_SIZE]) {
+    memset(visible, 0, FOUR_ROOMS_VIEW_SIZE * FOUR_ROOMS_VIEW_SIZE * sizeof(unsigned char));
+    visible[FOUR_ROOMS_VIEW_SIZE - 1][FOUR_ROOMS_VIEW_SIZE / 2] = 1;
+
+    // MiniGrid propagates visibility from the agent at bottom-center after rotating the view.
+    for (int y = FOUR_ROOMS_VIEW_SIZE - 1; y >= 0; y--) {
+        for (int x = 0; x < FOUR_ROOMS_VIEW_SIZE - 1; x++) {
+            if (!visible[y][x] || view[y][x] == WALL) {
+                continue;
+            }
+            visible[y][x + 1] = 1;
+            if (y > 0) {
+                visible[y - 1][x] = 1;
+                visible[y - 1][x + 1] = 1;
+            }
+        }
+
+        for (int x = FOUR_ROOMS_VIEW_SIZE - 1; x > 0; x--) {
+            if (!visible[y][x] || view[y][x] == WALL) {
+                continue;
+            }
+            visible[y][x - 1] = 1;
+            if (y > 0) {
+                visible[y - 1][x] = 1;
+                visible[y - 1][x - 1] = 1;
+            }
+        }
+    }
+}
+
+// Writes the agent's egocentric view into env->observations as
+// VIEW_SIZE x VIEW_SIZE x OBS_CHANNELS bytes. Cells outside the grid count as
+// walls, the agent's own cell as EMPTY, and hidden cells as UNSEEN.
+void generate_observation(FourRooms* env) {
+    unsigned char view[FOUR_ROOMS_VIEW_SIZE][FOUR_ROOMS_VIEW_SIZE];
+    unsigned char visible[FOUR_ROOMS_VIEW_SIZE][FOUR_ROOMS_VIEW_SIZE];
+
+    for (int y = 0; y < FOUR_ROOMS_VIEW_SIZE; y++) {
+        for (int x = 0; x < FOUR_ROOMS_VIEW_SIZE; x++) {
+            int world_x, world_y;
+            observation_to_world(env, x, y, &world_x, &world_y);
+            if (world_x < 0 || world_x >= env->size || world_y < 0 || world_y >= env->size) {
+                view[y][x] = WALL;
+            } else if (world_x == env->agent_x && world_y == env->agent_y) {
+                view[y][x] = EMPTY;
+            } else {
+                view[y][x] = env->grid[four_rooms_grid_idx(env, world_x, world_y)];
+            }
+        }
+    }
+
+    if (env->see_through_walls) {
+        memset(visible, 1, FOUR_ROOMS_VIEW_SIZE * FOUR_ROOMS_VIEW_SIZE * sizeof(unsigned char));
+    } else {
+        compute_visibility(view, visible);
+    }
+
+    for (int y = 0; y < FOUR_ROOMS_VIEW_SIZE; y++) {
+        for (int x = 0; x < FOUR_ROOMS_VIEW_SIZE; x++) {
+            int base_idx = (y * FOUR_ROOMS_VIEW_SIZE + x) * FOUR_ROOMS_OBS_CHANNELS;
+            if (!visible[y][x]) {
+                env->observations[base_idx] = UNSEEN;
+                env->observations[base_idx + 1] = COLOR_BLACK;
+                env->observations[base_idx + 2] = 0;
+                continue;
+            }
+
+            encode_cell(
+                view[y][x],
+                &env->observations[base_idx],
+                &env->observations[base_idx + 1],
+                &env->observations[base_idx + 2]
+            );
+        }
+    }
+}
+
+// Rebuilds env->grid: outer walls, two separating walls through the middle,
+// and one randomly placed gap in each of the four wall segments.
+// Intended for size >= 5; smaller grids are degenerate.
+void create_four_rooms_grid(FourRooms* env) {
+    int size = env->size;
+
+    // Clear grid
+    memset(env->grid, EMPTY, size * size * sizeof(unsigned char));
+
+    // Create outer walls
+    for (int i = 0; i < size; i++) {
+        env->grid[0 * size + i] = WALL; // Top
+        env->grid[(size-1) * size + i] = WALL; // Bottom
+        env->grid[i * size + 0] = WALL; // Left
+        env->grid[i * size + (size-1)] = WALL; // Right
+    }
+
+    int room_w = size / 2;
+    int room_h = size / 2;
+    // Free cells beyond each separating wall: room - 1 for odd sizes, room - 2
+    // for even sizes, so the far-side gaps never land on the outer wall.
+    int far_w = size - 2 - room_w;
+    int far_h = size - 2 - room_h;
+
+    // Create vertical separating wall
+    for (int y = 0; y < size; y++) {
+        env->grid[y * size + room_w] = WALL;
+    }
+
+    // Create horizontal separating wall
+    for (int x = 0; x < size; x++) {
+        env->grid[room_h * size + x] = WALL;
+    }
+
+    // Create 4 gaps in the separating walls
+    // Gap in vertical wall (top half)
+    int gap_y1 = 1 + four_rooms_rand(env, room_h - 1);
+    env->grid[gap_y1 * size + room_w] = EMPTY;
+
+    // Gap in vertical wall (bottom half)
+    int gap_y2 = room_h + 1 + four_rooms_rand(env, far_h);
+    env->grid[gap_y2 * size + room_w] = EMPTY;
+
+    // Gap in horizontal wall (left half)
+    int gap_x1 = 1 + four_rooms_rand(env, room_w - 1);
+    env->grid[room_h * size + gap_x1] = EMPTY;
+
+    // Gap in horizontal wall (right half)
+    int gap_x2 = room_w + 1 + four_rooms_rand(env, far_w);
+    env->grid[room_h * size + gap_x2] = EMPTY;
+}
+
+// Starts a new episode: rebuilds the grid, places the agent and the goal on
+// distinct empty cells, picks a random heading, and writes the first
+// observation. The accumulated log is left untouched.
+void c_reset(FourRooms* env) {
+    if (env->max_steps <= 0) {
+        env->max_steps = 4 * env->size;
+    }
+
+    create_four_rooms_grid(env);
+
+    // Place agent randomly in valid position
+    do {
+        env->agent_x = 1 + four_rooms_rand(env, env->size - 2);
+        env->agent_y = 1 + four_rooms_rand(env, env->size - 2);
+    } while (env->grid[four_rooms_grid_idx(env, env->agent_x, env->agent_y)] != EMPTY);
+
+    // Place goal randomly in valid position (different from agent)
+    do {
+        env->goal_x = 1 + four_rooms_rand(env, env->size - 2);
+        env->goal_y = 1 + four_rooms_rand(env, env->size - 2);
+    } while (env->grid[four_rooms_grid_idx(env, env->goal_x, env->goal_y)] != EMPTY ||
+             (env->goal_x == env->agent_x && env->goal_y == env->agent_y));
+
+    // Set agent and goal on grid
+    env->grid[four_rooms_grid_idx(env, env->agent_x, env->agent_y)] = AGENT;
+    env->grid[four_rooms_grid_idx(env, env->goal_x, env->goal_y)] = GOAL;
+
+    // Random initial direction
+    env->agent_dir = four_rooms_rand(env, 4);
+    env->tick = 0;
+    env->episode_return = 0.0f;
+
+    generate_observation(env);
+}
+
+// Advances one tick using actions[0]. Reaching the goal yields
+// 1 - 0.9 * tick / max_steps and ends the episode; hitting max_steps ends it
+// with zero reward. Either ending logs the episode and resets in place.
+void c_step(FourRooms* env) {
+    env->tick += 1;
+
+    int action = (int)env->actions[0];
+    env->terminals[0] = 0;
+    env->rewards[0] = 0.0;
+
+    // Clear agent from current position
+    env->grid[four_rooms_grid_idx(env, env->agent_x, env->agent_y)] = EMPTY;
+
+    int new_x = env->agent_x;
+    int new_y = env->agent_y;
+    int new_dir = env->agent_dir;
+
+    if (action == LEFT) {
+        new_dir = (env->agent_dir + 3) % 4;
+    } else if (action == RIGHT) {
+        new_dir = (env->agent_dir + 1) % 4;
+    } else if (action == FORWARD) {
+        if (env->agent_dir == 0) new_x += 1;
+        else if (env->agent_dir == 1) new_y += 1;
+        else if (env->agent_dir == 2) new_x -= 1;
+        else if (env->agent_dir == 3) new_y -= 1;
+
+        // Check if move is valid
+        if (new_x >= 0 && new_x < env->size && new_y >= 0 && new_y < env->size &&
+            env->grid[four_rooms_grid_idx(env, new_x, new_y)] != WALL) {
+            env->agent_x = new_x;
+            env->agent_y = new_y;
+        }
+    }
+
+    env->agent_dir = new_dir;
+
+    // Check if agent reached goal
+    if (env->agent_x == env->goal_x && env->agent_y == env->goal_y) {
+        env->terminals[0] = 1;
+        env->rewards[0] = 1.0f - 0.9f * (float)env->tick / (float)env->max_steps;
+        env->episode_return += env->rewards[0];
+        add_log(env);
+        c_reset(env);
+        return;
+    }
+
+    // Place agent back on grid
+    env->grid[four_rooms_grid_idx(env, env->agent_x, env->agent_y)] = AGENT;
+
+    // Check timeout
+    if (env->tick >= env->max_steps) {
+        env->terminals[0] = 1;
+        env->rewards[0] = 0.0;
+        env->episode_return += env->rewards[0];
+        add_log(env);
+        c_reset(env);
+        return;
+    }
+
+    env->episode_return += env->rewards[0];
+    generate_observation(env);
+}
+
+// Draws the grid, the agent's view footprint and the agent sprite with
+// raylib, opening the window on first use. Exits the process on Escape.
+void c_render(FourRooms* env) {
+    if (!IsWindowReady()) {
+        InitWindow(32*env->size, 32*env->size, "PufferLib FourRooms");
+        SetTargetFPS(10);
+        env->puffers = LoadTexture("resources/shared/puffers_128.png");
+        env->texture_loaded = 1;
+    }
+
+    if (IsKeyDown(KEY_ESCAPE)) {
+        exit(0);
+    }
+
+    BeginDrawing();
+    ClearBackground(PUFF_BACKGROUND);
+
+    int px = 32;
+
+    // Draw the main grid
+    for (int y = 0; y < env->size; y++) {
+        for (int x = 0; x < env->size; x++) {
+            int cell = env->grid[y * env->size + x];
+            Color color = PUFF_BACKGROUND;
+
+            if (cell == WALL) color = PUFF_BACKGROUND2;
+            else if (cell == GOAL) color = PUFF_RED;
+
+            if (cell != EMPTY && cell != AGENT) {
+                DrawRectangle(x*px, y*px, px, px, color);
+            }
+        }
+    }
+
+    Color obs_overlay = (Color){180, 180, 180, 80};
+    for (int y = 0; y < FOUR_ROOMS_VIEW_SIZE; y++) {
+        for (int x = 0; x < FOUR_ROOMS_VIEW_SIZE; x++) {
+            int world_x, world_y;
+            observation_to_world(env, x, y, &world_x, &world_y);
+            if (world_x >= 0 && world_x < env->size && world_y >= 0 && world_y < env->size) {
+                DrawRectangle(world_x*px, world_y*px, px, px, obs_overlay);
+            }
+        }
+    }
+
+    int starting_sprite_x = 0;
+    int rotation = 90 * env->agent_dir;
+    if (rotation == 180) {
+        starting_sprite_x = 128;
+        rotation = 0;
+    }
+
+    DrawTexturePro(
+        env->puffers,
+        (Rectangle){starting_sprite_x, 0, 128, 128},
+        (Rectangle){
+            env->agent_x * px + px/2,
+            env->agent_y * px + px/2,
+            px,
+            px
+        },
+        (Vector2){px/2, px/2},
+        rotation,
+        WHITE
+    );
+
+    EndDrawing();
+}
+
+// Releases the window and texture (if opened by c_render) and frees the grid.
+void c_close(FourRooms* env) {
+    if (env->texture_loaded) {
+        UnloadTexture(env->puffers);
+        CloseWindow();
+        env->texture_loaded = 0;
+    }
+    if (env->grid) {
+        free(env->grid);
+        env->grid = NULL;
+    }
+}