Skip to content

Commit 758eb52

Browse files
committed
improve logs & agents spawn in house
1 parent 5c991de commit 758eb52

1 file changed

Lines changed: 108 additions & 59 deletions

File tree

pufferlib/ocean/predprey/predprey.h

Lines changed: 108 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,9 @@ struct Biome_idx {
115115

116116
int *dirt_idx;
117117
int dirt_count;
118+
119+
int *house_idx;
120+
int house_count;
118121
};
119122

120123
typedef struct Renderer Renderer;
@@ -162,9 +165,11 @@ void init_biome_idx(PredPrey *env) {
162165
// I only do that once on load - will need to do that every reset if map changes
163166
env->biome_idxs.grass_idx = (int *)calloc(env->width * env->height, sizeof(int));
164167
env->biome_idxs.dirt_idx = (int *)calloc(env->width * env->height, sizeof(int));
168+
env->biome_idxs.house_idx = (int *)calloc(env->width * env->height, sizeof(int));
165169

166170
env->biome_idxs.grass_count = 0;
167171
env->biome_idxs.dirt_count = 0;
172+
env->biome_idxs.house_count = 0;
168173

169174
for (int r = 0; r < env->height; r++) {
170175
for (int c = 0; c < env->width; c++) {
@@ -174,6 +179,8 @@ void init_biome_idx(PredPrey *env) {
174179
env->biome_idxs.grass_idx[env->biome_idxs.grass_count++] = grid_idx;
175180
} else if (tile == TILE_DIRT) {
176181
env->biome_idxs.dirt_idx[env->biome_idxs.dirt_count++] = grid_idx;
182+
} else if (tile == TILE_HOUSE) {
183+
env->biome_idxs.house_idx[env->biome_idxs.house_count++] = grid_idx;
177184
}
178185
}
179186
}
@@ -189,6 +196,22 @@ void add_log(PredPrey *env, Log *log) {
189196
env->log.n += 1;
190197
}
191198

199+
/*
 * Report one agent's statistics through add_log().
 *
 * The agent's score is set to the number of ticks elapsed since its
 * start_tick, and its steal/collect counters are reported as per-tick rates.
 *
 * The rates are computed on a local copy so the running counters stored in
 * env->agent_logs[agent_id] are never modified: the agent may still be alive
 * and must keep accumulating raw counts. (Dividing in place and multiplying
 * back afterwards loses precision, and truncates for integer fields.)
 * episode_return keeps accumulating over the agent's life and score is
 * overwritten on the next call.
 */
void add_agent_log(PredPrey *env, int agent_id) {
    int time_alive = env->tick - env->agents[agent_id].start_tick;
    assert(time_alive > 0);
    if (time_alive <= 0) {
        // Release builds (NDEBUG): avoid dividing by zero below.
        return;
    }

    env->agent_logs[agent_id].score = time_alive;

    // Normalize a snapshot, leaving the live counters untouched.
    Log snapshot = env->agent_logs[agent_id];
    snapshot.steals /= time_alive;
    snapshot.collects /= time_alive;
    add_log(env, &snapshot);
}
214+
192215
void init_cenv(PredPrey *env) {
193216
env->agents = (Agent *)calloc(env->num_agents, sizeof(Agent));
194217
env->vision_window = 2 * env->vision + 1;
@@ -205,6 +228,10 @@ void init_cenv(PredPrey *env) {
205228
env->terrain = (unsigned char *)calloc(env->width * env->height, sizeof(unsigned char));
206229
env->items = (unsigned char *)calloc(env->width * env->height, sizeof(unsigned char));
207230
env->pids = (short *)calloc(env->width * env->height, sizeof(short));
231+
232+
// make_grid_from_scratch(env);
233+
memcpy(env->terrain, terrain, env->width * env->height * sizeof(unsigned char));
234+
init_biome_idx(env);
208235
}
209236

210237
void allocate_cenv(PredPrey *env) {
@@ -223,6 +250,7 @@ void allocate_cenv(PredPrey *env) {
223250
/*
 * Release the per-biome tile index arrays (grass, dirt, house).
 *
 * Each pointer is reset to NULL and each count to zero after freeing, so a
 * repeated call is harmless (free(NULL) is a no-op) and no stale index can
 * be read through a dangling pointer.
 */
void free_biome(PredPrey *env) {
    free(env->biome_idxs.grass_idx);
    env->biome_idxs.grass_idx = NULL;
    env->biome_idxs.grass_count = 0;

    free(env->biome_idxs.dirt_idx);
    env->biome_idxs.dirt_idx = NULL;
    env->biome_idxs.dirt_count = 0;

    free(env->biome_idxs.house_idx);
    env->biome_idxs.house_idx = NULL;
    env->biome_idxs.house_count = 0;
}
227255

228256
void c_close(PredPrey *env) {
@@ -253,6 +281,19 @@ void reward_agent(PredPrey *env, int agent_id, float reward) {
253281
env->agent_logs[agent_id].episode_return += reward;
254282
}
255283

284+
/*
 * Tell whether the tile at flat grid index `idx` blocks movement/spawning.
 *
 * A tile is an obstacle when its terrain is TILE_WATER, or when env->pids
 * holds anything other than -1 for it (i.e. an entity already stands there).
 * The caller is responsible for passing an in-range index.
 */
bool is_obstacle(PredPrey *env, int idx) {
    // Short-circuit: pids is only consulted for non-water tiles.
    return env->terrain[idx] == TILE_WATER || env->pids[idx] != -1;
}
296+
256297
void init_foods(PredPrey *env) {
257298
// Fill dirt area with food
258299
for (int i = 0; i < env->biome_idxs.dirt_count; i++) {
@@ -329,7 +370,11 @@ void regrow_food(PredPrey *env){
329370
// Regrow food in all dirt tiles that do not have food already with some probability
330371
for (int i = 0; i < env->biome_idxs.dirt_count; i++) {
331372
int grid_idx = env->biome_idxs.dirt_idx[i];
332-
if (env->items[grid_idx] == EMPTY && rand() / (double)RAND_MAX < env->food_base_spawn_rate) {
373+
if (
374+
env->items[grid_idx] == EMPTY &&
375+
rand() / (double)RAND_MAX < env->food_base_spawn_rate &&
376+
env->food_count < env->max_food
377+
) {
333378
env->items[grid_idx] = ITEM_FOOD;
334379
env->food_count += 1;
335380
}
@@ -401,10 +446,9 @@ void add_hp(PredPrey *env, int agent_id, float hp) {
401446
agent->hp = MAX_HP;
402447
} else if (agent->hp <= 0) {
403448
agent->hp = 0;
404-
env->agent_logs[agent->id].score = env->tick - agent->start_tick;
405449
reward_agent(env, agent_id, REWARD_DEATH);
406450
env->terminals[agent->id] = 1;
407-
add_log(env, &env->agent_logs[agent_id]);
451+
add_agent_log(env, agent_id);
408452
remove_agent(env, agent_id);
409453
env->last_agent_dead_tick = env->tick;
410454
}
@@ -490,41 +534,49 @@ void make_grid_from_scratch(PredPrey *env){
490534
save_terrain_to_file(env, "terrain.h");
491535
}
492536

493-
/*
 * (Re)spawn agent `agent_id`: reset its hp, start tick and inventory, place
 * it on a free tile, register it in env->pids and clear its per-agent log.
 *
 * Placement prefers the house area: the first tile listed in
 * biome_idxs.house_idx that is not an obstacle (see is_obstacle: water or
 * already occupied) is used. When every house tile is taken, or the map has
 * no house tile at all, the whole grid is scanned once from a random offset
 * so the agent still receives a valid, unoccupied position instead of
 * landing on top of another agent.
 */
void spawn_agent(PredPrey *env, int agent_id){
    Agent *agent = &env->agents[agent_id];
    agent->id = agent_id;
    agent->hp = 100;
    agent->start_tick = env->tick;
    agent->held_food = 0;

    int adr = -1;  // -1 means "no free tile found yet"

    // Spawn only in the house area when possible
    for (int i = 0; i < env->biome_idxs.house_count; i++) {
        int candidate = env->biome_idxs.house_idx[i];
        if (!is_obstacle(env, candidate)) {
            adr = candidate;
            break;
        }
    }

    // Fallback: house full or absent. Bounded scan of the whole grid,
    // starting at a random tile so fallback spawns are spread out.
    int num_tiles = env->width * env->height;
    if (adr == -1 && num_tiles > 0) {
        int start = rand() % num_tiles;
        for (int k = 0; k < num_tiles; k++) {
            int candidate = (start + k) % num_tiles;
            if (!is_obstacle(env, candidate)) {
                adr = candidate;
                break;
            }
        }
    }

    // The grid must offer at least one free, non-water tile.
    assert(adr != -1);
    assert(env->pids[adr] == -1);

    agent->r = adr / env->width;
    agent->c = adr % env->width;
    env->pids[adr] = agent->id;
    env->agent_logs[agent_id] = (Log){0};
}
517573
void c_reset(PredPrey *env) {
574+
518575
env->tick = 0;
576+
env->last_agent_dead_tick = 0;
577+
519578
memset(env->agent_logs, 0, env->num_agents * sizeof(Log));
520579
env->log = (Log){0};
521-
env->food_count = 0;
522-
// env->foods->size = 0;
523-
// memset(env->foods->indexes, 0, env->width * env->height * sizeof(int));
524-
525-
// make_grid_from_scratch(env);
526-
memcpy(env->terrain, terrain, env->width * env->height * sizeof(unsigned char));
527-
init_biome_idx(env); // TODO dump that for current map & memcpy
528580

529581
memset(env->items, EMPTY, env->width * env->height * sizeof(unsigned char));
530582
// Careful here, but -1 works with memset
@@ -534,28 +586,18 @@ void c_reset(PredPrey *env) {
534586
spawn_agent(env, i);
535587
}
536588

589+
env->food_count = 0;
537590
init_foods(env);
591+
538592
memset(env->observations, 0, env->num_agents * env->obs_size * sizeof(float));
539593
memset(env->terminals, 0, env->num_agents * sizeof(unsigned char));
540594
memset(env->masks, 1, env->num_agents * sizeof(unsigned char));
541-
compute_observations(env);
542-
}
543595

544-
bool is_obstacle(PredPrey *env, int idx) {
545-
int tile = env->terrain[idx];
546-
if (tile == TILE_WATER) {
547-
return true;
548-
}
549-
550-
short entity_id = env->pids[idx];
551-
if (entity_id != -1){
552-
return true;
553-
}
554-
return false;
596+
compute_observations(env);
555597
}
556598

557599
void step_agent(PredPrey *env, int i) {
558-
600+
559601
Agent *agent = &env->agents[i];
560602

561603
int action = env->actions[i];
@@ -591,15 +633,15 @@ void step_agent(PredPrey *env, int i) {
591633
int next_r = agent->r + dr;
592634
int next_c = agent->c + dc;
593635

594-
int prev_grid_idx = flat_idx(env, agent->r, agent->c);
636+
int curr_grid_idx = flat_idx(env, agent->r, agent->c);
595637
int next_grid_idx = flat_idx(env, next_r, next_c);
596638
if (is_obstacle(env, next_grid_idx)) {
597-
next_grid_idx = prev_grid_idx;
639+
next_grid_idx = curr_grid_idx;
598640
next_r = agent->r;
599641
next_c = agent->c;
600642
}
601643
// update the grid tiles values
602-
env->pids[prev_grid_idx] = -1;
644+
env->pids[curr_grid_idx] = -1;
603645
env->pids[next_grid_idx] = agent->id;
604646
agent->r = next_r;
605647
agent->c = next_c;
@@ -625,9 +667,9 @@ void step_agent(PredPrey *env, int i) {
625667
break;
626668
}
627669

628-
int facing_tile = env->pids[facing_tile_idx];
629-
if (facing_tile != -1) {
630-
Agent *other_agent = &env->agents[facing_tile];
670+
int facing_agent = env->pids[facing_tile_idx];
671+
if (facing_agent != -1) {
672+
Agent *other_agent = &env->agents[facing_agent];
631673
// Steal food from other agent
632674
if (other_agent->held_food > 0) {
633675
agent->held_food = other_agent->held_food;
@@ -637,26 +679,27 @@ void step_agent(PredPrey *env, int i) {
637679
}
638680
}
639681

640-
if (env->items[next_grid_idx] == ITEM_FOOD) {
682+
if (env->items[curr_grid_idx] == ITEM_FOOD) {
683+
if (agent->held_food >= MAX_INVENTORY_ITEM) {
684+
return;
685+
}
641686
// Pick up food
642687
agent->held_food += 1;
643-
if (agent->held_food > MAX_INVENTORY_ITEM) {
644-
agent->held_food = MAX_INVENTORY_ITEM;
645-
}
646-
env->items[next_grid_idx] = EMPTY;
688+
env->items[curr_grid_idx] = EMPTY;
647689
env->food_count -= 1;
648690
env->agent_logs[i].collects += 1;
649691
agent->anim = ANIM_INTERACT;
650692
}
651693
}
652694

653695
if (action == EAT) {
654-
if (agent->held_food > 0) {
655-
agent->held_food -= 1;
656-
add_hp(env, i, HP_REWARD_FOOD);
657-
reward_agent(env, i, env->reward_food);
658-
agent->anim = ANIM_EAT;
696+
if (agent->held_food <= 0) {
697+
return;
659698
}
699+
agent->held_food -= 1;
700+
add_hp(env, i, HP_REWARD_FOOD);
701+
reward_agent(env, i, env->reward_food);
702+
agent->anim = ANIM_EAT;
660703
}
661704
return;
662705
}
@@ -667,18 +710,24 @@ void c_step(PredPrey *env) {
667710
memset(env->rewards, 0, env->num_agents * sizeof(float));
668711

669712
for (int i = 0; i < env->num_agents; i++) {
670-
if (env->agents[i].hp == 0) {
713+
if (env->agents[i].hp <= 0) {
671714
spawn_agent(env, i);
672715
continue;
673716
}
674717
step_agent(env, i);
675718
remove_hp(env, i, HP_LOSS_PER_STEP);
719+
if ((env->tick - env->agents[i].start_tick) % MAX_TIMESTEPS == 0) {
720+
add_agent_log(env, i);
721+
}
676722
}
677723

678-
if (env->tick - env->last_agent_dead_tick >= MAX_TIMESTEPS) {
679-
c_reset(env);
680-
return;
681-
}
724+
// if (env->tick - env->last_agent_dead_tick >= MAX_TIMESTEPS) {
725+
// for (int i = 0; i < env->num_agents; i++) {
726+
// add_agent_log(env, i);
727+
// }
728+
// env->last_agent_dead_tick = env->tick;
729+
// }
730+
682731
spawn_items(env);
683732
compute_observations(env);
684733
}

0 commit comments

Comments
 (0)