@@ -115,6 +115,9 @@ struct Biome_idx {
115115
116116 int * dirt_idx ;
117117 int dirt_count ;
118+
119+ int * house_idx ;
120+ int house_count ;
118121};
119122
120123typedef struct Renderer Renderer ;
@@ -162,9 +165,11 @@ void init_biome_idx(PredPrey *env) {
162165 // I only do that once on load - will need to do that every reset if map changes
163166 env -> biome_idxs .grass_idx = (int * )calloc (env -> width * env -> height , sizeof (int ));
164167 env -> biome_idxs .dirt_idx = (int * )calloc (env -> width * env -> height , sizeof (int ));
168+ env -> biome_idxs .house_idx = (int * )calloc (env -> width * env -> height , sizeof (int ));
165169
166170 env -> biome_idxs .grass_count = 0 ;
167171 env -> biome_idxs .dirt_count = 0 ;
172+ env -> biome_idxs .house_count = 0 ;
168173
169174 for (int r = 0 ; r < env -> height ; r ++ ) {
170175 for (int c = 0 ; c < env -> width ; c ++ ) {
@@ -174,6 +179,8 @@ void init_biome_idx(PredPrey *env) {
174179 env -> biome_idxs .grass_idx [env -> biome_idxs .grass_count ++ ] = grid_idx ;
175180 } else if (tile == TILE_DIRT ) {
176181 env -> biome_idxs .dirt_idx [env -> biome_idxs .dirt_count ++ ] = grid_idx ;
182+ } else if (tile == TILE_HOUSE ) {
183+ env -> biome_idxs .house_idx [env -> biome_idxs .house_count ++ ] = grid_idx ;
177184 }
178185 }
179186 }
@@ -189,6 +196,22 @@ void add_log(PredPrey *env, Log *log) {
189196 env -> log .n += 1 ;
190197}
191198
199+ void add_agent_log (PredPrey * env , int agent_id ) {
200+ int time_alive = env -> tick - env -> agents [agent_id ].start_tick ;
201+ assert (time_alive > 0 );
202+ env -> agent_logs [agent_id ].score = time_alive ;
203+ env -> agent_logs [agent_id ].steals /= time_alive ;
204+ env -> agent_logs [agent_id ].collects /= time_alive ;
205+ add_log (env , & env -> agent_logs [agent_id ]);
206+
207+ //I don't fully reset because the agent might not be dead yet
208+ //So I still need to keep track of collect & steal counts
209+ //episode_returns will accumulate over life
210+ //score will be overwritten next time
211+ env -> agent_logs [agent_id ].steals *= time_alive ;
212+ env -> agent_logs [agent_id ].collects *= time_alive ;
213+ }
214+
192215void init_cenv (PredPrey * env ) {
193216 env -> agents = (Agent * )calloc (env -> num_agents , sizeof (Agent ));
194217 env -> vision_window = 2 * env -> vision + 1 ;
@@ -205,6 +228,10 @@ void init_cenv(PredPrey *env) {
205228 env -> terrain = (unsigned char * )calloc (env -> width * env -> height , sizeof (unsigned char ));
206229 env -> items = (unsigned char * )calloc (env -> width * env -> height , sizeof (unsigned char ));
207230 env -> pids = (short * )calloc (env -> width * env -> height , sizeof (short ));
231+
232+ // make_grid_from_scratch(env);
233+ memcpy (env -> terrain , terrain , env -> width * env -> height * sizeof (unsigned char ));
234+ init_biome_idx (env );
208235}
209236
210237void allocate_cenv (PredPrey * env ) {
@@ -223,6 +250,7 @@ void allocate_cenv(PredPrey *env) {
223250void free_biome (PredPrey * env ) {
224251 free (env -> biome_idxs .grass_idx );
225252 free (env -> biome_idxs .dirt_idx );
253+ free (env -> biome_idxs .house_idx );
226254}
227255
228256void c_close (PredPrey * env ) {
@@ -253,6 +281,19 @@ void reward_agent(PredPrey *env, int agent_id, float reward) {
253281 env -> agent_logs [agent_id ].episode_return += reward ;
254282}
255283
284+ bool is_obstacle (PredPrey * env , int idx ) {
285+ int tile = env -> terrain [idx ];
286+ if (tile == TILE_WATER ) {
287+ return true;
288+ }
289+
290+ short entity_id = env -> pids [idx ];
291+ if (entity_id != -1 ){
292+ return true;
293+ }
294+ return false;
295+ }
296+
256297void init_foods (PredPrey * env ) {
257298 // Fill dirt area with food
258299 for (int i = 0 ; i < env -> biome_idxs .dirt_count ; i ++ ) {
@@ -329,7 +370,11 @@ void regrow_food(PredPrey *env){
329370 // Regrow food in all dirt tiles that do not have food already with some probability
330371 for (int i = 0 ; i < env -> biome_idxs .dirt_count ; i ++ ) {
331372 int grid_idx = env -> biome_idxs .dirt_idx [i ];
332- if (env -> items [grid_idx ] == EMPTY && rand () / (double )RAND_MAX < env -> food_base_spawn_rate ) {
373+ if (
374+ env -> items [grid_idx ] == EMPTY &&
375+ rand () / (double )RAND_MAX < env -> food_base_spawn_rate &&
376+ env -> food_count < env -> max_food
377+ ) {
333378 env -> items [grid_idx ] = ITEM_FOOD ;
334379 env -> food_count += 1 ;
335380 }
@@ -401,10 +446,9 @@ void add_hp(PredPrey *env, int agent_id, float hp) {
401446 agent -> hp = MAX_HP ;
402447 } else if (agent -> hp <= 0 ) {
403448 agent -> hp = 0 ;
404- env -> agent_logs [agent -> id ].score = env -> tick - agent -> start_tick ;
405449 reward_agent (env , agent_id , REWARD_DEATH );
406450 env -> terminals [agent -> id ] = 1 ;
407- add_log (env , & env -> agent_logs [ agent_id ]);
451+ add_agent_log (env , agent_id );
408452 remove_agent (env , agent_id );
409453 env -> last_agent_dead_tick = env -> tick ;
410454 }
@@ -490,41 +534,49 @@ void make_grid_from_scratch(PredPrey *env){
490534 save_terrain_to_file (env , "terrain.h" );
491535}
492536
493- void spawn_agent (PredPrey * env , int i ){
494- Agent * agent = & env -> agents [i ];
495- agent -> id = i ;
537+ void spawn_agent (PredPrey * env , int agent_id ){
538+ Agent * agent = & env -> agents [agent_id ];
539+ agent -> id = agent_id ;
496540 agent -> hp = 100 ;
497541 agent -> start_tick = env -> tick ;
498542 agent -> held_food = 0 ;
499543
544+ // Spawn only in the house area
500545 int adr = 0 ;
501-
502- bool allocated = false;
503- while (!allocated ) {
504- adr = rand () % (env -> height * env -> width );
505- if (env -> pids [adr ] == -1 && env -> terrain [adr ] != TILE_WATER ) {
506- int r = adr / env -> width ;
507- int c = adr % env -> width ;
508- agent -> r = r ;
509- agent -> c = c ;
510- allocated = true;
546+ for (int i = 0 ; i < env -> biome_idxs .house_count ; i ++ ) {
547+ adr = env -> biome_idxs .house_idx [i ];
548+ if (is_obstacle (env , adr )){
549+ continue ;
511550 }
551+ int r = adr / env -> width ;
552+ int c = adr % env -> width ;
553+ agent -> r = r ;
554+ agent -> c = c ;
555+ break ;
512556 }
557+
558+ // bool allocated = false;
559+ // while (!allocated) {
560+ // adr = rand() % (env->height * env->width);
561+ // if (!is_obstacle(env, adr)) {
562+ // int r = adr / env->width;
563+ // int c = adr % env->width;
564+ // agent->r = r;
565+ // agent->c = c;
566+ // allocated = true;
567+ // }
568+ // }
513569 assert (env -> pids [adr ] == -1 );
514570 env -> pids [adr ] = agent -> id ;
515- env -> agent_logs [i ] = (Log ){0 };
571+ env -> agent_logs [agent_id ] = (Log ){0 };
516572}
517573void c_reset (PredPrey * env ) {
574+
518575 env -> tick = 0 ;
576+ env -> last_agent_dead_tick = 0 ;
577+
519578 memset (env -> agent_logs , 0 , env -> num_agents * sizeof (Log ));
520579 env -> log = (Log ){0 };
521- env -> food_count = 0 ;
522- // env->foods->size = 0;
523- // memset(env->foods->indexes, 0, env->width * env->height * sizeof(int));
524-
525- // make_grid_from_scratch(env);
526- memcpy (env -> terrain , terrain , env -> width * env -> height * sizeof (unsigned char ));
527- init_biome_idx (env ); // TODO dump that for current map & memcpy
528580
529581 memset (env -> items , EMPTY , env -> width * env -> height * sizeof (unsigned char ));
530582 // Careful here, but -1 works with memset
@@ -534,28 +586,18 @@ void c_reset(PredPrey *env) {
534586 spawn_agent (env , i );
535587 }
536588
589+ env -> food_count = 0 ;
537590 init_foods (env );
591+
538592 memset (env -> observations , 0 , env -> num_agents * env -> obs_size * sizeof (float ));
539593 memset (env -> terminals , 0 , env -> num_agents * sizeof (unsigned char ));
540594 memset (env -> masks , 1 , env -> num_agents * sizeof (unsigned char ));
541- compute_observations (env );
542- }
543595
544- bool is_obstacle (PredPrey * env , int idx ) {
545- int tile = env -> terrain [idx ];
546- if (tile == TILE_WATER ) {
547- return true;
548- }
549-
550- short entity_id = env -> pids [idx ];
551- if (entity_id != -1 ){
552- return true;
553- }
554- return false;
596+ compute_observations (env );
555597}
556598
557599void step_agent (PredPrey * env , int i ) {
558-
600+
559601 Agent * agent = & env -> agents [i ];
560602
561603 int action = env -> actions [i ];
@@ -591,15 +633,15 @@ void step_agent(PredPrey *env, int i) {
591633 int next_r = agent -> r + dr ;
592634 int next_c = agent -> c + dc ;
593635
594- int prev_grid_idx = flat_idx (env , agent -> r , agent -> c );
636+ int curr_grid_idx = flat_idx (env , agent -> r , agent -> c );
595637 int next_grid_idx = flat_idx (env , next_r , next_c );
596638 if (is_obstacle (env , next_grid_idx )) {
597- next_grid_idx = prev_grid_idx ;
639+ next_grid_idx = curr_grid_idx ;
598640 next_r = agent -> r ;
599641 next_c = agent -> c ;
600642 }
601643 // update the grid tiles values
602- env -> pids [prev_grid_idx ] = -1 ;
644+ env -> pids [curr_grid_idx ] = -1 ;
603645 env -> pids [next_grid_idx ] = agent -> id ;
604646 agent -> r = next_r ;
605647 agent -> c = next_c ;
@@ -625,9 +667,9 @@ void step_agent(PredPrey *env, int i) {
625667 break ;
626668 }
627669
628- int facing_tile = env -> pids [facing_tile_idx ];
629- if (facing_tile != -1 ) {
630- Agent * other_agent = & env -> agents [facing_tile ];
670+ int facing_agent = env -> pids [facing_tile_idx ];
671+ if (facing_agent != -1 ) {
672+ Agent * other_agent = & env -> agents [facing_agent ];
631673 // Steal food from other agent
632674 if (other_agent -> held_food > 0 ) {
633675 agent -> held_food = other_agent -> held_food ;
@@ -637,26 +679,27 @@ void step_agent(PredPrey *env, int i) {
637679 }
638680 }
639681
640- if (env -> items [next_grid_idx ] == ITEM_FOOD ) {
682+ if (env -> items [curr_grid_idx ] == ITEM_FOOD ) {
683+ if (agent -> held_food >= MAX_INVENTORY_ITEM ) {
684+ return ;
685+ }
641686 // Pick up food
642687 agent -> held_food += 1 ;
643- if (agent -> held_food > MAX_INVENTORY_ITEM ) {
644- agent -> held_food = MAX_INVENTORY_ITEM ;
645- }
646- env -> items [next_grid_idx ] = EMPTY ;
688+ env -> items [curr_grid_idx ] = EMPTY ;
647689 env -> food_count -= 1 ;
648690 env -> agent_logs [i ].collects += 1 ;
649691 agent -> anim = ANIM_INTERACT ;
650692 }
651693 }
652694
653695 if (action == EAT ) {
654- if (agent -> held_food > 0 ) {
655- agent -> held_food -= 1 ;
656- add_hp (env , i , HP_REWARD_FOOD );
657- reward_agent (env , i , env -> reward_food );
658- agent -> anim = ANIM_EAT ;
696+ if (agent -> held_food <= 0 ) {
697+ return ;
659698 }
699+ agent -> held_food -= 1 ;
700+ add_hp (env , i , HP_REWARD_FOOD );
701+ reward_agent (env , i , env -> reward_food );
702+ agent -> anim = ANIM_EAT ;
660703 }
661704 return ;
662705}
@@ -667,18 +710,24 @@ void c_step(PredPrey *env) {
667710 memset (env -> rewards , 0 , env -> num_agents * sizeof (float ));
668711
669712 for (int i = 0 ; i < env -> num_agents ; i ++ ) {
670- if (env -> agents [i ].hp = = 0 ) {
713+ if (env -> agents [i ].hp < = 0 ) {
671714 spawn_agent (env , i );
672715 continue ;
673716 }
674717 step_agent (env , i );
675718 remove_hp (env , i , HP_LOSS_PER_STEP );
719+ if ((env -> tick - env -> agents [i ].start_tick ) % MAX_TIMESTEPS == 0 ) {
720+ add_agent_log (env , i );
721+ }
676722 }
677723
678- if (env -> tick - env -> last_agent_dead_tick >= MAX_TIMESTEPS ) {
679- c_reset (env );
680- return ;
681- }
724+ // if (env->tick - env->last_agent_dead_tick >= MAX_TIMESTEPS) {
725+ // for (int i = 0; i < env->num_agents; i++) {
726+ // add_agent_log(env, i);
727+ // }
728+ // env->last_agent_dead_tick = env->tick;
729+ // }
730+
682731 spawn_items (env );
683732 compute_observations (env );
684733}
0 commit comments