@@ -12,13 +12,13 @@ num_envs = 64
 vision = 3
 num_agents = 4
 report_interval = 1
-reward_death_scale = 1.0
-reward_eat = 0
-reward_collect = 0
-timestep_reward = 0
-reward_steal = 0
-hp_reward_scale = 0
-held_food_reward_scale = 0
+reward_death_scale = 0.14320154190448353
+reward_eat = 0.6301939255961027
+reward_collect = 1
+timestep_reward = -0.001012632066429986
+reward_steal = -0.4586085627124764
+hp_reward_scale = 0.07674633247055918
+held_food_reward_scale = 0.2643837513970884
 food_base_spawn_rate = 1e-1
 
 [train]
@@ -27,21 +27,21 @@ checkpoint_interval = 100
 adam_beta1 = 0.9925640021442416
 adam_beta2 = 0.9
 adam_eps = 6.225983651908837e-10
-bptt_horizon = 32
+bptt_horizon = 64
 clip_coef = 1
 ent_coef = 0.000999567018772538
 gae_lambda = 0.9948939854010467
 gamma = 0.9997469057538332
-learning_rate = 0.008574057351505564
+learning_rate = 0.002574057351505564
 max_grad_norm = 1.8944338753964156
 max_minibatch_size = 32768
 minibatch_size = 65536
 prio_alpha = 0.9328200510590207
-prio_beta0 = 0.39879635697457694
-vf_clip_coef = 0.9894773751752602
+prio_beta0 = 0.9225942853355249
+vf_clip_coef = 0.1
 vf_coef = 0.6935921910790133
-vtrace_c_clip = 5
-vtrace_rho_clip = 4.6674443397379175
+vtrace_c_clip = 2.819851610841173
+vtrace_rho_clip = 5
 
 
 [sweep]
0 commit comments