Skip to content

Commit 0fb6067

Browse files
committed
use best sweeps in conf
1 parent 0a8e134 commit 0fb6067

1 file changed

Lines changed: 13 additions & 13 deletions

File tree

pufferlib/config/ocean/predprey.ini

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -12,13 +12,13 @@ num_envs = 64
1212
vision = 3
1313
num_agents = 4
1414
report_interval = 1
15-
reward_death_scale = 1.0
16-
reward_eat = 0
17-
reward_collect = 0
18-
timestep_reward = 0
19-
reward_steal = 0
20-
hp_reward_scale = 0
21-
held_food_reward_scale = 0
15+
reward_death_scale = 0.14320154190448353
16+
reward_eat = 0.6301939255961027
17+
reward_collect = 1
18+
timestep_reward = -0.001012632066429986
19+
reward_steal = -0.4586085627124764
20+
hp_reward_scale = 0.07674633247055918
21+
held_food_reward_scale = 0.2643837513970884
2222
food_base_spawn_rate = 1e-1
2323

2424
[train]
@@ -27,21 +27,21 @@ checkpoint_interval = 100
2727
adam_beta1 = 0.9925640021442416
2828
adam_beta2 = 0.9
2929
adam_eps = 6.225983651908837e-10
30-
bptt_horizon = 32
30+
bptt_horizon = 64
3131
clip_coef = 1
3232
ent_coef = 0.000999567018772538
3333
gae_lambda = 0.9948939854010467
3434
gamma = 0.9997469057538332
35-
learning_rate = 0.008574057351505564
35+
learning_rate = 0.002574057351505564
3636
max_grad_norm = 1.8944338753964156
3737
max_minibatch_size = 32768
3838
minibatch_size = 65536
3939
prio_alpha = 0.9328200510590207
40-
prio_beta0 = 0.39879635697457694
41-
vf_clip_coef = 0.9894773751752602
40+
prio_beta0 = 0.9225942853355249
41+
vf_clip_coef = 0.1
4242
vf_coef = 0.6935921910790133
43-
vtrace_c_clip = 5
44-
vtrace_rho_clip = 4.6674443397379175
43+
vtrace_c_clip = 2.819851610841173
44+
vtrace_rho_clip = 5
4545

4646

4747
[sweep]

0 commit comments

Comments (0)