Skip to content

Commit b16c395

Browse files
suphoffcinjon
authored and committed
Fix step function to not bounce back agents after successful kick.
Undo bomb kicks if the agent's move needs to be reversed. Undo bomb movements after late collisions due to failed kicks (a failed kick by another agent causes a new collision).
1 parent a1a97b8 commit b16c395

1 file changed

Lines changed: 46 additions & 26 deletions

File tree

pommerman/forward_model.py

Lines changed: 46 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -182,21 +182,21 @@ def step(actions,
182182
# Gather desired next positions for moving bombs. Handle kicks later.
183183
desired_bomb_positions = [bomb.position for bomb in curr_bombs]
184184

185-
for bomb_num, bomb in enumerate(curr_bombs):
185+
for num_bomb, bomb in enumerate(curr_bombs):
186186
curr_board[bomb.position] = constants.Item.Passage.value
187187
if bomb.is_moving():
188188
desired_position = utility.get_next_position(
189189
bomb.position, bomb.moving_direction)
190190
if utility.position_on_board(curr_board, desired_position) \
191191
and not utility.position_is_powerup(curr_board, desired_position) \
192192
and not utility.position_is_wall(curr_board, desired_position):
193-
desired_bomb_positions[bomb_num] = desired_position
193+
desired_bomb_positions[num_bomb] = desired_position
194194

195195
# Position switches:
196196
# Agent <-> Agent => revert both to previous position.
197197
# Bomb <-> Bomb => revert both to previous position.
198198
# Agent <-> Bomb => revert Bomb to previous position.
199-
crossings = dict()
199+
crossings = {}
200200

201201
def crossing(current, desired):
202202
current_x, current_y = current
@@ -220,19 +220,19 @@ def crossing(current, desired):
220220
else:
221221
crossings[border] = (num_agent, True)
222222

223-
for bomb_num, bomb in enumerate(curr_bombs):
224-
if desired_bomb_positions[bomb_num] != bomb.position:
225-
desired_position = desired_bomb_positions[bomb_num]
223+
for num_bomb, bomb in enumerate(curr_bombs):
224+
if desired_bomb_positions[num_bomb] != bomb.position:
225+
desired_position = desired_bomb_positions[num_bomb]
226226
border = crossing(bomb.position, desired_position)
227227
if border in crossings:
228228
# Crossed - revert to prior position.
229-
desired_bomb_positions[bomb_num] = bomb.position
229+
desired_bomb_positions[num_bomb] = bomb.position
230230
num, isAgent = crossings[border]
231231
if not isAgent:
232232
# Crossed bomb - revert that to prior position as well.
233233
desired_bomb_positions[num] = curr_bombs[num].position
234234
else:
235-
crossings[border] = (bomb_num, False)
235+
crossings[border] = (num_bomb, False)
236236

237237
# Deal with multiple agents or multiple bomb collisions on desired next
238238
# position by resetting desired position to current position for
@@ -260,24 +260,25 @@ def crossing(current, desired):
260260
agent_occupancy[curr_position] += 1
261261
change = True
262262

263-
for bomb_num, bomb in enumerate(curr_bombs):
264-
desired_position = desired_bomb_positions[bomb_num]
263+
for num_bomb, bomb in enumerate(curr_bombs):
264+
desired_position = desired_bomb_positions[num_bomb]
265265
curr_position = bomb.position
266266
if desired_position != curr_position and \
267267
(bomb_occupancy[desired_position] > 1 or agent_occupancy[desired_position] > 1):
268-
desired_bomb_positions[bomb_num] = curr_position
268+
desired_bomb_positions[num_bomb] = curr_position
269269
bomb_occupancy[curr_position] += 1
270270
change = True
271271

272272
# Handle kicks.
273-
bombs_kicked_by = dict()
273+
agent_indexed_by_kicked_bomb = {}
274+
kicked_bomb_indexed_by_agent = {}
274275
delayed_bomb_updates = []
275276
delayed_agent_updates = []
276277

277278
# Loop through all bombs to see if they need a good kicking or cause
278279
# collisions with an agent.
279-
for bomb_num, bomb in enumerate(curr_bombs):
280-
desired_position = desired_bomb_positions[bomb_num]
280+
for num_bomb, bomb in enumerate(curr_bombs):
281+
desired_position = desired_bomb_positions[num_bomb]
281282

282283
if agent_occupancy[desired_position] == 0:
283284
# There was never an agent around to kick or collide.
@@ -299,7 +300,7 @@ def crossing(current, desired):
299300
if desired_position != bomb.position:
300301
# Bomb moved, but agent did not. The bomb should revert
301302
# and stop.
302-
delayed_bomb_updates.append((bomb_num, bomb.position))
303+
delayed_bomb_updates.append((num_bomb, bomb.position))
303304
continue
304305

305306
# NOTE: At this point, we have that the agent in question tried to
@@ -308,7 +309,7 @@ def crossing(current, desired):
308309
# If we move the agent at this point, then we risk having two
309310
# agents on a square in future iterations of the loop. So we
310311
# push this change to the next stage instead.
311-
delayed_bomb_updates.append((bomb_num, bomb.position))
312+
delayed_bomb_updates.append((num_bomb, bomb.position))
312313
delayed_agent_updates.append((num_agent, agent.position))
313314
continue
314315

@@ -323,16 +324,20 @@ def crossing(current, desired):
323324
not utility.position_is_wall(curr_board, target_position):
324325
# Ok to update bomb desired location as we won't iterate over it again here
325326
# but we can not update bomb_occupancy on target position and need to check it again
326-
delayed_bomb_updates.append((bomb_num, target_position))
327-
bombs_kicked_by[bomb_num] = num_agent
327+
# However we need to set the bomb count on the current position to zero so
328+
# that the agent can stay on this position.
329+
bomb_occupancy[desired_position] = 0
330+
delayed_bomb_updates.append((num_bomb, target_position))
331+
agent_indexed_by_kicked_bomb[num_bomb] = num_agent
332+
kicked_bomb_indexed_by_agent[num_agent] = num_bomb
328333
bomb.moving_direction = direction
329334
# Bombs may still collide and we then need to reverse bomb and agent ..
330335
else:
331-
delayed_bomb_updates.append((bomb_num, bomb.position))
336+
delayed_bomb_updates.append((num_bomb, bomb.position))
332337
delayed_agent_updates.append((num_agent, agent.position))
333338

334-
for (bomb_num, bomb_position) in delayed_bomb_updates:
335-
desired_bomb_positions[bomb_num] = bomb_position
339+
for (num_bomb, bomb_position) in delayed_bomb_updates:
340+
desired_bomb_positions[num_bomb] = bomb_position
336341
bomb_occupancy[bomb_position] += 1
337342
change = True
338343

@@ -346,8 +351,20 @@ def crossing(current, desired):
346351
for num_agent, agent in enumerate(alive_agents):
347352
desired_position = desired_agent_positions[num_agent]
348353
curr_position = agent.position
354+
# Agents and bombs can only share a square if they are both in their
355+
# original position (Agent dropped bomb and has not moved)
349356
if desired_position != curr_position and \
350357
(agent_occupancy[desired_position] > 1 or bomb_occupancy[desired_position] != 0):
358+
# Late collisions resulting from failed kicks force this agent to stay at the
359+
# original position. Check if this agent successfully kicked a bomb above and undo
360+
# the kick.
361+
if num_agent in kicked_bomb_indexed_by_agent:
362+
num_bomb = kicked_bomb_indexed_by_agent[num_agent]
363+
bomb = curr_bombs[num_bomb]
364+
desired_bomb_positions[num_bomb] = bomb.position
365+
bomb_occupancy[bomb.position] += 1
366+
del agent_indexed_by_kicked_bomb[num_bomb]
367+
del kicked_bomb_indexed_by_agent[num_agent]
351368
desired_agent_positions[num_agent] = curr_position
352369
agent_occupancy[curr_position] += 1
353370
change = True
@@ -360,25 +377,28 @@ def crossing(current, desired):
360377
# original location it moved from. If it is blocked now, it
361378
# can't be kicked and the agent needs to move back to stay
362379
# consistent with other movements.
363-
if desired_position == curr_position and num_bomb not in bombs_kicked_by:
380+
if desired_position == curr_position and num_bomb not in agent_indexed_by_kicked_bomb:
364381
continue
365382

366383
bomb_occupancy_ = bomb_occupancy[desired_position]
367384
agent_occupancy_ = agent_occupancy[desired_position]
368-
if bomb_occupancy_ > 1 or agent_occupancy_ > 1:
385+
# Agents and bombs can only share a square if they are both in their
386+
# original position (Agent dropped bomb and has not moved)
387+
if bomb_occupancy_ > 1 or agent_occupancy_ != 0:
369388
desired_bomb_positions[num_bomb] = curr_position
370389
bomb_occupancy[curr_position] += 1
371-
num_agent = bombs_kicked_by.get(num_bomb)
390+
num_agent = agent_indexed_by_kicked_bomb.get(num_bomb)
372391
if num_agent is not None:
373392
agent = alive_agents[num_agent]
374393
desired_agent_positions[num_agent] = agent.position
375394
agent_occupancy[agent.position] += 1
376-
del bombs_kicked_by[num_bomb]
395+
del kicked_bomb_indexed_by_agent[num_agent]
396+
del agent_indexed_by_kicked_bomb[num_bomb]
377397
change = True
378398

379399
for num_bomb, bomb in enumerate(curr_bombs):
380400
if desired_bomb_positions[num_bomb] == bomb.position and \
381-
not num_bomb in bombs_kicked_by:
401+
not num_bomb in agent_indexed_by_kicked_bomb:
382402
# Bomb was not kicked this turn and its desired position is its
383403
# current location. Stop it just in case it was moving before.
384404
bomb.stop()

0 commit comments

Comments
 (0)