-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcode.py
More file actions
593 lines (492 loc) · 19.2 KB
/
code.py
File metadata and controls
593 lines (492 loc) · 19.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
import random
from collections import deque
from enum import IntEnum, auto
# tile types - each tile can be one of these things
class TileType(IntEnum):
WUMPUS = auto()
PIT = auto()
UNSTABLE = auto()
GOLD = auto()
EMPTY = auto()
# which direction the agent is facing
class Direction(IntEnum):
NORTH = auto()
EAST = auto()
SOUTH = auto()
WEST = auto()
# what the agent is currently trying to do
class Goal(IntEnum):
GET_GOLD = auto()
GO_HOME = auto()
# percepts the agent can sense each turn
class Percept(IntEnum):
STENCH = auto() # wumpus is nearby
BREEZE = auto() # pit is nearby
RUMBLE = auto() # unstable ground nearby
GLITTER = auto() # gold is here
BUMP = auto() # walked into a wall
SCREAM = auto() # arrow killed the wumpus
# how each direction affects position
DIR_VECTORS = {
Direction.NORTH: (0, 1),
Direction.EAST: (1, 0),
Direction.SOUTH: (0, -1),
Direction.WEST: (-1, 0),
}
def format_cells(cells):
if not cells:
return "none"
return ", ".join(f"({x},{y})" for x, y in sorted(cells))
def format_percepts(percepts):
if not percepts:
return "none"
return ", ".join(p.name.lower() for p in sorted(percepts, key=lambda p: p.name))
# stores what is on a single grid tile
class Tile:
def __init__(self):
self.has_wumpus = False
self.wumpus_alive = False
self.has_pit = False
self.is_unstable = False
self.has_gold = False
def place_wumpus(self):
self.has_wumpus = True
self.wumpus_alive = True
def kill_wumpus(self):
self.wumpus_alive = False
def place_pit(self):
self.has_pit = True
def place_gold(self):
self.has_gold = True
def make_unstable(self):
self.is_unstable = True
def collapse(self):
# unstable ground permanently becomes a pit
self.is_unstable = False
self.has_pit = True
# the AI agent that moves around the grid
class Agent:
START_POS = (0, 0)
GRID_SIZE = 5
def __init__(self, actions, verbose=True):
self.pos = Agent.START_POS
self.facing = Direction.NORTH
self.goal = Goal.GET_GOLD
self.actions = actions # callbacks to the grid (move, shoot, grab, etc)
self.verbose = verbose
self.has_arrow = True
self.got_gold = False
self.alive = True
self.done = False
# knowledge base - what the agent believes about the world
self.visited = {(0, 0)}
self.safe = {(0, 0)}
self.maybe_wumpus = set() # cells that might have the wumpus
self.maybe_pit = set() # cells that might have a pit
self.maybe_unstable = set() # cells that might be unstable
self.known_wumpus = None # confirmed wumpus location (if known)
self.wumpus_alive = True
def log(self, message):
if self.verbose:
print(message)
def get_neighbours(self, pos):
# returns all valid adjacent cells (no diagonals)
x, y = pos
result = []
for dx, dy in [(0, 1), (0, -1), (1, 0), (-1, 0)]:
nx, ny = x + dx, y + dy
if 0 <= nx < self.GRID_SIZE and 0 <= ny < self.GRID_SIZE:
result.append((nx, ny))
return result
def update_kb(self, percepts, pos=None):
# update knowledge base from current percepts
if pos is None:
pos = self.pos
percepts = set(percepts)
adj = self.get_neighbours(pos)
unknown = [c for c in adj if c not in self.visited and c not in self.safe]
# if we heard a scream the wumpus is dead
if Percept.SCREAM in percepts:
self.wumpus_alive = False
self.known_wumpus = None
self.maybe_wumpus.clear()
# stench means wumpus is in one of the adjacent unknown cells
if Percept.STENCH in percepts and self.wumpus_alive:
for c in unknown:
self.maybe_wumpus.add(c)
# if only one possibility, we know exactly where it is
if len(self.maybe_wumpus) == 1:
self.known_wumpus = next(iter(self.maybe_wumpus))
else:
# no stench here so none of these cells have the wumpus
for c in adj:
self.maybe_wumpus.discard(c)
if c == self.known_wumpus:
# our previous guess was wrong
self.known_wumpus = None
# check if we can now confirm wumpus from remaining candidates
if self.wumpus_alive and self.known_wumpus is None and len(self.maybe_wumpus) == 1:
self.known_wumpus = next(iter(self.maybe_wumpus))
# breeze means a pit is nearby
if Percept.BREEZE in percepts:
for c in unknown:
self.maybe_pit.add(c)
else:
for c in adj:
self.maybe_pit.discard(c)
# rumble means unstable ground nearby (our twist on the classic game)
if Percept.RUMBLE in percepts:
for c in unknown:
self.maybe_unstable.add(c)
else:
for c in adj:
self.maybe_unstable.discard(c)
# a cell already proved safe must not stay in a danger candidate set
for c in self.safe | self.visited:
self.maybe_wumpus.discard(c)
self.maybe_pit.discard(c)
self.maybe_unstable.discard(c)
if c == self.known_wumpus:
self.known_wumpus = None
# any adjacent cell that is not in any danger set is safe to visit
dangerous = self.maybe_wumpus | self.maybe_pit | self.maybe_unstable
for c in adj:
if c not in dangerous:
self.safe.add(c)
# visited cells are always safe
self.safe |= self.visited
safe_frontier = self.safe - self.visited
self.log(f" [agent] Percepts at {pos}: {format_percepts(percepts)}")
self.log(
" [agent] KB "
f"safe_frontier={format_cells(safe_frontier)} | "
f"possible_wumpus={format_cells(self.maybe_wumpus)} | "
f"possible_pit={format_cells(self.maybe_pit)} | "
f"possible_unstable={format_cells(self.maybe_unstable)}"
)
def bfs(self, target, walkable=None):
# find shortest path to target using breadth first search
start = self.pos
if start == target:
return []
if walkable is None:
walkable = self.visited | self.safe
# queue stores positions still to explore and the path used to reach them
queue = deque([(start, [])])
seen = {start}
while queue:
current, path = queue.popleft()
for nb in self.get_neighbours(current):
if nb == target:
return path + [nb]
if nb in walkable and nb not in seen:
seen.add(nb)
queue.append((nb, path + [nb]))
return None # no path exists
def go_to(self, target):
# navigate the agent step by step along the path to target
path = self.bfs(target)
if path is None:
return False
# maps a direction vector to which way the agent should face
vec_to_dir = {
(0, 1): Direction.NORTH,
(0, -1): Direction.SOUTH,
(1, 0): Direction.EAST,
(-1, 0): Direction.WEST,
}
for step in path:
x, y = self.pos
dx, dy = step[0] - x, step[1] - y
new_dir = vec_to_dir[(dx, dy)]
self.facing = new_dir
self.actions["rotate"](new_dir)
percepts = self.actions["move"]()
if not self.alive:
return False
if Percept.BUMP in percepts:
# hit a wall, update kb and stop
self.update_kb(percepts)
return False
self.pos = step
self.visited.add(step)
self.update_kb(percepts)
return self.pos == target
def try_shoot(self):
# attempt to shoot the wumpus if we know where it is
if not self.has_arrow or not self.wumpus_alive:
return
if self.known_wumpus is None:
return
wx, wy = self.known_wumpus
cx, cy = self.pos
# arrow can only travel in a straight line so we need to be aligned
if cx == wx:
direction = Direction.NORTH if wy > cy else Direction.SOUTH
elif cy == wy:
direction = Direction.EAST if wx > cx else Direction.WEST
else:
return # not in same row or column, cannot shoot
target = self.known_wumpus
self.log(f" [agent] Shooting arrow toward wumpus at {self.known_wumpus}")
self.facing = direction
self.actions["rotate"](direction)
killed = self.actions["shoot"]()
self.has_arrow = False
if killed:
self.wumpus_alive = False
self.known_wumpus = None
self.maybe_wumpus.clear()
self.safe.add(target)
self.update_kb(self.actions["sense"]() + [Percept.SCREAM])
def go_home(self):
if self.pos == Agent.START_POS:
self.actions["climb"]()
return
ok = self.go_to(Agent.START_POS)
if ok:
self.actions["climb"]()
else:
self.log(" [agent] Can't find path home!")
def decide(self, percepts):
# called each turn to decide what to do next
if not self.alive or self.done:
return
self.update_kb(percepts)
if self.goal == Goal.GET_GOLD:
# grab gold if we can see it glittering
if Percept.GLITTER in percepts:
self.log(" [agent] Gold found, grabbing it.")
self.actions["grab"]()
self.got_gold = True
self.goal = Goal.GO_HOME
self.go_home()
return
# try to shoot wumpus if we know where it is
self.try_shoot()
# find unexplored safe cells and go to nearest one
frontier = sorted(self.safe - self.visited)
if frontier:
# pick the closest unexplored cell (manhattan distance)
target = min(frontier, key=lambda c: (abs(c[0] - self.pos[0]) + abs(c[1] - self.pos[1]), c[0], c[1]))
self.log(f" [agent] Exploring {target} because it is proven safe and still unvisited.")
self.go_to(target)
else:
# no more safe places to explore so just go home
self.log(" [agent] No safe cells left, retreating.")
self.goal = Goal.GO_HOME
self.go_home()
elif self.goal == Goal.GO_HOME:
self.go_home()
# the game world - manages the grid and runs the simulation
class Grid:
GRID_SIZE = 5
PIT_CHANCE = 0.15
UNSTABLE_CHANCE = 0.10
COLLAPSE_CHANCE = 0.30 # chance unstable tile collapses when stepped on
def __init__(self, seed=0, verbose=True):
self.seed = seed
self.rng = random.Random(seed)
self.verbose = verbose
self.tiles = []
self.agent = None
self.agent_pos = (0, 0)
self.agent_dir = Direction.NORTH
self.wumpus_pos = None
self.game_over = False
self.score = 0
self.steps = 0
self.setup_agent()
self.build_grid()
self.place_things()
def log(self, message):
if self.verbose:
print(message)
def setup_agent(self):
# pass the grid methods to the agent as a dict of callbacks
actions = {
"shoot": self.shoot,
"move": self.move,
"rotate": self.rotate,
"grab": self.grab,
"climb": self.climb,
"sense": self.get_percepts,
}
self.agent = Agent(actions, verbose=self.verbose)
def build_grid(self):
# create empty grid of tiles
self.tiles = [[Tile() for _ in range(self.GRID_SIZE)] for _ in range(self.GRID_SIZE)]
def place_things(self):
# randomly place wumpus, pits, unstable tiles and gold
used = {(0, 0)} # do not place anything on the start tile
# wumpus goes somewhere random
options = [(x, y) for x in range(self.GRID_SIZE) for y in range(self.GRID_SIZE) if (x, y) not in used]
wx, wy = self.rng.choice(options)
self.wumpus_pos = (wx, wy)
self.get_tile(wx, wy).place_wumpus()
used.add((wx, wy))
# randomly add pits around the grid
for x in range(self.GRID_SIZE):
for y in range(self.GRID_SIZE):
if (x, y) not in used and self.rng.random() < self.PIT_CHANCE:
self.get_tile(x, y).place_pit()
used.add((x, y))
# randomly add unstable tiles (our custom twist)
for x in range(self.GRID_SIZE):
for y in range(self.GRID_SIZE):
if (x, y) not in used and self.rng.random() < self.UNSTABLE_CHANCE:
self.get_tile(x, y).make_unstable()
used.add((x, y))
# gold goes somewhere that is not already occupied
free = [(x, y) for x in range(self.GRID_SIZE) for y in range(self.GRID_SIZE) if (x, y) not in used]
if free:
gx, gy = self.rng.choice(free)
self.get_tile(gx, gy).place_gold()
self.print_grid()
def get_tile(self, x, y):
return self.tiles[y][x]
def get_neighbours(self, x, y):
result = []
for dx, dy in [(0, 1), (0, -1), (1, 0), (-1, 0)]:
nx, ny = x + dx, y + dy
if 0 <= nx < self.GRID_SIZE and 0 <= ny < self.GRID_SIZE:
result.append((nx, ny))
return result
def get_percepts(self):
# work out what the agent can sense from its current tile
percepts = set()
x, y = self.agent_pos
tile = self.get_tile(x, y)
if tile.has_gold:
percepts.add(Percept.GLITTER)
for nx, ny in self.get_neighbours(x, y):
nb = self.get_tile(nx, ny)
if nb.has_wumpus and nb.wumpus_alive:
percepts.add(Percept.STENCH)
if nb.has_pit:
percepts.add(Percept.BREEZE)
if nb.is_unstable:
percepts.add(Percept.RUMBLE)
return list(percepts)
def shoot(self):
# fires the arrow in a straight line, returns true if wumpus dies
self.score -= 10
x, y = self.agent_pos
dx, dy = DIR_VECTORS[self.agent_dir]
cx, cy = x + dx, y + dy
while 0 <= cx < self.GRID_SIZE and 0 <= cy < self.GRID_SIZE:
tile = self.get_tile(cx, cy)
if tile.has_wumpus and tile.wumpus_alive:
tile.kill_wumpus()
self.wumpus_pos = None
self.log(f" [grid] Wumpus at ({cx},{cy}) killed!")
return True
cx += dx
cy += dy
self.log(" [grid] Arrow missed.")
return False
def move(self):
# move agent one step forward, return percepts at new position
self.score -= 1
self.steps += 1
x, y = self.agent_pos
dx, dy = DIR_VECTORS[self.agent_dir]
nx, ny = x + dx, y + dy
# check for wall collision
if not (0 <= nx < self.GRID_SIZE and 0 <= ny < self.GRID_SIZE):
self.log(" [grid] Bump - hit wall.")
return self.get_percepts() + [Percept.BUMP]
self.agent_pos = (nx, ny)
tile = self.get_tile(nx, ny)
self.log(f" [grid] Moved to ({nx},{ny})")
# pit = instant death
if tile.has_pit:
self.log(" [grid] Fell in a pit. Game over.")
self.score -= 1000
self.end_game()
return []
# unstable ground might collapse (the twist - like quicksand)
if tile.is_unstable:
roll = self.rng.random()
self.log(f" [grid] Unstable ground! roll={roll:.2f} threshold={self.COLLAPSE_CHANCE:.2f}")
if roll < self.COLLAPSE_CHANCE:
tile.collapse()
self.log(" [grid] Ground gave way! Game over.")
self.score -= 1000
self.end_game()
return []
self.log(" [grid] Ground held, but it was close.")
# walking into wumpus kills you
if tile.has_wumpus and tile.wumpus_alive:
self.log(" [grid] Walked into the wumpus! Game over.")
self.score -= 1000
self.end_game()
return []
return self.get_percepts()
def rotate(self, direction):
self.agent_dir = direction
def grab(self):
x, y = self.agent_pos
tile = self.get_tile(x, y)
if tile.has_gold:
tile.has_gold = False
self.log(" [grid] Gold picked up!")
else:
self.log(" [grid] Nothing to grab here.")
def climb(self):
if self.agent_pos == (0, 0):
if self.agent.got_gold:
self.score += 1000
self.log(f" [grid] Agent climbed out. Score: {self.score}")
self.agent.done = True
self.game_over = True
else:
self.log(" [grid] Can't climb here, must be at (0,0).")
def end_game(self):
self.game_over = True
self.agent.alive = False
def run(self):
self.log("=" * 50)
self.log(" WUMPUS WORLD - simulation start")
self.log("=" * 50)
while not self.game_over:
percepts = self.get_percepts()
# snapshot state so we can detect if agent gets stuck
before = (self.agent_pos, self.score, self.agent.goal)
self.agent.decide(percepts)
after = (self.agent_pos, self.score, self.agent.goal)
if before == after:
self.log(" [grid] Agent is stuck, ending simulation.")
break
self.print_result()
def print_grid(self):
# show grid layout before simulation starts
self.log("\nGrid layout (S=start W=wumpus P=pit U=unstable G=gold)\n")
for y in range(self.GRID_SIZE - 1, -1, -1):
row = f" y={y} "
for x in range(self.GRID_SIZE):
t = self.get_tile(x, y)
char = "."
if (x, y) == (0, 0):
char = "S"
elif t.has_wumpus:
char = "W"
elif t.has_pit:
char = "P"
elif t.is_unstable:
char = "U"
elif t.has_gold:
char = "G"
row += f"[{char}]"
self.log(row)
self.log(" " + "".join(f" x={x} " for x in range(self.GRID_SIZE)) + "\n")
def print_result(self):
self.log("\n" + "=" * 50)
self.log(" Result: " + ("SURVIVED" if self.agent.alive else "DIED"))
self.log(" Gold: " + ("YES" if self.agent.got_gold else "no"))
self.log(" Exited: " + ("YES" if self.agent.done else "no"))
self.log(f" Steps: {self.steps}")
self.log(f" Score: {self.score}")
self.log("=" * 50)
if __name__ == "__main__":
Grid(seed=0).run()