-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathminimax.py
More file actions
125 lines (103 loc) · 4.31 KB
/
minimax.py
File metadata and controls
125 lines (103 loc) · 4.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import numpy as np
# implements Minimax for Tic Tac Toe / Gomoku
n_size = 3
n_connect = 3
def is_done(board):
for i in range(n_size * n_size):
x, y = i % n_size, i // n_size
x_end = x + n_connect
x_rev_end = x - n_connect
y_end = y + n_connect
if ( # -
x_end <= n_size and abs(board[y, x:x_end].sum()) == n_connect
) or ( # |
y_end <= n_size and abs(board[y:y_end, x].sum()) == n_connect
) or ( # \
x_end <= n_size and y_end <= n_size and abs(
board[range(y, y_end), range(x, x_end)].sum()) == n_connect
) or ( # /
x_rev_end >= -1 and y_end <= n_size and abs(
board[range(y, y_end), range(x, x_rev_end, -1)].sum()) == n_connect
):
return board[y, x]
return 0
def play(agents):
board = np.zeros(n_size * n_size).astype(int)
record = np.zeros(n_size * n_size)
winner = 0
n_moves = 0
for move in range(n_size * n_size):
n_moves += 1
player = move % 2 * 2 - 1
action_pos = agents[move % 2].act(board, player)
record[action_pos] = n_moves
board[action_pos] = player
winner = is_done(board.reshape((n_size, n_size)))
if abs(winner) == 1:
break
return record.reshape((n_size, n_size)), winner
def test(agents):
game_records = [0, 0, 0]
for i in range(100):
idx = [0, 1] # np.random.permutation([0, 1]).astype(int)
board, winner = play([agents[idx[0]], agents[idx[1]]])
game_records[-int(winner) * (2 * idx[0] - 1) + 1] += 1
return game_records
class RandomMove(object):
def act(self, board, player):
return np.random.choice(n_size * n_size, p=(1 - np.abs(board)) / (1 - abs(board)).sum())
class MiniMax(object):
def __init__(self, max_depth=4):
self.cache = {}
self.max_depth = max_depth
def heuristic(self, board, player):
# a. in [1, -1], b. score(player1) = -score(player2)
evals = [0, 0] # for player -1 1
for i in range(n_size * n_size):
if board[i] == 0:
continue
evals[(board[i] + 1 ) // 2] += \
(i % n_size < n_size - 1 and board[i] == board[i + 1]) + \
(i + n_size < board.shape[0] - 1 and board[i] == board[i + n_size]) + \
(i + n_size < board.shape[0] - 1 and i % n_size > 0 and board[i] == board[i + n_size - 1]) + \
(i + n_size < board.shape[0] - 1 and i % n_size <
n_size - 1 and board[i] == board[i + n_size + 1])
return (-evals[0] * player + evals[1] * player) / (evals[0] + evals[1] + 1)
def score(self, board, player, depth):
board_str = ''.join([str(i) for i in board])
if board_str in self.cache: # cached before
return self.cache[board_str]
winner = is_done(board.reshape(n_size, n_size))
if np.abs(board).sum() == board.shape[0] or winner != 0: # game end
self.cache[board_str] = ([], winner * player)
return [], winner * player
if depth >= self.max_depth:
return [], self.heuristic(board, player)
# a value less than -1 so next step can pick a legal move
board_scores = np.ones(board.shape[0]) * -2
heuristics_used = False
for i in range(board.shape[0]):
if board[i] != 0:
continue
board[i] = player
board_scores[i] = -self.score(board, -player, depth + 1)[1]
heuristics_used = heuristics_used or ''.join(
[str(i) for i in board]) not in self.cache
board[i] = 0
best_score = np.amax(board_scores)
best_moves = [i for i in range(board.shape[0]) if board_scores[
i] == best_score]
if not heuristics_used:
self.cache[board_str] = best_moves, best_score
return best_moves, best_score
def act(self, board, player):
return np.random.choice(self.score(board, player, 0)[0])
def main():
minimax = MiniMax()
random = RandomMove()
print('\t\t\t\twin/draw/lose')
print('minimax vs. minimax', test([minimax, minimax]))
print('random vs. minimax', test([random, minimax]))
print('minimax vs. random', test([minimax, random]))
if __name__ == "__main__":
main()