黑白棋(Mini AlphaGo)
实验内容
黑白棋(Reversi),也叫苹果棋,翻转棋,是一种经典的策略性游戏。一般棋子双面为黑白两色,故称“黑白棋”。因为行棋之时将对方棋子翻转,变为己方棋子,故又称“翻转棋”(Reversi)。棋子双面为红、绿色的称为“苹果棋”。它使用8x8的棋盘,由两人执黑子和白子轮流下棋,最后子多方为胜方。随着网络的普及,黑白棋作为一种最适合在网上玩的棋类游戏正在逐渐流行起来。中国主要的黑白棋游戏站点有Yahoo游戏、中国游戏网、联众游戏等。
游戏规则
棋局开始时黑棋位于e4和d5,白棋位于d4和e5,如图所示
游戏规则
- 黑方先行,双方交替下棋。
- 一步合法的棋步包括:在一个空格新落下一个棋子,并且翻转对手一个或多个棋子。
- 新落下的棋子与棋盘上已有的同色棋子间,对方被夹住的所有棋子都要翻转过来。可以是横着夹,竖着夹,或是斜着夹。夹住的位置上必须全部是对手的棋子,不能有空格。
- 一步棋可以在数个(横向,纵向,对角线)方向上翻棋,任何被夹住的棋子都必须被翻转过来,棋手无权选择不去翻某个棋子。
- 除非至少翻转了对手的一个棋子,否则就不能落子。如果一方没有合法棋步,也就是说不管他下到哪里,都不能至少翻转对手的一个棋子,那他这一轮只能弃权,而由他的对手继续落子直到他有合法棋步可下。
- 如果一方至少有一步合法棋步可下,他就必须落子,不得弃权。
- 棋局持续下去,直到棋盘填满或者双方都无合法棋步可下。
- 如果某一方落子时间超过1分钟,则判该方失败。
实验要求
- 使用MCTS算法实现Mini AlphaGo for Reversi
- MCTS算法部分需要自己实现,尽量不使用现成的包,工具或者接口
- 在博弈过程中,Mini AlphaGo每一步所花费的时间以及总时间需要显示出来
- 需要有简单的图形界面用于人机博弈交互
- 使用Python语言
题解
import tkinter as tk
import numpy as np
import random
import time
from tkinter import messagebox
class MCTSNode:
def __init__(self, game, parent=None):
self.game = game
self.parent = parent
self.children = []
self.visits = 0
self.wins = 0
def expand(self):
valid_moves = self.game.get_valid_moves()
for move in valid_moves:
new_game = Reversi()
new_game.board = self.game.board.copy()
new_game.current_player = self.game.current_player
new_game.make_move(move[0], move[1])
self.children.append(MCTSNode(new_game, parent=self))
def simulate(self):
current_game = Reversi()
current_game.board = self.game.board.copy()
current_game.current_player = self.game.current_player
while True:
valid_moves = current_game.get_valid_moves()
if not valid_moves:
break
move = random.choice(valid_moves)
current_game.make_move(move[0], move[1])
black_score, white_score = current_game.get_score()
return 1 if black_score > white_score else -1 if white_score > black_score else 0
def backpropagate(self, result):
self.visits += 1
self.wins += result
if self.parent:
self.parent.backpropagate(-result)
class Reversi:
def __init__(self):
self.board = np.zeros((8, 8), dtype=int)
self.current_player = 1 # 1 for black, -1 for white
self.initialize_board()
def initialize_board(self):
self.board[3][4] = 1 # 黑棋在e5 (行3, 列4)
self.board[4][3] = 1 # 黑棋在d4 (行4, 列3)
self.board[3][3] = -1 # 白棋在d5 (行3, 列3)
self.board[4][4] = -1 # 白棋在e4 (行4, 列4)
def is_valid_move(self, row, col):
if self.board[row][col] != 0:
return False
directions = [(-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 1), (1, -1), (1, 0), (1, 1)]
for dr, dc in directions:
r, c = row + dr, col + dc
found_opponent = False
while 0 <= r < 8 and 0 <= c < 8:
if self.board[r][c] == -self.current_player:
found_opponent = True
elif self.board[r][c] == self.current_player:
if found_opponent:
return True
else:
break
else:
break
r += dr
c += dc
return False
def make_move(self, row, col):
self.board[row][col] = self.current_player
self.flip_pieces(row, col)
self.switch_player()
def flip_pieces(self, row, col):
directions = [(-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 1), (1, -1), (1, 0), (1, 1)]
for dr, dc in directions:
r, c = row + dr, col + dc
pieces_to_flip = []
while 0 <= r < 8 and 0 <= c < 8:
if self.board[r][c] == -self.current_player:
pieces_to_flip.append((r, c))
elif self.board[r][c] == self.current_player:
for flip_r, flip_c in pieces_to_flip:
self.board[flip_r][flip_c] = self.current_player
break
else:
break
r += dr
c += dc
def get_valid_moves(self):
return [(r, c) for r in range(8) for c in range(8) if self.is_valid_move(r, c)]
def switch_player(self):
self.current_player *= -1
def get_score(self):
return np.sum(self.board == 1), np.sum(self.board == -1) # Black, White
class MiniAlphaGo:
def __init__(self, game):
self.game = game
def mcts(self, iterations):
root = MCTSNode(self.game)
for _ in range(iterations):
node = root
# Selection
while node.children:
node = max(node.children, key=lambda n: n.visits) # Choose best child
# Expansion
if node.visits > 0:
node.expand()
if not node.children: # 如果没有扩展出子节点,则结束
continue
node = random.choice(node.children)
# Simulation
result = node.simulate()
# Backpropagation
node.backpropagate(result)
if root.children:
return max(root.children, key=lambda n: n.visits).game # Return the best game state
return self.game # 如果没有合法落子,返回当前状态
class GUI:
def __init__(self, game):
self.game = game
self.root = tk.Tk()
self.root.title("Mini AlphaGo Reversi")
self.buttons = [[None for _ in range(8)] for _ in range(8)]
self.create_board()
self.update_board()
self.start_time = None
self.timer_label = tk.Label(self.root, text="计时: 0秒")
self.timer_label.grid(row=8, column=0, columnspan=8)
self.elapsed_time = 0
self.game_over = False
self.update_timer()
def create_board(self):
for row in range(8):
for col in range(8):
button = tk.Button(self.root, text='', width=2, height=2,
command=lambda r=row, c=col: self.on_click(r, c))
button.grid(row=row, column=col)
self.buttons[row][col] = button
def on_click(self, row, col):
if self.game_over:
return # 游戏结束后不允许再落子
if self.game.is_valid_move(row, col):
self.start_time = time.time() # 开始计时
self.game.make_move(row, col)
self.update_board()
if self.check_timeout():
self.end_game("玩家超时,白方胜利!")
return
self.run_ai_move()
def update_board(self):
for row in range(8):
for col in range(8):
piece = self.game.board[row][col]
if piece == 1:
self.buttons[row][col].config(text='●', bg='black')
elif piece == -1:
self.buttons[row][col].config(text='○', bg='white')
else:
self.buttons[row][col].config(text='', bg='green')
black_score, white_score = self.game.get_score()
self.root.title(f"Mini AlphaGo Reversi - B: {black_score} W: {white_score}")
def update_timer(self):
if self.start_time is not None:
self.elapsed_time = int(time.time() - self.start_time)
self.timer_label.config(text=f"计时: {self.elapsed_time}秒")
self.root.after(1000, self.update_timer) # 每秒更新一次计时
def run_ai_move(self):
if self.game.get_valid_moves():
self.root.after(500, self.execute_ai_move) # 等待500毫秒后执行AI落子
def execute_ai_move(self):
ai = MiniAlphaGo(self.game)
new_game = ai.mcts(1000) # Run MCTS for 1000 iterations
self.game.board = new_game.board
self.game.current_player = new_game.current_player
self.update_board()
if self.check_timeout():
self.end_game("AI超时,黑方胜利!")
return
if not self.game.get_valid_moves(): # 检查是否还有合法落子
self.end_game("双方均无法落子,游戏结束!")
return
# 允许玩家继续落子
self.check_player_can_play()
def check_player_can_play(self):
if not self.game.get_valid_moves():
self.end_game("玩家无法落子,游戏结束!")
def check_timeout(self):
elapsed_time = time.time() - self.start_time if self.start_time else 0
return elapsed_time > 60
def end_game(self, message):
self.game_over = True # 设置游戏结束标志
black_score, white_score = self.game.get_score()
winner = "黑方胜利!" if black_score > white_score else "白方胜利!" if white_score > black_score else "平局!"
messagebox.showinfo("游戏结束", f"{message}\n黑棋数量: {black_score}\n白棋数量: {white_score}\n{winner}")
self.root.quit()
if __name__ == "__main__":
game = Reversi()
gui = GUI(game)
gui.root.mainloop()