HuskyDoge committed on
Commit
172a1e4
1 Parent(s): fc602f9
Files changed (33)
  1. .DS_Store +0 -0
  2. Gomoku_MCTS/.DS_Store +0 -0
  3. Gomoku_MCTS/__init__.py +142 -0
  4. Gomoku_MCTS/__pycache__/__init__.cpython-310.pyc +0 -0
  5. Gomoku_MCTS/__pycache__/dueling_net.cpython-310.pyc +0 -0
  6. Gomoku_MCTS/__pycache__/game.cpython-310.pyc +0 -0
  7. Gomoku_MCTS/__pycache__/mcts_alphaZero.cpython-310.pyc +0 -0
  8. Gomoku_MCTS/__pycache__/mcts_pure.cpython-310.pyc +0 -0
  9. Gomoku_MCTS/checkpoints/best_policy_8_8_5_2torch.pth +3 -0
  10. Gomoku_MCTS/config/config.yaml +10 -0
  11. Gomoku_MCTS/config/options.py +74 -0
  12. Gomoku_MCTS/config/utils.py +54 -0
  13. Gomoku_MCTS/dueling_net.py +155 -0
  14. Gomoku_MCTS/game.py +281 -0
  15. Gomoku_MCTS/main_worker.py +334 -0
  16. Gomoku_MCTS/mcts_alphaZero.py +250 -0
  17. Gomoku_MCTS/mcts_pure.py +246 -0
  18. Gomoku_MCTS/policy_value_net_pytorch.py +159 -0
  19. Gomoku_MCTS/visualization/_blip_uni_cross_mu_bs512_lr0.002/events.out.tfevents.1700183498.LAPTOP-5AN2UHOO +3 -0
  20. Gomoku_MCTS/visualization/_blip_uni_cross_mu_bs512_lr0.002/events.out.tfevents.1700183516.LAPTOP-5AN2UHOO +3 -0
  21. Gomoku_MCTS/visualization/_blip_uni_cross_mu_bs512_lr0.002/events.out.tfevents.1700183568.LAPTOP-5AN2UHOO +3 -0
  22. Gomoku_MCTS/visualization/_blip_uni_cross_mu_bs512_lr0.002/events.out.tfevents.1700183629.LAPTOP-5AN2UHOO +3 -0
  23. Gomoku_MCTS/visualization/_blip_uni_cross_mu_bs512_lr0.002/events.out.tfevents.1700183640.LAPTOP-5AN2UHOO +3 -0
  24. Gomoku_MCTS/visualization/_blip_uni_cross_mu_bs512_lr0.002/events.out.tfevents.1700183667.LAPTOP-5AN2UHOO +3 -0
  25. Gomoku_MCTS/visualization/_blip_uni_cross_mu_bs512_lr0.002/events.out.tfevents.1700183756.LAPTOP-5AN2UHOO +3 -0
  26. Gomoku_MCTS/visualization/_blip_uni_cross_mu_bs512_lr0.002/events.out.tfevents.1700183820.LAPTOP-5AN2UHOO +3 -0
  27. Gomoku_MCTS/visualization/_blip_uni_cross_mu_bs512_lr0.002/events.out.tfevents.1700184097.LAPTOP-5AN2UHOO +3 -0
  28. README.md +3 -3
  29. app.py +56 -0
  30. assets/favicon_circle.png +0 -0
  31. const.py +58 -0
  32. pages/Player_VS_AI.py +409 -0
  33. requirements.txt +7 -0
.DS_Store ADDED
Binary file (6.15 kB).
 
Gomoku_MCTS/.DS_Store ADDED
Binary file (6.15 kB).
 
Gomoku_MCTS/__init__.py ADDED
@@ -0,0 +1,142 @@
1
+ from .mcts_pure import MCTSPlayer as MCTSpure
2
+ from .mcts_alphaZero import MCTSPlayer as alphazero
3
+ from .dueling_net import PolicyValueNet
4
+ import numpy as np
5
+
6
+
7
+ class Board(object):
8
+ """board for the game"""
9
+
10
+ def __init__(self, **kwargs):
11
+ self.last_move = None
12
+ self.availables = None
13
+ self.current_player = None
14
+ self.width = int(kwargs.get('width', 8)) # if no width, default 8
15
+ self.height = int(kwargs.get('height', 8))
16
+ self.board_map = np.zeros(shape=(self.width, self.height), dtype=int)
17
+ # board states stored as a dict,
18
+ # key: move as location on the board,
19
+ # value: player as pieces type
20
+ self.states = {}
21
+ # need how many pieces in a row to win
22
+ self.n_in_row = int(kwargs.get('n_in_row', 5))
23
+ self.players = kwargs.get('players', [1, 2]) # player1 and player2
24
+ self.init_board(0)
25
+
26
+ def init_board(self, start_player=0):
27
+ if self.width < self.n_in_row or self.height < self.n_in_row:
28
+ raise Exception('board width and height can not be '
29
+ 'less than {}'.format(self.n_in_row))
30
+ self.current_player = self.players[start_player] # start player
31
+ # keep available moves in a list
32
+ self.availables = list(range(self.width * self.height))
33
+ self.states = {}
34
+ self.last_move = -1
35
+
36
+ def move_to_location(self, move: int):
37
+ """
38
+ 3*3 board's moves like:
39
+ 6 7 8
40
+ 3 4 5
41
+ 0 1 2
42
+ and move 5's location is (1,2)
43
+ """
44
+ h = move // self.width
45
+ w = move % self.width
46
+ return [h, w]
47
+
48
+ def location_to_move(self, location):
49
+ if len(location) != 2:
50
+ return -1
51
+ h = location[0]
52
+ w = location[1]
53
+ move = h * self.width + w
54
+ if move not in range(self.width * self.height):
55
+ return -1
56
+ return move
57
+
58
+ def current_state(self):
59
+ """
60
+ return the board state from the perspective of the current player.
61
+ state shape: 4*width*height
62
+ The state array has four planes:
63
+ plane 0 marks the current player's stones, plane 1 marks the opponent's stones, and plane 2 marks the location of the last move.
64
+ Plane 3 is a turn indicator: if the total number of moves played is even it is all ones (first player to move), otherwise all zeros (second player to move).
65
+ Each plane is a width x height array describing the board layout; in planes 0 and 1 a cell is 1 if it holds the corresponding player's stone and 0 otherwise.
66
+ In plane 2 only the cell of the last move is 1; in plane 3 every cell is 1 on the first player's turn and 0 otherwise.
67
+ Finally, the state array is flipped vertically to match the actual board layout.
68
+ """
69
+
70
+ square_state = np.zeros((4, self.width, self.height))
71
+ if self.states:
72
+ moves, players = np.array(list(zip(*self.states.items())))
73
+ move_curr = moves[players == self.current_player]
74
+ move_oppo = moves[players != self.current_player]
75
+ square_state[0][move_curr // self.width,
76
+ move_curr % self.height] = 1.0
77
+ square_state[1][move_oppo // self.width,
78
+ move_oppo % self.height] = 1.0
79
+ # indicate the last move location
80
+ square_state[2][self.last_move // self.width,
81
+ self.last_move % self.height] = 1.0
82
+ if len(self.states) % 2 == 0:
83
+ square_state[3][:, :] = 1.0 # indicate the colour to play
84
+ return square_state[:, ::-1, :]
85
+
86
+ def do_move(self, move):
87
+ self.states[move] = self.current_player
88
+ # get (x,y) of this move
89
+ x, y = self.move_to_location(move)
90
+ self.board_map[x][y] = self.current_player
91
+
92
+ self.availables.remove(move)
93
+ self.current_player = (
94
+ self.players[0] if self.current_player == self.players[1]
95
+ else self.players[1]
96
+ )
97
+ self.last_move = move
98
+
99
+ def has_a_winner(self):
100
+ width = self.width
101
+ height = self.height
102
+ states = self.states
103
+ n = self.n_in_row
104
+
105
+ moved = list(set(range(width * height)) - set(self.availables))
106
+ if len(moved) < self.n_in_row * 2 - 1:
107
+ return False, -1
108
+
109
+ for m in moved:
110
+ h = m // width
111
+ w = m % width
112
+ player = states[m]
113
+
114
+ if (w in range(width - n + 1) and
115
+ len(set(states.get(i, -1) for i in range(m, m + n))) == 1):
116
+ return True, player
117
+
118
+ if (h in range(height - n + 1) and
119
+ len(set(states.get(i, -1) for i in range(m, m + n * width, width))) == 1):
120
+ return True, player
121
+
122
+ if (w in range(width - n + 1) and h in range(height - n + 1) and
123
+ len(set(states.get(i, -1) for i in range(m, m + n * (width + 1), width + 1))) == 1):
124
+ return True, player
125
+
126
+ if (w in range(n - 1, width) and h in range(height - n + 1) and
127
+ len(set(states.get(i, -1) for i in range(m, m + n * (width - 1), width - 1))) == 1):
128
+ return True, player
129
+
130
+ return False, -1
131
+
132
+ def game_end(self):
133
+ """Check whether the game is ended or not"""
134
+ win, winner = self.has_a_winner()
135
+ if win:
136
+ return True, winner
137
+ elif not len(self.availables):
138
+ return True, -1
139
+ return False, -1
140
+
141
+ def get_current_player(self):
142
+ return self.current_player
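
For reference, a minimal usage sketch of the Board class defined in this file. The import path and the specific moves are illustrative assumptions, not part of the commit; it assumes the Gomoku_MCTS package directory is importable.

# Hypothetical usage sketch (illustration only)
from Gomoku_MCTS import Board

board = Board(width=8, height=8, n_in_row=5)      # __init__ already calls init_board(0)
move = board.location_to_move([3, 4])             # row 3, col 4  ->  3 * 8 + 4 = 28
board.do_move(move)                               # player 1 plays
board.do_move(board.location_to_move([4, 4]))     # player 2 replies

state = board.current_state()                     # shape (4, 8, 8)
# plane 0: stones of the player to move, plane 1: opponent stones,
# plane 2: last move only, plane 3: all ones iff an even number of moves was played
print(state.shape, state[2].sum())                # (4, 8, 8) 1.0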
Gomoku_MCTS/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (5.41 kB).
 
Gomoku_MCTS/__pycache__/dueling_net.cpython-310.pyc ADDED
Binary file (4.71 kB).
 
Gomoku_MCTS/__pycache__/game.cpython-310.pyc ADDED
Binary file (8.97 kB).
 
Gomoku_MCTS/__pycache__/mcts_alphaZero.cpython-310.pyc ADDED
Binary file (8.05 kB).
 
Gomoku_MCTS/__pycache__/mcts_pure.cpython-310.pyc ADDED
Binary file (8.73 kB).
 
Gomoku_MCTS/checkpoints/best_policy_8_8_5_2torch.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:878aace7c41962e0817fe8298a1f260b3b83e71c24d7d8c3558ccd6c4996d4f8
3
+ size 481383
Gomoku_MCTS/config/config.yaml ADDED
@@ -0,0 +1,10 @@
1
+ # ckpt/logger options(dynamic)
2
+ checkpoint_base: checkpoint
3
+ visual_base: visualization
4
+ log_base: log
5
+
6
+ # dataset
7
+ data_base: dataset
8
+
9
+
10
+
Gomoku_MCTS/config/options.py ADDED
@@ -0,0 +1,74 @@
1
+
2
+ import os
3
+ import argparse
4
+ import yaml
5
+
6
+ parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
7
+
8
+ # basic settings
9
+ parser.add_argument('--seed', default=1234, type=int)
10
+ parser.add_argument('--savepath', type=str, default="blip_uni_cross_mu", help='')
11
+
12
+
13
+ # board settings
14
+ parser.add_argument("--board_width", type=int,default=9)
15
+ parser.add_argument("--board_height", type=int,default=9)
16
+ parser.add_argument("--n_in_row", type=int,default=5,help="the condition of winning")
17
+
18
+
19
+ # device settings
20
+ parser.add_argument('--config', type=str, default='config/config.yaml', help='Path to the config file.')
21
+ parser.add_argument('--gpu_num', type=int, default=1)
22
+ parser.add_argument('--gpu_id', type=str, default='5')
23
+
24
+
25
+ # save options
26
+ parser.add_argument('--clear_visualizer', dest='clear_visualizer', action='store_true')
27
+ parser.add_argument('--std_log', dest='std_log', action='store_true')
28
+
29
+
30
+ # mode settings
31
+ parser.add_argument("--split",type=str,default="train",help="the mode of woker")
32
+
33
+
34
+ # train settings
35
+ parser.add_argument("--expri",type=str, default="",help="the name of experiment")
36
+ parser.add_argument("--learn_rate", type=float,default=2e-3)
37
+ parser.add_argument("--l2_const",type=float,default=1e-4)
38
+ # ???
39
+ parser.add_argument("--lr_multiplier", type=float,default= 1.0 ,help="adaptively adjust the learning rate based on KL")
40
+ parser.add_argument("--buffer_size",type=int,default=10000,help="The size of collection of game data ")
41
+ parser.add_argument("--batch_size",type=int,default=512)
42
+ parser.add_argument("--play_batch_size",type=int, default=1,help="The time of selfplaying when collect the data")
43
+ parser.add_argument("--epochs",type=int,default=5,help="num of train_steps for each update")
44
+ parser.add_argument("--kl_targ",type=float,default=0.02,help="the target kl distance between the old decision function and the new decision function ")
45
+ parser.add_argument("--check_freq",type=int,default=50,help='the frequence of the checking the win ratio when training')
46
+ parser.add_argument("--game_batch_num",type=int,default=1500,help = "the total training times")
47
+
48
+
49
+ # parser.add_argument("--l2_const",type=float,default=1e-4,help=" coef of l2 penalty")
50
+ parser.add_argument("--distributed",type=bool,default=False)
51
+
52
+ # preload_model setting
53
+ parser.add_argument("--preload_model",type=str, default="")
54
+
55
+
56
+ # Alphazero agent setting
57
+ parser.add_argument("--temp", type=float,default= 1.0 ,help="the temperature parameter when calculate the decision function getting the next action")
58
+ parser.add_argument("--n_playout",type=int, default=200, help="num of simulations for each move ")
59
+ parser.add_argument("--c_puct",type=int, default=5, help= "the balance parameter between exploration and exploitative ")
60
+
61
+ # prue_mcts agent setting
62
+ parser.add_argument("--pure_mcts_playout_num",type=int, default=200)
63
+
64
+ # test settings
65
+ parser.add_argument('--test_ckpt', type=str, default=None, help='ckpt absolute path')
66
+
67
+
68
+ opts = parser.parse_args()
69
+
70
+ # additional parameters
71
+ current_path = os.path.abspath(__file__)
72
+ grandfather_path = os.path.abspath(os.path.dirname(os.path.dirname(current_path)) + os.path.sep + ".")
73
+ with open(os.path.join(grandfather_path, opts.config), 'r') as stream:
74
+ config = yaml.full_load(stream)
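
Since the parsed options and the YAML config are exposed as module-level globals, a brief hedged sketch of how a training run would typically pick them up (the command line shown is only an example):

# Example invocation (illustrative):
#   python main_worker.py --board_width 9 --n_in_row 5 --batch_size 512 --expri run1
from config.options import opts, config   # argparse runs at import time

print(opts.board_width, opts.learn_rate)   # 9 0.002 for the command above
print(config['checkpoint_base'])           # "checkpoint", loaded from config/config.yaml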
Gomoku_MCTS/config/utils.py ADDED
@@ -0,0 +1,54 @@
1
+ import os, shutil
2
+ import torch
3
+ from tensorboardX import SummaryWriter
4
+ from config.options import *
5
+ import torch.distributed as dist
6
+ import time
7
+
8
+ """ ==================== Save ======================== """
9
+
10
+ def make_path():
11
+ return "{}_{}_bs{}_lr{}".format(opts.expri,opts.savepath,opts.batch_size,opts.learn_rate)
12
+
13
+
14
+
15
+
16
+ def save_model(model,name):
17
+ save_path = make_path()
18
+ if not os.path.isdir(os.path.join(config['checkpoint_base'], save_path)):
19
+ os.makedirs(os.path.join(config['checkpoint_base'], save_path), exist_ok=True)
20
+ model_name = os.path.join(config['checkpoint_base'], save_path, name)
21
+ torch.save(model.state_dict(), model_name)
22
+
23
+
24
+
25
+
26
+ """ ==================== Tools ======================== """
27
+ def is_dist_avail_and_initialized():
28
+ if not dist.is_available():
29
+ return False
30
+ if not dist.is_initialized():
31
+ return False
32
+ return True
33
+
34
+ def get_rank():
35
+ if not is_dist_avail_and_initialized():
36
+ return 0
37
+ return dist.get_rank()
38
+
39
+
40
+ def makedir(path):
41
+ if not os.path.exists(path):
42
+ os.makedirs(path, 0o777)
43
+
44
+
45
+ def visualizer():
46
+ if get_rank() == 0:
47
+ # filewriter_path = config['visual_base']+opts.savepath+'/'
48
+ save_path = make_path()
49
+ filewriter_path = os.path.join(config['visual_base'], save_path)
50
+ if opts.clear_visualizer and os.path.exists(filewriter_path):  # delete old summaries so runs do not get mixed together
51
+ shutil.rmtree(filewriter_path)
52
+ makedir(filewriter_path)
53
+ writer = SummaryWriter(filewriter_path, comment='visualizer')
54
+ return writer
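
A short sketch of how these helpers combine during training (hedged; it mirrors the calls made from main_worker.py). With the default --expri "" the run name matches the visualization/_blip_uni_cross_mu_bs512_lr0.002 folders added in this commit.

from config.utils import make_path, visualizer

print(make_path())            # "_blip_uni_cross_mu_bs512_lr0.002" with the default options
writer = visualizer()         # SummaryWriter under visualization/<run name>
writer.add_scalar("policy_update/loss", 1.23, 0)
# save_model(net, "current_policy.model") would then write
# checkpoint/<run name>/current_policy.model via torch.save(net.state_dict(), ...)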
Gomoku_MCTS/dueling_net.py ADDED
@@ -0,0 +1,155 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ import torch.nn.functional as F
5
+ from torch.autograd import Variable
6
+ import numpy as np
7
+
8
+ def set_learning_rate(optimizer, lr):
9
+ """Sets the learning rate to the given value"""
10
+ for param_group in optimizer.param_groups:
11
+ param_group['lr'] = lr
12
+
13
+ class DuelingDQNNet(nn.Module):
14
+ """Dueling DQN network module"""
15
+ def __init__(self, board_width, board_height):
16
+ super(DuelingDQNNet, self).__init__()
17
+
18
+ self.board_width = board_width
19
+ self.board_height = board_height
20
+ # common layers
21
+ self.conv1 = nn.Conv2d(4, 32, kernel_size=3, padding=1)
22
+ self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
23
+ self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
24
+ # advantage layers
25
+ self.adv_conv1 = nn.Conv2d(128, 4, kernel_size=1)
26
+ self.adv_fc1 = nn.Linear(4*board_width*board_height,
27
+ board_width*board_height)
28
+ # value layers
29
+ self.val_conv1 = nn.Conv2d(128, 2, kernel_size=1)
30
+ self.val_fc1 = nn.Linear(2*board_width*board_height, 64)
31
+ self.val_fc2 = nn.Linear(64, 1)
32
+
33
+ def forward(self, state_input):
34
+ # common layers
35
+ x = F.relu(self.conv1(state_input))
36
+ x = F.relu(self.conv2(x))
37
+ x = F.relu(self.conv3(x))
38
+
39
+ # advantage stream
40
+ adv = F.relu(self.adv_conv1(x))
41
+ adv = adv.view(-1, 4*self.board_width*self.board_height)
42
+ adv = self.adv_fc1(adv)
43
+
44
+ # value stream
45
+ val = F.relu(self.val_conv1(x))
46
+ val = val.view(-1, 2*self.board_width*self.board_height)
47
+ val = F.relu(self.val_fc1(val))
48
+ val = self.val_fc2(val)
49
+
50
+ q_values = val + adv - adv.mean(dim=1, keepdim=True)
51
+
52
+ return F.log_softmax(q_values, dim=1), val
53
+
54
+ class PolicyValueNet():
55
+ """policy-value network """
56
+ def __init__(self, board_width, board_height,
57
+ model_file=None, use_gpu=False):
58
+ self.use_gpu = use_gpu
59
+ self.board_width = board_width
60
+ self.board_height = board_height
61
+ self.l2_const = 1e-4 # coef of l2 penalty
62
+ # the policy value net module
63
+ if self.use_gpu:
64
+ self.policy_value_net = DuelingDQNNet(board_width, board_height).cuda()
65
+ else:
66
+ self.policy_value_net = DuelingDQNNet(board_width, board_height)
67
+ self.optimizer = optim.Adam(self.policy_value_net.parameters(),
68
+ weight_decay=self.l2_const)
69
+
70
+ if model_file:
71
+ net_params = torch.load(model_file)
72
+ self.policy_value_net.load_state_dict(net_params, strict=False)
73
+
74
+ def policy_value(self, state_batch):
75
+ """
76
+ input: a batch of states
77
+ output: a batch of action probabilities and state values
78
+ """
79
+ if self.use_gpu:
80
+ state_batch = Variable(torch.FloatTensor(state_batch).cuda())
81
+ log_act_probs, value = self.policy_value_net(state_batch)
82
+ act_probs = np.exp(log_act_probs.data.cpu().numpy())
83
+ return act_probs, value.data.cpu().numpy()
84
+ else:
85
+ state_batch = Variable(torch.FloatTensor(state_batch))
86
+ log_act_probs, value = self.policy_value_net(state_batch)
87
+ act_probs = np.exp(log_act_probs.data.numpy())
88
+ return act_probs, value.data.numpy()
89
+
90
+ def policy_value_fn(self, board):
91
+ """
92
+ input: board
93
+ output: a list of (action, probability) tuples for each available
94
+ action and the score of the board state
95
+ """
96
+ legal_positions = board.availables
97
+ current_state = np.ascontiguousarray(board.current_state().reshape(
98
+ -1, 4, self.board_width, self.board_height))
99
+ if self.use_gpu:
100
+ log_act_probs, value = self.policy_value_net(
101
+ Variable(torch.from_numpy(current_state)).cuda().float())
102
+ act_probs = np.exp(log_act_probs.data.cpu().numpy().flatten())
103
+ else:
104
+ log_act_probs, value = self.policy_value_net(
105
+ Variable(torch.from_numpy(current_state)).float())
106
+ act_probs = np.exp(log_act_probs.data.numpy().flatten())
107
+ act_probs = zip(legal_positions, act_probs[legal_positions])
108
+ value = value.data[0][0]
109
+ return act_probs, value
110
+
111
+ def train_step(self, state_batch, mcts_probs, winner_batch, lr):
112
+ """perform a training step"""
113
+
114
+ # self.use_gpu = True
115
+ # wrap in Variable
116
+ if self.use_gpu:
117
+ state_batch = Variable(torch.FloatTensor(state_batch).cuda())
118
+ mcts_probs = Variable(torch.FloatTensor(mcts_probs).cuda())
119
+ winner_batch = Variable(torch.FloatTensor(winner_batch).cuda())
120
+ else:
121
+ state_batch = Variable(torch.FloatTensor(state_batch))
122
+ mcts_probs = Variable(torch.FloatTensor(mcts_probs))
123
+ winner_batch = Variable(torch.FloatTensor(winner_batch))
124
+
125
+ # zero the parameter gradients
126
+ self.optimizer.zero_grad()
127
+ # set learning rate
128
+ set_learning_rate(self.optimizer, lr)
129
+
130
+ # forward
131
+ log_act_probs, value = self.policy_value_net(state_batch)
132
+ # define the loss = (z - v)^2 - pi^T * log(p) + c||theta||^2
133
+ # Note: the L2 penalty is incorporated in optimizer
134
+ value_loss = F.mse_loss(value.view(-1), winner_batch)
135
+ policy_loss = -torch.mean(torch.sum(mcts_probs*log_act_probs, 1))
136
+ loss = value_loss + policy_loss
137
+ # backward and optimize
138
+ loss.backward()
139
+ self.optimizer.step()
140
+ # calc policy entropy, for monitoring only
141
+ entropy = -torch.mean(
142
+ torch.sum(torch.exp(log_act_probs) * log_act_probs, 1)
143
+ )
144
+ # return loss.data[0], entropy.data[0]
145
+ #for pytorch version >= 0.5 please use the following line instead.
146
+ return loss.item(), entropy.item()
147
+
148
+ def get_policy_param(self):
149
+ net_params = self.policy_value_net.state_dict()
150
+ return net_params
151
+
152
+ def save_model(self, model_file):
153
+ """ save model params to file """
154
+ net_params = self.get_policy_param() # get model params
155
+ torch.save(net_params, model_file)
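
To make the dueling head concrete, a small shape-check sketch (zero inputs only; assumes it is run from the Gomoku_MCTS directory so that dueling_net is importable, which is how the repo's own scripts import it):

import torch
from dueling_net import DuelingDQNNet

net = DuelingDQNNet(board_width=8, board_height=8)
dummy = torch.zeros(2, 4, 8, 8)            # a batch of two 4-plane board states
log_probs, value = net(dummy)
print(log_probs.shape, value.shape)        # torch.Size([2, 64]) torch.Size([2, 1])
print(torch.exp(log_probs).sum(dim=1))     # ~1 per row: log_softmax over the 64 moves
# The head combines the two streams as Q = V + A - mean(A), so adding a constant to all
# advantages leaves the move distribution unchanged; V alone feeds the value output.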
Gomoku_MCTS/game.py ADDED
@@ -0,0 +1,281 @@
1
+ """
2
+ FileName: game.py
3
+ Author: Jiaxin Li
4
+ Create Date: yyyy/mm/dd
5
+ Description: to be completed
6
+ Edit History:
7
+ - 2023/11/18, Sat, Edited by Hbh (hbh001098hbh@sjtu.edu.cn)
8
+ - added some comments and optimize import and some structures
9
+ """
10
+
11
+ import numpy as np
12
+ from mcts_pure import MCTSPlayer as MCTS_Pure
13
+ from mcts_pure import Human_Player
14
+ from collections import defaultdict
15
+ from typing import Optional
16
+
17
+
18
+ class Board(object):
19
+ """board for the game"""
20
+
21
+ def __init__(self, **kwargs):
22
+ self.last_move = None
23
+ self.availables = None
24
+ self.current_player = None
25
+ self.width = int(kwargs.get('width', 8)) # if no width, default 8
26
+ self.height = int(kwargs.get('height', 8))
27
+ # board states stored as a dict,
28
+ # key: move as location on the board,
29
+ # value: player as pieces type
30
+ self.states = {}
31
+ # need how many pieces in a row to win
32
+ self.n_in_row = int(kwargs.get('n_in_row', 5))
33
+ self.players = [1, 2] # player1 and player2
34
+
35
+ def init_board(self, start_player=0):
36
+ if self.width < self.n_in_row or self.height < self.n_in_row:
37
+ raise Exception('board width and height can not be '
38
+ 'less than {}'.format(self.n_in_row))
39
+ self.current_player = self.players[start_player] # start player
40
+ # keep available moves in a list
41
+ self.availables = list(range(self.width * self.height))
42
+ self.states = {}
43
+ self.last_move = -1
44
+
45
+ def move_to_location(self, move: int):
46
+ """
47
+ 3*3 board's moves like:
48
+ 6 7 8
49
+ 3 4 5
50
+ 0 1 2
51
+ and move 5's location is (1,2)
52
+ """
53
+ h = move // self.width
54
+ w = move % self.width
55
+ return [h, w]
56
+
57
+ def location_to_move(self, location):
58
+ if len(location) != 2:
59
+ return -1
60
+ h = location[0]
61
+ w = location[1]
62
+ move = h * self.width + w
63
+ if move not in range(self.width * self.height):
64
+ return -1
65
+ return move
66
+
67
+ def current_state(self):
68
+ """
69
+ return the board state from the perspective of the current player.
70
+ state shape: 4*width*height
71
+ The state array has four planes:
72
+ plane 0 marks the current player's stones, plane 1 marks the opponent's stones, and plane 2 marks the location of the last move.
73
+ Plane 3 is a turn indicator: if the total number of moves played is even it is all ones (first player to move), otherwise all zeros (second player to move).
74
+ Each plane is a width x height array describing the board layout; in planes 0 and 1 a cell is 1 if it holds the corresponding player's stone and 0 otherwise.
75
+ In plane 2 only the cell of the last move is 1; in plane 3 every cell is 1 on the first player's turn and 0 otherwise.
76
+ Finally, the state array is flipped vertically to match the actual board layout.
77
+ """
78
+
79
+ square_state = np.zeros((4, self.width, self.height))
80
+ if self.states:
81
+ moves, players = np.array(list(zip(*self.states.items())))
82
+ move_curr = moves[players == self.current_player]
83
+ move_oppo = moves[players != self.current_player]
84
+ square_state[0][move_curr // self.width,
85
+ move_curr % self.height] = 1.0
86
+ square_state[1][move_oppo // self.width,
87
+ move_oppo % self.height] = 1.0
88
+ # indicate the last move location
89
+ square_state[2][self.last_move // self.width,
90
+ self.last_move % self.height] = 1.0
91
+ if len(self.states) % 2 == 0:
92
+ square_state[3][:, :] = 1.0 # indicate the colour to play
93
+ return square_state[:, ::-1, :]
94
+
95
+ def do_move(self, move):
96
+ self.states[move] = self.current_player
97
+ self.availables.remove(move)
98
+ self.current_player = (
99
+ self.players[0] if self.current_player == self.players[1]
100
+ else self.players[1]
101
+ )
102
+ self.last_move = move
103
+
104
+ def has_a_winner(self):
105
+ width = self.width
106
+ height = self.height
107
+ states = self.states
108
+ n = self.n_in_row
109
+
110
+ moved = list(set(range(width * height)) - set(self.availables))
111
+ if len(moved) < self.n_in_row * 2 - 1:
112
+ return False, -1
113
+
114
+ for m in moved:
115
+ h = m // width
116
+ w = m % width
117
+ player = states[m]
118
+
119
+ if (w in range(width - n + 1) and
120
+ len(set(states.get(i, -1) for i in range(m, m + n))) == 1):
121
+ return True, player
122
+
123
+ if (h in range(height - n + 1) and
124
+ len(set(states.get(i, -1) for i in range(m, m + n * width, width))) == 1):
125
+ return True, player
126
+
127
+ if (w in range(width - n + 1) and h in range(height - n + 1) and
128
+ len(set(states.get(i, -1) for i in range(m, m + n * (width + 1), width + 1))) == 1):
129
+ return True, player
130
+
131
+ if (w in range(n - 1, width) and h in range(height - n + 1) and
132
+ len(set(states.get(i, -1) for i in range(m, m + n * (width - 1), width - 1))) == 1):
133
+ return True, player
134
+
135
+ return False, -1
136
+
137
+ def game_end(self):
138
+ """Check whether the game is ended or not"""
139
+ win, winner = self.has_a_winner()
140
+ if win:
141
+ return True, winner
142
+ elif not len(self.availables):
143
+ return True, -1
144
+ return False, -1
145
+
146
+ def get_current_player(self):
147
+ return self.current_player
148
+
149
+
150
+ class Game(object):
151
+ """game server"""
152
+
153
+ def __init__(self, board, **kwargs):
154
+ self.board = board
155
+ self.pure_mcts_playout_num = 100  # number of playouts per move for the pure-MCTS player
156
+
157
+ def graphic(self, board, player1, player2):
158
+ """Draw the board and show game info"""
159
+ width = board.width
160
+ height = board.height
161
+
162
+ print("Player", player1, "with X".rjust(3))
163
+ print("Player", player2, "with O".rjust(3))
164
+ print()
165
+ for x in range(width):
166
+ print("{0:8}".format(x), end='')
167
+ print('\r\n')
168
+ for i in range(height - 1, -1, -1):
169
+ print("{0:4d}".format(i), end='')
170
+ for j in range(width):
171
+ loc = i * width + j
172
+ p = board.states.get(loc, -1)
173
+ if p == player1:
174
+ print('X'.center(8), end='')
175
+ elif p == player2:
176
+ print('O'.center(8), end='')
177
+ else:
178
+ print('_'.center(8), end='')
179
+ print('\r\n\r\n')
180
+
181
+ def start_play(self, player1, player2, start_player=0, is_shown=1):
182
+ """start a game between two players"""
183
+ if start_player not in (0, 1):
184
+ raise Exception('start_player should be either 0 (player1 first) '
185
+ 'or 1 (player2 first)')
186
+ self.board.init_board(start_player)
187
+ p1, p2 = self.board.players
188
+ player1.set_player_ind(p1)
189
+ player2.set_player_ind(p2)
190
+ players = {p1: player1, p2: player2}
191
+ if is_shown:
192
+ self.graphic(self.board, player1.player, player2.player)
193
+ while True:
194
+ current_player = self.board.get_current_player()
195
+ player_in_turn = players[current_player]
196
+ move = player_in_turn.get_action(self.board)
197
+ self.board.do_move(move)
198
+ if is_shown:
199
+ self.graphic(self.board, player1.player, player2.player)
200
+ end, winner = self.board.game_end()
201
+ if end:
202
+ if is_shown:
203
+ if winner != -1:
204
+ print("Game end. Winner is", players[winner])
205
+ else:
206
+ print("Game end. Tie")
207
+ return winner
208
+
209
+ def start_self_play(self, player, is_shown=0, temp=1e-3):
210
+ """
211
+ start a self-play game using a MCTS player, reuse the search tree,
212
+ and store the self-play data: (state, mcts_probs, z) for training
213
+ """
214
+ self.board.init_board()
215
+ p1, p2 = self.board.players
216
+ states, mcts_probs, current_players = [], [], []
217
+ while True:
218
+ move, move_probs = player.get_action(self.board,
219
+ temp=temp,
220
+ return_prob=1)
221
+ # store the data
222
+ states.append(self.board.current_state())
223
+ mcts_probs.append(move_probs)
224
+ current_players.append(self.board.current_player)
225
+ # perform a move
226
+ self.board.do_move(move)
227
+ if is_shown:
228
+ self.graphic(self.board, p1, p2)
229
+ end, winner = self.board.game_end()
230
+ if end:
231
+ # winner from the perspective of the current player of each state
232
+ winners_z = np.zeros(len(current_players))
233
+ if winner != -1:
234
+ winners_z[np.array(current_players) == winner] = 1.0
235
+ winners_z[np.array(current_players) != winner] = -1.0
236
+ # reset MCTS root node
237
+ player.reset_player()
238
+ if is_shown:
239
+ if winner != -1:
240
+ print("Game end. Winner is player:", winner)
241
+ else:
242
+ print("Game end. Tie")
243
+ return winner, zip(states, mcts_probs, winners_z)
244
+
245
+ # The test code below is an addition to the original file
246
+
247
+ def policy_evaluate(self, n_games=10):
248
+ """
249
+ Evaluate the trained policy by playing against the pure MCTS player
250
+ Note: this is only for monitoring the progress of training
251
+ """
252
+ current_mcts_player = MCTS_Pure(c_puct=5,
253
+ n_playout=self.pure_mcts_playout_num)
254
+
255
+ # pure_mcts_player = MCTS_Pure(c_puct=5,
256
+ # n_playout=self.pure_mcts_playout_num)
257
+
258
+ pure_mcts_player = Human_Player()
259
+ win_cnt = defaultdict(int)
260
+ for i in range(n_games):
261
+ winner = self.start_play(current_mcts_player,
262
+ pure_mcts_player,
263
+ start_player=i % 2,
264
+ is_shown=1)
265
+ win_cnt[winner] += 1
266
+ win_ratio = 1.0 * (win_cnt[1] + 0.5 * win_cnt[-1]) / n_games
267
+ print("num_playouts:{}, win: {}, lose: {}, tie:{}".format(
268
+ self.pure_mcts_playout_num,
269
+ win_cnt[1], win_cnt[2], win_cnt[-1]))
270
+ return win_ratio
271
+
272
+
273
+ if __name__ == '__main__':
274
+ board_width = 8
275
+ board_height = 8
276
+ n_in_row = 5
277
+ board = Board(width=board_width,
278
+ height=board_height,
279
+ n_in_row=n_in_row)
280
+ task = Game(board)
281
+ task.policy_evaluate(n_games=10)
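
For clarity on the training labels produced by start_self_play, a standalone sketch of the winner-to-value mapping used above (the 5-move game is a made-up example):

import numpy as np

current_players = [1, 2, 1, 2, 1]   # who was to move at each stored state
winner = 2                          # suppose player 2 eventually won
winners_z = np.zeros(len(current_players))
winners_z[np.array(current_players) == winner] = 1.0
winners_z[np.array(current_players) != winner] = -1.0
print(winners_z)                    # [-1.  1. -1.  1. -1.]: +1 from the winner's point of view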
Gomoku_MCTS/main_worker.py ADDED
@@ -0,0 +1,334 @@
1
+ from __future__ import print_function
2
+ import random
3
+ import numpy as np
4
+ from collections import defaultdict, deque
5
+ from game import Board, Game
6
+ from mcts_pure import MCTSPlayer as MCTS_Pure
7
+ from mcts_alphaZero import MCTSPlayer
8
+ import torch.optim as optim
9
+ # from policy_value_net import PolicyValueNet # Theano and Lasagne
10
+ # from policy_value_net_pytorch import PolicyValueNet # Pytorch
11
+ from dueling_net import PolicyValueNet
12
+ # from policy_value_net_tensorflow import PolicyValueNet # Tensorflow
13
+ # from policy_value_net_keras import PolicyValueNet # Keras
14
+ # import joblib
15
+ from torch.autograd import Variable
16
+ import torch.nn.functional as F
17
+
18
+
19
+ from config.options import *
20
+ import sys
21
+ from config.utils import *
22
+ from torch.backends import cudnn
23
+
24
+ import torch
25
+
26
+ from tqdm import *
27
+ from torch.utils.tensorboard import SummaryWriter
28
+
29
+ from multiprocessing import Pool
30
+
31
+ def set_learning_rate(optimizer, lr):
32
+ """Sets the learning rate to the given value"""
33
+ for param_group in optimizer.param_groups:
34
+ param_group['lr'] = lr
35
+
36
+ def std_log():
37
+ if get_rank() == 0:
38
+ save_path = make_path()
39
+ makedir(config['log_base'])
40
+ sys.stdout = open(os.path.join(config['log_base'], "{}.txt".format(save_path)), "w")
41
+
42
+
43
+ def init_seeds(seed, cuda_deterministic=True):
44
+ torch.manual_seed(seed)
45
+ if cuda_deterministic: # slower, more reproducible
46
+ cudnn.deterministic = True
47
+ cudnn.benchmark = False
48
+ else: # faster, less reproducible
49
+ cudnn.deterministic = False
50
+ cudnn.benchmark = True
51
+
52
+
53
+
54
+
55
+ class MainWorker():
56
+ def __init__(self,device):
57
+
58
+ #--- init the set of pipeline -------
59
+ self.board_width = opts.board_width
60
+ self.board_height = opts.board_height
61
+ self.n_in_row = opts.n_in_row
62
+ self.learn_rate = opts.learn_rate
63
+ self.lr_multiplier = opts.lr_multiplier
64
+ self.temp = opts.temp
65
+ self.n_playout = opts.n_playout
66
+ self.c_puct = opts.c_puct
67
+ self.buffer_size = opts.buffer_size
68
+ self.batch_size = opts.batch_size
69
+ self.play_batch_size = opts.play_batch_size
70
+ self.epochs = opts.epochs
71
+ self.kl_targ = opts.kl_targ
72
+ self.check_freq = opts.check_freq
73
+ self.game_batch_num = opts.game_batch_num
74
+ self.pure_mcts_playout_num = opts.pure_mcts_playout_num
75
+
76
+ self.device = device
77
+ self.use_gpu = torch.device("cuda") == self.device
78
+
79
+ self.board = Board(width=self.board_width,
80
+ height=self.board_height,
81
+ n_in_row=self.n_in_row)
82
+ self.game = Game(self.board)
83
+
84
+ # The data collection of the history of games
85
+ self.data_buffer = deque(maxlen=self.buffer_size)
86
+
87
+
88
+ # The best win ratio of the training agent
89
+ self.best_win_ratio = 0.0
90
+
91
+
92
+ if opts.preload_model:
93
+ # start training from an initial policy-value net
94
+ self.policy_value_net = PolicyValueNet(self.board_width,
95
+ self.board_height,
96
+ model_file=opts.preload_model,
97
+ use_gpu=(self.device == "cuda"))
98
+
99
+ else:
100
+ # start training from a new policy-value net
101
+ self.policy_value_net = PolicyValueNet(self.board_width,
102
+ self.board_height,
103
+ use_gpu=(self.device == "cuda"))
104
+ self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
105
+ c_puct=self.c_puct,
106
+ n_playout=self.n_playout,
107
+ is_selfplay=1)
108
+
109
+ # The set of optimizer
110
+ self.optimizer = optim.Adam(self.policy_value_net.policy_value_net.parameters(),
111
+ weight_decay=opts.l2_const)
112
+ # set learning rate
113
+ set_learning_rate(self.optimizer, self.learn_rate*self.lr_multiplier)
114
+
115
+
116
+
117
+
118
+ def get_equi_data(self, play_data):
119
+ """augment the data set by rotation and flipping
120
+ play_data: [(state, mcts_prob, winner_z), ..., ...]
121
+ """
122
+ extend_data = []
123
+ for state, mcts_porb, winner in play_data:
124
+ for i in [1, 2, 3, 4]:
125
+ # rotate counterclockwise
126
+ equi_state = np.array([np.rot90(s, i) for s in state])
127
+ equi_mcts_prob = np.rot90(np.flipud(
128
+ mcts_porb.reshape(self.board_height, self.board_width)), i)
129
+ extend_data.append((equi_state,
130
+ np.flipud(equi_mcts_prob).flatten(),
131
+ winner))
132
+ # flip horizontally
133
+ equi_state = np.array([np.fliplr(s) for s in equi_state])
134
+ equi_mcts_prob = np.fliplr(equi_mcts_prob)
135
+ extend_data.append((equi_state,
136
+ np.flipud(equi_mcts_prob).flatten(),
137
+ winner))
138
+ return extend_data
139
+
140
+ def job(self, i):
141
+ game = self.game
142
+ player = self.mcts_player
143
+ winner, play_data = game.start_self_play(player,
144
+ temp=self.temp)
145
+ play_data = list(play_data)[:]
146
+ play_data = self.get_equi_data(play_data)
147
+
148
+ return play_data
149
+
150
+ def collect_selfplay_data(self, n_games=1):
151
+ """collect self-play data for training"""
152
+ # print("[STAGE] Collecting self-play data for training")
153
+
154
+ # collection_bar = tqdm( range(n_games))
155
+ collection_bar = range(n_games)
156
+ with Pool(4) as p:
157
+ play_data = p.map(self.job, collection_bar, chunksize=1)
158
+ self.data_buffer.extend([sample for game in play_data for sample in game])  # flatten: p.map returns one list of samples per game
159
+ # print('\n', 'data buffer size:', len(self.data_buffer))
160
+
161
+ def policy_update(self):
162
+ """update the policy-value net"""
163
+ mini_batch = random.sample(self.data_buffer, self.batch_size)
164
+ state_batch = [data[0] for data in mini_batch]
165
+ mcts_probs_batch = [data[1] for data in mini_batch]
166
+ winner_batch = [data[2] for data in mini_batch]
167
+ old_probs, old_v = self.policy_value_net.policy_value(state_batch)
168
+
169
+ epoch_bar = tqdm(range(self.epochs))
170
+
171
+ for i in epoch_bar:
172
+ """perform a training step"""
173
+ # wrap in Variable
174
+ if self.use_gpu:
175
+ state_batch = Variable(torch.FloatTensor(state_batch).cuda())
176
+ mcts_probs = Variable(torch.FloatTensor(mcts_probs_batch).cuda())
177
+ winner_batch = Variable(torch.FloatTensor(winner_batch).cuda())
178
+ else:
179
+ state_batch = Variable(torch.FloatTensor(state_batch))
180
+ mcts_probs = Variable(torch.FloatTensor(mcts_probs_batch))
181
+ winner_batch = Variable(torch.FloatTensor(winner_batch))
182
+
183
+ # zero the parameter gradients
184
+ self.optimizer.zero_grad()
185
+
186
+ # forward
187
+ log_act_probs, value = self.policy_value_net.policy_value_net(state_batch)
188
+ # define the loss = (z - v)^2 - pi^T * log(p) + c||theta||^2
189
+ # Note: the L2 penalty is incorporated in optimizer
190
+ value_loss = F.mse_loss(value.view(-1), winner_batch)
191
+ policy_loss = -torch.mean(torch.sum(mcts_probs*log_act_probs, 1))
192
+ loss = value_loss + policy_loss
193
+ # backward and optimize
194
+ loss.backward()
195
+ self.optimizer.step()
196
+ # calc policy entropy, for monitoring only
197
+ entropy = -torch.mean(
198
+ torch.sum(torch.exp(log_act_probs) * log_act_probs, 1)
199
+ )
200
+ loss = loss.item()
201
+ entropy = entropy.item()
202
+
203
+ new_probs, new_v = self.policy_value_net.policy_value(state_batch)
204
+ kl = np.mean(np.sum(old_probs * (
205
+ np.log(old_probs + 1e-10) - np.log(new_probs + 1e-10)),
206
+ axis=1)
207
+ )
208
+ if kl > self.kl_targ * 4: # early stopping if D_KL diverges badly
209
+ break
210
+
211
+ epoch_bar.set_description(f"training epoch {i}")
212
+ epoch_bar.set_postfix( new_v =new_v, kl = kl)
213
+
214
+ # adaptively adjust the learning rate
215
+ if kl > self.kl_targ * 2 and self.lr_multiplier > 0.1:
216
+ self.lr_multiplier /= 1.5
217
+ elif kl < self.kl_targ / 2 and self.lr_multiplier < 10:
218
+ self.lr_multiplier *= 1.5
219
+
220
+
221
+
222
+ explained_var_old = (1 -
223
+ np.var(np.array(winner_batch) - old_v.flatten()) /
224
+ np.var(np.array(winner_batch)))
225
+ explained_var_new = (1 -
226
+ np.var(np.array(winner_batch) - new_v.flatten()) /
227
+ np.var(np.array(winner_batch)))
228
+
229
+
230
+
231
+
232
+ return kl, loss, entropy,explained_var_old, explained_var_new
233
+
234
+ def policy_evaluate(self, n_games=10):
235
+ """
236
+ Evaluate the trained policy by playing against the pure MCTS player
237
+ Note: this is only for monitoring the progress of training
238
+ """
239
+ current_mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
240
+ c_puct=self.c_puct,
241
+ n_playout=self.n_playout)
242
+ pure_mcts_player = MCTS_Pure(c_puct=5,
243
+ n_playout=self.pure_mcts_playout_num)
244
+ win_cnt = defaultdict(int)
245
+ for i in range(n_games):
246
+
247
+ winner = self.game.start_play(
248
+ pure_mcts_player,current_mcts_player,
249
+ start_player=i % 2,
250
+ is_shown=0)
251
+ win_cnt[winner] += 1
252
+ print(f" {i}_th winner:" , winner)
253
+ win_ratio = 1.0*(win_cnt[1] + 0.5*win_cnt[-1]) / n_games
254
+ print("num_playouts:{}, win: {}, lose: {}, tie:{}".format(
255
+ self.pure_mcts_playout_num,
256
+ win_cnt[1], win_cnt[2], win_cnt[-1]))
257
+ return win_ratio
258
+
259
+ def run(self):
260
+ """run the training pipeline"""
261
+ try:
262
+
263
+ batch_bar = tqdm(range(self.game_batch_num))
264
+ for i in batch_bar:
265
+ self.collect_selfplay_data(self.play_batch_size)
266
+
267
+ if len(self.data_buffer) > self.batch_size:
268
+ kl, loss, entropy,explained_var_old, explained_var_new = self.policy_update()
269
+
270
+ writer.add_scalar("policy_update/kl", kl ,i )
271
+ writer.add_scalar("policy_update/loss", loss ,i)
272
+ writer.add_scalar("policy_update/entropy", entropy ,i)
273
+ writer.add_scalar("policy_update/explained_var_old", explained_var_old,i)
274
+ writer.add_scalar("policy_update/explained_var_new ", explained_var_new ,i)
275
+
276
+
277
+ batch_bar.set_description(f"game batch num {i}")
278
+
279
+ # check the performance of the current model,
280
+ # and save the model params
281
+ if (i+1) % self.check_freq == 0:
282
+ win_ratio = self.policy_evaluate()
283
+
284
+ batch_bar.set_description(f"game batch num {i+1}")
285
+ writer.add_scalar("evaluate/explained_var_new ", win_ratio ,i)
286
+ batch_bar.set_postfix(loss= loss, entropy= entropy,win_ratio =win_ratio)
287
+
288
+ save_model(self.policy_value_net,"current_policy.model")
289
+ if win_ratio > self.best_win_ratio:
290
+ print("New best policy!!!!!!!!")
291
+ self.best_win_ratio = win_ratio
292
+ # update the best_policy
293
+ save_model(self.policy_value_net,"best_policy.model")
294
+ if (self.best_win_ratio == 1.0 and
295
+ self.pure_mcts_playout_num < 5000):
296
+ self.pure_mcts_playout_num += 1000
297
+ self.best_win_ratio = 0.0
298
+ except KeyboardInterrupt:
299
+ print('\n\rquit')
300
+
301
+
302
+ if __name__ == "__main__":
303
+ print("START train....")
304
+
305
+ # ------init set-----------
306
+
307
+ if opts.std_log:
308
+ std_log()
309
+ writer = visualizer()
310
+
311
+
312
+ if opts.distributed:
313
+ torch.distributed.init_process_group(backend="nccl")
314
+ local_rank = torch.distributed.get_rank()
315
+ torch.cuda.set_device(local_rank)
316
+ device = torch.device("cuda", local_rank)
317
+ init_seeds(opts.seed + local_rank)
318
+
319
+ else:
320
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
321
+ init_seeds(opts.seed)
322
+
323
+ print("seed: ",opts.seed )
324
+ print("device:" , device)
325
+
326
+
327
+ if opts.split == "train":
328
+ training_pipeline = MainWorker(device)
329
+ training_pipeline.run()
330
+
331
+ if get_rank() == 0 and opts.split == "test":
332
+ training_pipeline = MainWorker(device)
333
+ training_pipeline.policy_value_net()
334
+
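The adaptive learning-rate rule in policy_update can be illustrated numerically; the sketch below assumes the defaults from config/options.py (learn_rate 2e-3, kl_targ 0.02) and made-up KL values:

kl_targ = 0.02
lr_multiplier = 1.0

def adjust(kl, mult):
    # same rule as policy_update: shrink when KL overshoots, grow when KL is very small
    if kl > kl_targ * 2 and mult > 0.1:
        mult /= 1.5
    elif kl < kl_targ / 2 and mult < 10:
        mult *= 1.5
    return mult

for kl in [0.05, 0.05, 0.001]:                 # two noisy updates, then a tiny one
    lr_multiplier = adjust(kl, lr_multiplier)
    print(round(2e-3 * lr_multiplier, 6))      # 0.001333, 0.000889, 0.001333

An update whose KL exceeds 4 * kl_targ is additionally cut short by the early-stopping break in the epoch loop.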
Gomoku_MCTS/mcts_alphaZero.py ADDED
@@ -0,0 +1,250 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Monte Carlo Tree Search in AlphaGo Zero style, which uses a policy-value
4
+ network to guide the tree search and evaluate the leaf nodes
5
+
6
+ @author: Junxiao Song
7
+ """
8
+
9
+ import numpy as np
10
+ import copy
11
+ import time
12
+ from concurrent.futures import ThreadPoolExecutor
13
+ import threading
14
+
15
+
16
+ def softmax(x):
17
+ probs = np.exp(x - np.max(x))
18
+ probs /= np.sum(probs)
19
+ return probs
20
+
21
+
22
+ class TreeNode(object):
23
+ """A node in the MCTS tree.
24
+
25
+ Each node keeps track of its own value Q, prior probability P, and
26
+ its visit-count-adjusted prior score u.
27
+ """
28
+
29
+ def __init__(self, parent, prior_p):
30
+ self._parent = parent
31
+ self._children = {} # a map from action to TreeNode
32
+ self._n_visits = 0
33
+ self._Q = 0
34
+ self._u = 0
35
+ self._P = prior_p
36
+
37
+ def expand(self, action_priors):
38
+ """Expand tree by creating new children.
39
+ action_priors: a list of tuples of actions and their prior probability
40
+ according to the policy function.
41
+ """
42
+ for action, prob in action_priors:
43
+ if action not in self._children:
44
+ self._children[action] = TreeNode(self, prob)
45
+
46
+ def select(self, c_puct):
47
+ """Select action among children that gives maximum action value Q
48
+ plus bonus u(P).
49
+ Return: A tuple of (action, next_node)
50
+ """
51
+ return max(self._children.items(),
52
+ key=lambda act_node: act_node[1].get_value(c_puct))
53
+
54
+ def update(self, leaf_value):
55
+ """Update node values from leaf evaluation.
56
+ leaf_value: the value of subtree evaluation from the current player's
57
+ perspective.
58
+ """
59
+ # Count visit.
60
+ self._n_visits += 1
61
+ # Update Q, a running average of values for all visits.
62
+ self._Q += 1.0*(leaf_value - self._Q) / self._n_visits
63
+
64
+ def update_recursive(self, leaf_value):
65
+ """Like a call to update(), but applied recursively for all ancestors.
66
+ """
67
+ # If it is not root, this node's parent should be updated first.
68
+ if self._parent:
69
+ self._parent.update_recursive(-leaf_value)
70
+ self.update(leaf_value)
71
+
72
+ def get_value(self, c_puct):
73
+ """Calculate and return the value for this node.
74
+ It is a combination of leaf evaluations Q, and this node's prior
75
+ adjusted for its visit count, u.
76
+ c_puct: a number in (0, inf) controlling the relative impact of
77
+ value Q, and prior probability P, on this node's score.
78
+ """
79
+ self._u = (c_puct * self._P *
80
+ np.sqrt(self._parent._n_visits) / (1 + self._n_visits))
81
+ return self._Q + self._u
82
+
83
+ def is_leaf(self):
84
+ """Check if leaf node (i.e. no nodes below this have been expanded)."""
85
+ return self._children == {}
86
+
87
+ def is_root(self):
88
+ return self._parent is None
89
+
90
+
91
+ class MCTS(object):
92
+ """An implementation of Monte Carlo Tree Search."""
93
+
94
+ def __init__(self, policy_value_fn, c_puct=5, n_playout=10000):
95
+ """
96
+ policy_value_fn: a function that takes in a board state and outputs
97
+ a list of (action, probability) tuples and also a score in [-1, 1]
98
+ (i.e. the expected value of the end game score from the current
99
+ player's perspective) for the current player.
100
+ c_puct: a number in (0, inf) that controls how quickly exploration
101
+ converges to the maximum-value policy. A higher value means
102
+ relying on the prior more.
103
+ """
104
+ self._root = TreeNode(None, 1.0)
105
+ self._policy = policy_value_fn
106
+ self._c_puct = c_puct
107
+ self._n_playout = n_playout
108
+
109
+ def _playout(self, state, lock=None):
110
+ """Run a single playout from the root to the leaf, getting a value at
111
+ the leaf and propagating it back through its parents.
112
+ State is modified in-place, so a copy must be provided.
113
+ """
114
+ node = self._root
115
+ if lock is not None:
116
+ lock.acquire()
117
+ while(1):
118
+ if node.is_leaf():
119
+ break
120
+ # Greedily select next move.
121
+ action, node = node.select(self._c_puct)
122
+ state.do_move(action)
123
+ if lock is not None:
124
+ lock.release()
125
+ # Evaluate the leaf using a network which outputs a list of
126
+ # (action, probability) tuples p and also a score v in [-1, 1]
127
+ # for the current player.
128
+ action_probs, leaf_value = self._policy(state)
129
+ # Check for end of game.
130
+ end, winner = state.game_end()
131
+ if lock is not None:
132
+ lock.acquire()
133
+ if not end:
134
+ node.expand(action_probs)
135
+ else:
136
+ # for end state,return the "true" leaf_value
137
+ if winner == -1: # tie
138
+ leaf_value = 0.0
139
+ else:
140
+ leaf_value = (
141
+ 1.0 if winner == state.get_current_player() else -1.0
142
+ )
143
+
144
+ # Update value and visit count of nodes in this traversal.
145
+ node.update_recursive(-leaf_value)
146
+ if lock is not None:
147
+ lock.release()
148
+
149
+ def get_move_probs(self, state, temp=1e-3):
150
+ """Run all playouts sequentially and return the available actions and
151
+ their corresponding probabilities.
152
+ state: the current game state
153
+ temp: temperature parameter in (0, 1] controls the level of exploration
154
+ """
155
+
156
+ start_time_averge = 0
157
+
158
+ ### test multi-thread
159
+ lock = threading.Lock()
160
+ with ThreadPoolExecutor(max_workers=4) as executor:
161
+ for n in range(self._n_playout):
162
+ start_time = time.time()
163
+
164
+ state_copy = copy.deepcopy(state)
165
+ executor.submit(self._playout, state_copy, lock)
166
+ start_time_averge += (time.time() - start_time)
167
+ ### end test multi-thread
168
+
169
+ # t = time.time()
170
+ # for n in range(self._n_playout):
171
+ # start_time = time.time()
172
+
173
+ # state_copy = copy.deepcopy(state)
174
+ # self._playout(state_copy)
175
+ # start_time_averge += (time.time() - start_time)
176
+ # print('!!time!!:', time.time() - t)
177
+
178
+ # print(f" My MCTS sum_time: {start_time_averge }, total_simulation: {self._n_playout}")
179
+
180
+
181
+ # calc the move probabilities based on visit counts at the root node
182
+ act_visits = [(act, node._n_visits)
183
+ for act, node in self._root._children.items()]
184
+ acts, visits = zip(*act_visits)
185
+ act_probs = softmax(1.0/temp * np.log(np.array(visits) + 1e-10))
186
+
187
+ return acts, act_probs
188
+
189
+ def update_with_move(self, last_move):
190
+ """Step forward in the tree, keeping everything we already know
191
+ about the subtree.
192
+ """
193
+ if last_move in self._root._children:
194
+ self._root = self._root._children[last_move]
195
+ self._root._parent = None
196
+ else:
197
+ self._root = TreeNode(None, 1.0)
198
+
199
+ def __str__(self):
200
+ return "MCTS"
201
+
202
+
203
+ class MCTSPlayer(object):
204
+ """AI player based on MCTS"""
205
+
206
+ def __init__(self, policy_value_function,
207
+ c_puct=5, n_playout=2000, is_selfplay=0):
208
+ self.mcts = MCTS(policy_value_function, c_puct, n_playout)
209
+ self._is_selfplay = is_selfplay
210
+
211
+ def set_player_ind(self, p):
212
+ self.player = p
213
+
214
+ def reset_player(self):
215
+ self.mcts.update_with_move(-1)
216
+
217
+ def get_action(self, board, temp=1e-3, return_prob=0):
218
+ sensible_moves = board.availables
219
+ # the pi vector returned by MCTS as in the alphaGo Zero paper
220
+ move_probs = np.zeros(board.width*board.height)
221
+ if len(sensible_moves) > 0:
222
+ acts, probs = self.mcts.get_move_probs(board, temp)
223
+ move_probs[list(acts)] = probs
224
+ if self._is_selfplay:
225
+ # add Dirichlet Noise for exploration (needed for
226
+ # self-play training)
227
+ move = np.random.choice(
228
+ acts,
229
+ p=0.75*probs + 0.25*np.random.dirichlet(0.3*np.ones(len(probs)))
230
+ )
231
+ # update the root node and reuse the search tree
232
+ self.mcts.update_with_move(move)
233
+ else:
234
+ # with the default temp=1e-3, it is almost equivalent
235
+ # to choosing the move with the highest prob
236
+ move = np.random.choice(acts, p=probs)
237
+ # reset the root node
238
+ self.mcts.update_with_move(-1)
239
+ # location = board.move_to_location(move)
240
+ # print("AI move: %d,%d\n" % (location[0], location[1]))
241
+
242
+ if return_prob:
243
+ return move, move_probs
244
+ else:
245
+ return move
246
+ else:
247
+ print("WARNING: the board is full")
248
+
249
+ def __str__(self):
250
+ return "MCTS {}".format(self.player)
Gomoku_MCTS/mcts_pure.py ADDED
@@ -0,0 +1,246 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import numpy as np
4
+ import copy
5
+ from operator import itemgetter
6
+ import time
7
+
8
+
9
+ def rollout_policy_fn(board):
10
+ """a coarse, fast version of policy_fn used in the rollout phase."""
11
+ # rollout randomly
12
+ action_probs = np.random.rand(len(board.availables))
13
+ return zip(board.availables, action_probs)
14
+
15
+ # 决策价值函数
16
+ def policy_value_fn(board):
17
+ """a function that takes in a state and outputs a list of (action, probability)
18
+ tuples and a score for the state"""
19
+ # return uniform probabilities and 0 score for pure MCTS
20
+ action_probs = np.ones(len(board.availables))/len(board.availables)
21
+ return zip(board.availables, action_probs), 0
22
+
23
+
24
+ class TreeNode(object):
25
+ """A node in the MCTS tree. Each node keeps track of its own value Q,
26
+ prior probability P, and its visit-count-adjusted prior score u.
27
+ """
28
+
29
+ def __init__(self, parent, prior_p):
30
+ self._parent = parent
31
+ self._children = {} # a map from action to TreeNode
32
+ self._n_visits = 0
33
+ self._Q = 0
34
+ self._u = 0
35
+ self._P = prior_p
36
+
37
+ def expand(self, action_priors):
38
+ """Expand tree by creating new children.
39
+ action_priors: a list of tuples of actions and their prior probability
40
+ according to the policy function.
41
+ """
42
+ for action, prob in action_priors:
43
+ if action not in self._children:
44
+ self._children[action] = TreeNode(self, prob)
45
+
46
+ def select(self, c_puct):
47
+ """Select action among children that gives maximum action value Q
48
+ plus bonus u(P).
49
+ Return: A tuple of (action, next_node)
50
+ """
51
+ return max(self._children.items(),
52
+ key=lambda act_node: act_node[1].get_value(c_puct))
53
+
54
+ def update(self, leaf_value):
55
+ """Update node values from leaf evaluation.
56
+ leaf_value: the value of subtree evaluation from the current player's
57
+ perspective.
58
+ """
59
+ # Count visit.
60
+ self._n_visits += 1
61
+ # Update Q, a running average of values for all visits.
62
+ # print("=====================================")
63
+ # print("Before, Q: {}, visits: {}, leaf_value: {}".format(self._Q, self._n_visits,leaf_value))
64
+ self._Q += 1.0*(leaf_value - self._Q) / self._n_visits
65
+ # print("After, Q: {}, visits: {}, leaf_value: {}".format(self._Q, self._n_visits,leaf_value))
66
+
67
+
68
+ def update_recursive(self, leaf_value):
69
+ """Like a call to update(), but applied recursively for all ancestors.
70
+ """
71
+ # If it is not root, this node's parent should be updated first.
72
+ if self._parent:
73
+ self._parent.update_recursive(-leaf_value)
74
+ self.update(leaf_value)
75
+
76
+ def get_value(self, c_puct):
77
+ """Calculate and return the value for this node.
78
+ It is a combination of leaf evaluations Q, and this node's prior
79
+ adjusted for its visit count, u.
80
+ c_puct: a number in (0, inf) controlling the relative impact of
81
+ value Q, and prior probability P, on this node's score.
82
+ """
83
+ self._u = (c_puct * self._P *
84
+ np.sqrt(self._parent._n_visits) / (1 + self._n_visits))
85
+ return self._Q + self._u
86
+
87
+ def is_leaf(self):
88
+ """Check if leaf node (i.e. no nodes below this have been expanded).
89
+ """
90
+ return self._children == {}
91
+
92
+ def is_root(self):
93
+ return self._parent is None
94
+
95
+
96
+ class MCTS(object):
97
+ """A simple implementation of Monte Carlo Tree Search."""
98
+
99
+ def __init__(self, policy_value_fn, c_puct=5, n_playout=2000):
100
+ """
101
+ policy_value_fn: a function that takes in a board state and outputs
102
+ a list of (action, probability) tuples and also a score in [-1, 1]
103
+ (i.e. the expected value of the end game score from the current
104
+ player's perspective) for the current player.
105
+ c_puct: a number in (0, inf) that controls how quickly exploration
106
+ converges to the maximum-value policy. A higher value means
107
+ relying on the prior more. ???
108
+ """
109
+ self._root = TreeNode(None, 1.0)
110
+ self._policy = policy_value_fn
111
+ self._c_puct = c_puct
112
+ self._n_playout = n_playout
113
+
114
+ def _playout(self, state):
115
+ """Run a single playout from the root to the leaf, getting a value at
116
+ the leaf and propagating it back through its parents.
117
+ State is modified in-place, so a copy must be provided.
118
+ """
119
+ node = self._root
120
+ while(1):
121
+ if node.is_leaf():
122
+
123
+ break
124
+ # Greedily select next move.
125
+ action, node = node.select(self._c_puct)
126
+ state.do_move(action)
127
+
128
+ action_probs, _ = self._policy(state)
129
+ # Check for end of game
130
+ end, winner = state.game_end()
131
+ if not end:
132
+ node.expand(action_probs)
133
+ # Evaluate the leaf node by random rollout
134
+ leaf_value = self._evaluate_rollout(state)
135
+ # Update value and visit count of nodes in this traversal.
136
+ node.update_recursive(-leaf_value)
137
+
138
+ def _evaluate_rollout(self, state, limit=1000):
139
+ """Use the rollout policy to play until the end of the game,
140
+ returning +1 if the current player wins, -1 if the opponent wins,
141
+ and 0 if it is a tie.
142
+ """
143
+ player = state.get_current_player()
144
+ for i in range(limit):
145
+ end, winner = state.game_end()
146
+ if end:
147
+ break
148
+ action_probs = rollout_policy_fn(state)
149
+ max_action = max(action_probs, key=itemgetter(1))[0]
150
+ state.do_move(max_action)
151
+ else:
152
+ # If no break from the loop, issue a warning.
153
+ print("WARNING: rollout reached move limit")
154
+ if winner == -1: # tie
155
+ return 0
156
+ else:
157
+ return 1 if winner == player else -1
158
+
159
+ def get_move(self, state):
160
+ """Runs all playouts sequentially and returns the most visited action.
161
+ state: the current game state
162
+
163
+ Return: the selected action
164
+ """
165
+ start_time = time.time()
166
+ # n_playout: number of playouts (simulations) to run for this move
167
+ for n in range(self._n_playout):
168
+ state_copy = copy.deepcopy(state)
169
+ self._playout(state_copy)
170
+
171
+ need_time = time.time() - start_time
172
+
173
+ print(f" PureMCTS sum_time: {need_time / self._n_playout }, total_simulation: {self._n_playout}")
174
+
175
+ return max(self._root._children.items(), key=lambda act_node: act_node[1]._n_visits)[0], need_time / self._n_playout
176
+
177
+ def update_with_move(self, last_move):
178
+ """Step forward in the tree, keeping everything we already know
179
+ about the subtree.
180
+ """
181
+ if last_move in self._root._children:
182
+ self._root = self._root._children[last_move]
183
+ self._root._parent = None
184
+ else:
185
+ self._root = TreeNode(None, 1.0)
186
+
187
+ def __str__(self):
188
+ return "MCTS"
189
+
190
+
191
+
192
+ class MCTSPlayer(object):
193
+ """AI player based on MCTS"""
194
+ def __init__(self, c_puct=5, n_playout=2000):
195
+ self.mcts = MCTS(policy_value_fn, c_puct, n_playout)
196
+
197
+ def set_player_ind(self, p):
198
+ self.player = p
199
+
200
+ def reset_player(self):
201
+ self.mcts.update_with_move(-1)
202
+
203
+ def get_action(self, board):
204
+ sensible_moves = board.availables
205
+ if len(sensible_moves) > 0:
206
+ move, simul_mean_time = self.mcts.get_move(board)
207
+ self.mcts.update_with_move(-1)
208
+ print("MCTS move:", move)
209
+ return move, simul_mean_time
210
+ else:
211
+ print("WARNING: the board is full")
212
+
213
+
214
+ def __str__(self):
215
+ return "MCTS {}".format(self.player)
216
+
217
+
218
+ # The following block of code is an addition (not part of the original implementation)
219
+
220
+ class Human_Player(object):
221
+ def __init__(self):
222
+ pass
223
+
224
+
225
+ def set_player_ind(self, p):
226
+ self.player = p
227
+
228
+
229
+ def get_action(self, board):
230
+
231
+
232
+ sensible_moves = board.availables
233
+ if len(sensible_moves) > 0:
234
+ # print(sensible_moves)
235
+
236
+ move = int(input("Input the move:"))
237
+ while move not in sensible_moves:
238
+ print(sensible_moves)
239
+ move = int(input("Input the move again:"))
240
+ return move
241
+ else:
242
+ print("WARNING: the board is full")
243
+
244
+ def __str__(self):
245
+ return "Human {}".format(self.player)
246
+
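The selection rule above is PUCT-style: `get_value` scores a child by its running mean value Q plus an exploration bonus u(P) that grows with the parent's visit count and fades with the child's own visits. A minimal standalone sketch of that score, using a hypothetical `puct_score` helper that mirrors the roles of `_Q`, `_P` and `_n_visits`:

```python
import math

def puct_score(q, prior_p, parent_visits, child_visits, c_puct=5.0):
    """PUCT-style node score: mean value Q plus exploration bonus u(P).

    q             -- running average of leaf evaluations for this child
    prior_p       -- prior probability P assigned when the child was expanded
    parent_visits -- visit count of the parent node
    child_visits  -- visit count of this child
    c_puct        -- trade-off constant; larger values weight the prior more
    """
    u = c_puct * prior_p * math.sqrt(parent_visits) / (1 + child_visits)
    return q + u

# An unvisited child with a strong prior dominates at first...
print(puct_score(q=0.0, prior_p=0.5, parent_visits=10, child_visits=0))    # ~7.91
# ...but once it has been visited many times, the bonus fades and Q takes over.
print(puct_score(q=0.1, prior_p=0.5, parent_visits=400, child_visits=300)) # ~0.27
```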
Gomoku_MCTS/policy_value_net_pytorch.py ADDED
@@ -0,0 +1,159 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ An implementation of the policyValueNet in PyTorch
4
+ Tested in PyTorch 0.2.0 and 0.3.0
5
+
6
+ @author: Junxiao Song
7
+ """
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.optim as optim
12
+ import torch.nn.functional as F
13
+ from torch.autograd import Variable
14
+ import numpy as np
15
+
16
+
17
+
18
+
19
+
20
+ class Net(nn.Module):
21
+ """policy-value network module"""
22
+ def __init__(self, board_width, board_height):
23
+ super(Net, self).__init__()
24
+
25
+ self.board_width = board_width
26
+ self.board_height = board_height
27
+ # common layers
28
+ self.conv1 = nn.Conv2d(4, 32, kernel_size=3, padding=1)
29
+ self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
30
+ self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
31
+ # action policy layers
32
+ self.act_conv1 = nn.Conv2d(128, 4, kernel_size=1)
33
+ self.act_fc1 = nn.Linear(4*board_width*board_height,
34
+ board_width*board_height)
35
+ # state value layers
36
+ self.val_conv1 = nn.Conv2d(128, 2, kernel_size=1)
37
+ self.val_fc1 = nn.Linear(2*board_width*board_height, 64)
38
+ self.val_fc2 = nn.Linear(64, 1)
39
+
40
+ def forward(self, state_input):
41
+ # common layers
42
+ x = F.relu(self.conv1(state_input))
43
+ x = F.relu(self.conv2(x))
44
+ x = F.relu(self.conv3(x))
45
+ # action policy layers
46
+ x_act = F.relu(self.act_conv1(x))
47
+ x_act = x_act.view(-1, 4*self.board_width*self.board_height)
48
+ x_act = F.log_softmax(self.act_fc1(x_act), dim=1)
49
+ # state value layers
50
+ x_val = F.relu(self.val_conv1(x))
51
+ x_val = x_val.view(-1, 2*self.board_width*self.board_height)
52
+ x_val = F.relu(self.val_fc1(x_val))
53
+ x_val = torch.tanh(self.val_fc2(x_val))
54
+ return x_act, x_val
55
+
56
+
57
+ class PolicyValueNet():
58
+ """policy-value network """
59
+ def __init__(self, board_width, board_height,
60
+ model_file=None, use_gpu=False):
61
+ self.use_gpu = use_gpu
62
+ self.board_width = board_width
63
+ self.board_height = board_height
64
+
65
+ # the policy value net module
66
+ if self.use_gpu:
67
+ self.policy_value_net = Net(board_width, board_height).cuda()
68
+ else:
69
+ self.policy_value_net = Net(board_width, board_height)
70
+
71
+ if model_file:
72
+ net_params = torch.load(model_file)
73
+ self.policy_value_net.load_state_dict(net_params)
74
+
75
+ def policy_value(self, state_batch):
76
+ """
77
+ input: a batch of states
78
+ output: a batch of action probabilities and state values
79
+ """
80
+ if self.use_gpu:
81
+ state_batch = Variable(torch.FloatTensor(state_batch).cuda())
82
+ log_act_probs, value = self.policy_value_net(state_batch)
83
+ act_probs = np.exp(log_act_probs.data.cpu().numpy())
84
+ return act_probs, value.data.cpu().numpy()
85
+ else:
86
+ state_batch = Variable(torch.FloatTensor(state_batch))
87
+ log_act_probs, value = self.policy_value_net(state_batch)
88
+ act_probs = np.exp(log_act_probs.data.numpy())
89
+ return act_probs, value.data.numpy()
90
+
91
+ def policy_value_fn(self, board):
92
+ """
93
+ input: board
94
+ output: a list of (action, probability) tuples for each available
95
+ action and the score of the board state
96
+ """
97
+ legal_positions = board.availables
98
+ current_state = np.ascontiguousarray(board.current_state().reshape(
99
+ -1, 4, self.board_width, self.board_height))
100
+ if self.use_gpu:
101
+ log_act_probs, value = self.policy_value_net(
102
+ Variable(torch.from_numpy(current_state)).cuda().float())
103
+ act_probs = np.exp(log_act_probs.data.cpu().numpy().flatten())
104
+ else:
105
+ log_act_probs, value = self.policy_value_net(
106
+ Variable(torch.from_numpy(current_state)).float())
107
+ act_probs = np.exp(log_act_probs.data.numpy().flatten())
108
+ act_probs = zip(legal_positions, act_probs[legal_positions])
109
+ value = value.data[0][0]
110
+ return act_probs, value
111
+
112
+
113
+ # Moved to main_worker
114
+
115
+ def train_step(self, state_batch, mcts_probs, winner_batch, lr):
116
+ """perform a training step"""
117
+
118
+ # self.use_gpu = True
119
+ # wrap in Variable
120
+ if self.use_gpu:
121
+ state_batch = Variable(torch.FloatTensor(state_batch).cuda())
122
+ mcts_probs = Variable(torch.FloatTensor(mcts_probs).cuda())
123
+ winner_batch = Variable(torch.FloatTensor(winner_batch).cuda())
124
+ else:
125
+ state_batch = Variable(torch.FloatTensor(state_batch))
126
+ mcts_probs = Variable(torch.FloatTensor(mcts_probs))
127
+ winner_batch = Variable(torch.FloatTensor(winner_batch))
128
+
129
+ # zero the parameter gradients
130
+ self.optimizer.zero_grad()
131
+ # set learning rate
132
+ set_learning_rate(self.optimizer, lr)
133
+
134
+ # forward
135
+ log_act_probs, value = self.policy_value_net(state_batch)
136
+ # define the loss = (z - v)^2 - pi^T * log(p) + c||theta||^2
137
+ # Note: the L2 penalty is incorporated in optimizer
138
+ value_loss = F.mse_loss(value.view(-1), winner_batch)
139
+ policy_loss = -torch.mean(torch.sum(mcts_probs*log_act_probs, 1))
140
+ loss = value_loss + policy_loss
141
+ # backward and optimize
142
+ loss.backward()
143
+ self.optimizer.step()
144
+ # calc policy entropy, for monitoring only
145
+ entropy = -torch.mean(
146
+ torch.sum(torch.exp(log_act_probs) * log_act_probs, 1)
147
+ )
148
+ # return loss.data[0], entropy.data[0]
149
+ #for pytorch version >= 0.5 please use the following line instead.
150
+ return loss.item(), entropy.item()
151
+
152
+ # def get_policy_param(self):
153
+ # net_params = self.policy_value_net.state_dict()
154
+ # return net_params
155
+
156
+ # def save_model(self, model_file):
157
+ # """ save model params to file """
158
+ # net_params = self.get_policy_param() # get model params
159
+ # torch.save(net_params, model_file)
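`train_step` above optimizes the AlphaZero objective loss = (z - v)^2 - pi^T * log(p), with the L2 term delegated to the optimizer's weight decay. The two loss terms can be reproduced on toy tensors (illustrative values only, not taken from the repository):

```python
import torch
import torch.nn.functional as F

# Toy batch: two positions of a 3-action game (all numbers are made up).
log_act_probs = torch.log(torch.tensor([[0.7, 0.2, 0.1],
                                        [0.1, 0.3, 0.6]]))   # policy head output, log p
mcts_probs = torch.tensor([[0.6, 0.3, 0.1],
                           [0.0, 0.2, 0.8]])                 # MCTS visit distribution pi
value = torch.tensor([0.4, -0.5])                            # value head output, v
winner = torch.tensor([1.0, -1.0])                           # game outcome z per sample

value_loss = F.mse_loss(value, winner)                                    # (z - v)^2
policy_loss = -torch.mean(torch.sum(mcts_probs * log_act_probs, dim=1))   # -pi^T log p
loss = value_loss + policy_loss
print(value_loss.item(), policy_loss.item(), loss.item())
```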
Gomoku_MCTS/visualization/_blip_uni_cross_mu_bs512_lr0.002/events.out.tfevents.1700183498.LAPTOP-5AN2UHOO ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f64431c679947fac92ef87e7f3d3b6a75c0cdf82e6fd0383451a98d778b7b21e
3
+ size 40
Gomoku_MCTS/visualization/_blip_uni_cross_mu_bs512_lr0.002/events.out.tfevents.1700183516.LAPTOP-5AN2UHOO ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaa7025d5d1daa88dce58231e0fba4d7a04391612c696e4c2e23292ad4169d80
3
+ size 40
Gomoku_MCTS/visualization/_blip_uni_cross_mu_bs512_lr0.002/events.out.tfevents.1700183568.LAPTOP-5AN2UHOO ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:440fac7c819d3368da1126c35e1a146b4ec3a3e614cb3c6e7e10063f9f0ced3c
3
+ size 40
Gomoku_MCTS/visualization/_blip_uni_cross_mu_bs512_lr0.002/events.out.tfevents.1700183629.LAPTOP-5AN2UHOO ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e45fb4fd64ac1d0a3ec9f5376d2122b48aa9c0a56e01ccfdc0a4ea0ed22188ed
3
+ size 40
Gomoku_MCTS/visualization/_blip_uni_cross_mu_bs512_lr0.002/events.out.tfevents.1700183640.LAPTOP-5AN2UHOO ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59b89d395abdaa5c2b8cb0922f4a465a9f06c59a429697c4d138e58033e6e1a0
3
+ size 40
Gomoku_MCTS/visualization/_blip_uni_cross_mu_bs512_lr0.002/events.out.tfevents.1700183667.LAPTOP-5AN2UHOO ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2ffadd87d44bba87f7bcd80fb424959536ff24e7a4e52a67238200c691befac
3
+ size 40
Gomoku_MCTS/visualization/_blip_uni_cross_mu_bs512_lr0.002/events.out.tfevents.1700183756.LAPTOP-5AN2UHOO ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a91a4478ed17986eaf70acf7a0fb3fe0db11cbcbe8eedf7655bfad9e6a4a9650
3
+ size 40
Gomoku_MCTS/visualization/_blip_uni_cross_mu_bs512_lr0.002/events.out.tfevents.1700183820.LAPTOP-5AN2UHOO ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2641a889839f3eaf83a9ee90b4bdf0073488a9416044507d321a7bfc8bbad83f
3
+ size 40
Gomoku_MCTS/visualization/_blip_uni_cross_mu_bs512_lr0.002/events.out.tfevents.1700184097.LAPTOP-5AN2UHOO ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c33c3e165e65aac3f1f75ddf5a1a4a3fc6e494e5be728a3a455ff453c7a40100
3
+ size 3726
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Demo
3
- emoji: 🐢
4
  colorFrom: green
5
- colorTo: green
6
  sdk: streamlit
7
  sdk_version: 1.28.2
8
  app_file: app.py
 
1
  ---
2
+ title: Gomoku Zero
3
+ emoji: 📉
4
  colorFrom: green
5
+ colorTo: blue
6
  sdk: streamlit
7
  sdk_version: 1.28.2
8
  app_file: app.py
app.py ADDED
@@ -0,0 +1,56 @@
1
+ import streamlit as st
2
+ # Set up the page configuration
3
+ st.set_page_config(
4
+ page_title="AI 3603 Gomoku Project",
5
+ page_icon="👋",
6
+ layout="wide",
7
+ initial_sidebar_state="collapsed"
8
+ )
9
+ # Main title
10
+ st.write('<h1 style="text-align: center; color: black; font-weight: bold;">AI 3603 Gomoku Project 👋</h1>', unsafe_allow_html=True)
11
+ # Project contributors
12
+ st.write('<p style="text-align: center; font-size: 20px;"><a href="https://github.com" style="color: blue; font-weight: normal; margin-right: 20px; text-decoration: none;">Jiaxin Li</a> \
13
+ <a href="https://github.com" style="color: blue; font-weight: normal; margin-right: 20px; text-decoration: none;">Junzhe Shen</a> \
14
+ <a href="https://github.com" style="color: blue; font-weight: normal; text-decoration: none;">Benhao Huang</a></p>', unsafe_allow_html=True)
15
+ # Link badges
16
+ st.markdown("""
17
+ <div style="text-align: center;">
18
+ <a href="#" style="background-color: #343a40; color: white; font-size: 15px; padding: 10px 15px; margin: 5px; border-radius: 15px; text-decoration: none;">📄 Report</a>
19
+ <a href="#" style="background-color: #343a40; color: white; font-size: 15px; padding: 10px 15px; margin: 5px; border-radius: 15px; text-decoration: none;">💻 Code</a>
20
+ <a href="#" style="background-color: #343a40; color: white; font-size: 15px; padding: 10px 15px; margin: 5px; border-radius: 15px; text-decoration: none;">🌐 Space</a>
21
+ <a href="#" style="background-color: #343a40; color: white; font-size: 15px; padding: 10px 15px; margin: 5px; border-radius: 15px; text-decoration: none;">📊 PPT</a>
22
+ </div>
23
+ </br>
24
+ </br>
25
+ """, unsafe_allow_html=True)
26
+ # Project introduction
27
+ st.markdown("""
28
+ <div style='color: black; font-size:18px'>Gomoku is an abstract strategy board game. Also called <span style='color:red;'>Gobang</span> or <span style='color:red;'>Five in a Row</span>,
29
+ it is traditionally played with Go pieces (black and white stones)
30
+ on a Go board. It is straightforward and fun, but also full of strategy and challenge.
31
+ Our project aims to apply machine learning techniques to build a powerful Gomoku AI.</div>
32
+ """,
33
+ unsafe_allow_html=True)
34
+ # Innovations and image showcase
35
+ st.write("<h2 style='text-align: center; color: black; font-weight: bold;'>Innovations We Made 👍</h2>", unsafe_allow_html=True)
36
+ col1, col2, col3 = st.columns(3)
37
+ with col1:
38
+ st.image("assets/favicon_circle.png", width=50) # replace with your own image URL
39
+ st.caption("Innovation 1")
40
+ with col2:
41
+ st.image("assets/favicon_circle.png", width=50) # replace with your own image URL
42
+ st.caption("Innovation 2")
43
+ with col3:
44
+ st.image("assets/favicon_circle.png", width=50) # replace with your own image URL
45
+ st.caption("Innovation 3")
46
+ # Code structure overview and code snippet
47
+ st.write("<h2 style='text-align: center; color: black; font-weight: bold;'>Code Structure 🛠️</h2>", unsafe_allow_html=True)
48
+ st.code("""
49
+ import os
50
+ import streamlit as st
51
+ def main():
52
+ # your code here
53
+ if __name__ == "__main__":
54
+ main()
55
+ """, language="python")
56
+ st.markdown("---")
assets/favicon_circle.png ADDED
const.py ADDED
@@ -0,0 +1,58 @@
1
+ import numpy as np
2
+
3
+ _BOARD_SIZE = 8
4
+ _BOARD_SIZE_1D = _BOARD_SIZE * _BOARD_SIZE
5
+ _BLANK = 0
6
+ _BLACK = 1
7
+ _WHITE = 2
8
+ _PLAYER_SYMBOL = {
9
+ _WHITE: "⚪",
10
+ _BLANK: "➕",
11
+ _BLACK: "⚫",
12
+ }
13
+ _PLAYER_COLOR = {
14
+ _WHITE: "AI",
15
+ _BLANK: "Blank",
16
+ _BLACK: "YOU HUMAN",
17
+ }
18
+ _HORIZONTAL = np.array(
19
+ [
20
+ [0, 0, 0, 0, 0],
21
+ [0, 0, 0, 0, 0],
22
+ [1, 1, 1, 1, 1],
23
+ [0, 0, 0, 0, 0],
24
+ [0, 0, 0, 0, 0],
25
+ ]
26
+ )
27
+ _VERTICAL = np.array(
28
+ [
29
+ [0, 0, 1, 0, 0],
30
+ [0, 0, 1, 0, 0],
31
+ [0, 0, 1, 0, 0],
32
+ [0, 0, 1, 0, 0],
33
+ [0, 0, 1, 0, 0],
34
+ ]
35
+ )
36
+ _DIAGONAL_UP_LEFT = np.array(
37
+ [
38
+ [1, 0, 0, 0, 0],
39
+ [0, 1, 0, 0, 0],
40
+ [0, 0, 1, 0, 0],
41
+ [0, 0, 0, 1, 0],
42
+ [0, 0, 0, 0, 1],
43
+ ]
44
+ )
45
+ _DIAGONAL_UP_RIGHT = np.array(
46
+ [
47
+ [0, 0, 0, 0, 1],
48
+ [0, 0, 0, 1, 0],
49
+ [0, 0, 1, 0, 0],
50
+ [0, 1, 0, 0, 0],
51
+ [1, 0, 0, 0, 0],
52
+ ]
53
+ )
54
+
55
+ _ROOM_COLOR = {
56
+ True: _BLACK,
57
+ False: _WHITE,
58
+ }
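The four 5x5 kernels above are convolved over the board in `pages/Player_VS_AI.py` (`check_win`) to look for five aligned stones. A simplified, self-contained illustration of the idea, run on a 0/1 mask of a single player's stones so a completed line shows up as a convolution response of exactly 5 (this sketch rebuilds the horizontal kernel rather than importing it, and is not the exact call used in the page):

```python
import numpy as np
from scipy.signal import convolve

_BOARD_SIZE = 8
_HORIZONTAL = np.zeros((5, 5), dtype=int)
_HORIZONTAL[2, :] = 1  # same shape as the kernel defined above: a row of five ones

board = np.zeros((_BOARD_SIZE, _BOARD_SIZE), dtype=int)
board[3, 2:7] = 1  # five black stones in a row on row 3, columns 2..6

black_mask = (board == 1).astype(int)
response = convolve(black_mask, _HORIZONTAL, mode="same")
print(response.max())              # 5 -> a horizontal five-in-a-row exists
print(np.argwhere(response == 5))  # [[3 4]], the centre of the winning line
```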
pages/Player_VS_AI.py ADDED
@@ -0,0 +1,409 @@
1
+ """
2
+ FileName: Player_VS_AI.py
3
+ Author: Benhao Huang
4
+ Create Date: 2023/11/18
5
+ Description: this file is used to display our project and add visualization elements to the game, using Streamlit
6
+ """
7
+
8
+ import time
9
+ import pandas as pd
10
+ from copy import deepcopy
11
+
12
+ # import torch
13
+ import numpy as np
14
+ import streamlit as st
15
+ from scipy.signal import convolve # this is used to check if any player wins
16
+ from streamlit import session_state
17
+ from streamlit_server_state import server_state, server_state_lock
18
+ from Gomoku_MCTS import MCTSpure, alphazero, Board, PolicyValueNet
19
+ import matplotlib.pyplot as plt
20
+
21
+ from const import (
22
+ _BLACK, # 1, for human
23
+ _WHITE, # 2 , for AI
24
+ _BLANK,
25
+ _PLAYER_COLOR,
26
+ _PLAYER_SYMBOL,
27
+ _ROOM_COLOR,
28
+ _VERTICAL,
29
+ _HORIZONTAL,
30
+ _DIAGONAL_UP_LEFT,
31
+ _DIAGONAL_UP_RIGHT,
32
+ _BOARD_SIZE,
33
+ _BOARD_SIZE_1D
34
+ )
35
+
36
+
37
+ # Utils
38
+ class Room:
39
+ def __init__(self, room_id) -> None:
40
+ self.ROOM_ID = room_id
41
+ # self.BOARD = np.zeros(shape=(_BOARD_SIZE, _BOARD_SIZE), dtype=int)
42
+ self.BOARD = Board(width=_BOARD_SIZE, height=_BOARD_SIZE, n_in_row=5, players=[_BLACK, _WHITE])
43
+ self.PLAYER = _BLACK
44
+ self.TURN = self.PLAYER
45
+ self.HISTORY = (0, 0)
46
+ self.WINNER = _BLANK
47
+ self.TIME = time.time()
48
+ self.MCTS = MCTSpure(c_puct=5, n_playout=10)
49
+ self.COORDINATE_1D = [_BOARD_SIZE_1D + 1]
50
+ self.current_move = -1
51
+ self.simula_time_list = []
52
+
53
+
54
+ def change_turn(cur):
55
+ return cur % 2 + 1
56
+
57
+
58
+ # Initialize the game
59
+ if "ROOM" not in session_state:
60
+ session_state.ROOM = Room("local")
61
+ if "OWNER" not in session_state:
62
+ session_state.OWNER = False
63
+
64
+ # Check server health
65
+ if "ROOMS" not in server_state:
66
+ with server_state_lock["ROOMS"]:
67
+ server_state.ROOMS = {}
68
+
69
+ # # Layout
70
+ # Main
71
+ TITLE = st.empty()
72
+ TITLE.header("🤖 AI 3603 Gomoku")
73
+ ROUND_INFO = st.empty()
74
+ st.markdown("<br>", unsafe_allow_html=True)
75
+ BOARD_PLATE = [
76
+ [cell.empty() for cell in st.columns([1 for _ in range(_BOARD_SIZE)])] for _ in range(_BOARD_SIZE)
77
+ ]
78
+ LOG = st.empty()
79
+
80
+ # Sidebar
81
+ SCORE_TAG = st.sidebar.empty()
82
+ SCORE_PLATE = st.sidebar.columns(2)
83
+ # History scores
84
+ SCORE_TAG.subheader("Scores")
85
+
86
+ PLAY_MODE_INFO = st.sidebar.container()
87
+ MULTIPLAYER_TAG = st.sidebar.empty()
88
+ with st.sidebar.container():
89
+ ANOTHER_ROUND = st.empty()
90
+ RESTART = st.empty()
91
+ EXIT = st.empty()
92
+ GAME_INFO = st.sidebar.container()
93
+ message = st.empty()
94
+ PLAY_MODE_INFO.write("---\n\n**You are Black, AI agent is White.**")
95
+ GAME_INFO.markdown(
96
+ """
97
+ ---
98
+ # <span style="color:black;">Freestyle Gomoku game. 🎲</span>
99
+ - no restrictions 🚫
100
+ - no regrets 😎
101
+ - swap players after one round is over 🔁
102
+ Powered by an AlphaZero approach with our own improvements! 🚀 For the specific details, please check out our <a href="insert_report_link_here" style="color:blue;">report</a>.
103
+ ##### Adapted and improved by us! 🌟 <a href="https://github.com/Lijiaxin0111/AI_3603_BIGHOME" style="color:blue;">Our Github repo</a>
104
+ """,
105
+ unsafe_allow_html=True,
106
+ )
107
+
108
+
109
+ def restart() -> None:
110
+ """
111
+ Restart the game.
112
+ """
113
+ session_state.ROOM = Room(session_state.ROOM.ROOM_ID)
114
+
115
+
116
+ RESTART.button(
117
+ "Reset",
118
+ on_click=restart,
119
+ help="Clear the board as well as the scores",
120
+ )
121
+
122
+
123
+ # Draw the board
124
+ def gomoku():
125
+ """
126
+ Draw the board.
127
+ Handle the main logic.
128
+ """
129
+
130
+ # Restart the game
131
+
132
+ # Continue new round
133
+ def another_round() -> None:
134
+ """
135
+ Continue new round.
136
+ """
137
+ session_state.ROOM = deepcopy(session_state.ROOM)
138
+ session_state.ROOM.BOARD = Board(width=_BOARD_SIZE, height=_BOARD_SIZE, n_in_row=5)
139
+ session_state.ROOM.PLAYER = session_state.ROOM.PLAYER % 2 + 1
140
+ session_state.ROOM.TURN = session_state.ROOM.PLAYER
141
+ session_state.ROOM.WINNER = _BLANK # 0
142
+ session_state.ROOM.COORDINATE_1D = [_BOARD_SIZE_1D + 1]
143
+
144
+ # Room status sync
145
+ def sync_room() -> bool:
146
+ room_id = session_state.ROOM.ROOM_ID
147
+ if room_id not in server_state.ROOMS.keys():
148
+ session_state.ROOM = Room("local")
149
+ return False
150
+ elif server_state.ROOMS[room_id].TIME == session_state.ROOM.TIME:
151
+ return False
152
+ elif server_state.ROOMS[room_id].TIME < session_state.ROOM.TIME:
153
+ # Only acquire the lock when writing to the server state
154
+ with server_state_lock["ROOMS"]:
155
+ server_rooms = server_state.ROOMS
156
+ server_rooms[room_id] = session_state.ROOM
157
+ server_state.ROOMS = server_rooms
158
+ return True
159
+ else:
160
+ session_state.ROOM = server_state.ROOMS[room_id]
161
+ return True
162
+
163
+ # Check if winner emerge from move
164
+ def check_win() -> int:
165
+ """
166
+ Use convolution to check if any player wins.
167
+ """
168
+ vertical = convolve(
169
+ session_state.ROOM.BOARD.board_map,
170
+ _VERTICAL,
171
+ mode="same",
172
+ )
173
+ horizontal = convolve(
174
+ session_state.ROOM.BOARD.board_map,
175
+ _HORIZONTAL,
176
+ mode="same",
177
+ )
178
+ diagonal_up_left = convolve(
179
+ session_state.ROOM.BOARD.board_map,
180
+ _DIAGONAL_UP_LEFT,
181
+ mode="same",
182
+ )
183
+ diagonal_up_right = convolve(
184
+ session_state.ROOM.BOARD.board_map,
185
+ _DIAGONAL_UP_RIGHT,
186
+ mode="same",
187
+ )
188
+ if (
189
+ np.max(
190
+ [
191
+ np.max(vertical),
192
+ np.max(horizontal),
193
+ np.max(diagonal_up_left),
194
+ np.max(diagonal_up_right),
195
+ ]
196
+ )
197
+ == 5 * _BLACK
198
+ ):
199
+ winner = _BLACK
200
+ elif (
201
+ np.min(
202
+ [
203
+ np.min(vertical),
204
+ np.min(horizontal),
205
+ np.min(diagonal_up_left),
206
+ np.min(diagonal_up_right),
207
+ ]
208
+ )
209
+ == 5 * _WHITE
210
+ ):
211
+ winner = _WHITE
212
+ else:
213
+ winner = _BLANK
214
+ return winner
215
+
216
+ # Triggers the board response on click
217
+ def handle_click(x, y):
218
+ """
219
+ Handle a board click: ignore clicks on occupied cells, sync the room in multiplayer mode, or play the move on the current board
220
+ """
221
+ if session_state.ROOM.BOARD.board_map[x][y] != _BLANK:
222
+ pass
223
+ elif (
224
+ session_state.ROOM.ROOM_ID in server_state.ROOMS.keys()
225
+ and _ROOM_COLOR[session_state.OWNER]
226
+ != server_state.ROOMS[session_state.ROOM.ROOM_ID].TURN
227
+ ):
228
+ sync_room()
229
+
230
+ # normal play situation
231
+ elif session_state.ROOM.WINNER == _BLANK:
232
+ # session_state.ROOM = deepcopy(session_state.ROOM)
233
+ print("View of human player: ", session_state.ROOM.BOARD.board_map)
234
+ move = session_state.ROOM.BOARD.location_to_move((x, y))
235
+ session_state.ROOM.current_move = move
236
+ session_state.ROOM.BOARD.do_move(move)
237
+ session_state.ROOM.BOARD.board_map[x][y] = session_state.ROOM.TURN
238
+ session_state.ROOM.COORDINATE_1D.append(x * _BOARD_SIZE + y)
239
+
240
+ session_state.ROOM.TURN = change_turn(session_state.ROOM.TURN)
241
+ win, winner = session_state.ROOM.BOARD.game_end()
242
+ if win:
243
+ session_state.ROOM.WINNER = winner
244
+ session_state.ROOM.HISTORY = (
245
+ session_state.ROOM.HISTORY[0]
246
+ + int(session_state.ROOM.WINNER == _WHITE),
247
+ session_state.ROOM.HISTORY[1]
248
+ + int(session_state.ROOM.WINNER == _BLACK),
249
+ )
250
+ session_state.ROOM.TIME = time.time()
251
+
252
+ def forbid_click(x, y):
253
+ # st.warning('This position is already occupied!', icon="⚠️")
254
+ st.error("({}, {}) has been occupied!!)".format(x, y), icon="🚨")
255
+ print("asdas")
256
+
257
+ # Draw board
258
+ def draw_board(response: bool):
259
+ """construct each buttons for all cells of the board"""
260
+
261
+ if response and session_state.ROOM.TURN == _BLACK: # human turn
262
+ print("Your turn")
263
+ # construction of clickable buttons
264
+ for i, row in enumerate(session_state.ROOM.BOARD.board_map):
265
+ # print("row:", row)
266
+ for j, cell in enumerate(row):
267
+ if (
268
+ i * _BOARD_SIZE + j
269
+ in (session_state.ROOM.COORDINATE_1D)
270
+ ):
271
+ # disable click for cells that are already occupied
272
+ BOARD_PLATE[i][j].button(
273
+ _PLAYER_SYMBOL[cell],
274
+ key=f"{i}:{j}",
275
+ args=(i, j),
276
+ on_click=forbid_click
277
+ )
278
+ else:
279
+ # enable click for other cells available for human choices
280
+ BOARD_PLATE[i][j].button(
281
+ _PLAYER_SYMBOL[cell],
282
+ key=f"{i}:{j}",
283
+ on_click=handle_click,
284
+ args=(i, j),
285
+ )
286
+
287
+
288
+ elif response and session_state.ROOM.TURN == _WHITE: # AI turn
289
+ message.empty()
290
+ with st.spinner('🔮✨ Waiting for AI response... ⏳🚀'):
291
+ time.sleep(0.1)
292
+ print("AI's turn")
293
+ print("Below are current board under AI's view")
294
+ print(session_state.ROOM.BOARD.board_map)
295
+ move, simul_time = session_state.ROOM.MCTS.get_action(session_state.ROOM.BOARD)
296
+ session_state.ROOM.simula_time_list.append(simul_time)
297
+ print("AI takes move: ", move)
298
+ session_state.ROOM.current_move = move
299
+ gpt_response = move
300
+ gpt_i, gpt_j = gpt_response // _BOARD_SIZE, gpt_response % _BOARD_SIZE
301
+ print("AI's move is located at ({}, {}) :".format(gpt_i, gpt_j))
302
+ move = session_state.ROOM.BOARD.location_to_move((gpt_i, gpt_j))
303
+ print("Location to move: ", move)
304
+ session_state.ROOM.BOARD.do_move(move)
305
+ # session_state.ROOM.BOARD[gpt_i][gpt_j] = session_state.ROOM.TURN
306
+ session_state.ROOM.COORDINATE_1D.append(gpt_i * _BOARD_SIZE + gpt_j)
307
+
308
+ # construction of clickable buttons
309
+ for i, row in enumerate(session_state.ROOM.BOARD.board_map):
310
+ # print("row:", row)
311
+ for j, cell in enumerate(row):
312
+ if (
313
+ i * _BOARD_SIZE + j
314
+ in (session_state.ROOM.COORDINATE_1D)
315
+ ):
316
+ # disable click for cells that are already occupied
317
+ BOARD_PLATE[i][j].button(
318
+ _PLAYER_SYMBOL[cell],
319
+ key=f"{i}:{j}",
320
+ args=(i, j),
321
+ on_click=forbid_click
322
+ )
323
+ else:
324
+ # enable click for other cells available for human choices
325
+ BOARD_PLATE[i][j].button(
326
+ _PLAYER_SYMBOL[cell],
327
+ key=f"{i}:{j}",
328
+ on_click=handle_click,
329
+ args=(i, j),
330
+ )
331
+
332
+ message.markdown(
333
+ 'AI agent has calculated its strategy, which takes <span style="color: blue; font-size: 20px;">{:.3e}</span>s per simulation.'.format(
334
+ simul_time),
335
+ unsafe_allow_html=True
336
+ )
337
+ LOG.subheader("Logs")
338
+ # change turn
339
+ session_state.ROOM.TURN = change_turn(session_state.ROOM.TURN)
340
+ # session_state.ROOM.WINNER = check_win()
341
+
342
+ win, winner = session_state.ROOM.BOARD.game_end()
343
+ if win:
344
+ session_state.ROOM.WINNER = winner
345
+
346
+ session_state.ROOM.HISTORY = (
347
+ session_state.ROOM.HISTORY[0]
348
+ + int(session_state.ROOM.WINNER == _WHITE),
349
+ session_state.ROOM.HISTORY[1]
350
+ + int(session_state.ROOM.WINNER == _BLACK),
351
+ )
352
+ session_state.ROOM.TIME = time.time()
353
+
354
+ if not response or session_state.ROOM.WINNER != _BLANK:
355
+ print("Game over")
356
+ for i, row in enumerate(session_state.ROOM.BOARD.board_map):
357
+ for j, cell in enumerate(row):
358
+ BOARD_PLATE[i][j].write(
359
+ _PLAYER_SYMBOL[cell],
360
+ key=f"{i}:{j}",
361
+ )
362
+
363
+ # Game process control
364
+ def game_control():
365
+ if session_state.ROOM.WINNER != _BLANK:
366
+ draw_board(False)
367
+ else:
368
+ draw_board(True)
369
+ if session_state.ROOM.WINNER != _BLANK or 0 not in session_state.ROOM.BOARD.board_map:
370
+ ANOTHER_ROUND.button(
371
+ "Play Next round!",
372
+ on_click=another_round,
373
+ help="Clear board and swap first player",
374
+ )
375
+
376
+ # Infos
377
+ def update_info() -> None:
378
+ # Additional information
379
+ SCORE_PLATE[0].metric("Gomoku-Agent", session_state.ROOM.HISTORY[0])
380
+ SCORE_PLATE[1].metric("Black", session_state.ROOM.HISTORY[1])
381
+ if session_state.ROOM.WINNER != _BLANK:
382
+ st.balloons()
383
+ ROUND_INFO.write(
384
+ f"#### **{_PLAYER_COLOR[session_state.ROOM.WINNER]} WIN!**\n**Click buttons on the left for more plays.**"
385
+ )
386
+
387
+ # elif 0 not in session_state.ROOM.BOARD.board_map:
388
+ # ROUND_INFO.write("#### **Tie**")
389
+ # else:
390
+ # ROUND_INFO.write(
391
+ # f"#### **{_PLAYER_SYMBOL[session_state.ROOM.TURN]} {_PLAYER_COLOR[session_state.ROOM.TURN]}'s turn...**"
392
+ # )
393
+
394
+ # draw the plot for simulation time
395
+ # Build a DataFrame of per-move simulation times
396
+
397
+ print(session_state.ROOM.simula_time_list)
398
+ st.markdown("<br>", unsafe_allow_html=True)
399
+ st.markdown("<br>", unsafe_allow_html=True)
400
+ chart_data = pd.DataFrame(session_state.ROOM.simula_time_list, columns=["Simulation Time"])
401
+ st.line_chart(chart_data)
402
+
403
+ # The main game loop
404
+ game_control()
405
+ update_info()
406
+
407
+
408
+ if __name__ == "__main__":
409
+ gomoku()
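The Streamlit callbacks above (`handle_click` for the human, the AI branch of `draw_board`) drive the `Board` and `MCTSpure` objects through one move each per rerun. The same call sequence can be exercised headlessly; the following console sketch only uses names imported at the top of this page (`Board`, `MCTSpure`) and is meant to illustrate the call order, not to be part of the app:

```python
from Gomoku_MCTS import MCTSpure, Board

board = Board(width=8, height=8, n_in_row=5, players=[1, 2])  # 1 = black (human), 2 = white (AI)
ai = MCTSpure(c_puct=5, n_playout=10)

# Human (black) move, as handle_click does: (row, col) -> move index -> do_move.
human_move = board.location_to_move((3, 4))
board.do_move(human_move)

# AI (white) reply, as the AI branch of draw_board does.
ai_move, mean_simulation_time = ai.get_action(board)
board.do_move(ai_move)
print("AI move:", ai_move, "mean simulation time:", mean_simulation_time)

# Same end-of-game check used after every move in the page.
end, winner = board.game_end()
print("game over:", end, "winner:", winner)
```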
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ pandas~=2.1.3
2
+ numpy~=1.26.2
3
+ streamlit~=1.28.2
4
+ matplotlib~=3.8.2
5
+ scipy~=1.11.3
6
+ torch~=2.1.1
7
+ streamlit-server-state==0.17.1
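With these pins, the intended local workflow appears to be the standard Streamlit one (no run script is included in this commit): install with `pip install -r requirements.txt`, then start the landing page with `streamlit run app.py`; the `pages/Player_VS_AI.py` page is picked up automatically from the `pages/` directory.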