sodastar committed
Commit 0819f4e · 1 Parent(s): b7dd8b4

Deploying Othello Flask backend with Docker

Files changed (7)
  1. .DS_Store +0 -0
  2. Dockerfile +23 -0
  3. LICENSE +21 -0
  4. alphazero.py +487 -0
  5. app.py +343 -0
  6. game.py +417 -0
  7. requirements.txt +12 -0
.DS_Store ADDED
Binary file (6.15 kB).
 
Dockerfile ADDED
@@ -0,0 +1,23 @@
1
+ # othello-backend/Dockerfile
2
+
3
+ # 1. Base image: the official Python image, which ships the necessary system libraries
4
+ FROM python:3.9-slim
5
+
6
+ # 2. Working directory: everything below runs inside /app
7
+ WORKDIR /app
8
+
9
+ # 3. Copy the dependency file and install the Python packages
10
+ # Copy and install the dependencies first so the Docker layer cache can be reused
11
+ COPY requirements.txt .
12
+ RUN pip install --no-cache-dir -r requirements.txt
13
+
14
+ # 4. Copy all application code and the model
15
+ # Note: game.py, alphazero.py, app.py and checkpoint/ must all live in the othello-backend directory
16
+ COPY . .
17
+
18
+ # 5. Expose the port: Hugging Face Spaces routes HTTP traffic to port 7860 by default
19
+ EXPOSE 7860
20
+
21
+ # 6. Container start command: run the Flask app
22
+ # CMD is executed when the container starts
23
+ CMD ["python", "app.py"]
LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 wangxuguang
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
alphazero.py ADDED
@@ -0,0 +1,487 @@
1
+ import logging
2
+ import math
3
+ import time
4
+ import os
5
+ import numpy as np
6
+ import torch
7
+ import torch.nn as nn
8
+ import torch.nn.functional as F
9
+ import torch.optim as optim
10
+ from tqdm import tqdm
11
+ from collections import deque
12
+ from pickle import Pickler, Unpickler
13
+ from random import shuffle
14
+
15
+ from game import *
16
+ logging.basicConfig(level = logging.INFO)
17
+ log = logging.getLogger(__name__)
18
+
19
+
20
+ class MCTS():
21
+ """
22
+ This class handles the MCTS tree.
23
+ """
24
+
25
+ def __init__(self, game, nnet, args):
26
+ self.game = game
27
+ self.nnet = nnet
28
+ self.args = args
29
+ self.Qsa = {} # stores Q values for s,a (as defined in the paper)
30
+ self.Nsa = {} # stores #times edge s,a was visited
31
+ self.Ns = {} # stores #times board s was visited
32
+ self.Ps = {} # stores initial policy (returned by neural net)
33
+
34
+ self.Es = {} # stores game.getGameEnded for board s
35
+ self.Vs = {} # stores game.getValidMoves for board s
36
+
37
+ def getActionProb(self, canonicalBoard, temp=1):
38
+ """
39
+ This function performs numMCTSSims simulations of MCTS starting from
40
+ canonicalBoard.
41
+
42
+ Returns:
43
+ probs: a policy vector where the probability of the ith action is
44
+ proportional to Nsa[(s,a)]**(1./temp)
45
+ """
46
+ for i in range(self.args.numMCTSSims):
47
+ self.search(canonicalBoard)
48
+
49
+ s = self.game.stringRepresentation(canonicalBoard)
50
+ counts = [self.Nsa[(s, a)] if (s, a) in self.Nsa else 0 for a in range(self.game.getActionSize())]
51
+
52
+ if temp == 0:
53
+ bestAs = np.array(np.argwhere(counts == np.max(counts))).flatten()
54
+ bestA = np.random.choice(bestAs)
55
+ probs = [0] * len(counts)
56
+ probs[bestA] = 1
57
+ return probs
58
+
59
+ counts = [x ** (1. / temp) for x in counts]
60
+ counts_sum = float(sum(counts))
61
+ probs = [x / counts_sum for x in counts]
62
+ return probs
63
+
64
+ def search(self, canonicalBoard):
65
+ """
66
+ This function performs one iteration of MCTS. It is recursively called
67
+ till a leaf node is found. The action chosen at each node is one that
68
+ has the maximum upper confidence bound as in the paper.
69
+
70
+ Once a leaf node is found, the neural network is called to return an
71
+ initial policy P and a value v for the state. This value is propagated
72
+ up the search path. In case the leaf node is a terminal state, the
73
+ outcome is propagated up the search path. The values of Ns, Nsa, Qsa are
74
+ updated.
75
+
76
+ NOTE: Since v is in [-1,1] and if v is the value of a
77
+ state for the current player, then its value is -v for the other player.
78
+
79
+ Returns:
80
+ v: the value of the current canonicalBoard
81
+ """
82
+
83
+ s = self.game.stringRepresentation(canonicalBoard)
84
+
85
+ if s not in self.Es:
86
+ self.Es[s] = self.game.getGameEnded(canonicalBoard, 1)
87
+ if self.Es[s] is not None:
88
+ # terminal node
89
+ return self.Es[s]
90
+
91
+ if s not in self.Ps:
92
+ # leaf node
93
+ self.Ps[s], v = self.nnet.predict(canonicalBoard)
94
+ valids = self.game.getValidMoves(canonicalBoard, 1)
95
+ self.Ps[s] = self.Ps[s] * valids # masking invalid moves
96
+ sum_Ps_s = np.sum(self.Ps[s])
97
+ if sum_Ps_s > 0:
98
+ self.Ps[s] /= sum_Ps_s # renormalize
99
+ else:
100
+ # if all valid moves were masked make all valid moves equally probable
101
+ log.error("All valid moves were masked, doing a workaround.")
102
+ self.Ps[s] = self.Ps[s] + valids
103
+ self.Ps[s] /= np.sum(self.Ps[s])
104
+
105
+ self.Vs[s] = valids
106
+ self.Ns[s] = 0
107
+ return v
108
+
109
+ valids = self.Vs[s]
110
+ cur_best = -float('inf')
111
+ best_act = -1
112
+
113
+ # pick the action with the highest upper confidence bound
114
+ for a in range(self.game.getActionSize()):
115
+ if valids[a]:
116
+ u = self.Qsa.get((s, a), 0) + self.args.cpuct * self.Ps[s][a] * math.sqrt(self.Ns[s]) / (
117
+ 1 + self.Nsa.get((s, a), 0))
118
+
119
+ if u > cur_best:
120
+ cur_best = u
121
+ best_act = a
122
+
123
+ a = best_act
124
+ next_s, next_player = self.game.getNextState(canonicalBoard, 1, a)
125
+ next_s = self.game.getCanonicalForm(next_s, next_player)
126
+
127
+ v = -self.search(next_s)
128
+
129
+ if (s, a) in self.Qsa:
130
+ self.Qsa[(s, a)] = (self.Nsa[(s, a)] * self.Qsa[(s, a)] + v) / (self.Nsa[(s, a)] + 1)
131
+ self.Nsa[(s, a)] += 1
132
+
133
+ else:
134
+ self.Qsa[(s, a)] = v
135
+ self.Nsa[(s, a)] = 1
136
+
137
+ self.Ns[s] += 1
138
+ return v
139
+
140
+ class OthelloNNet(nn.Module):
141
+ def __init__(self, game, args):
142
+ # game params
143
+ self.board_x, self.board_y = game.getBoardSize()
144
+ self.action_size = game.getActionSize()
145
+ self.args = args
146
+
147
+ super(OthelloNNet, self).__init__()
148
+ self.conv1 = nn.Conv2d(1, args.num_channels, 3, stride=1, padding=1)
149
+ self.conv2 = nn.Conv2d(args.num_channels, args.num_channels, 3, stride=1, padding=1)
150
+ self.conv3 = nn.Conv2d(args.num_channels, args.num_channels, 3, stride=1)
151
+ self.conv4 = nn.Conv2d(args.num_channels, args.num_channels, 3, stride=1)
152
+
153
+ self.bn1 = nn.BatchNorm2d(args.num_channels)
154
+ self.bn2 = nn.BatchNorm2d(args.num_channels)
155
+ self.bn3 = nn.BatchNorm2d(args.num_channels)
156
+ self.bn4 = nn.BatchNorm2d(args.num_channels)
157
+
158
+ self.fc1 = nn.Linear(args.num_channels*(self.board_x-4)*(self.board_y-4), 1024)
159
+ self.fc_bn1 = nn.BatchNorm1d(1024)
160
+
161
+ self.fc2 = nn.Linear(1024, 512)
162
+ self.fc_bn2 = nn.BatchNorm1d(512)
163
+
164
+ self.fc3 = nn.Linear(512, self.action_size)
165
+ self.fc4 = nn.Linear(512, 1)
166
+
167
+ def forward(self, s):
168
+ # you can add residual to the network
169
+ # s: batch_size x board_x x board_y
170
+ s = s.view(-1, 1, self.board_x, self.board_y) # batch_size x 1 x board_x x board_y
171
+ s = F.relu(self.bn1(self.conv1(s))) # batch_size x num_channels x board_x x board_y
172
+ s = F.relu(self.bn2(self.conv2(s))) # batch_size x num_channels x board_x x board_y
173
+ s = F.relu(self.bn3(self.conv3(s))) # batch_size x num_channels x (board_x-2) x (board_y-2)
174
+ s = F.relu(self.bn4(self.conv4(s))) # batch_size x num_channels x (board_x-4) x (board_y-4)
175
+ s = s.view(-1, self.args.num_channels*(self.board_x-4)*(self.board_y-4))
176
+
177
+ s = F.dropout(F.relu(self.fc_bn1(self.fc1(s))), p=self.args.dropout, training=self.training) # batch_size x 1024
178
+ s = F.dropout(F.relu(self.fc_bn2(self.fc2(s))), p=self.args.dropout, training=self.training) # batch_size x 512
179
+
180
+ pi = self.fc3(s) # batch_size x action_size
181
+ v = self.fc4(s) # batch_size x 1
182
+
183
+ return F.log_softmax(pi, dim=1), torch.tanh(v)
184
+
185
+
186
+ class AverageMeter(object):
187
+ """From https://github.com/pytorch/examples/blob/master/imagenet/main.py"""
188
+
189
+ def __init__(self):
190
+ self.val = 0
191
+ self.avg = 0
192
+ self.sum = 0
193
+ self.count = 0
194
+
195
+ def __repr__(self):
196
+ return f'{self.avg:.2e}'
197
+
198
+ def update(self, val, n=1):
199
+ self.val = val
200
+ self.sum += val * n
201
+ self.count += n
202
+ self.avg = self.sum / self.count
203
+
204
+
205
+ class NNetWrapper():
206
+ def __init__(self, game, args):
207
+ self.nnet = OthelloNNet(game, args)
208
+ self.board_x, self.board_y = game.getBoardSize()
209
+ self.action_size = game.getActionSize()
210
+ self.args = args
211
+
212
+ if args.cuda:
213
+ self.nnet.cuda()
214
+
215
+ def train(self, examples):
216
+ """
217
+ examples: list of examples, each example is of form (board, pi, v)
218
+ """
219
+ optimizer = optim.Adam(self.nnet.parameters(), lr=self.args.lr)
220
+
221
+ for epoch in range(self.args.epochs):
222
+ print('EPOCH ::: ' + str(epoch + 1))
223
+ self.nnet.train()
224
+ pi_losses = AverageMeter()
225
+ v_losses = AverageMeter()
226
+
227
+ batch_count = int(len(examples) / self.args.batch_size)
228
+
229
+ t = tqdm(range(batch_count), desc='Training Net')
230
+ for _ in t:
231
+ sample_ids = np.random.randint(len(examples), size=self.args.batch_size)
232
+ boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
233
+ boards = torch.FloatTensor(np.array(boards).astype(np.float32)) # cast to float32 to match the model's FloatTensor parameters
234
+ target_pis = torch.FloatTensor(np.array(pis))
235
+ target_vs = torch.FloatTensor(np.array(vs).astype(np.float32))
236
+
237
+ if self.args.cuda:
238
+ # boards, target_pis, target_vs = boards.contiguous().cuda(), target_pis.contiguous().cuda(), target_vs.contiguous().cuda()
239
+ boards, target_pis, target_vs = boards.cuda(), target_pis.cuda(), target_vs.cuda()
240
+
241
+ # compute output
242
+ out_pi, out_v = self.nnet(boards)
243
+ l_pi = self.loss_pi(target_pis, out_pi)
244
+ l_v = self.loss_v(target_vs, out_v)
245
+ total_loss = l_pi + l_v
246
+
247
+ # record loss
248
+ pi_losses.update(l_pi.item(), boards.size(0))
249
+ v_losses.update(l_v.item(), boards.size(0))
250
+ t.set_postfix(Loss_pi=pi_losses, Loss_v=v_losses)
251
+
252
+ # compute gradient and do SGD step
253
+ optimizer.zero_grad()
254
+ total_loss.backward()
255
+ optimizer.step()
256
+
257
+ def predict(self, board):
258
+ """
259
+ board: np array with board
260
+ """
261
+ # timing
262
+ # start = time.time()
263
+
264
+ # preparing input
265
+ board = torch.FloatTensor(board.astype(np.float32))
266
+ if self.args.cuda: board = board.cuda()
267
+ board = board.view(1, self.board_x, self.board_y)
268
+ self.nnet.eval()
269
+ with torch.no_grad():
270
+ pi, v = self.nnet(board)
271
+
272
+ # print('PREDICTION TIME TAKEN : {0:03f}'.format(time.time()-start))
273
+ return torch.exp(pi).data.cpu().numpy()[0], v.data.cpu().numpy()[0]
274
+
275
+ def loss_pi(self, targets, outputs):
276
+ return -torch.sum(targets * outputs) / targets.size()[0]
277
+
278
+ def loss_v(self, targets, outputs):
279
+ return torch.sum((targets - outputs.view(-1)) ** 2) / targets.size()[0]
280
+
281
+ def save_checkpoint(self, folder='checkpoint', filename='checkpoint.pth.tar'):
282
+ filepath = os.path.join(folder, filename)
283
+ if not os.path.exists(folder):
284
+ print("Checkpoint Directory does not exist! Making directory {}".format(folder))
285
+ os.mkdir(folder)
286
+ else:
287
+ print("Checkpoint Directory exists! ")
288
+ torch.save({
289
+ 'state_dict': self.nnet.state_dict(),
290
+ }, filepath)
291
+
292
+ def load_checkpoint(self, folder='checkpoint', filename='checkpoint.pth.tar'):
293
+ filepath = os.path.join(folder, filename)
294
+ if not os.path.exists(filepath):
295
+ raise ValueError("No model in path {}".format(filepath))
296
+ map_location = None if self.args.cuda else 'cpu'
297
+ checkpoint = torch.load(filepath, map_location=map_location)
298
+ self.nnet.load_state_dict(checkpoint['state_dict'])
299
+
300
+
301
+ class SelfPlay():
302
+ """
303
+ This class executes the self-play + learning.
304
+ """
305
+
306
+ def __init__(self, game, nnet, args):
307
+ self.game = game
308
+ self.nnet = nnet
309
+ self.pnet = self.nnet.__class__(self.game, args) # the competitor network
310
+ self.args = args
311
+ self.mcts = MCTS(self.game, self.nnet, self.args)
312
+ self.trainExamplesHistory = [] # history of examples from args.numItersForTrainExamplesHistory latest iterations
313
+
314
+ def executeEpisode(self):
315
+ """
316
+ This function executes one episode of self-play, starting with player 1.
317
+ As the game is played, each turn is added as a training example to
318
+ trainExamples. The game is played till the game ends. After the game
319
+ ends, the outcome of the game is used to assign values to each example
320
+ in trainExamples.
321
+
322
+ It uses a temp=1 if episodeStep < tempThreshold, and thereafter
323
+ uses temp=0.
324
+
325
+ Returns:
326
+ trainExamples: a list of examples of the form (canonicalBoard, pi, v)
327
+ """
328
+ trainExamples = []
329
+ board = self.game.getInitBoard()
330
+ self.curPlayer = 1
331
+ episodeStep = 0
332
+
333
+ while True:
334
+ episodeStep += 1
335
+ canonicalBoard = self.game.getCanonicalForm(board, self.curPlayer)
336
+ temp = int(episodeStep < self.args.tempThreshold)
337
+
338
+ pi = self.mcts.getActionProb(canonicalBoard, temp=temp)
339
+ sym = self.game.getSymmetries(canonicalBoard, pi)
340
+ for b, p in sym:
341
+ trainExamples.append([b, self.curPlayer, p, None])
342
+
343
+ action = np.random.choice(len(pi), p=pi)
344
+ board, self.curPlayer = self.game.getNextState(board, self.curPlayer, action)
345
+
346
+ r = self.game.getGameEnded(board, self.curPlayer)
347
+
348
+ if r is not None:
349
+ # r * (1 if self.curPlayer == x[1] else -1) means 1 for winner, -1 for loser, 0 for draw.
350
+ return [(x[0], x[2], r * (1 if self.curPlayer == x[1] else -1)) for x in trainExamples]
351
+
352
+ def learn(self):
353
+ """
354
+ Performs numIters iterations with numEps episodes of self-play in each
355
+ iteration. After every iteration, it retrains neural network with
356
+ examples in trainExamples (which has a maximum length of maxlenofQueue).
357
+ It then pits the new neural network against the old one and accepts it
358
+ only if it wins >= updateThreshold fraction of games.
359
+ """
360
+
361
+ for i in range(1, self.args.numIters + 1):
362
+ # bookkeeping
363
+ log.info(f'Starting Iter #{i} ...')
364
+ # examples of the iteration
365
+ iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)
366
+
367
+ for _ in tqdm(range(self.args.numEps), desc="Self Play"):
368
+ self.mcts = MCTS(self.game, self.nnet, self.args) # reset search tree
369
+ iterationTrainExamples += self.executeEpisode()
370
+
371
+ # save the iteration examples to the history
372
+ self.trainExamplesHistory.append(iterationTrainExamples)
373
+
374
+ if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
375
+ log.warning(
376
+ f"Removing the oldest entry in trainExamples. len(trainExamplesHistory) = {len(self.trainExamplesHistory)}")
377
+ self.trainExamplesHistory.pop(0)
378
+
379
+ # shuffle examples before training
380
+ trainExamples = []
381
+ for e in self.trainExamplesHistory:
382
+ trainExamples.extend(e)
383
+ shuffle(trainExamples)
384
+
385
+ # training new network, keeping a copy of the old one
386
+ self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
387
+ self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
388
+ pmcts = MCTS(self.game, self.pnet, self.args)
389
+
390
+ self.nnet.train(trainExamples)
391
+ nmcts = MCTS(self.game, self.nnet, self.args)
392
+
393
+ log.info('PITTING AGAINST PREVIOUS VERSION')
394
+ arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
395
+ lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
396
+ pwins, nwins, draws = arena.playGames(self.args.arenaCompare)
397
+
398
+ log.info('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
399
+ if pwins + nwins == 0 or float(nwins) / (pwins + nwins) < self.args.updateThreshold:
400
+ log.info('REJECTING NEW MODEL')
401
+ self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
402
+ else:
403
+ log.info('ACCEPTING NEW MODEL')
404
+ self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
405
+
406
+
407
+ class dotdict(dict):
408
+ def __getattr__(self, name):
409
+ return self[name]
410
+
411
+
412
+ args = dotdict({
413
+ 'lr': 0.001,
414
+ 'dropout': 0.1,
415
+ 'epochs': 10,
416
+ 'batch_size': 64,
417
+ 'cuda': torch.cuda.is_available(),
418
+ 'num_channels': 512,
419
+
420
+ 'numIters': 200,
421
+ 'numEps': 100, # Number of complete self-play games to simulate during a new iteration.
422
+ 'tempThreshold': 15, #
423
+ 'updateThreshold': 0.6, # During arena playoff, new neural net will be accepted if threshold ratio or more of games are won.
424
+ 'maxlenOfQueue': 200000, # Number of game examples to train the neural networks.
425
+ 'numItersForTrainExamplesHistory': 20,
426
+ 'numMCTSSims': 25, # Number of games moves for MCTS to simulate.
427
+ 'arenaCompare': 40, # Number of games to play during arena play to determine if new net will be accepted.
428
+ 'cpuct': 1,
429
+
430
+ 'checkpoint': './temp/',
431
+ 'load_model': False,
432
+ 'load_folder_file': ('./temp/','best.pth.tar'),
433
+ })
434
+
435
+ def main():
436
+ import argparse
437
+ parser = argparse.ArgumentParser()
438
+ parser.add_argument('--train', action="store_true")
439
+ parser.add_argument('--board_size', type=int, default=6)
440
+ # play arguments
441
+ parser.add_argument('--play', action="store_true")
442
+ parser.add_argument('--verbose', action="store_true")
443
+ parser.add_argument('--round', type=int, default=2)
444
+ parser.add_argument('--player1', type=str, default='human', choices=['human', 'random', 'greedy', 'alphazero'])
445
+ parser.add_argument('--player2', type=str, default='alphazero', choices=['human', 'random', 'greedy', 'alphazero'])
446
+ parser.add_argument('--ckpt_file', type=str, default='best.pth.tar')
447
+ args_input = vars(parser.parse_args())
448
+ for k,v in args_input.items():
449
+ args[k] = v
450
+
451
+ g = OthelloGame(args.board_size)
452
+
453
+ if args.train:
454
+ nnet = NNetWrapper(g, args)
455
+ if args.load_model:
456
+ log.info('Loading checkpoint "%s/%s"...', args.load_folder_file[0], args.load_folder_file[1])
457
+ nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])
458
+
459
+ log.info('Loading the SelfPlay coach...')
460
+ s = SelfPlay(g, nnet, args)
461
+
462
+ log.info('Starting the learning process 🎉')
463
+ s.learn()
464
+
465
+ if args.play:
466
+ def getPlayFunc(name):
467
+ if name == 'human':
468
+ return HumanOthelloPlayer(g).play
469
+ elif name == 'random':
470
+ return RandomPlayer(g).play
471
+ elif name == 'greedy':
472
+ return GreedyOthelloPlayer(g).play
473
+ elif name == 'alphazero':
474
+ nnet = NNetWrapper(g, args)
475
+ nnet.load_checkpoint(args.checkpoint, args.ckpt_file)
476
+ mcts = MCTS(g, nnet, dotdict({'numMCTSSims': 200, 'cpuct':1.0}))
477
+ return lambda x: np.argmax(mcts.getActionProb(x, temp=0))
478
+ else:
479
+ raise ValueError('unsupported player name {}'.format(name))
480
+ player1 = getPlayFunc(args.player1)
481
+ player2 = getPlayFunc(args.player2)
482
+ arena = Arena(player1, player2, g, display=OthelloGame.display)
483
+ results = arena.playGames(args.round, verbose=args.verbose)
484
+ print("Final results: Player1 wins {}, Player2 wins {}, Draws {}".format(*results))
485
+
486
+ if __name__ == '__main__':
487
+ main()
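As a side note on the temperature handling documented in MCTS.getActionProb above: visit counts are raised to the power 1/temp and normalized, while temp=0 collapses the distribution onto the most-visited action. A minimal, self-contained sketch of that post-processing (the helper name and example counts are made up for illustration):

import numpy as np

def counts_to_probs(counts, temp=1.0):
    """Mirrors how getActionProb turns MCTS visit counts into a policy."""
    counts = np.asarray(counts, dtype=np.float64)
    if temp == 0:
        best = np.random.choice(np.flatnonzero(counts == counts.max()))
        probs = np.zeros_like(counts)
        probs[best] = 1.0
        return probs
    counts = counts ** (1.0 / temp)
    return counts / counts.sum()

# 25 simulations spread over 5 actions:
print(counts_to_probs([10, 8, 4, 2, 1], temp=1))  # [0.4, 0.32, 0.16, 0.08, 0.04]
print(counts_to_probs([10, 8, 4, 2, 1], temp=0))  # one-hot on the most-visited action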
app.py ADDED
@@ -0,0 +1,343 @@
1
+ import numpy as np
2
+ import torch
3
+ import json
4
+ import os
5
+ import logging
6
+ from flask import Flask, request, jsonify
7
+ from flask_cors import CORS
8
+ import time
9
+
10
+ # Import the game and AI modules
11
+ from game import OthelloGame #
12
+ from alphazero import NNetWrapper, MCTS, dotdict #
13
+
14
+ # Configure logging
15
+ logging.basicConfig(level=logging.INFO)
16
+ log = logging.getLogger(__name__)
17
+
18
+ app = Flask(__name__)
19
+ # Enable CORS so the frontend (usually served from a different origin/port) can call the API
20
+ CORS(app)
21
+
22
+ # --- Global state and AI initialization ---
23
+
24
+ # Default parameters (kept in sync with args in alphazero.py)
25
+ args = dotdict({
26
+ 'lr': 0.001,
27
+ 'dropout': 0.1,
28
+ 'epochs': 10,
29
+ 'batch_size': 64,
30
+ 'cuda': torch.cuda.is_available(),
31
+ 'num_channels': 512,
32
+ 'numIters': 200,
33
+ 'numEps': 100,
34
+ 'tempThreshold': 15,
35
+ 'updateThreshold': 0.6,
36
+ 'maxlenOfQueue': 200000,
37
+ 'numItersForTrainExamplesHistory': 20,
38
+ 'numMCTSSims': 25, # number of MCTS simulations used during training
39
+ 'arenaCompare': 40,
40
+ 'cpuct': 1,
41
+ 'checkpoint': './temp/',
42
+ 'load_model': True,
43
+ 'load_folder_file': ('./temp/','best.pth.tar'),
44
+ 'board_size': 8 # default 8x8
45
+ })
46
+
47
+ # Game and AI instances
48
+ game = None
49
+ nnet = None
50
+ mcts = None
51
+
52
+ # Game state
53
+ current_board = None
54
+ current_player = 1 # 1: Human (White), -1: AI (Black)
55
+ last_move_coords = None
56
+ board_size = 8
57
+
58
+ def init_game_and_ai(n):
59
+ """根据板子大小初始化游戏和 AI 模块"""
60
+ global game, nnet, mcts, board_size
61
+ board_size = n
62
+ log.info(f"Initializing game and AI for {n}x{n} board.")
63
+ game = OthelloGame(n) #
64
+
65
+ # Note: an AlphaZero model is normally trained for one fixed board size.
66
+ # If the model only supports 8x8, this case needs handling or retraining.
67
+ # Here we assume the model supports the current size n.
68
+
69
+ # Reconfigure the MCTS parameters for play mode
70
+ play_args = dotdict({
71
+ 'numMCTSSims': 200, # use more simulations when playing against a human
72
+ 'cpuct': 1.0,
73
+ 'cuda': args.cuda # inherit the CUDA setting
74
+ })
75
+
76
+ nnet = NNetWrapper(game, args) #
77
+ # Assumes the trained model file has been saved under args.load_folder_file ('./temp/best.pth.tar' by default)
78
+ try:
79
+ load_folder = args.load_folder_file[0]
80
+ load_file = args.load_folder_file[1]
81
+ nnet.load_checkpoint(folder=load_folder, filename=load_file)
82
+ log.info(f"Successfully loaded model from {load_folder}{load_file}")
83
+ except ValueError as e:
84
+ log.error(f"Failed to load model: {e}. AI will likely perform poorly.")
85
+
86
+ mcts = MCTS(game, nnet, play_args) #
87
+
88
+
89
+ def get_api_moves(board, player):
90
+ """将 getValidMoves 结果从向量转换为 {x, y} 列表"""
91
+ if game is None: return []
92
+
93
+ valids = game.getValidMoves(board, player) #
94
+ moves_list = []
95
+ # Exclude the last action (the pass action)
96
+ for i in range(len(valids) - 1): #
97
+ if valids[i] == 1:
98
+ x = i // game.n
99
+ y = i % game.n
100
+ moves_list.append({'x': int(x), 'y': int(y)})
101
+ return moves_list
102
+
103
+ def check_game_end(board, player):
104
+ """检查游戏是否结束,并返回状态信息,基于绝对的棋子数量差异。"""
105
+
106
+ # Get the relative game result (1: player wins, -1: player loses, 0: draw)
107
+ # Note: this result is relative to the player that was passed in
108
+ result = game.getGameEnded(board, player) #
109
+
110
+ status = 'Ongoing'
111
+ score_diff = 0
112
+
113
+ if result is not None:
114
+ # Count the white (1) and black (-1) pieces.
115
+ # score_diff here is (white count - black count)
116
+ white_count = np.sum(board == 1)
117
+ black_count = np.sum(board == -1)
118
+ score_diff = int(white_count - black_count)
119
+
120
+ if result == 0:
121
+ status = f"Game Over: Draw. Score: {white_count} vs {black_count}"
122
+ elif score_diff > 0:
123
+ # More white (Human) pieces: the human wins
124
+ status = f"Game Over: Human (O) Wins! Score: {white_count} vs {black_count}"
125
+ elif score_diff < 0:
126
+ # More black (AI) pieces: the AI wins
127
+ status = f"Game Over: AI (X) Wins! Score: {white_count} vs {black_count}"
128
+ else:
129
+ # In theory the score cannot be 0 when result != 0, but guard against it anyway
130
+ status = f"Game Over: Draw. Score: {white_count} vs {black_count}"
131
+
132
+ return status
133
+
134
+ @app.route('/api/game/new', methods=['POST'])
135
+ def new_game():
136
+ global current_board, current_player, last_move_coords, board_size
137
+ data = request.json
138
+ size = data.get('size', 8)
139
+
140
+ # [Added] Accept a first_player parameter, defaulting to 1 (Human)
141
+ first_player = data.get('first_player', 1)
142
+
143
+ if game is None or size != board_size:
144
+ init_game_and_ai(size)
145
+
146
+ current_board = game.getInitBoard() #
147
+ current_player = first_player # [Changed] set the current player from the received first_player
148
+ last_move_coords = None
149
+
150
+ status = check_game_end(current_board, current_player)
151
+
152
+ # [Added] If the AI moves first, trigger the AI move immediately
153
+ if current_player == -1 and status == 'Ongoing':
154
+ return ai_move_logic() # call the AI logic directly and return its result
155
+ # Flip current_board vertically to match the frontend's display orientation
156
+ current_board = np.flip(current_board, 0)
157
+
158
+ return jsonify({
159
+ 'board': current_board.tolist(),
160
+ 'legal_moves': get_api_moves(current_board, current_player),
161
+ 'current_player': current_player,
162
+ 'last_move': last_move_coords,
163
+ 'status': status,
164
+ })
165
+
166
+
167
+ # @app.route('/api/game/human_move', methods=['POST'])
168
+ # def human_move():
169
+ # """处理人类玩家移动"""
170
+ # global current_board, current_player, last_move_coords
171
+
172
+ # if current_player != 1 or check_game_end(current_board, current_player) != 'Ongoing':
173
+ # return jsonify({'error': 'Not your turn or game is over'}), 400
174
+
175
+ # data = request.json
176
+ # x = data.get('x')
177
+ # y = data.get('y')
178
+
179
+ # if x is None or y is None:
180
+ # # Check whether this is a pass action
181
+ # if data.get('action') == 'pass':
182
+ # action = game.n * game.n # Pass action is the last index
183
+ # else:
184
+ # return jsonify({'error': 'Invalid move coordinates'}), 400
185
+ # else:
186
+ # action = game.n * x + y
187
+
188
+ # valids = game.getValidMoves(current_board, 1) #
189
+ # if valids[action] == 0:
190
+ # return jsonify({'error': 'Illegal move'}), 400
191
+
192
+ # current_board, current_player = game.getNextState(current_board, 1, action) #
193
+
194
+ # if action != game.n * game.n:
195
+ # last_move_coords = {'x': x, 'y': y}
196
+
197
+ # status = check_game_end(current_board, current_player)
198
+
199
+ # # If the game is not over and it is the AI's (-1) turn
200
+ # if status == 'Ongoing' and current_player == -1:
201
+ # # Trigger the AI move here
202
+ # return ai_move_logic()
203
+
204
+ # return jsonify({
205
+ # 'board': current_board.tolist(),
206
+ # 'legal_moves': get_api_moves(current_board, current_player),
207
+ # 'current_player': current_player,
208
+ # 'last_move': last_move_coords,
209
+ # 'status': status,
210
+ # })
211
+
212
+ def ai_move_logic():
213
+ """AI 移动的逻辑封装,在 human_move 中调用"""
214
+ global current_board, current_player, last_move_coords
215
+
216
+ canonical_board = game.getCanonicalForm(current_board, -1) #
217
+
218
+ # Get the AI's best action (temp=0)
219
+ ai_action = np.argmax(mcts.getActionProb(canonical_board, temp=0)) #
220
+
221
+ # Update the game state
222
+ current_board, next_player = game.getNextState(current_board, -1, ai_action) #
223
+ current_player = next_player
224
+
225
+ # Record the AI's move coordinates
226
+ if ai_action != game.n * game.n: # if it is not a pass action
227
+ ai_x = ai_action // game.n
228
+ ai_y = ai_action % game.n
229
+ last_move_coords = {'x': int(ai_x), 'y': int(ai_y)}
230
+
231
+ status = check_game_end(current_board, current_player)
232
+
233
+ # Flip current_board vertically to match the frontend's display orientation
234
+ current_board = np.flip(current_board, 0)
235
+
236
+ return jsonify({
237
+ 'board': current_board.tolist(),
238
+ 'legal_moves': get_api_moves(current_board, current_player),
239
+ 'current_player': current_player,
240
+ 'last_move': last_move_coords,
241
+ 'status': status,
242
+ })
243
+
244
+ # app.py (under the @app.route('/api/game/human_move', methods=['POST']) route)
245
+ # Replaces the original handleHumanMove/human_move function
246
+
247
+ @app.route('/api/game/human_move', methods=['POST'])
248
+ def human_move():
249
+ """处理人类玩家移动,并返回给 AI 的中间状态"""
250
+ global current_board, current_player, last_move_coords
251
+
252
+ if current_player != 1 or check_game_end(current_board, current_player) != 'Ongoing':
253
+ return jsonify({'error': 'Not your turn or game is over'}), 400
254
+
255
+ data = request.json
256
+ x = data.get('x')
257
+ y = data.get('y')
258
+
259
+ if x is None or y is None:
260
+ # Check whether this is a pass action
261
+ if data.get('action') == 'pass':
262
+ action = game.n * game.n # Pass action is the last index
263
+ else:
264
+ return jsonify({'error': 'Invalid move coordinates'}), 400
265
+ else:
266
+ action = game.n * x + y
267
+
268
+ valids = game.getValidMoves(current_board, 1)
269
+ if valids[action] == 0:
270
+ return jsonify({'error': 'Illegal move'}), 400
271
+
272
+ # Apply the human move
273
+ current_board, current_player = game.getNextState(current_board, 1, action)
274
+
275
+ if action != game.n * game.n:
276
+ last_move_coords = {'x': x, 'y': y}
277
+
278
+ status = check_game_end(current_board, current_player)
279
+
280
+ # Flip current_board vertically to match the frontend's display orientation
281
+ # current_board = np.flip(current_board, 0)
282
+
283
+ # Note: the AI move logic is no longer included here; return the state directly
284
+ return jsonify({
285
+ 'board': current_board.tolist(),
286
+ 'legal_moves': get_api_moves(current_board, current_player),
287
+ 'current_player': current_player,
288
+ 'last_move': last_move_coords,
289
+ 'status': status,
290
+ })
291
+
292
+
293
+ # B. New ai_move route
294
+
295
+ @app.route('/api/game/ai_move', methods=['POST'])
296
+ def ai_move():
297
+ start_time = time.time()
298
+ """触发 AI 移动,并返回最终状态"""
299
+ global current_board, current_player, last_move_coords
300
+
301
+ if current_player != -1:
302
+ return jsonify({'error': 'Not AI turn'}), 400
303
+
304
+
305
+ canonical_board = game.getCanonicalForm(current_board, -1)
306
+ ai_action = np.argmax(mcts.getActionProb(canonical_board, temp=0))
307
+
308
+ # Apply the AI move
309
+ current_board, next_player = game.getNextState(current_board, -1, ai_action)
310
+ current_player = next_player
311
+
312
+ # Record the AI's move coordinates
313
+ if ai_action != game.n * game.n:
314
+ ai_x = ai_action // game.n
315
+ ai_y = ai_action % game.n
316
+ last_move_coords = {'x': int(ai_x), 'y': int(ai_y)}
317
+ else:
318
+ last_move_coords = None # AI Pass
319
+
320
+ status = check_game_end(current_board, current_player)
321
+
322
+ # Enforce a minimum AI "thinking" time of 0.5 seconds
323
+ end_time = time.time()
324
+ used_time = end_time - start_time
325
+ if used_time < 0.5:
326
+ time.sleep(0.5 - used_time) # make sure at least 0.5 seconds elapse
327
+
328
+ return jsonify({
329
+ 'board': current_board.tolist(),
330
+ 'legal_moves': get_api_moves(current_board, current_player),
331
+ 'current_player': current_player,
332
+ 'last_move': last_move_coords,
333
+ 'status': status,
334
+ })
335
+
336
+ if __name__ == '__main__':
337
+ # Initialize a default 8x8 game instance
338
+ init_game_and_ai(8)
339
+ log.info("Starting Flask server on port 7860...")
340
+
341
+ port = int(os.environ.get('PORT', 7860))
342
+ # ... (logging) ...
343
+ app.run(host='0.0.0.0', port=port)
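For reference, a minimal client for the routes above (a sketch only, assuming the server from this commit is running locally on port 7860 and the requests package is available):

import requests

BASE = "http://localhost:7860"

# Start a new 8x8 game with the AI (player -1) moving first; because of the
# first_player handling in new_game, the response already contains the board
# after the AI's opening move.
state = requests.post(f"{BASE}/api/game/new",
                      json={"size": 8, "first_player": -1}).json()
print("status:", state["status"])
print("AI opened with:", state["last_move"])
print("legal replies for the human:", state["legal_moves"])

# A human reply is then POSTed as {"x": ..., "y": ...} to /api/game/human_move,
# and the AI's answer is requested from /api/game/ai_move (only valid while
# current_player == -1); both return the same JSON shape as /api/game/new.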
game.py ADDED
@@ -0,0 +1,417 @@
1
+ import numpy as np
2
+ import logging
3
+ from tqdm import tqdm
4
+ log = logging.getLogger(__name__)
5
+
6
+ class Board():
7
+ '''
8
+ Author: Eric P. Nichols
9
+ Date: Feb 8, 2008.
10
+ Board class.
11
+ Board data:
12
+ 1=white, -1=black, 0=empty
13
+ '''
14
+
15
+ # list of all 8 directions on the board, as (x,y) offsets
16
+ __directions = [(1,1),(1,0),(1,-1),(0,-1),(-1,-1),(-1,0),(-1,1),(0,1)]
17
+
18
+ def __init__(self, n):
19
+ "Set up initial board configuration."
20
+
21
+ self.n = n
22
+ # Create the empty board array.
23
+ self.pieces = [None]*self.n
24
+ for i in range(self.n):
25
+ self.pieces[i] = [0]*self.n
26
+
27
+ # Set up the initial 4 pieces.
28
+ self.pieces[int(self.n/2)-1][int(self.n/2)] = 1
29
+ self.pieces[int(self.n/2)][int(self.n/2)-1] = 1
30
+ self.pieces[int(self.n/2)-1][int(self.n/2)-1] = -1
31
+ self.pieces[int(self.n/2)][int(self.n/2)] = -1
32
+
33
+ # add [][] indexer syntax to the Board
34
+ def __getitem__(self, index):
35
+ return self.pieces[index]
36
+
37
+ def countDiff(self, color):
38
+ """Counts the # pieces of the given color
39
+ (1 for white, -1 for black, 0 for empty spaces)"""
40
+ count = 0
41
+ for y in range(self.n):
42
+ for x in range(self.n):
43
+ if self[x][y]==color:
44
+ count += 1
45
+ if self[x][y]==-color:
46
+ count -= 1
47
+ return count
48
+
49
+ def get_legal_moves(self, color):
50
+ """Returns all the legal moves for the given color.
51
+ (1 for white, -1 for black)
52
+ """
53
+ moves = set() # stores the legal moves.
54
+
55
+ # Get all the squares with pieces of the given color.
56
+ for y in range(self.n):
57
+ for x in range(self.n):
58
+ if self[x][y]==color:
59
+ newmoves = self.get_moves_for_square((x,y))
60
+ moves.update(newmoves)
61
+ return list(moves)
62
+
63
+ def has_legal_moves(self, color):
64
+ for y in range(self.n):
65
+ for x in range(self.n):
66
+ if self[x][y]==color:
67
+ newmoves = self.get_moves_for_square((x,y))
68
+ if len(newmoves)>0:
69
+ return True
70
+ return False
71
+
72
+ def get_moves_for_square(self, square):
73
+ """Returns all the legal moves that use the given square as a base.
74
+ That is, if the given square is (3,4) and it contains a black piece,
75
+ and (3,5) and (3,6) contain white pieces, and (3,7) is empty, one
76
+ of the returned moves is (3,7) because everything from there to (3,4)
77
+ is flipped.
78
+ """
79
+ (x,y) = square
80
+
81
+ # determine the color of the piece.
82
+ color = self[x][y]
83
+
84
+ # skip empty source squares.
85
+ if color==0:
86
+ return None
87
+
88
+ # search all possible directions.
89
+ moves = []
90
+ for direction in self.__directions:
91
+ move = self._discover_move(square, direction)
92
+ if move:
93
+ moves.append(move)
94
+
95
+ # return the generated move list
96
+ return moves
97
+
98
+ def execute_move(self, move, color):
99
+ """Perform the given move on the board; flips pieces as necessary.
100
+ color gives the color of the piece to play (1=white,-1=black)
101
+ """
102
+
103
+ #Much like move generation, start at the new piece's square and
104
+ #follow it on all 8 directions to look for a piece allowing flipping.
105
+
106
+ flips = [flip for direction in self.__directions
107
+ for flip in self._get_flips(move, direction, color)]
108
+ assert len(list(flips))>0
109
+ for x, y in flips:
110
+ self[x][y] = color
111
+
112
+ def _discover_move(self, origin, direction):
113
+ """ Returns the endpoint for a legal move, starting at the given origin,
114
+ moving by the given increment."""
115
+ x, y = origin
116
+ color = self[x][y]
117
+ flips = []
118
+
119
+ for x, y in Board._increment_move(origin, direction, self.n):
120
+ if self[x][y] == 0:
121
+ if flips:
122
+ return (x, y)
123
+ else:
124
+ return None
125
+ elif self[x][y] == color:
126
+ return None
127
+ elif self[x][y] == -color:
128
+ flips.append((x, y))
129
+
130
+ def _get_flips(self, origin, direction, color):
131
+ """ Gets the list of flips for a vertex and direction to use with the
132
+ execute_move function """
133
+ #initialize variables
134
+ flips = [origin]
135
+
136
+ for x, y in Board._increment_move(origin, direction, self.n):
137
+ if self[x][y] == 0:
138
+ return []
139
+ if self[x][y] == -color:
140
+ flips.append((x, y))
141
+ elif self[x][y] == color and len(flips) > 0:
142
+ return flips
143
+
144
+ return []
145
+
146
+ @staticmethod
147
+ def _increment_move(move, direction, n):
148
+ """ Generator expression for incrementing moves """
149
+ move = list(map(sum, zip(move, direction)))
150
+ #move = (move[0]+direction[0], move[1]+direction[1])
151
+ while all(map(lambda x: 0 <= x < n, move)):
152
+ #while 0<=move[0] and move[0]<n and 0<=move[1] and move[1]<n:
153
+ yield move
154
+ move=list(map(sum,zip(move,direction)))
155
+ #move = (move[0]+direction[0],move[1]+direction[1])
156
+
157
+
158
+ class OthelloGame():
159
+ square_content = {
160
+ -1: "X",
161
+ +0: "-",
162
+ +1: "O"
163
+ }
164
+
165
+ @staticmethod
166
+ def getSquarePiece(piece):
167
+ return OthelloGame.square_content[piece]
168
+
169
+ def __init__(self, n):
170
+ self.n = n
171
+
172
+ def getInitBoard(self):
173
+ # return initial board (numpy board)
174
+ b = Board(self.n)
175
+ return np.array(b.pieces)
176
+
177
+ def getBoardSize(self):
178
+ # (a,b) tuple
179
+ return (self.n, self.n)
180
+
181
+ def getActionSize(self):
182
+ # return number of actions
183
+ return self.n*self.n + 1
184
+
185
+ def getNextState(self, board, player, action):
186
+ # if player takes action on board, return next (board,player)
187
+ # action must be a valid move
188
+ if action == self.n*self.n:
189
+ return (board, -player)
190
+ b = Board(self.n)
191
+ b.pieces = np.copy(board)
192
+ move = (int(action/self.n), action%self.n)
193
+ b.execute_move(move, player)
194
+ return (b.pieces, -player)
195
+
196
+ def getValidMoves(self, board, player):
197
+ # return a fixed size binary vector
198
+ valids = [0]*self.getActionSize()
199
+ b = Board(self.n)
200
+ b.pieces = np.copy(board)
201
+ legalMoves = b.get_legal_moves(player)
202
+ if len(legalMoves)==0:
203
+ valids[-1]=1
204
+ return np.array(valids)
205
+ for x, y in legalMoves:
206
+ valids[self.n*x+y]=1
207
+ return np.array(valids)
208
+
209
+ def getGameEnded(self, board, player):
210
+ # return None if not ended, 1 if player won, -1 if player lost, 0 if draw.
211
+ b = Board(self.n)
212
+ b.pieces = np.copy(board)
213
+ if b.has_legal_moves(player):
214
+ return None
215
+ if b.has_legal_moves(-player):
216
+ return None
217
+ if b.countDiff(player) > 0:
218
+ return 1
219
+ elif b.countDiff(player) < 0:
220
+ return -1
221
+ else:
222
+ return 0
223
+
224
+ def getCanonicalForm(self, board, player):
225
+ # return state if player==1, else return -state if player==-1
226
+ return player*board
227
+
228
+ def getSymmetries(self, board, pi):
229
+ # mirror, rotational
230
+ assert(len(pi) == self.n**2+1) # 1 for pass
231
+ pi_board = np.reshape(pi[:-1], (self.n, self.n))
232
+ l = []
233
+
234
+ for i in range(1, 5):
235
+ for j in [True, False]:
236
+ newB = np.rot90(board, i)
237
+ newPi = np.rot90(pi_board, i)
238
+ if j:
239
+ newB = np.fliplr(newB)
240
+ newPi = np.fliplr(newPi)
241
+ l += [(newB, list(newPi.ravel()) + [pi[-1]])]
242
+ return l
243
+
244
+ def stringRepresentation(self, board):
245
+ return board.tobytes()  # tostring() is deprecated in NumPy; tobytes() returns the same bytes
246
+
247
+ def stringRepresentationReadable(self, board):
248
+ board_s = "".join(self.square_content[square] for row in board for square in row)
249
+ return board_s
250
+
251
+ def getScore(self, board, player):
252
+ b = Board(self.n)
253
+ b.pieces = np.copy(board)
254
+ return b.countDiff(player)
255
+
256
+ @staticmethod
257
+ def display(board):
258
+ n = board.shape[0]
259
+ print(" ", end="")
260
+ for y in range(n):
261
+ print(y, end=" ")
262
+ print("")
263
+ print("-----------------------")
264
+ for y in range(n):
265
+ print(y, "|", end="")
266
+ for x in range(n):
267
+ piece = board[y][x]
268
+ print(OthelloGame.square_content[piece], end=" ")
269
+ print("|")
270
+
271
+ print("-----------------------")
272
+
273
+
274
+ class RandomPlayer():
275
+ def __init__(self, game):
276
+ self.game = game
277
+
278
+ def play(self, board):
279
+ a = np.random.randint(self.game.getActionSize())
280
+ valids = self.game.getValidMoves(board, 1)
281
+ while valids[a]!=1:
282
+ a = np.random.randint(self.game.getActionSize())
283
+ return a
284
+
285
+ class GreedyOthelloPlayer():
286
+ def __init__(self, game):
287
+ self.game = game
288
+
289
+ def play(self, board):
290
+ valids = self.game.getValidMoves(board, 1)
291
+ candidates = []
292
+ for a in range(self.game.getActionSize()):
293
+ if valids[a]==0:
294
+ continue
295
+ nextBoard, _ = self.game.getNextState(board, 1, a)
296
+ score = self.game.getScore(nextBoard, 1)
297
+ candidates += [(-score, a)]
298
+ candidates.sort()
299
+ return candidates[0][1]
300
+
301
+
302
+ class HumanOthelloPlayer():
303
+ def __init__(self, game):
304
+ self.game = game
305
+
306
+ def play(self, board):
307
+ # display(board)
308
+ valid = self.game.getValidMoves(board, 1)
309
+ for i in range(len(valid)):
310
+ if valid[i]:
311
+ print("[", int(i/self.game.n), int(i%self.game.n), end="] ")
312
+ while True:
313
+ input_move = input()
314
+ input_a = input_move.split(" ")
315
+ if len(input_a) == 2:
316
+ try:
317
+ x,y = [int(i) for i in input_a]
318
+ if ((0 <= x) and (x < self.game.n) and (0 <= y) and (y < self.game.n)) or \
319
+ ((x == self.game.n) and (y == 0)):
320
+ a = self.game.n * x + y
321
+ if valid[a]:
322
+ break
323
+ except ValueError:
324
+ pass  # non-integer input; fall through and re-prompt
325
+ print('Invalid move')
326
+ return a
327
+
328
+
329
+ class Arena():
330
+ """
331
+ An Arena class where any 2 agents can be pit against each other.
332
+ """
333
+
334
+ def __init__(self, player1, player2, game, display=None):
335
+ """
336
+ Input:
337
+ player 1,2: two functions that takes board as input, return action
338
+ game: Game object
339
+ display: a function that takes board as input and prints it. Is necessary for verbose
340
+ mode.
341
+ """
342
+ self.player1 = player1
343
+ self.player2 = player2
344
+ self.game = game
345
+ self.display = display
346
+
347
+ def playGame(self, verbose=False):
348
+ """
349
+ Executes one episode of a game.
350
+
351
+ Returns:
352
+ either
353
+ winner: player who won the game (1 if player1, -1 if player2, 0 if draw)
354
+ """
355
+ players = [self.player2, None, self.player1]
356
+ curPlayer = 1 # player1 goes first
357
+ board = self.game.getInitBoard()
358
+ it = 0
359
+ while self.game.getGameEnded(board, curPlayer) is None:
360
+ it += 1
361
+ if verbose:
362
+ assert self.display
363
+ print("Turn ", str(it), "Player ", str(curPlayer))
364
+ self.display(board)
365
+ action = players[curPlayer + 1](self.game.getCanonicalForm(board, curPlayer))
366
+
367
+ valids = self.game.getValidMoves(self.game.getCanonicalForm(board, curPlayer), 1)
368
+
369
+ if valids[action] == 0:
370
+ log.error(f'Action {action} is not valid!')
371
+ log.debug(f'valids = {valids}')
372
+ assert valids[action] > 0
373
+ board, curPlayer = self.game.getNextState(board, curPlayer, action)
374
+ result = curPlayer * self.game.getGameEnded(board, curPlayer)
375
+ if verbose:
376
+ assert self.display
377
+ print("Game over: Turn ", str(it), "Result ", str(result))
378
+ self.display(board)
379
+ return result
380
+
381
+ def playGames(self, num, verbose=False):
382
+ """
383
+ Plays num games in which player1 starts num/2 games and player2 starts
384
+ num/2 games.
385
+
386
+ Returns:
387
+ oneWon: games won by player1
388
+ twoWon: games won by player2
389
+ draws: games won by nobody
390
+ """
391
+
392
+ num = int(num / 2)
393
+ oneWon = 0
394
+ twoWon = 0
395
+ draws = 0
396
+ for _ in tqdm(range(num), desc="Arena.playGames (player1 go first)"):
397
+ gameResult = self.playGame(verbose=verbose)
398
+ if gameResult == 1:
399
+ oneWon += 1
400
+ elif gameResult == -1:
401
+ twoWon += 1
402
+ else:
403
+ draws += 1
404
+
405
+ self.player1, self.player2 = self.player2, self.player1
406
+
407
+ for _ in tqdm(range(num), desc="Arena.playGames (player2 go first)"):
408
+ gameResult = self.playGame(verbose=verbose)
409
+ if gameResult == -1:
410
+ oneWon += 1
411
+ elif gameResult == 1:
412
+ twoWon += 1
413
+ else:
414
+ draws += 1
415
+
416
+ return oneWon, twoWon, draws
417
+
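The game logic above can also be exercised directly, without the Flask layer. A small sketch (assuming this file is importable as game and NumPy is installed):

import numpy as np
from game import OthelloGame

g = OthelloGame(6)            # 6x6 board, the default --board_size in alphazero.py
board = g.getInitBoard()
player = 1                    # 1 = white (O), -1 = black (X)

# Pick the first legal action and apply it; actions are encoded as n*x + y,
# with index n*n reserved for "pass".
valids = g.getValidMoves(board, player)
action = int(np.flatnonzero(valids)[0])
print("playing (x, y) =", divmod(action, g.n))

board, player = g.getNextState(board, player, action)
OthelloGame.display(board)
print("ended?", g.getGameEnded(board, player))   # None while the game is still running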
requirements.txt ADDED
@@ -0,0 +1,12 @@
1
+ # othello-backend/requirements.txt
2
+ # Core web framework
3
+ flask
4
+ Flask-CORS
5
+
6
+ # AI / model dependencies
7
+ numpy
8
+ torch
9
+
10
+ # Other helper libraries
11
+ tqdm
12
+ # logging is part of the Python standard library and does not need to be pip-installed