Spaces:

Gomoku-Zero
/

Demo

Sleeping

App Files Files Community

HuskyDoge committed on Nov 23, 2023

Commit

e7a440c

•

1 Parent(s): 6d4d507

finish model selections

Browse files

Files changed (3) hide show

Gomoku_MCTS/mcts_pure.py +48 -1
const.py +2 -0
pages/Player_VS_AI.py +61 -29

Gomoku_MCTS/mcts_pure.py CHANGED Viewed

@@ -6,6 +6,11 @@ from operator import itemgetter
 import time
 def rollout_policy_fn(board):
     """a coarse, fast version of policy_fn used in the rollout phase."""
     # rollout randomly
@@ -184,6 +189,48 @@ class MCTS(object):
         else:
             self._root = TreeNode(None, 1.0)
     def __str__(self):
         return "MCTS"
@@ -200,7 +247,7 @@ class MCTSPlayer(object):
     def reset_player(self):
         self.mcts.update_with_move(-1)
-    def get_action(self, board):
         sensible_moves = board.availables
         if len(sensible_moves) > 0:
             move, simul_mean_time = self.mcts.get_move(board)

 import time
def softmax(x):
    """Return the softmax of ``x`` as a vector of probabilities.

    The maximum score is subtracted before exponentiating, so large inputs
    do not overflow ``np.exp``; the result is unchanged by that shift.

    Args:
        x: array-like of numeric scores (list, tuple or ``numpy.ndarray``).

    Returns:
        ``numpy.ndarray`` of the same shape as ``x`` whose entries sum to 1.
        An empty input yields an empty float array instead of raising.
    """
    scores = np.asarray(x)
    if scores.size == 0:
        # np.max raises ValueError on an empty array; there is nothing to
        # normalise, so hand back an empty float array.
        return scores.astype(float)
    probs = np.exp(scores - np.max(scores))
    probs /= np.sum(probs)
    return probs
 def rollout_policy_fn(board):
     """a coarse, fast version of policy_fn used in the rollout phase."""
     # rollout randomly
         else:
             self._root = TreeNode(None, 1.0)
+    def get_move_probs(self, state, temp=1e-3):
+        """Run all playouts sequentially and return the available actions and
+        their corresponding probabilities.
+        state: the current game state
+        temp: temperature parameter in (0, 1] controls the level of exploration
+        """
+        start_time_averge = 0
+        ### test multi-thread
+        # lock = threading.Lock()
+        # with ThreadPoolExecutor(max_workers=4) as executor:
+        #     for n in range(self._n_playout):
+        #         start_time = time.time()
+        #         state_copy = copy.deepcopy(state)
+        #         executor.submit(self._playout, state_copy, lock)
+        #         start_time_averge += (time.time() - start_time)
+        ### end test multi-thread
+        t = time.time()
+        for n in range(self._n_playout):
+            start_time = time.time()
+            state_copy = copy.deepcopy(state)
+            self._playout(state_copy)
+            start_time_averge += (time.time() - start_time)
+        total_time = time.time() - t
+        # print('!!time!!:', time.time() - t)
+        print(f" My MCTS sum_time: {total_time}, total_simulation: {self._n_playout}")
+        # calc the move probabilities based on visit counts at the root node
+        act_visits = [(act, node._n_visits)
+                      for act, node in self._root._children.items()]
+        acts, visits = zip(*act_visits)
+        act_probs = softmax(1.0 / temp * np.log(np.array(visits) + 1e-10))
+        return 0, acts, act_probs, total_time
     def __str__(self):
         return "MCTS"
     def reset_player(self):
         self.mcts.update_with_move(-1)
+    def get_action(self, board, return_time=False):
         sensible_moves = board.availables
         if len(sensible_moves) > 0:
             move, simul_mean_time = self.mcts.get_move(board)

const.py CHANGED Viewed

@@ -7,6 +7,8 @@ Description: Some const value for Demo
 import numpy as np
 _BOARD_SIZE = 8
 _BOARD_SIZE_1D = _BOARD_SIZE * _BOARD_SIZE
 _BLANK = 0

 import numpy as np
+_AI_AID_INFO = ["Use AI Aid", "Close AI Aid"]
 _BOARD_SIZE = 8
 _BOARD_SIZE_1D = _BOARD_SIZE * _BOARD_SIZE
 _BLANK = 0

pages/Player_VS_AI.py CHANGED Viewed

@@ -30,10 +30,13 @@ from const import (
     _DIAGONAL_UP_LEFT,
     _DIAGONAL_UP_RIGHT,
     _BOARD_SIZE,
-    _BOARD_SIZE_1D
 )
 # Utils
 class Room:
     def __init__(self, room_id) -> None:
@@ -45,8 +48,10 @@ class Room:
         self.HISTORY = (0, 0)
         self.WINNER = _BLANK
         self.TIME = time.time()
-        self.MCTS = MCTSpure(c_puct=5, n_playout=10)
-        self.MCTS = alphazero(PolicyValueNet(_BOARD_SIZE, _BOARD_SIZE, 'Gomoku_MCTS/checkpoints/best_policy_8_8_5_2torch.pth').policy_value_fn, c_puct=5, n_playout=100)
         self.COORDINATE_1D = [_BOARD_SIZE_1D + 1]
         self.current_move = -1
         self.simula_time_list = []
@@ -69,10 +74,41 @@ if "ROOMS" not in server_state:
     with server_state_lock["ROOMS"]:
         server_state.ROOMS = {}
-# # Layout
-# Main
 TITLE = st.empty()
 TITLE.header("🤖 AI 3603 Gomoku")
 ROUND_INFO = st.empty()
 st.markdown("<br>", unsafe_allow_html=True)
 BOARD_PLATE = [
@@ -93,6 +129,11 @@ with st.sidebar.container():
     RESTART = st.empty()
     AIAID = st.empty()
     EXIT = st.empty()
 GAME_INFO = st.sidebar.container()
 message = st.empty()
 PLAY_MODE_INFO.write("---\n\n**You are Black, AI agent is White.**")
@@ -102,6 +143,7 @@ GAME_INFO.markdown(
     # <span style="color:black;">Freestyle Gomoku game. 🎲</span>
     - no restrictions 🚫
     - no regrets 😎
     - swap players after one round is over 🔁
     Powered by an AlphaZero approach with our own improvements! 🚀 For the specific details, please check out our <a href="insert_report_link_here" style="color:blue;">report</a>.
     ##### Adapted and improved by us! 🌟  <a href="https://github.com/Lijiaxin0111/AI_3603_BIGHOME" style="color:blue;">Our Github repo</a>
@@ -110,6 +152,7 @@ GAME_INFO.markdown(
 )
 def restart() -> None:
     """
     Restart the game.
@@ -217,14 +260,6 @@ def gomoku():
             winner = _BLANK
         return winner
-    def ai_aid() -> None:
-        """
-        Use AI Aid.
-        """
-        session_state.USE_AIAID = not session_state.USE_AIAID
-        print('Use AI Aid: ', session_state.USE_AIAID)
-        draw_board(False)
     # Triggers the board response on click
     def handle_click(x, y):
         """
@@ -270,11 +305,12 @@ def gomoku():
     def draw_board(response: bool):
         """construct each buttons for all cells of the board"""
         if session_state.USE_AIAID and session_state.ROOM.WINNER == _BLANK and session_state.ROOM.TURN == _BLACK:
-            copy_mcts = deepcopy(session_state.ROOM.MCTS.mcts)
-            _, acts, probs, simul_mean_time = copy_mcts.get_move_probs(session_state.ROOM.BOARD)
-            sorted_acts_probs = sorted(zip(acts, probs), key=lambda x: x[1], reverse=True)
-            top_five_acts = [act for act, prob in sorted_acts_probs[:5]]
-            top_five_probs = [prob for act, prob in sorted_acts_probs[:5]]
         if response and session_state.ROOM.TURN == _BLACK:  # human turn
             print("Your turn")
             # construction of clickable buttons
@@ -333,11 +369,12 @@ def gomoku():
                 session_state.ROOM.COORDINATE_1D.append(gpt_i * _BOARD_SIZE + gpt_j)
                 if not session_state.ROOM.BOARD.game_end()[0]:
-                    copy_mcts = deepcopy(session_state.ROOM.MCTS.mcts)
-                    _, acts, probs, simul_mean_time = copy_mcts.get_move_probs(session_state.ROOM.BOARD)
-                    sorted_acts_probs = sorted(zip(acts, probs), key=lambda x: x[1], reverse=True)
-                    top_five_acts = [act for act, prob in sorted_acts_probs[:5]]
-                    top_five_probs = [prob for act, prob in sorted_acts_probs[:5]]
                 else:
                     top_five_acts = []
                     top_five_probs = []
@@ -449,12 +486,7 @@ def gomoku():
         chart_data = pd.DataFrame(session_state.ROOM.simula_time_list, columns=["Simulation Time"])
         st.line_chart(chart_data)
-    # The main game loop
-    AIAID.button(
-        "Use AI Aid",
-        on_click=ai_aid,
-        help="Use AI Aid to help you make moves",
-    )
     game_control()
     update_info()

     _DIAGONAL_UP_LEFT,
     _DIAGONAL_UP_RIGHT,
     _BOARD_SIZE,
+    _BOARD_SIZE_1D,
+    _AI_AID_INFO
 )
 # Utils
 class Room:
     def __init__(self, room_id) -> None:
         self.HISTORY = (0, 0)
         self.WINNER = _BLANK
         self.TIME = time.time()
+        self.MCTS_dict = {'Pure MCTS': MCTSpure(c_puct=5, n_playout=10),
+                          'AlphaZero': alphazero(PolicyValueNet(_BOARD_SIZE, _BOARD_SIZE, 'Gomoku_MCTS/checkpoints/best_policy_8_8_5_2torch.pth').policy_value_fn, c_puct=5, n_playout=100)}
+        self.MCTS = self.MCTS_dict['AlphaZero']
+        self.AID_MCTS = self.MCTS_dict['AlphaZero']
         self.COORDINATE_1D = [_BOARD_SIZE_1D + 1]
         self.current_move = -1
         self.simula_time_list = []
     with server_state_lock["ROOMS"]:
         server_state.ROOMS = {}
def handle_oppo_model_selection():
    """Make the model named in ``selected_oppo_model`` the room's opponent.

    The newly selected player receives a deep copy of the search-tree root
    held by the player it replaces, then becomes ``ROOM.MCTS``.
    """
    room = session_state.ROOM
    inherited_root = room.MCTS.mcts._root
    opponent = room.MCTS_dict[st.session_state['selected_oppo_model']]
    opponent.mcts._root = deepcopy(inherited_root)
    room.MCTS = opponent
def handle_aid_model_selection():
    """Apply the assistant choice stored in ``selected_aid_model``.

    Choosing ``'None'`` switches AI aid off and changes nothing else.
    Any other choice switches it on, gives the chosen player a deep copy
    of the opponent's current search-tree root, and stores that player as
    ``ROOM.AID_MCTS``.
    """
    choice = st.session_state['selected_aid_model']
    aid_enabled = choice != 'None'
    session_state.USE_AIAID = aid_enabled
    if not aid_enabled:
        return
    # The assistant starts from a copy of the opponent's tree root.
    opponent_root = session_state.ROOM.MCTS.mcts._root
    assistant = session_state.ROOM.MCTS_dict[choice]
    assistant.mcts._root = deepcopy(opponent_root)
    session_state.ROOM.AID_MCTS = assistant
+if 'selected_oppo_model' not in st.session_state:
+    st.session_state['selected_oppo_model'] = 'AlphaZero'  # default value, set only when the key is absent
+if 'selected_aid_model' not in st.session_state:
+    st.session_state['selected_aid_model'] = 'AlphaZero'  # default value, set only when the key is absent
+# Layout
 TITLE = st.empty()
+Model_Switch = st.empty()
 TITLE.header("🤖 AI 3603 Gomoku")
+selected_oppo_option = Model_Switch.selectbox('Select Opponent Model', ['Pure MCTS', 'AlphaZero'], index=1, key='oppo_model')
+if st.session_state['selected_oppo_model'] != selected_oppo_option:
+    st.session_state['selected_oppo_model'] = selected_oppo_option
+    handle_oppo_model_selection()
 ROUND_INFO = st.empty()
 st.markdown("<br>", unsafe_allow_html=True)
 BOARD_PLATE = [
     RESTART = st.empty()
     AIAID = st.empty()
     EXIT = st.empty()
+selected_aid_option = AIAID.selectbox('Select Assistant Model', ['None', 'Pure MCTS', 'AlphaZero'], index=0, key='aid_model')
+if st.session_state['selected_aid_model'] != selected_aid_option:
+    st.session_state['selected_aid_model'] = selected_aid_option
+    handle_aid_model_selection()
 GAME_INFO = st.sidebar.container()
 message = st.empty()
 PLAY_MODE_INFO.write("---\n\n**You are Black, AI agent is White.**")
     # <span style="color:black;">Freestyle Gomoku game. 🎲</span>
     - no restrictions 🚫
     - no regrets 😎
+    - no regrets 😎
     - swap players after one round is over 🔁
     Powered by an AlphaZero approach with our own improvements! 🚀 For the specific details, please check out our <a href="insert_report_link_here" style="color:blue;">report</a>.
     ##### Adapted and improved by us! 🌟  <a href="https://github.com/Lijiaxin0111/AI_3603_BIGHOME" style="color:blue;">Our Github repo</a>
 )
 def restart() -> None:
     """
     Restart the game.
             winner = _BLANK
         return winner
     # Triggers the board response on click
     def handle_click(x, y):
         """
     def draw_board(response: bool):
         """construct each buttons for all cells of the board"""
         if session_state.USE_AIAID and session_state.ROOM.WINNER == _BLANK and session_state.ROOM.TURN == _BLACK:
+            if session_state.USE_AIAID:
+                copy_mcts = deepcopy(session_state.ROOM.AID_MCTS.mcts)
+                _, acts_aid, probs_aid, simul_mean_time_aid = copy_mcts.get_move_probs(session_state.ROOM.BOARD)
+                sorted_acts_probs = sorted(zip(acts_aid, probs_aid), key=lambda x: x[1], reverse=True)
+                top_five_acts = [act for act, prob in sorted_acts_probs[:5]]
+                top_five_probs = [prob for act, prob in sorted_acts_probs[:5]]
         if response and session_state.ROOM.TURN == _BLACK:  # human turn
             print("Your turn")
             # construction of clickable buttons
                 session_state.ROOM.COORDINATE_1D.append(gpt_i * _BOARD_SIZE + gpt_j)
                 if not session_state.ROOM.BOARD.game_end()[0]:
+                    if session_state.USE_AIAID:
+                        copy_mcts = deepcopy(session_state.ROOM.AID_MCTS.mcts)
+                        _, acts_aid, probs_aid, simul_mean_time_aid = copy_mcts.get_move_probs(session_state.ROOM.BOARD)
+                        sorted_acts_probs = sorted(zip(acts_aid, probs_aid), key=lambda x: x[1], reverse=True)
+                        top_five_acts = [act for act, prob in sorted_acts_probs[:5]]
+                        top_five_probs = [prob for act, prob in sorted_acts_probs[:5]]
                 else:
                     top_five_acts = []
                     top_five_probs = []
         chart_data = pd.DataFrame(session_state.ROOM.simula_time_list, columns=["Simulation Time"])
         st.line_chart(chart_data)
     game_control()
     update_info()