marci0929 committed on
Commit
13bec41
1 Parent(s): c735fe6

Upload with huggingface_hub

Files changed (5)
  1. agent.py +13 -0
  2. my_model.zip +3 -0
  3. observation_wrapper.py +75 -0
  4. reward_wrapper.py +57 -0
  5. utils.py +10 -0
agent.py ADDED
@@ -0,0 +1,13 @@
+ from stable_baselines3 import A2C
+ from agent.observation_wrapper import CustomObsWrapper
+
+ class Agent:
+
+     def __init__(self, env) -> None:
+         self.model = A2C.load("agent/my_model")
+         self.observation_wrapper = CustomObsWrapper(env)
+
+     def act(self, observation):
+         extended_observation = self.observation_wrapper.observation(observation)
+
+         return self.model.predict(extended_observation, deterministic=True)
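A minimal inference sketch for the class above (the env id and the gym-style reset/step API are assumptions, not part of this commit):

import gym
from agent.agent import Agent  # hypothetical module path for agent.py above

env = gym.make("TetrisEnv-v0")  # assumed environment id
agent = Agent(env)

obs = env.reset()
done = False
while not done:
    # act() returns model.predict's (action, state) tuple
    action, _state = agent.act(obs)
    obs, reward, done, info = env.step(action)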
my_model.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d2f023b0292ff0d225d43e005826d45ce4e0f24ef202bbc1ba08e6f1960ffcc8
+ size 2400942
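The zip itself lives in Git LFS; only this pointer is versioned. When consuming the repo, something like the following fetches the real archive (the repo_id is a hypothetical placeholder, and the filename path may differ from the layout shown here):

from huggingface_hub import hf_hub_download
from stable_baselines3 import A2C

model_path = hf_hub_download(repo_id="marci0929/tetris-agent", filename="my_model.zip")
model = A2C.load(model_path)  # SB3 accepts a path to the saved .zip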
observation_wrapper.py ADDED
@@ -0,0 +1,75 @@
+ import gym
+ from gym import spaces
+ from tetris_gym.utils.board_utils import get_heights, get_bumps_from_heights
+ from agent.utils import calc_holes_array
+
+ import numpy as np
+
+
+ class CustomObsWrapper(gym.ObservationWrapper):
+     def __init__(self, env):
+         super().__init__(env)
+         # Extend the raw board/piece observation with derived features
+         self.observation_space = spaces.Dict({
+             "board": env.observation_space["board"],
+             "piece": env.observation_space["piece"],
+             "holes_list": spaces.Box(
+                 low=0,
+                 high=env.height,
+                 shape=(env.width,),
+                 dtype=np.uint8,
+             ),
+             "x": spaces.Discrete(env.width),
+             "y": spaces.Discrete(env.height),
+             "piece_shape": spaces.Box(
+                 low=0,
+                 high=1,
+                 shape=(4, 4),
+                 dtype=np.uint8,
+             ),
+             "empty_above": spaces.Box(
+                 low=0,
+                 high=env.height,
+                 shape=(env.width,),
+                 dtype=np.uint8,
+             ),
+             "heights": spaces.Box(
+                 low=0,
+                 high=env.height,
+                 shape=(env.width,),
+                 dtype=np.uint8,
+             ),
+             "bumps": spaces.Box(
+                 low=0,
+                 high=env.height,
+                 shape=(env.width - 1,),
+                 dtype=int,
+             )
+         })
+
+     def observation(self, obs):
+         board = obs["board"]
+         piece = obs["piece"]
+
+         # Column heights, neighbour height differences, and per-column holes
+         heights = get_heights(board)
+         bumps = get_bumps_from_heights(heights)
+         holes_array = calc_holes_array(self, board, heights)
+         empty_above = np.max(heights) - heights[:]
+         # Pad the current piece into a fixed 4x4 grid
+         piece_shape = np.zeros((4, 4), dtype=np.uint8)
+         piece_shape[:len(self.piece), :len(self.piece[0])] = self.piece[:]
+
+         obs = {
+             "board": board,
+             "x": self.current_pos["x"],
+             "y": self.current_pos["y"],
+             "piece_shape": piece_shape,
+             "piece": piece,
+             "empty_above": empty_above,
+             "holes_list": holes_array,
+             "heights": heights,
+             "bumps": bumps
+         }
+
+         return obs
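Applied at training time, the wrapper simply augments each observation dict; a sketch (env construction assumed, not part of this commit):

import gym
from agent.observation_wrapper import CustomObsWrapper

env = CustomObsWrapper(gym.make("TetrisEnv-v0"))  # assumed env id
obs = env.reset()
# obs now carries the derived features next to the raw ones:
# board, piece, x, y, piece_shape, empty_above, holes_list, heights, bumps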
reward_wrapper.py ADDED
@@ -0,0 +1,57 @@
+ import gym
+ import numpy as np
+
+
+ class CustomRewardWrapper(gym.Wrapper):
+
+     def __init__(self, env):
+         super().__init__(env)
+         self.prev_max_height = 0
+         self.prev_cleared = 0
+         self.prev_score = 0
+         self.prev_holes = 0
+
+     def step(self, action):
+         obs, reward, done, info = self.env.step(action)
+         board = obs["board"]
+         heights = obs["heights"]
+
+         # Base survival reward per step
+         reward = 2
+
+         # Earlier reward variants, kept for reference:
+         # reward = (self.height - max(heights)) / self.height
+         # reward += np.sum(board)
+         # reward = (self.score - self.prev_score) + 1
+         # reward += (self.prev_max_height - max(heights))
+         # reward += self.cleared_lines
+         # reward = 1 + ((self.cleared_lines - self.prev_cleared) ** 2) * self.width
+
+         # Cubic bonus for lines cleared this step
+         reward += (self.cleared_lines - self.prev_cleared) ** 3
+
+         # Penalty for big differences between columns
+         reward -= self.get_bumpiness_and_height(board)[0] / self.height
+
+         # Penalty for new holes; an earlier depth-weighted variant:
+         # holes_val = 0
+         # for col_num in range(self.width):
+         #     col_value = 0
+         #     for row_num in range(self.height - 1, self.height - 1 - heights[col_num], -1):
+         #         col_value += 1 if board[row_num][col_num] == 1 else -(row_num / self.width)
+         #     holes_val += col_value / (1 + heights[col_num])
+         holes = self.get_holes(board)
+         reward -= (holes - self.prev_holes) * 0.8
+
+         self.prev_max_height = np.max(heights)
+         self.prev_cleared = self.cleared_lines
+         self.prev_score = self.score
+         self.prev_holes = holes
+
+         if self.gameover:
+             self.prev_max_height = 0
+             self.prev_cleared = 0
+             self.prev_score = 0
+             self.prev_holes = 0
+
+         return obs, reward, done, info
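Both wrappers compose in the usual gym fashion for training; a sketch of how the shipped model was plausibly produced (env id and hyperparameters are assumptions):

import gym
from stable_baselines3 import A2C
from agent.observation_wrapper import CustomObsWrapper
from agent.reward_wrapper import CustomRewardWrapper

env = CustomRewardWrapper(CustomObsWrapper(gym.make("TetrisEnv-v0")))  # assumed env id

# MultiInputPolicy is SB3's policy for Dict observation spaces
model = A2C("MultiInputPolicy", env, verbose=1)
model.learn(total_timesteps=1_000_000)  # illustrative budget
model.save("agent/my_model")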
utils.py ADDED
@@ -0,0 +1,10 @@
+ def calc_holes_array(env, board, heights):
+     # Count covered empty cells (holes) per column, scanning each column
+     # from the bottom row up to that column's height.
+     holes_list = []
+     for col_num in range(env.width):
+         col_value = 0
+         for row_num in range(env.height - 1, env.height - 1 - heights[col_num], -1):
+             col_value += 0 if board[row_num][col_num] == 1 else 1
+         holes_list.append(col_value)
+     return holes_list
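A tiny worked example: any object exposing width and height works as the first argument (in observation_wrapper.py the wrapper instance itself is passed, forwarding those attributes from the env):

from types import SimpleNamespace

env = SimpleNamespace(width=3, height=4)
board = [
    [0, 0, 0],
    [1, 0, 0],  # column 0 has a filled cell here...
    [0, 1, 0],  # ...an empty cell below it (a hole)...
    [1, 1, 0],  # ...and a filled bottom-row cell
]
heights = [3, 2, 0]  # per-column heights, as get_heights would report
print(calc_holes_array(env, board, heights))  # -> [1, 0, 0]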