from math import sqrt
import gym
import numpy as np


class CustomRewardWrapper(gym.Wrapper):
    """Gym wrapper that discards the wrapped env's reward and substitutes a
    shaped, Tetris-style reward built from line clears, column bumpiness,
    stack height, and holes.

    Relies on attributes the wrapped env exposes (reached through
    ``gym.Wrapper``'s attribute forwarding): ``height``, ``width``,
    ``cleared_lines``, ``score``, ``gameover``, ``get_bumpiness_and_height``
    and ``get_holes`` — confirm against the concrete env implementation.
    ``obs`` is assumed to be a dict with ``"board"`` and ``"heights"`` keys.
    """

    def __init__(self, env):
        super().__init__(env)
        # Per-episode trackers used to compute step-to-step deltas.
        # NOTE(review): prev_max_height and prev_score are updated but not
        # read by the active reward terms; kept for compatibility with
        # earlier reward variants / potential subclasses.
        self.prev_max_height = 0
        self.prev_cleared = 0
        self.prev_score = 0
        self.prev_holes = 0

    def step(self, action):
        """Step the wrapped env and return a shaped reward in place of its own.

        Shaped reward = survival bonus
                        + (lines cleared this step) ** 3
                        - bumpiness / board height
                        + headroom term based on the tallest column
                        - 0.8 * (newly created holes)
        """
        obs, reward, done, info = self.env.step(action)
        board = obs["board"]
        heights = obs["heights"]

        # Base survival bonus for every step taken (the env's own reward
        # is intentionally discarded).
        reward = 1

        # Strongly favor multi-line clears: cubic in the number of lines
        # cleared on this step.
        reward += (self.cleared_lines - self.prev_cleared) ** 3

        # Penalize uneven column heights; [0] is the bumpiness component,
        # normalized by the board height.
        reward -= self.get_bumpiness_and_height(board)[0] / self.height

        # Headroom term. NOTE(review): operator precedence makes this
        # 0.5 * (height - max_height) — a non-negative bonus shrinking as
        # the stack grows. If the intent was ((height / 2) - max_height) * 0.5
        # (a penalty once the stack passes half the board), parentheses are
        # missing; behavior preserved as written — confirm intent.
        reward += (self.height / 2) - np.max(heights) * 0.5

        # Penalize newly created holes (recovering holes earns it back).
        holes = self.get_holes(board)
        reward -= (holes - self.prev_holes) * 0.8

        # Refresh trackers for the next step's deltas.
        self.prev_max_height = np.max(heights)
        self.prev_cleared = self.cleared_lines
        self.prev_score = self.score
        self.prev_holes = holes

        # Reset trackers at episode end so the next episode starts clean.
        if self.gameover:
            self.prev_max_height = 0
            self.prev_cleared = 0
            self.prev_score = 0
            self.prev_holes = 0

        return obs, reward, done, info