import numpy as np
from easydict import EasyDict
from rich import print

from zoo.game_2048.envs.expectimax_search_based_bot import expectimax_search
from zoo.game_2048.envs.game_2048_env import Game2048Env

# Settings handed to Game2048Env when the script is run directly.
# Built from a plain dict literal and wrapped in EasyDict, so that entries are
# also reachable as attributes (e.g. ``config.render_mode``).
config = EasyDict({
    'env_name': "game_2048",
    # (str) Render mode. Options listed by the environment are 'None',
    # 'state_realtime_mode', 'image_realtime_mode' or 'image_savefile_mode'.
    # With None the game is not rendered.
    'render_mode': 'image_realtime_mode',
    'replay_format': 'gif',
    'replay_name_suffix': 'bot',
    'replay_path': None,
    'act_scale': True,
    'channel_last': True,
    # options=['raw_board', 'raw_encoded_board', 'dict_encoded_board']
    'obs_type': 'raw_board',
    # options=['raw', 'merged_tiles_plus_log_max_tile_num']
    'reward_type': 'raw',
    'reward_normalize': False,
    'reward_norm_scale': 100,
    'max_tile': 2 ** 16,
    'delay_reward_step': 0,
    'prob_random_agent': 0.,
    'max_episode_steps': 10000,
    'is_collect': False,
    'ignore_legal_actions': True,
    'need_flatten': False,
    'num_of_possible_chance_tile': 2,
    # Two candidate tile values and their matching probabilities.
    'possible_tiles': np.array([2, 4]),
    'tile_probabilities': np.array([0.9, 0.1]),
})

if __name__ == "__main__":
    # Play a single episode of 2048, choosing every move with the
    # expectimax search bot, and render the board after each step.
    env = Game2048Env(config)
    board = env.reset()
    print('init board state: ')
    env.render()

    separator = '=' * 40
    step = 0
    episode_over = False
    while not episode_over:
        print(separator)
        # The bot is fed the current observation cast to int64.
        # Score notes left by the original author (unverified here):
        #   env.human_to_action()  -> about 10000 score
        #   env.random_action()    -> about 1000 score
        #   expectimax_search(...) -> "about 300000~70000 score"
        #   NOTE(review): that last range looks like a typo - confirm.
        action = expectimax_search(board.astype(np.int64))
        board, reward, episode_over, info = env.step(action)
        step += 1
        print(f"step: {step}, action: {action}, reward: {reward}, raw_reward: {info['raw_reward']}")
        env.render(mode='human')

    # Reached only once the environment reports the episode as done.
    print('total_step_number: {}'.format(step))