File size: 3,249 Bytes
1f8ee4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from abc import ABC

import gymnasium as gym
from gymnasium import spaces
import numpy as np


class NimGameEnv(gym.Env, ABC):
    """Custom environment for a simple Nim game.

    In this game, there are two players and a number of piles of stones.
    Each turn, a player can choose a pile and remove any number of stones from it.
    The player who takes the last stone loses.

    The observation space is a ``MultiDiscrete`` with one entry per pile, each
    ranging over ``0..max_stones``. The observations actually returned by
    ``step`` and ``reset`` are plain lists holding the stone count of each pile.

    The action space is a ``MultiDiscrete`` of two integers: the index of the
    chosen pile and the number of stones to remove. The space includes a stone
    count of 0, which ``_is_valid_action`` rejects.
    """

    def __init__(self, starting_stick_piles=None):
        """Create the environment.

        Parameters
        ----------
        starting_stick_piles: sequence of int, optional
            Number of stones in each pile at the start of every game.
            Defaults to ``[3, 5, 7]``.

        Raises
        ------
        ValueError
            If ``starting_stick_piles`` is empty.
        """
        super().__init__()
        # A ``None`` sentinel avoids sharing one mutable default list between
        # every instance created without an explicit argument.
        if starting_stick_piles is None:
            starting_stick_piles = [3, 5, 7]
        # Copy so later changes to the caller's sequence cannot alter the
        # configuration used by reset().
        self.starting_stick_piles = list(starting_stick_piles)
        if not self.starting_stick_piles:
            raise ValueError("starting_stick_piles must contain at least one pile")
        self.num_piles = len(self.starting_stick_piles)
        self.max_stones = max(self.starting_stick_piles)
        self.piles = self._init_piles()
        self.current_player = 0
        self.action_space = spaces.MultiDiscrete([self.num_piles, self.max_stones + 1])
        self.observation_space = spaces.MultiDiscrete([self.max_stones + 1] * self.num_piles)

    def step(self, action):
        """Take a step in the environment.

        Parameters
        ----------
        action: tuple
            The action taken by the player, represented as a tuple of the chosen pile and the number of stones to remove.

        Returns
        -------
        observation: list
            A copy of the current number of stones in each pile.
        reward: float
            The reward for the current step.
        done: bool
            Whether the game has ended.
        info: dict
            Additional information about the step (always empty).

        Raises
        ------
        ValueError
            If the action is rejected by ``_is_valid_action``.
        """
        # Validate the action
        if not self._is_valid_action(action):
            raise ValueError("Invalid action")

        # Update the piles
        pile, num_stones = action
        self.piles[pile] -= num_stones

        # Determine if the game has ended
        done = self._is_game_over()

        # Calculate the reward
        reward = self._calculate_reward()

        # Switch the current player
        self.current_player = (self.current_player + 1) % 2
        # Return a copy so observations stored by the caller are not mutated
        # by subsequent steps.
        return list(self.piles), reward, done, {}

    def reset(self):
        """Reset the environment to the initial state.

        Returns
        -------
        text_observation: str
            A human-readable sentence listing the stones in each pile.
        observation: list
            A copy of the number of stones in each pile.
        """
        self.piles = self._init_piles()
        self.current_player = 0
        text_observation = "The piles contain " + ", ".join(str(x) for x in self.piles) + " sticks."
        return text_observation, list(self.piles)

    def _init_piles(self):
        """Return a fresh copy of the configured starting piles."""
        # Built from the constructor argument (not a hard-coded layout) so the
        # piles always agree with num_piles, max_stones and the spaces.
        return list(self.starting_stick_piles)

    def _generate_random_stones(self):
        """Generate a random number of stones (between 1 and max_stones inclusive)."""
        return np.random.randint(1, self.max_stones + 1)

    def _is_valid_action(self, action):
        """Determine if an action is valid.

        An action is valid when the pile index is in range and the number of
        stones is at least 1 and no more than the chosen pile currently holds.
        """
        pile, num_stones = action
        # The pile-index check comes first so an out-of-range (or negative)
        # index never reaches the self.piles lookup.
        return 0 <= pile < self.num_piles and 0 < num_stones <= self.max_stones and num_stones <= self.piles[pile]

    def _is_game_over(self):
        """Determine if the game has ended (every pile is empty)."""
        return all(pile == 0 for pile in self.piles)

    def _calculate_reward(self):
        """Calculate the reward for the current step.

        Returns 1 when the piles are all empty after the move, otherwise 0.
        """
        # NOTE(review): the class docstring says the player who takes the last
        # stone loses, yet the move that empties the piles yields reward 1.
        # Confirm how callers interpret this value before changing it.
        return 1 if self._is_game_over() else 0