Spaces:

kaushikvr06
/

reasoning-simulator

Build error

App Files Files Community

Kaushik Rajan committed on Jul 12

Commit

5e02c64

1 Parent(s): 06c8d18

Phase 2.2: Implement game environments - TicTacToe and Kuhn Poker with Gymnasium interface, utilities, and tests

Browse files

Files changed (5) hide show

src/games/__init__.py +8 -4
src/games/game_utils.py +212 -0
src/games/kuhn_poker.py +314 -0
src/games/tictactoe.py +237 -0
test_games.py +82 -0

src/games/__init__.py CHANGED Viewed

@@ -5,8 +5,12 @@ This module contains implementations of zero-sum games used for
 self-play training, including Kuhn Poker and TicTacToe.
 """
-from .kuhn_poker import KuhnPokerEnv
-from .tictactoe import TicTacToeEnv
-from .base_game import BaseGameEnv
-__all__ = ["KuhnPokerEnv", "TicTacToeEnv", "BaseGameEnv"]

 self-play training, including Kuhn Poker and TicTacToe.
 """
+from .tictactoe import TicTacToeEnv, create_tictactoe_env
+from .kuhn_poker import KuhnPokerEnv, create_kuhn_poker_env
+__all__ = [
+    "TicTacToeEnv",
+    "KuhnPokerEnv",
+    "create_tictactoe_env",
+    "create_kuhn_poker_env"
+]

src/games/game_utils.py ADDED Viewed

	@@ -0,0 +1,212 @@

+"""
+Game utility functions for SPIRAL training.
+This module contains helper functions for game environments,
+including multi-turn logic and game state management.
+"""
+import gymnasium as gym
+from typing import Dict, Any, Type, Union
+import numpy as np
+from .tictactoe import TicTacToeEnv
+from .kuhn_poker import KuhnPokerEnv
+# Game registry: maps the game name accepted by create_game_env() to the
+# environment class that is instantiated for it.
+GAMES_REGISTRY: Dict[str, Type[gym.Env]] = {
+    "tictactoe": TicTacToeEnv,
+    "kuhn_poker": KuhnPokerEnv,
+}
+def create_game_env(game_name: str, **kwargs) -> gym.Env:
+    """
+    Instantiate the registered environment called ``game_name``.
+    Args:
+        game_name: Registry key of the game ("tictactoe", "kuhn_poker")
+        **kwargs: Forwarded unchanged to the environment constructor
+    Returns:
+        A newly constructed environment
+    Raises:
+        ValueError: If no game is registered under game_name
+    """
+    if game_name in GAMES_REGISTRY:
+        env_factory = GAMES_REGISTRY[game_name]
+        return env_factory(**kwargs)
+    # Unknown name: list what is registered so the caller can correct it.
+    known_names = list(GAMES_REGISTRY.keys())
+    raise ValueError(f"Unknown game: {game_name}. Available games: {known_names}")
+def get_game_info(game_name: str) -> Dict[str, Any]:
+    """
+    Get information about a game environment.
+    A temporary environment is created to read its spaces and metadata; it is
+    closed again even if reading one of those attributes fails.
+    Args:
+        game_name: Name of the game
+    Returns:
+        Dictionary with the keys "name", "action_space", "observation_space",
+        "max_episode_steps" and "render_modes". For "tictactoe" and
+        "kuhn_poker" it also contains "description", "players", "zero_sum"
+        and "perfect_information".
+    Raises:
+        ValueError: If game_name is not recognized (raised by create_game_env)
+    """
+    # Static facts about each game that cannot be read off the environment.
+    game_details: Dict[str, Dict[str, Any]] = {
+        "tictactoe": {
+            "description": "3x3 TicTacToe game with alternating turns",
+            "players": 2,
+            "zero_sum": True,
+            "perfect_information": True,
+        },
+        "kuhn_poker": {
+            "description": "Simplified poker with 3 cards (J, Q, K)",
+            "players": 2,
+            "zero_sum": True,
+            "perfect_information": False,
+        },
+    }
+    env = create_game_env(game_name)
+    try:
+        info = {
+            "name": game_name,
+            "action_space": env.action_space,
+            "observation_space": env.observation_space,
+            "max_episode_steps": getattr(env, "_max_episode_steps", None),
+            "render_modes": env.metadata.get("render_modes", []),
+        }
+    finally:
+        # Release the throwaway environment on every path.
+        env.close()
+    # Games without an entry simply get no extra keys.
+    info.update(game_details.get(game_name, {}))
+    return info
+def get_available_games() -> list:
+    """Return the names of all registered games."""
+    return [name for name in GAMES_REGISTRY]
+def is_game_over(env: gym.Env) -> bool:
+    """
+    Report whether the environment has flagged the game as finished.
+    Args:
+        env: Game environment
+    Returns:
+        The environment's ``game_over`` attribute, or False when the
+        environment does not define one
+    """
+    return getattr(env, 'game_over', False)
+def get_valid_actions(env: gym.Env) -> list:
+    """
+    Ask the environment which actions are playable in its current state.
+    Args:
+        env: Game environment
+    Returns:
+        Whatever the environment's ``_get_valid_actions`` (preferred) or
+        ``get_valid_actions`` method returns; if it has neither, every
+        index of its discrete action space
+    """
+    # The private hook takes precedence over the public one.
+    for hook_name in ('_get_valid_actions', 'get_valid_actions'):
+        if hasattr(env, hook_name):
+            return getattr(env, hook_name)()
+    # Fallback: assume all actions are valid
+    return list(range(env.action_space.n))
+def get_action_mask(env: gym.Env) -> np.ndarray:
+    """
+    Get action mask for the current state.
+    Args:
+        env: Game environment
+    Returns:
+        Boolean array with one entry per action, True where the action is
+        valid. The dtype is bool whichever path produced the mask, so the
+        result is always safe to use for boolean indexing.
+    """
+    if hasattr(env, 'get_action_mask'):
+        # An environment may report its mask as 0/1 integers; convert it so
+        # callers get the same dtype as from the fallback below.
+        return np.asarray(env.get_action_mask(), dtype=bool)
+    # Fallback: create mask from valid actions
+    valid_actions = get_valid_actions(env)
+    mask = np.zeros(env.action_space.n, dtype=bool)
+    # The explicit integer dtype keeps an empty action list a valid index.
+    mask[np.asarray(valid_actions, dtype=np.intp)] = True
+    return mask
+def play_random_game(game_name: str, render: bool = False, seed: Union[int, None] = None) -> Dict[str, Any]:
+    """
+    Play a random game to completion.
+    Args:
+        game_name: Name of the game to play
+        render: Whether to render the game
+        seed: Random seed for reproducibility. It is passed to the
+            environment's reset() and also seeds the generator that picks
+            the random actions.
+    Returns:
+        Dictionary with the keys "game_name", "total_reward" (sum of the
+        rewards returned by each step), "step_count", "actions_taken"
+        (plain ints), "winner" and "final_info" (info of the last step, or
+        of reset() if no step was taken)
+    Raises:
+        ValueError: If game_name is not recognized (raised by create_game_env)
+    """
+    env = create_game_env(game_name, render_mode="human" if render else None)
+    # A private generator makes the action choice depend on `seed` instead of
+    # on the process-wide numpy random state.
+    rng = np.random.default_rng(seed)
+    try:
+        # Keep the reset info so "final_info" is defined even with zero steps.
+        _, info = env.reset(seed=seed)
+        if render:
+            env.render()
+        total_reward = 0
+        step_count = 0
+        actions_taken = []
+        while not is_game_over(env):
+            valid_actions = get_valid_actions(env)
+            if len(valid_actions) == 0:
+                # Nothing can be played; stop instead of sampling from nothing.
+                break
+            # int() turns the numpy scalar into a plain Python int.
+            action = int(rng.choice(valid_actions))
+            obs, reward, terminated, truncated, info = env.step(action)
+            actions_taken.append(action)
+            total_reward += reward
+            step_count += 1
+            if render:
+                print(f"Step {step_count}: Action {action}, Reward: {reward}")
+                env.render()
+            if terminated or truncated:
+                break
+        results = {
+            "game_name": game_name,
+            "total_reward": total_reward,
+            "step_count": step_count,
+            "actions_taken": actions_taken,
+            "winner": getattr(env, 'winner', None),
+            "final_info": info
+        }
+    finally:
+        # Close the environment even if a step or render call raised.
+        env.close()
+    return results
+if __name__ == "__main__":
+    # Smoke-test the helpers when the module is executed directly.
+    game_names = get_available_games()
+    print("Available games:", game_names)
+    for game_name in game_names:
+        print(f"\n{game_name.upper()} Info:")
+        for key, value in get_game_info(game_name).items():
+            print(f"  {key}: {value}")
+    # Finish with one rendered, seeded random game.
+    print("\nPlaying random TicTacToe game:")
+    result = play_random_game("tictactoe", render=True, seed=42)

src/games/kuhn_poker.py ADDED Viewed

	@@ -0,0 +1,314 @@

+"""
+Kuhn Poker Game Environment
+A simple Kuhn Poker implementation using Gymnasium for SPIRAL training.
+Kuhn Poker is a simplified poker variant with 3 cards (J, Q, K).
+"""
+import gymnasium as gym
+import numpy as np
+from gymnasium import spaces
+from typing import Tuple, Dict, Any, Optional, List
+import random
+class KuhnPokerEnv(gym.Env):
+    """
+    Kuhn Poker environment for SPIRAL training.
+    Rules:
+    - 3 cards: Jack (0), Queen (1), King (2)
+    - Each player gets 1 card
+    - Each player antes 1 chip
+    - Player 1 acts first: Check or Bet
+    - If player 1 bets, player 2 calls or folds
+    - If player 1 checks, player 2 checks (showdown) or bets; after that bet
+      player 1 calls or folds
+    - At showdown the high card wins
+    - If a player folds, the opponent wins
+    Action space: [Check/Call=0, Bet=1, Fold=2]
+    Observation space: [player_card, opponent_last_action + 1, betting_round, pot_size]
+    The reward returned by step() is from the point of view of the player
+    who made that move.
+    """
+    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 1}
+    # Card values: Jack=0, Queen=1, King=2
+    JACK, QUEEN, KING = 0, 1, 2
+    CARDS = [JACK, QUEEN, KING]
+    CARD_NAMES = ["J", "Q", "K"]
+    # Actions
+    CHECK_CALL, BET, FOLD = 0, 1, 2
+    ACTION_NAMES = ["Check/Call", "Bet", "Fold"]
+    def __init__(self, render_mode: Optional[str] = None):
+        """
+        Create the environment and deal a first hand.
+        Args:
+            render_mode: "human" prints the state in render(), "rgb_array"
+                makes render() return an image array, None disables rendering
+        """
+        super().__init__()
+        # Observation: [player_card, opponent_last_action, betting_round, pot_size]
+        self.observation_space = spaces.Box(
+            low=0, high=10, shape=(4,), dtype=np.int8
+        )
+        # Actions: Check/Call, Bet, Fold
+        self.action_space = spaces.Discrete(3)
+        self.render_mode = render_mode
+        self.reset()
+    def reset(self, seed: Optional[int] = None, options: Optional[Dict] = None) -> Tuple[np.ndarray, Dict]:
+        """
+        Reset the game to initial state and deal new cards.
+        Args:
+            seed: Seed for the environment's random generator; the same seed
+                produces the same deal
+            options: Unused
+        Returns:
+            observation, info
+        """
+        super().reset(seed=seed)
+        # Deal from the environment's own generator (seeded by the call
+        # above) so that reset(seed=...) is reproducible.
+        dealt = self.np_random.choice(self.CARDS, size=2, replace=False)
+        self.player1_card = int(dealt[0])
+        self.player2_card = int(dealt[1])
+        # Game state
+        self.current_player = 1  # Player 1 starts
+        self.pot = 2  # Each player antes 1
+        self.player1_bet = 1  # Ante
+        self.player2_bet = 1  # Ante
+        self.game_over = False
+        self.winner = None
+        self.betting_round = 0
+        self.actions_history = []
+        observation = self._get_observation()
+        info = self._get_info()
+        return observation, info
+    def step(self, action: int) -> Tuple[np.ndarray, float, bool, bool, Dict]:
+        """
+        Execute one step in the environment.
+        Args:
+            action: 0=Check/Call, 1=Bet, 2=Fold
+        Returns:
+            observation, reward, terminated, truncated, info. The reward is
+            0.0 while the hand continues; when the hand ends it is the chip
+            gain or loss of the player who made this move.
+        Raises:
+            ValueError: If the game is already over
+        """
+        if self.game_over:
+            raise ValueError("Game is already over. Call reset() to start new game.")
+        actor = self.current_player
+        opponent = 2 if actor == 1 else 1
+        # A bet is outstanding when the players have put in unequal amounts.
+        facing_bet = self.player1_bet != self.player2_bet
+        # Record action
+        self.actions_history.append((actor, action))
+        if action == self.FOLD:
+            # Current player folds, opponent wins
+            self.game_over = True
+            self.winner = opponent
+            reward = self._calculate_reward()
+        elif action == self.BET:
+            if actor == 1:
+                self.player1_bet += 1
+            else:
+                self.player2_bet += 1
+            self.pot += 1
+            if facing_bet:
+                # There are no raises: betting into a bet just matches it,
+                # so both stakes are equal and the hand goes to showdown.
+                self.game_over = True
+                self.winner = self._determine_winner_by_cards()
+                reward = self._calculate_reward()
+            else:
+                # Opening bet (by player 1, or by player 2 after a check):
+                # the opponent must get the chance to call or fold.
+                self.current_player = opponent
+                self.betting_round += 1
+                reward = 0.0
+        else:  # CHECK_CALL
+            if self.betting_round == 0:
+                # Player 1 opens with a check, player 2 acts
+                self.current_player = opponent
+                self.betting_round = 1
+                reward = 0.0
+            else:
+                # Check behind or call: level both stakes, then showdown
+                matched = max(self.player1_bet, self.player2_bet)
+                self.player1_bet = matched
+                self.player2_bet = matched
+                self.pot = self.player1_bet + self.player2_bet
+                self.game_over = True
+                self.winner = self._determine_winner_by_cards()
+                reward = self._calculate_reward()
+        observation = self._get_observation()
+        info = self._get_info()
+        return observation, reward, self.game_over, False, info
+    def _get_observation(self) -> np.ndarray:
+        """Get current observation for the current player."""
+        # Get current player's card
+        player_card = self.player1_card if self.current_player == 1 else self.player2_card
+        # Most recent action of the other player, -1 if there is none yet
+        opponent_last_action = next(
+            (
+                past_action
+                for player, past_action in reversed(self.actions_history)
+                if player != self.current_player
+            ),
+            -1,
+        )
+        # Observation: [player_card, opponent_last_action, betting_round, pot_size]
+        observation = np.array([
+            player_card,
+            opponent_last_action + 1,  # -1 becomes 0, 0 becomes 1, etc.
+            self.betting_round,
+            self.pot
+        ], dtype=np.int8)
+        return observation
+    def _get_info(self) -> Dict[str, Any]:
+        """Get additional info about the game state (includes both cards)."""
+        return {
+            "current_player": self.current_player,
+            "game_over": self.game_over,
+            "winner": self.winner,
+            "player1_card": self.player1_card,
+            "player2_card": self.player2_card,
+            "pot": self.pot,
+            "betting_round": self.betting_round,
+            "actions_history": self.actions_history.copy(),
+            "valid_actions": self._get_valid_actions()
+        }
+    def _get_valid_actions(self) -> List[int]:
+        """Get list of valid actions (empty once the game is over)."""
+        if self.game_over:
+            return []
+        # step() accepts every action in every non-terminal state: a Bet
+        # into an outstanding bet acts as a call, and a Fold with no bet
+        # outstanding concedes the pot.
+        return [self.CHECK_CALL, self.BET, self.FOLD]
+    def _determine_winner_by_cards(self) -> int:
+        """Determine winner by comparing cards (the two cards always differ)."""
+        return 1 if self.player1_card > self.player2_card else 2
+    def _calculate_reward(self) -> float:
+        """
+        Calculate reward for the current player.
+        Returns:
+            0.0 while the game is running; otherwise the pot minus the
+            player's own stake on a win, or minus that stake on a loss
+        """
+        if not self.game_over:
+            return 0.0
+        own_stake = self.player1_bet if self.current_player == 1 else self.player2_bet
+        if self.winner == self.current_player:
+            # Won - get the pot minus what you put in
+            return float(self.pot - own_stake)
+        # Lost - lose what you put in
+        return float(-own_stake)
+    def render(self) -> Optional[np.ndarray]:
+        """Render the game state according to render_mode."""
+        if self.render_mode == "human":
+            self._render_human()
+        elif self.render_mode == "rgb_array":
+            return self._render_rgb_array()
+    def _render_human(self):
+        """Print the game state to console."""
+        print("\n" + "="*40)
+        print("KUHN POKER")
+        print("="*40)
+        print(f"Player 1 Card: {self.CARD_NAMES[self.player1_card]}")
+        print(f"Player 2 Card: {self.CARD_NAMES[self.player2_card]}")
+        print(f"Pot: {self.pot}")
+        print(f"Current Player: {self.current_player}")
+        print(f"Betting Round: {self.betting_round}")
+        if self.actions_history:
+            print("Actions:")
+            for player, action in self.actions_history:
+                print(f"  Player {player}: {self.ACTION_NAMES[action]}")
+        if self.game_over:
+            print(f"Game Over! Winner: Player {self.winner}")
+        print("="*40)
+    def _render_rgb_array(self) -> np.ndarray:
+        """Render as RGB array for visualization."""
+        # Simple RGB representation (placeholder)
+        rgb = np.zeros((100, 100, 3), dtype=np.uint8)
+        # Color based on current player's card
+        if self.current_player == 1:
+            card_value = self.player1_card
+        else:
+            card_value = self.player2_card
+        # Different colors for different cards
+        if card_value == self.JACK:
+            rgb[:, :] = [255, 0, 0]  # Red for Jack
+        elif card_value == self.QUEEN:
+            rgb[:, :] = [0, 255, 0]  # Green for Queen
+        else:  # King
+            rgb[:, :] = [0, 0, 255]  # Blue for King
+        return rgb
+    def get_action_mask(self) -> np.ndarray:
+        """Get mask of valid actions (1 for valid, 0 for invalid)."""
+        mask = np.zeros(3, dtype=np.int8)
+        for action in self._get_valid_actions():
+            mask[action] = 1
+        return mask
+def create_kuhn_poker_env(render_mode: Optional[str] = None) -> KuhnPokerEnv:
+    """
+    Factory function to create a Kuhn Poker environment.
+    Args:
+        render_mode: Passed to KuhnPokerEnv; the default None keeps the
+            behaviour of calling the factory without arguments
+    Returns:
+        A new KuhnPokerEnv
+    """
+    return KuhnPokerEnv(render_mode=render_mode)
+if __name__ == "__main__":
+    # Test the environment
+    env = KuhnPokerEnv(render_mode="human")
+    # Play a simple game
+    obs, info = env.reset()
+    print("Initial state:")
+    env.render()
+    # Simulate some moves
+    while not env.game_over:
+        # Remember who is about to move: step() may pass the turn to the
+        # opponent, so current_player afterwards is not the one who acted.
+        acting_player = env.current_player
+        valid_actions = env._get_valid_actions()
+        action = random.choice(valid_actions)
+        obs, reward, terminated, truncated, info = env.step(action)
+        print(f"\nPlayer {acting_player} action: {env.ACTION_NAMES[action]}")
+        print(f"Reward: {reward}")
+        env.render()
+        if terminated:
+            print(f"Game terminated! Final reward: {reward}")
+            break

src/games/tictactoe.py ADDED Viewed

	@@ -0,0 +1,237 @@

+"""
+TicTacToe Game Environment
+A simple TicTacToe implementation using Gymnasium for SPIRAL training.
+"""
+import gymnasium as gym
+import numpy as np
+from gymnasium import spaces
+from typing import Tuple, Dict, Any, Optional
+class TicTacToeEnv(gym.Env):
+    """
+    TicTacToe environment for SPIRAL training.
+    - 3x3 grid
+    - Players alternate turns (1 and -1)
+    - Action space: 9 positions (0-8)
+    - Observation space: 3x3 grid with values {-1, 0, 1}
+    - Reward: +1 for win, -1 for loss, 0 for draw/ongoing
+    - An illegal move (occupied cell or position outside 0-8) ends the game
+      as a loss for the player who made it
+    """
+    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 1}
+    def __init__(self, render_mode: Optional[str] = None):
+        """
+        Create the environment with an empty board.
+        Args:
+            render_mode: "human" prints the board in render(), "rgb_array"
+                makes render() return an image array, None disables rendering
+        """
+        super().__init__()
+        # 3x3 grid, each cell can be -1 (player 2), 0 (empty), or 1 (player 1)
+        self.observation_space = spaces.Box(
+            low=-1, high=1, shape=(3, 3), dtype=np.int8
+        )
+        # 9 possible actions (positions 0-8)
+        self.action_space = spaces.Discrete(9)
+        self.render_mode = render_mode
+        self.reset()
+    def reset(self, seed: Optional[int] = None, options: Optional[Dict] = None) -> Tuple[np.ndarray, Dict]:
+        """
+        Reset the game to initial state.
+        Args:
+            seed: Forwarded to gym.Env.reset; the game itself has no randomness
+            options: Unused
+        Returns:
+            observation, info
+        """
+        super().reset(seed=seed)
+        # Initialize empty board
+        self.board = np.zeros((3, 3), dtype=np.int8)
+        self.current_player = 1  # Player 1 starts
+        self.game_over = False
+        self.winner = None
+        self.move_count = 0
+        observation = self._get_observation()
+        info = self._get_info()
+        return observation, info
+    def step(self, action: int) -> Tuple[np.ndarray, float, bool, bool, Dict]:
+        """
+        Execute one step in the environment.
+        Args:
+            action: Position to place mark (0-8)
+        Returns:
+            observation, reward, terminated, truncated, info. The reward is
+            for the player who made this move. After an illegal move info
+            contains "invalid_move": True.
+        Raises:
+            ValueError: If the game is already over
+        """
+        if self.game_over:
+            raise ValueError("Game is already over. Call reset() to start new game.")
+        # Check the range before touching the board: a negative position
+        # would wrap around to another cell and a position above 8 would
+        # raise IndexError instead of being reported as an invalid move.
+        if not 0 <= action < 9 or self.board.flat[action] != 0:
+            # Invalid move - penalize and end game; the opponent wins.
+            self.game_over = True
+            self.winner = -self.current_player
+            info = self._get_info()
+            info["invalid_move"] = True
+            return self._get_observation(), -1.0, True, False, info
+        # Convert action to row, col and make the move
+        row, col = divmod(action, 3)
+        self.board[row, col] = self.current_player
+        self.move_count += 1
+        # Check for win
+        winner = self._check_winner()
+        if winner is not None:
+            self.game_over = True
+            self.winner = winner
+            reward = 1.0 if winner == self.current_player else -1.0
+            terminated = True
+        elif self.move_count >= 9:
+            # Draw
+            self.game_over = True
+            reward = 0.0
+            terminated = True
+        else:
+            # Game continues
+            reward = 0.0
+            terminated = False
+            self.current_player *= -1  # Switch player
+        observation = self._get_observation()
+        info = self._get_info()
+        return observation, reward, terminated, False, info
+    def _get_observation(self) -> np.ndarray:
+        """Get a copy of the current board state."""
+        return self.board.copy()
+    def _get_info(self) -> Dict[str, Any]:
+        """Get additional info about the game state."""
+        return {
+            "current_player": self.current_player,
+            "game_over": self.game_over,
+            "winner": self.winner,
+            "move_count": self.move_count,
+            "valid_actions": self._get_valid_actions()
+        }
+    def _get_valid_actions(self) -> list:
+        """Get list of valid actions (empty positions) as plain ints."""
+        return np.flatnonzero(self.board == 0).tolist()
+    def _check_winner(self) -> Optional[int]:
+        """
+        Check if there's a winner.
+        Returns:
+            1 if player 1 wins, -1 if player 2 wins, None if no winner
+            (always a plain int, never a numpy scalar)
+        """
+        # A line is complete when its three cells sum to +3 or -3.
+        line_totals = (
+            *self.board.sum(axis=1),  # rows
+            *self.board.sum(axis=0),  # columns
+            self.board.trace(),  # main diagonal
+            np.fliplr(self.board).trace(),  # anti-diagonal
+        )
+        for total in line_totals:
+            if abs(total) == 3:
+                return 1 if total > 0 else -1
+        return None
+    def render(self) -> Optional[np.ndarray]:
+        """Render the game state according to render_mode."""
+        if self.render_mode == "human":
+            self._render_human()
+        elif self.render_mode == "rgb_array":
+            return self._render_rgb_array()
+    def _render_human(self):
+        """Print the board to console; empty cells show their position number."""
+        print("\n" + "="*13)
+        for row in range(3):
+            print("|", end="")
+            for col in range(3):
+                cell = self.board[row, col]
+                if cell == 1:
+                    print(" X ", end="|")
+                elif cell == -1:
+                    print(" O ", end="|")
+                else:
+                    print(f" {row*3 + col} ", end="|")
+            print()
+            print("="*13)
+        if self.game_over:
+            if self.winner is not None:
+                winner_symbol = "X" if self.winner == 1 else "O"
+                print(f"Game Over! Winner: {winner_symbol}")
+            else:
+                print("Game Over! It's a draw!")
+    def _render_rgb_array(self) -> np.ndarray:
+        """Render as a 3x3 RGB array, one pixel per cell."""
+        # Player 1 (X) = Red, Player 2 (O) = Blue, Empty = White
+        rgb = np.full((3, 3, 3), 255, dtype=np.uint8)
+        rgb[self.board == 1] = [255, 0, 0]  # Red
+        rgb[self.board == -1] = [0, 0, 255]  # Blue
+        return rgb
+    def get_action_mask(self) -> np.ndarray:
+        """Get mask of valid actions (1 for valid, 0 for invalid)."""
+        return (self.board.ravel() == 0).astype(np.int8)
+def create_tictactoe_env(render_mode: Optional[str] = None) -> TicTacToeEnv:
+    """
+    Factory function to create a TicTacToe environment.
+    Args:
+        render_mode: Passed to TicTacToeEnv; the default None keeps the
+            behaviour of calling the factory without arguments
+    Returns:
+        A new TicTacToeEnv
+    """
+    return TicTacToeEnv(render_mode=render_mode)
+if __name__ == "__main__":
+    # Manual demo: play a scripted game and print every position.
+    env = TicTacToeEnv(render_mode="human")
+    obs, info = env.reset()
+    print("Initial state:")
+    env.render()
+    # X takes the top row (0, 1, 2) while O answers with 4 and 3.
+    moves = [0, 4, 1, 3, 2]  # X wins
+    for move in moves:
+        if env.game_over:
+            continue
+        obs, reward, terminated, truncated, info = env.step(move)
+        print(f"\nMove: {move}, Reward: {reward}")
+        env.render()
+        if terminated:
+            print(f"Game terminated! Final reward: {reward}")
+            break

test_games.py ADDED Viewed

	@@ -0,0 +1,82 @@

+#!/usr/bin/env python3
+"""
+Test script for game environments.
+"""
+import sys
+import os
+sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
+from games import TicTacToeEnv, KuhnPokerEnv, create_tictactoe_env, create_kuhn_poker_env
+from games.game_utils import get_available_games, get_game_info, play_random_game
+def test_tictactoe():
+    """Test TicTacToe environment: reset state and the effect of one move."""
+    print("Testing TicTacToe...")
+    env = create_tictactoe_env()
+    try:
+        obs, info = env.reset()
+        print(f"Initial observation shape: {obs.shape}")
+        print(f"Action space: {env.action_space}")
+        print(f"Observation space: {env.observation_space}")
+        assert obs.shape == (3, 3), f"unexpected observation shape {obs.shape}"
+        assert not obs.any(), "board should be empty after reset"
+        # Test a move
+        action = 0
+        obs, reward, terminated, truncated, info = env.step(action)
+        print(f"After move {action}: reward={reward}, terminated={terminated}")
+        assert obs[0, 0] == 1, "player 1's mark should be on position 0"
+        assert reward == 0.0, f"first move should give no reward, got {reward}"
+        assert not terminated and not truncated, "game should continue after one move"
+        assert action not in info["valid_actions"], "occupied position is still listed as valid"
+    finally:
+        # Close the environment even when an assertion fails.
+        env.close()
+    print("TicTacToe test passed!\n")
+def test_kuhn_poker():
+    """Test Kuhn Poker environment: the deal and player 1's opening check."""
+    print("Testing Kuhn Poker...")
+    env = create_kuhn_poker_env()
+    try:
+        obs, info = env.reset()
+        print(f"Initial observation: {obs}")
+        print(f"Action space: {env.action_space}")
+        print(f"Observation space: {env.observation_space}")
+        assert obs.shape == (4,), f"unexpected observation shape {obs.shape}"
+        assert info["player1_card"] != info["player2_card"], "both players hold the same card"
+        assert info["pot"] == 2, f"pot should hold the two antes, got {info['pot']}"
+        # Test a move
+        action = 0  # Check/Call
+        obs, reward, terminated, truncated, info = env.step(action)
+        print(f"After action {action}: reward={reward}, terminated={terminated}")
+        assert reward == 0.0, f"opening check should give no reward, got {reward}"
+        assert not terminated and not truncated, "game should continue after the opening check"
+        assert info["current_player"] == 2, "player 2 should act after player 1 checks"
+    finally:
+        # Close the environment even when an assertion fails.
+        env.close()
+    print("Kuhn Poker test passed!\n")
+def test_game_utils():
+    """Test game utility functions: registry listing, game info and random play."""
+    print("Testing game utilities...")
+    # Test available games
+    games = get_available_games()
+    print(f"Available games: {games}")
+    assert "tictactoe" in games and "kuhn_poker" in games, f"missing games in {games}"
+    # Test game info
+    for game_name in games:
+        info = get_game_info(game_name)
+        print(f"{game_name} info: {info['description']}")
+        assert info["name"] == game_name, f"info reports the wrong name for {game_name}"
+        assert info["players"] == 2, f"{game_name} should be a two-player game"
+    # Test a full random game; TicTacToe needs between 5 and 9 moves to end
+    result = play_random_game("tictactoe", seed=0)
+    print(f"Random TicTacToe game took {result['step_count']} steps")
+    assert 5 <= result["step_count"] <= 9, f"implausible game length {result['step_count']}"
+    assert len(result["actions_taken"]) == result["step_count"], "action log does not match step count"
+    print("Game utilities test passed!\n")
+def main():
+    """
+    Run all tests.
+    Returns:
+        0 if every test passed, 1 if any test raised
+    """
+    import traceback
+    print("Running game environment tests...\n")
+    try:
+        test_tictactoe()
+        test_kuhn_poker()
+        test_game_utils()
+        print("All tests passed! ✅")
+    except Exception as e:
+        # Name the exception type as well: str(e) is empty for a bare
+        # AssertionError. The traceback shows which check failed.
+        print(f"Test failed: {type(e).__name__}: {e}")
+        traceback.print_exc()
+        return 1
+    return 0
+if __name__ == "__main__":
+    sys.exit(main())