from typing import Optional

from ray.rllib.env.multi_agent_env import MultiAgentEnv
from ray.rllib.utils.annotations import PublicAPI
from ray.rllib.utils.gym import convert_old_gym_space_to_gymnasium_space


@PublicAPI
class Connect4Env(MultiAgentEnv):
    """An interface to the PettingZoo MARL environment library.

    See: https://github.com/Farama-Foundation/PettingZoo

    Inherits from MultiAgentEnv and exposes a given AEC
    (actor-environment-cycle) game from the PettingZoo project via the
    MultiAgentEnv public API.

    Note that the wrapper has some important limitations:

    1. All agents have the same action_spaces and observation_spaces.
       Note: If, within your AEC game, agents do not have homogeneous action /
       observation spaces, apply SuperSuit wrappers to add padding
       functionality:
       https://github.com/Farama-Foundation/SuperSuit#built-in-multi-agent-only-functions
    2. Environments are positive sum games (-> agents are expected to
       cooperate to maximize reward). This isn't a hard restriction; it's just
       that standard algorithms aren't expected to work well in highly
       competitive games.
    """

    def __init__(self, env):
        super().__init__()
        self.env = env
        env.reset()

        # Since all agents have the same spaces, do not provide full
        # observation- and action-spaces as Dicts, mapping agent IDs to the
        # individual agents' spaces. Instead, `self.[action|observation]_space`
        # are the single-agent spaces.
        self._obs_space_in_preferred_format = False
        self._action_space_in_preferred_format = False

        # Collect the individual agents' spaces (they should all be the same):
        first_obs_space = self.env.observation_space(self.env.agents[0])
        first_action_space = self.env.action_space(self.env.agents[0])

        for agent in self.env.agents:
            if self.env.observation_space(agent) != first_obs_space:
                raise ValueError(
                    "Observation spaces for all agents must be identical. "
                    "Perhaps SuperSuit's pad_observations wrapper can help "
                    "(usage: `supersuit.aec_wrappers.pad_observations(env)`)."
                )
            if self.env.action_space(agent) != first_action_space:
                raise ValueError(
                    "Action spaces for all agents must be identical. Perhaps "
                    "SuperSuit's pad_action_space wrapper can help (usage: "
                    "`supersuit.aec_wrappers.pad_action_space(env)`)."
                )

        # Convert from gym to gymnasium, if necessary.
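        # NOTE (added comment, an assumption rather than part of the original
        # file): environments built against older PettingZoo releases may still
        # return `gym` spaces; the utility below maps such spaces to their
        # `gymnasium` equivalents and leaves gymnasium spaces unchanged.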
        self.observation_space = convert_old_gym_space_to_gymnasium_space(
            first_obs_space
        )
        self.action_space = convert_old_gym_space_to_gymnasium_space(
            first_action_space
        )

        self._agent_ids = set(self.env.agents)

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        info = self.env.reset(seed=seed, options=options)
        # Only the agent that acts first receives an observation; `info` may be
        # None depending on the PettingZoo version, hence the `or {}`.
        return (
            {self.env.agent_selection: self.env.observe(self.env.agent_selection)},
            info or {},
        )

    def step(self, action):
        # Forward the action of the currently acting agent to the AEC env.
        self.env.step(action[self.env.agent_selection])
        obs_d = {}
        rew_d = {}
        terminated_d = {}
        truncated_d = {}
        info_d = {}
        # Collect results for the next agent to act; agents that are already
        # done are stepped with `None` and skipped over.
        while self.env.agents:
            obs, rew, terminated, truncated, info = self.env.last()
            agent_id = self.env.agent_selection
            obs_d[agent_id] = obs
            rew_d[agent_id] = rew
            terminated_d[agent_id] = terminated
            truncated_d[agent_id] = truncated
            info_d[agent_id] = info
            if (
                self.env.terminations[self.env.agent_selection]
                or self.env.truncations[self.env.agent_selection]
            ):
                self.env.step(None)
            else:
                break

        # The episode as a whole is only done once no agents remain.
        all_gone = not self.env.agents
        terminated_d["__all__"] = all_gone and all(terminated_d.values())
        truncated_d["__all__"] = all_gone and all(truncated_d.values())

        return obs_d, rew_d, terminated_d, truncated_d, info_d

    def close(self):
        self.env.close()

    def render(self):
        return self.env.render()

    @property
    def get_sub_environments(self):
        return self.env.unwrapped
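

# ----------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the wrapper itself).
# It assumes PettingZoo is installed and uses the classic `connect_four_v3`
# AEC game; any AEC environment with homogeneous agent spaces would work the
# same way.
# ----------------------------------------------------------------------------
if __name__ == "__main__":
    import numpy as np
    from pettingzoo.classic import connect_four_v3

    env = Connect4Env(connect_four_v3.env())
    obs, infos = env.reset(seed=0)

    # Play a few turns with random legal moves. connect_four_v3 observations
    # are dicts holding an "action_mask" of the currently legal columns.
    for _ in range(5):
        agent_id = next(iter(obs))
        legal = np.flatnonzero(obs[agent_id]["action_mask"])
        action = {agent_id: int(np.random.choice(legal))}
        obs, rewards, terminateds, truncateds, infos = env.step(action)
        if terminateds["__all__"] or truncateds["__all__"]:
            break

    env.close()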