ClementBM committed
Commit ffe7549
Parent: 12d2509

first commit

Files changed (47)
  1. .vscode/extensions.json +7 -0
  2. .vscode/settings.json +28 -0
  3. README.md +1 -1
  4. connectfour/__init__.py +0 -0
  5. connectfour/__pycache__/__init__.cpython-38.pyc +0 -0
  6. connectfour/__pycache__/app.cpython-38.pyc +0 -0
  7. connectfour/app.py +250 -0
  8. connectfour/checkpoint/.Rhistory +0 -0
  9. connectfour/checkpoint/.is_checkpoint +0 -0
  10. connectfour/checkpoint/.tune_metadata +0 -0
  11. connectfour/checkpoint/__init__.py +3 -0
  12. connectfour/checkpoint/algorithm_state.pkl +3 -0
  13. connectfour/checkpoint/policies/always_same/policy_state.pkl +3 -0
  14. connectfour/checkpoint/policies/always_same/rllib_checkpoint.json +1 -0
  15. connectfour/checkpoint/policies/beat_last/policy_state.pkl +3 -0
  16. connectfour/checkpoint/policies/beat_last/rllib_checkpoint.json +1 -0
  17. connectfour/checkpoint/policies/learned/policy_state.pkl +3 -0
  18. connectfour/checkpoint/policies/learned/rllib_checkpoint.json +1 -0
  19. connectfour/checkpoint/policies/learned_v1/policy_state.pkl +3 -0
  20. connectfour/checkpoint/policies/learned_v1/rllib_checkpoint.json +1 -0
  21. connectfour/checkpoint/policies/learned_v2/policy_state.pkl +3 -0
  22. connectfour/checkpoint/policies/learned_v2/rllib_checkpoint.json +1 -0
  23. connectfour/checkpoint/policies/learned_v3/policy_state.pkl +3 -0
  24. connectfour/checkpoint/policies/learned_v3/rllib_checkpoint.json +1 -0
  25. connectfour/checkpoint/policies/learned_v4/policy_state.pkl +3 -0
  26. connectfour/checkpoint/policies/learned_v4/rllib_checkpoint.json +1 -0
  27. connectfour/checkpoint/policies/learned_v5/policy_state.pkl +3 -0
  28. connectfour/checkpoint/policies/learned_v5/rllib_checkpoint.json +1 -0
  29. connectfour/checkpoint/policies/linear/policy_state.pkl +3 -0
  30. connectfour/checkpoint/policies/linear/rllib_checkpoint.json +1 -0
  31. connectfour/checkpoint/policies/random/policy_state.pkl +3 -0
  32. connectfour/checkpoint/policies/random/rllib_checkpoint.json +1 -0
  33. connectfour/checkpoint/rllib_checkpoint.json +1 -0
  34. connectfour/training/__init__.py +0 -0
  35. connectfour/training/__pycache__/__init__.cpython-38.pyc +0 -0
  36. connectfour/training/__pycache__/callbacks.cpython-38.pyc +0 -0
  37. connectfour/training/__pycache__/dummy_policies.cpython-38.pyc +0 -0
  38. connectfour/training/__pycache__/models.cpython-38.pyc +0 -0
  39. connectfour/training/__pycache__/wrappers.cpython-38.pyc +0 -0
  40. connectfour/training/callbacks.py +88 -0
  41. connectfour/training/dummy_policies.py +130 -0
  42. connectfour/training/models.py +119 -0
  43. connectfour/training/train.py +140 -0
  44. connectfour/training/wrappers.py +112 -0
  45. poetry.lock +0 -0
  46. pyproject.toml +37 -0
  47. requirements.txt +141 -0
.vscode/extensions.json ADDED
@@ -0,0 +1,7 @@
+{
+    // See https://go.microsoft.com/fwlink/?LinkId=827846
+    // for the documentation about the extensions.json format
+    "recommendations": [
+        "ms-python.python"
+    ]
+}
.vscode/settings.json ADDED
@@ -0,0 +1,28 @@
+{
+    "python.linting.enabled": true,
+    "python.linting.mypyEnabled": true,
+    "python.pythonPath": "${env:PYTHON_VENV_LOC}",
+    "python.testing.unittestEnabled": false,
+    "python.testing.nosetestsEnabled": false,
+    "python.testing.pytestEnabled": true,
+    "python.testing.pytestArgs": [
+        "tests"
+    ],
+    "editor.tabSize": 4,
+    "[python]": {
+        "editor.formatOnSave": true
+    },
+    "python.formatting.provider": "black",
+    "files.exclude": {
+        ".mypy_cache": true,
+        ".pytest_cache": true,
+        ".venv": true,
+        "**/__pycache__": true
+    },
+    "files.watcherExclude": {
+        ".venv/**": true,
+        "**/__pycache__/**": true,
+        ".mypy_cache/**": true,
+        ".pytest_cache/**": true
+    }
+}
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: pink
 colorTo: blue
 sdk: gradio
 sdk_version: 3.23.0
-app_file: app.py
+app_file: connectfour/app.py
 pinned: false
 ---
connectfour/__init__.py ADDED
File without changes
connectfour/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (173 Bytes).
connectfour/__pycache__/app.cpython-38.pyc ADDED
Binary file (6.51 kB).
connectfour/app.py ADDED
@@ -0,0 +1,250 @@
+import gradio as gr
+from ray.serve.gradio_integrations import GradioServer, GradioIngress
+
+import gradio as gr
+from pettingzoo.classic import connect_four_v3
+import ray.rllib.algorithms.ppo as ppo
+import numpy as np
+import time
+from ray.tune import register_env
+from connectfour.training.models import Connect4MaskModel
+from connectfour.checkpoint import CHECKPOINT
+
+from connectfour.training.wrappers import Connect4Env
+
+demo = gr.Blocks()
+
+POLICY_ID = "learned_v5"
+
+
+class Connect4:
+    def __init__(self, who_plays_first) -> None:
+        self.init_env(who_plays_first)
+
+    def init_env(self, who_plays_first):
+        # define how to make the environment
+        env_creator = lambda config: connect_four_v3.env(render_mode="rgb_array")
+
+        # register the environment under an rllib name
+        register_env("connect4", lambda config: Connect4Env(env_creator(config)))
+
+        orig_env = connect_four_v3.env(render_mode="rgb_array")
+        self.env = Connect4Env(orig_env)
+
+        self.done = False
+        self.obs, info = self.env.reset()
+
+        if who_plays_first == "You":
+            self.human = self.player_id
+        else:
+            self.play()
+            self.human = self.player_id
+
+        return self.render_and_state
+
+    def get_algo(self, checkpoint):
+        config = (
+            ppo.PPOConfig()
+            .environment("connect4")
+            .framework("torch")
+            .training(model={"custom_model": Connect4MaskModel})
+        )
+        config.explore = False
+        self.algo = config.build()
+        self.algo.restore(checkpoint)
+
+    def play(self, action=None):
+        if self.human != self.player_id:
+            action = self.algo.compute_single_action(
+                self.obs[self.player_id], policy_id=POLICY_ID
+            )
+
+        if action not in self.legal_moves:
+            action = np.random.choice(self.legal_moves)
+
+        player_actions = {self.player_id: action}
+
+        self.obs, self.reward, terminated, truncated, info = self.env.step(
+            player_actions
+        )
+        self.done = terminated["__all__"] or truncated["__all__"]
+        return self.render_and_state
+
+    @property
+    def render_and_state(self):
+        end_message = "End of the game"
+        if self.done:
+            if self.reward[self.human] > 0:
+                end_message += ": You WIN !!"
+            elif self.reward[self.human] < 0:
+                end_message += ": You LOSE !!"
+            return self.env.render(), end_message
+
+        return self.env.render(), "Game On"
+
+    @property
+    def player_id(self):
+        return list(self.obs.keys())[0]
+
+    @property
+    def legal_moves(self):
+        return np.arange(7)[self.obs[self.player_id]["action_mask"] == 1]
+
+
+with demo:
+    connect4 = Connect4("You")
+    connect4.get_algo(str(CHECKPOINT))
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("# Let's Play Connect Four !")
+
+            who_plays_first = gr.Radio(
+                label="Who plays first", choices=["You", "Bot"], value="You"
+            )
+            reinitialize = gr.Button("New Game")
+
+            game_state = gr.Text(value="Game On", interactive=False, label="Status")
+
+        with gr.Column(scale=1):
+            output = gr.Image(
+                label="Connect Four Grid",
+                type="numpy",
+                show_label=False,
+                value=connect4.env.render(),
+            )
+
+    with gr.Row():
+        with gr.Column(scale=1, min_width=20):
+            drop_token0_btn = gr.Button("X")
+        with gr.Column(scale=1, min_width=20):
+            drop_token1_btn = gr.Button("X")
+        with gr.Column(scale=1, min_width=20):
+            drop_token2_btn = gr.Button("X")
+        with gr.Column(scale=1, min_width=20):
+            drop_token3_btn = gr.Button("X")
+        with gr.Column(scale=1, min_width=20):
+            drop_token4_btn = gr.Button("X")
+        with gr.Column(scale=1, min_width=20):
+            drop_token5_btn = gr.Button("X")
+        with gr.Column(scale=1, min_width=20):
+            drop_token6_btn = gr.Button("X")
+
+    who_plays_first.change(
+        connect4.init_env, who_plays_first, outputs=[output, game_state]
+    )
+
+    def reinit_game(who_plays_first):
+        output, game_state = connect4.init_env(who_plays_first)
+        return output, game_state, gr.Checkbox.update(interactive=True)
+
+    reinitialize.click(
+        reinit_game, who_plays_first, outputs=[output, game_state, who_plays_first]
+    )
+
+    def wait(game_state_value):
+        if game_state_value == "Game On":
+            time.sleep(1)
+            return gr.Checkbox.update(interactive=False)
+        else:
+            return gr.Checkbox.update(interactive=True)
+
+    def bot(game_state_value):
+        if game_state_value == "Game On":
+            rendered_env = connect4.play()
+            return *rendered_env, gr.Checkbox.update(
+                interactive=False
+            ) if rendered_env[1] == "Game On" else gr.Checkbox.update(interactive=True)
+        return (
+            gr.Image.update(),
+            game_state_value,
+            gr.Checkbox.update(interactive=True),
+        )
+
+    drop_token0_btn.click(
+        lambda: connect4.play(0),
+        outputs=[output, game_state],
+    ).then(
+        wait, inputs=[game_state], outputs=who_plays_first
+    ).then(bot, inputs=[game_state], outputs=[output, game_state, who_plays_first])
+
+    drop_token1_btn.click(
+        lambda: connect4.play(1),
+        outputs=[output, game_state],
+    ).then(
+        wait, inputs=[game_state], outputs=who_plays_first
+    ).then(bot, inputs=[game_state], outputs=[output, game_state, who_plays_first])
+
+    drop_token2_btn.click(
+        lambda: connect4.play(2),
+        outputs=[output, game_state],
+    ).then(
+        wait, inputs=[game_state], outputs=who_plays_first
+    ).then(bot, inputs=[game_state], outputs=[output, game_state, who_plays_first])
+
+    drop_token3_btn.click(
+        lambda: connect4.play(3),
+        outputs=[output, game_state],
+    ).then(
+        wait, inputs=[game_state], outputs=who_plays_first
+    ).then(bot, inputs=[game_state], outputs=[output, game_state, who_plays_first])
+
+    drop_token4_btn.click(
+        lambda: connect4.play(4),
+        outputs=[output, game_state],
+    ).then(
+        wait, inputs=[game_state], outputs=who_plays_first
+    ).then(bot, inputs=[game_state], outputs=[output, game_state, who_plays_first])
+
+    drop_token5_btn.click(
+        lambda: connect4.play(5),
+        outputs=[output, game_state],
+    ).then(
+        wait, inputs=[game_state], outputs=who_plays_first
+    ).then(bot, inputs=[game_state], outputs=[output, game_state, who_plays_first])
+
+    drop_token6_btn.click(
+        lambda: connect4.play(6),
+        outputs=[output, game_state],
+    ).then(
+        wait, inputs=[game_state], outputs=who_plays_first
+    ).then(bot, inputs=[game_state], outputs=[output, game_state, who_plays_first])
+
+    def game_state_change(value):
+        if value == "Game On":
+            return [
+                gr.Button.update(interactive=True),
+                gr.Button.update(interactive=True),
+                gr.Button.update(interactive=True),
+                gr.Button.update(interactive=True),
+                gr.Button.update(interactive=True),
+                gr.Button.update(interactive=True),
+                gr.Button.update(interactive=True),
+            ]
+        else:
+            return [
+                gr.Button.update(interactive=False),
+                gr.Button.update(interactive=False),
+                gr.Button.update(interactive=False),
+                gr.Button.update(interactive=False),
+                gr.Button.update(interactive=False),
+                gr.Button.update(interactive=False),
+                gr.Button.update(interactive=False),
+            ]
+
+    game_state.change(
+        game_state_change,
+        game_state,
+        outputs=[
+            drop_token0_btn,
+            drop_token1_btn,
+            drop_token2_btn,
+            drop_token3_btn,
+            drop_token4_btn,
+            drop_token5_btn,
+            drop_token6_btn,
+        ],
+    )
+
+
+demo.launch()
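
Taken together, app.py builds the wrapped environment, restores the PPO algorithm from the bundled checkpoint, and queries the "learned_v5" policy one move at a time. A condensed, Gradio-free sketch of that loop, using only calls that appear in the file above (the column played and variable names are illustrative):

import ray.rllib.algorithms.ppo as ppo
from pettingzoo.classic import connect_four_v3
from ray.tune import register_env

from connectfour.checkpoint import CHECKPOINT
from connectfour.training.models import Connect4MaskModel
from connectfour.training.wrappers import Connect4Env

# register the env name that PPOConfig.environment("connect4") refers to
register_env(
    "connect4",
    lambda cfg: Connect4Env(connect_four_v3.env(render_mode="rgb_array")),
)

config = (
    ppo.PPOConfig()
    .environment("connect4")
    .framework("torch")
    .training(model={"custom_model": Connect4MaskModel})
)
config.explore = False
algo = config.build()
algo.restore(str(CHECKPOINT))

env = Connect4Env(connect_four_v3.env(render_mode="rgb_array"))
obs, _ = env.reset()
agent = list(obs.keys())[0]
# let the restored policy pick a column for the current player
action = algo.compute_single_action(obs[agent], policy_id="learned_v5")
obs, reward, terminated, truncated, _ = env.step({agent: action})
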
connectfour/checkpoint/.Rhistory ADDED
File without changes
connectfour/checkpoint/.is_checkpoint ADDED
File without changes
connectfour/checkpoint/.tune_metadata ADDED
Binary file (15.6 kB).
connectfour/checkpoint/__init__.py ADDED
@@ -0,0 +1,3 @@
+from pathlib import Path
+
+CHECKPOINT = Path(__file__).parent.absolute()
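
This makes the checkpoint directory importable as a constant, so nothing has to hard-code a path. A minimal usage sketch, mirroring how connectfour/app.py consumes it:

from connectfour.checkpoint import CHECKPOINT

# CHECKPOINT is the connectfour/checkpoint/ directory itself, i.e. the folder
# holding algorithm_state.pkl, rllib_checkpoint.json and policies/.
print(CHECKPOINT)
# algo.restore(str(CHECKPOINT))  # as done in connectfour/app.py
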
connectfour/checkpoint/algorithm_state.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbbc198c3406897931f5f18046a88181b8abff1aedbea1d869329731c9a50853
+size 66321
connectfour/checkpoint/policies/always_same/policy_state.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d278413093ad1bc4f227279e3dab7be04ebd70ca1ed156a1363515c69d0a858e
+size 10992
connectfour/checkpoint/policies/always_same/rllib_checkpoint.json ADDED
@@ -0,0 +1 @@
+{"type": "Policy", "checkpoint_version": "1.0", "ray_version": "2.3.1", "ray_commit": "5f14cee8dfc6d61ec4fd3bc2c440f9944e92b33a"}
connectfour/checkpoint/policies/beat_last/policy_state.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd422258c16de0866599730a5a5b2b48e2ee81cbae69f9d5471deeae76c42b47
+size 10992
connectfour/checkpoint/policies/beat_last/rllib_checkpoint.json ADDED
@@ -0,0 +1 @@
+{"type": "Policy", "checkpoint_version": "1.0", "ray_version": "2.3.1", "ray_commit": "5f14cee8dfc6d61ec4fd3bc2c440f9944e92b33a"}
connectfour/checkpoint/policies/learned/policy_state.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a517583e5fcad7e483bca619723583cc6928499390c1fcfc25d907e109cd4b4
+size 2139442
connectfour/checkpoint/policies/learned/rllib_checkpoint.json ADDED
@@ -0,0 +1 @@
+{"type": "Policy", "checkpoint_version": "1.0", "ray_version": "2.3.1", "ray_commit": "5f14cee8dfc6d61ec4fd3bc2c440f9944e92b33a"}
connectfour/checkpoint/policies/learned_v1/policy_state.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:276c26007c2419a688c27f9dfa70c20fecb468a0aa07d28d6a9e8099bbc849be
+size 2139439
connectfour/checkpoint/policies/learned_v1/rllib_checkpoint.json ADDED
@@ -0,0 +1 @@
+{"type": "Policy", "checkpoint_version": "1.0", "ray_version": "2.3.1", "ray_commit": "5f14cee8dfc6d61ec4fd3bc2c440f9944e92b33a"}
connectfour/checkpoint/policies/learned_v2/policy_state.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e37a485d3a54f7a8b194693e7a61f790e67071358130178fa01cdbd840c4a4da
+size 2139439
connectfour/checkpoint/policies/learned_v2/rllib_checkpoint.json ADDED
@@ -0,0 +1 @@
+{"type": "Policy", "checkpoint_version": "1.0", "ray_version": "2.3.1", "ray_commit": "5f14cee8dfc6d61ec4fd3bc2c440f9944e92b33a"}
connectfour/checkpoint/policies/learned_v3/policy_state.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f90899ae98a387e312333b234041c68b9c50da4af92ee5250686087a39eebb3
+size 2139439
connectfour/checkpoint/policies/learned_v3/rllib_checkpoint.json ADDED
@@ -0,0 +1 @@
+{"type": "Policy", "checkpoint_version": "1.0", "ray_version": "2.3.1", "ray_commit": "5f14cee8dfc6d61ec4fd3bc2c440f9944e92b33a"}
connectfour/checkpoint/policies/learned_v4/policy_state.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3af3b3fe41bac489cb693af387b1ccc4437a532a78d539b3abb4cc5f77929592
+size 2139439
connectfour/checkpoint/policies/learned_v4/rllib_checkpoint.json ADDED
@@ -0,0 +1 @@
+{"type": "Policy", "checkpoint_version": "1.0", "ray_version": "2.3.1", "ray_commit": "5f14cee8dfc6d61ec4fd3bc2c440f9944e92b33a"}
connectfour/checkpoint/policies/learned_v5/policy_state.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2b28b979e2f4411d196e03ca75ea7f25f7601bb997aa8bcdcf1d49c9ea30754
+size 2139439
connectfour/checkpoint/policies/learned_v5/rllib_checkpoint.json ADDED
@@ -0,0 +1 @@
+{"type": "Policy", "checkpoint_version": "1.0", "ray_version": "2.3.1", "ray_commit": "5f14cee8dfc6d61ec4fd3bc2c440f9944e92b33a"}
connectfour/checkpoint/policies/linear/policy_state.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f70d44ac661632dc0557204abe34308dfb25b800a668b49c2efd9a2a73a7bc0
+size 10992
connectfour/checkpoint/policies/linear/rllib_checkpoint.json ADDED
@@ -0,0 +1 @@
+{"type": "Policy", "checkpoint_version": "1.0", "ray_version": "2.3.1", "ray_commit": "5f14cee8dfc6d61ec4fd3bc2c440f9944e92b33a"}
connectfour/checkpoint/policies/random/policy_state.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3b1ab86bada035779feedb2b92ae0a64f6d9474bb4f0ae44324e17d65659764
+size 10992
connectfour/checkpoint/policies/random/rllib_checkpoint.json ADDED
@@ -0,0 +1 @@
+{"type": "Policy", "checkpoint_version": "1.0", "ray_version": "2.3.1", "ray_commit": "5f14cee8dfc6d61ec4fd3bc2c440f9944e92b33a"}
connectfour/checkpoint/rllib_checkpoint.json ADDED
@@ -0,0 +1 @@
+{"type": "Algorithm", "checkpoint_version": "1.0", "ray_version": "2.3.1", "ray_commit": "5f14cee8dfc6d61ec4fd3bc2c440f9944e92b33a"}
connectfour/training/__init__.py ADDED
File without changes
connectfour/training/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (182 Bytes).
connectfour/training/__pycache__/callbacks.cpython-38.pyc ADDED
Binary file (2.67 kB).
connectfour/training/__pycache__/dummy_policies.cpython-38.pyc ADDED
Binary file (5.36 kB).
connectfour/training/__pycache__/models.cpython-38.pyc ADDED
Binary file (2.91 kB).
connectfour/training/__pycache__/wrappers.cpython-38.pyc ADDED
Binary file (3.72 kB).
connectfour/training/callbacks.py ADDED
@@ -0,0 +1,88 @@
+from ray.rllib.algorithms.callbacks import DefaultCallbacks
+import numpy as np
+
+
+def create_self_play_callback(win_rate_thr, opponent_policies):
+    class SelfPlayCallback(DefaultCallbacks):
+        win_rate_threshold = win_rate_thr
+
+        def __init__(self):
+            super().__init__()
+            self.current_opponent = 0
+
+        def on_train_result(self, *, algorithm, result, **kwargs):
+            # Get the win rate for the train batch.
+            # Note that normally, one should set up a proper evaluation config,
+            # such that evaluation always happens on the already updated policy,
+            # instead of on the already used train_batch.
+            main_rew = result["hist_stats"].pop("policy_learned_reward")
+            opponent_rew = result["hist_stats"].pop("episode_reward")
+
+            if len(main_rew) != len(opponent_rew):
+                raise Exception(
+                    "len(main_rew) != len(opponent_rew)",
+                    len(main_rew),
+                    len(opponent_rew),
+                    result["hist_stats"].keys(),
+                    "episode len",
+                    len(opponent_rew),
+                )
+
+            won = 0
+            for r_main, r_opponent in zip(main_rew, opponent_rew):
+                if r_main > r_opponent:
+                    won += 1
+            win_rate = won / len(main_rew)
+
+            result["win_rate"] = win_rate
+            print(f"Iter={algorithm.iteration} win-rate={win_rate} -> ", end="")
+
+            # If the win rate is good -> Snapshot the current policy and play against
+            # it next, keeping the snapshot fixed and only improving the "learned"
+            # policy.
+            if win_rate > self.win_rate_threshold:
+                self.current_opponent += 1
+                new_pol_id = f"learned_v{self.current_opponent}"
+                print(
+                    f"Iter={algorithm.iteration} ### Adding new opponent to the mix ({new_pol_id})."
+                )
+
+                # Re-define the mapping function, such that "learned" is forced
+                # to play against any of the previously played policies
+                # (excluding "random").
+                def policy_mapping_fn(agent_id, episode, worker, **kwargs):
+                    # agent_id ends in [0|1] -> policy depends on episode ID.
+                    # This way, we make sure that both policies sometimes play
+                    # agent0 (start player) and sometimes agent1 (player to move 2nd).
+                    return (
+                        "learned"
+                        if episode.episode_id % 2 == int(agent_id[-1:])
+                        else np.random.choice(
+                            opponent_policies
+                            + [
+                                f"learned_v{i}"
+                                for i in range(1, self.current_opponent + 1)
+                            ]
+                        )
+                    )
+
+                new_policy = algorithm.add_policy(
+                    policy_id=new_pol_id,
+                    policy_cls=type(algorithm.get_policy("learned")),
+                    policy_mapping_fn=policy_mapping_fn,
+                )
+
+                # Set the weights of the new policy to the learned policy.
+                # We'll keep training the learned policy, whereas `new_pol_id` will
+                # remain fixed.
+                learned_state = algorithm.get_policy("learned").get_state()
+                new_policy.set_state(learned_state)
+                # We need to sync the just copied local weights (from learned policy)
+                # to all the remote workers as well.
+                algorithm.workers.sync_weights()
+            else:
+                print("not good enough; will keep learning ...")
+
+            result["league_size"] = self.current_opponent + len(opponent_policies) + 1
+
+    return SelfPlayCallback
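
Because create_self_play_callback returns the callback class itself (not an instance), it can be handed straight to RLlib's .callbacks() setter. A short sketch of the wiring, matching how train.py below uses it:

import ray.rllib.algorithms.ppo as ppo
from connectfour.training.callbacks import create_self_play_callback

# Each time the learned policy clears the win-rate bar, the callback freezes
# a snapshot ("learned_v1", "learned_v2", ...) and adds it to the opponent pool.
config = ppo.PPOConfig().callbacks(
    create_self_play_callback(
        win_rate_thr=0.95,
        opponent_policies=["always_same", "beat_last", "random", "linear"],
    )
)
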
connectfour/training/dummy_policies.py ADDED
@@ -0,0 +1,130 @@
+import numpy as np
+import random
+from ray.rllib.policy.policy import Policy
+from ray.rllib.utils.annotations import override
+from ray.rllib.models.modelv2 import restore_original_dimensions
+
+
+class HeuristicBase(Policy):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.exploration = self._create_exploration()
+
+    def learn_on_batch(self, samples):
+        pass
+
+    @override(Policy)
+    def get_weights(self):
+        """No weights to save."""
+        return {}
+
+    @override(Policy)
+    def set_weights(self, weights):
+        """No weights to set."""
+        pass
+
+    @override(Policy)
+    def compute_actions(
+        self,
+        obs_batch,
+        state_batches=None,
+        prev_action_batch=None,
+        prev_reward_batch=None,
+        info_batch=None,
+        episodes=None,
+        **kwargs
+    ):
+        obs_batch = restore_original_dimensions(
+            np.array(obs_batch, dtype=np.float32), self.observation_space, tensorlib=np
+        )
+        return self._do_compute_actions(obs_batch)
+
+    def pick_legal_action(self, legal_action):
+        legal_choices = np.arange(len(legal_action))[legal_action == 1]
+        return np.random.choice(legal_choices)
+
+
+class AlwaysSameHeuristic(HeuristicBase):
+    """
+    Pick a random column and stick with it for the entire episode.
+    """
+
+    _rand_choice = random.choice(range(7))
+
+    def _do_compute_actions(self, obs_batch):
+        def select_action(legal_action):
+            legal_choices = np.arange(len(legal_action))[legal_action == 1]
+
+            if self._rand_choice not in legal_choices:
+                self._rand_choice = np.random.choice(legal_choices)
+
+            return self._rand_choice
+
+        return [select_action(x) for x in obs_batch["action_mask"]], [], {}
+
+
+class LinearHeuristic(HeuristicBase):
+    """
+    Pick a random starting column, then shift the column index by one
+    (in a random fixed direction) on every move.
+    """
+
+    _rand_choice = random.choice(range(7))
+    _rand_sign = np.random.choice([-1, 1])
+
+    def _do_compute_actions(self, obs_batch):
+        def select_action(legal_action):
+            legal_choices = np.arange(len(legal_action))[legal_action == 1]
+
+            self._rand_choice += 1 * self._rand_sign
+
+            if self._rand_choice not in legal_choices:
+                self._rand_choice = np.random.choice(legal_choices)
+
+            return self._rand_choice
+
+        return [select_action(x) for x in obs_batch["action_mask"]], [], {}
+
+
+class BeatLastHeuristic(HeuristicBase):
+    """
+    Answer the opponent: play in a column where the opponent holds more
+    tokens, falling back to a random legal move.
+    """
+
+    def _do_compute_actions(self, obs_batch):
+        def select_action(legal_action, observation):
+            legal_choices = np.arange(len(legal_action))[legal_action == 1]
+
+            obs_sums = np.sum(observation, axis=0)
+
+            desired_actions = np.squeeze(np.argwhere(obs_sums[:, 0] < obs_sums[:, 1]))
+            if desired_actions.size == 0:
+                return np.random.choice(legal_choices)
+
+            if desired_actions.size == 1:
+                desired_action = desired_actions[()]
+            else:
+                desired_action = np.random.choice(desired_actions)
+            if desired_action in legal_choices:
+                return desired_action
+
+            return np.random.choice(legal_choices)
+
+        return (
+            [
+                select_action(x, y)
+                for x, y in zip(obs_batch["action_mask"], obs_batch["observation"])
+            ],
+            [],
+            {},
+        )
+
+
+class RandomHeuristic(HeuristicBase):
+    """
+    Just pick a random legal action.
+    The environment's observation needs to be a dictionary with an
+    'action_mask' key containing the legal actions for the agent.
+    """
+
+    def _do_compute_actions(self, obs_batch):
+        return [self.pick_legal_action(x) for x in obs_batch["action_mask"]], [], {}
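
None of these heuristics learn anything; they exist to give the PPO policy a pool of fixed opponents. They are plugged into the multi-agent config as PolicySpec entries, exactly as train.py does below:

from ray.rllib.policy.policy import PolicySpec
from connectfour.training.dummy_policies import (
    AlwaysSameHeuristic,
    BeatLastHeuristic,
    LinearHeuristic,
    RandomHeuristic,
)

policies = {
    "learned": PolicySpec(),  # the one policy PPO actually trains
    "always_same": PolicySpec(policy_class=AlwaysSameHeuristic),
    "beat_last": PolicySpec(policy_class=BeatLastHeuristic),
    "linear": PolicySpec(policy_class=LinearHeuristic),
    "random": PolicySpec(policy_class=RandomHeuristic),
}
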
connectfour/training/models.py ADDED
@@ -0,0 +1,119 @@
+from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
+from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
+from gymnasium.spaces import Dict
+from ray.rllib.utils.torch_utils import FLOAT_MIN
+from ray.rllib.utils.framework import try_import_torch
+from ray.rllib.algorithms.sac.sac_torch_model import SACTorchModel
+from ray.rllib.utils import override
+
+torch, nn = try_import_torch()
+
+
+class Connect4MaskModel(TorchModelV2, nn.Module):
+    """PyTorch action-masking model for Connect Four."""
+
+    def __init__(
+        self,
+        obs_space,
+        action_space,
+        num_outputs,
+        model_config,
+        name,
+        **kwargs,
+    ):
+        orig_space = getattr(obs_space, "original_space", obs_space)
+
+        assert isinstance(orig_space, Dict)
+        assert "action_mask" in orig_space.spaces
+        assert "observation" in orig_space.spaces
+
+        TorchModelV2.__init__(
+            self, obs_space, action_space, num_outputs, model_config, name, **kwargs
+        )
+        nn.Module.__init__(self)
+
+        self.internal_model = TorchFC(
+            orig_space["observation"],
+            action_space,
+            num_outputs,
+            model_config,
+            name + "_internal",
+        )
+
+    def forward(self, input_dict, state, seq_lens):
+        # Extract the available actions tensor from the observation.
+        action_mask = input_dict["obs"]["action_mask"]
+
+        # Compute the unmasked logits.
+        logits, _ = self.internal_model({"obs": input_dict["obs"]["observation"]})
+
+        # Convert action_mask into a [0.0 || -inf]-type mask.
+        inf_mask = torch.clamp(torch.log(action_mask), min=FLOAT_MIN)
+        masked_logits = logits + inf_mask
+
+        # Return masked logits.
+        return masked_logits, state
+
+    def value_function(self):
+        return self.internal_model.value_function()
+
+
+class SacConnect4MaskModel(SACTorchModel):
+    def __init__(
+        self,
+        obs_space,
+        action_space,
+        num_outputs,
+        model_config,
+        name: str,
+        policy_model_config=None,
+        q_model_config=None,
+        twin_q=False,
+        initial_alpha=1.0,
+        target_entropy=None,
+        **kwargs,
+    ):
+        orig_space = getattr(obs_space, "original_space", obs_space)
+
+        assert isinstance(orig_space, Dict)
+        assert "action_mask" in orig_space.spaces
+        assert "observation" in orig_space.spaces
+
+        super().__init__(
+            obs_space,
+            action_space,
+            num_outputs,
+            model_config,
+            name,
+            policy_model_config,
+            q_model_config,
+            twin_q,
+            initial_alpha,
+            target_entropy,
+            **kwargs,
+        )
+
+        self.internal_model = TorchFC(
+            orig_space["observation"],
+            action_space,
+            num_outputs,
+            model_config,
+            name + "_internal",
+        )
+
+    @override(SACTorchModel)
+    def forward(self, input_dict, state, seq_lens):
+        # Extract the available actions tensor from the observation.
+        action_mask = input_dict["obs"]["action_mask"]
+
+        # Compute the unmasked logits.
+        logits, _ = self.internal_model({"obs": input_dict["obs"]["observation"]})
+
+        # Convert action_mask into a [0.0 || -inf]-type mask.
+        inf_mask = torch.clamp(torch.log(action_mask), min=FLOAT_MIN)
+        masked_logits = logits + inf_mask
+
+        # Return masked logits.
+        return masked_logits, state
+
+    def value_function(self):
+        return self.internal_model.value_function()
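
The masking line is the heart of both models: log(1) = 0 leaves legal logits unchanged, while log(0) = -inf is clamped to FLOAT_MIN, a value so low that softmax assigns the action effectively zero probability. A small self-contained check (the logit values are arbitrary):

import torch
from ray.rllib.utils.torch_utils import FLOAT_MIN

action_mask = torch.tensor([1.0, 0.0, 1.0])  # middle action is illegal
logits = torch.tensor([0.2, 3.0, -0.5])

inf_mask = torch.clamp(torch.log(action_mask), min=FLOAT_MIN)
probs = torch.softmax(logits + inf_mask, dim=-1)
print(probs)  # the illegal action's probability is numerically zero
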
connectfour/training/train.py ADDED
@@ -0,0 +1,140 @@
+import argparse
+import random
+
+import ray
+import ray.rllib.algorithms.ppo as ppo
+from pettingzoo.classic import connect_four_v3
+from ray import air, tune
+from ray.rllib.policy.policy import PolicySpec
+from ray.rllib.utils.framework import try_import_torch
+from ray.tune import CLIReporter, register_env
+
+from connectfour.training.callbacks import create_self_play_callback
+from connectfour.training.dummy_policies import (
+    AlwaysSameHeuristic,
+    BeatLastHeuristic,
+    LinearHeuristic,
+    RandomHeuristic,
+)
+from connectfour.training.models import Connect4MaskModel
+from connectfour.training.wrappers import Connect4Env
+
+torch, nn = try_import_torch()
+
+
+def get_cli_args():
+    """
+    Create the CLI parser and return the parsed arguments.
+
+    python connectfour/training/train.py --num-cpus 4 --num-gpus 1 --stop-iters 10 --win-rate-threshold 0.50
+    python connectfour/training/train.py --num-gpus 1 --stop-iters 10 --win-rate-threshold 0.50
+    python connectfour/training/train.py --num-cpus 5 --num-gpus 1 --stop-iters 200
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--num-cpus", type=int, default=0)
+    parser.add_argument("--num-gpus", type=int, default=0)
+    parser.add_argument("--num-workers", type=int, default=2)
+
+    parser.add_argument(
+        "--stop-iters", type=int, default=200, help="Number of iterations to train."
+    )
+    parser.add_argument(
+        "--stop-timesteps",
+        type=int,
+        default=10000000,
+        help="Number of timesteps to train.",
+    )
+    parser.add_argument(
+        "--win-rate-threshold",
+        type=float,
+        default=0.95,
+        help="Win-rate at which we set up another opponent by freezing the "
+        "current main policy and playing against a uniform distribution "
+        "of previously frozen 'main's from here on.",
+    )
+    args = parser.parse_args()
+    print(f"Running with following CLI args: {args}")
+    return args
+
+
+def select_policy(agent_id, episode, **kwargs):
+    if episode.episode_id % 2 == int(agent_id[-1:]):
+        return "learned"
+    else:
+        return random.choice(["always_same", "beat_last", "random", "linear"])
+
+
+if __name__ == "__main__":
+    args = get_cli_args()
+
+    ray.init(
+        num_cpus=args.num_cpus or None, num_gpus=args.num_gpus, include_dashboard=False
+    )
+
+    # define how to make the environment
+    env_creator = lambda config: connect_four_v3.env(render_mode="rgb_array")
+
+    # register the environment under an rllib name
+    register_env("connect4", lambda config: Connect4Env(env_creator(config)))
+
+    config = (
+        ppo.PPOConfig()
+        .environment("connect4")
+        .framework("torch")
+        .training(model={"custom_model": Connect4MaskModel})
+        .callbacks(
+            create_self_play_callback(
+                win_rate_thr=args.win_rate_threshold,
+                opponent_policies=["always_same", "beat_last", "random", "linear"],
+            )
+        )
+        .rollouts(
+            num_rollout_workers=args.num_workers,
+            num_envs_per_worker=5,
+        )
+        .multi_agent(
+            policies={
+                "learned": PolicySpec(),
+                "always_same": PolicySpec(policy_class=AlwaysSameHeuristic),
+                "linear": PolicySpec(policy_class=LinearHeuristic),
+                "beat_last": PolicySpec(policy_class=BeatLastHeuristic),
+                "random": PolicySpec(policy_class=RandomHeuristic),
+            },
+            policy_mapping_fn=select_policy,
+            policies_to_train=["learned"],
+        )
+    )
+
+    stop = {
+        "timesteps_total": args.stop_timesteps,
+        "training_iteration": args.stop_iters,
+    }
+
+    results = tune.Tuner(
+        "PPO",
+        param_space=config.to_dict(),
+        run_config=air.RunConfig(
+            stop=stop,
+            verbose=2,
+            progress_reporter=CLIReporter(
+                metric_columns={
+                    "training_iteration": "iter",
+                    "time_total_s": "time_total_s",
+                    "timesteps_total": "ts",
+                    "episodes_this_iter": "train_episodes",
+                    "policy_reward_mean/learned": "reward",
+                    "win_rate": "win_rate",
+                    "league_size": "league_size",
+                },
+                sort_by_metric=True,
+            ),
+            checkpoint_config=air.CheckpointConfig(
+                checkpoint_at_end=True,
+                checkpoint_frequency=10,
+            ),
+        ),
+    ).fit()
+
+    print("Best checkpoint", results.get_best_result().checkpoint)
+
+    ray.shutdown()
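
The seat rotation in select_policy (and in the callback's policy_mapping_fn) hinges on the agent ids ending in 0 or 1: on even episode ids the learned policy sits in seat 0 (first mover), on odd ids in seat 1, so it trains from both sides of the board. The rule in isolation:

# agent ids are "player_0" / "player_1"; the last character picks the seat
for episode_id in (10, 11):
    for agent_id in ("player_0", "player_1"):
        role = "learned" if episode_id % 2 == int(agent_id[-1:]) else "opponent"
        print(episode_id, agent_id, role)
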
connectfour/training/wrappers.py ADDED
@@ -0,0 +1,112 @@
+from typing import Optional
+
+from ray.rllib.env.multi_agent_env import MultiAgentEnv
+from ray.rllib.utils.annotations import PublicAPI
+from ray.rllib.utils.gym import convert_old_gym_space_to_gymnasium_space
+
+
+@PublicAPI
+class Connect4Env(MultiAgentEnv):
+    """An interface to the PettingZoo MARL environment library.
+
+    See: https://github.com/Farama-Foundation/PettingZoo
+
+    Inherits from MultiAgentEnv and exposes a given AEC
+    (actor-environment-cycle) game from the PettingZoo project via the
+    MultiAgentEnv public API.
+
+    Note that the wrapper has some important limitations:
+
+    1. All agents have the same action_spaces and observation_spaces.
+       Note: If, within your AEC game, agents do not have homogeneous action /
+       observation spaces, apply SuperSuit wrappers
+       to apply padding functionality: https://github.com/Farama-Foundation/
+       SuperSuit#built-in-multi-agent-only-functions
+    2. Environments are positive-sum games (-> agents are expected to cooperate
+       to maximize reward). This isn't a hard restriction; it's just that
+       standard algorithms aren't expected to work well in highly competitive
+       games."""
+
+    def __init__(self, env):
+        super().__init__()
+        self.env = env
+        env.reset()
+
+        # Since all agents have the same spaces, do not provide full observation-
+        # and action-spaces as Dicts, mapping agent IDs to the individual
+        # agents' spaces. Instead, `self.[action|observation]_space` are the single
+        # agent spaces.
+        self._obs_space_in_preferred_format = False
+        self._action_space_in_preferred_format = False
+
+        # Collect the individual agents' spaces (they should all be the same):
+        first_obs_space = self.env.observation_space(self.env.agents[0])
+        first_action_space = self.env.action_space(self.env.agents[0])
+
+        for agent in self.env.agents:
+            if self.env.observation_space(agent) != first_obs_space:
+                raise ValueError(
+                    "Observation spaces for all agents must be identical. Perhaps "
+                    "SuperSuit's pad_observations wrapper can help (usage: "
+                    "`supersuit.aec_wrappers.pad_observations(env)`"
+                )
+            if self.env.action_space(agent) != first_action_space:
+                raise ValueError(
+                    "Action spaces for all agents must be identical. Perhaps "
+                    "SuperSuit's pad_action_space wrapper can help (usage: "
+                    "`supersuit.aec_wrappers.pad_action_space(env)`"
+                )
+
+        # Convert from gym to gymnasium, if necessary.
+        self.observation_space = convert_old_gym_space_to_gymnasium_space(
+            first_obs_space
+        )
+        self.action_space = convert_old_gym_space_to_gymnasium_space(first_action_space)
+
+        self._agent_ids = set(self.env.agents)
+
+    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
+        info = self.env.reset(seed=seed, options=options)
+        return (
+            {self.env.agent_selection: self.env.observe(self.env.agent_selection)},
+            info or {},
+        )
+
+    def step(self, action):
+        self.env.step(action[self.env.agent_selection])
+        obs_d = {}
+        rew_d = {}
+        terminated_d = {}
+        truncated_d = {}
+        info_d = {}
+        while self.env.agents:
+            obs, rew, terminated, truncated, info = self.env.last()
+            agent_id = self.env.agent_selection
+            obs_d[agent_id] = obs
+            rew_d[agent_id] = rew
+            terminated_d[agent_id] = terminated
+            truncated_d[agent_id] = truncated
+            info_d[agent_id] = info
+            if (
+                self.env.terminations[self.env.agent_selection]
+                or self.env.truncations[self.env.agent_selection]
+            ):
+                self.env.step(None)
+            else:
+                break
+
+        all_gone = not self.env.agents
+        terminated_d["__all__"] = all_gone and all(terminated_d.values())
+        truncated_d["__all__"] = all_gone and all(truncated_d.values())
+
+        return obs_d, rew_d, terminated_d, truncated_d, info_d
+
+    def close(self):
+        self.env.close()
+
+    def render(self):
+        return self.env.render()
+
+    @property
+    def get_sub_environments(self):
+        return self.env.unwrapped
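
The wrapper's contract is RLlib's standard multi-agent one: every return value is a dict keyed by agent id, plus the "__all__" termination flags, and only the agent whose turn it is next appears in the observation dict. A quick interactive check (the column index is arbitrary):

from pettingzoo.classic import connect_four_v3
from connectfour.training.wrappers import Connect4Env

env = Connect4Env(connect_four_v3.env(render_mode="rgb_array"))
obs, info = env.reset()
print(list(obs))  # ['player_0'] -- only the agent that acts next

agent = env.env.agent_selection
obs, rew, terminated, truncated, info = env.step({agent: 3})  # drop in column 3
print(terminated["__all__"], list(obs))  # False ['player_1']
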
poetry.lock ADDED
The diff for this file is too large to render.
pyproject.toml ADDED
@@ -0,0 +1,37 @@
+# https://python-poetry.org/docs/pyproject/
+[tool.poetry]
+name = "connectfour"
+version = "0.1.0"
+description = "Connect Four"
+authors = ["Clément Brutti-Mairesse <clement.brutti.mairesse@gmail.com>"]
+license = "MIT"
+readme = "README.md"
+homepage = "https://huggingface.co/spaces/ClementBM/connectfour"
+repository = "https://huggingface.co/spaces/ClementBM/connectfour"
+keywords = ["connectfour", "connect4", "reinforcement learning"]
+include = [
+    "LICENSE",
+]
+
+[tool.poetry.dependencies]
+python = ">=3.8,<3.11"
+orjson = "3.8.8"
+gradio = "^3.23.0"
+ray = {extras = ["rllib", "serve"], version = "^2.2.0"}
+pettingzoo = "^1.22.4"
+pygame = "^2.3.0"
+torch = "^2.0.0"
+libclang = "15.0.6.1"
+tensorflow-probability = "^0.19.0"
+protobuf = "3.17.0"
+scipy = ">=1.8,<1.9.2"
+
+[tool.poetry.dev-dependencies]
+pylint = "*"
+pytest = "*"
+mypy = "*"
+black = "*"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
requirements.txt ADDED
@@ -0,0 +1,141 @@
+absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11"
+aiofiles==23.1.0 ; python_version >= "3.8" and python_version < "3.11"
+aiohttp-cors==0.7.0 ; python_version >= "3.8" and python_version < "3.11"
+aiohttp==3.8.4 ; python_version >= "3.8" and python_version < "3.11"
+aiorwlock==1.3.0 ; python_version >= "3.8" and python_version < "3.11"
+aiosignal==1.3.1 ; python_version >= "3.8" and python_version < "3.11"
+altair==4.2.2 ; python_version >= "3.8" and python_version < "3.11"
+ansicon==1.89.0 ; python_version >= "3.8" and python_version < "3.11" and platform_system == "Windows"
+anyio==3.6.2 ; python_version >= "3.8" and python_version < "3.11"
+async-timeout==4.0.2 ; python_version >= "3.8" and python_version < "3.11"
+attrs==22.2.0 ; python_version >= "3.8" and python_version < "3.11"
+blessed==1.20.0 ; python_version >= "3.8" and python_version < "3.11"
+cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11"
+certifi==2022.12.7 ; python_version >= "3.8" and python_version < "3.11"
+charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11"
+click==8.1.3 ; python_version >= "3.8" and python_version < "3.11"
+cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11"
+cmake==3.26.1 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.8" and python_version < "3.11"
+colorama==0.4.6 ; python_version >= "3.8" and python_version < "3.11" and platform_system == "Windows"
+colorful==0.5.5 ; python_version >= "3.8" and python_version < "3.11"
+contourpy==1.0.7 ; python_version >= "3.8" and python_version < "3.11"
+cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11"
+decorator==5.1.1 ; python_version >= "3.8" and python_version < "3.11"
+distlib==0.3.6 ; python_version >= "3.8" and python_version < "3.11"
+dm-tree==0.1.8 ; python_version >= "3.8" and python_version < "3.11"
+entrypoints==0.4 ; python_version >= "3.8" and python_version < "3.11"
+fastapi==0.95.0 ; python_version >= "3.8" and python_version < "3.11"
+ffmpy==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+filelock==3.10.7 ; python_version >= "3.8" and python_version < "3.11"
+fonttools==4.39.3 ; python_version >= "3.8" and python_version < "3.11"
+frozenlist==1.3.3 ; python_version >= "3.8" and python_version < "3.11"
+fsspec==2023.3.0 ; python_version >= "3.8" and python_version < "3.11"
+gast==0.5.3 ; python_version >= "3.8" and python_version < "3.11"
+google-api-core==2.8.2 ; python_version >= "3.8" and python_version < "3.11"
+google-auth==2.17.0 ; python_version >= "3.8" and python_version < "3.11"
+googleapis-common-protos==1.56.4 ; python_version >= "3.8" and python_version < "3.11"
+gpustat==1.0.0 ; python_version >= "3.8" and python_version < "3.11"
+gradio==3.23.0 ; python_version >= "3.8" and python_version < "3.11"
+grpcio==1.49.1 ; python_version >= "3.8" and python_version < "3.11" and sys_platform == "darwin"
+grpcio==1.53.0 ; python_version >= "3.8" and python_version < "3.11" and sys_platform != "darwin"
+gymnasium-notices==0.0.1 ; python_version >= "3.8" and python_version < "3.11"
+gymnasium==0.26.3 ; python_version >= "3.8" and python_version < "3.11"
+h11==0.14.0 ; python_version >= "3.8" and python_version < "3.11"
+httpcore==0.16.3 ; python_version >= "3.8" and python_version < "3.11"
+httpx==0.23.3 ; python_version >= "3.8" and python_version < "3.11"
+huggingface-hub==0.13.3 ; python_version >= "3.8" and python_version < "3.11"
+idna==3.4 ; python_version >= "3.8" and python_version < "3.11"
+imageio==2.27.0 ; python_version >= "3.8" and python_version < "3.11"
+importlib-metadata==6.1.0 ; python_version >= "3.8" and python_version < "3.10"
+importlib-resources==5.12.0 ; python_version >= "3.8" and python_version < "3.10"
+jinja2==3.1.2 ; python_version >= "3.8" and python_version < "3.11"
+jinxed==1.2.0 ; python_version >= "3.8" and python_version < "3.11" and platform_system == "Windows"
+jsonschema==4.17.3 ; python_version >= "3.8" and python_version < "3.11"
+kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+lazy-loader==0.2 ; python_version >= "3.8" and python_version < "3.11"
+libclang==15.0.6.1 ; python_version >= "3.8" and python_version < "3.11"
+linkify-it-py==2.0.0 ; python_version >= "3.8" and python_version < "3.11"
+lit==16.0.0 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.8" and python_version < "3.11"
+lz4==4.3.2 ; python_version >= "3.8" and python_version < "3.11"
+markdown-it-py==2.2.0 ; python_version >= "3.8" and python_version < "3.11"
+markdown-it-py[linkify]==2.2.0 ; python_version >= "3.8" and python_version < "3.11"
+markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11"
+matplotlib==3.7.1 ; python_version >= "3.8" and python_version < "3.11"
+mdit-py-plugins==0.3.3 ; python_version >= "3.8" and python_version < "3.11"
+mdurl==0.1.2 ; python_version >= "3.8" and python_version < "3.11"
+mpmath==1.3.0 ; python_version >= "3.8" and python_version < "3.11"
+msgpack==1.0.5 ; python_version >= "3.8" and python_version < "3.11"
+multidict==6.0.4 ; python_version >= "3.8" and python_version < "3.11"
+networkx==3.0 ; python_version >= "3.8" and python_version < "3.11"
+numpy==1.24.2 ; python_version < "3.11" and python_version >= "3.8"
+nvidia-cublas-cu11==11.10.3.66 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.8" and python_version < "3.11"
+nvidia-cuda-cupti-cu11==11.7.101 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.8" and python_version < "3.11"
+nvidia-cuda-nvrtc-cu11==11.7.99 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.8" and python_version < "3.11"
+nvidia-cuda-runtime-cu11==11.7.99 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.8" and python_version < "3.11"
+nvidia-cudnn-cu11==8.5.0.96 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.8" and python_version < "3.11"
+nvidia-cufft-cu11==10.9.0.58 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.8" and python_version < "3.11"
+nvidia-curand-cu11==10.2.10.91 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.8" and python_version < "3.11"
+nvidia-cusolver-cu11==11.4.0.1 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.8" and python_version < "3.11"
+nvidia-cusparse-cu11==11.7.4.91 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.8" and python_version < "3.11"
+nvidia-ml-py==11.495.46 ; python_version >= "3.8" and python_version < "3.11"
+nvidia-nccl-cu11==2.14.3 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.8" and python_version < "3.11"
+nvidia-nvtx-cu11==11.7.91 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.8" and python_version < "3.11"
+opencensus-context==0.1.3 ; python_version >= "3.8" and python_version < "3.11"
+opencensus==0.11.2 ; python_version >= "3.8" and python_version < "3.11"
+orjson==3.8.8 ; python_version >= "3.8" and python_version < "3.11"
+packaging==23.0 ; python_version < "3.11" and python_version >= "3.8"
+pandas==1.5.3 ; python_version >= "3.8" and python_version < "3.11"
+pettingzoo==1.22.4 ; python_version >= "3.8" and python_version < "3.11"
+pillow==9.4.0 ; python_version >= "3.8" and python_version < "3.11"
+pkgutil-resolve-name==1.3.10 ; python_version >= "3.8" and python_version < "3.9"
+platformdirs==3.2.0 ; python_version >= "3.8" and python_version < "3.11"
+prometheus-client==0.16.0 ; python_version >= "3.8" and python_version < "3.11"
+protobuf==3.17.0 ; python_version >= "3.8" and python_version < "3.11"
+psutil==5.9.4 ; python_version >= "3.8" and python_version < "3.11"
+py-spy==0.3.14 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1-modules==0.2.8 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1==0.4.8 ; python_version >= "3.8" and python_version < "3.11"
+pydantic==1.10.7 ; python_version >= "3.8" and python_version < "3.11"
+pydub==0.25.1 ; python_version >= "3.8" and python_version < "3.11"
+pygame==2.3.0 ; python_version >= "3.8" and python_version < "3.11"
+pygments==2.14.0 ; python_version >= "3.8" and python_version < "3.11"
+pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11"
+pyrsistent==0.19.3 ; python_version >= "3.8" and python_version < "3.11"
+python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11"
+python-multipart==0.0.6 ; python_version >= "3.8" and python_version < "3.11"
+pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11"
+pywavelets==1.4.1 ; python_version >= "3.8" and python_version < "3.11"
+pyyaml==6.0 ; python_version >= "3.8" and python_version < "3.11"
+ray[rllib,serve]==2.3.1 ; python_version >= "3.8" and python_version < "3.11"
+requests==2.28.2 ; python_version >= "3.8" and python_version < "3.11"
+rfc3986[idna2008]==1.5.0 ; python_version >= "3.8" and python_version < "3.11"
+rich==13.3.3 ; python_version >= "3.8" and python_version < "3.11"
+rsa==4.9 ; python_version >= "3.8" and python_version < "3.11"
+scikit-image==0.20.0 ; python_version >= "3.8" and python_version < "3.11"
+scipy==1.9.1 ; python_version < "3.11" and python_version >= "3.8"
+semantic-version==2.10.0 ; python_version >= "3.8" and python_version < "3.11"
+setuptools==67.6.1 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.8" and python_version < "3.11"
+six==1.16.0 ; python_version < "3.11" and python_version >= "3.8"
+smart-open==6.3.0 ; python_version >= "3.8" and python_version < "3.11"
+sniffio==1.3.0 ; python_version >= "3.8" and python_version < "3.11"
+starlette==0.26.1 ; python_version >= "3.8" and python_version < "3.11"
+sympy==1.11.1 ; python_version >= "3.8" and python_version < "3.11"
+tabulate==0.9.0 ; python_version >= "3.8" and python_version < "3.11"
+tensorboardx==2.6 ; python_version >= "3.8" and python_version < "3.11"
+tensorflow-probability==0.19.0 ; python_version >= "3.8" and python_version < "3.11"
+tifffile==2023.3.21 ; python_version >= "3.8" and python_version < "3.11"
+toolz==0.12.0 ; python_version >= "3.8" and python_version < "3.11"
+torch==2.0.0 ; python_version >= "3.8" and python_version < "3.11"
+tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11"
+triton==2.0.0 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.8" and python_version < "3.11"
+typer==0.7.0 ; python_version >= "3.8" and python_version < "3.11"
+typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11"
+uc-micro-py==1.0.1 ; python_version >= "3.8" and python_version < "3.11"
+urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11"
+uvicorn==0.21.1 ; python_version >= "3.8" and python_version < "3.11"
+virtualenv==20.21.0 ; python_version >= "3.8" and python_version < "3.11"
+wcwidth==0.2.6 ; python_version >= "3.8" and python_version < "3.11"
+websockets==10.4 ; python_version >= "3.8" and python_version < "3.11"
+wheel==0.40.0 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.8" and python_version < "3.11"
+yarl==1.8.2 ; python_version >= "3.8" and python_version < "3.11"
+zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10"