Upload folder using huggingface_hub
Browse files
src/envs/pokemon_env/README.md
CHANGED
|
@@ -50,7 +50,7 @@ print(f"Reward: {result.reward}, Done: {result.done}")
|
|
| 50 |
```bash
|
| 51 |
# Build both images (run from project root directory)
|
| 52 |
docker build -t pokemon-showdown:latest -f src/envs/pokemon_env/server/Dockerfile.showdown .
|
| 53 |
-
docker build -t pokemon-env:latest -f src/envs/pokemon_env/server/Dockerfile.
|
| 54 |
|
| 55 |
# Create Docker network for container communication
|
| 56 |
docker network create pokemon-network
|
|
@@ -68,7 +68,7 @@ curl http://localhost:9980/health # Test OpenEnv server
|
|
| 68 |
## Configuration
|
| 69 |
|
| 70 |
Environment variables:
|
| 71 |
-
- `POKEMON_BATTLE_FORMAT` - Battle format (default: `
|
| 72 |
- `POKEMON_REWARD_MODE` - Reward mode: `sparse` or `dense` (default: `sparse`)
|
| 73 |
- `POKEMON_MAX_TURNS` - Maximum turns per battle (default: `1000`)
|
| 74 |
- `POKEMON_PLAYER_USERNAME` - Player username (default: auto-generated)
|
|
|
|
| 50 |
```bash
|
| 51 |
# Build both images (run from project root directory)
|
| 52 |
docker build -t pokemon-showdown:latest -f src/envs/pokemon_env/server/Dockerfile.showdown .
|
| 53 |
+
docker build -t pokemon-env:latest -f src/envs/pokemon_env/server/Dockerfile.pokemonenv .
|
| 54 |
|
| 55 |
# Create Docker network for container communication
|
| 56 |
docker network create pokemon-network
|
|
|
|
| 68 |
## Configuration
|
| 69 |
|
| 70 |
Environment variables:
|
| 71 |
+
- `POKEMON_BATTLE_FORMAT` - Battle format (default: `gen8randombattle`)
|
| 72 |
- `POKEMON_REWARD_MODE` - Reward mode: `sparse` or `dense` (default: `sparse`)
|
| 73 |
- `POKEMON_MAX_TURNS` - Maximum turns per battle (default: `1000`)
|
| 74 |
- `POKEMON_PLAYER_USERNAME` - Player username (default: auto-generated)
|
src/envs/pokemon_env/server/Dockerfile.pokemonenv
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dockerfile for Pokemon Battle Environment OpenEnv
|
| 2 |
+
# This image provides Pokemon battles via poke-env
|
| 3 |
+
|
| 4 |
+
# Build OpenEnv base (can be overridden for CI/CD)
|
| 5 |
+
ARG BASE_IMAGE
|
| 6 |
+
FROM ${BASE_IMAGE:-openenv-base:latest} AS final
|
| 7 |
+
|
| 8 |
+
# Install dependencies
|
| 9 |
+
RUN apt-get update && apt-get install -y \
|
| 10 |
+
curl \
|
| 11 |
+
supervisor \
|
| 12 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 13 |
+
|
| 14 |
+
# Install poke-env and dependencies (specifiers are quoted so /bin/sh does not treat ">" as an output redirect)
|
| 15 |
+
RUN pip install --no-cache-dir \
|
| 16 |
+
"poke-env>=0.9.0" \
|
| 17 |
+
"gymnasium>=0.29.0"
|
| 18 |
+
|
| 19 |
+
# Copy OpenEnv core (base image already set WORKDIR=/app)
|
| 20 |
+
COPY src/core/ /app/src/core/
|
| 21 |
+
|
| 22 |
+
# Copy Pokemon environment code
|
| 23 |
+
COPY src/envs/pokemon_env/ /app/src/envs/pokemon_env/
|
| 24 |
+
|
| 25 |
+
# Copy README for web interface documentation
|
| 26 |
+
COPY src/envs/pokemon_env/README.md /app/README.md
|
| 27 |
+
|
| 28 |
+
# Pokemon environment variables
|
| 29 |
+
ENV SHOWDOWN_SERVER_URL=pokemon-showdown:8000
|
| 30 |
+
ENV POKEMON_BATTLE_FORMAT=gen8randombattle
|
| 31 |
+
ENV POKEMON_PLAYER_USERNAME=player
|
| 32 |
+
ENV POKEMON_REWARD_MODE=sparse
|
| 33 |
+
ENV POKEMON_MAX_TURNS=1000
|
| 34 |
+
|
| 35 |
+
# Expose OpenEnv port
|
| 36 |
+
EXPOSE 9980
|
| 37 |
+
|
| 38 |
+
# Create supervisor config for OpenEnv
|
| 39 |
+
RUN echo '[supervisord]\n\
|
| 40 |
+
nodaemon=true\n\
|
| 41 |
+
logfile=/dev/null\n\
|
| 42 |
+
logfile_maxbytes=0\n\
|
| 43 |
+
\n\
|
| 44 |
+
[program:openenv]\n\
|
| 45 |
+
command=uvicorn envs.pokemon_env.server.app:app --host 0.0.0.0 --port 9980\n\
|
| 46 |
+
directory=/app\n\
|
| 47 |
+
environment=PYTHONPATH="/app/src"\n\
|
| 48 |
+
autostart=true\n\
|
| 49 |
+
autorestart=true\n\
|
| 50 |
+
stdout_logfile=/dev/fd/1\n\
|
| 51 |
+
stdout_logfile_maxbytes=0\n\
|
| 52 |
+
stderr_logfile=/dev/fd/2\n\
|
| 53 |
+
stderr_logfile_maxbytes=0\n\
|
| 54 |
+
startsecs=10\n' > /etc/supervisor/conf.d/pokemon-env.conf
|
| 55 |
+
|
| 56 |
+
# Health check
|
| 57 |
+
HEALTHCHECK --interval=30s --timeout=3s --start-period=15s --retries=3 \
|
| 58 |
+
CMD curl -f http://localhost:9980/health || exit 1
|
| 59 |
+
|
| 60 |
+
# Run supervisor
|
| 61 |
+
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"]
|
src/envs/pokemon_env/server/pokemon_environment.py
CHANGED
|
@@ -15,6 +15,7 @@ Key Design:
|
|
| 15 |
import asyncio
|
| 16 |
import logging
|
| 17 |
import uuid
|
|
|
|
| 18 |
from dataclasses import asdict
|
| 19 |
from threading import Event, Lock
|
| 20 |
from typing import Any, Dict, List, Optional
|
|
@@ -25,7 +26,7 @@ from ..models import PokemonAction, PokemonObservation, PokemonData, PokemonStat
|
|
| 25 |
|
| 26 |
try:
|
| 27 |
# Import from top-level poke_env module
|
| 28 |
-
from poke_env import Player, RandomPlayer, AccountConfiguration,
|
| 29 |
# Import battle orders from player submodule
|
| 30 |
from poke_env.player import BattleOrder, ForfeitBattleOrder
|
| 31 |
# Import concurrency from concurrency submodule
|
|
@@ -247,13 +248,17 @@ class PokemonEnvironment(Environment):
|
|
| 247 |
self.player_username = player_username or f"player_{uuid.uuid4().hex[:8]}"
|
| 248 |
self.reward_mode = reward_mode
|
| 249 |
self.max_turns = max_turns
|
|
|
|
| 250 |
|
| 251 |
# Initialize player on POKE_LOOP
|
| 252 |
logger.info(f"Creating player {self.player_username} for format {battle_format}")
|
| 253 |
|
| 254 |
self.player = OpenEnvPokemonPlayer(
|
| 255 |
account_configuration=AccountConfiguration(self.player_username, None),
|
| 256 |
-
server_configuration=
|
|
|
|
|
|
|
|
|
|
| 257 |
battle_format=self.battle_format,
|
| 258 |
max_concurrent_battles=1, # One battle at a time
|
| 259 |
)
|
|
@@ -264,7 +269,10 @@ class PokemonEnvironment(Environment):
|
|
| 264 |
logger.info(f"Creating random opponent {opponent_username}")
|
| 265 |
self.opponent = RandomPlayer(
|
| 266 |
account_configuration=AccountConfiguration(opponent_username, None),
|
| 267 |
-
server_configuration=
|
|
|
|
|
|
|
|
|
|
| 268 |
battle_format=self.battle_format,
|
| 269 |
max_concurrent_battles=1,
|
| 270 |
)
|
|
@@ -275,7 +283,7 @@ class PokemonEnvironment(Environment):
|
|
| 275 |
self._state = PokemonState(
|
| 276 |
battle_format=battle_format,
|
| 277 |
player_username=self.player_username,
|
| 278 |
-
server_url=
|
| 279 |
)
|
| 280 |
|
| 281 |
# Battle tracking
|
|
|
|
| 15 |
import asyncio
|
| 16 |
import logging
|
| 17 |
import uuid
|
| 18 |
+
import os
|
| 19 |
from dataclasses import asdict
|
| 20 |
from threading import Event, Lock
|
| 21 |
from typing import Any, Dict, List, Optional
|
|
|
|
| 26 |
|
| 27 |
try:
|
| 28 |
# Import from top-level poke_env module
|
| 29 |
+
from poke_env import Player, RandomPlayer, AccountConfiguration, ServerConfiguration
|
| 30 |
# Import battle orders from player submodule
|
| 31 |
from poke_env.player import BattleOrder, ForfeitBattleOrder
|
| 32 |
# Import concurrency from concurrency submodule
|
|
|
|
| 248 |
self.player_username = player_username or f"player_{uuid.uuid4().hex[:8]}"
|
| 249 |
self.reward_mode = reward_mode
|
| 250 |
self.max_turns = max_turns
|
| 251 |
+
self.showdown_server_url = os.getenv("SHOWDOWN_SERVER_URL", "localhost:8000")
|
| 252 |
|
| 253 |
# Initialize player on POKE_LOOP
|
| 254 |
logger.info(f"Creating player {self.player_username} for format {battle_format}")
|
| 255 |
|
| 256 |
self.player = OpenEnvPokemonPlayer(
|
| 257 |
account_configuration=AccountConfiguration(self.player_username, None),
|
| 258 |
+
server_configuration=ServerConfiguration(
|
| 259 |
+
f"ws://{self.showdown_server_url}/showdown/websocket",
|
| 260 |
+
"https://play.pokemonshowdown.com/action.php?"
|
| 261 |
+
),
|
| 262 |
battle_format=self.battle_format,
|
| 263 |
max_concurrent_battles=1, # One battle at a time
|
| 264 |
)
|
|
|
|
| 269 |
logger.info(f"Creating random opponent {opponent_username}")
|
| 270 |
self.opponent = RandomPlayer(
|
| 271 |
account_configuration=AccountConfiguration(opponent_username, None),
|
| 272 |
+
server_configuration=ServerConfiguration(
|
| 273 |
+
f"ws://{self.showdown_server_url}/showdown/websocket",
|
| 274 |
+
"https://play.pokemonshowdown.com/action.php?"
|
| 275 |
+
),
|
| 276 |
battle_format=self.battle_format,
|
| 277 |
max_concurrent_battles=1,
|
| 278 |
)
|
|
|
|
| 283 |
self._state = PokemonState(
|
| 284 |
battle_format=battle_format,
|
| 285 |
player_username=self.player_username,
|
| 286 |
+
server_url=self.showdown_server_url,
|
| 287 |
)
|
| 288 |
|
| 289 |
# Battle tracking
|