Spaces:

qpluslab
/

openra-rl

Paused

App Files Files Community

github-actions[bot] committed on Mar 1

Commit

02f4a63

0 Parent(s):

Sync from GitHub ac82c3e

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.dockerignore +21 -0
.github/workflows/ci.yml +32 -0
.github/workflows/docker-publish.yml +52 -0
.github/workflows/pypi-publish.yml +26 -0
.github/workflows/sync-to-hf.yml +25 -0
.gitignore +19 -0
.gitmodules +3 -0
.openenvignore +28 -0
Dockerfile +149 -0
Dockerfile.agent +32 -0
LICENSE +674 -0
OpenRA +1 -0
README.md +479 -0
__init__.py +4 -0
client.py +3 -0
config.yaml +142 -0
docker-compose.yaml +71 -0
docker/build.sh +51 -0
docker/entrypoint.sh +30 -0
docker/replay-viewer.sh +89 -0
examples/README.md +50 -0
examples/config-lmstudio.yaml +14 -0
examples/config-minimal.yaml +21 -0
examples/config-ollama.yaml +14 -0
examples/config-openrouter.yaml +13 -0
examples/llm_agent.py +170 -0
examples/mcp_bot.py +619 -0
examples/scripted_bot.py +831 -0
models.py +7 -0
openenv.yaml +6 -0
openra_env/__init__.py +6 -0
openra_env/agent.py +1156 -0
openra_env/bench_export.py +95 -0
openra_env/bench_submit.py +167 -0
openra_env/cli/__init__.py +0 -0
openra_env/cli/commands.py +464 -0
openra_env/cli/console.py +43 -0
openra_env/cli/docker_manager.py +600 -0
openra_env/cli/main.py +212 -0
openra_env/cli/wizard.py +166 -0
openra_env/client.py +113 -0
openra_env/config.py +535 -0
openra_env/game_data.py +984 -0
openra_env/generated/__init__.py +0 -0
openra_env/generated/rl_bridge_pb2.py +61 -0
openra_env/generated/rl_bridge_pb2_grpc.py +148 -0
openra_env/mcp_server.py +454 -0
openra_env/mcp_ws_client.py +231 -0
openra_env/models.py +222 -0
openra_env/opponent_intel.py +263 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,21 @@

+.git
+.github
+.pytest_cache
+__pycache__
+*.pyc
+*.pyo
+*.egg-info
+.eggs
+dist
+build
+.mypy_cache
+.ruff_cache
+.venv
+venv
+documents/
+tests/
+docs/
+*.pdf
+.claude/
+# OpenRA submodule (cloned from GitHub during Docker build)
+OpenRA/

.github/workflows/ci.yml ADDED Viewed

	@@ -0,0 +1,32 @@

+# Continuous integration: on pushes and pull requests targeting main, install
+# the package with its "dev" extra, run the tests, then lint with ruff.
+name: CI
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # Quoted so YAML keeps "3.10" a string instead of parsing it as 3.1.
+        python-version: ["3.10", "3.11", "3.12"]
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          # Check out git submodules as well, including nested ones.
+          submodules: recursive
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: pip install -e ".[dev]"
+      - name: Run tests
+        run: pytest tests/ -v
+      # Only openra_env/ is linted; steps run in order, so a test failure
+      # stops the job before this step.
+      - name: Lint
+        run: ruff check openra_env/

.github/workflows/docker-publish.yml ADDED Viewed

	@@ -0,0 +1,52 @@

+# Builds the image from the repository root for linux/amd64 and linux/arm64
+# and pushes it to the registry named in REGISTRY (ghcr.io).
+name: Docker Publish
+# NOTE(review): both a pushed "v*" tag and a published release start this
+# workflow; a release that creates a v* tag may therefore build twice.
+# Confirm whether that overlap is intended.
+on:
+  push:
+    tags: ["v*"]
+  release:
+    types: [published]
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      # Lets GITHUB_TOKEN (used by the login step below) push packages.
+      packages: write
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      # QEMU + Buildx are set up before the multi-platform build below.
+      - uses: docker/setup-qemu-action@v3
+      - uses: docker/setup-buildx-action@v3
+      - uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      # Derives image tags/labels; outputs are consumed by the build step.
+      - uses: docker/metadata-action@v5
+        id: meta
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          # NOTE(review): this workflow only runs for tags and releases, so
+          # the is_default_branch condition on the raw "latest" rule looks
+          # like it can never be true here - confirm how "latest" is meant
+          # to be produced.
+          tags: |
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=raw,value=latest,enable={{is_default_branch}}
+      - uses: docker/build-push-action@v6
+        with:
+          context: .
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          # Layer cache is read from and written to the "gha" cache backend.
+          cache-from: type=gha
+          cache-to: type=gha,mode=max

.github/workflows/pypi-publish.yml ADDED Viewed

	@@ -0,0 +1,26 @@

+# On a published release: build the distribution with `python -m build` and
+# hand the result to pypa/gh-action-pypi-publish.
+name: PyPI Publish
+on:
+  release:
+    types: [published]
+# No password/token input is passed to the publish action below, so the
+# upload presumably relies on PyPI trusted publishing via this OIDC
+# permission - confirm against the PyPI project settings.
+# Setting any permission explicitly leaves every unlisted scope (e.g.
+# contents) at "none".
+permissions:
+  id-token: write
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Build package
+        run: |
+          pip install build
+          python -m build
+      - uses: pypa/gh-action-pypi-publish@release/v1

.github/workflows/sync-to-hf.yml ADDED Viewed

	@@ -0,0 +1,25 @@

+# Mirrors the current tree of main to a Hugging Face Space as a single
+# history-free commit, force-pushed to the Space's main branch.
+name: Sync to Hugging Face Space
+on:
+  push:
+    branches: [main]
+  workflow_dispatch:
+jobs:
+  sync-to-hub:
+    runs-on: ubuntu-latest
+    steps:
+      # Shallow checkout with LFS objects; submodules are not checked out.
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+          lfs: true
+      # The script below:
+      #  - switches to a new orphan branch (the checked-out files stay
+      #    staged, so the commit snapshots the whole tree with no parent);
+      #  - labels the commit with the first 7 characters of GITHUB_SHA;
+      #  - authenticates by embedding HF_TOKEN in the remote URL;
+      #  - force-pushes, replacing whatever is on the Space's main branch.
+      - name: Push to Hugging Face Space
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          # Create an orphan branch with just the current tree (no history)
+          git checkout --orphan hf-sync
+          git commit -m "Sync from GitHub ${GITHUB_SHA::7}"
+          git remote add hf https://openra-rl:$HF_TOKEN@huggingface.co/spaces/openra-rl/OpenRA-RL
+          git push hf hf-sync:main --force

.gitignore ADDED Viewed

	@@ -0,0 +1,19 @@

+__pycache__/
+*.py[cod]
+*$py.class
+*.egg-info/
+dist/
+build/
+.eggs/
+*.egg
+.venv/
+venv/
+.env
+*.log
+.pytest_cache/
+.ruff_cache/
+.mypy_cache/
+.DS_Store
+replays/
+documents/
+*.orarep

.gitmodules ADDED Viewed

	@@ -0,0 +1,3 @@

+[submodule "OpenRA"]
+	path = OpenRA
+	url = https://github.com/yxc20089/OpenRA.git

.openenvignore ADDED Viewed

	@@ -0,0 +1,28 @@

+# Build artifacts (Dockerfile builds fresh from source)
+OpenRA/bin/
+OpenRA/obj/
+# Replay files
+*.orarep
+replays/
+# Log files
+*.log
+# Documents
+documents/
+# Dev/test artifacts
+.pytest_cache/
+.ruff_cache/
+.mypy_cache/
+.venv/
+venv/
+.env
+.eggs/
+*.egg-info/
+dist/
+build/
+# IDE
+.DS_Store

Dockerfile ADDED Viewed

	@@ -0,0 +1,149 @@

+# ==============================================================================
+# Stage 1: Build OpenRA from source (C#/.NET 8.0)
+# Output used by later stages: /src/openra/{bin,mods,glsl} and the global mix
+# database file.
+# ==============================================================================
+FROM mcr.microsoft.com/dotnet/sdk:8.0-bookworm-slim AS openra-build
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    make \
+    git \
+    libsdl2-dev \
+    libopenal-dev \
+    libfreetype-dev \
+    liblua5.1-0-dev \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+# Clone OpenRA source from GitHub (works on HF Spaces where submodules aren't initialized)
+# OPENRA_REPO can be overridden with --build-arg.
+# NOTE(review): the shallow clone takes the tip of the repo's default branch;
+# no branch, tag or commit is pinned here - confirm that is intended.
+ARG OPENRA_REPO=https://github.com/yxc20089/OpenRA.git
+RUN git clone --depth=1 "$OPENRA_REPO" /src/openra
+WORKDIR /src/openra
+# Fix Windows CRLF line endings in shell scripts (git autocrlf on Windows adds \r)
+# and make every *.sh executable.
+RUN find . -name '*.sh' -exec sed -i 's/\r$//' {} + && \
+    find . -name '*.sh' -exec chmod +x {} +
+# Build with system libraries (unix-generic avoids bundled native binaries)
+# SKIP_PROTOC=true uses pre-generated protobuf C# files (avoids protoc arm64 crash in Docker)
+ENV SKIP_PROTOC=true
+RUN make TARGETPLATFORM=unix-generic CONFIGURATION=Release
+# Verify critical output (includes Null platform for headless RL operation)
+# Fails the build at this point if any expected assembly is missing.
+RUN test -f bin/OpenRA.dll && \
+    test -f bin/OpenRA.Game.dll && \
+    test -f bin/OpenRA.Mods.Common.dll && \
+    test -f bin/OpenRA.Platforms.Null.dll
+# ==============================================================================
+# Stage 2: Install Python dependencies
+# The runtime stage copies site-packages and /usr/local/bin out of this stage,
+# so the build toolchain installed here is not carried into the final image.
+# ==============================================================================
+FROM python:3.11-slim-bookworm AS python-build
+# build-essential: presumably for dependencies that compile native extensions
+# at install time - TODO confirm it is still required.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# Only the inputs copied here are available to `pip install .` below.
+COPY pyproject.toml /app/
+COPY openra_env/ /app/openra_env/
+COPY proto/ /app/proto/
+COPY README.md /app/
+RUN pip install --upgrade pip && \
+    pip install --no-cache-dir .
+# ==============================================================================
+# Stage 3: Runtime image
+# Python 3.11 base plus the .NET runtime, the built OpenRA tree from stage 1
+# and the Python packages from stage 2.
+# ==============================================================================
+# Named stage that exists only as a source for the COPY --from below.
+FROM mcr.microsoft.com/dotnet/aspnet:8.0-bookworm-slim AS dotnet-runtime
+FROM python:3.11-slim-bookworm
+LABEL maintainer="OpenRA-RL"
+LABEL description="OpenRA RL Environment - headless game engine with gRPC bridge + OpenEnv API"
+# Copy ASP.NET Core runtime from official Microsoft image
+COPY --from=dotnet-runtime /usr/share/dotnet /usr/share/dotnet
+RUN ln -s /usr/share/dotnet/dotnet /usr/bin/dotnet
+# Install runtime dependencies
+# (curl is also used by the content download and the HEALTHCHECK below.)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    xvfb \
+    libgl1-mesa-dri \
+    libgl1-mesa-glx \
+    libegl-mesa0 \
+    mesa-vulkan-drivers \
+    libvulkan1 \
+    libsdl2-2.0-0 \
+    libopenal1 \
+    libfreetype6 \
+    liblua5.1-0 \
+    libicu72 \
+    curl procps \
+    x11vnc novnc websockify \
+    && rm -rf /var/lib/apt/lists/*
+# Copy Python packages from builder
+# (paths are specific to Python 3.11; both stages use the same base image)
+COPY --from=python-build /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
+COPY --from=python-build /usr/local/bin /usr/local/bin
+# Copy built OpenRA (bin, mods, glsl shaders, and global mix database for content resolution)
+# The last COPY uses the JSON-array form because the file name contains spaces.
+COPY --from=openra-build /src/openra/bin /opt/openra/bin
+COPY --from=openra-build /src/openra/mods /opt/openra/mods
+COPY --from=openra-build /src/openra/glsl /opt/openra/glsl
+COPY --from=openra-build ["/src/openra/global mix database.dat", "/opt/openra/global mix database.dat"]
+# Create native library symlinks that OpenRA expects
+# (configure-system-libraries.sh points these to system lib paths)
+# LIBDIR is the aarch64 multiarch directory on arm64 and the x86_64 one on
+# every other architecture.
+RUN LIBDIR=$( [ "$(dpkg --print-architecture)" = "arm64" ] && echo "/usr/lib/aarch64-linux-gnu" || echo "/usr/lib/x86_64-linux-gnu" ) && \
+    ln -sf "$LIBDIR/libSDL2-2.0.so.0" /opt/openra/bin/SDL2.so && \
+    ln -sf "$LIBDIR/libopenal.so.1" /opt/openra/bin/soft_oal.so && \
+    ln -sf "$LIBDIR/libfreetype.so.6" /opt/openra/bin/freetype6.so && \
+    ln -sf "$LIBDIR/liblua5.1.so.0" /opt/openra/bin/lua51.so
+# Copy Python application code
+# (made importable through PYTHONPATH=/app, set further down)
+COPY openra_env/ /app/openra_env/
+COPY proto/ /app/proto/
+COPY pyproject.toml /app/
+# Create OpenRA support directory and pre-install RA game content (best-effort).
+# Only needed for the replay viewer (Game.Platform=Default with full UI).
+# The RL environment works without this content (headless mode).
+# The download/unpack chain runs in a subshell; if any step in it fails the
+# build continues and only the warning is printed.
+# NOTE(review): the cleanup (rm, purge of unzip, apt lists) is part of the same
+# && chain, so a failure after the download also skips it - confirm acceptable.
+RUN mkdir -p /root/.config/openra/Content/ra/v2/expand /root/.config/openra/Content/ra/v2/cnc && \
+    ( curl -sfL --max-time 30 -o /tmp/ra-quickinstall.zip \
+        https://openra.baxxster.no/openra/ra-quickinstall.zip && \
+    apt-get update && apt-get install -y --no-install-recommends unzip && \
+    unzip -o /tmp/ra-quickinstall.zip -d /tmp/ra-content && \
+    cp /tmp/ra-content/*.mix /root/.config/openra/Content/ra/v2/ && \
+    cp /tmp/ra-content/expand/* /root/.config/openra/Content/ra/v2/expand/ && \
+    cp /tmp/ra-content/cnc/* /root/.config/openra/Content/ra/v2/cnc/ && \
+    rm -rf /tmp/ra-quickinstall.zip /tmp/ra-content && \
+    apt-get purge -y unzip && apt-get autoremove -y && rm -rf /var/lib/apt/lists/* \
+    ) || echo "WARNING: RA content download failed (replay viewer will be unavailable)"
+# Copy entrypoints (fix Windows CRLF line endings)
+COPY docker/entrypoint.sh /entrypoint.sh
+COPY docker/replay-viewer.sh /replay-viewer.sh
+RUN sed -i 's/\r$//' /entrypoint.sh /replay-viewer.sh && \
+    chmod +x /entrypoint.sh /replay-viewer.sh
+# Environment
+# DISPLAY=:99 presumably matches an Xvfb display started by the entrypoint -
+# TODO confirm against docker/entrypoint.sh.
+ENV OPENRA_PATH=/opt/openra
+ENV PYTHONPATH=/app
+ENV PYTHONUNBUFFERED=1
+ENV DISPLAY=:99
+ENV DOTNET_CLI_TELEMETRY_OPTOUT=1
+ENV DOTNET_ROLL_FORWARD=LatestMajor
+ENV LIBGL_ALWAYS_SOFTWARE=1
+ENV MESA_GL_VERSION_OVERRIDE=3.3
+# Game configuration (override at runtime with -e)
+ENV AI_SLOT=Multi0
+ENV BOT_TYPE=normal
+ENV RECORD_REPLAYS=true
+EXPOSE 8000
+# Health is probed over HTTP on the exposed port at /health.
+HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
+    CMD curl -f http://localhost:8000/health || exit 1
+# CMD is passed as arguments to the entrypoint script.
+ENTRYPOINT ["/entrypoint.sh"]
+CMD ["python", "-m", "openra_env.server.app"]

Dockerfile.agent ADDED Viewed

	@@ -0,0 +1,32 @@

+# ==============================================================================
+# Lightweight agent container for OpenRA-RL
+#
+# Runs the LLM agent (or MCP bot) that connects to the OpenRA-RL game server.
+# Does NOT include the game engine — only the Python client and agent code.
+#
+# Usage:
+#   docker build -f Dockerfile.agent -t openra-rl-agent .
+#   docker run -e OPENROUTER_API_KEY=sk-or-... openra-rl-agent
+# ==============================================================================
+FROM python:3.11-slim-bookworm
+LABEL description="OpenRA-RL Agent - LLM/MCP bot that plays Red Alert"
+WORKDIR /app
+# Install Python dependencies
+# httpx is installed explicitly alongside the package; presumably the example
+# agents need it and it is not a core dependency - TODO confirm.
+COPY pyproject.toml README.md /app/
+COPY openra_env/ /app/openra_env/
+COPY proto/ /app/proto/
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir . httpx
+# Copy agent scripts
+# (copied after the install so edits to examples/ do not invalidate the
+# dependency layer above)
+COPY examples/ /app/examples/
+ENV PYTHONPATH=/app
+ENV PYTHONUNBUFFERED=1
+# Default: run LLM agent
+# (path is relative to WORKDIR /app)
+CMD ["python", "examples/llm_agent.py"]

LICENSE ADDED Viewed

	@@ -0,0 +1,674 @@

+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+                            Preamble
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+  The precise terms and conditions for copying, distribution and
+modification follow.
+                       TERMS AND CONDITIONS
+  0. Definitions.
+  "This License" refers to version 3 of the GNU General Public License.
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+  1. Source Code.
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+  The Corresponding Source for a work in source code form is that
+same work.
+  2. Basic Permissions.
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+  4. Conveying Verbatim Copies.
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+  5. Conveying Modified Source Versions.
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+  6. Conveying Non-Source Forms.
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+  7. Additional Terms.
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+  8. Termination.
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+  9. Acceptance Not Required for Having Copies.
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+  10. Automatic Licensing of Downstream Recipients.
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+  11. Patents.
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+  12. No Surrender of Others' Freedom.
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+  13. Use with the GNU Affero General Public License.
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+  14. Revised Versions of this License.
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+  15. Disclaimer of Warranty.
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+  16. Limitation of Liability.
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+  17. Interpretation of Sections 15 and 16.
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+                     END OF TERMS AND CONDITIONS
+            How to Apply These Terms to Your New Programs
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+Also add information on how to contact you by electronic and paper mail.
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.

OpenRA ADDED Viewed

	@@ -0,0 +1 @@


1	+ Subproject commit de92f675141c8ceff6621417ce74f82497765698

README.md ADDED Viewed

	@@ -0,0 +1,479 @@

+---
+title: OpenRA-RL
+emoji: 🎮
+colorFrom: red
+colorTo: blue
+sdk: docker
+app_port: 8000
+tags:
+  - openenv
+  - reinforcement-learning
+  - rts
+models: []
+datasets: []
+pinned: false
+---
+# OpenRA-RL
+Play [Red Alert](https://www.openra.net/) with AI agents. LLMs, scripted bots, or RL — your agent commands armies in the classic RTS through a Python API.
+```
+┌──────────────────┐       HTTP / WS :8000       ┌──────────────────────────────┐
+│   Your Agent     │  ◄────────────────────────►  │  OpenRA-RL Server (Docker)   │
+│                  │       gRPC :9999             │  FastAPI + gRPC bridge       │
+│  LLM / Bot / RL  │  ◄────────────────────────►  │  OpenRA engine (headless)    │
+└──────────────────┘                              └──────────────────────────────┘
+```
+## Quick Start
+```bash
+pip install openra-rl
+openra-rl play
+```
+On first run, an interactive wizard helps you configure your LLM provider (OpenRouter, Ollama, or LM Studio). The CLI pulls the game server Docker image and starts everything automatically.
+### Skip the wizard
+```bash
+# Cloud (OpenRouter)
+openra-rl play --provider openrouter --api-key sk-or-... --model anthropic/claude-sonnet-4-20250514
+# Local (Ollama — free, no API key)
+openra-rl play --provider ollama --model qwen3:32b
+# Developer mode (skip Docker, run server locally)
+openra-rl play --local --provider ollama --model qwen3:32b
+# Reconfigure later
+openra-rl config
+```
+### Prerequisites
+- **Docker** — the game server runs in a container
+- **Python 3.10+**
+- An LLM endpoint (cloud API key or local model server)
+## CLI Reference
+```
+openra-rl play       Run the LLM agent (wizard on first use)
+openra-rl config     Re-run the setup wizard
+openra-rl server     start | stop | status | logs
+openra-rl replay     watch | list | copy | stop
+openra-rl bench      submit   Upload results to the leaderboard
+openra-rl mcp-server Start MCP stdio server (for OpenClaw / Claude Desktop)
+openra-rl doctor     Check system prerequisites
+openra-rl version    Print version
+```
+## MCP Server (OpenClaw / Claude Desktop)
+OpenRA-RL exposes all 48 game tools as a standard MCP server:
+```bash
+openra-rl mcp-server
+```
+Add to your MCP client config (e.g. `~/.openclaw/openclaw.json`):
+```json
+{
+  "mcpServers": {
+    "openra-rl": {
+      "command": "openra-rl",
+      "args": ["mcp-server"]
+    }
+  }
+}
+```
+Then chat: _"Start a game of Red Alert on easy difficulty, build a base, and defeat the enemy."_
+## Architecture
+| Component | Language | Role |
+|-----------|----------|------|
+| **OpenRA-RL** | Python | Environment wrapper, agents, HTTP/WebSocket API |
+| **OpenRA** (submodule) | C# | Modified game engine with embedded gRPC server |
+| **OpenEnv** (pip dep) | Python | Standardized Gymnasium-style environment interface |
+**Data flow:** Agent <-> FastAPI (port 8000) <-> gRPC bridge (port 9999) <-> OpenRA game engine
+The game runs at ~25 ticks/sec, independent of agent speed. Observations are delivered through a DropOldest channel, so the agent always sees the latest game state even when the agent itself runs slower than real time.
+## Example Agents
+### Scripted Bot
+A hardcoded state-machine bot that demonstrates all action types. Deploys MCV, builds a base, trains infantry, and attacks.
+```bash
+python examples/scripted_bot.py --url http://localhost:8000 --verbose --max-steps 2000
+```
+### MCP Bot
+A planning-aware bot that uses game knowledge tools (tech tree lookups, faction briefings, map analysis) to formulate strategy before playing.
+```bash
+python examples/mcp_bot.py --url http://localhost:8000 --verbose --max-turns 3000
+```
+### LLM Agent
+An AI agent powered by any OpenAI-compatible model. Supports cloud APIs (OpenRouter, OpenAI) and local model servers (Ollama, LM Studio).
+```bash
+python examples/llm_agent.py \
+  --config examples/config-openrouter.yaml \
+  --api-key sk-or-... \
+  --verbose \
+  --log-file game.log
+```
+CLI flags override config file values. See `python examples/llm_agent.py --help` for all options.
+## Configuration
+OpenRA-RL uses a unified YAML config system. Settings are resolved with this precedence:
+**CLI flags > Environment variables > Config file > Built-in defaults**
+### Config file
+Copy and edit the default config:
+```bash
+cp config.yaml my-config.yaml
+# Edit my-config.yaml, then:
+python examples/llm_agent.py --config my-config.yaml
+```
+Key sections:
+```yaml
+game:
+  openra_path: "/opt/openra"      # Path to OpenRA installation
+  map_name: "singles.oramap"      # Map to play
+  headless: true                  # No GPU rendering
+  record_replays: false           # Save .orarep replay files
+opponent:
+  bot_type: "normal"              # AI difficulty: easy, normal, hard
+  ai_slot: "Multi0"               # AI player slot
+planning:
+  enabled: true                   # Pre-game planning phase
+  max_turns: 10                   # Max planning turns
+  max_time_s: 60.0                # Planning time limit
+llm:
+  base_url: "https://openrouter.ai/api/v1/chat/completions"
+  model: "qwen/qwen3-coder-next"
+  max_tokens: 1500
+  temperature: null               # null = provider default
+tools:
+  categories:                     # Toggle tool groups on/off
+    read: true
+    knowledge: true
+    movement: true
+    production: true
+    # ... see config.yaml for all categories
+  disabled: []                    # Disable specific tools by name
+alerts:
+  under_attack: true
+  low_power: true
+  idle_production: true
+  no_scouting: true
+  # ... see config.yaml for all alerts
+```
+### Example configs
+| File | Use case |
+|------|----------|
+| `examples/config-openrouter.yaml` | Cloud LLM via OpenRouter (Claude, GPT, etc.) |
+| `examples/config-ollama.yaml` | Local LLM via Ollama |
+| `examples/config-lmstudio.yaml` | Local LLM via LM Studio |
+| `examples/config-minimal.yaml` | Reduced tool set for limited-context models |
+### Environment variables
+| Variable | Config path | Description |
+|----------|-------------|-------------|
+| `OPENROUTER_API_KEY` | `llm.api_key` | API key for OpenRouter |
+| `LLM_API_KEY` | `llm.api_key` | Generic LLM API key (overrides OpenRouter key) |
+| `LLM_BASE_URL` | `llm.base_url` | LLM endpoint URL |
+| `LLM_MODEL` | `llm.model` | Model identifier |
+| `BOT_TYPE` | `opponent.bot_type` | AI difficulty: easy, normal, hard |
+| `OPENRA_PATH` | `game.openra_path` | Path to OpenRA installation |
+| `RECORD_REPLAYS` | `game.record_replays` | Save replay files (true/false) |
+| `PLANNING_ENABLED` | `planning.enabled` | Enable planning phase (true/false) |
+## Using Local Models
+### Ollama
+```bash
+# Pull a model with tool-calling support
+ollama pull qwen3:32b
+# For models that need more context (default is often 2048-4096 tokens):
+cat > /tmp/Modelfile <<EOF
+FROM qwen3:32b
+PARAMETER num_ctx 32768
+EOF
+ollama create qwen3-32k -f /tmp/Modelfile
+# Run
+openra-rl play --provider ollama --model qwen3-32k
+```
+> **Note:** Not all Ollama models support tool calling. Check with `ollama show <model>` — the template must include a `tools` block. Models known to work: `qwen3:32b`, `qwen3:4b`.
+### LM Studio
+1. Load a model in LM Studio and start the local server (default port 1234)
+2. Run:
+```bash
+openra-rl play --provider lmstudio --model <model-name>
+```
+## Docker
+### Server management
+```bash
+openra-rl server start              # Start game server container
+openra-rl server start --port 9000  # Custom port
+openra-rl server status             # Check if running
+openra-rl server logs --follow      # Tail logs
+openra-rl server stop               # Stop container
+```
+### Docker Compose (development)
+| Service | Command | Description |
+|---------|---------|-------------|
+| `openra-rl` | `docker compose up openra-rl` | Headless game server (ports 8000, 9999) |
+| `agent` | `docker compose up agent` | LLM agent (requires `OPENROUTER_API_KEY`) |
+| `mcp-bot` | `docker compose run mcp-bot` | MCP bot |
+```bash
+# LLM agent via Docker Compose
+OPENROUTER_API_KEY=sk-or-... docker compose up agent
+```
+### Replays
+After each game, replays are automatically copied to `~/.openra-rl/replays/`. Watch them in your browser:
+```bash
+openra-rl replay watch              # Watch the latest replay (opens browser via VNC)
+openra-rl replay watch <file>       # Watch a specific .orarep file
+openra-rl replay list               # List replays (Docker + local)
+openra-rl replay copy               # Copy replays from Docker to local
+openra-rl replay stop               # Stop the replay viewer
+```
+The replay viewer runs inside Docker using the same engine that recorded the game, so replays always play back correctly. The browser connects via noVNC — no local game install needed.
+> **Version tracking:** Each replay records which Docker image version was used. When you upgrade, old replays are still viewable using their original engine version.
+## Local Development (without Docker)
+To run the game server natively (macOS/Linux):
+### Install dependencies
+```bash
+# Python
+pip install -e ".[dev]"
+# .NET 8.0 SDK
+# macOS: brew install dotnet@8
+# Ubuntu: sudo apt install dotnet-sdk-8.0
+# Native libraries (macOS arm64)
+brew install sdl2 openal-soft freetype luajit
+cp $(brew --prefix sdl2)/lib/libSDL2.dylib OpenRA/bin/SDL2.dylib
+cp $(brew --prefix openal-soft)/lib/libopenal.dylib OpenRA/bin/soft_oal.dylib
+cp $(brew --prefix freetype)/lib/libfreetype.dylib OpenRA/bin/freetype6.dylib
+cp $(brew --prefix luajit)/lib/libluajit-5.1.dylib OpenRA/bin/lua51.dylib
+```
+### Build OpenRA
+```bash
+cd OpenRA && make && cd ..
+```
+### Start the server
+```bash
+python openra_env/server/app.py
+```
+### Run tests
+```bash
+pytest
+```
+## Observation Space
+Each tick, the agent receives structured game state:
+| Field | Description |
+|-------|-------------|
+| `tick` | Current game tick |
+| `cash`, `ore`, `power_provided`, `power_drained` | Economy |
+| `units` | Own units with position, health, type, facing, stance, speed, attack range |
+| `buildings` | Own buildings with production queues, power, rally points |
+| `visible_enemies`, `visible_enemy_buildings` | Fog-of-war limited enemy intel |
+| `spatial_map` | 9-channel spatial tensor (terrain, height, resources, passability, fog, own buildings, own units, enemy buildings, enemy units) |
+| `military` | Kill/death costs, asset value, experience, order count |
+| `available_production` | What can currently be built |
+## Action Space
+18 action types available through the command API:
+| Category | Actions |
+|----------|---------|
+| **Movement** | `move`, `attack_move`, `attack`, `stop` |
+| **Production** | `produce`, `cancel_production` |
+| **Building** | `place_building`, `sell`, `repair`, `power_down`, `set_rally_point`, `set_primary` |
+| **Unit control** | `deploy`, `guard`, `set_stance`, `enter_transport`, `unload`, `harvest` |
+## MCP Tools
+The LLM agent interacts through 48 MCP (Model Context Protocol) tools organized into categories:
+| Category | Tools | Purpose |
+|----------|-------|---------|
+| **Read** | `get_game_state`, `get_economy`, `get_units`, `get_buildings`, `get_enemies`, `get_production`, `get_map_info`, `get_exploration_status` | Query current game state |
+| **Knowledge** | `lookup_unit`, `lookup_building`, `lookup_tech_tree`, `lookup_faction` | Static game data reference |
+| **Bulk Knowledge** | `get_faction_briefing`, `get_map_analysis`, `batch_lookup` | Efficient batch queries |
+| **Planning** | `start_planning_phase`, `end_planning_phase`, `get_opponent_intel`, `get_planning_status` | Pre-game strategy planning |
+| **Game Control** | `advance` | Advance game ticks |
+| **Movement** | `move_units`, `attack_move`, `attack_target`, `stop_units` | Unit movement commands |
+| **Production** | `build_unit`, `build_structure`, `build_and_place` | Build units and structures |
+| **Building Actions** | `place_building`, `cancel_production`, `deploy_unit`, `sell_building`, `repair_building`, `set_rally_point`, `guard_target`, `set_stance`, `harvest`, `power_down`, `set_primary` | Building and unit management |
+| **Placement** | `get_valid_placements` | Query valid building locations |
+| **Unit Groups** | `assign_group`, `add_to_group`, `get_groups`, `command_group` | Group management |
+| **Compound** | `batch`, `plan` | Multi-action sequences |
+| **Utility** | `get_replay_path`, `surrender` | Misc |
+| **Terrain** | `get_terrain_at` | Terrain queries |
+Tools can be toggled per-category or individually via `config.yaml`.
+## Benchmark & Leaderboard
+Game results are automatically submitted to the [OpenRA-Bench leaderboard](https://huggingface.co/spaces/openra-rl/OpenRA-Bench) after each game. To disable this, set the environment variable `BENCH_UPLOAD=false`, or set `bench_upload: false` under the `agent:` section of your config file.
+### Agent identity
+Customize how your agent appears on the leaderboard:
+```bash
+# Environment variables
+AGENT_NAME="DeathBot-9000" AGENT_TYPE="RL" openra-rl play
+```
+Or in `config.yaml`:
+```yaml
+agent:
+  agent_name: "DeathBot-9000"
+  agent_type: "RL"
+  agent_url: "https://github.com/user/deathbot"  # shown as link on leaderboard
+```
+| Variable | Config path | Description |
+|----------|-------------|-------------|
+| `AGENT_NAME` | `agent.agent_name` | Display name (default: model name) |
+| `AGENT_TYPE` | `agent.agent_type` | Scripted / LLM / RL (default: auto-detect) |
+| `AGENT_URL` | `agent.agent_url` | GitHub/project URL shown on leaderboard |
+| `BENCH_UPLOAD` | `agent.bench_upload` | Auto-upload after each game (default: true) |
+| `BENCH_URL` | `agent.bench_url` | Leaderboard URL |
+### Manual submission
+Upload a saved result (with optional replay file):
+```bash
+openra-rl bench submit result.json
+openra-rl bench submit result.json --replay game.orarep --agent-name "MyBot"
+```
+### Custom agents
+If you're building your own agent (RL, CNN, multi-agent, etc.) that doesn't use the built-in LLM agent, use `build_bench_export()` to create a leaderboard submission from a final observation:
+```python
+from openra_env.bench_export import build_bench_export
+# obs = final observation from env.step()
+export = build_bench_export(
+    obs,
+    agent_name="DeathBot-9000",
+    agent_type="RL",
+    opponent="Normal",
+    agent_url="https://github.com/user/deathbot",
+    replay_path="/path/to/replay.orarep",
+)
+# Saves JSON to ~/.openra-rl/bench-exports/ and returns dict with "path" key
+```
+Then submit:
+```bash
+openra-rl bench submit ~/.openra-rl/bench-exports/bench-DeathBot-9000-*.json --replay game.orarep
+```
+## Project Structure
+```
+OpenRA-RL/
+├── OpenRA/                     # Game engine (git submodule, C#)
+├── openra_env/                 # Python package
+│   ├── cli/                    #   CLI entry point (openra-rl command)
+│   ├── mcp_server.py           #   Standard MCP server (stdio transport)
+│   ├── client.py               #   WebSocket client
+│   ├── config.py               #   Unified YAML configuration
+│   ├── models.py               #   Pydantic data models
+│   ├── game_data.py            #   Unit/building stats, tech tree
+│   ├── reward.py               #   Multi-component reward function
+│   ├── bench_export.py         #   Build leaderboard submissions from observations
+│   ├── bench_submit.py         #   Upload results to OpenRA-Bench leaderboard
+│   ├── opponent_intel.py       #   AI opponent profiles
+│   ├── mcp_ws_client.py        #   MCP WebSocket client
+│   ├── server/
+│   │   ├── app.py              #     FastAPI application
+│   │   ├── openra_environment.py  #  OpenEnv environment (reset/step/state)
+│   │   ├── bridge_client.py    #     Async gRPC client
+│   │   └── openra_process.py   #     OpenRA subprocess manager
+│   └── generated/              #   Auto-generated protobuf stubs
+├── examples/
+│   ├── scripted_bot.py         #   Hardcoded strategy bot
+│   ├── mcp_bot.py              #   MCP tool-based bot
+│   ├── llm_agent.py            #   LLM-powered agent
+│   └── config-*.yaml           #   Example configs (ollama, lmstudio, openrouter, minimal)
+├── skill/                      # OpenClaw skill definition
+├── proto/                      # Protobuf definitions (rl_bridge.proto)
+├── tests/                      # Test suite
+├── .github/workflows/          # CI, Docker publish, PyPI publish
+├── config.yaml                 # Default configuration
+├── docker-compose.yaml         # Service orchestration
+├── Dockerfile                  # Game server image
+└── Dockerfile.agent            # Lightweight agent image
+```
+## License
+[GPL-3.0](LICENSE)

__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@

+"""OpenRA-RL: OpenEnv environment for Red Alert."""
+from openra_env.client import OpenRAEnv  # noqa: F401
+from openra_env.models import OpenRAAction, OpenRAObservation, OpenRAState  # noqa: F401

client.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ """OpenEnv client re-export."""
2	+
3	+ from openra_env.client import OpenRAEnv # noqa: F401

config.yaml ADDED Viewed

	@@ -0,0 +1,142 @@

+# OpenRA-RL Configuration
+# ========================
+# Commented-out values below show the built-in defaults.
+# Uncomment and change any value to override; lines that are already
+# uncommented (e.g. game.openra_path) are active overrides.
+# Environment variables always take highest priority (see docs for mapping).
+#
+# Precedence: env vars > CLI args > constructor args > this file > defaults
+# ── Game Engine ───────────────────────────────────────────────────────
+game:
+  openra_path: "/Users/berta/Projects/OpenRA-RL/OpenRA"  # Path to OpenRA installation ($OPENRA_PATH)
+#   mod: "ra"                         # Game mod (ra, cnc, d2k)
+#   map_name: "singles.oramap"        # Map to play
+#   grpc_port: 9999                   # gRPC bridge port
+#   headless: true                    # Use Null renderer (no GPU)
+  record_replays: true                # Save .orarep replay files ($RECORD_REPLAYS)
+#   seed: null                        # RNG seed for reproducibility (null = random)
+#   max_ticks: 0                      # End game after N ticks (0 = unlimited)
+#   max_wall_time_s: 0                # End game after N seconds (0 = unlimited)
+# ── Opponent ──────────────────────────────────────────────────────────
+# Enemy bot always spawns by default. Set ai_slot to "" to disable.
+# Difficulty tiers: beginner / easy / medium / hard / brutal
+# Raw play styles also accepted: rush / normal / turtle / naval
+opponent:
+  bot_type: "beginner"                # Difficulty tier ($BOT_TYPE)
+  ai_slot: "Multi0"                   # AI player slot; "" to disable enemy ($AI_SLOT)
+# ── Planning Phase ────────────────────────────────────────────────────
+# planning:
+#   enabled: true                     # Enable pre-game planning phase ($PLANNING_ENABLED)
+#   max_turns: 10                     # Max planning turns ($PLANNING_MAX_TURNS)
+#   max_time_s: 60.0                  # Max planning seconds ($PLANNING_MAX_TIME)
+# ── Reward Function ───────────────────────────────────────────────────
+# reward:
+#   survival: 0.001                   # Per-tick survival bonus
+#   economic_efficiency: 0.01         # Cash delta reward
+#   aggression: 0.1                   # Kill reward multiplier
+#   defense: 0.05                     # Loss penalty multiplier
+#   victory: 1.0                      # Terminal win reward
+#   defeat: -1.0                      # Terminal loss penalty
+# ── Reward Vector ────────────────────────────────────────────────────
+# 8-dimensional skill signal computed per step alongside the scalar reward.
+# Dimensions: combat, economy, infrastructure, intelligence, composition,
+#             tempo, disruption, outcome
+# reward_vector:
+#   enabled: true                        # Enabled by default
+#   weights:                             # Per-dimension weights (for weighted sum)
+#     combat: 0.30
+#     economy: 0.15
+#     infrastructure: 0.10
+#     intelligence: 0.10
+#     composition: 0.10
+#     tempo: 0.10
+#     disruption: 0.15
+#     outcome: 1.00
+# ── MCP Tools ─────────────────────────────────────────────────────────
+# tools:
+#   categories:                       # Toggle tool groups (true/false)
+#     read: true                      # get_game_state, get_economy, get_units, etc.
+#     knowledge: true                 # lookup_unit, lookup_building, etc.
+#     bulk_knowledge: true            # get_faction_briefing, get_map_analysis, batch_lookup
+#     planning: true                  # start/end_planning_phase, get_opponent_intel, etc.
+#     game_control: true              # advance
+#     movement: true                  # move_units, attack_move, attack_target, stop_units
+#     production: true                # build_unit, build_structure, build_and_place
+#     building_actions: true          # place, cancel, deploy, sell, repair, rally, etc.
+#     placement: true                 # get_valid_placements
+#     unit_groups: true               # assign_group, command_group, etc.
+#     compound: true                  # batch, plan
+#     utility: true                   # get_replay_path, surrender
+#     terrain: true                   # get_terrain_at
+#   disabled: []                      # Disable specific tools by name
+# ── Alerts ────────────────────────────────────────────────────────────
+# alerts:
+#   under_attack: true
+#   damaged_building: true
+#   low_power: true
+#   idle_funds: true
+#   ore_full: true
+#   idle_production: true
+#   production_stalled: true
+#   building_ready: true
+#   stance_warning: true
+#   idle_army: true
+#   no_defenses: true
+#   no_scouting: true
+#   loss_tracking: true
+#   minimap: true                    # Show ASCII minimap in turn briefing
+#   max_alerts: 0                    # Max alerts per turn (0 = unlimited)
+# ── LLM Model ────────────────────────────────────────────────────────
+# llm:
+#   base_url: "https://openrouter.ai/api/v1/chat/completions"
+#   api_key: ""                       # Empty = not required (local models)
+#   model: "qwen/qwen3-coder-next"
+#   max_tokens: 1500
+#   temperature: null                 # null = provider default
+#   top_p: null                       # null = provider default
+#   keep_last_messages: 40            # Messages to keep after compression
+#   compression_strategy: "sliding_window"  # "sliding_window" or "none"
+#   compression_trigger: 0            # Compress at this count (0 = keep_last * 2)
+#   max_retries: 4                    # Retry on transient errors
+#   retry_backoff_s: 10               # Base backoff (multiplied by attempt)
+#   request_timeout_s: 120.0          # HTTP timeout per request
+#   extra_headers:                    # Custom headers (OpenRouter-specific)
+#     HTTP-Referer: "https://github.com/openra-rl"
+#     X-Title: "OpenRA-RL Agent"
+# ── Agent Runtime ─────────────────────────────────────────────────────
+# agent:
+#   server_url: "http://localhost:8000"  # OpenRA-RL server ($OPENRA_URL)
+#   max_turns: 0                      # 0 = unlimited
+#   max_time_s: 1800                  # 30 minutes ($MAX_TIME)
+#   verbose: false
+#   log_file: ""                      # Log file path ($LLM_AGENT_LOG)
+#   agent_name: ""                    # Leaderboard display name ($AGENT_NAME); empty = model name
+#   agent_type: ""                    # Scripted/LLM/RL ($AGENT_TYPE); empty = auto-detect
+#   agent_url: ""                     # GitHub/project URL shown on leaderboard ($AGENT_URL)
+#   bench_upload: true                 # Auto-upload results after each game ($BENCH_UPLOAD)
+#   bench_url: "https://openra-rl-openra-bench.hf.space"  # Leaderboard URL ($BENCH_URL)
+# ── Prompts ──────────────────────────────────────────────────────────
+# All LLM-facing text. Override individual fields here, or point
+# prompts_file to a separate YAML (copy openra_env/prompts/default_prompts.yaml).
+# Templates use Python str.format() placeholders: {variable_name}
+# prompts:
+#   system_prompt: ""                 # Inline system prompt (overrides built-in)
+#   system_prompt_file: ""            # Path to .txt system prompt ($SYSTEM_PROMPT_FILE)
+#   prompts_file: ""                  # Path to prompts YAML ($PROMPTS_FILE)
+#   planning_nudge: "Call end_planning_phase(strategy='...') when ready to start."
+#   planning_complete: "Planning complete. Game is now live."
+#   no_tool_nudge: "No tool was called. A tool call is required each turn."
+#   continue_nudge: "The game is still in progress."
+#   alerts:                           # Alert message templates
+#     low_power: "LOW POWER: {balance} — production runs at 1/3 speed"
+#     idle_army: "IDLE ARMY: {count} combat units idle"
+#     # ... see openra_env/prompts/default_prompts.yaml for all fields

docker-compose.yaml ADDED Viewed

	@@ -0,0 +1,71 @@

+# Docker Compose for OpenRA-RL development
+#
+# Usage:
+#   Game server only:  docker compose up openra-rl
+#   With LLM agent:    docker compose up agent
+#   With MCP bot:      docker compose run mcp-bot
+#
+# Build:
+#   docker compose build
+services:
+  openra-rl:
+    image: ${OPENRA_RL_IMAGE:-ghcr.io/yxc20089/openra-rl:latest}
+    build:
+      context: .
+      dockerfile: Dockerfile
+    ports:
+      - "8000:8000"   # OpenEnv HTTP API
+      - "9999:9999"   # gRPC bridge (direct access)
+    environment:
+      - OPENRA_PATH=/opt/openra
+      - DISPLAY=:99
+      - LIBGL_ALWAYS_SOFTWARE=1
+      - MESA_GL_VERSION_OVERRIDE=3.3
+    deploy:
+      resources:
+        limits:
+          cpus: "4"
+          memory: 4G
+    shm_size: 256m
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
+      interval: 30s
+      timeout: 5s
+      start_period: 60s
+      retries: 3
+  agent:
+    build:
+      context: .
+      dockerfile: Dockerfile.agent
+    environment:
+      - OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
+      - OPENROUTER_MODEL=${OPENROUTER_MODEL:-anthropic/claude-sonnet-4-20250514}
+      - OPENRA_URL=http://openra-rl:8000
+    command: >
+      python examples/llm_agent.py
+      --url http://openra-rl:8000
+      --max-turns ${MAX_TURNS:-200}
+      --verbose
+    depends_on:
+      openra-rl:
+        condition: service_healthy
+  mcp-bot:
+    build:
+      context: .
+      dockerfile: Dockerfile.agent
+    environment:
+      - OPENRA_URL=http://openra-rl:8000
+    command: >
+      python examples/mcp_bot.py
+      --url http://openra-rl:8000
+      --max-turns ${MAX_TURNS:-3000}
+      --verbose
+    depends_on:
+      openra-rl:
+        condition: service_healthy
+    profiles:
+      - bot

docker/build.sh ADDED Viewed

	@@ -0,0 +1,51 @@

+#!/bin/bash
+# Build the OpenRA-RL Docker image.
+#
+# This script assembles the build context by copying the OpenRA source
+# into the OpenRA-RL directory (Docker can't access files outside context).
+#
+# Usage:
+#   ./docker/build.sh                             # Auto-detect ../OpenRA
+#   OPENRA_DIR=/path/to/OpenRA ./docker/build.sh  # Specify OpenRA path
+set -euo pipefail
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+OPENRA_DIR="${OPENRA_DIR:-$PROJECT_DIR/OpenRA}"
+if [ ! -d "$OPENRA_DIR" ]; then
+    echo "ERROR: OpenRA source not found at $OPENRA_DIR"
+    echo "Run: git submodule update --init"
+    exit 1
+fi
+if [ ! -f "$OPENRA_DIR/OpenRA.sln" ]; then
+    echo "ERROR: $OPENRA_DIR doesn't look like an OpenRA repo (no OpenRA.sln)"
+    exit 1
+fi
+echo "=== OpenRA-RL Docker Build ==="
+echo "OpenRA source: $OPENRA_DIR"
+echo "Project dir:   $PROJECT_DIR"
+echo ""
+# If OpenRA source is external (not the submodule), copy it into build context
+REAL_OPENRA="$(cd "$OPENRA_DIR" && pwd)"
+REAL_SUBMODULE="$(cd "$PROJECT_DIR/OpenRA" 2>/dev/null && pwd || echo "")"
+if [ "$REAL_OPENRA" != "$REAL_SUBMODULE" ]; then
+    echo "Copying OpenRA source into build context..."
+    rsync -a --delete \
+        --exclude='.git' \
+        --exclude='bin/' \
+        --exclude='*/obj/' \
+        --exclude='*.user' \
+        "$OPENRA_DIR/" "$PROJECT_DIR/OpenRA/"
+fi
+echo "Building Docker image..."
+docker build -t openra-rl "$PROJECT_DIR" "$@"
+echo ""
+echo "=== Build complete ==="
+echo "Run with: docker run -p 8000:8000 openra-rl"

docker/entrypoint.sh ADDED Viewed

	@@ -0,0 +1,30 @@

+#!/bin/bash
+# Container entrypoint: bring up a virtual X display (Xvfb) on :99, then
+# replace this shell with the command passed as arguments.
+set -e
+# Start Xvfb (virtual framebuffer) for headless display
+echo "Starting Xvfb on display :99..."
+Xvfb :99 -screen 0 1024x768x24 -ac +extension GLX +render -noreset &
+XVFB_PID=$!
+# Wait for Xvfb to be ready
+# (fixed 2s grace period; the kill -0 below only checks the process is still alive)
+sleep 2
+if ! kill -0 $XVFB_PID 2>/dev/null; then
+    echo "ERROR: Xvfb failed to start"
+    exit 1
+fi
+echo "Xvfb started (PID: $XVFB_PID)"
+export DISPLAY=:99
+# Clean shutdown on signals
+# NOTE(review): the `exec "$@"` at the end replaces this shell, and traps for
+# caught signals are reset to their default action across exec, so cleanup()
+# can only fire in the short window before exec. Confirm whether Xvfb
+# teardown after the main command starts is actually expected here.
+cleanup() {
+    echo "Shutting down..."
+    kill $XVFB_PID 2>/dev/null || true
+    wait $XVFB_PID 2>/dev/null || true
+    exit 0
+}
+trap cleanup SIGTERM SIGINT
+# Execute the main command (uvicorn by default)
+echo "Starting OpenRA-RL environment server..."
+exec "$@"

docker/replay-viewer.sh ADDED Viewed

	@@ -0,0 +1,89 @@

+#!/bin/bash
+# Replay viewer: copy a replay into OpenRA's replay directory, start
+# Xvfb + x11vnc + noVNC, then launch OpenRA on that replay.
+# Usage: /replay-viewer.sh <replay_file_path>
+set -e
+# The base image sets LIBGL_ALWAYS_SOFTWARE=1 for the headless game server.
+# The replay viewer needs GPU rendering, so unset it.
+unset LIBGL_ALWAYS_SOFTWARE
+REPLAY_FILE="$1"
+if [ -z "$REPLAY_FILE" ]; then
+    echo "Usage: /replay-viewer.sh <replay_file_path>"
+    exit 1
+fi
+if [ ! -f "$REPLAY_FILE" ]; then
+    echo "ERROR: Replay file not found: $REPLAY_FILE"
+    exit 1
+fi
+# Tunable settings via environment variables (set by docker_manager.py)
+REPLAY_RESOLUTION="${OPENRA_RL_REPLAY_RESOLUTION:-1280x960}"
+# Split "WIDTHxHEIGHT": strip from the last "x" for width, up to the first "x" for height.
+REPLAY_WIDTH="${REPLAY_RESOLUTION%x*}"
+REPLAY_HEIGHT="${REPLAY_RESOLUTION#*x}"
+REPLAY_UI_SCALE="${OPENRA_RL_REPLAY_UI_SCALE:-1}"
+REPLAY_VIEWPORT="${OPENRA_RL_REPLAY_VIEWPORT_DISTANCE:-Medium}"
+REPLAY_MUTE="${OPENRA_RL_REPLAY_MUTE:-True}"
+# Copy replay to the expected directory structure so OpenRA can read metadata
+# NOTE(review): "{DEV_VERSION}" is a literal directory name here, not a shell
+# expansion; presumably it matches the version string of a dev build — confirm.
+REPLAY_DIR="/root/.config/openra/Replays/ra/{DEV_VERSION}"
+mkdir -p "$REPLAY_DIR"
+REPLAY_BASENAME=$(basename "$REPLAY_FILE")
+cp "$REPLAY_FILE" "$REPLAY_DIR/$REPLAY_BASENAME"
+REPLAY_PATH="$REPLAY_DIR/$REPLAY_BASENAME"
+echo "Replay copied to: $REPLAY_PATH"
+# Start Xvfb at configured resolution
+echo "Starting Xvfb on display :99 (${REPLAY_WIDTH}x${REPLAY_HEIGHT})..."
+Xvfb :99 -screen 0 ${REPLAY_WIDTH}x${REPLAY_HEIGHT}x24 -ac +extension GLX +render -noreset &
+XVFB_PID=$!
+sleep 2
+if ! kill -0 $XVFB_PID 2>/dev/null; then
+    echo "ERROR: Xvfb failed to start"
+    exit 1
+fi
+export DISPLAY=:99
+# Start x11vnc with performance optimizations
+echo "Starting VNC server on port 5900..."
+x11vnc -display :99 -forever -nopw -shared -rfbport 5900 \
+    -noxdamage -wait 50 -defer 50 -quiet &
+VNC_PID=$!
+sleep 1
+# Start noVNC (websockify proxy)
+echo "Starting noVNC on port 6080..."
+websockify --web /usr/share/novnc 6080 localhost:5900 &
+NOVNC_PID=$!
+sleep 1
+echo ""
+echo "=== Replay viewer ready ==="
+echo "Open in browser: http://localhost:6080/vnc.html"
+echo "Press Ctrl+C to stop"
+echo ""
+# Clean shutdown on signals
+# NOTE(review): the `exec dotnet ...` below replaces this shell, and traps for
+# caught signals are reset to their default action across exec, so cleanup()
+# can only fire before OpenRA is launched. Confirm whether the helper
+# processes are expected to be torn down by this handler.
+cleanup() {
+    echo "Shutting down replay viewer..."
+    kill $NOVNC_PID 2>/dev/null || true
+    kill $VNC_PID 2>/dev/null || true
+    kill $XVFB_PID 2>/dev/null || true
+    wait 2>/dev/null || true
+    exit 0
+}
+trap cleanup SIGTERM SIGINT
+# Launch OpenRA with rendering settings tuned for VNC replay viewing.
+# CPU is managed by Docker --cpus limit (set in docker_manager.py).
+exec dotnet /opt/openra/bin/OpenRA.dll \
+    Engine.EngineDir=/opt/openra \
+    Game.Mod=ra \
+    Game.Platform=Default \
+    Graphics.Mode=Windowed \
+    Graphics.WindowedSize=${REPLAY_WIDTH},${REPLAY_HEIGHT} \
+    Graphics.UIScale=${REPLAY_UI_SCALE} \
+    Graphics.VSync=False \
+    Graphics.DisableGLDebugMessageCallback=True \
+    Graphics.ViewportDistance=${REPLAY_VIEWPORT} \
+    Sound.Mute=${REPLAY_MUTE} \
+    "Launch.Replay=$REPLAY_PATH"

examples/README.md ADDED Viewed

	@@ -0,0 +1,50 @@

+# OpenRA-RL Examples
+## Scripted Bot
+A hardcoded Red Alert bot that plays a full game through the OpenEnv client API.
+**Strategy:** Deploy MCV → Build Power Plant → Build Barracks → Train 5 Rifle Infantry → Attack-move toward enemy.
+### Prerequisites
+```bash
+# Install the project
+pip install -e .
+# Start the OpenRA-RL server (Docker)
+docker run -p 8000:8000 openra-rl
+# Or build from source first:
+OPENRA_DIR=/path/to/OpenRA ./docker/build.sh
+docker run -p 8000:8000 openra-rl
+```
+### Run
+```bash
+# Basic run
+python examples/scripted_bot.py
+# Custom server URL
+python examples/scripted_bot.py --url http://localhost:8000
+# Verbose mode (prints every bot decision)
+python examples/scripted_bot.py --verbose
+# Limit episode length
+python examples/scripted_bot.py --max-steps 2000
+```
+### Output
+```
+Connecting to http://localhost:8000...
+Game started! Map: singles
+Step    0 | Tick     0 | $ 5000 | Units: 2 (combat: 0) | Buildings: [none] | Phase: deploy_mcv
+Step  100 | Tick   100 | $ 4700 | Units: 1 (combat: 0) | Buildings: [fact] | Phase: build_base
+Step  200 | Tick   200 | $ 4100 | Units: 1 (combat: 0) | Buildings: [fact, powr] | Phase: build_base
+...
+Game over: win after 3421 steps (tick 3421)
+Total reward: 2.150
+```

examples/config-lmstudio.yaml ADDED Viewed

	@@ -0,0 +1,14 @@

+# OpenRA-RL config for LM Studio (local)
+# Usage: python examples/llm_agent.py --config examples/config-lmstudio.yaml
+llm:
+  base_url: "http://localhost:1234/v1/chat/completions"
+  model: "lmstudio-community/Meta-Llama-3.1-70B-Instruct-GGUF"
+  api_key: ""                         # No key needed for LM Studio
+  max_tokens: 2000
+  extra_headers: {}
+  request_timeout_s: 180.0
+agent:
+  max_time_s: 3600
+  verbose: true

examples/config-minimal.yaml ADDED Viewed

	@@ -0,0 +1,21 @@

+# OpenRA-RL config with minimal tool set
+# Reduces tool count for models with limited context or tool-calling ability.
+# Usage: python examples/llm_agent.py --config examples/config-minimal.yaml
+planning:
+  enabled: false
+tools:
+  categories:
+    knowledge: false                  # Disable lookup_unit, lookup_building, etc.
+    bulk_knowledge: false             # Disable get_faction_briefing, get_map_analysis, etc.
+    planning: false                   # Disabled automatically when planning.enabled=false
+    unit_groups: false                # Disable assign_group, command_group, etc.
+    terrain: false                    # Disable get_terrain_at
+    compound: false                   # Disable batch, plan
+alerts:
+  stance_warning: false
+  idle_army: false
+  no_scouting: false
+  no_defenses: false

examples/config-ollama.yaml ADDED Viewed

	@@ -0,0 +1,14 @@

+# OpenRA-RL config for Ollama (local)
+# Usage: python examples/llm_agent.py --config examples/config-ollama.yaml
+llm:
+  base_url: "http://localhost:11434/v1/chat/completions"
+  model: "qwen3:32b"
+  api_key: ""                         # No key needed for Ollama
+  max_tokens: 2000
+  extra_headers: {}
+  request_timeout_s: 300.0            # Local models need more time (auto-set if <= 120)
+agent:
+  max_time_s: 3600                    # 1 hour (local models are slower)
+  verbose: true

examples/config-openrouter.yaml ADDED Viewed

	@@ -0,0 +1,13 @@

+# OpenRA-RL config for OpenRouter (cloud)
+# Usage: OPENROUTER_API_KEY=sk-or-... python examples/llm_agent.py --config examples/config-openrouter.yaml
+llm:
+  base_url: "https://openrouter.ai/api/v1/chat/completions"
+  model: "anthropic/claude-sonnet-4-20250514"
+  # api_key: set via OPENROUTER_API_KEY env var
+  extra_headers:
+    HTTP-Referer: "https://github.com/openra-rl"
+    X-Title: "OpenRA-RL Agent"
+agent:
+  max_time_s: 1800                    # 30 minutes

examples/llm_agent.py ADDED Viewed

	@@ -0,0 +1,170 @@

+#!/usr/bin/env python3
+"""LLM agent that plays Red Alert using any OpenAI-compatible model.
+Supports OpenRouter, Ollama, LM Studio, or any local/remote endpoint
+that implements the OpenAI Chat Completions API with tool calling.
+Usage:
+    # With OpenRouter (cloud)
+    export OPENROUTER_API_KEY=sk-or-...
+    python examples/llm_agent.py --verbose
+    # With a YAML config file
+    python examples/llm_agent.py --config examples/config-ollama.yaml
+    # With LM Studio (local, no API key needed)
+    python examples/llm_agent.py --base-url http://localhost:1234/v1/chat/completions --model my-model
+"""
+import argparse
+import asyncio
+import sys
+from dotenv import load_dotenv
+load_dotenv()
+from openra_env.config import load_config
+from openra_env.agent import run_agent
+# Re-export for backwards compatibility
+from openra_env.agent import (  # noqa: F401
+    SYSTEM_PROMPT,
+    load_system_prompt,
+    compose_pregame_briefing,
+    format_state_briefing,
+    mcp_tools_to_openai,
+    _sanitize_messages,
+    chat_completion,
+    compress_history,
+)
+# Line-buffered stdout so output is observable in real time
+sys.stdout.reconfigure(line_buffering=True)
+def main():
+    parser = argparse.ArgumentParser(
+        description="LLM agent that plays Red Alert via any OpenAI-compatible model",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=(
+            "Examples:\n"
+            "  %(prog)s --config examples/config-ollama.yaml --verbose\n"
+            "  %(prog)s --api-key sk-or-... --verbose\n"
+            "  %(prog)s --base-url http://localhost:1234/v1/chat/completions --model my-model\n"
+        ),
+    )
+    parser.add_argument(
+        "--config", "-c",
+        default=None,
+        help="Path to YAML config file (default: auto-discover config.yaml)",
+    )
+    parser.add_argument(
+        "--url",
+        default=None,
+        help="OpenRA-RL server URL (overrides config agent.server_url)",
+    )
+    parser.add_argument(
+        "--base-url",
+        default=None,
+        help="LLM API endpoint URL (overrides config llm.base_url)",
+    )
+    parser.add_argument(
+        "--model",
+        default=None,
+        help="Model ID (overrides config llm.model)",
+    )
+    parser.add_argument(
+        "--api-key",
+        default=None,
+        help="API key for LLM endpoint (overrides config llm.api_key)",
+    )
+    parser.add_argument(
+        "--max-turns",
+        type=int,
+        default=None,
+        help="Maximum LLM turns, 0 = unlimited (overrides config agent.max_turns)",
+    )
+    parser.add_argument(
+        "--max-time",
+        type=int,
+        default=None,
+        help="Maximum wall-clock time in seconds (overrides config agent.max_time_s)",
+    )
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="Print detailed LLM reasoning and tool calls",
+    )
+    parser.add_argument(
+        "--log-file",
+        default=None,
+        help="Write all output to this log file in addition to stdout",
+    )
+    parser.add_argument(
+        "--system-prompt",
+        default=None,
+        help="Path to a custom system prompt .txt file (overrides built-in default)",
+    )
+    args = parser.parse_args()
+    # Build config: YAML file + env vars + CLI overrides (CLI wins over .env)
+    cli: dict = {}
+    if args.url is not None:
+        cli.setdefault("agent", {})["server_url"] = args.url
+    if args.base_url is not None:
+        cli.setdefault("llm", {})["base_url"] = args.base_url
+    if args.model is not None:
+        cli.setdefault("llm", {})["model"] = args.model
+    if args.api_key is not None:
+        cli.setdefault("llm", {})["api_key"] = args.api_key
+    if args.max_turns is not None:
+        cli.setdefault("agent", {})["max_turns"] = args.max_turns
+    if args.max_time is not None:
+        cli.setdefault("agent", {})["max_time_s"] = args.max_time
+    if args.verbose:
+        cli.setdefault("agent", {})["verbose"] = True
+    if args.log_file is not None:
+        cli.setdefault("agent", {})["log_file"] = args.log_file
+    if args.system_prompt is not None:
+        cli.setdefault("agent", {})["system_prompt_file"] = args.system_prompt
+    config = load_config(config_path=args.config, cli_overrides=cli)
+    verbose = config.agent.verbose
+    # Set up logging to file if requested — tee all print() to both stdout and file
+    if config.agent.log_file:
+        import builtins
+        _builtin_print = builtins.print
+        _log_fh = open(config.agent.log_file, "w", encoding="utf-8")
+        def _tee_print(*pargs, **kwargs):
+            _builtin_print(*pargs, **kwargs)
+            kwargs.pop("file", None)
+            _builtin_print(*pargs, file=_log_fh, **kwargs)
+            _log_fh.flush()
+        builtins.print = _tee_print
+    # API key validation: only required for remote endpoints
+    is_local = any(h in config.llm.base_url for h in ("localhost", "127.0.0.1", "0.0.0.0"))
+    if not config.llm.api_key and not is_local:
+        print("Error: API key required for remote LLM endpoints.")
+        print("  Set OPENROUTER_API_KEY or LLM_API_KEY environment variable, use --api-key,")
+        print("  or use a config file with llm.api_key set.")
+        print("  For local models (Ollama, LM Studio), use --base-url http://localhost:...")
+        sys.exit(1)
+    try:
+        asyncio.run(run_agent(config, verbose))
+    except KeyboardInterrupt:
+        print("\nInterrupted by user")
+        sys.exit(0)
+    except ConnectionRefusedError:
+        print(f"\nCould not connect to {config.agent.server_url}")
+        print("Is the OpenRA-RL server running?")
+        print("  docker run -p 8000:8000 openra-rl")
+        sys.exit(1)
+if __name__ == "__main__":
+    main()

examples/mcp_bot.py ADDED Viewed

	@@ -0,0 +1,619 @@

+#!/usr/bin/env python3
+"""MCP tool-based Red Alert bot that plays entirely through MCP tools.
+Validates the full MCP integration path: tool discovery, game knowledge
+lookups, read tools for state, and action tools for commands. Uses
+OpenRAMCPClient to interact with the OpenRA-RL server via WebSocket.
+Exercises the MCP tools listed below (plus the planning tools used during startup):
+  - Read tools: get_game_state, get_economy, get_units, get_buildings,
+    get_enemies, get_production, get_map_info
+  - Knowledge tools: lookup_unit, lookup_building, lookup_tech_tree, lookup_faction,
+    get_faction_briefing, get_map_analysis, batch_lookup
+  - Action tools: advance, deploy_unit, build_structure, place_building,
+    build_unit, move_units, attack_move, attack_target, stop_units,
+    set_rally_point, guard_target, set_stance, sell_building, repair_building,
+    harvest, power_down, set_primary
+  - Replay tool: get_replay_path
+Usage:
+    docker run -p 8000:8000 openra-rl
+    python examples/mcp_bot.py --verbose
+"""
+import argparse
+import asyncio
+import json
+import sys
+from typing import Any, Optional
+# Line-buffered stdout so output is observable in real time
+sys.stdout.reconfigure(line_buffering=True)
+from openra_env.mcp_ws_client import OpenRAMCPClient
+class MCPBot:
+    """State-machine bot that plays Red Alert using MCP tool calls.
+    Phases:
+        startup     - Look up tech tree and faction info
+        deploy_mcv  - Find and deploy MCV
+        build_base  - Build power/barracks/refinery/war factory
+        train_army  - Train infantry + vehicles, set rally points
+        attack      - Attack-move toward enemy
+        sustain     - Repair, sell damaged, power management
+    """
+    BARRACKS_TYPES = {"tent", "barr"}
+    WAR_FACTORY_TYPES = {"weap"}
+    BUILD_ORDER = ["powr", "barracks", "proc", "weap", "powr"]
+    INFANTRY_TARGET = 6
+    GUARD_COUNT = 2
+    COMBAT_TYPES = {"e1", "e2", "e3", "e4", "1tnk", "2tnk", "3tnk", "arty", "jeep", "apc"}
+    INFANTRY_TYPES = {"e1", "e2", "e3", "e4"}
+    def __init__(self, env: OpenRAMCPClient, verbose: bool = False, no_planning: bool = False):
+        self.env = env
+        self.verbose = verbose
+        self.no_planning = no_planning
+        self.phase = "startup"
+        self.build_index = 0
+        self.placement_count = 0
+        self.deploy_issued = False
+        self._guards_assigned: set[int] = set()
+        self._stances_set: set[int] = set()
+        self._rally_set: set[int] = set()
+        self._repair_issued: set[int] = set()
+        self._sold: set[int] = set()
+        self._powered_down: set[int] = set()
+        self._primary_set: set[int] = set()
+        self._apc_trained = False
+        self._tools_exercised: set[str] = set()
+    async def call(self, tool_name: str, **kwargs: Any) -> Any:
+        """Call an MCP tool and track which tools have been exercised."""
+        self._tools_exercised.add(tool_name)
+        result = await self.env.call_tool(tool_name, **kwargs)
+        return result
+    def _log(self, msg: str):
+        if self.verbose:
+            print(f"  [MCPBot] {msg}")
+    # ── Main loop ─────────────────────────────────────────────────
+    async def run(self, max_turns: int) -> dict:
+        """Run the bot for up to max_turns."""
+        # Phase: startup — exercise knowledge tools
+        await self._startup()
+        turn = 0
+        while turn < max_turns:
+            state = await self.call("get_game_state")
+            if state.get("done"):
+                self._log(f"Game over: {state.get('result', '?')}")
+                break
+            turn += 1
+            await self._tick(state, turn)
+            if turn % 100 == 0:
+                self._print_status(turn, state)
+        # End-of-game report
+        final_state = await self.call("get_game_state")
+        replay = await self.call("get_replay_path")
+        self._log(f"Replay: {replay}")
+        return {
+            "turns": turn,
+            "final_state": final_state,
+            "replay": replay,
+            "tools_exercised": sorted(self._tools_exercised),
+            "tools_count": len(self._tools_exercised),
+            "planning_strategy": getattr(self, "_planning_strategy", ""),
+        }
+    # ── Startup: knowledge tools ──────────────────────────────────
+    async def _startup(self):
+        """Run planning phase and look up game knowledge at game start."""
+        if self.no_planning:
+            self._log("=== Startup: Planning DISABLED ===")
+            # Use bulk knowledge tool instead of individual lookups
+            briefing = await self.call("get_faction_briefing")
+            self._log(f"Faction briefing: {briefing.get('side', '?')}, "
+                      f"{len(briefing.get('units', {}))} units, "
+                      f"{len(briefing.get('buildings', {}))} buildings")
+        else:
+            self._log("=== Startup: Planning Phase ===")
+            # Try the planning phase
+            planning = await self.call("start_planning_phase")
+            if planning.get("planning_active"):
+                self._log(f"Planning active — opponent: {planning.get('opponent_summary', '')[:120]}")
+                # Use bulk tools for efficient research
+                briefing = await self.call("get_faction_briefing")
+                self._log(f"Faction briefing: {briefing.get('side', '?')}, "
+                          f"{len(briefing.get('units', {}))} units, "
+                          f"{len(briefing.get('buildings', {}))} buildings")
+                map_analysis = await self.call("get_map_analysis")
+                self._log(f"Map analysis: {map_analysis.get('map_type', '?')}, "
+                          f"{len(map_analysis.get('resource_patches', []))} resource patches")
+                intel = await self.call("get_opponent_intel")
+                aggressiveness = intel.get("aggressiveness", "unknown")
+                self._log(f"Opponent aggressiveness: {aggressiveness}")
+                # Formulate strategy based on opponent profile
+                if aggressiveness in ("high", "very_high"):
+                    strategy = (
+                        "Defensive opening: power plant, barracks, turrets at base entrance, "
+                        "then ore refinery for economy. Build war factory for tanks once stable. "
+                        "Scout early to find and deny enemy expansion."
+                    )
+                else:
+                    strategy = (
+                        "Rush opening: power plant, barracks, infantry rush while building "
+                        "ore refinery. Transition to tanks from war factory."
+                    )
+                result = await self.call("end_planning_phase", strategy=strategy)
+                self._planning_strategy = strategy
+                self._log(f"Planning complete: {result.get('planning_duration_seconds', '?')}s, strategy: {strategy[:80]}")
+            else:
+                # Planning disabled server-side
+                self._log(f"Planning: {planning.get('message', 'disabled')}")
+                briefing = await self.call("get_faction_briefing")
+                self._log(f"Faction briefing: {briefing.get('side', '?')}, "
+                          f"{len(briefing.get('units', {}))} units, "
+                          f"{len(briefing.get('buildings', {}))} buildings")
+        map_info = await self.call("get_map_info")
+        self._log(f"Map: {map_info.get('map_name', '?')} ({map_info.get('width')}x{map_info.get('height')})")
+        self.phase = "deploy_mcv"
+        self._log("Phase → deploy_mcv")
+    # ── Per-tick decision ─────────────────────────────────────────
+    async def _tick(self, state: dict, turn: int):
+        """Make decisions for one game tick."""
+        # Update phase based on state
+        await self._update_phase()
+        if self.phase == "deploy_mcv":
+            await self._do_deploy()
+        elif self.phase == "build_base":
+            await self._do_build()
+        elif self.phase == "train_army":
+            await self._do_build()
+            await self._do_train()
+        elif self.phase == "attack":
+            await self._do_build()
+            await self._do_train()
+            await self._do_combat()
+            await self._do_sustain()
+        # Advance game
+        await self.call("advance", ticks=1)
+    async def _update_phase(self):
+        """Transition phases based on game state."""
+        buildings = await self.call("get_buildings")
+        units = await self.call("get_units")
+        has_cy = any(b["type"] == "fact" for b in buildings)
+        has_barracks = any(b["type"] in self.BARRACKS_TYPES for b in buildings)
+        combat_units = [u for u in units if u["type"] in self.COMBAT_TYPES]
+        non_guard = [u for u in combat_units if u["actor_id"] not in self._guards_assigned]
+        if self.phase == "deploy_mcv" and has_cy:
+            self.phase = "build_base"
+            self._log("Phase → build_base")
+        elif self.phase == "build_base" and self.build_index >= len(self.BUILD_ORDER):
+            self.phase = "train_army"
+            self._log("Phase → train_army")
+        elif self.phase == "train_army" and len(non_guard) >= self.INFANTRY_TARGET:
+            self.phase = "attack"
+            self._log(f"Phase → attack ({len(non_guard)} combat units)")
+    # ── Deploy MCV ────────────────────────────────────────────────
+    async def _do_deploy(self):
+        """Find and deploy MCV."""
+        if self.deploy_issued:
+            return
+        units = await self.call("get_units")
+        mcv = next((u for u in units if u["type"] == "mcv"), None)
+        if mcv:
+            self._log(f"Deploying MCV (actor {mcv['actor_id']})")
+            await self.call("deploy_unit", unit_id=mcv["actor_id"])
+            self.deploy_issued = True
+    # ── Build base ────────────────────────────────────────────────
+    async def _do_build(self):
+        """Handle building construction and placement."""
+        # Check for completed buildings to place
+        production = await self.call("get_production")
+        buildings = await self.call("get_buildings")
+        for p in production.get("queue", []):
+            if p["queue_type"] == "Building" and p["progress"] >= 0.99:
+                cy = next((b for b in buildings if b["type"] == "fact"), None)
+                if cy:
+                    x, y = self._placement_offset(cy)
+                    self._log(f"Placing {p['item']} at ({x}, {y})")
+                    await self.call("place_building", building_type=p["item"], cell_x=x, cell_y=y)
+                    self.placement_count += 1
+        # Start new building if nothing in queue
+        if self.build_index >= len(self.BUILD_ORDER):
+            return
+        building_in_queue = any(p["queue_type"] == "Building" for p in production.get("queue", []))
+        if building_in_queue:
+            return
+        item = self.BUILD_ORDER[self.build_index]
+        # Resolve faction-agnostic barracks
+        if item == "barracks":
+            available = production.get("available", [])
+            if "tent" in available:
+                item = "tent"
+            elif "barr" in available:
+                item = "barr"
+            else:
+                return
+        # Check if already built
+        already = sum(1 for b in buildings if b["type"] == item)
+        if already > 0 and self.build_index < len(self.BUILD_ORDER) - 1:
+            # Skip if not a duplicate in build order
+            count_in_order = sum(1 for x in self.BUILD_ORDER[:self.build_index + 1]
+                                 if x == item or (x == "barracks" and item in self.BARRACKS_TYPES))
+            if already >= count_in_order:
+                self.build_index += 1
+                return
+        available = production.get("available", [])
+        if item in available:
+            economy = await self.call("get_economy")
+            building_stats = await self.call("lookup_building", building_type=item)
+            cost = building_stats.get("cost", 0)
+            if economy.get("cash", 0) >= cost:
+                self._log(f"Building {item} (#{self.build_index + 1}/{len(self.BUILD_ORDER)}, cost=${cost})")
+                await self.call("build_structure", building_type=item)
+                self.build_index += 1
+        # Set rally points on production buildings
+        await self._do_rally_points(buildings)
+    async def _do_rally_points(self, buildings: list[dict]):
+        """Set rally points on barracks and war factories."""
+        cy = next((b for b in buildings if b["type"] == "fact"), None)
+        if not cy:
+            return
+        for b in buildings:
+            if b["type"] in ("tent", "barr", "weap") and b["actor_id"] not in self._rally_set:
+                rally_x = cy["cell_x"] if cy["cell_x"] > 0 else cy.get("pos_x", 0) // 1024
+                rally_y = cy["cell_y"] if cy["cell_y"] > 0 else cy.get("pos_y", 0) // 1024
+                self._log(f"Setting rally on {b['type']} (actor {b['actor_id']}) → ({rally_x}, {rally_y})")
+                await self.call("set_rally_point", building_id=b["actor_id"], cell_x=rally_x, cell_y=rally_y)
+                self._rally_set.add(b["actor_id"])
+    def _placement_offset(self, cy: dict) -> tuple[int, int]:
+        """Calculate placement position relative to CY."""
+        cx = cy.get("pos_x", 0) // 1024 if cy.get("cell_x", 0) == 0 else cy["cell_x"]
+        cy_y = cy.get("pos_y", 0) // 1024 if cy.get("cell_y", 0) == 0 else cy["cell_y"]
+        offsets = [
+            (3, 0), (-3, 0), (0, 3), (0, -3),
+            (3, 3), (-3, 3), (3, -3), (-3, -3),
+            (6, 0), (-6, 0), (0, 6), (0, -6),
+        ]
+        idx = self.placement_count % len(offsets)
+        dx, dy = offsets[idx]
+        return cx + dx, cy_y + dy
+    # ── Train army ────────────────────────────────────────────────
+    async def _do_train(self):
+        """Train infantry and vehicles."""
+        production = await self.call("get_production")
+        buildings = await self.call("get_buildings")
+        units = await self.call("get_units")
+        economy = await self.call("get_economy")
+        has_barracks = any(b["type"] in self.BARRACKS_TYPES for b in buildings)
+        infantry_training = any(
+            p["queue_type"] == "Infantry" and p["progress"] < 0.99
+            for p in production.get("queue", [])
+        )
+        infantry = [u for u in units if u["type"] in self.INFANTRY_TYPES]
+        total_target = self.INFANTRY_TARGET + self.GUARD_COUNT
+        # Train infantry
+        if has_barracks and not infantry_training and len(infantry) < total_target:
+            available = production.get("available", [])
+            if "e1" in available and economy.get("cash", 0) >= 100:
+                self._log(f"Training e1 ({len(infantry)}/{total_target})")
+                await self.call("build_unit", unit_type="e1")
+        # Train APC from war factory
+        has_weap = any(b["type"] == "weap" for b in buildings)
+        vehicle_training = any(
+            p["queue_type"] == "Vehicle" and p["progress"] < 0.99
+            for p in production.get("queue", [])
+        )
+        if has_weap and not vehicle_training and not self._apc_trained:
+            available = production.get("available", [])
+            if "apc" in available and economy.get("cash", 0) >= 800:
+                self._log("Training APC")
+                await self.call("build_unit", unit_type="apc")
+                self._apc_trained = True
+        # Continuous vehicle production in attack phase
+        if self.phase == "attack" and has_weap and not vehicle_training:
+            available = production.get("available", [])
+            if "1tnk" in available and economy.get("cash", 0) >= 700:
+                self._log("Training 1tnk (continuous)")
+                await self.call("build_unit", unit_type="1tnk")
+        # Set stances on new units
+        for u in units:
+            if u["actor_id"] in self._stances_set:
+                continue
+            if u["type"] not in self.COMBAT_TYPES:
+                continue
+            stance = "defend" if u["actor_id"] in self._guards_assigned else "attack_anything"
+            await self.call("set_stance", unit_ids=str(u["actor_id"]), stance=stance)
+            self._stances_set.add(u["actor_id"])
+        # Assign guards to CY
+        if len(self._guards_assigned) < self.GUARD_COUNT:
+            cy = next((b for b in buildings if b["type"] == "fact"), None)
+            if cy:
+                for u in units:
+                    if len(self._guards_assigned) >= self.GUARD_COUNT:
+                        break
+                    if (u["type"] in self.INFANTRY_TYPES
+                            and u["is_idle"]
+                            and u["actor_id"] not in self._guards_assigned):
+                        self._log(f"Assigning {u['type']} (actor {u['actor_id']}) to guard CY")
+                        await self.call("guard_target", unit_ids=str(u["actor_id"]), target_actor_id=cy["actor_id"])
+                        self._guards_assigned.add(u["actor_id"])
+        # Set primary on multiple production buildings
+        for btype_set in [self.BARRACKS_TYPES, self.WAR_FACTORY_TYPES]:
+            bldgs_of_type = [b for b in buildings if b["type"] in btype_set]
+            if len(bldgs_of_type) >= 2:
+                newest = max(bldgs_of_type, key=lambda b: b["actor_id"])
+                if newest["actor_id"] not in self._primary_set:
+                    self._log(f"Setting primary: {newest['type']} (actor {newest['actor_id']})")
+                    await self.call("set_primary", building_id=newest["actor_id"])
+                    self._primary_set.add(newest["actor_id"])
+    # ── Combat ────────────────────────────────────────────────────
+    async def _do_combat(self):
+        """Attack-move idle combat units toward enemies."""
+        units = await self.call("get_units")
+        enemies = await self.call("get_enemies")
+        idle_fighters = [
+            u for u in units
+            if (u["type"] in self.COMBAT_TYPES
+                and u["is_idle"]
+                and u["actor_id"] not in self._guards_assigned)
+        ]
+        if len(idle_fighters) < 2:
+            return
+        # Find attack target
+        target_x, target_y = self._find_attack_target(enemies, units)
+        unit_id_list = [u["actor_id"] for u in idle_fighters]
+        unit_ids = ",".join(str(i) for i in unit_id_list)
+        self._log(f"Attacking with {len(unit_id_list)} units toward ({target_x}, {target_y})")
+        await self.call("attack_move", unit_ids=unit_ids, target_x=target_x, target_y=target_y)
+        # Attack specific visible enemy if close
+        if enemies.get("units"):
+            enemy = enemies["units"][0]
+            nearby = [u for u in idle_fighters[:3] if u["can_attack"]]
+            if nearby:
+                nearby_ids = ",".join(str(u["actor_id"]) for u in nearby)
+                await self.call(
+                    "attack_target",
+                    unit_ids=nearby_ids,
+                    target_actor_id=enemy["actor_id"],
+                )
+    def _find_attack_target(self, enemies: dict, units: list[dict]) -> tuple[int, int]:
+        """Find best attack target: enemy buildings > units > map center."""
+        if enemies.get("buildings"):
+            b = enemies["buildings"][0]
+            return b["cell_x"], b["cell_y"]
+        if enemies.get("units"):
+            u = enemies["units"][0]
+            return u["cell_x"], u["cell_y"]
+        return 64, 64  # fallback: map center
+    # ── Sustain ───────────────────────────────────────────────────
+    async def _do_sustain(self):
+        """Repair, sell, and manage power."""
+        buildings = await self.call("get_buildings")
+        economy = await self.call("get_economy")
+        for b in buildings:
+            # Repair damaged buildings
+            if (b["hp_percent"] < 0.7
+                    and not b.get("is_repairing", False)
+                    and b["actor_id"] not in self._repair_issued
+                    and economy.get("cash", 0) >= 500):
+                self._log(f"Repairing {b['type']} (actor {b['actor_id']}, hp={b['hp_percent']:.0%})")
+                await self.call("repair_building", building_id=b["actor_id"])
+                self._repair_issued.add(b["actor_id"])
+            # Sell heavily damaged buildings
+            if (b["hp_percent"] < 0.2
+                    and b["type"] != "fact"
+                    and b["actor_id"] not in self._sold):
+                self._log(f"Selling {b['type']} (actor {b['actor_id']}, hp={b['hp_percent']:.0%})")
+                await self.call("sell_building", building_id=b["actor_id"])
+                self._sold.add(b["actor_id"])
+        # Power management
+        power_balance = economy.get("power_provided", 0) - economy.get("power_drained", 0)
+        if power_balance < 0:
+            power_down_priority = ["dome", "spen", "syrd", "hpad", "afld", "fix"]
+            for btype in power_down_priority:
+                for b in buildings:
+                    if (b["type"] == btype
+                            and b.get("is_powered", True)
+                            and b["actor_id"] not in self._powered_down):
+                        self._log(f"Powering down {b['type']} (actor {b['actor_id']}) — power: {power_balance}")
+                        await self.call("power_down", building_id=b["actor_id"])
+                        self._powered_down.add(b["actor_id"])
+                        return  # one at a time
+        # Send idle harvesters to harvest
+        units = await self.call("get_units")
+        for u in units:
+            if u["type"] == "harv" and u["is_idle"]:
+                self._log(f"Sending harvester {u['actor_id']} to harvest")
+                await self.call("harvest", unit_id=u["actor_id"])
+                break  # one at a time
+        # Stop fleeing units
+        fleeing = [u for u in units if u["type"] in self.COMBAT_TYPES
+                   and u.get("current_activity") == "Flee"]
+        if fleeing:
+            await self.call("stop_units", unit_ids=",".join(str(u["actor_id"]) for u in fleeing[:3]))
+        # Move scouts
+        idle_scouts = [u for u in units
+                       if u["type"] in ("jeep", "e1") and u["is_idle"]
+                       and u["actor_id"] not in self._guards_assigned]
+        if idle_scouts and len(idle_scouts) > 3:
+            scout = idle_scouts[0]
+            await self.call("move_units", unit_ids=str(scout["actor_id"]), target_x=64, target_y=64)
+    # ── Status display ────────────────────────────────────────────
+    def _print_status(self, turn: int, state: dict):
+        eco = state.get("economy", {})
+        power = eco.get("power_provided", 0) - eco.get("power_drained", 0)
+        print(
+            f"Turn {turn:4d} | Tick {state.get('tick', 0):5d} | "
+            f"${eco.get('cash', 0):5d} | Pwr:{power:+d} | "
+            f"Units:{state.get('own_units', 0)} | "
+            f"Enemy:{state.get('visible_enemies', 0)} | "
+            f"Bldgs:{state.get('own_buildings', 0)} | {self.phase}"
+        )
+# ── Main ──────────────────────────────────────────────────────────
+async def run_mcp_bot(url: str, max_turns: int, verbose: bool, no_planning: bool = False):
+    """Connect to the OpenRA-RL server, play one game with MCPBot, and print a scorecard.
+    Args:
+        url: Base URL passed to ``OpenRAMCPClient``.
+        max_turns: Turn limit forwarded to ``MCPBot.run``.
+        verbose: Forwarded to ``MCPBot`` to enable detailed decision logging.
+        no_planning: Forwarded to ``MCPBot``; when true the planning phase is skipped.
+    """
+    print(f"Connecting to {url}...")
+    async with OpenRAMCPClient(base_url=url, message_timeout_s=300.0) as env:
+        print("Resetting environment (launching OpenRA)...")
+        await env.reset()
+        # Discover available tools
+        tools = await env.list_tools()
+        tool_names = sorted(t.name for t in tools)
+        print(f"Discovered {len(tools)} MCP tools: {tool_names}")
+        # Run bot
+        bot = MCPBot(env, verbose=verbose, no_planning=no_planning)
+        result = bot.run(max_turns)
+        # Tolerate both a coroutine and an already-computed result from run().
+        if asyncio.iscoroutine(result):
+            result = await result
+        # Final report
+        print()
+        print("=" * 70)
+        final = result["final_state"]
+        print(f"Game finished after {result['turns']} turns")
+        if final.get("done"):
+            print(f"Result: {final.get('result', '?').upper()}")
+        # Score card — missing sections default to empty dicts so every line still prints
+        mil = final.get("military", {})
+        eco = final.get("economy", {})
+        planning = result.get("planning_strategy", "")
+        print()
+        print("--- SCORECARD ---")
+        # An empty strategy string is reported as planning OFF.
+        print(f"  Planning:         {'ON — ' + planning if planning else 'OFF'}")
+        print(f"  Ticks played:     {final.get('tick', '?')}")
+        print(f"  Units killed:     {mil.get('units_killed', 0)} (value: ${mil.get('kills_cost', 0)})")
+        print(f"  Units lost:       {mil.get('units_lost', 0)} (value: ${mil.get('deaths_cost', 0)})")
+        print(f"  Buildings killed: {mil.get('buildings_killed', 0)}")
+        print(f"  Buildings lost:   {mil.get('buildings_lost', 0)}")
+        print(f"  Army value:       ${mil.get('army_value', 0)}")
+        print(f"  Assets value:     ${mil.get('assets_value', 0)}")
+        print(f"  Experience:       {mil.get('experience', 0)}")
+        print(f"  Orders issued:    {mil.get('order_count', 0)}")
+        print(f"  Cash remaining:   ${eco.get('cash', 0)}")
+        # Denominator is clamped to at least 1 so zero losses cannot divide by zero.
+        print(f"  K/D cost ratio:   {mil.get('kills_cost', 0) / max(mil.get('deaths_cost', 1), 1):.2f}")
+        print()
+        print(f"Tools exercised: {result['tools_count']}/{len(tools)}")
+        print(f"  {result['tools_exercised']}")
+        # NOTE(review): assumes the replay entry is a dict with a "path" key — confirm against get_replay_path.
+        if result.get("replay", {}).get("path"):
+            print(f"Replay: {result['replay']['path']}")
+        print("=" * 70)
+def main():
+    parser = argparse.ArgumentParser(description="MCP tool-based Red Alert bot")
+    parser.add_argument(
+        "--url",
+        default="http://localhost:8000",
+        help="OpenRA-RL server URL (default: http://localhost:8000)",
+    )
+    parser.add_argument(
+        "--max-turns",
+        type=int,
+        default=3000,
+        help="Maximum turns before stopping (default: 3000)",
+    )
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="Print detailed bot decisions",
+    )
+    parser.add_argument(
+        "--no-planning",
+        action="store_true",
+        help="Disable planning phase (for comparison runs)",
+    )
+    args = parser.parse_args()
+    try:
+        asyncio.run(run_mcp_bot(args.url, args.max_turns, args.verbose, no_planning=args.no_planning))
+    except KeyboardInterrupt:
+        print("\nInterrupted by user")
+        sys.exit(0)
+    except ConnectionRefusedError:
+        print(f"\nCould not connect to {args.url}")
+        print("Is the OpenRA-RL server running?")
+        print("  docker run -p 8000:8000 openra-rl")
+        sys.exit(1)
+# Script entry point: run the CLI only when executed directly, not when imported.
+if __name__ == "__main__":
+    main()

examples/scripted_bot.py ADDED Viewed

	@@ -0,0 +1,831 @@

+#!/usr/bin/env python3
+"""Scripted Red Alert bot that plays a full game via the OpenEnv client API.
+Exercises ALL Sprint 4+5 observation fields and action types:
+  - Observations: spatial_map, visible_enemy_buildings, unit facing/stance/speed/
+    attack_range/experience/passengers, building cell coords/can_produce/power/
+    rally/repair/sell_value
+  - Actions: all 20 types including GUARD, SET_STANCE, ENTER_TRANSPORT, UNLOAD,
+    SET_RALLY_POINT, REPAIR, SELL, POWER_DOWN, SET_PRIMARY
+Usage:
+    docker run -p 8000:8000 openra-rl
+    python examples/scripted_bot.py --verbose
+"""
+import argparse
+import asyncio
+import base64
+import sys
+from typing import List, Optional, Tuple
+from openra_env.client import OpenRAEnv
+from openra_env.models import (
+    ActionType,
+    BuildingInfoModel,
+    CommandModel,
+    OpenRAAction,
+    OpenRAObservation,
+    UnitInfoModel,
+)
+# Stance constants matching C# AutoTarget.UnitStance enum
+STANCE_HOLD_FIRE = 0
+STANCE_RETURN_FIRE = 1
+STANCE_DEFEND = 2
+STANCE_ATTACK_ANYTHING = 3
+STANCE_NAMES = {0: "HoldFire", 1: "ReturnFire", 2: "Defend", 3: "AttackAnything"}
+class ScriptedBot:
+    """State-machine bot with a Red Alert build order exercising all actions.
+    Phases (values of ``self.phase``, advanced by ``_update_phase``):
+        deploy_mcv   - Deploy MCV, set stance on starting units
+        build_base   - Build power/barracks/war factory, set rally points
+        train_army   - Train infantry + APC, guard CY, load transport
+        attack       - Attack-move toward enemy buildings, unload APC
+    "Sustain" work (continuous production, repair, selling damaged buildings)
+    is not a separate phase value: ``attack`` is the last phase and the
+    sustain handlers keep running alongside it.
+    """
+    # Both factions' barracks names are listed — the bot picks whichever is available
+    BARRACKS_TYPES = {"tent", "barr"}  # Allied / Soviet
+    WAR_FACTORY_TYPES = {"weap"}
+    # Ordered construction plan; ``build_index`` walks through it.
+    BUILD_PRIORITY = [
+        "powr",       # Power Plant ($300) — shared
+        "barracks",   # Placeholder: tent (Allied) or barr (Soviet)
+        "proc",       # Ore Refinery ($2000) — needed before war factory
+        "weap",       # War Factory ($2000) — shared
+        "powr",       # Second Power Plant
+    ]
+    # Non-guard combat units required before train_army switches to attack
+    INFANTRY_TRAIN_TARGET = 6
+    GUARD_COUNT = 2  # infantry to guard CY
+    TRANSPORT_TYPE = "apc"
+    # Unit type codes grouped by role
+    COMBAT_UNIT_TYPES = {"e1", "e2", "e3", "e4", "1tnk", "2tnk", "3tnk", "arty", "jeep", "apc"}
+    INFANTRY_TYPES = {"e1", "e2", "e3", "e4"}
+    VEHICLE_TYPES = {"1tnk", "2tnk", "3tnk", "arty", "jeep"}
+    def __init__(self, verbose: bool = False):
+        self.phase = "deploy_mcv"
+        self.build_index = 0
+        self.placement_count = 0
+        self.deploy_issued = False
+        self.verbose = verbose
+        self._guards_assigned: set[int] = set()  # actor IDs guarding CY
+        self._stances_set: set[int] = set()  # actor IDs with stance already set
+        self._rally_set: set[int] = set()  # building actor IDs with rally point set
+        self._apc_trained = False
+        self._apc_loaded = False
+        self._repair_issued: set[int] = set()  # building actor IDs being repaired
+        self._sold: set[int] = set()  # building actor IDs sold
+        self._powered_down: set[int] = set()  # building actor IDs powered down
+        self._primary_set: set[int] = set()  # building actor IDs set as primary
+    def decide(self, obs: OpenRAObservation) -> OpenRAAction:
+        """Given current observation, return commands for this tick."""
+        commands: List[CommandModel] = []
+        self._update_phase(obs)
+        # Priority 1: Place completed buildings
+        commands.extend(self._handle_placement(obs))
+        # Priority 2: Deploy MCV
+        if self.phase == "deploy_mcv":
+            cmd = self._handle_deploy(obs)
+            if cmd:
+                commands.append(cmd)
+        # Priority 3: Set rally points on production buildings
+        commands.extend(self._handle_rally_points(obs))
+        # Priority 4: Power management (power down buildings if power negative)
+        commands.extend(self._handle_power_management(obs))
+        # Priority 5: Set primary production buildings
+        commands.extend(self._handle_set_primary(obs))
+        # Priority 6: Repair damaged buildings
+        commands.extend(self._handle_repairs(obs))
+        # Priority 7: Queue production (buildings + units)
+        commands.extend(self._handle_production(obs))
+        # Priority 8: Set stances on new units
+        commands.extend(self._handle_stances(obs))
+        # Priority 9: Assign guards to CY
+        commands.extend(self._handle_guards(obs))
+        # Priority 10: Load infantry into APC
+        commands.extend(self._handle_transport(obs))
+        # Priority 11: Combat — attack + unload
+        commands.extend(self._handle_combat(obs))
+        # Priority 12: Sell heavily damaged buildings
+        commands.extend(self._handle_sell(obs))
+        if not commands:
+            commands.append(CommandModel(action=ActionType.NO_OP))
+        return OpenRAAction(commands=commands)
+    # ── Phase transitions ──────────────────────────────────────────
+    def _update_phase(self, obs: OpenRAObservation):
+        has_cy = any(b.type == "fact" for b in obs.buildings)
+        has_barracks = any(b.type in self.BARRACKS_TYPES for b in obs.buildings)
+        combat_units = [u for u in obs.units if u.type in self.COMBAT_UNIT_TYPES]
+        non_guard_combat = [u for u in combat_units if u.actor_id not in self._guards_assigned]
+        if self.phase == "deploy_mcv" and has_cy:
+            self.phase = "build_base"
+            self._log("Phase → build_base")
+        elif self.phase == "build_base" and self.build_index >= len(self.BUILD_PRIORITY):
+            self.phase = "train_army"
+            self._log("Phase → train_army")
+        elif self.phase == "train_army" and len(non_guard_combat) >= self.INFANTRY_TRAIN_TARGET:
+            self.phase = "attack"
+            self._log(f"Phase → attack ({len(non_guard_combat)} combat units ready)")
+        elif self.phase == "attack" and has_barracks:
+            # Stay in attack but also sustain production
+            pass
+    # ── Deploy MCV ─────────────────────────────────────────────────
+    def _handle_deploy(self, obs: OpenRAObservation) -> Optional[CommandModel]:
+        if self.deploy_issued:
+            return None
+        mcv = next((u for u in obs.units if u.type == "mcv"), None)
+        if mcv:
+            self.deploy_issued = True
+            self._log(f"Deploying MCV (actor {mcv.actor_id}, facing={mcv.facing})")
+            return CommandModel(action=ActionType.DEPLOY, actor_id=mcv.actor_id)
+        return None
+    # ── Building placement ─────────────────────────────────────────
+    def _handle_placement(self, obs: OpenRAObservation) -> List[CommandModel]:
+        commands = []
+        cy = self._find_building(obs, "fact")
+        if not cy:
+            return commands
+        for prod in obs.production:
+            if prod.queue_type == "Building" and prod.progress >= 0.99:
+                x, y = self._placement_offset(cy)
+                self._log(f"Placing {prod.item} at cell ({x}, {y}) [attempt {self.placement_count}]")
+                commands.append(CommandModel(
+                    action=ActionType.PLACE_BUILDING,
+                    item_type=prod.item,
+                    target_x=x,
+                    target_y=y,
+                ))
+                self.placement_count += 1
+        return commands
+    def _placement_offset(self, cy: BuildingInfoModel) -> Tuple[int, int]:
+        """Calculate placement position relative to CY using cell coords."""
+        # Use pos_x // 1024 as CenterPosition maps to cell more reliably
+        cx = cy.pos_x // 1024
+        cy_y = cy.pos_y // 1024
+        # Many offsets to maximize chance of finding valid terrain
+        offsets = [
+            (3, 0), (-3, 0), (0, 3), (0, -3),
+            (3, 3), (-3, 3), (3, -3), (-3, -3),
+            (6, 0), (-6, 0), (0, 6), (0, -6),
+            (2, 0), (-2, 0), (0, 2), (0, -2),
+            (4, 0), (-4, 0), (0, 4), (0, -4),
+        ]
+        idx = self.placement_count % len(offsets)
+        dx, dy = offsets[idx]
+        return cx + dx, cy_y + dy
+    # ── Rally points (Sprint 4 action) ─────────────────────────────
+    def _handle_rally_points(self, obs: OpenRAObservation) -> List[CommandModel]:
+        commands = []
+        cy = self._find_building(obs, "fact")
+        if not cy:
+            return commands
+        # Set rally point on barracks and war factory toward CY
+        for b in obs.buildings:
+            if b.type in ("tent", "weap") and b.actor_id not in self._rally_set:
+                rally_x = cy.cell_x if cy.cell_x > 0 else cy.pos_x // 1024
+                rally_y = cy.cell_y if cy.cell_y > 0 else cy.pos_y // 1024
+                self._log(f"Setting rally on {b.type} (actor {b.actor_id}) → ({rally_x}, {rally_y})")
+                commands.append(CommandModel(
+                    action=ActionType.SET_RALLY_POINT,
+                    actor_id=b.actor_id,
+                    target_x=rally_x,
+                    target_y=rally_y,
+                ))
+                self._rally_set.add(b.actor_id)
+        return commands
+    # ── Power management (Sprint 5 action) ─────────────────────────
+    def _handle_power_management(self, obs: OpenRAObservation) -> List[CommandModel]:
+        """Power down non-essential buildings when power balance is negative."""
+        commands = []
+        power_balance = obs.economy.power_provided - obs.economy.power_drained
+        if power_balance >= 0:
+            return commands
+        # Power down radar/tech buildings first (keep production running)
+        POWER_DOWN_PRIORITY = ["dome", "spen", "syrd", "hpad", "afld", "fix"]
+        for btype in POWER_DOWN_PRIORITY:
+            for b in obs.buildings:
+                if b.type == btype and b.is_powered and b.actor_id not in self._powered_down:
+                    commands.append(CommandModel(action=ActionType.POWER_DOWN, actor_id=b.actor_id))
+                    self._powered_down.add(b.actor_id)
+                    self._log(f"Powering down {b.type} (actor {b.actor_id}) — power balance: {power_balance}")
+                    return commands  # one at a time
+        return commands
+    # ── Set primary building (Sprint 5 action) ───────────────────
+    def _handle_set_primary(self, obs: OpenRAObservation) -> List[CommandModel]:
+        """Set primary on newest production building of each type."""
+        commands = []
+        for btype_set in [self.BARRACKS_TYPES, self.WAR_FACTORY_TYPES]:
+            buildings_of_type = [b for b in obs.buildings if b.type in btype_set]
+            if len(buildings_of_type) >= 2:
+                newest = max(buildings_of_type, key=lambda b: b.actor_id)
+                if newest.actor_id not in self._primary_set:
+                    commands.append(CommandModel(action=ActionType.SET_PRIMARY, actor_id=newest.actor_id))
+                    self._primary_set.add(newest.actor_id)
+                    self._log(f"Setting primary: {newest.type} (actor {newest.actor_id})")
+        return commands
+    # ── Repair damaged buildings (Sprint 4 observation + existing action) ──
+    def _handle_repairs(self, obs: OpenRAObservation) -> List[CommandModel]:
+        commands = []
+        for b in obs.buildings:
+            if (b.hp_percent < 0.7
+                    and not b.is_repairing
+                    and b.actor_id not in self._repair_issued
+                    and obs.economy.cash >= 500):
+                self._log(f"Repairing {b.type} (actor {b.actor_id}, hp={b.hp_percent:.0%})")
+                commands.append(CommandModel(
+                    action=ActionType.REPAIR,
+                    actor_id=b.actor_id,
+                ))
+                self._repair_issued.add(b.actor_id)
+        return commands
+    # ── Production ─────────────────────────────────────────────────
+    def _handle_production(self, obs: OpenRAObservation) -> List[CommandModel]:
+        commands = []
+        # Building construction — treat any Building queue item as "in progress"
+        # (includes completed-but-unplaced buildings that block the queue)
+        building_in_queue = any(
+            p.queue_type == "Building"
+            for p in obs.production
+        )
+        if not building_in_queue and self.build_index < len(self.BUILD_PRIORITY):
+            item_type = self._resolve_build_item(obs, self.BUILD_PRIORITY[self.build_index])
+            if item_type is None:
+                # Can't resolve this item yet, skip
+                pass
+            elif self._has_building_type(obs, item_type, self.build_index):
+                self.build_index += 1
+            elif self._can_produce_item(obs, item_type):
+                self._log(f"Building {item_type} (#{self.build_index + 1}/{len(self.BUILD_PRIORITY)})")
+                commands.append(CommandModel(action=ActionType.BUILD, item_type=item_type))
+                self.build_index += 1
+        # Infantry training
+        has_barracks = any(b.type in self.BARRACKS_TYPES for b in obs.buildings)
+        infantry_training = any(
+            p.queue_type == "Infantry" and p.progress < 0.99
+            for p in obs.production
+        )
+        infantry = [u for u in obs.units if u.type in self.INFANTRY_TYPES]
+        total_target = self.INFANTRY_TRAIN_TARGET + self.GUARD_COUNT
+        if has_barracks and not infantry_training and len(infantry) < total_target:
+            if self._can_produce_item(obs, "e1") and obs.economy.cash >= 100:
+                self._log(f"Training e1 ({len(infantry)}/{total_target})")
+                commands.append(CommandModel(action=ActionType.TRAIN, item_type="e1"))
+        # APC from war factory
+        has_weap = any(b.type == "weap" for b in obs.buildings)
+        vehicle_training = any(
+            p.queue_type == "Vehicle" and p.progress < 0.99
+            for p in obs.production
+        )
+        if (has_weap and not vehicle_training and not self._apc_trained
+                and self._can_produce_item(obs, self.TRANSPORT_TYPE)
+                and obs.economy.cash >= 800):
+            self._log("Training APC for transport ops")
+            commands.append(CommandModel(action=ActionType.TRAIN, item_type=self.TRANSPORT_TYPE))
+            self._apc_trained = True
+        # Continuous vehicle production in attack phase
+        if (self.phase == "attack" and has_weap and not vehicle_training
+                and obs.economy.cash >= 800):
+            # Build light tanks if available
+            if self._can_produce_item(obs, "1tnk"):
+                self._log("Training 1tnk (continuous production)")
+                commands.append(CommandModel(action=ActionType.TRAIN, item_type="1tnk"))
+        return commands
+    def _can_produce_item(self, obs: OpenRAObservation, item_type: str) -> bool:
+        """Check if item is buildable using per-building can_produce (Sprint 4)."""
+        # First check global available_production
+        if item_type in obs.available_production:
+            return True
+        # Also check per-building can_produce lists
+        for b in obs.buildings:
+            if item_type in b.can_produce:
+                return True
+        return False
+    # ── Stances (Sprint 4 action) ──────────────────────────────────
+    def _handle_stances(self, obs: OpenRAObservation) -> List[CommandModel]:
+        commands = []
+        for u in obs.units:
+            if u.actor_id in self._stances_set:
+                continue
+            if u.type not in self.COMBAT_UNIT_TYPES:
+                continue
+            # Guards get Defend stance, attackers get AttackAnything
+            if u.actor_id in self._guards_assigned:
+                desired = STANCE_DEFEND
+            else:
+                desired = STANCE_ATTACK_ANYTHING
+            if u.stance != desired:
+                self._log(
+                    f"Setting {u.type} (actor {u.actor_id}) stance: "
+                    f"{STANCE_NAMES.get(u.stance, '?')} → {STANCE_NAMES[desired]}"
+                )
+                commands.append(CommandModel(
+                    action=ActionType.SET_STANCE,
+                    actor_id=u.actor_id,
+                    target_x=desired,
+                ))
+            self._stances_set.add(u.actor_id)
+        return commands
+    # ── Guard CY (Sprint 4 action) ────────────────────────────────
+    def _handle_guards(self, obs: OpenRAObservation) -> List[CommandModel]:
+        commands = []
+        if len(self._guards_assigned) >= self.GUARD_COUNT:
+            return commands
+        cy = self._find_building(obs, "fact")
+        if not cy:
+            return commands
+        # Find idle infantry not yet guarding
+        for u in obs.units:
+            if len(self._guards_assigned) >= self.GUARD_COUNT:
+                break
+            if (u.type in self.INFANTRY_TYPES
+                    and u.is_idle
+                    and u.actor_id not in self._guards_assigned):
+                self._log(
+                    f"Assigning {u.type} (actor {u.actor_id}, "
+                    f"range={u.attack_range}) to guard CY"
+                )
+                commands.append(CommandModel(
+                    action=ActionType.GUARD,
+                    actor_id=u.actor_id,
+                    target_actor_id=cy.actor_id,
+                ))
+                self._guards_assigned.add(u.actor_id)
+        return commands
+    # ── Transport: load/unload (Sprint 4 actions) ─────────────────
+    def _handle_transport(self, obs: OpenRAObservation) -> List[CommandModel]:
+        commands = []
+        if self._apc_loaded:
+            return commands
+        apc = next(
+            (u for u in obs.units
+             if u.type == self.TRANSPORT_TYPE and u.passenger_count == 0),
+            None,
+        )
+        if not apc:
+            return commands
+        # Load idle infantry (not guards) into the APC
+        loaded = 0
+        for u in obs.units:
+            if loaded >= 4:  # APC capacity
+                break
+            if (u.type in self.INFANTRY_TYPES
+                    and u.is_idle
+                    and u.actor_id not in self._guards_assigned):
+                self._log(
+                    f"Loading {u.type} (actor {u.actor_id}, "
+                    f"speed={u.speed}) into APC {apc.actor_id}"
+                )
+                commands.append(CommandModel(
+                    action=ActionType.ENTER_TRANSPORT,
+                    actor_id=u.actor_id,
+                    target_actor_id=apc.actor_id,
+                ))
+                loaded += 1
+        if loaded > 0:
+            self._apc_loaded = True
+        return commands
+    # ── Combat ─────────────────────────────────────────────────────
+    def _handle_combat(self, obs: OpenRAObservation) -> List[CommandModel]:
+        commands = []
+        if self.phase != "attack":
+            return commands
+        # Unload APC near enemy
+        commands.extend(self._handle_unload(obs))
+        # Attack-move idle fighters toward enemy
+        idle_fighters = [
+            u for u in obs.units
+            if (u.type in self.COMBAT_UNIT_TYPES
+                and u.is_idle
+                and u.actor_id not in self._guards_assigned)
+        ]
+        if len(idle_fighters) < 2:
+            return commands
+        target_x, target_y = self._find_attack_target(obs)
+        for unit in idle_fighters:
+            commands.append(CommandModel(
+                action=ActionType.ATTACK_MOVE,
+                actor_id=unit.actor_id,
+                target_x=target_x,
+                target_y=target_y,
+            ))
+        if idle_fighters:
+            self._log(
+                f"Attacking with {len(idle_fighters)} units "
+                f"toward ({target_x}, {target_y})"
+            )
+        return commands
+    def _handle_unload(self, obs: OpenRAObservation) -> List[CommandModel]:
+        """Unload APC when near enemies."""
+        commands = []
+        for u in obs.units:
+            if u.type != self.TRANSPORT_TYPE or u.passenger_count <= 0:
+                continue
+            # Check if any enemy is within ~15 cells
+            for enemy in obs.visible_enemies:
+                dx = abs(u.cell_x - enemy.cell_x)
+                dy = abs(u.cell_y - enemy.cell_y)
+                if dx + dy < 15:
+                    self._log(
+                        f"Unloading APC (actor {u.actor_id}, "
+                        f"{u.passenger_count} passengers) near enemy"
+                    )
+                    commands.append(CommandModel(
+                        action=ActionType.UNLOAD,
+                        actor_id=u.actor_id,
+                    ))
+                    break
+            # Also unload near enemy buildings
+            for eb in obs.visible_enemy_buildings:
+                dx = abs(u.cell_x - eb.cell_x)
+                dy = abs(u.cell_y - eb.cell_y)
+                if dx + dy < 15:
+                    self._log(
+                        f"Unloading APC near enemy building {eb.type} "
+                        f"(hp={eb.hp_percent:.0%})"
+                    )
+                    commands.append(CommandModel(
+                        action=ActionType.UNLOAD,
+                        actor_id=u.actor_id,
+                    ))
+                    break
+        return commands
+    def _find_attack_target(self, obs: OpenRAObservation) -> Tuple[int, int]:
+        """Prioritize enemy buildings > enemy units > map center."""
+        # Priority 1: visible enemy buildings (Sprint 4 field)
+        if obs.visible_enemy_buildings:
+            # Prefer production buildings
+            prod_buildings = [
+                b for b in obs.visible_enemy_buildings
+                if b.type in ("fact", "tent", "weap", "hpad", "afld")
+            ]
+            target = prod_buildings[0] if prod_buildings else obs.visible_enemy_buildings[0]
+            return target.cell_x, target.cell_y
+        # Priority 2: visible enemy units
+        if obs.visible_enemies:
+            enemy = obs.visible_enemies[0]
+            return enemy.cell_x, enemy.cell_y
+        # Fallback: map center
+        if obs.map_info.width > 0:
+            return obs.map_info.width // 2, obs.map_info.height // 2
+        return 64, 64
+    # ── Sell heavily damaged buildings ─────────────────────────────
+    def _handle_sell(self, obs: OpenRAObservation) -> List[CommandModel]:
+        commands = []
+        for b in obs.buildings:
+            if (b.hp_percent < 0.2
+                    and b.type != "fact"  # never sell CY
+                    and b.actor_id not in self._sold):
+                self._log(
+                    f"Selling {b.type} (actor {b.actor_id}, hp={b.hp_percent:.0%}, "
+                    f"refund=${b.sell_value})"
+                )
+                commands.append(CommandModel(
+                    action=ActionType.SELL,
+                    actor_id=b.actor_id,
+                ))
+                self._sold.add(b.actor_id)
+        return commands
+    # ── Helpers ────────────────────────────────────────────────────
+    def _resolve_build_item(self, obs: OpenRAObservation, placeholder: str) -> Optional[str]:
+        """Resolve faction-agnostic build item to actual producible type."""
+        if placeholder == "barracks":
+            # Find which barracks type is available
+            for btype in self.BARRACKS_TYPES:
+                if self._can_produce_item(obs, btype):
+                    return btype
+            return None
+        return placeholder
+    def _has_building_type(self, obs: OpenRAObservation, item_type: str, build_index: int) -> bool:
+        """Check if we already have enough of this building type."""
+        already_built = sum(1 for b in obs.buildings if b.type == item_type)
+        # Count how many times this item appears up to current index
+        resolved_order = []
+        for i, p in enumerate(self.BUILD_PRIORITY[:build_index + 1]):
+            if p == "barracks":
+                resolved_order.append(item_type if item_type in self.BARRACKS_TYPES else p)
+            else:
+                resolved_order.append(p)
+        target_count = resolved_order.count(item_type)
+        return already_built >= target_count
+    def _find_building(self, obs: OpenRAObservation, btype: str) -> Optional[BuildingInfoModel]:
+        return next((b for b in obs.buildings if b.type == btype), None)
+    def _log(self, msg: str):
+        if self.verbose:
+            print(f"  [Bot] {msg}")
+# ── Status display ─────────────────────────────────────────────────
+def print_status(step: int, obs: OpenRAObservation, bot: ScriptedBot):
+    """Print a rich status line using Sprint 4 observation fields."""
+    combat = [u for u in obs.units if u.type in bot.COMBAT_UNIT_TYPES]
+    buildings = ", ".join(sorted(set(b.type for b in obs.buildings))) or "none"
+    power_balance = obs.economy.power_provided - obs.economy.power_drained
+    # Count enemy intel
+    enemy_units = len(obs.visible_enemies)
+    enemy_buildings = len(obs.visible_enemy_buildings)
+    print(
+        f"Step {step:4d} | Tick {obs.tick:5d} | "
+        f"${obs.economy.cash:5d} | Pwr:{power_balance:+d} | "
+        f"Units:{len(obs.units)} (combat:{len(combat)}) | "
+        f"Enemy:{enemy_units}u/{enemy_buildings}b | "
+        f"Bldgs:[{buildings}] | {bot.phase}"
+    )
+def print_detailed_status(obs: OpenRAObservation):
+    """Print a multi-section dump of one observation to stdout.
+    Sections, in order: spatial map size, economy, production queue,
+    available production, own buildings, own units (first 10), visible
+    enemy units (first 5) and visible enemy buildings (first 5).
+    Args:
+        obs: The observation to describe.
+    """
+    print("\n── Detailed Observation ──")
+    # Spatial map
+    if obs.spatial_channels > 0 and obs.spatial_map:
+        raw_bytes = base64.b64decode(obs.spatial_map)
+        w, h = obs.map_info.width, obs.map_info.height
+        # Presumably 4 bytes per value (e.g. float32) — confirm against the server encoding
+        expected_bytes = w * h * obs.spatial_channels * 4
+        print(
+            f"  Spatial: {w}x{h} map, {obs.spatial_channels} channels, "
+            f"{len(raw_bytes)} bytes (expected {expected_bytes})"
+        )
+    else:
+        print("  Spatial: not populated")
+    # Economy
+    e = obs.economy
+    print(
+        f"  Economy: ${e.cash} cash, {e.ore} ore, "
+        f"power {e.power_provided}/{e.power_drained} "
+        f"({e.power_provided - e.power_drained:+d}), "
+        f"{e.harvester_count} harvesters"
+    )
+    # Production queue
+    if obs.production:
+        print(f"  Production queue ({len(obs.production)}):")
+        for p in obs.production:
+            print(f"    {p.queue_type}: {p.item} @ {p.progress:.0%} (paused={p.paused})")
+    # Only the first 15 available items are listed
+    if obs.available_production:
+        print(f"  Available production: {', '.join(obs.available_production[:15])}")
+    else:
+        print("  Available production: (none)")
+    # Own buildings with Sprint 4 fields
+    print(f"  Buildings ({len(obs.buildings)}):")
+    for b in obs.buildings:
+        # Optional details are collected and appended in parentheses
+        extras = []
+        if b.power_amount != 0:
+            extras.append(f"pwr={b.power_amount:+d}")
+        if b.is_producing:
+            extras.append(f"producing={b.producing_item}@{b.production_progress:.0%}")
+        if b.is_repairing:
+            extras.append("REPAIRING")
+        # NOTE(review): a negative rally_x presumably means "no rally point set" — confirm
+        if b.rally_x >= 0:
+            extras.append(f"rally=({b.rally_x},{b.rally_y})")
+        if b.can_produce:
+            # Show at most five producible items, with "..." when truncated
+            extras.append(f"can_produce=[{','.join(b.can_produce[:5])}{'...' if len(b.can_produce) > 5 else ''}]")
+        extra_str = f" ({', '.join(extras)})" if extras else ""
+        print(
+            f"    {b.type:6s} #{b.actor_id:4d} "
+            f"cell=({b.cell_x},{b.cell_y}) "
+            f"hp={b.hp_percent:.0%} "
+            f"sell=${b.sell_value}{extra_str}"
+        )
+    # Own units with Sprint 4 fields
+    print(f"  Units ({len(obs.units)}):")
+    for u in obs.units[:10]:  # cap at 10 for readability
+        # Unknown stance values are shown as "?<value>"
+        stance_name = STANCE_NAMES.get(u.stance, f"?{u.stance}")
+        extras = []
+        if u.experience_level > 0:
+            extras.append(f"vet={u.experience_level}")
+        # NOTE(review): a negative passenger_count presumably marks non-transports — confirm
+        if u.passenger_count >= 0:
+            extras.append(f"cargo={u.passenger_count}")
+        extra_str = f" ({', '.join(extras)})" if extras else ""
+        print(
+            f"    {u.type:6s} #{u.actor_id:4d} "
+            f"cell=({u.cell_x},{u.cell_y}) "
+            f"hp={u.hp_percent:.0%} "
+            f"face={u.facing:4d} spd={u.speed:3d} "
+            f"rng={u.attack_range:5d} "
+            f"stance={stance_name} "
+            f"{'IDLE' if u.is_idle else u.current_activity}{extra_str}"
+        )
+    if len(obs.units) > 10:
+        print(f"    ... and {len(obs.units) - 10} more")
+    # Visible enemies (first five only)
+    if obs.visible_enemies:
+        print(f"  Visible enemy units ({len(obs.visible_enemies)}):")
+        for u in obs.visible_enemies[:5]:
+            print(
+                f"    {u.type:6s} #{u.actor_id:4d} "
+                f"cell=({u.cell_x},{u.cell_y}) hp={u.hp_percent:.0%} "
+                f"spd={u.speed} rng={u.attack_range}"
+            )
+    # Visible enemy buildings (Sprint 4 field, first five only)
+    if obs.visible_enemy_buildings:
+        print(f"  Visible enemy buildings ({len(obs.visible_enemy_buildings)}):")
+        for b in obs.visible_enemy_buildings[:5]:
+            print(
+                f"    {b.type:6s} #{b.actor_id:4d} "
+                f"cell=({b.cell_x},{b.cell_y}) hp={b.hp_percent:.0%} "
+                f"pwr={b.power_amount:+d}"
+            )
+# ── Main loop ──────────────────────────────────────────────────────
+async def run_bot(url: str, max_steps: int, verbose: bool):
+    """Connect to the OpenRA-RL server and play one full game.
+    Resets the environment, then repeatedly asks a ``ScriptedBot`` for an
+    action and steps the environment until the result reports done or
+    *max_steps* is reached, and finally prints a summary report.
+    Args:
+        url: Base URL of the OpenRA-RL server.
+        max_steps: Upper bound on environment steps.
+        verbose: When true, print bot decisions and detailed observation dumps.
+    """
+    print(f"Connecting to {url}...")
+    bot = ScriptedBot(verbose=verbose)
+    async with OpenRAEnv(base_url=url, message_timeout_s=300.0) as env:
+        print("Resetting environment...")
+        result = await env.reset()
+        obs = result.observation
+        print(f"Game started! Map: {obs.map_info.map_name} ({obs.map_info.width}x{obs.map_info.height})")
+        # Print initial detailed status
+        if verbose:
+            print_detailed_status(obs)
+        print_status(0, obs, bot)
+        step = 0
+        total_reward = 0.0
+        while not result.done and step < max_steps:
+            action = bot.decide(result.observation)
+            result = await env.step(action)
+            step += 1
+            # A missing (None) reward counts as 0.0
+            total_reward += result.reward or 0.0
+            obs = result.observation
+            # One status line every 100 steps
+            if step % 100 == 0:
+                print_status(step, obs, bot)
+            # Detailed dump at key milestones
+            if verbose and step in (50, 200, 500, 1000):
+                print_detailed_status(obs)
+        # Final report
+        print()
+        print("=" * 70)
+        obs = result.observation
+        # NOTE(review): the loop tests result.done while the report tests obs.done —
+        # presumably both flags agree; confirm against the client models
+        if obs.done:
+            print(f"GAME OVER: {obs.result.upper()} after {step} steps (tick {obs.tick})")
+        else:
+            print(f"Reached max steps ({max_steps}) at tick {obs.tick}")
+        print(f"Total reward:        {total_reward:.3f}")
+        print(f"Final cash:          ${obs.economy.cash}")
+        print(f"Power balance:       {obs.economy.power_provided - obs.economy.power_drained:+d}")
+        print(f"Units killed:        {obs.military.units_killed}")
+        print(f"Units lost:          {obs.military.units_lost}")
+        print(f"Buildings killed:    {obs.military.buildings_killed}")
+        print(f"Buildings lost:      {obs.military.buildings_lost}")
+        print(f"Army value:          ${obs.military.army_value}")
+        print(f"Own buildings:       {len(obs.buildings)}")
+        print(f"Visible enemies:     {len(obs.visible_enemies)} units, {len(obs.visible_enemy_buildings)} buildings")
+        # Spatial map stats
+        if obs.spatial_channels > 0 and obs.spatial_map:
+            raw_bytes = base64.b64decode(obs.spatial_map)
+            # Presumably 4 bytes per float value — confirm against the server encoding
+            n_floats = len(raw_bytes) // 4
+            print(f"Spatial map:         {n_floats} floats ({obs.spatial_channels} channels)")
+        else:
+            print("Spatial map:         not populated")
+        # Show veteran units
+        vets = [u for u in obs.units if u.experience_level > 0]
+        if vets:
+            print(f"Veterans:            {', '.join(f'{u.type}#{u.actor_id}(lvl{u.experience_level})' for u in vets)}")
+        if verbose:
+            print_detailed_status(obs)
+        print("=" * 70)
+def main():
+    parser = argparse.ArgumentParser(description="Scripted Red Alert bot via OpenEnv")
+    parser.add_argument(
+        "--url",
+        default="http://localhost:8000",
+        help="OpenRA-RL server URL (default: http://localhost:8000)",
+    )
+    parser.add_argument(
+        "--max-steps",
+        type=int,
+        default=5000,
+        help="Maximum steps before stopping (default: 5000)",
+    )
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="Print detailed bot decisions and observation dumps",
+    )
+    args = parser.parse_args()
+    try:
+        asyncio.run(run_bot(args.url, args.max_steps, args.verbose))
+    except KeyboardInterrupt:
+        print("\nInterrupted by user")
+        sys.exit(0)
+    except ConnectionRefusedError:
+        print(f"\nCould not connect to {args.url}")
+        print("Is the OpenRA-RL server running?")
+        print("  docker run -p 8000:8000 openra-rl")
+        sys.exit(1)
+if __name__ == "__main__":
+    main()

models.py ADDED Viewed

	@@ -0,0 +1,7 @@

+"""OpenEnv models re-export."""
+from openra_env.models import (  # noqa: F401
+    OpenRAAction,
+    OpenRAObservation,
+    OpenRAState,
+)

openenv.yaml ADDED Viewed

	@@ -0,0 +1,6 @@

+spec_version: 1
+name: openra_env
+type: space
+runtime: fastapi
+app: openra_env.server.app:app
+port: 8000

openra_env/__init__.py ADDED Viewed

	@@ -0,0 +1,6 @@

+"""OpenRA-RL: Reinforcement Learning Environment for the OpenRA RTS Engine."""
+from openra_env.client import OpenRAEnv
+from openra_env.models import OpenRAAction, OpenRAObservation, OpenRAState
+__all__ = ["OpenRAEnv", "OpenRAAction", "OpenRAObservation", "OpenRAState"]

openra_env/agent.py ADDED Viewed

	@@ -0,0 +1,1156 @@

+"""LLM agent that plays Red Alert using any OpenAI-compatible model.
+Supports OpenRouter, Ollama, LM Studio, or any local/remote endpoint
+that implements the OpenAI Chat Completions API with tool calling.
+"""
+import asyncio
+import json
+import logging
+import time
+from collections import defaultdict
+import httpx
+from openra_env.config import LLMConfig
+from openra_env.game_data import get_building_stats, get_faction_info, get_tech_tree, get_unit_stats
+from openra_env.mcp_ws_client import OpenRAMCPClient
+logger = logging.getLogger("llm_agent")
+def _looks_like_tool_capability_error(error_text: str) -> bool:
+    """Best-effort detection of provider errors indicating no tool support."""
+    text = error_text.lower()
+    # Only match phrases that unambiguously refer to tool-calling capability.
+    # "no endpoints found" is too generic on its own — guard it with "tool".
+    if "no endpoints found" in text and "tool" in text:
+        return True
+    markers = (
+        "support tool use",
+        "does not support tool",
+        "tool calling",
+        "tools are not supported",
+    )
+    return any(m in text for m in markers)
+def _bench_export_policy(encountered_agent_error: bool) -> tuple[bool, bool, str]:
+    """Decide whether bench export and upload should run for this match.
+    Returns:
+        (should_export, should_upload, reason)
+        Local export always happens (useful for debugging).
+        Upload is skipped when runtime errors occurred.
+    """
+    if encountered_agent_error:
+        return True, False, "runtime [ERROR] occurred during the match"
+    return True, True, ""
+def _format_llm_api_error(status_code: int, error_text: str, llm_config: LLMConfig) -> str:
+    """Map raw provider errors to clear, actionable runtime messages."""
+    error_lower = error_text.lower()
+    if status_code in (401, 403):
+        return (
+            f"Authentication failed ({status_code}). "
+            "Check your API key: openra-rl config"
+        )
+    if status_code == 400 and "model" in error_lower:
+        return (
+            f"Invalid model ID '{llm_config.model}'. "
+            "Update with: openra-rl config"
+        )
+    if status_code == 429:
+        return "Rate limited by LLM provider. Wait a minute and retry."
+    if status_code == 404 and _looks_like_tool_capability_error(error_text):
+        is_openrouter = "openrouter.ai" in llm_config.base_url.lower()
+        if is_openrouter:
+            return (
+                f"Model '{llm_config.model}' has no OpenRouter route that supports tool calling. "
+                "OpenRA-RL requires tool-calling models. "
+                "Use a tool-capable model/route (often not ':free'), or use Ollama "
+                "with qwen3:32b or qwen3:4b."
+            )
+        return (
+            f"Model '{llm_config.model}' does not support tool calling on this endpoint. "
+            "OpenRA-RL requires tool-calling models."
+        )
+    return f"LLM API error {status_code}: {error_text}"
+async def _preflight_tool_calling_support(llm_config: LLMConfig) -> tuple[bool, str]:
+    """Check OpenRouter model route support for tool calling before game start.
+    Returns:
+        (True, "") when preflight passes or does not apply.
+        (False, reason) when preflight confirms tools are unsupported.
+    """
+    if "openrouter.ai" not in llm_config.base_url.lower():
+        return True, ""
+    preflight_cfg = llm_config.model_copy(
+        update={
+            "max_tokens": 1,
+            "request_timeout_s": min(llm_config.request_timeout_s, 30.0),
+        }
+    )
+    preflight_messages = [
+        {"role": "user", "content": "Tool-calling preflight check. Reply briefly."},
+    ]
+    preflight_tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "preflight_ping",
+                "description": "Preflight-only tool for capability check.",
+                "parameters": {"type": "object", "properties": {}},
+            },
+        }
+    ]
+    try:
+        await chat_completion(preflight_messages, preflight_tools, preflight_cfg, verbose=False, prompts=None)
+        return True, ""
+    except RuntimeError as e:
+        msg = str(e)
+        if _looks_like_tool_capability_error(msg):
+            return False, msg
+        raise
+def _load_default_prompt() -> str:
+    """Load the default system prompt shipped with the package.
+    Thin wrapper around ``openra_env.prompts.load_default_prompt``.
+    """
+    # Imported at call time rather than with the module-level imports
+    from openra_env.prompts import load_default_prompt
+    return load_default_prompt()
+# Public constant for backward compatibility (loaded eagerly at import time)
+SYSTEM_PROMPT = _load_default_prompt()
+def load_system_prompt(config) -> str:
+    """Resolve system prompt from config: inline > file > default.
+    Priority:
+      1. config.prompts.system_prompt (inline string)
+      2. config.prompts.system_prompt_file (path to .txt file)
+      3. config.agent.system_prompt (deprecated, backward compat)
+      4. config.agent.system_prompt_file (deprecated, backward compat)
+      5. Built-in default (openra_env/prompts/default.txt)
+    """
+    from pathlib import Path
+    # Check prompts.* first (canonical location)
+    prompts_cfg = getattr(config, "prompts", None)
+    if prompts_cfg:
+        if getattr(prompts_cfg, "system_prompt", ""):
+            return prompts_cfg.system_prompt
+        prompt_file = getattr(prompts_cfg, "system_prompt_file", "")
+        if prompt_file:
+            p = Path(prompt_file).expanduser()
+            if p.is_file():
+                return p.read_text(encoding="utf-8").strip()
+            raise FileNotFoundError(f"system_prompt_file not found: {p}")
+    # Backward compat: check agent.* (deprecated)
+    agent_cfg = config.agent if hasattr(config, "agent") else config
+    if getattr(agent_cfg, "system_prompt", ""):
+        return agent_cfg.system_prompt
+    prompt_file = getattr(agent_cfg, "system_prompt_file", "")
+    if prompt_file:
+        p = Path(prompt_file).expanduser()
+        if p.is_file():
+            return p.read_text(encoding="utf-8").strip()
+        raise FileNotFoundError(f"system_prompt_file not found: {p}")
+    # Default
+    return SYSTEM_PROMPT
+def compose_pregame_briefing(state: dict) -> str:
+    """Compose a strategic briefing from initial game state + static game data.
+    Sent once at game start so the LLM knows map, base position, faction, tech tree,
+    and available units/buildings without needing extra tool calls.
+    """
+    map_info = state.get("map", {})
+    map_w = map_info.get("width", 0)
+    map_h = map_info.get("height", 0)
+    map_name = map_info.get("map_name", "?")
+    # Determine base position from buildings/units
+    buildings = state.get("buildings_summary", [])
+    units = state.get("units_summary", [])
+    all_positions = [(b["cell_x"], b["cell_y"]) for b in buildings] + \
+                    [(u["cell_x"], u["cell_y"]) for u in units]
+    if all_positions:
+        base_x = sum(p[0] for p in all_positions) // len(all_positions)
+        base_y = sum(p[1] for p in all_positions) // len(all_positions)
+    else:
+        base_x, base_y = map_w // 2, map_h // 2
+    # Estimate enemy spawn — opposite side of map
+    enemy_x = max(2, min(map_w - 2, map_w - base_x))
+    enemy_y = max(2, min(map_h - 2, map_h - base_y))
+    # Determine faction and side
+    faction = state.get("faction", "")
+    allied_factions = {"england", "france", "germany"}
+    soviet_factions = {"russia", "ukraine"}
+    if faction in allied_factions:
+        side = "Allied"
+        barracks = "tent"
+    elif faction in soviet_factions:
+        side = "Soviet"
+        barracks = "barr"
+    else:
+        # Infer from available production or buildings
+        avail = state.get("available_production", [])
+        bldg_types = state.get("building_types", [])
+        if "tent" in avail or "tent" in bldg_types:
+            side, barracks = "Allied", "tent"
+        else:
+            side, barracks = "Soviet", "barr"
+    # Get tech tree — returns {side: [order]} dict
+    tech = get_tech_tree(side.lower())
+    tech_order = tech.get(side.lower(), tech.get("build_order", []))
+    # Get faction info for available units/buildings
+    faction_info = get_faction_info(faction) if faction else get_faction_info(side.lower())
+    avail_units = faction_info.get("available_units", []) if faction_info else []
+    avail_buildings = faction_info.get("available_buildings", []) if faction_info else []
+    # Format key units with costs
+    unit_lines = []
+    for utype in avail_units[:12]:  # Cap at 12 to keep concise
+        stats = get_unit_stats(utype)
+        if stats:
+            unit_lines.append(f"  {utype}: {stats['name']} — ${stats['cost']}, {stats.get('category', '?')}")
+    # Format key buildings with costs and power
+    bldg_lines = []
+    for btype in avail_buildings[:10]:
+        stats = get_building_stats(btype)
+        if stats:
+            power = stats.get("power", 0)
+            power_str = f", {power:+d} power" if power else ""
+            bldg_lines.append(f"  {btype}: {stats['name']} — ${stats['cost']}{power_str}")
+    # Calculate defense direction
+    dx = enemy_x - base_x
+    dy = enemy_y - base_y
+    dir_parts = []
+    if dy < -map_h // 6:
+        dir_parts.append("North")
+    elif dy > map_h // 6:
+        dir_parts.append("South")
+    if dx > map_w // 6:
+        dir_parts.append("East")
+    elif dx < -map_w // 6:
+        dir_parts.append("West")
+    defense_direction = "".join(dir_parts) if dir_parts else "Center"
+    parts = [
+        "## Strategic Briefing",
+        f"Map: {map_name} ({map_w}x{map_h})",
+        f"Your faction: {faction or side} ({side})",
+        f"Your base: ({base_x}, {base_y})",
+        f"Enemy likely near: ({enemy_x}, {enemy_y})",
+        f"Enemy approach direction: {defense_direction}",
+        "",
+        f"Tech tree: {' → '.join(tech_order[:8])}{'...' if len(tech_order) > 8 else ''}",
+        f"Barracks type: {barracks}",
+        "",
+        "Available units:",
+        *unit_lines,
+        "",
+        "Available buildings:",
+        *bldg_lines,
+    ]
+    return "\n".join(parts)
+def format_state_briefing(state: dict) -> str:
+    """Format game state (from get_game_state tool) into a compact turn briefing with positions."""
+    if not isinstance(state, dict) or "tick" not in state:
+        return ""
+    eco = state.get("economy", {})
+    tick = state["tick"]
+    cash = eco.get("cash", 0)
+    ore = eco.get("ore", 0)
+    funds = cash + ore
+    parts = [
+        f"--- TURN BRIEFING (tick {tick}, ~{tick // 25}s game time) ---",
+        f"Funds: ${funds} (cash=${cash} + ore=${ore}) | Power: {state.get('power_balance', 0):+d} | Harvesters: {eco.get('harvester_count', 0)} | Explored: {state.get('explored_percent', 0)}%",
+    ]
+    # Minimap (ASCII spatial overview)
+    minimap = state.get("minimap", "")
+    if minimap:
+        parts.append(minimap)
+    # Base center from buildings
+    buildings = state.get("buildings_summary", [])
+    if buildings:
+        base_x = sum(b["cell_x"] for b in buildings) // len(buildings)
+        base_y = sum(b["cell_y"] for b in buildings) // len(buildings)
+        parts.append(f"Base center: ({base_x},{base_y})")
+    # Compact unit summary grouped by type, with IDs, positions, and activity
+    units = state.get("units_summary", [])
+    if units:
+        by_type = defaultdict(list)
+        idle_ids = []
+        for u in units:
+            by_type[u["type"]].append(u)
+            if u.get("idle") and u.get("can_attack"):
+                idle_ids.append(u["id"])
+        unit_parts = []
+        for utype, us in by_type.items():
+            entries = []
+            for u in us:
+                pos = f"{u['id']}@({u['cell_x']},{u['cell_y']})"
+                if u.get("target_x") is not None:
+                    pos += f"→({u['target_x']},{u['target_y']})"
+                elif not u.get("idle"):
+                    # Show short activity tag for non-idle units without tracked target
+                    act = u.get("activity", "")
+                    if act and act not in ("Idle", "Unknown", "Wait"):
+                        tag = act[:3].lower()
+                        pos += f"→{tag}"
+                entries.append(pos)
+            unit_parts.append(f"{len(us)}x{utype}[{','.join(entries)}]")
+        line = f"Units: {' '.join(unit_parts)}"
+        if idle_ids:
+            line += f" | Idle: [{','.join(str(i) for i in idle_ids)}]"
+        parts.append(line)
+    else:
+        parts.append(f"Units: {state.get('own_units', '?')}")
+    # Compact building summary with IDs, positions, and production category
+    _BLDG_CATEGORY = {"tent": "infantry", "barr": "infantry", "weap": "vehicle",
+                       "hpad": "aircraft", "afld": "aircraft", "syrd": "ship", "spen": "ship",
+                       "gun": "defense", "ftur": "defense", "tsla": "defense",
+                       "sam": "defense", "agun": "defense", "pbox": "defense", "hbox": "defense"}
+    if buildings:
+        bldg_parts = []
+        for b in buildings:
+            cat = _BLDG_CATEGORY.get(b["type"], "")
+            cat_str = f"[{cat}]" if cat else ""
+            bldg_parts.append(f"{b['type']}({b['id']})@({b['cell_x']},{b['cell_y']}){cat_str}")
+        parts.append(f"Buildings: {' '.join(bldg_parts)}")
+    else:
+        parts.append(f"Buildings: {state.get('own_buildings', '?')} ({', '.join(state.get('building_types', []))})")
+    # Enemy summary with IDs and positions (units + buildings)
+    enemies = state.get("enemy_summary", [])
+    enemy_bldgs = state.get("enemy_buildings_summary", [])
+    if enemies or enemy_bldgs:
+        enemy_parts = []
+        if enemies:
+            eby_type = defaultdict(list)
+            for e in enemies:
+                eby_type[e["type"]].append(e)
+            for etype, es in eby_type.items():
+                entries = ",".join(f"{e['id']}@({e['cell_x']},{e['cell_y']})" for e in es)
+                enemy_parts.append(f"{len(es)}x{etype}[{entries}]")
+        if enemy_bldgs:
+            ebby_type = defaultdict(list)
+            for b in enemy_bldgs:
+                ebby_type[b["type"]].append(b)
+            for btype, bs in ebby_type.items():
+                entries = ",".join(f"{b['id']}@({b['cell_x']},{b['cell_y']})" for b in bs)
+                enemy_parts.append(f"{len(bs)}x{btype}[{entries}]")
+        # Average position of all visible enemies
+        all_enemy_pos = (
+            [(e["cell_x"], e["cell_y"]) for e in enemies]
+            + [(b["cell_x"], b["cell_y"]) for b in enemy_bldgs]
+        )
+        avg_x = sum(p[0] for p in all_enemy_pos) // len(all_enemy_pos)
+        avg_y = sum(p[1] for p in all_enemy_pos) // len(all_enemy_pos)
+        parts.append(f"Enemies: {' '.join(enemy_parts)} center ({avg_x},{avg_y})")
+    else:
+        n_enemy = state.get("visible_enemy_units", 0)
+        parts.append(f"Enemies: {'none visible' if n_enemy == 0 else f'{n_enemy} visible'}")
+    prod = state.get("production_items", [])
+    if prod:
+        active = [p for p in prod if "@100%" not in p]
+        ready = [p.split("@")[0] for p in prod if "@100%" in p]
+        parts_prod = []
+        if active:
+            parts_prod.append(", ".join(active))
+        if ready:
+            parts_prod.append(f"READY TO PLACE: {', '.join(ready)}")
+        parts.append(f"Production: {' | '.join(parts_prod)}")
+    else:
+        parts.append("Production: IDLE")
+    available = state.get("available_production", [])
+    if available:
+        parts.append(f"Can build: {', '.join(available)}")
+    alerts = state.get("alerts", [])
+    if alerts:
+        parts.append("ALERTS:")
+        for a in alerts:
+            parts.append(f"  ** {a}")
+    parts.append("---")
+    if state.get("done"):
+        parts.append(f"GAME OVER: {state.get('result', '?')}")
+    return "\n".join(parts)
+def mcp_tools_to_openai(tools: list) -> list[dict]:
+    """Convert MCP Tool schemas to OpenAI function calling format."""
+    result = []
+    for tool in tools:
+        schema = tool.input_schema if hasattr(tool, 'input_schema') else {}
+        # Clean up schema — remove 'title' which confuses some models
+        params = dict(schema) if schema else {}
+        params.pop("title", None)
+        if "properties" not in params:
+            params["properties"] = {}
+            params["type"] = "object"
+        result.append({
+            "type": "function",
+            "function": {
+                "name": tool.name,
+                "description": tool.description or "",
+                "parameters": params,
+            },
+        })
+    return result
+def _sanitize_messages(messages: list[dict], prompts=None) -> list[dict]:
+    """Merge consecutive same-role messages for strict-alternation models (e.g. Mistral).
+    Some models require strict user/assistant alternation and reject sequences
+    like ``user → user`` or ``tool → user``.  This helper:
+    1. Merges consecutive ``user`` messages by joining their content with newlines.
+    2. Inserts a bridge ``assistant`` message when a ``tool`` result is followed
+       by a ``user`` message (Mistral requires tool → assistant → user).
+    """
+    if not messages:
+        return messages
+    bridge = prompts.sanitize_bridge if prompts else "Acknowledged. Continuing."
+    merged: list[dict] = [dict(messages[0])]
+    for msg in messages[1:]:
+        prev = merged[-1]
+        # Merge consecutive user messages
+        if msg["role"] == "user" and prev["role"] == "user":
+            merged[-1] = {**prev, "content": prev["content"] + "\n\n" + msg["content"]}
+            continue
+        # Bridge: tool → user needs an assistant message in between
+        if msg["role"] == "user" and prev["role"] == "tool":
+            merged.append({"role": "assistant", "content": bridge})
+        merged.append(msg)
+    return merged
+async def chat_completion(
+    messages: list[dict],
+    tools: list[dict],
+    llm_config: LLMConfig,
+    verbose: bool = False,
+    prompts=None,
+) -> dict:
+    """Call an OpenAI-compatible chat completions API.
+    Works with OpenRouter, Ollama, LM Studio, or any endpoint
+    implementing the OpenAI Chat Completions spec with tool calling.
+    """
+    clean_messages = _sanitize_messages(messages, prompts=prompts)
+    payload = {
+        "model": llm_config.model,
+        "messages": clean_messages,
+        "max_tokens": llm_config.max_tokens,
+    }
+    if tools:
+        payload["tools"] = tools
+        payload["tool_choice"] = "auto"
+    if llm_config.temperature is not None:
+        payload["temperature"] = llm_config.temperature
+    if llm_config.top_p is not None:
+        payload["top_p"] = llm_config.top_p
+    if llm_config.reasoning_effort is not None:
+        payload["reasoning"] = {"effort": llm_config.reasoning_effort}
+    headers = dict(llm_config.extra_headers)
+    if llm_config.api_key:
+        headers["Authorization"] = f"Bearer {llm_config.api_key}"
+    async with httpx.AsyncClient() as client:
+        if verbose:
+            n_msgs = len(clean_messages)
+            roles = [m.get("role", "?") for m in clean_messages]
+            print(f"  [LLM] Sending {n_msgs} messages to {llm_config.model}...")
+            print(f"  [LLM] Roles: {' → '.join(roles)}")
+        response = await client.post(
+            llm_config.base_url,
+            headers=headers,
+            json=payload,
+            timeout=llm_config.request_timeout_s,
+        )
+        if response.status_code != 200:
+            error_text = response.text[:2000]
+            raise RuntimeError(
+                _format_llm_api_error(response.status_code, error_text, llm_config)
+            )
+        try:
+            data = response.json()
+        except (json.JSONDecodeError, ValueError) as e:
+            raise RuntimeError(f"LLM API error 502: invalid JSON response ({e})")
+        if "error" in data:
+            raise RuntimeError(f"LLM API error 500: {data['error']}")
+        if verbose:
+            usage = data.get("usage", {})
+            print(
+                f"  [LLM] Response: {usage.get('prompt_tokens', '?')} prompt + "
+                f"{usage.get('completion_tokens', '?')} completion tokens"
+            )
+        return data
+def compress_history(messages: list[dict], keep_last: int = 40,
+                     trigger: int = 0, prompts=None, compression=None) -> list[dict]:
+    """Compress message history to stay within context limits.
+    Keeps the system prompt and the last ``keep_last`` messages, replacing
+    earlier messages with a state-aware summary that preserves critical
+    game context (buildings, economy, strategy, military, errors).
+    Args:
+        keep_last: Number of recent messages to keep after compression.
+        trigger: Compress when total messages exceed this threshold.
+            0 (default) means ``keep_last * 2``.
+        prompts: PromptsConfig for customizable text.
+        compression: CompressionConfig controlling what to include in summary.
+    """
+    threshold = trigger if trigger > 0 else keep_last * 2
+    if len(messages) <= threshold:
+        return messages
+    system = messages[0]
+    # Find a clean cut point: recent must not start with tool role
+    cut = len(messages) - keep_last
+    while cut < len(messages) and messages[cut].get("role") == "tool":
+        cut += 1  # move cut forward to skip orphaned tool results
+    if cut >= len(messages) - 2:
+        return messages  # can't compress safely
+    old_messages = messages[1:cut]
+    recent = messages[cut:]
+    # Compression config defaults
+    inc_strategy = compression.include_strategy if compression else True
+    inc_military = compression.include_military if compression else True
+    inc_production = compression.include_production if compression else True
+    # Extract game state context from old messages
+    last_state = {}
+    building_types = set()
+    unit_types_produced = set()
+    strategy_text = ""
+    errors = []
+    for msg in old_messages:
+        # Extract planning strategy from early user messages
+        if inc_strategy and msg.get("role") == "user" and not strategy_text:
+            content_str = msg.get("content", "")
+            if isinstance(content_str, str):
+                for line in content_str.split("\n"):
+                    if line.strip().startswith("Strategy:"):
+                        strategy_text = line.strip()
+                        break
+        if msg.get("role") != "tool":
+            continue
+        try:
+            content = json.loads(msg["content"]) if isinstance(msg["content"], str) else msg["content"]
+            if not isinstance(content, dict):
+                continue
+            # Track latest state snapshot
+            if "tick" in content and "economy" in content:
+                last_state = content
+            # Track buildings built
+            for bt in content.get("building_types", []):
+                building_types.add(bt)
+            # Track units produced (from build_unit notes)
+            if inc_production and "note" in content:
+                note = content["note"]
+                if isinstance(note, str) and "queued" in note:
+                    # Extract unit/building name from "'name' ... queued"
+                    import re
+                    m = re.search(r"'(\w+)'.*queued", note)
+                    if m:
+                        name = m.group(1)
+                        # Distinguish units from buildings
+                        if "per unit" in note or "each" in note:
+                            unit_types_produced.add(name)
+                        else:
+                            building_types.add(name)
+            # Track placement failures and errors
+            if content.get("placement_failed"):
+                errors.append("placement failed")
+            elif "error" in content and isinstance(content["error"], str):
+                err = content["error"]
+                if len(err) < 80:
+                    errors.append(err)
+        except (json.JSONDecodeError, TypeError):
+            pass
+    # Build summary
+    parts = [f"[History: {len(old_messages)} earlier messages removed]"]
+    if last_state:
+        eco = last_state.get("economy", {})
+        parts.append(
+            f"Last state at tick {last_state.get('tick', '?')}: "
+            f"${eco.get('cash', '?')} cash, "
+            f"{last_state.get('own_units', '?')} units, "
+            f"{last_state.get('own_buildings', '?')} buildings"
+        )
+    if inc_strategy and strategy_text:
+        parts.append(strategy_text)
+    if building_types:
+        parts.append(f"Buildings built: {', '.join(sorted(building_types))}")
+    if inc_production and unit_types_produced:
+        parts.append(f"Units produced: {', '.join(sorted(unit_types_produced))}")
+    if inc_military and last_state:
+        mil = last_state.get("military", {})
+        if mil:
+            parts.append(
+                f"Military: {mil.get('units_killed', 0)} kills, "
+                f"{mil.get('units_lost', 0)} losses"
+            )
+    if errors:
+        unique = list(dict.fromkeys(errors))[-3:]
+        parts.append(f"Recent issues: {'; '.join(unique)}")
+    suffix = prompts.compression_suffix if prompts else "Game continues from current state."
+    parts.append(suffix)
+    return [
+        system,
+        {"role": "user", "content": "\n".join(parts)},
+        *recent,
+    ]
+async def run_agent(config, verbose: bool = False):
+    """Connect to OpenRA-RL and play a game using an LLM agent."""
+    url = config.agent.server_url
+    llm_config = config.llm
+    max_turns = config.agent.max_turns
+    max_time = config.agent.max_time_s
+    # Auto-increase timeout for local models (they're slower than cloud APIs)
+    is_local = any(h in llm_config.base_url for h in ("localhost", "127.0.0.1"))
+    if is_local and llm_config.request_timeout_s <= 120.0:
+        llm_config = llm_config.model_copy(update={"request_timeout_s": 300.0})
+    print(f"Connecting to {url}...")
+    print(f"Model: {llm_config.model} @ {llm_config.base_url}")
+    if is_local:
+        print(f"Timeout: {int(llm_config.request_timeout_s)}s (local model)")
+    if "openrouter.ai" in llm_config.base_url.lower():
+        print("Checking model route for tool-calling support...")
+        try:
+            preflight_ok, preflight_err = await _preflight_tool_calling_support(llm_config)
+        except Exception as e:
+            print(f"  [ERROR] Preflight check failed: {e}")
+            print("  Aborting before game launch (no match started).")
+            return
+        if not preflight_ok:
+            print(f"  [ERROR] Preflight check failed: {preflight_err}")
+            print("  Aborting before game launch (no match started).")
+            return
+    async with OpenRAMCPClient(base_url=url, message_timeout_s=300.0) as env:
+        print("Resetting environment (launching OpenRA)...")
+        await env.reset()
+        # Discover and convert tools
+        mcp_tools = await env.list_tools()
+        openai_tools = mcp_tools_to_openai(mcp_tools)
+        tool_names = {t["function"]["name"] for t in openai_tools}
+        print(f"Discovered {len(mcp_tools)} MCP tools")
+        if verbose:
+            for t in mcp_tools:
+                print(f"  - {t.name}: {t.description[:60]}...")
+        # Initialize conversation
+        system_prompt = load_system_prompt(config)
+        messages = [{"role": "system", "content": system_prompt}]
+        # ─── Pre-Game Planning Phase ──────────────────────────────────
+        planning_strategy = ""
+        planning_status = await env.call_tool("get_planning_status")
+        if planning_status.get("planning_enabled", True) is not False:
+            print("Starting pre-game planning phase...")
+            planning_data = await env.call_tool("start_planning_phase")
+            if planning_data.get("planning_active"):
+                max_planning_turns = planning_data.get("max_turns", 10)
+                opponent_summary = planning_data.get("opponent_summary", "")
+                prompts = config.prompts
+                planning_prompt = prompts.planning_prompt.format(
+                    max_turns=max_planning_turns,
+                    map_name=planning_data.get("map", {}).get("map_name", "?"),
+                    map_width=planning_data.get("map", {}).get("width", "?"),
+                    map_height=planning_data.get("map", {}).get("height", "?"),
+                    base_x=planning_data.get("base_position", {}).get("x", "?"),
+                    base_y=planning_data.get("base_position", {}).get("y", "?"),
+                    enemy_x=planning_data.get("enemy_estimated_position", {}).get("x", "?"),
+                    enemy_y=planning_data.get("enemy_estimated_position", {}).get("y", "?"),
+                    faction=planning_data.get("your_faction", "?"),
+                    side=planning_data.get("your_side", "?"),
+                    opponent_summary=opponent_summary,
+                    planning_nudge=prompts.planning_nudge,
+                )
+                messages.append({"role": "user", "content": planning_prompt})
+                # Planning loop (bounded by max_planning_turns + margin)
+                planning_done = False
+                for planning_turn in range(max_planning_turns + 2):
+                    try:
+                        response = await chat_completion(messages, openai_tools, llm_config, verbose, prompts=config.prompts)
+                    except (RuntimeError, httpx.ReadTimeout, httpx.ConnectTimeout) as e:
+                        print(f"  [Planning] API error: {e}")
+                        print("  Skipping planning phase.")
+                        break
+                    if response is None:
+                        break
+                    choice = response["choices"][0]
+                    assistant_msg = choice["message"]
+                    messages.append(assistant_msg)
+                    if verbose and assistant_msg.get("content"):
+                        print(f"  [Planning] {assistant_msg['content'][:200]}")
+                    tool_calls = assistant_msg.get("tool_calls", [])
+                    if not tool_calls:
+                        messages.append({
+                            "role": "user",
+                            "content": prompts.planning_nudge,
+                        })
+                        continue
+                    for tc in tool_calls:
+                        fn_name = tc["function"]["name"]
+                        try:
+                            fn_args = json.loads(tc["function"].get("arguments", "{}"))
+                        except (json.JSONDecodeError, TypeError):
+                            fn_args = {}
+                        if verbose:
+                            args_str = json.dumps(fn_args)
+                            if len(args_str) > 80:
+                                args_str = args_str[:80] + "..."
+                            print(f"  [Planning Tool] {fn_name}({args_str})")
+                        try:
+                            result = await env.call_tool(fn_name, **fn_args)
+                        except Exception as e:
+                            result = {"error": str(e)}
+                        messages.append({
+                            "role": "tool",
+                            "tool_call_id": tc["id"],
+                            "content": json.dumps(result) if not isinstance(result, str) else result,
+                        })
+                        # Check if planning ended
+                        if isinstance(result, dict):
+                            if result.get("planning_complete"):
+                                planning_strategy = result.get("strategy", "")
+                                planning_done = True
+                                if verbose:
+                                    print(f"  [Planning] Strategy: {planning_strategy[:150]}...")
+                            elif result.get("planning_expired"):
+                                planning_strategy = result.get("strategy", "")
+                                planning_done = True
+                                print(f"  [Planning] Expired: {result.get('reason', '?')}")
+                    if planning_done:
+                        break
+                if not planning_done:
+                    # Force end planning
+                    try:
+                        result = await env.call_tool(
+                            "end_planning_phase",
+                            strategy="(planning timed out, no explicit strategy)"
+                        )
+                        planning_strategy = result.get("strategy", "")
+                    except Exception:
+                        pass
+                    print("  Planning phase timed out, proceeding to gameplay.")
+                print(f"Planning phase complete. Strategy recorded: {bool(planning_strategy)}")
+            else:
+                if verbose:
+                    print(f"  Planning: {planning_data.get('message', 'skipped')}")
+        # ─── Game Start ───────────────────────────────────────────────
+        # Reset messages to just system prompt — planning context is captured
+        # in the strategy text below. This avoids tool/user role alternation
+        # issues with models that enforce strict message ordering (e.g. Mistral).
+        messages = [messages[0]]  # keep only system prompt
+        state = await env.call_tool("get_game_state")
+        briefing = compose_pregame_briefing(state)
+        strategy_section = ""
+        if planning_strategy:
+            strategy_section = f"\n\n## Your Pre-Game Strategy\n{planning_strategy}\n"
+        # Find MCV unit ID and barracks type for context
+        mcv_id = None
+        for u in state.get("units_summary", []):
+            if u.get("type") == "mcv":
+                mcv_id = u["id"]
+                break
+        faction = state.get("faction", "")
+        barracks_type = "tent" if faction in {"england", "france", "germany"} else "barr"
+        mcv_note = f" Your MCV is unit {mcv_id}." if mcv_id else ""
+        game_start_prompts = config.prompts
+        messages.append({
+            "role": "user",
+            "content": game_start_prompts.game_start.format(
+                strategy_section=strategy_section,
+                briefing=briefing,
+                barracks_type=barracks_type,
+                mcv_note=mcv_note,
+            ),
+        })
+        total_tool_calls = 0
+        total_api_calls = 0
+        start_time = time.time()
+        game_done = False
+        encountered_agent_error = False
+        consecutive_errors = 0
+        MAX_CONSECUTIVE_ERRORS = 3
+        turn = 0
+        while True:
+            # Check limits
+            elapsed = time.time() - start_time
+            if max_time and elapsed >= max_time:
+                print(f"\n  TIME LIMIT reached ({max_time}s). Stopping.")
+                break
+            if max_turns and turn >= max_turns:
+                break
+            turn += 1
+            # Compress history periodically (unless disabled)
+            if llm_config.compression_strategy != "none":
+                messages = compress_history(
+                    messages, keep_last=llm_config.keep_last_messages,
+                    trigger=llm_config.compression_trigger,
+                    prompts=config.prompts,
+                    compression=config.prompts.compression)
+            # Inject state briefing before LLM thinks (skip first turn — initial state already provided)
+            if total_api_calls > 0:
+                try:
+                    briefing_state = await env.call_tool("get_game_state")
+                    briefing = format_state_briefing(briefing_state)
+                    if briefing:
+                        messages.append({"role": "user", "content": briefing})
+                        if verbose:
+                            # Print just the alerts
+                            for a in briefing_state.get("alerts", []):
+                                print(f"  [ALERT] {a}")
+                    # Check game over from briefing
+                    if isinstance(briefing_state, dict) and briefing_state.get("done"):
+                        game_done = True
+                        print(f"\n  GAME OVER: {briefing_state.get('result', '?').upper()} at tick {briefing_state.get('tick', '?')}")
+                        break
+                except Exception:
+                    pass
+            # Call LLM with retry for rate limits
+            response = None
+            max_retries = llm_config.max_retries
+            is_local = any(h in llm_config.base_url for h in ("localhost", "127.0.0.1"))
+            for attempt in range(max_retries):
+                try:
+                    response = await chat_completion(messages, openai_tools, llm_config, verbose, prompts=config.prompts)
+                    break
+                except (httpx.ReadTimeout, httpx.ConnectTimeout):
+                    timeout_s = int(llm_config.request_timeout_s)
+                    print(f"\n  [ERROR] Request timed out after {timeout_s}s.")
+                    encountered_agent_error = True
+                    if is_local:
+                        print("  [HINT] Local models can be slow. Increase timeout in config.yaml:")
+                        print(f"         llm.request_timeout_s: {timeout_s * 2}")
+                    break
+                except RuntimeError as e:
+                    err_str = str(e)
+                    retriable = any(code in err_str for code in ("429", "500", "502", "503", "504"))
+                    if retriable and attempt < max_retries - 1:
+                        wait = llm_config.retry_backoff_s * (attempt + 1)
+                        print(f"\n  [RETRY] Provider error, waiting {wait}s ({attempt + 1}/{max_retries})...")
+                        print(f"          {e}")
+                        await asyncio.sleep(wait)
+                    else:
+                        print(f"\n  [ERROR] API call failed: {e}")
+                        encountered_agent_error = True
+                        break
+            if response is None:
+                print("  [ERROR] Stopping agent.")
+                encountered_agent_error = True
+                break
+            total_api_calls += 1
+            choice = response["choices"][0]
+            assistant_msg = choice["message"]
+            # Add assistant response to history
+            messages.append(assistant_msg)
+            # Print assistant's reasoning
+            if assistant_msg.get("content") and verbose:
+                print(f"\n  [LLM thinks] {assistant_msg['content'][:200]}")
+            # Handle tool calls
+            tool_calls = assistant_msg.get("tool_calls", [])
+            if not tool_calls:
+                # No tool calls — prompt to act
+                if verbose:
+                    content = assistant_msg.get("content", "(no content)")
+                    print(f"  [LLM] No tool calls. Response: {content[:100]}")
+                messages.append({
+                    "role": "user",
+                    "content": config.prompts.no_tool_nudge,
+                })
+                continue
+            # Execute each tool call
+            for tc in tool_calls:
+                fn_name = tc["function"]["name"]
+                try:
+                    fn_args = json.loads(tc["function"].get("arguments", "{}"))
+                except (json.JSONDecodeError, TypeError):
+                    fn_args = {}
+                total_tool_calls += 1
+                if verbose:
+                    args_str = json.dumps(fn_args)
+                    if len(args_str) > 80:
+                        args_str = args_str[:80] + "..."
+                    print(f"  [Tool] {fn_name}({args_str})")
+                try:
+                    result = await env.call_tool(fn_name, **fn_args)
+                    consecutive_errors = 0
+                except Exception as e:
+                    result = {"error": str(e)}
+                    # Suggest similar tools for unknown tool errors
+                    if fn_name not in tool_names:
+                        import difflib
+                        close = difflib.get_close_matches(fn_name, tool_names, n=3, cutoff=0.4)
+                        # Always include canonical build tools for build-related names
+                        build_keywords = {"build", "place", "train", "produce", "construct"}
+                        if any(kw in fn_name.lower() for kw in build_keywords):
+                            for bt in ("build_unit", "build_structure", "build_and_place"):
+                                if bt in tool_names and bt not in close:
+                                    close.append(bt)
+                        if close:
+                            result["suggested_tools"] = close
+                # Detect game connection lost
+                if isinstance(result, dict) and "connection lost" in str(result.get("error", "")).lower():
+                    consecutive_errors += 1
+                    if consecutive_errors >= MAX_CONSECUTIVE_ERRORS:
+                        print(f"\n  GAME CRASHED: {consecutive_errors} consecutive connection errors. Stopping.")
+                        encountered_agent_error = True
+                        game_done = True
+                # Format result for message
+                result_str = json.dumps(result) if not isinstance(result, str) else result
+                messages.append({
+                    "role": "tool",
+                    "tool_call_id": tc["id"],
+                    "content": result_str,
+                })
+                # Check for game over
+                if isinstance(result, dict) and result.get("done"):
+                    game_done = True
+                    print(f"\n  GAME OVER: {result.get('result', '?').upper()} at tick {result.get('tick', '?')}")
+                if verbose and isinstance(result, dict):
+                    result_preview = json.dumps(result)
+                    if len(result_preview) > 500:
+                        result_preview = result_preview[:500] + "..."
+                    print(f"  [Result] {result_preview}")
+            # Status update
+            if total_api_calls % 5 == 0 or game_done:
+                elapsed = time.time() - start_time
+                limit_str = f"/{max_turns}" if max_turns else ""
+                time_str = f"{elapsed:.0f}/{max_time}s" if max_time else f"{elapsed:.0f}s"
+                print(
+                    f"  Turn {turn}{limit_str} | "
+                    f"API calls: {total_api_calls} | "
+                    f"Tool calls: {total_tool_calls} | "
+                    f"Time: {time_str}"
+                )
+            if game_done:
+                break
+            # Check finish reason
+            if choice.get("finish_reason") == "stop" and not tool_calls:
+                messages.append({
+                    "role": "user",
+                    "content": config.prompts.continue_nudge,
+                })
+        # Surrender so the replay has a proper ending
+        if not game_done:
+            try:
+                await env.call_tool("surrender")
+                print("\n  Surrendered (replay will have proper ending)")
+            except Exception:
+                pass
+        # Final report
+        elapsed = time.time() - start_time
+        print()
+        print("=" * 70)
+        print(f"Agent finished after {total_api_calls} API calls, {total_tool_calls} tool calls")
+        print(f"Time: {elapsed:.1f}s ({elapsed / max(total_api_calls, 1):.1f}s per API call)")
+        # Get final state and scorecard
+        try:
+            final = await env.call_tool("get_game_state")
+            mil = final.get("military", {})
+            eco = final.get("economy", {})
+            print(f"Result: {final.get('result', 'ongoing').upper()}")
+            print()
+            print("--- SCORECARD ---")
+            print(f"  Planning:         {'ON — ' + planning_strategy[:100] if planning_strategy else 'OFF'}")
+            print(f"  Ticks played:     {final.get('tick', '?')}")
+            print(f"  Units killed:     {mil.get('units_killed', 0)} (value: ${mil.get('kills_cost', 0)})")
+            print(f"  Units lost:       {mil.get('units_lost', 0)} (value: ${mil.get('deaths_cost', 0)})")
+            print(f"  Buildings killed: {mil.get('buildings_killed', 0)}")
+            print(f"  Buildings lost:   {mil.get('buildings_lost', 0)}")
+            print(f"  Army value:       ${mil.get('army_value', 0)}")
+            print(f"  Assets value:     ${mil.get('assets_value', 0)}")
+            print(f"  Experience:       {mil.get('experience', 0)}")
+            print(f"  Orders issued:    {mil.get('order_count', 0)}")
+            print(f"  Cash remaining:   ${eco.get('cash', 0)}")
+            print(f"  K/D cost ratio:   {mil.get('kills_cost', 0) / max(mil.get('deaths_cost', 1), 1):.2f}")
+            print(f"  Own units:        {final.get('own_units', '?')}")
+            print(f"  Own buildings:    {final.get('own_buildings', '?')}")
+            print(f"  Explored:         {final.get('explored_percent', 0)}%")
+            rv = final.get("reward_vector", {})
+            if rv:
+                print("  Reward vector:")
+                for dim, val in rv.items():
+                    print(f"    {dim:15s} {val:+.3f}")
+            print()
+        except Exception as e:
+            print(f"  (could not get final state: {e})")
+        # Get replay
+        replay = {}
+        try:
+            replay = await env.call_tool("get_replay_path")
+            if replay.get("path"):
+                print(f"Replay: {replay['path']}")
+        except Exception:
+            pass
+        # Auto-export bench submission JSON (always local, upload gated on errors)
+        should_export, should_upload, skip_reason = _bench_export_policy(encountered_agent_error)
+        try:
+            from datetime import datetime, timezone
+            from pathlib import Path
+            resolved_name = config.agent.agent_name or llm_config.model
+            sub = {
+                "agent_name": resolved_name,
+                "agent_type": config.agent.agent_type or "LLM",
+                "agent_url": config.agent.agent_url,
+                "opponent": config.opponent.bot_type.capitalize(),
+                "games": 1,
+                "result": final.get("result", ""),
+                "win": final.get("result") == "win",
+                "ticks": final.get("tick", 0),
+                "kills_cost": mil.get("kills_cost", 0),
+                "deaths_cost": mil.get("deaths_cost", 0),
+                "kd_ratio": round(mil.get("kills_cost", 0) / max(mil.get("deaths_cost", 1), 1), 2),
+                "assets_value": mil.get("assets_value", 0),
+                "explored_percent": final.get("explored_percent", 0),
+                "reward_vector": final.get("reward_vector", {}),
+                "replay_path": replay.get("path", ""),
+                "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
+            }
+            export_dir = Path.home() / ".openra-rl" / "bench-exports"
+            export_dir.mkdir(parents=True, exist_ok=True)
+            ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
+            slug = resolved_name.replace("/", "_")[:40]
+            export_path = export_dir / f"bench-{slug}-{ts}.json"
+            export_path.write_text(json.dumps(sub, indent=2))
+            print(f"Bench export: {export_path}")
+            # Auto-upload to bench if enabled (skip when agent errors occurred)
+            bench_url = config.agent.bench_url
+            if config.agent.bench_upload and bench_url:
+                if not should_upload:
+                    print(f"Skipping bench upload: {skip_reason}")
+                else:
+                    try:
+                        from openra_env.bench_submit import gradio_submit
+                        msg = gradio_submit(bench_url, sub, replay_path=replay.get("path", ""))
+                        print(f"Uploaded to bench: {msg}")
+                    except Exception as e:
+                        print(f"  (bench upload failed: {e})")
+        except Exception as e:
+            print(f"  (bench export failed: {e})")
+        print("=" * 70)

openra_env/bench_export.py ADDED Viewed

	@@ -0,0 +1,95 @@

+"""Build bench export JSON from a final game observation.
+Custom agents that use OpenRAEnv directly (CNN, RL, multi-agent, etc.)
+can call build_bench_export() after their game loop to produce a bench
+submission JSON — the same format the built-in LLM agent auto-exports.
+Usage:
+    from openra_env.bench_export import build_bench_export
+    obs = await env.step(action)  # final observation (obs.done == True)
+    export = build_bench_export(
+        obs,
+        agent_name="DeathBot-9000",
+        agent_type="RL",
+        opponent="Normal",
+    )
+    print(f"Saved to {export['path']}")
+    # Then submit:
+    #   openra-rl bench submit <path>
+"""
+import json
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, Optional
def _as_plain_dict(value: Any) -> Dict[str, Any]:
    """Convert a dict, a Pydantic-style model, or a plain object into a dict."""
    if isinstance(value, dict):
        return dict(value)
    if hasattr(value, "model_dump"):
        return value.model_dump()
    if hasattr(value, "__dict__"):
        return dict(vars(value))
    return dict(value)


def build_bench_export(
    obs: Any,
    agent_name: str,
    agent_type: str = "RL",
    opponent: str = "Normal",
    agent_url: str = "",
    replay_path: str = "",
    export_dir: Optional[Path] = None,
) -> Dict[str, Any]:
    """Build and save a bench export JSON from a final observation.

    Args:
        obs: Final observation — either a dict or a Pydantic model with
             .military, .economy, .tick, .result, .explored_percent attributes.
        agent_name: Display name for the leaderboard.
        agent_type: One of "Scripted", "LLM", "RL".
        opponent: Difficulty tier (Beginner/Easy/Medium/Normal/Hard).
        agent_url: Optional GitHub/project URL.
        replay_path: Optional path to .orarep replay file.
        export_dir: Where to save the JSON (default: ~/.openra-rl/bench-exports/).
            A plain string path is accepted as well as a ``Path``.

    Returns:
        Dict with all submission fields plus "path" pointing to the saved file.
        The "path" key is added after the file is written, so it is not part
        of the JSON on disk.
    """
    obs_dict = _as_plain_dict(obs)
    # vars() on a plain object leaves nested values as objects, so the military
    # section is normalized separately; a missing/None section becomes {}.
    mil = _as_plain_dict(obs_dict.get("military") or {})
    # `or 0` also covers keys that are present but set to None.
    kills = mil.get("kills_cost") or 0
    deaths = mil.get("deaths_cost") or 0
    result = obs_dict.get("result", "")

    # One clock reading feeds both the timestamp field and the filename so the
    # two can never disagree across a second boundary.
    now = datetime.now(timezone.utc)

    sub = {
        "agent_name": agent_name,
        "agent_type": agent_type,
        "agent_url": agent_url,
        "opponent": opponent,
        "games": 1,
        "result": result,
        "win": result == "win",
        "ticks": obs_dict.get("tick", 0),
        "kills_cost": kills,
        "deaths_cost": deaths,
        # Guard the denominator so a deathless game does not divide by zero.
        "kd_ratio": round(kills / max(deaths, 1), 2),
        "assets_value": mil.get("assets_value", 0),
        "explored_percent": obs_dict.get("explored_percent", 0),
        "reward_vector": obs_dict.get("reward_vector", {}),
        "replay_path": replay_path,
        "timestamp": now.strftime("%Y-%m-%dT%H:%M:%SZ"),
    }

    # Save to disk
    if export_dir is None:
        target_dir = Path.home() / ".openra-rl" / "bench-exports"
    else:
        target_dir = Path(export_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    ts = now.strftime("%Y%m%dT%H%M%SZ")
    # Keep only filename-safe characters; "/" and " " still map to "_" as before.
    slug = "".join(
        ch if ch.isalnum() or ch in "-_." else "_" for ch in agent_name
    )[:40]
    export_path = target_dir / f"bench-{slug}-{ts}.json"
    export_path.write_text(json.dumps(sub, indent=2), encoding="utf-8")

    sub["path"] = str(export_path)
    return sub

openra_env/bench_submit.py ADDED Viewed

	@@ -0,0 +1,167 @@

+"""CLI tool to upload bench export JSON to OpenRA-Bench leaderboard.
+Usage:
+    openra-rl bench submit result.json
+    openra-rl bench submit result.json --agent-name DeathBot-9000 --agent-type RL
+    openra-rl bench submit result.json --replay game.orarep
+    openra-rl bench submit result.json --bench-url http://localhost:7860
+"""
+import argparse
+import json
+import sys
+from pathlib import Path
+import httpx
+DEFAULT_BENCH_URL = "https://openra-rl-openra-bench.hf.space"
def _gradio_call(bench_url: str, api_name: str, payload: dict, timeout: float = 30) -> str:
    """Call a Gradio SSE endpoint (two-step protocol) and return its result.

    1. POST /gradio_api/call/<api_name> → {"event_id": "..."}
    2. GET  /gradio_api/call/<api_name>/<event_id> → SSE stream

    The stream pairs ``event: <type>`` lines with ``data: <json>`` lines.
    ``heartbeat`` and ``generating`` payloads are skipped, an ``error`` event
    is raised, and any other data line (normally ``complete``) is the result.

    Args:
        bench_url: Base URL of the Gradio app; a trailing slash is tolerated.
        api_name: Name of the Gradio API endpoint to call.
        payload: JSON body for the initial POST.
        timeout: Timeout passed to both HTTP requests.

    Returns:
        The first element of the result list, or ``str(result)`` when the
        payload is not a non-empty list.

    Raises:
        RuntimeError: On a non-200 response, a missing event id, an ``error``
            event, or a stream that ends without a result.
    """
    base = bench_url.rstrip("/")
    resp = httpx.post(
        f"{base}/gradio_api/call/{api_name}",
        json=payload,
        timeout=timeout,
    )
    if resp.status_code != 200:
        raise RuntimeError(f"HTTP {resp.status_code}: {resp.text[:200]}")
    event_id = resp.json().get("event_id")
    if not event_id:
        raise RuntimeError(f"No event_id in response: {resp.text[:200]}")

    with httpx.stream(
        "GET",
        f"{base}/gradio_api/call/{api_name}/{event_id}",
        timeout=timeout,
    ) as stream:
        if stream.status_code != 200:
            raise RuntimeError(f"HTTP {stream.status_code} while reading result stream")
        event = ""  # type announced by the most recent "event:" line
        for line in stream.iter_lines():
            if line.startswith("event:"):
                event = line[len("event:"):].strip()
                continue
            if not line.startswith("data:"):
                continue
            raw = line[len("data:"):].strip()
            # Keep-alives and intermediate updates are not the final answer.
            if event in ("heartbeat", "generating"):
                continue
            if event == "error":
                raise RuntimeError(f"Gradio call failed: {raw[:200] or 'unknown error'}")
            result = json.loads(raw)
            if isinstance(result, list) and result:
                return result[0]
            return str(result)
    raise RuntimeError("No result received from SSE stream")
def gradio_upload_file(bench_url: str, file_path: str, timeout: float = 30) -> dict:
    """Upload a local file to a Gradio app and build its file reference.

    The returned dict has the form
    {"path": "...", "orig_name": "...", "size": ..., "meta": {...}} and can be
    passed as a file input in a Gradio API call.

    Raises:
        RuntimeError: If the upload does not return HTTP 200, or returns an
            empty body.
    """
    local_file = Path(file_path)
    upload_url = bench_url.rstrip("/") + "/gradio_api/upload"

    with open(local_file, "rb") as handle:
        resp = httpx.post(
            upload_url,
            files={"files": (local_file.name, handle)},
            timeout=timeout,
        )

    if resp.status_code != 200:
        raise RuntimeError(f"File upload failed: HTTP {resp.status_code}: {resp.text[:200]}")

    uploaded = resp.json()
    if not uploaded:
        raise RuntimeError("File upload returned empty response")

    # The first returned entry is used as the server-side path.
    file_ref = {"path": uploaded[0], "orig_name": local_file.name}
    file_ref["size"] = local_file.stat().st_size
    file_ref["meta"] = {"_type": "gradio.FileData"}
    return file_ref
def gradio_submit(
    bench_url: str,
    data: dict,
    replay_path: str = "",
    timeout: float = 30,
) -> str:
    """Send bench results, with a replay when one is available, to the leaderboard.

    When ``replay_path`` names an existing file, the file is uploaded first and
    the ``submit_with_replay`` endpoint is called. In every other case the
    JSON-only ``submit`` endpoint is called.
    """
    replay_available = bool(replay_path) and Path(replay_path).is_file()

    if not replay_available:
        json_only = {"data": [json.dumps(data)]}
        return _gradio_call(bench_url, "submit", json_only, timeout=timeout)

    file_ref = gradio_upload_file(bench_url, replay_path, timeout=timeout)
    with_replay = {"data": [json.dumps(data), file_ref]}
    return _gradio_call(bench_url, "submit_with_replay", with_replay, timeout=timeout)
def main() -> None:
    """Parse CLI arguments and submit a bench export JSON to the leaderboard.

    Exits with status 1 when the JSON file is missing, unreadable, not valid
    JSON, or not a JSON object, and when the submission itself fails.
    """
    parser = argparse.ArgumentParser(
        description="Upload bench export JSON to OpenRA-Bench leaderboard"
    )
    parser.add_argument(
        "json_file",
        type=Path,
        help="Path to bench export JSON file",
    )
    parser.add_argument("--agent-name", default=None, help="Override agent name in the submission")
    parser.add_argument("--agent-type", default=None, help="Override agent type (Scripted/LLM/RL)")
    parser.add_argument("--agent-url", default=None, help="GitHub/project URL for the agent")
    parser.add_argument("--replay", default=None, help="Path to .orarep replay file")
    parser.add_argument(
        "--bench-url",
        default=DEFAULT_BENCH_URL,
        help=f"Bench leaderboard URL (default: {DEFAULT_BENCH_URL})",
    )
    args = parser.parse_args()

    if not args.json_file.exists():
        print(f"Error: file not found: {args.json_file}")
        sys.exit(1)
    try:
        data = json.loads(args.json_file.read_text())
    except json.JSONDecodeError as e:
        print(f"Error: invalid JSON: {e}")
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as e:
        # e.g. the path is a directory, is unreadable, or is not text.
        print(f"Error: could not read {args.json_file}: {e}")
        sys.exit(1)
    # The overrides and lookups below need a mapping, so reject lists/scalars
    # with a clear message instead of a traceback.
    if not isinstance(data, dict):
        print("Error: bench export must be a JSON object")
        sys.exit(1)

    # Apply CLI overrides
    if args.agent_name:
        data["agent_name"] = args.agent_name
    if args.agent_type:
        data["agent_type"] = args.agent_type
    if args.agent_url:
        data["agent_url"] = args.agent_url

    print(f"Submitting {data.get('agent_name', '?')} vs {data.get('opponent', '?')}...")
    print(f"  Bench: {args.bench_url}")
    # gradio_submit falls back to a JSON-only submit when the replay is not a
    # file; tell the user rather than dropping the replay silently.
    if args.replay and not Path(args.replay).is_file():
        print(f"  Warning: replay file not found, submitting without it: {args.replay}")
    try:
        msg = gradio_submit(args.bench_url, data, replay_path=args.replay or "")
        print(f"  {msg}")
    except httpx.ConnectError:
        print(f"Error: could not connect to {args.bench_url}")
        sys.exit(1)
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()

openra_env/cli/__init__.py ADDED Viewed

File without changes

openra_env/cli/commands.py ADDED Viewed

	@@ -0,0 +1,464 @@

+"""Subcommand implementations for the openra-rl CLI."""
+import shutil
+import subprocess
+import sys
+import webbrowser
+from pathlib import Path
+from typing import Optional
+from openra_env.cli.console import dim, error, header, info, step, success, warn
+from openra_env.cli import docker_manager as docker
+from openra_env.cli.wizard import (
+    CONFIG_PATH,
+    has_saved_config,
+    load_saved_config,
+    merge_cli_into_config,
+    run_wizard,
+)
def cmd_play(
    provider: Optional[str] = None,
    model: Optional[str] = None,
    api_key: Optional[str] = None,
    difficulty: str = "normal",
    verbose: bool = False,
    port: int = 8000,
    server_url: Optional[str] = None,
    local: bool = False,
    image_version: Optional[str] = None,
) -> None:
    """Run the LLM agent against the game server.

    The server is one of: an existing instance at ``server_url``, a locally
    spawned ``openra_env.server.app`` process (``local=True``), or the
    Docker-managed server (the default when neither is given).

    Args:
        provider: LLM provider override, merged into the saved config.
        model: LLM model override, merged into the saved config.
        api_key: LLM API key override, merged into the saved config.
        difficulty: Difficulty passed to the Docker server when it is started.
        verbose: Forwarded to the agent run.
        port: Port for the Docker server and for the default server URL.
        server_url: URL of an existing server; disables Docker management.
        local: Spawn the server as a local subprocess instead of using Docker.
        image_version: Docker image version. When None and more than one
            concrete version exists locally, the user is prompted to pick.

    Exits with status 1 when Docker is unavailable, the configuration lacks an
    API key or model, or the server cannot be started or does not become ready.
    """
    use_docker = server_url is None and not local

    # 1. Check Docker (unless --local or --server-url)
    if use_docker and not docker.check_docker():
        sys.exit(1)

    # 1b. Version selection — let user pick if multiple versions exist locally
    if use_docker and image_version is None:
        versions = docker.list_local_versions()
        # Filter out "latest" for display — only show concrete version tags
        concrete = [v for v in versions if v != "latest"]
        if len(concrete) > 1:
            info(f"Multiple engine versions available: {', '.join(concrete)}")
            try:
                choice = input(f"  Version to use [{concrete[0]}]: ").strip()
            except (EOFError, KeyboardInterrupt):
                choice = ""
            if choice:
                image_version = choice
            else:
                image_version = concrete[0]

    # 2. Load or create config
    has_cli_overrides = any([provider, model, api_key])
    if has_cli_overrides:
        config = load_saved_config() or {}
        config = merge_cli_into_config(config, provider=provider, model=model, api_key=api_key)
    elif has_saved_config():
        config = load_saved_config() or {}
    else:
        config = run_wizard()

    # Validate we have enough config to proceed
    llm_cfg = config.get("llm", {})
    base_url = llm_cfg.get("base_url", "")
    # An LLM endpoint on this machine is allowed to run without an API key.
    is_local_llm = any(h in base_url for h in ("localhost", "127.0.0.1", "0.0.0.0"))
    if not llm_cfg.get("api_key") and not is_local_llm:
        error("No API key configured. Run `openra-rl config` or pass --api-key.")
        sys.exit(1)
    if not llm_cfg.get("model"):
        error("No model configured. Run `openra-rl config` or pass --model.")
        sys.exit(1)

    # 3. Start/reuse server
    actual_url = server_url or f"http://localhost:{port}"
    we_started_server = False
    local_server_proc = None
    if local:
        # Run the server locally (for developers with local OpenRA build)
        header("Starting local server...")
        local_server_proc = subprocess.Popen(
            [sys.executable, "-m", "openra_env.server.app"],
            stdout=sys.stdout,
            stderr=sys.stderr,
        )
        we_started_server = True
        # Wait for it to be ready
        import time
        import urllib.request
        import urllib.error
        step(f"Waiting for local server on port {port}...")
        start = time.time()
        while time.time() - start < 60:
            # A server that has already exited will never answer; fail now
            # instead of polling for the full 60 seconds.
            if local_server_proc.poll() is not None:
                error(f"Local server exited early (exit code {local_server_proc.returncode}).")
                sys.exit(1)
            try:
                # The context manager closes the HTTP response after each probe.
                with urllib.request.urlopen(f"{actual_url}/health", timeout=3) as resp:
                    if resp.status == 200:
                        success("Local server is ready!")
                        break
            except (urllib.error.URLError, OSError):
                pass
            time.sleep(2)
        else:
            error("Local server did not become ready within 60s.")
            local_server_proc.terminate()
            sys.exit(1)
    elif use_docker:
        if docker.is_running():
            info(f"Server already running on port {port}.")
        else:
            if not docker.start_server(port=port, difficulty=difficulty, version=image_version):
                sys.exit(1)
            we_started_server = True
            if not docker.wait_for_health(port=port):
                sys.exit(1)

    # 4. Run the LLM agent
    header("Starting LLM agent...")
    provider_name = config.get("provider", "custom")
    info(f"Model: {llm_cfg.get('model', '?')} via {provider_name}")
    print()
    try:
        _run_llm_agent(config, actual_url, verbose)
    except KeyboardInterrupt:
        print("\nInterrupted.")
    except ConnectionRefusedError:
        error(f"Could not connect to {actual_url}.")
        info("Try: openra-rl server start")
        info("Check: openra-rl doctor")
    except Exception as e:
        # Reported rather than re-raised so replay copy and cleanup still run.
        error(f"Agent error: {e}")
        info("Run with --verbose for full details, or check: openra-rl doctor")

    # 5. Auto-copy replays from Docker
    if use_docker and docker.is_running():
        new_replays = docker.copy_replays()
        if new_replays:
            print()
            for f in new_replays:
                success(f"Replay saved: {docker.LOCAL_REPLAY_DIR / f}")
            info("Watch with: openra-rl replay watch")

    # 6. Cleanup — only offered for a server this invocation started
    if we_started_server:
        print()
        if local_server_proc:
            try:
                answer = input("  Stop local server? [Y/n] ").strip().lower()
            except (EOFError, KeyboardInterrupt):
                answer = "y"
            if answer in ("", "y", "yes"):
                local_server_proc.terminate()
                try:
                    local_server_proc.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    # The process ignored terminate(); force it down so it is
                    # not left orphaned.
                    local_server_proc.kill()
                    local_server_proc.wait()
                success("Local server stopped.")
        elif use_docker:
            try:
                answer = input("  Stop game server? [Y/n] ").strip().lower()
            except (EOFError, KeyboardInterrupt):
                answer = "y"
            if answer in ("", "y", "yes"):
                docker.stop_server()
def _run_llm_agent(config: dict, server_url: str, verbose: bool) -> None:
    """Build the app config from saved settings and run the LLM agent to completion."""
    import asyncio
    from openra_env.config import load_config

    # The agent section always carries the server URL; verbose is set only when requested.
    agent_section = {"server_url": server_url}
    if verbose:
        agent_section["verbose"] = True

    # A non-empty saved "llm" section is passed through as an override.
    overrides: dict = {}
    saved_llm = config.get("llm", {})
    if saved_llm:
        overrides["llm"] = saved_llm
    overrides["agent"] = agent_section

    app_config = load_config(cli_overrides=overrides)

    from openra_env.agent import run_agent
    asyncio.run(run_agent(app_config, verbose))
def cmd_config() -> None:
    """Run the setup wizard again; its return value is discarded."""
    run_wizard()
def cmd_server_start(port: int = 8000, difficulty: str = "normal", detach: bool = True) -> None:
    """Start the game server in Docker.

    Args:
        port: Port passed to the Docker server and to the health wait.
        difficulty: Difficulty passed to the Docker server.
        detach: Passed through to ``docker.start_server``; when true, the
            command then waits for the server to report healthy.

    Exits with status 1 if Docker is unavailable, the server fails to start,
    or (when detached) the health wait fails.
    """
    if not docker.check_docker():
        sys.exit(1)
    if not docker.start_server(port=port, difficulty=difficulty, detach=detach):
        sys.exit(1)
    # Treat a failed health wait as a failure, matching cmd_play, so scripts
    # see a non-zero exit code when the server never came up.
    if detach and not docker.wait_for_health(port=port):
        sys.exit(1)
def cmd_server_stop() -> None:
    """Ask the Docker manager to stop the game server."""
    docker.stop_server()
def cmd_server_status() -> None:
    """Report whether the game server is running, with its ports when known."""
    status = docker.server_status()
    if not status:
        info("Server is not running.")
        return

    success(f"Server is running: {status['status']}")
    ports = status.get("ports")
    if ports:
        dim(f"  Ports: {ports}")
def cmd_server_logs(follow: bool = False) -> None:
    """Show game server logs via the Docker manager, passing ``follow`` through."""
    docker.get_logs(follow=follow)
def cmd_doctor() -> None:
    """Check system prerequisites and print one status line per check.

    A missing Docker CLI, a stopped Docker daemon, or Python older than 3.10
    counts as a failure. Image, server, and config status are informational.
    """
    header("OpenRA-RL Doctor")
    healthy = True

    # Docker CLI, then the daemon behind it
    if not shutil.which("docker"):
        error("Docker CLI: not found")
        dim("  Install from https://docs.docker.com/get-docker/")
        healthy = False
    else:
        success("Docker CLI: installed")
        from openra_env.cli.docker_manager import _run

        daemon_probe = _run(["docker", "info"])
        if daemon_probe.returncode != 0:
            warn("Docker daemon: not running")
            healthy = False
        else:
            success("Docker daemon: running")

    # Game image (informational only)
    if not docker.image_exists():
        warn("Game image: not pulled yet (will be pulled on first `openra-rl play`)")
    else:
        success(f"Game image: available ({docker.IMAGE})")

    # Game server (informational only)
    if not docker.is_running():
        dim("Game server: not running")
    else:
        success("Game server: running")

    # Python interpreter version
    major, minor, micro = sys.version_info[:3]
    py_version = f"{major}.{minor}.{micro}"
    if sys.version_info < (3, 10):
        error(f"Python: {py_version} (requires 3.10+)")
        healthy = False
    else:
        success(f"Python: {py_version}")

    # Saved configuration (informational only)
    if not has_saved_config():
        dim("Config: not yet configured (run `openra-rl play` or `openra-rl config`)")
    else:
        saved = load_saved_config() or {}
        provider = saved.get("provider", "unknown")
        model = saved.get("llm", {}).get("model", "unknown")
        success(f"Config: {CONFIG_PATH}")
        dim(f"  Provider: {provider}, Model: {model}")

    print()
    if not healthy:
        warn("Some checks failed. Fix the issues above and try again.")
    else:
        success("All checks passed!")
def cmd_version() -> None:
    """Print the installed openra-rl version, or "dev" when it cannot be determined."""
    resolved = "dev"
    try:
        from importlib.metadata import version as _installed_version

        resolved = _installed_version("openra-rl")
    except Exception:
        # Any lookup failure keeps the "dev" placeholder.
        pass
    print(f"openra-rl {resolved}")
def cmd_mcp_server(server_url: Optional[str] = None, port: int = 8000) -> None:
    """Start the MCP stdio server, defaulting to the game server on localhost:<port>."""
    from openra_env.mcp_server import main as mcp_main

    target = server_url if server_url else f"http://localhost:{port}"
    mcp_main(server_url=target)
+# ── Replay commands ──────────────────────────────────────────────────
def _print_replay_viewer_logs() -> None:
    """Print the replay viewer's logs, if the Docker manager returns any."""
    logs = docker.get_replay_viewer_logs()
    if logs:
        print()
        info("Replay viewer logs:")
        print(logs)


def cmd_replay_watch(
    file: Optional[str] = None,
    port: int = 6080,
    resolution: Optional[str] = None,
    render_mode: Optional[str] = None,
    vnc_quality: Optional[int] = None,
    vnc_compression: Optional[int] = None,
    cpu_cores: Optional[int] = None,
) -> None:
    """Watch a replay in the browser via VNC-in-Docker.

    Args:
        file: Replay to open. When None, the last local ``*.orarep`` file in
            sorted filename order is used, falling back to the latest replay
            reported by the running game container.
        port: Local port the viewer is served on.
        resolution: Optional viewer setting override.
        render_mode: Optional viewer setting override.
        vnc_quality: Optional viewer setting override.
        vnc_compression: Optional viewer setting override.
        cpu_cores: Optional viewer setting override.

    Exits with status 1 when Docker is unavailable, a viewer setting is
    invalid, no replay can be found, or the viewer fails to start or does not
    become ready within 30 seconds.
    """
    if not docker.check_docker():
        sys.exit(1)
    try:
        viewer_settings = docker.load_replay_viewer_settings(
            resolution=resolution,
            render_mode=render_mode,
            vnc_quality=vnc_quality,
            vnc_compression=vnc_compression,
            cpu_cores=cpu_cores,
        )
    except ValueError as exc:
        error(f"Invalid replay viewer setting: {exc}")
        sys.exit(1)

    replay_path = file
    if replay_path is None:
        # Check local replays first (most reliable — file is mounted directly)
        local_replays = sorted(docker.LOCAL_REPLAY_DIR.glob("*.orarep"))
        if local_replays:
            replay_path = str(local_replays[-1])
            info(f"Latest local replay: {local_replays[-1].name}")
        elif docker.is_running():
            # Fall back to container path (uses --volumes-from, less reliable)
            replay_path = docker.get_latest_replay()
            if replay_path:
                info(f"Latest container replay: {Path(replay_path).name}")
        if replay_path is None:
            error("No replays found. Play a game first with: openra-rl play")
            sys.exit(1)

    header("Starting replay viewer...")
    info(
        f"Settings: {viewer_settings.width}x{viewer_settings.height}, "
        f"render={viewer_settings.render_mode}, "
        f"vnc q/c={viewer_settings.vnc_quality}/{viewer_settings.vnc_compression}"
    )
    if not docker.start_replay_viewer(replay_path, port=port, settings=viewer_settings):
        sys.exit(1)

    import time
    import urllib.error
    import urllib.request
    url = (
        f"http://localhost:{port}/vnc.html?autoconnect=1&resize=scale"
        f"&quality={viewer_settings.vnc_quality}"
        f"&compression={viewer_settings.vnc_compression}"
    )
    step("Waiting for viewer to be ready...")
    ready = False
    start_time = time.time()
    timeout = 30
    while time.time() - start_time < timeout:
        if not docker.is_replay_viewer_running():
            error("Replay viewer exited before it became ready.")
            _print_replay_viewer_logs()
            sys.exit(1)
        try:
            # The context manager closes the response after every probe, so
            # the polling loop does not accumulate open connections.
            with urllib.request.urlopen(f"http://localhost:{port}/vnc.html", timeout=2) as resp:
                if 200 <= resp.status < 500:
                    ready = True
                    break
        except (urllib.error.URLError, OSError):
            pass
        time.sleep(1)
    if not ready:
        error(f"Viewer did not become ready within {timeout}s.")
        _print_replay_viewer_logs()
        sys.exit(1)

    info(f"Opening {url}")
    webbrowser.open(url)
    print()
    info("Tip: press F12 in the viewer for maximum replay speed.")
    info("Tip: tune with --resolution, --render, --vnc-quality, --vnc-compression.")
    info("Press Ctrl+C to stop the replay viewer")
    print()
    try:
        # Wait until container exits or user presses Ctrl+C
        while docker.is_replay_viewer_running():
            time.sleep(2)
        info("Replay viewer has stopped.")
    except KeyboardInterrupt:
        print()
        docker.stop_replay_viewer()
def cmd_replay_list() -> None:
    """Print the replays found in the Docker container, then the local ones."""
    header("Game Replays")

    # Container-side replays can only be listed while the server is running.
    if not docker.is_running():
        dim("  Docker server not running — cannot list container replays.")
    else:
        container_replays = docker.list_replays()
        if not container_replays:
            dim("  No replays in Docker container.")
        else:
            info(f"In Docker container ({len(container_replays)}):")
            for replay in container_replays:
                dim(f"    {Path(replay).name}")

    # Local replays
    print()
    replay_dir = docker.LOCAL_REPLAY_DIR
    if not replay_dir.exists():
        dim(f"  No local replay directory ({replay_dir})")
        return

    on_disk = sorted(replay_dir.glob("*.orarep"))
    if not on_disk:
        dim(f"  No local replays in {replay_dir}")
        return

    info(f"Local ({len(on_disk)}) — {replay_dir}:")
    for replay in on_disk:
        dim(f"    {replay.name}")
+def cmd_replay_copy() -> None:
+    """Copy replays from Docker container to local directory."""
+    if not docker.check_docker():
+        sys.exit(1)
+    if not docker.is_running():
+        error("Game server is not running. Start it first or use: openra-rl server start")
+        sys.exit(1)
+    header("Copying replays from Docker...")
+    new_files = docker.copy_replays()
+    if new_files:
+        for f in new_files:
+            success(f"  Copied: {f}")
+        success(f"Copied {len(new_files)} new replay(s) to {docker.LOCAL_REPLAY_DIR}")
+    else:
+        info(f"No new replays to copy. Replays are in {docker.LOCAL_REPLAY_DIR}")
+def cmd_replay_stop() -> None:
+    """Stop the replay viewer.
+    Delegates to docker.stop_replay_viewer(); its boolean result is discarded.
+    """
+    docker.stop_replay_viewer()

openra_env/cli/console.py ADDED Viewed

	@@ -0,0 +1,43 @@

+"""ANSI colored console output helpers (no external deps)."""
+import sys
+# ANSI codes — disabled when not a TTY
+# The check runs once, at import time, and looks at sys.stdout only; every
+# escape sequence below collapses to "" when stdout is not a terminal.
+_IS_TTY = hasattr(sys.stdout, "isatty") and sys.stdout.isatty()
+_RESET = "\033[0m" if _IS_TTY else ""  # reset all attributes
+_BOLD = "\033[1m" if _IS_TTY else ""
+_GREEN = "\033[32m" if _IS_TTY else ""
+_YELLOW = "\033[33m" if _IS_TTY else ""
+_RED = "\033[31m" if _IS_TTY else ""
+_CYAN = "\033[36m" if _IS_TTY else ""
+_DIM = "\033[2m" if _IS_TTY else ""
+def info(msg: str) -> None:
+    """Print *msg* to stdout, uncoloured, with a two-space indent."""
+    print(f"  {msg}")
+def success(msg: str) -> None:
+    """Print *msg* to stdout in green (plain when colours are disabled)."""
+    print(f"  {_GREEN}{msg}{_RESET}")
+def error(msg: str) -> None:
+    """Print *msg* to stderr in red, with a two-space indent.
+    Colour is decided from sys.stderr itself, checked at call time, so escape
+    codes are not written when stderr is redirected to a file or pipe even if
+    stdout is still a terminal.
+    """
+    use_color = hasattr(sys.stderr, "isatty") and sys.stderr.isatty()
+    red, reset = ("\033[31m", "\033[0m") if use_color else ("", "")
+    print(f"  {red}{msg}{reset}", file=sys.stderr)
+def warn(msg: str) -> None:
+    """Print *msg* to stdout in yellow (plain when colours are disabled)."""
+    print(f"  {_YELLOW}{msg}{_RESET}")
+def step(msg: str) -> None:
+    """Print a progress step (e.g. 'Pulling image...') to stdout in cyan."""
+    print(f"  {_CYAN}{msg}{_RESET}")
+def header(msg: str) -> None:
+    """Print *msg* to stdout in bold, preceded by a blank line."""
+    print(f"\n  {_BOLD}{msg}{_RESET}")
+def dim(msg: str) -> None:
+    """Print *msg* to stdout with the ANSI "dim" attribute (plain when colours are disabled)."""
+    print(f"  {_DIM}{msg}{_RESET}")

openra_env/cli/docker_manager.py ADDED Viewed

	@@ -0,0 +1,600 @@

+"""Docker orchestration for the OpenRA-RL game server."""
+import json
+import os
+import shutil
+import subprocess
+import sys
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional
+from openra_env.cli.console import error, info, step, success
+# Image repository; a tag is appended to form the full image reference.
+IMAGE_REPO = "ghcr.io/yxc20089/openra-rl"
+IMAGE = f"{IMAGE_REPO}:latest"
+# Fixed container names used for the game server and the replay viewer.
+CONTAINER_NAME = "openra-rl-server"
+REPLAY_CONTAINER = "openra-rl-replay"
+# Directory searched for .orarep files inside the game server container.
+REPLAY_DIR_IN_CONTAINER = "/root/.config/openra/Replays/ra"
+# Host-side directory replays are copied into, and the JSON manifest stored
+# next to them (replay filename → image tag).
+LOCAL_REPLAY_DIR = Path.home() / ".openra-rl" / "replays"
+MANIFEST_PATH = LOCAL_REPLAY_DIR / "manifest.json"
+def _run(args: list[str], capture: bool = True, **kwargs) -> subprocess.CompletedProcess:
+    """Run a subprocess command, capturing output by default.
+    Args:
+        args: Command and arguments, passed to subprocess.run as a list.
+        capture: Forwarded as ``capture_output``; when True, stdout and stderr
+            are collected on the returned object instead of being inherited.
+        **kwargs: Extra keyword arguments forwarded to subprocess.run.
+    Returns:
+        The CompletedProcess; output is decoded as UTF-8 text. The return code
+        is not checked here — callers inspect ``returncode`` themselves.
+    """
+    return subprocess.run(
+        args,
+        capture_output=capture,
+        text=True,
+        encoding="utf-8",
+        **kwargs,
+    )
+def check_docker() -> bool:
+    """Verify docker CLI is available and daemon is running."""
+    if not shutil.which("docker"):
+        error("Docker not found. Install it from https://docs.docker.com/get-docker/")
+        return False
+    result = _run(["docker", "info"])
+    if result.returncode != 0:
+        error("Docker daemon is not running. Start Docker Desktop and try again.")
+        return False
+    return True
+def _image_tag(version: Optional[str] = None) -> str:
+    """Return the full image tag for a given version (default: latest)."""
+    tag = version or "latest"
+    return f"{IMAGE_REPO}:{tag}"
+def pull_image(version: Optional[str] = None, quiet: bool = False) -> bool:
+    """Pull the game server image from GHCR."""
+    image = _image_tag(version)
+    if not quiet:
+        step(f"Pulling game server image ({image})...")
+    result = subprocess.run(
+        ["docker", "pull", image],
+        stdout=sys.stdout if not quiet else subprocess.DEVNULL,
+        stderr=sys.stderr if not quiet else subprocess.DEVNULL,
+    )
+    if result.returncode != 0:
+        error(f"Failed to pull {image}")
+        return False
+    if not quiet:
+        success("Image pulled successfully.")
+    return True
+def image_exists(version: Optional[str] = None) -> bool:
+    """Check if the game server image is available locally."""
+    image = _image_tag(version)
+    result = _run(["docker", "images", "-q", image])
+    return bool(result.stdout.strip())
+def list_local_versions() -> list[str]:
+    """List all locally available openra-rl image versions (tags), newest first."""
+    result = _run([
+        "docker", "images", IMAGE_REPO,
+        "--format", "{{.Tag}}",
+    ])
+    if result.returncode != 0:
+        return []
+    tags = [t.strip() for t in result.stdout.splitlines() if t.strip()]
+    # Put "latest" first, then sort the rest in reverse
+    versions = sorted([t for t in tags if t != "latest"], reverse=True)
+    if "latest" in tags:
+        versions.insert(0, "latest")
+    return versions
+def get_running_image_tag() -> Optional[str]:
+    """Get the image tag of the currently running game server container."""
+    if not is_running():
+        return None
+    result = _run([
+        "docker", "inspect", CONTAINER_NAME,
+        "--format", "{{.Config.Image}}",
+    ])
+    if result.returncode != 0:
+        return None
+    image = result.stdout.strip()
+    # Extract tag from "ghcr.io/yxc20089/openra-rl:0.2.1"
+    if ":" in image:
+        return image.split(":")[-1]
+    return "latest"
+# ── Replay manifest ──────────────────────────────────────────────────
+def _load_manifest() -> dict:
+    """Load the replay manifest (replay filename → image tag)."""
+    if MANIFEST_PATH.exists():
+        try:
+            return json.loads(MANIFEST_PATH.read_text(encoding="utf-8"))
+        except (json.JSONDecodeError, OSError):
+            pass
+    return {}
+def _save_manifest(manifest: dict) -> None:
+    """Save the replay manifest."""
+    MANIFEST_PATH.parent.mkdir(parents=True, exist_ok=True)
+    MANIFEST_PATH.write_text(json.dumps(manifest, indent=2) + "\n", encoding="utf-8")
+def get_replay_image_tag(replay_filename: str) -> Optional[str]:
+    """Look up which image tag was used to record a replay."""
+    manifest = _load_manifest()
+    return manifest.get(replay_filename)
+def _record_replays_in_manifest(filenames: list[str], image_tag: str) -> None:
+    """Record which image tag was used for newly copied replays."""
+    if not filenames:
+        return
+    manifest = _load_manifest()
+    for f in filenames:
+        manifest[f] = image_tag
+    _save_manifest(manifest)
+def is_running() -> bool:
+    """Check if the game server container is running."""
+    result = _run([
+        "docker", "ps", "--filter", f"name={CONTAINER_NAME}",
+        "--format", "{{.Names}}"
+    ])
+    return CONTAINER_NAME in result.stdout
+def start_server(
+    port: int = 8000,
+    difficulty: str = "normal",
+    detach: bool = True,
+    version: Optional[str] = None,
+) -> bool:
+    """Start the game server container."""
+    if is_running():
+        info(f"Server already running on port {port}.")
+        return True
+    image = _image_tag(version)
+    # Ensure image exists
+    if not image_exists(version):
+        if not pull_image(version):
+            return False
+    step(f"Starting game server on port {port} ({image})...")
+    cmd = [
+        "docker", "run", "--rm",
+        "-d" if detach else "",
+        "-p", f"{port}:8000",
+        "--name", CONTAINER_NAME,
+        "-e", f"BOT_TYPE={difficulty}",
+        image,
+    ]
+    # Remove empty strings from cmd
+    cmd = [c for c in cmd if c]
+    result = _run(cmd)
+    if result.returncode != 0:
+        error(f"Failed to start server: {result.stderr.strip()}")
+        return False
+    return True
+def stop_server() -> bool:
+    """Stop and remove the game server container."""
+    if not is_running():
+        info("Server is not running.")
+        return True
+    step("Stopping game server...")
+    result = _run(["docker", "stop", CONTAINER_NAME])
+    if result.returncode != 0:
+        error(f"Failed to stop server: {result.stderr.strip()}")
+        return False
+    success("Server stopped.")
+    return True
+def wait_for_health(port: int = 8000, timeout: int = 120) -> bool:
+    """Poll the health endpoint until the server is ready."""
+    import urllib.request
+    import urllib.error
+    url = f"http://localhost:{port}/health"
+    step(f"Waiting for server to be ready (timeout {timeout}s)...")
+    start = time.time()
+    while time.time() - start < timeout:
+        try:
+            req = urllib.request.urlopen(url, timeout=3)
+            if req.status == 200:
+                success("Server is ready!")
+                return True
+        except (urllib.error.URLError, OSError):
+            pass
+        time.sleep(2)
+    error(f"Server did not become healthy within {timeout}s.")
+    return False
+def get_logs(follow: bool = False) -> None:
+    """Print container logs."""
+    if not is_running():
+        # Try to get logs from stopped container too
+        pass
+    cmd = ["docker", "logs"]
+    if follow:
+        cmd.append("-f")
+    cmd.append(CONTAINER_NAME)
+    subprocess.run(cmd)
+def server_status() -> Optional[dict]:
+    """Get server container status info."""
+    if not is_running():
+        return None
+    result = _run([
+        "docker", "ps", "--filter", f"name={CONTAINER_NAME}",
+        "--format", "{{.Status}}\t{{.Ports}}"
+    ])
+    if result.stdout.strip():
+        parts = result.stdout.strip().split("\t")
+        return {
+            "status": parts[0] if parts else "unknown",
+            "ports": parts[1] if len(parts) > 1 else "",
+        }
+    return None
+# ── Replay viewer settings ───────────────────────────────────────────
+@dataclass(frozen=True)
+class ReplayViewerSettings:
+    """Tunable replay viewer settings for quality/performance tradeoffs.
+    Instances are immutable (frozen dataclass).
+    """
+    # Viewer resolution in pixels.
+    width: int = 1280
+    height: int = 960
+    ui_scale: float = 1.0
+    viewport_distance: str = "Medium"  # Close | Medium | Far
+    mute: bool = True
+    render_mode: str = "auto"  # auto | gpu | cpu
+    # VNC quality and compression levels.
+    vnc_quality: int = 8
+    vnc_compression: int = 4
+    cpu_cores: int = 4  # Docker --cpus limit for software rendering (0 = all available)
+def _parse_resolution(value: str) -> tuple[int, int]:
+    """Parse a WxH resolution string."""
+    raw = value.strip().lower().replace(" ", "")
+    for sep in ("x", ","):
+        if sep in raw:
+            left, right = raw.split(sep, 1)
+            try:
+                w, h = int(left), int(right)
+            except ValueError:
+                break
+            if w < 320 or h < 240 or w > 7680 or h > 4320:
+                raise ValueError(f"resolution out of range (320x240..7680x4320): {value}")
+            return w, h
+    raise ValueError(f"resolution must be WxH (e.g. 960x540), got: {value!r}")
+def _normalize_render_mode(value: str) -> str:
+    """Validate and normalize render mode."""
+    mode = value.strip().lower()
+    if mode not in ("auto", "gpu", "cpu"):
+        raise ValueError(f"render mode must be auto/gpu/cpu, got: {value!r}")
+    return mode
+def _normalize_viewport(value: str) -> str:
+    """Validate and normalize viewport distance."""
+    mapping = {"close": "Close", "medium": "Medium", "far": "Far"}
+    key = value.strip().lower()
+    if key not in mapping:
+        raise ValueError(f"viewport must be close/medium/far, got: {value!r}")
+    return mapping[key]
+def load_replay_viewer_settings(
+    resolution: Optional[str] = None,
+    render_mode: Optional[str] = None,
+    vnc_quality: Optional[int] = None,
+    vnc_compression: Optional[int] = None,
+    cpu_cores: Optional[int] = None,
+) -> ReplayViewerSettings:
+    """Load replay viewer settings from CLI overrides → env vars → defaults."""
+    env = os.environ
+    res = resolution or env.get("OPENRA_RL_REPLAY_RESOLUTION", "1280x960")
+    w, h = _parse_resolution(res)
+    mode = _normalize_render_mode(
+        render_mode if render_mode is not None else env.get("OPENRA_RL_REPLAY_RENDER", "auto")
+    )
+    vq = vnc_quality if vnc_quality is not None else int(env.get("OPENRA_RL_REPLAY_VNC_QUALITY", "8"))
+    vc = vnc_compression if vnc_compression is not None else int(env.get("OPENRA_RL_REPLAY_VNC_COMPRESSION", "4"))
+    vq = max(0, min(9, vq))
+    vc = max(0, min(9, vc))
+    cores = cpu_cores if cpu_cores is not None else int(env.get("OPENRA_RL_REPLAY_CPU_CORES", "4"))
+    if cores <= 0:
+        cores = os.cpu_count() or 4
+    cores = max(1, min(32, cores))
+    ui_scale = float(env.get("OPENRA_RL_REPLAY_UI_SCALE", "1"))
+    viewport = _normalize_viewport(env.get("OPENRA_RL_REPLAY_VIEWPORT_DISTANCE", "medium"))
+    mute_raw = env.get("OPENRA_RL_REPLAY_MUTE", "true").strip().lower()
+    mute = mute_raw not in ("0", "false", "no", "off")
+    return ReplayViewerSettings(
+        width=w, height=h, ui_scale=ui_scale, viewport_distance=viewport,
+        mute=mute, render_mode=mode, vnc_quality=vq, vnc_compression=vc,
+        cpu_cores=cores,
+    )
+def _settings_env_args(settings: ReplayViewerSettings) -> list[str]:
+    """Convert settings to docker -e KEY=VAL args."""
+    return [
+        "-e", f"OPENRA_RL_REPLAY_RESOLUTION={settings.width}x{settings.height}",
+        "-e", f"OPENRA_RL_REPLAY_UI_SCALE={settings.ui_scale}",
+        "-e", f"OPENRA_RL_REPLAY_VIEWPORT_DISTANCE={settings.viewport_distance}",
+        "-e", f"OPENRA_RL_REPLAY_MUTE={'True' if settings.mute else 'False'}",
+        "-e", "SDL_AUDIODRIVER=dummy",
+        "-e", "OPENRA_DISPLAY_SCALE=1",
+    ]
+def _gpu_docker_args(mode: str, cpu_cores: int = 4) -> list[list[str]]:
+    """Return docker arg variants for GPU passthrough, in preference order.
+    auto: try GPU variants first, fall back to CPU.
+    gpu: only try GPU variants (fail if none work).
+    cpu: only try CPU (software rendering).
+    cpu_cores: number of llvmpipe threads for software rendering.
+    """
+    cpu = ["-e", "LIBGL_ALWAYS_SOFTWARE=1", "-e", f"LP_NUM_THREADS={cpu_cores}"]
+    gpu_variants = [
+        ["--gpus", "all"],                                      # NVIDIA
+        ["--device", "/dev/dxg:/dev/dxg",                       # WSL2 (AMD/NVIDIA/Intel)
+         "-v", "/usr/lib/wsl:/usr/lib/wsl:ro",
+         "-e", "LD_LIBRARY_PATH=/usr/lib/wsl/lib"],
+        ["--device", "/dev/kfd:/dev/kfd",                       # AMD ROCm (native Linux)
+         "--device", "/dev/dri:/dev/dri",
+         "--group-add", "video"],
+        ["--device", "/dev/dri:/dev/dri"],                      # Generic DRI (AMD/Intel)
+    ]
+    if mode == "cpu":
+        return [cpu]
+    if mode == "gpu":
+        return gpu_variants
+    # auto: try all GPU variants, then CPU fallback
+    return gpu_variants + [cpu]
+# ── Replay viewer ────────────────────────────────────────────────────
+def list_replays() -> list[str]:
+    """List .orarep files inside the game server container."""
+    if not is_running():
+        return []
+    result = _run([
+        "docker", "exec", CONTAINER_NAME,
+        "find", REPLAY_DIR_IN_CONTAINER, "-name", "*.orarep", "-type", "f",
+    ])
+    if result.returncode != 0:
+        return []
+    files = [line.strip() for line in result.stdout.splitlines() if line.strip()]
+    files.sort()
+    return files
+def get_latest_replay() -> Optional[str]:
+    """Return the path of the newest replay inside the game server container."""
+    replays = list_replays()
+    return replays[-1] if replays else None
+def copy_replays() -> list[str]:
+    """Copy all replays from the game server container to ~/.openra-rl/replays/.
+    Returns list of newly copied filenames.
+    Also records the image tag in the manifest so replay watch uses the right version.
+    """
+    if not is_running():
+        error("Game server is not running — cannot copy replays.")
+        return []
+    LOCAL_REPLAY_DIR.mkdir(parents=True, exist_ok=True)
+    # Get list of replays in container
+    replays = list_replays()
+    if not replays:
+        return []
+    # Get existing local files to detect new ones
+    existing = {f.name for f in LOCAL_REPLAY_DIR.iterdir() if f.suffix == ".orarep"}
+    # Copy each replay individually (docker cp doesn't glob well)
+    for replay_path in replays:
+        filename = os.path.basename(replay_path)
+        result = _run([
+            "docker", "cp",
+            f"{CONTAINER_NAME}:{replay_path}",
+            str(LOCAL_REPLAY_DIR / filename),
+        ])
+        if result.returncode != 0:
+            error(f"Failed to copy {filename}: {result.stderr.strip()}")
+    # Determine which files are new
+    after = {f.name for f in LOCAL_REPLAY_DIR.iterdir() if f.suffix == ".orarep"}
+    new_files = sorted(after - existing)
+    # Record the image version that produced these replays
+    if new_files:
+        tag = get_running_image_tag() or "latest"
+        _record_replays_in_manifest(new_files, tag)
+    return new_files
+def is_replay_viewer_running() -> bool:
+    """Check if the replay viewer container is running."""
+    result = _run([
+        "docker", "ps", "--filter", f"name={REPLAY_CONTAINER}",
+        "--format", "{{.Names}}"
+    ])
+    return REPLAY_CONTAINER in result.stdout
+def replay_viewer_exists() -> bool:
+    """Check if the replay viewer container exists (running or exited)."""
+    result = _run([
+        "docker", "ps", "-a", "--filter", f"name={REPLAY_CONTAINER}",
+        "--format", "{{.Names}}"
+    ])
+    return REPLAY_CONTAINER in result.stdout
+def get_replay_viewer_logs(tail: int = 200) -> str:
+    """Return recent replay viewer logs, or empty string if unavailable."""
+    if not replay_viewer_exists():
+        return ""
+    result = _run(["docker", "logs", "--tail", str(tail), REPLAY_CONTAINER])
+    if result.returncode != 0:
+        return result.stderr.strip() or result.stdout.strip()
+    return result.stdout.strip()
+def start_replay_viewer(
+    replay_path: str,
+    port: int = 6080,
+    version: Optional[str] = None,
+    settings: Optional[ReplayViewerSettings] = None,
+) -> bool:
+    """Start the replay viewer container.
+    Args:
+        replay_path: Path to .orarep file (container path or local path).
+        port: noVNC port to expose (default 6080).
+        version: Docker image version to use (default: auto-detect from manifest).
+        settings: Replay viewer tuning (resolution, render mode, etc.).
+    Returns:
+        True once a ``docker run`` variant succeeds. False when a viewer is
+        already running, the image cannot be pulled, a relative replay path
+        does not exist, or every rendering variant fails to start.
+    """
+    if settings is None:
+        settings = load_replay_viewer_settings()
+    if is_replay_viewer_running():
+        error("Replay viewer is already running. Stop it first with: openra-rl replay stop")
+        return False
+    # Clean up stale (exited) container if it exists
+    if replay_viewer_exists():
+        _run(["docker", "rm", "-f", REPLAY_CONTAINER])
+    # Auto-detect version from manifest if not specified
+    if version is None:
+        filename = os.path.basename(replay_path)
+        version = get_replay_image_tag(filename)
+        if version:
+            info(f"Using image version '{version}' (from manifest)")
+    # version may still be None here; _image_tag then resolves to "latest".
+    image = _image_tag(version)
+    if not image_exists(version):
+        step(f"Image {image} not found locally, pulling...")
+        if not pull_image(version):
+            return False
+    # Determine if this is a local file or a container path.
+    local_file = None
+    container_replay_path = replay_path
+    local_path = Path(replay_path).resolve()
+    if local_path.exists():
+        # Local file: it will be bind-mounted under /tmp/replay in the viewer.
+        local_file = str(local_path)
+        container_replay_path = f"/tmp/replay/{local_path.name}"
+    elif replay_path.startswith("/") and is_running():
+        # Container path — copy locally first so we can mount it reliably
+        # (--volumes-from only shares Docker volumes, not the writable layer)
+        filename = os.path.basename(replay_path)
+        LOCAL_REPLAY_DIR.mkdir(parents=True, exist_ok=True)
+        local_dest = LOCAL_REPLAY_DIR / filename
+        cp_result = _run(["docker", "cp", f"{CONTAINER_NAME}:{replay_path}", str(local_dest)])
+        # A failed copy is not reported; local_file stays None and the
+        # --volumes-from branch below is used instead.
+        if cp_result.returncode == 0 and local_dest.exists():
+            local_file = str(local_dest)
+            container_replay_path = f"/tmp/replay/{filename}"
+    elif not replay_path.startswith("/"):
+        error(f"Replay file not found: {local_path}")
+        return False
+    # NOTE(review): an absolute path that is neither a local file nor copied
+    # from a running server falls through with nothing mounted — confirm intended.
+    step(f"Starting replay viewer on port {port} ({image})...")
+    # Build base docker command
+    base_cmd = [
+        "docker", "run", "-d",
+        "-p", f"{port}:6080",
+        "--name", REPLAY_CONTAINER,
+        "--entrypoint", "/replay-viewer.sh",
+    ]
+    base_cmd.extend(_settings_env_args(settings))
+    if local_file:
+        base_cmd.extend(["-v", f"{local_file}:{container_replay_path}:ro"])
+    elif is_running():
+        base_cmd.extend(["--volumes-from", CONTAINER_NAME])
+    # Try GPU variants in order, fall back to CPU
+    last_stderr = ""
+    for gpu_args in _gpu_docker_args(settings.render_mode, cpu_cores=settings.cpu_cores):
+        # The software-rendering variant carries only -e flags, so the presence
+        # of --gpus/--device identifies a GPU variant.
+        is_gpu = "--gpus" in gpu_args or "--device" in gpu_args
+        # Limit CPU for software rendering to prevent runaway usage.
+        # llvmpipe busy-loops without GPU; --cpus caps Docker scheduler.
+        cpu_limit = [] if is_gpu else ["--cpus", str(settings.cpu_cores)]
+        cmd = base_cmd + cpu_limit + gpu_args + [image, container_replay_path]
+        result = _run(cmd)
+        if result.returncode == 0:
+            if is_gpu:
+                # Report which passthrough variant was accepted by docker.
+                gpu_args_str = " ".join(gpu_args)
+                if "--gpus" in gpu_args_str:
+                    info("Rendering mode: GPU (NVIDIA)")
+                elif "/dev/dxg" in gpu_args_str:
+                    info("Rendering mode: GPU (WSL2 DirectX)")
+                elif "/dev/kfd" in gpu_args_str:
+                    info("Rendering mode: GPU (AMD ROCm)")
+                else:
+                    info("Rendering mode: GPU (DRI)")
+            else:
+                info(f"Rendering mode: CPU (software, {settings.cpu_cores} cores)")
+            success("Replay viewer started.")
+            return True
+        # Only the stderr of the last failed variant is reported.
+        last_stderr = result.stderr.strip()
+        # Clean up the failed container before trying next variant
+        _run(["docker", "rm", "-f", REPLAY_CONTAINER])
+    error(f"Failed to start replay viewer: {last_stderr}")
+    return False
+def stop_replay_viewer() -> bool:
+    """Stop and remove the replay viewer container."""
+    if not replay_viewer_exists():
+        info("Replay viewer is not running.")
+        return True
+    step("Stopping replay viewer...")
+    result = _run(["docker", "rm", "-f", REPLAY_CONTAINER])
+    if result.returncode != 0:
+        error(f"Failed to stop replay viewer: {result.stderr.strip()}")
+        return False
+    success("Replay viewer stopped.")
+    return True

openra_env/cli/main.py ADDED Viewed

	@@ -0,0 +1,212 @@

+"""CLI entry point for openra-rl."""
+import argparse
+import sys
+def main() -> None:
+    """Parse the ``openra-rl`` command line and dispatch to the matching command.
+    Builds the argparse tree (play, config, server, mcp-server, replay, bench,
+    doctor, version), then calls the handler in openra_env.cli.commands for
+    the selected sub-command. With no sub-command, prints help and exits 0.
+    """
+    parser = argparse.ArgumentParser(
+        prog="openra-rl",
+        description="Play Red Alert with AI agents",
+    )
+    parser.add_argument(
+        "--version", action="store_true",
+        help="Print version and exit",
+    )
+    subparsers = parser.add_subparsers(dest="command")
+    # ── play ────────────────────────────────────────────────────────
+    play_parser = subparsers.add_parser(
+        "play", help="Run the LLM agent against the game",
+    )
+    play_parser.add_argument(
+        "--provider", choices=["openrouter", "ollama", "lmstudio"],
+        help="LLM provider (overrides saved config)",
+    )
+    play_parser.add_argument("--model", help="Model ID")
+    play_parser.add_argument("--api-key", help="API key for LLM endpoint")
+    play_parser.add_argument(
+        "--difficulty", choices=["easy", "normal", "hard"], default="normal",
+        help="AI opponent difficulty (default: normal)",
+    )
+    play_parser.add_argument("--verbose", action="store_true", help="Verbose output")
+    play_parser.add_argument("--port", type=int, default=8000, help="Game server port (default: 8000)")
+    play_parser.add_argument("--server-url", help="Connect to existing server URL (skip Docker)")
+    play_parser.add_argument("--local", action="store_true", help="Run server locally instead of Docker (for developers)")
+    # Stored as image_version so it does not collide with the top-level --version flag.
+    play_parser.add_argument("--version", dest="image_version", default=None, help="Docker image version to use (default: latest)")
+    # ── config ──────────────────────────────────────────────────────
+    subparsers.add_parser("config", help="Re-run the setup wizard")
+    # ── server ──────────────────────────────────────────────────────
+    server_parser = subparsers.add_parser("server", help="Manage the game server")
+    server_sub = server_parser.add_subparsers(dest="server_command")
+    start_parser = server_sub.add_parser("start", help="Start the game server")
+    start_parser.add_argument("--port", type=int, default=8000, help="Port (default: 8000)")
+    start_parser.add_argument(
+        "--difficulty", choices=["easy", "normal", "hard"], default="normal",
+    )
+    # store_true with default=True: args.detach is True whether or not the flag is given.
+    start_parser.add_argument("--detach", action="store_true", default=True, help="Run in background (default)")
+    server_sub.add_parser("stop", help="Stop the game server")
+    server_sub.add_parser("status", help="Show server status")
+    logs_parser = server_sub.add_parser("logs", help="Show server logs")
+    logs_parser.add_argument("--follow", "-f", action="store_true", help="Follow log output")
+    # ── mcp-server ──────────────────────────────────────────────────
+    mcp_parser = subparsers.add_parser("mcp-server", help="Start MCP stdio server")
+    mcp_parser.add_argument("--server-url", help="Game server URL")
+    mcp_parser.add_argument("--port", type=int, default=8000, help="Game server port (default: 8000)")
+    # ── replay ─────────────────────────────────────────────────────
+    replay_parser = subparsers.add_parser("replay", help="Manage and watch game replays")
+    replay_sub = replay_parser.add_subparsers(dest="replay_command")
+    watch_parser = replay_sub.add_parser("watch", help="Watch a replay in your browser (via VNC)")
+    watch_parser.add_argument("file", nargs="?", default=None, help="Replay file (local path or container path; default: latest)")
+    watch_parser.add_argument("--port", type=int, default=6080, help="noVNC port (default: 6080)")
+    # The tuning options below default to None so the handler can tell
+    # "not given" apart from an explicit value.
+    watch_parser.add_argument(
+        "--resolution", default=None,
+        help="Replay viewer resolution WxH (default: 1280x960)",
+    )
+    watch_parser.add_argument(
+        "--render", dest="render_mode", choices=["auto", "gpu", "cpu"], default=None,
+        help="Render backend: auto tries GPU then CPU (default: auto)",
+    )
+    watch_parser.add_argument(
+        "--vnc-quality", type=int, default=None,
+        help="VNC quality 0-9, higher = sharper (default: 8)",
+    )
+    watch_parser.add_argument(
+        "--vnc-compression", type=int, default=None,
+        help="VNC compression 0-9, higher = smaller (default: 4)",
+    )
+    watch_parser.add_argument(
+        "--cpus", type=int, default=None,
+        help="CPU cores for software rendering (default: 4, 0 = all available).",
+    )
+    replay_sub.add_parser("list", help="List available replays")
+    replay_sub.add_parser("copy", help="Copy replays from Docker to ~/.openra-rl/replays/")
+    replay_sub.add_parser("stop", help="Stop the replay viewer")
+    # ── bench ─────────────────────────────────────────────────────────
+    bench_parser = subparsers.add_parser("bench", help="Benchmark leaderboard tools")
+    bench_sub = bench_parser.add_subparsers(dest="bench_command")
+    bench_submit_parser = bench_sub.add_parser("submit", help="Upload game result JSON to the leaderboard")
+    bench_submit_parser.add_argument("json_file", type=str, help="Path to bench export JSON file")
+    bench_submit_parser.add_argument("--agent-name", default=None, help="Override agent name")
+    bench_submit_parser.add_argument("--agent-type", default=None, help="Override agent type (Scripted/LLM/RL)")
+    bench_submit_parser.add_argument("--agent-url", default=None, help="GitHub/project URL")
+    bench_submit_parser.add_argument("--replay", default=None, help="Path to .orarep replay file")
+    bench_submit_parser.add_argument(
+        "--bench-url", default=None,
+        help="Bench leaderboard URL (default: https://openra-rl-openra-bench.hf.space)",
+    )
+    # ── doctor ──────────────────────────────────────────────────────
+    subparsers.add_parser("doctor", help="Check system prerequisites")
+    # ── version ─────────────────────────────────────────────────────
+    subparsers.add_parser("version", help="Print version")
+    args = parser.parse_args()
+    # Handle --version at top level
+    if args.version:
+        from openra_env.cli.commands import cmd_version
+        cmd_version()
+        return
+    if args.command is None:
+        parser.print_help()
+        sys.exit(0)
+    # Dispatch
+    # The commands module is imported only once a sub-command is known.
+    from openra_env.cli import commands
+    if args.command == "play":
+        commands.cmd_play(
+            provider=args.provider,
+            model=args.model,
+            api_key=args.api_key,
+            difficulty=args.difficulty,
+            verbose=args.verbose,
+            port=args.port,
+            server_url=args.server_url,
+            local=args.local,
+            image_version=args.image_version,
+        )
+    elif args.command == "config":
+        commands.cmd_config()
+    elif args.command == "server":
+        if args.server_command == "start":
+            commands.cmd_server_start(
+                port=args.port,
+                difficulty=args.difficulty,
+                detach=args.detach,
+            )
+        elif args.server_command == "stop":
+            commands.cmd_server_stop()
+        elif args.server_command == "status":
+            commands.cmd_server_status()
+        elif args.server_command == "logs":
+            commands.cmd_server_logs(follow=args.follow)
+        else:
+            # "server" given without a sub-command.
+            server_parser.print_help()
+    elif args.command == "replay":
+        if args.replay_command == "watch":
+            commands.cmd_replay_watch(
+                file=args.file,
+                port=args.port,
+                resolution=args.resolution,
+                render_mode=args.render_mode,
+                vnc_quality=args.vnc_quality,
+                vnc_compression=args.vnc_compression,
+                cpu_cores=args.cpus,
+            )
+        elif args.replay_command == "list":
+            commands.cmd_replay_list()
+        elif args.replay_command == "copy":
+            commands.cmd_replay_copy()
+        elif args.replay_command == "stop":
+            commands.cmd_replay_stop()
+        else:
+            # "replay" given without a sub-command.
+            replay_parser.print_help()
+    elif args.command == "mcp-server":
+        commands.cmd_mcp_server(
+            server_url=args.server_url,
+            port=args.port,
+        )
+    elif args.command == "bench":
+        if args.bench_command == "submit":
+            from openra_env.bench_submit import main as bench_submit_main
+            # Patch sys.argv so bench_submit's argparse sees the right args
+            # Only options that were actually supplied are forwarded.
+            submit_argv = ["openra-rl bench submit", args.json_file]
+            if args.agent_name:
+                submit_argv += ["--agent-name", args.agent_name]
+            if args.agent_type:
+                submit_argv += ["--agent-type", args.agent_type]
+            if args.agent_url:
+                submit_argv += ["--agent-url", args.agent_url]
+            if args.replay:
+                submit_argv += ["--replay", args.replay]
+            if args.bench_url:
+                submit_argv += ["--bench-url", args.bench_url]
+            sys.argv = submit_argv
+            bench_submit_main()
+        else:
+            bench_parser.print_help()
+    elif args.command == "doctor":
+        commands.cmd_doctor()
+    elif args.command == "version":
+        commands.cmd_version()
+    else:
+        parser.print_help()
+# Run the CLI only when this module is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()

openra_env/cli/wizard.py ADDED Viewed

	@@ -0,0 +1,166 @@

+"""Interactive first-run setup wizard."""
+from pathlib import Path
+from typing import Optional
+import yaml
+from openra_env.cli.console import dim, error, header, info, success, warn
+# Per-user config location: <home>/.openra-rl/config.yaml
+CONFIG_DIR = Path.home() / ".openra-rl"
+CONFIG_PATH = CONFIG_DIR / "config.yaml"
+# Provider presets
+# Keyed by the provider identifier offered in run_wizard(). Each preset holds:
+#   name          - display name used in the API-key prompt
+#   base_url      - endpoint written to config["llm"]["base_url"]
+#   needs_key     - whether the wizard asks for an API key
+#   key_help      - optional hint shown inside the API-key prompt
+#   default_model - default offered at the model prompt ("" = no default)
+#   models        - optional list of (model_id, label) pairs; when non-empty
+#                   the wizard shows a menu instead of a free-text prompt
+PROVIDERS = {
+    "openrouter": {
+        "name": "OpenRouter",
+        "base_url": "https://openrouter.ai/api/v1/chat/completions",
+        "needs_key": True,
+        "key_help": "Get one at https://openrouter.ai/keys",
+        "default_model": "qwen/qwen3-coder-next",
+    },
+    "ollama": {
+        "name": "Ollama",
+        "base_url": "http://localhost:11434/v1/chat/completions",
+        "needs_key": False,
+        "default_model": "qwen3:32b",
+    },
+    "lmstudio": {
+        "name": "LM Studio",
+        "base_url": "http://localhost:1234/v1/chat/completions",
+        "needs_key": False,
+        "default_model": "",
+        "models": [],
+    },
+}
+def _prompt(question: str, default: str = "") -> str:
+    """Prompt user for input with optional default."""
+    if default:
+        raw = input(f"  {question} [{default}]: ").strip()
+        return raw or default
+    else:
+        while True:
+            raw = input(f"  {question}: ").strip()
+            if raw:
+                return raw
+            error("Please enter a value.")
+def _choose(question: str, options: list[tuple[str, str]], allow_custom: bool = False) -> str:
+    """Present numbered options and get user choice."""
+    print(f"\n  {question}")
+    for i, (value, label) in enumerate(options, 1):
+        print(f"    [{i}] {label}")
+    if allow_custom:
+        print(f"    [{len(options) + 1}] Enter custom value")
+    max_choice = len(options) + (1 if allow_custom else 0)
+    while True:
+        raw = input("  > ").strip()
+        try:
+            idx = int(raw)
+            if 1 <= idx <= len(options):
+                return options[idx - 1][0]
+            if allow_custom and idx == max_choice:
+                return _prompt("Enter value")
+        except ValueError:
+            # Allow typing the value directly
+            if raw:
+                return raw
+        error(f"Please enter a number 1-{max_choice}.")
+def has_saved_config() -> bool:
+    """Check if a saved config exists."""
+    return CONFIG_PATH.exists()
+def load_saved_config() -> Optional[dict]:
+    """Load saved config if it exists."""
+    if not CONFIG_PATH.exists():
+        return None
+    try:
+        with open(CONFIG_PATH, encoding="utf-8") as f:
+            return yaml.safe_load(f) or {}
+    except Exception:
+        return None
+def save_config(config: dict) -> None:
+    """Save config to ~/.openra-rl/config.yaml."""
+    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
+    with open(CONFIG_PATH, "w", encoding="utf-8") as f:
+        yaml.dump(config, f, default_flow_style=False, sort_keys=False)
+    success(f"Config saved to {CONFIG_PATH}")
+def run_wizard() -> dict:
+    """Run the interactive setup wizard. Returns a config dict."""
+    header("Welcome to OpenRA-RL!")
+    info("Let's set up your LLM provider.\n")
+    # Choose provider
+    provider_key = _choose(
+        "Choose provider:",
+        [
+            ("openrouter", "OpenRouter (cloud — Claude, GPT, Qwen, Mistral, etc.)"),
+            ("ollama", "Ollama (local, free)"),
+            ("lmstudio", "LM Studio (local, free)"),
+        ],
+    )
+    provider = PROVIDERS.get(provider_key, PROVIDERS["openrouter"])
+    config: dict = {"provider": provider_key, "llm": {"base_url": provider["base_url"]}}
+    # API key (if needed)
+    if provider.get("needs_key"):
+        print()
+        api_key = _prompt(f"Enter your {provider['name']} API key ({provider.get('key_help', '')})")
+        config["llm"]["api_key"] = api_key
+    # Model selection
+    if provider.get("models"):
+        model = _choose(
+            "Choose a model:",
+            [(m, label) for m, label in provider["models"]],
+            allow_custom=True,
+        )
+    else:
+        model = _prompt("Enter model ID", default=provider.get("default_model", ""))
+    config["llm"]["model"] = model
+    # Ollama: warn about context window
+    if provider_key == "ollama":
+        print()
+        warn("Tip: If you see truncation errors, increase the context window:")
+        dim(f"  ollama create {model}-32k --from {model} --parameter num_ctx 32768")
+    print()
+    save_config(config)
+    dim("Run `openra-rl config` to change these settings later.\n")
+    return config
+def merge_cli_into_config(
+    config: dict,
+    provider: Optional[str] = None,
+    model: Optional[str] = None,
+    api_key: Optional[str] = None,
+) -> dict:
+    """Apply CLI flag overrides onto a config dict."""
+    if provider and provider in PROVIDERS:
+        p = PROVIDERS[provider]
+        config.setdefault("llm", {})["base_url"] = p["base_url"]
+        config["provider"] = provider
+    if model:
+        config.setdefault("llm", {})["model"] = model
+    if api_key:
+        config.setdefault("llm", {})["api_key"] = api_key
+    return config

openra_env/client.py ADDED Viewed

	@@ -0,0 +1,113 @@

+"""OpenRA-RL environment client.
+Provides the EnvClient subclass for connecting to the OpenRA-RL
+environment server over WebSocket.
+"""
+import os
+from typing import Any, Dict
+from openenv.core.client_types import StepResult
+from openenv.core.env_client import EnvClient
+from websockets.asyncio.client import connect as ws_connect
+from openra_env.models import (
+    BuildingInfoModel,
+    EconomyInfo,
+    MapInfoModel,
+    MilitaryInfo,
+    OpenRAAction,
+    OpenRAObservation,
+    OpenRAState,
+    ProductionInfoModel,
+    UnitInfoModel,
+)
+class OpenRAEnv(EnvClient[OpenRAAction, OpenRAObservation, OpenRAState]):
+    """WebSocket client for the OpenRA-RL environment.
+    Usage:
+        async with OpenRAEnv(base_url="http://localhost:8000") as env:
+            result = await env.reset()
+            while not result.done:
+                action = OpenRAAction(commands=[...])
+                result = await env.step(action)
+    """
+    async def connect(self) -> "OpenRAEnv":
+        """Connect with ping keepalive disabled.
+        OpenRA operations (especially reset) can take 60-120+ seconds
+        with software rendering. The default websockets ping_interval=20s
+        would kill the connection before the server responds.
+        """
+        if self._ws is not None:
+            return self
+        ws_url_lower = self._ws_url.lower()
+        is_localhost = "localhost" in ws_url_lower or "127.0.0.1" in ws_url_lower
+        old_no_proxy = os.environ.get("NO_PROXY")
+        if is_localhost:
+            current_no_proxy = old_no_proxy or ""
+            if "localhost" not in current_no_proxy.lower():
+                os.environ["NO_PROXY"] = (
+                    f"{current_no_proxy},localhost,127.0.0.1"
+                    if current_no_proxy
+                    else "localhost,127.0.0.1"
+                )
+        try:
+            self._ws = await ws_connect(
+                self._ws_url,
+                open_timeout=self._connect_timeout,
+                max_size=self._max_message_size,
+                ping_interval=None,
+            )
+        except Exception as e:
+            raise ConnectionError(f"Failed to connect to {self._ws_url}: {e}") from e
+        finally:
+            if is_localhost:
+                if old_no_proxy is None:
+                    os.environ.pop("NO_PROXY", None)
+                else:
+                    os.environ["NO_PROXY"] = old_no_proxy
+        return self
+    def _step_payload(self, action: OpenRAAction) -> Dict[str, Any]:
+        """Convert action to JSON for WebSocket transport."""
+        return action.model_dump()
+    def _parse_result(self, data: Dict[str, Any]) -> StepResult[OpenRAObservation]:
+        """Parse server response into StepResult."""
+        obs_data = data.get("observation", data)
+        observation = OpenRAObservation(
+            tick=obs_data.get("tick", 0),
+            economy=EconomyInfo(**obs_data.get("economy", {})),
+            military=MilitaryInfo(**obs_data.get("military", {})),
+            units=[UnitInfoModel(**u) for u in obs_data.get("units", [])],
+            buildings=[BuildingInfoModel(**b) for b in obs_data.get("buildings", [])],
+            production=[ProductionInfoModel(**p) for p in obs_data.get("production", [])],
+            visible_enemies=[UnitInfoModel(**u) for u in obs_data.get("visible_enemies", [])],
+            visible_enemy_buildings=[BuildingInfoModel(**b) for b in obs_data.get("visible_enemy_buildings", [])],
+            map_info=MapInfoModel(**obs_data.get("map_info", {})),
+            available_production=obs_data.get("available_production", []),
+            done=obs_data.get("done", False),
+            reward=obs_data.get("reward"),
+            result=obs_data.get("result", ""),
+            spatial_map=obs_data.get("spatial_map", ""),
+            spatial_channels=obs_data.get("spatial_channels", 0),
+        )
+        return StepResult(
+            observation=observation,
+            reward=data.get("reward", obs_data.get("reward")),
+            done=data.get("done", obs_data.get("done", False)),
+        )
+    def _parse_state(self, data: Dict[str, Any]) -> OpenRAState:
+        """Parse state response into OpenRAState."""
+        return OpenRAState(**data)

openra_env/config.py ADDED Viewed

	@@ -0,0 +1,535 @@

+"""Unified configuration for OpenRA-RL.
+Provides a single YAML-based configuration system with Pydantic validation.
+Supports multiple override layers:
+  CLI overrides > env vars > constructor overrides > config file > built-in defaults
+Usage:
+    from openra_env.config import load_config
+    config = load_config()                         # auto-find config.yaml
+    config = load_config("path/to/config.yaml")    # explicit path
+    config = load_config(game={"mod": "cnc"})      # with overrides
+"""
+import os
+from pathlib import Path
+from typing import Optional
+import yaml
+from pydantic import BaseModel, Field, model_validator
+# ── Pydantic Config Models ────────────────────────────────────────────
+class GameConfig(BaseModel):
+    """Settings for the ``game`` section of the root config."""
+    openra_path: str = "/opt/openra"
+    mod: str = "ra"
+    map_name: str = "singles.oramap"
+    grpc_port: int = 9999
+    headless: bool = True
+    record_replays: bool = False
+    seed: Optional[int] = None
+    max_ticks: int = 0  # 0 = unlimited
+    max_wall_time_s: int = 0  # 0 = unlimited
+class OpponentConfig(BaseModel):
+    """Settings for the ``opponent`` section of the root config."""
+    # bot_type: difficulty tiers (beginner/easy/medium/hard/brutal)
+    # or raw OpenRA play styles (rush/normal/turtle/naval)
+    # ai_slot: player slot for AI; set to "" to disable enemy spawning
+    bot_type: str = "easy"
+    ai_slot: str = "Multi0"
+class PlanningConfig(BaseModel):
+    """Settings for the ``planning`` section of the root config."""
+    # When False, OpenRARLConfig also switches off the "planning" tool category.
+    enabled: bool = True
+    max_turns: int = 10
+    max_time_s: float = 60.0
+class RewardConfig(BaseModel):
+    """Settings for the ``reward`` section of the root config.
+    NOTE(review): the field names suggest per-component scalar reward weights;
+    confirm against the code that consumes them.
+    """
+    survival: float = 0.001
+    economic_efficiency: float = 0.01
+    aggression: float = 0.1
+    defense: float = 0.05
+    victory: float = 1.0
+    defeat: float = -1.0
+class RewardVectorConfig(BaseModel):
+    """Configuration for the multi-dimensional reward vector.
+    When enabled, each step returns an 8-dimensional reward vector
+    (combat, economy, infrastructure, intelligence, composition,
+    tempo, disruption, outcome) alongside the scalar reward.
+    """
+    enabled: bool = True  # 8-dimensional skill signal (combat, economy, etc.)
+    # Default weights, keyed by the eight dimension names listed above.
+    # default_factory gives every instance its own dict.
+    weights: dict[str, float] = Field(default_factory=lambda: {
+        "combat": 0.30,
+        "economy": 0.15,
+        "infrastructure": 0.10,
+        "intelligence": 0.10,
+        "composition": 0.10,
+        "tempo": 0.10,
+        "disruption": 0.15,
+        "outcome": 1.00,
+    })
+class ToolCategoriesConfig(BaseModel):
+    """One on/off switch per tool category.
+    The attribute names match the category strings used as values in
+    TOOL_CATEGORIES; should_register_tool() reads them with getattr().
+    """
+    read: bool = True
+    knowledge: bool = True
+    bulk_knowledge: bool = True
+    planning: bool = True
+    game_control: bool = True
+    movement: bool = True
+    production: bool = True
+    building_actions: bool = True
+    placement: bool = True
+    unit_groups: bool = True
+    compound: bool = True
+    utility: bool = True
+    terrain: bool = True
+class ToolsConfig(BaseModel):
+    """Settings for the ``tools`` section of the root config."""
+    # Category-level switches.
+    categories: ToolCategoriesConfig = Field(default_factory=ToolCategoriesConfig)
+    # Tool names rejected by should_register_tool() before any category check.
+    disabled: list[str] = Field(default_factory=list)
+class AlertsConfig(BaseModel):
+    """On/off switches for the ``alerts`` section of the root config."""
+    under_attack: bool = True
+    damaged_building: bool = True
+    low_power: bool = True
+    idle_funds: bool = True
+    ore_full: bool = True
+    idle_production: bool = True
+    production_stalled: bool = True
+    building_ready: bool = True
+    stance_warning: bool = True
+    idle_army: bool = True
+    no_defenses: bool = True
+    no_scouting: bool = True
+    loss_tracking: bool = True
+    minimap: bool = True  # Show ASCII minimap in turn briefing
+    max_alerts: int = 0  # 0 = unlimited; set >0 to cap alerts per turn
+class LLMConfig(BaseModel):
+    """Settings for the ``llm`` section of the root config."""
+    base_url: str = "https://openrouter.ai/api/v1/chat/completions"
+    api_key: str = ""
+    model: str = "qwen/qwen3-coder-next"
+    max_tokens: int = 1500
+    temperature: Optional[float] = None
+    top_p: Optional[float] = None
+    keep_last_messages: int = 40
+    compression_strategy: str = "sliding_window"  # "sliding_window" or "none"
+    compression_trigger: int = 0  # 0 = keep_last_messages * 2
+    max_retries: int = 4
+    retry_backoff_s: int = 10
+    request_timeout_s: float = 120.0
+    reasoning_effort: Optional[str] = None  # "none", "low", "medium", "high"
+    # default_factory gives every instance its own headers dict.
+    extra_headers: dict[str, str] = Field(
+        default_factory=lambda: {
+            "HTTP-Referer": "https://github.com/openra-rl",
+            "X-Title": "OpenRA-RL Agent",
+        }
+    )
+class AgentConfig(BaseModel):
+    """Settings for the ``agent`` section of the root config."""
+    server_url: str = "http://localhost:8000"
+    max_turns: int = 0  # 0 = unlimited
+    max_time_s: int = 1800
+    verbose: bool = False
+    log_file: str = ""
+    agent_name: str = ""  # Display name on leaderboard; empty = model name
+    agent_type: str = ""  # Scripted/LLM/RL; empty = auto-detect
+    agent_url: str = ""  # GitHub/project URL shown on leaderboard
+    bench_upload: bool = True  # Auto-upload results to bench after each game
+    bench_url: str = "https://openra-rl-openra-bench.hf.space"
+    # The two fields below are copied into prompts.* by
+    # OpenRARLConfig.migrate_system_prompt when the prompts.* value is empty.
+    system_prompt: str = ""  # deprecated — use prompts.system_prompt
+    system_prompt_file: str = ""  # deprecated — use prompts.system_prompt_file
+class AlertPromptsConfig(BaseModel):
+    """Templates for in-game alert messages.
+    All templates use Python str.format() placeholders (e.g. {balance}).
+    The placeholder names appearing in each default are the ones that
+    template expects to be filled in.
+    """
+    under_attack: str = "UNDER ATTACK: enemy {type} id={id} near base"
+    under_attack_mass: str = "UNDER ATTACK: {count} enemies near base ({breakdown})"
+    damaged: str = "DAMAGED: {type} id={id} at {hp} HP"
+    low_power: str = "LOW POWER: {balance} — production runs at 1/3 speed"
+    power_tight: str = "POWER TIGHT: {balance} surplus — next building may cause low power"
+    idle_funds: str = "IDLE FUNDS: ${funds} available, {harvesters} harvester(s)"
+    ore_full: str = "ORE FULL: {ore}/{cap} storage — income is being lost"
+    idle_production: str = "IDLE PRODUCTION: no active production queue"
+    stalled: str = "STALLED: {item}@{progress} — $0 funds, production paused"
+    building_stuck: str = "BUILDING STUCK: {building} — auto-placement failing"
+    ready_to_place: str = "READY TO PLACE: {building} — completed, awaiting placement"
+    stance: str = "STANCE: {count} combat unit(s) on ReturnFire (only fire when fired upon)"
+    idle_army: str = "IDLE ARMY: {count} combat units idle"
+    no_defenses: str = "NO DEFENSES: no defense structures built"
+    no_scouting: str = (
+        "NO SCOUTING: enemy not found — {explored} of map explored, "
+        "{idle} idle combat units available"
+    )
+class CompressionConfig(BaseModel):
+    """Controls what context is preserved in history compression summaries.
+    Used as the ``compression`` field of PromptsConfig.
+    """
+    include_strategy: bool = True  # Preserve planning strategy
+    include_military: bool = True  # Include kill/death counts
+    include_production: bool = True  # Track what was produced
+class PromptsConfig(BaseModel):
+    """All LLM-facing text, configurable for customization.
+    Templates use Python str.format() placeholders. Override individual
+    fields in config.yaml, or point prompts_file to a YAML with all prompts.
+    The "Variables:" comments list the placeholders each template expects.
+    """
+    # ── System prompt ────────────────────────────────────────────────
+    # Both system_prompt fields may be back-filled from the deprecated
+    # agent.* fields by OpenRARLConfig.migrate_system_prompt when left empty.
+    system_prompt: str = ""  # inline override (highest priority)
+    system_prompt_file: str = ""  # path to .txt file override
+    prompts_file: str = ""  # path to YAML with all prompts below
+    # ── Planning phase ───────────────────────────────────────────────
+    # Variables: {max_turns}, {map_name}, {map_width}, {map_height},
+    #   {base_x}, {base_y}, {enemy_x}, {enemy_y}, {faction}, {side},
+    #   {opponent_summary}, {planning_nudge}
+    planning_prompt: str = (
+        "## PRE-GAME PLANNING PHASE\n"
+        "You have {max_turns} turns to plan.\n\n"
+        "### Map Intel\n"
+        "Map: {map_name} ({map_width}x{map_height})\n"
+        "Your base: ({base_x}, {base_y})\n"
+        "Enemy estimated: ({enemy_x}, {enemy_y})\n"
+        "Your faction: {faction} ({side})\n\n"
+        "### Opponent Intelligence\n{opponent_summary}\n\n"
+        "{planning_nudge}"
+    )
+    planning_nudge: str = "Call end_planning_phase(strategy='...') when ready to start."
+    planning_instructions: str = (
+        "Planning phase active. Available tools: get_faction_briefing "
+        "(all unit/building stats), get_map_analysis (terrain/resources), "
+        "get_opponent_intel (enemy profile), batch_lookup (multi-item queries). "
+        "Call end_planning_phase(strategy=...) to begin gameplay."
+    )
+    planning_complete: str = "Planning complete. Game is now live."
+    # ── Game start ───────────────────────────────────────────────────
+    # Variables: {strategy_section}, {briefing}, {barracks_type}, {mcv_note}
+    game_start: str = (
+        "Game started!{strategy_section}\n\n{briefing}\n\n"
+        "Your barracks type is '{barracks_type}'.{mcv_note}"
+    )
+    # ── Agent nudges ─────────────────────────────────────────────────
+    no_tool_nudge: str = "No tool was called. A tool call is required each turn."
+    continue_nudge: str = "The game is still in progress."
+    compression_suffix: str = "Game continues from current state."
+    sanitize_bridge: str = "Acknowledged. Continuing."
+    # ── Tool warnings ────────────────────────────────────────────────
+    # Variables: {building}, {drain}, {balance}
+    power_warning: str = (
+        "POWER WARNING: {building} drains {drain} power. "
+        "Balance will be {balance}."
+    )
+    # Variables: {available}, {item}, {cost}
+    insufficient_funds: str = (
+        "Insufficient funds: ${available} available, "
+        "{item} costs ${cost}."
+    )
+    # ── Placement feedback ───────────────────────────────────────────
+    # Variables: {building}; placement_failed also takes {reason}
+    placement_success: str = "AUTO-PLACED: {building}"
+    placement_failed: str = "PLACEMENT FAILED: {building} — {reason}. Auto-cancelling."
+    placement_water: str = "WATER BUILDING: {building} requires water tiles for placement."
+    # ── Build confirmations ───────────────────────────────────────────
+    # Variables: {building}, {cost}, {ticks}, {seconds}
+    build_queued: str = (
+        "'{building}' (${cost}) queued, auto-places on completion. "
+        "~{ticks} ticks (~{seconds}s)."
+    )
+    build_structure_queued: str = (
+        "'{building}' (${cost}) queued. ~{ticks} ticks (~{seconds}s) to complete."
+    )
+    # Variables: {count}, {unit}, {cost}, {ticks_each}, {ticks_total}, {seconds_total}
+    build_unit_queued: str = (
+        "{count}x '{unit}' (${cost} each) queued. "
+        "~{ticks_each} ticks per unit, ~{ticks_total} ticks (~{seconds_total}s) total."
+    )
+    # ── Build guards ──────────────────────────────────────────────────
+    # Variables: {building}
+    build_already_pending: str = "'{building}' is already queued and pending auto-placement."
+    place_auto_managed: str = (
+        "'{building}' is queued via build_and_place — placement is automatic."
+    )
+    # ── Movement feedback ────────────────────────────────────────────
+    # Variables: {ticks}, {seconds}
+    move_eta: str = "Units moving. Slowest arrives in ~{ticks} ticks (~{seconds}s)."
+    # ── Compression ──────────────────────────────────────────────────
+    compression: CompressionConfig = Field(default_factory=CompressionConfig)
+    # ── Alerts ───────────────────────────────────────────────────────
+    alerts: AlertPromptsConfig = Field(default_factory=AlertPromptsConfig)
+class OpenRARLConfig(BaseModel):
+    """Root configuration for the OpenRA-RL system.
+    Each field is one top-level section of the config; every section
+    defaults to its model's built-in defaults when omitted.
+    """
+    game: GameConfig = Field(default_factory=GameConfig)
+    opponent: OpponentConfig = Field(default_factory=OpponentConfig)
+    planning: PlanningConfig = Field(default_factory=PlanningConfig)
+    reward: RewardConfig = Field(default_factory=RewardConfig)
+    reward_vector: RewardVectorConfig = Field(default_factory=RewardVectorConfig)
+    tools: ToolsConfig = Field(default_factory=ToolsConfig)
+    alerts: AlertsConfig = Field(default_factory=AlertsConfig)
+    llm: LLMConfig = Field(default_factory=LLMConfig)
+    agent: AgentConfig = Field(default_factory=AgentConfig)
+    prompts: PromptsConfig = Field(default_factory=PromptsConfig)
+    @model_validator(mode="after")
+    def sync_planning_tools(self) -> "OpenRARLConfig":
+        """Auto-disable planning tools when planning is disabled."""
+        if not self.planning.enabled:
+            # Overrides whatever tools.categories.planning was configured to.
+            self.tools.categories.planning = False
+        return self
+    @model_validator(mode="after")
+    def migrate_system_prompt(self) -> "OpenRARLConfig":
+        """Backward compat: copy agent.system_prompt* to prompts.* if prompts.* empty."""
+        # A non-empty prompts.* value always wins over the deprecated agent.* one.
+        if not self.prompts.system_prompt and self.agent.system_prompt:
+            self.prompts.system_prompt = self.agent.system_prompt
+        if not self.prompts.system_prompt_file and self.agent.system_prompt_file:
+            self.prompts.system_prompt_file = self.agent.system_prompt_file
+        return self
+# ── Tool Category Mapping ─────────────────────────────────────────────
+# Maps tool name -> category name. Each category name is an attribute of
+# ToolCategoriesConfig; should_register_tool() uses this table to decide
+# whether a tool is enabled. Tools missing from the table default to enabled.
+TOOL_CATEGORIES: dict[str, str] = {
+    # Read
+    "get_game_state": "read",
+    "get_economy": "read",
+    "get_units": "read",
+    "get_buildings": "read",
+    "get_enemies": "read",
+    "get_production": "read",
+    "get_map_info": "read",
+    "get_exploration_status": "read",
+    # Knowledge
+    "lookup_unit": "knowledge",
+    "lookup_building": "knowledge",
+    "lookup_tech_tree": "knowledge",
+    "lookup_faction": "knowledge",
+    # Bulk Knowledge
+    "get_faction_briefing": "bulk_knowledge",
+    "get_map_analysis": "bulk_knowledge",
+    "batch_lookup": "bulk_knowledge",
+    # Planning
+    "get_opponent_intel": "planning",
+    "start_planning_phase": "planning",
+    "end_planning_phase": "planning",
+    "get_planning_status": "planning",
+    # Game Control
+    "advance": "game_control",
+    # Movement
+    "move_units": "movement",
+    "attack_move": "movement",
+    "attack_target": "movement",
+    "stop_units": "movement",
+    # Production
+    "build_unit": "production",
+    "build_structure": "production",
+    "build_and_place": "production",
+    # Building/Unit Actions
+    "place_building": "building_actions",
+    "cancel_production": "building_actions",
+    "deploy_unit": "building_actions",
+    "sell_building": "building_actions",
+    "repair_building": "building_actions",
+    "set_rally_point": "building_actions",
+    "guard_target": "building_actions",
+    "set_stance": "building_actions",
+    "harvest": "building_actions",
+    "power_down": "building_actions",
+    "set_primary": "building_actions",
+    # Placement
+    "get_valid_placements": "placement",
+    # Unit Groups
+    "assign_group": "unit_groups",
+    "add_to_group": "unit_groups",
+    "get_groups": "unit_groups",
+    "command_group": "unit_groups",
+    # Compound
+    "batch": "compound",
+    "plan": "compound",
+    # Utility
+    "get_replay_path": "utility",
+    "surrender": "utility",
+    # Terrain
+    "get_terrain_at": "terrain",
+}
+# ── Env Var Mapping ───────────────────────────────────────────────────
+# Ordered so that more-specific vars (LLM_*) overwrite less-specific (OPENROUTER_*)
+# Pairs of (environment variable name, dotted config path). load_config()
+# walks the list in order, so when two set variables target the same path
+# the later entry wins. A variable may appear more than once to fill
+# several paths.
+_ENV_VAR_MAP: list[tuple[str, str]] = [
+    # game
+    ("OPENRA_PATH", "game.openra_path"),
+    ("RECORD_REPLAYS", "game.record_replays"),
+    # opponent
+    ("BOT_TYPE", "opponent.bot_type"),
+    ("AI_SLOT", "opponent.ai_slot"),
+    # planning
+    ("PLANNING_ENABLED", "planning.enabled"),
+    ("PLANNING_MAX_TURNS", "planning.max_turns"),
+    ("PLANNING_MAX_TIME", "planning.max_time_s"),
+    # llm — legacy OpenRouter names first, then generic LLM_ names (override)
+    ("OPENROUTER_API_KEY", "llm.api_key"),
+    ("OPENROUTER_MODEL", "llm.model"),
+    ("LLM_BASE_URL", "llm.base_url"),
+    ("LLM_API_KEY", "llm.api_key"),
+    ("LLM_MODEL", "llm.model"),
+    # agent
+    ("OPENRA_URL", "agent.server_url"),
+    ("MAX_TIME", "agent.max_time_s"),
+    ("LLM_AGENT_LOG", "agent.log_file"),
+    ("AGENT_NAME", "agent.agent_name"),
+    ("AGENT_TYPE", "agent.agent_type"),
+    ("AGENT_URL", "agent.agent_url"),
+    ("BENCH_UPLOAD", "agent.bench_upload"),
+    ("BENCH_URL", "agent.bench_url"),
+    ("SYSTEM_PROMPT_FILE", "agent.system_prompt_file"),
+    # prompts
+    ("SYSTEM_PROMPT_FILE", "prompts.system_prompt_file"),  # also maps to prompts.*
+    ("PROMPTS_FILE", "prompts.prompts_file"),
+]
+# ── Helper Functions ──────────────────────────────────────────────────
+def _deep_merge(base: dict, override: dict) -> None:
+    """Recursively merge *override* into *base* in place."""
+    for key, value in override.items():
+        if key in base and isinstance(base[key], dict) and isinstance(value, dict):
+            _deep_merge(base[key], value)
+        else:
+            base[key] = value
+def _set_nested(d: dict, path: str, value: object) -> None:
+    """Set a value in a nested dict via dotted path (e.g. ``'game.mod'``)."""
+    keys = path.split(".")
+    for key in keys[:-1]:
+        d = d.setdefault(key, {})
+    d[keys[-1]] = value
+def _coerce_value(value: str) -> object:
+    """Coerce a string env-var value to bool / int / float / str."""
+    lower = value.lower()
+    if lower in ("true", "1", "yes"):
+        return True
+    if lower in ("false", "0", "no"):
+        return False
+    try:
+        return int(value)
+    except ValueError:
+        pass
+    try:
+        return float(value)
+    except ValueError:
+        pass
+    return value
+def should_register_tool(tool_name: str, tools_config: ToolsConfig) -> bool:
+    """Return True if *tool_name* should be registered based on config."""
+    if tool_name in tools_config.disabled:
+        return False
+    category = TOOL_CATEGORIES.get(tool_name)
+    if category is not None:
+        return getattr(tools_config.categories, category, True)
+    return True  # unknown tools default to enabled
+# ── Config Loading ────────────────────────────────────────────────────
+def load_config(
+    config_path: Optional[str] = None,
+    cli_overrides: Optional[dict] = None,
+    **overrides: object,
+) -> OpenRARLConfig:
+    """Load configuration with precedence: CLI > env vars > overrides > file > defaults.
+    Parameters
+    ----------
+    config_path:
+        Explicit path to a YAML config file. When ``None``, searches for
+        ``config.yaml`` in the current working directory and the project root.
+    cli_overrides:
+        Dict of overrides from explicit CLI flags. Applied last (highest
+        priority), beating even environment variables. Use this for values
+        the user typed on the command line.
+    **overrides:
+        Keyword arguments that are deep-merged on top of the file values.
+        Keys should be top-level section names (e.g. ``game={...}``).
+    """
+    config_dict: dict = {}
+    # 1. Load YAML file
+    resolved_path = _resolve_config_path(config_path)
+    if resolved_path is not None:
+        with open(resolved_path, encoding="utf-8") as f:
+            file_dict = yaml.safe_load(f) or {}
+        _deep_merge(config_dict, file_dict)
+    # 2. Apply programmatic overrides (e.g. constructor args)
+    if overrides:
+        _deep_merge(config_dict, overrides)
+    # 3. Apply environment variable overrides
+    for env_var, dotted_path in _ENV_VAR_MAP:
+        value = os.environ.get(env_var)
+        if value is not None:
+            _set_nested(config_dict, dotted_path, _coerce_value(value))
+    # 4. Apply CLI overrides (highest priority — explicit user intent)
+    if cli_overrides:
+        _deep_merge(config_dict, cli_overrides)
+    # 5. Validate and return
+    return OpenRARLConfig(**config_dict)
+def _resolve_config_path(config_path: Optional[str]) -> Optional[str]:
+    """Find the config file to load, or None if none exists."""
+    if config_path is not None:
+        p = Path(config_path)
+        return str(p) if p.exists() else None
+    # Auto-discover: CWD first, then project root
+    candidates = [
+        Path.cwd() / "config.yaml",
+        Path(__file__).resolve().parent.parent / "config.yaml",
+    ]
+    for candidate in candidates:
+        if candidate.exists():
+            return str(candidate)
+    return None

openra_env/game_data.py ADDED Viewed

	@@ -0,0 +1,984 @@

+"""Static Red Alert mod data for game knowledge tools.
+Provides unit stats, building stats, tech tree, and faction information
+extracted from OpenRA Red Alert mod rules. This gives an LLM agent the same
+reference knowledge a human player would have from experience.
+"""
+from typing import Optional
+# ─── Unit Data ────────────────────────────────────────────────────────────────
+# RA_UNITS maps a lowercase unit type code (e.g. "e1", "2tnk") to its stats.
+# Every entry carries:
+#   name           display name
+#   category       "infantry", "vehicle", "aircraft" or "ship"
+#   cost, hp, speed  integer stats (scales/units are not stated in this module)
+#   armor          "none", "light" or "heavy"
+#   side           "allied", "soviet" or "both"
+#   prerequisites  list of building codes from RA_BUILDINGS; an "a|b" item
+#                  presumably means either building satisfies it — TODO confirm
+#   description    short free-text summary
+# Optional key: build_limit (currently only on "e7").
+RA_UNITS: dict[str, dict] = {
+    # Infantry
+    "e1": {
+        "name": "Rifle Infantry",
+        "category": "infantry",
+        "cost": 100,
+        "hp": 5000,
+        "speed": 56,
+        "armor": "none",
+        "side": "both",
+        "prerequisites": ["barr|tent"],
+        "description": "Basic infantry unit. Cheap and fast to produce.",
+    },
+    "e2": {
+        "name": "Grenadier",
+        "category": "infantry",
+        "cost": 150,
+        "hp": 5000,
+        "speed": 56,
+        "armor": "none",
+        "side": "both",
+        "prerequisites": ["barr|tent"],
+        "description": "Anti-structure infantry. Grenades deal area damage.",
+    },
+    "e3": {
+        "name": "Rocket Soldier",
+        "category": "infantry",
+        "cost": 300,
+        "hp": 4500,
+        "speed": 56,
+        "armor": "none",
+        "side": "both",
+        "prerequisites": ["barr|tent"],
+        "description": "Anti-armor and anti-air infantry.",
+    },
+    "e4": {
+        "name": "Flamethrower",
+        "category": "infantry",
+        "cost": 300,
+        "hp": 4000,
+        "speed": 56,
+        "armor": "none",
+        "side": "soviet",
+        "prerequisites": ["barr", "ftur"],
+        "description": "Short-range anti-infantry/structure. Soviet only.",
+    },
+    "e6": {
+        "name": "Engineer",
+        "category": "infantry",
+        "cost": 400,
+        "hp": 4000,
+        "speed": 56,
+        "armor": "none",
+        "side": "both",
+        "prerequisites": ["barr|tent"],
+        "description": "Captures enemy buildings. Cannot attack.",
+    },
+    "e7": {
+        "name": "Tanya",
+        "category": "infantry",
+        "cost": 1800,
+        "hp": 10000,
+        "speed": 68,
+        "armor": "none",
+        "side": "allied",
+        "prerequisites": ["tent", "atek"],
+        "build_limit": 1,
+        "description": "Elite commando. Destroys buildings with C4, kills infantry instantly. Allied only.",
+    },
+    "medi": {
+        "name": "Medic",
+        "category": "infantry",
+        "cost": 200,
+        "hp": 6000,
+        "speed": 49,
+        "armor": "none",
+        "side": "allied",
+        "prerequisites": ["tent"],
+        "description": "Heals nearby infantry. Cannot attack.",
+    },
+    "mech": {
+        "name": "Mechanic",
+        "category": "infantry",
+        "cost": 500,
+        "hp": 8000,
+        "speed": 49,
+        "armor": "none",
+        "side": "allied",
+        "prerequisites": ["tent", "fix"],
+        "description": "Repairs nearby vehicles. Cannot attack.",
+    },
+    "spy": {
+        "name": "Spy",
+        "category": "infantry",
+        "cost": 500,
+        "hp": 2500,
+        "speed": 56,
+        "armor": "none",
+        "side": "allied",
+        "prerequisites": ["tent", "dome"],
+        "description": "Disguises as enemy infantry. Infiltrates buildings for bonuses.",
+    },
+    "thf": {
+        "name": "Thief",
+        "category": "infantry",
+        "cost": 500,
+        "hp": 5000,
+        "speed": 68,
+        "armor": "none",
+        "side": "allied",
+        "prerequisites": ["tent", "dome"],
+        "description": "Steals credits from enemy refineries.",
+    },
+    "shok": {
+        "name": "Shock Trooper",
+        "category": "infantry",
+        "cost": 350,
+        "hp": 5000,
+        "speed": 49,
+        "armor": "none",
+        "side": "soviet",
+        "prerequisites": ["barr", "stek", "tsla"],
+        "description": "Tesla infantry. High damage vs all targets. Soviet only.",
+    },
+    "dog": {
+        "name": "Attack Dog",
+        "category": "infantry",
+        "cost": 200,
+        "hp": 2000,
+        "speed": 99,
+        "armor": "none",
+        "side": "soviet",
+        "prerequisites": ["kenn"],
+        "description": "Fast anti-infantry unit. Kills spies. Soviet only.",
+    },
+    # Vehicles
+    "1tnk": {
+        "name": "Light Tank",
+        "category": "vehicle",
+        "cost": 700,
+        "hp": 23000,
+        "speed": 113,
+        "armor": "heavy",
+        "side": "allied",
+        "prerequisites": ["weap"],
+        # NOTE(review): the name is "Light Tank" but the text below says
+        # "medium tank" — confirm the intended wording.
+        "description": "Fast medium tank. Good all-around. Allied only.",
+    },
+    "2tnk": {
+        "name": "Medium Tank",
+        "category": "vehicle",
+        "cost": 850,
+        "hp": 30000,
+        "speed": 72,
+        "armor": "heavy",
+        "side": "allied",
+        "prerequisites": ["weap", "fix"],
+        "description": "Main battle tank. Balanced stats. Allied only. Requires Repair Facility.",
+    },
+    "3tnk": {
+        "name": "Heavy Tank",
+        "category": "vehicle",
+        "cost": 1150,
+        "hp": 46000,
+        "speed": 64,
+        "armor": "heavy",
+        "side": "soviet",
+        "prerequisites": ["weap", "fix"],
+        "description": "Powerful main battle tank. Dual cannons. Soviet only. Requires Repair Facility.",
+    },
+    "4tnk": {
+        "name": "Mammoth Tank",
+        "category": "vehicle",
+        "cost": 2000,
+        "hp": 60000,
+        "speed": 43,
+        "armor": "heavy",
+        "side": "soviet",
+        "prerequisites": ["weap", "fix", "stek"],
+        "description": "Heaviest tank. Dual cannons + missiles. Self-healing. Soviet only.",
+    },
+    "v2rl": {
+        "name": "V2 Rocket Launcher",
+        "category": "vehicle",
+        "cost": 900,
+        "hp": 15000,
+        "speed": 72,
+        "armor": "light",
+        "side": "soviet",
+        "prerequisites": ["weap", "dome"],
+        "description": "Long-range artillery. High damage, inaccurate. Soviet only.",
+    },
+    "jeep": {
+        "name": "Ranger",
+        "category": "vehicle",
+        "cost": 500,
+        "hp": 15000,
+        "speed": 164,
+        "armor": "light",
+        "side": "allied",
+        "prerequisites": ["weap"],
+        "description": "Fast scout vehicle with machine gun. Allied only.",
+    },
+    "apc": {
+        "name": "APC",
+        "category": "vehicle",
+        "cost": 850,
+        "hp": 20000,
+        "speed": 128,
+        "armor": "heavy",
+        "side": "soviet",
+        "prerequisites": ["weap"],
+        "description": "Armored troop transport. Carries 5 infantry. Soviet only.",
+    },
+    "arty": {
+        "name": "Artillery",
+        "category": "vehicle",
+        "cost": 850,
+        "hp": 7500,
+        "speed": 54,
+        "armor": "light",
+        "side": "allied",
+        "prerequisites": ["weap", "dome"],
+        "description": "Long-range siege weapon. Allied only.",
+    },
+    "harv": {
+        "name": "Ore Truck",
+        "category": "vehicle",
+        "cost": 1100,
+        "hp": 60000,
+        "speed": 72,
+        "armor": "heavy",
+        "side": "both",
+        # NOTE(review): the only vehicle entry that does not list "weap" —
+        # confirm this is intentional.
+        "prerequisites": ["proc"],
+        "description": "Harvests ore and delivers to refinery. Free with refinery.",
+    },
+    "mcv": {
+        "name": "MCV",
+        "category": "vehicle",
+        "cost": 2000,
+        "hp": 60000,
+        "speed": 60,
+        "armor": "light",
+        "side": "both",
+        "prerequisites": ["weap", "fix"],
+        "description": "Deploys into Construction Yard. Mobile base.",
+    },
+    "ftrk": {
+        "name": "Flak Truck",
+        "category": "vehicle",
+        "cost": 600,
+        "hp": 15000,
+        "speed": 113,
+        "armor": "light",
+        "side": "soviet",
+        "prerequisites": ["weap"],
+        "description": "Mobile anti-air unit. Soviet only.",
+    },
+    "mnly": {
+        "name": "Minelayer",
+        "category": "vehicle",
+        "cost": 800,
+        "hp": 30000,
+        "speed": 113,
+        "armor": "heavy",
+        "side": "both",
+        "prerequisites": ["weap", "fix"],
+        "description": "Lays anti-tank mines.",
+    },
+    "ttnk": {
+        "name": "Tesla Tank",
+        "category": "vehicle",
+        "cost": 1350,
+        "hp": 30000,
+        "speed": 92,
+        "armor": "light",
+        "side": "soviet",
+        "prerequisites": ["weap", "stek", "tsla"],
+        "description": "Tesla weapon on tracks. Effective vs all targets. Soviet only.",
+    },
+    "ctnk": {
+        "name": "Chrono Tank",
+        "category": "vehicle",
+        "cost": 1350,
+        "hp": 20000,
+        "speed": 86,
+        "armor": "light",
+        "side": "allied",
+        "prerequisites": ["weap", "atek"],
+        "description": "Teleporting tank. Hit and run tactics. Allied only.",
+    },
+    "stnk": {
+        "name": "Phase Transport",
+        "category": "vehicle",
+        "cost": 1000,
+        "hp": 11000,
+        "speed": 128,
+        "armor": "light",
+        "side": "allied",
+        "prerequisites": ["weap", "atek"],
+        "description": "Cloaked APC. Invisible when not firing. Allied only.",
+    },
+    "qtnk": {
+        "name": "MAD Tank",
+        "category": "vehicle",
+        "cost": 2000,
+        "hp": 22000,
+        "speed": 46,
+        "armor": "heavy",
+        "side": "soviet",
+        "prerequisites": ["weap", "stek"],
+        "description": "Deploys seismic charge, destroying self and nearby vehicles. Soviet only.",
+    },
+    "dtrk": {
+        "name": "Demolition Truck",
+        "category": "vehicle",
+        "cost": 2500,
+        "hp": 11000,
+        "speed": 113,
+        "armor": "light",
+        "side": "soviet",
+        "prerequisites": ["weap", "stek"],
+        "description": "Suicide vehicle. Massive area nuclear explosion on death. Soviet only.",
+    },
+    "mgg": {
+        "name": "Mobile Gap Generator",
+        "category": "vehicle",
+        "cost": 1000,
+        "hp": 11000,
+        "speed": 72,
+        "armor": "heavy",
+        "side": "allied",
+        "prerequisites": ["weap", "atek"],
+        "description": "Creates mobile shroud area. Allied only.",
+    },
+    "mrj": {
+        "name": "Mobile Radar Jammer",
+        "category": "vehicle",
+        "cost": 1000,
+        "hp": 11000,
+        "speed": 68,
+        "armor": "heavy",
+        "side": "allied",
+        "prerequisites": ["weap", "atek"],
+        "description": "Jams enemy radar in area. Allied only.",
+    },
+    "truk": {
+        "name": "Supply Truck",
+        "category": "vehicle",
+        "cost": 500,
+        "hp": 11000,
+        "speed": 113,
+        "armor": "light",
+        "side": "both",
+        "prerequisites": ["weap"],
+        "description": "Delivers cash when reaching allied structures.",
+    },
+    # Aircraft
+    "heli": {
+        "name": "Longbow",
+        "category": "aircraft",
+        "cost": 2000,
+        "hp": 12000,
+        "speed": 149,
+        "armor": "light",
+        "side": "allied",
+        "prerequisites": ["hpad"],
+        "description": "Anti-armor helicopter with missiles. Allied only.",
+    },
+    "hind": {
+        "name": "Hind",
+        "category": "aircraft",
+        "cost": 1500,
+        "hp": 12000,
+        "speed": 112,
+        "armor": "light",
+        "side": "soviet",
+        "prerequisites": ["afld"],
+        "description": "Anti-ground attack helicopter. Soviet only.",
+    },
+    "mh60": {
+        "name": "Black Hawk",
+        "category": "aircraft",
+        "cost": 1500,
+        "hp": 12000,
+        "speed": 112,
+        "armor": "light",
+        "side": "allied",
+        "prerequisites": ["hpad"],
+        "description": "Transport/attack helicopter. Allied only.",
+    },
+    "tran": {
+        "name": "Chinook",
+        "category": "aircraft",
+        "cost": 900,
+        "hp": 14000,
+        "speed": 128,
+        "armor": "light",
+        "side": "both",
+        "prerequisites": ["hpad|afld"],
+        "description": "Transport helicopter. Carries 5 infantry.",
+    },
+    "yak": {
+        "name": "Yak",
+        "category": "aircraft",
+        "cost": 1350,
+        "hp": 6000,
+        "speed": 178,
+        "armor": "light",
+        "side": "soviet",
+        "prerequisites": ["afld"],
+        "description": "Fast anti-infantry attack plane. Soviet only.",
+    },
+    "mig": {
+        "name": "MiG",
+        "category": "aircraft",
+        "cost": 2000,
+        "hp": 8000,
+        "speed": 223,
+        "armor": "light",
+        "side": "soviet",
+        "prerequisites": ["afld", "stek"],
+        "description": "Anti-structure/armor attack plane with missiles. Soviet only.",
+    },
+    # Ships
+    "ss": {
+        "name": "Submarine",
+        "category": "ship",
+        "cost": 950,
+        "hp": 25000,
+        "speed": 78,
+        "armor": "light",
+        "side": "soviet",
+        "prerequisites": ["spen"],
+        "description": "Invisible anti-ship unit. Soviet only.",
+    },
+    "dd": {
+        "name": "Destroyer",
+        "category": "ship",
+        "cost": 1000,
+        "hp": 40000,
+        "speed": 92,
+        "armor": "heavy",
+        "side": "allied",
+        "prerequisites": ["syrd", "dome"],
+        "description": "Multi-role warship. Anti-sub, anti-air, anti-surface. Allied only.",
+    },
+    "ca": {
+        "name": "Cruiser",
+        "category": "ship",
+        "cost": 2400,
+        "hp": 80000,
+        "speed": 44,
+        "armor": "heavy",
+        "side": "allied",
+        "prerequisites": ["syrd", "atek"],
+        "description": "Heavy bombardment ship. Long range. Allied only.",
+    },
+    "pt": {
+        "name": "Gunboat",
+        "category": "ship",
+        "cost": 500,
+        "hp": 20000,
+        "speed": 142,
+        "armor": "heavy",
+        "side": "both",
+        "prerequisites": ["syrd|spen"],
+        "description": "Fast patrol boat.",
+    },
+    "lst": {
+        "name": "Transport",
+        "category": "ship",
+        "cost": 500,
+        "hp": 40000,
+        "speed": 115,
+        "armor": "heavy",
+        "side": "both",
+        "prerequisites": ["syrd|spen"],
+        "description": "Naval transport. Carries vehicles and infantry.",
+    },
+    "msub": {
+        "name": "Missile Submarine",
+        "category": "ship",
+        "cost": 2000,
+        "hp": 40000,
+        "speed": 44,
+        "armor": "light",
+        "side": "soviet",
+        "prerequisites": ["spen", "stek"],
+        "description": "Long-range missile submarine. Soviet only.",
+    },
+}
+# ─── Building Data ────────────────────────────────────────────────────────────
+# RA_BUILDINGS maps a lowercase building type code (e.g. "powr") to its stats.
+# Every entry carries:
+#   name           display name
+#   cost, hp       integer stats (scales/units are not stated in this module)
+#   power          positive on the two power plants, zero or negative on
+#                  everything else (presumably supply vs. drain — TODO confirm)
+#   side           "allied", "soviet" or "both"
+#   prerequisites  list of building codes (empty for "fact" and "powr")
+#   produces       list of production queue names this building provides
+#                  ("Building", "Defense", "Infantry", "Vehicle", "Aircraft",
+#                  "Ship"); empty when it produces nothing
+#   description    short free-text summary
+# Optional keys: terrain ("water" on the two naval buildings) and build_limit
+# (1 on the three superweapons).
+RA_BUILDINGS: dict[str, dict] = {
+    "fact": {
+        "name": "Construction Yard",
+        "cost": 2000,
+        "hp": 150000,
+        "power": 0,
+        "side": "both",
+        "prerequisites": [],
+        "produces": ["Building", "Defense"],
+        "description": "Primary base structure. Required to build other structures.",
+    },
+    "powr": {
+        "name": "Power Plant",
+        "cost": 300,
+        "hp": 40000,
+        "power": 100,
+        "side": "both",
+        "prerequisites": [],
+        "produces": [],
+        "description": "Basic power supply. Most structures need power to function.",
+    },
+    "apwr": {
+        "name": "Advanced Power Plant",
+        "cost": 500,
+        "hp": 70000,
+        "power": 200,
+        "side": "both",
+        "prerequisites": ["dome"],
+        "produces": [],
+        "description": "Double power output. Requires radar dome tech.",
+    },
+    "barr": {
+        "name": "Soviet Barracks",
+        "cost": 500,
+        "hp": 60000,
+        "power": -20,
+        "side": "soviet",
+        "prerequisites": ["powr"],
+        "produces": ["Infantry"],
+        "description": "Soviet infantry production. Required for all Soviet infantry.",
+    },
+    "tent": {
+        "name": "Allied Barracks",
+        "cost": 500,
+        "hp": 60000,
+        "power": -20,
+        "side": "allied",
+        "prerequisites": ["powr"],
+        "produces": ["Infantry"],
+        "description": "Allied infantry production. Required for all Allied infantry.",
+    },
+    "proc": {
+        "name": "Ore Refinery",
+        "cost": 1400,
+        "hp": 90000,
+        "power": -30,
+        "side": "both",
+        "prerequisites": ["powr"],
+        "produces": [],
+        "description": "Processes ore into credits. Comes with a free Ore Truck.",
+    },
+    "weap": {
+        "name": "War Factory",
+        "cost": 2000,
+        "hp": 150000,
+        "power": -30,
+        "side": "both",
+        "prerequisites": ["proc"],
+        "produces": ["Vehicle"],
+        "description": "Vehicle production facility. Required for all vehicles.",
+    },
+    "dome": {
+        "name": "Radar Dome",
+        "cost": 1500,
+        "hp": 100000,
+        "power": -40,
+        "side": "both",
+        "prerequisites": ["proc"],
+        "produces": [],
+        "description": "Provides minimap radar. Unlocks advanced tech.",
+    },
+    "fix": {
+        "name": "Service Depot",
+        "cost": 1200,
+        "hp": 80000,
+        "power": -30,
+        "side": "both",
+        "prerequisites": ["weap"],
+        "produces": [],
+        "description": "Repairs vehicles. Unlocks MCV and Minelayer.",
+    },
+    "atek": {
+        "name": "Allied Tech Center",
+        "cost": 1500,
+        "hp": 60000,
+        "power": -200,
+        "side": "allied",
+        "prerequisites": ["dome", "weap"],
+        "produces": [],
+        "description": "Unlocks advanced Allied units. GPS satellite.",
+    },
+    "stek": {
+        "name": "Soviet Tech Center",
+        "cost": 1500,
+        "hp": 80000,
+        "power": -100,
+        "side": "soviet",
+        "prerequisites": ["dome", "weap"],
+        "produces": [],
+        "description": "Unlocks advanced Soviet units.",
+    },
+    "hpad": {
+        "name": "Helipad",
+        "cost": 500,
+        "hp": 80000,
+        "power": -10,
+        "side": "allied",
+        "prerequisites": ["dome"],
+        "produces": ["Aircraft"],
+        "description": "Allied aircraft production. Rearming pad.",
+    },
+    "afld": {
+        "name": "Airfield",
+        "cost": 500,
+        "hp": 100000,
+        "power": -20,
+        "side": "soviet",
+        "prerequisites": ["dome"],
+        "produces": ["Aircraft"],
+        "description": "Soviet aircraft production. Rearming strip.",
+    },
+    "spen": {
+        "name": "Sub Pen",
+        "cost": 800,
+        "hp": 100000,
+        "power": -20,
+        "side": "soviet",
+        "prerequisites": ["powr"],
+        "produces": ["Ship"],
+        "terrain": "water",
+        "description": "Soviet naval production. Repairs ships. REQUIRES WATER — cannot build on land maps.",
+    },
+    "syrd": {
+        "name": "Naval Yard",
+        "cost": 1000,
+        "hp": 100000,
+        "power": -20,
+        "side": "allied",
+        "prerequisites": ["powr"],
+        "produces": ["Ship"],
+        "terrain": "water",
+        "description": "Allied naval production. Repairs ships. REQUIRES WATER — cannot build on land maps.",
+    },
+    "silo": {
+        "name": "Ore Silo",
+        "cost": 150,
+        "hp": 30000,
+        "power": -10,
+        "side": "both",
+        "prerequisites": ["proc"],
+        "produces": [],
+        "description": "Additional ore storage capacity.",
+    },
+    "kenn": {
+        "name": "Kennel",
+        "cost": 200,
+        "hp": 30000,
+        "power": -10,
+        "side": "soviet",
+        "prerequisites": ["powr"],
+        "produces": ["Infantry"],
+        "description": "Produces attack dogs. Soviet only.",
+    },
+    # Defenses
+    "pbox": {
+        "name": "Pillbox",
+        "cost": 600,
+        "hp": 40000,
+        "power": 0,
+        "side": "allied",
+        "prerequisites": ["tent"],
+        "produces": [],
+        "description": "Anti-infantry defense turret. Allied only.",
+    },
+    "hbox": {
+        "name": "Camo Pillbox",
+        "cost": 750,
+        "hp": 40000,
+        "power": 0,
+        "side": "allied",
+        "prerequisites": ["tent"],
+        "produces": [],
+        "description": "Hidden anti-infantry defense. Allied only.",
+    },
+    "gun": {
+        "name": "Turret",
+        "cost": 800,
+        "hp": 40000,
+        "power": -20,
+        "side": "allied",
+        "prerequisites": ["weap"],
+        "produces": [],
+        "description": "Anti-armor defense turret. Allied only.",
+    },
+    "ftur": {
+        "name": "Flame Tower",
+        "cost": 600,
+        "hp": 40000,
+        "power": -20,
+        "side": "soviet",
+        "prerequisites": ["barr"],
+        "produces": [],
+        "description": "Short-range anti-infantry defense. Soviet only.",
+    },
+    "tsla": {
+        "name": "Tesla Coil",
+        "cost": 1200,
+        "hp": 40000,
+        "power": -75,
+        "side": "soviet",
+        "prerequisites": ["weap"],
+        "produces": [],
+        "description": "Powerful anti-ground defense. High power cost. Soviet only.",
+    },
+    "agun": {
+        "name": "AA Gun",
+        "cost": 800,
+        "hp": 40000,
+        "power": -50,
+        "side": "allied",
+        "prerequisites": ["dome"],
+        "produces": [],
+        "description": "Anti-air defense turret. Allied only.",
+    },
+    "sam": {
+        "name": "SAM Site",
+        "cost": 700,
+        "hp": 40000,
+        "power": -20,
+        "side": "soviet",
+        "prerequisites": ["dome"],
+        "produces": [],
+        "description": "Anti-air missile defense. Soviet only.",
+    },
+    "gap": {
+        "name": "Gap Generator",
+        "cost": 800,
+        "hp": 50000,
+        "power": -60,
+        "side": "allied",
+        "prerequisites": ["atek"],
+        "produces": [],
+        "description": "Creates shroud area over your base. Allied only.",
+    },
+    # Superweapons
+    "iron": {
+        "name": "Iron Curtain",
+        "cost": 2000,
+        "hp": 100000,
+        "power": -200,
+        "side": "soviet",
+        "prerequisites": ["stek"],
+        "produces": [],
+        "build_limit": 1,
+        "description": "Superweapon: Makes one unit/building invulnerable temporarily.",
+    },
+    "pdox": {
+        "name": "Chronosphere",
+        "cost": 1500,
+        "hp": 100000,
+        "power": -200,
+        "side": "allied",
+        "prerequisites": ["atek"],
+        "produces": [],
+        "build_limit": 1,
+        "description": "Superweapon: Teleports units across the map.",
+    },
+    "mslo": {
+        "name": "Missile Silo",
+        "cost": 2500,
+        "hp": 100000,
+        "power": -150,
+        "side": "soviet",
+        "prerequisites": ["stek"],
+        "produces": [],
+        "build_limit": 1,
+        "description": "Superweapon: Launches nuclear missile at target location.",
+    },
+}
+# ─── Tech Tree ────────────────────────────────────────────────────────────────
+# RA_TECH_TREE maps a side ("soviet" / "allied") to an ordered list of building
+# codes; each trailing comment repeats that building's prerequisites as
+# recorded in RA_BUILDINGS. The lists are a subset of RA_BUILDINGS: for
+# example "fact", "apwr" and "silo" appear in neither list, and "hbox" is
+# absent from the allied one.
+RA_TECH_TREE: dict[str, list[str]] = {
+    "soviet": [
+        "powr",     # Power Plant (base)
+        "barr",     # Barracks → infantry (requires powr)
+        "kenn",     # Kennel → dogs (requires powr)
+        "proc",     # Ore Refinery (requires powr)
+        "weap",     # War Factory (requires proc)
+        "spen",     # Sub Pen (requires powr, needs water)
+        "dome",     # Radar Dome (requires proc)
+        "fix",      # Service Depot (requires weap)
+        "afld",     # Airfield (requires dome)
+        "stek",     # Tech Center (requires dome + weap)
+        "tsla",     # Tesla Coil (requires weap)
+        "sam",      # SAM Site (requires dome)
+        "ftur",     # Flame Tower (requires barr)
+        "iron",     # Iron Curtain (requires stek)
+        "mslo",     # Missile Silo (requires stek)
+    ],
+    "allied": [
+        "powr",     # Power Plant (base)
+        "tent",     # Barracks → infantry (requires powr)
+        "proc",     # Ore Refinery (requires powr)
+        "weap",     # War Factory (requires proc)
+        "syrd",     # Naval Yard (requires powr, needs water)
+        "dome",     # Radar Dome (requires proc)
+        "fix",      # Service Depot (requires weap)
+        "hpad",     # Helipad (requires dome)
+        "atek",     # Tech Center (requires dome + weap)
+        "gun",      # Turret (requires weap)
+        "pbox",     # Pillbox (requires tent)
+        "agun",     # AA Gun (requires dome)
+        "gap",      # Gap Generator (requires atek)
+        "pdox",     # Chronosphere (requires atek)
+    ],
+}
+# ─── Faction Data ─────────────────────────────────────────────────────────────
+# RA_FACTIONS maps a lowercase faction name to:
+#   side          "allied" or "soviet" (also a key of RA_TECH_TREE)
+#   display_name  capitalised name for presentation
+#   unique_units  unit codes from RA_UNITS reserved for this faction (may be
+#                 empty); note that the matching RA_UNITS entries are tagged
+#                 only with the side, not with the faction
+#   description   short free-text summary
+RA_FACTIONS: dict[str, dict] = {
+    "england": {
+        "side": "allied",
+        "display_name": "England",
+        "unique_units": [],
+        "description": "Standard Allied faction.",
+    },
+    "france": {
+        "side": "allied",
+        "display_name": "France",
+        "unique_units": ["stnk"],
+        "description": "Allied faction with Phase Transport (cloaked APC).",
+    },
+    "germany": {
+        "side": "allied",
+        "display_name": "Germany",
+        "unique_units": ["ctnk"],
+        "description": "Allied faction with Chrono Tank (teleporting tank).",
+    },
+    "russia": {
+        "side": "soviet",
+        "display_name": "Russia",
+        "unique_units": ["ttnk"],
+        "description": "Soviet faction with Tesla Tank.",
+    },
+    "ukraine": {
+        "side": "soviet",
+        "display_name": "Ukraine",
+        "unique_units": ["dtrk"],
+        "description": "Soviet faction with Demolition Truck (nuclear suicide vehicle).",
+    },
+}
+# ─── Query Functions ──────────────────────────────────────────────────────────
+def get_unit_stats(unit_type: str) -> Optional[dict]:
+    """Get stats for a unit type. Returns None if not found."""
+    return RA_UNITS.get(unit_type.lower())
+def get_building_stats(building_type: str) -> Optional[dict]:
+    """Get stats for a building type. Returns None if not found."""
+    return RA_BUILDINGS.get(building_type.lower())
+def get_tech_tree(faction: Optional[str] = None) -> dict:
+    """Get the tech tree build order.
+    Args:
+        faction: Faction name (e.g., 'russia') or side ('allied', 'soviet').
+                If None, returns both sides.
+    """
+    if faction is None:
+        return RA_TECH_TREE
+    # Map faction to side
+    side = faction.lower()
+    if side in RA_FACTIONS:
+        side = RA_FACTIONS[side]["side"]
+    if side in RA_TECH_TREE:
+        return {side: RA_TECH_TREE[side]}
+    return {}
+def get_faction_info(faction: str) -> Optional[dict]:
+    """Get faction info including available units and buildings."""
+    faction = faction.lower()
+    info = RA_FACTIONS.get(faction)
+    if info is None:
+        return None
+    side = info["side"]
+    # Collect units available to this faction
+    available_units = []
+    for unit_type, data in RA_UNITS.items():
+        unit_side = data.get("side", "")
+        if unit_side == "both" or unit_side == side:
+            available_units.append(unit_type)
+    # Add faction-unique units
+    for u in info.get("unique_units", []):
+        if u not in available_units and u in RA_UNITS:
+            available_units.append(u)
+    # Collect buildings
+    available_buildings = []
+    for bldg_type, data in RA_BUILDINGS.items():
+        bldg_side = data.get("side", "")
+        if bldg_side == "both" or bldg_side == side:
+            available_buildings.append(bldg_type)
+    return {
+        **info,
+        "faction": faction,
+        "available_units": sorted(available_units),
+        "available_buildings": sorted(available_buildings),
+    }
+def get_all_unit_types() -> list[str]:
+    """Get all available unit type names."""
+    return sorted(RA_UNITS.keys())
+def get_all_building_types() -> list[str]:
+    """Get all available building type names."""
+    return sorted(RA_BUILDINGS.keys())
+def get_all_units_for_side(side: str) -> dict[str, dict]:
+    """Get all units available to a side ('allied' or 'soviet') with full stats.
+    Returns dict keyed by unit type name, each value is the full stats dict.
+    Includes units with side='both' plus units specific to the given side.
+    """
+    side = side.lower()
+    return {
+        utype: dict(data)
+        for utype, data in RA_UNITS.items()
+        if data.get("side") in (side, "both")
+    }
+def get_all_buildings_for_side(side: str) -> dict[str, dict]:
+    """Get all buildings available to a side ('allied' or 'soviet') with full stats.
+    Returns dict keyed by building type name, each value is the full stats dict.
+    Includes buildings with side='both' plus buildings specific to the given side.
+    """
+    side = side.lower()
+    return {
+        btype: dict(data)
+        for btype, data in RA_BUILDINGS.items()
+        if data.get("side") in (side, "both")
+    }

openra_env/generated/__init__.py ADDED Viewed

File without changes

openra_env/generated/rl_bridge_pb2.py ADDED Viewed

	@@ -0,0 +1,61 @@

+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler.  DO NOT EDIT!
+# NO CHECKED-IN PROTOBUF GENCODE
+# source: rl_bridge.proto
+# Protobuf Python Version: 6.31.1
+"""Generated protocol buffer code."""
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import descriptor_pool as _descriptor_pool
+from google.protobuf import runtime_version as _runtime_version
+from google.protobuf import symbol_database as _symbol_database
+from google.protobuf.internal import builder as _builder
+_runtime_version.ValidateProtobufRuntimeVersion(
+    _runtime_version.Domain.PUBLIC,
+    6,
+    31,
+    1,
+    '',
+    'rl_bridge.proto'
+)
+# @@protoc_insertion_point(imports)
+_sym_db = _symbol_database.Default()
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0frl_bridge.proto\x12\topenra.rl\"\x97\x04\n\x0fGameObservation\x12\x0c\n\x04tick\x18\x01 \x01(\x05\x12\x12\n\nepisode_id\x18\x02 \x01(\t\x12%\n\x07\x65\x63onomy\x18\x03 \x01(\x0b\x32\x14.openra.rl.RlEconomy\x12\'\n\x08military\x18\x04 \x01(\x0b\x32\x15.openra.rl.RlMilitary\x12$\n\x05units\x18\x05 \x03(\x0b\x32\x15.openra.rl.RlUnitInfo\x12,\n\tbuildings\x18\x06 \x03(\x0b\x32\x19.openra.rl.RlBuildingInfo\x12/\n\nproduction\x18\x07 \x03(\x0b\x32\x1b.openra.rl.RlProductionInfo\x12.\n\x0fvisible_enemies\x18\x08 \x03(\x0b\x32\x15.openra.rl.RlUnitInfo\x12&\n\x08map_info\x18\t \x01(\x0b\x32\x14.openra.rl.RlMapInfo\x12\x13\n\x0bspatial_map\x18\n \x01(\x0c\x12\x18\n\x10spatial_channels\x18\x0b \x01(\x05\x12\x0c\n\x04\x64one\x18\x0c \x01(\x08\x12\x0e\n\x06reward\x18\r \x01(\x02\x12\x0e\n\x06result\x18\x0e \x01(\t\x12\x1c\n\x14\x61vailable_production\x18\x0f \x03(\t\x12:\n\x17visible_enemy_buildings\x18\x10 \x03(\x0b\x32\x19.openra.rl.RlBuildingInfo\"\x89\x01\n\tRlEconomy\x12\x0c\n\x04\x63\x61sh\x18\x01 \x01(\x05\x12\x0b\n\x03ore\x18\x02 \x01(\x05\x12\x16\n\x0epower_provided\x18\x03 \x01(\x05\x12\x15\n\rpower_drained\x18\x04 \x01(\x05\x12\x19\n\x11resource_capacity\x18\x05 \x01(\x05\x12\x17\n\x0fharvester_count\x18\x06 \x01(\x05\"\xff\x01\n\nRlMilitary\x12\x14\n\x0cunits_killed\x18\x01 \x01(\x05\x12\x12\n\nunits_lost\x18\x02 \x01(\x05\x12\x18\n\x10\x62uildings_killed\x18\x03 \x01(\x05\x12\x16\n\x0e\x62uildings_lost\x18\x04 \x01(\x05\x12\x12\n\narmy_value\x18\x05 \x01(\x05\x12\x19\n\x11\x61\x63tive_unit_count\x18\x06 \x01(\x05\x12\x12\n\nkills_cost\x18\x07 \x01(\x05\x12\x13\n\x0b\x64\x65\x61ths_cost\x18\x08 \x01(\x05\x12\x14\n\x0c\x61ssets_value\x18\t \x01(\x05\x12\x12\n\nexperience\x18\n \x01(\x05\x12\x13\n\x0border_count\x18\x0b \x01(\x05\"\xe7\x02\n\nRlUnitInfo\x12\x10\n\x08\x61\x63tor_id\x18\x01 \x01(\r\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\r\n\x05pos_x\x18\x03 \x01(\x05\x12\r\n\x05pos_y\x18\x04 
\x01(\x05\x12\x0e\n\x06\x63\x65ll_x\x18\x05 \x01(\x05\x12\x0e\n\x06\x63\x65ll_y\x18\x06 \x01(\x05\x12\x12\n\nhp_percent\x18\x07 \x01(\x02\x12\x0f\n\x07is_idle\x18\x08 \x01(\x08\x12\x18\n\x10\x63urrent_activity\x18\t \x01(\t\x12\r\n\x05owner\x18\n \x01(\t\x12\x0c\n\x04\x61mmo\x18\x0b \x01(\x05\x12\x12\n\ncan_attack\x18\x0c \x01(\x08\x12\x0e\n\x06\x66\x61\x63ing\x18\r \x01(\x05\x12\x18\n\x10\x65xperience_level\x18\x0e \x01(\x05\x12\x0e\n\x06stance\x18\x0f \x01(\x05\x12\r\n\x05speed\x18\x10 \x01(\x05\x12\x14\n\x0c\x61ttack_range\x18\x11 \x01(\x05\x12\x17\n\x0fpassenger_count\x18\x12 \x01(\x05\x12\x13\n\x0bis_building\x18\x13 \x01(\x08\"\xe7\x02\n\x0eRlBuildingInfo\x12\x10\n\x08\x61\x63tor_id\x18\x01 \x01(\r\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\r\n\x05pos_x\x18\x03 \x01(\x05\x12\r\n\x05pos_y\x18\x04 \x01(\x05\x12\x12\n\nhp_percent\x18\x05 \x01(\x02\x12\r\n\x05owner\x18\x06 \x01(\t\x12\x14\n\x0cis_producing\x18\x07 \x01(\x08\x12\x1b\n\x13production_progress\x18\x08 \x01(\x02\x12\x16\n\x0eproducing_item\x18\t \x01(\t\x12\x12\n\nis_powered\x18\n \x01(\x08\x12\x14\n\x0cis_repairing\x18\x0b \x01(\x08\x12\x12\n\nsell_value\x18\x0c \x01(\x05\x12\x0f\n\x07rally_x\x18\r \x01(\x05\x12\x0f\n\x07rally_y\x18\x0e \x01(\x05\x12\x14\n\x0cpower_amount\x18\x0f \x01(\x05\x12\x13\n\x0b\x63\x61n_produce\x18\x10 \x03(\t\x12\x0e\n\x06\x63\x65ll_x\x18\x11 \x01(\x05\x12\x0e\n\x06\x63\x65ll_y\x18\x12 \x01(\x05\"\x87\x01\n\x10RlProductionInfo\x12\x12\n\nqueue_type\x18\x01 \x01(\t\x12\x0c\n\x04item\x18\x02 \x01(\t\x12\x10\n\x08progress\x18\x03 \x01(\x02\x12\x17\n\x0fremaining_ticks\x18\x04 \x01(\x05\x12\x16\n\x0eremaining_cost\x18\x05 \x01(\x05\x12\x0e\n\x06paused\x18\x06 \x01(\x08\"<\n\tRlMapInfo\x12\r\n\x05width\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\x10\n\x08map_name\x18\x03 \x01(\t\"3\n\x0b\x41gentAction\x12$\n\x08\x63ommands\x18\x01 \x03(\x0b\x32\x12.openra.rl.Command\"\xa2\x01\n\x07\x43ommand\x12%\n\x06\x61\x63tion\x18\x01 
\x01(\x0e\x32\x15.openra.rl.ActionType\x12\x10\n\x08\x61\x63tor_id\x18\x02 \x01(\r\x12\x17\n\x0ftarget_actor_id\x18\x03 \x01(\r\x12\x10\n\x08target_x\x18\x04 \x01(\x05\x12\x10\n\x08target_y\x18\x05 \x01(\x05\x12\x11\n\titem_type\x18\x06 \x01(\t\x12\x0e\n\x06queued\x18\x07 \x01(\x08\"\x91\x01\n\tGameState\x12\x12\n\nepisode_id\x18\x01 \x01(\t\x12\x0c\n\x04tick\x18\x02 \x01(\x05\x12\r\n\x05phase\x18\x03 \x01(\t\x12\x0e\n\x06winner\x18\x04 \x01(\t\x12\x14\n\x0cplayer_count\x18\x05 \x01(\x05\x12\x16\n\x0eplayer_faction\x18\x06 \x01(\t\x12\x15\n\renemy_faction\x18\x07 \x01(\t\"\x0e\n\x0cStateRequest*\xb9\x02\n\nActionType\x12\t\n\x05NO_OP\x10\x00\x12\x08\n\x04MOVE\x10\x01\x12\x0f\n\x0b\x41TTACK_MOVE\x10\x02\x12\n\n\x06\x41TTACK\x10\x03\x12\x08\n\x04STOP\x10\x04\x12\x0b\n\x07HARVEST\x10\x05\x12\t\n\x05\x42UILD\x10\x06\x12\t\n\x05TRAIN\x10\x07\x12\n\n\x06\x44\x45PLOY\x10\x08\x12\x08\n\x04SELL\x10\t\x12\n\n\x06REPAIR\x10\n\x12\x12\n\x0ePLACE_BUILDING\x10\x0b\x12\x15\n\x11\x43\x41NCEL_PRODUCTION\x10\x0c\x12\x13\n\x0fSET_RALLY_POINT\x10\r\x12\t\n\x05GUARD\x10\x0e\x12\x0e\n\nSET_STANCE\x10\x0f\x12\x13\n\x0f\x45NTER_TRANSPORT\x10\x10\x12\n\n\x06UNLOAD\x10\x11\x12\x0e\n\nPOWER_DOWN\x10\x12\x12\x0f\n\x0bSET_PRIMARY\x10\x13\x12\r\n\tSURRENDER\x10\x14\x32\x8c\x01\n\x08RLBridge\x12\x45\n\x0bGameSession\x12\x16.openra.rl.AgentAction\x1a\x1a.openra.rl.GameObservation(\x01\x30\x01\x12\x39\n\x08GetState\x12\x17.openra.rl.StateRequest\x1a\x14.openra.rl.GameStateB\x18\xaa\x02\x15OpenRA.Mods.Common.RLb\x06proto3')
+# Generated code: build the message/enum descriptors and classes from DESCRIPTOR
+# and publish them in this module's namespace.
+_globals = globals()
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'rl_bridge_pb2', _globals)
+# This branch only runs when the C descriptor implementation is not in use.
+if not _descriptor._USE_C_DESCRIPTORS:
+  _globals['DESCRIPTOR']._loaded_options = None
+  _globals['DESCRIPTOR']._serialized_options = b'\252\002\025OpenRA.Mods.Common.RL'
+  # _serialized_start/_serialized_end look like byte offsets of each definition
+  # inside the serialized file descriptor (e.g. GameObservation spans 31..566,
+  # which matches its 535-byte length prefix) -- generated values, do not hand-edit.
+  _globals['_ACTIONTYPE']._serialized_start=2273
+  _globals['_ACTIONTYPE']._serialized_end=2586
+  _globals['_GAMEOBSERVATION']._serialized_start=31
+  _globals['_GAMEOBSERVATION']._serialized_end=566
+  _globals['_RLECONOMY']._serialized_start=569
+  _globals['_RLECONOMY']._serialized_end=706
+  _globals['_RLMILITARY']._serialized_start=709
+  _globals['_RLMILITARY']._serialized_end=964
+  _globals['_RLUNITINFO']._serialized_start=967
+  _globals['_RLUNITINFO']._serialized_end=1326
+  _globals['_RLBUILDINGINFO']._serialized_start=1329
+  _globals['_RLBUILDINGINFO']._serialized_end=1688
+  _globals['_RLPRODUCTIONINFO']._serialized_start=1691
+  _globals['_RLPRODUCTIONINFO']._serialized_end=1826
+  _globals['_RLMAPINFO']._serialized_start=1828
+  _globals['_RLMAPINFO']._serialized_end=1888
+  _globals['_AGENTACTION']._serialized_start=1890
+  _globals['_AGENTACTION']._serialized_end=1941
+  _globals['_COMMAND']._serialized_start=1944
+  _globals['_COMMAND']._serialized_end=2106
+  _globals['_GAMESTATE']._serialized_start=2109
+  _globals['_GAMESTATE']._serialized_end=2254
+  _globals['_STATEREQUEST']._serialized_start=2256
+  _globals['_STATEREQUEST']._serialized_end=2270
+  _globals['_RLBRIDGE']._serialized_start=2589
+  _globals['_RLBRIDGE']._serialized_end=2729
+# @@protoc_insertion_point(module_scope)

openra_env/generated/rl_bridge_pb2_grpc.py ADDED Viewed

	@@ -0,0 +1,148 @@

+# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
+"""Client and server classes corresponding to protobuf-defined services."""
+import grpc
+from openra_env.generated import rl_bridge_pb2 as rl__bridge__pb2
+# grpcio version this generated module depends on (see the error message below).
+GRPC_GENERATED_VERSION = '1.75.1'
+# grpcio version actually installed at import time.
+GRPC_VERSION = grpc.__version__
+_version_not_supported = False
+try:
+    from grpc._utilities import first_version_is_lower
+    _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
+except ImportError:
+    # The comparison helper is missing from the installed grpc: treat it as unsupported.
+    _version_not_supported = True
+# Fail at import time rather than at the first RPC when the runtime is too old.
+if _version_not_supported:
+    raise RuntimeError(
+        f'The grpc package installed is at version {GRPC_VERSION},'
+        + ' but the generated code in rl_bridge_pb2_grpc.py depends on'
+        + f' grpcio>={GRPC_GENERATED_VERSION}.'
+        + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
+        + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
+    )
+class RLBridgeStub(object):
+    """The RL Bridge service allows an external agent to interact with OpenRA
+    via bidirectional streaming (lock-step) or unary state queries.
+    """
+    def __init__(self, channel):
+        """Constructor.
+        Args:
+            channel: A grpc.Channel.
+        """
+        # Stream-stream call: serializes AgentAction requests, parses GameObservation replies.
+        self.GameSession = channel.stream_stream(
+                '/openra.rl.RLBridge/GameSession',
+                request_serializer=rl__bridge__pb2.AgentAction.SerializeToString,
+                response_deserializer=rl__bridge__pb2.GameObservation.FromString,
+                _registered_method=True)
+        # Unary call: serializes a StateRequest, parses a GameState reply.
+        self.GetState = channel.unary_unary(
+                '/openra.rl.RLBridge/GetState',
+                request_serializer=rl__bridge__pb2.StateRequest.SerializeToString,
+                response_deserializer=rl__bridge__pb2.GameState.FromString,
+                _registered_method=True)
+class RLBridgeServicer(object):
+    """The RL Bridge service allows an external agent to interact with OpenRA
+    via bidirectional streaming (lock-step) or unary state queries.
+    """
+    # Both base handlers report UNIMPLEMENTED on the context and then raise.
+    def GameSession(self, request_iterator, context):
+        """Bidirectional streaming: game sends observations, agent sends actions.
+        Each observation waits for an action before advancing to the next tick.
+        """
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+    def GetState(self, request, context):
+        """Unary: query current game state on demand.
+        """
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+def add_RLBridgeServicer_to_server(servicer, server):
+    # Registers the servicer's two RPC handlers on `server` under 'openra.rl.RLBridge'.
+    # Each handler pairs a servicer method with its request parser and response serializer.
+    rpc_method_handlers = {
+            'GameSession': grpc.stream_stream_rpc_method_handler(
+                    servicer.GameSession,
+                    request_deserializer=rl__bridge__pb2.AgentAction.FromString,
+                    response_serializer=rl__bridge__pb2.GameObservation.SerializeToString,
+            ),
+            'GetState': grpc.unary_unary_rpc_method_handler(
+                    servicer.GetState,
+                    request_deserializer=rl__bridge__pb2.StateRequest.FromString,
+                    response_serializer=rl__bridge__pb2.GameState.SerializeToString,
+            ),
+    }
+    generic_handler = grpc.method_handlers_generic_handler(
+            'openra.rl.RLBridge', rpc_method_handlers)
+    # The same handler table is registered through both the generic and the
+    # registered-method APIs.
+    server.add_generic_rpc_handlers((generic_handler,))
+    server.add_registered_method_handlers('openra.rl.RLBridge', rpc_method_handlers)
+ # This class is part of an EXPERIMENTAL API.
+class RLBridge(object):
+    """The RL Bridge service allows an external agent to interact with OpenRA
+    via bidirectional streaming (lock-step) or unary state queries.
+    """
+    # Static convenience wrappers: each forwards to grpc.experimental.* with the
+    # method path and the matching (de)serializers filled in.
+    @staticmethod
+    def GameSession(request_iterator,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        # Arguments are forwarded positionally; note `insecure` is passed before
+        # `call_credentials`, which differs from this signature's parameter order.
+        return grpc.experimental.stream_stream(
+            request_iterator,
+            target,
+            '/openra.rl.RLBridge/GameSession',
+            rl__bridge__pb2.AgentAction.SerializeToString,
+            rl__bridge__pb2.GameObservation.FromString,
+            options,
+            channel_credentials,
+            insecure,
+            call_credentials,
+            compression,
+            wait_for_ready,
+            timeout,
+            metadata,
+            _registered_method=True)
+    @staticmethod
+    def GetState(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        # Same positional forwarding as GameSession, for the unary GetState RPC.
+        return grpc.experimental.unary_unary(
+            request,
+            target,
+            '/openra.rl.RLBridge/GetState',
+            rl__bridge__pb2.StateRequest.SerializeToString,
+            rl__bridge__pb2.GameState.FromString,
+            options,
+            channel_credentials,
+            insecure,
+            call_credentials,
+            compression,
+            wait_for_ready,
+            timeout,
+            metadata,
+            _registered_method=True)

openra_env/mcp_server.py ADDED Viewed

	@@ -0,0 +1,454 @@

+"""Standard MCP server for OpenRA-RL (stdio transport).
+Exposes all game tools over the MCP protocol using FastMCP.
+Connects to the game server WebSocket and proxies tool calls.
+Usage:
+    openra-rl mcp-server
+    openra-rl mcp-server --server-url http://localhost:8000
+Works with OpenClaw, Claude Desktop, and any MCP client.
+"""
+import json
+import logging
+from typing import Any, Optional
+from mcp.server.fastmcp import FastMCP
+# Module logger, registered under the fixed name "openra-rl-mcp".
+logger = logging.getLogger("openra-rl-mcp")
+# Lazy-initialized shared state
+# _client: shared WebSocket client, created on first use by _get_client().
+_client = None
+# _server_url: base HTTP URL of the game server; main() may override it.
+_server_url = "http://localhost:8000"
+# _game_started: set to True by _ensure_game() after the environment was reset.
+_game_started = False
+# FastMCP application; the @mcp.tool() functions in this module register on it.
+mcp = FastMCP(
+    "openra-rl",
+    instructions="Play Command & Conquer: Red Alert via AI tool calls",
+)
+async def _get_client():
+    """Get or create the WebSocket client connection."""
+    global _client
+    if _client is not None:
+        return _client
+    from openra_env.mcp_ws_client import OpenRAMCPClient
+    _client = OpenRAMCPClient(base_url=_server_url, message_timeout_s=300.0)
+    await _client.connect()
+    return _client
+async def _ensure_game() -> None:
+    """Ensure game server is running and a game is started."""
+    global _game_started
+    if _game_started:
+        return
+    # Check if server is healthy
+    import urllib.request
+    import urllib.error
+    try:
+        req = urllib.request.urlopen(f"{_server_url}/health", timeout=3)
+        if req.status == 200:
+            client = await _get_client()
+            await client.reset()
+            _game_started = True
+            return
+    except (urllib.error.URLError, OSError):
+        pass
+    # Try starting Docker container
+    try:
+        from openra_env.cli.docker_manager import (
+            check_docker, is_running, start_server, wait_for_health,
+        )
+        if not is_running():
+            if not check_docker():
+                raise RuntimeError(
+                    "Docker is not available. Start the game server manually: "
+                    "docker run -p 8000:8000 ghcr.io/yxc20089/openra-rl:latest"
+                )
+            port = int(_server_url.split(":")[-1].split("/")[0]) if ":" in _server_url else 8000
+            start_server(port=port)
+            wait_for_health(port=port)
+    except ImportError:
+        raise RuntimeError(
+            f"Game server not reachable at {_server_url}. "
+            "Start it manually: docker run -p 8000:8000 ghcr.io/yxc20089/openra-rl:latest"
+        )
+    client = await _get_client()
+    await client.reset()
+    _game_started = True
+async def _call(tool_name: str, **kwargs) -> Any:
+    """Call a game tool and return the result."""
+    await _ensure_game()
+    client = await _get_client()
+    return await client.call_tool(tool_name, **kwargs)
+def _format(result: Any) -> str:
+    """Format a tool result as a string."""
+    if isinstance(result, str):
+        return result
+    return json.dumps(result, indent=2, default=str)
+# ── Game Lifecycle ─────────────────────────────────────────────────
+@mcp.tool()
+async def start_game(difficulty: str = "normal") -> str:
+    """Start a new Red Alert game. Returns initial game state."""
+    global _game_started
+    _game_started = False
+    await _ensure_game()
+    state = await _call("get_game_state")
+    return _format(state)
+@mcp.tool()
+async def get_game_state() -> str:
+    """Get current game state: economy, units, buildings, enemies, production."""
+    return _format(await _call("get_game_state"))
+@mcp.tool()
+async def advance(ticks: int = 50) -> str:
+    """Advance the game by N ticks (~25 ticks = 1 second).
+    Production, movement, combat, and auto-placement all require game time.
+    Also triggers auto-placement of buildings queued via build_and_place().
+    Typical build times: power plant ~300 ticks, barracks ~500, war factory ~750."""
+    return _format(await _call("advance", ticks=ticks))
+# ── Economy & Info ─────────────────────────────────────────────────
+@mcp.tool()
+async def get_economy() -> str:
+    """Get economy info: cash, ore, power, harvesters."""
+    return _format(await _call("get_economy"))
+@mcp.tool()
+async def get_units() -> str:
+    """Get list of your units with positions, health, type."""
+    return _format(await _call("get_units"))
+@mcp.tool()
+async def get_buildings() -> str:
+    """Get list of your buildings with positions, production, power."""
+    return _format(await _call("get_buildings"))
+@mcp.tool()
+async def get_enemies() -> str:
+    """Get visible enemy units and buildings."""
+    return _format(await _call("get_enemies"))
+@mcp.tool()
+async def get_production() -> str:
+    """Get current production queue and available builds."""
+    return _format(await _call("get_production"))
+@mcp.tool()
+async def get_map_info() -> str:
+    """Get map dimensions, name, and metadata."""
+    return _format(await _call("get_map_info"))
+@mcp.tool()
+async def get_exploration_status() -> str:
+    """Get fog-of-war data: explored %, quadrants, enemy found."""
+    return _format(await _call("get_exploration_status"))
+# ── Knowledge ──────────────────────────────────────────────────────
+@mcp.tool()
+async def lookup_unit(unit_type: str) -> str:
+    """Look up stats for a unit type (e.g. 'e1', '3tnk')."""
+    return _format(await _call("lookup_unit", unit_type=unit_type))
+@mcp.tool()
+async def lookup_building(building_type: str) -> str:
+    """Look up stats for a building type (e.g. 'powr', 'weap')."""
+    return _format(await _call("lookup_building", building_type=building_type))
+@mcp.tool()
+async def lookup_tech_tree(faction: str = "soviet") -> str:
+    """Get full tech tree and build order for a faction ('allied' or 'soviet')."""
+    return _format(await _call("lookup_tech_tree", faction=faction))
+@mcp.tool()
+async def lookup_faction(faction: str) -> str:
+    """Get all available units and buildings for a faction."""
+    return _format(await _call("lookup_faction", faction=faction))
+@mcp.tool()
+async def get_faction_briefing() -> str:
+    """Get ALL units and buildings for your faction with full stats. Best for planning."""
+    return _format(await _call("get_faction_briefing"))
+@mcp.tool()
+async def get_map_analysis() -> str:
+    """Get strategic map analysis: resources, terrain, chokepoints, quadrants."""
+    return _format(await _call("get_map_analysis"))
+@mcp.tool()
+async def batch_lookup(queries: list[dict]) -> str:
+    """Batch multiple lookups. Example: [{"type":"unit","name":"3tnk"}, {"type":"building","name":"weap"}]"""
+    return _format(await _call("batch_lookup", queries=queries))
+# ── Planning ───────────────────────────────────────────────────────
+@mcp.tool()
+async def get_opponent_intel() -> str:
+    """Get intelligence on the AI opponent: difficulty, tendencies, counters."""
+    return _format(await _call("get_opponent_intel"))
+@mcp.tool()
+async def start_planning_phase() -> str:
+    """Start pre-game planning phase with map intel and opponent report."""
+    return _format(await _call("start_planning_phase"))
+@mcp.tool()
+async def end_planning_phase(strategy: str = "") -> str:
+    """End planning phase with your strategy. Begins gameplay."""
+    return _format(await _call("end_planning_phase", strategy=strategy))
+@mcp.tool()
+async def get_planning_status() -> str:
+    """Check if planning phase is active and remaining turns."""
+    return _format(await _call("get_planning_status"))
+# ── Movement ───────────────────────────────────────────────────────
+@mcp.tool()
+async def move_units(unit_ids: str, target_x: int, target_y: int, queued: bool = False) -> str:
+    """Move units to a position. unit_ids: comma-separated IDs, 'all_combat', 'type:e1', etc."""
+    return _format(await _call("move_units", unit_ids=unit_ids, target_x=target_x, target_y=target_y, queued=queued))
+@mcp.tool()
+async def attack_move(unit_ids: str, target_x: int, target_y: int, queued: bool = False) -> str:
+    """Move units, engaging enemies en route. Best for advancing your army."""
+    return _format(await _call("attack_move", unit_ids=unit_ids, target_x=target_x, target_y=target_y, queued=queued))
+@mcp.tool()
+async def attack_target(unit_ids: str, target_actor_id: int, queued: bool = False) -> str:
+    """Order units to attack a specific enemy by actor ID."""
+    return _format(await _call("attack_target", unit_ids=unit_ids, target_actor_id=target_actor_id, queued=queued))
+@mcp.tool()
+async def stop_units(unit_ids: str) -> str:
+    """Stop units from moving or attacking."""
+    return _format(await _call("stop_units", unit_ids=unit_ids))
+# ── Production ─────────────────────────────────────────────────────
+@mcp.tool()
+async def build_unit(unit_type: str, count: int = 1) -> str:
+    """Train units. Requires the right production building (barracks, war factory)."""
+    return _format(await _call("build_unit", unit_type=unit_type, count=count))
+@mcp.tool()
+async def build_structure(building_type: str) -> str:
+    """Start constructing a building (manual placement workflow).
+    Call advance(ticks) to let construction finish, then place_building() to place it.
+    Prefer build_and_place() which handles placement automatically."""
+    return _format(await _call("build_structure", building_type=building_type))
+@mcp.tool()
+async def build_and_place(building_type: str, cell_x: int = 0, cell_y: int = 0) -> str:
+    """Build a structure and auto-place it when construction finishes.
+    Call advance(ticks) after this to let construction complete — placement is automatic.
+    Do NOT call place_building() on buildings queued this way."""
+    return _format(await _call("build_and_place", building_type=building_type, cell_x=cell_x, cell_y=cell_y))
+# ── Building/Unit Actions ─────────────────────────────────────────
+@mcp.tool()
+async def place_building(building_type: str, cell_x: int = 0, cell_y: int = 0) -> str:
+    """Place a completed building on the map (only for build_structure workflow).
+    Do NOT use on buildings queued via build_and_place() — those auto-place via advance().
+    Cell coordinates are optional — engine auto-finds position if omitted."""
+    return _format(await _call("place_building", building_type=building_type, cell_x=cell_x, cell_y=cell_y))
+@mcp.tool()
+async def cancel_production(item_type: str) -> str:
+    """Cancel production of a unit or building type."""
+    return _format(await _call("cancel_production", item_type=item_type))
+@mcp.tool()
+async def deploy_unit(unit_id: int) -> str:
+    """Deploy a unit (e.g. MCV → Construction Yard)."""
+    return _format(await _call("deploy_unit", unit_id=unit_id))
+@mcp.tool()
+async def sell_building(building_id: int) -> str:
+    """Sell a building for partial refund."""
+    return _format(await _call("sell_building", building_id=building_id))
+@mcp.tool()
+async def repair_building(building_id: int) -> str:
+    """Toggle repair on a building."""
+    return _format(await _call("repair_building", building_id=building_id))
+@mcp.tool()
+async def set_rally_point(building_id: int, cell_x: int, cell_y: int) -> str:
+    """Set rally point for a production building. New units go here automatically."""
+    return _format(await _call("set_rally_point", building_id=building_id, cell_x=cell_x, cell_y=cell_y))
+@mcp.tool()
+async def guard_target(unit_ids: str, target_actor_id: int, queued: bool = False) -> str:
+    """Order units to guard a specific actor."""
+    return _format(await _call("guard_target", unit_ids=unit_ids, target_actor_id=target_actor_id, queued=queued))
+@mcp.tool()
+async def set_stance(unit_ids: str, stance: str) -> str:
+    """Set unit stance: 'holdfire', 'returnfire', 'defend', 'attackanything'."""
+    return _format(await _call("set_stance", unit_ids=unit_ids, stance=stance))
+@mcp.tool()
+async def harvest(unit_id: int, cell_x: int = 0, cell_y: int = 0) -> str:
+    """Send a harvester to harvest at a location."""
+    return _format(await _call("harvest", unit_id=unit_id, cell_x=cell_x, cell_y=cell_y))
+@mcp.tool()
+async def power_down(building_id: int) -> str:
+    """Toggle power on a building to save electricity."""
+    return _format(await _call("power_down", building_id=building_id))
+@mcp.tool()
+async def set_primary(building_id: int) -> str:
+    """Set a building as the primary production facility."""
+    return _format(await _call("set_primary", building_id=building_id))
+# ── Placement ──────────────────────────────────────────────────────
+@mcp.tool()
+async def get_valid_placements(building_type: str, max_results: int = 8) -> str:
+    """Get valid placement locations for a building type."""
+    return _format(await _call("get_valid_placements", building_type=building_type, max_results=max_results))
+# ── Unit Groups ────────────────────────────────────────────────────
+@mcp.tool()
+async def assign_group(group_name: str, unit_ids: list[int]) -> str:
+    """Create a named group of units."""
+    return _format(await _call("assign_group", group_name=group_name, unit_ids=unit_ids))
+@mcp.tool()
+async def add_to_group(group_name: str, unit_ids: list[int]) -> str:
+    """Add units to an existing group."""
+    return _format(await _call("add_to_group", group_name=group_name, unit_ids=unit_ids))
+@mcp.tool()
+async def get_groups() -> str:
+    """List all unit groups and their members."""
+    return _format(await _call("get_groups"))
+@mcp.tool()
+async def command_group(
+    group_name: str,
+    command_type: str,
+    target_x: int = 0,
+    target_y: int = 0,
+    target_actor_id: int = 0,
+    queued: bool = False,
+) -> str:
+    """Issue a command to a unit group. command_type: move, attack_move, attack, stop, guard."""
+    kwargs = dict(
+        group_name=group_name, command_type=command_type,
+        target_x=target_x, target_y=target_y,
+        target_actor_id=target_actor_id, queued=queued,
+    )
+    return _format(await _call("command_group", **kwargs))
+# ── Compound ───────────────────────────────────────────────────────
+@mcp.tool()
+async def batch(actions: list[dict]) -> str:
+    """Execute multiple actions simultaneously in one tick. Does NOT advance game time.
+    Cannot contain advance() or query tools. Example: [{"tool":"build_unit","unit_type":"e1"}]"""
+    return _format(await _call("batch", actions=actions))
+@mcp.tool()
+async def plan(steps: list[dict]) -> str:
+    """Execute steps sequentially with state refresh between each.
+    Does NOT advance game time between steps — use advance() standalone for that."""
+    return _format(await _call("plan", steps=steps))
+# ── Utility ────────────────────────────────────────────────────────
+@mcp.tool()
+async def get_replay_path() -> str:
+    """Get the path to the current game's replay file."""
+    return _format(await _call("get_replay_path"))
+@mcp.tool()
+async def surrender() -> str:
+    """Surrender the current game."""
+    return _format(await _call("surrender"))
+# ── Terrain ────────────────────────────────────────────────────────
+@mcp.tool()
+async def get_terrain_at(cell_x: int, cell_y: int) -> str:
+    """Get terrain type at a specific cell."""
+    return _format(await _call("get_terrain_at", cell_x=cell_x, cell_y=cell_y))
+# ── Entry Point ────────────────────────────────────────────────────
+def main(server_url: Optional[str] = None) -> None:
+    """Run the MCP stdio server."""
+    global _server_url
+    if server_url:
+        _server_url = server_url
+    mcp.run(transport="stdio")

openra_env/mcp_ws_client.py ADDED Viewed

	@@ -0,0 +1,231 @@

+"""WebSocket MCP client for OpenRA-RL.
+Talks to the OpenEnv server's /ws endpoint using the correct message
+protocol for MCP tool calls:
+  - {"type": "reset"}                    → reset environment
+  - {"type": "mcp", "data": {...}}       → JSON-RPC MCP call (tools/list, tools/call)
+  - {"type": "step", "data": {...}}      → Gym-style step (OpenRAAction)
+MCPToolClient from OpenEnv sends ListToolsAction via "step" which the
+server tries to parse as OpenRAAction and fails. This client uses the
+correct "mcp" message type instead.
+"""
+import asyncio
+import json
+import os
+from dataclasses import dataclass
+from typing import Any, Optional
+from websockets.asyncio.client import connect as ws_connect
+@dataclass
+class Tool:
+    """MCP tool descriptor."""
+    # Tool name as reported by the server's tools/list response.
+    name: str
+    # Human-readable description from the same response.
+    description: str
+    # Argument schema dict (read from "inputSchema" or "input_schema" by list_tools).
+    input_schema: dict
+class OpenRAMCPClient:
+    """Async WebSocket client for OpenRA-RL with MCP tool support.
+    Usage:
+        async with OpenRAMCPClient("http://localhost:8000") as client:
+            await client.reset()
+            tools = await client.list_tools()
+            result = await client.call_tool("get_game_state")
+            result = await client.call_tool("build_structure", building_type="powr")
+    """
+    def __init__(
+        self,
+        base_url: str = "http://localhost:8000",
+        message_timeout_s: float = 300.0,
+    ):
+        # Convert HTTP URL to WebSocket URL
+        ws_url = base_url.replace("http://", "ws://").replace("https://", "wss://")
+        ws_url = ws_url.rstrip("/")
+        self._ws_url = f"{ws_url}/ws"
+        self._timeout = message_timeout_s
+        self._ws = None
+        self._rpc_id = 0
+        self._tools_cache: Optional[list[Tool]] = None
+    async def connect(self) -> "OpenRAMCPClient":
+        """Connect to the WebSocket endpoint."""
+        if self._ws is not None:
+            return self
+        # Handle proxy bypass for localhost
+        ws_lower = self._ws_url.lower()
+        is_localhost = "localhost" in ws_lower or "127.0.0.1" in ws_lower
+        old_no_proxy = os.environ.get("NO_PROXY")
+        if is_localhost:
+            current = old_no_proxy or ""
+            if "localhost" not in current.lower():
+                os.environ["NO_PROXY"] = (
+                    f"{current},localhost,127.0.0.1" if current else "localhost,127.0.0.1"
+                )
+        try:
+            self._ws = await ws_connect(
+                self._ws_url,
+                open_timeout=30.0,
+                max_size=50 * 1024 * 1024,  # 50 MB
+                ping_interval=None,
+            )
+        except (asyncio.TimeoutError, OSError, ConnectionRefusedError) as e:
+            raise RuntimeError(
+                f"Could not connect to game server at {self._ws_url}: {e}\n"
+                f"  Is the server running? Try: openra-rl server start"
+            ) from e
+        finally:
+            if is_localhost:
+                if old_no_proxy is None:
+                    os.environ.pop("NO_PROXY", None)
+                else:
+                    os.environ["NO_PROXY"] = old_no_proxy
+        return self
+    async def close(self):
+        """Close the WebSocket connection."""
+        if self._ws:
+            try:
+                await self._ws.close()
+            except Exception:
+                pass
+            self._ws = None
+    async def __aenter__(self) -> "OpenRAMCPClient":
+        return await self.connect()
+    async def __aexit__(self, *args):
+        await self.close()
+    async def _send_recv(self, message: dict) -> dict:
+        """Send a message and wait for response."""
+        if self._ws is None:
+            raise RuntimeError("Not connected. Call connect() first.")
+        await self._ws.send(json.dumps(message))
+        raw = await asyncio.wait_for(self._ws.recv(), timeout=self._timeout)
+        return json.loads(raw)
+    # ── Environment Control ───────────────────────────────────────
+    async def reset(self, **kwargs) -> dict:
+        """Reset the environment and start a new game."""
+        response = await self._send_recv({"type": "reset", "data": kwargs})
+        if response.get("type") == "error":
+            raise RuntimeError(f"Reset failed: {response.get('data', {}).get('message', '?')}")
+        return response.get("data", {})
+    # ── MCP Tool Operations ───────────────────────────────────────
+    async def list_tools(self, use_cache: bool = True) -> list[Tool]:
+        """List available MCP tools."""
+        if use_cache and self._tools_cache is not None:
+            return self._tools_cache
+        self._rpc_id += 1
+        rpc_request = {
+            "jsonrpc": "2.0",
+            "method": "tools/list",
+            "params": {},
+            "id": self._rpc_id,
+        }
+        response = await self._send_recv({"type": "mcp", "data": rpc_request})
+        rpc_response = response.get("data", {})
+        if "error" in rpc_response:
+            raise RuntimeError(f"tools/list failed: {rpc_response['error']}")
+        tools_data = rpc_response.get("result", {}).get("tools", [])
+        self._tools_cache = [
+            Tool(
+                name=t.get("name", ""),
+                description=t.get("description", ""),
+                input_schema=t.get("inputSchema", t.get("input_schema", {})),
+            )
+            for t in tools_data
+        ]
+        return self._tools_cache
+    async def call_tool(self, name: str, **kwargs) -> Any:
+        """Call an MCP tool by name with keyword arguments."""
+        self._rpc_id += 1
+        rpc_request = {
+            "jsonrpc": "2.0",
+            "method": "tools/call",
+            "params": {"name": name, "arguments": kwargs},
+            "id": self._rpc_id,
+        }
+        response = await self._send_recv({"type": "mcp", "data": rpc_request})
+        rpc_response = response.get("data", {})
+        if "error" in rpc_response:
+            error = rpc_response["error"]
+            raise RuntimeError(f"Tool '{name}' failed: {error.get('message', error)}")
+        result = rpc_response.get("result")
+        return self._unwrap_mcp_result(result)
+    @staticmethod
+    def _unwrap_mcp_result(result: Any) -> Any:
+        """Unwrap FastMCP tool result to plain Python data.
+        FastMCP wraps results as:
+          {
+            "content": [{"type": "text", "text": "..."}],
+            "structured_content": {"result": <actual_data>},
+            "data": <actual_data>,
+            "is_error": false
+          }
+        Priority: structured_content.result > data > content text > raw result
+        """
+        if not isinstance(result, dict):
+            return result
+        # data field is correct for dicts, buggy ([{}]) for lists.
+        # structured_content.result is correct for lists, empty string for dicts.
+        # Strategy: use data if it's a non-empty dict, else structured_content.result,
+        # else fall back to content text parsing.
+        data = result.get("data")
+        if isinstance(data, dict) and data:
+            return data
+        sc = result.get("structured_content")
+        if isinstance(sc, dict):
+            sc_result = sc.get("result")
+            if sc_result is not None and sc_result != "":
+                return sc_result
+        # data for empty lists (both data=[] and sc.result=[])
+        if isinstance(data, list) and data != [{}]:
+            return data
+        # Fallback: parse content text items
+        content = result.get("content")
+        if isinstance(content, list) and content:
+            texts = []
+            for item in content:
+                if isinstance(item, dict) and item.get("type") == "text":
+                    text = item.get("text", "")
+                    try:
+                        texts.append(json.loads(text))
+                    except (json.JSONDecodeError, TypeError):
+                        texts.append(text)
+                else:
+                    texts.append(item)
+            if len(texts) == 1:
+                return texts[0]
+            return texts
+        return result

openra_env/models.py ADDED Viewed

	@@ -0,0 +1,222 @@

+"""Pydantic models for the OpenRA-RL environment.
+Defines the Action, Observation, and State types used across
+the OpenEnv client-server boundary.
+"""
+from enum import Enum
+from typing import Dict, List, Optional
+from pydantic import Field
+from openenv.core.env_server.types import Action, Observation, State
+# ─── Action Types ─────────────────────────────────────────────────────────────
class ActionType(str, Enum):
    """String-valued enumeration of the command kinds an agent can issue.

    Each member's value is the lower-case form of its name. The set is
    intended to mirror the protobuf ``ActionType`` enum.
    """

    # Idle / no command.
    NO_OP = "no_op"

    # Orders addressed to units.
    MOVE = "move"
    ATTACK_MOVE = "attack_move"
    ATTACK = "attack"
    STOP = "stop"
    HARVEST = "harvest"

    # Production and structure management.
    BUILD = "build"
    TRAIN = "train"
    DEPLOY = "deploy"
    SELL = "sell"
    REPAIR = "repair"
    PLACE_BUILDING = "place_building"
    CANCEL_PRODUCTION = "cancel_production"
    SET_RALLY_POINT = "set_rally_point"

    # Remaining commands, in their original declaration order.
    GUARD = "guard"
    SET_STANCE = "set_stance"
    ENTER_TRANSPORT = "enter_transport"
    UNLOAD = "unload"
    POWER_DOWN = "power_down"
    SET_PRIMARY = "set_primary"
    SURRENDER = "surrender"
class CommandModel(Action):
    """One order for the game engine: a command type plus its operands.

    Only ``action`` is required; every other field falls back to a
    zero / empty / ``False`` default.
    """

    # Required: which kind of command this is.
    action: ActionType = Field(..., description="Type of command to execute")

    # Actors involved in the command.
    actor_id: int = Field(0, description="Subject actor ID (for unit commands)")
    target_actor_id: int = Field(0, description="Target actor ID (for attack, etc.)")

    # Target location, in map cells.
    target_x: int = Field(0, description="Target cell X coordinate")
    target_y: int = Field(0, description="Target cell Y coordinate")

    # Extra parameters.
    item_type: str = Field("", description="Actor type for build/train commands")
    queued: bool = Field(False, description="Queue after current activity vs interrupt")
class OpenRAAction(Action):
    """Per-step action the agent sends to the OpenRA environment.

    An action is a batch of commands executed in the same game step, so
    several orders can be issued at once (e.g., move unit A and build
    unit B). The batch defaults to an empty list.
    """

    commands: List[CommandModel] = Field(description="List of commands to execute this step", default_factory=list)
+# ─── Observation Types ────────────────────────────────────────────────────────
class EconomyInfo(Action):
    """Snapshot of the player's economy; every counter defaults to 0."""

    # NOTE(review): this derives from ``Action`` although it is used as a
    # field of OpenRAObservation — presumably just to reuse the shared base
    # model; confirm this is intentional.

    # Funds and storage.
    cash: int = Field(0, description="Available cash")
    ore: int = Field(0, description="Raw ore in silos")
    resource_capacity: int = Field(0, description="Maximum resource storage")

    # Power balance.
    power_provided: int = Field(0, description="Total power generation")
    power_drained: int = Field(0, description="Total power consumption")

    # Income sources.
    harvester_count: int = Field(0, description="Number of active harvesters")
class MilitaryInfo(Action):
    """Aggregate combat statistics for the player; every counter defaults to 0."""

    # Kill / loss tallies.
    units_killed: int = Field(0, description="Enemy units destroyed")
    units_lost: int = Field(0, description="Own units lost")
    buildings_killed: int = Field(0, description="Enemy buildings destroyed")
    buildings_lost: int = Field(0, description="Own buildings lost")

    # Current force size.
    army_value: int = Field(0, description="Total value of active army")
    active_unit_count: int = Field(0, description="Number of active units")

    # Cost-weighted totals.
    kills_cost: int = Field(0, description="Total cost of enemy units/buildings killed")
    deaths_cost: int = Field(0, description="Total cost of own units/buildings lost")
    assets_value: int = Field(0, description="Total value of all assets (units + buildings)")

    # Miscellaneous counters.
    experience: int = Field(0, description="Player experience points")
    order_count: int = Field(0, description="Total orders issued")
class UnitInfoModel(Action):
    """Description of one unit.

    ``actor_id`` and ``type`` are required; all other fields have defaults.
    The same model is used for own units and for visible enemies.
    """

    # Identity (required).
    actor_id: int = Field(..., description="Unique actor ID")
    type: str = Field(..., description="Actor type (e.g., 'e1', '1tnk', 'harv')")

    # Position, in world and cell coordinates.
    pos_x: int = Field(0, description="World position X")
    pos_y: int = Field(0, description="World position Y")
    cell_x: int = Field(0, description="Cell position X")
    cell_y: int = Field(0, description="Cell position Y")

    # Status.
    hp_percent: float = Field(1.0, description="Health percentage 0.0-1.0")
    is_idle: bool = Field(True, description="Whether the unit is idle")
    current_activity: str = Field("", description="Current activity name")
    owner: str = Field("", description="Owner player internal name")
    can_attack: bool = Field(False, description="Whether the unit can attack")

    # Sprint 4: enriched unit data
    facing: int = Field(0, description="WAngle 0-1023 direction unit faces")
    experience_level: int = Field(0, description="Veterancy level (0=none)")
    stance: int = Field(0, description="0=HoldFire, 1=ReturnFire, 2=Defend, 3=AttackAnything")
    speed: int = Field(0, description="Base movement speed")
    attack_range: int = Field(0, description="Max attack range in WDist units")
    passenger_count: int = Field(-1, description="Cargo count (0 if transport empty, -1 if N/A)")
    is_building: bool = Field(False, description="False for units, helps distinguish in visible_enemies")
class BuildingInfoModel(Action):
    """Description of one building.

    ``actor_id`` and ``type`` are required; all other fields have defaults.
    The same model is used for own buildings and for visible enemy buildings.
    """

    # Identity (required).
    actor_id: int = Field(..., description="Unique actor ID")
    type: str = Field(..., description="Actor type (e.g., 'powr', 'barr', 'weap')")

    # World position and basic status.
    pos_x: int = Field(0, description="World position X")
    pos_y: int = Field(0, description="World position Y")
    hp_percent: float = Field(1.0, description="Health percentage 0.0-1.0")
    owner: str = Field("", description="Owner player internal name")

    # Production state.
    is_producing: bool = Field(False, description="Whether actively producing")
    production_progress: float = Field(0.0, description="Production progress 0.0-1.0")
    producing_item: str = Field("", description="Item currently being produced")
    is_powered: bool = Field(True, description="Whether powered")

    # Sprint 4: enriched building data
    is_repairing: bool = Field(False, description="Actively being repaired")
    sell_value: int = Field(0, description="Refund amount if sold")
    rally_x: int = Field(-1, description="Rally point cell X (-1 if none)")
    rally_y: int = Field(-1, description="Rally point cell Y (-1 if none)")
    power_amount: int = Field(0, description="Power provided (+) or consumed (-)")
    can_produce: List[str] = Field(description="Items this building can produce", default_factory=list)
    cell_x: int = Field(0, description="Cell position X")
    cell_y: int = Field(0, description="Cell position Y")
class ProductionInfoModel(Action):
    """One entry of a production queue.

    ``queue_type`` and ``item`` are required; the progress fields default
    to zero and ``paused`` to ``False``.
    """

    # What is being built, and in which queue (required).
    queue_type: str = Field(..., description="Queue type: Building, Infantry, Vehicle, Aircraft")
    item: str = Field(..., description="Actor type being produced")

    # How far along it is.
    progress: float = Field(0.0, description="Progress 0.0-1.0")
    remaining_ticks: int = Field(0, description="Ticks until completion")
    remaining_cost: int = Field(0, description="Remaining cost")
    paused: bool = Field(False, description="Whether production is paused")
class MapInfoModel(Action):
    """Map metadata: dimensions in cells plus a display name."""

    # Dimensions default to 0 and the name to "" when nothing is supplied.
    width: int = Field(0, description="Map width in cells")
    height: int = Field(0, description="Map height in cells")
    map_name: str = Field("", description="Map display name")
class OpenRAObservation(Observation):
    """Per-step observation produced by the OpenRA environment.

    Holds structured game state intended to match the protobuf
    ``GameObservation``. Every field has a default, so an empty observation
    can be constructed without arguments.
    """

    tick: int = Field(0, description="Current game tick")

    # Player-level summaries.
    economy: EconomyInfo = Field(description="Economic state", default_factory=EconomyInfo)
    military: MilitaryInfo = Field(description="Military statistics", default_factory=MilitaryInfo)

    # Own forces and production.
    units: List[UnitInfoModel] = Field(description="Own units", default_factory=list)
    buildings: List[BuildingInfoModel] = Field(description="Own buildings", default_factory=list)
    production: List[ProductionInfoModel] = Field(description="Active production queues", default_factory=list)

    # Enemy actors currently visible.
    visible_enemies: List[UnitInfoModel] = Field(description="Visible enemy units", default_factory=list)
    visible_enemy_buildings: List[BuildingInfoModel] = Field(description="Visible enemy buildings", default_factory=list)

    # Map and production options.
    map_info: MapInfoModel = Field(description="Map metadata", default_factory=MapInfoModel)
    available_production: List[str] = Field(description="Actor types available for production", default_factory=list)

    # Outcome marker; empty string while no result is set.
    result: str = Field("", description="Game result: 'win', 'lose', 'draw', or ''")

    # Spatial map tensor (base64-encoded float32 array for JSON transport)
    spatial_map: str = Field("", description="Base64-encoded spatial tensor: H×W×C float32 array")
    spatial_channels: int = Field(0, description="Number of spatial channels")

    # Multi-dimensional reward vector (when reward_vector.enabled=True)
    reward_vector: Optional[Dict[str, float]] = Field(
        None,
        description="8-dimensional reward: combat, economy, infrastructure, intelligence, composition, tempo, disruption, outcome",
    )

    # Inherited from Observation:
    # done: bool = False
    # reward: float | None = None
    # metadata: Dict[str, Any] = {}
+# ─── State ────────────────────────────────────────────────────────────────────
class OpenRAState(State):
    """Episode-level metadata for the environment.

    Adds OpenRA-specific fields on top of the base ``State``; every field
    has a default.
    """

    # Game progress and setup.
    game_tick: int = Field(0, description="Current game tick")
    map_name: str = Field("", description="Active map name")
    opponent_type: str = Field("bot_normal", description="Opponent type: bot_easy, bot_normal, bot_hard")

    # Optional pre-game planning phase.
    planning_strategy: str = Field("", description="Agent's pre-game strategy if planning was used")
    planning_turns_used: int = Field(0, description="Number of planning turns used")

    # Inherited from State:
    # episode_id: Optional[str] = None
    # step_count: int = 0

openra_env/opponent_intel.py ADDED Viewed

	@@ -0,0 +1,263 @@

+"""Hardcoded opponent intelligence profiles for OpenRA AI bots.
+Provides scouting reports and behavioral profiles based on the AI difficulty
+level. These are static assessments based on observed AI behavior patterns.
+"""
+from typing import Optional
+# ── Opponent Profiles ──────────────────────────────────────────────────────
+AI_PROFILES: dict[str, dict] = {
+    "beginner": {
+        "difficulty": "Beginner",
+        "display_name": "Beginner AI",
+        "aggressiveness": "minimal",
+        "expansion_tendency": "none",
+        "unit_diversity": "very_low",
+        "build_order_quality": "very_poor",
+        "estimated_win_rate_vs_new_player": 0.10,
+        "typical_first_attack_tick": 150000,
+        "behavioral_traits": [
+            "Almost never attacks — first attack after 100+ minutes",
+            "Builds only basic infantry (rifle soldiers, grenadiers)",
+            "No vehicles, no aircraft, no navy",
+            "Tiny squads of 3-5 units that pose almost no threat",
+            "Stays at starting base, never expands",
+            "Extremely slow economy — one refinery, one harvester",
+            "Does not repair damaged buildings",
+            "Very slow construction speed — 8x slower than normal AI",
+            "Does not use superweapons or advanced tech",
+            "Barely defends base — minimal turrets placed very late",
+        ],
+        "recommended_counters": [
+            "Any military force will win — even 3-4 infantry can overwhelm",
+            "Take your time building economy and army — no rush needed",
+            "Good difficulty for learning basic game mechanics",
+            "Practice build orders without pressure",
+        ],
+        "typical_army_composition": {
+            "infantry": 1.0,
+            "vehicles": 0.0,
+            "aircraft": 0.0,
+            "ships": 0.0,
+        },
+        "recent_match_history": [
+            {"result": "loss", "duration_ticks": 8000, "score": 400},
+            {"result": "loss", "duration_ticks": 6000, "score": 300},
+            {"result": "loss", "duration_ticks": 10000, "score": 600},
+        ],
+    },
+    "easy": {
+        "difficulty": "Easy",
+        "display_name": "Easy AI",
+        "aggressiveness": "low",
+        "expansion_tendency": "very_low",
+        "unit_diversity": "low",
+        "build_order_quality": "poor",
+        "estimated_win_rate_vs_new_player": 0.25,
+        "typical_first_attack_tick": 80000,
+        "behavioral_traits": [
+            "Passive — first attack after ~50 minutes of game time",
+            "Builds basic infantry and some light vehicles (light tanks, APCs)",
+            "No aircraft, no navy, no advanced tech",
+            "Small attack squads of 8-12 units",
+            "Rarely expands beyond starting base",
+            "Slow economy — 1-2 refineries with 2-4 harvesters",
+            "Repairs buildings slowly (5x slower than normal)",
+            "Moderate construction speed — 3x slower than normal AI",
+            "Limited unit caps — cannot mass large armies",
+            "Defenses delayed but eventually builds pillboxes and turrets",
+        ],
+        "recommended_counters": [
+            "Build a small army of 10-15 units and attack before their defenses solidify",
+            "Any combined arms force (infantry + tanks) will overwhelm them",
+            "Economy is their weakness — denying resources cripples them further",
+            "No need to rush — focus on good build order first",
+        ],
+        "typical_army_composition": {
+            "infantry": 0.6,
+            "vehicles": 0.4,
+            "aircraft": 0.0,
+            "ships": 0.0,
+        },
+        "recent_match_history": [
+            {"result": "loss", "duration_ticks": 5000, "score": 800},
+            {"result": "loss", "duration_ticks": 7000, "score": 1200},
+            {"result": "win", "duration_ticks": 15000, "score": 2500},
+        ],
+    },
+    "medium": {
+        "difficulty": "Medium",
+        "display_name": "Medium AI",
+        "aggressiveness": "moderate",
+        "expansion_tendency": "moderate",
+        "unit_diversity": "moderate",
+        "build_order_quality": "decent",
+        "estimated_win_rate_vs_new_player": 0.50,
+        "typical_first_attack_tick": 5000,
+        "behavioral_traits": [
+            "Moderately aggressive — sends first attack around tick 5000 (~3 minutes)",
+            "Builds a balanced ground force (infantry, tanks, artillery)",
+            "No aircraft or naval units — ground-focused only",
+            "Medium-sized attack squads of 20-35 units",
+            "Will expand to a second base if resources allow",
+            "Decent economy — 2-3 refineries with up to 6 harvesters",
+            "Repairs buildings at normal speed",
+            "Slightly slower construction than Hard/Brutal AI",
+            "Builds advanced tech eventually (tech centers delayed ~8 minutes)",
+            "Uses superweapons if available but slowly",
+            "Limited production capacity — fewer factories than Hard AI",
+        ],
+        "recommended_counters": [
+            "Build early defenses — first attack comes around tick 5000",
+            "Scout by tick 2000 to identify expansion attempts",
+            "Match their economy with 2+ refineries minimum",
+            "Combined arms with anti-armor focus works well",
+            "Their lack of air power means you can skip AA early",
+            "Deny expansion to keep resource advantage",
+        ],
+        "typical_army_composition": {
+            "infantry": 0.35,
+            "vehicles": 0.65,
+            "aircraft": 0.0,
+            "ships": 0.0,
+        },
+        "recent_match_history": [
+            {"result": "win", "duration_ticks": 7000, "score": 3200},
+            {"result": "loss", "duration_ticks": 9000, "score": 3800},
+            {"result": "win", "duration_ticks": 8000, "score": 4200},
+            {"result": "loss", "duration_ticks": 10000, "score": 3500},
+        ],
+    },
+    "normal": {
+        "difficulty": "Normal",
+        "display_name": "Normal AI",
+        "aggressiveness": "high",
+        "expansion_tendency": "high",
+        "unit_diversity": "high",
+        "build_order_quality": "good",
+        "estimated_win_rate_vs_new_player": 0.65,
+        "typical_first_attack_tick": 1500,
+        "behavioral_traits": [
+            "Very aggressive — sends attack waves frequently starting around tick 1500",
+            "Masters all different unit types (infantry, tanks, aircraft, ships)",
+            "Eager to open a second base near your position or mid-way on the map",
+            "Strong economy — builds 2-3 refineries with multiple harvesters",
+            "Rebuilds destroyed buildings quickly and adapts composition",
+            "Will target your harvesters and exposed, undefended buildings",
+            "Uses combined arms effectively (infantry + vehicles + air strikes)",
+            "Scouts your base early and adjusts strategy based on what you build",
+        ],
+        "recommended_counters": [
+            "Build early defenses (turrets) at base entrance — first attack comes ~tick 1500",
+            "Scout early (by tick 500) to find and deny expansion attempts",
+            "Send a small raiding force to destroy their second base before it's established",
+            "Maintain power surplus at all times — their attacks exploit brownouts",
+            "Build anti-air (SAM/AA Gun) by mid-game to counter their aircraft",
+            "Match their economy: build 2+ refineries minimum to keep up",
+            "Don't turtle — they will out-expand and out-resource you",
+        ],
+        "typical_army_composition": {
+            "infantry": 0.30,
+            "vehicles": 0.45,
+            "aircraft": 0.15,
+            "ships": 0.10,
+        },
+        "recent_match_history": [
+            {"result": "win", "duration_ticks": 8000, "score": 5200},
+            {"result": "win", "duration_ticks": 6500, "score": 4800},
+            {"result": "loss", "duration_ticks": 10000, "score": 6100},
+            {"result": "win", "duration_ticks": 7200, "score": 5500},
+            {"result": "loss", "duration_ticks": 9000, "score": 4000},
+        ],
+    },
+    "hard": {
+        "difficulty": "Hard",
+        "display_name": "Hard AI",
+        "aggressiveness": "very_high",
+        "expansion_tendency": "very_high",
+        "unit_diversity": "very_high",
+        "build_order_quality": "optimal",
+        "estimated_win_rate_vs_new_player": 0.85,
+        "typical_first_attack_tick": 1000,
+        "behavioral_traits": [
+            "Extremely aggressive — attacks within first 1000 ticks with combined forces",
+            "Optimal build orders — wastes no time or resources, perfect macro",
+            "Expands aggressively with multiple bases across the map",
+            "Uses superweapons if tech allows (nuclear missile, iron curtain)",
+            "Coordinates multi-front attacks simultaneously from different angles",
+            "Excellent at resource denial — prioritizes harvesters and refineries",
+            "Rapid tech progression to advanced units (Mammoth tanks, MiGs)",
+            "Will cheat slightly on resource gathering speed",
+        ],
+        "recommended_counters": [
+            "MUST build defenses immediately — turrets before second refinery",
+            "Scout by tick 300 — their expansion is very fast",
+            "Deny expansions aggressively or you'll be completely out-resourced",
+            "Build multiple production buildings for faster unit output",
+            "Never let power go negative — they will exploit it ruthlessly",
+            "Mix anti-air into every attack group — they will use aircraft",
+            "Prepare for superweapons by mid-game — keep army spread out",
+        ],
+        "typical_army_composition": {
+            "infantry": 0.20,
+            "vehicles": 0.45,
+            "aircraft": 0.25,
+            "ships": 0.10,
+        },
+        "recent_match_history": [
+            {"result": "win", "duration_ticks": 5000, "score": 7200},
+            {"result": "win", "duration_ticks": 4500, "score": 6800},
+            {"result": "win", "duration_ticks": 6000, "score": 8100},
+            {"result": "loss", "duration_ticks": 12000, "score": 9500},
+            {"result": "win", "duration_ticks": 5500, "score": 7500},
+        ],
+    },
+}
def get_opponent_profile(difficulty: str) -> Optional[dict]:
    """Get the opponent intelligence profile for a given AI difficulty.

    The lookup is case-insensitive and ignores surrounding whitespace.

    Args:
        difficulty: One of "beginner", "easy", "medium", "normal", "hard".
                   Also accepts a leading "bot_" prefix (e.g. "bot_hard"),
                   which is stripped before the lookup.

    Returns:
        Profile dict or None if not found.
    """
    # removeprefix drops "bot_" only at the start of the name; the earlier
    # str.replace call would also have removed it from the middle of a name.
    key = difficulty.strip().lower().removeprefix("bot_")
    return AI_PROFILES.get(key)
def get_opponent_summary(difficulty: str) -> str:
    """Get a human-readable scouting report for LLM consumption.

    Args:
        difficulty: Difficulty name, resolved through get_opponent_profile.

    Returns:
        A multi-line text report built from the profile, or the message
        "Unknown AI difficulty: <difficulty>" when no profile matches.
    """
    profile = get_opponent_profile(difficulty)
    if profile is None:
        return f"Unknown AI difficulty: {difficulty}"

    traits = "\n".join(f"  - {t}" for t in profile["behavioral_traits"])
    counters = "\n".join(f"  - {c}" for c in profile["recommended_counters"])

    history = profile["recent_match_history"]
    total = len(history)
    wins = sum(1 for m in history if m["result"] == "win")
    # Guard the average so a profile with no recorded matches reports 0
    # instead of raising ZeroDivisionError.
    avg_score = sum(m["score"] for m in history) // total if total else 0

    # Only unit classes with a non-zero share are listed in the army mix.
    army = profile["typical_army_composition"]
    army_str = ", ".join(f"{k}: {v:.0%}" for k, v in army.items() if v > 0)

    return (
        f"## Opponent Scouting Report: {profile['display_name']}\n"
        f"Aggressiveness: {profile['aggressiveness']}\n"
        f"Expansion tendency: {profile['expansion_tendency']}\n"
        f"Unit diversity: {profile['unit_diversity']}\n"
        f"Build order quality: {profile['build_order_quality']}\n"
        f"Estimated first attack: ~tick {profile['typical_first_attack_tick']}\n"
        f"Win rate vs new players: {profile['estimated_win_rate_vs_new_player']:.0%}\n"
        # Every non-win record is counted on the "L" side of the tally.
        f"Recent record: {wins}W-{total - wins}L (avg score: {avg_score})\n"
        f"Typical army mix: {army_str}\n"
        f"\nBehavioral traits:\n{traits}\n"
        f"\nRecommended counters:\n{counters}"
    )