Spaces:
Running
Running
Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- Dockerfile +76 -0
- README.md +193 -5
- __init__.py +13 -0
- client.py +62 -0
- models.py +45 -0
- openenv.yaml +7 -0
- pyproject.toml +51 -0
- server/Gym/__init__.py +0 -0
- server/Gym/environment.py +217 -0
- server/Gym/environments/__init__.py +802 -0
- server/Gym/environments/ab_program_simulation/__init__.py +1 -0
- server/Gym/environments/ab_program_simulation/environment.py +109 -0
- server/Gym/environments/add_multiple_divisible_counting/__init__.py +1 -0
- server/Gym/environments/add_multiple_divisible_counting/environment.py +122 -0
- server/Gym/environments/addition_table/__init__.py +1 -0
- server/Gym/environments/addition_table/environment.py +132 -0
- server/Gym/environments/almost_complete_graph_cycle_counting/__init__.py +1 -0
- server/Gym/environments/almost_complete_graph_cycle_counting/environment.py +94 -0
- server/Gym/environments/and_or_sequence_counting/__init__.py +1 -0
- server/Gym/environments/and_or_sequence_counting/environment.py +147 -0
- server/Gym/environments/anti_palindromic_substring_counting/__init__.py +1 -0
- server/Gym/environments/anti_palindromic_substring_counting/environment.py +142 -0
- server/Gym/environments/axis_k_center/__init__.py +1 -0
- server/Gym/environments/axis_k_center/environment.py +129 -0
- server/Gym/environments/baj_bytecomputer/__init__.py +1 -0
- server/Gym/environments/baj_bytecomputer/environment.py +109 -0
- server/Gym/environments/banned_point_superset_path_counting/__init__.py +1 -0
- server/Gym/environments/banned_point_superset_path_counting/environment.py +170 -0
- server/Gym/environments/banyan_heart/__init__.py +1 -0
- server/Gym/environments/banyan_heart/environment.py +165 -0
- server/Gym/environments/bez_minimalist_security/__init__.py +1 -0
- server/Gym/environments/bez_minimalist_security/environment.py +221 -0
- server/Gym/environments/bezout_identity/__init__.py +1 -0
- server/Gym/environments/bezout_identity/environment.py +134 -0
- server/Gym/environments/binario/__init__.py +1 -0
- server/Gym/environments/binario/environment.py +188 -0
- server/Gym/environments/binario_no_adjacency_requirement/__init__.py +1 -0
- server/Gym/environments/binario_no_adjacency_requirement/environment.py +114 -0
- server/Gym/environments/binary_alternation/__init__.py +1 -0
- server/Gym/environments/binary_alternation/environment.py +121 -0
- server/Gym/environments/binary_linear_equation_solution_counting/__init__.py +1 -0
- server/Gym/environments/binary_linear_equation_solution_counting/environment.py +187 -0
- server/Gym/environments/binary_tree_leaf_num_expectation/__init__.py +1 -0
- server/Gym/environments/binary_tree_leaf_num_expectation/environment.py +76 -0
- server/Gym/environments/bit_equation_counting/__init__.py +1 -0
- server/Gym/environments/bit_equation_counting/environment.py +91 -0
- server/Gym/environments/bitand_zero_path_counting/__init__.py +1 -0
- server/Gym/environments/bitand_zero_path_counting/environment.py +135 -0
- server/Gym/environments/bitwise_operation_sequence_counting/__init__.py +1 -0
- server/Gym/environments/bitwise_operation_sequence_counting/environment.py +150 -0
Dockerfile
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
# Multi-stage build using openenv-base
|
| 8 |
+
# This Dockerfile is flexible and works for both:
|
| 9 |
+
# - In-repo environments (with local src/core)
|
| 10 |
+
# - Standalone environments (with openenv-core from pip)
|
| 11 |
+
# The build script (openenv build) handles context detection and sets appropriate build args.
|
| 12 |
+
|
| 13 |
+
ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
|
| 14 |
+
FROM ${BASE_IMAGE} AS builder
|
| 15 |
+
|
| 16 |
+
WORKDIR /app
|
| 17 |
+
|
| 18 |
+
# Build argument to control whether we're building standalone or in-repo
|
| 19 |
+
ARG BUILD_MODE=in-repo
|
| 20 |
+
ARG ENV_NAME=RLVE_Gym
|
| 21 |
+
|
| 22 |
+
# Copy environment code (always at root of build context)
|
| 23 |
+
COPY . /app/env
|
| 24 |
+
|
| 25 |
+
# For in-repo builds, openenv-core is already in the pyproject.toml dependencies
|
| 26 |
+
# For standalone builds, openenv-core will be installed from pip via pyproject.toml
|
| 27 |
+
WORKDIR /app/env
|
| 28 |
+
|
| 29 |
+
# Ensure uv is available (for local builds where base image lacks it)
|
| 30 |
+
RUN if ! command -v uv >/dev/null 2>&1; then \
|
| 31 |
+
curl -LsSf https://astral.sh/uv/install.sh | sh && \
|
| 32 |
+
mv /root/.local/bin/uv /usr/local/bin/uv && \
|
| 33 |
+
mv /root/.local/bin/uvx /usr/local/bin/uvx; \
|
| 34 |
+
fi
|
| 35 |
+
|
| 36 |
+
# Install dependencies using uv sync
|
| 37 |
+
# If uv.lock exists, use it; otherwise resolve on the fly
|
| 38 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 39 |
+
if [ -f uv.lock ]; then \
|
| 40 |
+
uv sync --frozen --no-install-project --no-editable; \
|
| 41 |
+
else \
|
| 42 |
+
uv sync --no-install-project --no-editable; \
|
| 43 |
+
fi
|
| 44 |
+
|
| 45 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 46 |
+
if [ -f uv.lock ]; then \
|
| 47 |
+
uv sync --frozen --no-editable; \
|
| 48 |
+
else \
|
| 49 |
+
uv sync --no-editable; \
|
| 50 |
+
fi
|
| 51 |
+
|
| 52 |
+
# Final runtime stage
|
| 53 |
+
FROM ${BASE_IMAGE}
|
| 54 |
+
|
| 55 |
+
WORKDIR /app
|
| 56 |
+
|
| 57 |
+
# Copy the virtual environment from builder
|
| 58 |
+
COPY --from=builder /app/env/.venv /app/.venv
|
| 59 |
+
|
| 60 |
+
# Copy the environment code
|
| 61 |
+
COPY --from=builder /app/env /app/env
|
| 62 |
+
|
| 63 |
+
# Set PATH to use the virtual environment
|
| 64 |
+
ENV PATH="/app/.venv/bin:$PATH"
|
| 65 |
+
|
| 66 |
+
# Set PYTHONPATH so imports work correctly
|
| 67 |
+
ENV PYTHONPATH="/app/env:$PYTHONPATH"
|
| 68 |
+
|
| 69 |
+
# Health check
|
| 70 |
+
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
| 71 |
+
CMD curl -f http://localhost:8000/health || exit 1
|
| 72 |
+
|
| 73 |
+
# Run the FastAPI server
|
| 74 |
+
# The module path is constructed to work with the /app/env structure
|
| 75 |
+
ENV ENABLE_WEB_INTERFACE=true
|
| 76 |
+
CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
|
README.md
CHANGED
|
@@ -1,10 +1,198 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Rlve Gym Environment Server
|
| 3 |
+
emoji: 📡
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: blue
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
+
app_port: 8000
|
| 9 |
+
base_path: /web
|
| 10 |
+
tags:
|
| 11 |
+
- openenv
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# Rlve Gym Environment
|
| 15 |
+
|
| 16 |
+
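An OpenEnv environment that serves verifiable problems: observations carry a problem prompt (`problem_input`) and a verifier result (`verifier_result`), and actions submit a model output (`output`). It can also be used for testing the env APIs and demonstrating environment usage patterns.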
|
| 17 |
+
|
| 18 |
+
## Quick Start
|
| 19 |
+
|
| 20 |
+
The simplest way to use the Rlve Gym environment is through the `RlveGymEnv` class:
|
| 21 |
+
|
| 22 |
+
```python
|
| 23 |
+
from RLVE_Gym import RlveGymAction, RlveGymEnv
|
| 24 |
+
|
| 25 |
+
try:
|
| 26 |
+
# Create environment from Docker image
|
| 27 |
+
RLVE_Gymenv = RlveGymEnv.from_docker_image("RLVE_Gym-env:latest")
|
| 28 |
+
|
| 29 |
+
# Reset
|
| 30 |
+
result = RLVE_Gymenv.reset()
|
| 31 |
+
print(f"Reset: {result.observation.problem_input}")
|
| 32 |
+
|
| 33 |
+
# Send multiple messages
|
| 34 |
+
messages = ["Hello, World!", "Testing echo", "Final message"]
|
| 35 |
+
|
| 36 |
+
for msg in messages:
|
| 37 |
+
result = RLVE_Gymenv.step(RlveGymAction(output=msg))
|
| 38 |
+
print(f"Sent: '{msg}'")
|
| 39 |
+
print(f" → Verifier result: {result.observation.verifier_result}")
|
| 40 |
+
print(f" → Success: {result.observation.success}")
|
| 41 |
+
print(f" → Reward: {result.reward}")
|
| 42 |
+
|
| 43 |
+
finally:
|
| 44 |
+
# Always clean up
|
| 45 |
+
RLVE_Gymenv.close()
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
That's it! The `RlveGymEnv.from_docker_image()` method handles:
|
| 49 |
+
- Starting the Docker container
|
| 50 |
+
- Waiting for the server to be ready
|
| 51 |
+
- Connecting to the environment
|
| 52 |
+
- Container cleanup when you call `close()`
|
| 53 |
+
|
| 54 |
+
## Building the Docker Image
|
| 55 |
+
|
| 56 |
+
Before using the environment, you need to build the Docker image:
|
| 57 |
+
|
| 58 |
+
```bash
|
| 59 |
+
# From project root
|
| 60 |
+
docker build -t RLVE_Gym-env:latest -f server/Dockerfile .
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
## Deploying to Hugging Face Spaces
|
| 64 |
+
|
| 65 |
+
You can easily deploy your OpenEnv environment to Hugging Face Spaces using the `openenv push` command:
|
| 66 |
+
|
| 67 |
+
```bash
|
| 68 |
+
# From the environment directory (where openenv.yaml is located)
|
| 69 |
+
openenv push
|
| 70 |
+
|
| 71 |
+
# Or specify options
|
| 72 |
+
openenv push --namespace my-org --private
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
The `openenv push` command will:
|
| 76 |
+
1. Validate that the directory is an OpenEnv environment (checks for `openenv.yaml`)
|
| 77 |
+
2. Prepare a custom build for Hugging Face Docker space (enables web interface)
|
| 78 |
+
3. Upload to Hugging Face (ensuring you're logged in)
|
| 79 |
+
|
| 80 |
+
### Prerequisites
|
| 81 |
+
|
| 82 |
+
- Authenticate with Hugging Face: The command will prompt for login if not already authenticated
|
| 83 |
+
|
| 84 |
+
### Options
|
| 85 |
+
|
| 86 |
+
- `--directory`, `-d`: Directory containing the OpenEnv environment (defaults to current directory)
|
| 87 |
+
- `--repo-id`, `-r`: Repository ID in format 'username/repo-name' (defaults to 'username/env-name' from openenv.yaml)
|
| 88 |
+
- `--base-image`, `-b`: Base Docker image to use (overrides Dockerfile FROM)
|
| 89 |
+
- `--private`: Deploy the space as private (default: public)
|
| 90 |
+
|
| 91 |
+
### Examples
|
| 92 |
+
|
| 93 |
+
```bash
|
| 94 |
+
# Push to your personal namespace (defaults to username/env-name from openenv.yaml)
|
| 95 |
+
openenv push
|
| 96 |
+
|
| 97 |
+
# Push to a specific repository
|
| 98 |
+
openenv push --repo-id my-org/my-env
|
| 99 |
+
|
| 100 |
+
# Push with a custom base image
|
| 101 |
+
openenv push --base-image ghcr.io/meta-pytorch/openenv-base:latest
|
| 102 |
+
|
| 103 |
+
# Push as a private space
|
| 104 |
+
openenv push --private
|
| 105 |
+
|
| 106 |
+
# Combine options
|
| 107 |
+
openenv push --repo-id my-org/my-env --base-image custom-base:latest --private
|
| 108 |
+
```
|
| 109 |
+
|
| 110 |
+
After deployment, your space will be available at:
|
| 111 |
+
`https://huggingface.co/spaces/<repo-id>`
|
| 112 |
+
|
| 113 |
+
The deployed space includes:
|
| 114 |
+
- **Web Interface** at `/web` - Interactive UI for exploring the environment
|
| 115 |
+
- **API Documentation** at `/docs` - Full OpenAPI/Swagger interface
|
| 116 |
+
- **Health Check** at `/health` - Container health monitoring
|
| 117 |
+
|
| 118 |
+
## Environment Details
|
| 119 |
+
|
| 120 |
+
### Action
|
| 121 |
+
**RlveGymAction**: Contains a single field
|
| 122 |
+
- `output` (str) - The model output submitted to the environment
|
| 123 |
+
|
| 124 |
+
### Observation
|
| 125 |
+
**RlveGymObservation**: Contains the problem prompt, the verifier result, and status fields
|
| 126 |
+
- `problem_input` (str) - The problem prompt
|
| 127 |
+
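- `verifier_result` (dict) - Mapping from result names to numeric values (float or int)
- `success` (bool) and `message` (str) - Status fields returned with the observation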
|
| 128 |
+
- `reward` (float) - Reward based on message length (length × 0.1)
|
| 129 |
+
- `done` (bool) - Always False for echo environment
|
| 130 |
+
- `metadata` (dict) - Additional info like step count
|
| 131 |
+
|
| 132 |
+
### Reward
|
| 133 |
+
The reward is calculated as: `message_length × 0.1`
|
| 134 |
+
- "Hi" → reward: 0.2
|
| 135 |
+
- "Hello, World!" → reward: 1.3
|
| 136 |
+
- Empty message → reward: 0.0
|
| 137 |
+
|
| 138 |
+
## Advanced Usage
|
| 139 |
+
|
| 140 |
+
### Connecting to an Existing Server
|
| 141 |
+
|
| 142 |
+
If you already have a Rlve Gym environment server running, you can connect directly:
|
| 143 |
+
|
| 144 |
+
```python
|
| 145 |
+
from RLVE_Gym import RlveGymAction, RlveGymEnv
|
| 146 |
+
|
| 147 |
+
# Connect to existing server
|
| 148 |
+
RLVE_Gymenv = RlveGymEnv(base_url="<ENV_HTTP_URL_HERE>")
|
| 149 |
+
|
| 150 |
+
# Use as normal
|
| 151 |
+
result = RLVE_Gymenv.reset()
|
| 152 |
+
result = RLVE_Gymenv.step(RlveGymAction(output="Hello!"))
|
| 153 |
+
```
|
| 154 |
+
|
| 155 |
+
Note: When connecting to an existing server, `RLVE_Gymenv.close()` will NOT stop the server.
|
| 156 |
+
|
| 157 |
+
## Development & Testing
|
| 158 |
+
|
| 159 |
+
### Direct Environment Testing
|
| 160 |
+
|
| 161 |
+
Test the environment logic directly without starting the HTTP server:
|
| 162 |
+
|
| 163 |
+
```bash
|
| 164 |
+
# From the server directory
|
| 165 |
+
python3 server/RLVE_Gym_environment.py
|
| 166 |
+
```
|
| 167 |
+
|
| 168 |
+
This verifies that:
|
| 169 |
+
- Environment resets correctly
|
| 170 |
+
- Step executes actions properly
|
| 171 |
+
- State tracking works
|
| 172 |
+
- Rewards are calculated correctly
|
| 173 |
+
|
| 174 |
+
### Running Locally
|
| 175 |
+
|
| 176 |
+
Run the server locally for development:
|
| 177 |
+
|
| 178 |
+
```bash
|
| 179 |
+
uvicorn server.app:app --reload
|
| 180 |
+
```
|
| 181 |
+
|
| 182 |
+
## Project Structure
|
| 183 |
+
|
| 184 |
+
```
|
| 185 |
+
RLVE_Gym/
|
| 186 |
+
├── __init__.py # Module exports
|
| 187 |
+
├── README.md # This file
|
| 188 |
+
├── openenv.yaml # OpenEnv manifest
|
| 189 |
+
├── pyproject.toml # Project metadata and dependencies
|
| 190 |
+
├── uv.lock # Locked dependencies (generated)
|
| 191 |
+
├── client.py # RlveGymEnv client implementation
|
| 192 |
+
├── models.py # Action and Observation models
|
| 193 |
+
└── server/
|
| 194 |
+
├── __init__.py # Server module exports
|
| 195 |
+
├── RLVE_Gym_environment.py # Core environment logic
|
| 196 |
+
├── app.py # FastAPI application
|
| 197 |
+
└── Dockerfile # Container image definition
|
| 198 |
+
```
|
__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Rlve Gym Environment - A simple test environment for HTTP server."""
|
| 8 |
+
|
| 9 |
+
from .client import RlveGymEnv
|
| 10 |
+
from .models import RlveGymAction, RlveGymObservation
|
| 11 |
+
|
| 12 |
+
__all__ = ["RlveGymAction", "RlveGymObservation", "RlveGymEnv"]
|
| 13 |
+
|
client.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Rlve Gym Environment HTTP Client.
|
| 9 |
+
|
| 10 |
+
This module provides the client for connecting to a Rlve Gym Environment server
|
| 11 |
+
over HTTP.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from typing import Dict
|
| 15 |
+
|
| 16 |
+
from openenv_core.client_types import StepResult
|
| 17 |
+
from openenv_core.http_env_client import HTTPEnvClient
|
| 18 |
+
|
| 19 |
+
from .models import RlveGymState, RlveGymAction, RlveGymObservation
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class RlveGymEnv(HTTPEnvClient[RlveGymAction, RlveGymObservation]):
    """
    HTTP client for the Rlve Gym Environment.

    Supplies the three (de)serialization hooks used when talking to a
    RlveGymEnvironment HTTP server: building the step request body, parsing a
    step/reset response, and parsing a state response.
    """

    def _step_payload(self, action: RlveGymAction) -> Dict:
        """
        Build the JSON body of a step request.

        Args:
            action: The action whose ``output`` string is sent to the server.

        Returns:
            A dictionary with the single key ``"output"``.
        """
        return dict(output=action.output)

    def _parse_result(self, payload: Dict) -> StepResult[RlveGymObservation]:
        """
        Turn a server response into a StepResult[RlveGymObservation].

        Args:
            payload: Decoded JSON response. It must contain an ``"observation"``
                mapping whose keys match the RlveGymObservation fields;
                ``"reward"`` and ``"done"`` are optional.

        Returns:
            StepResult wrapping the observation; ``reward`` is None and ``done``
            is False when the server omitted them.
        """
        observation_fields = payload["observation"]
        return StepResult(
            observation=RlveGymObservation(**observation_fields),
            reward=payload.get("reward"),
            done=payload.get("done", False),
        )

    def _parse_state(self, payload: Dict) -> RlveGymState:
        """
        Turn a state response into a RlveGymState.

        Args:
            payload: Decoded JSON response whose keys match the RlveGymState fields.

        Returns:
            The reconstructed RlveGymState.
        """
        return RlveGymState(**payload)
|
models.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Data models for the Rlve Gym Environment.
|
| 9 |
+
|
| 10 |
+
The RLVE_Gym environment presents a problem prompt, accepts a model output as the action, and reports a verifier result.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from dataclasses import dataclass
|
| 14 |
+
|
| 15 |
+
from openenv_core.env_server.types import Action, Observation, State
|
| 16 |
+
|
| 17 |
+
from typing import Dict, Union
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@dataclass(kw_only=True)
class RlveGymState(State):
    """State of the RLVE_Gym: the seed, the current problem, and running counters."""

    # Seed associated with this state (required, keyword-only).
    seed: int
    # Prompt of the current problem; None until one is available.
    # Annotated as optional because the default is None.
    problem_input: Union[str, None] = None

    # NOTE(review): presumably the number of outputs verified so far and the
    # sum of their accuracy values — confirm against the server code.
    num_samples: int = 0
    sum_accuracy: int = 0
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
@dataclass(kw_only=True)
class RlveGymAction(Action):
    """Action for the RLVE_Gym environment: a single model output string."""

    # Raw text produced by the model (required, keyword-only).
    output: str
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
@dataclass(kw_only=True)
class RlveGymObservation(Observation):
    """Observation returned by the RLVE_Gym environment."""

    # Prompt of the problem this observation refers to.
    problem_input: str
    # Mapping of result names to numeric values.
    # NOTE(review): same type as VerifiableEnvironment.verifier() returns —
    # presumably populated from it; confirm in the server code.
    verifier_result: Dict[str, Union[float, int]]

    # Status flag and accompanying text reported with the observation.
    success: bool
    message: str
|
openenv.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
spec_version: 1
|
| 2 |
+
name: RLVE_Gym
|
| 3 |
+
type: space
|
| 4 |
+
runtime: fastapi
|
| 5 |
+
app: server.app:app
|
| 6 |
+
port: 8000
|
| 7 |
+
|
pyproject.toml
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
[build-system]
|
| 8 |
+
requires = ["setuptools>=45", "wheel"]
|
| 9 |
+
build-backend = "setuptools.build_meta"
|
| 10 |
+
|
| 11 |
+
[project]
|
| 12 |
+
name = "openenv-RLVE_Gym"
|
| 13 |
+
version = "0.1.0"
|
| 14 |
+
description = "Rlve Gym environment for OpenEnv"
|
| 15 |
+
requires-python = ">=3.10"
|
| 16 |
+
dependencies = [
|
| 17 |
+
# Core OpenEnv dependencies (required for server functionality)
|
| 18 |
+
# "openenv-core @ git+https://github.com/meta-pytorch/OpenEnv.git@main#subdirectory=src/core",
|
| 19 |
+
"openenv-core>=0.1.0",
|
| 20 |
+
"fastapi>=0.115.0",
|
| 21 |
+
"pydantic>=2.0.0",
|
| 22 |
+
"uvicorn>=0.24.0",
|
| 23 |
+
"requests>=2.31.0",
|
| 24 |
+
# Environment-specific dependencies
|
| 25 |
+
# Add all dependencies needed for your environment here
|
| 26 |
+
# Examples:
|
| 27 |
+
# "numpy>=1.19.0",
|
| 28 |
+
# "torch>=2.0.0",
|
| 29 |
+
# "gymnasium>=0.29.0",
|
| 30 |
+
# "openspiel>=1.0.0",
|
| 31 |
+
# "smolagents>=1.22.0,<2",
|
| 32 |
+
]
|
| 33 |
+
|
| 34 |
+
[project.optional-dependencies]
|
| 35 |
+
dev = [
|
| 36 |
+
"pytest>=8.0.0",
|
| 37 |
+
"pytest-cov>=4.0.0",
|
| 38 |
+
]
|
| 39 |
+
|
| 40 |
+
[project.scripts]
|
| 41 |
+
# Server entry point - enables running via: uv run --project . server
|
| 42 |
+
# or: python -m RLVE_Gym.server.app
|
| 43 |
+
server = "RLVE_Gym.server.app:main"
|
| 44 |
+
|
| 45 |
+
[tool.setuptools]
|
| 46 |
+
packages = ["RLVE_Gym", "RLVE_Gym.server"]
|
| 47 |
+
package-dir = { "RLVE_Gym" = ".", "RLVE_Gym.server" = "server" }
|
| 48 |
+
|
| 49 |
+
[tool.setuptools.packages.find]
|
| 50 |
+
where = ["."]
|
| 51 |
+
|
server/Gym/__init__.py
ADDED
|
File without changes
|
server/Gym/environment.py
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
import copy
|
| 4 |
+
from abc import ABC, abstractmethod
|
| 5 |
+
from typing import Dict, Optional, Tuple, Any, Union
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
import functools
|
| 10 |
+
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
|
| 11 |
+
|
| 12 |
+
class TimeoutException(Exception):
    """Raised when a call wrapped by the ``timeout`` decorator exceeds its time limit."""
|
| 14 |
+
|
| 15 |
+
def timeout(seconds):
    """
    Build a decorator that limits how long a call may take.

    The decorated function runs on a single-worker thread pool and the caller
    waits at most ``seconds`` for its result.

    Args:
        seconds: Maximum time to wait for the wrapped call, in seconds.

    Returns:
        A decorator. The wrapped function returns the original result,
        re-raises whatever the original raised, or raises TimeoutException
        when the wait expires.
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            pool = ThreadPoolExecutor(max_workers=1)
            pending = pool.submit(func, *args, **kwargs)
            try:
                return pending.result(timeout=seconds)
            except FutureTimeoutError:
                message = f"Function timed out after {seconds} seconds"
                raise TimeoutException(message)
            finally:
                # Do not wait for the worker: a call that is still running
                # cannot be interrupted, so only unstarted work is cancelled.
                pool.shutdown(wait=False, cancel_futures=True)
        return wrapper
    return decorator
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
import torch
|
| 33 |
+
import random
|
| 34 |
+
import numpy as np
|
| 35 |
+
def manual_seed(args_or_seed: int, fix_cudnn=False):
    """
    Seed every random number generator this module relies on.

    Seeds Python's ``random``, NumPy, and PyTorch (CPU and all CUDA devices),
    and stores the seed in the ``PYTHONHASHSEED`` environment variable.

    Args:
        args_or_seed: The integer seed to apply.
        fix_cudnn: When true, also switch cuDNN to deterministic mode and
            disable its benchmark autotuner.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(args_or_seed)

    os.environ["PYTHONHASHSEED"] = str(args_or_seed)

    if not fix_cudnn:
        return
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True  # noqa
    cudnn.benchmark = False  # noqa
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
class VerifiableEnvironment(ABC) :
|
| 48 |
+
"""
|
| 49 |
+
Abstract base class for a verifiable environment.
|
| 50 |
+
"""
|
| 51 |
+
def __init__(self, answer_markers : Optional[Tuple[str, str]] = None) :
|
| 52 |
+
"""
|
| 53 |
+
Initializes the environment with default seed and parameter values.
|
| 54 |
+
"""
|
| 55 |
+
self.seed = None
|
| 56 |
+
self.parameter = None
|
| 57 |
+
|
| 58 |
+
if answer_markers is None :
|
| 59 |
+
answer_markers = (r"<answer>", r"</answer>")
|
| 60 |
+
assert hasattr(answer_markers, "__len__"), "answer_markers should have __len__"
|
| 61 |
+
assert len(answer_markers) == 2 and isinstance(answer_markers[0], str) and isinstance(answer_markers[1], str), "answer_markers should be a tuple of two strings"
|
| 62 |
+
self.answer_markers = answer_markers
|
| 63 |
+
|
| 64 |
+
self.passing_reward_threshold = 1.0
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def generator(self, seed : int, parameter : Optional[Dict] = None, timeout_second : int = 10) -> bool :
|
| 68 |
+
"""
|
| 69 |
+
Initializes the environment with the given seed and (initial) parameters, and samples environment-specific parameters to generate a problem.
|
| 70 |
+
|
| 71 |
+
Args:
|
| 72 |
+
seed (int): Random seed for reproducibility.
|
| 73 |
+
parameter (Optional[Dict]): Dictionary of (initial) problem parameters.
|
| 74 |
+
timeout_second (int): Timeout in seconds for the generation process.
|
| 75 |
+
|
| 76 |
+
Returns:
|
| 77 |
+
bool: True if the generation was successful, False otherwise.
|
| 78 |
+
"""
|
| 79 |
+
@timeout(timeout_second)
|
| 80 |
+
def self_generate() :
|
| 81 |
+
self.seed = seed
|
| 82 |
+
self.parameter = copy.deepcopy(parameter) if parameter is not None else {}
|
| 83 |
+
|
| 84 |
+
manual_seed(self.seed)
|
| 85 |
+
self._generate()
|
| 86 |
+
try :
|
| 87 |
+
self_generate()
|
| 88 |
+
except :
|
| 89 |
+
return False
|
| 90 |
+
return self.parameter is not None
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
@abstractmethod
|
| 94 |
+
def _generate(self) -> None :
|
| 95 |
+
"""
|
| 96 |
+
Subclasses must implement problem generation using self.seed and self.parameter.
|
| 97 |
+
"""
|
| 98 |
+
pass
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def prompt_generator(self) -> str :
|
| 102 |
+
"""
|
| 103 |
+
Generates the prompt string for the problem.
|
| 104 |
+
|
| 105 |
+
Returns:
|
| 106 |
+
str: The formatted prompt for the problem.
|
| 107 |
+
"""
|
| 108 |
+
assert self.seed is not None and self.parameter is not None, "generator() should be called before prompt_generator()"
|
| 109 |
+
|
| 110 |
+
return self._prompt_generate()
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
@abstractmethod
|
| 114 |
+
def _prompt_generate(self) -> str :
|
| 115 |
+
"""
|
| 116 |
+
Subclasses must implement prompt generation using self.seed and self.parameter.
|
| 117 |
+
|
| 118 |
+
Returns:
|
| 119 |
+
str: The problem prompt.
|
| 120 |
+
"""
|
| 121 |
+
pass
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def processor(self, output : str) -> Any :
|
| 125 |
+
"""
|
| 126 |
+
Processes the model's output to extract useful information.
|
| 127 |
+
|
| 128 |
+
Args:
|
| 129 |
+
output (str): The string output from a model.
|
| 130 |
+
|
| 131 |
+
Returns:
|
| 132 |
+
Any: Any useful information that may be used for following steps (e.g., scoring).
|
| 133 |
+
"""
|
| 134 |
+
|
| 135 |
+
# Remove everything before the first "Assistant:" (if possible)
|
| 136 |
+
if "Assistant:" in output :
|
| 137 |
+
output = output.split("Assistant:", 1)[1]
|
| 138 |
+
elif "<|im_start|>assistant" in output :
|
| 139 |
+
output = output.split("<|im_start|>assistant", 1)[1]
|
| 140 |
+
else :
|
| 141 |
+
pass
|
| 142 |
+
|
| 143 |
+
answer_pattern = re.escape(self.answer_markers[0]) + r"(.*?)" + re.escape(self.answer_markers[1])
|
| 144 |
+
matches = list(re.finditer(answer_pattern, output, re.DOTALL))
|
| 145 |
+
if matches :
|
| 146 |
+
answer = matches[-1].group(1)
|
| 147 |
+
else :
|
| 148 |
+
answer = None
|
| 149 |
+
return self._process(answer)
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
@abstractmethod
|
| 153 |
+
def _process(self, answer : Optional[str]) -> Any :
|
| 154 |
+
"""
|
| 155 |
+
Subclasses must implement the processing of the answer.
|
| 156 |
+
|
| 157 |
+
Args:
|
| 158 |
+
answer (str): The model's answer. If it is None, it means the model did not provide an answer in the expected format.
|
| 159 |
+
|
| 160 |
+
Returns:
|
| 161 |
+
Any: The processed answer, which may be used for scoring.
|
| 162 |
+
"""
|
| 163 |
+
pass
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
@abstractmethod
|
| 167 |
+
def scorer(self, output : str) -> float :
|
| 168 |
+
"""
|
| 169 |
+
Computes a numeric score for the output, which should be in [-1.0, +1.0].
|
| 170 |
+
|
| 171 |
+
Args:
|
| 172 |
+
output (str): The model's output.
|
| 173 |
+
|
| 174 |
+
Returns:
|
| 175 |
+
float: The score for the given output, between -1.0 and +1.0.
|
| 176 |
+
"""
|
| 177 |
+
pass
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
def verifier(self, output : str) -> Dict[str, Union[float, int]] :
|
| 181 |
+
"""
|
| 182 |
+
Verifies the model's output.
|
| 183 |
+
"""
|
| 184 |
+
try :
|
| 185 |
+
score = self.scorer(output)
|
| 186 |
+
except :
|
| 187 |
+
score = -1.0
|
| 188 |
+
assert -1.0 <= score <= +1.0, "Score out of bounds: score={}\n\nPrompt:\n{}".format(score, self.prompt_generator())
|
| 189 |
+
|
| 190 |
+
eps = 1E-6
|
| 191 |
+
return dict(
|
| 192 |
+
reward = score, # [-1.0, +1.0]
|
| 193 |
+
accuracy = int(score >= self.passing_reward_threshold - eps), # 0 or 1
|
| 194 |
+
format_score = int(score >= -1.0 + eps), # 0 or 1
|
| 195 |
+
)
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
def get_config(self) -> Dict:
    """
    Returns the configuration of the current problem.

    Returns:
        Dict: Dictionary with keys 'seed', 'parameter' and
            'passing_reward_threshold', taken from the attributes of the
            same names.
    """
    return {
        "seed": self.seed,
        "parameter": self.parameter,
        "passing_reward_threshold": self.passing_reward_threshold,
    }
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
def set_config(self, config: Dict) -> None:
    """
    Sets the configuration for the current problem.

    Args:
        config (Dict): Dictionary with 'seed' and 'parameter' keys. An
            optional 'passing_reward_threshold' key is also read; it
            defaults to 1.0 when absent.
    """
    assert "seed" in config, "seed is required in config"
    assert "parameter" in config, "parameter is required in config"

    # Read every value before touching the instance so that a failed lookup
    # leaves the current configuration untouched.
    new_seed = config["seed"]
    new_parameter = config["parameter"]
    new_threshold = config.get("passing_reward_threshold", 1.0)

    self.seed = new_seed
    self.parameter = new_parameter
    self.passing_reward_threshold = new_threshold
|
server/Gym/environments/__init__.py
ADDED
|
@@ -0,0 +1,802 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .ab_program_simulation import ABProgramSimulation_Environment
|
| 2 |
+
from .add_multiple_divisible_counting import AddMultiple_Divisible_Counting_Environment
|
| 3 |
+
from .addition_table import AdditionTable_Environment
|
| 4 |
+
from .almost_complete_graph_cycle_counting import AlmostCompleteGraphCycleCounting_Environment
|
| 5 |
+
from .and_or_sequence_counting import AndOr_Sequence_Counting_Environment
|
| 6 |
+
from .anti_palindromic_substring_counting import AntiPalindromicSubstringCounting_Environment
|
| 7 |
+
from .axis_k_center import Axis_KCenter_Environment
|
| 8 |
+
from .baj_bytecomputer import BAJBytecomputer_Environment
|
| 9 |
+
from .banned_point_superset_path_counting import BannedPointSupersetPathCounting_Environment
|
| 10 |
+
from .banyan_heart import BanyanHeart_Environment
|
| 11 |
+
from .bez_minimalist_security import BEZMinimalistSecurity_Environment
|
| 12 |
+
from .bezout_identity import BezoutIdentity_Environment
|
| 13 |
+
from .binario import Binario_Environment
|
| 14 |
+
from .binario_no_adjacency_requirement import Binario_NoAdjacencyRequirement_Environment
|
| 15 |
+
from .binary_alternation import BinaryAlternation_Environment
|
| 16 |
+
from .binary_linear_equation_solution_counting import BinaryLinearEquation_SolutionCounting_Environment
|
| 17 |
+
from .binary_tree_leaf_num_expectation import BinaryTreeLeafNumExpectation_Environment
|
| 18 |
+
from .bit_equation_counting import BitEquationCounting_Environment
|
| 19 |
+
from .bitand_zero_path_counting import BitAndZero_PathCounting_Environment
|
| 20 |
+
from .bitwise_operation_sequence_counting import BitwiseOperationSequenceCounting_Environment
|
| 21 |
+
from .block_image import BlockImage_Environment
|
| 22 |
+
from .bounded_adjacency_difference_permutation_counting import BoundedAdjacencyDifference_Permutation_Counting_Environment
|
| 23 |
+
from .bounded_interval_intersection import BoundedIntervalIntersection_Environment
|
| 24 |
+
from .bounded_mean_subarray_counting import BoundedMeanSubarrayCounting_Environment
|
| 25 |
+
from .bounded_subarray_counting import BoundedSubarrayCounting_Environment
|
| 26 |
+
from .box_scheduling import BoxScheduling_Environment
|
| 27 |
+
from .bridge import Bridge_Environment
|
| 28 |
+
from .bubble_swap_lower_bound_permutation_counting import BubbleSwapLowerBound_PermutationCounting_Environment
|
| 29 |
+
from .bucket_sorting import BucketSorting_Environment
|
| 30 |
+
from .campfire_party import CampfireParty_Environment
|
| 31 |
+
from .campsite_puzzle import CampsitePuzzle_Environment
|
| 32 |
+
from .canon import Canon_Environment
|
| 33 |
+
from .cantor_expansion import CantorExpansion_Environment
|
| 34 |
+
from .capital_city_effect import CapitalCityEffect_Environment
|
| 35 |
+
from .card_coloring_counting import CardColoringCounting_Environment
|
| 36 |
+
from .catalan_number_mod import CatalanNumberMod_Environment
|
| 37 |
+
from .check_all_cycle_xor_zero import CheckAllCycleXorZero_Environment
|
| 38 |
+
from .cho_hamsters import ChoHamsters_Environment
|
| 39 |
+
from .cinema import Cinema_Environment
|
| 40 |
+
from .circuit import Circuit_Environment
|
| 41 |
+
from .circulating_decimal_counting import CirculatingDecimalCounting_Environment
|
| 42 |
+
from .circulating_grid import CirculatingGrid_Environment
|
| 43 |
+
from .cleaning_up import CleaningUp_Environment
|
| 44 |
+
from .clear_symmetry import ClearSymmetry_Environment
|
| 45 |
+
from .clique_independent_set_partitioning_counting import Clique_IndependentSet_Partitioning_Counting_Environment
|
| 46 |
+
from .coin_square_game import CoinSquareGame_Environment
|
| 47 |
+
from .coloring_counting import ColoringCounting_Environment
|
| 48 |
+
from .combination_odd_subsequence_counting import CombinationOddSubsequenceCounting_Environment
|
| 49 |
+
from .concatenation_partition_counting_sum import ConcatenationPartitionCountingSum_Environment
|
| 50 |
+
from .congruent_equation import CongruentEquation_Environment
|
| 51 |
+
from .construct_hack_interval import ConstructHackInterval_Environment
|
| 52 |
+
from .convex_hull import ConvexHull_Environment
|
| 53 |
+
from .cornfield import Cornfield_Environment
|
| 54 |
+
from .countdown import CountdownEqual_Environment, CountdownClose_Environment
|
| 55 |
+
from .cow_dance_show import CowDanceShow_Environment
|
| 56 |
+
from .crt import CRT_Environment
|
| 57 |
+
from .cryptarithmetic import Cryptarithmetic_Environment
|
| 58 |
+
from .cube_fixed_local_maximum_counting import Cube_FixedLocalMaximumCounting_Environment
|
| 59 |
+
from .cycle_counting import CycleCounting_Environment
|
| 60 |
+
from .decreasing_digit_counting import DecreasingDigitCounting_Environment
|
| 61 |
+
from .degree_fixed_spanning_tree import DegreeFixed_SpanningTree_Environment
|
| 62 |
+
from .delta_min_popcount import DeltaMinPopcount_Environment
|
| 63 |
+
from .delta_nim_game import DeltaNimGame_Environment
|
| 64 |
+
from .derangement_extension import DerangementExtension_Environment
|
| 65 |
+
from .difference_constraint_system import DifferenceConstraintSystem_Environment
|
| 66 |
+
from .difference_constraint_system_dag import DifferenceConstraintSystemDAG_Environment
|
| 67 |
+
from .different_color_pairing import DifferentColorPairing_Environment
|
| 68 |
+
from .differentiate import Differentiate_Environment
|
| 69 |
+
from .digit_lis_counting import DigitLISCounting_Environment
|
| 70 |
+
from .discrete_logarithm import DiscreteLogarithm_Environment
|
| 71 |
+
from .disinfection import Disinfection_Environment
|
| 72 |
+
from .distinct_array_permutation import DistinctArrayPermutation_Environment
|
| 73 |
+
from .distinct_edge_colored_complete_graph_counting import DistinctEdgeColoredCompleteGraphCounting_Environment
|
| 74 |
+
from .division import Division_Environment
|
| 75 |
+
from .divisor_flip_expectation import DivisorFlipExpectation_Environment
|
| 76 |
+
from .double_cross_counting import DoubleCrossCounting_Environment
|
| 77 |
+
from .double_palindromic_string_counting import DoublePalindromicStringCounting_Environment
|
| 78 |
+
from .double_stack_sorting import DoubleStackSorting_Environment
|
| 79 |
+
from .dyn_dynamite import DynDynamite_Environment
|
| 80 |
+
from .eight_digit_puzzle import EightDigitPuzzle_Environment
|
| 81 |
+
from .emperor_worries import EmperorWorries_Environment
|
| 82 |
+
from .energy_storage_meter import EnergyStorageMeter_Environment
|
| 83 |
+
from .euclid_game import EuclidGame_Environment
|
| 84 |
+
from .even_degree_graph_partitioning import EvenDegreeGraphPartitioning_Environment
|
| 85 |
+
from .expression_adding_parenthese_counting import Expression_AddingParenthese_Counting_Environment
|
| 86 |
+
from .face_right_way import FaceRightWay_Environment
|
| 87 |
+
from .factorial_trailing_zero_count import FactorialTrailingZeroCount_Environment
|
| 88 |
+
from .fbi_binary_tree import FBI_BinaryTree_Environment
|
| 89 |
+
from .fibonacci import Fibonacci_Environment
|
| 90 |
+
from .fibonacci_containing_counting import FibonacciContainingCounting_Environment
|
| 91 |
+
from .fibtrain import Fibtrain_Environment
|
| 92 |
+
from .firework_show import FireworkShow_Environment
|
| 93 |
+
from .fixed_mod_k_selection_counting import FixedModK_Selection_Counting_Environment
|
| 94 |
+
from .fixed_one_edge_num_spanning_tree import FixedOneEdgeNum_SpanningTree_Environment
|
| 95 |
+
from .fractional_programming import FractionalProgramming_Environment
|
| 96 |
+
from .fractional_programming_bipartite_graph_matching import FractionalProgramming_BipartiteGraphMatching_Environment
|
| 97 |
+
from .futoshiki_puzzle import FutoshikiPuzzle_Environment
|
| 98 |
+
from .gas_fire_extinguishers import GasFireExtinguishers_Environment
|
| 99 |
+
from .gaussian_elimination import GaussianElimination_Environment
|
| 100 |
+
from .gcd_fibonacci_product import GCDFibonacciProduct_Environment
|
| 101 |
+
from .gcd_lcm_counting import GcdLcmCounting_Environment
|
| 102 |
+
from .gcd_one_counting import GCDOne_Counting_Environment
|
| 103 |
+
from .gcd_prime_counting import GCDPrime_Counting_Environment
|
| 104 |
+
from .gold_washing import GoldWashing_Environment
|
| 105 |
+
from .gra_minima_game import GraMinimaGame_Environment
|
| 106 |
+
from .grade_ranking_counting import GradeRankingCounting_Environment
|
| 107 |
+
from .graph_contain_tree_counting import GraphContainTreeCounting_Environment
|
| 108 |
+
from .graph_isomorphism import GraphIsomorphism_Environment
|
| 109 |
+
from .grid_bfs import GridBFS_Environment
|
| 110 |
+
from .grid_coloring_counting import GridColoringCounting_Environment
|
| 111 |
+
from .grid_component import GridComponent_Environment
|
| 112 |
+
from .grid_local_minimum_counting import GridLocalMinimumCounting_Environment
|
| 113 |
+
from .grid_parity_construction import GridParityConstruction_Environment
|
| 114 |
+
from .grid_triangle_counting import GridTriangleCounting_Environment
|
| 115 |
+
from .halving_chain_counting import HalvingChainCounting_Environment
|
| 116 |
+
from .hamiltonian_path import HamiltonianPath_Environment
|
| 117 |
+
from .hamiltonian_path_existence import HamiltonianPathExistence_Environment
|
| 118 |
+
from .heap_counting import HeapCounting_Environment
|
| 119 |
+
from .hitori_puzzle import HitoriPuzzle_Environment
|
| 120 |
+
from .hungry_rabbit import HungryRabbit_Environment
|
| 121 |
+
from .hur_warehouse_store import HURWarehouseStore_Environment
|
| 122 |
+
from .imp_party import ImpParty_Environment
|
| 123 |
+
from .individual_sum_bounded_sequence_counting import IndividualSumBounded_SequenceCounting_Environment
|
| 124 |
+
from .integer_factorization_counting import IntegerFactorizationCounting_Environment
|
| 125 |
+
from .integer_programming import IntegerProgramming_Environment
|
| 126 |
+
from .integral import Integral_Environment
|
| 127 |
+
from .inversion_pair import InversionPair_Environment
|
| 128 |
+
from .inversion_pair_k_counting import InversionPairK_Counting_Environment
|
| 129 |
+
from .josephus import Josephus_Environment
|
| 130 |
+
from .jug_puzzle import JugPuzzle_Environment
|
| 131 |
+
from .k_partition import KPartition_Environment
|
| 132 |
+
from .kakurasu import Kakurasu_Environment
|
| 133 |
+
from .kidding_me import KiddingMe_Environment
|
| 134 |
+
from .king_sorting import KingSorting_Environment
|
| 135 |
+
from .klo_blocks import KloBlocks_Environment
|
| 136 |
+
from .knapsack import Knapsack_Environment
|
| 137 |
+
from .knights_and_knaves import KnightsAndKnaves_Environment
|
| 138 |
+
from .kos_dicing import KosDicing_Environment
|
| 139 |
+
from .kth_binary_tree import Kth_BinaryTree_Environment
|
| 140 |
+
from .kth_semi_balanced_bracket_sequence import Kth_SemiBalancedBracketSequence_Environment
|
| 141 |
+
from .kth_subsequence import KthSubsequence_Environment
|
| 142 |
+
from .kur import KUR_Environment
|
| 143 |
+
from .lamp_changing import LampChanging_Environment
|
| 144 |
+
from .land_acquisition import LandAcquisition_Environment
|
| 145 |
+
from .landform_generation_counting import LandformGenerationCounting_Environment
|
| 146 |
+
from .largest_convex_polygon import LargestConvexPolygon_Environment
|
| 147 |
+
from .largest_rectangle_among_points import LargestRectangle_AmongPoints_Environment
|
| 148 |
+
from .las import LAS_Environment
|
| 149 |
+
from .las_laser import LASLaser_Environment
|
| 150 |
+
from .lcm import LCM_Environment
|
| 151 |
+
from .lds_two_counting import LDSTwo_Counting_Environment
|
| 152 |
+
from .light_up_puzzle import LightUpPuzzle_Environment
|
| 153 |
+
from .link_beads import LinkBeads_Environment
|
| 154 |
+
from .lis_lds_concatenation import LIS_LDS_Concatenation_Environment
|
| 155 |
+
from .liz_lollipop import LIZ_Lollipop_Environment
|
| 156 |
+
from .longest_double_palindrome import Longest_DoublePalindrome_Environment
|
| 157 |
+
from .longest_matching_subsequence import Longest_MatchingSubsequence_Environment
|
| 158 |
+
from .longest_maxdiff_bounded_interval import LongestMaxDiffBoundedInterval_Environment
|
| 159 |
+
from .longest_path import LongestPath_Environment
|
| 160 |
+
from .longest_repeated_palindrome import Longest_RepeatedPalindrome_Environment
|
| 161 |
+
from .maf_mafia import MafMafia_Environment
|
| 162 |
+
from .magic_square_puzzle import MagicSquarePuzzle_Environment
|
| 163 |
+
from .making_grade import MakingGrade_Environment
|
| 164 |
+
from .matrix_binary_exponentiation import Matrix_BinaryExponentiation_Environment
|
| 165 |
+
from .matrix_permutation_both_diagonal_one import MatrixPermutation_BothDiagonalOne_Environment
|
| 166 |
+
from .matrix_permutation_equivalence import MatrixPermutationEquivalence_Environment
|
| 167 |
+
from .matrix_permutation_main_diagonal_one import MatrixPermutation_MainDiagonalOne_Environment
|
| 168 |
+
from .matrix_pooling import MatrixPooling_Environment
|
| 169 |
+
from .matrix_rmq_counting import MatrixRMQCounting_Environment
|
| 170 |
+
from .max_different_group_pair_division import MaxDifferentGroupPairDivision_Environment
|
| 171 |
+
from .max_grid_path_intersection import MaxGridPathIntersection_Environment
|
| 172 |
+
from .max_minimum_after_interval_addition import MaxMinimum_AfterIntervalAddition_Environment
|
| 173 |
+
from .max_mult_split import MaxMultSplit_Environment
|
| 174 |
+
from .max_multiplication_fixed_sum import MaxMultiplicationFixedSum_Environment
|
| 175 |
+
from .max_no_conflicting_bombs import MaxNoConflictingBombs_Environment
|
| 176 |
+
from .max_nonadjacent_k_element_sum import Max_NonAdjacent_KElementSum_Environment
|
| 177 |
+
from .max_permutation import MaxPermutation_Environment
|
| 178 |
+
from .max_rmq_expectation import MaxRMQExpectation_Environment
|
| 179 |
+
from .max_segment_coverage_constraint import MaxSegmentCoverageConstraint_Environment
|
| 180 |
+
from .max_sum_lds import MaxSumLDS_Environment
|
| 181 |
+
from .max_three_square_sum import MaxThreeSquareSum_Environment
|
| 182 |
+
from .max_tree_constrained_permutation_weight import Max_TreeConstrainedPermutation_Weight_Environment
|
| 183 |
+
from .max_tree_k_path_coverage import MaxTree_KPathCoverahe_Environment
|
| 184 |
+
from .max_tree_xor_path import MaxTreeXorPath_Environment
|
| 185 |
+
from .max_weight_palindromic_substring import MaxWeightPalindromicSubstring_Environment
|
| 186 |
+
from .max_xor_path import MaxXorPath_Environment
|
| 187 |
+
from .max_xor_set import MaxXorSet_Environment
|
| 188 |
+
from .maximum_achromatic_number import MaximumAchromaticNumber_Environment
|
| 189 |
+
from .maximum_clique import MaximumClique_Environment
|
| 190 |
+
from .maximum_divisor import MaximumDivisor_Environment
|
| 191 |
+
from .maximum_independent_set_grid import MaximumIndependentSetGrid_Environment
|
| 192 |
+
from .maximum_independent_set_tree import Maximum_IndependentSet_Tree_Environment
|
| 193 |
+
from .maximum_lexicographical_order_subsequence import MaximumLexicographicalOrderSubsequence_Environment
|
| 194 |
+
from .maximum_point_segment_matching import MaximumPointSegmentMatching_Environment
|
| 195 |
+
from .maximum_subsequence_num import Maximum_SubsequenceNum_Environment
|
| 196 |
+
from .maximum_weight_matching import MaximumWeightMatching_Environment
|
| 197 |
+
from .maze import Maze_Environment
|
| 198 |
+
from .min_conversion_to_cycle_cost import MinConversionToCycleCost_Environment
|
| 199 |
+
from .min_cost_reducing_lnds import MinCostReducingLNDS_Environment
|
| 200 |
+
from .min_cost_tree_coverage import MinCostTreeCoverage_Environment
|
| 201 |
+
from .min_cube_assignment import MinCubeAssignment_Environment
|
| 202 |
+
from .min_division_sum_xor import MinDivisionSumXor_Environment
|
| 203 |
+
from .min_inorder_binary_tree import MinInorderBinaryTree_Environment
|
| 204 |
+
from .min_kdivisor_number import MinKDivisorNumber_Environment
|
| 205 |
+
from .min_no_solution_linear_diophantine_equation import MinNoSolutionLinearDiophantineEquation_Environment
|
| 206 |
+
from .min_nonsubstring import MinNonsubstring_Environment
|
| 207 |
+
from .min_pairsum_multiplication_permutation import MinPairSumMultiplicationPermutation_Environment
|
| 208 |
+
from .min_path_cover_dag import MinPathCover_DAG_Environment
|
| 209 |
+
from .min_sum_chebyshev_distance import MinSumChebyshevDistance_Environment
|
| 210 |
+
from .min_sum_distance_square import MinSumDistanceSquare_Environment
|
| 211 |
+
from .min_sum_pre_xor import MinSumPreXor_Environment
|
| 212 |
+
from .min_swap_two_permutations import MinSwapTwoPermutations_Environment
|
| 213 |
+
from .min_xor_pair import MinXorPair_Environment
|
| 214 |
+
from .minesweeping import Minesweeping_Environment
|
| 215 |
+
from .minimal_cyclic_shift import MinimalCyclicShift_Environment
|
| 216 |
+
from .minimum_chromatic_number import MinimumChromaticNumber_Environment
|
| 217 |
+
from .minimum_chromatic_number_segment_overlap import MinimumChromaticNumber_SegmentOverlap_Environment
|
| 218 |
+
from .minimum_cost_maximum_flow import MinimumCost_MaximumFlow_Environment
|
| 219 |
+
from .minimum_crossing_edges_graph_partition import Minimum_CrossingEdges_GraphPartition_Environment
|
| 220 |
+
from .minimum_directed_spanning_tree import MinimumDirectedSpanningTree_Environment
|
| 221 |
+
from .minimum_dominating_interval import Minimum_DominatingInterval_Environment
|
| 222 |
+
from .minimum_dominating_set import Minimum_DominatingSet_Environment
|
| 223 |
+
from .minimum_dominating_set_grid import Minimum_DominatingSet_Grid_Environment
|
| 224 |
+
from .minimum_fibonacci_representation import MinimumFibonacciRepresentation_Environment
|
| 225 |
+
from .minimum_harmonious_chromatic_number import MinimumHarmoniousChromaticNumber_Environment
|
| 226 |
+
from .minimum_interval_coverage import MinimumIntervalCoverage_Environment
|
| 227 |
+
from .minimum_max_abs_slicer import Minimum_MaxAbsSlicer_Environment
|
| 228 |
+
from .minimum_max_slicer import Minimum_MaxSlicer_Environment
|
| 229 |
+
from .minimum_ratio_path import MinimumRatioPath_Environment
|
| 230 |
+
from .minimum_spanning_tree import MinimumSpanningTree_Environment
|
| 231 |
+
from .minimum_spanning_tree_counting import MinimumSpanningTreeCounting_Environment
|
| 232 |
+
from .minimum_steiner_tree import MinimumSteinerTree_Environment
|
| 233 |
+
from .minimum_sum_difference_submatrix import MinimumSumDifferenceSubmatrix_Environment
|
| 234 |
+
from .minimum_tree_weighted_dominating_ancestor import MinimumTreeWeightedDominatingAncestor_Environment
|
| 235 |
+
from .minimum_unconflicted_grid_kmax import MinimumUnconflictedGridKMax_Environment
|
| 236 |
+
from .minimum_vertex_cover import Minimum_VertexCover_Environment
|
| 237 |
+
from .minimum_weighted_spanning_tree import MinimumWeightedSpanningTree_Environment
|
| 238 |
+
from .mitter_transportation import MitterTransportation_Environment
|
| 239 |
+
from .mixed_graph_eulerian_circuit import MixedGraphEulerianCircuit_Environment
|
| 240 |
+
from .money_charging_game import MoneyChargingGame_Environment
|
| 241 |
+
from .monochrome_block_counting import MonochromeBlockCounting_Environment
|
| 242 |
+
from .monotonic_stack import MonotonicStack_Environment
|
| 243 |
+
from .most_component_tree_removing_two_paths import MostComponentTreeRemovingTwoPaths_Environment
|
| 244 |
+
from .most_num_edge_non_self_isomorphism import MostNumEdge_NonSelfIsomorphism_Environment
|
| 245 |
+
from .multidrink import MultiDrink_Environment
|
| 246 |
+
from .multiple_flipping_game import MultipleFlippingGame_Environment
|
| 247 |
+
from .multiplication import Multiplication_Environment
|
| 248 |
+
from .myj import MYJ_Environment
|
| 249 |
+
from .nand_result_counting import NANDResultCounting_Environment
|
| 250 |
+
from .negative_base import NegativeBase_Environment
|
| 251 |
+
from .new_nim_game import NewNimGame_Environment
|
| 252 |
+
from .next_palindromic import NextPalindromic_Environment
|
| 253 |
+
from .nine_puzzle import NinePuzzle_Environment
|
| 254 |
+
from .no_adjacent_girl_counting import NoAdjacentGirlCounting_Environment
|
| 255 |
+
from .no_double_triple_counting import NoDoubleTripleCounting_Environment
|
| 256 |
+
from .not_containing_string_counting import NotContainingStringCounting_Environment
|
| 257 |
+
from .number_partition_counting import NumberPartitionCounting_Environment
|
| 258 |
+
from .numbrix import Numbrix_Environment
|
| 259 |
+
from .odd_visitation import OddVisitation_Environment
|
| 260 |
+
from .odl_distance import ODLDistance_Environment
|
| 261 |
+
from .pair_more_one_counting import PairMoreOneCounting_Environment
|
| 262 |
+
from .palembang_bridges import PalembangBridges_Environment
|
| 263 |
+
from .palindrome_partition_counting import PalindromePartitionCounting_Environment
|
| 264 |
+
from .palindromic_substring_number_counting import PalindromicSubstringNumberCounting_Environment
|
| 265 |
+
from .pan_solar_panels import PanSolarPanels_Environment
|
| 266 |
+
from .path_no_going_back_counting import Path_NoGoingBack_Counting_Environment
|
| 267 |
+
from .patrol import Patrol_Environment
|
| 268 |
+
from .pcp_permutation import PCPPermutation_Environment
|
| 269 |
+
from .pipeline_arrangement import PipelineArrangement_Environment
|
| 270 |
+
from .pol_polarization import POLPolarization_Environment
|
| 271 |
+
from .polya_model import PolyaModel_Environment
|
| 272 |
+
from .polynomial_factorization import PolynomialFactorization_Environment
|
| 273 |
+
from .polynomial_interpolation import PolynomialInterpolation_Environment
|
| 274 |
+
from .polynomial_minimum import PolynomialMinimum_Environment
|
| 275 |
+
from .polynomial_remainder import PolynomialRemainder_Environment
|
| 276 |
+
from .power_cycle import PowerCycle_Environment
|
| 277 |
+
from .power_shortcut import PowerShortcut_Environment
|
| 278 |
+
from .powernest import PowerNest_Environment
|
| 279 |
+
from .prefix_concatenation import PrefixConcatenation_Environment
|
| 280 |
+
from .prefix_product_mod_distinct_permutation import PrefixProductMODDistinctPermutation_Environment
|
| 281 |
+
from .prefix_sum_mod_distinct_permutation import PrefixSumMODDistinctPermutation_Environment
|
| 282 |
+
from .prefixuffix import Prefixuffix_Environment
|
| 283 |
+
from .preorder_traversal import PreorderTraversal_Environment
|
| 284 |
+
from .prime_graph_minimum_chromatic_number import PrimeGraph_MinimumChromaticNumber_Environment
|
| 285 |
+
from .protecting_flowers import ProtectingFlowers_Environment
|
| 286 |
+
from .pythagorean_graph_independent_set_counting import PythagoreanGraph_IndependentSetCounting_Environment
|
| 287 |
+
from .quad_magic_items import QuadMagicItems_Environment
|
| 288 |
+
from .quadratic_function_segmentation import QuadraticFunctionSegmentation_Environment
|
| 289 |
+
from .quantum_lock_puzzle import QuantumLockPuzzle_Environment
|
| 290 |
+
from .queen_placement import QueenPlacement_Environment
|
| 291 |
+
from .random_range_max_expectation import RandomRangeMaxExpectation_Environment
|
| 292 |
+
from .range_constrained_increasing_sequence_counting import RangeConstrained_IncreasingSequence_Counting_Environment
|
| 293 |
+
from .range_four_sequence_construction import RangeFourSequenceConstruction_Environment
|
| 294 |
+
from .range_shrinking_sequence_counting import RangeShrinkingSequenceCounting_Environment
|
| 295 |
+
from .recursive_function import RecursiveFunction_Environment
|
| 296 |
+
from .recursive_sequence_sum_construction import RecursiveSequenceSumConstruction_Environment
|
| 297 |
+
from .repeat_sequence_lnds import RepeatSequenceLNDS_Environment
|
| 298 |
+
from .root_extraction import RootExtraction_Environment
|
| 299 |
+
from .round_robin import RoundRobin_Environment
|
| 300 |
+
from .roundtable_assignment import RoundTableAssignment_Environment
|
| 301 |
+
from .royal_lock_counting import RoyalLockCounting_Environment
|
| 302 |
+
from .salad_bar import SaladBar_Environment
|
| 303 |
+
from .salesman_fatigue import SalesmanFatigue_Environment
|
| 304 |
+
from .same_adjacency_counting import SameAdjacencyCounting_Environment
|
| 305 |
+
from .sat import SAT_Environment
|
| 306 |
+
from .scc_sequence_counting import SCC_Sequence_Counting_Environment
|
| 307 |
+
from .secret_cow_code import SecretCowCode_Environment
|
| 308 |
+
from .segment_min_length_equal_counting import SegmentMinLengthEqual_Counting_Environment
|
| 309 |
+
from .segment_tree_sorting_counting import SegmentTreeSortingCounting_Environment
|
| 310 |
+
from .self_power_sequence_mod import SelfPowerSequenceMOD_Environment
|
| 311 |
+
from .set_cover import SetCover_Environment
|
| 312 |
+
from .set_splitting import SetSplitting_Environment
|
| 313 |
+
from .shared_substring_counting import SharedSubstringCounting_Environment
|
| 314 |
+
from .shortest_path import ShortestPath_Environment
|
| 315 |
+
from .shortest_path_count_construction import ShortestPathCountConstruction_Environment
|
| 316 |
+
from .shortest_unicolor_substring import ShortestUnicolorSubstring_Environment
|
| 317 |
+
from .singing_girl_story import SingingGirlStory_Environment
|
| 318 |
+
from .single_stack_sorting import SingleStackSorting_Environment
|
| 319 |
+
from .ska_rock_garden import SkaRockGarden_Environment
|
| 320 |
+
from .skyscraper_puzzle import SkyscraperPuzzle_Environment
|
| 321 |
+
from .skyscraper_sum_puzzle import SkyscraperSumPuzzle_Environment
|
| 322 |
+
from .sliding_window import SlidingWindow_Environment
|
| 323 |
+
from .slo_elephants import SLOElephants_Environment
|
| 324 |
+
from .smallest_binary_multiple import SmallestBinaryMultiple_Environment
|
| 325 |
+
from .smallest_circle import SmallestCircle_Environment
|
| 326 |
+
from .sorting import Sorting_Environment
|
| 327 |
+
from .spiral_matrix import SpiralMatrix_Environment
|
| 328 |
+
from .splitting_game import SplittingGame_Environment
|
| 329 |
+
from .spy_network import SpyNetwork_Environment
|
| 330 |
+
from .squ_squarks import SquSquarks_Environment
|
| 331 |
+
from .square_undamaged_point_counting import SquareUndamagedPointCounting_Environment
|
| 332 |
+
from .star_battle import StarBattle_Environment
|
| 333 |
+
from .stirling_second import StirlingSecond_Environment
|
| 334 |
+
from .stone_game import StoneGame_Environment
|
| 335 |
+
from .stone_intervals_game import StoneIntervalsGame_Environment
|
| 336 |
+
from .string_partition_shuffle import StringPartitionShuffle_Environment
|
| 337 |
+
from .string_reversal_construction import StringReversalConstruction_Environment
|
| 338 |
+
from .stu_well import STUWell_Environment
|
| 339 |
+
from .stunt_flying import StuntFlying_Environment
|
| 340 |
+
from .subarray_sum_xor import SubarraySumXor_Environment
|
| 341 |
+
from .subarray_xor_sum import SubarrayXorSum_Environment
|
| 342 |
+
from .subgraph_isomorphism import SubgraphIsomorphism_Environment
|
| 343 |
+
from .submatrix_sum_divisible_counting import SubmatrixSumDivisibleCounting_Environment
|
| 344 |
+
from .subsequence_reversal_lnds import SubsequenceReversalLNDS_Environment
|
| 345 |
+
from .subset_sum import SubsetSum_Environment
|
| 346 |
+
from .subset_sum_sequence import SubsetSumSequence_Environment
|
| 347 |
+
from .sudoku import Sudoku_Environment
|
| 348 |
+
from .sum_divisor_num import Sum_DivisorNum_Environment
|
| 349 |
+
from .sum_gcd import SumGCD_Environment
|
| 350 |
+
from .sum_gcd_with_individual import SumGCDWithIndividual_Environment
|
| 351 |
+
from .sum_lcm import SumLCM_Environment
|
| 352 |
+
from .sum_manhattan_curved_surface import SumManhattan_CurvedSurface_Environment
|
| 353 |
+
from .sum_mod import SumMOD_Environment
|
| 354 |
+
from .sum_phi_interval import SumPHIInterval_Environment
|
| 355 |
+
from .sum_product_divisor_num import SumProductDivisorNum_Environment
|
| 356 |
+
from .sum_pseudo_euclidean import SumPseudoEuclidean_Environment
|
| 357 |
+
from .sum_set_multiplication import SumSetMultiplication_Environment
|
| 358 |
+
from .sum_spanning_tree_gcd import SumSpanningTreeGCD_Environment
|
| 359 |
+
from .sum_triangle_area import SumTriangleArea_Environment
|
| 360 |
+
from .sum_xor_divisor_num import SumXorDivisorNum_Environment
|
| 361 |
+
from .survo_puzzle import SurvoPuzzle_Environment
|
| 362 |
+
from .taking_prime_game import TakingPrimeGame_Environment
|
| 363 |
+
from .task_arrangement import TaskArrangement_Environment
|
| 364 |
+
from .tetris_attack import TetrisAttack_Environment
|
| 365 |
+
from .three_string_common_subsequence_counting import ThreeStringCommonSubsequenceCounting_Environment
|
| 366 |
+
from .three_vertex_cycle_counting import ThreeVertexCycleCounting_Environment
|
| 367 |
+
from .topological_sort import TopologicalSort_Environment
|
| 368 |
+
from .topological_sort_minimal_lexicographical_order import TopologicalSort_MinimalLexicographicalOrder_Environment
|
| 369 |
+
from .tournament_longest_path import Tournament_LongestPath_Environment
|
| 370 |
+
from .transmission_delay import TransmissionDelay_Environment
|
| 371 |
+
from .tree_add_one_edge_diameter import TreeAddOneEdgeDiameter_Environment
|
| 372 |
+
from .tree_center import TreeCenter_Environment
|
| 373 |
+
from .tree_change_one_edge_diameter import TreeChangeOneEdgeDiameter_Environment
|
| 374 |
+
from .tree_coloring import TreeColoring_Environment
|
| 375 |
+
from .tree_distance_equal_triad_counting import Tree_DistanceEqualTriad_Counting_Environment
|
| 376 |
+
from .tree_dynamic_xor_zero_path import TreeDynamic_XORZeroPath_Environment
|
| 377 |
+
from .tree_elimination_expectation import TreeElimination_Expectation_Environment
|
| 378 |
+
from .tree_even_partitioning import TreeEvenPartitioning_Environment
|
| 379 |
+
from .tree_maximum_visited_vertex import TreeMaximumVisitedVertex_Environment
|
| 380 |
+
from .tree_random_walk_expectation import TreeRandomWalkExpectation_Environment
|
| 381 |
+
from .tree_topological_sequence_counting import TreeTopologicalSequenceCounting_Environment
|
| 382 |
+
from .triumphal_arch import TriumphalArch_Environment
|
| 383 |
+
from .twiddle_puzzle import TwiddlePuzzle_Environment
|
| 384 |
+
from .two_sat import TwoSAT_Environment
|
| 385 |
+
from .two_set_all_coprime_counting import TwoSet_AllCoprime_Counting_Environment
|
| 386 |
+
from .undamaged_submatrix_counting import UndamagedSubmatrixCounting_Environment
|
| 387 |
+
from .value_diminishing_selection import ValueDiminishingSelection_Environment
|
| 388 |
+
from .vertex_k_center import Vertex_KCenter_Environment
|
| 389 |
+
from .virus_synthesis import VirusSynthesis_Environment
|
| 390 |
+
from .visible_line import VisibleLine_Environment
|
| 391 |
+
from .warehouse_construction import WarehouseConstruction_Environment
|
| 392 |
+
from .weighted_binarytree import WeightedBinaryTree_Environment
|
| 393 |
+
from .weighted_lis import WeightedLIS_Environment
|
| 394 |
+
from .whack_a_mole import WhackAMole_Environment
|
| 395 |
+
from .wil import WIL_Environment
|
| 396 |
+
from .wyc import WYC_Environment
|
| 397 |
+
from .wyr_leveling_ground import WYRLevelingGround_Environment
|
| 398 |
+
from .xor_equation_counting import XorEquationCounting_Environment
|
| 399 |
+
from .zero_prefix_subset_counting import ZeroPrefixSubsetCounting_Environment
|
| 400 |
+
|
| 401 |
+
|
| 402 |
+
# Registry mapping each environment identifier string to the environment class
# imported above. Every key is the class name without its "_Environment"
# suffix, with one exception flagged below. Entry order matches the original
# literal, so iteration order over the registry is unchanged.
identifier2environment = {
    "ABProgramSimulation": ABProgramSimulation_Environment,
    "AddMultiple_Divisible_Counting": AddMultiple_Divisible_Counting_Environment,
    "AdditionTable": AdditionTable_Environment,
    "AlmostCompleteGraphCycleCounting": AlmostCompleteGraphCycleCounting_Environment,
    "AndOr_Sequence_Counting": AndOr_Sequence_Counting_Environment,
    "AntiPalindromicSubstringCounting": AntiPalindromicSubstringCounting_Environment,
    "Axis_KCenter": Axis_KCenter_Environment,
    "BAJBytecomputer": BAJBytecomputer_Environment,
    "BannedPointSupersetPathCounting": BannedPointSupersetPathCounting_Environment,
    "BanyanHeart": BanyanHeart_Environment,
    "BEZMinimalistSecurity": BEZMinimalistSecurity_Environment,
    "BezoutIdentity": BezoutIdentity_Environment,
    "Binario": Binario_Environment,
    "Binario_NoAdjacencyRequirement": Binario_NoAdjacencyRequirement_Environment,
    "BinaryAlternation": BinaryAlternation_Environment,
    "BinaryLinearEquation_SolutionCounting": BinaryLinearEquation_SolutionCounting_Environment,
    "BinaryTreeLeafNumExpectation": BinaryTreeLeafNumExpectation_Environment,
    "BitEquationCounting": BitEquationCounting_Environment,
    "BitAndZero_PathCounting": BitAndZero_PathCounting_Environment,
    "BitwiseOperationSequenceCounting": BitwiseOperationSequenceCounting_Environment,
    "BlockImage": BlockImage_Environment,
    "BoundedAdjacencyDifference_Permutation_Counting": BoundedAdjacencyDifference_Permutation_Counting_Environment,
    "BoundedIntervalIntersection": BoundedIntervalIntersection_Environment,
    "BoundedMeanSubarrayCounting": BoundedMeanSubarrayCounting_Environment,
    "BoundedSubarrayCounting": BoundedSubarrayCounting_Environment,
    "BoxScheduling": BoxScheduling_Environment,
    "Bridge": Bridge_Environment,
    "BubbleSwapLowerBound_PermutationCounting": BubbleSwapLowerBound_PermutationCounting_Environment,
    "BucketSorting": BucketSorting_Environment,
    "CampfireParty": CampfireParty_Environment,
    "CampsitePuzzle": CampsitePuzzle_Environment,
    "Canon": Canon_Environment,
    "CantorExpansion": CantorExpansion_Environment,
    "CapitalCityEffect": CapitalCityEffect_Environment,
    "CardColoringCounting": CardColoringCounting_Environment,
    "CatalanNumberMod": CatalanNumberMod_Environment,
    "CheckAllCycleXorZero": CheckAllCycleXorZero_Environment,
    "ChoHamsters": ChoHamsters_Environment,
    "Cinema": Cinema_Environment,
    "Circuit": Circuit_Environment,
    "CirculatingDecimalCounting": CirculatingDecimalCounting_Environment,
    "CirculatingGrid": CirculatingGrid_Environment,
    "CleaningUp": CleaningUp_Environment,
    "ClearSymmetry": ClearSymmetry_Environment,
    "Clique_IndependentSet_Partitioning_Counting": Clique_IndependentSet_Partitioning_Counting_Environment,
    "CoinSquareGame": CoinSquareGame_Environment,
    "ColoringCounting": ColoringCounting_Environment,
    "CombinationOddSubsequenceCounting": CombinationOddSubsequenceCounting_Environment,
    "ConcatenationPartitionCountingSum": ConcatenationPartitionCountingSum_Environment,
    "CongruentEquation": CongruentEquation_Environment,
    "ConstructHackInterval": ConstructHackInterval_Environment,
    "ConvexHull": ConvexHull_Environment,
    "Cornfield": Cornfield_Environment,
    "CountdownEqual": CountdownEqual_Environment,
    "CountdownClose": CountdownClose_Environment,
    "CowDanceShow": CowDanceShow_Environment,
    "CRT": CRT_Environment,
    "Cryptarithmetic": Cryptarithmetic_Environment,
    "Cube_FixedLocalMaximumCounting": Cube_FixedLocalMaximumCounting_Environment,
    "CycleCounting": CycleCounting_Environment,
    "DecreasingDigitCounting": DecreasingDigitCounting_Environment,
    "DegreeFixed_SpanningTree": DegreeFixed_SpanningTree_Environment,
    "DeltaMinPopcount": DeltaMinPopcount_Environment,
    "DeltaNimGame": DeltaNimGame_Environment,
    "DerangementExtension": DerangementExtension_Environment,
    "DifferenceConstraintSystem": DifferenceConstraintSystem_Environment,
    "DifferenceConstraintSystemDAG": DifferenceConstraintSystemDAG_Environment,
    "DifferentColorPairing": DifferentColorPairing_Environment,
    "Differentiate": Differentiate_Environment,
    "DigitLISCounting": DigitLISCounting_Environment,
    "DiscreteLogarithm": DiscreteLogarithm_Environment,
    "Disinfection": Disinfection_Environment,
    "DistinctArrayPermutation": DistinctArrayPermutation_Environment,
    "DistinctEdgeColoredCompleteGraphCounting": DistinctEdgeColoredCompleteGraphCounting_Environment,
    "Division": Division_Environment,
    "DivisorFlipExpectation": DivisorFlipExpectation_Environment,
    "DoubleCrossCounting": DoubleCrossCounting_Environment,
    "DoublePalindromicStringCounting": DoublePalindromicStringCounting_Environment,
    "DoubleStackSorting": DoubleStackSorting_Environment,
    "DynDynamite": DynDynamite_Environment,
    "EightDigitPuzzle": EightDigitPuzzle_Environment,
    "EmperorWorries": EmperorWorries_Environment,
    "EnergyStorageMeter": EnergyStorageMeter_Environment,
    "EuclidGame": EuclidGame_Environment,
    "EvenDegreeGraphPartitioning": EvenDegreeGraphPartitioning_Environment,
    "Expression_AddingParenthese_Counting": Expression_AddingParenthese_Counting_Environment,
    "FaceRightWay": FaceRightWay_Environment,
    "FactorialTrailingZeroCount": FactorialTrailingZeroCount_Environment,
    "FBI_BinaryTree": FBI_BinaryTree_Environment,
    "Fibonacci": Fibonacci_Environment,
    "FibonacciContainingCounting": FibonacciContainingCounting_Environment,
    "Fibtrain": Fibtrain_Environment,
    "FireworkShow": FireworkShow_Environment,
    "FixedModK_Selection_Counting": FixedModK_Selection_Counting_Environment,
    "FixedOneEdgeNum_SpanningTree": FixedOneEdgeNum_SpanningTree_Environment,
    "FractionalProgramming": FractionalProgramming_Environment,
    "FractionalProgramming_BipartiteGraphMatching": FractionalProgramming_BipartiteGraphMatching_Environment,
    "FutoshikiPuzzle": FutoshikiPuzzle_Environment,
    "GasFireExtinguishers": GasFireExtinguishers_Environment,
    "GaussianElimination": GaussianElimination_Environment,
    "GCDFibonacciProduct": GCDFibonacciProduct_Environment,
    "GcdLcmCounting": GcdLcmCounting_Environment,
    "GCDOne_Counting": GCDOne_Counting_Environment,
    "GCDPrime_Counting": GCDPrime_Counting_Environment,
    "GoldWashing": GoldWashing_Environment,
    "GraMinimaGame": GraMinimaGame_Environment,
    "GradeRankingCounting": GradeRankingCounting_Environment,
    "GraphContainTreeCounting": GraphContainTreeCounting_Environment,
    "GraphIsomorphism": GraphIsomorphism_Environment,
    "GridBFS": GridBFS_Environment,
    "GridColoringCounting": GridColoringCounting_Environment,
    "GridComponent": GridComponent_Environment,
    "GridLocalMinimumCounting": GridLocalMinimumCounting_Environment,
    "GridParityConstruction": GridParityConstruction_Environment,
    "GridTriangleCounting": GridTriangleCounting_Environment,
    "HalvingChainCounting": HalvingChainCounting_Environment,
    "HamiltonianPath": HamiltonianPath_Environment,
    "HamiltonianPathExistence": HamiltonianPathExistence_Environment,
    "HeapCounting": HeapCounting_Environment,
    "HitoriPuzzle": HitoriPuzzle_Environment,
    "HungryRabbit": HungryRabbit_Environment,
    "HURWarehouseStore": HURWarehouseStore_Environment,
    "ImpParty": ImpParty_Environment,
    "IndividualSumBounded_SequenceCounting": IndividualSumBounded_SequenceCounting_Environment,
    "IntegerFactorizationCounting": IntegerFactorizationCounting_Environment,
    "IntegerProgramming": IntegerProgramming_Environment,
    "Integral": Integral_Environment,
    "InversionPair": InversionPair_Environment,
    "InversionPairK_Counting": InversionPairK_Counting_Environment,
    "Josephus": Josephus_Environment,
    "JugPuzzle": JugPuzzle_Environment,
    "KPartition": KPartition_Environment,
    "Kakurasu": Kakurasu_Environment,
    "KiddingMe": KiddingMe_Environment,
    "KingSorting": KingSorting_Environment,
    "KloBlocks": KloBlocks_Environment,
    "Knapsack": Knapsack_Environment,
    "KnightsAndKnaves": KnightsAndKnaves_Environment,
    "KosDicing": KosDicing_Environment,
    "Kth_BinaryTree": Kth_BinaryTree_Environment,
    "Kth_SemiBalancedBracketSequence": Kth_SemiBalancedBracketSequence_Environment,
    "KthSubsequence": KthSubsequence_Environment,
    "KUR": KUR_Environment,
    "LampChanging": LampChanging_Environment,
    "LandAcquisition": LandAcquisition_Environment,
    "LandformGenerationCounting": LandformGenerationCounting_Environment,
    "LargestConvexPolygon": LargestConvexPolygon_Environment,
    "LargestRectangle_AmongPoints": LargestRectangle_AmongPoints_Environment,
    "LAS": LAS_Environment,
    "LASLaser": LASLaser_Environment,
    "LCM": LCM_Environment,
    "LDSTwo_Counting": LDSTwo_Counting_Environment,
    "LightUpPuzzle": LightUpPuzzle_Environment,
    "LinkBeads": LinkBeads_Environment,
    "LIS_LDS_Concatenation": LIS_LDS_Concatenation_Environment,
    "LIZ_Lollipop": LIZ_Lollipop_Environment,
    "Longest_DoublePalindrome": Longest_DoublePalindrome_Environment,
    "Longest_MatchingSubsequence": Longest_MatchingSubsequence_Environment,
    "LongestMaxDiffBoundedInterval": LongestMaxDiffBoundedInterval_Environment,
    "LongestPath": LongestPath_Environment,
    "Longest_RepeatedPalindrome": Longest_RepeatedPalindrome_Environment,
    "MafMafia": MafMafia_Environment,
    "MagicSquarePuzzle": MagicSquarePuzzle_Environment,
    "MakingGrade": MakingGrade_Environment,
    "Matrix_BinaryExponentiation": Matrix_BinaryExponentiation_Environment,
    "MatrixPermutation_BothDiagonalOne": MatrixPermutation_BothDiagonalOne_Environment,
    "MatrixPermutationEquivalence": MatrixPermutationEquivalence_Environment,
    "MatrixPermutation_MainDiagonalOne": MatrixPermutation_MainDiagonalOne_Environment,
    "MatrixPooling": MatrixPooling_Environment,
    "MatrixRMQCounting": MatrixRMQCounting_Environment,
    "MaxDifferentGroupPairDivision": MaxDifferentGroupPairDivision_Environment,
    "MaxGridPathIntersection": MaxGridPathIntersection_Environment,
    "MaxMinimum_AfterIntervalAddition": MaxMinimum_AfterIntervalAddition_Environment,
    "MaxMultSplit": MaxMultSplit_Environment,
    "MaxMultiplicationFixedSum": MaxMultiplicationFixedSum_Environment,
    "MaxNoConflictingBombs": MaxNoConflictingBombs_Environment,
    "Max_NonAdjacent_KElementSum": Max_NonAdjacent_KElementSum_Environment,
    "MaxPermutation": MaxPermutation_Environment,
    "MaxRMQExpectation": MaxRMQExpectation_Environment,
    "MaxSegmentCoverageConstraint": MaxSegmentCoverageConstraint_Environment,
    "MaxSumLDS": MaxSumLDS_Environment,
    "MaxThreeSquareSum": MaxThreeSquareSum_Environment,
    "Max_TreeConstrainedPermutation_Weight": Max_TreeConstrainedPermutation_Weight_Environment,
    # NOTE(review): the value is spelled "Coverahe" while the key says
    # "Coverage"; kept as-is -- confirm against the class definition and its
    # import before renaming either side.
    "MaxTree_KPathCoverage": MaxTree_KPathCoverahe_Environment,
    "MaxTreeXorPath": MaxTreeXorPath_Environment,
    "MaxWeightPalindromicSubstring": MaxWeightPalindromicSubstring_Environment,
    "MaxXorPath": MaxXorPath_Environment,
    "MaxXorSet": MaxXorSet_Environment,
    "MaximumAchromaticNumber": MaximumAchromaticNumber_Environment,
    "MaximumClique": MaximumClique_Environment,
    "MaximumDivisor": MaximumDivisor_Environment,
    "MaximumIndependentSetGrid": MaximumIndependentSetGrid_Environment,
    "Maximum_IndependentSet_Tree": Maximum_IndependentSet_Tree_Environment,
    "MaximumLexicographicalOrderSubsequence": MaximumLexicographicalOrderSubsequence_Environment,
    "MaximumPointSegmentMatching": MaximumPointSegmentMatching_Environment,
    "Maximum_SubsequenceNum": Maximum_SubsequenceNum_Environment,
    "MaximumWeightMatching": MaximumWeightMatching_Environment,
    "Maze": Maze_Environment,
    "MinConversionToCycleCost": MinConversionToCycleCost_Environment,
    "MinCostReducingLNDS": MinCostReducingLNDS_Environment,
    "MinCostTreeCoverage": MinCostTreeCoverage_Environment,
    "MinCubeAssignment": MinCubeAssignment_Environment,
    "MinDivisionSumXor": MinDivisionSumXor_Environment,
    "MinInorderBinaryTree": MinInorderBinaryTree_Environment,
    "MinKDivisorNumber": MinKDivisorNumber_Environment,
    "MinNoSolutionLinearDiophantineEquation": MinNoSolutionLinearDiophantineEquation_Environment,
    "MinNonsubstring": MinNonsubstring_Environment,
    "MinPairSumMultiplicationPermutation": MinPairSumMultiplicationPermutation_Environment,
    "MinPathCover_DAG": MinPathCover_DAG_Environment,
    "MinSumChebyshevDistance": MinSumChebyshevDistance_Environment,
    "MinSumDistanceSquare": MinSumDistanceSquare_Environment,
    "MinSumPreXor": MinSumPreXor_Environment,
    "MinSwapTwoPermutations": MinSwapTwoPermutations_Environment,
    "MinXorPair": MinXorPair_Environment,
    "Minesweeping": Minesweeping_Environment,
    "MinimalCyclicShift": MinimalCyclicShift_Environment,
    "MinimumChromaticNumber": MinimumChromaticNumber_Environment,
    "MinimumChromaticNumber_SegmentOverlap": MinimumChromaticNumber_SegmentOverlap_Environment,
    "MinimumCost_MaximumFlow": MinimumCost_MaximumFlow_Environment,
    "Minimum_CrossingEdges_GraphPartition": Minimum_CrossingEdges_GraphPartition_Environment,
    "MinimumDirectedSpanningTree": MinimumDirectedSpanningTree_Environment,
    "Minimum_DominatingInterval": Minimum_DominatingInterval_Environment,
    "Minimum_DominatingSet": Minimum_DominatingSet_Environment,
    "Minimum_DominatingSet_Grid": Minimum_DominatingSet_Grid_Environment,
    "MinimumFibonacciRepresentation": MinimumFibonacciRepresentation_Environment,
    "MinimumHarmoniousChromaticNumber": MinimumHarmoniousChromaticNumber_Environment,
    "MinimumIntervalCoverage": MinimumIntervalCoverage_Environment,
    "Minimum_MaxAbsSlicer": Minimum_MaxAbsSlicer_Environment,
    "Minimum_MaxSlicer": Minimum_MaxSlicer_Environment,
    "MinimumRatioPath": MinimumRatioPath_Environment,
    "MinimumSpanningTree": MinimumSpanningTree_Environment,
    "MinimumSpanningTreeCounting": MinimumSpanningTreeCounting_Environment,
    "MinimumSteinerTree": MinimumSteinerTree_Environment,
    "MinimumSumDifferenceSubmatrix": MinimumSumDifferenceSubmatrix_Environment,
    "MinimumTreeWeightedDominatingAncestor": MinimumTreeWeightedDominatingAncestor_Environment,
    "MinimumUnconflictedGridKMax": MinimumUnconflictedGridKMax_Environment,
    "Minimum_VertexCover": Minimum_VertexCover_Environment,
    "MinimumWeightedSpanningTree": MinimumWeightedSpanningTree_Environment,
    "MitterTransportation": MitterTransportation_Environment,
    "MixedGraphEulerianCircuit": MixedGraphEulerianCircuit_Environment,
    "MoneyChargingGame": MoneyChargingGame_Environment,
    "MonochromeBlockCounting": MonochromeBlockCounting_Environment,
    "MonotonicStack": MonotonicStack_Environment,
    "MostComponentTreeRemovingTwoPaths": MostComponentTreeRemovingTwoPaths_Environment,
    "MostNumEdge_NonSelfIsomorphism": MostNumEdge_NonSelfIsomorphism_Environment,
    "MultiDrink": MultiDrink_Environment,
    "MultipleFlippingGame": MultipleFlippingGame_Environment,
    "Multiplication": Multiplication_Environment,
    "MYJ": MYJ_Environment,
    "NANDResultCounting": NANDResultCounting_Environment,
    "NegativeBase": NegativeBase_Environment,
    "NewNimGame": NewNimGame_Environment,
    "NextPalindromic": NextPalindromic_Environment,
    "NinePuzzle": NinePuzzle_Environment,
    "NoAdjacentGirlCounting": NoAdjacentGirlCounting_Environment,
    "NoDoubleTripleCounting": NoDoubleTripleCounting_Environment,
    "NotContainingStringCounting": NotContainingStringCounting_Environment,
    "NumberPartitionCounting": NumberPartitionCounting_Environment,
    "Numbrix": Numbrix_Environment,
    "OddVisitation": OddVisitation_Environment,
    "ODLDistance": ODLDistance_Environment,
    "PairMoreOneCounting": PairMoreOneCounting_Environment,
    "PalembangBridges": PalembangBridges_Environment,
    "PalindromePartitionCounting": PalindromePartitionCounting_Environment,
    "PalindromicSubstringNumberCounting": PalindromicSubstringNumberCounting_Environment,
    "PanSolarPanels": PanSolarPanels_Environment,
    "Path_NoGoingBack_Counting": Path_NoGoingBack_Counting_Environment,
    "Patrol": Patrol_Environment,
    "PCPPermutation": PCPPermutation_Environment,
    "PipelineArrangement": PipelineArrangement_Environment,
    "POLPolarization": POLPolarization_Environment,
    "PolyaModel": PolyaModel_Environment,
    "PolynomialFactorization": PolynomialFactorization_Environment,
    "PolynomialInterpolation": PolynomialInterpolation_Environment,
    "PolynomialMinimum": PolynomialMinimum_Environment,
    "PolynomialRemainder": PolynomialRemainder_Environment,
    "PowerCycle": PowerCycle_Environment,
    "PowerShortcut": PowerShortcut_Environment,
    "PowerNest": PowerNest_Environment,
    "PrefixConcatenation": PrefixConcatenation_Environment,
    "PrefixProductMODDistinctPermutation": PrefixProductMODDistinctPermutation_Environment,
    "PrefixSumMODDistinctPermutation": PrefixSumMODDistinctPermutation_Environment,
    "Prefixuffix": Prefixuffix_Environment,
    "PreorderTraversal": PreorderTraversal_Environment,
    "PrimeGraph_MinimumChromaticNumber": PrimeGraph_MinimumChromaticNumber_Environment,
    "ProtectingFlowers": ProtectingFlowers_Environment,
    "PythagoreanGraph_IndependentSetCounting": PythagoreanGraph_IndependentSetCounting_Environment,
    "QuadMagicItems": QuadMagicItems_Environment,
    "QuadraticFunctionSegmentation": QuadraticFunctionSegmentation_Environment,
    "QuantumLockPuzzle": QuantumLockPuzzle_Environment,
    "QueenPlacement": QueenPlacement_Environment,
    "RandomRangeMaxExpectation": RandomRangeMaxExpectation_Environment,
    "RangeConstrained_IncreasingSequence_Counting": RangeConstrained_IncreasingSequence_Counting_Environment,
    "RangeFourSequenceConstruction": RangeFourSequenceConstruction_Environment,
    "RangeShrinkingSequenceCounting": RangeShrinkingSequenceCounting_Environment,
    "RecursiveFunction": RecursiveFunction_Environment,
    "RecursiveSequenceSumConstruction": RecursiveSequenceSumConstruction_Environment,
    "RepeatSequenceLNDS": RepeatSequenceLNDS_Environment,
    "RootExtraction": RootExtraction_Environment,
    "RoundRobin": RoundRobin_Environment,
    "RoundTableAssignment": RoundTableAssignment_Environment,
    "RoyalLockCounting": RoyalLockCounting_Environment,
    "SaladBar": SaladBar_Environment,
    "SalesmanFatigue": SalesmanFatigue_Environment,
    "SameAdjacencyCounting": SameAdjacencyCounting_Environment,
    "SAT": SAT_Environment,
    "SCC_Sequence_Counting": SCC_Sequence_Counting_Environment,
    "SecretCowCode": SecretCowCode_Environment,
    "SegmentMinLengthEqual_Counting": SegmentMinLengthEqual_Counting_Environment,
    "SegmentTreeSortingCounting": SegmentTreeSortingCounting_Environment,
    "SelfPowerSequenceMOD": SelfPowerSequenceMOD_Environment,
    "SetCover": SetCover_Environment,
    "SetSplitting": SetSplitting_Environment,
    "SharedSubstringCounting": SharedSubstringCounting_Environment,
    "ShortestPath": ShortestPath_Environment,
    "ShortestPathCountConstruction": ShortestPathCountConstruction_Environment,
    "ShortestUnicolorSubstring": ShortestUnicolorSubstring_Environment,
    "SingingGirlStory": SingingGirlStory_Environment,
    "SingleStackSorting": SingleStackSorting_Environment,
    "SkaRockGarden": SkaRockGarden_Environment,
    "SkyscraperPuzzle": SkyscraperPuzzle_Environment,
    "SkyscraperSumPuzzle": SkyscraperSumPuzzle_Environment,
    "SlidingWindow": SlidingWindow_Environment,
    "SLOElephants": SLOElephants_Environment,
    "SmallestBinaryMultiple": SmallestBinaryMultiple_Environment,
    "SmallestCircle": SmallestCircle_Environment,
    "Sorting": Sorting_Environment,
    "SpiralMatrix": SpiralMatrix_Environment,
    "SplittingGame": SplittingGame_Environment,
    "SpyNetwork": SpyNetwork_Environment,
    "SquSquarks": SquSquarks_Environment,
    "SquareUndamagedPointCounting": SquareUndamagedPointCounting_Environment,
    "StarBattle": StarBattle_Environment,
    "StirlingSecond": StirlingSecond_Environment,
    "StoneGame": StoneGame_Environment,
    "StoneIntervalsGame": StoneIntervalsGame_Environment,
    "StringPartitionShuffle": StringPartitionShuffle_Environment,
    "StringReversalConstruction": StringReversalConstruction_Environment,
    "STUWell": STUWell_Environment,
    "StuntFlying": StuntFlying_Environment,
    "SubarraySumXor": SubarraySumXor_Environment,
    "SubarrayXorSum": SubarrayXorSum_Environment,
    "SubgraphIsomorphism": SubgraphIsomorphism_Environment,
    "SubmatrixSumDivisibleCounting": SubmatrixSumDivisibleCounting_Environment,
    "SubsequenceReversalLNDS": SubsequenceReversalLNDS_Environment,
    "SubsetSum": SubsetSum_Environment,
    "SubsetSumSequence": SubsetSumSequence_Environment,
    "Sudoku": Sudoku_Environment,
    "Sum_DivisorNum": Sum_DivisorNum_Environment,
    "SumGCD": SumGCD_Environment,
    "SumGCDWithIndividual": SumGCDWithIndividual_Environment,
    "SumLCM": SumLCM_Environment,
    "SumManhattan_CurvedSurface": SumManhattan_CurvedSurface_Environment,
    "SumMOD": SumMOD_Environment,
    "SumPHIInterval": SumPHIInterval_Environment,
    "SumProductDivisorNum": SumProductDivisorNum_Environment,
    "SumPseudoEuclidean": SumPseudoEuclidean_Environment,
    "SumSetMultiplication": SumSetMultiplication_Environment,
    "SumSpanningTreeGCD": SumSpanningTreeGCD_Environment,
    "SumTriangleArea": SumTriangleArea_Environment,
    "SumXorDivisorNum": SumXorDivisorNum_Environment,
    "SurvoPuzzle": SurvoPuzzle_Environment,
    "TakingPrimeGame": TakingPrimeGame_Environment,
    "TaskArrangement": TaskArrangement_Environment,
    "TetrisAttack": TetrisAttack_Environment,
    "ThreeStringCommonSubsequenceCounting": ThreeStringCommonSubsequenceCounting_Environment,
    "ThreeVertexCycleCounting": ThreeVertexCycleCounting_Environment,
    "TopologicalSort": TopologicalSort_Environment,
    "TopologicalSort_MinimalLexicographicalOrder": TopologicalSort_MinimalLexicographicalOrder_Environment,
    "Tournament_LongestPath": Tournament_LongestPath_Environment,
    "TransmissionDelay": TransmissionDelay_Environment,
    "TreeAddOneEdgeDiameter": TreeAddOneEdgeDiameter_Environment,
    "TreeCenter": TreeCenter_Environment,
    "TreeChangeOneEdgeDiameter": TreeChangeOneEdgeDiameter_Environment,
    "TreeColoring": TreeColoring_Environment,
    "Tree_DistanceEqualTriad_Counting": Tree_DistanceEqualTriad_Counting_Environment,
    "TreeDynamic_XORZeroPath": TreeDynamic_XORZeroPath_Environment,
    "TreeElimination_Expectation": TreeElimination_Expectation_Environment,
    "TreeEvenPartitioning": TreeEvenPartitioning_Environment,
    "TreeMaximumVisitedVertex": TreeMaximumVisitedVertex_Environment,
    "TreeRandomWalkExpectation": TreeRandomWalkExpectation_Environment,
    "TreeTopologicalSequenceCounting": TreeTopologicalSequenceCounting_Environment,
    "TriumphalArch": TriumphalArch_Environment,
    "TwiddlePuzzle": TwiddlePuzzle_Environment,
    "TwoSAT": TwoSAT_Environment,
    "TwoSet_AllCoprime_Counting": TwoSet_AllCoprime_Counting_Environment,
    "UndamagedSubmatrixCounting": UndamagedSubmatrixCounting_Environment,
    "ValueDiminishingSelection": ValueDiminishingSelection_Environment,
    "Vertex_KCenter": Vertex_KCenter_Environment,
    "VirusSynthesis": VirusSynthesis_Environment,
    "VisibleLine": VisibleLine_Environment,
    "WarehouseConstruction": WarehouseConstruction_Environment,
    "WeightedBinaryTree": WeightedBinaryTree_Environment,
    "WeightedLIS": WeightedLIS_Environment,
    "WhackAMole": WhackAMole_Environment,
    "WIL": WIL_Environment,
    "WYC": WYC_Environment,
    "WYRLevelingGround": WYRLevelingGround_Environment,
    "XorEquationCounting": XorEquationCounting_Environment,
    "ZeroPrefixSubsetCounting": ZeroPrefixSubsetCounting_Environment,
}
|
server/Gym/environments/ab_program_simulation/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import ABProgramSimulation_Environment
|
server/Gym/environments/ab_program_simulation/environment.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from typing import Optional, List
|
| 3 |
+
from ...environment import VerifiableEnvironment
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class ABProgramSimulation_Environment(VerifiableEnvironment) : # Source : https://x.com/VictorTaelin/status/1776096481704804789
    """
    Task: reduce a randomly generated A::B token program to its final state.

    `_generate` reads two entries from `self.parameter`:
        N         -- number of tokens in the generated program (must be >= 1)
        max_steps -- number of iterations of the rewrite loop allowed per
                     sampled program (must be >= 1); a program whose final
                     state is not detected within that budget is re-sampled
    """
    prompt_template = \
r"""A::B is a system with 4 tokens: `A#`, `#A`, `B#` and `#B`.

An A::B program is a sequence of tokens, e.g., `B# A# #B #A B#`.

To *compute* a program, we must rewrite neighbor tokens, using the rules (whenever two neighbor tokens have their `#` facing each-other, they must be rewritten according to the corresponding rule) :
+ `A# #A` ... becomes ... `` (nothing)
+ `A# #B` ... becomes ... `#B A#`
+ `B# #A` ... becomes ... `#A B#`
+ `B# #B` ... becomes ... `` (nothing)

Please give the final state of the program: {program}
An example for output format: `B# A# A#`
"""

    # The four tokens, in the index order used when sampling a program.
    _TOKENS = ("A#", "#A", "B#", "#B")

    # Replacement for each adjacent pair that the prompt's rules rewrite.
    _REWRITE_RULES = {
        ("A#", "#A") : [],
        ("A#", "#B") : ["#B", "A#"],
        ("B#", "#A") : ["#A", "B#"],
        ("B#", "#B") : [],
    }

    def __init__(self,
                 wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Initialize the ABProgramSimulation_Environment instance.

        Args:
            wrong_format: reward returned when the output cannot be processed
                or contains something other than the four tokens.
            correct_answer: reward returned when the tokens equal the gold answer.
            wrong_answer: reward returned for well-formed but different tokens.
            **kwargs: forwarded unchanged to the base-class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "correct_answer" : correct_answer,
            "wrong_answer" : wrong_answer,
        }


    @classmethod
    def _rewrite_once(cls, program : List[str]) -> Optional[List[str]] :
        """Apply the leftmost applicable rule once; return None when no rule applies."""
        for i in range(len(program) - 1) :
            replacement = cls._REWRITE_RULES.get((program[i], program[i + 1]))
            if replacement is not None :
                return program[: i] + replacement + program[i + 2 :]
        return None


    def _generate(self) -> None :
        """
        Sample a program and store it with its final state in `self.parameter`.

        Writes the keys "program" (list of tokens), "reference_answer"
        (space-joined final tokens) and "gold_answer" (list of final tokens).
        Sampling repeats until a program's final state is detected within
        `max_steps` loop iterations.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 1, "N should be greater than or equal to 1"

        assert "max_steps" in self.parameter, "max_steps is required in parameter"
        max_steps = self.parameter["max_steps"]
        assert max_steps >= 1, "max_steps should be greater than or equal to 1"

        while True :
            # Random per-token weights, normalised, so programs are not always uniform mixes.
            distribution = [random.randint(1, N) for _ in range(4)]
            distribution = [d / sum(distribution) for d in distribution]
            program = [self._TOKENS[i] for i in random.choices(range(4), distribution, k = N)]
            self.parameter["program"] = program

            current, final = program.copy(), None
            for _ in range(max_steps) :
                rewritten = self._rewrite_once(current)
                if rewritten is None :
                    # Irreducible: this is the final state.
                    final = current
                    break
                current = rewritten

            if final is not None :
                self.parameter["reference_answer"] = " ".join(final)
                self.parameter["gold_answer"] = final
                break


    def _prompt_generate(self) -> str :
        """Fill the prompt template with the space-joined program tokens."""
        return self.prompt_template.format(program = " ".join(self.parameter["program"]))


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """
        Split the answer into whitespace-separated tokens.

        Returns None only when `answer` is None. `str.split()` cannot raise
        ValueError, so no exception handling is needed here; token validity
        is checked by `scorer`.
        """
        if answer is None :
            return None # Invalid answer format
        return answer.strip().split()


    def scorer(self, output : str) -> float :
        """
        Score a model output against the stored gold answer.

        `self.processor` comes from the base class (not visible here);
        presumably it extracts the answer text and delegates to `_process`.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]

        assert isinstance(processed_result, list), "processed_result should be a list"

        if not all(token in self._TOKENS for token in processed_result) :
            return self.rewards["wrong_format"]

        if processed_result == self.parameter["gold_answer"] :
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
|
server/Gym/environments/add_multiple_divisible_counting/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import AddMultiple_Divisible_Counting_Environment
|
server/Gym/environments/add_multiple_divisible_counting/environment.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
import random
|
| 3 |
+
from typing import Optional
|
| 4 |
+
from ...environment import VerifiableEnvironment
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class AddMultiple_Divisible_Counting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4466
    """
    Task: count pairs 1 <= a < b <= N whose product is divisible by their sum.

    `_generate` reads "MAX_N" (>= 6) from `self.parameter`, draws N uniformly
    from [6, MAX_N] and stores "N" and "reference_answer".
    """
    prompt_template = \
r"""Please compute the number of pairs (a, b) such that:
- 1 ≤ a < b ≤ {N}
- a × b is divisible by a + b

**Output Format:** Your final answer should be a single integer — the number of such pairs (a, b)."""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the AddMultiple_Divisible_Counting_Environment instance.

        Args:
            wrong_format: reward for unparsable or non-positive answers.
            rewarding_strategy: "(min/max)^beta" or "gold=answer".
            rewarding_weight: multiplier applied to the strategy's score.
            rewarding_beta: exponent used by the "(min/max)^beta" strategy.
            **kwargs: forwarded unchanged to the base-class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )


    def _generate(self) -> None :
        """Draw N and compute the reference count with a Möbius-weighted sum."""
        assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
        MAX_N = self.parameter["MAX_N"]
        assert MAX_N >= 6, "MAX_N should be greater than or equal to 6"

        N = self.parameter["N"] = random.randint(6, MAX_N)

        def quotient_sum(lo : int, value : int) -> int :
            """
            Return the sum of value // k over k = lo+1 .. 2*lo-1.

            Consecutive k sharing the same quotient are added as one block.
            """
            if value == 0 :
                return 0
            total = 0
            stop = lo << 1 # exclusive upper bound for k
            k = lo + 1
            while k < stop :
                quotient = value // k
                if quotient == 0 :
                    # Every larger k also gives quotient 0.
                    break
                block_end = min(value // quotient, stop - 1)
                total += (block_end - k + 1) * quotient
                k = block_end + 1
            return total

        root = math.isqrt(N)

        # Linear sieve producing the Möbius function on 1..root.
        mobius = [0] * (root + 1)
        mobius[1] = 1
        composite = [False] * (root + 1)
        primes = []
        for n in range(2, root + 1) :
            if not composite[n] :
                primes.append(n)
                mobius[n] = -1
            for p in primes :
                multiple = n * p
                if multiple > root :
                    break
                composite[multiple] = True
                if n % p == 0 :
                    # p divides n, so p*p divides the multiple.
                    mobius[multiple] = 0
                    break
                mobius[multiple] = -mobius[n]

        answer = 0
        for d in range(1, root + 1) :
            sign = mobius[d]
            if sign == 0 :
                continue
            square = d * d
            for j in range(1, root // d + 1) :
                answer += sign * quotient_sum(j, N // (square * j))
        assert answer > 0, "Answer should be greater than 0"
        self.parameter["reference_answer"] = answer

    def _prompt_generate(self) -> str :
        """Fill the prompt template with the drawn N."""
        return self.prompt_template.format(N = self.parameter["N"])


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return None if absent or not an integer."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None

    def scorer(self, output : str) -> float :
        """
        Score a model output against the reference count.

        `self.processor` comes from the base class (not visible here);
        presumably it extracts the answer text and delegates to `_process`.
        Non-positive answers are treated like a wrong format.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        if processed_result <= 0 :
            return self.rewards["wrong_format"]

        strategy = self.rewards["rewarding_strategy"]
        reference = self.parameter["reference_answer"]
        if strategy == "(min/max)^beta" :
            smaller, larger = min(reference, processed_result), max(reference, processed_result)
            return self.rewards["rewarding_weight"] * ((smaller / larger) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (processed_result == reference)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
|
server/Gym/environments/addition_table/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import AdditionTable_Environment
|
server/Gym/environments/addition_table/environment.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from typing import Optional, Dict
|
| 3 |
+
from ...environment import VerifiableEnvironment
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class AdditionTable_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1013
    """
    Task: recover a base and a letter-to-digit assignment from an addition table.

    `_generate` reads "N" (3..26) from `self.parameter`, shuffles the first N
    lowercase letters into digit positions and stores "digit2letter" (the
    letter at index d stands for digit d) and "reference_answer".
    """
    prompt_template = \
r"""You are given an unknown base-N number system (N is an integer ≥ 3), and {N} distinct digits {ALL_LETTERS} in that system. The digits satisfy the following equations in base-N:

{EQUATIONS}

Note:
- {ALL_LETTERS} are distinct digits in the range [0, N−1].
- Expressions like ba represent base-N numbers formed by **concatenation**. For example, if a=1 and b=2, then ba = "21" in base-N.

Your task is to find the correct base N (in decimal), and the values of {ALL_LETTERS} (also in decimal) that satisfy all the equations.

Output Format:
Your final answer should be a single line containing N, {ALL_LETTERS} (all in decimal), separated by **spaces**.
Example: `{N_plus_1} {EXAMPLE_1}` (do **NOT** include the backticks or quotes); this means N={N_plus_1}, {EXAMPLE_2}.
"""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_answer : float = -0.5, wrong_N : float = 0.0, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 3.0,
                 **kwargs) :
        """
        Initialize the AdditionTable_Environment instance.

        Args:
            wrong_format: reward when no answer could be processed at all.
            invalid_answer: reward when the answer is not N+1 integers.
            wrong_N: reward when the reported base differs from the true one.
            rewarding_strategy: "mean([gold=answer])^beta" or "gold=answer".
            rewarding_weight: multiplier applied to the strategy's score.
            rewarding_beta: exponent used by the mean-based strategy.
            **kwargs: forwarded unchanged to the base-class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            invalid_answer = invalid_answer,
            wrong_N = wrong_N,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )


    def _generate(self) -> None :
        """Shuffle the letters into digit slots and record the reference answer."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N in range(3, 26 + 1), "N should be in the range [3, 26]"

        letters = [chr(code) for code in range(97, 97 + N)]
        digit2letter = self.parameter["digit2letter"] = list(letters)
        random.shuffle(digit2letter)

        # Invert the shuffle: which digit does each letter stand for?
        letter2digit = {}
        for digit, letter in enumerate(digit2letter) :
            letter2digit[letter] = digit
        digits_in_letter_order = " ".join(str(letter2digit[letter]) for letter in letters)
        self.parameter["reference_answer"] = "{} {}".format(N, digits_in_letter_order)


    def convert_to_expression(self, n : int) -> str :
        """Write the non-negative integer `n` in base N using the letter digits."""
        N = self.parameter["N"]
        digit2letter = self.parameter["digit2letter"]

        if n == 0 :
            return digit2letter[0]

        symbols = []
        while n > 0 :
            n, digit = divmod(n, N)
            symbols.append(digit2letter[digit])
        # Digits were produced least-significant first.
        return "".join(reversed(symbols))


    def _prompt_generate(self) -> str :
        """Build the prompt: one equation per unordered pair of letters (a <= b)."""
        N = self.parameter["N"]
        letters = [chr(code) for code in range(97, 97 + N)]

        letter2digit = {letter : digit for digit, letter in enumerate(self.parameter["digit2letter"])}

        equations = [
            "{} + {} = {}".format(first, second, self.convert_to_expression(letter2digit[first] + letter2digit[second]))
            for index, first in enumerate(letters)
            for second in letters[index :]
        ]

        return self.prompt_template.format(
            ALL_LETTERS = ", ".join(letters),
            EQUATIONS = "\n".join(equations),
            N = N,
            N_plus_1 = N + 1,
            EXAMPLE_1 = " ".join(str(value) for value in range(N)),
            EXAMPLE_2 = ", ".join("{}={}".format(letter, value) for value, letter in enumerate(letters)),
        )


    def _process(self, answer : Optional[str]) -> Optional[Dict] :
        """
        Parse "N d_a d_b ..." into dict(N=..., digits=[...]).

        Returns None when `answer` is None, and an empty dict when the text
        is not exactly N+1 integers (N being the true base).
        """
        if answer is None :
            return None

        tokens = answer.strip().split()
        try :
            values = [int(token) for token in tokens]
        except ValueError :
            return dict()

        if len(values) != self.parameter["N"] + 1 :
            return dict()
        return dict(N = values[0], digits = values[1 :])


    def scorer(self, output : str) -> float :
        """
        Score a model output against the hidden letter-to-digit assignment.

        `self.processor` comes from the base class (not visible here);
        presumably it extracts the answer text and delegates to `_process`.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        if not processed_result :
            return self.rewards["invalid_answer"]

        N = processed_result["N"]
        if N != self.parameter["N"] :
            return self.rewards["wrong_N"]

        predict_digits = processed_result["digits"]
        assert len(predict_digits) == N, "digits should have the same length as N"

        letter2digit = {letter : digit for digit, letter in enumerate(self.parameter["digit2letter"])}
        assert len(letter2digit) == N, "letter2digit should have the same length as N"
        gold_digits = [letter2digit[chr(code)] for code in range(97, 97 + N)]

        matches = [gold == predicted for gold, predicted in zip(gold_digits, predict_digits)]

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "mean([gold=answer])^beta" :
            return self.rewards["rewarding_weight"] * ((sum(matches) / N) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * all(matches)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
|
server/Gym/environments/almost_complete_graph_cycle_counting/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import AlmostCompleteGraphCycleCounting_Environment
|
server/Gym/environments/almost_complete_graph_cycle_counting/environment.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from typing import Optional
|
| 3 |
+
from ...environment import VerifiableEnvironment
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class AlmostCompleteGraphCycleCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3862
    """
    Task: count simple cycles (mod MOD) in K_N with the edge {1, N} removed.

    `_generate` reads "MAX_N" (>= 4) from `self.parameter`, draws N from
    [4, MAX_N] and an odd modulus, and stores "N", "MOD" and "reference_answer".
    """
    prompt_template = \
r"""Consider a graph with {N} vertices labeled from 1 to {N}. Every pair of vertices is connected by an undirected edge, except for the edge between vertices 1 and {N} (so the graph has {N} × ({N} - 1) / 2 - 1 edges).

What's the number of **simple cycles** in this graph? A simple cycle must:
- Have at least 3 vertices,
- Contain no repeated vertices or edges,
- Be considered the same as any cycle with the same set of edges (regardless of order or starting point); for example, `(1, 2, 3, 4)` and `(2, 1, 4, 3)` are the same, but `(1, 2, 3, 4)` and `(2, 1, 3, 4)` are different.
Output the answer modulo {MOD}."""

    def __init__(self,
                 max_MOD : int = 1000000,
                 wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Initialize the AlmostCompleteGraphCycleCounting_Environment instance.

        Args:
            max_MOD: controls the range from which the odd modulus is drawn.
            wrong_format: reward when the output is not an integer.
            wrong_range: reward when the integer is outside [0, MOD).
            correct_answer: reward when the integer equals the reference.
            wrong_answer: reward for any other in-range integer.
            **kwargs: forwarded unchanged to the base-class constructor.
        """
        super().__init__(**kwargs)

        self.max_MOD = max_MOD
        self.rewards = dict(
            wrong_format = wrong_format,
            wrong_range = wrong_range,
            correct_answer = correct_answer,
            wrong_answer = wrong_answer,
        )


    def _generate(self) -> None :
        """Draw N and MOD, then compute the reference answer by a linear recurrence."""
        assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
        MAX_N = self.parameter["MAX_N"]
        assert MAX_N >= 4, "MAX_N should be greater than or equal to 4"

        N = self.parameter["N"] = random.randint(4, MAX_N)

        # Always odd and >= 3, so 2 is invertible. Note that for an even
        # max_MOD the largest value this can produce is max_MOD + 1.
        MOD = self.parameter["MOD"] = 2 * random.randint(1, self.max_MOD // 2) + 1

        INV2 = (MOD + 1) // 2 # modular inverse of 2 for odd MOD

        def choose_two(k : int) -> int :
            """Return k * (k - 1) / 2 modulo MOD."""
            return k * (k - 1) * INV2 % MOD

        if N <= 3 :
            answer = 0
        else :
            # Reading of the recurrence (checked by hand for N = 4 and N = 5):
            # before step i, `cycles` is the cycle count of the complete graph
            # on i-1 vertices and `paths` the number of simple paths between
            # two fixed vertices of it; both start from the triangle (1 and 2).
            cycles, paths = 1, 2
            for i in range(4, N) :
                cycles = (cycles + paths * choose_two(i - 1)) % MOD
                paths = (paths * (i - 2) + 1) % MOD
            # Vertex N is adjacent to only N-2 vertices (not to vertex 1).
            answer = (cycles + paths * choose_two(N - 2)) % MOD

        self.parameter["reference_answer"] = answer


    def _prompt_generate(self) -> str :
        """Fill the prompt template with N and MOD."""
        return self.prompt_template.format(N = self.parameter["N"], MOD = self.parameter["MOD"])


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return None if absent or not an integer."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """
        Score a model output against the reference residue.

        `self.processor` comes from the base class (not visible here);
        presumably it extracts the answer text and delegates to `_process`.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        if not (0 <= processed_result < self.parameter["MOD"]) :
            return self.rewards["wrong_range"]
        if processed_result == self.parameter["reference_answer"] :
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
|
server/Gym/environments/and_or_sequence_counting/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import AndOr_Sequence_Counting_Environment
|
server/Gym/environments/and_or_sequence_counting/environment.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from typing import Optional
|
| 3 |
+
from ...environment import VerifiableEnvironment
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class AndOr_Sequence_Counting_Environment(VerifiableEnvironment) :
    """
    Task: count arrays B for which A & B is non-decreasing and A | B is non-increasing.

    `_generate` reads "N" (>= 2) and "M" (>= 1) from `self.parameter`, draws
    A with N values in [0, 2^M - 1] and stores "A" and "reference_answer".
    """
    prompt_template = \
r"""You are given an integer array `A` of length {N}:
{A}

Please count the number of valid integer arrays `B` of length {N} that satisfy the following conditions:
- For all indices 0 <= i <= {N_minus_1}, the value B[i] must be in the range: 0 <= B[i] < 2^{M} = {power_2_M}
- For all indices 0 <= i < {N_minus_1}, the following bitwise conditions hold:
- (A[i] & B[i]) <= (A[i + 1] & B[i + 1])
- (A[i] | B[i]) >= (A[i + 1] | B[i + 1])
- (Here, `&` is the bitwise AND operator and `|` is the bitwise OR operator.)

**Output Format:** Your final answer should be a single integer — the number of valid arrays `B` that satisfy all the above conditions."""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the AndOr_Sequence_Counting_Environment instance.

        Args:
            wrong_format: reward for unparsable or negative answers.
            rewarding_strategy: "(min/max)^beta" or "gold=answer".
            rewarding_weight: multiplier applied to the strategy's score.
            rewarding_beta: exponent used by the "(min/max)^beta" strategy.
            **kwargs: forwarded unchanged to the base-class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }

    def _generate(self) -> None :
        """Draw A and store the product of the two interval-DP counts as the reference."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        assert "M" in self.parameter, "M is required in parameter"
        M = self.parameter["M"]
        assert M >= 1, "M should be greater than or equal to 1"

        A = self.parameter["A"] = [random.randint(0, 2 ** M - 1) for i in range(N)]

        def interval_count(suffix_bit : int) -> int :
            """
            Run the layered interval DP over bits 0 .. M-1 of A.

            For each bit b and interval [l, r], the new count is the sum over
            split points x in l-1 .. r of
                count_prev[l][x] * count_prev[x+1][r]
            (an empty side counts as 1), restricted to splits where bit b of
            every A[x+1 .. r] equals `suffix_bit`. Before the first bit every
            non-empty interval has count 1.
            """
            previous = [[1 if l <= r else 0 for r in range(N)] for l in range(N)]

            for b in range(M) :
                # ones[i] = number of A[0 .. i-1] with bit b set.
                ones = [0] * (N + 1)
                for i in range(N) :
                    ones[i + 1] = ones[i] + ((A[i] >> b) & 1)

                current = [[0] * N for _ in range(N)]
                for l in range(N) :
                    for r in range(l, N) :
                        ways = 0
                        for x in range(l - 1, r + 1) :
                            required_ones = (r - x) if suffix_bit else 0
                            if ones[r + 1] - ones[x + 1] != required_ones :
                                continue
                            left_count = previous[l][x] if x >= l else 1
                            right_count = previous[x + 1][r] if x + 1 <= r else 1
                            ways += left_count * right_count
                        current[l][r] = ways
                previous = current

            return previous[0][N - 1]

        # Presumably the all-ones pass counts the choices that shape A & B and
        # the all-zeros pass those that shape A | B, the two being independent
        # (hand-checked for N = 2, M = 1, A = [0, 0], which gives 3).
        self.parameter["reference_answer"] = interval_count(1) * interval_count(0)

    def _prompt_generate(self) -> str :
        """Fill the prompt template with N, M, 2^M and the entries of A."""
        N, M = self.parameter["N"], self.parameter["M"]
        return self.prompt_template.format(
            N = N,
            N_minus_1 = N - 1,
            M = M,
            power_2_M = 2 ** M,
            A = " ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"])),
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return None if absent or not an integer."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None

    def scorer(self, output : str) -> float :
        """
        Score a model output against the reference count.

        `self.processor` comes from the base class (not visible here);
        presumably it extracts the answer text and delegates to `_process`.
        Negative answers are treated like a wrong format.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]
        if processed_result < 0 :
            return self.rewards["wrong_format"]

        reference = self.parameter["reference_answer"]
        if reference == 0 :
            # A ratio against 0 is undefined, so fall back to exact match.
            return self.rewards["rewarding_weight"] * (processed_result == 0)

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(min/max)^beta" :
            a, b = reference, processed_result
            return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (processed_result == reference)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
|
server/Gym/environments/anti_palindromic_substring_counting/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import AntiPalindromicSubstringCounting_Environment
|
server/Gym/environments/anti_palindromic_substring_counting/environment.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from typing import Optional
|
| 3 |
+
from ...environment import VerifiableEnvironment
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class AntiPalindromicSubstringCounting_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3501
    """Task: count the anti-palindromic contiguous substrings of a binary string.

    `_generate` reads `self.parameter["N"]`, stores the generated string under
    "S" and the expected count under "reference_answer".
    """

    prompt_template = r"""We define an **anti-palindromic binary string** as a binary string such that its reverse is equal to the bitwise complement of the original string (i.e., '0' becomes '1' and '1' becomes '0'). For example, `000111` is anti-palindromic because its reverse is `111000`, which is the bitwise complement of `000111`. But `1001` is not, because its reverse is `1001`, while its flipped version is `0110`.

You are given a binary string: {S}
Please count the number of **contiguous substrings** of `S` that are anti-palindromic. Two substrings are considered different if they appear at different positions in `S`. Output a single integer — the number of anti-palindromic substrings."""

    def __init__(self,
                 wrong_format: float = -1.0,
                 rewarding_strategy: str = "(min/max)^beta",
                 rewarding_weight: float = 1.0,
                 rewarding_beta: float = 10.0,
                 **kwargs):
        """Pass `kwargs` to the base class and remember the reward settings."""
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Build a random binary string of length N and count its anti-palindromic substrings."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        # Split [0, N] into consecutive segments at a random set of interior cut points.
        cut_count = random.randint(0, N - 1)
        boundaries = [0, *sorted(random.sample(range(1, N), cut_count)), N]

        one_probability = random.random()

        def draw_bits(count: int) -> str:
            # Each bit is '1' with probability `one_probability`.
            return "".join("1" if random.random() < one_probability else "0" for _ in range(count))

        complement = str.maketrans("01", "10")
        pieces = []
        for begin, end in zip(boundaries, boundaries[1:]):
            span = end - begin
            if span % 2:
                # Odd-length segments are plain random bits.
                pieces.append(draw_bits(span))
            else:
                # Even-length segments are anti-palindromic by construction:
                # a random half followed by its reversed complement.
                first_half = draw_bits(span // 2)
                pieces.append(first_half + first_half[::-1].translate(complement))
        S = "".join(pieces)
        self.parameter["S"] = S
        assert len(S) == N, f"Generated string length {len(S)} does not match N {N}"

        # Padded sequence: '$', '#', then (char, '#') for every char of S, then a closing '$'.
        padded = ["$", "#"]
        for bit in S:
            padded += (bit, "#")
        padded.append("$")

        size = len(padded)
        last_separator = size - 2  # index of the final '#', i.e. 1 + 2 * N

        # radius[c]: Manacher-style radius found for center c.
        radius = [0] * size

        # A character matches its complement; '#' matches itself; '$' matches nothing.
        flip = {"0": "1", "1": "0", "#": "#"}

        best_center = 1  # center whose span reaches furthest to the right
        right_edge = 1   # best_center + radius[best_center]
        total = 0

        # Only the odd indices (the '#' separators) are centers of even-length substrings.
        for center in range(1, last_separator + 1, 2):
            if center < right_edge:
                reach = min(right_edge - center, radius[2 * best_center - center])
            else:
                reach = 1

            # Grow while both ends are in bounds and the right end is the flip of the left end.
            while (center - reach >= 0
                   and center + reach < size
                   and flip.get(padded[center - reach]) == padded[center + reach]):
                reach += 1
            radius[center] = reach

            if center + reach > right_edge:
                right_edge = center + reach
                best_center = center

            # Every two steps of radius correspond to one anti-palindromic substring.
            total += reach // 2

        self.parameter["reference_answer"] = total

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the generated string."""
        return self.prompt_template.format(S=self.parameter["S"])

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; return None when absent or unparsable."""
        if answer is None:
            return None
        text = answer.strip()
        try:
            return int(text)
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Score `output` against the reference count using the configured strategy."""
        guess = self.processor(output)
        if guess is None or guess < 0:
            return self.rewards["wrong_format"]

        strategy = self.rewards["rewarding_strategy"]
        weight = self.rewards["rewarding_weight"]
        reference = self.parameter["reference_answer"]

        if strategy == "(min/max)^beta":
            if reference == 0:
                # The ratio is undefined for a zero reference; only an exact 0 scores.
                return weight * int(guess == 0)
            smaller, larger = min(reference, guess), max(reference, guess)
            return weight * ((smaller / larger) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return weight * (guess == reference)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
|
server/Gym/environments/axis_k_center/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import Axis_KCenter_Environment
|
server/Gym/environments/axis_k_center/environment.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from typing import Optional, List
|
| 3 |
+
from ...environment import VerifiableEnvironment
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class Axis_KCenter_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P4767 (the original comment read "K4767", presumably a typo - verify)
    """Task: pick K of N points on a line minimising the summed distance to the nearest pick.

    `_generate` reads `self.parameter["N"]` and stores "K", the sorted positions
    "X" and the optimal total "gold_answer".
    """

    prompt_template = r"""You are given {N} points on a line, labeled from 0 to {N_minus_1}. Their positions (from left to right) are: {X}

Please select a set of {K} distinct points. Try your best to minimize the total distance from all points to their nearest selected point (the distance is the absolute difference between positions).

**Output Format:** Your final answer should be a single line containing the indices of the selected {K} points in any order, separated by spaces."""

    def __init__(self,
                 position_multiple: int = 5,
                 wrong_format: float = -1.0,
                 invalid_solution: float = -0.5,
                 rewarding_strategy: str = "(gold/answer)^beta",
                 rewarding_weight: float = +1.0,
                 rewarding_beta: float = 5.0,
                 **kwargs):
        """Pass `kwargs` to the base class; remember the position scale and reward settings."""
        super().__init__(**kwargs)

        self.position_multiple = position_multiple

        self.rewards = dict(
            wrong_format=wrong_format,
            invalid_solution=invalid_solution,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Draw K and N distinct sorted positions, then compute the optimal total distance."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        K = random.randint(1, N - 1)
        self.parameter["K"] = K

        # N distinct positions from 0 .. N * position_multiple, in increasing order.
        X = sorted(random.sample(range(N * self.position_multiple + 1), N))
        self.parameter["X"] = X

        # Strictly larger than any achievable total distance.
        INF = N * (X[-1] - X[0] + 1)

        # segment_cost[l][r]: cost of serving points l..r (inclusive, 0-indexed) with a
        # single selected point, built incrementally using the middle index (l + r) // 2.
        segment_cost = [[0] * N for _ in range(N)]
        for left in range(N):
            row = segment_cost[left]
            for right in range(left + 1, N):
                row[right] = row[right - 1] + X[right] - X[(left + right) // 2]

        # best_cost[i][j]: minimum total distance for the first i points with j selections.
        best_cost = [[INF] * (K + 1) for _ in range(N + 1)]
        # split_at[i][j]: the cut giving best_cost[i][j]; used to bound the search
        # range (Knuth optimization).
        split_at = [[0] * (K + 1) for _ in range(N + 2)]

        best_cost[0][0] = 0

        for picks in range(1, K + 1):
            split_at[N + 1][picks] = N
            for covered in range(N, 0, -1):
                lo = split_at[covered][picks - 1]
                hi = min(split_at[covered + 1][picks], covered - 1)
                cheapest, chosen_cut = INF, 0
                for cut in range(lo, hi + 1):
                    candidate = best_cost[cut][picks - 1] + segment_cost[cut][covered - 1]
                    if candidate < cheapest:
                        cheapest, chosen_cut = candidate, cut
                best_cost[covered][picks] = cheapest
                split_at[covered][picks] = chosen_cut

        # All N points served by K selections.
        self.parameter["gold_answer"] = best_cost[N][K]

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N, K and the space-separated positions."""
        N = self.parameter["N"]
        positions_text = " ".join(str(position) for position in self.parameter["X"])
        return self.prompt_template.format(
            N=N,
            N_minus_1=N - 1,
            K=self.parameter["K"],
            X=positions_text,
        )

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse whitespace-separated integers; return None when absent or unparsable."""
        if answer is None:
            return None
        tokens = answer.strip().split()
        try:
            return [int(token) for token in tokens]
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Validate the chosen indices and score their total distance against the optimum."""
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        chosen = processed_result
        invalid = self.rewards["invalid_solution"]

        # Reject duplicates, a wrong count, or out-of-range indices.
        if len(set(chosen)) != len(chosen):
            return invalid
        if len(chosen) != self.parameter["K"]:
            return invalid
        if any(not (0 <= index < self.parameter["N"]) for index in chosen):
            return invalid

        X = self.parameter["X"]
        answer = sum(
            min(abs(X[point] - X[pick]) for pick in chosen)
            for point in range(self.parameter["N"])
        )
        gold = self.parameter["gold_answer"]
        assert gold <= answer, "gold should be less than or equal to answer"

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(gold/answer)^beta":
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
|
server/Gym/environments/baj_bytecomputer/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import BAJBytecomputer_Environment
|
server/Gym/environments/baj_bytecomputer/environment.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from typing import Optional
|
| 3 |
+
from ...environment import VerifiableEnvironment
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class BAJBytecomputer_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3558
    """Task: minimum number of `X[i + 1] += X[i]` operations to make X non-decreasing.

    `_generate` reads `self.parameter["N"]` and stores the array "X" (values in
    {-1, 0, 1}) and the minimum operation count "reference_answer".
    """

    prompt_template = r"""You are given an array X of length {N}, where each element is initially -1, 0, or +1: {X}
You may perform the following operation any number of times: choose an index i (1 ≤ i < {N}), and update X[i + 1] := X[i + 1] + X[i]. Your goal is to make the array non-decreasing, i.e., X[1] ≤ X[2] ≤ ... ≤ X[{N}]; please output the **minimum number of operations** required to achieve this."""

    def __init__(self,
                 wrong_format: float = -1.0,
                 correct_answer: float = 1.0,
                 incorrect_answer: float = 0.0,
                 **kwargs):
        """Pass `kwargs` to the base class and remember the reward settings."""
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            correct_answer=correct_answer,
            incorrect_answer=incorrect_answer,
        )

    def _generate(self) -> None:
        """Resample random arrays until the DP finds one that can be made non-decreasing."""
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        # Sentinel above any reachable cost: the DP spends at most 2 operations
        # on each of the N - 1 later elements.
        INF = 2 * N + 5
        values = (-1, 0, 1)

        while True:
            # Random weights for -1 / 0 / +1, then N weighted draws.
            weights = [random.randint(1, N) for _ in range(3)]
            X = [random.choices([-1, 0, 1], weights=weights)[0] for _ in range(N)]
            self.parameter["X"] = X

            # cost[j]: minimum operations so far with the previous element equal to values[j].
            cost = [INF] * 3
            cost[X[0] + 1] = 0

            for element in X[1:]:
                next_cost = [INF] * 3
                for previous, spent in zip(values, cost):
                    if spent >= INF:
                        continue
                    # Applying the operation 0, 1 or 2 times adds `previous` that many times.
                    for times in range(3):
                        updated = element + times * previous
                        if -1 <= updated <= 1 and updated >= previous:
                            slot = updated + 1
                            next_cost[slot] = min(next_cost[slot], spent + times)
                cost = next_cost

            # Best over all three possible final values.
            fewest = min(cost)
            if fewest < INF:
                self.parameter["reference_answer"] = fewest
                break

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N and the 1-indexed array listing."""
        listing = ", ".join(
            "X[{}]={}".format(position, value)
            for position, value in enumerate(self.parameter["X"], start=1)
        )
        return self.prompt_template.format(N=self.parameter["N"], X=listing)

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; return None when absent or unparsable."""
        if answer is None:
            return None
        text = answer.strip()
        try:
            return int(text)
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Return the reward for an exact match, a mismatch, or an unparsable answer."""
        guess = self.processor(output)
        if guess is None:
            return self.rewards["wrong_format"]
        if guess == self.parameter["reference_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["incorrect_answer"]
|
server/Gym/environments/banned_point_superset_path_counting/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import BannedPointSupersetPathCounting_Environment
|
server/Gym/environments/banned_point_superset_path_counting/environment.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from typing import Optional, List
|
| 3 |
+
from ...environment import VerifiableEnvironment
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class BannedPointSupersetPathCounting_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3734
    """Task: count bit-superset paths from (0, 0, 0) to (N, M, R) that avoid banned points.

    `_generate` reads `self.parameter["MAX_N_M_R"]` and `self.parameter["MAX_O"]`
    and stores "N", "M", "R", "O", "obstacles", "MOD" and "reference_answer".
    """

    prompt_template = r"""In a three-dimensional space, you start at point (0, 0, 0) and want to reach the point ({N}, {M}, {R}). At each step, if you are currently at (x, y, z), you may move to a new (different from the current one) point of one of the following types:
1. (x', y, z) such that x AND x' = x
2. (x, y', z) such that y AND y' = y
3. (x, y, z') such that z AND z' = z
(AND refers to the bitwise AND operation.)

You are **not allowed** to visit any of the following points:
{obstacles}

Please count the number of distinct valid paths from (0, 0, 0) to ({N}, {M}, {R}) that avoid all forbidden points. Output the result modulo {MOD}."""

    def __init__(self,
                 max_MOD: int = 10000,
                 wrong_format: float = -1.0,
                 wrong_range: float = -0.5,
                 correct_answer: float = +1.0,
                 wrong_answer: float = 0.0,
                 **kwargs) -> None:
        """Pass `kwargs` to the base class; remember the modulus bound and reward settings."""
        super().__init__(**kwargs)

        self.max_MOD = max_MOD
        self.rewards = dict(
            wrong_format=wrong_format,
            wrong_range=wrong_range,
            correct_answer=correct_answer,
            wrong_answer=wrong_answer,
        )

    def _generate(self) -> None:
        """Draw the target, the banned points and the modulus, then count the valid paths."""
        assert "MAX_N_M_R" in self.parameter, "MAX_N_M_R is required in parameter"
        MAX_N_M_R = self.parameter["MAX_N_M_R"]
        assert MAX_N_M_R >= 1, "MAX_N_M_R should be greater than or equal to 1"

        # Redraw the target until at least one point other than the two endpoints
        # is a coordinate-wise submask of it.
        while True:
            N, M, R = (random.randint(0, MAX_N_M_R) for _ in range(3))
            self.parameter["N"], self.parameter["M"], self.parameter["R"] = N, M, R
            submask_points = 1 << (N.bit_count() + M.bit_count() + R.bit_count())
            if submask_points - 2 >= 1:
                break

        assert "MAX_O" in self.parameter, "MAX_O is required in parameter"
        MAX_O = self.parameter["MAX_O"]
        assert MAX_O >= 1, "MAX_O should be greater than or equal to 1"
        MAX_O = min(MAX_O, submask_points - 2)
        O = random.randint(1, MAX_O)
        self.parameter["O"] = O

        def set_bits(value: int) -> List[int]:
            # Powers of two present in `value`, smallest first.
            return [1 << shift for shift in range(value.bit_length()) if (value >> shift) & 1]

        def random_submask(bits: List[int]) -> int:
            # Sum of a random-size random selection of the given bits.
            keep = random.randint(0, len(bits))
            return sum(random.sample(bits, keep))

        N_bits, M_bits, R_bits = set_bits(N), set_bits(M), set_bits(R)
        origin, target = (0, 0, 0), (N, M, R)

        banned = set()
        while len(banned) < O:
            point = (random_submask(N_bits), random_submask(M_bits), random_submask(R_bits))
            if point == origin or point == target:
                continue
            banned.add(point)
        obstacles = list(banned)
        random.shuffle(obstacles)
        self.parameter["obstacles"] = obstacles.copy()

        MOD = random.randint(2, self.max_MOD)
        self.parameter["MOD"] = MOD

        # Lexicographic order places every coordinate-wise submask before its
        # supersets; the target always goes last.
        points = sorted([origin] + obstacles)
        points.append(target)
        total = len(points)

        # Popcounts of the three target coordinates bound the table sizes.
        dx, dy, dz = N.bit_count(), M.bit_count(), R.bit_count()
        max_d = max(dx, dy, dz)

        # Pascal's triangle modulo MOD, up to max_d.
        binom = [[0] * (max_d + 1) for _ in range(max_d + 1)]
        for n in range(max_d + 1):
            binom[n][0] = 1
            for k in range(1, n + 1):
                binom[n][k] = (binom[n - 1][k - 1] + binom[n - 1][k]) % MOD

        # ways[x][y][z]: number of obstacle-free paths whose endpoints differ by
        # x bits in the first coordinate, y in the second and z in the third.
        ways = [[[0] * (dz + 1) for _ in range(dy + 1)] for _ in range(dx + 1)]
        ways[0][0][0] = 1
        for x in range(dx + 1):
            for y in range(dy + 1):
                for z in range(dz + 1):
                    if (x, y, z) == (0, 0, 0):
                        continue
                    # The last move added bits along exactly one axis.
                    along_x = sum(ways[i][y][z] * binom[x][i] for i in range(x))
                    along_y = sum(ways[x][j][z] * binom[y][j] for j in range(y))
                    along_z = sum(ways[x][y][k] * binom[z][k] for k in range(z))
                    ways[x][y][z] = (along_x + along_y + along_z) % MOD

        # signed[i] = -(sum over earlier points j that are submasks of point i of
        # signed[j] * ways[bit differences]); with signed[0] = 1 for the origin.
        signed = [0] * total
        signed[0] = 1
        for i in range(1, total):
            xi, yi, zi = points[i]
            accumulated = 0
            for j in range(i):
                xj, yj, zj = points[j]
                is_submask = (xj & xi) == xj and (yj & yi) == yj and (zj & zi) == zj
                if not is_submask:
                    continue
                step = ways[(xi ^ xj).bit_count()][(yi ^ yj).bit_count()][(zi ^ zj).bit_count()]
                accumulated += signed[j] * step
            signed[i] = (-accumulated) % MOD

        # Negating the last entry recovers the non-negative path count.
        self.parameter["reference_answer"] = (-signed[-1]) % MOD

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the target, one banned point per line, and MOD."""
        banned_lines = "\n".join(
            "({}, {}, {})".format(x, y, z) for x, y, z in self.parameter["obstacles"]
        )
        return self.prompt_template.format(
            N=self.parameter["N"],
            M=self.parameter["M"],
            R=self.parameter["R"],
            obstacles=banned_lines,
            MOD=self.parameter["MOD"],
        )

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; return None when absent or unparsable."""
        if answer is None:
            return None
        text = answer.strip()
        try:
            return int(text)
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Return the reward for format, range, and exact-match checks, in that order."""
        guess = self.processor(output)
        if guess is None:
            return self.rewards["wrong_format"]
        if guess < 0 or guess >= self.parameter["MOD"]:
            return self.rewards["wrong_range"]
        if guess == self.parameter["reference_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
|
server/Gym/environments/banyan_heart/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import BanyanHeart_Environment
|
server/Gym/environments/banyan_heart/environment.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
import networkx
|
| 3 |
+
from typing import Optional
|
| 4 |
+
from ...environment import VerifiableEnvironment
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class BanyanHeart_Environment(VerifiableEnvironment) :
|
| 8 |
+
prompt_template = \
|
| 9 |
+
r"""We use the following process to generate a tree with {N} vertices labeled from 1 to {N}:
|
| 10 |
+
- Initially, the tree contains only vertex 1, and its **heart vertex** is also 1.
|
| 11 |
+
- At each step, we add a new vertex `i` (2 ≤ i ≤ {N}) and connect it to an existing vertex with an undirected edge. Then, the heart vertex moves one step toward `i` (i.e., it moves to the neighbor that is closer to `i`).
|
| 12 |
+
- This process continues until all {N} vertices have been added.
|
| 13 |
+
|
| 14 |
+
The final tree has the following edges:
|
| 15 |
+
{edges}
|
| 16 |
+
|
| 17 |
+
Can you determine which vertices could be the heart vertex after the process is completed? Output a single line with {N} characters (either `T` or `F`) without separators, where the i-th character is `T` if vertex i can be the heart vertex, and `F` otherwise."""
|
| 18 |
+
|
| 19 |
+
def __init__(self,
|
| 20 |
+
wrong_format : float = -1.0, rewarding_strategy : str = "(intersection/union)^beta", rewarding_beta : float = 5.0, rewarding_weight : float = +1.0,
|
| 21 |
+
**kwargs) :
|
| 22 |
+
"""
|
| 23 |
+
Initialize the BanyanHeart_Environment instance.
|
| 24 |
+
"""
|
| 25 |
+
super().__init__(**kwargs)
|
| 26 |
+
|
| 27 |
+
self.rewards = {
|
| 28 |
+
"wrong_format" : wrong_format,
|
| 29 |
+
"rewarding_strategy" : rewarding_strategy,
|
| 30 |
+
"rewarding_beta" : rewarding_beta,
|
| 31 |
+
"rewarding_weight" : rewarding_weight,
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def _generate(self) -> None :
|
| 36 |
+
assert "N" in self.parameter, "N is required in parameter"
|
| 37 |
+
N = self.parameter["N"]
|
| 38 |
+
assert N >= 4, "N should be greater than or equal to 4"
|
| 39 |
+
|
| 40 |
+
edges = self.parameter["edges"] = []
|
| 41 |
+
permutations = list(range(1, N + 1))
|
| 42 |
+
random.shuffle(permutations)
|
| 43 |
+
for index, vertex in enumerate(permutations) :
|
| 44 |
+
if index == 0 :
|
| 45 |
+
continue
|
| 46 |
+
u, v = vertex, random.choice(permutations[: index])
|
| 47 |
+
u, v = min(u, v), max(u, v)
|
| 48 |
+
edges.append((u, v))
|
| 49 |
+
random.shuffle(edges)
|
| 50 |
+
|
| 51 |
+
for u, v in edges :
|
| 52 |
+
assert 1 <= u < v <= N
|
| 53 |
+
assert len(edges) == len(set(edges)) == N - 1
|
| 54 |
+
|
| 55 |
+
tree = networkx.Graph()
|
| 56 |
+
tree.add_edges_from(edges)
|
| 57 |
+
assert networkx.is_tree(tree)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# Build adjacency list dynamically
|
| 61 |
+
adjacency = [[] for _ in range(N + 1)]
|
| 62 |
+
for u, v in edges:
|
| 63 |
+
adjacency[u].append(v)
|
| 64 |
+
adjacency[v].append(u)
|
| 65 |
+
|
| 66 |
+
# Arrays (1..N); index 0 acts as a dummy node
|
| 67 |
+
dep = [0] * (N + 1)
|
| 68 |
+
siz = [0] * (N + 1)
|
| 69 |
+
hson = [0] * (N + 1)
|
| 70 |
+
hson2 = [0] * (N + 1)
|
| 71 |
+
f = [0] * (N + 1)
|
| 72 |
+
ans = [False] * (N + 1)
|
| 73 |
+
|
| 74 |
+
# cmp function: return the index with larger siz
|
| 75 |
+
def cmp(x, y):
|
| 76 |
+
return x if siz[x] > siz[y] else y
|
| 77 |
+
|
| 78 |
+
# Iterative dfs1: compute dep, siz, hson, hson2, f
|
| 79 |
+
stack = [(1, 0, 0)] # (u, parent, state) state 0=enter, 1=exit
|
| 80 |
+
dep[0] = 0
|
| 81 |
+
while stack:
|
| 82 |
+
u, fa, state = stack.pop()
|
| 83 |
+
if state == 0:
|
| 84 |
+
dep[u] = dep[fa] + 1
|
| 85 |
+
stack.append((u, fa, 1))
|
| 86 |
+
for v in adjacency[u]:
|
| 87 |
+
if v == fa:
|
| 88 |
+
continue
|
| 89 |
+
stack.append((v, u, 0))
|
| 90 |
+
else:
|
| 91 |
+
# post-order processing
|
| 92 |
+
s = 1
|
| 93 |
+
h1 = 0
|
| 94 |
+
h2 = 0
|
| 95 |
+
for v in adjacency[u]:
|
| 96 |
+
if v == fa:
|
| 97 |
+
continue
|
| 98 |
+
s += siz[v]
|
| 99 |
+
if siz[v] > siz[h1]:
|
| 100 |
+
h2 = h1
|
| 101 |
+
h1 = v
|
| 102 |
+
elif siz[v] > siz[h2]:
|
| 103 |
+
h2 = v
|
| 104 |
+
siz[u] = s
|
| 105 |
+
hson[u] = h1
|
| 106 |
+
hson2[u] = h2
|
| 107 |
+
|
| 108 |
+
if f[h1] <= (siz[u] - 1 - siz[h1]):
|
| 109 |
+
fv = (siz[u] - 1) % 2
|
| 110 |
+
else:
|
| 111 |
+
fv = f[h1] - (siz[u] - 1 - siz[h1])
|
| 112 |
+
f[u] = fv + 1
|
| 113 |
+
|
| 114 |
+
# Iterative dfs2: compute ans
|
| 115 |
+
stack = [(1, 0, 0)] # (u, parent, h)
|
| 116 |
+
while stack:
|
| 117 |
+
u, fa, h = stack.pop()
|
| 118 |
+
tmp = cmp(hson[u], h)
|
| 119 |
+
if f[tmp] <= N - dep[u] - siz[tmp]:
|
| 120 |
+
ans[u] = ((N & 1) == (dep[u] & 1))
|
| 121 |
+
for v in adjacency[u]:
|
| 122 |
+
if v == fa:
|
| 123 |
+
continue
|
| 124 |
+
if v == hson[u]:
|
| 125 |
+
h_child = cmp(hson2[u], h)
|
| 126 |
+
else:
|
| 127 |
+
h_child = cmp(hson[u], h)
|
| 128 |
+
stack.append((v, u, h_child))
|
| 129 |
+
|
| 130 |
+
self.parameter["reference_answer"] = "".join("T" if ans[i] else "F" for i in range(1, N + 1))
|
| 131 |
+
assert "T" in self.parameter["reference_answer"], "At least one vertex should be able to be the heart vertex"
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def _prompt_generate(self) -> str :
|
| 135 |
+
return self.prompt_template.format(
|
| 136 |
+
N = self.parameter["N"],
|
| 137 |
+
edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]),
|
| 138 |
+
)
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def _process(self, answer : Optional[str]) -> Optional[str] :
|
| 142 |
+
if answer is not None :
|
| 143 |
+
answer = answer.strip()
|
| 144 |
+
if not(len(answer) == self.parameter["N"] and all(c in "TF" for c in answer)) :
|
| 145 |
+
return None
|
| 146 |
+
return answer
|
| 147 |
+
else :
|
| 148 |
+
return None
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def scorer(self, output: str) -> float:
    """Score a T/F string against the reference by overlap of the 'T' positions.

    `self.processor` is defined outside this block; a None result from it is
    rewarded with `rewards["wrong_format"]`. Otherwise the reward is driven by
    |intersection| and |union| of the positions marked 'T' in the answer and
    in `parameter["reference_answer"]`, according to
    `rewards["rewarding_strategy"]`.

    Raises:
        NotImplementedError: if the rewarding strategy is not recognised.
    """
    predicted = self.processor(output)
    if predicted is None:
        return self.rewards["wrong_format"]

    both = 0
    either = 0
    for mine, gold in zip(predicted, self.parameter["reference_answer"]):
        mine_true, gold_true = mine == "T", gold == "T"
        both += mine_true and gold_true
        either += mine_true or gold_true
    assert both <= either, "intersection should not exceed union"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(intersection/union)^beta":
        return ((both / either) ** self.rewards["rewarding_beta"]) * self.rewards["rewarding_weight"]
    if strategy == "intersection=union":
        return self.rewards["rewarding_weight"] * (both == either)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
|
server/Gym/environments/bez_minimalist_security/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import BEZMinimalistSecurity_Environment
|
server/Gym/environments/bez_minimalist_security/environment.py
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from typing import Optional, List
|
| 3 |
+
from ...environment import VerifiableEnvironment
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class BEZMinimalistSecurity_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3544
|
| 7 |
+
prompt_template = \
|
| 8 |
+
r"""There is an array P of length {N}. Initially, P is: {P}
|
| 9 |
+
|
| 10 |
+
Now we want to construct a new array P' of length {N}, where 0 <= P'[i] <= P[i] for all i. Additionally, there are some constraints of the form P'[u] + P'[v] = w, where u and v are indices and w is a constant (it is guaranteed that P[u] + P[v] >= w). The constraints are:
|
| 11 |
+
{constraints}
|
| 12 |
+
|
| 13 |
+
Please output P'[0], P'[1], ..., P'[{N_minus_1}], separated by spaces, such that they satisfy all the constraints and their sum is {minimized_or_maximized}."""
|
| 14 |
+
|
| 15 |
+
def __init__(
    self,
    wrong_format : float = -1.0,
    invalid_solution : float = -0.5,
    rewarding_strategy_min : str = "(gold/answer)^beta",
    rewarding_weight_min : float = +1.0,
    rewarding_beta_min : float = 5.0,
    rewarding_strategy_max : str = "(answer/gold)^beta",
    rewarding_weight_max : float = +1.0,
    rewarding_beta_max : float = 5.0,
    **kwargs,
) :
    """
    Set up a BEZMinimalistSecurity_Environment.

    The reward settings are collected into `self.rewards`; the `*_min` and
    `*_max` entries are kept separately for the minimisation and the
    maximisation variant of the task. All remaining keyword arguments are
    forwarded to the parent class.
    """
    super().__init__(**kwargs)

    self.rewards = dict(
        wrong_format=wrong_format,
        invalid_solution=invalid_solution,
        rewarding_strategy_max=rewarding_strategy_max,
        rewarding_weight_max=rewarding_weight_max,
        rewarding_beta_max=rewarding_beta_max,
        rewarding_strategy_min=rewarding_strategy_min,
        rewarding_weight_min=rewarding_weight_min,
        rewarding_beta_min=rewarding_beta_min,
    )
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _generate(self) -> None :
|
| 38 |
+
assert "N" in self.parameter, "N is required in parameter"
|
| 39 |
+
N = self.parameter["N"]
|
| 40 |
+
assert N >= 3, "N should be at least 3"
|
| 41 |
+
|
| 42 |
+
P_prime = [random.randint(0, N) for _ in range(N)]
|
| 43 |
+
|
| 44 |
+
assert "edge_ratio" in self.parameter, "edge_ratio is required in parameter"
|
| 45 |
+
edge_ratio = self.parameter["edge_ratio"]
|
| 46 |
+
edges = self.parameter["edges"] = random.sample([(u, v, P_prime[u] + P_prime[v]) for u in range(N) for v in range(u + 1, N)], max(1, min(N * (N - 1) // 2, int(edge_ratio * N))))
|
| 47 |
+
random.shuffle(edges)
|
| 48 |
+
for u, v, w in edges :
|
| 49 |
+
assert 0 <= u < v < N
|
| 50 |
+
assert len(edges) == len(set((u, v) for u, v, w in edges)), "edges should be unique"
|
| 51 |
+
|
| 52 |
+
P = self.parameter["P"] = [P_prime_u + random.randint(0, N) for P_prime_u in P_prime]
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
# Build adjacency list (0-indexed)
|
| 56 |
+
adjacency = [[] for _ in range(N)]
|
| 57 |
+
for u, v, w in edges:
|
| 58 |
+
adjacency[u].append((v, w))
|
| 59 |
+
adjacency[v].append((u, w))
|
| 60 |
+
|
| 61 |
+
vis = [False] * N
|
| 62 |
+
sgn = [0] * N
|
| 63 |
+
cons = [0] * N
|
| 64 |
+
q = [0] * N
|
| 65 |
+
mn = 0
|
| 66 |
+
mx = 0
|
| 67 |
+
|
| 68 |
+
def wa() :
|
| 69 |
+
assert False, "Invalid solution"
|
| 70 |
+
|
| 71 |
+
def dfs(u): # Depth-first search on component
|
| 72 |
+
nonlocal fix
|
| 73 |
+
vis[u] = True
|
| 74 |
+
stc.append(u)
|
| 75 |
+
# Early exit if constraint too large
|
| 76 |
+
if cons[u] > 10**6:
|
| 77 |
+
wa()
|
| 78 |
+
for v, w in adjacency[u]:
|
| 79 |
+
if not vis[v]:
|
| 80 |
+
sgn[v] = -sgn[u]
|
| 81 |
+
cons[v] = w - cons[u]
|
| 82 |
+
dfs(v)
|
| 83 |
+
else:
|
| 84 |
+
if sgn[u] == sgn[v]:
|
| 85 |
+
res = w - cons[u] - cons[v]
|
| 86 |
+
# Must be even
|
| 87 |
+
if res & 1:
|
| 88 |
+
wa()
|
| 89 |
+
denom = 2 * sgn[u]
|
| 90 |
+
res //= denom
|
| 91 |
+
# Check valid fixed value
|
| 92 |
+
if res < 0 or res > P[anc] or (fix is not None and fix != res):
|
| 93 |
+
wa()
|
| 94 |
+
fix = res
|
| 95 |
+
else:
|
| 96 |
+
# Sum of constants must match
|
| 97 |
+
if cons[u] + cons[v] != w:
|
| 98 |
+
wa()
|
| 99 |
+
|
| 100 |
+
# Process each connected component
|
| 101 |
+
for i in range(N):
|
| 102 |
+
if not vis[i]:
|
| 103 |
+
stc = [] # nodes in current component
|
| 104 |
+
anc = i # anchor node for fixed value range
|
| 105 |
+
fix = None # fixed solution parameter
|
| 106 |
+
sgn[i] = 1 # sign for anchor
|
| 107 |
+
cons[i] = 0 # constant offset for anchor
|
| 108 |
+
dfs(i)
|
| 109 |
+
|
| 110 |
+
if fix is not None:
|
| 111 |
+
# Unique solution determined by `fix`
|
| 112 |
+
for u in stc:
|
| 113 |
+
q[u] = sgn[u] * fix + cons[u]
|
| 114 |
+
delta = P[u] - q[u]
|
| 115 |
+
mn += delta
|
| 116 |
+
mx += delta
|
| 117 |
+
if q[u] < 0 or q[u] > P[u]:
|
| 118 |
+
wa()
|
| 119 |
+
# Verify edges
|
| 120 |
+
for u in stc:
|
| 121 |
+
for v, w in adjacency[u]:
|
| 122 |
+
if q[u] + q[v] != w:
|
| 123 |
+
wa()
|
| 124 |
+
else:
|
| 125 |
+
# Range of valid `fix` values [l, r]
|
| 126 |
+
l, r = 0, P[anc]
|
| 127 |
+
for u in stc:
|
| 128 |
+
if sgn[u] == 1:
|
| 129 |
+
l = max(l, -cons[u])
|
| 130 |
+
r = min(r, P[u] - cons[u])
|
| 131 |
+
else:
|
| 132 |
+
l = max(l, cons[u] - P[u])
|
| 133 |
+
r = min(r, cons[u])
|
| 134 |
+
if l > r:
|
| 135 |
+
wa()
|
| 136 |
+
# Compute sum of reductions for minimal `fix = l`
|
| 137 |
+
base_sum = 0
|
| 138 |
+
tsign = 0
|
| 139 |
+
for u in stc:
|
| 140 |
+
base_sum += P[u] - (l * sgn[u] + cons[u])
|
| 141 |
+
tsign -= sgn[u]
|
| 142 |
+
# Depending on tsign, extremes at l or r
|
| 143 |
+
if tsign > 0:
|
| 144 |
+
mx += base_sum + tsign * (r - l)
|
| 145 |
+
mn += base_sum
|
| 146 |
+
else:
|
| 147 |
+
mx += base_sum
|
| 148 |
+
mn += base_sum + tsign * (r - l)
|
| 149 |
+
|
| 150 |
+
self.parameter["minimized_or_maximized"] = random.choice(["minimized", "maximized"])
|
| 151 |
+
if self.parameter["minimized_or_maximized"] == "minimized" :
|
| 152 |
+
self.parameter["gold_answer"] = sum(P) - mx
|
| 153 |
+
elif self.parameter["minimized_or_maximized"] == "maximized" :
|
| 154 |
+
self.parameter["gold_answer"] = sum(P) - mn
|
| 155 |
+
else :
|
| 156 |
+
raise ValueError("minimized_or_maximized should be either 'minimized' or 'maximized'")
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
def _prompt_generate(self) -> str :
|
| 160 |
+
N = self.parameter["N"]
|
| 161 |
+
return self.prompt_template.format(
|
| 162 |
+
N = N,
|
| 163 |
+
N_minus_1 = N - 1,
|
| 164 |
+
P = " ".join("P[{}]={}".format(i, P_i) for i, P_i in enumerate(self.parameter["P"])),
|
| 165 |
+
constraints = "\n".join("P'[{}] + P'[{}] = {}".format(u, v, w) for u, v, w in self.parameter["edges"]),
|
| 166 |
+
minimized_or_maximized = self.parameter["minimized_or_maximized"],
|
| 167 |
+
)
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def _process(self, answer : Optional[str]) -> Optional[List] :
|
| 171 |
+
if answer is not None :
|
| 172 |
+
answer = answer.strip()
|
| 173 |
+
try :
|
| 174 |
+
answer_array = list(map(int, answer.split()))
|
| 175 |
+
return answer_array
|
| 176 |
+
except ValueError :
|
| 177 |
+
return None # Invalid answer format
|
| 178 |
+
else :
|
| 179 |
+
return None # Invalid answer format
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
def scorer(self, output: str) -> float:
    """Score a proposed array P' against the stored optimum.

    `self.processor` is defined outside this block; a None result from it
    yields `rewards["wrong_format"]`. A parsed answer is rejected with
    `rewards["invalid_solution"]` when its length is not N, when some entry
    falls outside [0, P[i]], or when any constraint P'[u] + P'[v] = w fails.
    A feasible answer is rewarded by comparing its sum with
    `parameter["gold_answer"]` under the min- or max-specific strategy.

    Raises:
        NotImplementedError: if the configured rewarding strategy is unknown.
        AssertionError: if a feasible answer beats the stored optimum, or if
            `parameter["minimized_or_maximized"]` holds an unexpected value.
    """
    processed_result = self.processor(output)
    if processed_result is None:
        return self.rewards["wrong_format"]
    assert isinstance(processed_result, list), "processed_result should be a list"

    P_prime = processed_result
    if len(P_prime) != self.parameter["N"]:
        return self.rewards["invalid_solution"]
    if not all(0 <= chosen <= cap for chosen, cap in zip(P_prime, self.parameter["P"])):
        return self.rewards["invalid_solution"]
    if not all(P_prime[u] + P_prime[v] == w for u, v, w in self.parameter["edges"]):
        return self.rewards["invalid_solution"]

    gold, answer = self.parameter["gold_answer"], sum(P_prime)
    objective = self.parameter["minimized_or_maximized"]

    if objective == "minimized":
        assert 0 <= gold <= answer, "For minimization, answer should be greater than 0 and at least as large as the gold answer"
        strategy = self.rewards["rewarding_strategy_min"]
        weight = self.rewards["rewarding_weight_min"]
        if strategy == "(gold/answer)^beta":
            if answer == 0:
                # Avoid 0/0: a zero sum can only be optimal.
                assert gold == 0, "If answer is 0, gold should also be 0"
                return weight * 1.0
            return weight * ((gold / answer) ** self.rewards["rewarding_beta_min"])
        if strategy == "gold=answer":
            return weight * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))

    if objective == "maximized":
        assert 0 <= answer <= gold, "For maximization, answer should be greater than 0 and at most as large as the gold answer"
        strategy = self.rewards["rewarding_strategy_max"]
        weight = self.rewards["rewarding_weight_max"]
        if strategy == "(answer/gold)^beta":
            if gold == 0:
                # Avoid 0/0: with a zero optimum the answer must be zero too.
                assert answer == 0, "If gold is 0, answer should also be 0"
                return weight * 1.0
            return weight * ((answer / gold) ** self.rewards["rewarding_beta_max"])
        if strategy == "gold=answer":
            return weight * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))

    # The message now names the real parameter key and its two legal values.
    assert False, "minimized_or_maximized should be either 'minimized' or 'maximized'"
|
server/Gym/environments/bezout_identity/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import BezoutIdentity_Environment
|
server/Gym/environments/bezout_identity/environment.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
import random
|
| 3 |
+
from typing import Optional, List
|
| 4 |
+
from ...environment import VerifiableEnvironment
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class BezoutIdentity_Environment(VerifiableEnvironment) :
|
| 8 |
+
prompt_template = \
|
| 9 |
+
r"""You are given an array of length {N}, denoted as A[1], ..., A[{N}]. Please find **integers** X[1], ..., X[{N}] such that the value of S = A[1] * X[1] + ... + A[{N}] * X[{N}] satisfies the condition: **S > 0**. Try your best to **minimize the value of S** while meeting this condition.
|
| 10 |
+
|
| 11 |
+
A: {A}
|
| 12 |
+
|
| 13 |
+
**Output Format:** Output a single line containing X[1], ..., X[{N}], separated by spaces."""
|
| 14 |
+
|
| 15 |
+
def __init__(
    self,
    wrong_format : float = -1.0,
    invalid_solution : float = -0.5,
    rewarding_strategy : str = "(gold/answer)^beta",
    rewarding_weight : float = +1.0,
    rewarding_beta : float = 5.0,
    **kwargs,
) :
    """
    Set up a BezoutIdentity_Environment.

    The reward settings are collected into `self.rewards`; all remaining
    keyword arguments are forwarded to the parent class.
    """
    super().__init__(**kwargs)

    self.rewards = dict(
        wrong_format=wrong_format,
        invalid_solution=invalid_solution,
        rewarding_strategy=rewarding_strategy,
        rewarding_weight=rewarding_weight,
        rewarding_beta=rewarding_beta,
    )
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _generate(self) -> None :
|
| 33 |
+
assert "N" in self.parameter, "N is required in parameter"
|
| 34 |
+
N = self.parameter["N"]
|
| 35 |
+
assert N >= 2, "N should be greater than or equal to 2"
|
| 36 |
+
|
| 37 |
+
assert "MAX_A" in self.parameter, "MAX_A is required in parameter"
|
| 38 |
+
MAX_A = self.parameter["MAX_A"]
|
| 39 |
+
assert MAX_A >= 2, "MAX_A should be greater than or equal to 2"
|
| 40 |
+
|
| 41 |
+
self.parameter["A"] = A = []
|
| 42 |
+
for _ in range(N) :
|
| 43 |
+
picked_a, best_counting = None, -1
|
| 44 |
+
for try_step in range(1024) :
|
| 45 |
+
current_a = random.randint(2, MAX_A)
|
| 46 |
+
counting = sum(int(math.gcd(current_a, _a) > 1) for _a in A)
|
| 47 |
+
if counting > best_counting :
|
| 48 |
+
best_counting, picked_a = counting, current_a
|
| 49 |
+
if best_counting == len(A) :
|
| 50 |
+
break
|
| 51 |
+
if random.random() < 0.5 :
|
| 52 |
+
picked_a = -picked_a
|
| 53 |
+
A.append(picked_a)
|
| 54 |
+
random.shuffle(A)
|
| 55 |
+
assert len(A) == N, "The length of A should be equal to N"
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def exgcd(a, b):
|
| 59 |
+
"""
|
| 60 |
+
Returns (g, x, y) such that
|
| 61 |
+
g = gcd(a, b)
|
| 62 |
+
a*x + b*y = g
|
| 63 |
+
Ensures g >= 0.
|
| 64 |
+
"""
|
| 65 |
+
if b == 0:
|
| 66 |
+
return (abs(a), 1 if a >= 0 else -1, 0)
|
| 67 |
+
g, x1, y1 = exgcd(b, a % b)
|
| 68 |
+
# b*x1 + (a%b)*y1 = g
|
| 69 |
+
# a%b = a - (a//b)*b
|
| 70 |
+
x = y1
|
| 71 |
+
y = x1 - (a // b) * y1
|
| 72 |
+
return (g, x, y)
|
| 73 |
+
|
| 74 |
+
# initialize with A[0]
|
| 75 |
+
g = abs(A[0])
|
| 76 |
+
X = [0] * N
|
| 77 |
+
X[0] = 1 if A[0] >= 0 else -1
|
| 78 |
+
|
| 79 |
+
# incorporate each A[i]
|
| 80 |
+
for i in range(1, N):
|
| 81 |
+
ai = A[i]
|
| 82 |
+
g2, u, v = exgcd(g, ai)
|
| 83 |
+
# scale previous coefficients by u
|
| 84 |
+
for j in range(i):
|
| 85 |
+
X[j] *= u
|
| 86 |
+
# coefficient for A[i] is v
|
| 87 |
+
X[i] = v
|
| 88 |
+
g = g2
|
| 89 |
+
|
| 90 |
+
S = sum(x * a for x, a in zip(X, A))
|
| 91 |
+
assert S == g
|
| 92 |
+
assert S > 0, "The sum S must be greater than 0"
|
| 93 |
+
self.parameter["reference_answer"] = " ".join(map(str, X))
|
| 94 |
+
self.parameter["gold_answer"] = S
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def _prompt_generate(self) -> str :
|
| 98 |
+
return self.prompt_template.format(
|
| 99 |
+
N = self.parameter["N"],
|
| 100 |
+
A = ", ".join(map(str, self.parameter["A"])),
|
| 101 |
+
)
|
| 102 |
+
|
| 103 |
+
def _process(self, answer : Optional[str]) -> Optional[List] :
|
| 104 |
+
if answer is not None :
|
| 105 |
+
answer = answer.strip()
|
| 106 |
+
try :
|
| 107 |
+
answer_array = list(map(int, answer.split()))
|
| 108 |
+
return answer_array
|
| 109 |
+
except ValueError :
|
| 110 |
+
return None # Invalid answer format
|
| 111 |
+
else :
|
| 112 |
+
return None # Invalid answer format
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def scorer(self, output: str) -> float:
    """Score integer coefficients X by the value S = sum(A[i] * X[i]).

    `self.processor` is defined outside this block; a None result from it
    yields `rewards["wrong_format"]`. An answer of the wrong length or with
    S <= 0 yields `rewards["invalid_solution"]`. Otherwise S is compared
    with `parameter["gold_answer"]` under `rewards["rewarding_strategy"]`.

    Raises:
        NotImplementedError: if the rewarding strategy is not recognised.
    """
    coefficients = self.processor(output)
    if coefficients is None:
        return self.rewards["wrong_format"]
    assert isinstance(coefficients, list), "processed_result should be a list"

    if len(coefficients) != self.parameter["N"]:
        return self.rewards["invalid_solution"]
    total = sum(x * a for x, a in zip(coefficients, self.parameter["A"]))
    if total <= 0:
        return self.rewards["invalid_solution"]

    gold = self.parameter["gold_answer"]
    assert gold <= total, "The computed sum S must be greater than or equal to the gold answer"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(gold/answer)^beta":
        return self.rewards["rewarding_weight"] * ((gold / total) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer":
        return self.rewards["rewarding_weight"] * (gold == total)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
|
server/Gym/environments/binario/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import Binario_Environment
|
server/Gym/environments/binario/environment.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from typing import Optional, List
|
| 3 |
+
from ...environment import VerifiableEnvironment
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class Binario_Environment(VerifiableEnvironment) :
|
| 7 |
+
prompt_template = \
|
| 8 |
+
r"""You are given a {N} × {M} matrix. Each cell contains either '0', '1', or '*' ('*' means the cell is empty). Please fill all '*' cells with either '0' or '1' such that:
|
| 9 |
+
1. The number of `1`s in each row (from top to bottom) is: {row_counts}.
|
| 10 |
+
2. The number of `1`s in each column (from left to right) is: {col_counts}.
|
| 11 |
+
3. No more than two consecutive cells in a row or column can contain the same number.
|
| 12 |
+
|
| 13 |
+
The matrix is given in **row-major order**, with each row represented as a string of '0', '1', and '*':
|
| 14 |
+
{matrix}
|
| 15 |
+
|
| 16 |
+
**Output Format:** Output {N} lines, each containing {M} characters, where each character is either '0' or '1'. The output should match the format of the input (i.e., one row per line, no separators)."""
|
| 17 |
+
|
| 18 |
+
def __init__(
    self,
    wrong_format : float = -1.0,
    invalid_solution : float = -0.5,
    rewarding_strategy : str = "(satisfied/all)^beta",
    rewarding_weight : float = +1.0,
    rewarding_beta : float = 10.0,
    **kwargs,
) :
    """
    Set up a Binario_Environment.

    The reward settings are collected into `self.rewards`; all remaining
    keyword arguments are forwarded to the parent class.
    """
    super().__init__(**kwargs)

    self.rewards = dict(
        wrong_format=wrong_format,
        invalid_solution=invalid_solution,
        rewarding_strategy=rewarding_strategy,
        rewarding_weight=rewarding_weight,
        rewarding_beta=rewarding_beta,
    )
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def _generate(self) -> None :
|
| 36 |
+
assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
|
| 37 |
+
MAX_N_M = self.parameter["MAX_N_M"]
|
| 38 |
+
assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"
|
| 39 |
+
|
| 40 |
+
N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M)
|
| 41 |
+
|
| 42 |
+
def generate_matrix(N, M):
|
| 43 |
+
# Initialize the grid with None
|
| 44 |
+
grid = [[None] * M for _ in range(N)]
|
| 45 |
+
|
| 46 |
+
all_cells = [(i, j) for i in range(N) for j in range(M)]
|
| 47 |
+
random.shuffle(all_cells) # Shuffle to ensure randomness in placement
|
| 48 |
+
|
| 49 |
+
backtrack_counting = 0
|
| 50 |
+
|
| 51 |
+
def backtrack(idx):
|
| 52 |
+
# If we've filled past the last row, we're done
|
| 53 |
+
if idx == len(all_cells):
|
| 54 |
+
return True
|
| 55 |
+
i, j = all_cells[idx]
|
| 56 |
+
|
| 57 |
+
nonlocal backtrack_counting
|
| 58 |
+
backtrack_counting += 1
|
| 59 |
+
if backtrack_counting > 10000000:
|
| 60 |
+
return False
|
| 61 |
+
|
| 62 |
+
# Try placing 0 or 1 in random order
|
| 63 |
+
for v in random.sample(["0", "1"], 2):
|
| 64 |
+
# Check adjacency constraints in row (no three in a row)
|
| 65 |
+
if j >= 2 and grid[i][j-1] == v and grid[i][j-2] == v:
|
| 66 |
+
continue
|
| 67 |
+
if j >= 1 and j + 1 < M and grid[i][j-1] == v and grid[i][j+1] == v:
|
| 68 |
+
continue
|
| 69 |
+
if j + 2 < M and grid[i][j+1] == v and grid[i][j+2] == v:
|
| 70 |
+
continue
|
| 71 |
+
|
| 72 |
+
# Check adjacency constraints in column
|
| 73 |
+
if i >= 2 and grid[i-1][j] == v and grid[i-2][j] == v:
|
| 74 |
+
continue
|
| 75 |
+
if i >= 1 and i + 1 < N and grid[i-1][j] == v and grid[i+1][j] == v:
|
| 76 |
+
continue
|
| 77 |
+
if i + 2 < N and grid[i+1][j] == v and grid[i+2][j] == v:
|
| 78 |
+
continue
|
| 79 |
+
|
| 80 |
+
# Place v
|
| 81 |
+
grid[i][j] = v
|
| 82 |
+
|
| 83 |
+
# Recurse
|
| 84 |
+
if backtrack(idx + 1):
|
| 85 |
+
return True
|
| 86 |
+
|
| 87 |
+
grid[i][j] = None
|
| 88 |
+
|
| 89 |
+
# No valid value at (i, j): backtrack
|
| 90 |
+
return False
|
| 91 |
+
|
| 92 |
+
return grid if backtrack(0) else None
|
| 93 |
+
|
| 94 |
+
matrix = generate_matrix(N, M)
|
| 95 |
+
if matrix is None :
|
| 96 |
+
self.parameter = None
|
| 97 |
+
return
|
| 98 |
+
self.parameter["reference_answer"] = "\n".join("".join(row) for row in matrix)
|
| 99 |
+
|
| 100 |
+
self.parameter["row_counts"] = [sum(int(cell == "1") for cell in row) for row in matrix]
|
| 101 |
+
self.parameter["col_counts"] = [sum(int(matrix[i][j] == "1") for i in range(N)) for j in range(M)]
|
| 102 |
+
|
| 103 |
+
assert "sparsity" in self.parameter, "sparsity is required in parameter"
|
| 104 |
+
sparsity = self.parameter["sparsity"]
|
| 105 |
+
assert 0 < sparsity < 1, "sparsity should be between 0 and 1"
|
| 106 |
+
empty_cells = random.sample(range(N * M), max(1, int(N * M * sparsity)))
|
| 107 |
+
for cell in empty_cells :
|
| 108 |
+
row, column = divmod(cell, M)
|
| 109 |
+
matrix[row][column] = '*'
|
| 110 |
+
self.parameter["matrix"] = ["".join(row) for row in matrix]
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def _prompt_generate(self) -> str :
|
| 114 |
+
return self.prompt_template.format(
|
| 115 |
+
N = self.parameter["N"],
|
| 116 |
+
M = self.parameter["M"],
|
| 117 |
+
matrix = "\n".join("".join(map(str, row)) for row in self.parameter["matrix"]),
|
| 118 |
+
row_counts = ", ".join(map(str, self.parameter["row_counts"])),
|
| 119 |
+
col_counts = ", ".join(map(str, self.parameter["col_counts"])),
|
| 120 |
+
)
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def _process(self, answer : Optional[str]) -> Optional[List] :
|
| 124 |
+
if answer is not None :
|
| 125 |
+
answer = answer.strip()
|
| 126 |
+
try :
|
| 127 |
+
matrix = []
|
| 128 |
+
for line in answer.splitlines() :
|
| 129 |
+
line = line.strip()
|
| 130 |
+
if line :
|
| 131 |
+
matrix.append(line.strip())
|
| 132 |
+
return matrix
|
| 133 |
+
except ValueError :
|
| 134 |
+
return None
|
| 135 |
+
else :
|
| 136 |
+
return None
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def scorer(self, output: str) -> float:
    """Score a filled grid.

    `self.processor` is defined outside this block; a None result from it,
    a grid of the wrong shape, or a character other than '0'/'1' yields
    `rewards["wrong_format"]`. Changing a pre-filled cell or having three
    equal cells in a row or column yields `rewards["invalid_solution"]`.
    Otherwise the reward depends on how many of the N + M row/column counts
    of '1' match the stored counts.

    Raises:
        NotImplementedError: if the rewarding strategy is not recognised.
    """
    solution = self.processor(output)
    if solution is None:
        return self.rewards["wrong_format"]
    assert isinstance(solution, list), "processed_result should be a list"

    N, M = self.parameter["N"], self.parameter["M"]

    if len(solution) != N or any(len(row) != M for row in solution):
        return self.rewards["wrong_format"]
    if any(ch not in "01" for row in solution for ch in row):
        return self.rewards["wrong_format"]

    # Pre-filled cells must be kept as given.
    for row, given_row in zip(solution, self.parameter["matrix"]):
        for cell, given in zip(row, given_row):
            if given == '*' or cell == given:
                continue
            assert (given == '0' and cell == '1') or (given == '1' and cell == '0')
            return self.rewards["invalid_solution"]

    # Every cell is '0' or '1' here, so a run of three shows up as a substring.
    columns = ["".join(column) for column in zip(*solution)]
    if any("000" in line or "111" in line for line in solution + columns):
        return self.rewards["invalid_solution"]

    row_ones = [row.count("1") for row in solution]
    col_ones = [column.count("1") for column in columns]
    satisfied = sum(mine == gold for mine, gold in zip(row_ones, self.parameter["row_counts"]))
    satisfied += sum(mine == gold for mine, gold in zip(col_ones, self.parameter["col_counts"]))
    assert satisfied <= N + M, "satisfied should not exceed N + M"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(satisfied/all)^beta":
        return self.rewards["rewarding_weight"] * ((satisfied / (N + M)) ** self.rewards["rewarding_beta"])
    if strategy == "satisfied=all":
        return self.rewards["rewarding_weight"] * (satisfied == (N + M))
    raise NotImplementedError("Unknown rewarding strategy: {}".format(strategy))
|
server/Gym/environments/binario_no_adjacency_requirement/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import Binario_NoAdjacencyRequirement_Environment
|
server/Gym/environments/binario_no_adjacency_requirement/environment.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from typing import Optional, List
|
| 3 |
+
from ...environment import VerifiableEnvironment
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class Binario_NoAdjacencyRequirement_Environment(VerifiableEnvironment) :
|
| 7 |
+
prompt_template = \
|
| 8 |
+
r"""You are given a (2 × {N}) × (2 × {M}) matrix. Each cell contains either '0', '1', or '*' ('*' means the cell is empty). Please fill all '*' cells with either '0' or '1' such that:
|
| 9 |
+
1. Each **row** contains exactly {M} '0's and {M} '1's.
|
| 10 |
+
2. Each **column** contains exactly {N} '0's and {N} '1's.
|
| 11 |
+
|
| 12 |
+
The matrix is given in **row-major order**, with each row represented as a string of '0', '1', and '*':
|
| 13 |
+
{matrix}
|
| 14 |
+
|
| 15 |
+
**Output Format:** Output (2 × {N}) lines, each containing (2 × {M}) characters, where each character is either '0' or '1'. The output should match the format of the input (i.e., one row per line, no separators)."""
|
| 16 |
+
|
| 17 |
+
def __init__(
    self,
    wrong_format : float = -1.0,
    invalid_solution : float = -0.5,
    wrong_solution : float = 0.0,
    correct_solution : float = 1.0,
    **kwargs,
) :
    """
    Set up a Binario_NoAdjacencyRequirement_Environment.

    The four reward values are collected into `self.rewards`; all remaining
    keyword arguments are forwarded to the parent class.
    """
    super().__init__(**kwargs)

    self.rewards = dict(
        wrong_format=wrong_format,
        invalid_solution=invalid_solution,
        wrong_solution=wrong_solution,
        correct_solution=correct_solution,
    )
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _generate(self) -> None :
|
| 34 |
+
assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
|
| 35 |
+
MAX_N_M = self.parameter["MAX_N_M"]
|
| 36 |
+
assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"
|
| 37 |
+
|
| 38 |
+
N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M)
|
| 39 |
+
|
| 40 |
+
row_permutation, col_permutation = list(range(2 * N)), list(range(2 * M))
|
| 41 |
+
random.shuffle(row_permutation)
|
| 42 |
+
random.shuffle(col_permutation)
|
| 43 |
+
|
| 44 |
+
matrix = [[str((row_permutation[i] + col_permutation[j]) % 2) for j in range(2 * M)] for i in range(2 * N)]
|
| 45 |
+
self.parameter["reference_answer"] = "\n".join("".join(row) for row in matrix)
|
| 46 |
+
|
| 47 |
+
assert "sparsity" in self.parameter, "sparsity is required in parameter"
|
| 48 |
+
sparsity = self.parameter["sparsity"]
|
| 49 |
+
assert 0 < sparsity < 1, "sparsity should be between 0 and 1"
|
| 50 |
+
empty_cells = random.sample(range((2 * N) * (2 * M)), max(1, int((2 * N) * (2 * M) * sparsity)))
|
| 51 |
+
for cell in empty_cells :
|
| 52 |
+
row, column = divmod(cell, 2 * M)
|
| 53 |
+
matrix[row][column] = '*'
|
| 54 |
+
self.parameter["matrix"] = ["".join(row) for row in matrix]
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def _prompt_generate(self) -> str :
|
| 58 |
+
return self.prompt_template.format(
|
| 59 |
+
N = self.parameter["N"],
|
| 60 |
+
M = self.parameter["M"],
|
| 61 |
+
matrix = "\n".join("".join(map(str, row)) for row in self.parameter["matrix"]),
|
| 62 |
+
)
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def _process(self, answer : Optional[str]) -> Optional[List] :
|
| 66 |
+
if answer is not None :
|
| 67 |
+
answer = answer.strip()
|
| 68 |
+
try :
|
| 69 |
+
matrix = []
|
| 70 |
+
for line in answer.splitlines() :
|
| 71 |
+
line = line.strip()
|
| 72 |
+
if line :
|
| 73 |
+
matrix.append(line.strip())
|
| 74 |
+
return matrix
|
| 75 |
+
except ValueError :
|
| 76 |
+
return None
|
| 77 |
+
else :
|
| 78 |
+
return None
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def scorer(self, output : str) -> float :
|
| 82 |
+
processed_result = self.processor(output)
|
| 83 |
+
if processed_result is not None :
|
| 84 |
+
assert isinstance(processed_result, list), "processed_result should be a list"
|
| 85 |
+
|
| 86 |
+
N, M = self.parameter["N"], self.parameter["M"]
|
| 87 |
+
solution = processed_result
|
| 88 |
+
|
| 89 |
+
if len(solution) != 2 * N or any(len(row) != 2 * M for row in solution) :
|
| 90 |
+
return self.rewards["wrong_format"]
|
| 91 |
+
for row in solution :
|
| 92 |
+
if not all(c in "01" for c in row) :
|
| 93 |
+
return self.rewards["wrong_format"]
|
| 94 |
+
|
| 95 |
+
for row, original_row in zip(solution, self.parameter["matrix"]) :
|
| 96 |
+
for cell, original_cell in zip(row, original_row) :
|
| 97 |
+
if original_cell != '*' and cell != original_cell :
|
| 98 |
+
assert (original_cell == '0' and cell == '1') or (original_cell == '1' and cell == '0')
|
| 99 |
+
return self.rewards["invalid_solution"]
|
| 100 |
+
|
| 101 |
+
for i in range(2 * N) :
|
| 102 |
+
if solution[i].count('1') != solution[i].count('0') :
|
| 103 |
+
return self.rewards["wrong_solution"]
|
| 104 |
+
assert solution[i].count('1') == M, "Row {} does not have exactly {} ones".format(i, M)
|
| 105 |
+
assert solution[i].count('0') == M, "Row {} does not have exactly {} zeros".format(i, M)
|
| 106 |
+
for j in range(2 * M) :
|
| 107 |
+
if sum(solution[i][j] == '1' for i in range(2 * N)) != sum(solution[i][j] == '0' for i in range(2 * N)) :
|
| 108 |
+
return self.rewards["wrong_solution"]
|
| 109 |
+
assert sum(solution[i][j] == '1' for i in range(2 * N)) == N, "Column {} does not have exactly {} ones".format(j, N)
|
| 110 |
+
assert sum(solution[i][j] == '0' for i in range(2 * N)) == N, "Column {} does not have exactly {} zeros".format(j, N)
|
| 111 |
+
|
| 112 |
+
return self.rewards["correct_solution"]
|
| 113 |
+
else :
|
| 114 |
+
return self.rewards["wrong_format"]
|
server/Gym/environments/binary_alternation/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import BinaryAlternation_Environment
|
server/Gym/environments/binary_alternation/environment.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from typing import Optional, List
|
| 3 |
+
from ...environment import VerifiableEnvironment
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class BinaryAlternation_Environment(VerifiableEnvironment):
    """Task: turn a binary string into an alternating one with the fewest swaps.

    The answer is a list of index pairs to swap; it is scored by applying the
    swaps and comparing their number with the minimum found at generation.
    """

    prompt_template = \
    r"""You are given a binary string of length {N}, consisting of `0`s and `1`s. It is 0-indexed: {string}

In one operation, you may **swap** the characters at indices `i` and `j` (0 ≤ i, j < {N}). Please transform the string into an **alternating binary string** (no two adjacent characters are the same) using the **minimum number of operations**.

**Output Format:** Each operation should be written on a single line in the format: `i j`, where `i` and `j` are the indices being swapped. Do **NOT** include backticks or quotes. Output one operation per line in the order they should be performed."""

    def __init__(self,
                 wrong_format: float = -1.0,
                 invalid_solution: float = -0.5,
                 rewarding_strategy: str = "(gold/answer)^beta",
                 rewarding_weight: float = +1.0,
                 rewarding_beta: float = 5.0,
                 **kwargs):
        """Record the penalties and the reward-shaping settings.

        Any remaining keyword arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            invalid_solution=invalid_solution,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Create a shuffled binary string and a minimum-length swap list.

        Reads ``zero_count`` from ``self.parameter``; the number of ones is
        drawn within one of it so that an alternating arrangement exists.
        Writes ``string``, ``reference_answer`` and ``gold_answer``.
        """
        assert "zero_count" in self.parameter, "zero_count is required in parameter"
        zero_count = self.parameter["zero_count"]
        assert zero_count >= 2, "zero_count should be greater than or equal to 2"

        one_count = random.randint(zero_count - 1, zero_count + 1)

        symbols = ["0"] * zero_count + ["1"] * one_count
        random.shuffle(symbols)
        string = self.parameter["string"] = "".join(symbols)

        self.parameter["reference_answer"] = None

        def plan_swaps(first: str) -> List[str]:
            """Pair up misplaced zeros and ones for the pattern starting with ``first``."""
            other = "1" if first == "0" else "0"
            misplaced_zeros, misplaced_ones = [], []
            for idx, ch in enumerate(string):
                expected = first if idx % 2 == 0 else other
                if ch == expected:
                    continue
                (misplaced_zeros if ch == "0" else misplaced_ones).append(idx)
            assert len(misplaced_zeros) == len(misplaced_ones), "zero_to_one and one_to_zero should have the same length"
            return ["{} {}".format(i, j) for i, j in zip(misplaced_zeros, misplaced_ones)]

        # A pattern is only feasible when its leading symbol is not in the minority.
        best = None
        if zero_count >= one_count:
            best = plan_swaps("0")
        if one_count >= zero_count:
            candidate = plan_swaps("1")
            if best is None or len(candidate) < len(best):
                best = candidate

        self.parameter["gold_answer"] = len(best)
        self.parameter["reference_answer"] = "\n".join(best)

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the string and its length."""
        text = self.parameter["string"]
        return self.prompt_template.format(N=len(text), string=text)

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse an answer into a list of ``[i, j]`` integer pairs.

        Blank lines are ignored.  Returns ``None`` when the answer is missing,
        a line does not have exactly two tokens, or a token is not an integer.
        """
        if answer is None:
            return None

        actions = []
        for raw_line in answer.strip().splitlines():
            tokens = raw_line.split()
            if not tokens:
                continue
            if len(tokens) != 2:
                return None
            try:
                actions.append([int(tokens[0]), int(tokens[1])])
            except ValueError:
                return None
        return actions

    def scorer(self, output: str) -> float:
        """Apply the submitted swaps and reward by how close they are to optimal.

        Returns ``wrong_format`` for an unparsable answer and
        ``invalid_solution`` for an out-of-range index or a result that is not
        alternating; otherwise the reward set by ``rewarding_strategy``.
        """
        swaps = self.processor(output)
        if swaps is None:
            return self.rewards["wrong_format"]

        chars = list(self.parameter["string"])
        length = len(chars)
        for i, j in swaps:
            if not (0 <= i < length and 0 <= j < length):
                return self.rewards["invalid_solution"]
            chars[i], chars[j] = chars[j], chars[i]

        if any(left == right for left, right in zip(chars, chars[1:])):
            return self.rewards["invalid_solution"]

        gold, answer = self.parameter["gold_answer"], len(swaps)
        assert gold <= answer, "gold should be less than or equal to answer"

        if answer == 0:
            return self.rewards["rewarding_weight"]

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(gold/answer)^beta":
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
|
server/Gym/environments/binary_linear_equation_solution_counting/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import BinaryLinearEquation_SolutionCounting_Environment
|
server/Gym/environments/binary_linear_equation_solution_counting/environment.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from typing import Optional
|
| 3 |
+
from ...environment import VerifiableEnvironment
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class BinaryLinearEquation_SolutionCounting_Environment(VerifiableEnvironment):
    """Task: count integer pairs (x, y) in a box satisfying A*x + B*y + C = 0."""

    prompt_template = r"""What is the number of integer solution pairs (x, y) such that ({A}) * x + ({B}) * y + ({C}) = 0, with {X1} <= x <= {X2} and {Y1} <= y <= {Y2}?"""

    def __init__(self,
                 wrong_format: float = -1.0,
                 rewarding_strategy: str = "(min/max)^beta",
                 rewarding_weight: float = 1.0,
                 rewarding_beta: float = 10.0,
                 not_guaranteed_probability: float = 0.05,
                 **kwargs):
        """Record reward settings and the chance of an unconstrained instance.

        ``not_guaranteed_probability`` is the probability that an instance is
        generated without planting a solution inside the box.  Any remaining
        keyword arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.not_guaranteed_probability = not_guaranteed_probability
        self.rewards = dict(
            wrong_format=wrong_format,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    def _generate(self) -> None:
        """Draw the coefficients and bounds, then compute the exact count.

        Reads ``MAX_RANGE`` from ``self.parameter`` and writes ``A``, ``B``,
        ``C``, ``X1``, ``X2``, ``Y1``, ``Y2`` and ``reference_answer``.
        """
        assert "MAX_RANGE" in self.parameter, "MAX_RANGE is required in parameter"
        MAX_RANGE = self.parameter["MAX_RANGE"]
        assert MAX_RANGE >= 8, "MAX_RANGE must be at least 8"

        A = self.parameter["A"] = random.randint(-MAX_RANGE, +MAX_RANGE)
        B = self.parameter["B"] = random.randint(-MAX_RANGE, +MAX_RANGE)
        not_guaranteed = random.random() < self.not_guaranteed_probability
        if not_guaranteed:
            # Fully random instance: it may well have no solution in the box.
            X1 = self.parameter["X1"] = random.randint(-MAX_RANGE, +MAX_RANGE)
            X2 = self.parameter["X2"] = random.randint(X1, +MAX_RANGE)
            Y1 = self.parameter["Y1"] = random.randint(-MAX_RANGE, +MAX_RANGE)
            Y2 = self.parameter["Y2"] = random.randint(Y1, +MAX_RANGE)
            C = self.parameter["C"] = random.randint(-2 * (MAX_RANGE ** 2), +2 * (MAX_RANGE ** 2))
        else:
            # Plant a solution (x, y) and make the box contain it.
            x = random.randint(-MAX_RANGE, +MAX_RANGE)
            y = random.randint(-MAX_RANGE, +MAX_RANGE)
            C = self.parameter["C"] = -(A * x + B * y)
            X1 = self.parameter["X1"] = random.randint(-MAX_RANGE, x)
            X2 = self.parameter["X2"] = random.randint(x, +MAX_RANGE)
            Y1 = self.parameter["Y1"] = random.randint(-MAX_RANGE, y)
            Y2 = self.parameter["Y2"] = random.randint(y, +MAX_RANGE)

        def bezout(a, b):
            """Return (g, x, y) with a*x + b*y = g = gcd(a, b), for a, b >= 0."""
            if b == 0:
                return (a, 1, 0)
            g, inner_x, inner_y = bezout(b, a % b)
            return (g, inner_y, inner_x - (a // b) * inner_y)

        def k_range(a0, step, L, R):
            """Return the inclusive integer interval of k with L <= a0 + step*k <= R.

            ``step`` must be non-zero; the interval is empty when lo > hi.
            """
            if step > 0:
                low_bound, high_bound = L - a0, R - a0
            else:
                # Dividing by a negative step swaps the two bounds.
                low_bound, high_bound = R - a0, L - a0
            # -((-n) // step) is ceil(n / step); n // step is floor(n / step).
            return -((-low_bound) // step), high_bound // step

        def count_solutions(A, B, C, X1, X2, Y1, Y2):
            """Count integer points of the line A*x + B*y + C = 0 inside the box."""
            if X1 > X2:
                X1, X2 = X2, X1
            if Y1 > Y2:
                Y1, Y2 = Y2, Y1
            x_span = X2 - X1 + 1
            y_span = Y2 - Y1 + 1

            # Degenerate cases: at least one coefficient vanishes.
            if A == 0 and B == 0:
                return x_span * y_span if C == 0 else 0
            if A == 0:
                # B*y + C = 0 fixes y; x is free.
                if C % B != 0:
                    return 0
                return x_span if (Y1 <= -C // B <= Y2) else 0
            if B == 0:
                # A*x + C = 0 fixes x; y is free.
                if C % A != 0:
                    return 0
                return y_span if (X1 <= -C // A <= X2) else 0

            # General case: solvable iff gcd(|A|, |B|) divides C.
            d, xg, yg = bezout(abs(A), abs(B))
            if C % d != 0:
                return 0
            if A < 0:
                xg = -xg
            if B < 0:
                yg = -yg

            # One solution of A*x + B*y = -C, then x = x0 + (B/d)k, y = y0 - (A/d)k.
            scale = (-C) // d
            kx_lo, kx_hi = k_range(xg * scale, B // d, X1, X2)
            ky_lo, ky_hi = k_range(yg * scale, (-A) // d, Y1, Y2)

            lo = max(kx_lo, ky_lo)
            hi = min(kx_hi, ky_hi)
            return max(0, hi - lo + 1)

        self.parameter["reference_answer"] = count_solutions(A, B, C, X1, X2, Y1, Y2)
        if not_guaranteed:
            assert self.parameter["reference_answer"] >= 0
        else:
            assert self.parameter["reference_answer"] >= 1

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the coefficients and the bounds."""
        names = ("A", "B", "C", "X1", "X2", "Y1", "Y2")
        return self.prompt_template.format(**{name: self.parameter[name] for name in names})

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse the answer as an integer; return ``None`` if missing or invalid."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Reward the submitted count according to ``rewarding_strategy``.

        Unparsable or negative answers receive ``wrong_format``.
        """
        guess = self.processor(output)
        if guess is None or guess < 0:
            return self.rewards["wrong_format"]

        reference = self.parameter["reference_answer"]
        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(min/max)^beta":
            if reference == 0:
                # The ratio is undefined for a zero reference: exact match only.
                return self.rewards["rewarding_weight"] * (guess == 0)
            ratio = min(reference, guess) / max(reference, guess)
            return self.rewards["rewarding_weight"] * (ratio ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer":
            return self.rewards["rewarding_weight"] * (guess == reference)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
|
server/Gym/environments/binary_tree_leaf_num_expectation/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import BinaryTreeLeafNumExpectation_Environment
|
server/Gym/environments/binary_tree_leaf_num_expectation/environment.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
import random
|
| 3 |
+
from typing import Optional, Tuple
|
| 4 |
+
from ...environment import VerifiableEnvironment
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class BinaryTreeLeafNumExpectation_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3978
    """Task: expected number of leaves of a uniformly random N-node binary tree.

    The reference value is the fraction N*(N+1) / (2*(2N-1)) in lowest terms;
    answers are accepted in any equivalent positive fraction ``A/B``.
    """

    prompt_template = \
    r"""We uniformly at random generate a **binary tree** with exactly {N} nodes (all distinct binary trees with {N} nodes are equally likely). Two binary trees are considered identical if and only if:
- both are empty, **OR**
- both are non-empty, and their left subtrees are identical and their right subtrees are identical.

What is the expected number of **leaf** nodes (nodes whose left and right children are both empty) in the generated binary tree? Output the result as `A/B` (do NOT include quotes), where A and B are positive integers separated by a slash `/`."""

    def __init__(self,
                 wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Initialize the BinaryTreeLeafNumExpectation_Environment instance.

        The three floats are the rewards for an unparsable/non-positive
        answer, a correct fraction and a wrong fraction; remaining keyword
        arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "correct_answer" : correct_answer,
            "wrong_answer" : wrong_answer,
        }


    def _generate(self) -> None :
        """Draw N in [1, MAX_N] and store the reduced reference fraction.

        Reads ``MAX_N`` from ``self.parameter`` and writes ``N``,
        ``gold_answer`` (dict with keys ``A`` and ``B``) and
        ``reference_answer`` (the string ``"A/B"``).
        """
        assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
        MAX_N = self.parameter["MAX_N"]
        assert MAX_N >= 5, "MAX_N should be greater than or equal to 5"

        N = self.parameter["N"] = random.randint(1, MAX_N)

        # Expected leaf count N*(N+1) / (2*(2N-1)), reduced to lowest terms.
        A, B = N * (N + 1), 2 * (2 * N - 1)
        gcd_AB = math.gcd(A, B)
        A //= gcd_AB
        B //= gcd_AB
        self.parameter["gold_answer"] = dict(A = A, B = B)
        self.parameter["reference_answer"] = "{}/{}".format(A, B)


    def _prompt_generate(self) -> str :
        """Fill the prompt template with N."""
        return self.prompt_template.format(N = self.parameter["N"])


    def _process(self, answer : Optional[str]) -> Optional[Tuple[int, int]] :
        """Parse ``"A/B"`` into an ``(A, B)`` integer tuple.

        Returns ``None`` when the answer is missing, does not split into
        exactly two parts around ``/``, or a part is not an integer.
        """
        if answer is not None :
            answer = answer.strip()
            try :
                A, B = map(int, map(str.strip, answer.split('/')))
                return (A, B)
            except ValueError :
                # Raised both by int() on a non-numeric part and by unpacking
                # a wrong number of parts.  Catching only ValueError keeps
                # KeyboardInterrupt/SystemExit from being swallowed.
                return None
        else :
            return None


    def scorer(self, output : str) -> float :
        """Compare the submitted fraction, reduced to lowest terms, with the reference.

        Unparsable answers and non-positive numerators/denominators receive
        ``wrong_format``.
        """
        processed_result = self.processor(output)
        if processed_result is not None :
            A, B = processed_result
            if not (A > 0 and B > 0) :
                return self.rewards["wrong_format"]
            gold_A, gold_B = self.parameter["gold_answer"]["A"], self.parameter["gold_answer"]["B"]
            # Normalise so that equivalent fractions such as 2/4 and 1/2 match.
            gcd_AB = math.gcd(A, B)
            A //= gcd_AB
            B //= gcd_AB
            if (A, B) == (gold_A, gold_B) :
                return self.rewards["correct_answer"]
            else :
                return self.rewards["wrong_answer"]
        else :
            return self.rewards["wrong_format"]
|
server/Gym/environments/bit_equation_counting/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import BitEquationCounting_Environment
|
server/Gym/environments/bit_equation_counting/environment.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from typing import Optional
|
| 3 |
+
from ...environment import VerifiableEnvironment
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class BitEquationCounting_Environment(VerifiableEnvironment) :
    """Task: count the assignments that make a random Boolean expression true.

    The expression has N variable slots (written ``_``) combined by ``&``,
    ``|`` and ``^`` with explicit parentheses.
    """

    prompt_template = \
    r"""Given a Boolean expression (where `_` represents a variable that can be 0 or 1, `&` is bitwise AND, `|` is bitwise OR, and `^` is bitwise XOR): {expression}

There are 2^{N} possible combinations of values for the variables. Your task is to find how many of these combinations make the expression evaluate to true.

**Output Format:** Your final answer should be a single integer — the number of combinations that make the expression true. Example: `15` (do **NOT** include quotes or backticks)."""

    def __init__(self,
                 wrong_format : float = -1.0, wrong_range : float = -0.5, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the BitEquationCounting_Environment instance.

        Stores the penalties and reward-shaping settings; remaining keyword
        arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "wrong_range" : wrong_range,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }

    def _generate(self) -> None :
        """Build a random expression over N slots together with its true-count.

        Reads ``N`` from ``self.parameter`` and writes ``expression`` and
        ``reference_answer``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        def build_expression(n) :
            """Return (expression, true_count, false_count) for n variable slots."""
            if n == 1 :
                return "_", 1, 1
            left_n = random.randint(1, n - 1)
            right_n = n - left_n
            left_expr, left_true, left_false = build_expression(left_n)
            right_expr, right_true, right_false = build_expression(right_n)
            op = random.choice(("&", "|", "^"))
            # The two sides use disjoint variables, so their counts multiply.
            if op == "&" :
                true_count = left_true * right_true
                false_count = (2 ** n) - true_count
            elif op == "|" :
                false_count = left_false * right_false
                true_count = (2 ** n) - false_count
            elif op == "^" :
                true_count = left_true * right_false + left_false * right_true
                false_count = left_true * right_true + left_false * right_false
                assert true_count + false_count == 2 ** n, "XOR operation should cover all cases"
            else :
                raise ValueError("Invalid operator")
            return "({} {} {})".format(left_expr, op, right_expr), true_count, false_count
        expression, true_count, _ = build_expression(N)

        # N >= 2 means the top level is parenthesised; drop that outer pair.
        self.parameter["expression"] = expression[1 : -1]
        self.parameter["reference_answer"] = true_count

    def _prompt_generate(self) -> str :
        """Fill the prompt template with the expression and N."""
        return self.prompt_template.format(expression = self.parameter["expression"], N = self.parameter["N"])


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return ``None`` if missing or invalid."""
        if answer is not None :
            answer = answer.strip()
            try :
                int_answer = int(answer)
                return int_answer
            except ValueError :
                return None
        else :
            return None


    def scorer(self, output : str) -> float :
        """Reward the submitted count according to ``rewarding_strategy``.

        Unparsable answers receive ``wrong_format``; counts outside
        [0, 2^N] receive ``wrong_range``.
        """
        processed_result = self.processor(output)
        if processed_result is not None :
            if not (0 <= processed_result <= 2 ** self.parameter["N"]) :
                return self.rewards["wrong_range"]

            if self.rewards["rewarding_strategy"] == "(min/max)^beta" :
                a, b = self.parameter["reference_answer"], processed_result
                return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
            elif self.rewards["rewarding_strategy"] == "gold=answer" :
                return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
            else :
                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
        else :
            return self.rewards["wrong_format"]
|
server/Gym/environments/bitand_zero_path_counting/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import BitAndZero_PathCounting_Environment
|
server/Gym/environments/bitand_zero_path_counting/environment.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from typing import Optional
|
| 3 |
+
from ...environment import VerifiableEnvironment
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class BitAndZero_PathCounting_Environment(VerifiableEnvironment) :
|
| 7 |
+
prompt_template = \
|
| 8 |
+
r"""You are given a **directed graph** with an **infinite number of vertices**, where each vertex is labeled with a non-negative integer: `0`, `1`, `2`, ...
|
| 9 |
+
|
| 10 |
+
There is a directed edge from vertex `s` to vertex `t` if and only if:
|
| 11 |
+
- `s < t`, and
|
| 12 |
+
- `s & t = 0` (where `&` denotes the bitwise AND operation)
|
| 13 |
+
|
| 14 |
+
Please compute the number of **distinct paths** from vertex `{S}` to vertex `{T}`. Give the result **modulo {MOD}**.
|
| 15 |
+
Note that the two vertices labels are provided in **binary (base-2)** representation.
|
| 16 |
+
|
| 17 |
+
**Output Format:** Your final answer should be a single integer — the number of distinct paths modulo `{MOD}`."""
|
| 18 |
+
MOD = 10000
|
| 19 |
+
|
| 20 |
+
def __init__(self,
             wrong_format: float = -1.0,
             wrong_range: float = -0.5,
             correct_answer: float = +1.0,
             wrong_answer: float = 0.0,
             **kwargs):
    """Record the reward for each scoring outcome.

    Any remaining keyword arguments are forwarded to the base class.
    """
    super().__init__(**kwargs)

    self.rewards = dict(
        wrong_format=wrong_format,
        wrong_range=wrong_range,
        correct_answer=correct_answer,
        wrong_answer=wrong_answer,
    )
|
| 34 |
+
|
| 35 |
+
def _generate_helper(self) -> None:
    """Draw two binary labels S <= T and compute the path count modulo MOD.

    Reads ``max_length`` from ``self.parameter`` and writes ``S``, ``T``
    (binary strings, each starting with '1') and ``reference_answer``.
    """
    assert "max_length" in self.parameter, "max_length is required in parameter"
    max_length = self.parameter["max_length"]
    assert max_length >= 1, "max_length should be greater than or equal to 1"

    def random_label() -> str:
        # The length is drawn first, then the bits after the leading '1'.
        tail_length = random.randint(1, max_length) - 1
        return "1" + "".join(str(random.randint(0, 1)) for _ in range(tail_length))

    S = random_label()
    T = random_label()

    # Without leading zeros, (length, text) orders the labels numerically,
    # so this swap ensures S <= T.
    if (len(S), S) > (len(T), T):
        S, T = T, S
    self.parameter["S"], self.parameter["T"] = S, T

    MOD = self.MOD

    s_bits = [int(ch) for ch in S]
    t_bits = [int(ch) for ch in T]
    N, M = len(s_bits), len(t_bits)

    if M > N:
        # Left-pad S with zeros to the length of T.
        s_bits = [0] * (M - N) + s_bits
    else:
        assert M == N

    # G[start][i] is a pair of counts indexed by a bit value, seeded at i = 0
    # with a single 1 in slot `start`.
    G = []
    for start in (0, 1):
        table = [[0, 0] for _ in range(M)]
        table[0][start] = 1
        for i in range(1, M):
            prev_zero, prev_one = table[i - 1]
            table[i][0] = (prev_zero + prev_one) % MOD
            table[i][1] = prev_zero
        G.append(table)

    # 1-based position of the first set bit of the padded S (M + 1 if none).
    H = next((pos for pos, bit in enumerate(s_bits, start=1) if bit != 0), M + 1)

    F = [[0] * M for _ in range(M + 1)]
    F[1][0] = 1

    for i in range(2, M + 1):
        bit = t_bits[i - 1]
        previous, current = F[i - 1], F[i]
        for x in range(i - 1):
            ways = previous[x]
            if i <= H:
                current[x + 1] = (current[x + 1] + ways * G[1][x + 1][bit]) % MOD
            if i < H:
                total = (G[0][x][bit] + G[1][x][bit]) % MOD
                current[x] = (current[x] + ways * total) % MOD
            if i > H:
                current[x] = (current[x] + ways * G[s_bits[i - 1]][x][bit]) % MOD

    self.parameter["reference_answer"] = sum(F[M]) % MOD
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def _generate(self) -> None :
|
| 99 |
+
while True :
|
| 100 |
+
self._generate_helper()
|
| 101 |
+
if self.parameter["reference_answer"] not in (0, 1) :
|
| 102 |
+
break
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def _prompt_generate(self) -> str :
|
| 106 |
+
return self.prompt_template.format(
|
| 107 |
+
S = self.parameter["S"],
|
| 108 |
+
T = self.parameter["T"],
|
| 109 |
+
MOD = self.MOD,
|
| 110 |
+
)
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def _process(self, answer : Optional[str]) -> Optional[int] :
|
| 114 |
+
if answer is not None :
|
| 115 |
+
answer = answer.strip()
|
| 116 |
+
try :
|
| 117 |
+
int_answer = int(answer)
|
| 118 |
+
return int_answer
|
| 119 |
+
except ValueError :
|
| 120 |
+
return None
|
| 121 |
+
else :
|
| 122 |
+
return None
|
| 123 |
+
|
| 124 |
+
def scorer(self, output : str) -> float :
|
| 125 |
+
processed_result = self.processor(output)
|
| 126 |
+
if processed_result is not None :
|
| 127 |
+
if not (0 <= processed_result < self.MOD) :
|
| 128 |
+
return self.rewards["wrong_range"]
|
| 129 |
+
|
| 130 |
+
if processed_result == self.parameter["reference_answer"] :
|
| 131 |
+
return self.rewards["correct_answer"]
|
| 132 |
+
else :
|
| 133 |
+
return self.rewards["wrong_answer"]
|
| 134 |
+
else :
|
| 135 |
+
return self.rewards["wrong_format"]
|
server/Gym/environments/bitwise_operation_sequence_counting/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .environment import BitwiseOperationSequenceCounting_Environment
|
server/Gym/environments/bitwise_operation_sequence_counting/environment.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from typing import Optional
|
| 3 |
+
from ...environment import VerifiableEnvironment
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class BitwiseOperationSequenceCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4424
    """
    Counting task: given N + 1 binary strings (the first one all zeros), count the
    ways to place AND / OR between adjacent strings so that left-to-right
    evaluation yields a given target string.
    """

    prompt_template = \
r"""You are given an array A of {N} + 1 binary strings, each of length {M}. The strings are:
{A}

You will insert an operation (`AND` or `OR`) between every pair of adjacent elements in A, resulting in {N} operations total, to form an expression. You can evaluate the expression from left to right (without operator precedence) to get the final result of the expression.
Count the number of different ways to insert these operations such that the final result equals this binary string: {R}"""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Set up the environment and record the scoring configuration.

        Args:
            wrong_format: reward returned for unparsable or negative answers.
            rewarding_strategy: "(min/max)^beta" or "gold=answer".
            rewarding_weight: multiplier applied to the strategy's score.
            rewarding_beta: exponent used by the "(min/max)^beta" strategy.
            **kwargs: forwarded unchanged to the parent initializer.
        """
        super().__init__(**kwargs)

        setting_names = ("wrong_format", "rewarding_strategy", "rewarding_weight", "rewarding_beta")
        setting_values = (wrong_format, rewarding_strategy, rewarding_weight, rewarding_beta)
        self.rewards = dict(zip(setting_names, setting_values))

    def _generate(self) -> None :
        """
        Sample a random instance and compute its reference answer.

        Reads self.parameter["MAX_N_M"] and writes "N", "M", "A" (N + 1 strings of
        length M, A[0] all zeros), "R" (the target string) and "reference_answer".
        """
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        MAX_N_M = self.parameter["MAX_N_M"]
        assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

        N = self.parameter["N"] = random.randint(2, MAX_N_M)
        M = self.parameter["M"] = random.randint(2, MAX_N_M)

        # The target is produced by actually applying a random operator sequence,
        # so at least one valid way to insert the operations exists.
        operands = self.parameter["A"] = ["0" * M]
        target = "0" * M
        AND_probability = random.random()
        for _step in range(N) :
            one_probability = random.random()
            operand = "".join("1" if random.random() < one_probability else "0" for _ in range(M))
            operands.append(operand)
            if random.random() < AND_probability :
                target = "".join(str(int(x) & int(y)) for x, y in zip(operand, target))
            else :
                target = "".join(str(int(x) | int(y)) for x, y in zip(operand, target))
        self.parameter["R"] = target

        # column_value[j]: column j of A[1:] read as an integer, with row i
        # contributing bit i (the last row is the most significant bit).
        column_value = [0] * M
        # order: column indices, stably re-partitioned (zeros before ones) after
        # every row; processing rows first-to-last this way leaves the columns
        # sorted by column_value in ascending order.
        order = list(range(M))
        for i, operand in enumerate(operands[1 :]) :
            bits = [int(ch) for ch in operand]
            for j, bit in enumerate(bits) :
                column_value[j] |= bit << i
            order.sort(key = bits.__getitem__)

        # Position (within the sorted order) of the first column whose target bit
        # is '1', and of the last column whose target bit is '0'.
        first_one = next((pos for pos, col in enumerate(order) if target[col] == "1"), M)
        last_zero = next((pos for pos in reversed(range(M)) if target[order[pos]] == "0"), -1)

        if first_one < last_zero :
            # A '1' column sorted before a '0' column leaves no valid interval.
            reference_answer = 0
        else :
            lower = column_value[order[last_zero]] if last_zero != -1 else 0
            upper = column_value[order[first_one]] if first_one != M else 2 ** N
            reference_answer = upper - lower

        self.parameter["reference_answer"] = reference_answer
        assert self.parameter["reference_answer"] > 0

    def _prompt_generate(self) -> str :
        """Return the prompt template filled with N, M, the listing of A and the target R."""
        fields = {
            "N" : self.parameter["N"],
            "M" : self.parameter["M"],
            "A" : "\n".join("A[{}]={}".format(index, text) for index, text in enumerate(self.parameter["A"])),
            "R" : self.parameter["R"],
        }
        return self.prompt_template.format(**fields)

    def _process(self, answer : Optional[str]) -> Optional[int] :
        """
        Parse the extracted answer text as an integer.

        Returns None when the answer is None or is not a valid integer literal.
        """
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None

    def scorer(self, output : str) -> float :
        """
        Score a model output against the reference answer.

        Unparsable or negative answers receive rewards["wrong_format"]. Otherwise
        the configured strategy applies: "(min/max)^beta" gives
        weight * (min / max) ** beta over the reference and the answer, and
        "gold=answer" gives weight * (answer == reference).

        Raises:
            NotImplementedError: if the rewarding strategy is not recognised.
        """
        processed_result = self.processor(output)
        if processed_result is None or processed_result < 0 :
            return self.rewards["wrong_format"]

        strategy = self.rewards["rewarding_strategy"]
        weight = self.rewards["rewarding_weight"]
        reference = self.parameter["reference_answer"]

        if strategy == "(min/max)^beta" :
            ratio = min(reference, processed_result) / max(reference, processed_result)
            return weight * (ratio ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return weight * (processed_result == reference)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
|