ZhiyuanZeng commited on
Commit
3bf8430
·
verified ·
1 Parent(s): 1273f5f

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Dockerfile +76 -0
  2. README.md +193 -5
  3. __init__.py +13 -0
  4. client.py +62 -0
  5. models.py +45 -0
  6. openenv.yaml +7 -0
  7. pyproject.toml +51 -0
  8. server/Gym/__init__.py +0 -0
  9. server/Gym/environment.py +217 -0
  10. server/Gym/environments/__init__.py +802 -0
  11. server/Gym/environments/ab_program_simulation/__init__.py +1 -0
  12. server/Gym/environments/ab_program_simulation/environment.py +109 -0
  13. server/Gym/environments/add_multiple_divisible_counting/__init__.py +1 -0
  14. server/Gym/environments/add_multiple_divisible_counting/environment.py +122 -0
  15. server/Gym/environments/addition_table/__init__.py +1 -0
  16. server/Gym/environments/addition_table/environment.py +132 -0
  17. server/Gym/environments/almost_complete_graph_cycle_counting/__init__.py +1 -0
  18. server/Gym/environments/almost_complete_graph_cycle_counting/environment.py +94 -0
  19. server/Gym/environments/and_or_sequence_counting/__init__.py +1 -0
  20. server/Gym/environments/and_or_sequence_counting/environment.py +147 -0
  21. server/Gym/environments/anti_palindromic_substring_counting/__init__.py +1 -0
  22. server/Gym/environments/anti_palindromic_substring_counting/environment.py +142 -0
  23. server/Gym/environments/axis_k_center/__init__.py +1 -0
  24. server/Gym/environments/axis_k_center/environment.py +129 -0
  25. server/Gym/environments/baj_bytecomputer/__init__.py +1 -0
  26. server/Gym/environments/baj_bytecomputer/environment.py +109 -0
  27. server/Gym/environments/banned_point_superset_path_counting/__init__.py +1 -0
  28. server/Gym/environments/banned_point_superset_path_counting/environment.py +170 -0
  29. server/Gym/environments/banyan_heart/__init__.py +1 -0
  30. server/Gym/environments/banyan_heart/environment.py +165 -0
  31. server/Gym/environments/bez_minimalist_security/__init__.py +1 -0
  32. server/Gym/environments/bez_minimalist_security/environment.py +221 -0
  33. server/Gym/environments/bezout_identity/__init__.py +1 -0
  34. server/Gym/environments/bezout_identity/environment.py +134 -0
  35. server/Gym/environments/binario/__init__.py +1 -0
  36. server/Gym/environments/binario/environment.py +188 -0
  37. server/Gym/environments/binario_no_adjacency_requirement/__init__.py +1 -0
  38. server/Gym/environments/binario_no_adjacency_requirement/environment.py +114 -0
  39. server/Gym/environments/binary_alternation/__init__.py +1 -0
  40. server/Gym/environments/binary_alternation/environment.py +121 -0
  41. server/Gym/environments/binary_linear_equation_solution_counting/__init__.py +1 -0
  42. server/Gym/environments/binary_linear_equation_solution_counting/environment.py +187 -0
  43. server/Gym/environments/binary_tree_leaf_num_expectation/__init__.py +1 -0
  44. server/Gym/environments/binary_tree_leaf_num_expectation/environment.py +76 -0
  45. server/Gym/environments/bit_equation_counting/__init__.py +1 -0
  46. server/Gym/environments/bit_equation_counting/environment.py +91 -0
  47. server/Gym/environments/bitand_zero_path_counting/__init__.py +1 -0
  48. server/Gym/environments/bitand_zero_path_counting/environment.py +135 -0
  49. server/Gym/environments/bitwise_operation_sequence_counting/__init__.py +1 -0
  50. server/Gym/environments/bitwise_operation_sequence_counting/environment.py +150 -0
Dockerfile ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # Multi-stage build using openenv-base
8
+ # This Dockerfile is flexible and works for both:
9
+ # - In-repo environments (with local src/core)
10
+ # - Standalone environments (with openenv-core from pip)
11
+ # The build script (openenv build) handles context detection and sets appropriate build args.
12
+
13
+ ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
14
+ FROM ${BASE_IMAGE} AS builder
15
+
16
+ WORKDIR /app
17
+
18
+ # Build argument to control whether we're building standalone or in-repo
19
+ ARG BUILD_MODE=in-repo
20
+ ARG ENV_NAME=RLVE_Gym
21
+
22
+ # Copy environment code (always at root of build context)
23
+ COPY . /app/env
24
+
25
+ # For in-repo builds, openenv-core is already in the pyproject.toml dependencies
26
+ # For standalone builds, openenv-core will be installed from pip via pyproject.toml
27
+ WORKDIR /app/env
28
+
29
+ # Ensure uv is available (for local builds where base image lacks it)
30
+ RUN if ! command -v uv >/dev/null 2>&1; then \
31
+ curl -LsSf https://astral.sh/uv/install.sh | sh && \
32
+ mv /root/.local/bin/uv /usr/local/bin/uv && \
33
+ mv /root/.local/bin/uvx /usr/local/bin/uvx; \
34
+ fi
35
+
36
+ # Install dependencies using uv sync
37
+ # If uv.lock exists, use it; otherwise resolve on the fly
38
+ RUN --mount=type=cache,target=/root/.cache/uv \
39
+ if [ -f uv.lock ]; then \
40
+ uv sync --frozen --no-install-project --no-editable; \
41
+ else \
42
+ uv sync --no-install-project --no-editable; \
43
+ fi
44
+
45
+ RUN --mount=type=cache,target=/root/.cache/uv \
46
+ if [ -f uv.lock ]; then \
47
+ uv sync --frozen --no-editable; \
48
+ else \
49
+ uv sync --no-editable; \
50
+ fi
51
+
52
+ # Final runtime stage
53
+ FROM ${BASE_IMAGE}
54
+
55
+ WORKDIR /app
56
+
57
+ # Copy the virtual environment from builder
58
+ COPY --from=builder /app/env/.venv /app/.venv
59
+
60
+ # Copy the environment code
61
+ COPY --from=builder /app/env /app/env
62
+
63
+ # Set PATH to use the virtual environment
64
+ ENV PATH="/app/.venv/bin:$PATH"
65
+
66
+ # Set PYTHONPATH so imports work correctly
67
+ ENV PYTHONPATH="/app/env:$PYTHONPATH"
68
+
69
+ # Health check
70
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
71
+ CMD curl -f http://localhost:8000/health || exit 1
72
+
73
+ # Run the FastAPI server
74
+ # The module path is constructed to work with the /app/env structure
75
+ ENV ENABLE_WEB_INTERFACE=true
76
+ CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
README.md CHANGED
@@ -1,10 +1,198 @@
1
  ---
2
- title: RLVE Gym
3
- emoji: 🦀
4
- colorFrom: blue
5
- colorTo: purple
6
  sdk: docker
7
  pinned: false
 
 
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Rlve Gym Environment Server
3
+ emoji: 📡
4
+ colorFrom: purple
5
+ colorTo: blue
6
  sdk: docker
7
  pinned: false
8
+ app_port: 8000
9
+ base_path: /web
10
+ tags:
11
+ - openenv
12
  ---
13
 
14
+ # Rlve Gym Environment
15
+
16
+ A simple test environment that echoes back messages. Perfect for testing the env APIs as well as demonstrating environment usage patterns.
17
+
18
+ ## Quick Start
19
+
20
+ The simplest way to use the Rlve Gym environment is through the `RlveGymEnv` class:
21
+
22
+ ```python
23
+ from RLVE_Gym import RlveGymAction, RlveGymEnv
24
+
25
+ try:
26
+ # Create environment from Docker image
27
+ RLVE_Gymenv = RlveGymEnv.from_docker_image("RLVE_Gym-env:latest")
28
+
29
+ # Reset
30
+ result = RLVE_Gymenv.reset()
31
+ print(f"Reset: {result.observation.echoed_message}")
32
+
33
+ # Send multiple messages
34
+ messages = ["Hello, World!", "Testing echo", "Final message"]
35
+
36
+ for msg in messages:
37
+ result = RLVE_Gymenv.step(RlveGymAction(output=msg))
38
+ print(f"Sent: '{msg}'")
39
+ print(f" → Echoed: '{result.observation.echoed_message}'")
40
+ print(f" → Length: {result.observation.message_length}")
41
+ print(f" → Reward: {result.reward}")
42
+
43
+ finally:
44
+ # Always clean up
45
+ RLVE_Gymenv.close()
46
+ ```
47
+
48
+ That's it! The `RlveGymEnv.from_docker_image()` method handles:
49
+ - Starting the Docker container
50
+ - Waiting for the server to be ready
51
+ - Connecting to the environment
52
+ - Container cleanup when you call `close()`
53
+
54
+ ## Building the Docker Image
55
+
56
+ Before using the environment, you need to build the Docker image:
57
+
58
+ ```bash
59
+ # From project root
60
+ docker build -t RLVE_Gym-env:latest -f server/Dockerfile .
61
+ ```
62
+
63
+ ## Deploying to Hugging Face Spaces
64
+
65
+ You can easily deploy your OpenEnv environment to Hugging Face Spaces using the `openenv push` command:
66
+
67
+ ```bash
68
+ # From the environment directory (where openenv.yaml is located)
69
+ openenv push
70
+
71
+ # Or specify options
72
+ openenv push --namespace my-org --private
73
+ ```
74
+
75
+ The `openenv push` command will:
76
+ 1. Validate that the directory is an OpenEnv environment (checks for `openenv.yaml`)
77
+ 2. Prepare a custom build for Hugging Face Docker space (enables web interface)
78
+ 3. Upload to Hugging Face (ensuring you're logged in)
79
+
80
+ ### Prerequisites
81
+
82
+ - Authenticate with Hugging Face: The command will prompt for login if not already authenticated
83
+
84
+ ### Options
85
+
86
+ - `--directory`, `-d`: Directory containing the OpenEnv environment (defaults to current directory)
87
+ - `--repo-id`, `-r`: Repository ID in format 'username/repo-name' (defaults to 'username/env-name' from openenv.yaml)
88
+ - `--base-image`, `-b`: Base Docker image to use (overrides Dockerfile FROM)
89
+ - `--private`: Deploy the space as private (default: public)
90
+
91
+ ### Examples
92
+
93
+ ```bash
94
+ # Push to your personal namespace (defaults to username/env-name from openenv.yaml)
95
+ openenv push
96
+
97
+ # Push to a specific repository
98
+ openenv push --repo-id my-org/my-env
99
+
100
+ # Push with a custom base image
101
+ openenv push --base-image ghcr.io/meta-pytorch/openenv-base:latest
102
+
103
+ # Push as a private space
104
+ openenv push --private
105
+
106
+ # Combine options
107
+ openenv push --repo-id my-org/my-env --base-image custom-base:latest --private
108
+ ```
109
+
110
+ After deployment, your space will be available at:
111
+ `https://huggingface.co/spaces/<repo-id>`
112
+
113
+ The deployed space includes:
114
+ - **Web Interface** at `/web` - Interactive UI for exploring the environment
115
+ - **API Documentation** at `/docs` - Full OpenAPI/Swagger interface
116
+ - **Health Check** at `/health` - Container health monitoring
117
+
118
+ ## Environment Details
119
+
120
+ ### Action
121
+ **RlveGymAction**: Contains a single field
122
+ - `message` (str) - The message to echo back
123
+
124
+ ### Observation
125
+ **RlveGymObservation**: Contains the echo response and metadata
126
+ - `echoed_message` (str) - The message echoed back
127
+ - `message_length` (int) - Length of the message
128
+ - `reward` (float) - Reward based on message length (length × 0.1)
129
+ - `done` (bool) - Always False for echo environment
130
+ - `metadata` (dict) - Additional info like step count
131
+
132
+ ### Reward
133
+ The reward is calculated as: `message_length × 0.1`
134
+ - "Hi" → reward: 0.2
135
+ - "Hello, World!" → reward: 1.3
136
+ - Empty message → reward: 0.0
137
+
138
+ ## Advanced Usage
139
+
140
+ ### Connecting to an Existing Server
141
+
142
+ If you already have a Rlve Gym environment server running, you can connect directly:
143
+
144
+ ```python
145
+ from RLVE_Gym import RlveGymAction, RlveGymEnv
146
+
147
+ # Connect to existing server
148
+ RLVE_Gymenv = RlveGymEnv(base_url="<ENV_HTTP_URL_HERE>")
149
+
150
+ # Use as normal
151
+ result = RLVE_Gymenv.reset()
152
+ result = RLVE_Gymenv.step(RlveGymAction(output="Hello!"))
153
+ ```
154
+
155
+ Note: When connecting to an existing server, `RLVE_Gymenv.close()` will NOT stop the server.
156
+
157
+ ## Development & Testing
158
+
159
+ ### Direct Environment Testing
160
+
161
+ Test the environment logic directly without starting the HTTP server:
162
+
163
+ ```bash
164
+ # From the project root
165
+ python3 server/RLVE_Gym_environment.py
166
+ ```
167
+
168
+ This verifies that:
169
+ - Environment resets correctly
170
+ - Step executes actions properly
171
+ - State tracking works
172
+ - Rewards are calculated correctly
173
+
174
+ ### Running Locally
175
+
176
+ Run the server locally for development:
177
+
178
+ ```bash
179
+ uvicorn server.app:app --reload
180
+ ```
181
+
182
+ ## Project Structure
183
+
184
+ ```
185
+ RLVE_Gym/
186
+ ├── __init__.py # Module exports
187
+ ├── README.md # This file
188
+ ├── openenv.yaml # OpenEnv manifest
189
+ ├── pyproject.toml # Project metadata and dependencies
190
+ ├── uv.lock # Locked dependencies (generated)
191
+ ├── client.py # RlveGymEnv client implementation
192
+ ├── models.py # Action and Observation models
193
+ └── server/
194
+ ├── __init__.py # Server module exports
195
+ ├── RLVE_Gym_environment.py # Core environment logic
196
+ ├── app.py # FastAPI application
197
+ └── Dockerfile # Container image definition
198
+ ```
__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Rlve Gym Environment - A simple test environment for HTTP server."""
8
+
9
+ from .client import RlveGymEnv
10
+ from .models import RlveGymAction, RlveGymObservation
11
+
12
+ __all__ = ["RlveGymAction", "RlveGymObservation", "RlveGymEnv"]
13
+
client.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Rlve Gym Environment HTTP Client.
9
+
10
+ This module provides the client for connecting to a Rlve Gym Environment server
11
+ over HTTP.
12
+ """
13
+
14
+ from typing import Dict
15
+
16
+ from openenv_core.client_types import StepResult
17
+ from openenv_core.http_env_client import HTTPEnvClient
18
+
19
+ from .models import RlveGymState, RlveGymAction, RlveGymObservation
20
+
21
+
22
class RlveGymEnv(HTTPEnvClient[RlveGymAction, RlveGymObservation]):
    """
    Client-side handle for a Rlve Gym Environment served over HTTP.

    The class only supplies the conversion hooks below: turning an action
    into a request payload, and turning response payloads back into
    ``StepResult`` / ``RlveGymState`` objects.
    """

    def _step_payload(self, action: RlveGymAction) -> Dict:
        """
        Serialize an action into the JSON body of a step request.

        Args:
            action: The action whose ``output`` text is sent to the server.

        Returns:
            A dictionary with the single key ``"output"``.
        """
        return dict(output=action.output)

    def _parse_result(self, payload: Dict) -> StepResult[RlveGymObservation]:
        """
        Build a ``StepResult`` from a decoded server response.

        Args:
            payload: Decoded JSON response; must contain an ``"observation"``
                mapping whose keys match the ``RlveGymObservation`` fields.

        Returns:
            StepResult wrapping the observation, with ``reward`` taken from
            the payload (``None`` when absent) and ``done`` defaulting to
            ``False`` when absent.
        """
        observation = RlveGymObservation(**payload["observation"])
        reward = payload.get("reward")
        done = payload.get("done", False)
        return StepResult(observation=observation, reward=reward, done=done)

    def _parse_state(self, payload: Dict) -> RlveGymState:
        """
        Build a ``RlveGymState`` from a decoded state response.

        Args:
            payload: Decoded JSON response whose keys are passed as keyword
                arguments to ``RlveGymState``.

        Returns:
            The reconstructed state object.
        """
        return RlveGymState(**payload)
models.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Data models for the Rlve Gym Environment.
9
+
10
+ The RLVE_Gym environment serves a problem input and reports the verifier result for each submitted model output.
11
+ """
12
+
13
+ from dataclasses import dataclass
14
+
15
+ from openenv_core.env_server.types import Action, Observation, State
16
+
17
+ from typing import Dict, Union
18
+
19
+
20
@dataclass(kw_only=True)
class RlveGymState(State):
    """State of the RLVE_Gym containing the seed."""

    # Seed of the current problem (required, keyword-only).
    seed: int
    # Text of the current problem; None until one has been set.
    problem_input: str | None = None

    # Running counters; presumably the number of scored outputs and the sum
    # of their accuracies -- TODO confirm against the server environment.
    num_samples: int = 0
    sum_accuracy: int = 0
28
+
29
+
30
@dataclass(kw_only=True)
class RlveGymAction(Action):
    """A single action sent to the RLVE_Gym environment: the model's output text."""

    # Raw text produced by the model (required, keyword-only).
    output: str
35
+
36
+
37
@dataclass(kw_only=True)
class RlveGymObservation(Observation):
    """What the RLVE_Gym environment reports back to the client."""

    # Text of the problem this observation refers to.
    problem_input: str
    # Numeric results keyed by name.
    verifier_result: Dict[str, Union[float, int]]

    # Outcome flag and accompanying human-readable text.
    success: bool
    message: str
openenv.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ spec_version: 1
2
+ name: RLVE_Gym
3
+ type: space
4
+ runtime: fastapi
5
+ app: server.app:app
6
+ port: 8000
7
+
pyproject.toml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ [build-system]
8
+ requires = ["setuptools>=45", "wheel"]
9
+ build-backend = "setuptools.build_meta"
10
+
11
+ [project]
12
+ name = "openenv-RLVE_Gym"
13
+ version = "0.1.0"
14
+ description = "Rlve Gym environment for OpenEnv"
15
+ requires-python = ">=3.10"
16
+ dependencies = [
17
+ # Core OpenEnv dependencies (required for server functionality)
18
+ # "openenv-core @ git+https://github.com/meta-pytorch/OpenEnv.git@main#subdirectory=src/core",
19
+ "openenv-core>=0.1.0",
20
+ "fastapi>=0.115.0",
21
+ "pydantic>=2.0.0",
22
+ "uvicorn>=0.24.0",
23
+ "requests>=2.31.0",
24
+ # Environment-specific dependencies
25
+ # Add all dependencies needed for your environment here
26
+ # Examples:
27
+ # "numpy>=1.19.0",
28
+ # "torch>=2.0.0",
29
+ # "gymnasium>=0.29.0",
30
+ # "openspiel>=1.0.0",
31
+ # "smolagents>=1.22.0,<2",
32
+ ]
33
+
34
+ [project.optional-dependencies]
35
+ dev = [
36
+ "pytest>=8.0.0",
37
+ "pytest-cov>=4.0.0",
38
+ ]
39
+
40
+ [project.scripts]
41
+ # Server entry point - enables running via: uv run --project . server
42
+ # or: python -m RLVE_Gym.server.app
43
+ server = "RLVE_Gym.server.app:main"
44
+
45
+ [tool.setuptools]
46
+ packages = ["RLVE_Gym", "RLVE_Gym.server"]
47
+ package-dir = { "RLVE_Gym" = ".", "RLVE_Gym.server" = "server" }
48
+
49
+ [tool.setuptools.packages.find]
50
+ where = ["."]
51
+
server/Gym/__init__.py ADDED
File without changes
server/Gym/environment.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import copy
4
+ from abc import ABC, abstractmethod
5
+ from typing import Dict, Optional, Tuple, Any, Union
6
+
7
+
8
+
9
+ import functools
10
+ from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
11
+
12
class TimeoutException(Exception):
    """Raised when a call wrapped by the ``timeout`` decorator runs out of time."""
14
+
15
def timeout(seconds):
    """
    Build a decorator that bounds how long a call may block its caller.

    Each call of the decorated function is submitted to a fresh
    single-worker ``ThreadPoolExecutor`` and its result is awaited for at
    most ``seconds``.

    Args:
        seconds: Maximum wait, forwarded to ``Future.result(timeout=...)``.

    Returns:
        A decorator. The wrapped function returns the original result or
        re-raises the original exception; it raises ``TimeoutException``
        when the wait expires.

    Note:
        Expiry only stops the caller from waiting. A call that has already
        started keeps running in its worker thread; ``cancel_futures`` only
        affects work that has not started yet.
    """
    def decorator(func):
        @functools.wraps(func)
        def guarded(*args, **kwargs):
            pool = ThreadPoolExecutor(max_workers=1)
            pending = pool.submit(func, *args, **kwargs)
            try:
                return pending.result(timeout=seconds)
            except FutureTimeoutError:
                raise TimeoutException(f"Function timed out after {seconds} seconds")
            finally:
                # Never wait for the worker here, otherwise a stuck call
                # would block the caller despite the timeout.
                pool.shutdown(wait=False, cancel_futures=True)
        return guarded
    return decorator
29
+
30
+
31
+
32
+ import torch
33
+ import random
34
+ import numpy as np
35
def manual_seed(args_or_seed: int, fix_cudnn=False):
    """
    Seed every random number source this module relies on.

    Args:
        args_or_seed: Seed handed to ``random``, ``numpy.random``, ``torch``
            (CPU and all CUDA devices) and exported as ``PYTHONHASHSEED``.
        fix_cudnn: When true, additionally switch cuDNN to deterministic
            mode and turn its benchmark mode off.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(args_or_seed)
    os.environ["PYTHONHASHSEED"] = str(args_or_seed)

    if not fix_cudnn:
        return
    torch.backends.cudnn.deterministic = True  # noqa
    torch.backends.cudnn.benchmark = False  # noqa
44
+
45
+
46
+
47
class VerifiableEnvironment(ABC):
    """
    Abstract base class for a verifiable environment.

    A subclass generates a problem from a seed and parameters
    (``_generate``), renders it as a prompt (``_prompt_generate``), extracts
    the answer from a model output (``_process``) and scores it (``scorer``).
    """

    def __init__(self, answer_markers: Optional[Tuple[str, str]] = None):
        """
        Initializes the environment with default seed and parameter values.

        Args:
            answer_markers (Optional[Tuple[str, str]]): Opening and closing
                strings that delimit the answer inside a model output.
                Defaults to ("<answer>", "</answer>").
        """
        self.seed = None
        self.parameter = None

        if answer_markers is None:
            answer_markers = (r"<answer>", r"</answer>")
        assert hasattr(answer_markers, "__len__"), "answer_markers should have __len__"
        assert len(answer_markers) == 2 and isinstance(answer_markers[0], str) and isinstance(answer_markers[1], str), "answer_markers should be a tuple of two strings"
        self.answer_markers = answer_markers

        # Minimum score that verifier() counts as accurate.
        self.passing_reward_threshold = 1.0

    def generator(self, seed: int, parameter: Optional[Dict] = None, timeout_second: int = 10) -> bool:
        """
        Initializes the environment with the given seed and (initial) parameters, and samples environment-specific parameters to generate a problem.

        Args:
            seed (int): Random seed for reproducibility.
            parameter (Optional[Dict]): Dictionary of (initial) problem parameters.
            timeout_second (int): Timeout in seconds for the generation process.

        Returns:
            bool: True if the generation was successful, False otherwise.
        """
        @timeout(timeout_second)
        def self_generate():
            self.seed = seed
            # Deep-copy so _generate() can modify the parameters without
            # touching the caller's dictionary.
            self.parameter = copy.deepcopy(parameter) if parameter is not None else {}

            manual_seed(self.seed)
            self._generate()

        try:
            self_generate()
        except Exception:
            # A timeout or any error raised during generation is reported as
            # failure. KeyboardInterrupt / SystemExit are deliberately not
            # caught so the process can still be interrupted.
            return False
        return self.parameter is not None

    @abstractmethod
    def _generate(self) -> None:
        """
        Subclasses must implement problem generation using self.seed and self.parameter.
        """
        pass

    def prompt_generator(self) -> str:
        """
        Generates the prompt string for the problem.

        Returns:
            str: The formatted prompt for the problem.
        """
        assert self.seed is not None and self.parameter is not None, "generator() should be called before prompt_generator()"

        return self._prompt_generate()

    @abstractmethod
    def _prompt_generate(self) -> str:
        """
        Subclasses must implement prompt generation using self.seed and self.parameter.

        Returns:
            str: The problem prompt.
        """
        pass

    def processor(self, output: str) -> Any:
        """
        Processes the model's output to extract useful information.

        Args:
            output (str): The string output from a model.

        Returns:
            Any: Any useful information that may be used for following steps (e.g., scoring).
        """
        # Remove everything before the first "Assistant:" (if possible)
        if "Assistant:" in output:
            output = output.split("Assistant:", 1)[1]
        elif "<|im_start|>assistant" in output:
            output = output.split("<|im_start|>assistant", 1)[1]

        answer_pattern = re.escape(self.answer_markers[0]) + r"(.*?)" + re.escape(self.answer_markers[1])
        matches = list(re.finditer(answer_pattern, output, re.DOTALL))
        # When several marked answers are present, the last one wins; None
        # signals that no answer in the expected format was found.
        answer = matches[-1].group(1) if matches else None
        return self._process(answer)

    @abstractmethod
    def _process(self, answer: Optional[str]) -> Any:
        """
        Subclasses must implement the processing of the answer.

        Args:
            answer (str): The model's answer. If it is None, it means the model did not provide an answer in the expected format.

        Returns:
            Any: The processed answer, which may be used for scoring.
        """
        pass

    @abstractmethod
    def scorer(self, output: str) -> float:
        """
        Computes a numeric score for the output, which should be in [-1.0, +1.0].

        Args:
            output (str): The model's output.

        Returns:
            float: The score for the given output, between -1.0 and +1.0.
        """
        pass

    def verifier(self, output: str) -> Dict[str, Union[float, int]]:
        """
        Verifies the model's output.

        Args:
            output (str): The model's output.

        Returns:
            Dict[str, Union[float, int]]: ``reward`` (the score in
            [-1.0, +1.0]), ``accuracy`` (1 if the score reaches
            ``passing_reward_threshold``, else 0) and ``format_score``
            (1 if the score is above -1.0, else 0).
        """
        try:
            score = self.scorer(output)
        except Exception:
            # A scorer that fails on an output yields the minimum score.
            # KeyboardInterrupt / SystemExit are deliberately not caught.
            score = -1.0
        assert -1.0 <= score <= +1.0, "Score out of bounds: score={}\n\nPrompt:\n{}".format(score, self.prompt_generator())

        eps = 1E-6  # tolerance for floating-point comparisons
        return dict(
            reward = score, # [-1.0, +1.0]
            accuracy = int(score >= self.passing_reward_threshold - eps), # 0 or 1
            format_score = int(score >= -1.0 + eps), # 0 or 1
        )

    def get_config(self) -> Dict:
        """
        Returns the configuration of the current problem.

        Returns:
            Dict: Dictionary with keys 'seed', 'parameter' and 'passing_reward_threshold'.
        """
        return dict(seed = self.seed, parameter = self.parameter, passing_reward_threshold = self.passing_reward_threshold)

    def set_config(self, config: Dict) -> None:
        """
        Sets the configuration for the current problem.

        Args:
            config (Dict): Dictionary with 'seed' and 'parameter' keys, and an
                optional 'passing_reward_threshold' key (defaults to 1.0).
        """
        assert "seed" in config, "seed is required in config"
        assert "parameter" in config, "parameter is required in config"
        self.seed, self.parameter, self.passing_reward_threshold = config["seed"], config["parameter"], config.get("passing_reward_threshold", 1.0)
server/Gym/environments/__init__.py ADDED
@@ -0,0 +1,802 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .ab_program_simulation import ABProgramSimulation_Environment
2
+ from .add_multiple_divisible_counting import AddMultiple_Divisible_Counting_Environment
3
+ from .addition_table import AdditionTable_Environment
4
+ from .almost_complete_graph_cycle_counting import AlmostCompleteGraphCycleCounting_Environment
5
+ from .and_or_sequence_counting import AndOr_Sequence_Counting_Environment
6
+ from .anti_palindromic_substring_counting import AntiPalindromicSubstringCounting_Environment
7
+ from .axis_k_center import Axis_KCenter_Environment
8
+ from .baj_bytecomputer import BAJBytecomputer_Environment
9
+ from .banned_point_superset_path_counting import BannedPointSupersetPathCounting_Environment
10
+ from .banyan_heart import BanyanHeart_Environment
11
+ from .bez_minimalist_security import BEZMinimalistSecurity_Environment
12
+ from .bezout_identity import BezoutIdentity_Environment
13
+ from .binario import Binario_Environment
14
+ from .binario_no_adjacency_requirement import Binario_NoAdjacencyRequirement_Environment
15
+ from .binary_alternation import BinaryAlternation_Environment
16
+ from .binary_linear_equation_solution_counting import BinaryLinearEquation_SolutionCounting_Environment
17
+ from .binary_tree_leaf_num_expectation import BinaryTreeLeafNumExpectation_Environment
18
+ from .bit_equation_counting import BitEquationCounting_Environment
19
+ from .bitand_zero_path_counting import BitAndZero_PathCounting_Environment
20
+ from .bitwise_operation_sequence_counting import BitwiseOperationSequenceCounting_Environment
21
+ from .block_image import BlockImage_Environment
22
+ from .bounded_adjacency_difference_permutation_counting import BoundedAdjacencyDifference_Permutation_Counting_Environment
23
+ from .bounded_interval_intersection import BoundedIntervalIntersection_Environment
24
+ from .bounded_mean_subarray_counting import BoundedMeanSubarrayCounting_Environment
25
+ from .bounded_subarray_counting import BoundedSubarrayCounting_Environment
26
+ from .box_scheduling import BoxScheduling_Environment
27
+ from .bridge import Bridge_Environment
28
+ from .bubble_swap_lower_bound_permutation_counting import BubbleSwapLowerBound_PermutationCounting_Environment
29
+ from .bucket_sorting import BucketSorting_Environment
30
+ from .campfire_party import CampfireParty_Environment
31
+ from .campsite_puzzle import CampsitePuzzle_Environment
32
+ from .canon import Canon_Environment
33
+ from .cantor_expansion import CantorExpansion_Environment
34
+ from .capital_city_effect import CapitalCityEffect_Environment
35
+ from .card_coloring_counting import CardColoringCounting_Environment
36
+ from .catalan_number_mod import CatalanNumberMod_Environment
37
+ from .check_all_cycle_xor_zero import CheckAllCycleXorZero_Environment
38
+ from .cho_hamsters import ChoHamsters_Environment
39
+ from .cinema import Cinema_Environment
40
+ from .circuit import Circuit_Environment
41
+ from .circulating_decimal_counting import CirculatingDecimalCounting_Environment
42
+ from .circulating_grid import CirculatingGrid_Environment
43
+ from .cleaning_up import CleaningUp_Environment
44
+ from .clear_symmetry import ClearSymmetry_Environment
45
+ from .clique_independent_set_partitioning_counting import Clique_IndependentSet_Partitioning_Counting_Environment
46
+ from .coin_square_game import CoinSquareGame_Environment
47
+ from .coloring_counting import ColoringCounting_Environment
48
+ from .combination_odd_subsequence_counting import CombinationOddSubsequenceCounting_Environment
49
+ from .concatenation_partition_counting_sum import ConcatenationPartitionCountingSum_Environment
50
+ from .congruent_equation import CongruentEquation_Environment
51
+ from .construct_hack_interval import ConstructHackInterval_Environment
52
+ from .convex_hull import ConvexHull_Environment
53
+ from .cornfield import Cornfield_Environment
54
+ from .countdown import CountdownEqual_Environment, CountdownClose_Environment
55
+ from .cow_dance_show import CowDanceShow_Environment
56
+ from .crt import CRT_Environment
57
+ from .cryptarithmetic import Cryptarithmetic_Environment
58
+ from .cube_fixed_local_maximum_counting import Cube_FixedLocalMaximumCounting_Environment
59
+ from .cycle_counting import CycleCounting_Environment
60
+ from .decreasing_digit_counting import DecreasingDigitCounting_Environment
61
+ from .degree_fixed_spanning_tree import DegreeFixed_SpanningTree_Environment
62
+ from .delta_min_popcount import DeltaMinPopcount_Environment
63
+ from .delta_nim_game import DeltaNimGame_Environment
64
+ from .derangement_extension import DerangementExtension_Environment
65
+ from .difference_constraint_system import DifferenceConstraintSystem_Environment
66
+ from .difference_constraint_system_dag import DifferenceConstraintSystemDAG_Environment
67
+ from .different_color_pairing import DifferentColorPairing_Environment
68
+ from .differentiate import Differentiate_Environment
69
+ from .digit_lis_counting import DigitLISCounting_Environment
70
+ from .discrete_logarithm import DiscreteLogarithm_Environment
71
+ from .disinfection import Disinfection_Environment
72
+ from .distinct_array_permutation import DistinctArrayPermutation_Environment
73
+ from .distinct_edge_colored_complete_graph_counting import DistinctEdgeColoredCompleteGraphCounting_Environment
74
+ from .division import Division_Environment
75
+ from .divisor_flip_expectation import DivisorFlipExpectation_Environment
76
+ from .double_cross_counting import DoubleCrossCounting_Environment
77
+ from .double_palindromic_string_counting import DoublePalindromicStringCounting_Environment
78
+ from .double_stack_sorting import DoubleStackSorting_Environment
79
+ from .dyn_dynamite import DynDynamite_Environment
80
+ from .eight_digit_puzzle import EightDigitPuzzle_Environment
81
+ from .emperor_worries import EmperorWorries_Environment
82
+ from .energy_storage_meter import EnergyStorageMeter_Environment
83
+ from .euclid_game import EuclidGame_Environment
84
+ from .even_degree_graph_partitioning import EvenDegreeGraphPartitioning_Environment
85
+ from .expression_adding_parenthese_counting import Expression_AddingParenthese_Counting_Environment
86
+ from .face_right_way import FaceRightWay_Environment
87
+ from .factorial_trailing_zero_count import FactorialTrailingZeroCount_Environment
88
+ from .fbi_binary_tree import FBI_BinaryTree_Environment
89
+ from .fibonacci import Fibonacci_Environment
90
+ from .fibonacci_containing_counting import FibonacciContainingCounting_Environment
91
+ from .fibtrain import Fibtrain_Environment
92
+ from .firework_show import FireworkShow_Environment
93
+ from .fixed_mod_k_selection_counting import FixedModK_Selection_Counting_Environment
94
+ from .fixed_one_edge_num_spanning_tree import FixedOneEdgeNum_SpanningTree_Environment
95
+ from .fractional_programming import FractionalProgramming_Environment
96
+ from .fractional_programming_bipartite_graph_matching import FractionalProgramming_BipartiteGraphMatching_Environment
97
+ from .futoshiki_puzzle import FutoshikiPuzzle_Environment
98
+ from .gas_fire_extinguishers import GasFireExtinguishers_Environment
99
+ from .gaussian_elimination import GaussianElimination_Environment
100
+ from .gcd_fibonacci_product import GCDFibonacciProduct_Environment
101
+ from .gcd_lcm_counting import GcdLcmCounting_Environment
102
+ from .gcd_one_counting import GCDOne_Counting_Environment
103
+ from .gcd_prime_counting import GCDPrime_Counting_Environment
104
+ from .gold_washing import GoldWashing_Environment
105
+ from .gra_minima_game import GraMinimaGame_Environment
106
+ from .grade_ranking_counting import GradeRankingCounting_Environment
107
+ from .graph_contain_tree_counting import GraphContainTreeCounting_Environment
108
+ from .graph_isomorphism import GraphIsomorphism_Environment
109
+ from .grid_bfs import GridBFS_Environment
110
+ from .grid_coloring_counting import GridColoringCounting_Environment
111
+ from .grid_component import GridComponent_Environment
112
+ from .grid_local_minimum_counting import GridLocalMinimumCounting_Environment
113
+ from .grid_parity_construction import GridParityConstruction_Environment
114
+ from .grid_triangle_counting import GridTriangleCounting_Environment
115
+ from .halving_chain_counting import HalvingChainCounting_Environment
116
+ from .hamiltonian_path import HamiltonianPath_Environment
117
+ from .hamiltonian_path_existence import HamiltonianPathExistence_Environment
118
+ from .heap_counting import HeapCounting_Environment
119
+ from .hitori_puzzle import HitoriPuzzle_Environment
120
+ from .hungry_rabbit import HungryRabbit_Environment
121
+ from .hur_warehouse_store import HURWarehouseStore_Environment
122
+ from .imp_party import ImpParty_Environment
123
+ from .individual_sum_bounded_sequence_counting import IndividualSumBounded_SequenceCounting_Environment
124
+ from .integer_factorization_counting import IntegerFactorizationCounting_Environment
125
+ from .integer_programming import IntegerProgramming_Environment
126
+ from .integral import Integral_Environment
127
+ from .inversion_pair import InversionPair_Environment
128
+ from .inversion_pair_k_counting import InversionPairK_Counting_Environment
129
+ from .josephus import Josephus_Environment
130
+ from .jug_puzzle import JugPuzzle_Environment
131
+ from .k_partition import KPartition_Environment
132
+ from .kakurasu import Kakurasu_Environment
133
+ from .kidding_me import KiddingMe_Environment
134
+ from .king_sorting import KingSorting_Environment
135
+ from .klo_blocks import KloBlocks_Environment
136
+ from .knapsack import Knapsack_Environment
137
+ from .knights_and_knaves import KnightsAndKnaves_Environment
138
+ from .kos_dicing import KosDicing_Environment
139
+ from .kth_binary_tree import Kth_BinaryTree_Environment
140
+ from .kth_semi_balanced_bracket_sequence import Kth_SemiBalancedBracketSequence_Environment
141
+ from .kth_subsequence import KthSubsequence_Environment
142
+ from .kur import KUR_Environment
143
+ from .lamp_changing import LampChanging_Environment
144
+ from .land_acquisition import LandAcquisition_Environment
145
+ from .landform_generation_counting import LandformGenerationCounting_Environment
146
+ from .largest_convex_polygon import LargestConvexPolygon_Environment
147
+ from .largest_rectangle_among_points import LargestRectangle_AmongPoints_Environment
148
+ from .las import LAS_Environment
149
+ from .las_laser import LASLaser_Environment
150
+ from .lcm import LCM_Environment
151
+ from .lds_two_counting import LDSTwo_Counting_Environment
152
+ from .light_up_puzzle import LightUpPuzzle_Environment
153
+ from .link_beads import LinkBeads_Environment
154
+ from .lis_lds_concatenation import LIS_LDS_Concatenation_Environment
155
+ from .liz_lollipop import LIZ_Lollipop_Environment
156
+ from .longest_double_palindrome import Longest_DoublePalindrome_Environment
157
+ from .longest_matching_subsequence import Longest_MatchingSubsequence_Environment
158
+ from .longest_maxdiff_bounded_interval import LongestMaxDiffBoundedInterval_Environment
159
+ from .longest_path import LongestPath_Environment
160
+ from .longest_repeated_palindrome import Longest_RepeatedPalindrome_Environment
161
+ from .maf_mafia import MafMafia_Environment
162
+ from .magic_square_puzzle import MagicSquarePuzzle_Environment
163
+ from .making_grade import MakingGrade_Environment
164
+ from .matrix_binary_exponentiation import Matrix_BinaryExponentiation_Environment
165
+ from .matrix_permutation_both_diagonal_one import MatrixPermutation_BothDiagonalOne_Environment
166
+ from .matrix_permutation_equivalence import MatrixPermutationEquivalence_Environment
167
+ from .matrix_permutation_main_diagonal_one import MatrixPermutation_MainDiagonalOne_Environment
168
+ from .matrix_pooling import MatrixPooling_Environment
169
+ from .matrix_rmq_counting import MatrixRMQCounting_Environment
170
+ from .max_different_group_pair_division import MaxDifferentGroupPairDivision_Environment
171
+ from .max_grid_path_intersection import MaxGridPathIntersection_Environment
172
+ from .max_minimum_after_interval_addition import MaxMinimum_AfterIntervalAddition_Environment
173
+ from .max_mult_split import MaxMultSplit_Environment
174
+ from .max_multiplication_fixed_sum import MaxMultiplicationFixedSum_Environment
175
+ from .max_no_conflicting_bombs import MaxNoConflictingBombs_Environment
176
+ from .max_nonadjacent_k_element_sum import Max_NonAdjacent_KElementSum_Environment
177
+ from .max_permutation import MaxPermutation_Environment
178
+ from .max_rmq_expectation import MaxRMQExpectation_Environment
179
+ from .max_segment_coverage_constraint import MaxSegmentCoverageConstraint_Environment
180
+ from .max_sum_lds import MaxSumLDS_Environment
181
+ from .max_three_square_sum import MaxThreeSquareSum_Environment
182
+ from .max_tree_constrained_permutation_weight import Max_TreeConstrainedPermutation_Weight_Environment
183
+ from .max_tree_k_path_coverage import MaxTree_KPathCoverahe_Environment
184
+ from .max_tree_xor_path import MaxTreeXorPath_Environment
185
+ from .max_weight_palindromic_substring import MaxWeightPalindromicSubstring_Environment
186
+ from .max_xor_path import MaxXorPath_Environment
187
+ from .max_xor_set import MaxXorSet_Environment
188
+ from .maximum_achromatic_number import MaximumAchromaticNumber_Environment
189
+ from .maximum_clique import MaximumClique_Environment
190
+ from .maximum_divisor import MaximumDivisor_Environment
191
+ from .maximum_independent_set_grid import MaximumIndependentSetGrid_Environment
192
+ from .maximum_independent_set_tree import Maximum_IndependentSet_Tree_Environment
193
+ from .maximum_lexicographical_order_subsequence import MaximumLexicographicalOrderSubsequence_Environment
194
+ from .maximum_point_segment_matching import MaximumPointSegmentMatching_Environment
195
+ from .maximum_subsequence_num import Maximum_SubsequenceNum_Environment
196
+ from .maximum_weight_matching import MaximumWeightMatching_Environment
197
+ from .maze import Maze_Environment
198
+ from .min_conversion_to_cycle_cost import MinConversionToCycleCost_Environment
199
+ from .min_cost_reducing_lnds import MinCostReducingLNDS_Environment
200
+ from .min_cost_tree_coverage import MinCostTreeCoverage_Environment
201
+ from .min_cube_assignment import MinCubeAssignment_Environment
202
+ from .min_division_sum_xor import MinDivisionSumXor_Environment
203
+ from .min_inorder_binary_tree import MinInorderBinaryTree_Environment
204
+ from .min_kdivisor_number import MinKDivisorNumber_Environment
205
+ from .min_no_solution_linear_diophantine_equation import MinNoSolutionLinearDiophantineEquation_Environment
206
+ from .min_nonsubstring import MinNonsubstring_Environment
207
+ from .min_pairsum_multiplication_permutation import MinPairSumMultiplicationPermutation_Environment
208
+ from .min_path_cover_dag import MinPathCover_DAG_Environment
209
+ from .min_sum_chebyshev_distance import MinSumChebyshevDistance_Environment
210
+ from .min_sum_distance_square import MinSumDistanceSquare_Environment
211
+ from .min_sum_pre_xor import MinSumPreXor_Environment
212
+ from .min_swap_two_permutations import MinSwapTwoPermutations_Environment
213
+ from .min_xor_pair import MinXorPair_Environment
214
+ from .minesweeping import Minesweeping_Environment
215
+ from .minimal_cyclic_shift import MinimalCyclicShift_Environment
216
+ from .minimum_chromatic_number import MinimumChromaticNumber_Environment
217
+ from .minimum_chromatic_number_segment_overlap import MinimumChromaticNumber_SegmentOverlap_Environment
218
+ from .minimum_cost_maximum_flow import MinimumCost_MaximumFlow_Environment
219
+ from .minimum_crossing_edges_graph_partition import Minimum_CrossingEdges_GraphPartition_Environment
220
+ from .minimum_directed_spanning_tree import MinimumDirectedSpanningTree_Environment
221
+ from .minimum_dominating_interval import Minimum_DominatingInterval_Environment
222
+ from .minimum_dominating_set import Minimum_DominatingSet_Environment
223
+ from .minimum_dominating_set_grid import Minimum_DominatingSet_Grid_Environment
224
+ from .minimum_fibonacci_representation import MinimumFibonacciRepresentation_Environment
225
+ from .minimum_harmonious_chromatic_number import MinimumHarmoniousChromaticNumber_Environment
226
+ from .minimum_interval_coverage import MinimumIntervalCoverage_Environment
227
+ from .minimum_max_abs_slicer import Minimum_MaxAbsSlicer_Environment
228
+ from .minimum_max_slicer import Minimum_MaxSlicer_Environment
229
+ from .minimum_ratio_path import MinimumRatioPath_Environment
230
+ from .minimum_spanning_tree import MinimumSpanningTree_Environment
231
+ from .minimum_spanning_tree_counting import MinimumSpanningTreeCounting_Environment
232
+ from .minimum_steiner_tree import MinimumSteinerTree_Environment
233
+ from .minimum_sum_difference_submatrix import MinimumSumDifferenceSubmatrix_Environment
234
+ from .minimum_tree_weighted_dominating_ancestor import MinimumTreeWeightedDominatingAncestor_Environment
235
+ from .minimum_unconflicted_grid_kmax import MinimumUnconflictedGridKMax_Environment
236
+ from .minimum_vertex_cover import Minimum_VertexCover_Environment
237
+ from .minimum_weighted_spanning_tree import MinimumWeightedSpanningTree_Environment
238
+ from .mitter_transportation import MitterTransportation_Environment
239
+ from .mixed_graph_eulerian_circuit import MixedGraphEulerianCircuit_Environment
240
+ from .money_charging_game import MoneyChargingGame_Environment
241
+ from .monochrome_block_counting import MonochromeBlockCounting_Environment
242
+ from .monotonic_stack import MonotonicStack_Environment
243
+ from .most_component_tree_removing_two_paths import MostComponentTreeRemovingTwoPaths_Environment
244
+ from .most_num_edge_non_self_isomorphism import MostNumEdge_NonSelfIsomorphism_Environment
245
+ from .multidrink import MultiDrink_Environment
246
+ from .multiple_flipping_game import MultipleFlippingGame_Environment
247
+ from .multiplication import Multiplication_Environment
248
+ from .myj import MYJ_Environment
249
+ from .nand_result_counting import NANDResultCounting_Environment
250
+ from .negative_base import NegativeBase_Environment
251
+ from .new_nim_game import NewNimGame_Environment
252
+ from .next_palindromic import NextPalindromic_Environment
253
+ from .nine_puzzle import NinePuzzle_Environment
254
+ from .no_adjacent_girl_counting import NoAdjacentGirlCounting_Environment
255
+ from .no_double_triple_counting import NoDoubleTripleCounting_Environment
256
+ from .not_containing_string_counting import NotContainingStringCounting_Environment
257
+ from .number_partition_counting import NumberPartitionCounting_Environment
258
+ from .numbrix import Numbrix_Environment
259
+ from .odd_visitation import OddVisitation_Environment
260
+ from .odl_distance import ODLDistance_Environment
261
+ from .pair_more_one_counting import PairMoreOneCounting_Environment
262
+ from .palembang_bridges import PalembangBridges_Environment
263
+ from .palindrome_partition_counting import PalindromePartitionCounting_Environment
264
+ from .palindromic_substring_number_counting import PalindromicSubstringNumberCounting_Environment
265
+ from .pan_solar_panels import PanSolarPanels_Environment
266
+ from .path_no_going_back_counting import Path_NoGoingBack_Counting_Environment
267
+ from .patrol import Patrol_Environment
268
+ from .pcp_permutation import PCPPermutation_Environment
269
+ from .pipeline_arrangement import PipelineArrangement_Environment
270
+ from .pol_polarization import POLPolarization_Environment
271
+ from .polya_model import PolyaModel_Environment
272
+ from .polynomial_factorization import PolynomialFactorization_Environment
273
+ from .polynomial_interpolation import PolynomialInterpolation_Environment
274
+ from .polynomial_minimum import PolynomialMinimum_Environment
275
+ from .polynomial_remainder import PolynomialRemainder_Environment
276
+ from .power_cycle import PowerCycle_Environment
277
+ from .power_shortcut import PowerShortcut_Environment
278
+ from .powernest import PowerNest_Environment
279
+ from .prefix_concatenation import PrefixConcatenation_Environment
280
+ from .prefix_product_mod_distinct_permutation import PrefixProductMODDistinctPermutation_Environment
281
+ from .prefix_sum_mod_distinct_permutation import PrefixSumMODDistinctPermutation_Environment
282
+ from .prefixuffix import Prefixuffix_Environment
283
+ from .preorder_traversal import PreorderTraversal_Environment
284
+ from .prime_graph_minimum_chromatic_number import PrimeGraph_MinimumChromaticNumber_Environment
285
+ from .protecting_flowers import ProtectingFlowers_Environment
286
+ from .pythagorean_graph_independent_set_counting import PythagoreanGraph_IndependentSetCounting_Environment
287
+ from .quad_magic_items import QuadMagicItems_Environment
288
+ from .quadratic_function_segmentation import QuadraticFunctionSegmentation_Environment
289
+ from .quantum_lock_puzzle import QuantumLockPuzzle_Environment
290
+ from .queen_placement import QueenPlacement_Environment
291
+ from .random_range_max_expectation import RandomRangeMaxExpectation_Environment
292
+ from .range_constrained_increasing_sequence_counting import RangeConstrained_IncreasingSequence_Counting_Environment
293
+ from .range_four_sequence_construction import RangeFourSequenceConstruction_Environment
294
+ from .range_shrinking_sequence_counting import RangeShrinkingSequenceCounting_Environment
295
+ from .recursive_function import RecursiveFunction_Environment
296
+ from .recursive_sequence_sum_construction import RecursiveSequenceSumConstruction_Environment
297
+ from .repeat_sequence_lnds import RepeatSequenceLNDS_Environment
298
+ from .root_extraction import RootExtraction_Environment
299
+ from .round_robin import RoundRobin_Environment
300
+ from .roundtable_assignment import RoundTableAssignment_Environment
301
+ from .royal_lock_counting import RoyalLockCounting_Environment
302
+ from .salad_bar import SaladBar_Environment
303
+ from .salesman_fatigue import SalesmanFatigue_Environment
304
+ from .same_adjacency_counting import SameAdjacencyCounting_Environment
305
+ from .sat import SAT_Environment
306
+ from .scc_sequence_counting import SCC_Sequence_Counting_Environment
307
+ from .secret_cow_code import SecretCowCode_Environment
308
+ from .segment_min_length_equal_counting import SegmentMinLengthEqual_Counting_Environment
309
+ from .segment_tree_sorting_counting import SegmentTreeSortingCounting_Environment
310
+ from .self_power_sequence_mod import SelfPowerSequenceMOD_Environment
311
+ from .set_cover import SetCover_Environment
312
+ from .set_splitting import SetSplitting_Environment
313
+ from .shared_substring_counting import SharedSubstringCounting_Environment
314
+ from .shortest_path import ShortestPath_Environment
315
+ from .shortest_path_count_construction import ShortestPathCountConstruction_Environment
316
+ from .shortest_unicolor_substring import ShortestUnicolorSubstring_Environment
317
+ from .singing_girl_story import SingingGirlStory_Environment
318
+ from .single_stack_sorting import SingleStackSorting_Environment
319
+ from .ska_rock_garden import SkaRockGarden_Environment
320
+ from .skyscraper_puzzle import SkyscraperPuzzle_Environment
321
+ from .skyscraper_sum_puzzle import SkyscraperSumPuzzle_Environment
322
+ from .sliding_window import SlidingWindow_Environment
323
+ from .slo_elephants import SLOElephants_Environment
324
+ from .smallest_binary_multiple import SmallestBinaryMultiple_Environment
325
+ from .smallest_circle import SmallestCircle_Environment
326
+ from .sorting import Sorting_Environment
327
+ from .spiral_matrix import SpiralMatrix_Environment
328
+ from .splitting_game import SplittingGame_Environment
329
+ from .spy_network import SpyNetwork_Environment
330
+ from .squ_squarks import SquSquarks_Environment
331
+ from .square_undamaged_point_counting import SquareUndamagedPointCounting_Environment
332
+ from .star_battle import StarBattle_Environment
333
+ from .stirling_second import StirlingSecond_Environment
334
+ from .stone_game import StoneGame_Environment
335
+ from .stone_intervals_game import StoneIntervalsGame_Environment
336
+ from .string_partition_shuffle import StringPartitionShuffle_Environment
337
+ from .string_reversal_construction import StringReversalConstruction_Environment
338
+ from .stu_well import STUWell_Environment
339
+ from .stunt_flying import StuntFlying_Environment
340
+ from .subarray_sum_xor import SubarraySumXor_Environment
341
+ from .subarray_xor_sum import SubarrayXorSum_Environment
342
+ from .subgraph_isomorphism import SubgraphIsomorphism_Environment
343
+ from .submatrix_sum_divisible_counting import SubmatrixSumDivisibleCounting_Environment
344
+ from .subsequence_reversal_lnds import SubsequenceReversalLNDS_Environment
345
+ from .subset_sum import SubsetSum_Environment
346
+ from .subset_sum_sequence import SubsetSumSequence_Environment
347
+ from .sudoku import Sudoku_Environment
348
+ from .sum_divisor_num import Sum_DivisorNum_Environment
349
+ from .sum_gcd import SumGCD_Environment
350
+ from .sum_gcd_with_individual import SumGCDWithIndividual_Environment
351
+ from .sum_lcm import SumLCM_Environment
352
+ from .sum_manhattan_curved_surface import SumManhattan_CurvedSurface_Environment
353
+ from .sum_mod import SumMOD_Environment
354
+ from .sum_phi_interval import SumPHIInterval_Environment
355
+ from .sum_product_divisor_num import SumProductDivisorNum_Environment
356
+ from .sum_pseudo_euclidean import SumPseudoEuclidean_Environment
357
+ from .sum_set_multiplication import SumSetMultiplication_Environment
358
+ from .sum_spanning_tree_gcd import SumSpanningTreeGCD_Environment
359
+ from .sum_triangle_area import SumTriangleArea_Environment
360
+ from .sum_xor_divisor_num import SumXorDivisorNum_Environment
361
+ from .survo_puzzle import SurvoPuzzle_Environment
362
+ from .taking_prime_game import TakingPrimeGame_Environment
363
+ from .task_arrangement import TaskArrangement_Environment
364
+ from .tetris_attack import TetrisAttack_Environment
365
+ from .three_string_common_subsequence_counting import ThreeStringCommonSubsequenceCounting_Environment
366
+ from .three_vertex_cycle_counting import ThreeVertexCycleCounting_Environment
367
+ from .topological_sort import TopologicalSort_Environment
368
+ from .topological_sort_minimal_lexicographical_order import TopologicalSort_MinimalLexicographicalOrder_Environment
369
+ from .tournament_longest_path import Tournament_LongestPath_Environment
370
+ from .transmission_delay import TransmissionDelay_Environment
371
+ from .tree_add_one_edge_diameter import TreeAddOneEdgeDiameter_Environment
372
+ from .tree_center import TreeCenter_Environment
373
+ from .tree_change_one_edge_diameter import TreeChangeOneEdgeDiameter_Environment
374
+ from .tree_coloring import TreeColoring_Environment
375
+ from .tree_distance_equal_triad_counting import Tree_DistanceEqualTriad_Counting_Environment
376
+ from .tree_dynamic_xor_zero_path import TreeDynamic_XORZeroPath_Environment
377
+ from .tree_elimination_expectation import TreeElimination_Expectation_Environment
378
+ from .tree_even_partitioning import TreeEvenPartitioning_Environment
379
+ from .tree_maximum_visited_vertex import TreeMaximumVisitedVertex_Environment
380
+ from .tree_random_walk_expectation import TreeRandomWalkExpectation_Environment
381
+ from .tree_topological_sequence_counting import TreeTopologicalSequenceCounting_Environment
382
+ from .triumphal_arch import TriumphalArch_Environment
383
+ from .twiddle_puzzle import TwiddlePuzzle_Environment
384
+ from .two_sat import TwoSAT_Environment
385
+ from .two_set_all_coprime_counting import TwoSet_AllCoprime_Counting_Environment
386
+ from .undamaged_submatrix_counting import UndamagedSubmatrixCounting_Environment
387
+ from .value_diminishing_selection import ValueDiminishingSelection_Environment
388
+ from .vertex_k_center import Vertex_KCenter_Environment
389
+ from .virus_synthesis import VirusSynthesis_Environment
390
+ from .visible_line import VisibleLine_Environment
391
+ from .warehouse_construction import WarehouseConstruction_Environment
392
+ from .weighted_binarytree import WeightedBinaryTree_Environment
393
+ from .weighted_lis import WeightedLIS_Environment
394
+ from .whack_a_mole import WhackAMole_Environment
395
+ from .wil import WIL_Environment
396
+ from .wyc import WYC_Environment
397
+ from .wyr_leveling_ground import WYRLevelingGround_Environment
398
+ from .xor_equation_counting import XorEquationCounting_Environment
399
+ from .zero_prefix_subset_counting import ZeroPrefixSubsetCounting_Environment
400
+
401
+
402
+ identifier2environment = {
403
+ "ABProgramSimulation" : ABProgramSimulation_Environment,
404
+ "AddMultiple_Divisible_Counting" : AddMultiple_Divisible_Counting_Environment,
405
+ "AdditionTable" : AdditionTable_Environment,
406
+ "AlmostCompleteGraphCycleCounting" : AlmostCompleteGraphCycleCounting_Environment,
407
+ "AndOr_Sequence_Counting" : AndOr_Sequence_Counting_Environment,
408
+ "AntiPalindromicSubstringCounting" : AntiPalindromicSubstringCounting_Environment,
409
+ "Axis_KCenter" : Axis_KCenter_Environment,
410
+ "BAJBytecomputer" : BAJBytecomputer_Environment,
411
+ "BannedPointSupersetPathCounting" : BannedPointSupersetPathCounting_Environment,
412
+ "BanyanHeart" : BanyanHeart_Environment,
413
+ "BEZMinimalistSecurity" : BEZMinimalistSecurity_Environment,
414
+ "BezoutIdentity" : BezoutIdentity_Environment,
415
+ "Binario" : Binario_Environment,
416
+ "Binario_NoAdjacencyRequirement" : Binario_NoAdjacencyRequirement_Environment,
417
+ "BinaryAlternation" : BinaryAlternation_Environment,
418
+ "BinaryLinearEquation_SolutionCounting" : BinaryLinearEquation_SolutionCounting_Environment,
419
+ "BinaryTreeLeafNumExpectation" : BinaryTreeLeafNumExpectation_Environment,
420
+ "BitEquationCounting" : BitEquationCounting_Environment,
421
+ "BitAndZero_PathCounting" : BitAndZero_PathCounting_Environment,
422
+ "BitwiseOperationSequenceCounting" : BitwiseOperationSequenceCounting_Environment,
423
+ "BlockImage" : BlockImage_Environment,
424
+ "BoundedAdjacencyDifference_Permutation_Counting" : BoundedAdjacencyDifference_Permutation_Counting_Environment,
425
+ "BoundedIntervalIntersection" : BoundedIntervalIntersection_Environment,
426
+ "BoundedMeanSubarrayCounting" : BoundedMeanSubarrayCounting_Environment,
427
+ "BoundedSubarrayCounting" : BoundedSubarrayCounting_Environment,
428
+ "BoxScheduling" : BoxScheduling_Environment,
429
+ "Bridge" : Bridge_Environment,
430
+ "BubbleSwapLowerBound_PermutationCounting" : BubbleSwapLowerBound_PermutationCounting_Environment,
431
+ "BucketSorting" : BucketSorting_Environment,
432
+ "CampfireParty" : CampfireParty_Environment,
433
+ "CampsitePuzzle" : CampsitePuzzle_Environment,
434
+ "Canon" : Canon_Environment,
435
+ "CantorExpansion" : CantorExpansion_Environment,
436
+ "CapitalCityEffect" : CapitalCityEffect_Environment,
437
+ "CardColoringCounting" : CardColoringCounting_Environment,
438
+ "CatalanNumberMod" : CatalanNumberMod_Environment,
439
+ "CheckAllCycleXorZero" : CheckAllCycleXorZero_Environment,
440
+ "ChoHamsters" : ChoHamsters_Environment,
441
+ "Cinema" : Cinema_Environment,
442
+ "Circuit" : Circuit_Environment,
443
+ "CirculatingDecimalCounting" : CirculatingDecimalCounting_Environment,
444
+ "CirculatingGrid" : CirculatingGrid_Environment,
445
+ "CleaningUp" : CleaningUp_Environment,
446
+ "ClearSymmetry" : ClearSymmetry_Environment,
447
+ "Clique_IndependentSet_Partitioning_Counting" : Clique_IndependentSet_Partitioning_Counting_Environment,
448
+ "CoinSquareGame" : CoinSquareGame_Environment,
449
+ "ColoringCounting" : ColoringCounting_Environment,
450
+ "CombinationOddSubsequenceCounting" : CombinationOddSubsequenceCounting_Environment,
451
+ "ConcatenationPartitionCountingSum" : ConcatenationPartitionCountingSum_Environment,
452
+ "CongruentEquation" : CongruentEquation_Environment,
453
+ "ConstructHackInterval" : ConstructHackInterval_Environment,
454
+ "ConvexHull" : ConvexHull_Environment,
455
+ "Cornfield" : Cornfield_Environment,
456
+ "CountdownEqual" : CountdownEqual_Environment, "CountdownClose" : CountdownClose_Environment,
457
+ "CowDanceShow" : CowDanceShow_Environment,
458
+ "CRT" : CRT_Environment,
459
+ "Cryptarithmetic" : Cryptarithmetic_Environment,
460
+ "Cube_FixedLocalMaximumCounting" : Cube_FixedLocalMaximumCounting_Environment,
461
+ "CycleCounting" : CycleCounting_Environment,
462
+ "DecreasingDigitCounting" : DecreasingDigitCounting_Environment,
463
+ "DegreeFixed_SpanningTree" : DegreeFixed_SpanningTree_Environment,
464
+ "DeltaMinPopcount" : DeltaMinPopcount_Environment,
465
+ "DeltaNimGame" : DeltaNimGame_Environment,
466
+ "DerangementExtension" : DerangementExtension_Environment,
467
+ "DifferenceConstraintSystem" : DifferenceConstraintSystem_Environment,
468
+ "DifferenceConstraintSystemDAG" : DifferenceConstraintSystemDAG_Environment,
469
+ "DifferentColorPairing" : DifferentColorPairing_Environment,
470
+ "Differentiate" : Differentiate_Environment,
471
+ "DigitLISCounting" : DigitLISCounting_Environment,
472
+ "DiscreteLogarithm" : DiscreteLogarithm_Environment,
473
+ "Disinfection" : Disinfection_Environment,
474
+ "DistinctArrayPermutation" : DistinctArrayPermutation_Environment,
475
+ "DistinctEdgeColoredCompleteGraphCounting" : DistinctEdgeColoredCompleteGraphCounting_Environment,
476
+ "Division" : Division_Environment,
477
+ "DivisorFlipExpectation" : DivisorFlipExpectation_Environment,
478
+ "DoubleCrossCounting" : DoubleCrossCounting_Environment,
479
+ "DoublePalindromicStringCounting" : DoublePalindromicStringCounting_Environment,
480
+ "DoubleStackSorting" : DoubleStackSorting_Environment,
481
+ "DynDynamite" : DynDynamite_Environment,
482
+ "EightDigitPuzzle" : EightDigitPuzzle_Environment,
483
+ "EmperorWorries" : EmperorWorries_Environment,
484
+ "EnergyStorageMeter" : EnergyStorageMeter_Environment,
485
+ "EuclidGame" : EuclidGame_Environment,
486
+ "EvenDegreeGraphPartitioning" : EvenDegreeGraphPartitioning_Environment,
487
+ "Expression_AddingParenthese_Counting" : Expression_AddingParenthese_Counting_Environment,
488
+ "FaceRightWay" : FaceRightWay_Environment,
489
+ "FactorialTrailingZeroCount" : FactorialTrailingZeroCount_Environment,
490
+ "FBI_BinaryTree" : FBI_BinaryTree_Environment,
491
+ "Fibonacci" : Fibonacci_Environment,
492
+ "FibonacciContainingCounting" : FibonacciContainingCounting_Environment,
493
+ "Fibtrain" : Fibtrain_Environment,
494
+ "FireworkShow" : FireworkShow_Environment,
495
+ "FixedModK_Selection_Counting" : FixedModK_Selection_Counting_Environment,
496
+ "FixedOneEdgeNum_SpanningTree" : FixedOneEdgeNum_SpanningTree_Environment,
497
+ "FractionalProgramming" : FractionalProgramming_Environment,
498
+ "FractionalProgramming_BipartiteGraphMatching" : FractionalProgramming_BipartiteGraphMatching_Environment,
499
+ "FutoshikiPuzzle" : FutoshikiPuzzle_Environment,
500
+ "GasFireExtinguishers" : GasFireExtinguishers_Environment,
501
+ "GaussianElimination" : GaussianElimination_Environment,
502
+ "GCDFibonacciProduct" : GCDFibonacciProduct_Environment,
503
+ "GcdLcmCounting" : GcdLcmCounting_Environment,
504
+ "GCDOne_Counting" : GCDOne_Counting_Environment,
505
+ "GCDPrime_Counting" : GCDPrime_Counting_Environment,
506
+ "GoldWashing" : GoldWashing_Environment,
507
+ "GraMinimaGame" : GraMinimaGame_Environment,
508
+ "GradeRankingCounting" : GradeRankingCounting_Environment,
509
+ "GraphContainTreeCounting" : GraphContainTreeCounting_Environment,
510
+ "GraphIsomorphism" : GraphIsomorphism_Environment,
511
+ "GridBFS" : GridBFS_Environment,
512
+ "GridColoringCounting" : GridColoringCounting_Environment,
513
+ "GridComponent" : GridComponent_Environment,
514
+ "GridLocalMinimumCounting" : GridLocalMinimumCounting_Environment,
515
+ "GridParityConstruction" : GridParityConstruction_Environment,
516
+ "GridTriangleCounting" : GridTriangleCounting_Environment,
517
+ "HalvingChainCounting" : HalvingChainCounting_Environment,
518
+ "HamiltonianPath" : HamiltonianPath_Environment,
519
+ "HamiltonianPathExistence" : HamiltonianPathExistence_Environment,
520
+ "HeapCounting" : HeapCounting_Environment,
521
+ "HitoriPuzzle" : HitoriPuzzle_Environment,
522
+ "HungryRabbit" : HungryRabbit_Environment,
523
+ "HURWarehouseStore" : HURWarehouseStore_Environment,
524
+ "ImpParty" : ImpParty_Environment,
525
+ "IndividualSumBounded_SequenceCounting" : IndividualSumBounded_SequenceCounting_Environment,
526
+ "IntegerFactorizationCounting" : IntegerFactorizationCounting_Environment,
527
+ "IntegerProgramming" : IntegerProgramming_Environment,
528
+ "Integral" : Integral_Environment,
529
+ "InversionPair" : InversionPair_Environment,
530
+ "InversionPairK_Counting" : InversionPairK_Counting_Environment,
531
+ "Josephus" : Josephus_Environment,
532
+ "JugPuzzle" : JugPuzzle_Environment,
533
+ "KPartition" : KPartition_Environment,
534
+ "Kakurasu" : Kakurasu_Environment,
535
+ "KiddingMe" : KiddingMe_Environment,
536
+ "KingSorting" : KingSorting_Environment,
537
+ "KloBlocks" : KloBlocks_Environment,
538
+ "Knapsack" : Knapsack_Environment,
539
+ "KnightsAndKnaves" : KnightsAndKnaves_Environment,
540
+ "KosDicing" : KosDicing_Environment,
541
+ "Kth_BinaryTree" : Kth_BinaryTree_Environment,
542
+ "Kth_SemiBalancedBracketSequence" : Kth_SemiBalancedBracketSequence_Environment,
543
+ "KthSubsequence" : KthSubsequence_Environment,
544
+ "KUR" : KUR_Environment,
545
+ "LampChanging" : LampChanging_Environment,
546
+ "LandAcquisition" : LandAcquisition_Environment,
547
+ "LandformGenerationCounting" : LandformGenerationCounting_Environment,
548
+ "LargestConvexPolygon" : LargestConvexPolygon_Environment,
549
+ "LargestRectangle_AmongPoints" : LargestRectangle_AmongPoints_Environment,
550
+ "LAS" : LAS_Environment,
551
+ "LASLaser" : LASLaser_Environment,
552
+ "LCM" : LCM_Environment,
553
+ "LDSTwo_Counting" : LDSTwo_Counting_Environment,
554
+ "LightUpPuzzle" : LightUpPuzzle_Environment,
555
+ "LinkBeads" : LinkBeads_Environment,
556
+ "LIS_LDS_Concatenation" : LIS_LDS_Concatenation_Environment,
557
+ "LIZ_Lollipop" : LIZ_Lollipop_Environment,
558
+ "Longest_DoublePalindrome" : Longest_DoublePalindrome_Environment,
559
+ "Longest_MatchingSubsequence" : Longest_MatchingSubsequence_Environment,
560
+ "LongestMaxDiffBoundedInterval" : LongestMaxDiffBoundedInterval_Environment,
561
+ "LongestPath" : LongestPath_Environment,
562
+ "Longest_RepeatedPalindrome" : Longest_RepeatedPalindrome_Environment,
563
+ "MafMafia" : MafMafia_Environment,
564
+ "MagicSquarePuzzle" : MagicSquarePuzzle_Environment,
565
+ "MakingGrade" : MakingGrade_Environment,
566
+ "Matrix_BinaryExponentiation" : Matrix_BinaryExponentiation_Environment,
567
+ "MatrixPermutation_BothDiagonalOne" : MatrixPermutation_BothDiagonalOne_Environment,
568
+ "MatrixPermutationEquivalence" : MatrixPermutationEquivalence_Environment,
569
+ "MatrixPermutation_MainDiagonalOne" : MatrixPermutation_MainDiagonalOne_Environment,
570
+ "MatrixPooling" : MatrixPooling_Environment,
571
+ "MatrixRMQCounting" : MatrixRMQCounting_Environment,
572
+ "MaxDifferentGroupPairDivision" : MaxDifferentGroupPairDivision_Environment,
573
+ "MaxGridPathIntersection" : MaxGridPathIntersection_Environment,
574
+ "MaxMinimum_AfterIntervalAddition" : MaxMinimum_AfterIntervalAddition_Environment,
575
+ "MaxMultSplit" : MaxMultSplit_Environment,
576
+ "MaxMultiplicationFixedSum" : MaxMultiplicationFixedSum_Environment,
577
+ "MaxNoConflictingBombs" : MaxNoConflictingBombs_Environment,
578
+ "Max_NonAdjacent_KElementSum" : Max_NonAdjacent_KElementSum_Environment,
579
+ "MaxPermutation" : MaxPermutation_Environment,
580
+ "MaxRMQExpectation" : MaxRMQExpectation_Environment,
581
+ "MaxSegmentCoverageConstraint" : MaxSegmentCoverageConstraint_Environment,
582
+ "MaxSumLDS" : MaxSumLDS_Environment,
583
+ "MaxThreeSquareSum" : MaxThreeSquareSum_Environment,
584
+ "Max_TreeConstrainedPermutation_Weight" : Max_TreeConstrainedPermutation_Weight_Environment,
585
+ "MaxTree_KPathCoverage" : MaxTree_KPathCoverahe_Environment,
586
+ "MaxTreeXorPath" : MaxTreeXorPath_Environment,
587
+ "MaxWeightPalindromicSubstring" : MaxWeightPalindromicSubstring_Environment,
588
+ "MaxXorPath" : MaxXorPath_Environment,
589
+ "MaxXorSet" : MaxXorSet_Environment,
590
+ "MaximumAchromaticNumber" : MaximumAchromaticNumber_Environment,
591
+ "MaximumClique" : MaximumClique_Environment,
592
+ "MaximumDivisor" : MaximumDivisor_Environment,
593
+ "MaximumIndependentSetGrid" : MaximumIndependentSetGrid_Environment,
594
+ "Maximum_IndependentSet_Tree" : Maximum_IndependentSet_Tree_Environment,
595
+ "MaximumLexicographicalOrderSubsequence" : MaximumLexicographicalOrderSubsequence_Environment,
596
+ "MaximumPointSegmentMatching" : MaximumPointSegmentMatching_Environment,
597
+ "Maximum_SubsequenceNum" : Maximum_SubsequenceNum_Environment,
598
+ "MaximumWeightMatching" : MaximumWeightMatching_Environment,
599
+ "Maze" : Maze_Environment,
600
+ "MinConversionToCycleCost" : MinConversionToCycleCost_Environment,
601
+ "MinCostReducingLNDS" : MinCostReducingLNDS_Environment,
602
+ "MinCostTreeCoverage" : MinCostTreeCoverage_Environment,
603
+ "MinCubeAssignment" : MinCubeAssignment_Environment,
604
+ "MinDivisionSumXor" : MinDivisionSumXor_Environment,
605
+ "MinInorderBinaryTree" : MinInorderBinaryTree_Environment,
606
+ "MinKDivisorNumber" : MinKDivisorNumber_Environment,
607
+ "MinNoSolutionLinearDiophantineEquation" : MinNoSolutionLinearDiophantineEquation_Environment,
608
+ "MinNonsubstring" : MinNonsubstring_Environment,
609
+ "MinPairSumMultiplicationPermutation" : MinPairSumMultiplicationPermutation_Environment,
610
+ "MinPathCover_DAG" : MinPathCover_DAG_Environment,
611
+ "MinSumChebyshevDistance" : MinSumChebyshevDistance_Environment,
612
+ "MinSumDistanceSquare" : MinSumDistanceSquare_Environment,
613
+ "MinSumPreXor" : MinSumPreXor_Environment,
614
+ "MinSwapTwoPermutations" : MinSwapTwoPermutations_Environment,
615
+ "MinXorPair" : MinXorPair_Environment,
616
+ "Minesweeping" : Minesweeping_Environment,
617
+ "MinimalCyclicShift" : MinimalCyclicShift_Environment,
618
+ "MinimumChromaticNumber" : MinimumChromaticNumber_Environment,
619
+ "MinimumChromaticNumber_SegmentOverlap" : MinimumChromaticNumber_SegmentOverlap_Environment,
620
+ "MinimumCost_MaximumFlow" : MinimumCost_MaximumFlow_Environment,
621
+ "Minimum_CrossingEdges_GraphPartition" : Minimum_CrossingEdges_GraphPartition_Environment,
622
+ "MinimumDirectedSpanningTree" : MinimumDirectedSpanningTree_Environment,
623
+ "Minimum_DominatingInterval" : Minimum_DominatingInterval_Environment,
624
+ "Minimum_DominatingSet" : Minimum_DominatingSet_Environment,
625
+ "Minimum_DominatingSet_Grid" : Minimum_DominatingSet_Grid_Environment,
626
+ "MinimumFibonacciRepresentation" : MinimumFibonacciRepresentation_Environment,
627
+ "MinimumHarmoniousChromaticNumber" : MinimumHarmoniousChromaticNumber_Environment,
628
+ "MinimumIntervalCoverage" : MinimumIntervalCoverage_Environment,
629
+ "Minimum_MaxAbsSlicer" : Minimum_MaxAbsSlicer_Environment,
630
+ "Minimum_MaxSlicer" : Minimum_MaxSlicer_Environment,
631
+ "MinimumRatioPath" : MinimumRatioPath_Environment,
632
+ "MinimumSpanningTree" : MinimumSpanningTree_Environment,
633
+ "MinimumSpanningTreeCounting" : MinimumSpanningTreeCounting_Environment,
634
+ "MinimumSteinerTree" : MinimumSteinerTree_Environment,
635
+ "MinimumSumDifferenceSubmatrix" : MinimumSumDifferenceSubmatrix_Environment,
636
+ "MinimumTreeWeightedDominatingAncestor" : MinimumTreeWeightedDominatingAncestor_Environment,
637
+ "MinimumUnconflictedGridKMax" : MinimumUnconflictedGridKMax_Environment,
638
+ "Minimum_VertexCover" : Minimum_VertexCover_Environment,
639
+ "MinimumWeightedSpanningTree" : MinimumWeightedSpanningTree_Environment,
640
+ "MitterTransportation" : MitterTransportation_Environment,
641
+ "MixedGraphEulerianCircuit" : MixedGraphEulerianCircuit_Environment,
642
+ "MoneyChargingGame" : MoneyChargingGame_Environment,
643
+ "MonochromeBlockCounting" : MonochromeBlockCounting_Environment,
644
+ "MonotonicStack" : MonotonicStack_Environment,
645
+ "MostComponentTreeRemovingTwoPaths" : MostComponentTreeRemovingTwoPaths_Environment,
646
+ "MostNumEdge_NonSelfIsomorphism" : MostNumEdge_NonSelfIsomorphism_Environment,
647
+ "MultiDrink" : MultiDrink_Environment,
648
+ "MultipleFlippingGame" : MultipleFlippingGame_Environment,
649
+ "Multiplication" : Multiplication_Environment,
650
+ "MYJ" : MYJ_Environment,
651
+ "NANDResultCounting" : NANDResultCounting_Environment,
652
+ "NegativeBase" : NegativeBase_Environment,
653
+ "NewNimGame" : NewNimGame_Environment,
654
+ "NextPalindromic" : NextPalindromic_Environment,
655
+ "NinePuzzle" : NinePuzzle_Environment,
656
+ "NoAdjacentGirlCounting" : NoAdjacentGirlCounting_Environment,
657
+ "NoDoubleTripleCounting" : NoDoubleTripleCounting_Environment,
658
+ "NotContainingStringCounting" : NotContainingStringCounting_Environment,
659
+ "NumberPartitionCounting" : NumberPartitionCounting_Environment,
660
+ "Numbrix" : Numbrix_Environment,
661
+ "OddVisitation" : OddVisitation_Environment,
662
+ "ODLDistance" : ODLDistance_Environment,
663
+ "PairMoreOneCounting" : PairMoreOneCounting_Environment,
664
+ "PalembangBridges" : PalembangBridges_Environment,
665
+ "PalindromePartitionCounting" : PalindromePartitionCounting_Environment,
666
+ "PalindromicSubstringNumberCounting" : PalindromicSubstringNumberCounting_Environment,
667
+ "PanSolarPanels" : PanSolarPanels_Environment,
668
+ "Path_NoGoingBack_Counting" : Path_NoGoingBack_Counting_Environment,
669
+ "Patrol" : Patrol_Environment,
670
+ "PCPPermutation" : PCPPermutation_Environment,
671
+ "PipelineArrangement" : PipelineArrangement_Environment,
672
+ "POLPolarization" : POLPolarization_Environment,
673
+ "PolyaModel" : PolyaModel_Environment,
674
+ "PolynomialFactorization" : PolynomialFactorization_Environment,
675
+ "PolynomialInterpolation" : PolynomialInterpolation_Environment,
676
+ "PolynomialMinimum" : PolynomialMinimum_Environment,
677
+ "PolynomialRemainder" : PolynomialRemainder_Environment,
678
+ "PowerCycle" : PowerCycle_Environment,
679
+ "PowerShortcut" : PowerShortcut_Environment,
680
+ "PowerNest" : PowerNest_Environment,
681
+ "PrefixConcatenation" : PrefixConcatenation_Environment,
682
+ "PrefixProductMODDistinctPermutation" : PrefixProductMODDistinctPermutation_Environment,
683
+ "PrefixSumMODDistinctPermutation" : PrefixSumMODDistinctPermutation_Environment,
684
+ "Prefixuffix" : Prefixuffix_Environment,
685
+ "PreorderTraversal" : PreorderTraversal_Environment,
686
+ "PrimeGraph_MinimumChromaticNumber" : PrimeGraph_MinimumChromaticNumber_Environment,
687
+ "ProtectingFlowers" : ProtectingFlowers_Environment,
688
+ "PythagoreanGraph_IndependentSetCounting" : PythagoreanGraph_IndependentSetCounting_Environment,
689
+ "QuadMagicItems" : QuadMagicItems_Environment,
690
+ "QuadraticFunctionSegmentation" : QuadraticFunctionSegmentation_Environment,
691
+ "QuantumLockPuzzle" : QuantumLockPuzzle_Environment,
692
+ "QueenPlacement" : QueenPlacement_Environment,
693
+ "RandomRangeMaxExpectation" : RandomRangeMaxExpectation_Environment,
694
+ "RangeConstrained_IncreasingSequence_Counting" : RangeConstrained_IncreasingSequence_Counting_Environment,
695
+ "RangeFourSequenceConstruction" : RangeFourSequenceConstruction_Environment,
696
+ "RangeShrinkingSequenceCounting" : RangeShrinkingSequenceCounting_Environment,
697
+ "RecursiveFunction" : RecursiveFunction_Environment,
698
+ "RecursiveSequenceSumConstruction" : RecursiveSequenceSumConstruction_Environment,
699
+ "RepeatSequenceLNDS" : RepeatSequenceLNDS_Environment,
700
+ "RootExtraction" : RootExtraction_Environment,
701
+ "RoundRobin" : RoundRobin_Environment,
702
+ "RoundTableAssignment" : RoundTableAssignment_Environment,
703
+ "RoyalLockCounting" : RoyalLockCounting_Environment,
704
+ "SaladBar" : SaladBar_Environment,
705
+ "SalesmanFatigue" : SalesmanFatigue_Environment,
706
+ "SameAdjacencyCounting" : SameAdjacencyCounting_Environment,
707
+ "SAT" : SAT_Environment,
708
+ "SCC_Sequence_Counting" : SCC_Sequence_Counting_Environment,
709
+ "SecretCowCode" : SecretCowCode_Environment,
710
+ "SegmentMinLengthEqual_Counting" : SegmentMinLengthEqual_Counting_Environment,
711
+ "SegmentTreeSortingCounting" : SegmentTreeSortingCounting_Environment,
712
+ "SelfPowerSequenceMOD" : SelfPowerSequenceMOD_Environment,
713
+ "SetCover" : SetCover_Environment,
714
+ "SetSplitting" : SetSplitting_Environment,
715
+ "SharedSubstringCounting" : SharedSubstringCounting_Environment,
716
+ "ShortestPath" : ShortestPath_Environment,
717
+ "ShortestPathCountConstruction" : ShortestPathCountConstruction_Environment,
718
+ "ShortestUnicolorSubstring" : ShortestUnicolorSubstring_Environment,
719
+ "SingingGirlStory" : SingingGirlStory_Environment,
720
+ "SingleStackSorting" : SingleStackSorting_Environment,
721
+ "SkaRockGarden" : SkaRockGarden_Environment,
722
+ "SkyscraperPuzzle" : SkyscraperPuzzle_Environment,
723
+ "SkyscraperSumPuzzle" : SkyscraperSumPuzzle_Environment,
724
+ "SlidingWindow" : SlidingWindow_Environment,
725
+ "SLOElephants" : SLOElephants_Environment,
726
+ "SmallestBinaryMultiple" : SmallestBinaryMultiple_Environment,
727
+ "SmallestCircle" : SmallestCircle_Environment,
728
+ "Sorting" : Sorting_Environment,
729
+ "SpiralMatrix" : SpiralMatrix_Environment,
730
+ "SplittingGame" : SplittingGame_Environment,
731
+ "SpyNetwork" : SpyNetwork_Environment,
732
+ "SquSquarks" : SquSquarks_Environment,
733
+ "SquareUndamagedPointCounting" : SquareUndamagedPointCounting_Environment,
734
+ "StarBattle" : StarBattle_Environment,
735
+ "StirlingSecond" : StirlingSecond_Environment,
736
+ "StoneGame" : StoneGame_Environment,
737
+ "StoneIntervalsGame" : StoneIntervalsGame_Environment,
738
+ "StringPartitionShuffle" : StringPartitionShuffle_Environment,
739
+ "StringReversalConstruction" : StringReversalConstruction_Environment,
740
+ "STUWell" : STUWell_Environment,
741
+ "StuntFlying" : StuntFlying_Environment,
742
+ "SubarraySumXor" : SubarraySumXor_Environment,
743
+ "SubarrayXorSum" : SubarrayXorSum_Environment,
744
+ "SubgraphIsomorphism" : SubgraphIsomorphism_Environment,
745
+ "SubmatrixSumDivisibleCounting" : SubmatrixSumDivisibleCounting_Environment,
746
+ "SubsequenceReversalLNDS" : SubsequenceReversalLNDS_Environment,
747
+ "SubsetSum" : SubsetSum_Environment,
748
+ "SubsetSumSequence" : SubsetSumSequence_Environment,
749
+ "Sudoku" : Sudoku_Environment,
750
+ "Sum_DivisorNum" : Sum_DivisorNum_Environment,
751
+ "SumGCD" : SumGCD_Environment,
752
+ "SumGCDWithIndividual" : SumGCDWithIndividual_Environment,
753
+ "SumLCM" : SumLCM_Environment,
754
+ "SumManhattan_CurvedSurface" : SumManhattan_CurvedSurface_Environment,
755
+ "SumMOD" : SumMOD_Environment,
756
+ "SumPHIInterval" : SumPHIInterval_Environment,
757
+ "SumProductDivisorNum" : SumProductDivisorNum_Environment,
758
+ "SumPseudoEuclidean" : SumPseudoEuclidean_Environment,
759
+ "SumSetMultiplication" : SumSetMultiplication_Environment,
760
+ "SumSpanningTreeGCD" : SumSpanningTreeGCD_Environment,
761
+ "SumTriangleArea" : SumTriangleArea_Environment,
762
+ "SumXorDivisorNum" : SumXorDivisorNum_Environment,
763
+ "SurvoPuzzle" : SurvoPuzzle_Environment,
764
+ "TakingPrimeGame" : TakingPrimeGame_Environment,
765
+ "TaskArrangement" : TaskArrangement_Environment,
766
+ "TetrisAttack" : TetrisAttack_Environment,
767
+ "ThreeStringCommonSubsequenceCounting" : ThreeStringCommonSubsequenceCounting_Environment,
768
+ "ThreeVertexCycleCounting" : ThreeVertexCycleCounting_Environment,
769
+ "TopologicalSort" : TopologicalSort_Environment,
770
+ "TopologicalSort_MinimalLexicographicalOrder" : TopologicalSort_MinimalLexicographicalOrder_Environment,
771
+ "Tournament_LongestPath" : Tournament_LongestPath_Environment,
772
+ "TransmissionDelay" : TransmissionDelay_Environment,
773
+ "TreeAddOneEdgeDiameter" : TreeAddOneEdgeDiameter_Environment,
774
+ "TreeCenter" : TreeCenter_Environment,
775
+ "TreeChangeOneEdgeDiameter" : TreeChangeOneEdgeDiameter_Environment,
776
+ "TreeColoring" : TreeColoring_Environment,
777
+ "Tree_DistanceEqualTriad_Counting" : Tree_DistanceEqualTriad_Counting_Environment,
778
+ "TreeDynamic_XORZeroPath" : TreeDynamic_XORZeroPath_Environment,
779
+ "TreeElimination_Expectation" : TreeElimination_Expectation_Environment,
780
+ "TreeEvenPartitioning" : TreeEvenPartitioning_Environment,
781
+ "TreeMaximumVisitedVertex" : TreeMaximumVisitedVertex_Environment,
782
+ "TreeRandomWalkExpectation" : TreeRandomWalkExpectation_Environment,
783
+ "TreeTopologicalSequenceCounting" : TreeTopologicalSequenceCounting_Environment,
784
+ "TriumphalArch" : TriumphalArch_Environment,
785
+ "TwiddlePuzzle" : TwiddlePuzzle_Environment,
786
+ "TwoSAT" : TwoSAT_Environment,
787
+ "TwoSet_AllCoprime_Counting" : TwoSet_AllCoprime_Counting_Environment,
788
+ "UndamagedSubmatrixCounting" : UndamagedSubmatrixCounting_Environment,
789
+ "ValueDiminishingSelection" : ValueDiminishingSelection_Environment,
790
+ "Vertex_KCenter" : Vertex_KCenter_Environment,
791
+ "VirusSynthesis" : VirusSynthesis_Environment,
792
+ "VisibleLine" : VisibleLine_Environment,
793
+ "WarehouseConstruction" : WarehouseConstruction_Environment,
794
+ "WeightedBinaryTree" : WeightedBinaryTree_Environment,
795
+ "WeightedLIS" : WeightedLIS_Environment,
796
+ "WhackAMole" : WhackAMole_Environment,
797
+ "WIL" : WIL_Environment,
798
+ "WYC" : WYC_Environment,
799
+ "WYRLevelingGround" : WYRLevelingGround_Environment,
800
+ "XorEquationCounting" : XorEquationCounting_Environment,
801
+ "ZeroPrefixSubsetCounting" : ZeroPrefixSubsetCounting_Environment,
802
+ }
server/Gym/environments/ab_program_simulation/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import ABProgramSimulation_Environment
server/Gym/environments/ab_program_simulation/environment.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from typing import Optional, List
3
+ from ...environment import VerifiableEnvironment
4
+
5
+
6
# Environment that asks for the fully rewritten (normal) form of a random A::B token program.
class ABProgramSimulation_Environment(VerifiableEnvironment) : # Source : https://x.com/VictorTaelin/status/1776096481704804789
    prompt_template = \
r"""A::B is a system with 4 tokens: `A#`, `#A`, `B#` and `#B`.

An A::B program is a sequence of tokens, e.g., `B# A# #B #A B#`.

To *compute* a program, we must rewrite neighbor tokens, using the rules (whenever two neighbor tokens have their `#` facing each-other, they must be rewritten according to the corresponding rule) :
+ `A# #A` ... becomes ... `` (nothing)
+ `A# #B` ... becomes ... `#B A#`
+ `B# #A` ... becomes ... `#A B#`
+ `B# #B` ... becomes ... `` (nothing)

Please give the final state of the program: {program}
An example for output format: `B# A# A#`
"""

    def __init__(self,
                 wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Initialize the ABProgramSimulation_Environment instance.

        The three reward values are stored under their own names in `self.rewards`;
        all remaining keyword arguments are forwarded to the parent constructor.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            correct_answer = correct_answer,
            wrong_answer = wrong_answer,
        )


    def _generate(self) -> None :
        """
        Sample a random program of N tokens and simulate it.

        A program is kept only if it reaches a state with no rewritable pair
        within `max_steps` iterations; otherwise a new program is sampled.
        Writes `program`, `reference_answer` and `gold_answer` into `self.parameter`.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 1, "N should be greater than or equal to 1"

        assert "max_steps" in self.parameter, "max_steps is required in parameter"
        max_steps = self.parameter["max_steps"]
        assert max_steps >= 1, "max_steps should be greater than or equal to 1"

        tokens = ("A#", "#A", "B#", "#B")
        # Replacement for each adjacent pair whose `#` symbols face each other.
        rewrites = {
            ("A#", "#A") : [],
            ("A#", "#B") : ["#B", "A#"],
            ("B#", "#A") : ["#A", "B#"],
            ("B#", "#B") : [],
        }

        final = None
        while final is None :
            # Random token frequencies: four integer weights normalised to sum to 1.
            raw_weights = [random.randint(1, N) for _ in range(4)]
            weight_sum = sum(raw_weights)
            weights = [w / weight_sum for w in raw_weights]
            program = [tokens[index] for index in random.choices(range(4), weights, k = N)]
            self.parameter["program"] = program

            state = program.copy()
            for _ in range(max_steps) :
                # Leftmost pair that matches a rule, or None when the state is final.
                position = next(
                    (p for p in range(len(state) - 1) if (state[p], state[p + 1]) in rewrites),
                    None,
                )
                if position is None :
                    final = state
                    break
                replacement = rewrites[(state[position], state[position + 1])]
                state = state[: position] + replacement + state[position + 2 :]

        self.parameter["reference_answer"] = " ".join(final)
        self.parameter["gold_answer"] = final


    def _prompt_generate(self) -> str :
        """Fill the prompt template with the space-separated program."""
        program_text = " ".join(self.parameter["program"])
        return self.prompt_template.format(program = program_text)


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """Split the answer into whitespace-separated tokens; return None when there is no answer."""
        if answer is None :
            return None # Invalid answer format

        stripped = answer.strip()
        try :
            return stripped.split()
        except ValueError :
            return None # Invalid answer format


    def scorer(self, output : str) -> float :
        """
        Score a model output.

        Returns the `wrong_format` reward when no answer could be processed or when
        any token is not one of the four A::B tokens; otherwise the `correct_answer`
        or `wrong_answer` reward depending on equality with the stored gold answer.
        """
        answer_tokens = self.processor(output)
        if answer_tokens is None :
            return self.rewards["wrong_format"]
        assert isinstance(answer_tokens, list), "processed_result should be a list"

        valid_tokens = ("A#", "#A", "B#", "#B")
        if any(token not in valid_tokens for token in answer_tokens) :
            return self.rewards["wrong_format"]

        outcome = "correct_answer" if answer_tokens == self.parameter["gold_answer"] else "wrong_answer"
        return self.rewards[outcome]
server/Gym/environments/add_multiple_divisible_counting/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import AddMultiple_Divisible_Counting_Environment
server/Gym/environments/add_multiple_divisible_counting/environment.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import random
3
+ from typing import Optional
4
+ from ...environment import VerifiableEnvironment
5
+
6
+
7
# Environment that asks for the number of pairs 1 <= a < b <= N with (a + b) dividing a * b.
class AddMultiple_Divisible_Counting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4466
    prompt_template = \
r"""Please compute the number of pairs (a, b) such that:
- 1 ≤ a < b ≤ {N}
- a × b is divisible by a + b

**Output Format:** Your final answer should be a single integer — the number of such pairs (a, b)."""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the AddMultiple_Divisible_Counting_Environment instance.

        The reward settings are stored under their own names in `self.rewards`;
        all remaining keyword arguments are forwarded to the parent constructor.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )


    def _generate(self) -> None :
        """
        Draw N uniformly from [6, MAX_N] and compute the reference count.

        Writes `N` and `reference_answer` into `self.parameter`.
        """
        assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
        MAX_N = self.parameter["MAX_N"]
        assert MAX_N >= 6, "MAX_N should be greater than or equal to 6"

        N = self.parameter["N"] = random.randint(6, MAX_N)


        def calc(x : int, y : int) -> int :
            """
            Return sum_{k = x+1 .. 2*x-1} floor(y / k).

            Consecutive k sharing the same quotient are handled as one block.
            """
            if y == 0 :
                return 0
            total = 0
            upper = x << 1
            k = x + 1
            while k < upper :
                quotient = y // k
                if quotient == 0 :
                    # Every larger k also gives quotient 0.
                    break
                block_end = min(y // quotient, upper - 1)
                total += (block_end - k + 1) * quotient
                k = block_end + 1
            return total

        limit = math.isqrt(N)

        # Linear sieve for the Moebius function on [1, limit].
        mu = [0] * (limit + 1)
        mu[1] = 1
        composite = [False] * (limit + 1)
        primes = []

        for value in range(2, limit + 1) :
            if not composite[value] :
                primes.append(value)
                mu[value] = -1
            for prime in primes :
                product = value * prime
                if product > limit :
                    break
                composite[product] = True
                if value % prime == 0 :
                    mu[product] = 0
                    break
                mu[product] = -mu[value]

        ans = 0
        for d in range(1, limit + 1) :
            sign = mu[d]
            if sign == 0 :
                continue
            square = d * d
            for j in range(1, limit // d + 1) :
                ans += sign * calc(j, N // (square * j))
        assert ans > 0, "Answer should be greater than 0"
        self.parameter["reference_answer"] = ans

    def _prompt_generate(self) -> str :
        """Fill the prompt template with the sampled N."""
        return self.prompt_template.format(N = self.parameter["N"])


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return None when missing or not an integer."""
        if answer is None :
            return None

        answer = answer.strip()
        try :
            return int(answer)
        except ValueError :
            return None

    def scorer(self, output : str) -> float :
        """
        Score a model output against the reference count.

        Missing, unparsable or non-positive answers get the `wrong_format` reward.
        Otherwise the configured rewarding strategy is applied; an unknown strategy
        raises NotImplementedError.
        """
        predicted = self.processor(output)
        if predicted is None :
            return self.rewards["wrong_format"]
        if predicted <= 0 :
            return self.rewards["wrong_format"]

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(min/max)^beta" :
            a, b = self.parameter["reference_answer"], predicted
            return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (predicted == self.parameter["reference_answer"])
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
server/Gym/environments/addition_table/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import AdditionTable_Environment
server/Gym/environments/addition_table/environment.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from typing import Optional, Dict
3
+ from ...environment import VerifiableEnvironment
4
+
5
+
6
# Environment that asks to recover the base and the letter -> digit mapping from a full addition table.
class AdditionTable_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1013
    prompt_template = \
r"""You are given an unknown base-N number system (N is an integer ≥ 3), and {N} distinct digits {ALL_LETTERS} in that system. The digits satisfy the following equations in base-N:

{EQUATIONS}

Note:
- {ALL_LETTERS} are distinct digits in the range [0, N−1].
- Expressions like ba represent base-N numbers formed by **concatenation**. For example, if a=1 and b=2, then ba = "21" in base-N.

Your task is to find the correct base N (in decimal), and the values of {ALL_LETTERS} (also in decimal) that satisfy all the equations.

Output Format:
Your final answer should be a single line containing N, {ALL_LETTERS} (all in decimal), separated by **spaces**.
Example: `{N_plus_1} {EXAMPLE_1}` (do **NOT** include the backticks or quotes); this means N={N_plus_1}, {EXAMPLE_2}.
"""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_answer : float = -0.5, wrong_N : float = 0.0, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 3.0,
                 **kwargs) :
        """
        Initialize the AdditionTable_Environment instance.

        The reward settings are stored under their own names in `self.rewards`;
        all remaining keyword arguments are forwarded to the parent constructor.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            invalid_answer = invalid_answer,
            wrong_N = wrong_N,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )


    def _generate(self) -> None :
        """
        Assign the first N lowercase letters to the digits 0..N-1 at random.

        Writes `digit2letter` (index = digit value) and `reference_answer`
        ("N" followed by the values of a, b, c, ... in alphabetical order).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N in range(3, 26 + 1), "N should be in the range [3, 26]"

        alphabet = [chr(code) for code in range(97, 97 + N)]
        shuffled = list(alphabet)
        self.parameter["digit2letter"] = shuffled
        random.shuffle(shuffled)

        value_of = {letter : value for value, letter in enumerate(shuffled)}
        digits_text = " ".join([str(value_of[letter]) for letter in alphabet])
        self.parameter["reference_answer"] = "{} {}".format(N, digits_text)


    def convert_to_expression(self, n : int) -> str :
        """Write the integer n in base N using the stored digit letters, most significant digit first."""
        N = self.parameter["N"]
        letters = self.parameter["digit2letter"]

        if n == 0 :
            return letters[0]

        pieces = []
        while n > 0 :
            n, digit = divmod(n, N)
            pieces.append(letters[digit])
        return "".join(reversed(pieces))


    def _prompt_generate(self) -> str :
        """Build the prompt with one equation per unordered pair of letters (including a letter with itself)."""
        N = self.parameter["N"]
        alphabet = [chr(code) for code in range(97, 97 + N)]

        value_of = {letter : value for value, letter in enumerate(self.parameter["digit2letter"])}

        # Pairs are listed in alphabetical order with the first letter <= the second.
        equation_lines = [
            "{} + {} = {}".format(first, second, self.convert_to_expression(value_of[first] + value_of[second]))
            for index, first in enumerate(alphabet)
            for second in alphabet[index :]
        ]

        return self.prompt_template.format(
            ALL_LETTERS = ", ".join(alphabet),
            EQUATIONS = "\n".join(equation_lines),
            N = N,
            N_plus_1 = N + 1,
            EXAMPLE_1 = " ".join([str(value) for value in range(N)]),
            EXAMPLE_2 = ", ".join(["{}={}".format(letter, index) for index, letter in enumerate(alphabet)]),
        )


    def _process(self, answer : Optional[str]) -> Optional[Dict] :
        """
        Parse "N d_a d_b ..." into dict(N = ..., digits = [...]).

        Returns None when there is no answer, and an empty dict when the answer
        is not exactly N + 1 integers.
        """
        if answer is None :
            return None

        answer = answer.strip()
        try :
            values = list(map(int, answer.split()))
        except ValueError :
            return dict()

        if len(values) != self.parameter["N"] + 1 :
            return dict()
        return dict(N = values[0], digits = values[1 :])


    def scorer(self, output : str) -> float :
        """
        Score a model output against the stored letter -> digit mapping.

        Returns `wrong_format` when there is no answer, `invalid_answer` when it could
        not be parsed, `wrong_N` when the base differs; otherwise applies the configured
        rewarding strategy. An unknown strategy raises NotImplementedError.
        """
        parsed = self.processor(output)
        if parsed is None :
            return self.rewards["wrong_format"]
        if not parsed :
            return self.rewards["invalid_answer"]

        N = parsed["N"]
        if N != self.parameter["N"] :
            return self.rewards["wrong_N"]

        predict_digits = parsed["digits"]
        assert len(predict_digits) == N, "digits should have the same length as N"

        value_of = {letter : value for value, letter in enumerate(self.parameter["digit2letter"])}
        assert len(value_of) == N, "letter2digit should have the same length as N"
        gold_digits = [value_of[chr(code)] for code in range(97, 97 + N)]

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "mean([gold=answer])^beta" :
            matches = sum(float(gold == guess) for gold, guess in zip(gold_digits, predict_digits))
            return self.rewards["rewarding_weight"] * ((matches / N) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * all(gold == guess for gold, guess in zip(gold_digits, predict_digits))
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
server/Gym/environments/almost_complete_graph_cycle_counting/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import AlmostCompleteGraphCycleCounting_Environment
server/Gym/environments/almost_complete_graph_cycle_counting/environment.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from typing import Optional
3
+ from ...environment import VerifiableEnvironment
4
+
5
+
6
class AlmostCompleteGraphCycleCounting_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3862
    """Count simple cycles of K_N with the edge (1, N) removed, modulo an odd MOD.

    ``_generate`` draws N and MOD and stores the reference answer;
    ``scorer`` compares a parsed integer answer against it.
    """

    prompt_template = r"""Consider a graph with {N} vertices labeled from 1 to {N}. Every pair of vertices is connected by an undirected edge, except for the edge between vertices 1 and {N} (so the graph has {N} × ({N} - 1) / 2 - 1 edges).

What's the number of **simple cycles** in this graph? A simple cycle must:
- Have at least 3 vertices,
- Contain no repeated vertices or edges,
- Be considered the same as any cycle with the same set of edges (regardless of order or starting point); for example, `(1, 2, 3, 4)` and `(2, 1, 4, 3)` are the same, but `(1, 2, 3, 4)` and `(2, 1, 3, 4)` are different.
Output the answer modulo {MOD}."""

    def __init__(self,
                 max_MOD: int = 1000000,
                 wrong_format: float = -1.0, wrong_range: float = -0.5,
                 correct_answer: float = +1.0, wrong_answer: float = 0.0,
                 **kwargs):
        """
        Initialize the AlmostCompleteGraphCycleCounting_Environment instance.

        Args:
            max_MOD: Controls the range the modulus is drawn from (see ``_generate``).
            wrong_format: Reward when the answer cannot be parsed as an integer.
            wrong_range: Reward when the answer is outside ``[0, MOD)``.
            correct_answer: Reward for an answer equal to the reference.
            wrong_answer: Reward for an in-range but incorrect answer.
            **kwargs: Forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.max_MOD = max_MOD
        self.rewards = {
            "wrong_format": wrong_format,
            "wrong_range": wrong_range,
            "correct_answer": correct_answer,
            "wrong_answer": wrong_answer,
        }

    @staticmethod
    def _count_cycles(N: int, MOD: int) -> int:
        """Return the number of simple cycles of K_N minus edge (1, N), mod MOD.

        Requires ``N >= 4`` and an odd ``MOD`` (so that ``(MOD + 1) // 2`` is
        the modular inverse of 2).

        Invariant at the top of iteration ``i`` of the loop:
          * ``cycles`` = number of simple cycles of K_{i-1} (mod MOD);
          * ``paths``  = number of simple paths between two fixed vertices
            of K_{i-1} (mod MOD).
        Adding vertex ``i`` joined to all ``i-1`` earlier vertices creates
        ``C(i-1, 2) * paths`` new cycles (pick its two cycle-neighbours, then
        a path between them).  The path count obeys p(i) = 1 + (i-2) * p(i-1).
        """
        inv2 = (MOD + 1) // 2

        cycles, paths = 1, 2  # K_3: one triangle; two paths between any two vertices
        for i in range(4, N):
            pairs = ((i - 1) % MOD) * ((i - 2) % MOD) % MOD * inv2 % MOD
            cycles = (cycles + paths * pairs) % MOD
            paths = (paths * ((i - 2) % MOD) + 1) % MOD

        # Vertex N is joined to every earlier vertex except vertex 1, i.e. to
        # N - 2 vertices, so it contributes C(N-2, 2) * paths cycles.
        pairs = ((N - 2) % MOD) * ((N - 3) % MOD) % MOD * inv2 % MOD
        return (cycles + paths * pairs) % MOD

    def _generate(self) -> None:
        """Draw N and an odd MOD, then store the reference answer.

        Reads ``self.parameter["MAX_N"]`` (must be >= 4) and writes
        ``"N"``, ``"MOD"`` and ``"reference_answer"``.
        """
        assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
        MAX_N = self.parameter["MAX_N"]
        assert MAX_N >= 4, "MAX_N should be greater than or equal to 4"

        N = self.parameter["N"] = random.randint(4, MAX_N)

        # MOD is always odd.  NOTE: for an even max_MOD the largest possible
        # draw is max_MOD + 1; kept as-is so seeded instances are unchanged.
        MOD = self.parameter["MOD"] = 2 * random.randint(1, self.max_MOD // 2) + 1

        # N >= 4 is guaranteed by the draw above, so no small-N special case
        # is needed here.
        self.parameter["reference_answer"] = self._count_cycles(N, MOD)

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the generated N and MOD."""
        return self.prompt_template.format(N=self.parameter["N"], MOD=self.parameter["MOD"])

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse ``answer`` as a single integer; return ``None`` if absent or malformed."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Return the reward for ``output``.

        ``self.processor`` is defined outside this class body; its result is
        treated as the parsed integer answer or ``None``.
        """
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        if not (0 <= processed_result < self.parameter["MOD"]):
            return self.rewards["wrong_range"]
        if processed_result == self.parameter["reference_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
server/Gym/environments/and_or_sequence_counting/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import AndOr_Sequence_Counting_Environment
server/Gym/environments/and_or_sequence_counting/environment.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from typing import Optional
3
+ from ...environment import VerifiableEnvironment
4
+
5
+
6
class AndOr_Sequence_Counting_Environment(VerifiableEnvironment):
    """Count arrays B such that A[i] & B[i] is non-decreasing and A[i] | B[i] is non-increasing.

    The AND values depend only on the bits of B where A has a 1 and the OR
    values only on the bits where A has a 0, so the two constraints are
    counted independently and multiplied.
    """

    prompt_template = r"""You are given an integer array `A` of length {N}:
{A}

Please count the number of valid integer arrays `B` of length {N} that satisfy the following conditions:
- For all indices 0 <= i <= {N_minus_1}, the value B[i] must be in the range: 0 <= B[i] < 2^{M} = {power_2_M}
- For all indices 0 <= i < {N_minus_1}, the following bitwise conditions hold:
- (A[i] & B[i]) <= (A[i + 1] & B[i + 1])
- (A[i] | B[i]) >= (A[i + 1] | B[i + 1])
- (Here, `&` is the bitwise AND operator and `|` is the bitwise OR operator.)

**Output Format:** Your final answer should be a single integer — the number of valid arrays `B` that satisfy all the above conditions."""

    def __init__(self,
                 wrong_format: float = -1.0, rewarding_strategy: str = "(min/max)^beta",
                 rewarding_weight: float = 1.0, rewarding_beta: float = 10.0,
                 **kwargs):
        """
        Initialize the AndOr_Sequence_Counting_Environment instance.

        Args:
            wrong_format: Reward for an unparsable or negative answer.
            rewarding_strategy: ``"(min/max)^beta"`` or ``"gold=answer"``.
            rewarding_weight: Multiplier applied to the strategy's score.
            rewarding_beta: Exponent used by ``"(min/max)^beta"``.
            **kwargs: Forwarded unchanged to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format": wrong_format,
            "rewarding_strategy": rewarding_strategy,
            "rewarding_weight": rewarding_weight,
            "rewarding_beta": rewarding_beta,
        }

    @staticmethod
    def _count_split_sequences(N: int, num_bits: int, A, suffix_needs_one: bool) -> int:
        """Interval DP over bits ``0 .. num_bits - 1`` (low to high).

        ``ways[l][r]`` is the number of valid assignments for indices
        ``l..r`` using the bits processed so far.  For each new bit the
        interval is cut at ``x`` into a prefix ``l..x`` and a suffix
        ``x+1..r`` that are counted independently on the lower bits.  The
        cut is allowed only if bit ``b`` of every ``A[i]`` in the suffix is
        1 (``suffix_needs_one=True``, the AND constraint) or 0
        (``suffix_needs_one=False``, the OR constraint).
        """
        # With no bits processed every non-empty interval has exactly one assignment.
        ways = [[1 if l <= r else 0 for r in range(N)] for l in range(N)]

        for b in range(num_bits):
            # ones[i] = how many of A[0..i-1] have bit b set.
            ones = [0] * (N + 1)
            for i, value in enumerate(A):
                ones[i + 1] = ones[i] + ((value >> b) & 1)

            nxt = [[0] * N for _ in range(N)]
            for l in range(N):
                for r in range(l, N):
                    total = 0
                    for x in range(l - 1, r + 1):
                        suffix_ones = ones[r + 1] - ones[x + 1]
                        required = (r - x) if suffix_needs_one else 0
                        if suffix_ones != required:
                            continue
                        left_count = ways[l][x] if x >= l else 1      # empty prefix
                        right_count = ways[x + 1][r] if x + 1 <= r else 1  # empty suffix
                        total += left_count * right_count
                    nxt[l][r] = total
            ways = nxt

        return ways[0][N - 1]

    def _generate(self) -> None:
        """Draw ``A`` and store the reference count.

        Reads ``self.parameter["N"]`` (>= 2) and ``self.parameter["M"]``
        (>= 1); writes ``"A"`` and ``"reference_answer"``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        assert "M" in self.parameter, "M is required in parameter"
        M = self.parameter["M"]
        assert M >= 1, "M should be greater than or equal to 1"

        A = self.parameter["A"] = [random.randint(0, 2 ** M - 1) for i in range(N)]

        and_ways = self._count_split_sequences(N, M, A, suffix_needs_one=True)
        or_ways = self._count_split_sequences(N, M, A, suffix_needs_one=False)
        self.parameter["reference_answer"] = and_ways * or_ways

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N, M, 2**M and the array A."""
        return self.prompt_template.format(
            N=self.parameter["N"],
            N_minus_1=self.parameter["N"] - 1,
            M=self.parameter["M"],
            power_2_M=2 ** self.parameter["M"],
            A=" ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"])),
        )

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse ``answer`` as a single integer; return ``None`` if absent or malformed."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Return the reward for ``output``.

        ``self.processor`` is defined outside this class body; its result is
        treated as the parsed integer answer or ``None``.

        Raises:
            NotImplementedError: If the rewarding strategy is not recognised.
        """
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        if processed_result < 0:
            return self.rewards["wrong_format"]

        # A zero reference cannot be used as a ratio denominator: exact match only.
        if self.parameter["reference_answer"] == 0:
            return self.rewards["rewarding_weight"] * (processed_result == 0)

        if self.rewards["rewarding_strategy"] == "(min/max)^beta":
            a, b = self.parameter["reference_answer"], processed_result
            return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
        elif self.rewards["rewarding_strategy"] == "gold=answer":
            return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
        else:
            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
server/Gym/environments/anti_palindromic_substring_counting/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import AntiPalindromicSubstringCounting_Environment
server/Gym/environments/anti_palindromic_substring_counting/environment.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from typing import Optional
3
+ from ...environment import VerifiableEnvironment
4
+
5
+
6
class AntiPalindromicSubstringCounting_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3501
    """Count contiguous substrings whose reverse equals their bitwise complement."""

    prompt_template = r"""We define an **anti-palindromic binary string** as a binary string such that its reverse is equal to the bitwise complement of the original string (i.e., '0' becomes '1' and '1' becomes '0'). For example, `000111` is anti-palindromic because its reverse is `111000`, which is the bitwise complement of `000111`. But `1001` is not, because its reverse is `1001`, while its flipped version is `0110`.

You are given a binary string: {S}
Please count the number of **contiguous substrings** of `S` that are anti-palindromic. Two substrings are considered different if they appear at different positions in `S`. Output a single integer — the number of anti-palindromic substrings."""

    def __init__(self,
                 wrong_format: float = -1.0, rewarding_strategy: str = "(min/max)^beta",
                 rewarding_weight: float = 1.0, rewarding_beta: float = 10.0,
                 **kwargs):
        """
        Initialize the AntiPalindromicSubstringCounting_Environment instance.

        The four reward settings are stored in ``self.rewards``; any other
        keyword arguments go to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    @staticmethod
    def _count_anti_palindromes(S: str) -> int:
        """Count anti-palindromic substrings of ``S`` with a Manacher-style scan.

        The string is interleaved with ``#`` separators and fenced by ``$``
        sentinels.  Only separator positions are used as centres, because an
        anti-palindrome always has even length.
        """
        padded = ["$", "#"]
        for ch in S:
            padded += [ch, "#"]
        padded.append("$")
        size = len(padded)

        radius = [0] * size
        flip = {"0": "1", "1": "0", "#": "#"}  # '$' is absent on purpose: it stops expansion

        center = 1  # centre whose match window reaches furthest right
        reach = 1   # right boundary of that window (center + radius[center])
        total = 0

        # Odd indices 1, 3, ..., size - 2 are exactly the '#' separators.
        for i in range(1, size - 1, 2):
            if i < reach:
                radius[i] = min(reach - i, radius[2 * center - i])
            else:
                radius[i] = 1

            # Grow while the right character is the complement of the left one.
            while True:
                lo = i - radius[i]
                hi = i + radius[i]
                if lo < 0 or hi >= size:
                    break
                left_ch, right_ch = padded[lo], padded[hi]
                if left_ch not in flip or right_ch not in flip:
                    break
                if right_ch != flip[left_ch]:
                    break
                radius[i] += 1

            if i + radius[i] > reach:
                reach = i + radius[i]
                center = i

            # Two padded steps correspond to one more substring around this centre.
            total += radius[i] >> 1

        return total

    def _generate(self) -> None:
        """Build a random binary string of length N and store its answer.

        Reads ``self.parameter["N"]`` (>= 3); writes ``"S"`` and
        ``"reference_answer"``.  The string is a concatenation of segments:
        even-length segments are made anti-palindromic by construction,
        odd-length ones are random bits.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        cut_count = random.randint(0, N - 1)
        cuts = sorted(random.sample(range(1, N), cut_count))
        boundaries = [0] + cuts + [N]

        one_probability = random.random()

        def random_bits(count: int) -> str:
            return "".join("1" if random.random() < one_probability else "0" for _ in range(count))

        pieces = []
        for begin, end in zip(boundaries, boundaries[1:]):
            span = end - begin
            if span % 2 == 0:
                first_half = random_bits(span // 2)
                mirrored = "".join("1" if c == "0" else "0" for c in reversed(first_half))
                pieces.append(first_half + mirrored)
            else:
                pieces.append(random_bits(span))
        S = "".join(pieces)

        self.parameter["S"] = S
        assert len(S) == N, f"Generated string length {len(S)} does not match N {N}"

        self.parameter["reference_answer"] = self._count_anti_palindromes(S)

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the generated string."""
        return self.prompt_template.format(S=self.parameter["S"])

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse ``answer`` as a single integer; return ``None`` if absent or malformed."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Return the reward for ``output``.

        ``self.processor`` is defined outside this class body; its result is
        treated as the parsed integer answer or ``None``.

        Raises:
            NotImplementedError: If the rewarding strategy is not recognised.
        """
        processed_result = self.processor(output)
        if processed_result is None or processed_result < 0:
            return self.rewards["wrong_format"]

        if self.rewards["rewarding_strategy"] == "(min/max)^beta":
            if self.parameter["reference_answer"] == 0:
                # Ratio undefined for a zero reference: exact match only.
                return self.rewards["rewarding_weight"] * int(processed_result == 0)
            a, b = self.parameter["reference_answer"], processed_result
            return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
        if self.rewards["rewarding_strategy"] == "gold=answer":
            return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
server/Gym/environments/axis_k_center/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import Axis_KCenter_Environment
server/Gym/environments/axis_k_center/environment.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from typing import Optional, List
3
+ from ...environment import VerifiableEnvironment
4
+
5
+
6
class Axis_KCenter_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P4767
    """Pick K of N points on a line to minimise the summed distance to the nearest pick."""

    prompt_template = r"""You are given {N} points on a line, labeled from 0 to {N_minus_1}. Their positions (from left to right) are: {X}

Please select a set of {K} distinct points. Try your best to minimize the total distance from all points to their nearest selected point (the distance is the absolute difference between positions).

**Output Format:** Your final answer should be a single line containing the indices of the selected {K} points in any order, separated by spaces."""

    def __init__(self,
                 position_multiple: int = 5,
                 wrong_format: float = -1.0, invalid_solution: float = -0.5,
                 rewarding_strategy: str = "(gold/answer)^beta",
                 rewarding_weight: float = +1.0, rewarding_beta: float = 5.0,
                 **kwargs):
        """
        Initialize the Axis_KCenter_Environment instance.

        ``position_multiple`` scales the coordinate range positions are
        sampled from; the remaining named arguments are reward settings.
        Other keyword arguments go to the base class constructor.
        """
        super().__init__(**kwargs)

        self.position_multiple = position_multiple

        self.rewards = dict(
            wrong_format=wrong_format,
            invalid_solution=invalid_solution,
            rewarding_strategy=rewarding_strategy,
            rewarding_weight=rewarding_weight,
            rewarding_beta=rewarding_beta,
        )

    @staticmethod
    def _min_total_distance(X, K: int) -> int:
        """Return the optimal total distance for sorted positions ``X`` and ``K`` picks.

        Partition DP with a restricted search window for the split point
        (the bounds come from neighbouring optimal split points).
        """
        N = len(X)
        INF = N * (X[-1] - X[0] + 1)  # larger than any achievable total

        # Precompute seg_cost[l][r]: cost of serving points l..r (inclusive,
        # 0-indexed) from the single point at their median index.
        seg_cost = [[0] * N for _ in range(N)]
        for l in range(N):
            for r in range(l + 1, N):
                seg_cost[l][r] = seg_cost[l][r - 1] + (X[r] - X[(l + r) // 2])

        # best[i][j]: minimum total for the first i points using j picks.
        best = [[INF] * (K + 1) for _ in range(N + 1)]
        # split[i][j]: the k that achieved best[i][j]; bounds later searches.
        split = [[0] * (K + 1) for _ in range(N + 2)]
        best[0][0] = 0

        for j in range(1, K + 1):
            split[N + 1][j] = N
            for i in range(N, 0, -1):
                low = split[i][j - 1]
                high = min(split[i + 1][j], i - 1)
                winner_cost, winner_k = INF, 0
                for k in range(low, high + 1):
                    candidate = best[k][j - 1] + seg_cost[k][i - 1]
                    if candidate < winner_cost:
                        winner_cost, winner_k = candidate, k
                best[i][j] = winner_cost
                split[i][j] = winner_k

        return best[N][K]

    def _generate(self) -> None:
        """Draw K and N distinct sorted positions, then store the optimum.

        Reads ``self.parameter["N"]`` (>= 3); writes ``"K"``, ``"X"`` and
        ``"gold_answer"``.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        K = self.parameter["K"] = random.randint(1, N - 1)

        X = sorted(random.sample(range(N * self.position_multiple + 1), N))
        self.parameter["X"] = X

        self.parameter["gold_answer"] = self._min_total_distance(X, K)

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N, K and the positions."""
        N = self.parameter["N"]
        positions = " ".join(str(x) for x in self.parameter["X"])
        return self.prompt_template.format(N=N, N_minus_1=N - 1, K=self.parameter["K"], X=positions)

    def _process(self, answer: Optional[str]) -> Optional[List]:
        """Parse ``answer`` as whitespace-separated integers; ``None`` if absent or malformed."""
        if answer is None:
            return None
        try:
            return [int(token) for token in answer.strip().split()]
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Return the reward for ``output``.

        ``self.processor`` is defined outside this class body; its result is
        treated as the parsed index list or ``None``.  A selection that has
        duplicates, the wrong size, or out-of-range indices earns
        ``rewards["invalid_solution"]``.

        Raises:
            NotImplementedError: If the rewarding strategy is not recognised.
        """
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        assert isinstance(processed_result, list), "processed_result should be a list"

        selected_points = processed_result
        N, X = self.parameter["N"], self.parameter["X"]

        has_duplicates = len(selected_points) != len(set(selected_points))
        wrong_size = len(selected_points) != self.parameter["K"]
        if has_duplicates or wrong_size or not all(0 <= u < N for u in selected_points):
            return self.rewards["invalid_solution"]

        answer = sum(min(abs(X[u] - X[v]) for v in selected_points) for u in range(N))
        gold = self.parameter["gold_answer"]
        assert gold <= answer, "gold should be less than or equal to answer"

        if self.rewards["rewarding_strategy"] == "(gold/answer)^beta":
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        if self.rewards["rewarding_strategy"] == "gold=answer":
            return self.rewards["rewarding_weight"] * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
server/Gym/environments/baj_bytecomputer/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import BAJBytecomputer_Environment
server/Gym/environments/baj_bytecomputer/environment.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from typing import Optional
3
+ from ...environment import VerifiableEnvironment
4
+
5
+
6
class BAJBytecomputer_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3558
    """Minimum number of ``X[i+1] += X[i]`` operations to make a {-1, 0, 1} array non-decreasing."""

    prompt_template = r"""You are given an array X of length {N}, where each element is initially -1, 0, or +1: {X}
You may perform the following operation any number of times: choose an index i (1 ≤ i < {N}), and update X[i + 1] := X[i + 1] + X[i]. Your goal is to make the array non-decreasing, i.e., X[1] ≤ X[2] ≤ ... ≤ X[{N}]; please output the **minimum number of operations** required to achieve this."""

    def __init__(self,
                 wrong_format: float = -1.0, correct_answer: float = 1.0, incorrect_answer: float = 0.0,
                 **kwargs):
        """
        Initialize the BAJBytecomputer_Environment instance.

        The three reward values are stored in ``self.rewards``; any other
        keyword arguments go to the base class constructor.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format=wrong_format,
            correct_answer=correct_answer,
            incorrect_answer=incorrect_answer,
        )

    @staticmethod
    def _min_operations(X) -> Optional[int]:
        """Return the DP optimum for ``X``, or ``None`` when the DP finds no solution.

        ``cost[v + 1]`` is the fewest operations after which the element just
        processed holds the value ``v`` (one of -1, 0, 1) and everything up
        to it is non-decreasing.
        """
        # Each element receives at most two additions, so 2 * N + 5 exceeds
        # every reachable cost and serves as "unreachable".
        INF = 2 * len(X) + 5

        cost = [INF] * 3
        cost[X[0] + 1] = 0  # the first element can never change

        for x in X[1:]:
            nxt = [INF] * 3
            for prev_val, ops_so_far in zip((-1, 0, 1), cost):
                if ops_so_far >= INF:
                    continue
                # Add the previous element's final value 0, 1 or 2 times.
                for additions in range(3):
                    new_x = x + additions * prev_val
                    if -1 <= new_x <= 1 and new_x >= prev_val:
                        nxt[new_x + 1] = min(nxt[new_x + 1], ops_so_far + additions)
            cost = nxt

        answer = min(cost)
        return answer if answer < INF else None

    def _generate(self) -> None:
        """Draw arrays until one is solvable, then store it with its answer.

        Reads ``self.parameter["N"]`` (>= 3); writes ``"X"`` on every
        attempt and ``"reference_answer"`` for the accepted one.
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 3, "N should be greater than or equal to 3"

        while True:
            # Random relative weights for the values -1, 0 and 1.
            distribution = [random.randint(1, N) for _ in range(3)]
            X = self.parameter["X"] = [random.choices([-1, 0, 1], weights=distribution)[0] for _ in range(N)]

            answer = self._min_operations(X)
            if answer is not None:
                self.parameter["reference_answer"] = answer
                break

    def _prompt_generate(self) -> str:
        """Fill the prompt template with N and the 1-indexed array listing."""
        listing = ", ".join("X[{}]={}".format(i + 1, Xi) for i, Xi in enumerate(self.parameter["X"]))
        return self.prompt_template.format(N=self.parameter["N"], X=listing)

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse ``answer`` as a single integer; return ``None`` if absent or malformed."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Return the reward for ``output``.

        ``self.processor`` is defined outside this class body; its result is
        treated as the parsed integer answer or ``None``.
        """
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        if processed_result == self.parameter["reference_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["incorrect_answer"]
server/Gym/environments/banned_point_superset_path_counting/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import BannedPointSupersetPathCounting_Environment
server/Gym/environments/banned_point_superset_path_counting/environment.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from typing import Optional, List
3
+ from ...environment import VerifiableEnvironment
4
+
5
+
6
class BannedPointSupersetPathCounting_Environment(VerifiableEnvironment):  # Source : https://www.luogu.com.cn/problem/P3734
    """Count bit-superset paths from (0, 0, 0) to (N, M, R) that avoid banned points."""

    prompt_template = r"""In a three-dimensional space, you start at point (0, 0, 0) and want to reach the point ({N}, {M}, {R}). At each step, if you are currently at (x, y, z), you may move to a new (different from the current one) point of one of the following types:
1. (x', y, z) such that x AND x' = x
2. (x, y', z) such that y AND y' = y
3. (x, y, z') such that z AND z' = z
(AND refers to the bitwise AND operation.)

You are **not allowed** to visit any of the following points:
{obstacles}

Please count the number of distinct valid paths from (0, 0, 0) to ({N}, {M}, {R}) that avoid all forbidden points. Output the result modulo {MOD}."""

    def __init__(self,
                 max_MOD: int = 10000,
                 wrong_format: float = -1.0, wrong_range: float = -0.5,
                 correct_answer: float = +1.0, wrong_answer: float = 0.0,
                 **kwargs) -> None:
        """
        Initialize the BannedPointSupersetPathCounting_Environment instance.

        ``max_MOD`` is the largest modulus that may be drawn; the remaining
        named arguments are reward values.  Other keyword arguments go to
        the base class constructor.
        """
        super().__init__(**kwargs)

        self.max_MOD = max_MOD
        self.rewards = dict(
            wrong_format=wrong_format,
            wrong_range=wrong_range,
            correct_answer=correct_answer,
            wrong_answer=wrong_answer,
        )

    @staticmethod
    def _count_paths(N: int, M: int, R: int, obstacles, MOD: int) -> int:
        """Return the number of obstacle-avoiding paths to (N, M, R), mod MOD."""
        # Lexicographic order puts every coordinate-wise bit-subset of a
        # point before that point; the target goes last.
        points = sorted([(0, 0, 0)] + obstacles)
        points.append((N, M, R))
        total = len(points)

        dx, dy, dz = N.bit_count(), M.bit_count(), R.bit_count()
        max_d = max(dx, dy, dz)

        # Pascal's triangle modulo MOD, up to row max_d.
        binom = [[0] * (max_d + 1) for _ in range(max_d + 1)]
        for i in range(max_d + 1):
            binom[i][0] = 1
            for j in range(1, i + 1):
                binom[i][j] = (binom[i - 1][j - 1] + binom[i - 1][j]) % MOD

        # free[a][b][c]: unrestricted path count between two points that
        # differ in a bits of x, b bits of y and c bits of z.  The last step
        # completes one coordinate from a strict subset of its bits.
        free = [[[0] * (dz + 1) for _ in range(dy + 1)] for __ in range(dx + 1)]
        free[0][0][0] = 1
        for a in range(dx + 1):
            for b in range(dy + 1):
                for c in range(dz + 1):
                    if a == b == c == 0:
                        continue
                    via_x = sum(free[i][b][c] * binom[a][i] for i in range(a))
                    via_y = sum(free[a][j][c] * binom[b][j] for j in range(b))
                    via_z = sum(free[a][b][k] * binom[c][k] for k in range(c))
                    free[a][b][c] = (via_x + via_y + via_z) % MOD

        # Signed inclusion-exclusion over the sorted points:
        # signed[i] = -(sum over earlier bit-subsets j of signed[j] * free[diff]).
        signed = [0] * total
        signed[0] = 1  # only one way to stay at the origin
        for i in range(1, total):
            xi, yi, zi = points[i]
            acc = 0
            for j in range(i):
                xj, yj, zj = points[j]
                if (xj & xi) != xj or (yj & yi) != yj or (zj & zi) != zj:
                    continue  # points[j] is not a bit-subset of points[i]
                bx = (xi ^ xj).bit_count()
                by = (yi ^ yj).bit_count()
                bz = (zi ^ zj).bit_count()
                acc = (acc + signed[j] * free[bx][by][bz]) % MOD
            signed[i] = (-acc) % MOD

        # Negating the target's entry recovers the positive count.
        return (-signed[-1]) % MOD

    def _generate(self) -> None:
        """Draw the target, the obstacles and the modulus, then store the answer.

        Reads ``self.parameter["MAX_N_M_R"]`` and ``self.parameter["MAX_O"]``
        (both >= 1); writes ``"N"``, ``"M"``, ``"R"``, ``"O"``,
        ``"obstacles"``, ``"MOD"`` and ``"reference_answer"``.
        """
        assert "MAX_N_M_R" in self.parameter, "MAX_N_M_R is required in parameter"
        MAX_N_M_R = self.parameter["MAX_N_M_R"]
        assert MAX_N_M_R >= 1, "MAX_N_M_R should be greater than or equal to 1"

        # Redraw until at least one point other than start/target exists.
        while True:
            N = random.randint(0, MAX_N_M_R)
            M = random.randint(0, MAX_N_M_R)
            R = random.randint(0, MAX_N_M_R)
            self.parameter["N"], self.parameter["M"], self.parameter["R"] = N, M, R
            candidate_count = (2 ** N.bit_count()) * (2 ** M.bit_count()) * (2 ** R.bit_count()) - 2
            if candidate_count >= 1:
                break

        assert "MAX_O" in self.parameter, "MAX_O is required in parameter"
        MAX_O = self.parameter["MAX_O"]
        assert MAX_O >= 1, "MAX_O should be greater than or equal to 1"
        MAX_O = min(MAX_O, candidate_count)
        O = self.parameter["O"] = random.randint(1, MAX_O)

        def set_bits(value: int) -> List[int]:
            """List the powers of two present in ``value``, lowest first."""
            found = []
            bit = 1
            while bit <= value:
                if value & bit:
                    found.append(bit)
                bit <<= 1
            return found

        def random_subset(bits: List[int]) -> int:
            """Return the sum of a random-size random sample of ``bits``."""
            return sum(random.sample(bits, random.randint(0, len(bits))))

        N_bits, M_bits, R_bits = set_bits(N), set_bits(M), set_bits(R)

        excluded = ((0, 0, 0), (N, M, R))
        chosen = set()
        while len(chosen) < O:
            point = (random_subset(N_bits), random_subset(M_bits), random_subset(R_bits))
            if point not in excluded:
                chosen.add(point)  # adding an existing point is a no-op
        obstacles = list(chosen)
        random.shuffle(obstacles)
        self.parameter["obstacles"] = obstacles.copy()

        MOD = self.parameter["MOD"] = random.randint(2, self.max_MOD)

        self.parameter["reference_answer"] = self._count_paths(N, M, R, obstacles, MOD)

    def _prompt_generate(self) -> str:
        """Fill the prompt template with the target, the obstacle list and MOD."""
        obstacle_lines = "\n".join("({}, {}, {})".format(x, y, z) for x, y, z in self.parameter["obstacles"])
        return self.prompt_template.format(
            N=self.parameter["N"],
            M=self.parameter["M"],
            R=self.parameter["R"],
            obstacles=obstacle_lines,
            MOD=self.parameter["MOD"],
        )

    def _process(self, answer: Optional[str]) -> Optional[int]:
        """Parse ``answer`` as a single integer; return ``None`` if absent or malformed."""
        if answer is None:
            return None
        try:
            return int(answer.strip())
        except ValueError:
            return None

    def scorer(self, output: str) -> float:
        """Return the reward for ``output``.

        ``self.processor`` is defined outside this class body; its result is
        treated as the parsed integer answer or ``None``.
        """
        processed_result = self.processor(output)
        if processed_result is None:
            return self.rewards["wrong_format"]
        if not (0 <= processed_result < self.parameter["MOD"]):
            return self.rewards["wrong_range"]
        if processed_result == self.parameter["reference_answer"]:
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
server/Gym/environments/banyan_heart/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import BanyanHeart_Environment
server/Gym/environments/banyan_heart/environment.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import networkx
3
+ from typing import Optional
4
+ from ...environment import VerifiableEnvironment
5
+
6
+
7
+ class BanyanHeart_Environment(VerifiableEnvironment) :
8
+ prompt_template = \
9
+ r"""We use the following process to generate a tree with {N} vertices labeled from 1 to {N}:
10
+ - Initially, the tree contains only vertex 1, and its **heart vertex** is also 1.
11
+ - At each step, we add a new vertex `i` (2 ≤ i ≤ {N}) and connect it to an existing vertex with an undirected edge. Then, the heart vertex moves one step toward `i` (i.e., it moves to the neighbor that is closer to `i`).
12
+ - This process continues until all {N} vertices have been added.
13
+
14
+ The final tree has the following edges:
15
+ {edges}
16
+
17
+ Can you determine which vertices could be the heart vertex after the process is completed? Output a single line with {N} characters (either `T` or `F`) without separators, where the i-th character is `T` if vertex i can be the heart vertex, and `F` otherwise."""
18
+
19
def __init__(self,
             wrong_format : float = -1.0, rewarding_strategy : str = "(intersection/union)^beta", rewarding_beta : float = 5.0, rewarding_weight : float = +1.0,
             **kwargs) :
    """
    Set up the BanyanHeart_Environment and record its reward configuration.

    All remaining keyword arguments are forwarded to the base class.
    """
    super().__init__(**kwargs)

    # Reward table consulted by `scorer`.
    self.rewards = dict(
        wrong_format=wrong_format,
        rewarding_strategy=rewarding_strategy,
        rewarding_beta=rewarding_beta,
        rewarding_weight=rewarding_weight,
    )
33
+
34
+
35
def _generate(self) -> None :
    """
    Build a random labeled tree on N vertices and compute the reference answer.

    Reads ``self.parameter["N"]`` (asserted to be at least 4). Writes the
    shuffled edge list to ``self.parameter["edges"]`` and a string of N
    characters ``T``/``F`` to ``self.parameter["reference_answer"]``.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 4, "N should be greater than or equal to 4"

    # Random tree: visit the vertices in a random order and attach each one
    # (except the first) to a uniformly chosen earlier vertex.
    edges = self.parameter["edges"] = []
    permutations = list(range(1, N + 1))
    random.shuffle(permutations)
    for index, vertex in enumerate(permutations) :
        if index == 0 :
            continue
        u, v = vertex, random.choice(permutations[: index])
        u, v = min(u, v), max(u, v)  # store every edge as (smaller, larger)
        edges.append((u, v))
    random.shuffle(edges)

    # Sanity checks on the generated edge list.
    for u, v in edges :
        assert 1 <= u < v <= N
    assert len(edges) == len(set(edges)) == N - 1

    tree = networkx.Graph()
    tree.add_edges_from(edges)
    assert networkx.is_tree(tree)


    # Build adjacency list dynamically
    adjacency = [[] for _ in range(N + 1)]
    for u, v in edges:
        adjacency[u].append(v)
        adjacency[v].append(u)

    # Arrays (1..N); index 0 acts as a dummy node
    dep = [0] * (N + 1)      # depth, with the root (vertex 1) at depth 1
    siz = [0] * (N + 1)      # subtree size
    hson = [0] * (N + 1)     # child with the largest subtree (0 if none)
    hson2 = [0] * (N + 1)    # child with the second-largest subtree (0 if none)
    # NOTE(review): f[u] looks like 1 + the unavoidable leftover depth inside
    # u's subtree — confirm against the reference solution.
    f = [0] * (N + 1)
    ans = [False] * (N + 1)

    # cmp function: return the index with larger siz
    def cmp(x, y):
        return x if siz[x] > siz[y] else y

    # Iterative dfs1: compute dep, siz, hson, hson2, f
    stack = [(1, 0, 0)]  # (u, parent, state) state 0=enter, 1=exit
    dep[0] = 0
    while stack:
        u, fa, state = stack.pop()
        if state == 0:
            dep[u] = dep[fa] + 1
            # Re-push u so it is processed again after all of its children.
            stack.append((u, fa, 1))
            for v in adjacency[u]:
                if v == fa:
                    continue
                stack.append((v, u, 0))
        else:
            # post-order processing
            s = 1
            h1 = 0
            h2 = 0
            for v in adjacency[u]:
                if v == fa:
                    continue
                s += siz[v]
                # Track the two largest child subtrees (strict comparison).
                if siz[v] > siz[h1]:
                    h2 = h1
                    h1 = v
                elif siz[v] > siz[h2]:
                    h2 = v
            siz[u] = s
            hson[u] = h1
            hson2[u] = h2

            # Compare f of the heaviest child with the number of vertices
            # in the other child subtrees (siz[u] - 1 - siz[h1]).
            if f[h1] <= (siz[u] - 1 - siz[h1]):
                fv = (siz[u] - 1) % 2
            else:
                fv = f[h1] - (siz[u] - 1 - siz[h1])
            f[u] = fv + 1

    # Iterative dfs2: compute ans
    stack = [(1, 0, 0)]  # (u, parent, h)
    while stack:
        u, fa, h = stack.pop()
        # h is the largest-subtree candidate inherited from the ancestors;
        # combine it with u's own heaviest child.
        tmp = cmp(hson[u], h)
        if f[tmp] <= N - dep[u] - siz[tmp]:
            # Feasible only when N and dep[u] have the same parity.
            ans[u] = ((N & 1) == (dep[u] & 1))
        for v in adjacency[u]:
            if v == fa:
                continue
            # Going into the heaviest child excludes it as a candidate, so
            # fall back to the second-heaviest child there.
            if v == hson[u]:
                h_child = cmp(hson2[u], h)
            else:
                h_child = cmp(hson[u], h)
            stack.append((v, u, h_child))

    self.parameter["reference_answer"] = "".join("T" if ans[i] else "F" for i in range(1, N + 1))
    assert "T" in self.parameter["reference_answer"], "At least one vertex should be able to be the heart vertex"
132
+
133
+
134
def _prompt_generate(self) -> str:
    """Render the prompt with N and one ``(u, v)`` line per tree edge."""
    vertex_count = self.parameter["N"]
    edge_lines = ["({}, {})".format(a, b) for a, b in self.parameter["edges"]]
    return self.prompt_template.format(N=vertex_count, edges="\n".join(edge_lines))
139
+
140
+
141
def _process(self, answer: Optional[str]) -> Optional[str]:
    """Validate a ``T``/``F`` answer string.

    Returns the stripped answer when it has exactly N characters, all of
    them ``T`` or ``F``; otherwise returns ``None``.
    """
    if answer is None:
        return None
    text = answer.strip()
    if len(text) != self.parameter["N"]:
        return None
    if any(ch not in "TF" for ch in text):
        return None
    return text
149
+
150
+
151
def scorer(self, output: str) -> float:
    """Score a ``T``/``F`` answer by the overlap of its ``T`` positions with the reference.

    Returns the ``wrong_format`` reward when the output cannot be processed.
    Otherwise the score follows the configured ``rewarding_strategy``.

    Raises:
        NotImplementedError: if the rewarding strategy is not recognised.
    """
    candidate = self.processor(output)
    if candidate is None:
        return self.rewards["wrong_format"]

    reference = self.parameter["reference_answer"]
    intersection = sum(1 for got, want in zip(candidate, reference) if got == "T" and want == "T")
    union = sum(1 for got, want in zip(candidate, reference) if got == "T" or want == "T")
    assert intersection <= union, "intersection should not exceed union"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(intersection/union)^beta":
        ratio = intersection / union
        return (ratio ** self.rewards["rewarding_beta"]) * self.rewards["rewarding_weight"]
    if strategy == "intersection=union":
        return self.rewards["rewarding_weight"] * (intersection == union)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
server/Gym/environments/bez_minimalist_security/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import BEZMinimalistSecurity_Environment
server/Gym/environments/bez_minimalist_security/environment.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from typing import Optional, List
3
+ from ...environment import VerifiableEnvironment
4
+
5
+
6
+ class BEZMinimalistSecurity_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3544
7
+ prompt_template = \
8
+ r"""There is an array P of length {N}. Initially, P is: {P}
9
+
10
+ Now we want to construct a new array P' of length {N}, where 0 <= P'[i] <= P[i] for all i. Additionally, there are some constraints of the form P'[u] + P'[v] = w, where u and v are indices and w is a constant (it is guaranteed that P[u] + P[v] >= w). The constraints are:
11
+ {constraints}
12
+
13
+ Please output P'[0], P'[1], ..., P'[{N_minus_1}], separated by spaces, such that they satisfy all the constraints and their sum is {minimized_or_maximized}."""
14
+
15
def __init__(self,
             wrong_format : float = -1.0, invalid_solution : float = -0.5,
             rewarding_strategy_min : str = "(gold/answer)^beta", rewarding_weight_min : float = +1.0, rewarding_beta_min : float = 5.0,
             rewarding_strategy_max : str = "(answer/gold)^beta", rewarding_weight_max : float = +1.0, rewarding_beta_max : float = 5.0,
             **kwargs) :
    """
    Set up the BEZMinimalistSecurity_Environment and record its reward configuration.

    Separate strategy/weight/beta triples are kept for the minimisation and
    maximisation variants. Remaining keyword arguments go to the base class.
    """
    super().__init__(**kwargs)

    # Reward table consulted by `scorer`.
    self.rewards = dict(
        wrong_format=wrong_format,
        invalid_solution=invalid_solution,
        rewarding_strategy_max=rewarding_strategy_max,
        rewarding_weight_max=rewarding_weight_max,
        rewarding_beta_max=rewarding_beta_max,
        rewarding_strategy_min=rewarding_strategy_min,
        rewarding_weight_min=rewarding_weight_min,
        rewarding_beta_min=rewarding_beta_min,
    )
35
+
36
+
37
def _generate(self) -> None :
    """
    Generate a random instance and compute the optimal sum of P'.

    Reads ``self.parameter["N"]`` (asserted >= 3) and
    ``self.parameter["edge_ratio"]``. Writes ``edges`` (triples ``(u, v, w)``),
    ``P``, ``minimized_or_maximized`` and ``gold_answer`` into
    ``self.parameter``.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 3, "N should be at least 3"

    # Hidden feasible assignment; every constraint value w is derived from it.
    P_prime = [random.randint(0, N) for _ in range(N)]

    assert "edge_ratio" in self.parameter, "edge_ratio is required in parameter"
    edge_ratio = self.parameter["edge_ratio"]
    # Sample between 1 and N*(N-1)/2 distinct pairs (u < v), about edge_ratio * N of them.
    edges = self.parameter["edges"] = random.sample([(u, v, P_prime[u] + P_prime[v]) for u in range(N) for v in range(u + 1, N)], max(1, min(N * (N - 1) // 2, int(edge_ratio * N))))
    random.shuffle(edges)
    for u, v, w in edges :
        assert 0 <= u < v < N
    assert len(edges) == len(set((u, v) for u, v, w in edges)), "edges should be unique"

    # Upper bounds: each P[u] is at least the hidden value P_prime[u].
    P = self.parameter["P"] = [P_prime_u + random.randint(0, N) for P_prime_u in P_prime]


    # Build adjacency list (0-indexed)
    adjacency = [[] for _ in range(N)]
    for u, v, w in edges:
        adjacency[u].append((v, w))
        adjacency[v].append((u, w))

    # Within a component every value is written as sgn[u] * x + cons[u],
    # where x is the value at the component's anchor node.
    vis = [False] * N
    sgn = [0] * N
    cons = [0] * N
    q = [0] * N    # concrete values when x is uniquely determined
    mn = 0         # minimum total reduction sum(P[u] - P'[u])
    mx = 0         # maximum total reduction sum(P[u] - P'[u])

    def wa() :
        # Abort generation: the constraint system turned out inconsistent.
        assert False, "Invalid solution"

    def dfs(u):  # Depth-first search on component
        nonlocal fix
        vis[u] = True
        stc.append(u)
        # Early exit if constraint too large
        if cons[u] > 10**6:
            wa()
        for v, w in adjacency[u]:
            if not vis[v]:
                # Tree edge: x_v = w - x_u flips the sign and shifts the constant.
                sgn[v] = -sgn[u]
                cons[v] = w - cons[u]
                dfs(v)
            else:
                if sgn[u] == sgn[v]:
                    # Same sign on both ends: the edge pins x to a single value.
                    res = w - cons[u] - cons[v]
                    # Must be even
                    if res & 1:
                        wa()
                    denom = 2 * sgn[u]
                    res //= denom
                    # Check valid fixed value
                    if res < 0 or res > P[anc] or (fix is not None and fix != res):
                        wa()
                    fix = res
                else:
                    # Sum of constants must match
                    if cons[u] + cons[v] != w:
                        wa()

    # Process each connected component
    for i in range(N):
        if not vis[i]:
            stc = []      # nodes in current component
            anc = i       # anchor node for fixed value range
            fix = None    # fixed solution parameter
            sgn[i] = 1    # sign for anchor
            cons[i] = 0   # constant offset for anchor
            dfs(i)

            if fix is not None:
                # Unique solution determined by `fix`
                for u in stc:
                    q[u] = sgn[u] * fix + cons[u]
                    delta = P[u] - q[u]
                    mn += delta
                    mx += delta
                    if q[u] < 0 or q[u] > P[u]:
                        wa()
                # Verify edges
                for u in stc:
                    for v, w in adjacency[u]:
                        if q[u] + q[v] != w:
                            wa()
            else:
                # Range of valid `fix` values [l, r]
                l, r = 0, P[anc]
                for u in stc:
                    if sgn[u] == 1:
                        l = max(l, -cons[u])
                        r = min(r, P[u] - cons[u])
                    else:
                        l = max(l, cons[u] - P[u])
                        r = min(r, cons[u])
                if l > r:
                    wa()
                # Compute sum of reductions for minimal `fix = l`
                base_sum = 0
                tsign = 0   # change in total reduction per unit increase of x
                for u in stc:
                    base_sum += P[u] - (l * sgn[u] + cons[u])
                    tsign -= sgn[u]
                # Depending on tsign, extremes at l or r
                if tsign > 0:
                    mx += base_sum + tsign * (r - l)
                    mn += base_sum
                else:
                    mx += base_sum
                    mn += base_sum + tsign * (r - l)

    self.parameter["minimized_or_maximized"] = random.choice(["minimized", "maximized"])
    # The sum of P' equals sum(P) minus the total reduction, so the smallest
    # sum uses the largest reduction and vice versa.
    if self.parameter["minimized_or_maximized"] == "minimized" :
        self.parameter["gold_answer"] = sum(P) - mx
    elif self.parameter["minimized_or_maximized"] == "maximized" :
        self.parameter["gold_answer"] = sum(P) - mn
    else :
        raise ValueError("minimized_or_maximized should be either 'minimized' or 'maximized'")
157
+
158
+
159
def _prompt_generate(self) -> str:
    """Render the prompt: the bounds P, one line per constraint, and the objective."""
    params = self.parameter
    size = params["N"]
    bounds_text = " ".join("P[{}]={}".format(index, bound) for index, bound in enumerate(params["P"]))
    constraint_lines = ["P'[{}] + P'[{}] = {}".format(a, b, total) for a, b, total in params["edges"]]
    return self.prompt_template.format(
        N=size,
        N_minus_1=size - 1,
        P=bounds_text,
        constraints="\n".join(constraint_lines),
        minimized_or_maximized=params["minimized_or_maximized"],
    )
168
+
169
+
170
def _process(self, answer: Optional[str]) -> Optional[List]:
    """Parse a whitespace-separated list of integers.

    Returns ``None`` when no answer was given or any token is not an integer.
    """
    if answer is None:
        return None  # Invalid answer format
    tokens = answer.strip().split()
    try:
        return [int(token) for token in tokens]
    except ValueError:
        return None  # Invalid answer format
180
+
181
+
182
def scorer(self, output : str) -> float :
    """
    Score a proposed array P' against the constraints and the optimal sum.

    Returns the ``wrong_format`` reward when the output cannot be processed,
    and ``invalid_solution`` when the array has the wrong length, leaves the
    range ``0 <= P'[i] <= P[i]``, or violates a constraint. Otherwise the
    score compares ``sum(P')`` with the stored gold answer using the
    strategy configured for the current objective.

    Raises:
        NotImplementedError: if the configured rewarding strategy is unknown.
        AssertionError: if the stored objective is neither ``"minimized"``
            nor ``"maximized"``, or if the answer beats the gold answer.
    """
    processed_result = self.processor(output)
    if processed_result is None :
        return self.rewards["wrong_format"]
    assert isinstance(processed_result, list), "processed_result should be a list"

    P_prime = processed_result
    if len(P_prime) != self.parameter["N"] :
        return self.rewards["invalid_solution"]
    if not all(0 <= P_prime_u <= P_u for P_prime_u, P_u in zip(P_prime, self.parameter["P"])) :
        return self.rewards["invalid_solution"]
    if not all(P_prime[u] + P_prime[v] == w for u, v, w in self.parameter["edges"]) :
        return self.rewards["invalid_solution"]

    gold, answer = self.parameter["gold_answer"], sum(P_prime)
    if self.parameter["minimized_or_maximized"] == "minimized" :
        assert 0 <= gold <= answer, "For minimization, answer should be greater than 0 and at least as large as the gold answer"
        if self.rewards["rewarding_strategy_min"] == "(gold/answer)^beta" :
            if answer == 0 :
                # Avoid 0/0: a zero sum can only be optimal when gold is zero too.
                assert gold == 0, "If answer is 0, gold should also be 0"
                return self.rewards["rewarding_weight_min"] * 1.0
            return self.rewards["rewarding_weight_min"] * ((gold / answer) ** self.rewards["rewarding_beta_min"])
        elif self.rewards["rewarding_strategy_min"] == "gold=answer" :
            return self.rewards["rewarding_weight_min"] * (gold == answer)
        else :
            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy_min"]))
    elif self.parameter["minimized_or_maximized"] == "maximized" :
        assert 0 <= answer <= gold, "For maximization, answer should be greater than 0 and at most as large as the gold answer"
        if self.rewards["rewarding_strategy_max"] == "(answer/gold)^beta" :
            if gold == 0 :
                # Avoid 0/0: with a zero optimum the only valid sum is zero.
                assert answer == 0, "If gold is 0, answer should also be 0"
                return self.rewards["rewarding_weight_max"] * 1.0
            return self.rewards["rewarding_weight_max"] * ((answer / gold) ** self.rewards["rewarding_beta_max"])
        elif self.rewards["rewarding_strategy_max"] == "gold=answer" :
            return self.rewards["rewarding_weight_max"] * (gold == answer)
        else :
            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy_max"]))
    else :
        # Raised explicitly (not `assert False`) so the check survives `python -O`
        # and the method can never fall through and return None.
        raise AssertionError("minimized_or_maximized should be either 'minimized' or 'maximized'")
server/Gym/environments/bezout_identity/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import BezoutIdentity_Environment
server/Gym/environments/bezout_identity/environment.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import random
3
+ from typing import Optional, List
4
+ from ...environment import VerifiableEnvironment
5
+
6
+
7
+ class BezoutIdentity_Environment(VerifiableEnvironment) :
8
+ prompt_template = \
9
+ r"""You are given an array of length {N}, denoted as A[1], ..., A[{N}]. Please find **integers** X[1], ..., X[{N}] such that the value of S = A[1] * X[1] + ... + A[{N}] * X[{N}] satisfies the condition: **S > 0**. Try your best to **minimize the value of S** while meeting this condition.
10
+
11
+ A: {A}
12
+
13
+ **Output Format:** Output a single line containing X[1], ..., X[{N}], separated by spaces."""
14
+
15
def __init__(self,
             wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
             **kwargs) :
    """
    Set up the BezoutIdentity_Environment and record its reward configuration.

    All remaining keyword arguments are forwarded to the base class.
    """
    super().__init__(**kwargs)

    # Reward table consulted by `scorer`.
    self.rewards = dict(
        wrong_format=wrong_format,
        invalid_solution=invalid_solution,
        rewarding_strategy=rewarding_strategy,
        rewarding_weight=rewarding_weight,
        rewarding_beta=rewarding_beta,
    )
30
+
31
+
32
def _generate(self) -> None:
    """
    Draw the array A and compute integer coefficients reaching gcd(A).

    Reads ``self.parameter["N"]`` (>= 2) and ``self.parameter["MAX_A"]`` (>= 2).
    Writes ``A``, ``reference_answer`` (space-separated coefficients X) and
    ``gold_answer`` (the positive value of sum(A[i] * X[i])) into
    ``self.parameter``.
    """
    assert "N" in self.parameter, "N is required in parameter"
    N = self.parameter["N"]
    assert N >= 2, "N should be greater than or equal to 2"

    assert "MAX_A" in self.parameter, "MAX_A is required in parameter"
    MAX_A = self.parameter["MAX_A"]
    assert MAX_A >= 2, "MAX_A should be greater than or equal to 2"

    A = []
    self.parameter["A"] = A
    for _ in range(N):
        # Prefer a value sharing a factor with as many earlier picks as possible.
        chosen, chosen_score = None, -1
        for _attempt in range(1024):
            candidate = random.randint(2, MAX_A)
            score = sum(1 for earlier in A if math.gcd(candidate, earlier) > 1)
            if score > chosen_score:
                chosen, chosen_score = candidate, score
            if chosen_score == len(A):
                break
        # Flip the sign with probability one half.
        if random.random() < 0.5:
            chosen = -chosen
        A.append(chosen)
    random.shuffle(A)
    assert len(A) == N, "The length of A should be equal to N"

    def extended_gcd(p, q):
        """Return (g, x, y) with g = gcd(p, q) >= 0 and p*x + q*y = g."""
        if q == 0:
            if p >= 0:
                return (abs(p), 1, 0)
            return (abs(p), -1, 0)
        divisor, s, t = extended_gcd(q, p % q)
        # q*s + (p % q)*t = divisor, and p % q = p - (p // q) * q
        return (divisor, t, s - (p // q) * t)

    # Start from A[0] alone, then fold in one element at a time.
    X = [0] * N
    X[0] = 1 if A[0] >= 0 else -1
    g = abs(A[0])
    for i in range(1, N):
        g, scale, own = extended_gcd(g, A[i])
        # The earlier coefficients are multiplied by the factor for the running gcd.
        X[:i] = [coefficient * scale for coefficient in X[:i]]
        X[i] = own

    S = sum(x * a for x, a in zip(X, A))
    assert S == g
    assert S > 0, "The sum S must be greater than 0"
    self.parameter["reference_answer"] = " ".join(map(str, X))
    self.parameter["gold_answer"] = S
96
+
97
def _prompt_generate(self) -> str:
    """Render the prompt with N and the comma-separated array A."""
    length = self.parameter["N"]
    array_text = ", ".join(str(value) for value in self.parameter["A"])
    return self.prompt_template.format(N=length, A=array_text)
102
+
103
def _process(self, answer: Optional[str]) -> Optional[List]:
    """Parse a whitespace-separated list of integers.

    Returns ``None`` when no answer was given or any token is not an integer.
    """
    if answer is None:
        return None  # Invalid answer format
    tokens = answer.strip().split()
    try:
        return [int(token) for token in tokens]
    except ValueError:
        return None  # Invalid answer format
113
+
114
+
115
def scorer(self, output: str) -> float:
    """Score coefficients X by the positive value S = sum(A[i] * X[i]).

    Returns ``wrong_format`` when the output cannot be processed and
    ``invalid_solution`` when the length is wrong or S is not positive.
    Otherwise S is compared with the stored gold answer using the configured
    rewarding strategy.

    Raises:
        NotImplementedError: if the rewarding strategy is not recognised.
    """
    coefficients = self.processor(output)
    if coefficients is None:
        return self.rewards["wrong_format"]
    assert isinstance(coefficients, list), "processed_result should be a list"

    if len(coefficients) != self.parameter["N"]:
        return self.rewards["invalid_solution"]
    S = 0
    for x, a in zip(coefficients, self.parameter["A"]):
        S += x * a
    if S <= 0:
        return self.rewards["invalid_solution"]
    gold = self.parameter["gold_answer"]
    assert gold <= S, "The computed sum S must be greater than or equal to the gold answer"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(gold/answer)^beta":
        return self.rewards["rewarding_weight"] * ((gold / S) ** self.rewards["rewarding_beta"])
    if strategy == "gold=answer":
        return self.rewards["rewarding_weight"] * (gold == S)
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
server/Gym/environments/binario/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import Binario_Environment
server/Gym/environments/binario/environment.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from typing import Optional, List
3
+ from ...environment import VerifiableEnvironment
4
+
5
+
6
+ class Binario_Environment(VerifiableEnvironment) :
7
+ prompt_template = \
8
+ r"""You are given a {N} × {M} matrix. Each cell contains either '0', '1', or '*' ('*' means the cell is empty). Please fill all '*' cells with either '0' or '1' such that:
9
+ 1. The number of `1`s in each row (from top to bottom) is: {row_counts}.
10
+ 2. The number of `1`s in each column (from left to right) is: {col_counts}.
11
+ 3. No more than two consecutive cells in a row or column can contain the same number.
12
+
13
+ The matrix is given in **row-major order**, with each row represented as a string of '0', '1', and '*':
14
+ {matrix}
15
+
16
+ **Output Format:** Output {N} lines, each containing {M} characters, where each character is either '0' or '1'. The output should match the format of the input (i.e., one row per line, no separators)."""
17
+
18
def __init__(self,
             wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 10.0,
             **kwargs) :
    """
    Set up the Binario_Environment and record its reward configuration.

    All remaining keyword arguments are forwarded to the base class.
    """
    super().__init__(**kwargs)

    # Reward table consulted by `scorer`.
    self.rewards = dict(
        wrong_format=wrong_format,
        invalid_solution=invalid_solution,
        rewarding_strategy=rewarding_strategy,
        rewarding_weight=rewarding_weight,
        rewarding_beta=rewarding_beta,
    )
33
+
34
+
35
def _generate(self) -> None:
    """
    Generate a random grid with no three equal cells in a line, then mask it.

    Reads ``self.parameter["MAX_N_M"]`` (>= 2) and ``self.parameter["sparsity"]``
    (strictly between 0 and 1). Writes ``N``, ``M``, ``reference_answer``,
    ``row_counts``, ``col_counts`` and the masked ``matrix`` into
    ``self.parameter``. If the backtracking search gives up,
    ``self.parameter`` is set to ``None``.
    """
    assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
    MAX_N_M = self.parameter["MAX_N_M"]
    assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

    N = random.randint(2, MAX_N_M)
    M = random.randint(2, MAX_N_M)
    self.parameter["N"], self.parameter["M"] = N, M

    # Cells start unset (None) and are filled in a random visiting order.
    grid = [[None] * M for _ in range(N)]
    visit_order = [(r, c) for r in range(N) for c in range(M)]
    random.shuffle(visit_order)
    calls = [0]  # number of search steps taken so far

    def holds(r, c, value):
        """True when (r, c) is inside the grid and already holds `value`."""
        return 0 <= r < N and 0 <= c < M and grid[r][c] == value

    def makes_triple(r, c, value):
        """True when writing `value` at (r, c) would complete three in a line."""
        for dr, dc in ((0, 1), (1, 0)):
            prev1 = holds(r - dr, c - dc, value)
            prev2 = holds(r - 2 * dr, c - 2 * dc, value)
            next1 = holds(r + dr, c + dc, value)
            next2 = holds(r + 2 * dr, c + 2 * dc, value)
            if (prev1 and prev2) or (prev1 and next1) or (next1 and next2):
                return True
        return False

    def fill(position):
        """Backtracking search over `visit_order`, starting at `position`."""
        if position == len(visit_order):
            return True
        calls[0] += 1
        if calls[0] > 10000000:
            return False
        r, c = visit_order[position]
        # Try both symbols in random order.
        for value in random.sample(["0", "1"], 2):
            if makes_triple(r, c, value):
                continue
            grid[r][c] = value
            if fill(position + 1):
                return True
            grid[r][c] = None
        return False

    matrix = grid if fill(0) else None
    if matrix is None:
        self.parameter = None
        return
    self.parameter["reference_answer"] = "\n".join("".join(row) for row in matrix)

    self.parameter["row_counts"] = [row.count("1") for row in matrix]
    self.parameter["col_counts"] = [column.count("1") for column in zip(*matrix)]

    assert "sparsity" in self.parameter, "sparsity is required in parameter"
    sparsity = self.parameter["sparsity"]
    assert 0 < sparsity < 1, "sparsity should be between 0 and 1"
    # Blank out a random subset of cells (at least one).
    hidden = random.sample(range(N * M), max(1, int(N * M * sparsity)))
    for cell in hidden:
        r, c = divmod(cell, M)
        matrix[r][c] = '*'
    self.parameter["matrix"] = ["".join(row) for row in matrix]
111
+
112
+
113
def _prompt_generate(self) -> str:
    """Render the prompt: grid size, masked matrix rows and the row/column counts of 1s."""
    params = self.parameter
    height = params["N"]
    width = params["M"]
    matrix_text = "\n".join("".join(str(cell) for cell in row) for row in params["matrix"])
    row_text = ", ".join(str(count) for count in params["row_counts"])
    col_text = ", ".join(str(count) for count in params["col_counts"])
    return self.prompt_template.format(
        N=height,
        M=width,
        matrix=matrix_text,
        row_counts=row_text,
        col_counts=col_text,
    )
121
+
122
+
123
def _process(self, answer : Optional[str]) -> Optional[List] :
    """
    Split the raw answer into its non-empty rows.

    Returns a list with one whitespace-stripped string per non-blank line, or
    ``None`` when no answer was given. Row length and cell symbols are not
    checked here; the scorer validates them.
    """
    if answer is None :
        return None
    # Nothing here can raise, so no try/except is needed.
    return [stripped for line in answer.strip().splitlines() if (stripped := line.strip())]
137
+
138
+
139
def scorer(self, output: str) -> float:
    """Score a filled grid against the puzzle.

    Returns ``wrong_format`` when the output cannot be processed, has the
    wrong dimensions, or contains symbols other than ``0``/``1``. Returns
    ``invalid_solution`` when a pre-filled cell was changed or three equal
    cells appear consecutively in a row or column. Otherwise the score
    depends on how many of the N + M row/column counts of 1s are matched.

    Raises:
        NotImplementedError: if the rewarding strategy is not recognised.
    """
    solution = self.processor(output)
    if solution is None:
        return self.rewards["wrong_format"]
    assert isinstance(solution, list), "processed_result should be a list"

    N, M = self.parameter["N"], self.parameter["M"]

    if len(solution) != N or any(len(row) != M for row in solution):
        return self.rewards["wrong_format"]
    if any(symbol not in "01" for row in solution for symbol in row):
        return self.rewards["wrong_format"]

    # Pre-filled cells must be kept as given.
    for row, original_row in zip(solution, self.parameter["matrix"]):
        for cell, original_cell in zip(row, original_row):
            if original_cell != '*' and cell != original_cell:
                assert (original_cell == '0' and cell == '1') or (original_cell == '1' and cell == '0')
                return self.rewards["invalid_solution"]

    # A run of three is detected from its first cell, scanning right and down.
    for i in range(N):
        for j in range(M):
            if j + 2 < M and solution[i][j] == solution[i][j + 1] == solution[i][j + 2]:
                return self.rewards["invalid_solution"]
            if i + 2 < N and solution[i][j] == solution[i + 1][j] == solution[i + 2][j]:
                return self.rewards["invalid_solution"]

    row_counts = [sum(1 for cell in row if cell == "1") for row in solution]
    col_counts = [sum(1 for i in range(N) if solution[i][j] == "1") for j in range(M)]

    satisfied = 0
    for got, want in zip(row_counts, self.parameter["row_counts"]):
        satisfied += int(got == want)
    for got, want in zip(col_counts, self.parameter["col_counts"]):
        satisfied += int(got == want)
    assert satisfied <= N + M, "satisfied should not exceed N + M"

    strategy = self.rewards["rewarding_strategy"]
    if strategy == "(satisfied/all)^beta":
        return self.rewards["rewarding_weight"] * ((satisfied / (N + M)) ** self.rewards["rewarding_beta"])
    if strategy == "satisfied=all":
        return self.rewards["rewarding_weight"] * (satisfied == (N + M))
    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
server/Gym/environments/binario_no_adjacency_requirement/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import Binario_NoAdjacencyRequirement_Environment
server/Gym/environments/binario_no_adjacency_requirement/environment.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from typing import Optional, List
3
+ from ...environment import VerifiableEnvironment
4
+
5
+
6
+ class Binario_NoAdjacencyRequirement_Environment(VerifiableEnvironment) :
7
+ prompt_template = \
8
+ r"""You are given a (2 × {N}) × (2 × {M}) matrix. Each cell contains either '0', '1', or '*' ('*' means the cell is empty). Please fill all '*' cells with either '0' or '1' such that:
9
+ 1. Each **row** contains exactly {M} '0's and {M} '1's.
10
+ 2. Each **column** contains exactly {N} '0's and {N} '1's.
11
+
12
+ The matrix is given in **row-major order**, with each row represented as a string of '0', '1', and '*':
13
+ {matrix}
14
+
15
+ **Output Format:** Output (2 × {N}) lines, each containing (2 × {M}) characters, where each character is either '0' or '1'. The output should match the format of the input (i.e., one row per line, no separators)."""
16
+
17
def __init__(self,
             wrong_format : float = -1.0, invalid_solution : float = -0.5, wrong_solution : float = 0.0, correct_solution : float = 1.0,
             **kwargs) :
    """
    Set up the Binario_NoAdjacencyRequirement_Environment and record its reward configuration.

    All remaining keyword arguments are forwarded to the base class.
    """
    super().__init__(**kwargs)

    # Reward table consulted by `scorer`.
    self.rewards = dict(
        wrong_format=wrong_format,
        invalid_solution=invalid_solution,
        wrong_solution=wrong_solution,
        correct_solution=correct_solution,
    )
31
+
32
+
33
def _generate(self) -> None:
    """
    Generate a balanced (2N) x (2M) binary grid and mask part of it.

    Reads ``self.parameter["MAX_N_M"]`` (>= 2) and ``self.parameter["sparsity"]``
    (strictly between 0 and 1). Writes ``N``, ``M``, ``reference_answer`` and
    the masked ``matrix`` into ``self.parameter``.
    """
    assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
    MAX_N_M = self.parameter["MAX_N_M"]
    assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

    N = random.randint(2, MAX_N_M)
    M = random.randint(2, MAX_N_M)
    self.parameter["N"], self.parameter["M"] = N, M

    height, width = 2 * N, 2 * M
    row_permutation = list(range(height))
    col_permutation = list(range(width))
    random.shuffle(row_permutation)
    random.shuffle(col_permutation)

    # Cell value is the parity of (row label + column label). Each label
    # set holds equally many even and odd numbers, so every row and column
    # ends up with as many 0s as 1s.
    matrix = []
    for row_label in row_permutation:
        matrix.append([str((row_label + col_label) % 2) for col_label in col_permutation])
    self.parameter["reference_answer"] = "\n".join("".join(row) for row in matrix)

    assert "sparsity" in self.parameter, "sparsity is required in parameter"
    sparsity = self.parameter["sparsity"]
    assert 0 < sparsity < 1, "sparsity should be between 0 and 1"
    # Blank out a random subset of cells (at least one).
    hidden = random.sample(range(height * width), max(1, int(height * width * sparsity)))
    for cell in hidden:
        r, c = divmod(cell, width)
        matrix[r][c] = '*'
    self.parameter["matrix"] = ["".join(row) for row in matrix]
55
+
56
+
57
def _prompt_generate(self) -> str:
    """Render the prompt with N, M and the masked matrix, one row per line."""
    params = self.parameter
    half_height = params["N"]
    half_width = params["M"]
    matrix_text = "\n".join("".join(str(cell) for cell in row) for row in params["matrix"])
    return self.prompt_template.format(N=half_height, M=half_width, matrix=matrix_text)
63
+
64
+
65
def _process(self, answer : Optional[str]) -> Optional[List] :
    """
    Split the raw answer into its non-empty rows.

    Returns a list with one whitespace-stripped string per non-blank line, or
    ``None`` when no answer was given. Row length and cell symbols are not
    checked here; the scorer validates them.
    """
    if answer is None :
        return None
    # Nothing here can raise, so no try/except is needed.
    return [stripped for line in answer.strip().splitlines() if (stripped := line.strip())]
79
+
80
+
81
def scorer(self, output: str) -> float:
    """Score a filled (2N) x (2M) grid.

    Returns ``wrong_format`` when the output cannot be processed, has the
    wrong dimensions, or contains symbols other than ``0``/``1``. Returns
    ``invalid_solution`` when a pre-filled cell was changed and
    ``wrong_solution`` when some row or column is not balanced between 0s
    and 1s. A fully balanced grid earns ``correct_solution``.
    """
    solution = self.processor(output)
    if solution is None:
        return self.rewards["wrong_format"]
    assert isinstance(solution, list), "processed_result should be a list"

    N, M = self.parameter["N"], self.parameter["M"]
    height, width = 2 * N, 2 * M

    if len(solution) != height or any(len(row) != width for row in solution):
        return self.rewards["wrong_format"]
    if any(symbol not in "01" for row in solution for symbol in row):
        return self.rewards["wrong_format"]

    # Pre-filled cells must be kept as given.
    for row, original_row in zip(solution, self.parameter["matrix"]):
        for cell, original_cell in zip(row, original_row):
            if original_cell != '*' and cell != original_cell:
                assert (original_cell == '0' and cell == '1') or (original_cell == '1' and cell == '0')
                return self.rewards["invalid_solution"]

    for i in range(height):
        ones_in_row = solution[i].count('1')
        zeros_in_row = solution[i].count('0')
        if ones_in_row != zeros_in_row:
            return self.rewards["wrong_solution"]
        assert ones_in_row == M, "Row {} does not have exactly {} ones".format(i, M)
        assert zeros_in_row == M, "Row {} does not have exactly {} zeros".format(i, M)
    for j in range(width):
        ones_in_col = sum(solution[i][j] == '1' for i in range(height))
        zeros_in_col = sum(solution[i][j] == '0' for i in range(height))
        if ones_in_col != zeros_in_col:
            return self.rewards["wrong_solution"]
        assert ones_in_col == N, "Column {} does not have exactly {} ones".format(j, N)
        assert zeros_in_col == N, "Column {} does not have exactly {} zeros".format(j, N)

    return self.rewards["correct_solution"]
server/Gym/environments/binary_alternation/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import BinaryAlternation_Environment
server/Gym/environments/binary_alternation/environment.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from typing import Optional, List
3
+ from ...environment import VerifiableEnvironment
4
+
5
+
6
class BinaryAlternation_Environment(VerifiableEnvironment) :
    """Environment: turn a binary string into an alternating one with the fewest swaps."""
    prompt_template = \
r"""You are given a binary string of length {N}, consisting of `0`s and `1`s. It is 0-indexed: {string}

In one operation, you may **swap** the characters at indices `i` and `j` (0 ≤ i, j < {N}). Please transform the string into an **alternating binary string** (no two adjacent characters are the same) using the **minimum number of operations**.

**Output Format:** Each operation should be written on a single line in the format: `i j`, where `i` and `j` are the indices being swapped. Do **NOT** include backticks or quotes. Output one operation per line in the order they should be performed."""

    def __init__(self,
                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
                 **kwargs) :
        """
        Store the reward configuration; remaining keyword arguments go to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            invalid_solution = invalid_solution,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )


    def _generate(self) -> None :
        """
        Draw a random binary string and compute a minimum-length swap list for it.

        Reads `self.parameter["zero_count"]`; writes "string",
        "reference_answer" (one `i j` swap per line) and "gold_answer"
        (the number of swaps in the reference).
        """
        assert "zero_count" in self.parameter, "zero_count is required in parameter"
        zero_count = self.parameter["zero_count"]
        assert zero_count >= 2, "zero_count should be greater than or equal to 2"

        # The counts differ by at most one, so an alternating arrangement exists.
        one_count = random.randint(zero_count - 1, zero_count + 1)

        characters = ["0"] * zero_count + ["1"] * one_count
        random.shuffle(characters)
        string = "".join(characters)
        self.parameter["string"] = string

        self.parameter["reference_answer"] = None

        def swaps_for(first : str) -> List[str] :
            """Swaps that make the string alternate starting with `first`."""
            other = "1" if first == "0" else "0"
            wanted = [first if index % 2 == 0 else other for index in range(len(string))]
            misplaced = [(index, now) for index, (now, want) in enumerate(zip(string, wanted)) if now != want]
            zero_to_one = [index for index, now in misplaced if now == "0"]
            one_to_zero = [index for index, now in misplaced if now != "0"]
            assert len(zero_to_one) == len(one_to_zero), "zero_to_one and one_to_zero should have the same length"
            # Each swap pairs a misplaced 0 with a misplaced 1 and fixes both.
            return ["{} {}".format(i, j) for i, j in zip(zero_to_one, one_to_zero)]

        # A pattern starting with c is only feasible when c is not in the minority.
        candidates = []
        if zero_count >= one_count :
            candidates.append(swaps_for("0"))
        if one_count >= zero_count :
            candidates.append(swaps_for("1"))
        # min() keeps the first candidate on ties, i.e. the "0"-first pattern.
        best = min(candidates, key = len)

        self.parameter["gold_answer"] = len(best)
        self.parameter["reference_answer"] = "\n".join(best)


    def _prompt_generate(self) -> str :
        """Fill the prompt template with the generated string and its length."""
        string = self.parameter["string"]
        return self.prompt_template.format(N = len(string), string = string)


    def _process(self, answer : Optional[str]) -> Optional[List] :
        """
        Parse an answer into a list of `[i, j]` integer pairs.

        Blank lines are ignored. Returns None when `answer` is None, when a
        non-blank line does not have exactly two tokens, or when a token is
        not an integer.
        """
        if answer is None :
            return None

        actions = []
        for raw_line in answer.strip().splitlines() :
            stripped = raw_line.strip()
            if not stripped :
                continue
            tokens = stripped.split()
            if len(tokens) != 2 :
                return None
            try :
                actions.append([int(tokens[0]), int(tokens[1])])
            except ValueError :
                return None
        return actions


    def scorer(self, output : str) -> float :
        """
        Replay the proposed swaps and reward them according to their count.

        Returns "wrong_format" when the answer cannot be parsed,
        "invalid_solution" when an index is out of range or the final string
        is not alternating, and otherwise a reward derived from
        gold / answer according to `rewarding_strategy`.
        """
        swaps = self.processor(output)
        if swaps is None :
            return self.rewards["wrong_format"]

        characters = list(self.parameter["string"])
        size = len(characters)
        for i, j in swaps :
            if i < 0 or i >= size or j < 0 or j >= size :
                return self.rewards["invalid_solution"]
            characters[i], characters[j] = characters[j], characters[i]

        if any(left == right for left, right in zip(characters, characters[1 :])) :
            return self.rewards["invalid_solution"]

        gold, answer = self.parameter["gold_answer"], len(swaps)
        assert gold <= answer, "gold should be less than or equal to answer"

        # Zero swaps on a valid result means nothing had to be done.
        if answer == 0 :
            return self.rewards["rewarding_weight"]

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(gold/answer)^beta" :
            return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (gold == answer)
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
server/Gym/environments/binary_linear_equation_solution_counting/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import BinaryLinearEquation_SolutionCounting_Environment
server/Gym/environments/binary_linear_equation_solution_counting/environment.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from typing import Optional
3
+ from ...environment import VerifiableEnvironment
4
+
5
+
6
class BinaryLinearEquation_SolutionCounting_Environment(VerifiableEnvironment) :
    """Environment: count integer points of A*x + B*y + C = 0 inside a rectangle."""
    prompt_template = r"""What is the number of integer solution pairs (x, y) such that ({A}) * x + ({B}) * y + ({C}) = 0, with {X1} <= x <= {X2} and {Y1} <= y <= {Y2}?"""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 not_guaranteed_probability : float = 0.05,
                 **kwargs) :
        """
        Store the reward configuration and the probability of generating an
        instance without a planted solution; other keyword arguments go to
        the base class.
        """
        super().__init__(**kwargs)

        self.not_guaranteed_probability = not_guaranteed_probability
        self.rewards = dict(
            wrong_format = wrong_format,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )


    def _generate(self) -> None :
        """
        Draw the equation and the rectangle, then count the solutions.

        Reads `self.parameter["MAX_RANGE"]`; writes "A", "B", "C", "X1",
        "X2", "Y1", "Y2" and "reference_answer". Unless the
        "not guaranteed" branch is taken, a hidden point (x, y) is planted
        inside the rectangle so that at least one solution exists.
        """
        assert "MAX_RANGE" in self.parameter, "MAX_RANGE is required in parameter"
        MAX_RANGE = self.parameter["MAX_RANGE"]
        assert MAX_RANGE >= 8, "MAX_RANGE must be at least 8"

        A = random.randint(-MAX_RANGE, +MAX_RANGE)
        self.parameter["A"] = A
        B = random.randint(-MAX_RANGE, +MAX_RANGE)
        self.parameter["B"] = B

        not_guaranteed = random.random() < self.not_guaranteed_probability
        if not_guaranteed :
            # Everything is drawn independently; the answer may be zero.
            X1 = random.randint(-MAX_RANGE, +MAX_RANGE)
            self.parameter["X1"] = X1
            X2 = random.randint(X1, +MAX_RANGE)
            self.parameter["X2"] = X2
            Y1 = random.randint(-MAX_RANGE, +MAX_RANGE)
            self.parameter["Y1"] = Y1
            Y2 = random.randint(Y1, +MAX_RANGE)
            self.parameter["Y2"] = Y2
            C = random.randint(-2 * (MAX_RANGE ** 2), +2 * (MAX_RANGE ** 2))
            self.parameter["C"] = C
        else :
            # Plant a solution and build the rectangle around it.
            planted_x = random.randint(-MAX_RANGE, +MAX_RANGE)
            planted_y = random.randint(-MAX_RANGE, +MAX_RANGE)
            C = -(A * planted_x + B * planted_y)
            self.parameter["C"] = C
            X1 = random.randint(-MAX_RANGE, planted_x)
            self.parameter["X1"] = X1
            X2 = random.randint(planted_x, +MAX_RANGE)
            self.parameter["X2"] = X2
            Y1 = random.randint(-MAX_RANGE, planted_y)
            self.parameter["Y1"] = Y1
            Y2 = random.randint(planted_y, +MAX_RANGE)
            self.parameter["Y2"] = Y2


        def greatest_common_divisor(p, q) :
            # Euclid's algorithm; the result is made non-negative.
            while q :
                p, q = q, p % q
            return abs(p)

        def bezout(p, q) :
            # For p, q >= 0 returns (g, s, t) with p*s + q*t = g.
            if q == 0 :
                return (p, 1, 0)
            g, s, t = bezout(q, p % q)
            return (g, t, s - (p // q) * t)

        def parameter_bounds(start, step, low, high) :
            # Integer k with low <= start + step*k <= high, as (lo, hi);
            # the interval is empty when lo > hi.
            if step > 0 :
                near, far = low, high
            else :
                # Dividing by a negative step reverses the inequalities.
                near, far = high, low
            lo = -((start - near) // step)   # ceil((near - start) / step)
            hi = (far - start) // step       # floor((far - start) / step)
            return lo, hi

        def count_solutions(a, b, c, x_lo, x_hi, y_lo, y_hi) :
            if x_lo > x_hi :
                x_lo, x_hi = x_hi, x_lo
            if y_lo > y_hi :
                y_lo, y_hi = y_hi, y_lo
            x_span = x_hi - x_lo + 1
            y_span = y_hi - y_lo + 1

            # Degenerate equations: one or both coefficients vanish.
            if a == 0 and b == 0 :
                return x_span * y_span if c == 0 else 0
            if a == 0 :
                # b*y + c = 0 fixes y; x is free.
                if c % b != 0 :
                    return 0
                y_fixed = -c // b
                return x_span if (y_lo <= y_fixed <= y_hi) else 0
            if b == 0 :
                # a*x + c = 0 fixes x; y is free.
                if c % a != 0 :
                    return 0
                x_fixed = -c // a
                return y_span if (x_lo <= x_fixed <= x_hi) else 0

            divisor = greatest_common_divisor(a, b)
            if c % divisor != 0 :
                return 0

            # One particular solution of a*x + b*y = -c.
            _, coeff_x, coeff_y = bezout(abs(a), abs(b))
            if a < 0 :
                coeff_x = -coeff_x
            if b < 0 :
                coeff_y = -coeff_y
            scale = (-c) // divisor
            base_x = coeff_x * scale
            base_y = coeff_y * scale

            # All solutions: (base_x + k * b/d, base_y - k * a/d).
            kx_lo, kx_hi = parameter_bounds(base_x, b // divisor, x_lo, x_hi)
            ky_lo, ky_hi = parameter_bounds(base_y, (-a) // divisor, y_lo, y_hi)

            lowest = max(kx_lo, ky_lo)
            highest = min(kx_hi, ky_hi)
            return max(0, highest - lowest + 1)

        self.parameter["reference_answer"] = count_solutions(A, B, C, X1, X2, Y1, Y2)
        if not_guaranteed :
            assert self.parameter["reference_answer"] >= 0
        else :
            assert self.parameter["reference_answer"] >= 1


    def _prompt_generate(self) -> str :
        """Fill the prompt template with the generated coefficients and bounds."""
        names = ("A", "B", "C", "X1", "X2", "Y1", "Y2")
        return self.prompt_template.format(**{name : self.parameter[name] for name in names})


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return None if missing or malformed."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """
        Compare the parsed count with the reference count.

        Unparsable or negative answers get "wrong_format". Otherwise the
        reward follows `rewarding_strategy`.
        """
        guess = self.processor(output)
        if guess is None or guess < 0 :
            return self.rewards["wrong_format"]

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(min/max)^beta" :
            reference = self.parameter["reference_answer"]
            if reference == 0 :
                # The ratio is undefined here, so fall back to exact match.
                return self.rewards["rewarding_weight"] * (guess == 0)
            ratio = min(reference, guess) / max(reference, guess)
            return self.rewards["rewarding_weight"] * (ratio ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (guess == self.parameter["reference_answer"])
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
server/Gym/environments/binary_tree_leaf_num_expectation/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import BinaryTreeLeafNumExpectation_Environment
server/Gym/environments/binary_tree_leaf_num_expectation/environment.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import random
3
+ from typing import Optional, Tuple
4
+ from ...environment import VerifiableEnvironment
5
+
6
+
7
class BinaryTreeLeafNumExpectation_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3978
    """
    Environment: expected number of leaves of a uniformly random binary tree.

    The reference value computed in `_generate` is
    N * (N + 1) / (2 * (2 * N - 1)), stored as a reduced fraction.
    """
    prompt_template = \
r"""We uniformly at random generate a **binary tree** with exactly {N} nodes (all distinct binary trees with {N} nodes are equally likely). Two binary trees are considered identical if and only if:
- both are empty, **OR**
- both are non-empty, and their left subtrees are identical and their right subtrees are identical.

What is the expected number of **leaf** nodes (nodes whose left and right children are both empty) in the generated binary tree? Output the result as `A/B` (do NOT include quotes), where A and B are positive integers separated by a slash `/`."""

    def __init__(self,
                 wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Initialize the BinaryTreeLeafNumExpectation_Environment instance.

        The three floats are the rewards returned by `scorer`; remaining
        keyword arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "correct_answer" : correct_answer,
            "wrong_answer" : wrong_answer,
        }


    def _generate(self) -> None :
        """
        Draw N in [1, MAX_N] and store the reduced reference fraction.

        Reads `self.parameter["MAX_N"]`; writes "N", "gold_answer"
        (a dict with keys "A" and "B") and "reference_answer" ("A/B").
        """
        assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
        MAX_N = self.parameter["MAX_N"]
        assert MAX_N >= 5, "MAX_N should be greater than or equal to 5"

        N = self.parameter["N"] = random.randint(1, MAX_N)

        A, B = N * (N + 1), 2 * (2 * N - 1)
        gcd_AB = math.gcd(A, B)
        A //= gcd_AB
        B //= gcd_AB
        self.parameter["gold_answer"] = dict(A = A, B = B)
        self.parameter["reference_answer"] = "{}/{}".format(A, B)


    def _prompt_generate(self) -> str :
        """Fill the prompt template with the generated node count."""
        return self.prompt_template.format(N = self.parameter["N"])


    def _process(self, answer : Optional[str]) -> Optional[Tuple[int, int]] :
        """
        Parse an answer of the form `A/B` into a pair of integers.

        Returns None when `answer` is None, when it does not contain exactly
        one `/`, or when either side is not an integer.
        """
        if answer is None :
            return None
        try :
            A, B = map(int, map(str.strip, answer.strip().split('/')))
        except ValueError :
            # ValueError covers both a non-integer part (raised by int) and a
            # wrong number of parts (raised by the tuple unpacking). Catching
            # only this keeps KeyboardInterrupt / SystemExit propagating.
            return None
        return (A, B)


    def scorer(self, output : str) -> float :
        """
        Compare the answered fraction, after reduction, with the reference.

        Unparsable answers and answers with a non-positive numerator or
        denominator get "wrong_format"; otherwise "correct_answer" or
        "wrong_answer".
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]

        A, B = processed_result
        if not (A > 0 and B > 0) :
            return self.rewards["wrong_format"]

        gold_A, gold_B = self.parameter["gold_answer"]["A"], self.parameter["gold_answer"]["B"]
        # Reduce the answer so that unreduced but equal fractions are accepted.
        gcd_AB = math.gcd(A, B)
        A //= gcd_AB
        B //= gcd_AB
        if (A, B) == (gold_A, gold_B) :
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
server/Gym/environments/bit_equation_counting/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import BitEquationCounting_Environment
server/Gym/environments/bit_equation_counting/environment.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from typing import Optional
3
+ from ...environment import VerifiableEnvironment
4
+
5
+
6
class BitEquationCounting_Environment(VerifiableEnvironment) :
    """Environment: count the assignments that make a random `&`/`|`/`^` expression true."""
    prompt_template = \
r"""Given a Boolean expression (where `_` represents a variable that can be 0 or 1, `&` is bitwise AND, `|` is bitwise OR, and `^` is bitwise XOR): {expression}

There are 2^{N} possible combinations of values for the variables. Your task is to find how many of these combinations make the expression evaluate to true.

**Output Format:** Your final answer should be a single integer — the number of combinations that make the expression true. Example: `15` (do **NOT** include quotes or backticks)."""

    def __init__(self,
                 wrong_format : float = -1.0, wrong_range : float = -0.5, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Initialize the BitEquationCounting_Environment instance.

        The reward settings are stored in `self.rewards`; remaining keyword
        arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "wrong_range" : wrong_range,
            "rewarding_strategy" : rewarding_strategy,
            "rewarding_weight" : rewarding_weight,
            "rewarding_beta" : rewarding_beta,
        }

    def _generate(self) -> None :
        """
        Build a random fully parenthesised expression over N variables.

        Reads `self.parameter["N"]`; writes "expression" (without the
        outermost parentheses) and "reference_answer" (the number of
        satisfying assignments, tracked while the expression is built).
        """
        assert "N" in self.parameter, "N is required in parameter"
        N = self.parameter["N"]
        assert N >= 2, "N should be greater than or equal to 2"

        def build_expression(n) :
            """Return (text, true_count, false_count) for a random expression on n variables."""
            if n == 1 :
                return "_", 1, 1
            left_n = random.randint(1, n - 1)
            right_n = n - left_n
            left_expr, left_true, left_false = build_expression(left_n)
            right_expr, right_true, right_false = build_expression(right_n)
            op = random.choice(("&", "|", "^"))
            # Each variable occurs once, so the two sides are independent
            # and their counts combine by multiplication.
            if op == "&" :
                true_count = left_true * right_true
                false_count = (2 ** n) - true_count
            elif op == "|" :
                false_count = left_false * right_false
                true_count = (2 ** n) - false_count
            elif op == "^" :
                true_count = left_true * right_false + left_false * right_true
                false_count = left_true * right_true + left_false * right_false
                assert true_count + false_count == 2 ** n, "XOR operation should cover all cases"
            else :
                raise ValueError("Invalid operator")
            return "({} {} {})".format(left_expr, op, right_expr), true_count, false_count

        expression, true_count, _ = build_expression(N)

        # N >= 2 guarantees the text is wrapped in one outer pair of parentheses.
        self.parameter["expression"] = expression[1 : -1]
        self.parameter["reference_answer"] = true_count

    def _prompt_generate(self) -> str :
        """Fill the prompt template with the expression and the variable count."""
        return self.prompt_template.format(expression = self.parameter["expression"], N = self.parameter["N"])


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return None if missing or malformed."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """
        Compare the parsed count with the reference count.

        Unparsable answers get "wrong_format", answers outside [0, 2^N] get
        "wrong_range", and everything else is rewarded according to
        `rewarding_strategy`.
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]

        if not (0 <= processed_result <= 2 ** self.parameter["N"]) :
            return self.rewards["wrong_range"]

        if self.rewards["rewarding_strategy"] == "(min/max)^beta" :
            a, b = self.parameter["reference_answer"], processed_result
            return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
        elif self.rewards["rewarding_strategy"] == "gold=answer" :
            return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
        else :
            raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
server/Gym/environments/bitand_zero_path_counting/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import BitAndZero_PathCounting_Environment
server/Gym/environments/bitand_zero_path_counting/environment.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from typing import Optional
3
+ from ...environment import VerifiableEnvironment
4
+
5
+
6
class BitAndZero_PathCounting_Environment(VerifiableEnvironment) :
    """Environment: count paths between two vertices of the `s < t`, `s & t = 0` graph."""
    prompt_template = \
r"""You are given a **directed graph** with an **infinite number of vertices**, where each vertex is labeled with a non-negative integer: `0`, `1`, `2`, ...

There is a directed edge from vertex `s` to vertex `t` if and only if:
- `s < t`, and
- `s & t = 0` (where `&` denotes the bitwise AND operation)

Please compute the number of **distinct paths** from vertex `{S}` to vertex `{T}`. Give the result **modulo {MOD}**.
Note that the two vertices labels are provided in **binary (base-2)** representation.

**Output Format:** Your final answer should be a single integer — the number of distinct paths modulo `{MOD}`."""
    MOD = 10000

    def __init__(self,
                 wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0,
                 **kwargs) :
        """
        Initialize the BitAndZero_PathCounting_Environment instance.

        The four floats are the rewards returned by `scorer`; remaining
        keyword arguments are forwarded to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = {
            "wrong_format" : wrong_format,
            "wrong_range" : wrong_range,
            "correct_answer" : correct_answer,
            "wrong_answer" : wrong_answer,
        }

    def _generate_helper(self) -> None :
        """
        Draw one (S, T) pair of binary labels and compute its path count.

        Reads `self.parameter["max_length"]`; writes "S", "T" (binary strings
        with S <= T) and "reference_answer" (the count modulo `self.MOD`).
        """
        assert "max_length" in self.parameter, "max_length is required in parameter"
        max_length = self.parameter["max_length"]
        assert max_length >= 1, "max_length should be greater than or equal to 1"

        S = "1" + "".join(str(random.randint(0, 1)) for _ in range(random.randint(1, max_length) - 1))
        T = "1" + "".join(str(random.randint(0, 1)) for _ in range(random.randint(1, max_length) - 1))

        # Both labels start with "1", so comparing by length and then
        # lexicographically is the numeric comparison. Ensure S <= T.
        if len(S) > len(T) or (len(S) == len(T) and S > T) :
            S, T = T, S
        self.parameter["S"], self.parameter["T"] = S, T


        MOD = self.MOD

        def Mult(a: int, b: int) -> int:
            return (a * b) % MOD

        def Add(a: int, b: int) -> int:
            # Both operands are already reduced, so one subtraction suffices.
            s = a + b
            return s - MOD if s >= MOD else s

        S = list(map(int, S))
        T = list(map(int, T))
        N, M = len(S), len(T)

        # Left-pad S with zeros so both labels have M bits.
        if M > N:
            S = [0] * (M - N) + S
        else:
            assert M == N

        # G[st][i][e]: number of 0/1 sequences of length i + 1 that start
        # with `st`, end with `e` and contain no two adjacent ones.
        G = [[[0, 0] for _ in range(M)] for __ in range(2)]
        for st in (0, 1):
            G[st][0][st] = 1
            for i in range(1, M):
                G[st][i][0] = Add(G[st][i-1][0], G[st][i-1][1])
                G[st][i][1] = G[st][i-1][0]

        # H: 1-based position of the leading one of the padded S.
        H = 1
        while H <= M and S[H-1] == 0:
            H += 1

        # F[i][x]: DP over the first i bit positions (most significant first).
        # NOTE(review): x presumably counts the path vertices other than T
        # that have been opened so far - confirm against the source problem.
        F = [[0] * M for _ in range(M + 1)]
        F[1][0] = 1

        for i in range(2, M + 1):
            for x in range(0, i - 1):
                bit = T[i-1]
                if i <= H:
                    F[i][x+1] = Add(F[i][x+1], Mult(F[i-1][x], G[1][x+1][bit]))
                if i < H:
                    total = Add(G[0][x][bit], G[1][x][bit])
                    F[i][x] = Add(F[i][x], Mult(F[i-1][x], total))
                if i > H:
                    F[i][x] = Add(F[i][x], Mult(F[i-1][x], G[S[i-1]][x][bit]))

        ans = 0
        for x in range(0, M):
            ans = Add(ans, F[M][x])
        self.parameter["reference_answer"] = ans


    def _generate(self) -> None :
        """
        Generate instances until the reference answer is neither 0 nor 1.

        Labels of at most two bits can only produce 0 or 1 paths, so the
        retry loop below would never finish for `max_length` < 3; that case
        is rejected up front instead of hanging.
        """
        assert "max_length" in self.parameter, "max_length is required in parameter"
        assert self.parameter["max_length"] >= 3, "max_length should be greater than or equal to 3"

        while True :
            self._generate_helper()
            if self.parameter["reference_answer"] not in (0, 1) :
                break


    def _prompt_generate(self) -> str :
        """Fill the prompt template with the two labels and the modulus."""
        return self.prompt_template.format(
            S = self.parameter["S"],
            T = self.parameter["T"],
            MOD = self.MOD,
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return None if missing or malformed."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None

    def scorer(self, output : str) -> float :
        """
        Compare the parsed answer with the reference count.

        Unparsable answers get "wrong_format", answers outside [0, MOD) get
        "wrong_range", otherwise "correct_answer" or "wrong_answer".
        """
        processed_result = self.processor(output)
        if processed_result is None :
            return self.rewards["wrong_format"]

        if not (0 <= processed_result < self.MOD) :
            return self.rewards["wrong_range"]

        if processed_result == self.parameter["reference_answer"] :
            return self.rewards["correct_answer"]
        return self.rewards["wrong_answer"]
server/Gym/environments/bitwise_operation_sequence_counting/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .environment import BitwiseOperationSequenceCounting_Environment
server/Gym/environments/bitwise_operation_sequence_counting/environment.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from typing import Optional
3
+ from ...environment import VerifiableEnvironment
4
+
5
+
6
class BitwiseOperationSequenceCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4424
    """Environment: count AND/OR operator sequences that evaluate to a target string."""
    prompt_template = \
r"""You are given an array A of {N} + 1 binary strings, each of length {M}. The strings are:
{A}

You will insert an operation (`AND` or `OR`) between every pair of adjacent elements in A, resulting in {N} operations total, to form an expression. You can evaluate the expression from left to right (without operator precedence) to get the final result of the expression.
Count the number of different ways to insert these operations such that the final result equals this binary string: {R}"""

    def __init__(self,
                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
                 **kwargs) :
        """
        Store the reward configuration; remaining keyword arguments go to the base class.
        """
        super().__init__(**kwargs)

        self.rewards = dict(
            wrong_format = wrong_format,
            rewarding_strategy = rewarding_strategy,
            rewarding_weight = rewarding_weight,
            rewarding_beta = rewarding_beta,
        )

    def _generate(self) -> None :
        """
        Draw the strings, derive a reachable target and count the operator sequences.

        Reads `self.parameter["MAX_N_M"]`; writes "N", "M", "A" (N + 1
        strings, the first all zeros), "R" (the result of one random
        operator sequence) and "reference_answer".
        """
        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
        MAX_N_M = self.parameter["MAX_N_M"]
        assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"

        N = random.randint(2, MAX_N_M)
        self.parameter["N"] = N
        M = random.randint(2, MAX_N_M)
        self.parameter["M"] = M

        A = [None] * (N + 1)
        self.parameter["A"] = A
        A[0] = "0" * M
        target = A[0]
        AND_probability = random.random()
        for i in range(1, N + 1) :
            one_probability = random.random()
            A[i] = "".join("1" if random.random() < one_probability else "0" for _ in range(M))
            # Applying a random operator keeps the target reachable by construction.
            if random.random() < AND_probability :
                target = "".join("1" if (x == "1" and y == "1") else "0" for x, y in zip(A[i], target))
            else :
                target = "".join("1" if (x == "1" or y == "1") else "0" for x, y in zip(A[i], target))
        self.parameter["R"] = target


        rows = [[int(ch) for ch in text] for text in A[1 :]]

        # order: column indices, stably partitioned row by row (zeros first),
        # i.e. a radix sort with the last row as the most significant digit.
        # value[j]: column j read as a number whose bit i comes from row i.
        order = list(range(M))
        value = [0] * M
        for i, row in enumerate(rows) :
            for j in range(M) :
                value[j] += row[j] << i
            order = [k for k in order if row[k] == 0] + [k for k in order if row[k] == 1]

        # First sorted position whose target bit is '1' (M when there is none)
        # and last sorted position whose target bit is '0' (-1 when there is none).
        first_one = next((idx for idx in range(M) if target[order[idx]] == '1'), M)
        last_zero = next((idx for idx in range(M - 1, -1, -1) if target[order[idx]] == '0'), -1)

        if first_one < last_zero :
            # A '1' column sorted before a '0' column: no sequence works.
            count = 0
        else :
            lower = 0 if last_zero == -1 else value[order[last_zero]]
            upper = (2 ** N) if first_one == M else value[order[first_one]]
            count = upper - lower

        self.parameter["reference_answer"] = count
        assert self.parameter["reference_answer"] > 0


    def _prompt_generate(self) -> str :
        """Fill the prompt template with the strings and the target."""
        listing = "\n".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"]))
        return self.prompt_template.format(
            N = self.parameter["N"],
            M = self.parameter["M"],
            A = listing,
            R = self.parameter["R"],
        )


    def _process(self, answer : Optional[str]) -> Optional[int] :
        """Parse the answer as an integer; return None if missing or malformed."""
        if answer is None :
            return None
        try :
            return int(answer.strip())
        except ValueError :
            return None


    def scorer(self, output : str) -> float :
        """
        Compare the parsed count with the reference count.

        Unparsable or negative answers get "wrong_format"; otherwise the
        reward follows `rewarding_strategy`.
        """
        guess = self.processor(output)
        if guess is None or guess < 0 :
            return self.rewards["wrong_format"]

        strategy = self.rewards["rewarding_strategy"]
        if strategy == "(min/max)^beta" :
            reference = self.parameter["reference_answer"]
            ratio = min(reference, guess) / max(reference, guess)
            return self.rewards["rewarding_weight"] * (ratio ** self.rewards["rewarding_beta"])
        if strategy == "gold=answer" :
            return self.rewards["rewarding_weight"] * (guess == self.parameter["reference_answer"])
        raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))