Spaces:

ZhiyuanZeng
/

RLVE_Gym

Running

App Files Files Community

ZhiyuanZeng committed 24 days ago

Commit

3bf8430

verified ·

1 Parent(s): 1273f5f

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

Dockerfile +76 -0
README.md +193 -5
__init__.py +13 -0
client.py +62 -0
models.py +45 -0
openenv.yaml +7 -0
pyproject.toml +51 -0
server/Gym/__init__.py +0 -0
server/Gym/environment.py +217 -0
server/Gym/environments/__init__.py +802 -0
server/Gym/environments/ab_program_simulation/__init__.py +1 -0
server/Gym/environments/ab_program_simulation/environment.py +109 -0
server/Gym/environments/add_multiple_divisible_counting/__init__.py +1 -0
server/Gym/environments/add_multiple_divisible_counting/environment.py +122 -0
server/Gym/environments/addition_table/__init__.py +1 -0
server/Gym/environments/addition_table/environment.py +132 -0
server/Gym/environments/almost_complete_graph_cycle_counting/__init__.py +1 -0
server/Gym/environments/almost_complete_graph_cycle_counting/environment.py +94 -0
server/Gym/environments/and_or_sequence_counting/__init__.py +1 -0
server/Gym/environments/and_or_sequence_counting/environment.py +147 -0
server/Gym/environments/anti_palindromic_substring_counting/__init__.py +1 -0
server/Gym/environments/anti_palindromic_substring_counting/environment.py +142 -0
server/Gym/environments/axis_k_center/__init__.py +1 -0
server/Gym/environments/axis_k_center/environment.py +129 -0
server/Gym/environments/baj_bytecomputer/__init__.py +1 -0
server/Gym/environments/baj_bytecomputer/environment.py +109 -0
server/Gym/environments/banned_point_superset_path_counting/__init__.py +1 -0
server/Gym/environments/banned_point_superset_path_counting/environment.py +170 -0
server/Gym/environments/banyan_heart/__init__.py +1 -0
server/Gym/environments/banyan_heart/environment.py +165 -0
server/Gym/environments/bez_minimalist_security/__init__.py +1 -0
server/Gym/environments/bez_minimalist_security/environment.py +221 -0
server/Gym/environments/bezout_identity/__init__.py +1 -0
server/Gym/environments/bezout_identity/environment.py +134 -0
server/Gym/environments/binario/__init__.py +1 -0
server/Gym/environments/binario/environment.py +188 -0
server/Gym/environments/binario_no_adjacency_requirement/__init__.py +1 -0
server/Gym/environments/binario_no_adjacency_requirement/environment.py +114 -0
server/Gym/environments/binary_alternation/__init__.py +1 -0
server/Gym/environments/binary_alternation/environment.py +121 -0
server/Gym/environments/binary_linear_equation_solution_counting/__init__.py +1 -0
server/Gym/environments/binary_linear_equation_solution_counting/environment.py +187 -0
server/Gym/environments/binary_tree_leaf_num_expectation/__init__.py +1 -0
server/Gym/environments/binary_tree_leaf_num_expectation/environment.py +76 -0
server/Gym/environments/bit_equation_counting/__init__.py +1 -0
server/Gym/environments/bit_equation_counting/environment.py +91 -0
server/Gym/environments/bitand_zero_path_counting/__init__.py +1 -0
server/Gym/environments/bitand_zero_path_counting/environment.py +135 -0
server/Gym/environments/bitwise_operation_sequence_counting/__init__.py +1 -0
server/Gym/environments/bitwise_operation_sequence_counting/environment.py +150 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,76 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+# Multi-stage build using openenv-base
+# This Dockerfile is flexible and works for both:
+# - In-repo environments (with local src/core)
+# - Standalone environments (with openenv-core from pip)
+# The build script (openenv build) handles context detection and sets appropriate build args.
+ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
+FROM ${BASE_IMAGE} AS builder
+WORKDIR /app
+# Build argument to control whether we're building standalone or in-repo
+ARG BUILD_MODE=in-repo
+ARG ENV_NAME=RLVE_Gym
+# Copy environment code (always at root of build context)
+COPY . /app/env
+# For in-repo builds, openenv-core is already in the pyproject.toml dependencies
+# For standalone builds, openenv-core will be installed from pip via pyproject.toml
+WORKDIR /app/env
+# Ensure uv is available (for local builds where base image lacks it)
+RUN if ! command -v uv >/dev/null 2>&1; then \
+        curl -LsSf https://astral.sh/uv/install.sh | sh && \
+        mv /root/.local/bin/uv /usr/local/bin/uv && \
+        mv /root/.local/bin/uvx /usr/local/bin/uvx; \
+    fi
+# Install dependencies using uv sync
+# If uv.lock exists, use it; otherwise resolve on the fly
+# First pass: --no-install-project syncs the dependencies only, without
+# installing the project package itself.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ -f uv.lock ]; then \
+        uv sync --frozen --no-install-project --no-editable; \
+    else \
+        uv sync --no-install-project --no-editable; \
+    fi
+# Second pass: same lockfile check, but without --no-install-project, so the
+# project package is installed (non-editable) into the same environment.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ -f uv.lock ]; then \
+        uv sync --frozen --no-editable; \
+    else \
+        uv sync --no-editable; \
+    fi
+# Final runtime stage
+FROM ${BASE_IMAGE}
+WORKDIR /app
+# Copy the virtual environment from builder
+COPY --from=builder /app/env/.venv /app/.venv
+# Copy the environment code
+COPY --from=builder /app/env /app/env
+# Set PATH to use the virtual environment
+ENV PATH="/app/.venv/bin:$PATH"
+# Set PYTHONPATH so imports work correctly
+ENV PYTHONPATH="/app/env:$PYTHONPATH"
+# Health check
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8000/health || exit 1
+# Run the FastAPI server
+# The module path is constructed to work with the /app/env structure
+ENV ENABLE_WEB_INTERFACE=true
+CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]

README.md CHANGED Viewed

@@ -1,10 +1,198 @@
 ---
-title: RLVE Gym
-emoji: 🦀
-colorFrom: blue
-colorTo: purple
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Rlve Gym Environment Server
+emoji: 📡
+colorFrom: purple
+colorTo: blue
 sdk: docker
 pinned: false
+app_port: 8000
+base_path: /web
+tags:
+  - openenv
 ---
+# Rlve Gym Environment
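+An OpenEnv environment server for RLVE Gym: each problem is presented as a prompt (`problem_input`), and the model output submitted in a step is checked by the environment's verifier. It can also be used for testing the env APIs and for demonstrating environment usage patterns.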
+## Quick Start
+The simplest way to use the Rlve Gym environment is through the `RlveGymEnv` class:
+```python
+from RLVE_Gym import RlveGymAction, RlveGymEnv
+try:
+    # Create environment from Docker image
+    RLVE_Gymenv = RlveGymEnv.from_docker_image("RLVE_Gym-env:latest")
+    # Reset
+    result = RLVE_Gymenv.reset()
+    print(f"Reset: {result.observation.problem_input}")
+    # Send multiple model outputs (the answer is read from between <answer> and </answer>)
+    outputs = ["<answer>1</answer>", "<answer>2</answer>", "<answer>3</answer>"]
+    for output in outputs:
+        result = RLVE_Gymenv.step(RlveGymAction(output=output))
+        print(f"Sent: '{output}'")
+        print(f"  → Verifier result: {result.observation.verifier_result}")
+        print(f"  → Success: {result.observation.success}")
+        print(f"  → Reward: {result.reward}")
+finally:
+    # Always clean up
+    RLVE_Gymenv.close()
+```
+That's it! The `RlveGymEnv.from_docker_image()` method handles:
+- Starting the Docker container
+- Waiting for the server to be ready
+- Connecting to the environment
+- Container cleanup when you call `close()`
+## Building the Docker Image
+Before using the environment, you need to build the Docker image:
+```bash
+# From project root
+docker build -t RLVE_Gym-env:latest -f server/Dockerfile .
+```
+## Deploying to Hugging Face Spaces
+You can easily deploy your OpenEnv environment to Hugging Face Spaces using the `openenv push` command:
+```bash
+# From the environment directory (where openenv.yaml is located)
+openenv push
+# Or specify options
+openenv push --namespace my-org --private
+```
+The `openenv push` command will:
+1. Validate that the directory is an OpenEnv environment (checks for `openenv.yaml`)
+2. Prepare a custom build for Hugging Face Docker space (enables web interface)
+3. Upload to Hugging Face (ensuring you're logged in)
+### Prerequisites
+- Authenticate with Hugging Face: The command will prompt for login if not already authenticated
+### Options
+- `--directory`, `-d`: Directory containing the OpenEnv environment (defaults to current directory)
+- `--repo-id`, `-r`: Repository ID in format 'username/repo-name' (defaults to 'username/env-name' from openenv.yaml)
+- `--base-image`, `-b`: Base Docker image to use (overrides Dockerfile FROM)
+- `--private`: Deploy the space as private (default: public)
+### Examples
+```bash
+# Push to your personal namespace (defaults to username/env-name from openenv.yaml)
+openenv push
+# Push to a specific repository
+openenv push --repo-id my-org/my-env
+# Push with a custom base image
+openenv push --base-image ghcr.io/meta-pytorch/openenv-base:latest
+# Push as a private space
+openenv push --private
+# Combine options
+openenv push --repo-id my-org/my-env --base-image custom-base:latest --private
+```
+After deployment, your space will be available at:
+`https://huggingface.co/spaces/<repo-id>`
+The deployed space includes:
+- **Web Interface** at `/web` - Interactive UI for exploring the environment
+- **API Documentation** at `/docs` - Full OpenAPI/Swagger interface
+- **Health Check** at `/health` - Container health monitoring
+## Environment Details
+### Action
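+**RlveGymAction**: Contains a single field
+- `output` (str) - The model output to be verified
+### Observation
+**RlveGymObservation**: Contains the problem and the verification outcome
+- `problem_input` (str) - The problem input presented to the model
+- `verifier_result` (dict) - Verifier output, mapping string keys to float/int values
+- `success` (bool) - Success flag reported by the server
+- `message` (str) - Accompanying message from the server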
+- `reward` (float) - Reward based on message length (length × 0.1)
+- `done` (bool) - Always False for echo environment
+- `metadata` (dict) - Additional info like step count
+### Reward
+The reward is calculated as: `message_length × 0.1`
+- "Hi" → reward: 0.2
+- "Hello, World!" → reward: 1.3
+- Empty message → reward: 0.0
+## Advanced Usage
+### Connecting to an Existing Server
+If you already have a Rlve Gym environment server running, you can connect directly:
+```python
+from RLVE_Gym import RlveGymAction, RlveGymEnv
+# Connect to existing server
+RLVE_Gymenv = RlveGymEnv(base_url="<ENV_HTTP_URL_HERE>")
+# Use as normal
+result = RLVE_Gymenv.reset()
+result = RLVE_Gymenv.step(RlveGymAction(output="<answer>42</answer>"))
+```
+Note: When connecting to an existing server, `RLVE_Gymenv.close()` will NOT stop the server.
+## Development & Testing
+### Direct Environment Testing
+Test the environment logic directly without starting the HTTP server:
+```bash
+# From the project root (the path below is relative to it)
+python3 server/RLVE_Gym_environment.py
+```
+This verifies that:
+- Environment resets correctly
+- Step executes actions properly
+- State tracking works
+- Rewards are calculated correctly
+### Running Locally
+Run the server locally for development:
+```bash
+uvicorn server.app:app --reload
+```
+## Project Structure
+```
+RLVE_Gym/
+├── __init__.py            # Module exports
+├── README.md              # This file
+├── openenv.yaml           # OpenEnv manifest
+├── pyproject.toml         # Project metadata and dependencies
+├── uv.lock                # Locked dependencies (generated)
+├── client.py              # RlveGymEnv client implementation
+├── models.py              # Action and Observation models
+└── server/
+    ├── __init__.py        # Server module exports
+    ├── RLVE_Gym_environment.py  # Core environment logic
+    ├── app.py             # FastAPI application
+    └── Dockerfile         # Container image definition
+```

__init__.py ADDED Viewed

	@@ -0,0 +1,13 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""Rlve Gym Environment package.
+Re-exports the HTTP client (RlveGymEnv) and the action/observation models
+(RlveGymAction, RlveGymObservation) so they can be imported from the package root.
+"""
+from .client import RlveGymEnv
+from .models import RlveGymAction, RlveGymObservation
+# Names exported by `from <package> import *`; RlveGymState is not imported or listed here.
+__all__ = ["RlveGymAction", "RlveGymObservation", "RlveGymEnv"]

client.py ADDED Viewed

	@@ -0,0 +1,62 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Rlve Gym Environment HTTP Client.
+This module provides the client for connecting to a Rlve Gym Environment server
+over HTTP.
+"""
+from typing import Dict
+from openenv_core.client_types import StepResult
+from openenv_core.http_env_client import HTTPEnvClient
+from .models import RlveGymState, RlveGymAction, RlveGymObservation
+class RlveGymEnv(HTTPEnvClient[RlveGymAction, RlveGymObservation]):
+    """
+    HTTP client for the Rlve Gym Environment.
+    This client connects to a RlveGymEnvironment HTTP server and provides
+    methods to interact with it: reset(), step(), and state access.
+    """
+    def _step_payload(self, action: RlveGymAction) -> Dict:
+        """
+        Convert RlveGymAction to JSON payload for step request.
+        Args:
+            action: RlveGymAction instance
+        Returns:
+            Dictionary representation suitable for JSON encoding
+        """
+        return {
+            "output": action.output,
+        }
+    def _parse_result(self, payload: Dict) -> StepResult[RlveGymObservation]:
+        """
+        Parse server response into StepResult[RlveGymObservation].
+        Args:
+            payload: JSON response from server
+        Returns:
+            StepResult with RlveGymObservation
+        """
+        obs = RlveGymObservation(**payload["observation"])
+        return StepResult(
+            observation=obs,
+            reward=payload.get("reward"),
+            done=payload.get("done", False),
+        )
+    def _parse_state(self, payload: Dict) -> RlveGymState:
+        return RlveGymState(**payload)

models.py ADDED Viewed

	@@ -0,0 +1,45 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Data models for the Rlve Gym Environment.
+The RLVE_Gym environment presents a problem and verifies the model output submitted for it.
+"""
+from dataclasses import dataclass
+from openenv_core.env_server.types import Action, Observation, State
+from typing import Dict, Union
+@dataclass(kw_only=True)
+class RlveGymState(State):
+    """State of the RLVE_Gym containing the seed.
+    Fields are keyword-only; ``seed`` has no default, the others do.
+    """
+    # Seed stored with the state (no default, must be supplied).
+    seed: int
+    # NOTE(review): presumably the prompt of the current problem, None when no problem is set — confirm against the server.
+    problem_input: Union[str, None] = None
+    # NOTE(review): looks like a running count of verified samples — confirm against the server.
+    num_samples: int = 0
+    # NOTE(review): looks like the running sum of per-sample accuracy — confirm against the server.
+    sum_accuracy: int = 0
+@dataclass(kw_only=True)
+class RlveGymAction(Action):
+    """Action for the RLVE_Gym environment - just a model output.
+    The field is keyword-only: construct with ``RlveGymAction(output=...)``.
+    """
+    # Raw text produced by the model; sent to the server under the "output" key.
+    output: str
+@dataclass(kw_only=True)
+class RlveGymObservation(Observation):
+    """Observation from the RLVE_Gym environment.
+    All four fields are keyword-only and have no defaults.
+    """
+    # NOTE(review): presumably the problem prompt the model should answer — confirm against the server.
+    problem_input: str
+    # String-keyed numeric results; NOTE(review): presumably the dict returned by the environment's verifier — confirm.
+    verifier_result: Dict[str, Union[float, int]]
+    # NOTE(review): presumably whether the request was handled successfully — confirm against the server.
+    success: bool
+    # Free-form text accompanying the observation.
+    message: str

openenv.yaml ADDED Viewed

	@@ -0,0 +1,7 @@

+spec_version: 1
+name: RLVE_Gym
+type: space
+runtime: fastapi
+app: server.app:app
+port: 8000

pyproject.toml ADDED Viewed

	@@ -0,0 +1,51 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+[build-system]
+requires = ["setuptools>=45", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "openenv-RLVE_Gym"
+version = "0.1.0"
+description = "Rlve Gym environment for OpenEnv"
+requires-python = ">=3.10"
+dependencies = [
+    # Core OpenEnv dependencies (required for server functionality)
+    # "openenv-core @ git+https://github.com/meta-pytorch/OpenEnv.git@main#subdirectory=src/core",
+    "openenv-core>=0.1.0",
+    "fastapi>=0.115.0",
+    "pydantic>=2.0.0",
+    "uvicorn>=0.24.0",
+    "requests>=2.31.0",
+    # Environment-specific dependencies
+    # Add all dependencies needed for your environment here
+    # Examples:
+    # "numpy>=1.19.0",
+    # "torch>=2.0.0",
+    # "gymnasium>=0.29.0",
+    # "openspiel>=1.0.0",
+    # "smolagents>=1.22.0,<2",
+]
+[project.optional-dependencies]
+dev = [
+    "pytest>=8.0.0",
+    "pytest-cov>=4.0.0",
+]
+[project.scripts]
+# Server entry point - enables running via: uv run --project . server
+# or: python -m RLVE_Gym.server.app
+server = "RLVE_Gym.server.app:main"
+[tool.setuptools]
+packages = ["RLVE_Gym", "RLVE_Gym.server"]
+package-dir = { "RLVE_Gym" = ".", "RLVE_Gym.server" = "server" }
+[tool.setuptools.packages.find]
+where = ["."]

server/Gym/__init__.py ADDED Viewed

File without changes

server/Gym/environment.py ADDED Viewed

	@@ -0,0 +1,217 @@

+import os
+import re
+import copy
+from abc import ABC, abstractmethod
+from typing import Dict, Optional, Tuple, Any, Union
+import functools
+from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
+class TimeoutException(Exception) :
+    pass
+def timeout(seconds) :
+    def decorator(func) :
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs) :
+            executor = ThreadPoolExecutor(max_workers = 1)
+            future = executor.submit(func, *args, **kwargs)
+            try :
+                return future.result(timeout=seconds)
+            except FutureTimeoutError :
+                raise TimeoutException("Function timed out after {} seconds".format(seconds))
+            finally :
+                executor.shutdown(wait=False, cancel_futures=True)
+        return wrapper
+    return decorator
+import torch
+import random
+import numpy as np
+def manual_seed(args_or_seed : int, fix_cudnn = False) :
+    random.seed(args_or_seed)
+    np.random.seed(args_or_seed)
+    torch.manual_seed(args_or_seed)
+    torch.cuda.manual_seed_all(args_or_seed)
+    os.environ["PYTHONHASHSEED"] = str(args_or_seed)
+    if fix_cudnn :
+        torch.backends.cudnn.deterministic = True  # noqa
+        torch.backends.cudnn.benchmark = False  # noqa
+class VerifiableEnvironment(ABC) :
+    """
+    Abstract base class for a verifiable environment.
+    """
+    def __init__(self, answer_markers : Optional[Tuple[str, str]] = None) :
+        """
+        Initializes the environment with default seed and parameter values.
+        """
+        self.seed = None
+        self.parameter = None
+        if answer_markers is None :
+            answer_markers = (r"<answer>", r"</answer>")
+        assert hasattr(answer_markers, "__len__"), "answer_markers should have __len__"
+        assert len(answer_markers) == 2 and isinstance(answer_markers[0], str) and isinstance(answer_markers[1], str), "answer_markers should be a tuple of two strings"
+        self.answer_markers = answer_markers
+        self.passing_reward_threshold = 1.0
+    def generator(self, seed : int, parameter : Optional[Dict] = None, timeout_second : int = 10) -> bool :
+        """
+        Initializes the environment with the given seed and (initial) parameters, and samples environment-specific parameters to generate a problem.
+        Args:
+            seed (int): Random seed for reproducibility.
+            parameter (Optional[Dict]): Dictionary of (initial) problem parameters.
+            timeout_second (int): Timeout in seconds for the generation process.
+        Returns:
+            bool: True if the generation was successful, False otherwise.
+        """
+        @timeout(timeout_second)
+        def self_generate() :
+            self.seed = seed
+            self.parameter = copy.deepcopy(parameter) if parameter is not None else {}
+            manual_seed(self.seed)
+            self._generate()
+        try :
+            self_generate()
+        except :
+            return False
+        return self.parameter is not None
+    @abstractmethod
+    def _generate(self) -> None :
+        """
+        Subclasses must implement problem generation using self.seed and self.parameter.
+        """
+        pass
+    def prompt_generator(self) -> str :
+        """
+        Generates the prompt string for the problem.
+        Returns:
+            str: The formatted prompt for the problem.
+        """
+        assert self.seed is not None and self.parameter is not None, "generator() should be called before prompt_generator()"
+        return self._prompt_generate()
+    @abstractmethod
+    def _prompt_generate(self) -> str :
+        """
+        Subclasses must implement prompt generation using self.seed and self.parameter.
+        Returns:
+            str: The problem prompt.
+        """
+        pass
+    def processor(self, output : str) -> Any :
+        """
+        Processes the model's output to extract useful information.
+        Args:
+            output (str): The string output from a model.
+        Returns:
+            Any: Any useful information that may be used for following steps (e.g., scoring).
+        """
+        # Remove everything before the first "Assistant:" (if possible)
+        if "Assistant:" in output :
+            output = output.split("Assistant:", 1)[1]
+        elif "<|im_start|>assistant" in output :
+            output = output.split("<|im_start|>assistant", 1)[1]
+        else :
+            pass
+        answer_pattern = re.escape(self.answer_markers[0]) + r"(.*?)" + re.escape(self.answer_markers[1])
+        matches = list(re.finditer(answer_pattern, output, re.DOTALL))
+        if matches :
+            answer = matches[-1].group(1)
+        else :
+            answer = None
+        return self._process(answer)
+    @abstractmethod
+    def _process(self, answer : Optional[str]) -> Any :
+        """
+        Subclasses must implement the processing of the answer.
+        Args:
+            answer (str): The model's answer. If it is None, it means the model did not provide an answer in the expected format.
+        Returns:
+            Any: The processed answer, which may be used for scoring.
+        """
+        pass
+    @abstractmethod
+    def scorer(self, output : str) -> float :
+        """
+        Computes a numeric score for the output, which should be in [-1.0, +1.0].
+        Args:
+            output (str): The model's output.
+        Returns:
+            float: The score for the given output, between -1.0 and +1.0.
+        """
+        pass
+    def verifier(self, output : str) -> Dict[str, Union[float, int]] :
+        """
+        Verifies the model's output.
+        """
+        try :
+            score = self.scorer(output)
+        except :
+            score = -1.0
+        assert -1.0 <= score <= +1.0, "Score out of bounds: score={}\n\nPrompt:\n{}".format(score, self.prompt_generator())
+        eps = 1E-6
+        return dict(
+            reward = score, # [-1.0, +1.0]
+            accuracy = int(score >= self.passing_reward_threshold - eps), # 0 or 1
+            format_score = int(score >= -1.0 + eps), # 0 or 1
+        )
+    def get_config(self) -> Dict :
+        """
+        Returns the configuration of the current problem.
+        Returns:
+            Dict: Dictionary with keys 'seed' and 'parameter'.
+        """
+        return dict(seed = self.seed, parameter = self.parameter, passing_reward_threshold = self.passing_reward_threshold)
+    def set_config(self, config : Dict) -> None :
+        """
+        Sets the configuration for the current problem.
+        Args:
+            config (Dict): Dictionary with 'seed' and 'parameter' keys.
+        """
+        assert "seed" in config, "seed is required in config"
+        assert "parameter" in config, "parameter is required in config"
+        self.seed, self.parameter, self.passing_reward_threshold = config["seed"], config["parameter"], config.get("passing_reward_threshold", 1.0)

server/Gym/environments/__init__.py ADDED Viewed

	@@ -0,0 +1,802 @@

+from .ab_program_simulation import ABProgramSimulation_Environment
+from .add_multiple_divisible_counting import AddMultiple_Divisible_Counting_Environment
+from .addition_table import AdditionTable_Environment
+from .almost_complete_graph_cycle_counting import AlmostCompleteGraphCycleCounting_Environment
+from .and_or_sequence_counting import AndOr_Sequence_Counting_Environment
+from .anti_palindromic_substring_counting import AntiPalindromicSubstringCounting_Environment
+from .axis_k_center import Axis_KCenter_Environment
+from .baj_bytecomputer import BAJBytecomputer_Environment
+from .banned_point_superset_path_counting import BannedPointSupersetPathCounting_Environment
+from .banyan_heart import BanyanHeart_Environment
+from .bez_minimalist_security import BEZMinimalistSecurity_Environment
+from .bezout_identity import BezoutIdentity_Environment
+from .binario import Binario_Environment
+from .binario_no_adjacency_requirement import Binario_NoAdjacencyRequirement_Environment
+from .binary_alternation import BinaryAlternation_Environment
+from .binary_linear_equation_solution_counting import BinaryLinearEquation_SolutionCounting_Environment
+from .binary_tree_leaf_num_expectation import BinaryTreeLeafNumExpectation_Environment
+from .bit_equation_counting import BitEquationCounting_Environment
+from .bitand_zero_path_counting import BitAndZero_PathCounting_Environment
+from .bitwise_operation_sequence_counting import BitwiseOperationSequenceCounting_Environment
+from .block_image import BlockImage_Environment
+from .bounded_adjacency_difference_permutation_counting import BoundedAdjacencyDifference_Permutation_Counting_Environment
+from .bounded_interval_intersection import BoundedIntervalIntersection_Environment
+from .bounded_mean_subarray_counting import BoundedMeanSubarrayCounting_Environment
+from .bounded_subarray_counting import BoundedSubarrayCounting_Environment
+from .box_scheduling import BoxScheduling_Environment
+from .bridge import Bridge_Environment
+from .bubble_swap_lower_bound_permutation_counting import BubbleSwapLowerBound_PermutationCounting_Environment
+from .bucket_sorting import BucketSorting_Environment
+from .campfire_party import CampfireParty_Environment
+from .campsite_puzzle import CampsitePuzzle_Environment
+from .canon import Canon_Environment
+from .cantor_expansion import CantorExpansion_Environment
+from .capital_city_effect import CapitalCityEffect_Environment
+from .card_coloring_counting import CardColoringCounting_Environment
+from .catalan_number_mod import CatalanNumberMod_Environment
+from .check_all_cycle_xor_zero import CheckAllCycleXorZero_Environment
+from .cho_hamsters import ChoHamsters_Environment
+from .cinema import Cinema_Environment
+from .circuit import Circuit_Environment
+from .circulating_decimal_counting import CirculatingDecimalCounting_Environment
+from .circulating_grid import CirculatingGrid_Environment
+from .cleaning_up import CleaningUp_Environment
+from .clear_symmetry import ClearSymmetry_Environment
+from .clique_independent_set_partitioning_counting import Clique_IndependentSet_Partitioning_Counting_Environment
+from .coin_square_game import CoinSquareGame_Environment
+from .coloring_counting import ColoringCounting_Environment
+from .combination_odd_subsequence_counting import CombinationOddSubsequenceCounting_Environment
+from .concatenation_partition_counting_sum import ConcatenationPartitionCountingSum_Environment
+from .congruent_equation import CongruentEquation_Environment
+from .construct_hack_interval import ConstructHackInterval_Environment
+from .convex_hull import ConvexHull_Environment
+from .cornfield import Cornfield_Environment
+from .countdown import CountdownEqual_Environment, CountdownClose_Environment
+from .cow_dance_show import CowDanceShow_Environment
+from .crt import CRT_Environment
+from .cryptarithmetic import Cryptarithmetic_Environment
+from .cube_fixed_local_maximum_counting import Cube_FixedLocalMaximumCounting_Environment
+from .cycle_counting import CycleCounting_Environment
+from .decreasing_digit_counting import DecreasingDigitCounting_Environment
+from .degree_fixed_spanning_tree import DegreeFixed_SpanningTree_Environment
+from .delta_min_popcount import DeltaMinPopcount_Environment
+from .delta_nim_game import DeltaNimGame_Environment
+from .derangement_extension import DerangementExtension_Environment
+from .difference_constraint_system import DifferenceConstraintSystem_Environment
+from .difference_constraint_system_dag import DifferenceConstraintSystemDAG_Environment
+from .different_color_pairing import DifferentColorPairing_Environment
+from .differentiate import Differentiate_Environment
+from .digit_lis_counting import DigitLISCounting_Environment
+from .discrete_logarithm import DiscreteLogarithm_Environment
+from .disinfection import Disinfection_Environment
+from .distinct_array_permutation import DistinctArrayPermutation_Environment
+from .distinct_edge_colored_complete_graph_counting import DistinctEdgeColoredCompleteGraphCounting_Environment
+from .division import Division_Environment
+from .divisor_flip_expectation import DivisorFlipExpectation_Environment
+from .double_cross_counting import DoubleCrossCounting_Environment
+from .double_palindromic_string_counting import DoublePalindromicStringCounting_Environment
+from .double_stack_sorting import DoubleStackSorting_Environment
+from .dyn_dynamite import DynDynamite_Environment
+from .eight_digit_puzzle import EightDigitPuzzle_Environment
+from .emperor_worries import EmperorWorries_Environment
+from .energy_storage_meter import EnergyStorageMeter_Environment
+from .euclid_game import EuclidGame_Environment
+from .even_degree_graph_partitioning import EvenDegreeGraphPartitioning_Environment
+from .expression_adding_parenthese_counting import Expression_AddingParenthese_Counting_Environment
+from .face_right_way import FaceRightWay_Environment
+from .factorial_trailing_zero_count import FactorialTrailingZeroCount_Environment
+from .fbi_binary_tree import FBI_BinaryTree_Environment
+from .fibonacci import Fibonacci_Environment
+from .fibonacci_containing_counting import FibonacciContainingCounting_Environment
+from .fibtrain import Fibtrain_Environment
+from .firework_show import FireworkShow_Environment
+from .fixed_mod_k_selection_counting import FixedModK_Selection_Counting_Environment
+from .fixed_one_edge_num_spanning_tree import FixedOneEdgeNum_SpanningTree_Environment
+from .fractional_programming import FractionalProgramming_Environment
+from .fractional_programming_bipartite_graph_matching import FractionalProgramming_BipartiteGraphMatching_Environment
+from .futoshiki_puzzle import FutoshikiPuzzle_Environment
+from .gas_fire_extinguishers import GasFireExtinguishers_Environment
+from .gaussian_elimination import GaussianElimination_Environment
+from .gcd_fibonacci_product import GCDFibonacciProduct_Environment
+from .gcd_lcm_counting import GcdLcmCounting_Environment
+from .gcd_one_counting import GCDOne_Counting_Environment
+from .gcd_prime_counting import GCDPrime_Counting_Environment
+from .gold_washing import GoldWashing_Environment
+from .gra_minima_game import GraMinimaGame_Environment
+from .grade_ranking_counting import GradeRankingCounting_Environment
+from .graph_contain_tree_counting import GraphContainTreeCounting_Environment
+from .graph_isomorphism import GraphIsomorphism_Environment
+from .grid_bfs import GridBFS_Environment
+from .grid_coloring_counting import GridColoringCounting_Environment
+from .grid_component import GridComponent_Environment
+from .grid_local_minimum_counting import GridLocalMinimumCounting_Environment
+from .grid_parity_construction import GridParityConstruction_Environment
+from .grid_triangle_counting import GridTriangleCounting_Environment
+from .halving_chain_counting import HalvingChainCounting_Environment
+from .hamiltonian_path import HamiltonianPath_Environment
+from .hamiltonian_path_existence import HamiltonianPathExistence_Environment
+from .heap_counting import HeapCounting_Environment
+from .hitori_puzzle import HitoriPuzzle_Environment
+from .hungry_rabbit import HungryRabbit_Environment
+from .hur_warehouse_store import HURWarehouseStore_Environment
+from .imp_party import ImpParty_Environment
+from .individual_sum_bounded_sequence_counting import IndividualSumBounded_SequenceCounting_Environment
+from .integer_factorization_counting import IntegerFactorizationCounting_Environment
+from .integer_programming import IntegerProgramming_Environment
+from .integral import Integral_Environment
+from .inversion_pair import InversionPair_Environment
+from .inversion_pair_k_counting import InversionPairK_Counting_Environment
+from .josephus import Josephus_Environment
+from .jug_puzzle import JugPuzzle_Environment
+from .k_partition import KPartition_Environment
+from .kakurasu import Kakurasu_Environment
+from .kidding_me import KiddingMe_Environment
+from .king_sorting import KingSorting_Environment
+from .klo_blocks import KloBlocks_Environment
+from .knapsack import Knapsack_Environment
+from .knights_and_knaves import KnightsAndKnaves_Environment
+from .kos_dicing import KosDicing_Environment
+from .kth_binary_tree import Kth_BinaryTree_Environment
+from .kth_semi_balanced_bracket_sequence import Kth_SemiBalancedBracketSequence_Environment
+from .kth_subsequence import KthSubsequence_Environment
+from .kur import KUR_Environment
+from .lamp_changing import LampChanging_Environment
+from .land_acquisition import LandAcquisition_Environment
+from .landform_generation_counting import LandformGenerationCounting_Environment
+from .largest_convex_polygon import LargestConvexPolygon_Environment
+from .largest_rectangle_among_points import LargestRectangle_AmongPoints_Environment
+from .las import LAS_Environment
+from .las_laser import LASLaser_Environment
+from .lcm import LCM_Environment
+from .lds_two_counting import LDSTwo_Counting_Environment
+from .light_up_puzzle import LightUpPuzzle_Environment
+from .link_beads import LinkBeads_Environment
+from .lis_lds_concatenation import LIS_LDS_Concatenation_Environment
+from .liz_lollipop import LIZ_Lollipop_Environment
+from .longest_double_palindrome import Longest_DoublePalindrome_Environment
+from .longest_matching_subsequence import Longest_MatchingSubsequence_Environment
+from .longest_maxdiff_bounded_interval import LongestMaxDiffBoundedInterval_Environment
+from .longest_path import LongestPath_Environment
+from .longest_repeated_palindrome import Longest_RepeatedPalindrome_Environment
+from .maf_mafia import MafMafia_Environment
+from .magic_square_puzzle import MagicSquarePuzzle_Environment
+from .making_grade import MakingGrade_Environment
+from .matrix_binary_exponentiation import Matrix_BinaryExponentiation_Environment
+from .matrix_permutation_both_diagonal_one import MatrixPermutation_BothDiagonalOne_Environment
+from .matrix_permutation_equivalence import MatrixPermutationEquivalence_Environment
+from .matrix_permutation_main_diagonal_one import MatrixPermutation_MainDiagonalOne_Environment
+from .matrix_pooling import MatrixPooling_Environment
+from .matrix_rmq_counting import MatrixRMQCounting_Environment
+from .max_different_group_pair_division import MaxDifferentGroupPairDivision_Environment
+from .max_grid_path_intersection import MaxGridPathIntersection_Environment
+from .max_minimum_after_interval_addition import MaxMinimum_AfterIntervalAddition_Environment
+from .max_mult_split import MaxMultSplit_Environment
+from .max_multiplication_fixed_sum import MaxMultiplicationFixedSum_Environment
+from .max_no_conflicting_bombs import MaxNoConflictingBombs_Environment
+from .max_nonadjacent_k_element_sum import Max_NonAdjacent_KElementSum_Environment
+from .max_permutation import MaxPermutation_Environment
+from .max_rmq_expectation import MaxRMQExpectation_Environment
+from .max_segment_coverage_constraint import MaxSegmentCoverageConstraint_Environment
+from .max_sum_lds import MaxSumLDS_Environment
+from .max_three_square_sum import MaxThreeSquareSum_Environment
+from .max_tree_constrained_permutation_weight import Max_TreeConstrainedPermutation_Weight_Environment
+from .max_tree_k_path_coverage import MaxTree_KPathCoverahe_Environment
+from .max_tree_xor_path import MaxTreeXorPath_Environment
+from .max_weight_palindromic_substring import MaxWeightPalindromicSubstring_Environment
+from .max_xor_path import MaxXorPath_Environment
+from .max_xor_set import MaxXorSet_Environment
+from .maximum_achromatic_number import MaximumAchromaticNumber_Environment
+from .maximum_clique import MaximumClique_Environment
+from .maximum_divisor import MaximumDivisor_Environment
+from .maximum_independent_set_grid import MaximumIndependentSetGrid_Environment
+from .maximum_independent_set_tree import Maximum_IndependentSet_Tree_Environment
+from .maximum_lexicographical_order_subsequence import MaximumLexicographicalOrderSubsequence_Environment
+from .maximum_point_segment_matching import MaximumPointSegmentMatching_Environment
+from .maximum_subsequence_num import Maximum_SubsequenceNum_Environment
+from .maximum_weight_matching import MaximumWeightMatching_Environment
+from .maze import Maze_Environment
+from .min_conversion_to_cycle_cost import MinConversionToCycleCost_Environment
+from .min_cost_reducing_lnds import MinCostReducingLNDS_Environment
+from .min_cost_tree_coverage import MinCostTreeCoverage_Environment
+from .min_cube_assignment import MinCubeAssignment_Environment
+from .min_division_sum_xor import MinDivisionSumXor_Environment
+from .min_inorder_binary_tree import MinInorderBinaryTree_Environment
+from .min_kdivisor_number import MinKDivisorNumber_Environment
+from .min_no_solution_linear_diophantine_equation import MinNoSolutionLinearDiophantineEquation_Environment
+from .min_nonsubstring import MinNonsubstring_Environment
+from .min_pairsum_multiplication_permutation import MinPairSumMultiplicationPermutation_Environment
+from .min_path_cover_dag import MinPathCover_DAG_Environment
+from .min_sum_chebyshev_distance import MinSumChebyshevDistance_Environment
+from .min_sum_distance_square import MinSumDistanceSquare_Environment
+from .min_sum_pre_xor import MinSumPreXor_Environment
+from .min_swap_two_permutations import MinSwapTwoPermutations_Environment
+from .min_xor_pair import MinXorPair_Environment
+from .minesweeping import Minesweeping_Environment
+from .minimal_cyclic_shift import MinimalCyclicShift_Environment
+from .minimum_chromatic_number import MinimumChromaticNumber_Environment
+from .minimum_chromatic_number_segment_overlap import MinimumChromaticNumber_SegmentOverlap_Environment
+from .minimum_cost_maximum_flow import MinimumCost_MaximumFlow_Environment
+from .minimum_crossing_edges_graph_partition import Minimum_CrossingEdges_GraphPartition_Environment
+from .minimum_directed_spanning_tree import MinimumDirectedSpanningTree_Environment
+from .minimum_dominating_interval import Minimum_DominatingInterval_Environment
+from .minimum_dominating_set import Minimum_DominatingSet_Environment
+from .minimum_dominating_set_grid import Minimum_DominatingSet_Grid_Environment
+from .minimum_fibonacci_representation import MinimumFibonacciRepresentation_Environment
+from .minimum_harmonious_chromatic_number import MinimumHarmoniousChromaticNumber_Environment
+from .minimum_interval_coverage import MinimumIntervalCoverage_Environment
+from .minimum_max_abs_slicer import Minimum_MaxAbsSlicer_Environment
+from .minimum_max_slicer import Minimum_MaxSlicer_Environment
+from .minimum_ratio_path import MinimumRatioPath_Environment
+from .minimum_spanning_tree import MinimumSpanningTree_Environment
+from .minimum_spanning_tree_counting import MinimumSpanningTreeCounting_Environment
+from .minimum_steiner_tree import MinimumSteinerTree_Environment
+from .minimum_sum_difference_submatrix import MinimumSumDifferenceSubmatrix_Environment
+from .minimum_tree_weighted_dominating_ancestor import MinimumTreeWeightedDominatingAncestor_Environment
+from .minimum_unconflicted_grid_kmax import MinimumUnconflictedGridKMax_Environment
+from .minimum_vertex_cover import Minimum_VertexCover_Environment
+from .minimum_weighted_spanning_tree import MinimumWeightedSpanningTree_Environment
+from .mitter_transportation import MitterTransportation_Environment
+from .mixed_graph_eulerian_circuit import MixedGraphEulerianCircuit_Environment
+from .money_charging_game import MoneyChargingGame_Environment
+from .monochrome_block_counting import MonochromeBlockCounting_Environment
+from .monotonic_stack import MonotonicStack_Environment
+from .most_component_tree_removing_two_paths import MostComponentTreeRemovingTwoPaths_Environment
+from .most_num_edge_non_self_isomorphism import MostNumEdge_NonSelfIsomorphism_Environment
+from .multidrink import MultiDrink_Environment
+from .multiple_flipping_game import MultipleFlippingGame_Environment
+from .multiplication import Multiplication_Environment
+from .myj import MYJ_Environment
+from .nand_result_counting import NANDResultCounting_Environment
+from .negative_base import NegativeBase_Environment
+from .new_nim_game import NewNimGame_Environment
+from .next_palindromic import NextPalindromic_Environment
+from .nine_puzzle import NinePuzzle_Environment
+from .no_adjacent_girl_counting import NoAdjacentGirlCounting_Environment
+from .no_double_triple_counting import NoDoubleTripleCounting_Environment
+from .not_containing_string_counting import NotContainingStringCounting_Environment
+from .number_partition_counting import NumberPartitionCounting_Environment
+from .numbrix import Numbrix_Environment
+from .odd_visitation import OddVisitation_Environment
+from .odl_distance import ODLDistance_Environment
+from .pair_more_one_counting import PairMoreOneCounting_Environment
+from .palembang_bridges import PalembangBridges_Environment
+from .palindrome_partition_counting import PalindromePartitionCounting_Environment
+from .palindromic_substring_number_counting import PalindromicSubstringNumberCounting_Environment
+from .pan_solar_panels import PanSolarPanels_Environment
+from .path_no_going_back_counting import Path_NoGoingBack_Counting_Environment
+from .patrol import Patrol_Environment
+from .pcp_permutation import PCPPermutation_Environment
+from .pipeline_arrangement import PipelineArrangement_Environment
+from .pol_polarization import POLPolarization_Environment
+from .polya_model import PolyaModel_Environment
+from .polynomial_factorization import PolynomialFactorization_Environment
+from .polynomial_interpolation import PolynomialInterpolation_Environment
+from .polynomial_minimum import PolynomialMinimum_Environment
+from .polynomial_remainder import PolynomialRemainder_Environment
+from .power_cycle import PowerCycle_Environment
+from .power_shortcut import PowerShortcut_Environment
+from .powernest import PowerNest_Environment
+from .prefix_concatenation import PrefixConcatenation_Environment
+from .prefix_product_mod_distinct_permutation import PrefixProductMODDistinctPermutation_Environment
+from .prefix_sum_mod_distinct_permutation import PrefixSumMODDistinctPermutation_Environment
+from .prefixuffix import Prefixuffix_Environment
+from .preorder_traversal import PreorderTraversal_Environment
+from .prime_graph_minimum_chromatic_number import PrimeGraph_MinimumChromaticNumber_Environment
+from .protecting_flowers import ProtectingFlowers_Environment
+from .pythagorean_graph_independent_set_counting import PythagoreanGraph_IndependentSetCounting_Environment
+from .quad_magic_items import QuadMagicItems_Environment
+from .quadratic_function_segmentation import QuadraticFunctionSegmentation_Environment
+from .quantum_lock_puzzle import QuantumLockPuzzle_Environment
+from .queen_placement import QueenPlacement_Environment
+from .random_range_max_expectation import RandomRangeMaxExpectation_Environment
+from .range_constrained_increasing_sequence_counting import RangeConstrained_IncreasingSequence_Counting_Environment
+from .range_four_sequence_construction import RangeFourSequenceConstruction_Environment
+from .range_shrinking_sequence_counting import RangeShrinkingSequenceCounting_Environment
+from .recursive_function import RecursiveFunction_Environment
+from .recursive_sequence_sum_construction import RecursiveSequenceSumConstruction_Environment
+from .repeat_sequence_lnds import RepeatSequenceLNDS_Environment
+from .root_extraction import RootExtraction_Environment
+from .round_robin import RoundRobin_Environment
+from .roundtable_assignment import RoundTableAssignment_Environment
+from .royal_lock_counting import RoyalLockCounting_Environment
+from .salad_bar import SaladBar_Environment
+from .salesman_fatigue import SalesmanFatigue_Environment
+from .same_adjacency_counting import SameAdjacencyCounting_Environment
+from .sat import SAT_Environment
+from .scc_sequence_counting import SCC_Sequence_Counting_Environment
+from .secret_cow_code import SecretCowCode_Environment
+from .segment_min_length_equal_counting import SegmentMinLengthEqual_Counting_Environment
+from .segment_tree_sorting_counting import SegmentTreeSortingCounting_Environment
+from .self_power_sequence_mod import SelfPowerSequenceMOD_Environment
+from .set_cover import SetCover_Environment
+from .set_splitting import SetSplitting_Environment
+from .shared_substring_counting import SharedSubstringCounting_Environment
+from .shortest_path import ShortestPath_Environment
+from .shortest_path_count_construction import ShortestPathCountConstruction_Environment
+from .shortest_unicolor_substring import ShortestUnicolorSubstring_Environment
+from .singing_girl_story import SingingGirlStory_Environment
+from .single_stack_sorting import SingleStackSorting_Environment
+from .ska_rock_garden import SkaRockGarden_Environment
+from .skyscraper_puzzle import SkyscraperPuzzle_Environment
+from .skyscraper_sum_puzzle import SkyscraperSumPuzzle_Environment
+from .sliding_window import SlidingWindow_Environment
+from .slo_elephants import SLOElephants_Environment
+from .smallest_binary_multiple import SmallestBinaryMultiple_Environment
+from .smallest_circle import SmallestCircle_Environment
+from .sorting import Sorting_Environment
+from .spiral_matrix import SpiralMatrix_Environment
+from .splitting_game import SplittingGame_Environment
+from .spy_network import SpyNetwork_Environment
+from .squ_squarks import SquSquarks_Environment
+from .square_undamaged_point_counting import SquareUndamagedPointCounting_Environment
+from .star_battle import StarBattle_Environment
+from .stirling_second import StirlingSecond_Environment
+from .stone_game import StoneGame_Environment
+from .stone_intervals_game import StoneIntervalsGame_Environment
+from .string_partition_shuffle import StringPartitionShuffle_Environment
+from .string_reversal_construction import StringReversalConstruction_Environment
+from .stu_well import STUWell_Environment
+from .stunt_flying import StuntFlying_Environment
+from .subarray_sum_xor import SubarraySumXor_Environment
+from .subarray_xor_sum import SubarrayXorSum_Environment
+from .subgraph_isomorphism import SubgraphIsomorphism_Environment
+from .submatrix_sum_divisible_counting import SubmatrixSumDivisibleCounting_Environment
+from .subsequence_reversal_lnds import SubsequenceReversalLNDS_Environment
+from .subset_sum import SubsetSum_Environment
+from .subset_sum_sequence import SubsetSumSequence_Environment
+from .sudoku import Sudoku_Environment
+from .sum_divisor_num import Sum_DivisorNum_Environment
+from .sum_gcd import SumGCD_Environment
+from .sum_gcd_with_individual import SumGCDWithIndividual_Environment
+from .sum_lcm import SumLCM_Environment
+from .sum_manhattan_curved_surface import SumManhattan_CurvedSurface_Environment
+from .sum_mod import SumMOD_Environment
+from .sum_phi_interval import SumPHIInterval_Environment
+from .sum_product_divisor_num import SumProductDivisorNum_Environment
+from .sum_pseudo_euclidean import SumPseudoEuclidean_Environment
+from .sum_set_multiplication import SumSetMultiplication_Environment
+from .sum_spanning_tree_gcd import SumSpanningTreeGCD_Environment
+from .sum_triangle_area import SumTriangleArea_Environment
+from .sum_xor_divisor_num import SumXorDivisorNum_Environment
+from .survo_puzzle import SurvoPuzzle_Environment
+from .taking_prime_game import TakingPrimeGame_Environment
+from .task_arrangement import TaskArrangement_Environment
+from .tetris_attack import TetrisAttack_Environment
+from .three_string_common_subsequence_counting import ThreeStringCommonSubsequenceCounting_Environment
+from .three_vertex_cycle_counting import ThreeVertexCycleCounting_Environment
+from .topological_sort import TopologicalSort_Environment
+from .topological_sort_minimal_lexicographical_order import TopologicalSort_MinimalLexicographicalOrder_Environment
+from .tournament_longest_path import Tournament_LongestPath_Environment
+from .transmission_delay import TransmissionDelay_Environment
+from .tree_add_one_edge_diameter import TreeAddOneEdgeDiameter_Environment
+from .tree_center import TreeCenter_Environment
+from .tree_change_one_edge_diameter import TreeChangeOneEdgeDiameter_Environment
+from .tree_coloring import TreeColoring_Environment
+from .tree_distance_equal_triad_counting import Tree_DistanceEqualTriad_Counting_Environment
+from .tree_dynamic_xor_zero_path import TreeDynamic_XORZeroPath_Environment
+from .tree_elimination_expectation import TreeElimination_Expectation_Environment
+from .tree_even_partitioning import TreeEvenPartitioning_Environment
+from .tree_maximum_visited_vertex import TreeMaximumVisitedVertex_Environment
+from .tree_random_walk_expectation import TreeRandomWalkExpectation_Environment
+from .tree_topological_sequence_counting import TreeTopologicalSequenceCounting_Environment
+from .triumphal_arch import TriumphalArch_Environment
+from .twiddle_puzzle import TwiddlePuzzle_Environment
+from .two_sat import TwoSAT_Environment
+from .two_set_all_coprime_counting import TwoSet_AllCoprime_Counting_Environment
+from .undamaged_submatrix_counting import UndamagedSubmatrixCounting_Environment
+from .value_diminishing_selection import ValueDiminishingSelection_Environment
+from .vertex_k_center import Vertex_KCenter_Environment
+from .virus_synthesis import VirusSynthesis_Environment
+from .visible_line import VisibleLine_Environment
+from .warehouse_construction import WarehouseConstruction_Environment
+from .weighted_binarytree import WeightedBinaryTree_Environment
+from .weighted_lis import WeightedLIS_Environment
+from .whack_a_mole import WhackAMole_Environment
+from .wil import WIL_Environment
+from .wyc import WYC_Environment
+from .wyr_leveling_ground import WYRLevelingGround_Environment
+from .xor_equation_counting import XorEquationCounting_Environment
+from .zero_prefix_subset_counting import ZeroPrefixSubsetCounting_Environment
+identifier2environment = {
+    "ABProgramSimulation" : ABProgramSimulation_Environment,
+    "AddMultiple_Divisible_Counting" : AddMultiple_Divisible_Counting_Environment,
+    "AdditionTable" : AdditionTable_Environment,
+    "AlmostCompleteGraphCycleCounting" : AlmostCompleteGraphCycleCounting_Environment,
+    "AndOr_Sequence_Counting" : AndOr_Sequence_Counting_Environment,
+    "AntiPalindromicSubstringCounting" : AntiPalindromicSubstringCounting_Environment,
+    "Axis_KCenter" : Axis_KCenter_Environment,
+    "BAJBytecomputer" : BAJBytecomputer_Environment,
+    "BannedPointSupersetPathCounting" : BannedPointSupersetPathCounting_Environment,
+    "BanyanHeart" : BanyanHeart_Environment,
+    "BEZMinimalistSecurity" : BEZMinimalistSecurity_Environment,
+    "BezoutIdentity" : BezoutIdentity_Environment,
+    "Binario" : Binario_Environment,
+    "Binario_NoAdjacencyRequirement" : Binario_NoAdjacencyRequirement_Environment,
+    "BinaryAlternation" : BinaryAlternation_Environment,
+    "BinaryLinearEquation_SolutionCounting" : BinaryLinearEquation_SolutionCounting_Environment,
+    "BinaryTreeLeafNumExpectation" : BinaryTreeLeafNumExpectation_Environment,
+    "BitEquationCounting" : BitEquationCounting_Environment,
+    "BitAndZero_PathCounting" : BitAndZero_PathCounting_Environment,
+    "BitwiseOperationSequenceCounting" : BitwiseOperationSequenceCounting_Environment,
+    "BlockImage" : BlockImage_Environment,
+    "BoundedAdjacencyDifference_Permutation_Counting" : BoundedAdjacencyDifference_Permutation_Counting_Environment,
+    "BoundedIntervalIntersection" : BoundedIntervalIntersection_Environment,
+    "BoundedMeanSubarrayCounting" : BoundedMeanSubarrayCounting_Environment,
+    "BoundedSubarrayCounting" : BoundedSubarrayCounting_Environment,
+    "BoxScheduling" : BoxScheduling_Environment,
+    "Bridge" : Bridge_Environment,
+    "BubbleSwapLowerBound_PermutationCounting" : BubbleSwapLowerBound_PermutationCounting_Environment,
+    "BucketSorting" : BucketSorting_Environment,
+    "CampfireParty" : CampfireParty_Environment,
+    "CampsitePuzzle" : CampsitePuzzle_Environment,
+    "Canon" : Canon_Environment,
+    "CantorExpansion" : CantorExpansion_Environment,
+    "CapitalCityEffect" : CapitalCityEffect_Environment,
+    "CardColoringCounting" : CardColoringCounting_Environment,
+    "CatalanNumberMod" : CatalanNumberMod_Environment,
+    "CheckAllCycleXorZero" : CheckAllCycleXorZero_Environment,
+    "ChoHamsters" : ChoHamsters_Environment,
+    "Cinema" : Cinema_Environment,
+    "Circuit" : Circuit_Environment,
+    "CirculatingDecimalCounting" : CirculatingDecimalCounting_Environment,
+    "CirculatingGrid" : CirculatingGrid_Environment,
+    "CleaningUp" : CleaningUp_Environment,
+    "ClearSymmetry" : ClearSymmetry_Environment,
+    "Clique_IndependentSet_Partitioning_Counting" : Clique_IndependentSet_Partitioning_Counting_Environment,
+    "CoinSquareGame" : CoinSquareGame_Environment,
+    "ColoringCounting" : ColoringCounting_Environment,
+    "CombinationOddSubsequenceCounting" : CombinationOddSubsequenceCounting_Environment,
+    "ConcatenationPartitionCountingSum" : ConcatenationPartitionCountingSum_Environment,
+    "CongruentEquation" : CongruentEquation_Environment,
+    "ConstructHackInterval" : ConstructHackInterval_Environment,
+    "ConvexHull" : ConvexHull_Environment,
+    "Cornfield" : Cornfield_Environment,
+    "CountdownEqual" : CountdownEqual_Environment, "CountdownClose" : CountdownClose_Environment,
+    "CowDanceShow" : CowDanceShow_Environment,
+    "CRT" : CRT_Environment,
+    "Cryptarithmetic" : Cryptarithmetic_Environment,
+    "Cube_FixedLocalMaximumCounting" : Cube_FixedLocalMaximumCounting_Environment,
+    "CycleCounting" : CycleCounting_Environment,
+    "DecreasingDigitCounting" : DecreasingDigitCounting_Environment,
+    "DegreeFixed_SpanningTree" : DegreeFixed_SpanningTree_Environment,
+    "DeltaMinPopcount" : DeltaMinPopcount_Environment,
+    "DeltaNimGame" : DeltaNimGame_Environment,
+    "DerangementExtension" : DerangementExtension_Environment,
+    "DifferenceConstraintSystem" : DifferenceConstraintSystem_Environment,
+    "DifferenceConstraintSystemDAG" : DifferenceConstraintSystemDAG_Environment,
+    "DifferentColorPairing" : DifferentColorPairing_Environment,
+    "Differentiate" : Differentiate_Environment,
+    "DigitLISCounting" : DigitLISCounting_Environment,
+    "DiscreteLogarithm" : DiscreteLogarithm_Environment,
+    "Disinfection" : Disinfection_Environment,
+    "DistinctArrayPermutation" : DistinctArrayPermutation_Environment,
+    "DistinctEdgeColoredCompleteGraphCounting" : DistinctEdgeColoredCompleteGraphCounting_Environment,
+    "Division" : Division_Environment,
+    "DivisorFlipExpectation" : DivisorFlipExpectation_Environment,
+    "DoubleCrossCounting" : DoubleCrossCounting_Environment,
+    "DoublePalindromicStringCounting" : DoublePalindromicStringCounting_Environment,
+    "DoubleStackSorting" : DoubleStackSorting_Environment,
+    "DynDynamite" : DynDynamite_Environment,
+    "EightDigitPuzzle" : EightDigitPuzzle_Environment,
+    "EmperorWorries" : EmperorWorries_Environment,
+    "EnergyStorageMeter" : EnergyStorageMeter_Environment,
+    "EuclidGame" : EuclidGame_Environment,
+    "EvenDegreeGraphPartitioning" : EvenDegreeGraphPartitioning_Environment,
+    "Expression_AddingParenthese_Counting" : Expression_AddingParenthese_Counting_Environment,
+    "FaceRightWay" : FaceRightWay_Environment,
+    "FactorialTrailingZeroCount" : FactorialTrailingZeroCount_Environment,
+    "FBI_BinaryTree" : FBI_BinaryTree_Environment,
+    "Fibonacci" : Fibonacci_Environment,
+    "FibonacciContainingCounting" : FibonacciContainingCounting_Environment,
+    "Fibtrain" : Fibtrain_Environment,
+    "FireworkShow" : FireworkShow_Environment,
+    "FixedModK_Selection_Counting" : FixedModK_Selection_Counting_Environment,
+    "FixedOneEdgeNum_SpanningTree" : FixedOneEdgeNum_SpanningTree_Environment,
+    "FractionalProgramming" : FractionalProgramming_Environment,
+    "FractionalProgramming_BipartiteGraphMatching" : FractionalProgramming_BipartiteGraphMatching_Environment,
+    "FutoshikiPuzzle" : FutoshikiPuzzle_Environment,
+    "GasFireExtinguishers" : GasFireExtinguishers_Environment,
+    "GaussianElimination" : GaussianElimination_Environment,
+    "GCDFibonacciProduct" : GCDFibonacciProduct_Environment,
+    "GcdLcmCounting" : GcdLcmCounting_Environment,
+    "GCDOne_Counting" : GCDOne_Counting_Environment,
+    "GCDPrime_Counting" : GCDPrime_Counting_Environment,
+    "GoldWashing" : GoldWashing_Environment,
+    "GraMinimaGame" : GraMinimaGame_Environment,
+    "GradeRankingCounting" : GradeRankingCounting_Environment,
+    "GraphContainTreeCounting" : GraphContainTreeCounting_Environment,
+    "GraphIsomorphism" : GraphIsomorphism_Environment,
+    "GridBFS" : GridBFS_Environment,
+    "GridColoringCounting" : GridColoringCounting_Environment,
+    "GridComponent" : GridComponent_Environment,
+    "GridLocalMinimumCounting" : GridLocalMinimumCounting_Environment,
+    "GridParityConstruction" : GridParityConstruction_Environment,
+    "GridTriangleCounting" : GridTriangleCounting_Environment,
+    "HalvingChainCounting" : HalvingChainCounting_Environment,
+    "HamiltonianPath" : HamiltonianPath_Environment,
+    "HamiltonianPathExistence" : HamiltonianPathExistence_Environment,
+    "HeapCounting" : HeapCounting_Environment,
+    "HitoriPuzzle" : HitoriPuzzle_Environment,
+    "HungryRabbit" : HungryRabbit_Environment,
+    "HURWarehouseStore" : HURWarehouseStore_Environment,
+    "ImpParty" : ImpParty_Environment,
+    "IndividualSumBounded_SequenceCounting" : IndividualSumBounded_SequenceCounting_Environment,
+    "IntegerFactorizationCounting" : IntegerFactorizationCounting_Environment,
+    "IntegerProgramming" : IntegerProgramming_Environment,
+    "Integral" : Integral_Environment,
+    "InversionPair" : InversionPair_Environment,
+    "InversionPairK_Counting" : InversionPairK_Counting_Environment,
+    "Josephus" : Josephus_Environment,
+    "JugPuzzle" : JugPuzzle_Environment,
+    "KPartition" : KPartition_Environment,
+    "Kakurasu" : Kakurasu_Environment,
+    "KiddingMe" : KiddingMe_Environment,
+    "KingSorting" : KingSorting_Environment,
+    "KloBlocks" : KloBlocks_Environment,
+    "Knapsack" : Knapsack_Environment,
+    "KnightsAndKnaves" : KnightsAndKnaves_Environment,
+    "KosDicing" : KosDicing_Environment,
+    "Kth_BinaryTree" : Kth_BinaryTree_Environment,
+    "Kth_SemiBalancedBracketSequence" : Kth_SemiBalancedBracketSequence_Environment,
+    "KthSubsequence" : KthSubsequence_Environment,
+    "KUR" : KUR_Environment,
+    "LampChanging" : LampChanging_Environment,
+    "LandAcquisition" : LandAcquisition_Environment,
+    "LandformGenerationCounting" : LandformGenerationCounting_Environment,
+    "LargestConvexPolygon" : LargestConvexPolygon_Environment,
+    "LargestRectangle_AmongPoints" : LargestRectangle_AmongPoints_Environment,
+    "LAS" : LAS_Environment,
+    "LASLaser" : LASLaser_Environment,
+    "LCM" : LCM_Environment,
+    "LDSTwo_Counting" : LDSTwo_Counting_Environment,
+    "LightUpPuzzle" : LightUpPuzzle_Environment,
+    "LinkBeads" : LinkBeads_Environment,
+    "LIS_LDS_Concatenation" : LIS_LDS_Concatenation_Environment,
+    "LIZ_Lollipop" : LIZ_Lollipop_Environment,
+    "Longest_DoublePalindrome" : Longest_DoublePalindrome_Environment,
+    "Longest_MatchingSubsequence" : Longest_MatchingSubsequence_Environment,
+    "LongestMaxDiffBoundedInterval" : LongestMaxDiffBoundedInterval_Environment,
+    "LongestPath" : LongestPath_Environment,
+    "Longest_RepeatedPalindrome" : Longest_RepeatedPalindrome_Environment,
+    "MafMafia" : MafMafia_Environment,
+    "MagicSquarePuzzle" : MagicSquarePuzzle_Environment,
+    "MakingGrade" : MakingGrade_Environment,
+    "Matrix_BinaryExponentiation" : Matrix_BinaryExponentiation_Environment,
+    "MatrixPermutation_BothDiagonalOne" : MatrixPermutation_BothDiagonalOne_Environment,
+    "MatrixPermutationEquivalence" : MatrixPermutationEquivalence_Environment,
+    "MatrixPermutation_MainDiagonalOne" : MatrixPermutation_MainDiagonalOne_Environment,
+    "MatrixPooling" : MatrixPooling_Environment,
+    "MatrixRMQCounting" : MatrixRMQCounting_Environment,
+    "MaxDifferentGroupPairDivision" : MaxDifferentGroupPairDivision_Environment,
+    "MaxGridPathIntersection" : MaxGridPathIntersection_Environment,
+    "MaxMinimum_AfterIntervalAddition" : MaxMinimum_AfterIntervalAddition_Environment,
+    "MaxMultSplit" : MaxMultSplit_Environment,
+    "MaxMultiplicationFixedSum" : MaxMultiplicationFixedSum_Environment,
+    "MaxNoConflictingBombs" : MaxNoConflictingBombs_Environment,
+    "Max_NonAdjacent_KElementSum" : Max_NonAdjacent_KElementSum_Environment,
+    "MaxPermutation" : MaxPermutation_Environment,
+    "MaxRMQExpectation" : MaxRMQExpectation_Environment,
+    "MaxSegmentCoverageConstraint" : MaxSegmentCoverageConstraint_Environment,
+    "MaxSumLDS" : MaxSumLDS_Environment,
+    "MaxThreeSquareSum" : MaxThreeSquareSum_Environment,
+    "Max_TreeConstrainedPermutation_Weight" : Max_TreeConstrainedPermutation_Weight_Environment,
+    "MaxTree_KPathCoverage" : MaxTree_KPathCoverahe_Environment,
+    "MaxTreeXorPath" : MaxTreeXorPath_Environment,
+    "MaxWeightPalindromicSubstring" : MaxWeightPalindromicSubstring_Environment,
+    "MaxXorPath" : MaxXorPath_Environment,
+    "MaxXorSet" : MaxXorSet_Environment,
+    "MaximumAchromaticNumber" : MaximumAchromaticNumber_Environment,
+    "MaximumClique" : MaximumClique_Environment,
+    "MaximumDivisor" : MaximumDivisor_Environment,
+    "MaximumIndependentSetGrid" : MaximumIndependentSetGrid_Environment,
+    "Maximum_IndependentSet_Tree" : Maximum_IndependentSet_Tree_Environment,
+    "MaximumLexicographicalOrderSubsequence" : MaximumLexicographicalOrderSubsequence_Environment,
+    "MaximumPointSegmentMatching" : MaximumPointSegmentMatching_Environment,
+    "Maximum_SubsequenceNum" : Maximum_SubsequenceNum_Environment,
+    "MaximumWeightMatching" : MaximumWeightMatching_Environment,
+    "Maze" : Maze_Environment,
+    "MinConversionToCycleCost" : MinConversionToCycleCost_Environment,
+    "MinCostReducingLNDS" : MinCostReducingLNDS_Environment,
+    "MinCostTreeCoverage" : MinCostTreeCoverage_Environment,
+    "MinCubeAssignment" : MinCubeAssignment_Environment,
+    "MinDivisionSumXor" : MinDivisionSumXor_Environment,
+    "MinInorderBinaryTree" : MinInorderBinaryTree_Environment,
+    "MinKDivisorNumber" : MinKDivisorNumber_Environment,
+    "MinNoSolutionLinearDiophantineEquation" : MinNoSolutionLinearDiophantineEquation_Environment,
+    "MinNonsubstring" : MinNonsubstring_Environment,
+    "MinPairSumMultiplicationPermutation" : MinPairSumMultiplicationPermutation_Environment,
+    "MinPathCover_DAG" : MinPathCover_DAG_Environment,
+    "MinSumChebyshevDistance" : MinSumChebyshevDistance_Environment,
+    "MinSumDistanceSquare" : MinSumDistanceSquare_Environment,
+    "MinSumPreXor" : MinSumPreXor_Environment,
+    "MinSwapTwoPermutations" : MinSwapTwoPermutations_Environment,
+    "MinXorPair" : MinXorPair_Environment,
+    "Minesweeping" : Minesweeping_Environment,
+    "MinimalCyclicShift" : MinimalCyclicShift_Environment,
+    "MinimumChromaticNumber" : MinimumChromaticNumber_Environment,
+    "MinimumChromaticNumber_SegmentOverlap" : MinimumChromaticNumber_SegmentOverlap_Environment,
+    "MinimumCost_MaximumFlow" : MinimumCost_MaximumFlow_Environment,
+    "Minimum_CrossingEdges_GraphPartition" : Minimum_CrossingEdges_GraphPartition_Environment,
+    "MinimumDirectedSpanningTree" : MinimumDirectedSpanningTree_Environment,
+    "Minimum_DominatingInterval" : Minimum_DominatingInterval_Environment,
+    "Minimum_DominatingSet" : Minimum_DominatingSet_Environment,
+    "Minimum_DominatingSet_Grid" : Minimum_DominatingSet_Grid_Environment,
+    "MinimumFibonacciRepresentation" : MinimumFibonacciRepresentation_Environment,
+    "MinimumHarmoniousChromaticNumber" : MinimumHarmoniousChromaticNumber_Environment,
+    "MinimumIntervalCoverage" : MinimumIntervalCoverage_Environment,
+    "Minimum_MaxAbsSlicer" : Minimum_MaxAbsSlicer_Environment,
+    "Minimum_MaxSlicer" : Minimum_MaxSlicer_Environment,
+    "MinimumRatioPath" : MinimumRatioPath_Environment,
+    "MinimumSpanningTree" : MinimumSpanningTree_Environment,
+    "MinimumSpanningTreeCounting" : MinimumSpanningTreeCounting_Environment,
+    "MinimumSteinerTree" : MinimumSteinerTree_Environment,
+    "MinimumSumDifferenceSubmatrix" : MinimumSumDifferenceSubmatrix_Environment,
+    "MinimumTreeWeightedDominatingAncestor" : MinimumTreeWeightedDominatingAncestor_Environment,
+    "MinimumUnconflictedGridKMax" : MinimumUnconflictedGridKMax_Environment,
+    "Minimum_VertexCover" : Minimum_VertexCover_Environment,
+    "MinimumWeightedSpanningTree" : MinimumWeightedSpanningTree_Environment,
+    "MitterTransportation" : MitterTransportation_Environment,
+    "MixedGraphEulerianCircuit" : MixedGraphEulerianCircuit_Environment,
+    "MoneyChargingGame" : MoneyChargingGame_Environment,
+    "MonochromeBlockCounting" : MonochromeBlockCounting_Environment,
+    "MonotonicStack" : MonotonicStack_Environment,
+    "MostComponentTreeRemovingTwoPaths" : MostComponentTreeRemovingTwoPaths_Environment,
+    "MostNumEdge_NonSelfIsomorphism" : MostNumEdge_NonSelfIsomorphism_Environment,
+    "MultiDrink" : MultiDrink_Environment,
+    "MultipleFlippingGame" : MultipleFlippingGame_Environment,
+    "Multiplication" : Multiplication_Environment,
+    "MYJ" : MYJ_Environment,
+    "NANDResultCounting" : NANDResultCounting_Environment,
+    "NegativeBase" : NegativeBase_Environment,
+    "NewNimGame" : NewNimGame_Environment,
+    "NextPalindromic" : NextPalindromic_Environment,
+    "NinePuzzle" : NinePuzzle_Environment,
+    "NoAdjacentGirlCounting" : NoAdjacentGirlCounting_Environment,
+    "NoDoubleTripleCounting" : NoDoubleTripleCounting_Environment,
+    "NotContainingStringCounting" : NotContainingStringCounting_Environment,
+    "NumberPartitionCounting" : NumberPartitionCounting_Environment,
+    "Numbrix" : Numbrix_Environment,
+    "OddVisitation" : OddVisitation_Environment,
+    "ODLDistance" : ODLDistance_Environment,
+    "PairMoreOneCounting" : PairMoreOneCounting_Environment,
+    "PalembangBridges" : PalembangBridges_Environment,
+    "PalindromePartitionCounting" : PalindromePartitionCounting_Environment,
+    "PalindromicSubstringNumberCounting" : PalindromicSubstringNumberCounting_Environment,
+    "PanSolarPanels" : PanSolarPanels_Environment,
+    "Path_NoGoingBack_Counting" : Path_NoGoingBack_Counting_Environment,
+    "Patrol" : Patrol_Environment,
+    "PCPPermutation" : PCPPermutation_Environment,
+    "PipelineArrangement" : PipelineArrangement_Environment,
+    "POLPolarization" : POLPolarization_Environment,
+    "PolyaModel" : PolyaModel_Environment,
+    "PolynomialFactorization" : PolynomialFactorization_Environment,
+    "PolynomialInterpolation" : PolynomialInterpolation_Environment,
+    "PolynomialMinimum" : PolynomialMinimum_Environment,
+    "PolynomialRemainder" : PolynomialRemainder_Environment,
+    "PowerCycle" : PowerCycle_Environment,
+    "PowerShortcut" : PowerShortcut_Environment,
+    "PowerNest" : PowerNest_Environment,
+    "PrefixConcatenation" : PrefixConcatenation_Environment,
+    "PrefixProductMODDistinctPermutation" : PrefixProductMODDistinctPermutation_Environment,
+    "PrefixSumMODDistinctPermutation" : PrefixSumMODDistinctPermutation_Environment,
+    "Prefixuffix" : Prefixuffix_Environment,
+    "PreorderTraversal" : PreorderTraversal_Environment,
+    "PrimeGraph_MinimumChromaticNumber" : PrimeGraph_MinimumChromaticNumber_Environment,
+    "ProtectingFlowers" : ProtectingFlowers_Environment,
+    "PythagoreanGraph_IndependentSetCounting" : PythagoreanGraph_IndependentSetCounting_Environment,
+    "QuadMagicItems" : QuadMagicItems_Environment,
+    "QuadraticFunctionSegmentation" : QuadraticFunctionSegmentation_Environment,
+    "QuantumLockPuzzle" : QuantumLockPuzzle_Environment,
+    "QueenPlacement" : QueenPlacement_Environment,
+    "RandomRangeMaxExpectation" : RandomRangeMaxExpectation_Environment,
+    "RangeConstrained_IncreasingSequence_Counting" : RangeConstrained_IncreasingSequence_Counting_Environment,
+    "RangeFourSequenceConstruction" : RangeFourSequenceConstruction_Environment,
+    "RangeShrinkingSequenceCounting" : RangeShrinkingSequenceCounting_Environment,
+    "RecursiveFunction" : RecursiveFunction_Environment,
+    "RecursiveSequenceSumConstruction" : RecursiveSequenceSumConstruction_Environment,
+    "RepeatSequenceLNDS" : RepeatSequenceLNDS_Environment,
+    "RootExtraction" : RootExtraction_Environment,
+    "RoundRobin" : RoundRobin_Environment,
+    "RoundTableAssignment" : RoundTableAssignment_Environment,
+    "RoyalLockCounting" : RoyalLockCounting_Environment,
+    "SaladBar" : SaladBar_Environment,
+    "SalesmanFatigue" : SalesmanFatigue_Environment,
+    "SameAdjacencyCounting" : SameAdjacencyCounting_Environment,
+    "SAT" : SAT_Environment,
+    "SCC_Sequence_Counting" : SCC_Sequence_Counting_Environment,
+    "SecretCowCode" : SecretCowCode_Environment,
+    "SegmentMinLengthEqual_Counting" : SegmentMinLengthEqual_Counting_Environment,
+    "SegmentTreeSortingCounting" : SegmentTreeSortingCounting_Environment,
+    "SelfPowerSequenceMOD" : SelfPowerSequenceMOD_Environment,
+    "SetCover" : SetCover_Environment,
+    "SetSplitting" : SetSplitting_Environment,
+    "SharedSubstringCounting" : SharedSubstringCounting_Environment,
+    "ShortestPath" : ShortestPath_Environment,
+    "ShortestPathCountConstruction" : ShortestPathCountConstruction_Environment,
+    "ShortestUnicolorSubstring" : ShortestUnicolorSubstring_Environment,
+    "SingingGirlStory" : SingingGirlStory_Environment,
+    "SingleStackSorting" : SingleStackSorting_Environment,
+    "SkaRockGarden" : SkaRockGarden_Environment,
+    "SkyscraperPuzzle" : SkyscraperPuzzle_Environment,
+    "SkyscraperSumPuzzle" : SkyscraperSumPuzzle_Environment,
+    "SlidingWindow" : SlidingWindow_Environment,
+    "SLOElephants" : SLOElephants_Environment,
+    "SmallestBinaryMultiple" : SmallestBinaryMultiple_Environment,
+    "SmallestCircle" : SmallestCircle_Environment,
+    "Sorting" : Sorting_Environment,
+    "SpiralMatrix" : SpiralMatrix_Environment,
+    "SplittingGame" : SplittingGame_Environment,
+    "SpyNetwork" : SpyNetwork_Environment,
+    "SquSquarks" : SquSquarks_Environment,
+    "SquareUndamagedPointCounting" : SquareUndamagedPointCounting_Environment,
+    "StarBattle" : StarBattle_Environment,
+    "StirlingSecond" : StirlingSecond_Environment,
+    "StoneGame" : StoneGame_Environment,
+    "StoneIntervalsGame" : StoneIntervalsGame_Environment,
+    "StringPartitionShuffle" : StringPartitionShuffle_Environment,
+    "StringReversalConstruction" : StringReversalConstruction_Environment,
+    "STUWell" : STUWell_Environment,
+    "StuntFlying" : StuntFlying_Environment,
+    "SubarraySumXor" : SubarraySumXor_Environment,
+    "SubarrayXorSum" : SubarrayXorSum_Environment,
+    "SubgraphIsomorphism" : SubgraphIsomorphism_Environment,
+    "SubmatrixSumDivisibleCounting" : SubmatrixSumDivisibleCounting_Environment,
+    "SubsequenceReversalLNDS" : SubsequenceReversalLNDS_Environment,
+    "SubsetSum" : SubsetSum_Environment,
+    "SubsetSumSequence" : SubsetSumSequence_Environment,
+    "Sudoku" : Sudoku_Environment,
+    "Sum_DivisorNum" : Sum_DivisorNum_Environment,
+    "SumGCD" : SumGCD_Environment,
+    "SumGCDWithIndividual" : SumGCDWithIndividual_Environment,
+    "SumLCM" : SumLCM_Environment,
+    "SumManhattan_CurvedSurface" : SumManhattan_CurvedSurface_Environment,
+    "SumMOD" : SumMOD_Environment,
+    "SumPHIInterval" : SumPHIInterval_Environment,
+    "SumProductDivisorNum" : SumProductDivisorNum_Environment,
+    "SumPseudoEuclidean" : SumPseudoEuclidean_Environment,
+    "SumSetMultiplication" : SumSetMultiplication_Environment,
+    "SumSpanningTreeGCD" : SumSpanningTreeGCD_Environment,
+    "SumTriangleArea" : SumTriangleArea_Environment,
+    "SumXorDivisorNum" : SumXorDivisorNum_Environment,
+    "SurvoPuzzle" : SurvoPuzzle_Environment,
+    "TakingPrimeGame" : TakingPrimeGame_Environment,
+    "TaskArrangement" : TaskArrangement_Environment,
+    "TetrisAttack" : TetrisAttack_Environment,
+    "ThreeStringCommonSubsequenceCounting" : ThreeStringCommonSubsequenceCounting_Environment,
+    "ThreeVertexCycleCounting" : ThreeVertexCycleCounting_Environment,
+    "TopologicalSort" : TopologicalSort_Environment,
+    "TopologicalSort_MinimalLexicographicalOrder" : TopologicalSort_MinimalLexicographicalOrder_Environment,
+    "Tournament_LongestPath" : Tournament_LongestPath_Environment,
+    "TransmissionDelay" : TransmissionDelay_Environment,
+    "TreeAddOneEdgeDiameter" : TreeAddOneEdgeDiameter_Environment,
+    "TreeCenter" : TreeCenter_Environment,
+    "TreeChangeOneEdgeDiameter" : TreeChangeOneEdgeDiameter_Environment,
+    "TreeColoring" : TreeColoring_Environment,
+    "Tree_DistanceEqualTriad_Counting" : Tree_DistanceEqualTriad_Counting_Environment,
+    "TreeDynamic_XORZeroPath" : TreeDynamic_XORZeroPath_Environment,
+    "TreeElimination_Expectation" : TreeElimination_Expectation_Environment,
+    "TreeEvenPartitioning" : TreeEvenPartitioning_Environment,
+    "TreeMaximumVisitedVertex" : TreeMaximumVisitedVertex_Environment,
+    "TreeRandomWalkExpectation" : TreeRandomWalkExpectation_Environment,
+    "TreeTopologicalSequenceCounting" : TreeTopologicalSequenceCounting_Environment,
+    "TriumphalArch" : TriumphalArch_Environment,
+    "TwiddlePuzzle" : TwiddlePuzzle_Environment,
+    "TwoSAT" : TwoSAT_Environment,
+    "TwoSet_AllCoprime_Counting" : TwoSet_AllCoprime_Counting_Environment,
+    "UndamagedSubmatrixCounting" : UndamagedSubmatrixCounting_Environment,
+    "ValueDiminishingSelection" : ValueDiminishingSelection_Environment,
+    "Vertex_KCenter" : Vertex_KCenter_Environment,
+    "VirusSynthesis" : VirusSynthesis_Environment,
+    "VisibleLine" : VisibleLine_Environment,
+    "WarehouseConstruction" : WarehouseConstruction_Environment,
+    "WeightedBinaryTree" : WeightedBinaryTree_Environment,
+    "WeightedLIS" : WeightedLIS_Environment,
+    "WhackAMole" : WhackAMole_Environment,
+    "WIL" : WIL_Environment,
+    "WYC" : WYC_Environment,
+    "WYRLevelingGround" : WYRLevelingGround_Environment,
+    "XorEquationCounting" : XorEquationCounting_Environment,
+    "ZeroPrefixSubsetCounting" : ZeroPrefixSubsetCounting_Environment,
+}

server/Gym/environments/ab_program_simulation/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import ABProgramSimulation_Environment

server/Gym/environments/ab_program_simulation/environment.py ADDED Viewed

	@@ -0,0 +1,109 @@

+import random
+from typing import Optional, List
+from ...environment import VerifiableEnvironment
+class ABProgramSimulation_Environment(VerifiableEnvironment) : # Source : https://x.com/VictorTaelin/status/1776096481704804789
+    prompt_template = \
+r"""A::B is a system with 4 tokens: `A#`, `#A`, `B#` and `#B`.
+An A::B program is a sequence of tokens, e.g., `B# A# #B #A B#`.
+To *compute* a program, we must rewrite neighbor tokens, using the rules (whenever two neighbor tokens have their `#` facing each-other, they must be rewritten according to the corresponding rule) :
++ `A# #A` ... becomes ... `` (nothing)
++ `A# #B` ... becomes ... `#B A#`
++ `B# #A` ... becomes ... `#A B#`
++ `B# #B` ... becomes ...  `` (nothing)
+Please give the final state of the program: {program}
+An example for output format: `B# A# A#`
+"""
+    def __init__(self,
+                 wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0,
+                 **kwargs) :
+        """
+        Initialize the AB_Program_Simulation_Environment instance.
+        """
+        super().__init__(**kwargs)
+        self.rewards = {
+            "wrong_format" : wrong_format,
+            "correct_answer" : correct_answer,
+            "wrong_answer" : wrong_answer,
+        }
+    def _generate(self) -> None :
+        assert "N" in self.parameter, "N is required in parameter"
+        N = self.parameter["N"]
+        assert N >= 1, "N should be greater than or equal to 1"
+        assert "max_steps" in self.parameter, "max_steps is required in parameter"
+        max_steps = self.parameter["max_steps"]
+        assert max_steps >= 1, "max_steps should be greater than or equal to 1"
+        while True :
+            distribution = [random.randint(1, N) for _ in range(4)]
+            distribution = [d / sum(distribution) for d in distribution]
+            self.parameter["program"] = [["A#", "#A", "B#", "#B"][i] for i in random.choices(range(4), distribution, k = N)]
+            current, final = self.parameter["program"].copy(), None
+            for step in range(max_steps) :
+                new_program = None
+                for i in range(len(current) - 1) :
+                    a, b = current[i], current[i + 1]
+                    if a == "A#" and b == "#A" :
+                        new_program = current[: i] + current[i + 2 :]
+                    elif a == "A#" and b == "#B" :
+                        new_program = current[: i] + ["#B", "A#"] + current[i + 2 :]
+                    elif a == "B#" and b == "#A" :
+                        new_program = current[: i] + ["#A", "B#"] + current[i + 2 :]
+                    elif a == "B#" and b == "#B" :
+                        new_program = current[: i] + current[i + 2 :]
+                    if new_program is not None:
+                        break
+                if new_program is None :
+                    final = current
+                    break
+                else :
+                    current = new_program
+            if final is not None :
+                self.parameter["reference_answer"] = " ".join(final)
+                self.parameter["gold_answer"] = final
+                break
+    def _prompt_generate(self) -> str :
+        return self.prompt_template.format(program = " ".join(self.parameter["program"]))
+    def _process(self, answer : Optional[str]) -> Optional[List] :
+        if answer is not None :
+            answer = answer.strip()
+            try :
+                answer_array = answer.split()
+                return answer_array
+            except ValueError :
+                return None # Invalid answer format
+        else :
+            return None # Invalid answer format
+    def scorer(self, output : str) -> float :
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            assert isinstance(processed_result, list), "processed_result should be a list"
+            if not all(token in ("A#", "#A", "B#", "#B") for token in processed_result) :
+                return self.rewards["wrong_format"]
+            if processed_result == self.parameter["gold_answer"] :
+                return self.rewards["correct_answer"]
+            else :
+                return self.rewards["wrong_answer"]
+        else :
+            return self.rewards["wrong_format"]

server/Gym/environments/add_multiple_divisible_counting/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import AddMultiple_Divisible_Counting_Environment

server/Gym/environments/add_multiple_divisible_counting/environment.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import math
+import random
+from typing import Optional
+from ...environment import VerifiableEnvironment
+class AddMultiple_Divisible_Counting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4466
+    prompt_template = \
+r"""Please compute the number of pairs (a, b) such that:
+- 1 ≤ a < b ≤ {N}
+- a × b is divisible by a + b
+**Output Format:** Your final answer should be a single integer — the number of such pairs (a, b)."""
+    def __init__(self,
+                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
+                 **kwargs) :
+        """
+        Initialize the AddMultiple_Divisible_Counting_Environment instance.
+        """
+        super().__init__(**kwargs)
+        self.rewards = {
+            "wrong_format" : wrong_format,
+            "rewarding_strategy" : rewarding_strategy,
+            "rewarding_weight" : rewarding_weight,
+            "rewarding_beta" : rewarding_beta,
+        }
+    def _generate(self) -> None :
+        assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
+        MAX_N = self.parameter["MAX_N"]
+        assert MAX_N >= 6, "MAX_N should be greater than or equal to 6"
+        N = self.parameter["N"] = random.randint(6, MAX_N)
+        def calc(x : int, y : int) -> int :
+            """
+            Compute
+                sum_{k = x+1..2*x-1} floor(y / k)
+            by grouping k’s with the same quotient.
+            """
+            if y == 0 :
+                return 0
+            a = 0
+            z = x << 1
+            i = x + 1
+            while i < z :
+                q = y // i
+                if q == 0 :
+                    break
+                j = min(y // q, z - 1)
+                a += (j - i + 1) * q
+                i = j + 1
+            return a
+        m = math.isqrt(N)
+        mu = [0] * (m + 1)
+        mu[1] = 1
+        is_comp = [False] * (m + 1)
+        primes = []
+        for i in range(2, m + 1) :
+            if not is_comp[i] :
+                primes.append(i)
+                mu[i] = -1
+            for p in primes :
+                ip = i * p
+                if ip > m :
+                    break
+                is_comp[ip] = True
+                if i % p == 0 :
+                    mu[ip] = 0
+                    break
+                else :
+                    mu[ip] = -mu[i]
+        ans = 0
+        for i in range(1, m + 1) :
+            if mu[i] == 0 :
+                continue
+            ii = i * i
+            top = m // i
+            for j in range(1, top + 1) :
+                y = N // (ii * j)
+                ans += mu[i] * calc(j, y)
+        assert ans > 0, "Answer should be greater than 0"
+        self.parameter["reference_answer"] = ans
+    def _prompt_generate(self) -> str :
+        return self.prompt_template.format(N = self.parameter["N"])
+    def _process(self, answer : Optional[str]) -> Optional[int] :
+        if answer is not None :
+            answer = answer.strip()
+            try :
+                int_answer = int(answer)
+                return int_answer
+            except ValueError :
+                return None
+        else :
+            return None
+    def scorer(self, output : str) -> float :
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            if processed_result <= 0 :
+                return self.rewards["wrong_format"]
+            if self.rewards["rewarding_strategy"] == "(min/max)^beta" :
+                a, b = self.parameter["reference_answer"], processed_result
+                return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
+            elif self.rewards["rewarding_strategy"] == "gold=answer" :
+                return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
+            else :
+                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
+        else :
+            return self.rewards["wrong_format"]

server/Gym/environments/addition_table/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import AdditionTable_Environment

server/Gym/environments/addition_table/environment.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import random
+from typing import Optional, Dict
+from ...environment import VerifiableEnvironment
+class AdditionTable_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P1013
+    prompt_template = \
+r"""You are given an unknown base-N number system (N is an integer ≥ 3), and {N} distinct digits {ALL_LETTERS} in that system. The digits satisfy the following equations in base-N:
+{EQUATIONS}
+Note:
+- {ALL_LETTERS} are distinct digits in the range [0, N−1].
+- Expressions like ba represent base-N numbers formed by **concatenation**. For example, if a=1 and b=2, then ba = "21" in base-N.
+Your task is to find the correct base N (in decimal), and the values of {ALL_LETTERS} (also in decimal) that satisfy all the equations.
+Output Format:
+Your final answer should be a single line containing N, {ALL_LETTERS} (all in decimal), separated by **spaces**.
+Example: `{N_plus_1} {EXAMPLE_1}` (do **NOT** include the backticks or quotes); this means N={N_plus_1}, {EXAMPLE_2}.
+"""
+    def __init__(self,
+                 wrong_format : float = -1.0, invalid_answer : float = -0.5, wrong_N : float = 0.0, rewarding_strategy : str = "mean([gold=answer])^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 3.0,
+                 **kwargs) :
+        """
+        Initialize the AdditionTable_Environment instance.
+        """
+        super().__init__(**kwargs)
+        self.rewards = {
+            "wrong_format" : wrong_format,
+            "invalid_answer" : invalid_answer,
+            "wrong_N" : wrong_N,
+            "rewarding_strategy" : rewarding_strategy,
+            "rewarding_weight" : rewarding_weight,
+            "rewarding_beta" : rewarding_beta,
+        }
+    def _generate(self) -> None :
+        assert "N" in self.parameter, "N is required in parameter"
+        N = self.parameter["N"]
+        assert N in range(3, 26 + 1), "N should be in the range [3, 26]"
+        digit2letter = self.parameter["digit2letter"] = [chr(i) for i in range(97, 97 + N)]
+        random.shuffle(digit2letter)
+        letter2digit = {letter : digit for digit, letter in enumerate(digit2letter)}
+        self.parameter["reference_answer"] = "{} {}".format(N, " ".join([str(letter2digit[chr(i)]) for i in range(97, 97 + N)]))
+    def convert_to_expression(self, n : int) -> str :
+        N = self.parameter["N"]
+        if n == 0 :
+            return self.parameter["digit2letter"][0]
+        else :
+            expression = ""
+            while n > 0 :
+                digit = n % N
+                expression = self.parameter["digit2letter"][digit] + expression
+                n //= N
+            return expression
+    def _prompt_generate(self) -> str :
+        N = self.parameter["N"]
+        ALL_LETTERS = ", ".join([chr(i) for i in range(97, 97 + N)])
+        digit2letter = self.parameter["digit2letter"]
+        letter2digit = {letter : digit for digit, letter in enumerate(digit2letter)}
+        EQUATIONS = []
+        for a_ascii in range(97, 97 + N) :
+            for b_ascii in range(a_ascii, 97 + N) :
+                a = chr(a_ascii)
+                b = chr(b_ascii)
+                EQUATIONS.append("{} + {} = {}".format(a, b, self.convert_to_expression(letter2digit[a] + letter2digit[b])))
+        EQUATIONS = "\n".join(EQUATIONS)
+        return self.prompt_template.format(
+            ALL_LETTERS = ALL_LETTERS,
+            EQUATIONS = EQUATIONS,
+            N = N,
+            N_plus_1 = N + 1,
+            EXAMPLE_1 = " ".join([str(_) for _ in range(N)]),
+            EXAMPLE_2 = ", ".join(["{}={}".format(chr(i), i - 97) for i in range(97, 97 + N)]),
+        )
+    def _process(self, answer : Optional[str]) -> Optional[Dict] :
+        if answer is not None :
+            answer = answer.strip()
+            try :
+                answer_array = list(map(int, answer.split()))
+                if len(answer_array) != self.parameter["N"] + 1 :
+                    return dict()
+                N = answer_array[0]
+                digits = answer_array[1 :]
+                return dict(N = N, digits = digits)
+            except ValueError :
+                return dict()
+        else :
+            return None
+    def scorer(self, output : str) -> float :
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            if not processed_result :
+                return self.rewards["invalid_answer"]
+            N = processed_result["N"]
+            if N != self.parameter["N"] :
+                return self.rewards["wrong_N"]
+            predict_digits = processed_result["digits"]
+            assert len(predict_digits) == N, "digits should have the same length as N"
+            letter2digit = {letter : digit for digit, letter in enumerate(self.parameter["digit2letter"])}
+            assert len(letter2digit) == N, "letter2digit should have the same length as N"
+            gold_digits = [letter2digit[chr(i)] for i in range(97, 97 + N)]
+            if self.rewards["rewarding_strategy"] == "mean([gold=answer])^beta" :
+                return self.rewards["rewarding_weight"] * ((sum(float(a == b) for a, b in zip(gold_digits, predict_digits)) / N) ** self.rewards["rewarding_beta"])
+            elif self.rewards["rewarding_strategy"] == "gold=answer" :
+                return self.rewards["rewarding_weight"] * all(a == b for a, b in zip(gold_digits, predict_digits))
+            else :
+                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
+        else :
+            return self.rewards["wrong_format"]

server/Gym/environments/almost_complete_graph_cycle_counting/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import AlmostCompleteGraphCycleCounting_Environment

server/Gym/environments/almost_complete_graph_cycle_counting/environment.py ADDED Viewed

	@@ -0,0 +1,94 @@

+import random
+from typing import Optional
+from ...environment import VerifiableEnvironment
+class AlmostCompleteGraphCycleCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3862
+    prompt_template = \
+r"""Consider a graph with {N} vertices labeled from 1 to {N}. Every pair of vertices is connected by an undirected edge, except for the edge between vertices 1 and {N} (so the graph has {N} × ({N} - 1) / 2 - 1 edges).
+What's the number of **simple cycles** in this graph? A simple cycle must:
+- Have at least 3 vertices,
+- Contain no repeated vertices or edges,
+- Be considered the same as any cycle with the same set of edges (regardless of order or starting point); for example, `(1, 2, 3, 4)` and `(2, 1, 4, 3)` are the same, but `(1, 2, 3, 4)` and `(2, 1, 3, 4)` are different.
+Output the answer modulo {MOD}."""
+    def __init__(self,
+                 max_MOD : int = 1000000,
+                 wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0,
+                 **kwargs) :
+        """
+        Initialize the AlmostCompleteGraphCycleCounting_Environment instance.
+        """
+        super().__init__(**kwargs)
+        self.max_MOD = max_MOD
+        self.rewards = {
+            "wrong_format" : wrong_format,
+            "wrong_range" : wrong_range,
+            "correct_answer" : correct_answer,
+            "wrong_answer" : wrong_answer,
+        }
+    def _generate(self) -> None :
+        assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
+        MAX_N = self.parameter["MAX_N"]
+        assert MAX_N >= 4, "MAX_N should be greater than or equal to 4"
+        N = self.parameter["N"] = random.randint(4, MAX_N)
+        MOD = self.parameter["MOD"] = 2 * random.randint(1, self.max_MOD // 2) + 1
+        INV2 = (MOD + 1) // 2
+        def calc(x, y, s, N):
+            """
+            x: current count of cycles for K_s
+            y: current count of paths of length 1 (one edge) in K_s
+            s: starting i value (we've precomputed up to K_s)
+            N: target N
+            """
+            for i in range(s, N):
+                # compute ((i-1)*(i-2)/2) % MOD efficiently
+                half = ((i - 1) % MOD) * ((i - 2) % MOD) % MOD * INV2 % MOD
+                x = (x + y * half) % MOD
+                y = (y * ((i - 2) % MOD) + 1) % MOD
+            # finally add the contribution for closing the cycle at N
+            half_n = ((N - 2) % MOD) * ((N - 3) % MOD) % MOD * INV2 % MOD
+            return (x + y * half_n) % MOD
+        if N <= 3 :
+            self.parameter["reference_answer"] = 0
+        else :
+            self.parameter["reference_answer"] = calc(1, 2, 4, N)
+    def _prompt_generate(self) -> str :
+        return self.prompt_template.format(N = self.parameter["N"], MOD = self.parameter["MOD"])
+    def _process(self, answer : Optional[str]) -> Optional[int] :
+        if answer is not None :
+            answer = answer.strip()
+            try :
+                int_answer = int(answer)
+                return int_answer
+            except ValueError :
+                return None
+        else :
+            return None
+    def scorer(self, output : str) -> float :
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            if not (0 <= processed_result < self.parameter["MOD"]) :
+                return self.rewards["wrong_range"]
+            if processed_result == self.parameter["reference_answer"] :
+                return self.rewards["correct_answer"]
+            else :
+                return self.rewards["wrong_answer"]
+        else :
+            return self.rewards["wrong_format"]

server/Gym/environments/and_or_sequence_counting/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import AndOr_Sequence_Counting_Environment

server/Gym/environments/and_or_sequence_counting/environment.py ADDED Viewed

	@@ -0,0 +1,147 @@

+import random
+from typing import Optional
+from ...environment import VerifiableEnvironment
+class AndOr_Sequence_Counting_Environment(VerifiableEnvironment) :
+    prompt_template = \
+r"""You are given an integer array `A` of length {N}:
+{A}
+Please count the number of valid integer arrays `B` of length {N} that satisfy the following conditions:
+- For all indices 0 <= i <= {N_minus_1}, the value B[i] must be in the range: 0 <= B[i] < 2^{M} = {power_2_M}
+- For all indices 0 <= i < {N_minus_1}, the following bitwise conditions hold:
+  - (A[i] & B[i]) <= (A[i + 1] & B[i + 1])
+  - (A[i] | B[i]) >= (A[i + 1] | B[i + 1])
+  - (Here, `&` is the bitwise AND operator and `|` is the bitwise OR operator.)
+**Output Format:** Your final answer should be a single integer — the number of valid arrays `B` that satisfy all the above conditions."""
+    def __init__(self,
+                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
+                 **kwargs) :
+        """
+        Initialize the AndOr_Sequence_Counting_Environment instance.
+        """
+        super().__init__(**kwargs)
+        self.rewards = {
+            "wrong_format" : wrong_format,
+            "rewarding_strategy" : rewarding_strategy,
+            "rewarding_weight" : rewarding_weight,
+            "rewarding_beta" : rewarding_beta,
+        }
+    def _generate(self) -> None :
+        assert "N" in self.parameter, "N is required in parameter"
+        N = self.parameter["N"]
+        assert N >= 2, "N should be greater than or equal to 2"
+        assert "M" in self.parameter, "M is required in parameter"
+        M = self.parameter["M"]
+        assert M >= 1, "M should be greater than or equal to 1"
+        A = self.parameter["A"] = [random.randint(0, 2 ** M - 1) for i in range(N)]
+        def dp1(N, M, A) :
+            F = [[[0] * N for _ in range(N)] for _ in range(2)]
+            for l in range(N) :
+                for r in range(l, N) :
+                    F[1][l][r] = 1
+            for b in range(M + 1) :
+                now = b % 2
+                lst = now ^ 1
+                for i in range(N) :
+                    for j in range(N) :
+                        F[now][i][j] = 0
+                Pre = [0] * (N + 1)
+                for i in range(1, N + 1) :
+                    Pre[i] = Pre[i - 1] + ((A[i - 1] >> b) & 1)
+                for l in range(N) :
+                    for r in range(l, N) :
+                        for x in range(l - 1, r + 1) :
+                            if Pre[r + 1] - Pre[x + 1] != (r - x) :
+                                continue
+                            left_count  = F[lst][l][x]   if x   >= l else 1
+                            right_count = F[lst][x + 1][r] if x+1 <= r else 1
+                            F[now][l][r] += left_count * right_count
+            return F[M % 2][0][N - 1]
+        def dp2(N, M, A) :
+            F = [[[0] * N for _ in range(N)] for _ in range(2)]
+            for l in range(N) :
+                for r in range(l, N) :
+                    F[1][l][r] = 1
+            for b in range(M + 1) :
+                now = b % 2
+                lst = now ^ 1
+                for i in range(N) :
+                    for j in range(N) :
+                        F[now][i][j] = 0
+                Pre = [0] * (N + 1)
+                for i in range(1, N + 1) :
+                    Pre[i] = Pre[i - 1] + ((A[i - 1] >> b) & 1)
+                for l in range(N) :
+                    for r in range(l, N) :
+                        for x in range(l - 1, r + 1) :
+                            if Pre[r + 1] - Pre[x + 1] != 0:
+                                continue
+                            left_count  = F[lst][l][x] if x >= l else 1
+                            right_count = F[lst][x + 1][r] if x + 1 <= r else 1
+                            F[now][l][r] += left_count * right_count
+            return F[M % 2][0][N - 1]
+        self.parameter["reference_answer"] = dp1(N, M - 1, A) * dp2(N, M - 1, A)
+    def _prompt_generate(self) -> str :
+        N, M = self.parameter["N"], self.parameter["M"]
+        return self.prompt_template.format(
+            N = self.parameter["N"],
+            N_minus_1 = self.parameter["N"] - 1,
+            M = self.parameter["M"],
+            power_2_M = 2 ** self.parameter["M"],
+            A = " ".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"])),
+        )
+    def _process(self, answer : Optional[str]) -> Optional[int] :
+        if answer is not None :
+            answer = answer.strip()
+            try :
+                int_answer = int(answer)
+                return int_answer
+            except ValueError :
+                return None
+        else :
+            return None
+    def scorer(self, output : str) -> float :
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            if processed_result < 0 :
+                return self.rewards["wrong_format"]
+            if self.parameter["reference_answer"] == 0 :
+                return self.rewards["rewarding_weight"] * (processed_result == 0)
+            if self.rewards["rewarding_strategy"] == "(min/max)^beta" :
+                a, b = self.parameter["reference_answer"], processed_result
+                return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
+            elif self.rewards["rewarding_strategy"] == "gold=answer" :
+                return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
+            else :
+                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
+        else :
+            return self.rewards["wrong_format"]

server/Gym/environments/anti_palindromic_substring_counting/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import AntiPalindromicSubstringCounting_Environment

server/Gym/environments/anti_palindromic_substring_counting/environment.py ADDED Viewed

	@@ -0,0 +1,142 @@

+import random
+from typing import Optional
+from ...environment import VerifiableEnvironment
+class AntiPalindromicSubstringCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3501
+    prompt_template = \
+r"""We define an **anti-palindromic binary string** as a binary string such that its reverse is equal to the bitwise complement of the original string (i.e., '0' becomes '1' and '1' becomes '0'). For example, `000111` is anti-palindromic because its reverse is `111000`, which is the bitwise complement of `000111`. But `1001` is not, because its reverse is `1001`, while its flipped version is `0110`.
+You are given a binary string: {S}
+Please count the number of **contiguous substrings** of `S` that are anti-palindromic. Two substrings are considered different if they appear at different positions in `S`. Output a single integer — the number of anti-palindromic substrings."""
+    def __init__(self,
+                 wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
+                 **kwargs) :
+        """
+        Initialize the AntiPalindromicSubstringCounting_Environment instance.
+        """
+        super().__init__(**kwargs)
+        self.rewards = {
+            "wrong_format" : wrong_format,
+            "rewarding_strategy" : rewarding_strategy,
+            "rewarding_weight" : rewarding_weight,
+            "rewarding_beta" : rewarding_beta,
+        }
+    def _generate(self) -> None :
+        assert "N" in self.parameter, "N is required in parameter"
+        N = self.parameter["N"]
+        assert N >= 3, "N should be greater than or equal to 3"
+        endpoints = random.sample(range(1, N), random.randint(0, N - 1))
+        endpoints.sort()
+        endpoints = [0] + endpoints + [N]
+        one_probability = random.random()
+        S = ""
+        for i in range(len(endpoints) - 1) :
+            length = endpoints[i + 1] - endpoints[i]
+            if length % 2 == 0 :
+                half = "".join("1" if random.random() < one_probability else "0" for _ in range(length // 2))
+                S += half + "".join("1" if c == "0" else "0" for c in reversed(half))
+            else :
+                S += "".join("1" if random.random() < one_probability else "0" for _ in range(length))
+        self.parameter["S"] = S
+        assert len(S) == N, f"Generated string length {len(S)} does not match N {N}"
+        # Build the “S” array from the C++:
+        #   S[0] = '$', S[1] = '#', then for each char: c, '#', and finally a trailing '$'
+        T = ['$','#']
+        for c in S:
+            T.append(c)
+            T.append('#')
+        T.append('$')
+        length = len(T)
+        tot = length - 2   # corresponds to C++ `tot` (1 + 2*N)
+        # P[i] will hold the Manacher‐style radius at center i
+        P = [0] * length
+        # inversion map for the 0/1 bits and the separator '#'
+        inv = {'0':'1', '1':'0', '#':'#'}
+        pos = 1   # center of the rightmost-reaching antisymmetry
+        mx  = 1   # its right boundary = pos + P[pos]
+        ans = 0
+        # only odd i (the '#' positions) correspond to even‐length substrings
+        for i in range(1, tot+1, 2):
+            if i < mx:
+                mirror = 2*pos - i
+                # same as: len[i] = min(mx - i, len[mirror])
+                P[i] = min(mx - i, P[mirror])
+            else:
+                P[i] = 1
+            # expand as long as T[i + P] == inv[T[i - P]]
+            while True:
+                left = i - P[i]
+                right = i + P[i]
+                # boundary guard
+                if left < 0 or right >= length:
+                    break
+                # must both be in our inv‐map (i.e. '#','0','1')
+                cL = T[left]
+                cR = T[right]
+                if cL not in inv or cR not in inv:
+                    break
+                if cR == inv[cL]:
+                    P[i] += 1
+                else:
+                    break
+            # update the farthest-reaching center
+            if i + P[i] > mx:
+                mx  = i + P[i]
+                pos = i
+            # each full two‐step in the radius == one antisymmetric substring
+            ans += (P[i] >> 1)
+        self.parameter["reference_answer"] = ans
+    def _prompt_generate(self) -> str :
+        return self.prompt_template.format(S = self.parameter["S"])
+    def _process(self, answer : Optional[str]) -> Optional[int] :
+        if answer is not None :
+            answer = answer.strip()
+            try :
+                int_answer = int(answer)
+                return int_answer
+            except ValueError :
+                return None
+        else :
+            return None
+    def scorer(self, output : str) -> float :
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            if processed_result < 0 :
+                return self.rewards["wrong_format"]
+            if self.rewards["rewarding_strategy"] == "(min/max)^beta" :
+                if self.parameter["reference_answer"] == 0 :
+                    return self.rewards["rewarding_weight"] * int(processed_result == 0)
+                a, b = self.parameter["reference_answer"], processed_result
+                return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
+            elif self.rewards["rewarding_strategy"] == "gold=answer" :
+                return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
+            else :
+                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
+        else :
+            return self.rewards["wrong_format"]

server/Gym/environments/axis_k_center/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import Axis_KCenter_Environment

server/Gym/environments/axis_k_center/environment.py ADDED Viewed

	@@ -0,0 +1,129 @@

+import random
+from typing import Optional, List
+from ...environment import VerifiableEnvironment
+class Axis_KCenter_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4767
+    """
+    Optimization task: choose K of N points on a line so that the sum of distances
+    from every point to its nearest chosen point is as small as possible.
+    """
+    prompt_template = \
+r"""You are given {N} points on a line, labeled from 0 to {N_minus_1}. Their positions (from left to right) are: {X}
+Please select a set of {K} distinct points. Try your best to minimize the total distance from all points to their nearest selected point (the distance is the absolute difference between positions).
+**Output Format:** Your final answer should be a single line containing the indices of the selected {K} points in any order, separated by spaces."""
+    def __init__(self,
+                 position_multiple : int = 5,
+                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
+                 **kwargs) :
+        """
+        Initialize the Axis_KCenter_Environment instance.
+        Args:
+            position_multiple: positions are sampled from 0..N * position_multiple.
+            wrong_format: reward for an unparsable answer.
+            invalid_solution: reward for duplicate indices, a wrong count, or an index out of range.
+            rewarding_strategy: "(gold/answer)^beta" or "gold=answer".
+            rewarding_weight: multiplier applied to the computed score.
+            rewarding_beta: exponent used by the "(gold/answer)^beta" strategy.
+            **kwargs: forwarded unchanged to the base class.
+        """
+        super().__init__(**kwargs)
+        self.position_multiple = position_multiple
+        self.rewards = {
+            "wrong_format" : wrong_format,
+            "invalid_solution" : invalid_solution,
+            "rewarding_strategy" : rewarding_strategy,
+            "rewarding_weight" : rewarding_weight,
+            "rewarding_beta" : rewarding_beta,
+        }
+    def _generate(self) -> None :
+        """
+        Sample K and the sorted distinct positions X, then compute the optimal total
+        distance with a bounded-search DP; writes `K`, `X` and `gold_answer`.
+        Requires `N` (>= 3) in `self.parameter`.
+        """
+        assert "N" in self.parameter, "N is required in parameter"
+        N = self.parameter["N"]
+        assert N >= 3, "N should be greater than or equal to 3"
+        K = self.parameter["K"] = random.randint(1, N - 1)
+        # random.sample draws without replacement, so the positions are pairwise distinct.
+        X = self.parameter["X"] = random.sample(range(N * self.position_multiple + 1), N)
+        X.sort()
+        # Sentinel strictly larger than any achievable total distance.
+        INF = N * (X[-1] - X[0] + 1)
+        # Precompute w[l][r]: cost of one post office for villages l..r (inclusive, 0-indexed)
+        # Extending the range by X[r] adds its distance to the median position X[m].
+        w = [[0] * N for _ in range(N)]
+        for l in range(N):
+            for r in range(l + 1, N):
+                m = (l + r) // 2
+                w[l][r] = w[l][r - 1] + (X[r] - X[m])
+        # dp[i][j]: minimum total distance covering the first i villages with j post offices
+        dp = [[INF] * (K + 1) for _ in range(N + 1)]
+        # d[i][j]: the k giving the optimum for dp[i][j], for Knuth optimization
+        d = [[0] * (K + 1) for _ in range(N + 2)]
+        dp[0][0] = 0
+        for j in range(1, K + 1):
+            # Row N + 1 is a sentinel that supplies the upper search bound for i = N.
+            d[N + 1][j] = N
+            # i runs downward so that d[i + 1][j] is already known when dp[i][j] is computed.
+            for i in range(N, 0, -1):
+                best = INF
+                argk = 0
+                # Candidate split points are restricted to [d[i][j - 1], d[i + 1][j]].
+                start = d[i][j - 1]
+                end = d[i + 1][j]
+                if end > i - 1:
+                    end = i - 1
+                for k in range(start, end + 1):
+                    # First k villages use j - 1 offices; villages k..i-1 share one more.
+                    cost = dp[k][j - 1] + w[k][i - 1]
+                    if cost < best:
+                        best = cost
+                        argk = k
+                dp[i][j] = best
+                d[i][j] = argk
+        # Output the result: all N villages with K post offices
+        self.parameter["gold_answer"] = dp[N][K]
+    def _prompt_generate(self) -> str :
+        """
+        Render the prompt from the stored `N`, `K` and positions `X`.
+        """
+        N = self.parameter["N"]
+        return self.prompt_template.format(
+            N = N,
+            N_minus_1 = N - 1,
+            K = self.parameter["K"],
+            X = " ".join(map(str, self.parameter["X"])),
+        )
+    def _process(self, answer : Optional[str]) -> Optional[List] :
+        """
+        Parse the extracted answer as a whitespace-separated list of integers;
+        return None when it is missing or contains a non-integer token.
+        """
+        if answer is not None :
+            answer = answer.strip()
+            try :
+                answer_array = list(map(int, answer.split()))
+                return answer_array
+            except ValueError :
+                return None
+        else :
+            return None
+    def scorer(self, output : str) -> float :
+        """
+        Score a raw model output.
+        Unparsable output gets `wrong_format`; a selection with duplicates, a size
+        other than K, or an index outside 0..N-1 gets `invalid_solution`. Otherwise
+        the selection's total distance is compared with `gold_answer` using the
+        configured strategy.
+        Raises:
+            NotImplementedError: if the configured rewarding strategy is unknown.
+        """
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            assert isinstance(processed_result, list), "processed_result should be a list"
+            selected_points = processed_result
+            if len(selected_points) != len(set(selected_points)) :
+                return self.rewards["invalid_solution"]
+            if len(selected_points) != self.parameter["K"] :
+                return self.rewards["invalid_solution"]
+            if not all(0 <= u < self.parameter["N"] for u in selected_points) :
+                return self.rewards["invalid_solution"]
+            # Total distance from every point to its nearest selected point.
+            answer = sum(min(abs(self.parameter["X"][u] - self.parameter["X"][v]) for v in selected_points) for u in range(self.parameter["N"]))
+            gold = self.parameter["gold_answer"]
+            # Sanity check on the reference: no valid selection may beat the computed optimum.
+            assert gold <= answer, "gold should be less than or equal to answer"
+            if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" :
+                return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
+            elif self.rewards["rewarding_strategy"] == "gold=answer" :
+                return self.rewards["rewarding_weight"] * (gold == answer)
+            else :
+                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
+        else :
+            return self.rewards["wrong_format"]

server/Gym/environments/baj_bytecomputer/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import BAJBytecomputer_Environment

server/Gym/environments/baj_bytecomputer/environment.py ADDED Viewed

	@@ -0,0 +1,109 @@

+import random
+from typing import Optional
+from ...environment import VerifiableEnvironment
+class BAJBytecomputer_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3558
+    prompt_template = \
+r"""You are given an array X of length {N}, where each element is initially -1, 0, or +1: {X}
+You may perform the following operation any number of times: choose an index i (1 ≤ i < {N}), and update X[i + 1] := X[i + 1] + X[i]. Your goal is to make the array non-decreasing, i.e., X[1] ≤ X[2] ≤ ... ≤ X[{N}]; please output the **minimum number of operations** required to achieve this."""
+    def __init__(self,
+                 wrong_format : float = -1.0, correct_answer : float = 1.0, incorrect_answer : float = 0.0,
+                 **kwargs):
+        """
+        Initialize the BAJBytecomputer_Environment instance.
+        """
+        super().__init__(**kwargs)
+        self.rewards = {
+            "wrong_format": wrong_format,
+            "correct_answer": correct_answer,
+            "incorrect_answer": incorrect_answer,
+        }
+    def _generate(self) -> None :
+        assert "N" in self.parameter, "N is required in parameter"
+        N = self.parameter["N"]
+        assert N >= 3, "N should be greater than or equal to 3"
+        while True :
+            distribution = [random.randint(1, N) for _ in range(3)]
+            X = self.parameter["X"] = [random.choices([-1, 0, 1], weights = distribution)[0] for _ in range(N)]
+            # Compute a suitable "infinity" based on the maximum possible operations:
+            # At most 2 operations per element (for N-1 transitions), so 2*N + a small buffer
+            INF = 2 * N + 5
+            # The three possible values after operations
+            val = [-1, 0, 1]
+            # dp[j] = minimum operations to make the previous element equal to val[j]
+            # Initialize for the first element
+            prev = [INF] * 3
+            prev[X[0] + 1] = 0
+            # Iterate through the sequence
+            for i in range(1, N):
+                curr = [INF] * 3
+                x = X[i]
+                for j in range(3):
+                    ops_so_far = prev[j]
+                    if ops_so_far >= INF:
+                        continue
+                    prev_val = val[j]
+                    # 0 operations on x: new_x = x
+                    new_x = x
+                    if new_x >= prev_val:
+                        curr[new_x + 1] = min(curr[new_x + 1], ops_so_far)
+                    # 1 operation on x: new_x = x + prev_val
+                    new_x = x + prev_val
+                    if -1 <= new_x <= 1 and new_x >= prev_val:
+                        curr[new_x + 1] = min(curr[new_x + 1], ops_so_far + 1)
+                    # 2 operations on x: new_x = x + 2 * prev_val
+                    new_x = x + 2 * prev_val
+                    if -1 <= new_x <= 1 and new_x >= prev_val:
+                        curr[new_x + 1] = min(curr[new_x + 1], ops_so_far + 2)
+                prev = curr
+            # The answer is the minimum operations to end with any of {-1,0,1}
+            ans = min(prev)
+            if ans < INF:
+                self.parameter["reference_answer"] = ans
+                break
+    def _prompt_generate(self) -> str :
+        return self.prompt_template.format(
+            N = self.parameter["N"],
+            X = ", ".join("X[{}]={}".format(i + 1, Xi) for i, Xi in enumerate(self.parameter["X"])),
+        )
+    def _process(self, answer : Optional[str]) -> Optional[int] :
+        if answer is not None :
+            answer = answer.strip()
+            try :
+                int_answer = int(answer)
+                return int_answer
+            except ValueError :
+                return None
+        else :
+            return None
+    def scorer(self, output : str) -> float :
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            if processed_result == self.parameter["reference_answer"] :
+                return self.rewards["correct_answer"]
+            else :
+                return self.rewards["incorrect_answer"]
+        else :
+            return self.rewards["wrong_format"]

server/Gym/environments/banned_point_superset_path_counting/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import BannedPointSupersetPathCounting_Environment

server/Gym/environments/banned_point_superset_path_counting/environment.py ADDED Viewed

	@@ -0,0 +1,170 @@

+import random
+from typing import Optional, List
+from ...environment import VerifiableEnvironment
+class BannedPointSupersetPathCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3734
+    """
+    Counting task: count the paths from (0, 0, 0) to (N, M, R) in which every step
+    replaces one coordinate by a strict bitwise superset, while avoiding a set of
+    forbidden points; the count is reported modulo MOD.
+    """
+    prompt_template = \
+r"""In a three-dimensional space, you start at point (0, 0, 0) and want to reach the point ({N}, {M}, {R}). At each step, if you are currently at (x, y, z), you may move to a new (different from the current one) point of one of the following types:
+1. (x', y, z) such that x AND x' = x
+2. (x, y', z) such that y AND y' = y
+3. (x, y, z') such that z AND z' = z
+(AND refers to the bitwise AND operation.)
+You are **not allowed** to visit any of the following points:
+{obstacles}
+Please count the number of distinct valid paths from (0, 0, 0) to ({N}, {M}, {R}) that avoid all forbidden points. Output the result modulo {MOD}."""
+    def __init__(self,
+                 max_MOD : int = 10000,
+                 wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0,
+                 **kwargs) -> None:
+        """
+        Initialize the BannedPointSupersetPathCounting_Environment instance.
+        Args:
+            max_MOD: upper bound (inclusive) for the randomly drawn modulus.
+            wrong_format: reward for an unparsable answer.
+            wrong_range: reward for an answer outside [0, MOD).
+            correct_answer: reward for a matching answer.
+            wrong_answer: reward for an in-range but non-matching answer.
+            **kwargs: forwarded unchanged to the base class.
+        """
+        super().__init__(**kwargs)
+        self.max_MOD = max_MOD
+        self.rewards = {
+            "wrong_format": wrong_format,
+            "wrong_range": wrong_range,
+            "correct_answer": correct_answer,
+            "wrong_answer": wrong_answer,
+        }
+    def _generate(self) -> None :
+        """
+        Sample the target point, the forbidden points and the modulus, then compute
+        the reference count by inclusion-exclusion over the forbidden points.
+        Requires `MAX_N_M_R` (>= 1) and `MAX_O` (>= 1) in `self.parameter`; writes
+        `N`, `M`, `R`, `O`, `obstacles`, `MOD` and `reference_answer`.
+        """
+        assert "MAX_N_M_R" in self.parameter, "MAX_N_M_R is required in parameter"
+        MAX_N_M_R = self.parameter["MAX_N_M_R"]
+        assert MAX_N_M_R >= 1, "MAX_N_M_R should be greater than or equal to 1"
+        # Resample until at least one point other than the origin and the target exists:
+        # the product of 2^popcount counts all coordinate-wise submask triples.
+        while True :
+            N, M, R = self.parameter["N"], self.parameter["M"], self.parameter["R"] = random.randint(0, MAX_N_M_R), random.randint(0, MAX_N_M_R), random.randint(0, MAX_N_M_R)
+            if (2 ** N.bit_count()) * (2 ** M.bit_count()) * (2 ** R.bit_count()) - 2 >= 1 :
+                break
+        assert "MAX_O" in self.parameter, "MAX_O is required in parameter"
+        MAX_O = self.parameter["MAX_O"]
+        assert MAX_O >= 1, "MAX_O should be greater than or equal to 1"
+        # Cap the obstacle count by the number of available points so sampling below terminates.
+        MAX_O = min(MAX_O, (2 ** N.bit_count()) * (2 ** M.bit_count()) * (2 ** R.bit_count()) - 2)
+        O = self.parameter["O"] = random.randint(1, MAX_O)
+        def convert_to_bits(x) -> List[int] :
+            # Return the individual set bits of x as powers of two, lowest first.
+            result = []
+            bit = 1
+            while bit <= x :
+                if x & bit :
+                    result.append(bit)
+                bit <<= 1
+            return result
+        N_bits, M_bits, R_bits = convert_to_bits(N), convert_to_bits(M), convert_to_bits(R)
+        def random_subset(bits : List[int]) -> int :
+            # Pick a random subset size, then a random subset of that size, and OR it together.
+            bits = random.sample(bits, random.randint(0, len(bits)))
+            return sum(bits)
+        # Rejection-sample distinct submask triples, excluding the origin and the target.
+        obstacles = set()
+        while len(obstacles) < O :
+            x, y, z = random_subset(N_bits), random_subset(M_bits), random_subset(R_bits)
+            if (x, y, z) != (0, 0, 0) and (x, y, z) != (N, M, R) and (x, y, z) not in obstacles:
+                obstacles.add((x, y, z))
+        obstacles = list(obstacles)
+        random.shuffle(obstacles)
+        self.parameter["obstacles"] = obstacles.copy()
+        MOD = self.parameter["MOD"] = random.randint(2, self.max_MOD)
+        points = [(0, 0, 0)] + obstacles
+        points.sort()  # lex order by x, then y, then z
+        # The target goes last; every other point is a coordinate-wise submask of it.
+        points.append((N, M, R))
+        total = len(points)
+        # Determine needed bit-count dimensions
+        dx = N.bit_count()
+        dy = M.bit_count()
+        dz = R.bit_count()
+        max_d = max(dx, dy, dz)
+        # Precompute binomial coefficients up to max_d
+        binom = [[0] * (max_d + 1) for _ in range(max_d + 1)]
+        for i in range(max_d + 1):
+            binom[i][0] = 1
+            for j in range(1, i + 1):
+                binom[i][j] = (binom[i - 1][j - 1] + binom[i - 1][j]) % MOD
+        # Precompute f[x][y][z]: number of ways from (0,0,0) to a diff-vector with
+        # x one-bit-flips in X, y flips in Y, z flips in Z (ignoring obstacles).
+        f = [[[0] * (dz + 1) for _ in range(dy + 1)] for __ in range(dx + 1)]
+        f[0][0][0] = 1
+        for x in range(dx + 1):
+            for y in range(dy + 1):
+                for z in range(dz + 1):
+                    if x == y == z == 0:
+                        continue
+                    val = 0
+                    # transitions increasing X
+                    for i in range(x):
+                        val = (val + f[i][y][z] * binom[x][i]) % MOD
+                    # transitions increasing Y
+                    for j in range(y):
+                        val = (val + f[x][j][z] * binom[y][j]) % MOD
+                    # transitions increasing Z
+                    for k in range(z):
+                        val = (val + f[x][y][k] * binom[z][k]) % MOD
+                    f[x][y][z] = val
+        # DP over the sorted points
+        # g[i] = (-1) * sum_{j < i, p[j] is a submask of p[i]} g[j] * f[ popcount differences ]
+        g = [0] * total
+        g[0] = 1  # only one way to stay at the origin
+        for i in range(1, total):
+            xi, yi, zi = points[i]
+            acc = 0
+            for j in range(i):
+                xj, yj, zj = points[j]
+                # check subset on all three coordinates
+                if (xj & xi) == xj and (yj & yi) == yj and (zj & zi) == zj:
+                    # Number of bits that still have to be switched on in each coordinate.
+                    bx = (xi ^ xj).bit_count()
+                    by = (yi ^ yj).bit_count()
+                    bz = (zi ^ zj).bit_count()
+                    acc = (acc + g[j] * f[bx][by][bz]) % MOD
+            g[i] = (-acc) % MOD
+        # The answer is -g[last] mod MOD, which recovers the positive sum
+        self.parameter["reference_answer"] = (-g[-1]) % MOD
+    def _prompt_generate(self) -> str :
+        """
+        Render the prompt with the target point, one forbidden point per line, and the modulus.
+        """
+        return self.prompt_template.format(
+            N = self.parameter["N"],
+            M = self.parameter["M"],
+            R = self.parameter["R"],
+            obstacles = "\n".join("({}, {}, {})".format(x, y, z) for x, y, z in self.parameter["obstacles"]),
+            MOD = self.parameter["MOD"],
+        )
+    def _process(self, answer : Optional[str]) -> Optional[int] :
+        """
+        Parse the extracted answer as an integer; return None when it is missing or not an integer.
+        """
+        if answer is not None :
+            answer = answer.strip()
+            try :
+                int_answer = int(answer)
+                return int_answer
+            except ValueError :
+                return None
+        else :
+            return None
+    def scorer(self, output : str) -> float :
+        """
+        Return the `wrong_format`, `wrong_range`, `correct_answer` or `wrong_answer`
+        reward for a raw model output.
+        """
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            # Answers must already be reduced modulo MOD.
+            if not (0 <= processed_result < self.parameter["MOD"]) :
+                return self.rewards["wrong_range"]
+            if processed_result == self.parameter["reference_answer"] :
+                return self.rewards["correct_answer"]
+            else :
+                return self.rewards["wrong_answer"]
+        else :
+            return self.rewards["wrong_format"]

server/Gym/environments/banyan_heart/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import BanyanHeart_Environment

server/Gym/environments/banyan_heart/environment.py ADDED Viewed

	@@ -0,0 +1,165 @@

+import random
+import networkx
+from typing import Optional
+from ...environment import VerifiableEnvironment
+class BanyanHeart_Environment(VerifiableEnvironment) :
+    prompt_template = \
+r"""We use the following process to generate a tree with {N} vertices labeled from 1 to {N}:
+- Initially, the tree contains only vertex 1, and its **heart vertex** is also 1.
+- At each step, we add a new vertex `i` (2 ≤ i ≤ {N}) and connect it to an existing vertex with an undirected edge. Then, the heart vertex moves one step toward `i` (i.e., it moves to the neighbor that is closer to `i`).
+- This process continues until all {N} vertices have been added.
+The final tree has the following edges:
+{edges}
+Can you determine which vertices could be the heart vertex after the process is completed? Output a single line with {N} characters (either `T` or `F`) without separators, where the i-th character is `T` if vertex i can be the heart vertex, and `F` otherwise."""
+    def __init__(self,
+                 wrong_format : float = -1.0, rewarding_strategy : str = "(intersection/union)^beta", rewarding_beta : float = 5.0, rewarding_weight : float = +1.0,
+                 **kwargs) :
+        """
+        Initialize the BanyanHeart_Environment instance.
+        """
+        super().__init__(**kwargs)
+        self.rewards = {
+            "wrong_format" : wrong_format,
+            "rewarding_strategy" : rewarding_strategy,
+            "rewarding_beta" : rewarding_beta,
+            "rewarding_weight" : rewarding_weight,
+        }
+    def _generate(self) -> None :
+        """
+        Build a random labeled tree on N vertices and decide, for every vertex,
+        whether it is marked as a possible final heart vertex.
+        Requires `N` (>= 4) in `self.parameter`; writes `edges` and
+        `reference_answer` (a string of N characters, each `T` or `F`).
+        """
+        assert "N" in self.parameter, "N is required in parameter"
+        N = self.parameter["N"]
+        assert N >= 4, "N should be greater than or equal to 4"
+        edges = self.parameter["edges"] = []
+        # Random tree: shuffle the labels, then attach each label to a random earlier one.
+        permutations = list(range(1, N + 1))
+        random.shuffle(permutations)
+        for index, vertex in enumerate(permutations) :
+            if index == 0 :
+                continue
+            u, v = vertex, random.choice(permutations[: index])
+            # Store each edge with the smaller endpoint first.
+            u, v = min(u, v), max(u, v)
+            edges.append((u, v))
+        random.shuffle(edges)
+        # Sanity checks: normalized endpoints, no duplicate edges, and a connected acyclic graph.
+        for u, v in edges :
+            assert 1 <= u < v <= N
+        assert len(edges) == len(set(edges)) == N - 1
+        tree = networkx.Graph()
+        tree.add_edges_from(edges)
+        assert networkx.is_tree(tree)
+        # Build adjacency list dynamically
+        adjacency = [[] for _ in range(N + 1)]
+        for u, v in edges:
+            adjacency[u].append(v)
+            adjacency[v].append(u)
+        # Arrays (1..N); index 0 acts as a dummy node
+        dep = [0] * (N + 1)      # depth, with the root (vertex 1) at depth 1
+        siz = [0] * (N + 1)      # subtree size when rooted at vertex 1
+        hson = [0] * (N + 1)     # child with the largest subtree (0 if none)
+        hson2 = [0] * (N + 1)    # child with the second-largest subtree (0 if none)
+        # NOTE(review): f[u] looks like a "minimum leftover depth" measure for the subtree of u,
+        # derived from the heaviest child - confirm against the reference solution.
+        f = [0] * (N + 1)
+        ans = [False] * (N + 1)
+        # cmp function: return the index with larger siz
+        def cmp(x, y):
+            return x if siz[x] > siz[y] else y
+        # Iterative dfs1: compute dep, siz, hson, hson2, f
+        stack = [(1, 0, 0)]  # (u, parent, state) state 0=enter, 1=exit
+        dep[0] = 0
+        while stack:
+            u, fa, state = stack.pop()
+            if state == 0:
+                dep[u] = dep[fa] + 1
+                # Re-push u in exit state so it is handled after all of its children.
+                stack.append((u, fa, 1))
+                for v in adjacency[u]:
+                    if v == fa:
+                        continue
+                    stack.append((v, u, 0))
+            else:
+                # post-order processing
+                s = 1
+                h1 = 0
+                h2 = 0
+                for v in adjacency[u]:
+                    if v == fa:
+                        continue
+                    s += siz[v]
+                    # Track the two children with the largest subtrees.
+                    if siz[v] > siz[h1]:
+                        h2 = h1
+                        h1 = v
+                    elif siz[v] > siz[h2]:
+                        h2 = v
+                siz[u] = s
+                hson[u] = h1
+                hson2[u] = h2
+                # siz[u] - 1 - siz[h1] is the number of descendants outside the heaviest child's subtree.
+                if f[h1] <= (siz[u] - 1 - siz[h1]):
+                    fv = (siz[u] - 1) % 2
+                else:
+                    fv = f[h1] - (siz[u] - 1 - siz[h1])
+                f[u] = fv + 1
+        # Iterative dfs2: compute ans
+        # h carries the largest-subtree candidate (by siz) collected along the path from the root.
+        stack = [(1, 0, 0)]  # (u, parent, h)
+        while stack:
+            u, fa, h = stack.pop()
+            tmp = cmp(hson[u], h)
+            # When the condition holds, u is marked iff N and dep[u] have the same parity.
+            if f[tmp] <= N - dep[u] - siz[tmp]:
+                ans[u] = ((N & 1) == (dep[u] & 1))
+            for v in adjacency[u]:
+                if v == fa:
+                    continue
+                # Descending into the heaviest child: its own subtree is excluded, so use the runner-up.
+                if v == hson[u]:
+                    h_child = cmp(hson2[u], h)
+                else:
+                    h_child = cmp(hson[u], h)
+                stack.append((v, u, h_child))
+        self.parameter["reference_answer"] = "".join("T" if ans[i] else "F" for i in range(1, N + 1))
+        assert "T" in self.parameter["reference_answer"], "At least one vertex should be able to be the heart vertex"
+    def _prompt_generate(self) -> str :
+        return self.prompt_template.format(
+            N = self.parameter["N"],
+            edges = "\n".join("({}, {})".format(u, v) for u, v in self.parameter["edges"]),
+        )
+    def _process(self, answer : Optional[str]) -> Optional[str] :
+        if answer is not None :
+            answer = answer.strip()
+            if not(len(answer) == self.parameter["N"] and all(c in "TF" for c in answer)) :
+                return None
+            return answer
+        else :
+            return None
+    def scorer(self, output : str) -> float :
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            intersection = sum((a == "T" and b == "T") for a, b in zip(processed_result, self.parameter["reference_answer"]))
+            union = sum((a == "T" or b == "T") for a, b in zip(processed_result, self.parameter["reference_answer"]))
+            assert intersection <= union, "intersection should not exceed union"
+            if self.rewards["rewarding_strategy"] == "(intersection/union)^beta" :
+                return ((intersection / union) ** self.rewards["rewarding_beta"]) * self.rewards["rewarding_weight"]
+            elif self.rewards["rewarding_strategy"] == "intersection=union" :
+                return self.rewards["rewarding_weight"] * (intersection == union)
+            else :
+                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
+        else :
+            return self.rewards["wrong_format"]

server/Gym/environments/bez_minimalist_security/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import BEZMinimalistSecurity_Environment

server/Gym/environments/bez_minimalist_security/environment.py ADDED Viewed

	@@ -0,0 +1,221 @@

+import random
+from typing import Optional, List
+from ...environment import VerifiableEnvironment
+class BEZMinimalistSecurity_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3544
+    """
+    Task: given upper bounds P and pairwise constraints P'[u] + P'[v] = w, output an
+    array P' with 0 <= P'[i] <= P[i] that satisfies every constraint and whose sum is
+    minimized or maximized (the direction is chosen at random per instance).
+    """
+    prompt_template = \
+r"""There is an array P of length {N}. Initially, P is: {P}
+Now we want to construct a new array P' of length {N}, where 0 <= P'[i] <= P[i] for all i. Additionally, there are some constraints of the form P'[u] + P'[v] = w, where u and v are indices and w is a constant (it is guaranteed that P[u] + P[v] >= w). The constraints are:
+{constraints}
+Please output P'[0], P'[1], ..., P'[{N_minus_1}], separated by spaces, such that they satisfy all the constraints and their sum is {minimized_or_maximized}."""
+    def __init__(self,
+                 wrong_format : float = -1.0, invalid_solution : float = -0.5,
+                 rewarding_strategy_min : str = "(gold/answer)^beta", rewarding_weight_min : float = +1.0, rewarding_beta_min : float = 5.0,
+                 rewarding_strategy_max : str = "(answer/gold)^beta", rewarding_weight_max : float = +1.0, rewarding_beta_max : float = 5.0,
+                 **kwargs) :
+        """
+        Initialize the BEZMinimalistSecurity_Environment instance.
+
+        wrong_format: reward returned by scorer when the output cannot be parsed.
+        invalid_solution: reward returned by scorer when the parsed array has the wrong
+            length, violates a bound, or violates a constraint.
+        rewarding_strategy_min / rewarding_weight_min / rewarding_beta_min: scoring rule
+            for "minimized" instances; scorer accepts "(gold/answer)^beta" and "gold=answer".
+        rewarding_strategy_max / rewarding_weight_max / rewarding_beta_max: scoring rule
+            for "maximized" instances; scorer accepts "(answer/gold)^beta" and "gold=answer".
+        **kwargs: forwarded unchanged to VerifiableEnvironment.__init__.
+        """
+        super().__init__(**kwargs)
+        self.rewards = {
+            "wrong_format" : wrong_format,
+            "invalid_solution" : invalid_solution,
+            "rewarding_strategy_max" : rewarding_strategy_max,
+            "rewarding_weight_max" : rewarding_weight_max,
+            "rewarding_beta_max" : rewarding_beta_max,
+            "rewarding_strategy_min" : rewarding_strategy_min,
+            "rewarding_weight_min" : rewarding_weight_min,
+            "rewarding_beta_min" : rewarding_beta_min,
+        }
+    def _generate(self) -> None :
+        """
+        Generate a feasible instance and compute the optimal sum.
+
+        Reads self.parameter["N"] (>= 3) and self.parameter["edge_ratio"]. Writes
+        "edges" (triples (u, v, w) with u < v), "P", "minimized_or_maximized" and
+        "gold_answer" into self.parameter.
+
+        Raises AssertionError on missing/invalid parameters or if the internal solver
+        finds the instance inconsistent.
+        """
+        assert "N" in self.parameter, "N is required in parameter"
+        N = self.parameter["N"]
+        assert N >= 3, "N should be at least 3"
+        # Hidden solution: every constraint below is derived from it, so the instance is feasible.
+        P_prime = [random.randint(0, N) for _ in range(N)]
+        assert "edge_ratio" in self.parameter, "edge_ratio is required in parameter"
+        edge_ratio = self.parameter["edge_ratio"]
+        # Sample int(edge_ratio * N) distinct pairs, clamped to [1, N * (N - 1) / 2],
+        # each labelled with the hidden solution's sum for that pair.
+        edges = self.parameter["edges"] = random.sample([(u, v, P_prime[u] + P_prime[v]) for u in range(N) for v in range(u + 1, N)], max(1, min(N * (N - 1) // 2, int(edge_ratio * N))))
+        random.shuffle(edges)
+        for u, v, w in edges :
+            assert 0 <= u < v < N
+        assert len(edges) == len(set((u, v) for u, v, w in edges)), "edges should be unique"
+        # Upper bounds: the hidden value plus a random non-negative slack.
+        P = self.parameter["P"] = [P_prime_u + random.randint(0, N) for P_prime_u in P_prime]
+        # Build adjacency list (0-indexed)
+        adjacency = [[] for _ in range(N)]
+        for u, v, w in edges:
+            adjacency[u].append((v, w))
+            adjacency[v].append((u, w))
+        # Within a component, the value of node u is written as sgn[u] * x + cons[u],
+        # where x is the value of the component's anchor node.
+        vis = [False] * N
+        sgn = [0] * N
+        cons = [0] * N
+        q = [0] * N   # concrete node values, filled only when x is uniquely determined
+        # mn / mx accumulate, over all components, the smallest / largest achievable
+        # total of P[u] - q[u] (the amount by which P is reduced).
+        mn = 0
+        mx = 0
+        # Abort generation: the instance is inconsistent (not expected, since the
+        # constraints come from a real solution).
+        def wa() :
+            assert False, "Invalid solution"
+        def dfs(u):  # Depth-first search on component
+            # `fix`, `stc` and `anc` live in the enclosing _generate scope and are
+            # reset there before each component is explored.
+            nonlocal fix
+            vis[u] = True
+            stc.append(u)
+            # Early exit if constraint too large
+            if cons[u] > 10**6:
+                wa()
+            for v, w in adjacency[u]:
+                if not vis[v]:
+                    # value(v) = w - value(u): the sign flips, the constant becomes w - cons[u].
+                    sgn[v] = -sgn[u]
+                    cons[v] = w - cons[u]
+                    dfs(v)
+                else:
+                    if sgn[u] == sgn[v]:
+                        # Same sign: the edge gives 2 * sgn[u] * x + cons[u] + cons[v] = w,
+                        # which pins x to a single value.
+                        res = w - cons[u] - cons[v]
+                        # Must be even
+                        if res & 1:
+                            wa()
+                        denom = 2 * sgn[u]
+                        res //= denom
+                        # Check valid fixed value
+                        if res < 0 or res > P[anc] or (fix is not None and fix != res):
+                            wa()
+                        fix = res
+                    else:
+                        # Opposite signs: x cancels out.
+                        # Sum of constants must match
+                        if cons[u] + cons[v] != w:
+                            wa()
+        # Process each connected component
+        for i in range(N):
+            if not vis[i]:
+                stc = []           # nodes in current component
+                anc = i           # anchor node for fixed value range
+                fix = None        # fixed solution parameter
+                sgn[i] = 1        # sign for anchor
+                cons[i] = 0       # constant offset for anchor
+                dfs(i)
+                if fix is not None:
+                    # Unique solution determined by `fix`
+                    for u in stc:
+                        q[u] = sgn[u] * fix + cons[u]
+                        delta = P[u] - q[u]
+                        # Only one value is possible, so it counts towards both extremes.
+                        mn += delta
+                        mx += delta
+                        if q[u] < 0 or q[u] > P[u]:
+                            wa()
+                    # Verify edges
+                    for u in stc:
+                        for v, w in adjacency[u]:
+                            if q[u] + q[v] != w:
+                                wa()
+                else:
+                    # Range of valid `fix` values [l, r]
+                    l, r = 0, P[anc]
+                    for u in stc:
+                        # Intersect with the x-range that keeps 0 <= value(u) <= P[u].
+                        if sgn[u] == 1:
+                            l = max(l, -cons[u])
+                            r = min(r, P[u] - cons[u])
+                        else:
+                            l = max(l, cons[u] - P[u])
+                            r = min(r, cons[u])
+                    if l > r:
+                        wa()
+                    # Compute sum of reductions for minimal `fix = l`
+                    base_sum = 0
+                    tsign = 0
+                    for u in stc:
+                        base_sum += P[u] - (l * sgn[u] + cons[u])
+                        # tsign is the change in the reduction total per unit increase of x.
+                        tsign -= sgn[u]
+                    # Depending on tsign, extremes at l or r
+                    if tsign > 0:
+                        mx += base_sum + tsign * (r - l)
+                        mn += base_sum
+                    else:
+                        mx += base_sum
+                        mn += base_sum + tsign * (r - l)
+        self.parameter["minimized_or_maximized"] = random.choice(["minimized", "maximized"])
+        # The sum of P' equals sum(P) minus the total reduction, so the minimum sum
+        # uses the largest reduction and the maximum sum uses the smallest.
+        if self.parameter["minimized_or_maximized"] == "minimized" :
+            self.parameter["gold_answer"] = sum(P) - mx
+        elif self.parameter["minimized_or_maximized"] == "maximized" :
+            self.parameter["gold_answer"] = sum(P) - mn
+        else :
+            raise ValueError("minimized_or_maximized should be either 'minimized' or 'maximized'")
+    def _prompt_generate(self) -> str :
+        """Render the prompt with N, the bounds P, one constraint per line, and the objective."""
+        N = self.parameter["N"]
+        return self.prompt_template.format(
+            N = N,
+            N_minus_1 = N - 1,
+            P = " ".join("P[{}]={}".format(i, P_i) for i, P_i in enumerate(self.parameter["P"])),
+            constraints = "\n".join("P'[{}] + P'[{}] = {}".format(u, v, w) for u, v, w in self.parameter["edges"]),
+            minimized_or_maximized = self.parameter["minimized_or_maximized"],
+        )
+    def _process(self, answer : Optional[str]) -> Optional[List] :
+        """
+        Parse a whitespace-separated list of integers.
+
+        Returns the list of ints, or None when the answer is missing or any token is
+        not an integer. The length is not checked here; scorer does that.
+        """
+        if answer is not None :
+            answer = answer.strip()
+            try :
+                answer_array = list(map(int, answer.split()))
+                return answer_array
+            except ValueError :
+                return None # Invalid answer format
+        else :
+            return None # Invalid answer format
+    def scorer(self, output : str) -> float :
+        """
+        Score an output against the gold optimum.
+
+        Returns "wrong_format" when parsing fails and "invalid_solution" when the array
+        has the wrong length, breaks a bound 0 <= P'[i] <= P[i], or breaks a constraint.
+        A feasible array is scored by comparing its sum with the gold answer under the
+        configured min/max strategy. Raises NotImplementedError for an unknown strategy.
+        """
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            assert isinstance(processed_result, list), "processed_result should be a list"
+            P_prime = processed_result
+            if len(P_prime) != self.parameter["N"] :
+                return self.rewards["invalid_solution"]
+            if not all(0 <= P_prime_u <= P_u for P_prime_u, P_u in zip(P_prime, self.parameter["P"])) :
+                return self.rewards["invalid_solution"]
+            if not all(P_prime[u] + P_prime[v] == w for u, v, w in self.parameter["edges"]) :
+                return self.rewards["invalid_solution"]
+            gold, answer = self.parameter["gold_answer"], sum(P_prime)
+            if self.parameter["minimized_or_maximized"] == "minimized" :
+                # A feasible answer can never beat the optimum; a failure here means
+                # the gold answer computed in _generate is wrong.
+                assert 0 <= gold <= answer, "For minimization, answer should be greater than 0 and at least as large as the gold answer"
+                if self.rewards["rewarding_strategy_min"] == "(gold/answer)^beta" :
+                    if answer == 0 :
+                        # Avoid dividing by zero: a zero sum is optimal by the assertion above.
+                        assert gold == 0, "If answer is 0, gold should also be 0"
+                        return self.rewards["rewarding_weight_min"] * 1.0
+                    return self.rewards["rewarding_weight_min"] * ((gold / answer) ** self.rewards["rewarding_beta_min"])
+                elif self.rewards["rewarding_strategy_min"] == "gold=answer" :
+                    return self.rewards["rewarding_weight_min"] * (gold == answer)
+                else :
+                    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy_min"]))
+            elif self.parameter["minimized_or_maximized"] == "maximized" :
+                # Same reasoning as above, mirrored for maximization.
+                assert 0 <= answer <= gold, "For maximization, answer should be greater than 0 and at most as large as the gold answer"
+                if self.rewards["rewarding_strategy_max"] == "(answer/gold)^beta" :
+                    if gold == 0 :
+                        # Avoid dividing by zero: the only feasible sum is then 0.
+                        assert answer == 0, "If gold is 0, answer should also be 0"
+                        return self.rewards["rewarding_weight_max"] * 1.0
+                    return self.rewards["rewarding_weight_max"] * ((answer / gold) ** self.rewards["rewarding_beta_max"])
+                elif self.rewards["rewarding_strategy_max"] == "gold=answer" :
+                    return self.rewards["rewarding_weight_max"] * (gold == answer)
+                else :
+                    raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy_max"]))
+            else :
+                assert False, "minimize_or_maximize should be either 'minimize' or 'maximize'"
+        else :
+            return self.rewards["wrong_format"]

server/Gym/environments/bezout_identity/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import BezoutIdentity_Environment

server/Gym/environments/bezout_identity/environment.py ADDED Viewed

	@@ -0,0 +1,134 @@

+import math
+import random
+from typing import Optional, List
+from ...environment import VerifiableEnvironment
+class BezoutIdentity_Environment(VerifiableEnvironment) :
+    prompt_template = \
+r"""You are given an array of length {N}, denoted as A[1], ..., A[{N}]. Please find **integers** X[1], ..., X[{N}] such that the value of S = A[1] * X[1] + ... + A[{N}] * X[{N}] satisfies the condition: **S > 0**. Try your best to **minimize the value of S** while meeting this condition.
+A: {A}
+**Output Format:** Output a single line containing X[1], ..., X[{N}], separated by spaces."""
+    def __init__(self,
+                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
+                 **kwargs) :
+        """
+        Initialize the BezoutIdentity_Environment instance.
+        """
+        super().__init__(**kwargs)
+        self.rewards = {
+            "wrong_format" : wrong_format,
+            "invalid_solution" : invalid_solution,
+            "rewarding_strategy" : rewarding_strategy,
+            "rewarding_weight" : rewarding_weight,
+            "rewarding_beta" : rewarding_beta,
+        }
+    def _generate(self) -> None :
+        assert "N" in self.parameter, "N is required in parameter"
+        N = self.parameter["N"]
+        assert N >= 2, "N should be greater than or equal to 2"
+        assert "MAX_A" in self.parameter, "MAX_A is required in parameter"
+        MAX_A = self.parameter["MAX_A"]
+        assert MAX_A >= 2, "MAX_A should be greater than or equal to 2"
+        self.parameter["A"] = A = []
+        for _ in range(N) :
+            picked_a, best_counting = None, -1
+            for try_step in range(1024) :
+                current_a = random.randint(2, MAX_A)
+                counting = sum(int(math.gcd(current_a, _a) > 1) for _a in A)
+                if counting > best_counting :
+                    best_counting, picked_a = counting, current_a
+                if best_counting == len(A) :
+                    break
+            if random.random() < 0.5 :
+                picked_a = -picked_a
+            A.append(picked_a)
+        random.shuffle(A)
+        assert len(A) == N, "The length of A should be equal to N"
+        def exgcd(a, b):
+            """
+            Returns (g, x, y) such that
+                g = gcd(a, b)
+                a*x + b*y = g
+            Ensures g >= 0.
+            """
+            if b == 0:
+                return (abs(a), 1 if a >= 0 else -1, 0)
+            g, x1, y1 = exgcd(b, a % b)
+            # b*x1 + (a%b)*y1 = g
+            # a%b = a - (a//b)*b
+            x = y1
+            y = x1 - (a // b) * y1
+            return (g, x, y)
+        # initialize with A[0]
+        g = abs(A[0])
+        X = [0] * N
+        X[0] = 1 if A[0] >= 0 else -1
+        # incorporate each A[i]
+        for i in range(1, N):
+            ai = A[i]
+            g2, u, v = exgcd(g, ai)
+            # scale previous coefficients by u
+            for j in range(i):
+                X[j] *= u
+            # coefficient for A[i] is v
+            X[i] = v
+            g = g2
+        S = sum(x * a for x, a in zip(X, A))
+        assert S == g
+        assert S > 0, "The sum S must be greater than 0"
+        self.parameter["reference_answer"] = " ".join(map(str, X))
+        self.parameter["gold_answer"] = S
+    def _prompt_generate(self) -> str :
+        return self.prompt_template.format(
+            N = self.parameter["N"],
+            A = ", ".join(map(str, self.parameter["A"])),
+        )
+    def _process(self, answer : Optional[str]) -> Optional[List] :
+        if answer is not None :
+            answer = answer.strip()
+            try :
+                answer_array = list(map(int, answer.split()))
+                return answer_array
+            except ValueError :
+                return None # Invalid answer format
+        else :
+            return None # Invalid answer format
+    def scorer(self, output : str) -> float :
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            assert isinstance(processed_result, list), "processed_result should be a list"
+            if len(processed_result) != self.parameter["N"] :
+                return self.rewards["invalid_solution"]
+            S = sum(x * a for x, a in zip(processed_result, self.parameter["A"]))
+            if S <= 0 :
+                return self.rewards["invalid_solution"]
+            assert self.parameter["gold_answer"] <= S, "The computed sum S must be greater than or equal to the gold answer"
+            if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" :
+                return self.rewards["rewarding_weight"] * ((self.parameter["gold_answer"] / S) ** self.rewards["rewarding_beta"])
+            elif self.rewards["rewarding_strategy"] == "gold=answer" :
+                return self.rewards["rewarding_weight"] * (self.parameter["gold_answer"] == S)
+            else :
+                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
+        else :
+            return self.rewards["wrong_format"]

server/Gym/environments/binario/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import Binario_Environment

server/Gym/environments/binario/environment.py ADDED Viewed

	@@ -0,0 +1,188 @@

+import random
+from typing import Optional, List
+from ...environment import VerifiableEnvironment
+class Binario_Environment(VerifiableEnvironment) :
+    prompt_template = \
+r"""You are given a {N} × {M} matrix. Each cell contains either '0', '1', or '*' ('*' means the cell is empty). Please fill all '*' cells with either '0' or '1' such that:
+1. The number of `1`s in each row (from top to bottom) is: {row_counts}.
+2. The number of `1`s in each column (from left to right) is: {col_counts}.
+3. No more than two consecutive cells in a row or column can contain the same number.
+The matrix is given in **row-major order**, with each row represented as a string of '0', '1', and '*':
+{matrix}
+**Output Format:** Output {N} lines, each containing {M} characters, where each character is either '0' or '1'. The output should match the format of the input (i.e., one row per line, no separators)."""
+    def __init__(self,
+                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(satisfied/all)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 10.0,
+                 **kwargs) :
+        """
+        Initialize the Binario_Environment instance.
+        """
+        super().__init__(**kwargs)
+        self.rewards = {
+            "wrong_format" : wrong_format,
+            "invalid_solution" : invalid_solution,
+            "rewarding_strategy" : rewarding_strategy,
+            "rewarding_weight" : rewarding_weight,
+            "rewarding_beta" : rewarding_beta,
+        }
+    def _generate(self) -> None :
+        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
+        MAX_N_M = self.parameter["MAX_N_M"]
+        assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"
+        N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M)
+        def generate_matrix(N, M):
+            # Initialize the grid with None
+            grid = [[None] * M for _ in range(N)]
+            all_cells = [(i, j) for i in range(N) for j in range(M)]
+            random.shuffle(all_cells)  # Shuffle to ensure randomness in placement
+            backtrack_counting = 0
+            def backtrack(idx):
+                # If we've filled past the last row, we're done
+                if idx == len(all_cells):
+                    return True
+                i, j = all_cells[idx]
+                nonlocal backtrack_counting
+                backtrack_counting += 1
+                if backtrack_counting > 10000000:
+                    return False
+                # Try placing 0 or 1 in random order
+                for v in random.sample(["0", "1"], 2):
+                    # Check adjacency constraints in row (no three in a row)
+                    if j >= 2 and grid[i][j-1] == v and grid[i][j-2] == v:
+                        continue
+                    if j >= 1 and j + 1 < M and grid[i][j-1] == v and grid[i][j+1] == v:
+                        continue
+                    if j + 2 < M and grid[i][j+1] == v and grid[i][j+2] == v:
+                        continue
+                    # Check adjacency constraints in column
+                    if i >= 2 and grid[i-1][j] == v and grid[i-2][j] == v:
+                        continue
+                    if i >= 1 and i + 1 < N and grid[i-1][j] == v and grid[i+1][j] == v:
+                        continue
+                    if i + 2 < N and grid[i+1][j] == v and grid[i+2][j] == v:
+                        continue
+                    # Place v
+                    grid[i][j] = v
+                    # Recurse
+                    if backtrack(idx + 1):
+                        return True
+                    grid[i][j] = None
+                # No valid value at (i, j): backtrack
+                return False
+            return grid if backtrack(0) else None
+        matrix = generate_matrix(N, M)
+        if matrix is None :
+            self.parameter = None
+            return
+        self.parameter["reference_answer"] = "\n".join("".join(row) for row in matrix)
+        self.parameter["row_counts"] = [sum(int(cell == "1") for cell in row) for row in matrix]
+        self.parameter["col_counts"] = [sum(int(matrix[i][j] == "1") for i in range(N)) for j in range(M)]
+        assert "sparsity" in self.parameter, "sparsity is required in parameter"
+        sparsity = self.parameter["sparsity"]
+        assert 0 < sparsity < 1, "sparsity should be between 0 and 1"
+        empty_cells = random.sample(range(N * M), max(1, int(N * M * sparsity)))
+        for cell in empty_cells :
+            row, column = divmod(cell, M)
+            matrix[row][column] = '*'
+        self.parameter["matrix"] = ["".join(row) for row in matrix]
+    def _prompt_generate(self) -> str :
+        return self.prompt_template.format(
+            N = self.parameter["N"],
+            M = self.parameter["M"],
+            matrix = "\n".join("".join(map(str, row)) for row in self.parameter["matrix"]),
+            row_counts = ", ".join(map(str, self.parameter["row_counts"])),
+            col_counts = ", ".join(map(str, self.parameter["col_counts"])),
+        )
+    def _process(self, answer : Optional[str]) -> Optional[List] :
+        if answer is not None :
+            answer = answer.strip()
+            try :
+                matrix = []
+                for line in answer.splitlines() :
+                    line = line.strip()
+                    if line :
+                        matrix.append(line.strip())
+                return matrix
+            except ValueError :
+                return None
+        else :
+            return None
+    def scorer(self, output : str) -> float :
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            assert isinstance(processed_result, list), "processed_result should be a list"
+            N, M = self.parameter["N"], self.parameter["M"]
+            solution = processed_result
+            if len(solution) != N or any(len(row) != M for row in solution) :
+                return self.rewards["wrong_format"]
+            for row in solution :
+                if not all(c in "01" for c in row) :
+                    return self.rewards["wrong_format"]
+            for row, original_row in zip(solution, self.parameter["matrix"]) :
+                for cell, original_cell in zip(row, original_row) :
+                    if original_cell != '*' and cell != original_cell :
+                        assert (original_cell == '0' and cell == '1') or (original_cell == '1' and cell == '0')
+                        return self.rewards["invalid_solution"]
+            delta = [
+                (+1, 0),
+                (-1, 0),
+                (0, +1),
+                (0, -1),
+            ]
+            for i in range(N) :
+                for j in range(M) :
+                    for di, dj in delta :
+                        ni, nj = i + di, j + dj
+                        nni, nnj = i + 2 * di, j + 2 * dj
+                        if 0 <= ni < N and 0 <= nj < M and 0 <= nni < N and 0 <= nnj < M :
+                            if solution[i][j] == solution[ni][nj] == solution[nni][nnj] :
+                                return self.rewards["invalid_solution"]
+            row_counts = [sum(int(cell == "1") for cell in row) for row in solution]
+            col_counts = [sum(int(solution[i][j] == "1") for i in range(N)) for j in range(M)]
+            satisfied = sum(int(answer == gold) for answer, gold in zip(row_counts, self.parameter["row_counts"])) + \
+                        sum(int(answer == gold) for answer, gold in zip(col_counts, self.parameter["col_counts"]))
+            assert satisfied <= N + M, "satisfied should not exceed N + M"
+            if self.rewards["rewarding_strategy"] == "(satisfied/all)^beta" :
+                return self.rewards["rewarding_weight"] * ((satisfied / (N + M)) ** self.rewards["rewarding_beta"])
+            elif self.rewards["rewarding_strategy"] == "satisfied=all" :
+                return self.rewards["rewarding_weight"] * (satisfied == (N + M))
+            else :
+                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
+        else :
+            return self.rewards["wrong_format"]

server/Gym/environments/binario_no_adjacency_requirement/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import Binario_NoAdjacencyRequirement_Environment

server/Gym/environments/binario_no_adjacency_requirement/environment.py ADDED Viewed

	@@ -0,0 +1,114 @@

+import random
+from typing import Optional, List
+from ...environment import VerifiableEnvironment
+class Binario_NoAdjacencyRequirement_Environment(VerifiableEnvironment) :
+    prompt_template = \
+r"""You are given a (2 × {N}) × (2 × {M}) matrix. Each cell contains either '0', '1', or '*' ('*' means the cell is empty). Please fill all '*' cells with either '0' or '1' such that:
+1. Each **row** contains exactly {M} '0's and {M} '1's.
+2. Each **column** contains exactly {N} '0's and {N} '1's.
+The matrix is given in **row-major order**, with each row represented as a string of '0', '1', and '*':
+{matrix}
+**Output Format:** Output (2 × {N}) lines, each containing (2 × {M}) characters, where each character is either '0' or '1'. The output should match the format of the input (i.e., one row per line, no separators)."""
+    def __init__(self,
+                 wrong_format : float = -1.0, invalid_solution : float = -0.5, wrong_solution : float = 0.0, correct_solution : float = 1.0,
+                 **kwargs) :
+        """
+        Initialize the Binario_Environment instance.
+        """
+        super().__init__(**kwargs)
+        self.rewards = {
+            "wrong_format" : wrong_format,
+            "invalid_solution" : invalid_solution,
+            "wrong_solution" : wrong_solution,
+            "correct_solution" : correct_solution,
+        }
+    def _generate(self) -> None :
+        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
+        MAX_N_M = self.parameter["MAX_N_M"]
+        assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"
+        N, M = self.parameter["N"], self.parameter["M"] = random.randint(2, MAX_N_M), random.randint(2, MAX_N_M)
+        row_permutation, col_permutation = list(range(2 * N)), list(range(2 * M))
+        random.shuffle(row_permutation)
+        random.shuffle(col_permutation)
+        matrix = [[str((row_permutation[i] + col_permutation[j]) % 2) for j in range(2 * M)] for i in range(2 * N)]
+        self.parameter["reference_answer"] = "\n".join("".join(row) for row in matrix)
+        assert "sparsity" in self.parameter, "sparsity is required in parameter"
+        sparsity = self.parameter["sparsity"]
+        assert 0 < sparsity < 1, "sparsity should be between 0 and 1"
+        empty_cells = random.sample(range((2 * N) * (2 * M)), max(1, int((2 * N) * (2 * M) * sparsity)))
+        for cell in empty_cells :
+            row, column = divmod(cell, 2 * M)
+            matrix[row][column] = '*'
+        self.parameter["matrix"] = ["".join(row) for row in matrix]
+    def _prompt_generate(self) -> str :
+        return self.prompt_template.format(
+            N = self.parameter["N"],
+            M = self.parameter["M"],
+            matrix = "\n".join("".join(map(str, row)) for row in self.parameter["matrix"]),
+        )
+    def _process(self, answer : Optional[str]) -> Optional[List] :
+        if answer is not None :
+            answer = answer.strip()
+            try :
+                matrix = []
+                for line in answer.splitlines() :
+                    line = line.strip()
+                    if line :
+                        matrix.append(line.strip())
+                return matrix
+            except ValueError :
+                return None
+        else :
+            return None
+    def scorer(self, output : str) -> float :
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            assert isinstance(processed_result, list), "processed_result should be a list"
+            N, M = self.parameter["N"], self.parameter["M"]
+            solution = processed_result
+            if len(solution) != 2 * N or any(len(row) != 2 * M for row in solution) :
+                return self.rewards["wrong_format"]
+            for row in solution :
+                if not all(c in "01" for c in row) :
+                    return self.rewards["wrong_format"]
+            for row, original_row in zip(solution, self.parameter["matrix"]) :
+                for cell, original_cell in zip(row, original_row) :
+                    if original_cell != '*' and cell != original_cell :
+                        assert (original_cell == '0' and cell == '1') or (original_cell == '1' and cell == '0')
+                        return self.rewards["invalid_solution"]
+            for i in range(2 * N) :
+                if solution[i].count('1') != solution[i].count('0') :
+                    return self.rewards["wrong_solution"]
+                assert solution[i].count('1') == M, "Row {} does not have exactly {} ones".format(i, M)
+                assert solution[i].count('0') == M, "Row {} does not have exactly {} zeros".format(i, M)
+            for j in range(2 * M) :
+                if sum(solution[i][j] == '1' for i in range(2 * N)) != sum(solution[i][j] == '0' for i in range(2 * N)) :
+                    return self.rewards["wrong_solution"]
+                assert sum(solution[i][j] == '1' for i in range(2 * N)) == N, "Column {} does not have exactly {} ones".format(j, N)
+                assert sum(solution[i][j] == '0' for i in range(2 * N)) == N, "Column {} does not have exactly {} zeros".format(j, N)
+            return self.rewards["correct_solution"]
+        else :
+            return self.rewards["wrong_format"]

server/Gym/environments/binary_alternation/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import BinaryAlternation_Environment

server/Gym/environments/binary_alternation/environment.py ADDED Viewed

	@@ -0,0 +1,121 @@

+import random
+from typing import Optional, List
+from ...environment import VerifiableEnvironment
+class BinaryAlternation_Environment(VerifiableEnvironment) :
+    prompt_template = \
+r"""You are given a binary string of length {N}, consisting of `0`s and `1`s. It is 0-indexed: {string}
+In one operation, you may **swap** the characters at indices `i` and `j` (0 ≤ i, j < {N}). Please transform the string into an **alternating binary string** (no two adjacent characters are the same) using the **minimum number of operations**.
+**Output Format:** Each operation should be written on a single line in the format: `i j`, where `i` and `j` are the indices being swapped. Do **NOT** include backticks or quotes. Output one operation per line in the order they should be performed."""
+    def __init__(self,
+                 wrong_format : float = -1.0, invalid_solution : float = -0.5, rewarding_strategy : str = "(gold/answer)^beta", rewarding_weight : float = +1.0, rewarding_beta : float = 5.0,
+                 **kwargs) :
+        """
+        Initialize the BinaryAlternation_Environment instance.
+        """
+        super().__init__(**kwargs)
+        self.rewards = {
+            "wrong_format" : wrong_format,
+            "invalid_solution" : invalid_solution,
+            "rewarding_strategy" : rewarding_strategy,
+            "rewarding_weight" : rewarding_weight,
+            "rewarding_beta" : rewarding_beta,
+        }
+    def _generate(self) -> None :
+        assert "zero_count" in self.parameter, "zero_count is required in parameter"
+        zero_count = self.parameter["zero_count"]
+        assert zero_count >= 2, "zero_count should be greater than or equal to 2"
+        one_count = random.randint(zero_count - 1, zero_count + 1)
+        string = ["0"] * zero_count + ["1"] * one_count
+        random.shuffle(string)
+        string = self.parameter["string"] = "".join(string)
+        self.parameter["reference_answer"] = None
+        def compute(should : str) -> List[str] :
+            zero_to_one, one_to_zero = [], []
+            for i, now in enumerate(string) :
+                if now != should :
+                    if now == "0" :
+                        zero_to_one.append(i)
+                    else :
+                        one_to_zero.append(i)
+                should = "1" if should == "0" else "0"
+            assert len(zero_to_one) == len(one_to_zero), "zero_to_one and one_to_zero should have the same length"
+            solution = []
+            for i, j in zip(zero_to_one, one_to_zero) :
+                solution.append("{} {}".format(i, j))
+            return solution
+        if zero_count >= one_count :
+            self.parameter["reference_answer"] = compute("0")
+        if one_count >= zero_count :
+            candidate = compute("1")
+            if self.parameter["reference_answer"] is None or len(candidate) < len(self.parameter["reference_answer"]) :
+                self.parameter["reference_answer"] = candidate
+        self.parameter["gold_answer"] = len(self.parameter["reference_answer"])
+        self.parameter["reference_answer"] = "\n".join(self.parameter["reference_answer"])
+    def _prompt_generate(self) -> str :
+        string = self.parameter["string"]
+        return self.prompt_template.format(N = len(string), string = string)
+    def _process(self, answer : Optional[str]) -> Optional[List] :
+        if answer is not None :
+            answer = answer.strip()
+            actions = []
+            for line in answer.splitlines() :
+                line = line.strip()
+                if line :
+                    actions.append(line.split())
+                    action = actions[-1]
+                    if len(action) != 2 :
+                        return None
+                    try :
+                        action[0] = int(action[0])
+                        action[1] = int(action[1])
+                    except ValueError :
+                        return None
+            return actions
+        else :
+            return None
+    def scorer(self, output : str) -> float :
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            string = list(self.parameter["string"])
+            for i, j in processed_result :
+                if not (0 <= i < len(string) and 0 <= j < len(string)) :
+                    return self.rewards["invalid_solution"]
+                string[i], string[j] = string[j], string[i]
+            string = "".join(string)
+            if any(string[i] == string[i + 1] for i in range(len(string) - 1)) :
+                return self.rewards["invalid_solution"]
+            gold, answer = self.parameter["gold_answer"], len(processed_result)
+            assert gold <= answer, "gold should be less than or equal to answer"
+            if answer == 0 :
+                return self.rewards["rewarding_weight"]
+            if self.rewards["rewarding_strategy"] == "(gold/answer)^beta" :
+                return self.rewards["rewarding_weight"] * ((gold / answer) ** self.rewards["rewarding_beta"])
+            elif self.rewards["rewarding_strategy"] == "gold=answer" :
+                return self.rewards["rewarding_weight"] * (gold == answer)
+            else :
+                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
+        else :
+            return self.rewards["wrong_format"]

server/Gym/environments/binary_linear_equation_solution_counting/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import BinaryLinearEquation_SolutionCounting_Environment

server/Gym/environments/binary_linear_equation_solution_counting/environment.py ADDED Viewed

	@@ -0,0 +1,187 @@

+import random
+from typing import Optional
+from ...environment import VerifiableEnvironment
+class BinaryLinearEquation_SolutionCounting_Environment(VerifiableEnvironment) : # Task: count integer points (x, y) on A*x + B*y + C = 0 inside the box [X1, X2] x [Y1, Y2]
+    prompt_template = r"""What is the number of integer solution pairs (x, y) such that ({A}) * x + ({B}) * y + ({C}) = 0, with {X1} <= x <= {X2} and {Y1} <= y <= {Y2}?"""
+    def __init__(self,
+                wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
+                not_guaranteed_probability : float = 0.05,
+                **kwargs) :
+        """
+        Initialize the BinaryLinearEquation_SolutionCounting instance.
+        """
+        super().__init__(**kwargs)
+        self.not_guaranteed_probability = not_guaranteed_probability # chance that _generate draws C and the box independently, so the answer may be 0
+        self.rewards = {
+            "wrong_format" : wrong_format,
+            "rewarding_strategy" : rewarding_strategy,
+            "rewarding_weight" : rewarding_weight,
+            "rewarding_beta" : rewarding_beta,
+        }
+    def _generate(self) -> None : # samples A, B, C and the box, then stores the exact count in parameter["reference_answer"]
+        assert "MAX_RANGE" in self.parameter, "MAX_RANGE is required in parameter"
+        MAX_RANGE = self.parameter["MAX_RANGE"]
+        assert MAX_RANGE >= 8, "MAX_RANGE must be at least 8"
+        A = self.parameter["A"] = random.randint(-MAX_RANGE, +MAX_RANGE) # coefficients may be zero or negative
+        B = self.parameter["B"] = random.randint(-MAX_RANGE, +MAX_RANGE)
+        not_guaranteed = random.random() < self.not_guaranteed_probability
+        if not_guaranteed : # unconstrained instance: nothing forces a solution to exist
+            X1 = self.parameter["X1"] = random.randint(-MAX_RANGE, +MAX_RANGE)
+            X2 = self.parameter["X2"] = random.randint(X1, +MAX_RANGE)
+            Y1 = self.parameter["Y1"] = random.randint(-MAX_RANGE, +MAX_RANGE)
+            Y2 = self.parameter["Y2"] = random.randint(Y1, +MAX_RANGE)
+            C = self.parameter["C"] = random.randint(-2 * (MAX_RANGE ** 2),+2 * (MAX_RANGE ** 2))
+        else : # planted instance: pick a point (x, y), derive C from it and make the box contain it
+            x = random.randint(-MAX_RANGE, +MAX_RANGE)
+            y = random.randint(-MAX_RANGE, +MAX_RANGE)
+            C = self.parameter["C"] = -(A * x + B * y)
+            X1 = self.parameter["X1"] = random.randint(-MAX_RANGE, x)
+            X2 = self.parameter["X2"] = random.randint(x, +MAX_RANGE)
+            Y1 = self.parameter["Y1"] = random.randint(-MAX_RANGE, y)
+            Y2 = self.parameter["Y2"] = random.randint(y, +MAX_RANGE)
+        def gcd(a, b): # Euclid's algorithm; abs() makes the result non-negative for signed inputs
+            while b:
+                a, b = b, a % b
+            return abs(a)
+        def extended_gcd_positive(a, b):
+            # Returns (g, x, y) with a*x + b*y = g, for a,b >= 0
+            if b == 0:
+                return (a, 1, 0)
+            g, x1, y1 = extended_gcd_positive(b, a % b)
+            return (g, y1, x1 - (a // b) * y1)
+        def ceil_div(a, b):
+            # Ceil division that works for any sign of b
+            return -((-a) // b)
+        def floor_div(a, b):
+            # Floor division (Python's // already floors)
+            return a // b
+        def k_range(a0, step, L, R): # NOTE: an empty range is reported as lo > hi; the literal (1, 0) named in the docstring is never returned. step must be non-zero.
+            """
+            From constraint: L <= a0 + step*k <= R
+            Return [lo, hi] for integer k, or (1, 0) for empty.
+            """
+            if step > 0:
+                lo = ceil_div(L - a0, step)
+                hi = floor_div(R - a0, step)
+            else:  # step < 0
+                # Inequality reverses when dividing by a negative
+                lo = ceil_div(R - a0, step)
+                hi = floor_div(L - a0, step)
+            return lo, hi
+        def compute(A, B, C, X1, X2, Y1, Y2): # exact number of integer solutions inside the box
+            if X1 > X2: # tolerate swapped bounds
+                X1, X2 = X2, X1
+            if Y1 > Y2:
+                Y1, Y2 = Y2, Y1
+            # Degenerate cases
+            if A == 0 and B == 0: # equation reduces to C = 0: every box point or none
+                return (X2 - X1 + 1) * (Y2 - Y1 + 1) if C == 0 else 0
+            if A == 0:
+                # B*y + C = 0
+                if C % B == 0:
+                    y = -C // B # exact because B divides C
+                    return (X2 - X1 + 1) if (Y1 <= y <= Y2) else 0 # x is free, y is fixed
+                else:
+                    return 0
+            if B == 0:
+                # A*x + C = 0
+                if C % A == 0:
+                    x = -C // A # exact because A divides C
+                    return (Y2 - Y1 + 1) if (X1 <= x <= X2) else 0 # y is free, x is fixed
+                else:
+                    return 0
+            # General case
+            d = gcd(A, B)
+            if C % d != 0: # no integer solution unless gcd(A, B) divides C
+                return 0
+            # Find one solution to A*x + B*y = -C
+            _, xg, yg = extended_gcd_positive(abs(A), abs(B))  # gives axg + byg = gcd(|A|,|B|)
+            if A < 0: # flip signs so that A*xg + B*yg = d holds for the signed coefficients
+                xg = -xg
+            if B < 0:
+                yg = -yg
+            mult = (-C) // d # exact: d divides C
+            x0 = xg * mult
+            y0 = yg * mult
+            # Parametric form
+            step_x = B // d # all solutions: x = x0 + step_x * k, y = y0 + step_y * k
+            step_y = -A // d  # note: can be negative
+            # k-range from x and y intervals
+            kx_lo, kx_hi = k_range(x0, step_x, X1, X2)
+            ky_lo, ky_hi = k_range(y0, step_y, Y1, Y2)
+            lo = max(kx_lo, ky_lo) # k must satisfy both the x and the y constraint
+            hi = min(kx_hi, ky_hi)
+            return 0 if lo > hi else hi - lo + 1
+        self.parameter["reference_answer"] = compute(A, B, C, X1, X2, Y1, Y2)
+        if not not_guaranteed : # the planted point itself must have been counted
+            assert self.parameter["reference_answer"] >= 1
+        else :
+            assert self.parameter["reference_answer"] >= 0
+    def _prompt_generate(self) -> str : # fills the prompt template with the seven sampled values
+        return self.prompt_template.format(
+            A = self.parameter["A"],
+            B = self.parameter["B"],
+            C = self.parameter["C"],
+            X1 = self.parameter["X1"],
+            X2 = self.parameter["X2"],
+            Y1 = self.parameter["Y1"],
+            Y2 = self.parameter["Y2"],
+        )
+    def _process(self, answer : Optional[str]) -> Optional[int] : # parses the answer as a single integer; None when missing or not an integer
+        if answer is not None :
+            answer = answer.strip()
+            try :
+                int_answer = int(answer)
+                return int_answer
+            except ValueError :
+                return None
+        else :
+            return None
+    def scorer(self, output : str) -> float : # wrong_format for unparsable or negative answers, otherwise a strategy-dependent reward
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            if processed_result < 0 : # a count cannot be negative
+                return self.rewards["wrong_format"]
+            if self.rewards["rewarding_strategy"] == "(min/max)^beta" :
+                if self.parameter["reference_answer"] == 0 : # handled separately so the ratio below never becomes 0/0
+                    return self.rewards["rewarding_weight"] * (processed_result == 0)
+                a, b = self.parameter["reference_answer"], processed_result
+                return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) # ratio in [0, 1]; equals 1 only for an exact match
+            elif self.rewards["rewarding_strategy"] == "gold=answer" :
+                return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
+            else :
+                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
+        else :
+            return self.rewards["wrong_format"]

server/Gym/environments/binary_tree_leaf_num_expectation/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import BinaryTreeLeafNumExpectation_Environment

server/Gym/environments/binary_tree_leaf_num_expectation/environment.py ADDED Viewed

	@@ -0,0 +1,76 @@

+import math
+import random
+from typing import Optional, Tuple
+from ...environment import VerifiableEnvironment
+class BinaryTreeLeafNumExpectation_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P3978
+    prompt_template = \
+r"""We uniformly at random generate a **binary tree** with exactly {N} nodes (all distinct binary trees with {N} nodes are equally likely). Two binary trees are considered identical if and only if:
+- both are empty, **OR**
+- both are non-empty, and their left subtrees are identical and their right subtrees are identical.
+What is the expected number of **leaf** nodes (nodes whose left and right children are both empty) in the generated binary tree? Output the result as `A/B` (do NOT include quotes), where A and B are positive integers separated by a slash `/`."""
+    def __init__(self,
+                 wrong_format : float = -1.0, correct_answer : float = +1.0, wrong_answer : float = 0.0,
+                 **kwargs) :
+        """
+        Initialize the BinaryTreeLeafNumExpectation_Environment instance.
+        """
+        super().__init__(**kwargs)
+        self.rewards = {
+            "wrong_format" : wrong_format,
+            "correct_answer" : correct_answer,
+            "wrong_answer" : wrong_answer,
+        }
+    def _generate(self) -> None :
+        assert "MAX_N" in self.parameter, "MAX_N is required in parameter"
+        MAX_N = self.parameter["MAX_N"]
+        assert MAX_N >= 5, "MAX_N should be greater than or equal to 5"
+        N = self.parameter["N"] = random.randint(1, MAX_N)
+        A, B = N * (N + 1), 2 * (2 * N - 1)
+        gcd_AB = math.gcd(A, B)
+        A //= gcd_AB
+        B //= gcd_AB
+        self.parameter["gold_answer"] = dict(A = A, B = B)
+        self.parameter["reference_answer"] = "{}/{}".format(A, B)
+    def _prompt_generate(self) -> str :
+        return self.prompt_template.format(N = self.parameter["N"])
+    def _process(self, answer : Optional[str]) -> Optional[Tuple[int, int]] :
+        if answer is not None :
+            answer = answer.strip()
+            try :
+                A, B = map(int, map(str.strip, answer.split('/')))
+                return (A, B)
+            except :
+                return None
+        else :
+            return None
+    def scorer(self, output : str) -> float :
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            A, B = processed_result
+            if not (A > 0 and B > 0) :
+                return self.rewards["wrong_format"]
+            gold_A, gold_B = self.parameter["gold_answer"]["A"], self.parameter["gold_answer"]["B"]
+            gcd_AB = math.gcd(A, B)
+            A //= gcd_AB
+            B //= gcd_AB
+            if (A, B) == (gold_A, gold_B) :
+                return self.rewards["correct_answer"]
+            else :
+                return self.rewards["wrong_answer"]
+        else :
+            return self.rewards["wrong_format"]

server/Gym/environments/bit_equation_counting/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import BitEquationCounting_Environment

server/Gym/environments/bit_equation_counting/environment.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import random
+from typing import Optional
+from ...environment import VerifiableEnvironment
+class BitEquationCounting_Environment(VerifiableEnvironment) :
+    prompt_template = \
+r"""Given a Boolean expression (where `_` represents a variable that can be 0 or 1, `&` is bitwise AND, `|` is bitwise OR, and `^` is bitwise XOR): {expression}
+There are 2^{N} possible combinations of values for the variables. Your task is to find how many of these combinations make the expression evaluate to true.
+**Output Format:** Your final answer should be a single integer — the number of combinations that make the expression true. Example: `15` (do **NOT** include quotes or backticks)."""
+    def __init__(self,
+                 wrong_format : float = -1.0, wrong_range : float = -0.5, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
+                 **kwargs) :
+        """
+        Initialize the BitEquationCounting_Environment instance.
+        """
+        super().__init__(**kwargs)
+        self.rewards = {
+            "wrong_format" : wrong_format,
+            "wrong_range" : wrong_range,
+            "rewarding_strategy" : rewarding_strategy,
+            "rewarding_weight" : rewarding_weight,
+            "rewarding_beta" : rewarding_beta,
+        }
+    def _generate(self) -> None :
+        assert "N" in self.parameter, "N is required in parameter"
+        N = self.parameter["N"]
+        assert N >= 2, "N should be greater than or equal to 2"\
+        def build_expression(n) :
+            if n == 1 :
+                return "_", 1, 1
+            left_n = random.randint(1, n - 1)
+            right_n = n - left_n
+            left_expr, left_true, left_false = build_expression(left_n)
+            right_expr, right_true, right_false = build_expression(right_n)
+            op = random.choice(("&", "|", "^"))
+            if op == "&" :
+                true_count = left_true * right_true
+                false_count = (2 ** n) - true_count
+            elif op == "|" :
+                false_count = left_false * right_false
+                true_count = (2 ** n) - false_count
+            elif op == "^" :
+                true_count = left_true * right_false + left_false * right_true
+                false_count = left_true * right_true + left_false * right_false
+                assert true_count + false_count == 2 ** n, "XOR operation should cover all cases"
+            else :
+                raise ValueError("Invalid operator")
+            return "({} {} {})".format(left_expr, op, right_expr), true_count, false_count
+        expression, true_count, false_count = build_expression(N)
+        self.parameter["expression"] = expression[1 : -1]
+        self.parameter["reference_answer"] = true_count
+    def _prompt_generate(self) -> str :
+        return self.prompt_template.format(expression = self.parameter["expression"], N = self.parameter["N"])
+    def _process(self, answer : Optional[str]) -> Optional[int] :
+        if answer is not None :
+            answer = answer.strip()
+            try :
+                int_answer = int(answer)
+                return int_answer
+            except ValueError :
+                return None
+        else :
+            return None
+    def scorer(self, output : str) -> float :
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            if not (0 <= processed_result <= 2 ** self.parameter["N"]) :
+                return self.rewards["wrong_range"]
+            if self.rewards["rewarding_strategy"] == "(min/max)^beta" :
+                a, b = self.parameter["reference_answer"], processed_result
+                return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"])
+            elif self.rewards["rewarding_strategy"] == "gold=answer" :
+                return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
+            else :
+                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
+        else :
+            return self.rewards["wrong_format"]

server/Gym/environments/bitand_zero_path_counting/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import BitAndZero_PathCounting_Environment

server/Gym/environments/bitand_zero_path_counting/environment.py ADDED Viewed

	@@ -0,0 +1,135 @@

+import random
+from typing import Optional
+from ...environment import VerifiableEnvironment
+class BitAndZero_PathCounting_Environment(VerifiableEnvironment) : # Task: count paths S -> T (mod MOD) in the graph with edges s -> t iff s < t and s & t == 0
+    prompt_template = \
+r"""You are given a **directed graph** with an **infinite number of vertices**, where each vertex is labeled with a non-negative integer: `0`, `1`, `2`, ...
+There is a directed edge from vertex `s` to vertex `t` if and only if:
+- `s < t`, and
+- `s & t = 0` (where `&` denotes the bitwise AND operation)
+Please compute the number of **distinct paths** from vertex `{S}` to vertex `{T}`. Give the result **modulo {MOD}**.
+Note that the two vertices labels are provided in **binary (base-2)** representation.
+**Output Format:** Your final answer should be a single integer — the number of distinct paths modulo `{MOD}`."""
+    MOD = 10000 # modulus used by the prompt, the DP below and the range check in scorer
+    def __init__(self,
+                 wrong_format : float = -1.0, wrong_range : float = -0.5, correct_answer : float = +1.0, wrong_answer : float = 0.0,
+                 **kwargs) :
+        """
+        Initialize the BitAndZero_PathCounting_Environment instance.
+        """
+        super().__init__(**kwargs)
+        self.rewards = {
+            "wrong_format" : wrong_format,
+            "wrong_range" : wrong_range,
+            "correct_answer" : correct_answer,
+            "wrong_answer" : wrong_answer,
+        }
+    def _generate_helper(self) -> None : # samples one (S, T) pair and stores S, T and reference_answer in self.parameter
+        assert "max_length" in self.parameter, "max_length is required in parameter"
+        max_length = self.parameter["max_length"]
+        assert max_length >= 1, "max_length should be greater than or equal to 1"
+        S = "1" + "".join(str(random.randint(0, 1)) for _ in range(random.randint(1, max_length) - 1)) # binary string with a leading 1, total length in [1, max_length]
+        T = "1" + "".join(str(random.randint(0, 1)) for _ in range(random.randint(1, max_length) - 1))
+        if len(S) > len(T) or (len(S) == len(T) and S > T) : # numeric comparison: more digits is larger, equal lengths compare lexicographically
+            S, T = T, S
+            # Ensure S <= T
+        self.parameter["S"], self.parameter["T"] = S, T # stored as binary strings, which is how the prompt shows them
+        MOD = self.MOD
+        def Mult(a: int, b: int) -> int: # modular product
+            return (a * b) % MOD
+        def Add(a: int, b: int) -> int: # modular sum with one conditional subtraction; relies on both operands already being below MOD
+            s = a + b
+            return s - MOD if s >= MOD else s
+        S = list(map(int, S)) # from here on S and T are lists of bits, most significant first
+        T = list(map(int, T))
+        N, M = len(S), len(T)
+        if M > N:
+            S = [0] * (M - N) + S # left-pad S with zeros to the length of T
+        else:
+            assert M == N
+        G = [[[0, 0] for _ in range(M)] for __ in range(2)] # G[st][i][b]: auxiliary table, indexed by start state st, step i and end state b
+        for st in (0, 1):
+            G[st][0][st] = 1
+            for i in range(1, M):
+                G[st][i][0] = Add(G[st][i-1][0], G[st][i-1][1]) # state 0 may follow either state
+                G[st][i][1] = G[st][i-1][0] # state 1 may only follow state 0
+        H = 1
+        while H <= M and S[H-1] == 0: # H ends as the 1-based position of the first 1 in the padded S
+            H += 1
+        F = [[0] * M for _ in range(M + 1)] # F[i][x]: DP over the first i bit positions (1-based) with a second counter x
+        F[1][0] = 1
+        for i in range(2, M + 1):
+            for x in range(0, i - 1):
+                bit = T[i-1] # bit of T at position i
+                if i <= H: # transition that increments x; only used at or before position H
+                    F[i][x+1] = Add(F[i][x+1], Mult(F[i-1][x], G[1][x+1][bit]))
+                if i < H: # strictly before H both start states contribute
+                    total = Add(G[0][x][bit], G[1][x][bit])
+                    F[i][x]   = Add(F[i][x],   Mult(F[i-1][x], total))
+                if i > H: # after H the start state is fixed by the corresponding bit of S
+                    F[i][x]   = Add(F[i][x],   Mult(F[i-1][x], G[S[i-1]][x][bit]))
+        ans = 0
+        for x in range(0, M): # the answer sums the last DP row over all x
+            ans = Add(ans, F[M][x])
+        self.parameter["reference_answer"] = ans
+    def _generate(self) -> None : # NOTE(review): retries until the answer is neither 0 nor 1; with max_length = 1 the only instance is S = T = "1", whose computed answer is 1, so this loop never exits
+        while True :
+            self._generate_helper()
+            if self.parameter["reference_answer"] not in (0, 1) : # the comparison is on the value already reduced modulo MOD
+                break
+    def _prompt_generate(self) -> str : # fills the prompt with the binary strings S, T and the modulus
+        return self.prompt_template.format(
+            S = self.parameter["S"],
+            T = self.parameter["T"],
+            MOD = self.MOD,
+        )
+    def _process(self, answer : Optional[str]) -> Optional[int] : # parses the answer as a single integer; None when missing or not an integer
+        if answer is not None :
+            answer = answer.strip()
+            try :
+                int_answer = int(answer)
+                return int_answer
+            except ValueError :
+                return None
+        else :
+            return None
+    def scorer(self, output : str) -> float : # wrong_format / wrong_range / correct_answer / wrong_answer, checked in that order
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            if not (0 <= processed_result < self.MOD) : # a residue modulo MOD must lie in [0, MOD)
+                return self.rewards["wrong_range"]
+            if processed_result == self.parameter["reference_answer"] :
+                return self.rewards["correct_answer"]
+            else :
+                return self.rewards["wrong_answer"]
+        else :
+            return self.rewards["wrong_format"]

server/Gym/environments/bitwise_operation_sequence_counting/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .environment import BitwiseOperationSequenceCounting_Environment

server/Gym/environments/bitwise_operation_sequence_counting/environment.py ADDED Viewed

	@@ -0,0 +1,150 @@

+import random
+from typing import Optional
+from ...environment import VerifiableEnvironment
+class BitwiseOperationSequenceCounting_Environment(VerifiableEnvironment) : # Source : https://www.luogu.com.cn/problem/P4424
+    prompt_template = \
+r"""You are given an array A of {N} + 1 binary strings, each of length {M}. The strings are:
+{A}
+You will insert an operation (`AND` or `OR`) between every pair of adjacent elements in A, resulting in {N} operations total, to form an expression. You can evaluate the expression from left to right (without operator precedence) to get the final result of the expression.
+Count the number of different ways to insert these operations such that the final result equals this binary string: {R}"""
+    def __init__(self,
+                wrong_format : float = -1.0, rewarding_strategy : str = "(min/max)^beta", rewarding_weight : float = 1.0, rewarding_beta : float = 10.0,
+                **kwargs) :
+        """
+        Initialize the BitwiseOperationSequenceCounting_Environment instance.
+        """
+        super().__init__(**kwargs)
+        self.rewards = {
+            "wrong_format" : wrong_format,
+            "rewarding_strategy" : rewarding_strategy,
+            "rewarding_weight" : rewarding_weight,
+            "rewarding_beta" : rewarding_beta,
+        }
+    def _generate(self) -> None : # samples A and a reachable target R, then stores the number of operator sequences producing R
+        assert "MAX_N_M" in self.parameter, "MAX_N_M is required in parameter"
+        MAX_N_M = self.parameter["MAX_N_M"]
+        assert MAX_N_M >= 2, "MAX_N_M should be greater than or equal to 2"
+        N = self.parameter["N"] = random.randint(2, MAX_N_M) # number of operations, i.e. A has N + 1 strings
+        M = self.parameter["M"] = random.randint(2, MAX_N_M) # length of every string
+        self.parameter["A"] = A = [None] * (N + 1)
+        A[0] = "0" * M # the first operand is always all zeros
+        result = "0" * M
+        AND_probability = random.random() # one AND/OR bias for the whole instance
+        for i in range(1, N + 1) :
+            one_probability = random.random() # per-string density of ones
+            A[i] = "".join(str(int(random.random() < one_probability)) for _ in range(M))
+            operation = "AND" if random.random() < AND_probability else "OR"
+            if operation == "AND" : # apply the sampled operator so the target is reachable by at least this sequence
+                result = "".join(str(int(A[i][j]) & int(result[j])) for j in range(M))
+            else :
+                result = "".join(str(int(A[i][j]) | int(result[j])) for j in range(M))
+        self.parameter["R"] = result
+        S = A[1 :] # the N sampled strings, without the all-zero A[0]
+        # rk will store the current column order (0-indexed)
+        rk = list(range(M))
+        # b[j][i] will store the bit in column j, row i
+        b = [[0] * N for _ in range(M)]
+        # Read the N rows of the matrix, and maintain the stable partition of rk
+        for i in range(N):
+            s = S[i]
+            # parse the bits of this row
+            row = [int(ch) for ch in s]
+            # fill b
+            for j in range(M):
+                b[j][i] = row[j]
+            # stable partition rk: first zeros, then ones
+            new_rk = []
+            for k in rk:
+                if row[k] == 0:
+                    new_rk.append(k)
+            for k in rk:
+                if row[k] == 1:
+                    new_rk.append(k)
+            rk = new_rk # after the last row this is effectively an LSD radix sort: columns ordered by Ans[j] ascending
+        # Compute Ans[j] = integer value of column j (bits b[j][N-1]...b[j][0]); no modulus is applied
+        Ans = [0] * M
+        for j in range(M):
+            val = 0
+            # build the number from most-significant bit b[j][N-1] down to b[j][0]
+            for i in range(N - 1, -1, -1):
+                val = val * 2 + b[j][i]
+            Ans[j] = val
+        def compute() : # counts operator sequences whose result equals `result`, using the sorted column order rk
+            s = result
+            # Find the first position in rk where the bit is '1'
+            Rk_idx = M  # default to sentinel
+            for idx in range(M):
+                if s[rk[idx]] == '1':
+                    Rk_idx = idx
+                    break
+            # Find the last position in rk where the bit is '0'
+            Lk_idx = -1  # default to before first
+            for idx in range(M - 1, -1, -1):
+                if s[rk[idx]] == '0':
+                    Lk_idx = idx
+                    break
+            # If the first '1' comes before the last '0', no valid interval
+            if Rk_idx < Lk_idx:
+                return 0
+            else:
+                # Determine the two endpoints' values
+                x_val = 0 if Lk_idx == -1 else Ans[rk[Lk_idx]] # lower endpoint; 0 when the target contains no '0'
+                y_val = (2 ** N) if Rk_idx == M else Ans[rk[Rk_idx]] # upper endpoint; 2^N when the target contains no '1'
+                # Answer is y_val - x_val
+                return y_val - x_val
+        self.parameter["reference_answer"] = compute()
+        assert self.parameter["reference_answer"] > 0 # the target was built from a concrete operator sequence, so at least one way exists
+    def _prompt_generate(self) -> str : # lists the strings as A[i]=... lines and appends the target R
+        return self.prompt_template.format(
+            N = self.parameter["N"],
+            M = self.parameter["M"],
+            A = "\n".join("A[{}]={}".format(i, Ai) for i, Ai in enumerate(self.parameter["A"])),
+            R = self.parameter["R"],
+        )
+    def _process(self, answer : Optional[str]) -> Optional[int] : # parses the answer as a single integer; None when missing or not an integer
+        if answer is not None :
+            answer = answer.strip()
+            try :
+                int_answer = int(answer)
+                return int_answer
+            except ValueError :
+                return None
+        else :
+            return None
+    def scorer(self, output : str) -> float : # wrong_format for unparsable or negative answers, otherwise a strategy-dependent reward
+        processed_result = self.processor(output)
+        if processed_result is not None :
+            if processed_result < 0 : # a count cannot be negative
+                return self.rewards["wrong_format"]
+            if self.rewards["rewarding_strategy"] == "(min/max)^beta" :
+                a, b = self.parameter["reference_answer"], processed_result
+                return self.rewards["rewarding_weight"] * (((min(a, b) / max(a, b))) ** self.rewards["rewarding_beta"]) # reference_answer is asserted positive at generation, so max(a, b) > 0
+            elif self.rewards["rewarding_strategy"] == "gold=answer" :
+                return self.rewards["rewarding_weight"] * (processed_result == self.parameter["reference_answer"])
+            else :
+                raise NotImplementedError("Unknown rewarding strategy: {}".format(self.rewards["rewarding_strategy"]))
+        else :
+            return self.rewards["wrong_format"]