Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- Dockerfile +22 -61
- README.md +135 -56
- __init__.py +4 -8
- client.py +31 -74
- models.py +27 -39
- openenv.yaml +5 -9
- pyproject.toml +15 -25
- server/Dockerfile.backup +25 -0
- server/README.md +51 -0
- server/__init__.py +3 -3
- server/app.py +25 -163
- server/python_codeact_env.py +117 -0
- server/python_executor.py +157 -0
- server/transforms.py +94 -0
Dockerfile
CHANGED
|
@@ -1,74 +1,35 @@
|
|
| 1 |
-
#
|
| 2 |
-
#
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the BSD-style license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
-
|
| 8 |
-
# This Dockerfile is flexible and works for both:
|
| 9 |
-
# - In-repo environments (with local OpenEnv sources)
|
| 10 |
-
# - Standalone environments (with openenv from PyPI/Git)
|
| 11 |
-
# The build script (openenv build) handles context detection and sets appropriate build args.
|
| 12 |
-
|
| 13 |
-
ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
|
| 14 |
-
FROM ${BASE_IMAGE} AS builder
|
| 15 |
|
|
|
|
| 16 |
WORKDIR /app
|
| 17 |
|
| 18 |
-
#
|
| 19 |
-
RUN apt-get update && \
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
# Build argument to control whether we're building standalone or in-repo
|
| 24 |
-
ARG BUILD_MODE=in-repo
|
| 25 |
-
ARG ENV_NAME=code_review_env
|
| 26 |
-
|
| 27 |
-
# Copy environment code (always at root of build context)
|
| 28 |
-
COPY . /app/env
|
| 29 |
-
|
| 30 |
-
# For in-repo builds, openenv is already vendored in the build context
|
| 31 |
-
# For standalone builds, openenv will be installed via pyproject.toml
|
| 32 |
-
WORKDIR /app/env
|
| 33 |
-
|
| 34 |
-
# Ensure uv is available (for local builds where base image lacks it)
|
| 35 |
-
RUN if ! command -v uv >/dev/null 2>&1; then \
|
| 36 |
-
curl -LsSf https://astral.sh/uv/install.sh | sh && \
|
| 37 |
-
mv /root/.local/bin/uv /usr/local/bin/uv && \
|
| 38 |
-
mv /root/.local/bin/uvx /usr/local/bin/uvx; \
|
| 39 |
-
fi
|
| 40 |
-
|
| 41 |
-
# Install dependencies using uv sync
|
| 42 |
-
# If uv.lock exists, use it; otherwise resolve on the fly
|
| 43 |
-
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 44 |
-
if [ -f uv.lock ]; then \
|
| 45 |
-
uv sync --frozen --no-editable; \
|
| 46 |
-
else \
|
| 47 |
-
uv sync --no-editable; \
|
| 48 |
-
fi
|
| 49 |
|
| 50 |
-
#
|
| 51 |
-
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
COPY --from=builder /app/env/.venv /app/.venv
|
| 57 |
-
|
| 58 |
-
# Copy the environment code
|
| 59 |
-
COPY --from=builder /app/env /app/env
|
| 60 |
|
| 61 |
-
#
|
| 62 |
-
ENV
|
|
|
|
| 63 |
|
| 64 |
-
#
|
| 65 |
-
|
| 66 |
|
| 67 |
# Health check
|
| 68 |
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
| 69 |
CMD curl -f http://localhost:8000/health || exit 1
|
| 70 |
|
| 71 |
-
# Run the
|
| 72 |
-
|
| 73 |
-
ENV ENABLE_WEB_INTERFACE=true
|
| 74 |
-
CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
|
|
|
|
| 1 |
+
# Dockerfile for Coding Environment
|
| 2 |
+
# Build from repo root:
|
| 3 |
+
# docker build -t coding-env:latest -f envs/coding_env/server/Dockerfile .
|
|
|
|
|
|
|
| 4 |
|
| 5 |
+
FROM python:3.11-slim
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
+
# Set working directory
|
| 8 |
WORKDIR /app
|
| 9 |
|
| 10 |
+
# Install system dependencies
|
| 11 |
+
RUN apt-get update && apt-get install -y \
|
| 12 |
+
git \
|
| 13 |
+
curl \
|
| 14 |
+
&& rm -rf /var/lib/apt/lists/*
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
+
# Copy coding_env package
|
| 17 |
+
COPY envs/coding_env/ ./envs/coding_env/
|
| 18 |
|
| 19 |
+
# Install openenv-core first from PyPI, then coding_env
|
| 20 |
+
RUN pip install --no-cache-dir "openenv-core[core]>=0.2.2" && \
|
| 21 |
+
pip install --no-cache-dir ./envs/coding_env/
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
+
# Environment variables
|
| 24 |
+
ENV PYTHONUNBUFFERED=1
|
| 25 |
+
ENV ENABLE_WEB_INTERFACE=true
|
| 26 |
|
| 27 |
+
# Expose port
|
| 28 |
+
EXPOSE 8000
|
| 29 |
|
| 30 |
# Health check
|
| 31 |
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
| 32 |
CMD curl -f http://localhost:8000/health || exit 1
|
| 33 |
|
| 34 |
+
# Run the server
|
| 35 |
+
CMD ["uvicorn", "coding_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
|
|
|
|
|
README.md
CHANGED
|
@@ -1,84 +1,163 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
app_port: 8000
|
|
|
|
| 9 |
tags:
|
| 10 |
- openenv
|
| 11 |
-
base_path: /web
|
| 12 |
---
|
| 13 |
|
| 14 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
##
|
| 21 |
|
| 22 |
-
|
| 23 |
|
| 24 |
-
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
| easy | Easy | Identify syntax/runtime errors |
|
| 29 |
-
| medium | Medium | Identify logic bugs in code that runs but produces wrong output |
|
| 30 |
-
| hard | Hard | Identify security vulnerabilities |
|
| 31 |
|
| 32 |
-
#
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
|
|
|
| 40 |
|
| 41 |
-
##
|
| 42 |
|
| 43 |
-
|
| 44 |
-
|-------|------|-------------|
|
| 45 |
-
| code_snippet | str | Python code to review |
|
| 46 |
-
| task_description | str | What the agent is asked to do |
|
| 47 |
-
| task_id | str | easy, medium, or hard |
|
| 48 |
-
| attempt_number | int | Steps taken so far |
|
| 49 |
-
| previous_feedback | str | Feedback from last step |
|
| 50 |
-
| done | bool | Whether episode is complete |
|
| 51 |
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
- **+0.5** quality explanation (key concepts present)
|
| 57 |
-
- **-0.3** wrong bug category confidently stated
|
| 58 |
-
- **-0.1** per retry after first attempt
|
| 59 |
-
- Normalized to 0.0–1.0 range
|
| 60 |
|
| 61 |
-
#
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|------|-------|
|
| 65 |
-
| easy | 1.0 |
|
| 66 |
-
| medium | 1.0 |
|
| 67 |
-
| hard | 1.0 |
|
| 68 |
-
| **average** | **1.0** |
|
| 69 |
|
| 70 |
-
|
| 71 |
|
| 72 |
```bash
|
| 73 |
-
|
| 74 |
-
uvicorn server.app:app --host 0.0.0.0 --port 8000
|
| 75 |
```
|
| 76 |
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Coding Environment Server
|
| 3 |
+
emoji: 💻
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: blue
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
app_port: 8000
|
| 9 |
+
base_path: /web
|
| 10 |
tags:
|
| 11 |
- openenv
|
|
|
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# Coding Environment
|
| 15 |
+
|
| 16 |
+
A Python code execution environment that runs arbitrary Python code and returns results. Perfect for testing code execution infrastructure and demonstrating environment usage patterns.
|
| 17 |
+
|
| 18 |
+
## Quick Start
|
| 19 |
+
|
| 20 |
+
The simplest way to use the Coding environment is through the `CodingEnv` class. The client is **async by default**:
|
| 21 |
+
|
| 22 |
+
```python
|
| 23 |
+
import asyncio
|
| 24 |
+
from coding_env import CodeAction, CodingEnv
|
| 25 |
+
|
| 26 |
+
async def main():
|
| 27 |
+
# Create environment from Docker image
|
| 28 |
+
client = await CodingEnv.from_docker_image("coding-env:latest")
|
| 29 |
+
|
| 30 |
+
async with client:
|
| 31 |
+
# Reset
|
| 32 |
+
result = await client.reset()
|
| 33 |
+
print(f"Reset complete: exit_code={result.observation.exit_code}")
|
| 34 |
+
|
| 35 |
+
# Execute Python code
|
| 36 |
+
code_samples = [
|
| 37 |
+
"print('Hello, World!')",
|
| 38 |
+
"x = 5 + 3\nprint(f'Result: {x}')",
|
| 39 |
+
"import math\nprint(math.pi)"
|
| 40 |
+
]
|
| 41 |
+
|
| 42 |
+
for code in code_samples:
|
| 43 |
+
result = await client.step(CodeAction(code=code))
|
| 44 |
+
print(f"Code: {code}")
|
| 45 |
+
print(f" → stdout: {result.observation.stdout.strip()}")
|
| 46 |
+
print(f" → exit_code: {result.observation.exit_code}")
|
| 47 |
+
|
| 48 |
+
asyncio.run(main())
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
For **synchronous usage**, use the `.sync()` wrapper:
|
| 52 |
+
|
| 53 |
+
```python
|
| 54 |
+
from coding_env import CodeAction, CodingEnv
|
| 55 |
+
|
| 56 |
+
with CodingEnv(base_url="http://localhost:8000").sync() as client:
|
| 57 |
+
result = client.reset()
|
| 58 |
+
result = client.step(CodeAction(code="print('Hello!')"))
|
| 59 |
+
print(result.observation.stdout)
|
| 60 |
+
```
|
| 61 |
+
|
| 62 |
+
The `CodingEnv.from_docker_image()` method handles:
|
| 63 |
+
- Starting the Docker container
|
| 64 |
+
- Waiting for the server to be ready
|
| 65 |
+
- Connecting to the environment
|
| 66 |
+
- Container cleanup when the context manager exits
|
| 67 |
+
|
| 68 |
+
## Building the Docker Image
|
| 69 |
+
|
| 70 |
+
Before using the environment, you need to build the Docker image:
|
| 71 |
+
|
| 72 |
+
```bash
|
| 73 |
+
# From project root
|
| 74 |
+
docker build -t coding-env:latest -f envs/coding_env/server/Dockerfile .
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
## Environment Details
|
| 78 |
+
|
| 79 |
+
### Action
|
| 80 |
+
**CodeAction**: Contains a single field
|
| 81 |
+
- `code` (str) - The Python code to execute
|
| 82 |
|
| 83 |
+
### Observation
|
| 84 |
+
**CodeObservation**: Contains the execution results
|
| 85 |
+
- `stdout` (str) - Standard output from code execution
|
| 86 |
+
- `stderr` (str) - Standard error from code execution
|
| 87 |
+
- `exit_code` (int) - Exit code (0 for success, non-zero for errors)
|
| 88 |
|
| 89 |
+
### State
|
| 90 |
+
**CodeState**: Tracks execution state
|
| 91 |
+
- `episode_id` (str) - Unique identifier for the episode
|
| 92 |
+
- `step_count` (int) - Number of steps taken
|
| 93 |
+
- `last_exit_code` (int) - Exit code from the last execution
|
| 94 |
|
| 95 |
+
## Advanced Usage
|
| 96 |
|
| 97 |
+
### Connecting to an Existing Server
|
| 98 |
|
| 99 |
+
If you already have a Coding environment server running, you can connect directly:
|
| 100 |
|
| 101 |
+
```python
|
| 102 |
+
from coding_env import CodeAction, CodingEnv
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
+
# Async usage
|
| 105 |
+
async with CodingEnv(base_url="http://localhost:8000") as client:
|
| 106 |
+
result = await client.reset()
|
| 107 |
+
result = await client.step(CodeAction(code="print('Hello!')"))
|
| 108 |
|
| 109 |
+
# Sync usage
|
| 110 |
+
with CodingEnv(base_url="http://localhost:8000").sync() as client:
|
| 111 |
+
result = client.reset()
|
| 112 |
+
result = client.step(CodeAction(code="print('Hello!')"))
|
| 113 |
+
```
|
| 114 |
+
|
| 115 |
+
Note: When connecting to an existing server, closing the client will NOT stop the server.
|
| 116 |
|
| 117 |
+
## Development & Testing
|
| 118 |
|
| 119 |
+
### Running Tests
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
+
Install the coding_env package with dev dependencies and run the tests from the repo root:
|
| 122 |
+
|
| 123 |
+
```bash
|
| 124 |
+
# Install coding_env with dev dependencies (includes smolagents and pytest)
|
| 125 |
+
uv pip install -e "envs/coding_env[dev]"
|
| 126 |
|
| 127 |
+
# Run unit tests (no Docker required)
|
| 128 |
+
uv run pytest tests/envs/test_python_codeact_reset.py tests/envs/test_python_codeact_rewards.py -v
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
+
# Run integration tests (requires Docker image to be built)
|
| 131 |
+
docker build -t coding-env:latest -f envs/coding_env/server/Dockerfile .
|
| 132 |
+
SKIP_DOCKER_TESTS=0 uv run pytest tests/envs/test_coding_env_integration.py -v
|
| 133 |
+
```
|
| 134 |
|
| 135 |
+
### Running the Full Example
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
+
Run the complete example that demonstrates the full workflow:
|
| 138 |
|
| 139 |
```bash
|
| 140 |
+
python3 envs/coding_env/client/example_usage.py
|
|
|
|
| 141 |
```
|
| 142 |
|
| 143 |
+
This example shows:
|
| 144 |
+
- Creating an environment from a Docker image
|
| 145 |
+
- Resetting and executing code through the environment
|
| 146 |
+
- Automatic cleanup with `close()`
|
| 147 |
+
|
| 148 |
+
## Project Structure
|
| 149 |
|
| 150 |
+
```
|
| 151 |
+
coding_env/
|
| 152 |
+
├── README.md # This file
|
| 153 |
+
├── models.py # Action, Observation, and State models
|
| 154 |
+
├── client/
|
| 155 |
+
│ ├── coding_env_client.py # CodingEnv client implementation
|
| 156 |
+
│ └── example_usage.py # Usage examples
|
| 157 |
+
└── server/
|
| 158 |
+
├── python_codeact_env.py # Core environment logic
|
| 159 |
+
├── app.py # FastAPI application
|
| 160 |
+
├── transforms.py # Observation transforms
|
| 161 |
+
├── Dockerfile # Container image definition
|
| 162 |
+
└── README.md # Server-specific documentation
|
| 163 |
+
```
|
__init__.py
CHANGED
|
@@ -4,13 +4,9 @@
|
|
| 4 |
# This source code is licensed under the BSD-style license found in the
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
-
"""
|
| 8 |
|
| 9 |
-
from .client import
|
| 10 |
-
from .models import
|
| 11 |
|
| 12 |
-
__all__ = [
|
| 13 |
-
"CodeReviewAction",
|
| 14 |
-
"CodeReviewObservation",
|
| 15 |
-
"CodeReviewEnv",
|
| 16 |
-
]
|
|
|
|
| 4 |
# This source code is licensed under the BSD-style license found in the
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
+
"""Coding Environment - A Python code execution environment."""
|
| 8 |
|
| 9 |
+
from .client import CodingEnv
|
| 10 |
+
from .models import CodeAction, CodeObservation, CodeState
|
| 11 |
|
| 12 |
+
__all__ = ["CodingEnv", "CodeAction", "CodeObservation", "CodeState"]
|
|
|
|
|
|
|
|
|
|
|
|
client.py
CHANGED
|
@@ -1,99 +1,56 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
-
|
|
|
|
| 8 |
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
from openenv.core.client_types import StepResult
|
| 13 |
-
from openenv.core.env_server.types import State
|
| 14 |
-
|
| 15 |
-
from .models import CodeReviewAction, CodeReviewObservation
|
| 16 |
|
|
|
|
|
|
|
| 17 |
|
| 18 |
-
|
| 19 |
-
EnvClient[CodeReviewAction, CodeReviewObservation, State]
|
| 20 |
-
):
|
| 21 |
-
"""
|
| 22 |
-
Client for the Code Review Env Environment.
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
Each client instance has its own dedicated environment session on the server.
|
| 27 |
|
| 28 |
-
|
| 29 |
-
>>> # Connect to a running server
|
| 30 |
-
>>> with CodeReviewEnv(base_url="http://localhost:8000") as client:
|
| 31 |
-
... result = client.reset()
|
| 32 |
-
... print(result.observation.echoed_message)
|
| 33 |
-
...
|
| 34 |
-
... result = client.step(CodeReviewAction(message="Hello!"))
|
| 35 |
-
... print(result.observation.echoed_message)
|
| 36 |
|
| 37 |
-
Example with Docker:
|
| 38 |
-
>>> # Automatically start container and connect
|
| 39 |
-
>>> client = CodeReviewEnv.from_docker_image("code_review_env-env:latest")
|
| 40 |
-
>>> try:
|
| 41 |
-
... result = client.reset()
|
| 42 |
-
... result = client.step(CodeReviewAction(message="Test"))
|
| 43 |
-
... finally:
|
| 44 |
-
... client.close()
|
| 45 |
-
"""
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
Convert CodeReviewAction to JSON payload for step message.
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
Returns:
|
| 55 |
-
Dictionary representation suitable for JSON encoding
|
| 56 |
-
"""
|
| 57 |
return {
|
| 58 |
-
"
|
| 59 |
}
|
| 60 |
|
| 61 |
-
def _parse_result(self, payload:
|
| 62 |
-
"""
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
Args:
|
| 66 |
-
payload: JSON response data from server
|
| 67 |
-
|
| 68 |
-
Returns:
|
| 69 |
-
StepResult with CodeReviewObservation
|
| 70 |
-
"""
|
| 71 |
-
obs_data = payload.get("observation", {})
|
| 72 |
-
observation = CodeReviewObservation(
|
| 73 |
-
echoed_message=obs_data.get("echoed_message", ""),
|
| 74 |
-
message_length=obs_data.get("message_length", 0),
|
| 75 |
-
done=payload.get("done", False),
|
| 76 |
-
reward=payload.get("reward"),
|
| 77 |
-
metadata=obs_data.get("metadata", {}),
|
| 78 |
-
)
|
| 79 |
-
|
| 80 |
return StepResult(
|
| 81 |
-
observation=
|
| 82 |
reward=payload.get("reward"),
|
| 83 |
-
done=payload.get("done", False),
|
| 84 |
)
|
| 85 |
|
| 86 |
-
def _parse_state(self, payload:
|
| 87 |
"""
|
| 88 |
-
Parse server response into
|
| 89 |
|
| 90 |
Args:
|
| 91 |
-
payload: JSON response from state
|
| 92 |
|
| 93 |
Returns:
|
| 94 |
-
|
| 95 |
"""
|
| 96 |
-
return
|
| 97 |
episode_id=payload.get("episode_id"),
|
| 98 |
step_count=payload.get("step_count", 0),
|
|
|
|
| 99 |
)
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
CodingEnv
|
| 3 |
+
---------
|
| 4 |
+
Client-side wrapper for the Coding environment server.
|
|
|
|
| 5 |
|
| 6 |
+
This client maintains a persistent WebSocket connection to the environment
|
| 7 |
+
server, enabling efficient multi-step interactions with lower latency.
|
| 8 |
|
| 9 |
+
- Users instantiate CodingEnv with a base_url provided by the higher-level
|
| 10 |
+
vector/orchestration layer.
|
| 11 |
+
- Environment authors ship the Docker image that serves the API.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
+
(Seeds, episode IDs, request IDs, capabilities can be added later in the payloads.)
|
| 14 |
+
"""
|
| 15 |
|
| 16 |
+
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
+
from openenv.core.client_types import StepResult
|
| 19 |
+
from openenv.core.env_client import EnvClient
|
|
|
|
| 20 |
|
| 21 |
+
from .models import CodeAction, CodeObservation, CodeState
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
class CodingEnv(EnvClient[CodeAction, CodeObservation, CodeState]):
|
| 25 |
+
# --- EnvClient abstract hooks ---
|
|
|
|
| 26 |
|
| 27 |
+
def _step_payload(self, action: CodeAction) -> dict:
|
| 28 |
+
# Shape expected by the server's /step endpoint under "action"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
return {
|
| 30 |
+
"code": action.code,
|
| 31 |
}
|
| 32 |
|
| 33 |
+
def _parse_result(self, payload: dict) -> StepResult[CodeObservation]:
|
| 34 |
+
# Expecting: { "observation": {...}, "reward": <float|null>, "done": <bool>, "info": {...} }
|
| 35 |
+
obs = CodeObservation(**payload["observation"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
return StepResult(
|
| 37 |
+
observation=obs,
|
| 38 |
reward=payload.get("reward"),
|
| 39 |
+
done=bool(payload.get("done", False)),
|
| 40 |
)
|
| 41 |
|
| 42 |
+
def _parse_state(self, payload: dict) -> CodeState:
|
| 43 |
"""
|
| 44 |
+
Parse server response into CodeState object.
|
| 45 |
|
| 46 |
Args:
|
| 47 |
+
payload: JSON response from /state endpoint
|
| 48 |
|
| 49 |
Returns:
|
| 50 |
+
CodeState object with episode_id, step_count, and last_exit_code
|
| 51 |
"""
|
| 52 |
+
return CodeState(
|
| 53 |
episode_id=payload.get("episode_id"),
|
| 54 |
step_count=payload.get("step_count", 0),
|
| 55 |
+
last_exit_code=payload.get("last_exit_code", 0),
|
| 56 |
)
|
models.py
CHANGED
|
@@ -1,46 +1,34 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the BSD-style license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
"""
|
| 8 |
-
|
| 9 |
-
|
|
|
|
| 10 |
"""
|
| 11 |
|
| 12 |
from __future__ import annotations
|
| 13 |
-
|
| 14 |
from openenv.core.env_server.interfaces import Action, Observation, State
|
| 15 |
|
| 16 |
|
| 17 |
-
class
|
| 18 |
-
"""
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
class
|
| 37 |
-
"""
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
correct_bug_type: str = ""
|
| 41 |
-
correct_line_number: int = -1
|
| 42 |
-
correct_keywords: list = []
|
| 43 |
-
step_count: int = 0
|
| 44 |
-
task_episode_id: str = ""
|
| 45 |
-
cumulative_reward: float = 0.0
|
| 46 |
-
total_snippets: int = 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
envs/coding_env/models.py
|
| 3 |
+
--------------------------------
|
| 4 |
+
Action/Observation/State types for the Coding environment.
|
| 5 |
"""
|
| 6 |
|
| 7 |
from __future__ import annotations
|
| 8 |
+
|
| 9 |
from openenv.core.env_server.interfaces import Action, Observation, State
|
| 10 |
|
| 11 |
|
| 12 |
+
class CodeAction(Action):
|
| 13 |
+
"""
|
| 14 |
+
Represents a single code execution request.
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
code: str
|
| 18 |
+
# Optional: future fields like 'lint': bool, 'timeout_s': float, etc.
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class CodeObservation(Observation):
|
| 22 |
+
"""
|
| 23 |
+
Result of executing code in the environment.
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
stdout: str = ""
|
| 27 |
+
stderr: str = ""
|
| 28 |
+
exit_code: int = 0
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class CodeState(State):
|
| 32 |
+
"""State for CodeAct environment with persistent execution context."""
|
| 33 |
+
|
| 34 |
+
last_exit_code: int = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
openenv.yaml
CHANGED
|
@@ -1,9 +1,5 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
port: 8000
|
| 7 |
-
version: "1.0.0"
|
| 8 |
-
description: "AI agent environment for Python code review across syntax, logic, and security bug detection"
|
| 9 |
-
|
|
|
|
| 1 |
+
name: coding_env
|
| 2 |
+
version: "0.1.0"
|
| 3 |
+
description: "Coding environment for OpenEnv"
|
| 4 |
+
action: CodeAction
|
| 5 |
+
observation: CodeObservation
|
|
|
|
|
|
|
|
|
|
|
|
pyproject.toml
CHANGED
|
@@ -1,45 +1,35 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the BSD-style license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
[build-system]
|
| 8 |
requires = ["setuptools>=45", "wheel"]
|
| 9 |
build-backend = "setuptools.build_meta"
|
| 10 |
|
| 11 |
[project]
|
| 12 |
-
name = "openenv-
|
| 13 |
version = "0.1.0"
|
| 14 |
-
description = "
|
| 15 |
requires-python = ">=3.10"
|
| 16 |
dependencies = [
|
| 17 |
-
# Core OpenEnv runtime (provides FastAPI server + HTTP client types)
|
| 18 |
-
# install from github
|
| 19 |
-
# "openenv-core[core] @ git+https://github.com/meta-pytorch/OpenEnv.git",
|
| 20 |
"openenv-core[core]>=0.2.2",
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
# "gymnasium>=0.29.0",
|
| 27 |
-
# "openspiel>=1.0.0",
|
| 28 |
-
# "smolagents>=1.22.0,<2",
|
| 29 |
]
|
| 30 |
|
| 31 |
[project.optional-dependencies]
|
| 32 |
dev = [
|
| 33 |
"pytest>=8.0.0",
|
| 34 |
"pytest-cov>=4.0.0",
|
|
|
|
| 35 |
]
|
| 36 |
|
| 37 |
[project.scripts]
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
server = "code_review_env.server.app:main"
|
| 41 |
|
| 42 |
[tool.setuptools]
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
[build-system]
|
| 2 |
requires = ["setuptools>=45", "wheel"]
|
| 3 |
build-backend = "setuptools.build_meta"
|
| 4 |
|
| 5 |
[project]
|
| 6 |
+
name = "openenv-coding_env"
|
| 7 |
version = "0.1.0"
|
| 8 |
+
description = "Coding Environment for OpenEnv"
|
| 9 |
requires-python = ">=3.10"
|
| 10 |
dependencies = [
|
|
|
|
|
|
|
|
|
|
| 11 |
"openenv-core[core]>=0.2.2",
|
| 12 |
+
"fastapi>=0.115.0",
|
| 13 |
+
"pydantic>=2.0.0",
|
| 14 |
+
"uvicorn[standard]>=0.24.0",
|
| 15 |
+
"requests>=2.31.0",
|
| 16 |
+
"smolagents>=1.22.0,<2",
|
|
|
|
|
|
|
|
|
|
| 17 |
]
|
| 18 |
|
| 19 |
[project.optional-dependencies]
|
| 20 |
dev = [
|
| 21 |
"pytest>=8.0.0",
|
| 22 |
"pytest-cov>=4.0.0",
|
| 23 |
+
"ipykernel>=6.29.5",
|
| 24 |
]
|
| 25 |
|
| 26 |
[project.scripts]
|
| 27 |
+
server = "coding_env.server.app:main"
|
| 28 |
+
|
|
|
|
| 29 |
|
| 30 |
[tool.setuptools]
|
| 31 |
+
packages = ["coding_env", "coding_env.server"]
|
| 32 |
+
package-dir = { "coding_env" = ".", "coding_env.server" = "server" }
|
| 33 |
+
|
| 34 |
+
[tool.setuptools.package-data]
|
| 35 |
+
coding_env = ["**/*.yaml", "**/*.yml"]
|
server/Dockerfile.backup
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
# Use the standard openenv base image
|
| 8 |
+
# Built from: docker build -t openenv-base:latest -f src/openenv/core/containers/images/Dockerfile .
|
| 9 |
+
# In GitHub Actions, this is overridden to use the GHCR base image
|
| 10 |
+
ARG BASE_IMAGE=openenv-base:latest
|
| 11 |
+
FROM ${BASE_IMAGE}
|
| 12 |
+
|
| 13 |
+
# Copy only what's needed for this environment
|
| 14 |
+
COPY src/core/ /app/src/core/
|
| 15 |
+
COPY envs/coding_env/ /app/envs/coding_env/
|
| 16 |
+
|
| 17 |
+
# Copy README for web interface documentation
|
| 18 |
+
COPY envs/coding_env/README.md /app/README.md
|
| 19 |
+
|
| 20 |
+
# Health check
|
| 21 |
+
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
| 22 |
+
CMD curl -f http://localhost:8000/health || exit 1
|
| 23 |
+
|
| 24 |
+
# Run the FastAPI server
|
| 25 |
+
CMD ["uvicorn", "envs.coding_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
server/README.md
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CodingEnv HTTP Server
|
| 2 |
+
|
| 3 |
+
This directory contains the HTTP server implementation for the CodingEnvironment.
|
| 4 |
+
|
| 5 |
+
## Running Locally
|
| 6 |
+
|
| 7 |
+
### Prerequisites
|
| 8 |
+
```bash
|
| 9 |
+
pip install fastapi uvicorn
|
| 10 |
+
```
|
| 11 |
+
|
| 12 |
+
### Start the server
|
| 13 |
+
```bash
|
| 14 |
+
# From the project root
|
| 15 |
+
cd src
|
| 16 |
+
uvicorn envs.coding_env.server.app:app --reload --host 0.0.0.0 --port 8000
|
| 17 |
+
```
|
| 18 |
+
|
| 19 |
+
The server will be available at `http://localhost:8000`
|
| 20 |
+
|
| 21 |
+
### API Endpoints
|
| 22 |
+
|
| 23 |
+
- `POST /reset` - Reset the environment
|
| 24 |
+
- `POST /step` - Execute a code action
|
| 25 |
+
- `GET /state` - Get current environment state
|
| 26 |
+
- `GET /health` - Health check
|
| 27 |
+
|
| 28 |
+
### Test with curl
|
| 29 |
+
|
| 30 |
+
```bash
|
| 31 |
+
# Health check
|
| 32 |
+
curl http://localhost:8000/health
|
| 33 |
+
|
| 34 |
+
# Reset
|
| 35 |
+
curl -X POST http://localhost:8000/reset \
|
| 36 |
+
-H "Content-Type: application/json" \
|
| 37 |
+
-d '{}'
|
| 38 |
+
|
| 39 |
+
# Execute code
|
| 40 |
+
curl -X POST http://localhost:8000/step \
|
| 41 |
+
-H "Content-Type: application/json" \
|
| 42 |
+
-d '{
|
| 43 |
+
"action": {
|
| 44 |
+
"code": "print(\"Hello from HTTP!\")"
|
| 45 |
+
},
|
| 46 |
+
"timeout_s": 15
|
| 47 |
+
}'
|
| 48 |
+
|
| 49 |
+
# Get state
|
| 50 |
+
curl http://localhost:8000/state
|
| 51 |
+
```
|
server/__init__.py
CHANGED
|
@@ -4,8 +4,8 @@
|
|
| 4 |
# This source code is licensed under the BSD-style license found in the
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
-
"""
|
| 8 |
|
| 9 |
-
from .
|
| 10 |
|
| 11 |
-
__all__ = ["
|
|
|
|
| 4 |
# This source code is licensed under the BSD-style license found in the
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
+
"""Coding environment server components."""
|
| 8 |
|
| 9 |
+
from .python_codeact_env import PythonCodeActEnv
|
| 10 |
|
| 11 |
+
__all__ = ["PythonCodeActEnv"]
|
server/app.py
CHANGED
|
@@ -5,181 +5,43 @@
|
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
"""
|
| 8 |
-
FastAPI
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
"""
|
| 10 |
|
| 11 |
-
from models import
|
| 12 |
-
from server.
|
| 13 |
from openenv.core.env_server import create_app
|
| 14 |
-
from fastapi import FastAPI, Query
|
| 15 |
-
from fastapi.routing import APIRouter
|
| 16 |
|
| 17 |
-
app
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
ReviewObservation,
|
| 21 |
-
env_name="code_review_env",
|
| 22 |
-
)
|
| 23 |
|
| 24 |
-
@app.get("/tasks")
|
| 25 |
-
def list_tasks():
|
| 26 |
-
return {
|
| 27 |
-
"tasks": [
|
| 28 |
-
{
|
| 29 |
-
"task_id": "easy",
|
| 30 |
-
"description": "Identify syntax/runtime errors in Python code",
|
| 31 |
-
"difficulty": "easy",
|
| 32 |
-
"action_schema": {
|
| 33 |
-
"review": "string - your analysis",
|
| 34 |
-
"bug_type": "string - syntax | logic | security | none",
|
| 35 |
-
"line_number": "int - line with the bug, -1 if unknown",
|
| 36 |
-
"confidence": "float - your confidence 0.0 to 1.0"
|
| 37 |
-
},
|
| 38 |
-
"example_action": {
|
| 39 |
-
"review": "Line 1 is missing a colon after the function definition. This is a syntax error.",
|
| 40 |
-
"bug_type": "syntax",
|
| 41 |
-
"line_number": 1,
|
| 42 |
-
"confidence": 0.95
|
| 43 |
-
}
|
| 44 |
-
},
|
| 45 |
-
{
|
| 46 |
-
"task_id": "medium",
|
| 47 |
-
"description": "Identify logic bugs in code that runs but produces wrong output",
|
| 48 |
-
"difficulty": "medium",
|
| 49 |
-
"action_schema": {
|
| 50 |
-
"review": "string - your analysis",
|
| 51 |
-
"bug_type": "string - syntax | logic | security | none",
|
| 52 |
-
"line_number": "int - line with the bug, -1 if unknown",
|
| 53 |
-
"confidence": "float - your confidence 0.0 to 1.0"
|
| 54 |
-
},
|
| 55 |
-
"example_action": {
|
| 56 |
-
"review": "Line 5 has an index error: it should be max_val = numbers[i], not numbers[i - 1]. This is a logic bug.",
|
| 57 |
-
"bug_type": "logic",
|
| 58 |
-
"line_number": 5,
|
| 59 |
-
"confidence": 0.95
|
| 60 |
-
}
|
| 61 |
-
},
|
| 62 |
-
{
|
| 63 |
-
"task_id": "hard",
|
| 64 |
-
"description": "Identify security vulnerabilities in Python code",
|
| 65 |
-
"difficulty": "hard",
|
| 66 |
-
"action_schema": {
|
| 67 |
-
"review": "string - your analysis",
|
| 68 |
-
"bug_type": "string - syntax | logic | security | none",
|
| 69 |
-
"line_number": "int - line with the bug, -1 if unknown",
|
| 70 |
-
"confidence": "float - your confidence 0.0 to 1.0"
|
| 71 |
-
},
|
| 72 |
-
"example_action": {
|
| 73 |
-
"review": "Line 6 has a SQL injection vulnerability because the username is concatenated directly into the query without parameterized statements.",
|
| 74 |
-
"bug_type": "security",
|
| 75 |
-
"line_number": 6,
|
| 76 |
-
"confidence": 0.95
|
| 77 |
-
}
|
| 78 |
-
}
|
| 79 |
-
]
|
| 80 |
-
}
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
"""
|
| 85 |
-
Returns information about the Code Review Environment.
|
| 86 |
-
Returns: environment name, version, description, number of tasks, and supported difficulty levels
|
| 87 |
-
"""
|
| 88 |
-
return {
|
| 89 |
-
"name": "code_review_env",
|
| 90 |
-
"version": "1.0.0",
|
| 91 |
-
"description": "AI agent environment for Python code review across syntax, logic, and security bug detection",
|
| 92 |
-
"num_tasks": 3,
|
| 93 |
-
"difficulty_levels": ["easy", "medium", "hard"]
|
| 94 |
-
}
|
| 95 |
|
| 96 |
-
|
| 97 |
-
def grader(task_id: str = Query("easy"), episode_id: str = Query(None)):
    """
    Score a single task by submitting its hardcoded reference answer.

    Query params: task_id (str), episode_id (str, optional; not used by this handler)
    Returns: {"task_id": str, "score": float, "feedback": str}
    """
    environment = CodeReviewEnvironment()
    environment.reset(task_id)

    # Reference answers for the named tasks. Any other task_id is answered
    # with the "hard" reference answer, mirroring a trailing else-branch.
    named_answers = {
        "easy": {
            "review": "Line 1 is missing a colon after the function definition. This is a syntax error.",
            "bug_type": "syntax",
            "line_number": 1,
        },
        "medium": {
            "review": "Line 5 has an index error: it should be max_val = numbers[i], not numbers[i - 1]. This is a logic bug.",
            "bug_type": "logic",
            "line_number": 5,
        },
    }
    hard_answer = {
        "review": "Line 6 has a SQL injection vulnerability because the username is concatenated directly into the query without parameterized statements.",
        "bug_type": "security",
        "line_number": 6,
    }

    chosen = named_answers.get(task_id, hard_answer)
    submission = ReviewAction(confidence=0.95, **chosen)

    result = environment.step(submission)
    return {
        "task_id": task_id,
        "score": environment.state.cumulative_reward,
        "feedback": result.previous_feedback,
    }
|
| 135 |
|
| 136 |
-
@app.get("/baseline")
def baseline():
    """
    Submit the hardcoded reference answer for each of the 3 tasks (easy, medium, hard).

    A fresh environment is created per task, so scores do not accumulate
    across tasks.
    Returns: {"scores": {"easy": float, "medium": float, "hard": float}, "average": float}
    """
    # Insertion order (easy, medium, hard) is also the order the tasks run in.
    reference_answers = {
        "easy": {
            "review": "Line 1 is missing a colon after the function definition. This is a syntax error.",
            "bug_type": "syntax",
            "line_number": 1,
            "confidence": 0.95,
        },
        "medium": {
            "review": "Line 5 has an index error: it should be max_val = numbers[i], not numbers[i - 1]. This is a logic bug.",
            "bug_type": "logic",
            "line_number": 5,
            "confidence": 0.95,
        },
        "hard": {
            "review": "Line 6 has a SQL injection vulnerability because the username is concatenated directly into the query without parameterized statements.",
            "bug_type": "security",
            "line_number": 6,
            "confidence": 0.95,
        },
    }

    scores = {}
    for task_id, answer in reference_answers.items():
        environment = CodeReviewEnvironment()
        environment.reset(task_id)
        environment.step(ReviewAction(**answer))
        scores[task_id] = environment.state.cumulative_reward

    mean_score = sum(scores.values()) / len(scores)
    return {
        "scores": scores,
        "average": round(mean_score, 4),
    }
|
| 179 |
|
| 180 |
def main():
    """Serve the module-level ``app`` with uvicorn on 0.0.0.0:8000."""
    # uvicorn is imported only when the server is actually launched.
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=8000)
|
| 183 |
|
|
|
|
| 184 |
if __name__ == "__main__":
|
| 185 |
main()
|
|
|
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
"""
|
| 8 |
+
FastAPI application for the Coding Environment.
|
| 9 |
+
|
| 10 |
+
This module creates an HTTP server that exposes the PythonCodeActEnv
|
| 11 |
+
over HTTP and WebSocket endpoints, compatible with EnvClient.
|
| 12 |
+
|
| 13 |
+
Usage:
|
| 14 |
+
# Development (with auto-reload):
|
| 15 |
+
uvicorn envs.coding_env.server.app:app --reload --host 0.0.0.0 --port 8000
|
| 16 |
+
|
| 17 |
+
# Production:
|
| 18 |
+
uvicorn envs.coding_env.server.app:app --host 0.0.0.0 --port 8000 --workers 4
|
| 19 |
+
|
| 20 |
+
# Or run directly:
|
| 21 |
+
python -m envs.coding_env.server.app
|
| 22 |
"""
|
| 23 |
|
| 24 |
+
from coding_env.models import CodeAction, CodeObservation
|
| 25 |
+
from coding_env.server.python_codeact_env import PythonCodeActEnv
|
| 26 |
from openenv.core.env_server import create_app
|
|
|
|
|
|
|
| 27 |
|
| 28 |
+
# Create the app with web interface and README integration
|
| 29 |
+
# Pass the class (factory) instead of an instance for WebSocket session support
|
| 30 |
+
app = create_app(PythonCodeActEnv, CodeAction, CodeObservation, env_name="coding_env")
|
|
|
|
|
|
|
|
|
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
+
def main():
    """Main entry point for running the server.

    Serves the module-level ``app`` with uvicorn on host 0.0.0.0, port 8000.
    The call returns only when uvicorn itself returns.
    """
    # uvicorn is imported here rather than at module import time, so it is
    # only needed when the server is launched through this function.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)


# Single script entry point. A second guard that called uvicorn.run() directly
# would make a script run try to start the server again after the first run
# returned, so only the main() guard is kept.
if __name__ == "__main__":
    main()
|
server/python_codeact_env.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Python Code Action Environment.
|
| 9 |
+
|
| 10 |
+
This module provides a server-side environment implementation for executing
|
| 11 |
+
Python code actions using PyExecutor.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import uuid
|
| 15 |
+
|
| 16 |
+
from openenv.core.env_server.interfaces import Action, Environment, Observation
|
| 17 |
+
|
| 18 |
+
from ..models import CodeAction, CodeObservation, CodeState
|
| 19 |
+
from .python_executor import PyExecutor
|
| 20 |
+
from .transforms import create_safe_coding_transform
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class PythonCodeActEnv(Environment):
    """
    Python Code Action Environment for executing code and tracking state.

    This environment executes Python code submitted as CodeAction during step,
    maintains the last exit code in its state, and returns results wrapped
    in CodeObservation. Observations are passed through the transform built by
    ``create_safe_coding_transform()``; the transform and the executor are
    both recreated on every ``reset()``.

    Args:
        additional_imports: Optional list of additional module imports to
            authorize in the executor (e.g., ["numpy", "pandas", "matplotlib"]).
            ``None`` (the default) authorizes no extra imports.

    Example:
        >>> env = PythonCodeActEnv()
        >>> obs = env.reset()
        >>> action = CodeAction(code="print('Hello, World!')")
        >>> obs = env.step(action)
        >>> print(obs.exit_code)  # 0
        >>> print(env.state.last_exit_code)  # 0
    """

    def __init__(
        self,
        additional_imports: "list[str] | None" = None,
    ):
        # Keep a private copy so later changes to the caller's list cannot
        # alter what a subsequent reset() authorizes.
        self._additional_imports = list(additional_imports or [])
        self.transform = create_safe_coding_transform()
        self._executor = self._new_executor()
        self._state = CodeState()

    def _new_executor(self) -> PyExecutor:
        """Build a fresh executor authorized for the configured extra imports."""
        return PyExecutor(additional_imports=list(self._additional_imports))

    def reset(self) -> Observation:
        """
        Reset environment and start fresh execution session.

        Returns:
            Initial observation with empty stdout/stderr and exit_code=0
        """
        # Initialize fresh state with a new episode id
        self._state = CodeState(episode_id=str(uuid.uuid4()), step_count=0)
        # Add last_exit_code to state
        self._state.last_exit_code = 0

        # Reset executor to clear any previously defined variables/functions,
        # keeping the same set of authorized extra imports
        self._executor = self._new_executor()

        # Reset transform to clear any accumulated state
        self.transform = create_safe_coding_transform()

        # Return initial observation
        observation = CodeObservation(
            stdout="",
            stderr="",
            exit_code=0,
        )

        # NOTE(review): _apply_transform is not defined in this class;
        # presumably inherited from Environment - confirm.
        return self._apply_transform(observation)

    def step(self, action: Action) -> Observation:
        """
        Execute code action and return observation.

        Args:
            action: CodeAction containing the code to execute

        Returns:
            CodeObservation with execution results (stdout, stderr, exit_code)

        Raises:
            ValueError: If action is not a CodeAction instance
        """
        if not isinstance(action, CodeAction):
            raise ValueError(f"Expected CodeAction, got {type(action)}")

        # Execute the code using PyExecutor
        result = self._executor.run(action.code)

        # Update state
        self._state.step_count += 1
        self._state.last_exit_code = result.exit_code

        # Create observation from execution result.
        # The submitted code travels in metadata so the transforms can score it.
        observation = CodeObservation(
            stdout=result.stdout,
            stderr=result.stderr,
            exit_code=result.exit_code,
            metadata={"last_code": action.code},
        )

        return self._apply_transform(observation)

    @property
    def state(self) -> CodeState:
        """Get current environment state including last exit code."""
        return self._state
|
server/python_executor.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Local Python Executor (enhanced).
|
| 8 |
+
|
| 9 |
+
This module provides a safer wrapper around smolagents.LocalPythonExecutor
|
| 10 |
+
with improved exception handling and a few helpful tools registered with
|
| 11 |
+
the executor to make debugging executed code easier.
|
| 12 |
+
|
| 13 |
+
Key improvements:
|
| 14 |
+
- Register a few helper utilities via send_tools so user code can use
|
| 15 |
+
them for reporting (e.g. `format_exc`).
|
| 16 |
+
- More robust extraction of stdout/stderr/exit codes from the executor
|
| 17 |
+
result object, tolerant to different versions of smolagents.
|
| 18 |
+
- Detailed stderr on unexpected exceptions including full traceback.
|
| 19 |
+
- Structured logging for operational visibility.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
from __future__ import annotations
|
| 23 |
+
|
| 24 |
+
import json
|
| 25 |
+
import logging
|
| 26 |
+
import traceback
|
| 27 |
+
|
| 28 |
+
from openenv.core.env_server.types import CodeExecResult
|
| 29 |
+
from smolagents import LocalPythonExecutor
|
| 30 |
+
|
| 31 |
+
logger = logging.getLogger(__name__)
|
| 32 |
+
logger.addHandler(logging.NullHandler())
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class PyExecutor:
    """Wrapper around smolagents LocalPythonExecutor.

    On construction the wrapper registers two small helper callables
    (``format_exc`` and ``safe_json_dumps``) with the executor through
    ``send_tools``. ``run`` executes a code string and condenses whatever the
    executor returns into a ``CodeExecResult``.
    """

    def __init__(self, additional_imports: list[str] | None = None):
        """Create the underlying executor and register the helper tools.

        Args:
            additional_imports: Extra module names handed to the executor as
                ``additional_authorized_imports``. ``None`` means no extras.
        """
        if additional_imports is None:
            additional_imports = []

        self._executor = LocalPythonExecutor(
            additional_authorized_imports=additional_imports
        )

        # Helpers exposed to the executed code for error reporting.
        tools = {
            # Formats the exception currently being handled as a string.
            "format_exc": traceback.format_exc,
            # JSON dumps that falls back to repr() for non-serializable objects.
            "safe_json_dumps": lambda obj: json.dumps(obj, default=repr),
        }

        try:
            self._executor.send_tools(tools)
        except Exception:
            # Best effort: if send_tools is unsupported or fails, log and
            # continue - the executor is still usable without the helpers.
            logger.debug(
                "LocalPythonExecutor.send_tools failed; continuing without extra tools",
                exc_info=True,
            )

    def run(self, code: str) -> CodeExecResult:
        """Execute Python code and return a CodeExecResult.

        Args:
            code: Source text handed to the underlying executor.

        Returns:
            A CodeExecResult whose stdout is the executor's logs followed by
            its output value (JSON if serializable, otherwise repr), whose
            stderr collects any ``error``/``exception`` attributes, and whose
            exit_code comes from ``exit_code``, else ``success``, else from
            whether any stderr was collected. If the executor (or result
            handling) raises, stdout is empty, stderr is the full traceback
            and exit_code is 1.
        """
        try:
            exec_result = self._executor(code)

            stdout_parts = self._stdout_parts(exec_result)
            stderr_parts = self._stderr_parts(exec_result)
            exit_code = self._exit_code(exec_result, stderr_parts)

            return CodeExecResult(
                stdout="\n".join(stdout_parts),
                stderr="\n".join(stderr_parts),
                exit_code=exit_code,
            )

        except Exception:
            # Any unexpected exception is returned with a full traceback to
            # make debugging easier. Capture it before logging.
            tb = traceback.format_exc()
            logger.exception("LocalPythonExecutor raised an exception during run")
            return CodeExecResult(stdout="", stderr=tb, exit_code=1)

    @staticmethod
    def _stdout_parts(exec_result) -> list[str]:
        """Collect the printed logs and the stringified output value."""
        # Older smolagents releases are understood to return a plain
        # (output, logs, is_final_answer) tuple instead of an object with
        # attributes - TODO confirm against the pinned smolagents version.
        legacy_tuple = (
            isinstance(exec_result, tuple)
            and len(exec_result) == 3
            and not hasattr(exec_result, "logs")
        )
        parts: list[str] = []

        try:
            logs = exec_result[1] if legacy_tuple else getattr(exec_result, "logs", None)
            if logs:
                parts.append(str(logs))
        except Exception:
            logger.debug("Failed to read exec_result.logs", exc_info=True)

        try:
            if legacy_tuple:
                out_val = exec_result[0]
            elif hasattr(exec_result, "output"):
                out_val = exec_result.output
            else:
                out_val = None
            if out_val is not None:
                # Prefer JSON if possible, otherwise repr
                try:
                    parts.append(json.dumps(out_val))
                except Exception:
                    parts.append(repr(out_val))
        except Exception:
            logger.debug("Failed to read exec_result.output", exc_info=True)

        return parts

    @staticmethod
    def _stderr_parts(exec_result) -> list[str]:
        """Collect truthy ``error`` and ``exception`` attributes as strings."""
        parts: list[str] = []
        for attr in ("error", "exception"):
            try:
                value = getattr(exec_result, attr, None)
                if value:
                    parts.append(str(value))
            except Exception:
                logger.debug("Failed to read exec_result.%s", attr, exc_info=True)
        return parts

    @staticmethod
    def _exit_code(exec_result, stderr_parts: list[str]) -> int:
        """Derive an exit code from the result, falling back to stderr presence."""
        fallback = 1 if stderr_parts else 0
        try:
            if hasattr(exec_result, "exit_code"):
                reported = exec_result.exit_code
                return int(reported) if reported is not None else 0
            if hasattr(exec_result, "success"):
                # Some versions use a `success` boolean
                return 0 if exec_result.success else 1
            return fallback
        except Exception:
            logger.debug("Failed to determine exec_result exit code", exc_info=True)
            return fallback
|
server/transforms.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Transforms specific to coding environments."""
|
| 8 |
+
|
| 9 |
+
import ast
|
| 10 |
+
import re
|
| 11 |
+
|
| 12 |
+
from openenv.core.env_server.base_transforms import CompositeTransform
|
| 13 |
+
from openenv.core.env_server.interfaces import Transform
|
| 14 |
+
from openenv.core.env_server.types import Observation
|
| 15 |
+
|
| 16 |
+
from ..models import CodeObservation
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class CodeSafetyTransform(Transform):
    """Penalizes observations whose submitted code matches a risky pattern."""

    def __init__(self, penalty: float = -1.0):
        # Reward assigned when any pattern below is found in the code.
        self.penalty = penalty
        # Regular expressions searched against the raw source text, in order.
        self.dangerous_patterns = [
            r"import\s+os",
            r"import\s+subprocess",
            r"eval\(",
            r"exec\(",
            r"__import__",
            r"open\(",
        ]

    def __call__(self, observation: Observation) -> Observation:
        """Set the reward to the penalty on the first matching pattern.

        Observations that are not CodeObservation, or that carry no
        "last_code" metadata, are returned untouched. When no pattern
        matches, an unset reward is initialized to 0.0.
        """
        if not isinstance(observation, CodeObservation):
            return observation
        if "last_code" not in observation.metadata:
            return observation

        source = observation.metadata["last_code"]
        first_hit = next(
            (rx for rx in self.dangerous_patterns if re.search(rx, source)),
            None,
        )

        if first_hit is not None:
            observation.reward = self.penalty
            observation.metadata["safety_violation"] = first_hit
        elif observation.reward is None:
            observation.reward = 0.0

        return observation
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class CodeQualityTransform(Transform):
    """Evaluates and rewards code quality metrics.

    Args:
        concise_bonus: Added to the score when the stripped code is at most
            ``max_length_threshold`` characters long.
        max_length_threshold: Character limit for the conciseness bonus.
        syntax_penalty: Added to the score (normally negative) when the code
            cannot be parsed.
    """

    def __init__(
        self,
        concise_bonus: float = 0.1,
        max_length_threshold: int = 100,
        syntax_penalty: float = -0.2,
    ):
        self.concise_bonus = concise_bonus
        self.max_length_threshold = max_length_threshold
        self.syntax_penalty = syntax_penalty

    def __call__(self, observation: Observation) -> Observation:
        """Add a quality score to the observation's reward.

        Observations that are not CodeObservation are returned untouched.
        Without "last_code" metadata the score is 0.0. The score is added to
        an existing reward, or becomes the reward when none is set.
        """
        if not isinstance(observation, CodeObservation):
            return observation

        quality_score = 0.0

        if "last_code" in observation.metadata:
            code = observation.metadata["last_code"]

            # Reward concise code
            if len(code.strip()) <= self.max_length_threshold:
                quality_score += self.concise_bonus

            # Check syntax (redundant but useful for quality assessment).
            # Source containing null bytes is reported as ValueError rather
            # than SyntaxError on some Python versions; treat it as
            # unparsable too instead of letting it escape the transform.
            try:
                ast.parse(code)
            except (SyntaxError, ValueError):
                quality_score += self.syntax_penalty

        # Add to existing reward
        if observation.reward is None:
            observation.reward = quality_score
        else:
            observation.reward += quality_score

        return observation
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def create_safe_coding_transform() -> CompositeTransform:
    """Build a CompositeTransform over a safety check and a quality scorer.

    The stages are listed as [CodeSafetyTransform, CodeQualityTransform],
    each constructed with its default settings.
    """
    safety_stage = CodeSafetyTransform()
    quality_stage = CodeQualityTransform()
    return CompositeTransform([safety_stage, quality_stage])
|