Priyansh Saxena committed on
Commit ·
0feb6c8
unverified ·
0
Parent(s):
Add files via upload
Browse files- .gitignore +7 -0
- Dockerfile +15 -0
- README.md +81 -0
- inference.py +126 -0
- openenv.yaml +40 -0
- pytest.ini +2 -0
- requirements.txt +9 -0
.gitignore
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
| 3 |
+
*.pyo
|
| 4 |
+
.env
|
| 5 |
+
.pytest_cache/
|
| 6 |
+
dist/
|
| 7 |
+
*.egg-info/
|
Dockerfile
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim

# Don't write .pyc files, don't buffer stdout/stderr (so logs stream live),
# and default to the port Hugging Face Spaces expects (7860).
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PORT=7860

WORKDIR /app

# Install dependencies in their own layer so Docker caching survives code edits.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 7860
# Serve the FastAPI app; bind 0.0.0.0 so the container port is reachable.
CMD ["uvicorn", "src.pytorch_debug_env.server:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: PyTorch Debug Env
|
| 3 |
+
emoji: 🔥
|
| 4 |
+
colorFrom: orange
|
| 5 |
+
colorTo: red
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
short_description: Multi-step RL environment for diagnosing broken PyTorch training jobs
|
| 9 |
+
tags:
|
| 10 |
+
- openenv
|
| 11 |
+
- pytorch
|
| 12 |
+
- reinforcement-learning
|
| 13 |
+
- debugging
|
| 14 |
+
- ml-training
|
| 15 |
+
- agent
|
| 16 |
+
pinned: true
|
| 17 |
+
---
|
| 18 |
+
|
| 19 |
+
# PyTorch Debug Env 🔥
|
| 20 |
+
|
| 21 |
+
A complete [OpenEnv](https://meta-pytorch.org/OpenEnv/) environment for the **Meta PyTorch Hackathon** where an AI agent investigates and diagnoses broken PyTorch training jobs.
|
| 22 |
+
|
| 23 |
+
## Quick Start
|
| 24 |
+
|
| 25 |
+
```python
|
| 26 |
+
from openenv import AutoEnv, AutoAction
|
| 27 |
+
|
| 28 |
+
env = AutoEnv.from_env("ArchCoder/pytorch-debug-env")
|
| 29 |
+
Action = AutoAction.from_env("ArchCoder/pytorch-debug-env")
|
| 30 |
+
|
| 31 |
+
with env.sync() as client:
|
| 32 |
+
result = client.reset(task_id="easy")
|
| 33 |
+
action = Action(
|
| 34 |
+
current_hypothesis={
|
| 35 |
+
"bug_type": "missing_zero_grad",
|
| 36 |
+
"affected_file": "train.py",
|
| 37 |
+
"confidence": 0.7
|
| 38 |
+
},
|
| 39 |
+
commit_diagnosis=False
|
| 40 |
+
)
|
| 41 |
+
step_result = client.step(action)
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
## API Endpoints
|
| 45 |
+
|
| 46 |
+
| Endpoint | Method | Description |
|
| 47 |
+
|----------|--------|-------------|
|
| 48 |
+
| `/` | GET | Environment info |
|
| 49 |
+
| `/health` | GET | Health check |
|
| 50 |
+
| `/reset?task_id=easy` | POST | Start new episode |
|
| 51 |
+
| `/step` | POST | Submit hypothesis + action |
|
| 52 |
+
| `/state` | GET | Current episode state |
|
| 53 |
+
|
| 54 |
+
## Tasks
|
| 55 |
+
|
| 56 |
+
| Task | Difficulty | Description |
|
| 57 |
+
|------|-----------|-------------|
|
| 58 |
+
| `easy` | ⭐ | Single-file bug — missing `zero_grad`, wrong loss |
|
| 59 |
+
| `medium` | ⭐⭐ | Multi-file root cause — data leakage, scheduler mismatch |
|
| 60 |
+
| `hard` | ⭐⭐⭐ | Silent failure — memory leak, AMP overflow, red herrings |
|
| 61 |
+
|
| 62 |
+
## Reward Structure
|
| 63 |
+
|
| 64 |
+
- **Hypothesis delta** (60%) — reward for improving your bug hypothesis each step
|
| 65 |
+
- **Investigation** (20%) — reward for inspecting the right files
|
| 66 |
+
- **Final diagnosis** (20%) — accuracy of committed diagnosis vs ground truth
|
| 67 |
+
|
| 68 |
+
Scores range from `0.0` to `1.0`. Partial credit for correct bug category on hard tasks.
|
| 69 |
+
|
| 70 |
+
## Environment State
|
| 71 |
+
|
| 72 |
+
Each episode provides a synthetic PyTorch repo with:
|
| 73 |
+
- Source files (`train.py`, `model/`, `data/`, `config/`)
|
| 74 |
+
- Loss curves and GPU memory profiles
|
| 75 |
+
- Training logs with realistic noise and red herrings
|
| 76 |
+
|
| 77 |
+
The agent reveals files progressively across up to 5–6 steps, refining its hypothesis before committing a final diagnosis.
|
| 78 |
+
|
| 79 |
+
## Author
|
| 80 |
+
|
| 81 |
+
**Priyansh Saxena** — IIIT Gwalior
|
inference.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# inference.py
#
# Baseline agent driver: connects an OpenAI-compatible chat model to the
# pytorch-debug-env HTTP environment and runs one debugging episode,
# emitting structured START/STEP/END JSON records on stdout.
import asyncio
import json
import os
from typing import List

from openai import OpenAI
import httpx

# All configuration comes from environment variables so the script runs
# unchanged in CI / Docker. Defaults target a local environment server.
API_BASE_URL = os.environ.get("API_BASE_URL", "https://api.openai.com/v1")  # OpenAI-compatible endpoint
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-3.5-turbo")
API_KEY = os.environ.get("OPENAI_API_KEY", "dummy")  # "dummy" suffices for local OpenAI-compatible servers
ENV_URL = os.environ.get("ENV_URL", "http://localhost:7860")  # pytorch-debug-env server base URL
TASK_NAME = os.environ.get("TASK_NAME", "easy")  # one of: easy | medium | hard
MAX_STEPS = int(os.environ.get("MAX_STEPS", "5"))  # episode step budget
SUCCESS_SCORE_THRESHOLD = float(os.environ.get("SUCCESS_SCORE_THRESHOLD", "0.7"))  # score >= this counts as success
MAX_TOTAL_REWARD = float(os.environ.get("MAX_TOTAL_REWARD", "1.0"))  # NOTE(review): defined but unused below — confirm intent
| 20 |
+
def log_start(task, env, model):
    """Emit a one-line structured START record for this episode on stdout."""
    record = {"type": "START", "task": task, "env": env, "model": model}
    print(json.dumps(record), flush=True)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def log_step(step, action, reward, done, error):
    """Emit a one-line structured STEP record on stdout.

    reward is coerced to float and done to bool so the JSON types are stable
    regardless of what the environment returned.
    """
    record = {
        "type": "STEP",
        "step": step,
        "action": action,
        "reward": float(reward),
        "done": bool(done),
        "error": error,
    }
    print(json.dumps(record), flush=True)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def log_end(success, steps, score, rewards):
    """Emit the final one-line structured END record summarising the episode."""
    record = {
        "type": "END",
        "success": bool(success),
        "steps": steps,
        "score": float(score),
        # Normalise every per-step reward to float for stable JSON output.
        "rewards": [float(r) for r in rewards],
    }
    print(json.dumps(record), flush=True)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def get_model_message(client: OpenAI, observation: dict, history: List[str]) -> str:
    """Ask the chat model for the next debugging action.

    Builds a single-user-turn prompt containing the JSON schema the
    environment expects, the current observation (truncated to 8000
    chars of serialized JSON), and the reward history, then returns the
    model's raw text reply stripped of surrounding whitespace.
    """
    prompt = f"""
You are debugging a PyTorch training job. Respond ONLY with valid JSON matching this exact schema:
{{
"current_hypothesis": {{"bug_type": "<string>", "affected_file": "<string>", "confidence": <0.0-1.0>}},
"investigation_action": {{"action": "reveal_file", "target": "<filename>"}},
"commit_diagnosis": false,
"final_diagnosis": null
}}

Valid action types: reveal_file, extend_loss_curve, extend_gpu_profile, reveal_log_chunk, run_diagnostic
Valid bug types: missing_zero_grad, data_leakage, memory_leak, learning_rate_too_high, gradient_explosion

Observation:
{json.dumps(observation)[:8000]}
History: {history}
"""
    # temperature=0 for determinism; 500 tokens is ample for the JSON reply.
    completion = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
        max_tokens=500,
    )
    # content can be None (e.g. refusals); guard before stripping.
    return (completion.choices[0].message.content or "").strip()
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
async def main():
    """Run one episode against the debug environment and log the outcome.

    Flow: reset the environment for TASK_NAME, then loop up to MAX_STEPS
    times asking the model for an action and POSTing it to /step. Emits
    structured START/STEP/END JSON records on stdout. Configuration comes
    from the module-level env-var constants.
    """
    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
    rewards = []
    history = []
    steps_taken = 0
    score = 0.0
    success = False

    log_start(task=TASK_NAME, env="pytorch-debug-env", model=MODEL_NAME)

    async with httpx.AsyncClient(timeout=60.0) as session:
        # Start a fresh episode for the configured task.
        reset_resp = await session.post(f"{ENV_URL}/reset", params={"task_id": TASK_NAME})
        reset_resp.raise_for_status()
        result = reset_resp.json()
        session_id = result.get("session_id")
        observation = result["observation"]

        for step in range(1, MAX_STEPS + 1):
            if result.get("done"):
                break

            action_text = get_model_message(client, observation, history)
            try:
                # Fix: models frequently wrap their JSON in markdown ```
                # fences despite the prompt instructions; previously that
                # made json.loads fail and aborted the episode with zero
                # reward. Strip any fence before parsing.
                action_json = json.loads(_strip_code_fences(action_text))
                step_resp = await session.post(
                    f"{ENV_URL}/step", params={"session_id": session_id}, json=action_json
                )
                step_resp.raise_for_status()
                result = step_resp.json()
                reward = result.get("reward", 0.0)
                done = result.get("done", False)
                error = None
                observation = result["observation"]
            except Exception as exc:
                # Any failure (unparseable JSON, HTTP error, missing
                # observation) ends the episode; this step scores zero.
                reward = 0.0
                done = True
                error = str(exc)

            rewards.append(reward)
            steps_taken = step
            log_step(step=step, action=action_text, reward=reward, done=done, error=error)
            history.append(f"step={step} reward={reward:.3f}")

            if done:
                break

    # Final score is the last step's reward, clamped to [0.0, 1.0].
    score = min(max(rewards[-1] if rewards else 0.0, 0.0), 1.0)
    success = score >= SUCCESS_SCORE_THRESHOLD
    log_end(success=success, steps=steps_taken, score=score, rewards=rewards)


def _strip_code_fences(text: str) -> str:
    """Return *text* with a surrounding markdown code fence removed, if any.

    Handles both ``` and ```json opening fences; leaves un-fenced text
    untouched (apart from outer whitespace stripping).
    """
    stripped = text.strip()
    if stripped.startswith("```"):
        # Drop the opening fence line (which may carry a language tag).
        stripped = stripped.split("\n", 1)[1] if "\n" in stripped else ""
        if stripped.rstrip().endswith("```"):
            stripped = stripped.rstrip()[:-3]
    return stripped.strip()
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
if __name__ == "__main__":
    # Script entry point: run a single episode.
    asyncio.run(main())
|
openenv.yaml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: pytorch-debug-env
|
| 2 |
+
version: 1.0.0
|
| 3 |
+
description: Multi-step OpenEnv environment for diagnosing broken PyTorch training jobs.
|
| 4 |
+
author: Priyansh Saxena
|
| 5 |
+
|
| 6 |
+
client:
|
| 7 |
+
class_name: PyTorchDebugEnv
|
| 8 |
+
module: src.pytorch_debug_env.environment
|
| 9 |
+
|
| 10 |
+
action:
|
| 11 |
+
class_name: PyTorchDebugAction
|
| 12 |
+
module: src.pytorch_debug_env.models
|
| 13 |
+
|
| 14 |
+
observation:
|
| 15 |
+
class_name: PyTorchDebugObservation
|
| 16 |
+
module: src.pytorch_debug_env.models
|
| 17 |
+
|
| 18 |
+
default_image: pytorch-debug-env:latest
|
| 19 |
+
spec_version: 1
|
| 20 |
+
|
| 21 |
+
tags:
|
| 22 |
+
- openenv
|
| 23 |
+
- pytorch
|
| 24 |
+
- debugging
|
| 25 |
+
- reinforcement-learning
|
| 26 |
+
|
| 27 |
+
tasks:
|
| 28 |
+
- id: easy
|
| 29 |
+
name: Single-file bug detection
|
| 30 |
+
difficulty: easy
|
| 31 |
+
- id: medium
|
| 32 |
+
name: Multi-file root cause analysis
|
| 33 |
+
difficulty: medium
|
| 34 |
+
- id: hard
|
| 35 |
+
name: Silent failure diagnosis
|
| 36 |
+
difficulty: hard
|
| 37 |
+
|
| 38 |
+
runtime:
|
| 39 |
+
framework: fastapi
|
| 40 |
+
container_port: 7860
|
pytest.ini
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[pytest]
|
| 2 |
+
asyncio_mode = auto
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.115.0
|
| 2 |
+
uvicorn[standard]==0.30.6
|
| 3 |
+
pydantic==2.9.2
|
| 4 |
+
numpy==2.1.1
|
| 5 |
+
openai==1.51.0
|
| 6 |
+
httpx==0.27.2
|
| 7 |
+
pytest==8.3.3
|
| 8 |
+
pytest-asyncio==0.24.0
|
| 9 |
+
openenv>=0.1.0
|