Spaces:

Parthiban007
/

rust_coder

Sleeping

App Files Files Community

Parthiban007 committed 1 day ago

Commit

8a096e2

verified ·

1 Parent(s): 2154988

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

openenv.yaml +102 -22
server/app.py +90 -0

openenv.yaml CHANGED Viewed

@@ -13,37 +13,117 @@ tags:
   - coding-benchmark
 # Task Definition (Easy -> Medium -> Hard)
 tasks:
-  - id: 1
     title: "Broken CLI Argument Parser"
-    difficulty: "Easy"
-  - id: 2
-    title: "Conflicting Borrows"
-    difficulty: "Easy"
-  - id: 3
     title: "Lifetime Annotations"
-    difficulty: "Medium"
-  - id: 4
-    title: "Business Logic"
-    difficulty: "Medium"
-  - id: 5
     title: "Linked List Management"
-    difficulty: "Medium"
-  - id: 6
     title: "Multi-threaded Deadlocks"
-    difficulty: "Hard"
-  - id: 7
     title: "Async Borrowing"
-    difficulty: "Hard"
-  - id: 8
     title: "Unsafe FFI Integration"
-    difficulty: "Hard"
-  - id: 9
     title: "Inefficient Data Pipelines"
-    difficulty: "Hard"
-  - id: 10
     title: "Memory Leak Prevention"
-    difficulty: "Hard+"
 # Definitions for Documentation and Graders
 action_space:

   - coding-benchmark
 # Task Definition (Easy -> Medium -> Hard)
+# Each task has a grader that scores submissions 0.0-1.0
 tasks:
+  - id: "task_1"
     title: "Broken CLI Argument Parser"
+    difficulty: "easy"
+    description: "Fix enum variant mismatches and incomplete match arms in a CLI argument parser."
+    grader:
+      type: "programmatic"
+      endpoint: "/grade/task_1"
+      success_threshold: 0.7
+      reward_range: [0.0, 1.0]
+      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
+  - id: "task_2"
+    title: "Conflicting Borrows in Collection Processing"
+    difficulty: "easy"
+    description: "Resolve mutable/immutable borrow conflicts in a string collection processor."
+    grader:
+      type: "programmatic"
+      endpoint: "/grade/task_2"
+      success_threshold: 0.7
+      reward_range: [0.0, 1.0]
+      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
+  - id: "task_3"
     title: "Lifetime Annotations"
+    difficulty: "medium"
+    description: "Add correct lifetime annotations to enable a struct holding references to work properly."
+    grader:
+      type: "programmatic"
+      endpoint: "/grade/task_3"
+      success_threshold: 0.6
+      reward_range: [0.0, 1.0]
+      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
+  - id: "task_4"
+    title: "Business Logic Bug"
+    difficulty: "medium"
+    description: "Fix off-by-one errors and logic bugs in a financial calculation module."
+    grader:
+      type: "programmatic"
+      endpoint: "/grade/task_4"
+      success_threshold: 0.6
+      reward_range: [0.0, 1.0]
+      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
+  - id: "task_5"
     title: "Linked List Management"
+    difficulty: "medium"
+    description: "Implement a safe singly-linked list with push, pop, and peek operations."
+    grader:
+      type: "programmatic"
+      endpoint: "/grade/task_5"
+      success_threshold: 0.6
+      reward_range: [0.0, 1.0]
+      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
+  - id: "task_6"
     title: "Multi-threaded Deadlocks"
+    difficulty: "hard"
+    description: "Identify and fix deadlock conditions in a multi-threaded producer-consumer pattern."
+    grader:
+      type: "programmatic"
+      endpoint: "/grade/task_6"
+      success_threshold: 0.5
+      reward_range: [0.0, 1.0]
+      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
+  - id: "task_7"
     title: "Async Borrowing"
+    difficulty: "hard"
+    description: "Fix async/await borrowing conflicts in a concurrent file processor."
+    grader:
+      type: "programmatic"
+      endpoint: "/grade/task_7"
+      success_threshold: 0.5
+      reward_range: [0.0, 1.0]
+      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
+  - id: "task_8"
     title: "Unsafe FFI Integration"
+    difficulty: "hard"
+    description: "Write safe Rust wrappers around unsafe FFI calls to a C library."
+    grader:
+      type: "programmatic"
+      endpoint: "/grade/task_8"
+      success_threshold: 0.5
+      reward_range: [0.0, 1.0]
+      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
+  - id: "task_9"
     title: "Inefficient Data Pipelines"
+    difficulty: "hard"
+    description: "Optimize a data transformation pipeline using iterators and avoiding unnecessary allocations."
+    grader:
+      type: "programmatic"
+      endpoint: "/grade/task_9"
+      success_threshold: 0.5
+      reward_range: [0.0, 1.0]
+      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
+  - id: "task_10"
     title: "Memory Leak Prevention"
+    difficulty: "hard"
+    description: "Fix memory leak patterns in a custom allocator and ensure proper Drop implementations."
+    grader:
+      type: "programmatic"
+      endpoint: "/grade/task_10"
+      success_threshold: 0.4
+      reward_range: [0.0, 1.0]
+      description: "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)"
 # Definitions for Documentation and Graders
 action_space:

server/app.py CHANGED Viewed

@@ -9,12 +9,19 @@ Endpoints (provided by OpenEnv `create_app`):
     - GET  /state
     - GET  /schema
     - WS   /ws
 """
 import os
 import logging
 from dotenv import load_dotenv
 from openenv.core.env_server.http_server import create_app
 from models import RustCoderAction, RustCoderObservation
@@ -37,11 +44,94 @@ app = create_app(
 )
 @app.get("/health")
 async def health_check():
     return {"status": "healthy"}
 def main(host: str = "0.0.0.0", port: int = 8000) -> None:
     import uvicorn

     - GET  /state
     - GET  /schema
     - WS   /ws
+Additional endpoints:
+    - GET  /health
+    - GET  /tasks           — list all tasks with grader metadata
+    - POST /grade/{task_id} — grade a code submission for a specific task
 """
 import os
 import logging
 from dotenv import load_dotenv
+from fastapi import HTTPException
+from pydantic import BaseModel
 from openenv.core.env_server.http_server import create_app
 from models import RustCoderAction, RustCoderObservation
 )
+# ---------------------------------------------------------------------------
+# Task metadata — mirrors openenv.yaml tasks section
+# ---------------------------------------------------------------------------
+# Keys of each entry:
+#   "id"                - task identifier; the {task_id} path segment of /grade/{task_id}
+#   "index"             - value handed to env.reset(start_index=...) when grading the task
+#   "title"             - task title reported by GET /tasks
+#   "difficulty"        - difficulty label reported by GET /tasks
+#   "success_threshold" - minimum score at which a graded submission counts as a success
+_TASK_REGISTRY = [
+    {"id": "task_1",  "index": 0,  "title": "Broken CLI Argument Parser",              "difficulty": "easy",   "success_threshold": 0.7},
+    {"id": "task_2",  "index": 1,  "title": "Conflicting Borrows in Collection Processing", "difficulty": "easy",   "success_threshold": 0.7},
+    {"id": "task_3",  "index": 2,  "title": "Lifetime Annotations",                     "difficulty": "medium", "success_threshold": 0.6},
+    {"id": "task_4",  "index": 3,  "title": "Business Logic Bug",                       "difficulty": "medium", "success_threshold": 0.6},
+    {"id": "task_5",  "index": 4,  "title": "Linked List Management",                   "difficulty": "medium", "success_threshold": 0.6},
+    {"id": "task_6",  "index": 5,  "title": "Multi-threaded Deadlocks",                 "difficulty": "hard",   "success_threshold": 0.5},
+    {"id": "task_7",  "index": 6,  "title": "Async Borrowing",                          "difficulty": "hard",   "success_threshold": 0.5},
+    {"id": "task_8",  "index": 7,  "title": "Unsafe FFI Integration",                   "difficulty": "hard",   "success_threshold": 0.5},
+    {"id": "task_9",  "index": 8,  "title": "Inefficient Data Pipelines",               "difficulty": "hard",   "success_threshold": 0.5},
+    {"id": "task_10", "index": 9,  "title": "Memory Leak Prevention",                   "difficulty": "hard",   "success_threshold": 0.4},
+]
+# Lookup table from task id to its registry entry, used to resolve /grade/{task_id}.
+_TASK_BY_ID = {t["id"]: t for t in _TASK_REGISTRY}
+# ---------------------------------------------------------------------------
+# Endpoints
+# ---------------------------------------------------------------------------
 @app.get("/health")
 async def health_check():
     return {"status": "healthy"}
+@app.get("/tasks")
+async def list_tasks():
+    """Return the list of all tasks with their grader metadata."""
+    tasks_out = []
+    for t in _TASK_REGISTRY:
+        tasks_out.append({
+            "id": t["id"],
+            "title": t["title"],
+            "difficulty": t["difficulty"],
+            "grader": {
+                "type": "programmatic",
+                "endpoint": f"/grade/{t['id']}",
+                "success_threshold": t["success_threshold"],
+                "reward_range": [0.0, 1.0],
+                "description": "Compilation(40%) + Correctness(20%) + Coverage(20%) + Elegance(10%) + Efficiency(10%)",
+            },
+        })
+    return {"tasks": tasks_out, "total": len(tasks_out)}
+class GradeRequest(BaseModel):
+    """Request body accepted by POST /grade/{task_id}."""
+    # Source code of the submission to grade; an empty string when the field is omitted.
+    code: str = ""
+@app.post("/grade/{task_id}")
+async def grade_task(task_id: str, request: GradeRequest):
+    """
+    Grade a Rust code submission for a specific task.
+    Returns a score in [0.0, 1.0] with detailed breakdown.
+    This is the programmatic grader endpoint referenced in openenv.yaml.
+    """
+    task_meta = _TASK_BY_ID.get(task_id)
+    if task_meta is None:
+        raise HTTPException(status_code=404, detail=f"Task '{task_id}' not found.")
+    env = RustCoderEnvironment()
+    # Reset to the specific task
+    env.reset(start_index=task_meta["index"])
+    # Submit the code
+    action = RustCoderAction(code=request.code)
+    obs = env.step(action)
+    score = float(obs.reward) if obs.reward is not None else 0.0
+    score = max(0.0, min(1.0, score))
+    success = score >= task_meta["success_threshold"]
+    return {
+        "task_id": task_id,
+        "score": round(score, 4),
+        "success": success,
+        "success_threshold": task_meta["success_threshold"],
+        "reward_breakdown": obs.reward_breakdown,
+        "compilation_success": obs.compilation_success,
+        "compilation_output": obs.compilation_output,
+        "test_results": obs.test_results,
+    }
 def main(host: str = "0.0.0.0", port: int = 8000) -> None:
     import uvicorn