Spaces:

darshanajudiya7
/

python_env

Sleeping

App Files Community

darshanajudiya7 committed 12 days ago

Commit

83bfb8f

verified ·

1 Parent(s): 524ccd2

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

models.py +4 -3
server/app.py +4 -4
server/python_env_environment.py +26 -50

models.py CHANGED Viewed

@@ -185,7 +185,7 @@ class PythonReviewAction(Action):
     """Structured review action emitted by a model or trainer."""
     # Primary UI Fields (7 benchmark + 3 template = 10 total)
-    operation: str = Field(default="submit_findings", description="The operation to perform.")
     findings: List[ReviewFinding] = Field(default_factory=list, description="The findings list.")
     patched_code: Optional[str] = Field(default=None, description="The fixed source code.")
@@ -201,9 +201,10 @@ class PythonReviewAction(Action):
     def validate_action_shape(self) -> "PythonReviewAction":
         """Require the right fields for each action type."""
-        # Bypass benchmark validation if using the template 'operation' style (e.g. submit_findings)
         if self.operation != "ADD_COMMENT":
-             return self
         if self.action_type == ActionType.ADD_COMMENT:
             missing = []

     """Structured review action emitted by a model or trainer."""
     # Primary UI Fields (7 benchmark + 3 template = 10 total)
+    operation: str = Field(default="ADD_COMMENT", description="The operation to perform.")
     findings: List[ReviewFinding] = Field(default_factory=list, description="The findings list.")
     patched_code: Optional[str] = Field(default=None, description="The fixed source code.")
     def validate_action_shape(self) -> "PythonReviewAction":
         """Require the right fields for each action type."""
+        # Legacy template actions still use string operations like `submit_findings`.
+        # Benchmark actions should validate against `action_type`.
         if self.operation != "ADD_COMMENT":
+            return self
         if self.action_type == ActionType.ADD_COMMENT:
             missing = []

server/app.py CHANGED Viewed

@@ -47,7 +47,7 @@ try:
         TaskListResponse,
     )
     from .python_env_environment import (
-        PythonEnvironment,
         get_current_state,
         get_health_response,
         get_metrics_response,
@@ -63,7 +63,7 @@ except ImportError:
         TaskListResponse,
     )
     from server.python_env_environment import (  # type: ignore
-        PythonEnvironment,
         get_current_state,
         get_health_response,
         get_metrics_response,
@@ -73,11 +73,11 @@ except ImportError:
 # Create the app with web interface and README integration
 app = create_app(
-    PythonEnvironment,
     PythonAction,
     PythonObservation,
     env_name="python_env",
-    max_concurrent_envs=1,  # increase this number to allow more concurrent WebSocket sessions
 )

         TaskListResponse,
     )
     from .python_env_environment import (
+        get_environment,
         get_current_state,
         get_health_response,
         get_metrics_response,
         TaskListResponse,
     )
     from server.python_env_environment import (  # type: ignore
+        get_environment,
         get_current_state,
         get_health_response,
         get_metrics_response,
 # Create the app with web interface and README integration
 app = create_app(
+    get_environment,
     PythonAction,
     PythonObservation,
     env_name="python_env",
+    max_concurrent_envs=1,  # the shared environment is intended for one active session
 )

server/python_env_environment.py CHANGED Viewed

@@ -440,61 +440,37 @@ class PythonEnvironment(Environment[PythonAction, PythonObservation, State]):
         return self._state
-# try:
-#     from .review_runtime import (  # type: ignore
-...
-#     )
-# --- App Interface Shims ---
-_GLOBAL_ENV: Optional[PythonEnvironment] = None
-def _get_env() -> PythonEnvironment:
     global _GLOBAL_ENV
     if _GLOBAL_ENV is None:
-        _GLOBAL_ENV = PythonEnvironment()
     return _GLOBAL_ENV
-def get_current_state() -> PythonState:
-    env = _get_env()
-    obs = env._build_observation(feedback="State request", reward=0.0, done=False)
-    # Convert PythonObservation to PythonState if needed
-    return PythonState(
-        episode_id=env.state.episode_id,
-        current_step=env.state.step_count,
-        task_id=obs.task.task_id if obs.task else None,
-        difficulty=Difficulty(obs.task.difficulty) if obs.task else None,
-        done=False,
-        last_feedback=obs.feedback,
-    )
-def get_health_response() -> HealthResponse:
-    return HealthResponse(
-        status="ok",
-        environment="python_env",
-        task_count=len(TASK_BANK),
-    )
-def get_metrics_response() -> MetricsResponse:
-    return MetricsResponse()
-def get_tasks_response() -> TaskListResponse:
-    from .task_bank import load_task_catalog
-    try:
-        tasks = load_task_catalog()
-    except Exception:
-        tasks = []
-    # If using local TASK_BANK, convert them
-    if not tasks:
-        tasks = [
-            TaskMetadata(
-                task_id=tid,
-                name=t.descriptor.title,
-                difficulty=Difficulty(t.descriptor.difficulty),
-                description=t.descriptor.objective,
-                snippet_count=1,
-                max_steps=t.descriptor.max_steps,
-            )
-            for tid, t in TASK_BANK.items()
-        ]
-    return TaskListResponse(tasks=tasks)

         return self._state
+# Compatibility bridge:
+# keep the old module path, but route the actual app/runtime through the
+# dataset-backed dense-reward benchmark implementation.
+try:
+    from .review_runtime import (
+        PythonReviewRuntime as _BenchmarkPythonEnvironment,
+        get_current_state,
+        get_health_response,
+        get_metrics_response,
+        get_tasks_response,
+    )
+except ImportError:
+    from server.review_runtime import (  # type: ignore
+        PythonReviewRuntime as _BenchmarkPythonEnvironment,
+        get_current_state,
+        get_health_response,
+        get_metrics_response,
+        get_tasks_response,
+    )
+_GLOBAL_ENV: Optional[_BenchmarkPythonEnvironment] = None
+def get_environment() -> _BenchmarkPythonEnvironment:
+    """Return the shared benchmark environment used by the HTTP app."""
     global _GLOBAL_ENV
     if _GLOBAL_ENV is None:
+        _GLOBAL_ENV = _BenchmarkPythonEnvironment()
     return _GLOBAL_ENV
+PythonEnvironment = _BenchmarkPythonEnvironment