Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
- models.py +4 -3
- server/app.py +4 -4
- server/python_env_environment.py +26 -50
models.py
CHANGED
|
@@ -185,7 +185,7 @@ class PythonReviewAction(Action):
|
|
| 185 |
"""Structured review action emitted by a model or trainer."""
|
| 186 |
|
| 187 |
# Primary UI Fields (7 benchmark + 3 template = 10 total)
|
| 188 |
-
operation: str = Field(default="
|
| 189 |
findings: List[ReviewFinding] = Field(default_factory=list, description="The findings list.")
|
| 190 |
patched_code: Optional[str] = Field(default=None, description="The fixed source code.")
|
| 191 |
|
|
@@ -201,9 +201,10 @@ class PythonReviewAction(Action):
|
|
| 201 |
def validate_action_shape(self) -> "PythonReviewAction":
|
| 202 |
"""Require the right fields for each action type."""
|
| 203 |
|
| 204 |
-
#
|
|
|
|
| 205 |
if self.operation != "ADD_COMMENT":
|
| 206 |
-
|
| 207 |
|
| 208 |
if self.action_type == ActionType.ADD_COMMENT:
|
| 209 |
missing = []
|
|
|
|
| 185 |
"""Structured review action emitted by a model or trainer."""
|
| 186 |
|
| 187 |
# Primary UI Fields (7 benchmark + 3 template = 10 total)
|
| 188 |
+
operation: str = Field(default="ADD_COMMENT", description="The operation to perform.")
|
| 189 |
findings: List[ReviewFinding] = Field(default_factory=list, description="The findings list.")
|
| 190 |
patched_code: Optional[str] = Field(default=None, description="The fixed source code.")
|
| 191 |
|
|
|
|
| 201 |
def validate_action_shape(self) -> "PythonReviewAction":
|
| 202 |
"""Require the right fields for each action type."""
|
| 203 |
|
| 204 |
+
# Legacy template actions still use string operations like `submit_findings`.
|
| 205 |
+
# Benchmark actions should validate against `action_type`.
|
| 206 |
if self.operation != "ADD_COMMENT":
|
| 207 |
+
return self
|
| 208 |
|
| 209 |
if self.action_type == ActionType.ADD_COMMENT:
|
| 210 |
missing = []
|
server/app.py
CHANGED
|
@@ -47,7 +47,7 @@ try:
|
|
| 47 |
TaskListResponse,
|
| 48 |
)
|
| 49 |
from .python_env_environment import (
|
| 50 |
-
|
| 51 |
get_current_state,
|
| 52 |
get_health_response,
|
| 53 |
get_metrics_response,
|
|
@@ -63,7 +63,7 @@ except ImportError:
|
|
| 63 |
TaskListResponse,
|
| 64 |
)
|
| 65 |
from server.python_env_environment import ( # type: ignore
|
| 66 |
-
|
| 67 |
get_current_state,
|
| 68 |
get_health_response,
|
| 69 |
get_metrics_response,
|
|
@@ -73,11 +73,11 @@ except ImportError:
|
|
| 73 |
|
| 74 |
# Create the app with web interface and README integration
|
| 75 |
app = create_app(
|
| 76 |
-
|
| 77 |
PythonAction,
|
| 78 |
PythonObservation,
|
| 79 |
env_name="python_env",
|
| 80 |
-
max_concurrent_envs=1, #
|
| 81 |
)
|
| 82 |
|
| 83 |
|
|
|
|
| 47 |
TaskListResponse,
|
| 48 |
)
|
| 49 |
from .python_env_environment import (
|
| 50 |
+
get_environment,
|
| 51 |
get_current_state,
|
| 52 |
get_health_response,
|
| 53 |
get_metrics_response,
|
|
|
|
| 63 |
TaskListResponse,
|
| 64 |
)
|
| 65 |
from server.python_env_environment import ( # type: ignore
|
| 66 |
+
get_environment,
|
| 67 |
get_current_state,
|
| 68 |
get_health_response,
|
| 69 |
get_metrics_response,
|
|
|
|
| 73 |
|
| 74 |
# Create the app with web interface and README integration
|
| 75 |
app = create_app(
|
| 76 |
+
get_environment,
|
| 77 |
PythonAction,
|
| 78 |
PythonObservation,
|
| 79 |
env_name="python_env",
|
| 80 |
+
max_concurrent_envs=1, # the shared environment is intended for one active session
|
| 81 |
)
|
| 82 |
|
| 83 |
|
server/python_env_environment.py
CHANGED
|
@@ -440,61 +440,37 @@ class PythonEnvironment(Environment[PythonAction, PythonObservation, State]):
|
|
| 440 |
return self._state
|
| 441 |
|
| 442 |
|
| 443 |
-
#
|
| 444 |
-
#
|
| 445 |
-
.
|
| 446 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 447 |
|
| 448 |
-
|
| 449 |
|
| 450 |
-
_GLOBAL_ENV: Optional[PythonEnvironment] = None
|
| 451 |
|
| 452 |
-
def
|
|
|
|
|
|
|
| 453 |
global _GLOBAL_ENV
|
| 454 |
if _GLOBAL_ENV is None:
|
| 455 |
-
_GLOBAL_ENV =
|
| 456 |
return _GLOBAL_ENV
|
| 457 |
|
| 458 |
-
def get_current_state() -> PythonState:
|
| 459 |
-
env = _get_env()
|
| 460 |
-
obs = env._build_observation(feedback="State request", reward=0.0, done=False)
|
| 461 |
-
# Convert PythonObservation to PythonState if needed
|
| 462 |
-
return PythonState(
|
| 463 |
-
episode_id=env.state.episode_id,
|
| 464 |
-
current_step=env.state.step_count,
|
| 465 |
-
task_id=obs.task.task_id if obs.task else None,
|
| 466 |
-
difficulty=Difficulty(obs.task.difficulty) if obs.task else None,
|
| 467 |
-
done=False,
|
| 468 |
-
last_feedback=obs.feedback,
|
| 469 |
-
)
|
| 470 |
|
| 471 |
-
|
| 472 |
-
return HealthResponse(
|
| 473 |
-
status="ok",
|
| 474 |
-
environment="python_env",
|
| 475 |
-
task_count=len(TASK_BANK),
|
| 476 |
-
)
|
| 477 |
-
|
| 478 |
-
def get_metrics_response() -> MetricsResponse:
|
| 479 |
-
return MetricsResponse()
|
| 480 |
-
|
| 481 |
-
def get_tasks_response() -> TaskListResponse:
|
| 482 |
-
from .task_bank import load_task_catalog
|
| 483 |
-
try:
|
| 484 |
-
tasks = load_task_catalog()
|
| 485 |
-
except Exception:
|
| 486 |
-
tasks = []
|
| 487 |
-
# If using local TASK_BANK, convert them
|
| 488 |
-
if not tasks:
|
| 489 |
-
tasks = [
|
| 490 |
-
TaskMetadata(
|
| 491 |
-
task_id=tid,
|
| 492 |
-
name=t.descriptor.title,
|
| 493 |
-
difficulty=Difficulty(t.descriptor.difficulty),
|
| 494 |
-
description=t.descriptor.objective,
|
| 495 |
-
snippet_count=1,
|
| 496 |
-
max_steps=t.descriptor.max_steps,
|
| 497 |
-
)
|
| 498 |
-
for tid, t in TASK_BANK.items()
|
| 499 |
-
]
|
| 500 |
-
return TaskListResponse(tasks=tasks)
|
|
|
|
| 440 |
return self._state
|
| 441 |
|
| 442 |
|
| 443 |
+
# Compatibility bridge:
|
| 444 |
+
# keep the old module path, but route the actual app/runtime through the
|
| 445 |
+
# dataset-backed dense-reward benchmark implementation.
|
| 446 |
+
try:
|
| 447 |
+
from .review_runtime import (
|
| 448 |
+
PythonReviewRuntime as _BenchmarkPythonEnvironment,
|
| 449 |
+
get_current_state,
|
| 450 |
+
get_health_response,
|
| 451 |
+
get_metrics_response,
|
| 452 |
+
get_tasks_response,
|
| 453 |
+
)
|
| 454 |
+
except ImportError:
|
| 455 |
+
from server.review_runtime import ( # type: ignore
|
| 456 |
+
PythonReviewRuntime as _BenchmarkPythonEnvironment,
|
| 457 |
+
get_current_state,
|
| 458 |
+
get_health_response,
|
| 459 |
+
get_metrics_response,
|
| 460 |
+
get_tasks_response,
|
| 461 |
+
)
|
| 462 |
+
|
| 463 |
|
| 464 |
+
_GLOBAL_ENV: Optional[_BenchmarkPythonEnvironment] = None
|
| 465 |
|
|
|
|
| 466 |
|
| 467 |
+
def get_environment() -> _BenchmarkPythonEnvironment:
|
| 468 |
+
"""Return the shared benchmark environment used by the HTTP app."""
|
| 469 |
+
|
| 470 |
global _GLOBAL_ENV
|
| 471 |
if _GLOBAL_ENV is None:
|
| 472 |
+
_GLOBAL_ENV = _BenchmarkPythonEnvironment()
|
| 473 |
return _GLOBAL_ENV
|
| 474 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 475 |
|
| 476 |
+
PythonEnvironment = _BenchmarkPythonEnvironment
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|