darshanajudiya7 committed on
Commit
83bfb8f
·
verified ·
1 Parent(s): 524ccd2

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. models.py +4 -3
  2. server/app.py +4 -4
  3. server/python_env_environment.py +26 -50
models.py CHANGED
@@ -185,7 +185,7 @@ class PythonReviewAction(Action):
185
  """Structured review action emitted by a model or trainer."""
186
 
187
  # Primary UI Fields (7 benchmark + 3 template = 10 total)
188
- operation: str = Field(default="submit_findings", description="The operation to perform.")
189
  findings: List[ReviewFinding] = Field(default_factory=list, description="The findings list.")
190
  patched_code: Optional[str] = Field(default=None, description="The fixed source code.")
191
 
@@ -201,9 +201,10 @@ class PythonReviewAction(Action):
201
  def validate_action_shape(self) -> "PythonReviewAction":
202
  """Require the right fields for each action type."""
203
 
204
- # Bypass benchmark validation if using the template 'operation' style (e.g. submit_findings)
 
205
  if self.operation != "ADD_COMMENT":
206
- return self
207
 
208
  if self.action_type == ActionType.ADD_COMMENT:
209
  missing = []
 
185
  """Structured review action emitted by a model or trainer."""
186
 
187
  # Primary UI Fields (7 benchmark + 3 template = 10 total)
188
+ operation: str = Field(default="ADD_COMMENT", description="The operation to perform.")
189
  findings: List[ReviewFinding] = Field(default_factory=list, description="The findings list.")
190
  patched_code: Optional[str] = Field(default=None, description="The fixed source code.")
191
 
 
201
  def validate_action_shape(self) -> "PythonReviewAction":
202
  """Require the right fields for each action type."""
203
 
204
+ # Legacy template actions still use string operations like `submit_findings`.
205
+ # Benchmark actions should validate against `action_type`.
206
  if self.operation != "ADD_COMMENT":
207
+ return self
208
 
209
  if self.action_type == ActionType.ADD_COMMENT:
210
  missing = []
server/app.py CHANGED
@@ -47,7 +47,7 @@ try:
47
  TaskListResponse,
48
  )
49
  from .python_env_environment import (
50
- PythonEnvironment,
51
  get_current_state,
52
  get_health_response,
53
  get_metrics_response,
@@ -63,7 +63,7 @@ except ImportError:
63
  TaskListResponse,
64
  )
65
  from server.python_env_environment import ( # type: ignore
66
- PythonEnvironment,
67
  get_current_state,
68
  get_health_response,
69
  get_metrics_response,
@@ -73,11 +73,11 @@ except ImportError:
73
 
74
  # Create the app with web interface and README integration
75
  app = create_app(
76
- PythonEnvironment,
77
  PythonAction,
78
  PythonObservation,
79
  env_name="python_env",
80
- max_concurrent_envs=1, # increase this number to allow more concurrent WebSocket sessions
81
  )
82
 
83
 
 
47
  TaskListResponse,
48
  )
49
  from .python_env_environment import (
50
+ get_environment,
51
  get_current_state,
52
  get_health_response,
53
  get_metrics_response,
 
63
  TaskListResponse,
64
  )
65
  from server.python_env_environment import ( # type: ignore
66
+ get_environment,
67
  get_current_state,
68
  get_health_response,
69
  get_metrics_response,
 
73
 
74
  # Create the app with web interface and README integration
75
  app = create_app(
76
+ get_environment,
77
  PythonAction,
78
  PythonObservation,
79
  env_name="python_env",
80
+ max_concurrent_envs=1, # the shared environment is intended for one active session
81
  )
82
 
83
 
server/python_env_environment.py CHANGED
@@ -440,61 +440,37 @@ class PythonEnvironment(Environment[PythonAction, PythonObservation, State]):
440
  return self._state
441
 
442
 
443
- # try:
444
- # from .review_runtime import ( # type: ignore
445
- ...
446
- # )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
447
 
448
- # --- App Interface Shims ---
449
 
450
- _GLOBAL_ENV: Optional[PythonEnvironment] = None
451
 
452
- def _get_env() -> PythonEnvironment:
 
 
453
  global _GLOBAL_ENV
454
  if _GLOBAL_ENV is None:
455
- _GLOBAL_ENV = PythonEnvironment()
456
  return _GLOBAL_ENV
457
 
458
- def get_current_state() -> PythonState:
459
- env = _get_env()
460
- obs = env._build_observation(feedback="State request", reward=0.0, done=False)
461
- # Convert PythonObservation to PythonState if needed
462
- return PythonState(
463
- episode_id=env.state.episode_id,
464
- current_step=env.state.step_count,
465
- task_id=obs.task.task_id if obs.task else None,
466
- difficulty=Difficulty(obs.task.difficulty) if obs.task else None,
467
- done=False,
468
- last_feedback=obs.feedback,
469
- )
470
 
471
- def get_health_response() -> HealthResponse:
472
- return HealthResponse(
473
- status="ok",
474
- environment="python_env",
475
- task_count=len(TASK_BANK),
476
- )
477
-
478
- def get_metrics_response() -> MetricsResponse:
479
- return MetricsResponse()
480
-
481
- def get_tasks_response() -> TaskListResponse:
482
- from .task_bank import load_task_catalog
483
- try:
484
- tasks = load_task_catalog()
485
- except Exception:
486
- tasks = []
487
- # If using local TASK_BANK, convert them
488
- if not tasks:
489
- tasks = [
490
- TaskMetadata(
491
- task_id=tid,
492
- name=t.descriptor.title,
493
- difficulty=Difficulty(t.descriptor.difficulty),
494
- description=t.descriptor.objective,
495
- snippet_count=1,
496
- max_steps=t.descriptor.max_steps,
497
- )
498
- for tid, t in TASK_BANK.items()
499
- ]
500
- return TaskListResponse(tasks=tasks)
 
440
  return self._state
441
 
442
 
443
+ # Compatibility bridge:
444
+ # keep the old module path, but route the actual app/runtime through the
445
+ # dataset-backed dense-reward benchmark implementation.
446
+ try:
447
+ from .review_runtime import (
448
+ PythonReviewRuntime as _BenchmarkPythonEnvironment,
449
+ get_current_state,
450
+ get_health_response,
451
+ get_metrics_response,
452
+ get_tasks_response,
453
+ )
454
+ except ImportError:
455
+ from server.review_runtime import ( # type: ignore
456
+ PythonReviewRuntime as _BenchmarkPythonEnvironment,
457
+ get_current_state,
458
+ get_health_response,
459
+ get_metrics_response,
460
+ get_tasks_response,
461
+ )
462
+
463
 
464
+ _GLOBAL_ENV: Optional[_BenchmarkPythonEnvironment] = None
465
 
 
466
 
467
+ def get_environment() -> _BenchmarkPythonEnvironment:
468
+ """Return the shared benchmark environment used by the HTTP app."""
469
+
470
  global _GLOBAL_ENV
471
  if _GLOBAL_ENV is None:
472
+ _GLOBAL_ENV = _BenchmarkPythonEnvironment()
473
  return _GLOBAL_ENV
474
 
 
 
 
 
 
 
 
 
 
 
 
 
475
 
476
+ PythonEnvironment = _BenchmarkPythonEnvironment