uvpatel7271 committed on
Commit
692f802
·
1 Parent(s): cd5c208

Fixes import bugs: consolidates openenv_models into the models package and updates all imports

Browse files
__init__.py CHANGED
@@ -1,8 +1,8 @@
1
  """Public package exports for python_code_review_env."""
2
 
3
  from .client import PythonCodeReviewEnv, PythonEnv
4
- from .models import PyTorchCodeAnalyzerModel
5
- from .openenv_models import (
6
  PythonAction,
7
  PythonCodeReviewAction,
8
  PythonCodeReviewObservation,
 
1
  """Public package exports for python_code_review_env."""
2
 
3
  from .client import PythonCodeReviewEnv, PythonEnv
4
+ from .models import (
5
+ PyTorchCodeAnalyzerModel,
6
  PythonAction,
7
  PythonCodeReviewAction,
8
  PythonCodeReviewObservation,
__pycache__/__init__.cpython-313.pyc CHANGED
Binary files a/__pycache__/__init__.cpython-313.pyc and b/__pycache__/__init__.cpython-313.pyc differ
 
__pycache__/client.cpython-313.pyc CHANGED
Binary files a/__pycache__/client.cpython-313.pyc and b/__pycache__/client.cpython-313.pyc differ
 
__pycache__/models.cpython-313.pyc CHANGED
Binary files a/__pycache__/models.cpython-313.pyc and b/__pycache__/models.cpython-313.pyc differ
 
client.py CHANGED
@@ -7,7 +7,7 @@ from typing import Dict
7
  from openenv.core import EnvClient
8
  from openenv.core.client_types import StepResult
9
 
10
- from .openenv_models import (
11
  PythonCodeReviewAction,
12
  PythonCodeReviewObservation,
13
  PythonCodeReviewState,
 
7
  from openenv.core import EnvClient
8
  from openenv.core.client_types import StepResult
9
 
10
+ from .models import (
11
  PythonCodeReviewAction,
12
  PythonCodeReviewObservation,
13
  PythonCodeReviewState,
graders/bug_fix.py CHANGED
@@ -3,10 +3,10 @@
3
  from __future__ import annotations
4
 
5
  try:
6
- from ..openenv_models import TaskGrade
7
  from ..tasks.catalog import ReviewTask
8
  except ImportError:
9
- from openenv_models import TaskGrade
10
  from tasks.catalog import ReviewTask
11
 
12
  from .shared import (
 
3
  from __future__ import annotations
4
 
5
  try:
6
+ from ..models import TaskGrade
7
  from ..tasks.catalog import ReviewTask
8
  except ImportError:
9
+ from models import TaskGrade
10
  from tasks.catalog import ReviewTask
11
 
12
  from .shared import (
graders/dispatch.py CHANGED
@@ -3,10 +3,10 @@
3
  from __future__ import annotations
4
 
5
  try:
6
- from ..openenv_models import TaskGrade
7
  from ..tasks.catalog import ReviewTask
8
  except ImportError:
9
- from openenv_models import TaskGrade
10
  from tasks.catalog import ReviewTask
11
 
12
  from .bug_fix import grade_bug_fix_task
 
3
  from __future__ import annotations
4
 
5
  try:
6
+ from ..models import TaskGrade
7
  from ..tasks.catalog import ReviewTask
8
  except ImportError:
9
+ from models import TaskGrade
10
  from tasks.catalog import ReviewTask
11
 
12
  from .bug_fix import grade_bug_fix_task
graders/optimization.py CHANGED
@@ -3,10 +3,10 @@
3
  from __future__ import annotations
4
 
5
  try:
6
- from ..openenv_models import TaskGrade
7
  from ..tasks.catalog import ReviewTask
8
  except ImportError:
9
- from openenv_models import TaskGrade
10
  from tasks.catalog import ReviewTask
11
 
12
  from .shared import (
 
3
  from __future__ import annotations
4
 
5
  try:
6
+ from ..models import TaskGrade
7
  from ..tasks.catalog import ReviewTask
8
  except ImportError:
9
+ from models import TaskGrade
10
  from tasks.catalog import ReviewTask
11
 
12
  from .shared import (
graders/shared.py CHANGED
@@ -12,10 +12,10 @@ import traceback
12
  from typing import Any, Callable, Dict, List
13
 
14
  try:
15
- from ..openenv_models import TaskGrade
16
  from ..tasks.catalog import CallCase, ReviewTask
17
  except ImportError:
18
- from openenv_models import TaskGrade
19
  from tasks.catalog import CallCase, ReviewTask
20
 
21
 
 
12
  from typing import Any, Callable, Dict, List
13
 
14
  try:
15
+ from ..models import TaskGrade
16
  from ..tasks.catalog import CallCase, ReviewTask
17
  except ImportError:
18
+ from models import TaskGrade
19
  from tasks.catalog import CallCase, ReviewTask
20
 
21
 
graders/syntax.py CHANGED
@@ -3,10 +3,10 @@
3
  from __future__ import annotations
4
 
5
  try:
6
- from ..openenv_models import TaskGrade
7
  from ..tasks.catalog import ReviewTask
8
  except ImportError:
9
- from openenv_models import TaskGrade
10
  from tasks.catalog import ReviewTask
11
 
12
  from .shared import (
 
3
  from __future__ import annotations
4
 
5
  try:
6
+ from ..models import TaskGrade
7
  from ..tasks.catalog import ReviewTask
8
  except ImportError:
9
+ from models import TaskGrade
10
  from tasks.catalog import ReviewTask
11
 
12
  from .shared import (
models/__init__.py CHANGED
@@ -1,5 +1,66 @@
1
- """PyTorch-backed model wrappers for the analyzer platform."""
 
 
 
 
 
 
2
 
3
  from .pytorch_model import PyTorchCodeAnalyzerModel
4
 
5
- __all__ = ["PyTorchCodeAnalyzerModel"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """PyTorch-backed model wrappers plus OpenEnv schema exports."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import importlib.util
6
+ import sys
7
+ from pathlib import Path
8
 
9
  from .pytorch_model import PyTorchCodeAnalyzerModel
10
 
11
+
12
+ def _load_schema_module():
13
+ schema_path = Path(__file__).resolve().parent.parent / "models.py"
14
+ spec = importlib.util.spec_from_file_location("_python_env_schema_models", schema_path)
15
+ if spec is None or spec.loader is None: # pragma: no cover
16
+ raise ImportError(f"Unable to load schema models from {schema_path}")
17
+ if spec.name in sys.modules:
18
+ return sys.modules[spec.name]
19
+ module = importlib.util.module_from_spec(spec)
20
+ sys.modules[spec.name] = module
21
+ spec.loader.exec_module(module)
22
+ for model_name in (
23
+ "HistoryEntry",
24
+ "RewardDetails",
25
+ "PythonCodeReviewAction",
26
+ "PythonCodeReviewObservation",
27
+ "PythonCodeReviewState",
28
+ "TaskDescriptor",
29
+ "TaskSummary",
30
+ "TaskGrade",
31
+ "HealthResponse",
32
+ ):
33
+ getattr(module, model_name).model_rebuild()
34
+ return module
35
+
36
+
37
+ _schema_models = _load_schema_module()
38
+
39
+ HealthResponse = _schema_models.HealthResponse
40
+ HistoryEntry = _schema_models.HistoryEntry
41
+ PythonAction = _schema_models.PythonAction
42
+ PythonCodeReviewAction = _schema_models.PythonCodeReviewAction
43
+ PythonCodeReviewObservation = _schema_models.PythonCodeReviewObservation
44
+ PythonCodeReviewState = _schema_models.PythonCodeReviewState
45
+ PythonObservation = _schema_models.PythonObservation
46
+ PythonState = _schema_models.PythonState
47
+ RewardDetails = _schema_models.RewardDetails
48
+ TaskDescriptor = _schema_models.TaskDescriptor
49
+ TaskGrade = _schema_models.TaskGrade
50
+ TaskSummary = _schema_models.TaskSummary
51
+
52
+ __all__ = [
53
+ "HealthResponse",
54
+ "HistoryEntry",
55
+ "PyTorchCodeAnalyzerModel",
56
+ "PythonAction",
57
+ "PythonCodeReviewAction",
58
+ "PythonCodeReviewObservation",
59
+ "PythonCodeReviewState",
60
+ "PythonObservation",
61
+ "PythonState",
62
+ "RewardDetails",
63
+ "TaskDescriptor",
64
+ "TaskGrade",
65
+ "TaskSummary",
66
+ ]
openenv_models.py DELETED
@@ -1,146 +0,0 @@
1
- """Typed models for the python_code_review_env environment."""
2
-
3
- from __future__ import annotations
4
-
5
- from typing import Any, Dict, List, Literal, Optional
6
-
7
- from pydantic import BaseModel, Field
8
-
9
- from openenv.core.env_server.types import Action, Observation, State
10
-
11
-
12
- Difficulty = Literal["easy", "medium", "hard"]
13
- TaskKind = Literal["syntax_fix", "bug_fix", "optimization"]
14
- ActionType = Literal["analyze_code", "edit_code", "run_tests", "submit_solution"]
15
-
16
-
17
- class HistoryEntry(BaseModel):
18
- """One environment transition recorded for the agent."""
19
-
20
- step: int = Field(..., ge=0)
21
- action_type: ActionType
22
- status: str = Field(..., description="Short outcome summary.")
23
- reward: float = Field(..., gt=0.0, lt=1.0, description="Reward returned for the step.")
24
-
25
-
26
- class RewardDetails(BaseModel):
27
- """Transparent reward decomposition for debugging and training."""
28
-
29
- value: float = Field(..., gt=0.0, lt=1.0, description="Clamped net reward in (0.0, 1.0).")
30
- syntax_reward: float = Field(default=0.0)
31
- test_reward: float = Field(default=0.0)
32
- correctness_bonus: float = Field(default=0.0)
33
- quality_bonus: float = Field(default=0.0)
34
- error_reduction_bonus: float = Field(default=0.0)
35
- completion_bonus: float = Field(default=0.0)
36
- runtime_bonus: float = Field(default=0.0)
37
- progress_delta: float = Field(default=0.0)
38
- invalid_action_penalty: float = Field(default=0.0)
39
- timeout_penalty: float = Field(default=0.0)
40
- regression_penalty: float = Field(default=0.0)
41
- stagnation_penalty: float = Field(default=0.0)
42
- reason: str = Field(..., description="Human-readable reward explanation.")
43
- prev_score: float = Field(default=0.01, gt=0.0, lt=1.0)
44
- curr_score: float = Field(default=0.01, gt=0.0, lt=1.0)
45
- code_changed: bool = Field(default=False)
46
-
47
-
48
- class PythonCodeReviewAction(Action):
49
- """Action schema exposed to the agent."""
50
-
51
- action_type: ActionType = Field(..., description="Environment action to take.")
52
- code: Optional[str] = Field(
53
- default=None,
54
- description="Updated Python source for edit_code or submit_solution actions.",
55
- )
56
-
57
-
58
- class PythonCodeReviewObservation(Observation):
59
- """Observation returned by reset and step."""
60
-
61
- task_id: str = Field(..., description="Stable task identifier.")
62
- title: str = Field(..., description="Human-readable task title.")
63
- difficulty: Difficulty
64
- task_kind: TaskKind
65
- task_description: str = Field(..., description="Task instructions shown to the agent.")
66
- current_code: str = Field(..., description="Latest code under review.")
67
- errors: str = Field(default="", description="Syntax or execution errors.")
68
- test_results: str = Field(default="", description="Public test and benchmark feedback.")
69
- visible_tests: List[str] = Field(default_factory=list)
70
- history: List[HistoryEntry] = Field(default_factory=list)
71
- attempts_remaining: int = Field(..., ge=0)
72
- last_action_status: str = Field(default="")
73
- last_action_error: Optional[str] = Field(default=None)
74
- score: float = Field(..., gt=0.0, lt=1.0)
75
- reward: float = Field(default=0.1, gt=0.0, lt=1.0)
76
- done: bool = Field(default=False)
77
- reward_details: RewardDetails = Field(
78
- default_factory=lambda: RewardDetails(value=0.1, reason="Environment reset.")
79
- )
80
-
81
-
82
- class PythonCodeReviewState(State):
83
- """Internal environment state exposed through /state."""
84
-
85
- task_id: Optional[str] = Field(default=None)
86
- difficulty: Optional[Difficulty] = Field(default=None)
87
- task_kind: Optional[TaskKind] = Field(default=None)
88
- attempts_remaining: int = Field(default=0, ge=0)
89
- current_code: str = Field(default="")
90
- errors: str = Field(default="")
91
- test_results: str = Field(default="")
92
- history: List[HistoryEntry] = Field(default_factory=list)
93
- score: float = Field(default=0.01, gt=0.0, lt=1.0)
94
- done: bool = Field(default=False)
95
-
96
-
97
- class TaskDescriptor(BaseModel):
98
- """Static task metadata."""
99
-
100
- task_id: str
101
- title: str
102
- difficulty: Difficulty
103
- task_kind: TaskKind
104
- task_description: str
105
- starter_code: str
106
- visible_tests: List[str] = Field(default_factory=list)
107
- repo_summary: str = Field(default="")
108
- changed_files: List[str] = Field(default_factory=list)
109
- available_files: List[str] = Field(default_factory=list)
110
- goal: str = Field(default="")
111
- max_steps: int = Field(..., ge=1)
112
-
113
-
114
- class TaskSummary(BaseModel):
115
- """Compact task listing entry."""
116
-
117
- task_id: str
118
- difficulty: Difficulty
119
- title: str
120
- goal: str = Field(default="")
121
-
122
-
123
- class TaskGrade(BaseModel):
124
- """Deterministic grader output."""
125
-
126
- score: float = Field(..., gt=0.0, lt=1.0)
127
- syntax_score: float = Field(default=0.01, gt=0.0, lt=1.0)
128
- tests_passed: int = Field(default=0, ge=0)
129
- tests_total: int = Field(default=0, ge=0)
130
- quality_score: float = Field(default=0.01, gt=0.0, lt=1.0)
131
- runtime_score: float = Field(default=0.01, gt=0.0, lt=1.0)
132
- timed_out: bool = Field(default=False)
133
- details: Dict[str, Any] = Field(default_factory=dict)
134
-
135
-
136
- class HealthResponse(BaseModel):
137
- """Health payload for smoke tests."""
138
-
139
- status: Literal["ok"] = "ok"
140
- environment: str = "python_code_review_env"
141
- task_count: int = Field(default=0, ge=0)
142
-
143
-
144
- PythonAction = PythonCodeReviewAction
145
- PythonObservation = PythonCodeReviewObservation
146
- PythonState = PythonCodeReviewState
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
server/__pycache__/app.cpython-313.pyc CHANGED
Binary files a/server/__pycache__/app.cpython-313.pyc and b/server/__pycache__/app.cpython-313.pyc differ
 
server/app.py CHANGED
@@ -19,10 +19,10 @@ except Exception:
19
  gr = None # type: ignore[assignment]
20
 
21
  try:
22
- from ..openenv_models import PythonCodeReviewAction, PythonCodeReviewObservation
23
  from .env import PythonCodeReviewEnvironment
24
  except ImportError:
25
- from openenv_models import PythonCodeReviewAction, PythonCodeReviewObservation
26
  from server.env import PythonCodeReviewEnvironment
27
 
28
 
 
19
  gr = None # type: ignore[assignment]
20
 
21
  try:
22
+ from ..models import PythonCodeReviewAction, PythonCodeReviewObservation
23
  from .env import PythonCodeReviewEnvironment
24
  except ImportError:
25
+ from models import PythonCodeReviewAction, PythonCodeReviewObservation
26
  from server.env import PythonCodeReviewEnvironment
27
 
28
 
server/env.py CHANGED
@@ -11,7 +11,7 @@ from openenv.core.env_server.types import EnvironmentMetadata
11
  try:
12
  from ..graders import grade_task
13
  from ..graders.shared import component_score, safe_ratio, strict_score
14
- from ..openenv_models import (
15
  HistoryEntry,
16
  PythonCodeReviewAction,
17
  PythonCodeReviewObservation,
@@ -23,7 +23,7 @@ try:
23
  except ImportError:
24
  from graders import grade_task
25
  from graders.shared import component_score, safe_ratio, strict_score
26
- from openenv_models import (
27
  HistoryEntry,
28
  PythonCodeReviewAction,
29
  PythonCodeReviewObservation,
 
11
  try:
12
  from ..graders import grade_task
13
  from ..graders.shared import component_score, safe_ratio, strict_score
14
+ from ..models import (
15
  HistoryEntry,
16
  PythonCodeReviewAction,
17
  PythonCodeReviewObservation,
 
23
  except ImportError:
24
  from graders import grade_task
25
  from graders.shared import component_score, safe_ratio, strict_score
26
+ from models import (
27
  HistoryEntry,
28
  PythonCodeReviewAction,
29
  PythonCodeReviewObservation,
tests/test_scoring.py CHANGED
@@ -1,7 +1,7 @@
1
  from __future__ import annotations
2
 
3
  from graders import grade_task
4
- from openenv_models import PythonCodeReviewAction
5
  from server.env import PythonCodeReviewEnvironment
6
  from tasks import list_tasks
7
 
 
1
  from __future__ import annotations
2
 
3
  from graders import grade_task
4
+ from models import PythonCodeReviewAction
5
  from server.env import PythonCodeReviewEnvironment
6
  from tasks import list_tasks
7