Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
- inference.py +1 -1
- server/devops_sandbox_environment.py +7 -7
inference.py
CHANGED
|
@@ -185,7 +185,7 @@ def main():
|
|
| 185 |
print(f"[END] success={success_str} steps={steps_taken} score={final_score:.2f} rewards={rewards_str}", flush=True)
|
| 186 |
except Exception as e:
|
| 187 |
# Make sure to emit END log even on catastrophic wrapper failures so Hackathon doesn't crash inference.py
|
| 188 |
-
print(f"[END] success=false steps=0 score=0.
|
| 189 |
|
| 190 |
if __name__ == "__main__":
|
| 191 |
main()
|
|
|
|
| 185 |
print(f"[END] success={success_str} steps={steps_taken} score={final_score:.2f} rewards={rewards_str}", flush=True)
|
| 186 |
except Exception as e:
|
| 187 |
# Make sure to emit END log even on catastrophic wrapper failures so Hackathon doesn't crash inference.py
|
| 188 |
+
print(f"[END] success=false steps=0 score=0.01 rewards=0.01", flush=True)
|
| 189 |
|
| 190 |
if __name__ == "__main__":
|
| 191 |
main()
|
server/devops_sandbox_environment.py
CHANGED
|
@@ -50,7 +50,7 @@ class DevOpsSandbox(Environment):
|
|
| 50 |
super().__init__()
|
| 51 |
self._state = State(episode_id=str(uuid4()), step_count=0)
|
| 52 |
self._current_dir: str = "/app"
|
| 53 |
-
self._last_score: float = 0.
|
| 54 |
self._current_task: str = "hard"
|
| 55 |
|
| 56 |
# When running on Windows locally, `/app` and `/app_backup` don't exist naturally,
|
|
@@ -80,7 +80,7 @@ class DevOpsSandbox(Environment):
|
|
| 80 |
"""Reset the environment state by copying the backup to the working dir."""
|
| 81 |
eid = episode_id or str(uuid4())
|
| 82 |
self._state = State(episode_id=eid, step_count=0)
|
| 83 |
-
self._last_score = 0.
|
| 84 |
self._current_dir = self._app_dir
|
| 85 |
self._current_task = kwargs.get("task_name", "hard")
|
| 86 |
|
|
@@ -144,10 +144,10 @@ class DevOpsSandbox(Environment):
|
|
| 144 |
stderr="",
|
| 145 |
current_dir=self._current_dir,
|
| 146 |
task_id=self._current_task,
|
| 147 |
-
grader_score=0.
|
| 148 |
grader_feedback="Episode started. Fix the bugs!",
|
| 149 |
done=False,
|
| 150 |
-
reward=0.
|
| 151 |
)
|
| 152 |
|
| 153 |
def step(
|
|
@@ -169,7 +169,7 @@ class DevOpsSandbox(Environment):
|
|
| 169 |
grader_score=self._last_score,
|
| 170 |
grader_feedback="No command executed.",
|
| 171 |
done=False,
|
| 172 |
-
reward=0.
|
| 173 |
)
|
| 174 |
|
| 175 |
# Handle 'cd' commands manually since subprocess run is transient
|
|
@@ -194,7 +194,7 @@ class DevOpsSandbox(Environment):
|
|
| 194 |
score, feedback = self._grade()
|
| 195 |
reward = max(0.0, score - self._last_score)
|
| 196 |
self._last_score = score
|
| 197 |
-
episode_done = (score >=
|
| 198 |
|
| 199 |
return TerminalObservation(
|
| 200 |
stdout=stdout,
|
|
@@ -217,7 +217,7 @@ class DevOpsSandbox(Environment):
|
|
| 217 |
score, feedback = self._grade()
|
| 218 |
reward = max(0.0, score - self._last_score)
|
| 219 |
self._last_score = score
|
| 220 |
-
episode_done = (score >=
|
| 221 |
|
| 222 |
return TerminalObservation(
|
| 223 |
stdout=stdout,
|
|
|
|
| 50 |
super().__init__()
|
| 51 |
self._state = State(episode_id=str(uuid4()), step_count=0)
|
| 52 |
self._current_dir: str = "/app"
|
| 53 |
+
self._last_score: float = 0.01
|
| 54 |
self._current_task: str = "hard"
|
| 55 |
|
| 56 |
# When running on Windows locally, `/app` and `/app_backup` don't exist naturally,
|
|
|
|
| 80 |
"""Reset the environment state by copying the backup to the working dir."""
|
| 81 |
eid = episode_id or str(uuid4())
|
| 82 |
self._state = State(episode_id=eid, step_count=0)
|
| 83 |
+
self._last_score = 0.01
|
| 84 |
self._current_dir = self._app_dir
|
| 85 |
self._current_task = kwargs.get("task_name", "hard")
|
| 86 |
|
|
|
|
| 144 |
stderr="",
|
| 145 |
current_dir=self._current_dir,
|
| 146 |
task_id=self._current_task,
|
| 147 |
+
grader_score=0.01,
|
| 148 |
grader_feedback="Episode started. Fix the bugs!",
|
| 149 |
done=False,
|
| 150 |
+
reward=0.01,
|
| 151 |
)
|
| 152 |
|
| 153 |
def step(
|
|
|
|
| 169 |
grader_score=self._last_score,
|
| 170 |
grader_feedback="No command executed.",
|
| 171 |
done=False,
|
| 172 |
+
reward=0.01,
|
| 173 |
)
|
| 174 |
|
| 175 |
# Handle 'cd' commands manually since subprocess run is transient
|
|
|
|
| 194 |
score, feedback = self._grade()
|
| 195 |
reward = max(0.0, score - self._last_score)
|
| 196 |
self._last_score = score
|
| 197 |
+
episode_done = (score >= 0.99) or (self._state.step_count >= MAX_STEPS)
|
| 198 |
|
| 199 |
return TerminalObservation(
|
| 200 |
stdout=stdout,
|
|
|
|
| 217 |
score, feedback = self._grade()
|
| 218 |
reward = max(0.0, score - self._last_score)
|
| 219 |
self._last_score = score
|
| 220 |
+
episode_done = (score >= 0.99) or (self._state.step_count >= MAX_STEPS)
|
| 221 |
|
| 222 |
return TerminalObservation(
|
| 223 |
stdout=stdout,
|