DEVessi committed on
Commit
4fd3038
·
verified ·
1 Parent(s): b761978

Upload folder using huggingface_hub

Browse files
inference.py CHANGED
@@ -185,7 +185,7 @@ def main():
185
  print(f"[END] success={success_str} steps={steps_taken} score={final_score:.2f} rewards={rewards_str}", flush=True)
186
  except Exception as e:
187
  # Make sure to emit END log even on catastrophic wrapper failures so Hackathon doesn't crash inference.py
188
- print(f"[END] success=false steps=0 score=0.00 rewards=0.00", flush=True)
189
 
190
  if __name__ == "__main__":
191
  main()
 
185
  print(f"[END] success={success_str} steps={steps_taken} score={final_score:.2f} rewards={rewards_str}", flush=True)
186
  except Exception as e:
187
  # Make sure to emit END log even on catastrophic wrapper failures so Hackathon doesn't crash inference.py
188
+ print(f"[END] success=false steps=0 score=0.01 rewards=0.01", flush=True)
189
 
190
  if __name__ == "__main__":
191
  main()
server/devops_sandbox_environment.py CHANGED
@@ -50,7 +50,7 @@ class DevOpsSandbox(Environment):
50
  super().__init__()
51
  self._state = State(episode_id=str(uuid4()), step_count=0)
52
  self._current_dir: str = "/app"
53
- self._last_score: float = 0.0
54
  self._current_task: str = "hard"
55
 
56
  # When running on Windows locally, `/app` and `/app_backup` don't exist naturally,
@@ -80,7 +80,7 @@ class DevOpsSandbox(Environment):
80
  """Reset the environment state by copying the backup to the working dir."""
81
  eid = episode_id or str(uuid4())
82
  self._state = State(episode_id=eid, step_count=0)
83
- self._last_score = 0.0
84
  self._current_dir = self._app_dir
85
  self._current_task = kwargs.get("task_name", "hard")
86
 
@@ -144,10 +144,10 @@ class DevOpsSandbox(Environment):
144
  stderr="",
145
  current_dir=self._current_dir,
146
  task_id=self._current_task,
147
- grader_score=0.0,
148
  grader_feedback="Episode started. Fix the bugs!",
149
  done=False,
150
- reward=0.0,
151
  )
152
 
153
  def step(
@@ -169,7 +169,7 @@ class DevOpsSandbox(Environment):
169
  grader_score=self._last_score,
170
  grader_feedback="No command executed.",
171
  done=False,
172
- reward=0.0,
173
  )
174
 
175
  # Handle 'cd' commands manually since subprocess run is transient
@@ -194,7 +194,7 @@ class DevOpsSandbox(Environment):
194
  score, feedback = self._grade()
195
  reward = max(0.0, score - self._last_score)
196
  self._last_score = score
197
- episode_done = (score >= 1.0) or (self._state.step_count >= MAX_STEPS)
198
 
199
  return TerminalObservation(
200
  stdout=stdout,
@@ -217,7 +217,7 @@ class DevOpsSandbox(Environment):
217
  score, feedback = self._grade()
218
  reward = max(0.0, score - self._last_score)
219
  self._last_score = score
220
- episode_done = (score >= 1.0) or (self._state.step_count >= MAX_STEPS)
221
 
222
  return TerminalObservation(
223
  stdout=stdout,
 
50
  super().__init__()
51
  self._state = State(episode_id=str(uuid4()), step_count=0)
52
  self._current_dir: str = "/app"
53
+ self._last_score: float = 0.01
54
  self._current_task: str = "hard"
55
 
56
  # When running on Windows locally, `/app` and `/app_backup` don't exist naturally,
 
80
  """Reset the environment state by copying the backup to the working dir."""
81
  eid = episode_id or str(uuid4())
82
  self._state = State(episode_id=eid, step_count=0)
83
+ self._last_score = 0.01
84
  self._current_dir = self._app_dir
85
  self._current_task = kwargs.get("task_name", "hard")
86
 
 
144
  stderr="",
145
  current_dir=self._current_dir,
146
  task_id=self._current_task,
147
+ grader_score=0.01,
148
  grader_feedback="Episode started. Fix the bugs!",
149
  done=False,
150
+ reward=0.01,
151
  )
152
 
153
  def step(
 
169
  grader_score=self._last_score,
170
  grader_feedback="No command executed.",
171
  done=False,
172
+ reward=0.01,
173
  )
174
 
175
  # Handle 'cd' commands manually since subprocess run is transient
 
194
  score, feedback = self._grade()
195
  reward = max(0.0, score - self._last_score)
196
  self._last_score = score
197
+ episode_done = (score >= 0.99) or (self._state.step_count >= MAX_STEPS)
198
 
199
  return TerminalObservation(
200
  stdout=stdout,
 
217
  score, feedback = self._grade()
218
  reward = max(0.0, score - self._last_score)
219
  self._last_score = score
220
+ episode_done = (score >= 0.99) or (self._state.step_count >= MAX_STEPS)
221
 
222
  return TerminalObservation(
223
  stdout=stdout,