Spaces:

Rayugacodes
/

Breach-OS

Sleeping

App Files Community

Naman Gupta committed on Apr 6

Commit

55c0431

1 Parent(s): ff8a596

Fix inference grade call when episode not done; update baseline scores from real run

Browse files

Files changed (2) hide show

README.md +1 -1
inference.py +26 -0

README.md CHANGED Viewed

@@ -248,7 +248,7 @@ Scores produced by running `inference.py` with `llama-3.1-8b-instant` against th
 |--------|-------|--------------|-------|
 | Easy   | 0.55  | D            | 5     |
 | Medium | 0.63  | C            | 8     |
-| Hard   | 0.51  | D            | 10    |
 Run baseline yourself:
 ```bash

 |--------|-------|--------------|-------|
 | Easy   | 0.55  | D            | 5     |
 | Medium | 0.63  | C            | 8     |
+| Hard   | 0.63  | C            | 10    |
 Run baseline yourself:
 ```bash

inference.py CHANGED Viewed

@@ -128,6 +128,32 @@ async def run_task(task: str) -> None:
                 if done:
                     break
             # Grade the episode
             try:
                 grade_resp = await http.post("/grade")

                 if done:
                     break
+            # If episode didn't end naturally, exhaust remaining turns to close it
+            state_resp = await http.get("/state")
+            if state_resp.json().get("is_active", False):
+                while True:
+                    step += 1
+                    action = generate_attack(defender_resp, step, prev_success)
+                    action_str = f"strategy={action['strategy_type']} category={action['target_category']} intensity={action['intensity']}"
+                    try:
+                        step_resp = await http.post("/step", json=action)
+                        step_data = step_resp.json()
+                        obs       = step_data["observation"]
+                        reward    = round(float(step_data.get("reward", 0.0)), 2)
+                        done      = bool(obs["episode_done"])
+                        defender_resp = obs["defender_response"]
+                        prev_success  = obs["attack_success_estimate"]
+                        rewards.append(reward)
+                        print(
+                            f"[STEP] step={step} action={action_str!r} "
+                            f"reward={reward:.2f} done={str(done).lower()} error=null",
+                            flush=True,
+                        )
+                    except Exception:
+                        done = True
+                    if done:
+                        break
             # Grade the episode
             try:
                 grade_resp = await http.post("/grade")