suraj-01 committed on
Commit
eea342f
·
1 Parent(s): dd69fa9
EasterEgg.jpeg ADDED
inference.py CHANGED
@@ -67,7 +67,7 @@ except ImportError:
67
  API_BASE_URL = os.environ.get("API_BASE_URL", "https://api.openai.com/v1")
68
  MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o")
69
  HF_TOKEN = os.environ.get("HF_TOKEN")
70
- _API_KEY = HF_TOKEN or os.environ.get("OPENAI_API_KEY", "no-key-set")
71
 
72
  # ── Task registry ─────────────────────────────────────────────────────────────
73
  _TASKS: Dict[str, Dict[str, Any]] = {
@@ -171,6 +171,7 @@ class LLMTriageAgent:
171
  def act(self, obs: Observation) -> Action:
172
  if not obs.alerts:
173
  raise ValueError("act() called with empty alerts")
 
174
  text = self._call_api(_build_user_message(obs))
175
  if text is None:
176
  self.fallbacks += 1
@@ -306,7 +307,7 @@ def run_episode(agent: LLMTriageAgent, task_id: str, episode: int, seed: int) ->
306
 
307
  def run_baseline(
308
  tasks: List[str],
309
- num_episodes: int = 3,
310
  seed_offset: int = 42,
311
  ) -> Dict[str, Any]:
312
  """
@@ -383,9 +384,9 @@ if __name__ == "__main__":
383
  )
384
  p.add_argument("--task", choices=["easy", "medium", "hard"],
385
  default=None, help="Single task (default: all three)")
386
- p.add_argument("--n", type=int, default=3,
387
  metavar="N",
388
- help="Episodes per task (default: 3 β€” fits in 20 min budget)")
389
  p.add_argument("--seed", type=int, default=42,
390
  help="Base random seed (default: 42)")
391
  args = p.parse_args()
 
67
  API_BASE_URL = os.environ.get("API_BASE_URL", "https://api.openai.com/v1")
68
  MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o")
69
  HF_TOKEN = os.environ.get("HF_TOKEN")
70
+ _API_KEY = os.environ.get("API_KEY") or HF_TOKEN or os.environ.get("OPENAI_API_KEY", "no-key-set")
71
 
72
  # ── Task registry ─────────────────────────────────────────────────────────────
73
  _TASKS: Dict[str, Dict[str, Any]] = {
 
171
  def act(self, obs: Observation) -> Action:
172
  if not obs.alerts:
173
  raise ValueError("act() called with empty alerts")
174
+
175
  text = self._call_api(_build_user_message(obs))
176
  if text is None:
177
  self.fallbacks += 1
 
307
 
308
  def run_baseline(
309
  tasks: List[str],
310
+ num_episodes: int = 1,
311
  seed_offset: int = 42,
312
  ) -> Dict[str, Any]:
313
  """
 
384
  )
385
  p.add_argument("--task", choices=["easy", "medium", "hard"],
386
  default=None, help="Single task (default: all three)")
387
+ p.add_argument("--n", type=int, default=1,
388
  metavar="N",
389
+ help="Episodes per task (default: 1 β€” strict API budget)")
390
  p.add_argument("--seed", type=int, default=42,
391
  help="Base random seed (default: 42)")
392
  args = p.parse_args()
requirements.txt CHANGED
@@ -6,6 +6,7 @@
6
  # ── Core environment ──────────────────────────────────────────────────────────
7
  numpy>=1.24.0
8
  openenv>=0.1.0
 
9
  pydantic>=2.0.0
10
 
11
  # ── Web framework (FastAPI server) ────────────────────────────────────────────
 
6
  # ── Core environment ──────────────────────────────────────────────────────────
7
  numpy>=1.24.0
8
  openenv>=0.1.0
9
+ openenv-core>=0.2.0
10
  pydantic>=2.0.0
11
 
12
  # ── Web framework (FastAPI server) ────────────────────────────────────────────
rewards/reward.py CHANGED
@@ -315,6 +315,7 @@ def calculate_reward(
315
  components = {k: v * multiplier for k, v in components.items()}
316
 
317
  total_reward: float = sum(components.values())
 
318
 
319
  # -----------------------------------------------------------------------
320
  # Info payload β€” consumed by graders and evaluation scripts
@@ -331,10 +332,11 @@ def calculate_reward(
331
  action_type, is_critical, is_false_positive, resource_constrained
332
  ),
333
  "task_multiplier": multiplier,
 
334
  }
335
 
336
  return Reward(
337
- value=total_reward,
338
  components=components,
339
  info=info,
340
  )
@@ -611,7 +613,8 @@ if __name__ == "__main__":
611
  for desc, act, alert, cfg, expected in cases:
612
  action = Action(alert_id=alert.id, action_type=act)
613
  result = calculate_reward(action, alert, cfg)
614
- ok = abs(result.value - expected) < 1e-4
 
615
  status = "PASS" if ok else "FAIL"
616
  if not ok:
617
  all_pass = False
 
315
  components = {k: v * multiplier for k, v in components.items()}
316
 
317
  total_reward: float = sum(components.values())
318
+ norm_reward: float = max(0.01, min(0.99, (total_reward + 40.0) / 80.0))
319
 
320
  # -----------------------------------------------------------------------
321
  # Info payload β€” consumed by graders and evaluation scripts
 
332
  action_type, is_critical, is_false_positive, resource_constrained
333
  ),
334
  "task_multiplier": multiplier,
335
+ "raw_reward": total_reward,
336
  }
337
 
338
  return Reward(
339
+ value=norm_reward,
340
  components=components,
341
  info=info,
342
  )
 
613
  for desc, act, alert, cfg, expected in cases:
614
  action = Action(alert_id=alert.id, action_type=act)
615
  result = calculate_reward(action, alert, cfg)
616
+ normalized_expected = max(0.01, min(0.99, (expected + 40.0) / 80.0))
617
+ ok = abs(result.value - normalized_expected) < 1e-4
618
  status = "PASS" if ok else "FAIL"
619
  if not ok:
620
  all_pass = False
src/adaptive_alert_triage/env.py CHANGED
@@ -76,22 +76,22 @@ except ImportError:
76
 
77
  _TASK_CONFIGS: Dict[str, Dict[str, Any]] = {
78
  "easy": {
79
- "max_steps": 30,
80
- "failure_threshold": 5,
81
  "max_investigations": None, # unconstrained
82
  "correlation_probability": 0.10,
83
  "description": "Basic alert prioritisation β€” no resource constraint.",
84
  },
85
  "medium": {
86
- "max_steps": 40,
87
- "failure_threshold": 5,
88
  "max_investigations": 3, # K = 3 per step
89
  "correlation_probability": 0.20,
90
  "description": "Resource-constrained triage β€” K=3 investigations/step.",
91
  },
92
  "hard": {
93
- "max_steps": 50,
94
- "failure_threshold": 3, # stricter
95
  "max_investigations": 3,
96
  "correlation_probability": 0.40,
97
  "description": (
@@ -267,7 +267,7 @@ class AdaptiveAlertTriageEnv(gym.Env):
267
  alert = self._get_alert_by_id(action.alert_id)
268
  if alert is None:
269
  reward = Reward(
270
- value=-5.0,
271
  components={"invalid_action": -5.0},
272
  info={"error": f"Alert ID '{action.alert_id}' not found in queue"},
273
  )
@@ -284,7 +284,7 @@ class AdaptiveAlertTriageEnv(gym.Env):
284
  ):
285
  if self.investigations_used >= self.max_investigations_per_step:
286
  reward = Reward(
287
- value=-3.0,
288
  components={"resource_budget_exceeded": -3.0},
289
  info={
290
  "error": "Investigation budget exhausted for this step",
 
76
 
77
  _TASK_CONFIGS: Dict[str, Dict[str, Any]] = {
78
  "easy": {
79
+ "max_steps": 10,
80
+ "failure_threshold": 2,
81
  "max_investigations": None, # unconstrained
82
  "correlation_probability": 0.10,
83
  "description": "Basic alert prioritisation β€” no resource constraint.",
84
  },
85
  "medium": {
86
+ "max_steps": 15,
87
+ "failure_threshold": 3,
88
  "max_investigations": 3, # K = 3 per step
89
  "correlation_probability": 0.20,
90
  "description": "Resource-constrained triage β€” K=3 investigations/step.",
91
  },
92
  "hard": {
93
+ "max_steps": 20,
94
+ "failure_threshold": 2, # stricter
95
  "max_investigations": 3,
96
  "correlation_probability": 0.40,
97
  "description": (
 
267
  alert = self._get_alert_by_id(action.alert_id)
268
  if alert is None:
269
  reward = Reward(
270
+ value=0.01,
271
  components={"invalid_action": -5.0},
272
  info={"error": f"Alert ID '{action.alert_id}' not found in queue"},
273
  )
 
284
  ):
285
  if self.investigations_used >= self.max_investigations_per_step:
286
  reward = Reward(
287
+ value=0.01,
288
  components={"resource_budget_exceeded": -3.0},
289
  info={
290
  "error": "Investigation budget exhausted for this step",
src/adaptive_alert_triage/models.py CHANGED
@@ -222,7 +222,14 @@ class Reward(BaseModel):
222
  info: Debugging / logging extras (ground-truth reveal, etc.).
223
  """
224
 
225
- value: float = Field(..., description="Total scalar reward")
 
 
 
 
 
 
 
226
  components: Dict[str, float] = Field(
227
  default_factory=dict, description="Per-component reward breakdown"
228
  )
 
222
  info: Debugging / logging extras (ground-truth reveal, etc.).
223
  """
224
 
225
+ value: float = Field(..., ge=0.0, le=1.0, description="Total scalar reward in [0.0, 1.0]")
226
+
227
+ @field_validator("value", mode="before")
228
+ @classmethod
229
+ def clamp_reward_value(cls, v: float) -> float:
230
+ """Silently clamp reward value to [0.01, 0.99] β€” strict (0, 1) bounds."""
231
+ return float(max(0.01, min(0.99, float(v))))
232
+
233
  components: Dict[str, float] = Field(
234
  default_factory=dict, description="Per-component reward breakdown"
235
  )
src/adaptive_alert_triage/validate.py CHANGED
@@ -123,7 +123,7 @@ class OpenEnvValidator:
123
  action_ok = restored.alert_id == action.alert_id
124
  self.check("Action serialization round-trip", action_ok)
125
 
126
- reward = Reward(value=10.0, components={"test": 10.0})
127
  restored = Reward.model_validate_json(reward.model_dump_json())
128
  reward_ok = restored.value == reward.value
129
  self.check("Reward serialization round-trip", reward_ok)
 
123
  action_ok = restored.alert_id == action.alert_id
124
  self.check("Action serialization round-trip", action_ok)
125
 
126
+ reward = Reward(value=0.5, components={"test": 0.5})
127
  restored = Reward.model_validate_json(reward.model_dump_json())
128
  reward_ok = restored.value == reward.value
129
  self.check("Reward serialization round-trip", reward_ok)