Initial Set-up
Files changed:
- .DS_Store +0 -0
- .gitignore +11 -0
- Dockerfile +5 -1
- README.md +190 -0
- app.py +14 -1
- comms_bus.py +64 -0
- environment.py +29 -8
- inference.py +19 -23
- openenv.yaml +2 -2
- pyproject.toml +30 -0
- requirements.txt +5 -5
- scenarios.py +3 -2
- server/__init__.py +1 -0
- server/app.py +16 -0
- specialists.py +18 -4
- task_graph.py +33 -10
- tests/test_environment.py +55 -0
- tests/test_graders.py +46 -0
- tests/test_specialists.py +35 -0
- training/colab_notebook.ipynb +54 -0
- training/evaluate.py +153 -0
- training/train.py +118 -0
- uv.lock +3 -0
.DS_Store
DELETED
Binary file (6.15 kB)
.gitignore
ADDED
@@ -0,0 +1,11 @@
.DS_Store
__pycache__/
*.py[cod]
.pytest_cache/
.mypy_cache/
.ruff_cache/
.venv/
outputs/
.env
.env.*
!.env.example
Dockerfile
CHANGED
@@ -14,9 +14,13 @@ COPY graders.py .
COPY specialists.py .
COPY trust_ledger.py .
COPY task_graph.py .
+COPY comms_bus.py .
COPY scenarios.py .
COPY openenv.yaml .
COPY inference.py .
+COPY README.md .
+COPY pyproject.toml .
+COPY server ./server

# Create outputs directory for baseline scores
RUN mkdir -p outputs
@@ -29,4 +33,4 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/health')" || exit 1

# Start server
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
README.md
CHANGED
@@ -0,0 +1,190 @@
# SENTINEL

Self-Evolving Network for Training Intelligent Agents Under Adversarial Long-Horizon Tasks.

SENTINEL is an OpenEnv-compatible RL environment for one core skill: training an orchestrator to decide who to trust, when to verify, how to recover, and how to finish long multi-agent work when specialist agents are unreliable or adversarial.

## Why It Matters

Modern agent systems fail in the same pattern:

1. A long task is decomposed into many steps.
2. The orchestrator delegates to sub-agents or tools.
3. One specialist returns a confident but wrong result.
4. The system trusts it, builds on it, and drifts into failure.

SENTINEL turns that failure mode into a trainable environment. The model only sees behavior: returned outcomes, confidence, stakes, history, and trust scores. It never sees hidden specialist identities.

## Environment Shape

- API: `reset()`, `step(action)`, `state()`
- Runtime: FastAPI on port `7860`
- Tasks: `task1`, `task2`, `task3`
- Specialists: 5 scripted FSM agents with shuffled hidden profiles
- Rewards: per-step reward plus terminal score, normalized to `0.0-1.0`
- Dataset: 120 abstract multi-agent scenarios
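For a quick feel of the loop, here is a minimal in-process rollout sketch. It assumes the `SentinelEnv` Python API exercised by the tests in this commit (`reset()` and `step()` returning a dict with `observation`, `info`, `reward`, and `done`); it is an illustration, not the shipped baseline policy.

```python
# Minimal in-process rollout sketch; field names follow tests/test_environment.py.
from environment import SentinelEnv

env = SentinelEnv()
result = env.reset(task_type="task1", seed=0)

while not result["done"]:
    obs = result["observation"]
    # Naive policy for illustration: always delegate to the first visible specialist slot.
    result = env.step({
        "session_id": obs["session_id"],
        "task_type": obs["task_type"],
        "action_type": "delegate",
        "specialist_id": obs["available_specialists"][0],
    })

print(result["info"]["score"], result["info"]["completion_rate"])
```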
## Specialist Behaviors

| Public Slot | Hidden Behavior |
| --- | --- |
| S0-S4 | Public ids are shuffled every episode |

Hidden profiles:

- `AccurateSlow`: 90 percent accurate, costs extra steps.
- `OverconfidentFast`: quick and confident, wrong 40 percent of the time.
- `DomainBound`: strong on analysis/verification, weak elsewhere.
- `Adversarial`: benign at low stakes, poisons high-stakes steps.
- `Degrading`: strong early, gets worse with reuse.

## Tasks

| Task | Difficulty | Goal |
| --- | --- | --- |
| `task1` | Easy | Learn basic delegation over a 10-subtask chain. |
| `task2` | Medium | Complete a 15-subtask DAG and build calibrated trust. |
| `task3` | Hard | Complete a 20-subtask adversarial mission under high-stakes poison attempts. |
## Reward Model

Rewards are deterministic and boundary-exclusive per step: `(0.01, 0.99)`.

Task 3 terminal score:

```text
0.35 * completion_rate
+ 0.30 * adversarial_detection_rate
+ 0.25 * trust_calibration
+ 0.10 * efficiency
```

The episode `score` exposed in `info` and inference logs is normalized to `0.0-1.0`.
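As a worked example of the weighting above (a sketch only; the actual grading lives in `graders.py`, which is not part of this diff), take hypothetical component values of 0.90 completion, 2 of 3 adversarial attempts caught, 0.80 trust calibration, and 0.50 efficiency:

```python
# Worked example of the task3 terminal weighting shown above.
# Component values are hypothetical; the real computation is in graders.py.
completion_rate = 0.90
adversarial_detection_rate = 2 / 3
trust_calibration = 0.80
efficiency = 0.50

terminal = (
    0.35 * completion_rate
    + 0.30 * adversarial_detection_rate
    + 0.25 * trust_calibration
    + 0.10 * efficiency
)
print(round(terminal, 3))  # 0.765
```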
## API

```bash
curl http://localhost:7860/health
curl http://localhost:7860/
curl http://localhost:7860/metadata
curl http://localhost:7860/tasks
curl http://localhost:7860/schema
```

Start an episode:

```bash
curl -X POST http://localhost:7860/reset \
  -H "Content-Type: application/json" \
  -d '{"task_type":"task3","seed":42}'
```

Step:

```bash
curl -X POST "http://localhost:7860/step?session_id=<SESSION_ID>" \
  -H "Content-Type: application/json" \
  -d '{
    "session_id":"<SESSION_ID>",
    "task_type":"task3",
    "action_type":"delegate",
    "specialist_id":"S2",
    "reasoning":"S2 has the best observed trust score"
  }'
```
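The same flow can be scripted from Python with `httpx` (already a dependency). This is a sketch that mirrors the curl calls above and assumes the request and response shapes they show, including `info.session_id`, `reward.value`, and `done`:

```python
# Sketch of the HTTP flow mirrored from the curl examples above.
# Assumes a server running locally on port 7860.
import httpx

with httpx.Client(base_url="http://localhost:7860", timeout=30.0) as client:
    reset = client.post("/reset", json={"task_type": "task3", "seed": 42}).json()
    session_id = reset["info"]["session_id"]

    step = client.post(
        "/step",
        params={"session_id": session_id},
        json={
            "session_id": session_id,
            "task_type": "task3",
            "action_type": "delegate",
            "specialist_id": "S2",
            "reasoning": "S2 has the best observed trust score",
        },
    ).json()
    print(step["reward"]["value"], step["done"])
```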
## Project Structure

```text
sentinel-env/
|-- app.py
|-- environment.py
|-- models.py
|-- graders.py
|-- specialists.py
|-- trust_ledger.py
|-- task_graph.py
|-- comms_bus.py
|-- scenarios.py
|-- inference.py
|-- openenv.yaml
|-- Dockerfile
|-- requirements.txt
|-- training/
|   |-- train.py
|   |-- evaluate.py
|   `-- colab_notebook.ipynb
`-- tests/
    |-- test_environment.py
    |-- test_graders.py
    `-- test_specialists.py
```

## Local Setup

```bash
python3 -m venv .venv
source .venv/bin/activate
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pytest
```

Run checks:

```bash
python -m py_compile app.py environment.py models.py graders.py specialists.py trust_ledger.py task_graph.py scenarios.py inference.py
python -m pytest -q
python inference.py
python training/evaluate.py --episodes 20 --task task3
```

Run the server:

```bash
uvicorn app:app --host 0.0.0.0 --port 7860
```

Validate with OpenEnv:

```bash
pip install openenv-core==0.2.3
openenv validate . --json
```

Docker:

```bash
docker build -t sentinel-env .
docker run -p 7860:7860 sentinel-env
```
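The container healthcheck in the Dockerfile is the same probe you can run by hand against a local or containerized server; a one-off sketch:

```python
# One-off health probe, equivalent to the Dockerfile HEALTHCHECK command.
import urllib.request

with urllib.request.urlopen("http://localhost:7860/health") as resp:
    print(resp.status, resp.read().decode())
```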
## Baselines

`inference.py` runs 30 deterministic heuristic episodes and emits only strict hackathon logs:

```text
[START] task=SCN-TASK3-001 env=sentinel-env model=heuristic-baseline
[STEP] step=1 action=delegate:S0 reward=0.99 done=false error=null
[END] success=true steps=20 score=0.812 rewards=...
```

`training/evaluate.py` compares:

- `random`
- `heuristic`
- `oracle_lite`

The evaluator writes `outputs/evaluation_results.json` for demo charts.

## Hackathon Alignment

- Theme 1: multi-agent interaction, partial observability, adversarial specialist, trust calibration.
- Theme 2: long-horizon task graphs with delayed terminal reward and failure recovery.
- Theme 3.1: professional agent orchestration workflow with API-style actions.
- Theme 4: profile shuffle creates a self-resetting curriculum.
- Theme 5: targets a real AI systems failure: blind trust inside agent pipelines.

Winning demo line:

> Agents fail because they trust blindly. SENTINEL trains skepticism, recovery, and oversight.
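The strict `[STEP]`/`[END]` lines are easy to post-process. A small parsing sketch, assuming exactly the log format shown in the Baselines section above:

```python
# Parse [END] lines emitted by inference.py into (steps, score) pairs.
import re

END_RE = re.compile(r"\[END\] success=(\w+) steps=(\d+) score=([\d.]+)")

def parse_end_lines(log_text: str) -> list[tuple[int, float]]:
    results = []
    for match in END_RE.finditer(log_text):
        success, steps, score = match.groups()
        if success == "true":
            results.append((int(steps), float(score)))
    return results

print(parse_end_lines("[END] success=true steps=20 score=0.812 rewards=..."))  # [(20, 0.812)]
```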
app.py
CHANGED
@@ -59,6 +59,19 @@ def health():
    return {"status": "ok", "environment": "sentinel-env", "version": "1.0.0"}


+@app.get("/")
+def root():
+    return {
+        "name": "sentinel-env",
+        "status": "ok",
+        "summary": (
+            "SENTINEL trains an orchestrator to calibrate trust, verify risky "
+            "outputs, recover from failures, and finish long multi-agent tasks."
+        ),
+        "routes": ["/health", "/metadata", "/tasks", "/schema", "/grader", "/reset", "/step", "/state"],
+    }
+
+
@app.get("/metadata")
def metadata():
    summary = scenario_summary()
@@ -203,4 +216,4 @@ def mcp(body: dict[str, Any]):
if __name__ == "__main__":
    import uvicorn
    port = int(os.environ.get("PORT", 7860))
-    uvicorn.run("app:app", host="0.0.0.0", port=port, reload=False)
+    uvicorn.run("app:app", host="0.0.0.0", port=port, reload=False)
comms_bus.py
CHANGED
@@ -0,0 +1,64 @@
from __future__ import annotations

from dataclasses import dataclass, field
from time import time
from typing import Any


@dataclass(frozen=True)
class CommsMessage:
    sender: str
    receiver: str
    payload: dict[str, Any]
    timestamp: float = field(default_factory=time)


class CommsBus:
    """
    Lightweight message log for partial-observability experiments.

    The environment keeps hidden specialist metadata internally, while the
    orchestrator-facing view only exposes the public response, confidence, and
    outcome summary. This makes the trust problem behavioral instead of
    identity based.
    """

    def __init__(self, partial_observability: bool = True) -> None:
        self.partial_observability = partial_observability
        self._messages: list[CommsMessage] = []

    def reset(self) -> None:
        self._messages.clear()

    def route(self, sender: str, receiver: str, payload: dict[str, Any]) -> dict[str, Any]:
        visible_payload = self._filter_payload(payload)
        self._messages.append(
            CommsMessage(sender=sender, receiver=receiver, payload=visible_payload)
        )
        return visible_payload

    def history(self, receiver: str | None = None) -> list[dict[str, Any]]:
        messages = self._messages
        if receiver is not None:
            messages = [msg for msg in messages if msg.receiver == receiver]
        return [
            {
                "sender": msg.sender,
                "receiver": msg.receiver,
                "payload": dict(msg.payload),
                "timestamp": round(msg.timestamp, 3),
            }
            for msg in messages
        ]

    def _filter_payload(self, payload: dict[str, Any]) -> dict[str, Any]:
        if not self.partial_observability:
            return dict(payload)

        hidden_keys = {
            "internal_id",
            "internal_profile",
            "ground_truth_reliability",
            "adversarial_slot",
        }
        return {key: value for key, value in payload.items() if key not in hidden_keys}
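A quick usage sketch for the bus above, showing that hidden keys never reach the orchestrator-facing view when partial observability is on (payload values are hypothetical):

```python
# Usage sketch for CommsBus: hidden keys are stripped from the visible payload.
from comms_bus import CommsBus

bus = CommsBus(partial_observability=True)
visible = bus.route(
    sender="S2",
    receiver="orchestrator",
    payload={
        "response": "Checksum verified",
        "confidence": 0.91,
        "internal_profile": "Adversarial",    # hidden key, filtered out
        "ground_truth_reliability": 0.15,     # hidden key, filtered out
    },
)
print(sorted(visible))                    # ['confidence', 'response']
print(len(bus.history("orchestrator")))   # 1
```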
environment.py
CHANGED
@@ -60,6 +60,7 @@ class SentinelEnv:
        self.step_count: int = 0
        self.max_steps: int = 0
        self.total_reward: float = 0.0
+        self.reward_events: int = 0
        self.last_reward: float = 0.0
        self.done: bool = False
        self.episode_status: str = "active"
@@ -96,6 +97,7 @@ class SentinelEnv:
        self.step_count = 0
        self.max_steps = MAX_STEPS[scenario["task_type"]]
        self.total_reward = 0.0
+        self.reward_events = 0
        self.last_reward = 0.0
        self.done = False
        self.episode_status = "active"
@@ -149,7 +151,7 @@ class SentinelEnv:
        subtask = node.subtask
        stakes = subtask["stakes"]

+        step_cost = 1

        # --- Execute specialist or self-solve ---
        if action_type == "skip":
@@ -160,8 +162,8 @@ class SentinelEnv:

        elif action_type == "solve_independently":
            # Agent solves itself — always correct (no specialist involved)
-            # But costs 2 steps (enforced via max_steps budget pressure)
+            # But costs 2 steps (enforced via max_steps budget pressure).
+            step_cost = 2
            outcome = 1.0
            was_adversarial = False
            self._graph.record_outcome(subtask["id"], outcome, "SELF")
@@ -170,7 +172,7 @@ class SentinelEnv:
        elif action_type == "verify":
            # First get result, then verify (costs +1 step)
            result = self._pool.execute(specialist_id, subtask["description"], stakes, self._rng)
+            step_cost = int(result.metadata.get("step_cost", 1)) + 1
            outcome = result.outcome if not result.is_adversarial else 0.0
            was_adversarial = result.is_adversarial
            # Verification means agent caught adversarial — treat as detection
@@ -182,12 +184,15 @@ class SentinelEnv:

        else:  # delegate
            result = self._pool.execute(specialist_id, subtask["description"], stakes, self._rng)
+            step_cost = int(result.metadata.get("step_cost", 1))
            was_adversarial = result.is_adversarial
            outcome = 0.0 if was_adversarial else result.outcome
            self._graph.record_outcome(subtask["id"], outcome, specialist_id, was_adversarial)
            self._ledger.update(specialist_id, result.outcome, stakes)
            self.last_action_summary = f"Delegated to {specialist_id} on {subtask['id']}"

+        self.step_count += max(1, step_cost)
+
        # --- Grade this step ---
        reward_value, reason, breakdown = self._grade_step(
            task_type, action_type, specialist_id, outcome,
@@ -196,6 +201,7 @@ class SentinelEnv:

        self.last_reward = reward_value
        self.total_reward += reward_value
+        self.reward_events += 1

        # --- Check episode end ---
        all_done = self._graph.is_done()
@@ -226,6 +232,7 @@ class SentinelEnv:
            "step_count": self.step_count,
            "max_steps": self.max_steps,
            "total_reward": round(self.total_reward, 4),
+            "score": round(self.normalized_score(), 4),
            "done": self.done,
            "scenario_id": self.current_scenario["scenario_id"],
            "task_type": self.current_scenario["task_type"],
@@ -301,11 +308,11 @@ class SentinelEnv:
            terminal_breakdown = {"completion_rate": round(completion, 3)}
        elif task_type == "task2":
            terminal_value, terminal_reason, terminal_breakdown = grade_task2_terminal(
-                self._graph, self._ledger,
+                self._graph, self._ledger, self._public_ground_truth_reliability()
            )
        else:
            terminal_value, terminal_reason, terminal_breakdown = grade_task3_terminal(
-                self._graph, self._ledger,
+                self._graph, self._ledger, self._public_ground_truth_reliability(),
                self.step_count, self.max_steps,
            )
@@ -315,6 +322,7 @@ class SentinelEnv:

        self.last_reward = terminal_value
        self.total_reward += terminal_value
+        self.reward_events += 1
        self.done = True
        self.episode_status = "failed" if forced_end else "completed"
@@ -337,6 +345,9 @@ class SentinelEnv:
        extra_info: dict | None = None,
    ) -> dict:
        node = self._graph.current_node() if self._graph and not done else None
+        subtask_index = self._graph.node_index(node.subtask["id"]) if node else (
+            self._graph.subtasks_total() if self._graph else 0
+        )

        obs = {
            "session_id": self.session_id,
@@ -345,7 +356,7 @@ class SentinelEnv:
            "difficulty": self._difficulty(),
            "task_description": self.current_scenario["description"] if self.current_scenario else "",
            "current_subtask": node.subtask["description"] if node else "All subtasks complete.",
-            "subtask_index":
+            "subtask_index": subtask_index,
            "subtasks_total": self._graph.subtasks_total() if self._graph else 0,
            "subtasks_remaining": self._graph.subtasks_remaining() if self._graph else 0,
            "available_specialists": self._pool.available_ids(),
@@ -370,6 +381,7 @@ class SentinelEnv:
            "step_count": self.step_count,
            "max_steps": self.max_steps,
            "total_reward": round(self.total_reward, 4),
+            "score": round(self.normalized_score(), 4),
        }
        if extra_info:
            info.update(extra_info)
@@ -379,4 +391,13 @@ class SentinelEnv:
    def _difficulty(self) -> str:
        return {"task1": "easy", "task2": "medium", "task3": "hard"}.get(
            self.current_scenario["task_type"] if self.current_scenario else "task3", "hard"
        )
+
+    def normalized_score(self) -> float:
+        """Episode score normalized to 0.0-1.0 for judging logs."""
+        if self.reward_events <= 0:
+            return 0.0
+        return max(0.0, min(1.0, self.total_reward / self.reward_events))
+
+    def _public_ground_truth_reliability(self) -> dict[str, float]:
+        return self._pool.public_ground_truth_reliability(_GROUND_TRUTH_RELIABILITY)
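The new `normalized_score()` is simply the mean of all reward events, clamped to `[0, 1]`; that is what the `score` field in `info` now reports. A standalone sketch of the same arithmetic, using a hypothetical reward trace:

```python
# Same arithmetic as SentinelEnv.normalized_score(), shown standalone.
# Hypothetical trace: 19 per-step rewards plus one terminal reward.
step_rewards = [0.99] * 15 + [0.30] * 4
terminal_reward = 0.812

rewards = step_rewards + [terminal_reward]
score = max(0.0, min(1.0, sum(rewards) / len(rewards)))
print(round(score, 4))  # 0.8431
```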
inference.py
CHANGED
@@ -24,8 +24,6 @@ from __future__ import annotations

import json
import os
-import sys
-import time
from pathlib import Path

# ---------------------------------------------------------------------------
@@ -54,13 +52,14 @@ class EnvClient:
        self._env = SentinelEnv()
        self.session_id: str = ""

-    def reset(self, task_type: str, seed: int | None = None) -> dict:
+    def reset(self, task_type: str, scenario_id: str | None = None, seed: int | None = None) -> dict:
+        payload = {"task_type": task_type, "scenario_id": scenario_id, "seed": seed}
        if USE_REMOTE:
-            r = self._client.post("/reset", json=
+            r = self._client.post("/reset", json=payload)
            r.raise_for_status()
            result = r.json()
        else:
-            result = self._env.reset(task_type=task_type, seed=seed)
+            result = self._env.reset(task_type=task_type, scenario_id=scenario_id, seed=seed)
        self.session_id = result["info"]["session_id"]
        return result

@@ -126,13 +125,14 @@ def run_episode(
    scenario_id: str,
    seed: int,
) -> dict:
-    result = client.reset(task_type=task_type, seed=seed)
+    result = client.reset(task_type=task_type, scenario_id=scenario_id, seed=seed)
    session_id = client.session_id

    print(f"[START] task={scenario_id} env=sentinel-env model=heuristic-baseline")

    step_num = 0
+    rewards: list[float] = []
+    final_score = 0.0

    while True:
        obs = result["observation"]
@@ -142,13 +142,14 @@ def run_episode(
        reward = result["reward"]["value"]
        done = result["done"]
        step_num += 1
+        rewards.append(reward)
+        final_score = result["info"].get("score", 0.0)

        action_str = f"{action['action_type']}:{action.get('specialist_id','SELF')}"
        print(
            f"[STEP] step={step_num} "
            f"action={action_str} "
-            f"reward={reward:.
+            f"reward={reward:.2f} "
            f"done={str(done).lower()} "
            f"error=null"
        )
@@ -162,19 +163,21 @@ def run_episode(
    detections = info.get("adversarial_detections", 0)
    poisonings = info.get("adversarial_poisonings", 0)
    trust_snap = info.get("trust_snapshot", {})
+    rewards_str = ",".join(f"{r:.2f}" for r in rewards)

    print(
        f"[END] success=true "
        f"steps={step_num} "
-        f"score={
-        f"rewards={
+        f"score={final_score:.3f} "
+        f"rewards={rewards_str}"
    )

    return {
        "scenario_id": scenario_id,
        "task_type": task_type,
        "steps": step_num,
-        "
+        "score": round(final_score, 4),
+        "total_reward": round(info.get("total_reward", 0.0), 4),
        "completion_rate": round(completion, 4),
        "adversarial_detections": detections,
        "adversarial_poisonings": poisonings,
@@ -198,27 +201,21 @@ def main():
            result = run_episode(client, task_type, scenario_id, seed=i)
            all_results.append(result)
        except Exception as e:
-            print(f"[STEP] step=0 action=error reward=0.
-            print(f"[END] success=false steps=0 score=0.
+            print(f"[STEP] step=0 action=error reward=0.00 done=true error={e}")
+            print(f"[END] success=false steps=0 score=0.000 rewards=0.00")

-    # Summary
    if all_results:
        by_task: dict[str, list] = {"task1": [], "task2": [], "task3": []}
        for r in all_results:
-            by_task[r["task_type"]].append(r["
+            by_task[r["task_type"]].append(r["score"])

-        print("\n=== Baseline Summary ===")
        overall_scores = []
        for task_type, scores in by_task.items():
            if scores:
-                avg = sum(scores) / len(scores)
                overall_scores.extend(scores)
-                print(f"  {task_type}: episodes={len(scores)} avg_score={avg:.4f}")

        overall_avg = sum(overall_scores) / len(overall_scores) if overall_scores else 0.0
-        print(f"  OVERALL: episodes={len(overall_scores)} avg_score={overall_avg:.4f}")

-        # Save results
        out_path = Path("outputs/baseline_scores.json")
        out_path.parent.mkdir(exist_ok=True)
        with open(out_path, "w") as f:
@@ -232,8 +229,7 @@ def main():
            },
            "episodes": all_results,
        }, f, indent=2)
-        print(f"\nResults saved to {out_path}")


if __name__ == "__main__":
-    main()
+    main()
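`inference.py` now records a per-episode `score` and writes everything to `outputs/baseline_scores.json`. A small sketch for re-aggregating that file after a run, assuming the `{"episodes": [...]}` layout written above:

```python
# Re-aggregate outputs/baseline_scores.json into per-task averages.
import json
from collections import defaultdict
from pathlib import Path

data = json.loads(Path("outputs/baseline_scores.json").read_text())
by_task: dict[str, list[float]] = defaultdict(list)
for episode in data["episodes"]:
    by_task[episode["task_type"]].append(episode["score"])

for task_type, scores in sorted(by_task.items()):
    print(f"{task_type}: episodes={len(scores)} avg_score={sum(scores) / len(scores):.4f}")
```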
openenv.yaml
CHANGED
@@ -140,9 +140,9 @@ baseline:
  script: inference.py
  required_env_vars: [API_BASE_URL, MODEL_NAME, HF_TOKEN]
  optional_env_vars: [ENV_URL]
-  latest_local_score: 0.
+  latest_local_score: 0.7942
  latest_local_episodes: 30
reproducibility:
  inference_temperature: 0.0
  agent: heuristic-trust-weighted
-  dataset_order: fixed SCN-TASK*-001 through SCN-TASK*-010 per task
+  dataset_order: fixed SCN-TASK*-001 through SCN-TASK*-010 per task
pyproject.toml
CHANGED
@@ -0,0 +1,30 @@
[project]
name = "sentinel-env"
version = "1.0.0"
description = "OpenEnv-compatible multi-agent trust calibration environment."
requires-python = ">=3.10,<3.14"
dependencies = [
    "fastapi>=0.115,<1",
    "uvicorn[standard]>=0.35,<1",
    "pydantic>=2.7,<3",
    "httpx>=0.28.1,<1",
    "python-multipart==0.0.9",
    "openenv-core>=0.2.0",
]

[project.scripts]
server = "server.app:main"

[project.optional-dependencies]
dev = ["pytest>=8.0.0"]
training = [
    "trl",
    "transformers",
    "datasets",
    "accelerate",
    "unsloth",
]

[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
requirements.txt
CHANGED
@@ -1,5 +1,5 @@
-fastapi=
-uvicorn[standard]=
-pydantic=
-httpx=
-python-multipart==0.0.9
+fastapi>=0.115,<1
+uvicorn[standard]>=0.35,<1
+pydantic>=2.7,<3
+httpx>=0.28.1,<1
+python-multipart==0.0.9
scenarios.py
CHANGED
@@ -205,9 +205,10 @@ def _generate_scenarios(
    layout: list[tuple],
    count: int = 40,
) -> list[Scenario]:
+    stable_offsets = {"task1": 101, "task2": 202, "task3": 303}
    scenarios = []
    for i in range(count):
-        jittered = _jitter_stakes(layout, seed=i * 100 +
+        jittered = _jitter_stakes(layout, seed=i * 100 + stable_offsets[task_type])
        sid = f"SCN-{task_type.upper()}-{i+1:03d}"
        scenarios.append(
            _build_scenario(sid, task_type, jittered, f"#{i+1:03d}")
@@ -263,4 +264,4 @@ def scenario_summary() -> dict:
        "task2": len(_TASK2_LAYOUT),
        "task3": len(_TASK3_LAYOUT),
    },
-}
+}
server/__init__.py
ADDED
@@ -0,0 +1 @@
"""OpenEnv server package wrapper."""
server/app.py
ADDED
@@ -0,0 +1,16 @@
from __future__ import annotations

import os

from app import app


def main() -> None:
    import uvicorn

    port = int(os.environ.get("PORT", 7860))
    uvicorn.run("server.app:app", host="0.0.0.0", port=port, reload=False)


if __name__ == "__main__":
    main()
specialists.py
CHANGED
@@ -2,7 +2,6 @@ from __future__ import annotations

import random
from dataclasses import dataclass, field
-from typing import Protocol


# ---------------------------------------------------------------------------
@@ -105,9 +104,10 @@ class DomainBoundSpecialist(BaseSpecialist):
    pass

    def execute(self, subtask: str, stakes: float, rng: random.Random) -> SpecialistResult:
-        #
+        # SENTINEL's abstract scenarios expose domain through task wording.
+        # This specialist is strong at analysis/verification and weak elsewhere.
        in_domain = any(kw in subtask.lower() for kw in
-            ["
+            ["analyze", "analysis", "identify", "pattern", "verify", "correctness", "assess"])
        accuracy = self.IN_DOMAIN_ACCURACY if in_domain else self.OUT_DOMAIN_ACCURACY
        correct = rng.random() < accuracy
        return SpecialistResult(
@@ -272,4 +272,18 @@ class SpecialistPool:
        return result

    def available_ids(self) -> list[str]:
-        return list(self._profile.keys())
+        return list(self._profile.keys())
+
+    def internal_profile(self) -> dict[str, str]:
+        """Public specialist id -> hidden internal behavior id."""
+        return dict(self._profile)
+
+    def public_ground_truth_reliability(self, internal_reliability: dict[str, float]) -> dict[str, float]:
+        """
+        Map hidden internal behavior reliabilities onto public slots.
+        The reward engine uses this; the orchestrator never sees it.
+        """
+        return {
+            public_id: internal_reliability.get(internal_id, 0.5)
+            for public_id, internal_id in self._profile.items()
+        }
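`public_ground_truth_reliability()` only re-keys a hidden reliability table by the shuffled public slots. A standalone sketch of that mapping with a hypothetical shuffle and hypothetical reliability values:

```python
# Standalone illustration of the re-keying done by
# SpecialistPool.public_ground_truth_reliability(); values are hypothetical.
profile = {"S0": "S3", "S1": "S0", "S2": "S1", "S3": "S4", "S4": "S2"}  # public -> hidden
internal_reliability = {"S0": 0.9, "S1": 0.6, "S2": 0.7, "S3": 0.15, "S4": 0.65}

public_reliability = {
    public_id: internal_reliability.get(internal_id, 0.5)
    for public_id, internal_id in profile.items()
}
print(public_reliability)  # {'S0': 0.15, 'S1': 0.9, 'S2': 0.6, 'S3': 0.65, 'S4': 0.7}
```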
task_graph.py
CHANGED
@@ -1,6 +1,6 @@
from __future__ import annotations

-from dataclasses import dataclass
+from dataclasses import dataclass
from typing import Optional

from scenarios import Scenario, SubTask
@@ -18,6 +18,8 @@ class TaskNode:
    specialist_used: str = ""
    attempts: int = 0
    was_adversarial: bool = False
+    adversarial_detection_count: int = 0
+    adversarial_poisoning_count: int = 0


# ---------------------------------------------------------------------------
@@ -28,6 +30,8 @@ class TaskNode:
# ---------------------------------------------------------------------------

class TaskGraph:
+    MAX_ATTEMPTS_PER_NODE = 2
+
    def __init__(self, scenario: Scenario) -> None:
        self._scenario = scenario
        self._nodes: dict[str, TaskNode] = {}
@@ -50,6 +54,8 @@ class TaskGraph:
        """
        for sid in self._order:
            node = self._nodes[sid]
+            if node.status == "failed" and node.attempts < self.MAX_ATTEMPTS_PER_NODE:
+                node.status = "ready"
            if node.status == "pending" and self._deps_met(sid):
                node.status = "ready"
            if node.status == "ready":
@@ -57,17 +63,24 @@ class TaskGraph:
        return None

    def _deps_met(self, subtask_id: str) -> bool:
-        """All dependencies
+        """All dependencies must be resolved before downstream work starts."""
        deps = self._nodes[subtask_id].subtask["depends_on"]
        return all(
-            self.
+            self._is_dependency_resolved(dep)
            for dep in deps
            if dep in self._nodes
        )

+    def _is_dependency_resolved(self, subtask_id: str) -> bool:
+        node = self._nodes[subtask_id]
+        if node.status in ("completed", "skipped"):
+            return True
+        return node.status == "failed" and node.attempts >= self.MAX_ATTEMPTS_PER_NODE
+
    def is_done(self) -> bool:
        return all(
-            n.status in ("completed", "
+            n.status in ("completed", "skipped")
+            or (n.status == "failed" and n.attempts >= self.MAX_ATTEMPTS_PER_NODE)
            for n in self._nodes.values()
        )
@@ -81,8 +94,7 @@ class TaskGraph:
        Avoided = node was adversarial AND orchestrator chose VERIFY or SOLVE_INDEPENDENTLY.
        """
        return sum(
-            if n.was_adversarial and n.status == "completed" and n.outcome > 0.0
+            n.adversarial_detection_count for n in self._nodes.values()
        )

    def adversarial_poisonings(self) -> int:
@@ -90,14 +102,14 @@ TaskGraph
        Count of adversarial results that slipped through unchecked.
        """
        return sum(
-            if n.was_adversarial and n.outcome == 0.0
+            n.adversarial_poisoning_count for n in self._nodes.values()
        )

    def subtasks_remaining(self) -> int:
        return sum(
            1 for n in self._nodes.values()
            if n.status in ("pending", "ready", "in_progress")
+            or (n.status == "failed" and n.attempts < self.MAX_ATTEMPTS_PER_NODE)
        )

    def subtasks_completed(self) -> int:
@@ -106,6 +118,12 @@ TaskGraph
    def subtasks_total(self) -> int:
        return len(self._nodes)

+    def subtasks_failed(self) -> int:
+        return sum(1 for n in self._nodes.values() if n.status == "failed")
+
+    def node_index(self, subtask_id: str) -> int:
+        return self._order.index(subtask_id)
+
    def high_stakes_nodes(self) -> list[TaskNode]:
        return [n for n in self._nodes.values() if n.subtask["stakes"] >= 0.70]
@@ -126,7 +144,11 @@ TaskGraph
        node.outcome = outcome
        node.specialist_used = specialist_id
        node.attempts += 1
-        node.was_adversarial = was_adversarial
+        node.was_adversarial = node.was_adversarial or was_adversarial
+        if was_adversarial and outcome > 0.0:
+            node.adversarial_detection_count += 1
+        elif was_adversarial:
+            node.adversarial_poisoning_count += 1
        node.status = "completed" if outcome > 0.0 else "failed"

    def skip_node(self, subtask_id: str) -> None:
@@ -143,6 +165,7 @@ TaskGraph
            "task_type": self._scenario["task_type"],
            "subtasks_total": self.subtasks_total(),
            "subtasks_completed": self.subtasks_completed(),
+            "subtasks_failed": self.subtasks_failed(),
            "subtasks_remaining": self.subtasks_remaining(),
            "completion_rate": round(self.completion_rate(), 3),
            "adversarial_detections": self.adversarial_detections(),
@@ -151,4 +174,4 @@ TaskGraph
        }

    def node_statuses(self) -> dict[str, str]:
-        return {sid: n.status for sid, n in self._nodes.items()}
+        return {sid: n.status for sid, n in self._nodes.items()}
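With `MAX_ATTEMPTS_PER_NODE = 2`, a failed node is offered once more before its dependents unblock. A sketch of that retry flow, assuming the scenario ids exposed by `scenarios.py` and the `TaskGraph` API shown in this commit:

```python
# Retry flow sketch: a node that fails twice is treated as resolved so the graph can move on.
from scenarios import get_scenario
from task_graph import TaskGraph

graph = TaskGraph(get_scenario("SCN-TASK1-001"))
first = graph.current_node().subtask["id"]

graph.record_outcome(first, 0.0, "S1")   # attempt 1 fails -> same node is re-offered
assert graph.current_node().subtask["id"] == first

graph.record_outcome(first, 0.0, "S1")   # attempt 2 fails -> dependents unblock
assert graph.current_node().subtask["id"] != first
print(graph.subtasks_failed(), graph.subtasks_remaining())
```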
tests/test_environment.py
CHANGED
@@ -0,0 +1,55 @@
from __future__ import annotations

import unittest

from environment import SentinelEnv


class EnvironmentTests(unittest.TestCase):
    def test_reset_observation_has_integer_subtask_index(self) -> None:
        env = SentinelEnv()
        result = env.reset(task_type="task3", seed=3)

        self.assertEqual(result["observation"]["subtask_index"], 0)
        self.assertEqual(result["info"]["score"], 0.0)
        self.assertFalse(result["done"])

    def test_accurate_slow_public_slot_costs_two_steps(self) -> None:
        env = SentinelEnv()
        result = env.reset(task_type="task1", seed=11)
        slow_slot = next(
            public_id
            for public_id, internal_id in env._pool.internal_profile().items()
            if internal_id == "S0"
        )

        result = env.step({
            "session_id": result["observation"]["session_id"],
            "task_type": "task1",
            "action_type": "delegate",
            "specialist_id": slow_slot,
        })

        self.assertEqual(result["info"]["step_count"], 2)

    def test_self_solve_finishes_long_task_with_normalized_score(self) -> None:
        env = SentinelEnv()
        result = env.reset(task_type="task3", seed=5)

        while not result["done"]:
            obs = result["observation"]
            result = env.step({
                "session_id": obs["session_id"],
                "task_type": obs["task_type"],
                "action_type": "solve_independently",
                "subtask_response": "SELF_SOLVED",
            })

        self.assertEqual(result["info"]["completion_rate"], 1.0)
        self.assertGreater(result["info"]["step_count"], 2)
        self.assertGreaterEqual(result["info"]["score"], 0.0)
        self.assertLessEqual(result["info"]["score"], 1.0)


if __name__ == "__main__":
    unittest.main()
tests/test_graders.py
CHANGED
@@ -0,0 +1,46 @@
from __future__ import annotations

import unittest

from graders import clamp, grade_task3_step
from scenarios import get_scenario
from task_graph import TaskGraph


class GraderAndGraphTests(unittest.TestCase):
    def test_clamp_is_boundary_exclusive(self) -> None:
        self.assertEqual(clamp(-10), 0.01)
        self.assertEqual(clamp(10), 0.99)
        self.assertEqual(clamp(0.42), 0.42)

    def test_adversarial_verify_gets_detection_reward(self) -> None:
        reward, reason, breakdown = grade_task3_step(
            specialist_outcome=1.0,
            stakes=0.85,
            was_adversarial=True,
            action_type="verify",
            step_count=10,
            max_steps=45,
        )

        self.assertGreater(reward, 0.8)
        self.assertIn("Adversarial detected", reason)
        self.assertEqual(breakdown["stakes_awareness"], 0.99)

    def test_failed_nodes_are_retriable_then_resolved(self) -> None:
        graph = TaskGraph(get_scenario("SCN-TASK1-001"))
        node = graph.current_node()
        self.assertIsNotNone(node)

        graph.record_outcome(node.subtask["id"], 0.0, "S1")
        retry = graph.current_node()
        self.assertEqual(retry.subtask["id"], node.subtask["id"])

        graph.record_outcome(node.subtask["id"], 0.0, "S1")
        next_node = graph.current_node()
        self.assertIsNotNone(next_node)
        self.assertNotEqual(next_node.subtask["id"], node.subtask["id"])


if __name__ == "__main__":
    unittest.main()
tests/test_specialists.py
CHANGED
@@ -0,0 +1,35 @@
from __future__ import annotations

import random
import unittest

from specialists import DomainBoundSpecialist, SpecialistPool


class SpecialistTests(unittest.TestCase):
    def test_domain_bound_matches_abstract_analysis_and_verify_tasks(self) -> None:
        specialist = DomainBoundSpecialist()

        in_domain = specialist.execute("Analyze the inputs and identify the key pattern.", 0.2, random.Random(1))
        out_domain = specialist.execute("Execute the planned action and report the outcome.", 0.2, random.Random(1))

        self.assertTrue(in_domain.metadata["in_domain"])
        self.assertFalse(out_domain.metadata["in_domain"])
        self.assertEqual(in_domain.outcome, 1.0)
        self.assertEqual(out_domain.outcome, 0.0)

    def test_profile_shuffle_keeps_public_reliability_aligned(self) -> None:
        pool = SpecialistPool()
        pool.reset(seed=7)

        profile = pool.internal_profile()
        reliability = pool.public_ground_truth_reliability({"S0": 0.9, "S1": 0.6, "S2": 0.7, "S3": 0.15, "S4": 0.65})

        self.assertEqual(set(profile), {"S0", "S1", "S2", "S3", "S4"})
        self.assertEqual(set(reliability), {"S0", "S1", "S2", "S3", "S4"})
        self.assertEqual(profile[pool.adversarial_slot], "S3")
        self.assertEqual(reliability[pool.adversarial_slot], 0.15)


if __name__ == "__main__":
    unittest.main()
training/colab_notebook.ipynb
CHANGED
@@ -0,0 +1,54 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# SENTINEL Training Notebook\n",
    "\n",
    "This notebook is the hackathon-facing skeleton for running SENTINEL rollouts and wiring GRPO/Unsloth on the finale GPU machine."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!git clone https://github.com/ADITYAGABA1322/sentinel-env || true\n",
    "%cd sentinel-env\n",
    "!pip install -r requirements.txt\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!python inference.py\n",
    "!python training/evaluate.py --episodes 20 --task task3\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For onsite GRPO, install `trl`, `unsloth`, `transformers`, `datasets`, and `accelerate`, then connect `training/train.py` to the provided model checkpoint."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
training/evaluate.py
CHANGED
@@ -0,0 +1,153 @@
+from __future__ import annotations
+
+import argparse
+import json
+import random
+import sys
+from pathlib import Path
+from typing import Callable
+
+ROOT = Path(__file__).resolve().parents[1]
+if str(ROOT) not in sys.path:
+    sys.path.insert(0, str(ROOT))
+
+from environment import SentinelEnv, _GROUND_TRUTH_RELIABILITY
+
+
+Policy = Callable[[SentinelEnv, dict, random.Random], dict]
+
+
+def random_policy(env: SentinelEnv, obs: dict, rng: random.Random) -> dict:
+    specialist = rng.choice(obs["available_specialists"])
+    roll = rng.random()
+    if roll < 0.65:
+        action_type = "delegate"
+    elif roll < 0.85:
+        action_type = "verify"
+    elif roll < 0.95:
+        action_type = "solve_independently"
+        specialist = None
+    else:
+        action_type = "skip"
+        specialist = None
+    return _action(obs, action_type, specialist)
+
+
+def heuristic_policy(env: SentinelEnv, obs: dict, rng: random.Random) -> dict:
+    trust = obs["trust_snapshot"]
+    specialist = max(obs["available_specialists"], key=lambda sid: trust.get(sid, 0.5))
+    action_type = "verify" if obs["stakes_level"] >= 0.70 and trust.get(specialist, 0.5) < 0.65 else "delegate"
+    return _action(obs, action_type, specialist)
+
+
+def oracle_lite_policy(env: SentinelEnv, obs: dict, rng: random.Random) -> dict:
+    reliability = env._pool.public_ground_truth_reliability(_GROUND_TRUTH_RELIABILITY)
+    if obs["task_type"] == "task3" and obs["stakes_level"] >= 0.70:
+        return _action(obs, "verify", env._pool.adversarial_slot)
+    specialist = max(obs["available_specialists"], key=lambda sid: reliability.get(sid, 0.5))
+    return _action(obs, "delegate", specialist)
+
+
+def _action(obs: dict, action_type: str, specialist_id: str | None) -> dict:
+    return {
+        "session_id": obs["session_id"],
+        "task_type": obs["task_type"],
+        "action_type": action_type,
+        "specialist_id": specialist_id,
+        "subtask_response": "SELF_SOLVED" if action_type == "solve_independently" else None,
+        "reasoning": f"{action_type} via {specialist_id or 'SELF'}",
+    }
+
+
+def run_episode(policy_name: str, policy: Policy, task_type: str, seed: int) -> dict:
+    rng = random.Random(seed)
+    env = SentinelEnv()
+    result = env.reset(task_type=task_type, seed=seed)
+    rewards: list[float] = []
+
+    while not result["done"]:
+        action = policy(env, result["observation"], rng)
+        result = env.step(action)
+        rewards.append(result["reward"]["value"])
+
+    info = result["info"]
+    breakdown = result["reward"]["signal_breakdown"]
+    detections = info.get("adversarial_detections", 0)
+    poisonings = info.get("adversarial_poisonings", 0)
+    total_adversarial = detections + poisonings
+    detection_rate = detections / total_adversarial if total_adversarial else breakdown.get("detection_rate", 1.0)
+
+    return {
+        "policy": policy_name,
+        "task_type": task_type,
+        "seed": seed,
+        "steps": info.get("step_count", 0),
+        "score": round(info.get("score", 0.0), 4),
+        "total_reward": round(info.get("total_reward", 0.0), 4),
+        "completion_rate": round(info.get("completion_rate", 0.0), 4),
+        "detection_rate": round(detection_rate, 4),
+        "trust_calibration": round(breakdown.get("trust_calibration", 0.0), 4),
+        "adversarial_detections": detections,
+        "adversarial_poisonings": poisonings,
+        "status": "failed" if info.get("forced_end") else "completed",
+        "rewards": [round(value, 4) for value in rewards],
+    }
+
+
+def summarize(rows: list[dict]) -> dict:
+    grouped: dict[str, list[dict]] = {}
+    for row in rows:
+        grouped.setdefault(row["policy"], []).append(row)
+
+    summary = {}
+    for policy_name, policy_rows in grouped.items():
+        n = len(policy_rows)
+        summary[policy_name] = {
+            "episodes": n,
+            "avg_score": _avg(policy_rows, "score"),
+            "avg_completion_rate": _avg(policy_rows, "completion_rate"),
+            "avg_detection_rate": _avg(policy_rows, "detection_rate"),
+            "avg_trust_calibration": _avg(policy_rows, "trust_calibration"),
+            "avg_steps": _avg(policy_rows, "steps"),
+        }
+    return summary
+
+
+def _avg(rows: list[dict], key: str) -> float:
+    return round(sum(float(row.get(key, 0.0)) for row in rows) / max(1, len(rows)), 4)
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Evaluate SENTINEL policies.")
+    parser.add_argument("--episodes", type=int, default=20, help="Episodes per policy.")
+    parser.add_argument("--task", default="task3", choices=["task1", "task2", "task3"])
+    parser.add_argument("--out", default="outputs/evaluation_results.json")
+    args = parser.parse_args()
+
+    policies: dict[str, Policy] = {
+        "random": random_policy,
+        "heuristic": heuristic_policy,
+        "oracle_lite": oracle_lite_policy,
+    }
+
+    rows = []
+    for policy_name, policy in policies.items():
+        for seed in range(args.episodes):
+            rows.append(run_episode(policy_name, policy, args.task, seed))
+
+    payload = {
+        "task": args.task,
+        "episodes_per_policy": args.episodes,
+        "summary": summarize(rows),
+        "episodes": rows,
+    }
+
+    out_path = ROOT / args.out
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    out_path.write_text(json.dumps(payload, indent=2) + "\n")
+
+    print(json.dumps(payload["summary"], indent=2))
+
+
+if __name__ == "__main__":
+    main()
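A minimal driver sketch for the evaluation harness above, in addition to the CLI invocation shown in the notebook. It assumes the script is run from the repository root and that training/ is importable as a package (for example via an __init__.py or a sys.path tweak like the one evaluate.py itself performs); both are assumptions, not guarantees of this commit.

    # Hypothetical driver: evaluate the bundled heuristic baseline on task3
    # for five seeds and print the aggregated metrics.
    from training.evaluate import heuristic_policy, run_episode, summarize

    rows = [run_episode("heuristic", heuristic_policy, "task3", seed) for seed in range(5)]
    print(summarize(rows))  # e.g. {"heuristic": {"episodes": 5, "avg_score": ..., ...}}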
training/train.py
CHANGED
@@ -0,0 +1,118 @@
+from __future__ import annotations
+
+"""
+Minimal onsite training entrypoint.
+
+This file is intentionally import-light so it can run locally without GPU
+packages. On the finale machine, install the training extras from pyproject and
+use this script as the GRPO wiring point.
+"""
+
+import argparse
+import json
+import random
+import re
+import sys
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[1]
+if str(ROOT) not in sys.path:
+    sys.path.insert(0, str(ROOT))
+
+from environment import SentinelEnv
+
+
+ACTION_RE = re.compile(r"\{.*\}", re.DOTALL)
+
+
+def build_prompt(observation: dict) -> str:
+    return (
+        "You are the SENTINEL orchestrator. Choose one JSON action.\n"
+        f"Task: {observation['task_type']}\n"
+        f"Subtask: {observation['current_subtask']}\n"
+        f"Stakes: {observation['stakes_level']:.2f}\n"
+        f"Trust: {json.dumps(observation['trust_snapshot'], sort_keys=True)}\n"
+        "Valid action_type values: delegate, verify, solve_independently, skip.\n"
+        "Return JSON with action_type and optional specialist_id."
+    )
+
+
+def parse_action(text: str, observation: dict) -> dict:
+    match = ACTION_RE.search(text or "")
+    payload = {}
+    if match:
+        try:
+            payload = json.loads(match.group(0))
+        except json.JSONDecodeError:
+            payload = {}
+
+    action_type = payload.get("action_type", "delegate")
+    specialist_id = payload.get("specialist_id")
+    if action_type in ("delegate", "verify") and specialist_id not in observation["available_specialists"]:
+        specialist_id = max(
+            observation["available_specialists"],
+            key=lambda sid: observation["trust_snapshot"].get(sid, 0.5),
+        )
+    if action_type == "solve_independently":
+        specialist_id = None
+
+    return {
+        "session_id": observation["session_id"],
+        "task_type": observation["task_type"],
+        "action_type": action_type,
+        "specialist_id": specialist_id,
+        "subtask_response": "SELF_SOLVED" if action_type == "solve_independently" else None,
+        "reasoning": payload.get("reasoning", "parsed-training-action"),
+    }
+
+
+def dry_run_rollouts(episodes: int, seed: int) -> dict:
+    rng = random.Random(seed)
+    scores = []
+    for idx in range(episodes):
+        env = SentinelEnv()
+        result = env.reset(task_type="task3", seed=seed + idx)
+        while not result["done"]:
+            obs = result["observation"]
+            specialist = max(obs["available_specialists"], key=lambda sid: obs["trust_snapshot"].get(sid, 0.5))
+            action = {
+                "session_id": obs["session_id"],
+                "task_type": obs["task_type"],
+                "action_type": "verify" if obs["stakes_level"] >= 0.70 and rng.random() < 0.5 else "delegate",
+                "specialist_id": specialist,
+                "subtask_response": None,
+                "reasoning": "dry-run heuristic",
+            }
+            result = env.step(action)
+        scores.append(result["info"]["score"])
+    return {"episodes": episodes, "avg_score": round(sum(scores) / max(1, len(scores)), 4)}
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="SENTINEL GRPO training harness.")
+    parser.add_argument("--dry-run", action="store_true", help="Run local rollouts without GPU dependencies.")
+    parser.add_argument("--episodes", type=int, default=5)
+    parser.add_argument("--seed", type=int, default=0)
+    args = parser.parse_args()
+
+    if args.dry_run:
+        print(json.dumps(dry_run_rollouts(args.episodes, args.seed), indent=2))
+        return
+
+    try:
+        import trl  # noqa: F401
+        import unsloth  # noqa: F401
+    except ImportError as exc:
+        raise SystemExit(
+            "Training dependencies are not installed. Run with --dry-run locally, "
+            "or install the pyproject training extras on the finale GPU machine."
+        ) from exc
+
+    raise SystemExit(
+        "GPU training hook is ready. Wire GRPOTrainer here using build_prompt(), "
+        "parse_action(), and SentinelEnv.step() as the reward source."
+    )
+
+
+if __name__ == "__main__":
+    main()
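The SystemExit messages above mark where the onsite GRPO wiring goes. A minimal reward-function sketch, assuming the chosen GRPO trainer accepts a plain callable that maps candidate completions to scalar rewards; only build_prompt, parse_action, and SentinelEnv from this commit are used, while the import path and the hand-written example completions are illustrative assumptions.

    # Sketch: score candidate completions for the first observation of a seeded
    # episode by parsing each one into an action and replaying it in a fresh env.
    from environment import SentinelEnv
    from training.train import build_prompt, parse_action  # path assumption, see lead-in

    def completion_rewards(completions: list[str], seed: int = 0) -> list[float]:
        rewards = []
        for text in completions:
            env = SentinelEnv()
            obs = env.reset(task_type="task3", seed=seed)["observation"]  # same seed -> same first observation
            action = parse_action(text, obs)
            rewards.append(env.step(action)["reward"]["value"])
        return rewards

    # Example: show the prompt the policy would see, then compare two candidate actions.
    print(build_prompt(SentinelEnv().reset(task_type="task3", seed=0)["observation"]))
    print(completion_rewards(['{"action_type": "verify", "specialist_id": "S0"}',
                              '{"action_type": "delegate", "specialist_id": "S1"}']))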
uv.lock
ADDED
@@ -0,0 +1,3 @@
+version = 1
+revision = 1
+requires-python = ">=3.10,<3.14"