Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- IMPLEMENTATION_ROADMAP.md +103 -214
- README.md +368 -368
- client.py +123 -123
- inference.py +137 -214
- models.py +55 -55
- openenv.yaml +30 -30
- openenv_cloud_queue_env.egg-info/PKG-INFO +9 -0
- openenv_cloud_queue_env.egg-info/SOURCES.txt +19 -0
- openenv_cloud_queue_env.egg-info/dependency_links.txt +1 -0
- openenv_cloud_queue_env.egg-info/entry_points.txt +2 -0
- openenv_cloud_queue_env.egg-info/requires.txt +5 -0
- openenv_cloud_queue_env.egg-info/top_level.txt +1 -0
- server/app.py +89 -89
- server/cloud_queue_env_environment.py +781 -762
IMPLEMENTATION_ROADMAP.md
CHANGED
|
@@ -1,272 +1,161 @@
|
|
| 1 |
# QueueOps OpenEnv Implementation Roadmap
|
| 2 |
|
| 3 |
-
This
|
| 4 |
|
| 5 |
-
|
| 6 |
-
- Keep
|
| 7 |
-
-
|
| 8 |
-
-
|
| 9 |
-
-
|
| 10 |
-
-
|
|
|
|
| 11 |
|
| 12 |
---
|
| 13 |
|
| 14 |
-
## V1 -
|
| 15 |
|
| 16 |
-
Goal:
|
| 17 |
|
| 18 |
-
### Phase 1 -
|
| 19 |
Sub-goals:
|
| 20 |
-
1. Replace
|
| 21 |
-
2.
|
| 22 |
-
3. Implement
|
| 23 |
-
4. Keep OpenEnv contract
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
| 28 |
|
| 29 |
-
### Phase 2 - Task
|
| 30 |
Sub-goals:
|
| 31 |
-
1. Add task
|
| 32 |
-
2.
|
| 33 |
-
3.
|
| 34 |
-
4.
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
|
| 40 |
-
### Phase 3 - Deterministic Graders
|
| 41 |
Sub-goals:
|
| 42 |
-
1. Implement per-task
|
| 43 |
-
2.
|
| 44 |
-
3. Handle
|
| 45 |
-
4.
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
|
| 51 |
-
### Phase 4 - Reward Shaping
|
| 52 |
Sub-goals:
|
| 53 |
-
1. Add dense
|
| 54 |
-
2.
|
| 55 |
-
3.
|
| 56 |
-
4. Expose
|
| 57 |
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
|
| 62 |
-
### Phase 5 - Inference
|
| 63 |
Sub-goals:
|
| 64 |
-
1.
|
| 65 |
-
2.
|
| 66 |
-
3. Emit [START], [STEP], [END]
|
| 67 |
-
4.
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
|
| 73 |
-
### Phase 6 - Validation
|
| 74 |
Sub-goals:
|
| 75 |
-
1.
|
| 76 |
-
2.
|
| 77 |
-
3. Update README with task
|
| 78 |
-
4.
|
| 79 |
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
|
|
|
| 83 |
|
| 84 |
### V1 Submission Gate
|
| 85 |
-
All
|
| 86 |
-
1.
|
| 87 |
-
2. Graders
|
| 88 |
-
3. Inference script
|
| 89 |
-
4. OpenEnv
|
| 90 |
-
5.
|
| 91 |
|
| 92 |
---
|
| 93 |
|
| 94 |
-
## V2 -
|
| 95 |
|
| 96 |
-
Goal: improve reliability,
|
| 97 |
|
| 98 |
### Phase 1 - Determinism Hardening
|
| 99 |
Sub-goals:
|
| 100 |
-
1.
|
| 101 |
-
2. Add
|
| 102 |
-
3.
|
| 103 |
|
| 104 |
### Phase 2 - Difficulty Calibration
|
| 105 |
Sub-goals:
|
| 106 |
-
1. Tune
|
| 107 |
-
2. Improve
|
| 108 |
-
3.
|
| 109 |
|
| 110 |
-
### Phase 3 - Reporting
|
| 111 |
Sub-goals:
|
| 112 |
-
1. Add per-seed
|
| 113 |
-
2. Add mean/std
|
| 114 |
-
3.
|
| 115 |
|
| 116 |
### V2 Exit Criteria
|
| 117 |
-
1. Lower variance
|
| 118 |
-
2. Clearer task
|
| 119 |
-
3. Better
|
| 120 |
|
| 121 |
---
|
| 122 |
|
| 123 |
-
## V3 - Extended Benchmark Pack
|
| 124 |
|
| 125 |
-
Goal: increase novelty and long-term benchmark value.
|
| 126 |
|
| 127 |
-
### Phase 1 -
|
| 128 |
Sub-goals:
|
| 129 |
-
1. Add
|
| 130 |
-
2. Grade robustness
|
| 131 |
|
| 132 |
-
### Phase 2 -
|
| 133 |
Sub-goals:
|
| 134 |
-
1. Add
|
| 135 |
-
2. Grade safe
|
| 136 |
|
| 137 |
-
### Phase 3 - Public
|
| 138 |
Sub-goals:
|
| 139 |
-
1. Publish official seed suites
|
| 140 |
-
2.
|
| 141 |
-
3. Provide
|
| 142 |
|
| 143 |
### V3 Exit Criteria
|
| 144 |
-
1.
|
| 145 |
-
2.
|
| 146 |
-
3.
|
| 147 |
|
| 148 |
---
|
| 149 |
|
| 150 |
-
##
|
| 151 |
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
|
|
|
| 155 |
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
1. V1 core implementation is in place and running.
|
| 159 |
-
2. openenv validate has passed.
|
| 160 |
-
3. V2 determinism hardening, calibration pass, and reporting upgrade are implemented.
|
| 161 |
-
4. Current focus shifts to V3 extensions and benchmark quality tuning.
|
| 162 |
-
|
| 163 |
-
## V2 Completion Notes
|
| 164 |
-
|
| 165 |
-
Implemented outcomes:
|
| 166 |
-
1. Separate RNG streams are active for arrivals, service, abandonment, and exogenous effects.
|
| 167 |
-
2. Deterministic trace metadata is exposed (`trace_digest`, `seed`, and RNG stream seeds).
|
| 168 |
-
3. Anti-exploit reward calibration includes rejection-heavy and harmful downscale penalties.
|
| 169 |
-
4. Inference supports multi-seed reporting with mean/std/ci95 outputs.
|
| 170 |
-
5. Inference supports replay-mode action traces via file input for deterministic debugging.
|
| 171 |
-
6. Inference supports JSON/CSV report export for per-seed analysis.
|
| 172 |
-
|
| 173 |
-
---
|
| 174 |
-
|
| 175 |
-
## Requirement Coverage Matrix (From requirementInfo.md)
|
| 176 |
-
|
| 177 |
-
This section is the final compliance tracker for judging criteria.
|
| 178 |
-
|
| 179 |
-
### Functional Requirements
|
| 180 |
-
|
| 181 |
-
1. Real-world task simulation
|
| 182 |
-
- Requirement: Must represent real human operational work, not toy behavior.
|
| 183 |
-
- Implementation target: queue operations in call center/cloud/logistics-style flow.
|
| 184 |
-
- Evidence to keep: README motivation + task descriptions + action semantics.
|
| 185 |
-
- Status: in progress (core done, examples and narrative should be strengthened).
|
| 186 |
-
|
| 187 |
-
2. OpenEnv spec compliance
|
| 188 |
-
- Requirement: typed models, reset, step(action), state, openenv.yaml, validate pass.
|
| 189 |
-
- Implementation target: models.py + server environment + openenv.yaml + app entrypoint.
|
| 190 |
-
- Evidence to keep: `openenv validate` output in PR notes/README.
|
| 191 |
-
- Status: done (validate passing).
|
| 192 |
-
|
| 193 |
-
3. Minimum 3 tasks with deterministic graders
|
| 194 |
-
- Requirement: at least easy/medium/hard, deterministic 0.0-1.0 grading.
|
| 195 |
-
- Implementation target: task configs + per-task scoring formulas + clamping.
|
| 196 |
-
- Evidence to keep: sample run showing all tasks and deterministic seeds.
|
| 197 |
-
- Status: done for 3 tasks, polish recommended for calibration.
|
| 198 |
-
|
| 199 |
-
4. Meaningful reward function
|
| 200 |
-
- Requirement: dense trajectory signal + penalties for undesirable behavior.
|
| 201 |
-
- Implementation target: weighted reward components and safety penalties.
|
| 202 |
-
- Evidence to keep: reward component logging in metadata and README equations.
|
| 203 |
-
- Status: done, tune weights in V2.
|
| 204 |
-
|
| 205 |
-
5. Baseline inference script
|
| 206 |
-
- Requirement: OpenAI-compatible client, env vars credentials, reproducible score over tasks.
|
| 207 |
-
- Implementation target: fixed tasks/seeds/model params, required log format.
|
| 208 |
-
- Evidence to keep: saved run logs and summary scores.
|
| 209 |
-
- Status: done, provider-fallback robustness can be improved.
|
| 210 |
-
|
| 211 |
-
### Non-Functional Requirements
|
| 212 |
-
|
| 213 |
-
1. Hugging Face Space deployment
|
| 214 |
-
- Requirement: containerized HF Space tagged openenv.
|
| 215 |
-
- Evidence to keep: Space URL + successful run proof.
|
| 216 |
-
- Status: done.
|
| 217 |
-
|
| 218 |
-
2. Containerized execution
|
| 219 |
-
- Requirement: Dockerfile works with build + run.
|
| 220 |
-
- Evidence to keep: commands and successful output snippet.
|
| 221 |
-
- Status: pending explicit evidence capture in docs.
|
| 222 |
-
|
| 223 |
-
3. Documentation completeness
|
| 224 |
-
- Requirement: README includes env motivation, spaces, tasks, setup/usage, baseline scores.
|
| 225 |
-
- Evidence to keep: README sections + benchmark output table.
|
| 226 |
-
- Status: mostly done, baseline score table still needed.
|
| 227 |
-
|
| 228 |
-
---
|
| 229 |
-
|
| 230 |
-
## Evaluation Criteria Coverage Checklist
|
| 231 |
-
|
| 232 |
-
### Real-world utility (30%)
|
| 233 |
-
1. Keep README examples tied to concrete real operations scenarios.
|
| 234 |
-
2. Add one paragraph on why this benchmark is useful for agent evaluation.
|
| 235 |
-
|
| 236 |
-
### Task and grader quality (25%)
|
| 237 |
-
1. Keep deterministic seed set fixed and documented.
|
| 238 |
-
2. Show per-task scoring decomposition and bounded outputs.
|
| 239 |
-
3. Add one reproducibility check note: same seed + same policy => same score.
|
| 240 |
-
|
| 241 |
-
### Environment design (20%)
|
| 242 |
-
1. Verify clean reset and sensible done boundaries for all tasks.
|
| 243 |
-
2. Keep action/observation schema stable and documented.
|
| 244 |
-
3. Keep dense reward with interpretable components.
|
| 245 |
-
|
| 246 |
-
### Code quality and spec compliance (15%)
|
| 247 |
-
1. Keep `openenv validate` passing.
|
| 248 |
-
2. Capture docker build/run commands and outcomes.
|
| 249 |
-
3. Keep deployment and ws route functional.
|
| 250 |
-
|
| 251 |
-
### Creativity and novelty (10%)
|
| 252 |
-
1. Emphasize queue-control benchmark novelty in README.
|
| 253 |
-
2. Keep multi-objective reward and cost/fairness tradeoff visible.
|
| 254 |
-
|
| 255 |
-
---
|
| 256 |
-
|
| 257 |
-
## Pre-Submission Evidence Pack (Must Attach)
|
| 258 |
-
|
| 259 |
-
1. Validation proof
|
| 260 |
-
- `openenv validate` success output.
|
| 261 |
-
|
| 262 |
-
2. Runtime proof
|
| 263 |
-
- HF Space URL and one successful task run excerpt.
|
| 264 |
-
|
| 265 |
-
3. Baseline proof
|
| 266 |
-
- One full [START]/[STEP]/[END]/[SUMMARY] run log.
|
| 267 |
-
|
| 268 |
-
4. Docker proof
|
| 269 |
-
- `docker build` and `docker run` command results.
|
| 270 |
-
|
| 271 |
-
5. Documentation proof
|
| 272 |
-
- README includes baseline score table (easy, medium, hard, final).
|
|
|
|
| 1 |
# QueueOps OpenEnv Implementation Roadmap
|
| 2 |
|
| 3 |
+
This roadmap is the execution reference for building the real-world queueing environment in this repository.
|
| 4 |
|
| 5 |
+
Constraints locked in:
|
| 6 |
+
- Keep existing directory structure unchanged.
|
| 7 |
+
- Treat `cloud_queue_env/` as the project root.
|
| 8 |
+
- Use HF token provider flow in `inference.py`.
|
| 9 |
+
- Follow OpenEnv compliance strictly: typed models, `step()/reset()/state()`, valid `openenv.yaml`.
|
| 10 |
+
- Provide deterministic graders with partial scoring in `[0, 1]`.
|
| 11 |
+
- Deliver at least 3 tasks (more optional).
|
| 12 |
|
| 13 |
---
|
| 14 |
|
| 15 |
+
## V1 - Hackathon-Ready Submission
|
| 16 |
|
| 17 |
+
Goal: submit a valid, real-world OpenEnv benchmark with 3 deterministic graded tasks and reproducible inference outputs.
|
| 18 |
|
| 19 |
+
### Phase 1 - Core Simulator Foundation
|
| 20 |
Sub-goals:
|
| 21 |
+
1. Replace echo logic with queue-operations simulation core.
|
| 22 |
+
2. Add deterministic RNG with explicit seed handling.
|
| 23 |
+
3. Implement proper episode boundaries (`horizon`, terminal conditions).
|
| 24 |
+
4. Keep strict OpenEnv contract for `reset()`, `step()`, and `state`.
|
| 25 |
|
| 26 |
+
Definition of done:
|
| 27 |
+
- Environment no longer behaves as dummy echo.
|
| 28 |
+
- Same seed + same action trace => identical trajectory.
|
| 29 |
+
- Episode always terminates predictably.
|
| 30 |
|
| 31 |
+
### Phase 2 - Task System (Easy/Medium/Hard)
|
| 32 |
Sub-goals:
|
| 33 |
+
1. Add task selection (`task_id`) and per-task config.
|
| 34 |
+
2. Implement Task A (single queue, admission control).
|
| 35 |
+
3. Implement Task B (multi-server, priority routing).
|
| 36 |
+
4. Implement Task C (two-stage queue network, dynamic scaling/cost).
|
| 37 |
|
| 38 |
+
Definition of done:
|
| 39 |
+
- All 3 tasks run end-to-end from `reset()` to terminal state.
|
| 40 |
+
- Difficulty progression is visible from A -> B -> C.
|
| 41 |
|
| 42 |
+
### Phase 3 - Deterministic Graders + Partial Scoring
|
| 43 |
Sub-goals:
|
| 44 |
+
1. Implement per-task grader formulas from master spec.
|
| 45 |
+
2. Keep each grader output bounded in `[0, 1]`.
|
| 46 |
+
3. Handle invalid/NaN/infinite values safely and deterministically.
|
| 47 |
+
4. Aggregate final benchmark score as mean of task scores.
|
| 48 |
|
| 49 |
+
Definition of done:
|
| 50 |
+
- Repeated runs on same seeds produce same grader outputs.
|
| 51 |
+
- Partial scoring is meaningful (not binary pass/fail only).
|
| 52 |
|
| 53 |
+
### Phase 4 - Reward Shaping and Safety Penalties
|
| 54 |
Sub-goals:
|
| 55 |
+
1. Add dense reward components: wait, throughput, SLA, cost, fairness, safety.
|
| 56 |
+
2. Add penalties for invalid actions and exploit patterns.
|
| 57 |
+
3. Bound reward scale across tasks.
|
| 58 |
+
4. Expose reward components in `info` for debugging.
|
| 59 |
|
| 60 |
+
Definition of done:
|
| 61 |
+
- Reward moves through trajectory, not only at the end.
|
| 62 |
+
- Unsafe or degenerate behavior is penalized.
|
| 63 |
|
| 64 |
+
### Phase 5 - Inference Protocol Compliance
|
| 65 |
Sub-goals:
|
| 66 |
+
1. Update `inference.py` to run all required tasks with fixed seeds.
|
| 67 |
+
2. Keep OpenAI client usage while authenticating with HF token flow.
|
| 68 |
+
3. Emit strict `[START]`, `[STEP]`, `[END]` line format.
|
| 69 |
+
4. Print per-task and final aggregate scores.
|
| 70 |
|
| 71 |
+
Definition of done:
|
| 72 |
+
- Script executes benchmark sweep reproducibly.
|
| 73 |
+
- Output format matches hackathon requirements.
|
| 74 |
|
| 75 |
+
### Phase 6 - Packaging, Validation, Documentation
|
| 76 |
Sub-goals:
|
| 77 |
+
1. Validate `openenv.yaml` metadata and app wiring.
|
| 78 |
+
2. Confirm Docker build/run success.
|
| 79 |
+
3. Update README with task definitions, action/observation spaces, reward/grader equations, baseline results.
|
| 80 |
+
4. Verify deployment readiness for HF Space.
|
| 81 |
|
| 82 |
+
Definition of done:
|
| 83 |
+
- OpenEnv validation passes.
|
| 84 |
+
- Container starts and serves correctly.
|
| 85 |
+
- README is submission-ready.
|
| 86 |
|
| 87 |
### V1 Submission Gate
|
| 88 |
+
All must be true:
|
| 89 |
+
1. 3 tasks implemented and deterministic.
|
| 90 |
+
2. Graders return valid partial scores in `[0, 1]`.
|
| 91 |
+
3. Inference script reports reproducible benchmark outputs.
|
| 92 |
+
4. OpenEnv spec compliance confirmed.
|
| 93 |
+
5. Docker and README requirements satisfied.
|
| 94 |
|
| 95 |
---
|
| 96 |
|
| 97 |
+
## V2 - Quality and Robustness Upgrade
|
| 98 |
|
| 99 |
+
Goal: improve benchmark reliability, score stability, and anti-exploit behavior after initial submission.
|
| 100 |
|
| 101 |
### Phase 1 - Determinism Hardening
|
| 102 |
Sub-goals:
|
| 103 |
+
1. Split RNG streams (arrivals/service/abandonment/shocks).
|
| 104 |
+
2. Add trace replay support for debugging.
|
| 105 |
+
3. Extend `info` with deterministic audit fields.
|
| 106 |
|
| 107 |
### Phase 2 - Difficulty Calibration
|
| 108 |
Sub-goals:
|
| 109 |
+
1. Tune parameters for cleaner A/B/C separation.
|
| 110 |
+
2. Improve level interpolation behavior.
|
| 111 |
+
3. Add stronger guards against reject-all or noop exploitation.
|
| 112 |
|
| 113 |
+
### Phase 3 - Reporting and Confidence
|
| 114 |
Sub-goals:
|
| 115 |
+
1. Add standardized per-seed report table.
|
| 116 |
+
2. Add mean/std summaries over seed sets.
|
| 117 |
+
3. Flag unstable metrics and grader edge cases.
|
| 118 |
|
| 119 |
### V2 Exit Criteria
|
| 120 |
+
1. Lower run-to-run variance on fixed seed sets.
|
| 121 |
+
2. Clearer task difficulty progression.
|
| 122 |
+
3. Better fairness and exploit resistance.
|
| 123 |
|
| 124 |
---
|
| 125 |
|
| 126 |
+
## V3 - Extended Benchmark Pack (Optional)
|
| 127 |
|
| 128 |
+
Goal: increase novelty and long-term benchmark value with optional extra tasks.
|
| 129 |
|
| 130 |
+
### Phase 1 - Task D (Non-stationary Load)
|
| 131 |
Sub-goals:
|
| 132 |
+
1. Add shift-based and bursty arrivals.
|
| 133 |
+
2. Grade robustness under changing demand.
|
| 134 |
|
| 135 |
+
### Phase 2 - Task E (Partial Observability)
|
| 136 |
Sub-goals:
|
| 137 |
+
1. Add delayed/noisy metrics.
|
| 138 |
+
2. Grade safe decisions under uncertainty.
|
| 139 |
|
| 140 |
+
### Phase 3 - Public Benchmark Packaging
|
| 141 |
Sub-goals:
|
| 142 |
+
1. Publish official seed suites.
|
| 143 |
+
2. Add benchmark profiles: quick / standard / full.
|
| 144 |
+
3. Provide reference baseline outputs.
|
| 145 |
|
| 146 |
### V3 Exit Criteria
|
| 147 |
+
1. 4-5 total tasks available.
|
| 148 |
+
2. Broader real-world coverage.
|
| 149 |
+
3. Stronger benchmark differentiation.
|
| 150 |
|
| 151 |
---
|
| 152 |
|
| 153 |
+
## Execution Order
|
| 154 |
|
| 155 |
+
Recommended order:
|
| 156 |
+
1. Complete V1 fully and submit.
|
| 157 |
+
2. Continue with V2 for quality hardening.
|
| 158 |
+
3. Do V3 only if timeline allows.
|
| 159 |
|
| 160 |
+
Immediate next implementation step:
|
| 161 |
+
- Start V1 Phase 1 (models + simulator core + deterministic state transitions).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
|
@@ -1,369 +1,369 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: Cloud Queue Env Environment Server
|
| 3 |
-
emoji: 🖨️
|
| 4 |
-
colorFrom: pink
|
| 5 |
-
colorTo: blue
|
| 6 |
-
sdk: docker
|
| 7 |
-
pinned: false
|
| 8 |
-
app_port: 8000
|
| 9 |
-
base_path: /web
|
| 10 |
-
tags:
|
| 11 |
-
- openenv
|
| 12 |
-
---
|
| 13 |
-
|
| 14 |
-
# Cloud Queue Env Environment
|
| 15 |
-
|
| 16 |
-
A real-world queue-operations benchmark for OpenEnv.
|
| 17 |
-
|
| 18 |
-
This environment simulates service operations decisions humans make in production systems:
|
| 19 |
-
- Admission and rejection under load
|
| 20 |
-
- Queue routing and dispatching
|
| 21 |
-
- Priority handling for urgent traffic
|
| 22 |
-
- Capacity scaling under infrastructure cost constraints
|
| 23 |
-
|
| 24 |
-
The benchmark includes three deterministic tasks with partial graders in [0, 1]:
|
| 25 |
-
- easy: single-queue stability
|
| 26 |
-
- medium: multi-server priority routing
|
| 27 |
-
- hard: two-stage queue network with scaling
|
| 28 |
-
|
| 29 |
-
## Quick Start
|
| 30 |
-
|
| 31 |
-
Use the CloudQueueEnv client to connect to a running server or container:
|
| 32 |
-
|
| 33 |
-
```python
|
| 34 |
-
from cloud_queue_env import CloudQueueAction, CloudQueueEnv
|
| 35 |
-
|
| 36 |
-
try:
|
| 37 |
-
env = CloudQueueEnv.from_docker_image("cloud_queue_env-env:latest")
|
| 38 |
-
|
| 39 |
-
# Configure task + seed, then reset into that deterministic episode
|
| 40 |
-
env.reset()
|
| 41 |
-
env.step(CloudQueueAction(action_type="configure_task", task_id="easy", seed=11))
|
| 42 |
-
result = env.reset()
|
| 43 |
-
|
| 44 |
-
for _ in range(20):
|
| 45 |
-
obs = result.observation
|
| 46 |
-
if obs.incoming_job_present:
|
| 47 |
-
action = CloudQueueAction(action_type="admit", target_queue=0)
|
| 48 |
-
else:
|
| 49 |
-
action = CloudQueueAction(action_type="dispatch", target_queue=0)
|
| 50 |
-
|
| 51 |
-
result = env.step(action)
|
| 52 |
-
print(
|
| 53 |
-
f"step={obs.sim_time} queues={obs.queue_lengths} "
|
| 54 |
-
f"reward={result.reward:.3f} done={result.done}"
|
| 55 |
-
)
|
| 56 |
-
if result.done:
|
| 57 |
-
break
|
| 58 |
-
|
| 59 |
-
final_score = result.observation.metadata.get("episode_score", 0.0)
|
| 60 |
-
print(f"episode_score={final_score:.3f}")
|
| 61 |
-
|
| 62 |
-
finally:
|
| 63 |
-
env.close()
|
| 64 |
-
```
|
| 65 |
-
|
| 66 |
-
The CloudQueueEnv.from_docker_image() method handles:
|
| 67 |
-
- Starting the Docker container
|
| 68 |
-
- Waiting for the server to be ready
|
| 69 |
-
- Connecting to the environment
|
| 70 |
-
- Container cleanup when you call `close()`
|
| 71 |
-
|
| 72 |
-
## Building the Docker Image
|
| 73 |
-
|
| 74 |
-
Before using the environment, you need to build the Docker image:
|
| 75 |
-
|
| 76 |
-
```bash
|
| 77 |
-
# From project root
|
| 78 |
-
docker build -t cloud_queue_env-env:latest -f server/Dockerfile .
|
| 79 |
-
```
|
| 80 |
-
|
| 81 |
-
## Deploying to Hugging Face Spaces
|
| 82 |
-
|
| 83 |
-
You can easily deploy your OpenEnv environment to Hugging Face Spaces using the `openenv push` command:
|
| 84 |
-
|
| 85 |
-
```bash
|
| 86 |
-
# From the environment directory (where openenv.yaml is located)
|
| 87 |
-
openenv push
|
| 88 |
-
|
| 89 |
-
# Or specify options
|
| 90 |
-
openenv push --namespace my-org --private
|
| 91 |
-
```
|
| 92 |
-
|
| 93 |
-
The `openenv push` command will:
|
| 94 |
-
1. Validate that the directory is an OpenEnv environment (checks for `openenv.yaml`)
|
| 95 |
-
2. Prepare a custom build for Hugging Face Docker space (enables web interface)
|
| 96 |
-
3. Upload to Hugging Face (ensuring you're logged in)
|
| 97 |
-
|
| 98 |
-
### Prerequisites
|
| 99 |
-
|
| 100 |
-
- Authenticate with Hugging Face: The command will prompt for login if not already authenticated
|
| 101 |
-
|
| 102 |
-
### Options
|
| 103 |
-
|
| 104 |
-
- `--directory`, `-d`: Directory containing the OpenEnv environment (defaults to current directory)
|
| 105 |
-
- `--repo-id`, `-r`: Repository ID in format 'username/repo-name' (defaults to 'username/env-name' from openenv.yaml)
|
| 106 |
-
- `--base-image`, `-b`: Base Docker image to use (overrides Dockerfile FROM)
|
| 107 |
-
- `--private`: Deploy the space as private (default: public)
|
| 108 |
-
|
| 109 |
-
### Examples
|
| 110 |
-
|
| 111 |
-
```bash
|
| 112 |
-
# Push to your personal namespace (defaults to username/env-name from openenv.yaml)
|
| 113 |
-
openenv push
|
| 114 |
-
|
| 115 |
-
# Push to a specific repository
|
| 116 |
-
openenv push --repo-id my-org/my-env
|
| 117 |
-
|
| 118 |
-
# Push with a custom base image
|
| 119 |
-
openenv push --base-image ghcr.io/meta-pytorch/openenv-base:latest
|
| 120 |
-
|
| 121 |
-
# Push as a private space
|
| 122 |
-
openenv push --private
|
| 123 |
-
|
| 124 |
-
# Combine options
|
| 125 |
-
openenv push --repo-id my-org/my-env --base-image custom-base:latest --private
|
| 126 |
-
```
|
| 127 |
-
|
| 128 |
-
After deployment, your space will be available at:
|
| 129 |
-
`https://huggingface.co/spaces/<repo-id>`
|
| 130 |
-
|
| 131 |
-
The deployed space includes:
|
| 132 |
-
- **Web Interface** at `/web` - Interactive UI for exploring the environment
|
| 133 |
-
- **API Documentation** at `/docs` - Full OpenAPI/Swagger interface
|
| 134 |
-
- **Health Check** at `/health` - Container health monitoring
|
| 135 |
-
- **WebSocket** at `/ws` - Persistent session endpoint for low-latency interactions
|
| 136 |
-
|
| 137 |
-
## Environment Details
|
| 138 |
-
|
| 139 |
-
### Action
|
| 140 |
-
CloudQueueAction fields:
|
| 141 |
-
- action_type: one of configure_task, admit, reject, route, dispatch, scale, reprioritize, noop
|
| 142 |
-
- target_queue: queue index for route/dispatch/admit
|
| 143 |
-
- target_server: optional server index
|
| 144 |
-
- scale_delta: server delta for scale action
|
| 145 |
-
- new_priority: new priority value for reprioritize
|
| 146 |
-
- task_id: easy/medium/hard (used with configure_task)
|
| 147 |
-
- seed: deterministic task seed (used with configure_task)
|
| 148 |
-
|
| 149 |
-
### Observation
|
| 150 |
-
CloudQueueObservation includes:
|
| 151 |
-
- task_id, sim_time, horizon
|
| 152 |
-
- queue_lengths, queue_wait_ema
|
| 153 |
-
- server_busy, server_remaining_service, utilization
|
| 154 |
-
- incoming_job_present, incoming_job_size, incoming_job_priority, incoming_job_deadline, incoming_job_type
|
| 155 |
-
- sla_violation_rate, abandonment_rate, throughput_recent, energy_cost_rate
|
| 156 |
-
- level, optional_history, action_mask
|
| 157 |
-
- reward, done, metadata
|
| 158 |
-
|
| 159 |
-
### Reward
|
| 160 |
-
Per-step reward is dense and multi-objective:
|
| 161 |
-
|
| 162 |
-
$$
|
| 163 |
-
r_t = 0.35R_{wait} + 0.20R_{throughput} + 0.20R_{sla} + 0.15R_{cost} + 0.05R_{fair} + 0.05R_{safe}
|
| 164 |
-
$$
|
| 165 |
-
|
| 166 |
-
Properties:
|
| 167 |
-
- Partial progress signal over the full trajectory
|
| 168 |
-
- Penalties for invalid actions and unsafe/noop behavior under congestion
|
| 169 |
-
- Bounded reward values for stability
|
| 170 |
-
|
| 171 |
-
### Deterministic Graders
|
| 172 |
-
Each task returns a deterministic episode_score in [0, 1], stored in observation metadata.
|
| 173 |
-
|
| 174 |
-
- easy score uses avg wait, throughput, rejection rate, and SLA violations
|
| 175 |
-
- medium score uses urgent/normal p95 waits, urgent SLA, throughput, and action cost
|
| 176 |
-
- hard score uses end-to-end p95, abandonment, SLA, throughput, infra cost, and fairness gap
|
| 177 |
-
|
| 178 |
-
If invalid action rate exceeds threshold, score is capped.
|
| 179 |
-
|
| 180 |
-
## Tasks
|
| 181 |
-
|
| 182 |
-
1. easy (single queue stability)
|
| 183 |
-
- one queue, one server
|
| 184 |
-
- objective: low wait with acceptable throughput and low rejection
|
| 185 |
-
|
| 186 |
-
2. medium (priority routing)
|
| 187 |
-
- two queues and multiple servers
|
| 188 |
-
- objective: protect urgent traffic while maintaining total performance
|
| 189 |
-
|
| 190 |
-
3. hard (queue network + scaling)
|
| 191 |
-
- two-stage queue network with bursty arrivals and heavy-tailed service times
|
| 192 |
-
- objective: balance latency/SLA/abandonment against infra cost and fairness
|
| 193 |
-
|
| 194 |
-
## Baseline Inference
|
| 195 |
-
|
| 196 |
-
Run baseline inference across easy/medium/hard:
|
| 197 |
-
|
| 198 |
-
```bash
|
| 199 |
-
API_KEY=your_provider_key python inference.py
|
| 200 |
-
```
|
| 201 |
-
|
| 202 |
-
Optional variables:
|
| 203 |
-
- API_KEY (OpenAI-compatible provider key for model calls)
|
| 204 |
-
- API_BASE_URL (default: https://router.huggingface.co/v1)
|
| 205 |
-
- MODEL_NAME (default: Qwen/Qwen2.5-72B-Instruct)
|
| 206 |
-
- BASE_URL (if using deployed space)
|
| 207 |
-
- IMAGE_NAME (if launching local docker image)
|
| 208 |
-
- USE_HEURISTIC_ONLY (true/false)
|
| 209 |
-
- DISABLE_MODEL_ON_FIRST_ERROR (true/false)
|
| 210 |
-
- MAX_STEPS_OVERRIDE (integer quick-test cap)
|
| 211 |
-
- TASK_SEEDS_JSON (JSON map for multi-seed runs)
|
| 212 |
-
- ACTION_TRACE_FILE (JSON replay file keyed by task:seed)
|
| 213 |
-
- REPORT_JSON_PATH (write seed/task report JSON)
|
| 214 |
-
- REPORT_CSV_PATH (write per-seed report CSV)
|
| 215 |
-
|
| 216 |
-
Output includes required line types:
|
| 217 |
-
- [START]
|
| 218 |
-
- [STEP]
|
| 219 |
-
- [END]
|
| 220 |
-
|
| 221 |
-
And final aggregate summary:
|
| 222 |
-
- [SUMMARY] easy=<...> medium=<...> hard=<...> final=<...>
|
| 223 |
-
|
| 224 |
-
V2 reporting also includes:
|
| 225 |
-
- [REPORT_SEED] task=<task_id> seed=<seed> score=<score> steps=<n> trace=<digest>
|
| 226 |
-
- [REPORT] task=<task_id> seeds=<n> mean=<score> std=<score> ci95=<score>
|
| 227 |
-
|
| 228 |
-
## Baseline Scores
|
| 229 |
-
|
| 230 |
-
Current reproducible heuristic-only baseline (deployed runtime, single seed per task):
|
| 231 |
-
|
| 232 |
-
| Task | Seed Count | Mean Score |
|
| 233 |
-
|---|---:|---:|
|
| 234 |
-
| easy | 1 | 0.000 |
|
| 235 |
-
| medium | 1 | 0.000 |
|
| 236 |
-
| hard | 1 | 0.000 |
|
| 237 |
-
| final (mean of task means) | - | 0.000 |
|
| 238 |
-
|
| 239 |
-
Notes:
|
| 240 |
-
- These values are from heuristic fallback mode and are expected to be low.
|
| 241 |
-
- Model-based scores depend on provider/model availability and should be recorded from a successful funded run.
|
| 242 |
-
- Keep this table updated with your latest official benchmark run before final submission.
|
| 243 |
-
|
| 244 |
-
## Advanced Usage
|
| 245 |
-
|
| 246 |
-
### Connecting to an Existing Server
|
| 247 |
-
|
| 248 |
-
If you already have a Cloud Queue Env environment server running, you can connect directly:
|
| 249 |
-
|
| 250 |
-
```python
|
| 251 |
-
from cloud_queue_env import CloudQueueAction, CloudQueueEnv
|
| 252 |
-
|
| 253 |
-
# Connect to existing server
|
| 254 |
-
cloud_queue_envenv = CloudQueueEnv(base_url="<ENV_HTTP_URL_HERE>")
|
| 255 |
-
|
| 256 |
-
# Use as normal
|
| 257 |
-
result = cloud_queue_envenv.reset()
|
| 258 |
-
result = cloud_queue_envenv.step(CloudQueueAction(action_type="dispatch", target_queue=0))
|
| 259 |
-
```
|
| 260 |
-
|
| 261 |
-
Note: When connecting to an existing server, `cloud_queue_envenv.close()` will NOT stop the server.
|
| 262 |
-
|
| 263 |
-
### Using the Context Manager
|
| 264 |
-
|
| 265 |
-
The client supports context manager usage for automatic connection management:
|
| 266 |
-
|
| 267 |
-
```python
|
| 268 |
-
from cloud_queue_env import CloudQueueAction, CloudQueueEnv
|
| 269 |
-
|
| 270 |
-
# Connect with context manager (auto-connects and closes)
|
| 271 |
-
with CloudQueueEnv(base_url="http://localhost:8000") as env:
|
| 272 |
-
result = env.reset()
|
| 273 |
-
print(f"Initial queues: {result.observation.queue_lengths}")
|
| 274 |
-
# Multiple steps with low latency
|
| 275 |
-
for _ in range(10):
|
| 276 |
-
result = env.step(CloudQueueAction(action_type="noop"))
|
| 277 |
-
print(f"Reward: {result.reward:.3f}")
|
| 278 |
-
```
|
| 279 |
-
|
| 280 |
-
The client uses WebSocket connections for:
|
| 281 |
-
- **Lower latency**: No HTTP connection overhead per request
|
| 282 |
-
- **Persistent session**: Server maintains your environment state
|
| 283 |
-
- **Efficient for episodes**: Better for many sequential steps
|
| 284 |
-
|
| 285 |
-
### Concurrent WebSocket Sessions
|
| 286 |
-
|
| 287 |
-
The server supports multiple concurrent WebSocket connections. To enable this,
|
| 288 |
-
modify `server/app.py` to use factory mode:
|
| 289 |
-
|
| 290 |
-
```python
|
| 291 |
-
# In server/app.py - use factory mode for concurrent sessions
|
| 292 |
-
app = create_app(
|
| 293 |
-
CloudQueueEnvironment, # Pass class, not instance
|
| 294 |
-
CloudQueueAction,
|
| 295 |
-
CloudQueueObservation,
|
| 296 |
-
max_concurrent_envs=4, # Allow 4 concurrent sessions
|
| 297 |
-
)
|
| 298 |
-
```
|
| 299 |
-
|
| 300 |
-
Then multiple clients can connect simultaneously:
|
| 301 |
-
|
| 302 |
-
```python
|
| 303 |
-
from cloud_queue_env import CloudQueueAction, CloudQueueEnv
|
| 304 |
-
from concurrent.futures import ThreadPoolExecutor
|
| 305 |
-
|
| 306 |
-
def run_episode(client_id: int):
|
| 307 |
-
with CloudQueueEnv(base_url="http://localhost:8000") as env:
|
| 308 |
-
result = env.reset()
|
| 309 |
-
for i in range(10):
|
| 310 |
-
result = env.step(CloudQueueAction(action_type="dispatch", target_queue=i % 2))
|
| 311 |
-
return client_id, result.observation.queue_lengths
|
| 312 |
-
|
| 313 |
-
# Run 4 episodes concurrently
|
| 314 |
-
with ThreadPoolExecutor(max_workers=4) as executor:
|
| 315 |
-
results = list(executor.map(run_episode, range(4)))
|
| 316 |
-
```
|
| 317 |
-
|
| 318 |
-
## Development & Testing
|
| 319 |
-
|
| 320 |
-
### Direct Environment Testing
|
| 321 |
-
|
| 322 |
-
Core files:
|
| 323 |
-
- models: typed action/observation schema
|
| 324 |
-
- server environment: queue simulation, reward shaping, grading
|
| 325 |
-
- inference script: task sweep and benchmark logging
|
| 326 |
-
|
| 327 |
-
### Running Locally
|
| 328 |
-
|
| 329 |
-
Run the server locally for development:
|
| 330 |
-
|
| 331 |
-
```bash
|
| 332 |
-
uvicorn server.app:app --reload
|
| 333 |
-
```
|
| 334 |
-
|
| 335 |
-
## Project Structure
|
| 336 |
-
|
| 337 |
-
```
|
| 338 |
-
cloud_queue_env/
|
| 339 |
-
├── .dockerignore
|
| 340 |
-
├── __init__.py
|
| 341 |
-
├── README.md
|
| 342 |
-
├── openenv.yaml
|
| 343 |
-
├── pyproject.toml
|
| 344 |
-
├── client.py
|
| 345 |
-
├── models.py
|
| 346 |
-
├── inference.py
|
| 347 |
-
├── IMPLEMENTATION_ROADMAP.md
|
| 348 |
-
└── server/
|
| 349 |
-
├── __init__.py
|
| 350 |
-
├── cloud_queue_env_environment.py
|
| 351 |
-
├── app.py
|
| 352 |
-
└── Dockerfile
|
| 353 |
-
```
|
| 354 |
-
|
| 355 |
-
TASK A — Easy (150 steps)
|
| 356 |
-
Scenario: 1 queue, 1 server (M/M/1), only admit/reject/dispatch
|
| 357 |
-
Objective: Keep wait low while processing throughput
|
| 358 |
-
Grader: score = 0.40×(1-avg_wait/6) + 0.30×(throughput/70)
|
| 359 |
-
+ 0.15×(1-rejection_rate/0.3) + 0.15×(1-sla_breaches/0.3)
|
| 360 |
-
TASK B — Medium (200 steps)
|
| 361 |
-
Scenario: 2 queues, 3 servers, 28% urgent jobs → route + reprioritize
|
| 362 |
-
Objective: Protect urgent SLA while not starving normal jobs
|
| 363 |
-
Grader: score = 0.35×urgent_wait_score + 0.25×urgent_sla_score
|
| 364 |
-
+ 0.15×normal_wait_score + 0.15×throughput + 0.10×cost
|
| 365 |
-
TASK C — Hard (250 steps)
|
| 366 |
-
Scenario: 2-stage pipeline, 1–6 servers, heavy-tail service, abandonments
|
| 367 |
-
Objective: Maximize quality under budget with fairness
|
| 368 |
-
Grader: score = 0.25×e2e_latency + 0.20×abandonment + 0.20×sla
|
| 369 |
+ 0.15×throughput + 0.10×cost + 0.10×fairness
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Cloud Queue Env Environment Server
|
| 3 |
+
emoji: 🖨️
|
| 4 |
+
colorFrom: pink
|
| 5 |
+
colorTo: blue
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
app_port: 8000
|
| 9 |
+
base_path: /web
|
| 10 |
+
tags:
|
| 11 |
+
- openenv
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# Cloud Queue Env Environment
|
| 15 |
+
|
| 16 |
+
A real-world queue-operations benchmark for OpenEnv.
|
| 17 |
+
|
| 18 |
+
This environment simulates service operations decisions humans make in production systems:
|
| 19 |
+
- Admission and rejection under load
|
| 20 |
+
- Queue routing and dispatching
|
| 21 |
+
- Priority handling for urgent traffic
|
| 22 |
+
- Capacity scaling under infrastructure cost constraints
|
| 23 |
+
|
| 24 |
+
The benchmark includes three deterministic tasks with partial graders in [0, 1]:
|
| 25 |
+
- easy: single-queue stability
|
| 26 |
+
- medium: multi-server priority routing
|
| 27 |
+
- hard: two-stage queue network with scaling
|
| 28 |
+
|
| 29 |
+
## Quick Start
|
| 30 |
+
|
| 31 |
+
Use the CloudQueueEnv client to connect to a running server or container:
|
| 32 |
+
|
| 33 |
+
```python
|
| 34 |
+
from cloud_queue_env import CloudQueueAction, CloudQueueEnv
|
| 35 |
+
|
| 36 |
+
try:
|
| 37 |
+
env = CloudQueueEnv.from_docker_image("cloud_queue_env-env:latest")
|
| 38 |
+
|
| 39 |
+
# Configure task + seed, then reset into that deterministic episode
|
| 40 |
+
env.reset()
|
| 41 |
+
env.step(CloudQueueAction(action_type="configure_task", task_id="easy", seed=11))
|
| 42 |
+
result = env.reset()
|
| 43 |
+
|
| 44 |
+
for _ in range(20):
|
| 45 |
+
obs = result.observation
|
| 46 |
+
if obs.incoming_job_present:
|
| 47 |
+
action = CloudQueueAction(action_type="admit", target_queue=0)
|
| 48 |
+
else:
|
| 49 |
+
action = CloudQueueAction(action_type="dispatch", target_queue=0)
|
| 50 |
+
|
| 51 |
+
result = env.step(action)
|
| 52 |
+
print(
|
| 53 |
+
f"step={obs.sim_time} queues={obs.queue_lengths} "
|
| 54 |
+
f"reward={result.reward:.3f} done={result.done}"
|
| 55 |
+
)
|
| 56 |
+
if result.done:
|
| 57 |
+
break
|
| 58 |
+
|
| 59 |
+
final_score = result.observation.metadata.get("episode_score", 0.0)
|
| 60 |
+
print(f"episode_score={final_score:.3f}")
|
| 61 |
+
|
| 62 |
+
finally:
|
| 63 |
+
env.close()
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
The CloudQueueEnv.from_docker_image() method handles:
|
| 67 |
+
- Starting the Docker container
|
| 68 |
+
- Waiting for the server to be ready
|
| 69 |
+
- Connecting to the environment
|
| 70 |
+
- Container cleanup when you call `close()`
|
| 71 |
+
|
| 72 |
+
## Building the Docker Image
|
| 73 |
+
|
| 74 |
+
Before using the environment, you need to build the Docker image:
|
| 75 |
+
|
| 76 |
+
```bash
|
| 77 |
+
# From project root
|
| 78 |
+
docker build -t cloud_queue_env-env:latest -f server/Dockerfile .
|
| 79 |
+
```
|
| 80 |
+
|
| 81 |
+
## Deploying to Hugging Face Spaces
|
| 82 |
+
|
| 83 |
+
You can easily deploy your OpenEnv environment to Hugging Face Spaces using the `openenv push` command:
|
| 84 |
+
|
| 85 |
+
```bash
|
| 86 |
+
# From the environment directory (where openenv.yaml is located)
|
| 87 |
+
openenv push
|
| 88 |
+
|
| 89 |
+
# Or specify options
|
| 90 |
+
openenv push --namespace my-org --private
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
The `openenv push` command will:
|
| 94 |
+
1. Validate that the directory is an OpenEnv environment (checks for `openenv.yaml`)
|
| 95 |
+
2. Prepare a custom build for Hugging Face Docker space (enables web interface)
|
| 96 |
+
3. Upload to Hugging Face (ensuring you're logged in)
|
| 97 |
+
|
| 98 |
+
### Prerequisites
|
| 99 |
+
|
| 100 |
+
- Authenticate with Hugging Face: The command will prompt for login if not already authenticated
|
| 101 |
+
|
| 102 |
+
### Options
|
| 103 |
+
|
| 104 |
+
- `--directory`, `-d`: Directory containing the OpenEnv environment (defaults to current directory)
|
| 105 |
+
- `--repo-id`, `-r`: Repository ID in format 'username/repo-name' (defaults to 'username/env-name' from openenv.yaml)
|
| 106 |
+
- `--base-image`, `-b`: Base Docker image to use (overrides Dockerfile FROM)
|
| 107 |
+
- `--private`: Deploy the space as private (default: public)
|
| 108 |
+
|
| 109 |
+
### Examples
|
| 110 |
+
|
| 111 |
+
```bash
|
| 112 |
+
# Push to your personal namespace (defaults to username/env-name from openenv.yaml)
|
| 113 |
+
openenv push
|
| 114 |
+
|
| 115 |
+
# Push to a specific repository
|
| 116 |
+
openenv push --repo-id my-org/my-env
|
| 117 |
+
|
| 118 |
+
# Push with a custom base image
|
| 119 |
+
openenv push --base-image ghcr.io/meta-pytorch/openenv-base:latest
|
| 120 |
+
|
| 121 |
+
# Push as a private space
|
| 122 |
+
openenv push --private
|
| 123 |
+
|
| 124 |
+
# Combine options
|
| 125 |
+
openenv push --repo-id my-org/my-env --base-image custom-base:latest --private
|
| 126 |
+
```
|
| 127 |
+
|
| 128 |
+
After deployment, your space will be available at:
|
| 129 |
+
`https://huggingface.co/spaces/<repo-id>`
|
| 130 |
+
|
| 131 |
+
The deployed space includes:
|
| 132 |
+
- **Web Interface** at `/web` - Interactive UI for exploring the environment
|
| 133 |
+
- **API Documentation** at `/docs` - Full OpenAPI/Swagger interface
|
| 134 |
+
- **Health Check** at `/health` - Container health monitoring
|
| 135 |
+
- **WebSocket** at `/ws` - Persistent session endpoint for low-latency interactions
|
| 136 |
+
|
| 137 |
+
## Environment Details
|
| 138 |
+
|
| 139 |
+
### Action
|
| 140 |
+
CloudQueueAction fields:
|
| 141 |
+
- action_type: one of configure_task, admit, reject, route, dispatch, scale, reprioritize, noop
|
| 142 |
+
- target_queue: queue index for route/dispatch/admit
|
| 143 |
+
- target_server: optional server index
|
| 144 |
+
- scale_delta: server delta for scale action
|
| 145 |
+
- new_priority: new priority value for reprioritize
|
| 146 |
+
- task_id: easy/medium/hard (used with configure_task)
|
| 147 |
+
- seed: deterministic task seed (used with configure_task)
|
| 148 |
+
|
| 149 |
+
### Observation
|
| 150 |
+
CloudQueueObservation includes:
|
| 151 |
+
- task_id, sim_time, horizon
|
| 152 |
+
- queue_lengths, queue_wait_ema
|
| 153 |
+
- server_busy, server_remaining_service, utilization
|
| 154 |
+
- incoming_job_present, incoming_job_size, incoming_job_priority, incoming_job_deadline, incoming_job_type
|
| 155 |
+
- sla_violation_rate, abandonment_rate, throughput_recent, energy_cost_rate
|
| 156 |
+
- level, optional_history, action_mask
|
| 157 |
+
- reward, done, metadata
|
| 158 |
+
|
| 159 |
+
### Reward
|
| 160 |
+
Per-step reward is dense and multi-objective:
|
| 161 |
+
|
| 162 |
+
$$
|
| 163 |
+
r_t = 0.35R_{wait} + 0.20R_{throughput} + 0.20R_{sla} + 0.15R_{cost} + 0.05R_{fair} + 0.05R_{safe}
|
| 164 |
+
$$
|
| 165 |
+
|
| 166 |
+
Properties:
|
| 167 |
+
- Partial progress signal over the full trajectory
|
| 168 |
+
- Penalties for invalid actions and unsafe/noop behavior under congestion
|
| 169 |
+
- Bounded reward values for stability
|
| 170 |
+
|
| 171 |
+
### Deterministic Graders
|
| 172 |
+
Each task returns a deterministic episode_score in [0, 1], stored in observation metadata.
|
| 173 |
+
|
| 174 |
+
- easy score uses avg wait, throughput, rejection rate, and SLA violations
|
| 175 |
+
- medium score uses urgent/normal p95 waits, urgent SLA, throughput, and action cost
|
| 176 |
+
- hard score uses end-to-end p95, abandonment, SLA, throughput, infra cost, and fairness gap
|
| 177 |
+
|
| 178 |
+
If invalid action rate exceeds threshold, score is capped.
|
| 179 |
+
|
| 180 |
+
## Tasks
|
| 181 |
+
|
| 182 |
+
1. easy (single queue stability)
|
| 183 |
+
- one queue, one server
|
| 184 |
+
- objective: low wait with acceptable throughput and low rejection
|
| 185 |
+
|
| 186 |
+
2. medium (priority routing)
|
| 187 |
+
- two queues and multiple servers
|
| 188 |
+
- objective: protect urgent traffic while maintaining total performance
|
| 189 |
+
|
| 190 |
+
3. hard (queue network + scaling)
|
| 191 |
+
- two-stage queue network with bursty arrivals and heavy-tailed service times
|
| 192 |
+
- objective: balance latency/SLA/abandonment against infra cost and fairness
|
| 193 |
+
|
| 194 |
+
## Baseline Inference
|
| 195 |
+
|
| 196 |
+
Run baseline inference across easy/medium/hard:
|
| 197 |
+
|
| 198 |
+
```bash
|
| 199 |
+
API_KEY=your_provider_key python inference.py
|
| 200 |
+
```
|
| 201 |
+
|
| 202 |
+
Optional variables:
|
| 203 |
+
- API_KEY (OpenAI-compatible provider key for model calls)
|
| 204 |
+
- API_BASE_URL (default: https://router.huggingface.co/v1)
|
| 205 |
+
- MODEL_NAME (default: Qwen/Qwen2.5-72B-Instruct)
|
| 206 |
+
- BASE_URL (if using deployed space)
|
| 207 |
+
- IMAGE_NAME (if launching local docker image)
|
| 208 |
+
- USE_HEURISTIC_ONLY (true/false)
|
| 209 |
+
- DISABLE_MODEL_ON_FIRST_ERROR (true/false)
|
| 210 |
+
- MAX_STEPS_OVERRIDE (integer quick-test cap)
|
| 211 |
+
- TASK_SEEDS_JSON (JSON map for multi-seed runs)
|
| 212 |
+
- ACTION_TRACE_FILE (JSON replay file keyed by task:seed)
|
| 213 |
+
- REPORT_JSON_PATH (write seed/task report JSON)
|
| 214 |
+
- REPORT_CSV_PATH (write per-seed report CSV)
|
| 215 |
+
|
| 216 |
+
Output includes required line types:
|
| 217 |
+
- [START]
|
| 218 |
+
- [STEP]
|
| 219 |
+
- [END]
|
| 220 |
+
|
| 221 |
+
And final aggregate summary:
|
| 222 |
+
- [SUMMARY] easy=<...> medium=<...> hard=<...> final=<...>
|
| 223 |
+
|
| 224 |
+
V2 reporting also includes:
|
| 225 |
+
- [REPORT_SEED] task=<task_id> seed=<seed> score=<score> steps=<n> trace=<digest>
|
| 226 |
+
- [REPORT] task=<task_id> seeds=<n> mean=<score> std=<score> ci95=<score>
|
| 227 |
+
|
| 228 |
+
## Baseline Scores
|
| 229 |
+
|
| 230 |
+
Current reproducible heuristic-only baseline (deployed runtime, single seed per task):
|
| 231 |
+
|
| 232 |
+
| Task | Seed Count | Mean Score |
|
| 233 |
+
|---|---:|---:|
|
| 234 |
+
| easy | 1 | 0.000 |
|
| 235 |
+
| medium | 1 | 0.000 |
|
| 236 |
+
| hard | 1 | 0.000 |
|
| 237 |
+
| final (mean of task means) | - | 0.000 |
|
| 238 |
+
|
| 239 |
+
Notes:
|
| 240 |
+
- These values are from heuristic fallback mode and are expected to be low.
|
| 241 |
+
- Model-based scores depend on provider/model availability and should be recorded from a successful funded run.
|
| 242 |
+
- Keep this table updated with your latest official benchmark run before final submission.
|
| 243 |
+
|
| 244 |
+
## Advanced Usage
|
| 245 |
+
|
| 246 |
+
### Connecting to an Existing Server
|
| 247 |
+
|
| 248 |
+
If you already have a Cloud Queue Env environment server running, you can connect directly:
|
| 249 |
+
|
| 250 |
+
```python
|
| 251 |
+
from cloud_queue_env import CloudQueueAction, CloudQueueEnv
|
| 252 |
+
|
| 253 |
+
# Connect to existing server
|
| 254 |
+
cloud_queue_envenv = CloudQueueEnv(base_url="<ENV_HTTP_URL_HERE>")
|
| 255 |
+
|
| 256 |
+
# Use as normal
|
| 257 |
+
result = cloud_queue_envenv.reset()
|
| 258 |
+
result = cloud_queue_envenv.step(CloudQueueAction(action_type="dispatch", target_queue=0))
|
| 259 |
+
```
|
| 260 |
+
|
| 261 |
+
Note: When connecting to an existing server, `cloud_queue_envenv.close()` will NOT stop the server.
|
| 262 |
+
|
| 263 |
+
### Using the Context Manager
|
| 264 |
+
|
| 265 |
+
The client supports context manager usage for automatic connection management:
|
| 266 |
+
|
| 267 |
+
```python
|
| 268 |
+
from cloud_queue_env import CloudQueueAction, CloudQueueEnv
|
| 269 |
+
|
| 270 |
+
# Connect with context manager (auto-connects and closes)
|
| 271 |
+
with CloudQueueEnv(base_url="http://localhost:8000") as env:
|
| 272 |
+
result = env.reset()
|
| 273 |
+
print(f"Initial queues: {result.observation.queue_lengths}")
|
| 274 |
+
# Multiple steps with low latency
|
| 275 |
+
for _ in range(10):
|
| 276 |
+
result = env.step(CloudQueueAction(action_type="noop"))
|
| 277 |
+
print(f"Reward: {result.reward:.3f}")
|
| 278 |
+
```
|
| 279 |
+
|
| 280 |
+
The client uses WebSocket connections for:
|
| 281 |
+
- **Lower latency**: No HTTP connection overhead per request
|
| 282 |
+
- **Persistent session**: Server maintains your environment state
|
| 283 |
+
- **Efficient for episodes**: Better for many sequential steps
|
| 284 |
+
|
| 285 |
+
### Concurrent WebSocket Sessions
|
| 286 |
+
|
| 287 |
+
The server supports multiple concurrent WebSocket connections. To enable this,
|
| 288 |
+
modify `server/app.py` to use factory mode:
|
| 289 |
+
|
| 290 |
+
```python
|
| 291 |
+
# In server/app.py - use factory mode for concurrent sessions
|
| 292 |
+
app = create_app(
|
| 293 |
+
CloudQueueEnvironment, # Pass class, not instance
|
| 294 |
+
CloudQueueAction,
|
| 295 |
+
CloudQueueObservation,
|
| 296 |
+
max_concurrent_envs=4, # Allow 4 concurrent sessions
|
| 297 |
+
)
|
| 298 |
+
```
|
| 299 |
+
|
| 300 |
+
Then multiple clients can connect simultaneously:
|
| 301 |
+
|
| 302 |
+
```python
|
| 303 |
+
from cloud_queue_env import CloudQueueAction, CloudQueueEnv
|
| 304 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 305 |
+
|
| 306 |
+
def run_episode(client_id: int):
|
| 307 |
+
with CloudQueueEnv(base_url="http://localhost:8000") as env:
|
| 308 |
+
result = env.reset()
|
| 309 |
+
for i in range(10):
|
| 310 |
+
result = env.step(CloudQueueAction(action_type="dispatch", target_queue=i % 2))
|
| 311 |
+
return client_id, result.observation.queue_lengths
|
| 312 |
+
|
| 313 |
+
# Run 4 episodes concurrently
|
| 314 |
+
with ThreadPoolExecutor(max_workers=4) as executor:
|
| 315 |
+
results = list(executor.map(run_episode, range(4)))
|
| 316 |
+
```
|
| 317 |
+
|
| 318 |
+
## Development & Testing
|
| 319 |
+
|
| 320 |
+
### Direct Environment Testing
|
| 321 |
+
|
| 322 |
+
Core files:
|
| 323 |
+
- models: typed action/observation schema
|
| 324 |
+
- server environment: queue simulation, reward shaping, grading
|
| 325 |
+
- inference script: task sweep and benchmark logging
|
| 326 |
+
|
| 327 |
+
### Running Locally
|
| 328 |
+
|
| 329 |
+
Run the server locally for development:
|
| 330 |
+
|
| 331 |
+
```bash
|
| 332 |
+
uvicorn server.app:app --reload
|
| 333 |
+
```
|
| 334 |
+
|
| 335 |
+
## Project Structure
|
| 336 |
+
|
| 337 |
+
```
|
| 338 |
+
cloud_queue_env/
|
| 339 |
+
├── .dockerignore
|
| 340 |
+
├── __init__.py
|
| 341 |
+
├── README.md
|
| 342 |
+
├── openenv.yaml
|
| 343 |
+
├── pyproject.toml
|
| 344 |
+
├── client.py
|
| 345 |
+
├── models.py
|
| 346 |
+
├── inference.py
|
| 347 |
+
├── IMPLEMENTATION_ROADMAP.md
|
| 348 |
+
└── server/
|
| 349 |
+
├── __init__.py
|
| 350 |
+
├── cloud_queue_env_environment.py
|
| 351 |
+
├── app.py
|
| 352 |
+
└── Dockerfile
|
| 353 |
+
```
|
| 354 |
+
|
| 355 |
+
TASK A — Easy (150 steps)
|
| 356 |
+
Scenario: 1 queue, 1 server (M/M/1), only admit/reject/dispatch
|
| 357 |
+
Objective: Keep wait low while processing throughput
|
| 358 |
+
Grader: score = 0.40×(1-avg_wait/6) + 0.30×(throughput/70)
|
| 359 |
+
+ 0.15×(1-rejection_rate/0.3) + 0.15×(1-sla_breaches/0.3)
|
| 360 |
+
TASK B — Medium (200 steps)
|
| 361 |
+
Scenario: 2 queues, 3 servers, 28% urgent jobs → route + reprioritize
|
| 362 |
+
Objective: Protect urgent SLA while not starving normal jobs
|
| 363 |
+
Grader: score = 0.35×urgent_wait_score + 0.25×urgent_sla_score
|
| 364 |
+
+ 0.15×normal_wait_score + 0.15×throughput + 0.10×cost
|
| 365 |
+
TASK C — Hard (250 steps)
|
| 366 |
+
Scenario: 2-stage pipeline, 1–6 servers, heavy-tail service, abandonments
|
| 367 |
+
Objective: Maximize quality under budget with fairness
|
| 368 |
+
Grader: score = 0.25×e2e_latency + 0.20×abandonment + 0.20×sla
|
| 369 |
+ 0.15×throughput + 0.10×cost + 0.10×fairness
|
client.py
CHANGED
|
@@ -1,123 +1,123 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the BSD-style license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
"""Cloud Queue Env Environment Client."""
|
| 8 |
-
|
| 9 |
-
from typing import Dict
|
| 10 |
-
|
| 11 |
-
from openenv.core import EnvClient
|
| 12 |
-
from openenv.core.client_types import StepResult
|
| 13 |
-
from openenv.core.env_server.types import State
|
| 14 |
-
|
| 15 |
-
from .models import CloudQueueAction, CloudQueueObservation
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
class CloudQueueEnv(
|
| 19 |
-
EnvClient[CloudQueueAction, CloudQueueObservation, State]
|
| 20 |
-
):
|
| 21 |
-
"""
|
| 22 |
-
Client for the Cloud Queue Env Environment.
|
| 23 |
-
|
| 24 |
-
This client maintains a persistent WebSocket connection to the environment server,
|
| 25 |
-
enabling efficient multi-step interactions with lower latency.
|
| 26 |
-
Each client instance has its own dedicated environment session on the server.
|
| 27 |
-
|
| 28 |
-
Example:
|
| 29 |
-
>>> # Connect to a running server
|
| 30 |
-
>>> with CloudQueueEnv(base_url="http://localhost:8000") as client:
|
| 31 |
-
... result = client.reset()
|
| 32 |
-
... print(result.observation.queue_lengths)
|
| 33 |
-
...
|
| 34 |
-
... result = client.step(CloudQueueAction(action_type="admit", target_queue=0))
|
| 35 |
-
... print(result.observation.throughput_recent)
|
| 36 |
-
|
| 37 |
-
Example with Docker:
|
| 38 |
-
>>> # Automatically start container and connect
|
| 39 |
-
>>> client = CloudQueueEnv.from_docker_image("cloud_queue_env-env:latest")
|
| 40 |
-
>>> try:
|
| 41 |
-
... result = client.reset()
|
| 42 |
-
... result = client.step(CloudQueueAction(action_type="dispatch", target_queue=0))
|
| 43 |
-
... finally:
|
| 44 |
-
... client.close()
|
| 45 |
-
"""
|
| 46 |
-
|
| 47 |
-
def _step_payload(self, action: CloudQueueAction) -> Dict:
|
| 48 |
-
"""
|
| 49 |
-
Convert CloudQueueAction to JSON payload for step message.
|
| 50 |
-
|
| 51 |
-
Args:
|
| 52 |
-
action: CloudQueueAction instance
|
| 53 |
-
|
| 54 |
-
Returns:
|
| 55 |
-
Dictionary representation suitable for JSON encoding
|
| 56 |
-
"""
|
| 57 |
-
return {
|
| 58 |
-
"action_type": action.action_type,
|
| 59 |
-
"target_queue": action.target_queue,
|
| 60 |
-
"target_server": action.target_server,
|
| 61 |
-
"scale_delta": action.scale_delta,
|
| 62 |
-
"new_priority": action.new_priority,
|
| 63 |
-
"task_id": action.task_id,
|
| 64 |
-
"seed": action.seed,
|
| 65 |
-
}
|
| 66 |
-
|
| 67 |
-
def _parse_result(self, payload: Dict) -> StepResult[CloudQueueObservation]:
|
| 68 |
-
"""
|
| 69 |
-
Parse server response into StepResult[CloudQueueObservation].
|
| 70 |
-
|
| 71 |
-
Args:
|
| 72 |
-
payload: JSON response data from server
|
| 73 |
-
|
| 74 |
-
Returns:
|
| 75 |
-
StepResult with CloudQueueObservation
|
| 76 |
-
"""
|
| 77 |
-
obs_data = payload.get("observation", {})
|
| 78 |
-
observation = CloudQueueObservation(
|
| 79 |
-
task_id=obs_data.get("task_id", "easy"),
|
| 80 |
-
sim_time=obs_data.get("sim_time", 0),
|
| 81 |
-
horizon=obs_data.get("horizon", 0),
|
| 82 |
-
queue_lengths=obs_data.get("queue_lengths", []),
|
| 83 |
-
queue_wait_ema=obs_data.get("queue_wait_ema", []),
|
| 84 |
-
server_busy=obs_data.get("server_busy", []),
|
| 85 |
-
server_remaining_service=obs_data.get("server_remaining_service", []),
|
| 86 |
-
utilization=obs_data.get("utilization", []),
|
| 87 |
-
incoming_job_present=obs_data.get("incoming_job_present", False),
|
| 88 |
-
incoming_job_size=obs_data.get("incoming_job_size", 0.0),
|
| 89 |
-
incoming_job_priority=obs_data.get("incoming_job_priority", 0),
|
| 90 |
-
incoming_job_deadline=obs_data.get("incoming_job_deadline", 0.0),
|
| 91 |
-
incoming_job_type=obs_data.get("incoming_job_type", 0),
|
| 92 |
-
sla_violation_rate=obs_data.get("sla_violation_rate", 0.0),
|
| 93 |
-
abandonment_rate=obs_data.get("abandonment_rate", 0.0),
|
| 94 |
-
throughput_recent=obs_data.get("throughput_recent", 0.0),
|
| 95 |
-
energy_cost_rate=obs_data.get("energy_cost_rate", 0.0),
|
| 96 |
-
level=obs_data.get("level", 1.0),
|
| 97 |
-
optional_history=obs_data.get("optional_history", []),
|
| 98 |
-
action_mask=obs_data.get("action_mask", []),
|
| 99 |
-
done=payload.get("done", False),
|
| 100 |
-
reward=payload.get("reward"),
|
| 101 |
-
metadata=obs_data.get("metadata", {}),
|
| 102 |
-
)
|
| 103 |
-
|
| 104 |
-
return StepResult(
|
| 105 |
-
observation=observation,
|
| 106 |
-
reward=payload.get("reward"),
|
| 107 |
-
done=payload.get("done", False),
|
| 108 |
-
)
|
| 109 |
-
|
| 110 |
-
def _parse_state(self, payload: Dict) -> State:
|
| 111 |
-
"""
|
| 112 |
-
Parse server response into State object.
|
| 113 |
-
|
| 114 |
-
Args:
|
| 115 |
-
payload: JSON response from state request
|
| 116 |
-
|
| 117 |
-
Returns:
|
| 118 |
-
State object with episode_id and step_count
|
| 119 |
-
"""
|
| 120 |
-
return State(
|
| 121 |
-
episode_id=payload.get("episode_id"),
|
| 122 |
-
step_count=payload.get("step_count", 0),
|
| 123 |
-
)
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Cloud Queue Env Environment Client."""
|
| 8 |
+
|
| 9 |
+
from typing import Dict
|
| 10 |
+
|
| 11 |
+
from openenv.core import EnvClient
|
| 12 |
+
from openenv.core.client_types import StepResult
|
| 13 |
+
from openenv.core.env_server.types import State
|
| 14 |
+
|
| 15 |
+
from .models import CloudQueueAction, CloudQueueObservation
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class CloudQueueEnv(
|
| 19 |
+
EnvClient[CloudQueueAction, CloudQueueObservation, State]
|
| 20 |
+
):
|
| 21 |
+
"""
|
| 22 |
+
Client for the Cloud Queue Env Environment.
|
| 23 |
+
|
| 24 |
+
This client maintains a persistent WebSocket connection to the environment server,
|
| 25 |
+
enabling efficient multi-step interactions with lower latency.
|
| 26 |
+
Each client instance has its own dedicated environment session on the server.
|
| 27 |
+
|
| 28 |
+
Example:
|
| 29 |
+
>>> # Connect to a running server
|
| 30 |
+
>>> with CloudQueueEnv(base_url="http://localhost:8000") as client:
|
| 31 |
+
... result = client.reset()
|
| 32 |
+
... print(result.observation.queue_lengths)
|
| 33 |
+
...
|
| 34 |
+
... result = client.step(CloudQueueAction(action_type="admit", target_queue=0))
|
| 35 |
+
... print(result.observation.throughput_recent)
|
| 36 |
+
|
| 37 |
+
Example with Docker:
|
| 38 |
+
>>> # Automatically start container and connect
|
| 39 |
+
>>> client = CloudQueueEnv.from_docker_image("cloud_queue_env-env:latest")
|
| 40 |
+
>>> try:
|
| 41 |
+
... result = client.reset()
|
| 42 |
+
... result = client.step(CloudQueueAction(action_type="dispatch", target_queue=0))
|
| 43 |
+
... finally:
|
| 44 |
+
... client.close()
|
| 45 |
+
"""
|
| 46 |
+
|
| 47 |
+
def _step_payload(self, action: CloudQueueAction) -> Dict:
|
| 48 |
+
"""
|
| 49 |
+
Convert CloudQueueAction to JSON payload for step message.
|
| 50 |
+
|
| 51 |
+
Args:
|
| 52 |
+
action: CloudQueueAction instance
|
| 53 |
+
|
| 54 |
+
Returns:
|
| 55 |
+
Dictionary representation suitable for JSON encoding
|
| 56 |
+
"""
|
| 57 |
+
return {
|
| 58 |
+
"action_type": action.action_type,
|
| 59 |
+
"target_queue": action.target_queue,
|
| 60 |
+
"target_server": action.target_server,
|
| 61 |
+
"scale_delta": action.scale_delta,
|
| 62 |
+
"new_priority": action.new_priority,
|
| 63 |
+
"task_id": action.task_id,
|
| 64 |
+
"seed": action.seed,
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
def _parse_result(self, payload: Dict) -> StepResult[CloudQueueObservation]:
|
| 68 |
+
"""
|
| 69 |
+
Parse server response into StepResult[CloudQueueObservation].
|
| 70 |
+
|
| 71 |
+
Args:
|
| 72 |
+
payload: JSON response data from server
|
| 73 |
+
|
| 74 |
+
Returns:
|
| 75 |
+
StepResult with CloudQueueObservation
|
| 76 |
+
"""
|
| 77 |
+
obs_data = payload.get("observation", {})
|
| 78 |
+
observation = CloudQueueObservation(
|
| 79 |
+
task_id=obs_data.get("task_id", "easy"),
|
| 80 |
+
sim_time=obs_data.get("sim_time", 0),
|
| 81 |
+
horizon=obs_data.get("horizon", 0),
|
| 82 |
+
queue_lengths=obs_data.get("queue_lengths", []),
|
| 83 |
+
queue_wait_ema=obs_data.get("queue_wait_ema", []),
|
| 84 |
+
server_busy=obs_data.get("server_busy", []),
|
| 85 |
+
server_remaining_service=obs_data.get("server_remaining_service", []),
|
| 86 |
+
utilization=obs_data.get("utilization", []),
|
| 87 |
+
incoming_job_present=obs_data.get("incoming_job_present", False),
|
| 88 |
+
incoming_job_size=obs_data.get("incoming_job_size", 0.0),
|
| 89 |
+
incoming_job_priority=obs_data.get("incoming_job_priority", 0),
|
| 90 |
+
incoming_job_deadline=obs_data.get("incoming_job_deadline", 0.0),
|
| 91 |
+
incoming_job_type=obs_data.get("incoming_job_type", 0),
|
| 92 |
+
sla_violation_rate=obs_data.get("sla_violation_rate", 0.0),
|
| 93 |
+
abandonment_rate=obs_data.get("abandonment_rate", 0.0),
|
| 94 |
+
throughput_recent=obs_data.get("throughput_recent", 0.0),
|
| 95 |
+
energy_cost_rate=obs_data.get("energy_cost_rate", 0.0),
|
| 96 |
+
level=obs_data.get("level", 1.0),
|
| 97 |
+
optional_history=obs_data.get("optional_history", []),
|
| 98 |
+
action_mask=obs_data.get("action_mask", []),
|
| 99 |
+
done=payload.get("done", False),
|
| 100 |
+
reward=payload.get("reward"),
|
| 101 |
+
metadata=obs_data.get("metadata", {}),
|
| 102 |
+
)
|
| 103 |
+
|
| 104 |
+
return StepResult(
|
| 105 |
+
observation=observation,
|
| 106 |
+
reward=payload.get("reward"),
|
| 107 |
+
done=payload.get("done", False),
|
| 108 |
+
)
|
| 109 |
+
|
| 110 |
+
def _parse_state(self, payload: Dict) -> State:
|
| 111 |
+
"""
|
| 112 |
+
Parse server response into State object.
|
| 113 |
+
|
| 114 |
+
Args:
|
| 115 |
+
payload: JSON response from state request
|
| 116 |
+
|
| 117 |
+
Returns:
|
| 118 |
+
State object with episode_id and step_count
|
| 119 |
+
"""
|
| 120 |
+
return State(
|
| 121 |
+
episode_id=payload.get("episode_id"),
|
| 122 |
+
step_count=payload.get("step_count", 0),
|
| 123 |
+
)
|
inference.py
CHANGED
|
@@ -1,4 +1,12 @@
|
|
| 1 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
import asyncio
|
| 4 |
import csv
|
|
@@ -12,7 +20,7 @@ from urllib.parse import urlparse, urlunparse
|
|
| 12 |
from dotenv import load_dotenv
|
| 13 |
from openai import OpenAI
|
| 14 |
|
| 15 |
-
load_dotenv()
|
| 16 |
|
| 17 |
from cloud_queue_env import CloudQueueAction, CloudQueueEnv, CloudQueueObservation
|
| 18 |
|
|
@@ -20,10 +28,8 @@ from cloud_queue_env import CloudQueueAction, CloudQueueEnv, CloudQueueObservati
|
|
| 20 |
IMAGE_NAME = os.getenv("IMAGE_NAME")
|
| 21 |
BASE_URL = os.getenv("BASE_URL")
|
| 22 |
|
| 23 |
-
|
| 24 |
API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
|
| 25 |
MODEL_NAME = os.getenv("MODEL_NAME") or "Qwen/Qwen2.5-72B-Instruct"
|
| 26 |
-
|
| 27 |
API_KEY = os.getenv("API_KEY") or os.getenv("HF_TOKEN")
|
| 28 |
|
| 29 |
BENCHMARK = os.getenv("BENCHMARK", "queueops-openenv")
|
|
@@ -31,42 +37,43 @@ TASKS = ["easy", "medium", "hard"]
|
|
| 31 |
TASK_SEEDS_JSON = os.getenv("TASK_SEEDS_JSON")
|
| 32 |
SEEDS = [11, 23, 37]
|
| 33 |
TEMPERATURE = 0.2
|
| 34 |
-
MAX_TOKENS =
|
| 35 |
SUCCESS_SCORE_THRESHOLD = 0.60
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
MAX_STEPS_OVERRIDE = int(os.getenv("MAX_STEPS_OVERRIDE", "0") or "0")
|
| 39 |
ACTION_TRACE_FILE = os.getenv("ACTION_TRACE_FILE")
|
| 40 |
REPORT_JSON_PATH = os.getenv("REPORT_JSON_PATH")
|
| 41 |
REPORT_CSV_PATH = os.getenv("REPORT_CSV_PATH")
|
| 42 |
|
|
|
|
|
|
|
|
|
|
| 43 |
SYSTEM_PROMPT = textwrap.dedent(
|
| 44 |
"""
|
| 45 |
You are an agent controlling a cloud queue scheduling environment.
|
| 46 |
Your goal: minimize wait times, SLA violations, and cost while maximizing throughput.
|
| 47 |
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
"""
|
| 67 |
).strip()
|
| 68 |
|
| 69 |
-
|
| 70 |
ACTION_TYPES = (
|
| 71 |
"configure_task",
|
| 72 |
"admit",
|
|
@@ -84,33 +91,11 @@ TASK_ALLOWED_ACTIONS = {
|
|
| 84 |
"hard": {"admit", "reject", "route", "dispatch", "reprioritize", "scale", "noop"},
|
| 85 |
}
|
| 86 |
|
| 87 |
-
MODEL_ACTION_RESPONSE_FORMAT = {
|
| 88 |
-
"type": "json_schema",
|
| 89 |
-
"json_schema": {
|
| 90 |
-
"name": "cloud_queue_action",
|
| 91 |
-
"strict": True,
|
| 92 |
-
"schema": {
|
| 93 |
-
"type": "object",
|
| 94 |
-
"additionalProperties": False,
|
| 95 |
-
"required": [
|
| 96 |
-
"action_type",
|
| 97 |
-
"target_queue",
|
| 98 |
-
"target_server",
|
| 99 |
-
"scale_delta",
|
| 100 |
-
"new_priority",
|
| 101 |
-
],
|
| 102 |
-
"properties": {
|
| 103 |
-
"action_type": {"type": "string", "enum": list(ACTION_TYPES)},
|
| 104 |
-
"target_queue": {"type": ["integer", "null"], "minimum": 0},
|
| 105 |
-
"target_server": {"type": ["integer", "null"], "minimum": 0},
|
| 106 |
-
"scale_delta": {"type": ["integer", "null"], "minimum": -2, "maximum": 2},
|
| 107 |
-
"new_priority": {"type": ["integer", "null"], "minimum": 0, "maximum": 3},
|
| 108 |
-
},
|
| 109 |
-
},
|
| 110 |
-
},
|
| 111 |
-
}
|
| 112 |
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
|
| 116 |
def log_start(task: str, env: str, model: str) -> None:
|
|
@@ -142,8 +127,8 @@ def parse_task_seed_map() -> dict[str, list[int]]:
|
|
| 142 |
task_map[str(task_name)] = parsed
|
| 143 |
if task_map:
|
| 144 |
return task_map
|
| 145 |
-
except Exception
|
| 146 |
-
|
| 147 |
|
| 148 |
return {
|
| 149 |
"easy": [SEEDS[0]],
|
|
@@ -169,8 +154,7 @@ def load_replay_actions() -> dict[str, list[CloudQueueAction]]:
|
|
| 169 |
try:
|
| 170 |
with open(ACTION_TRACE_FILE, "r", encoding="utf-8") as f:
|
| 171 |
payload = json.load(f)
|
| 172 |
-
except Exception
|
| 173 |
-
print(f"[DEBUG] Failed to load ACTION_TRACE_FILE: {exc}", flush=True)
|
| 174 |
return {}
|
| 175 |
|
| 176 |
replay: dict[str, list[CloudQueueAction]] = {}
|
|
@@ -211,8 +195,8 @@ def write_reports(seed_rows: list[dict], task_score_table: dict[str, list[float]
|
|
| 211 |
try:
|
| 212 |
with open(REPORT_JSON_PATH, "w", encoding="utf-8") as f:
|
| 213 |
json.dump(report_payload, f, indent=2)
|
| 214 |
-
except Exception
|
| 215 |
-
|
| 216 |
|
| 217 |
if REPORT_CSV_PATH:
|
| 218 |
try:
|
|
@@ -228,28 +212,25 @@ def write_reports(seed_rows: list[dict], task_score_table: dict[str, list[float]
|
|
| 228 |
"trace_digest",
|
| 229 |
"invalid_actions",
|
| 230 |
"harmful_scale_down",
|
|
|
|
| 231 |
],
|
| 232 |
)
|
| 233 |
writer.writeheader()
|
| 234 |
for row in seed_rows:
|
| 235 |
writer.writerow(row)
|
| 236 |
-
except Exception
|
| 237 |
-
|
| 238 |
|
| 239 |
|
| 240 |
def build_obs_summary(obs: CloudQueueObservation, task_name: str) -> str:
|
| 241 |
-
"""Build a rich, structured text summary of the observation for the LLM prompt."""
|
| 242 |
-
# Queue fill percentages — helps model know when to reject
|
| 243 |
max_sizes = {"easy": 28, "medium": 42, "hard": 64}
|
| 244 |
max_q = max_sizes.get(task_name, 30)
|
| 245 |
fills = [f"{l}/{max_q}({100*l//max_q}%)" for l in obs.queue_lengths]
|
| 246 |
|
| 247 |
-
# Server status
|
| 248 |
busy_count = sum(obs.server_busy)
|
| 249 |
total_servers = len(obs.server_busy)
|
| 250 |
servers_str = f"{busy_count}/{total_servers} busy"
|
| 251 |
|
| 252 |
-
# Incoming job info
|
| 253 |
if obs.incoming_job_present:
|
| 254 |
urgency = "URGENT" if obs.incoming_job_priority >= 2 else "normal"
|
| 255 |
incoming_str = f"YES [{urgency} size={obs.incoming_job_size:.1f} deadline={obs.incoming_job_deadline:.0f}]"
|
|
@@ -267,7 +248,7 @@ def build_obs_summary(obs: CloudQueueObservation, task_name: str) -> str:
|
|
| 267 |
)
|
| 268 |
|
| 269 |
|
| 270 |
-
def build_user_prompt(step: int, obs_summary: str, last_reward: float, history: List[str]
|
| 271 |
history_block = "\n".join(history[-4:]) if history else "None"
|
| 272 |
return textwrap.dedent(
|
| 273 |
f"""
|
|
@@ -280,16 +261,6 @@ def build_user_prompt(step: int, obs_summary: str, last_reward: float, history:
|
|
| 280 |
).strip()
|
| 281 |
|
| 282 |
|
| 283 |
-
def choose_heuristic_action(task_name: str, queue_lengths: List[int], incoming_present: bool) -> CloudQueueAction:
|
| 284 |
-
if incoming_present:
|
| 285 |
-
if task_name == "hard" and len(queue_lengths) > 1 and queue_lengths[0] > queue_lengths[1]:
|
| 286 |
-
return CloudQueueAction(action_type="route", target_queue=1)
|
| 287 |
-
if task_name == "medium" and len(queue_lengths) > 1 and queue_lengths[1] < queue_lengths[0]:
|
| 288 |
-
return CloudQueueAction(action_type="route", target_queue=1)
|
| 289 |
-
return CloudQueueAction(action_type="admit", target_queue=0)
|
| 290 |
-
return CloudQueueAction(action_type="dispatch", target_queue=0)
|
| 291 |
-
|
| 292 |
-
|
| 293 |
def _coerce_optional_int(value: Any) -> Optional[int]:
|
| 294 |
if value is None:
|
| 295 |
return None
|
|
@@ -318,7 +289,6 @@ def _extract_json_object(text: str) -> Optional[dict[str, Any]]:
|
|
| 318 |
if not cleaned:
|
| 319 |
return None
|
| 320 |
|
| 321 |
-
# Handle common fenced responses first.
|
| 322 |
if cleaned.startswith("```"):
|
| 323 |
chunks = [chunk.strip() for chunk in cleaned.split("```") if chunk.strip()]
|
| 324 |
for chunk in chunks:
|
|
@@ -343,7 +313,6 @@ def _extract_json_object(text: str) -> Optional[dict[str, Any]]:
|
|
| 343 |
except Exception:
|
| 344 |
pass
|
| 345 |
|
| 346 |
-
# Fallback: extract the first balanced JSON object from noisy text.
|
| 347 |
start = 0
|
| 348 |
while True:
|
| 349 |
open_idx = cleaned.find("{", start)
|
|
@@ -371,7 +340,6 @@ def _normalize_action_payload(data: dict[str, Any], task_name: str) -> Optional[
|
|
| 371 |
action_type = str(data.get("action_type", "noop")).strip().lower()
|
| 372 |
if action_type not in ACTION_TYPES:
|
| 373 |
return None
|
| 374 |
-
|
| 375 |
if action_type not in TASK_ALLOWED_ACTIONS.get(task_name, set(ACTION_TYPES)):
|
| 376 |
return None
|
| 377 |
|
|
@@ -412,17 +380,19 @@ def parse_model_action(text: str, task_name: str) -> Optional[CloudQueueAction]:
|
|
| 412 |
data = _extract_json_object(text)
|
| 413 |
if data is None:
|
| 414 |
return None
|
| 415 |
-
|
| 416 |
payload = _normalize_action_payload(data, task_name)
|
| 417 |
if payload is None:
|
| 418 |
return None
|
| 419 |
-
|
| 420 |
try:
|
| 421 |
return CloudQueueAction(**payload)
|
| 422 |
except Exception:
|
| 423 |
return None
|
| 424 |
|
| 425 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
def get_model_action(
|
| 427 |
client: OpenAI,
|
| 428 |
task_name: str,
|
|
@@ -431,46 +401,20 @@ def get_model_action(
|
|
| 431 |
last_reward: float,
|
| 432 |
history: List[str],
|
| 433 |
) -> tuple[Optional[CloudQueueAction], Optional[str]]:
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
user_prompt = build_user_prompt(step, obs_summary, last_reward, history, task_name)
|
| 437 |
messages = [
|
| 438 |
{"role": "system", "content": SYSTEM_PROMPT},
|
| 439 |
{"role": "user", "content": user_prompt},
|
| 440 |
]
|
| 441 |
|
| 442 |
try:
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
stream=False,
|
| 451 |
-
response_format=MODEL_ACTION_RESPONSE_FORMAT,
|
| 452 |
-
)
|
| 453 |
-
except Exception as schema_exc:
|
| 454 |
-
_SCHEMA_RESPONSE_FORMAT_FAILED = True
|
| 455 |
-
print(
|
| 456 |
-
f"[DEBUG] response_format unavailable, retrying without schema: {schema_exc}",
|
| 457 |
-
flush=True,
|
| 458 |
-
)
|
| 459 |
-
completion = client.chat.completions.create(
|
| 460 |
-
model=MODEL_NAME,
|
| 461 |
-
messages=messages,
|
| 462 |
-
temperature=TEMPERATURE,
|
| 463 |
-
max_tokens=MAX_TOKENS,
|
| 464 |
-
stream=False,
|
| 465 |
-
)
|
| 466 |
-
else:
|
| 467 |
-
completion = client.chat.completions.create(
|
| 468 |
-
model=MODEL_NAME,
|
| 469 |
-
messages=messages,
|
| 470 |
-
temperature=TEMPERATURE,
|
| 471 |
-
max_tokens=MAX_TOKENS,
|
| 472 |
-
stream=False,
|
| 473 |
-
)
|
| 474 |
|
| 475 |
text = (completion.choices[0].message.content or "").strip()
|
| 476 |
action = parse_model_action(text, task_name)
|
|
@@ -479,65 +423,70 @@ def get_model_action(
|
|
| 479 |
return None, f"invalid_model_action_payload: {preview}"
|
| 480 |
return action, None
|
| 481 |
except Exception as exc:
|
| 482 |
-
print(f"[DEBUG] Model request failed: {exc}", flush=True)
|
| 483 |
return None, str(exc)
|
| 484 |
|
| 485 |
|
| 486 |
-
def
|
| 487 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
"""
|
| 492 |
if not base_url:
|
| 493 |
return base_url
|
| 494 |
|
| 495 |
cleaned = base_url.strip().rstrip("/")
|
| 496 |
parsed = urlparse(cleaned)
|
| 497 |
|
| 498 |
-
# Handle Hugging Face repo page URL -> runtime URL used by API/WebSocket.
|
| 499 |
if parsed.netloc.lower() == "huggingface.co":
|
| 500 |
parts = [p for p in parsed.path.strip("/").split("/") if p]
|
| 501 |
if len(parts) >= 3 and parts[0] == "spaces":
|
| 502 |
owner, space = parts[1], parts[2]
|
| 503 |
-
# HF runtime hostnames use lowercase and are TLS-safe.
|
| 504 |
owner = owner.lower().replace("_", "-")
|
| 505 |
space = space.lower().replace("_", "-")
|
| 506 |
return f"https://{owner}-{space}.hf.space"
|
| 507 |
|
| 508 |
-
# Avoid accidentally pointing at the web UI path.
|
| 509 |
if cleaned.endswith("/web"):
|
| 510 |
cleaned = cleaned[:-4]
|
| 511 |
parsed = urlparse(cleaned)
|
| 512 |
|
| 513 |
-
# HF runtime domains should be lowercase and avoid underscores for TLS host checks.
|
| 514 |
host = (parsed.hostname or "").lower()
|
| 515 |
if host.endswith(".hf.space"):
|
| 516 |
safe_host = host.replace("_", "-")
|
| 517 |
if safe_host != host or (parsed.netloc and parsed.netloc != parsed.netloc.lower()):
|
| 518 |
port_part = f":{parsed.port}" if parsed.port else ""
|
| 519 |
-
|
| 520 |
-
parsed = parsed._replace(netloc=netloc)
|
| 521 |
cleaned = urlunparse(parsed)
|
| 522 |
|
| 523 |
return cleaned
|
| 524 |
|
| 525 |
|
| 526 |
def _smoke_test_model(client: OpenAI) -> bool:
|
| 527 |
-
"""Verify the model API is reachable AND can generate a coherent response.
|
| 528 |
-
|
| 529 |
-
Asks a short queue-domain question that requires a real sentence answer.
|
| 530 |
-
An empty or missing reply is treated as failure — not just exceptions.
|
| 531 |
-
|
| 532 |
-
Prints [MODEL_OK] or [MODEL_FAIL] with details.
|
| 533 |
-
Returns True if the model is working, False otherwise.
|
| 534 |
-
"""
|
| 535 |
-
print(f"[MODEL_CHECK] Testing model={MODEL_NAME} at {API_BASE_URL} ...", flush=True)
|
| 536 |
test_question = (
|
| 537 |
"You are a cloud scheduling agent. "
|
| 538 |
"A job queue is 80% full and a new urgent job just arrived. "
|
| 539 |
"Should you admit the job, reject it, or route it to another queue? "
|
| 540 |
-
"Answer
|
| 541 |
)
|
| 542 |
try:
|
| 543 |
resp = client.chat.completions.create(
|
|
@@ -548,41 +497,29 @@ def _smoke_test_model(client: OpenAI) -> bool:
|
|
| 548 |
)
|
| 549 |
reply = (resp.choices[0].message.content or "").strip()
|
| 550 |
if not reply:
|
| 551 |
-
print("[MODEL_FAIL] Model returned an empty response.", flush=True)
|
| 552 |
-
print("[MODEL_FAIL] Will fall back to heuristic for all steps.", flush=True)
|
| 553 |
return False
|
| 554 |
-
print(f"[MODEL_OK] model is reasoning correctly.", flush=True)
|
| 555 |
-
print(f"[MODEL_OK] test reply: {reply}", flush=True)
|
| 556 |
return True
|
| 557 |
-
except Exception
|
| 558 |
-
print(f"[MODEL_FAIL] Cannot reach model: {exc}", flush=True)
|
| 559 |
-
print("[MODEL_FAIL] Will fall back to heuristic for all steps.", flush=True)
|
| 560 |
return False
|
| 561 |
|
| 562 |
|
| 563 |
async def main() -> None:
|
| 564 |
-
if not API_KEY
|
| 565 |
-
raise ValueError("API_KEY is required for model inference.")
|
| 566 |
|
| 567 |
-
client =
|
| 568 |
-
if not
|
| 569 |
-
|
| 570 |
-
runtime_base_url = normalize_base_url(BASE_URL)
|
| 571 |
|
|
|
|
| 572 |
if runtime_base_url:
|
| 573 |
env = CloudQueueEnv(base_url=runtime_base_url)
|
| 574 |
else:
|
| 575 |
if not IMAGE_NAME:
|
| 576 |
-
raise ValueError(
|
| 577 |
-
"Set BASE_URL for deployed env, or IMAGE_NAME for local docker env."
|
| 578 |
-
)
|
| 579 |
env = await CloudQueueEnv.from_docker_image(IMAGE_NAME)
|
| 580 |
|
| 581 |
try:
|
| 582 |
-
# Run smoke test before benchmark — confirms model API is reachable.
|
| 583 |
-
model_enabled = client is not None
|
| 584 |
-
if client is not None:
|
| 585 |
-
model_enabled = _smoke_test_model(client)
|
| 586 |
task_seed_map = parse_task_seed_map()
|
| 587 |
replay_map = load_replay_actions()
|
| 588 |
task_score_table: dict[str, list[float]] = {}
|
|
@@ -599,21 +536,24 @@ async def main() -> None:
|
|
| 599 |
history: List[str] = []
|
| 600 |
rewards: List[float] = []
|
| 601 |
steps_taken = 0
|
| 602 |
-
score =
|
| 603 |
success = False
|
|
|
|
| 604 |
|
| 605 |
log_start(task=task_name, env=BENCHMARK, model=MODEL_NAME)
|
| 606 |
|
| 607 |
await env.reset()
|
| 608 |
-
await env.step(
|
| 609 |
-
CloudQueueAction(action_type="configure_task", task_id=task_name, seed=seed)
|
| 610 |
-
)
|
| 611 |
result = await env.reset()
|
|
|
|
| 612 |
last_reward = 0.0
|
| 613 |
max_steps = max(1, int(result.observation.horizon))
|
| 614 |
if MAX_STEPS_OVERRIDE > 0:
|
| 615 |
max_steps = min(max_steps, MAX_STEPS_OVERRIDE)
|
| 616 |
|
|
|
|
|
|
|
|
|
|
| 617 |
for step in range(1, max_steps + 1):
|
| 618 |
if result.done:
|
| 619 |
break
|
|
@@ -621,32 +561,33 @@ async def main() -> None:
|
|
| 621 |
obs = result.observation
|
| 622 |
obs_summary = build_obs_summary(obs, task_name)
|
| 623 |
|
| 624 |
-
action = None
|
| 625 |
-
model_error = None
|
| 626 |
-
|
| 627 |
-
replay_actions = replay_map.get(replay_key, [])
|
| 628 |
if step - 1 < len(replay_actions):
|
| 629 |
action = replay_actions[step - 1]
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
action, model_error = get_model_action(
|
| 633 |
client=client,
|
| 634 |
task_name=task_name,
|
| 635 |
step=step,
|
| 636 |
obs_summary=obs_summary,
|
| 637 |
last_reward=last_reward,
|
| 638 |
history=history,
|
|
|
|
| 639 |
)
|
| 640 |
-
if model_error and DISABLE_MODEL_ON_FIRST_ERROR:
|
| 641 |
-
model_enabled = False
|
| 642 |
-
print("[DEBUG] Disabling model calls and switching to heuristic fallback.", flush=True)
|
| 643 |
|
| 644 |
if action is None:
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
|
|
|
|
|
|
|
|
|
| 649 |
)
|
|
|
|
|
|
|
| 650 |
|
| 651 |
result = await env.step(action)
|
| 652 |
reward = float(result.reward or 0.0)
|
|
@@ -666,27 +607,23 @@ async def main() -> None:
|
|
| 666 |
f"d={action.scale_delta},p={action.new_priority})"
|
| 667 |
)
|
| 668 |
log_step(step=step, action=action_str, reward=reward, done=done, error=error)
|
| 669 |
-
|
| 670 |
history.append(f"step={step} action={action_str} reward={reward:.2f}")
|
| 671 |
|
| 672 |
if done:
|
| 673 |
break
|
| 674 |
|
| 675 |
-
if isinstance(result.observation.metadata, dict):
|
| 676 |
-
score = float(result.observation.metadata.get("episode_score",
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
flush=True,
|
| 686 |
-
)
|
| 687 |
-
score = max(0.0, min(1.0, score))
|
| 688 |
task_score_table[task_name].append(score)
|
| 689 |
-
success = score >= SUCCESS_SCORE_THRESHOLD
|
| 690 |
log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
|
| 691 |
|
| 692 |
meta = result.observation.metadata or {}
|
|
@@ -694,29 +631,20 @@ async def main() -> None:
|
|
| 694 |
seed_row = {
|
| 695 |
"task": task_name,
|
| 696 |
"seed": int(seed),
|
| 697 |
-
"score":
|
| 698 |
"steps": int(steps_taken),
|
| 699 |
"success": bool(success),
|
| 700 |
"trace_digest": str(meta.get("trace_digest", "")),
|
| 701 |
"invalid_actions": float(metrics.get("invalid_actions", 0.0)),
|
| 702 |
"harmful_scale_down": float(metrics.get("harmful_scale_down", 0.0)),
|
|
|
|
| 703 |
}
|
| 704 |
seed_rows.append(seed_row)
|
| 705 |
-
print(
|
| 706 |
-
"[REPORT_SEED] "
|
| 707 |
-
f"task={seed_row['task']} seed={seed_row['seed']} score={seed_row['score']:.3f} "
|
| 708 |
-
f"steps={seed_row['steps']} trace={seed_row['trace_digest']}",
|
| 709 |
-
flush=True,
|
| 710 |
-
)
|
| 711 |
|
| 712 |
task_scores = task_score_table[task_name]
|
| 713 |
-
task_mean = statistics.mean(task_scores) if task_scores else
|
| 714 |
task_std = statistics.pstdev(task_scores) if len(task_scores) > 1 else 0.0
|
| 715 |
task_ci = ci95(task_scores)
|
| 716 |
-
print(
|
| 717 |
-
f"[REPORT] task={task_name} seeds={len(task_scores)} mean={task_mean:.3f} std={task_std:.3f} ci95={task_ci:.3f}",
|
| 718 |
-
flush=True,
|
| 719 |
-
)
|
| 720 |
|
| 721 |
all_task_means = []
|
| 722 |
for task_name in TASKS:
|
|
@@ -725,23 +653,18 @@ async def main() -> None:
|
|
| 725 |
all_task_means.append(statistics.mean(scores))
|
| 726 |
|
| 727 |
if all_task_means:
|
| 728 |
-
final_score = sum(all_task_means) / len(all_task_means)
|
| 729 |
-
easy_mean = statistics.mean(task_score_table.get("easy", [
|
| 730 |
-
medium_mean = statistics.mean(task_score_table.get("medium", [
|
| 731 |
-
hard_mean = statistics.mean(task_score_table.get("hard", [
|
| 732 |
-
print(
|
| 733 |
-
f"[SUMMARY] easy={easy_mean:.3f} medium={medium_mean:.3f} hard={hard_mean:.3f} final={final_score:.3f}",
|
| 734 |
-
flush=True,
|
| 735 |
-
)
|
| 736 |
-
|
| 737 |
write_reports(seed_rows=seed_rows, task_score_table=task_score_table)
|
| 738 |
|
| 739 |
finally:
|
| 740 |
try:
|
| 741 |
await env.close()
|
| 742 |
-
except Exception
|
| 743 |
-
|
| 744 |
|
| 745 |
|
| 746 |
if __name__ == "__main__":
|
| 747 |
-
asyncio.run(main())
|
|
|
|
| 1 |
+
"""Strict model-only inference runner for the queue operations benchmark.
|
| 2 |
+
|
| 3 |
+
This variant intentionally removes heuristic fallback paths.
|
| 4 |
+
Every decision must come from either:
|
| 5 |
+
1) replay trace input (ACTION_TRACE_FILE), or
|
| 6 |
+
2) model output.
|
| 7 |
+
|
| 8 |
+
If model output is invalid/unavailable, the seed run is marked failed.
|
| 9 |
+
"""
|
| 10 |
|
| 11 |
import asyncio
|
| 12 |
import csv
|
|
|
|
| 20 |
from dotenv import load_dotenv
|
| 21 |
from openai import OpenAI
|
| 22 |
|
| 23 |
+
load_dotenv()
|
| 24 |
|
| 25 |
from cloud_queue_env import CloudQueueAction, CloudQueueEnv, CloudQueueObservation
|
| 26 |
|
|
|
|
| 28 |
IMAGE_NAME = os.getenv("IMAGE_NAME")
|
| 29 |
BASE_URL = os.getenv("BASE_URL")
|
| 30 |
|
|
|
|
| 31 |
API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
|
| 32 |
MODEL_NAME = os.getenv("MODEL_NAME") or "Qwen/Qwen2.5-72B-Instruct"
|
|
|
|
| 33 |
API_KEY = os.getenv("API_KEY") or os.getenv("HF_TOKEN")
|
| 34 |
|
| 35 |
BENCHMARK = os.getenv("BENCHMARK", "queueops-openenv")
|
|
|
|
| 37 |
TASK_SEEDS_JSON = os.getenv("TASK_SEEDS_JSON")
|
| 38 |
SEEDS = [11, 23, 37]
|
| 39 |
TEMPERATURE = 0.2
|
| 40 |
+
MAX_TOKENS = 780
|
| 41 |
SUCCESS_SCORE_THRESHOLD = 0.60
|
| 42 |
+
# Test-friendly default. Set MAX_STEPS_OVERRIDE=0 for full horizon.
|
| 43 |
+
MAX_STEPS_OVERRIDE = int(os.getenv("MAX_STEPS_OVERRIDE", "8") or "8")
|
|
|
|
| 44 |
ACTION_TRACE_FILE = os.getenv("ACTION_TRACE_FILE")
|
| 45 |
REPORT_JSON_PATH = os.getenv("REPORT_JSON_PATH")
|
| 46 |
REPORT_CSV_PATH = os.getenv("REPORT_CSV_PATH")
|
| 47 |
|
| 48 |
+
OPEN_SCORE_MIN = 0.001
|
| 49 |
+
OPEN_SCORE_MAX = 0.999
|
| 50 |
+
|
| 51 |
SYSTEM_PROMPT = textwrap.dedent(
|
| 52 |
"""
|
| 53 |
You are an agent controlling a cloud queue scheduling environment.
|
| 54 |
Your goal: minimize wait times, SLA violations, and cost while maximizing throughput.
|
| 55 |
|
| 56 |
+
OUTPUT FORMAT (strict):
|
| 57 |
+
- Return exactly one JSON object.
|
| 58 |
+
- No markdown, no code fences, no explanations, no extra keys.
|
| 59 |
+
- Always include all fields below.
|
| 60 |
+
|
| 61 |
+
Required JSON schema:
|
| 62 |
+
{
|
| 63 |
+
"action_type": "admit|reject|route|dispatch|scale|reprioritize|noop",
|
| 64 |
+
"target_queue": integer or null,
|
| 65 |
+
"target_server": integer or null,
|
| 66 |
+
"scale_delta": integer or null,
|
| 67 |
+
"new_priority": integer or null
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
Task constraints:
|
| 71 |
+
- easy: only admit/reject/dispatch/noop
|
| 72 |
+
- medium: only admit/reject/route/dispatch/reprioritize/noop
|
| 73 |
+
- hard: only admit/reject/route/dispatch/reprioritize/scale/noop
|
| 74 |
"""
|
| 75 |
).strip()
|
| 76 |
|
|
|
|
| 77 |
ACTION_TYPES = (
|
| 78 |
"configure_task",
|
| 79 |
"admit",
|
|
|
|
| 91 |
"hard": {"admit", "reject", "route", "dispatch", "reprioritize", "scale", "noop"},
|
| 92 |
}
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
+
def clamp_open_score(value: float) -> float:
|
| 96 |
+
if not isinstance(value, (int, float)) or not (value == value):
|
| 97 |
+
return OPEN_SCORE_MIN
|
| 98 |
+
return max(OPEN_SCORE_MIN, min(OPEN_SCORE_MAX, float(value)))
|
| 99 |
|
| 100 |
|
| 101 |
def log_start(task: str, env: str, model: str) -> None:
|
|
|
|
| 127 |
task_map[str(task_name)] = parsed
|
| 128 |
if task_map:
|
| 129 |
return task_map
|
| 130 |
+
except Exception:
|
| 131 |
+
pass
|
| 132 |
|
| 133 |
return {
|
| 134 |
"easy": [SEEDS[0]],
|
|
|
|
| 154 |
try:
|
| 155 |
with open(ACTION_TRACE_FILE, "r", encoding="utf-8") as f:
|
| 156 |
payload = json.load(f)
|
| 157 |
+
except Exception:
|
|
|
|
| 158 |
return {}
|
| 159 |
|
| 160 |
replay: dict[str, list[CloudQueueAction]] = {}
|
|
|
|
| 195 |
try:
|
| 196 |
with open(REPORT_JSON_PATH, "w", encoding="utf-8") as f:
|
| 197 |
json.dump(report_payload, f, indent=2)
|
| 198 |
+
except Exception:
|
| 199 |
+
pass
|
| 200 |
|
| 201 |
if REPORT_CSV_PATH:
|
| 202 |
try:
|
|
|
|
| 212 |
"trace_digest",
|
| 213 |
"invalid_actions",
|
| 214 |
"harmful_scale_down",
|
| 215 |
+
"failure_reason",
|
| 216 |
],
|
| 217 |
)
|
| 218 |
writer.writeheader()
|
| 219 |
for row in seed_rows:
|
| 220 |
writer.writerow(row)
|
| 221 |
+
except Exception:
|
| 222 |
+
pass
|
| 223 |
|
| 224 |
|
| 225 |
def build_obs_summary(obs: CloudQueueObservation, task_name: str) -> str:
|
|
|
|
|
|
|
| 226 |
max_sizes = {"easy": 28, "medium": 42, "hard": 64}
|
| 227 |
max_q = max_sizes.get(task_name, 30)
|
| 228 |
fills = [f"{l}/{max_q}({100*l//max_q}%)" for l in obs.queue_lengths]
|
| 229 |
|
|
|
|
| 230 |
busy_count = sum(obs.server_busy)
|
| 231 |
total_servers = len(obs.server_busy)
|
| 232 |
servers_str = f"{busy_count}/{total_servers} busy"
|
| 233 |
|
|
|
|
| 234 |
if obs.incoming_job_present:
|
| 235 |
urgency = "URGENT" if obs.incoming_job_priority >= 2 else "normal"
|
| 236 |
incoming_str = f"YES [{urgency} size={obs.incoming_job_size:.1f} deadline={obs.incoming_job_deadline:.0f}]"
|
|
|
|
| 248 |
)
|
| 249 |
|
| 250 |
|
| 251 |
+
def build_user_prompt(step: int, obs_summary: str, last_reward: float, history: List[str]) -> str:
|
| 252 |
history_block = "\n".join(history[-4:]) if history else "None"
|
| 253 |
return textwrap.dedent(
|
| 254 |
f"""
|
|
|
|
| 261 |
).strip()
|
| 262 |
|
| 263 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 264 |
def _coerce_optional_int(value: Any) -> Optional[int]:
|
| 265 |
if value is None:
|
| 266 |
return None
|
|
|
|
| 289 |
if not cleaned:
|
| 290 |
return None
|
| 291 |
|
|
|
|
| 292 |
if cleaned.startswith("```"):
|
| 293 |
chunks = [chunk.strip() for chunk in cleaned.split("```") if chunk.strip()]
|
| 294 |
for chunk in chunks:
|
|
|
|
| 313 |
except Exception:
|
| 314 |
pass
|
| 315 |
|
|
|
|
| 316 |
start = 0
|
| 317 |
while True:
|
| 318 |
open_idx = cleaned.find("{", start)
|
|
|
|
| 340 |
action_type = str(data.get("action_type", "noop")).strip().lower()
|
| 341 |
if action_type not in ACTION_TYPES:
|
| 342 |
return None
|
|
|
|
| 343 |
if action_type not in TASK_ALLOWED_ACTIONS.get(task_name, set(ACTION_TYPES)):
|
| 344 |
return None
|
| 345 |
|
|
|
|
| 380 |
data = _extract_json_object(text)
|
| 381 |
if data is None:
|
| 382 |
return None
|
|
|
|
| 383 |
payload = _normalize_action_payload(data, task_name)
|
| 384 |
if payload is None:
|
| 385 |
return None
|
|
|
|
| 386 |
try:
|
| 387 |
return CloudQueueAction(**payload)
|
| 388 |
except Exception:
|
| 389 |
return None
|
| 390 |
|
| 391 |
|
| 392 |
+
def _single_line(text: str) -> str:
|
| 393 |
+
return " ".join((text or "").split())
|
| 394 |
+
|
| 395 |
+
|
| 396 |
def get_model_action(
|
| 397 |
client: OpenAI,
|
| 398 |
task_name: str,
|
|
|
|
| 401 |
last_reward: float,
|
| 402 |
history: List[str],
|
| 403 |
) -> tuple[Optional[CloudQueueAction], Optional[str]]:
|
| 404 |
+
user_prompt = build_user_prompt(step, obs_summary, last_reward, history)
|
|
|
|
|
|
|
| 405 |
messages = [
|
| 406 |
{"role": "system", "content": SYSTEM_PROMPT},
|
| 407 |
{"role": "user", "content": user_prompt},
|
| 408 |
]
|
| 409 |
|
| 410 |
try:
|
| 411 |
+
completion = client.chat.completions.create(
|
| 412 |
+
model=MODEL_NAME,
|
| 413 |
+
messages=messages,
|
| 414 |
+
temperature=TEMPERATURE,
|
| 415 |
+
max_tokens=MAX_TOKENS,
|
| 416 |
+
stream=False,
|
| 417 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 418 |
|
| 419 |
text = (completion.choices[0].message.content or "").strip()
|
| 420 |
action = parse_model_action(text, task_name)
|
|
|
|
| 423 |
return None, f"invalid_model_action_payload: {preview}"
|
| 424 |
return action, None
|
| 425 |
except Exception as exc:
|
|
|
|
| 426 |
return None, str(exc)
|
| 427 |
|
| 428 |
|
| 429 |
+
def get_model_action_with_retry(
|
| 430 |
+
client: OpenAI,
|
| 431 |
+
task_name: str,
|
| 432 |
+
step: int,
|
| 433 |
+
obs_summary: str,
|
| 434 |
+
last_reward: float,
|
| 435 |
+
history: List[str],
|
| 436 |
+
retries: int = 2,
|
| 437 |
+
) -> tuple[Optional[CloudQueueAction], Optional[str]]:
|
| 438 |
+
last_error: Optional[str] = None
|
| 439 |
+
for attempt in range(1, retries + 2):
|
| 440 |
+
action, error = get_model_action(
|
| 441 |
+
client=client,
|
| 442 |
+
task_name=task_name,
|
| 443 |
+
step=step,
|
| 444 |
+
obs_summary=obs_summary,
|
| 445 |
+
last_reward=last_reward,
|
| 446 |
+
history=history,
|
| 447 |
+
)
|
| 448 |
+
if action is not None:
|
| 449 |
+
return action, None
|
| 450 |
+
last_error = error
|
| 451 |
+
return None, last_error
|
| 452 |
|
| 453 |
+
|
| 454 |
+
def normalize_base_url(base_url: Optional[str]) -> Optional[str]:
|
|
|
|
| 455 |
if not base_url:
|
| 456 |
return base_url
|
| 457 |
|
| 458 |
cleaned = base_url.strip().rstrip("/")
|
| 459 |
parsed = urlparse(cleaned)
|
| 460 |
|
|
|
|
| 461 |
if parsed.netloc.lower() == "huggingface.co":
|
| 462 |
parts = [p for p in parsed.path.strip("/").split("/") if p]
|
| 463 |
if len(parts) >= 3 and parts[0] == "spaces":
|
| 464 |
owner, space = parts[1], parts[2]
|
|
|
|
| 465 |
owner = owner.lower().replace("_", "-")
|
| 466 |
space = space.lower().replace("_", "-")
|
| 467 |
return f"https://{owner}-{space}.hf.space"
|
| 468 |
|
|
|
|
| 469 |
if cleaned.endswith("/web"):
|
| 470 |
cleaned = cleaned[:-4]
|
| 471 |
parsed = urlparse(cleaned)
|
| 472 |
|
|
|
|
| 473 |
host = (parsed.hostname or "").lower()
|
| 474 |
if host.endswith(".hf.space"):
|
| 475 |
safe_host = host.replace("_", "-")
|
| 476 |
if safe_host != host or (parsed.netloc and parsed.netloc != parsed.netloc.lower()):
|
| 477 |
port_part = f":{parsed.port}" if parsed.port else ""
|
| 478 |
+
parsed = parsed._replace(netloc=f"{safe_host}{port_part}")
|
|
|
|
| 479 |
cleaned = urlunparse(parsed)
|
| 480 |
|
| 481 |
return cleaned
|
| 482 |
|
| 483 |
|
| 484 |
def _smoke_test_model(client: OpenAI) -> bool:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 485 |
test_question = (
|
| 486 |
"You are a cloud scheduling agent. "
|
| 487 |
"A job queue is 80% full and a new urgent job just arrived. "
|
| 488 |
"Should you admit the job, reject it, or route it to another queue? "
|
| 489 |
+
"Answer with exactly one JSON object containing action_type and optional fields."
|
| 490 |
)
|
| 491 |
try:
|
| 492 |
resp = client.chat.completions.create(
|
|
|
|
| 497 |
)
|
| 498 |
reply = (resp.choices[0].message.content or "").strip()
|
| 499 |
if not reply:
|
|
|
|
|
|
|
| 500 |
return False
|
|
|
|
|
|
|
| 501 |
return True
|
| 502 |
+
except Exception:
|
|
|
|
|
|
|
| 503 |
return False
|
| 504 |
|
| 505 |
|
| 506 |
async def main() -> None:
|
| 507 |
+
if not API_KEY:
|
| 508 |
+
raise ValueError("API_KEY or HF_TOKEN is required for strict model inference.")
|
| 509 |
|
| 510 |
+
client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
|
| 511 |
+
if not _smoke_test_model(client):
|
| 512 |
+
raise RuntimeError("Model smoke test failed. Aborting strict model-only run.")
|
|
|
|
| 513 |
|
| 514 |
+
runtime_base_url = normalize_base_url(BASE_URL)
|
| 515 |
if runtime_base_url:
|
| 516 |
env = CloudQueueEnv(base_url=runtime_base_url)
|
| 517 |
else:
|
| 518 |
if not IMAGE_NAME:
|
| 519 |
+
raise ValueError("Set BASE_URL for deployed env, or IMAGE_NAME for local docker env.")
|
|
|
|
|
|
|
| 520 |
env = await CloudQueueEnv.from_docker_image(IMAGE_NAME)
|
| 521 |
|
| 522 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 523 |
task_seed_map = parse_task_seed_map()
|
| 524 |
replay_map = load_replay_actions()
|
| 525 |
task_score_table: dict[str, list[float]] = {}
|
|
|
|
| 536 |
history: List[str] = []
|
| 537 |
rewards: List[float] = []
|
| 538 |
steps_taken = 0
|
| 539 |
+
score = OPEN_SCORE_MIN
|
| 540 |
success = False
|
| 541 |
+
failure_reason: Optional[str] = None
|
| 542 |
|
| 543 |
log_start(task=task_name, env=BENCHMARK, model=MODEL_NAME)
|
| 544 |
|
| 545 |
await env.reset()
|
| 546 |
+
await env.step(CloudQueueAction(action_type="configure_task", task_id=task_name, seed=seed))
|
|
|
|
|
|
|
| 547 |
result = await env.reset()
|
| 548 |
+
|
| 549 |
last_reward = 0.0
|
| 550 |
max_steps = max(1, int(result.observation.horizon))
|
| 551 |
if MAX_STEPS_OVERRIDE > 0:
|
| 552 |
max_steps = min(max_steps, MAX_STEPS_OVERRIDE)
|
| 553 |
|
| 554 |
+
replay_key = f"{task_name}:{seed}"
|
| 555 |
+
replay_actions = replay_map.get(replay_key, [])
|
| 556 |
+
|
| 557 |
for step in range(1, max_steps + 1):
|
| 558 |
if result.done:
|
| 559 |
break
|
|
|
|
| 561 |
obs = result.observation
|
| 562 |
obs_summary = build_obs_summary(obs, task_name)
|
| 563 |
|
| 564 |
+
action: Optional[CloudQueueAction] = None
|
| 565 |
+
model_error: Optional[str] = None
|
| 566 |
+
|
|
|
|
| 567 |
if step - 1 < len(replay_actions):
|
| 568 |
action = replay_actions[step - 1]
|
| 569 |
+
else:
|
| 570 |
+
action, model_error = get_model_action_with_retry(
|
|
|
|
| 571 |
client=client,
|
| 572 |
task_name=task_name,
|
| 573 |
step=step,
|
| 574 |
obs_summary=obs_summary,
|
| 575 |
last_reward=last_reward,
|
| 576 |
history=history,
|
| 577 |
+
retries=2,
|
| 578 |
)
|
|
|
|
|
|
|
|
|
|
| 579 |
|
| 580 |
if action is None:
|
| 581 |
+
failure_reason = f"model_action_unavailable: {model_error}"
|
| 582 |
+
log_step(
|
| 583 |
+
step=step,
|
| 584 |
+
action="model_action_error",
|
| 585 |
+
reward=0.0,
|
| 586 |
+
done=True,
|
| 587 |
+
error=failure_reason,
|
| 588 |
)
|
| 589 |
+
steps_taken = step
|
| 590 |
+
break
|
| 591 |
|
| 592 |
result = await env.step(action)
|
| 593 |
reward = float(result.reward or 0.0)
|
|
|
|
| 607 |
f"d={action.scale_delta},p={action.new_priority})"
|
| 608 |
)
|
| 609 |
log_step(step=step, action=action_str, reward=reward, done=done, error=error)
|
|
|
|
| 610 |
history.append(f"step={step} action={action_str} reward={reward:.2f}")
|
| 611 |
|
| 612 |
if done:
|
| 613 |
break
|
| 614 |
|
| 615 |
+
if failure_reason is None and isinstance(result.observation.metadata, dict):
|
| 616 |
+
score = float(result.observation.metadata.get("episode_score", OPEN_SCORE_MIN) or OPEN_SCORE_MIN)
|
| 617 |
+
elif failure_reason is not None:
|
| 618 |
+
score = OPEN_SCORE_MIN
|
| 619 |
+
|
| 620 |
+
if failure_reason is None and not bool(result.done):
|
| 621 |
+
failure_reason = "episode_not_done_within_max_steps"
|
| 622 |
+
score = OPEN_SCORE_MIN
|
| 623 |
+
|
| 624 |
+
score = clamp_open_score(score)
|
|
|
|
|
|
|
|
|
|
| 625 |
task_score_table[task_name].append(score)
|
| 626 |
+
success = failure_reason is None and score >= SUCCESS_SCORE_THRESHOLD
|
| 627 |
log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
|
| 628 |
|
| 629 |
meta = result.observation.metadata or {}
|
|
|
|
| 631 |
seed_row = {
|
| 632 |
"task": task_name,
|
| 633 |
"seed": int(seed),
|
| 634 |
+
"score": score,
|
| 635 |
"steps": int(steps_taken),
|
| 636 |
"success": bool(success),
|
| 637 |
"trace_digest": str(meta.get("trace_digest", "")),
|
| 638 |
"invalid_actions": float(metrics.get("invalid_actions", 0.0)),
|
| 639 |
"harmful_scale_down": float(metrics.get("harmful_scale_down", 0.0)),
|
| 640 |
+
"failure_reason": failure_reason or "",
|
| 641 |
}
|
| 642 |
seed_rows.append(seed_row)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 643 |
|
| 644 |
task_scores = task_score_table[task_name]
|
| 645 |
+
task_mean = statistics.mean(task_scores) if task_scores else OPEN_SCORE_MIN
|
| 646 |
task_std = statistics.pstdev(task_scores) if len(task_scores) > 1 else 0.0
|
| 647 |
task_ci = ci95(task_scores)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 648 |
|
| 649 |
all_task_means = []
|
| 650 |
for task_name in TASKS:
|
|
|
|
| 653 |
all_task_means.append(statistics.mean(scores))
|
| 654 |
|
| 655 |
if all_task_means:
|
| 656 |
+
final_score = clamp_open_score(sum(all_task_means) / len(all_task_means))
|
| 657 |
+
easy_mean = clamp_open_score(statistics.mean(task_score_table.get("easy", [OPEN_SCORE_MIN])))
|
| 658 |
+
medium_mean = clamp_open_score(statistics.mean(task_score_table.get("medium", [OPEN_SCORE_MIN])))
|
| 659 |
+
hard_mean = clamp_open_score(statistics.mean(task_score_table.get("hard", [OPEN_SCORE_MIN])))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 660 |
write_reports(seed_rows=seed_rows, task_score_table=task_score_table)
|
| 661 |
|
| 662 |
finally:
|
| 663 |
try:
|
| 664 |
await env.close()
|
| 665 |
+
except Exception:
|
| 666 |
+
pass
|
| 667 |
|
| 668 |
|
| 669 |
if __name__ == "__main__":
|
| 670 |
+
asyncio.run(main())
|
models.py
CHANGED
|
@@ -1,55 +1,55 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the BSD-style license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
"""Data models for the Cloud Queue Env queue operations environment."""
|
| 8 |
-
|
| 9 |
-
from openenv.core.env_server.types import Action, Observation
|
| 10 |
-
from pydantic import Field
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
class CloudQueueAction(Action):
|
| 14 |
-
"""Action model for queue control decisions."""
|
| 15 |
-
|
| 16 |
-
action_type: str = Field(
|
| 17 |
-
default="noop",
|
| 18 |
-
description=(
|
| 19 |
-
"One of: configure_task, admit, reject, route, dispatch, scale, reprioritize, noop"
|
| 20 |
-
),
|
| 21 |
-
)
|
| 22 |
-
target_queue: int | None = Field(default=None, description="Queue index for route/dispatch")
|
| 23 |
-
target_server: int | None = Field(default=None, description="Server index for dispatch")
|
| 24 |
-
scale_delta: int | None = Field(default=None, description="Server pool scale delta for scale action")
|
| 25 |
-
new_priority: int | None = Field(default=None, description="Updated priority for reprioritize action")
|
| 26 |
-
task_id: str | None = Field(default=None, description="Task selector: easy, medium, or hard")
|
| 27 |
-
seed: int | None = Field(default=None, description="Deterministic seed for upcoming reset")
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
class CloudQueueObservation(Observation):
|
| 31 |
-
"""Observation model exposing queue system state to the agent."""
|
| 32 |
-
|
| 33 |
-
task_id: str = Field(default="easy", description="Active benchmark task")
|
| 34 |
-
sim_time: int = Field(default=0, description="Discrete simulation time step")
|
| 35 |
-
horizon: int = Field(default=0, description="Episode horizon")
|
| 36 |
-
queue_lengths: list[int] = Field(default_factory=list, description="Length per queue")
|
| 37 |
-
queue_wait_ema: list[float] = Field(default_factory=list, description="EMA wait time per queue")
|
| 38 |
-
server_busy: list[int] = Field(default_factory=list, description="1 if server is busy, else 0")
|
| 39 |
-
server_remaining_service: list[float] = Field(
|
| 40 |
-
default_factory=list,
|
| 41 |
-
description="Remaining service time per server",
|
| 42 |
-
)
|
| 43 |
-
utilization: list[float] = Field(default_factory=list, description="Rolling utilization by server")
|
| 44 |
-
incoming_job_present: bool = Field(default=False, description="Whether a new job is waiting for admission")
|
| 45 |
-
incoming_job_size: float = Field(default=0.0, description="Incoming job estimated size")
|
| 46 |
-
incoming_job_priority: int = Field(default=0, description="Incoming job priority")
|
| 47 |
-
incoming_job_deadline: float = Field(default=0.0, description="Incoming job deadline")
|
| 48 |
-
incoming_job_type: int = Field(default=0, description="Incoming job class/type id")
|
| 49 |
-
sla_violation_rate: float = Field(default=0.0, description="Running SLA violation rate")
|
| 50 |
-
abandonment_rate: float = Field(default=0.0, description="Running abandonment rate")
|
| 51 |
-
throughput_recent: float = Field(default=0.0, description="Completed jobs in current step")
|
| 52 |
-
energy_cost_rate: float = Field(default=0.0, description="Current infrastructure cost rate")
|
| 53 |
-
level: float = Field(default=1.0, description="Difficulty level scalar")
|
| 54 |
-
optional_history: list[float] = Field(default_factory=list, description="Compact recent context")
|
| 55 |
-
action_mask: list[int] = Field(default_factory=list, description="Optional valid action hints")
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Data models for the Cloud Queue Env queue operations environment."""
|
| 8 |
+
|
| 9 |
+
from openenv.core.env_server.types import Action, Observation
|
| 10 |
+
from pydantic import Field
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class CloudQueueAction(Action):
    """Action model for queue control decisions."""

    # Discriminator: selects which control verb the remaining fields apply to.
    action_type: str = Field(
        default="noop",
        description=(
            "One of: configure_task, admit, reject, route, dispatch, scale, reprioritize, noop"
        ),
    )
    # Targets for routing/dispatching decisions (only meaningful for those verbs).
    target_queue: int | None = Field(default=None, description="Queue index for route/dispatch")
    target_server: int | None = Field(default=None, description="Server index for dispatch")
    # Parameters for scale / reprioritize verbs.
    scale_delta: int | None = Field(default=None, description="Server pool scale delta for scale action")
    new_priority: int | None = Field(default=None, description="Updated priority for reprioritize action")
    # Episode configuration (consumed by configure_task; applied on the next reset).
    task_id: str | None = Field(default=None, description="Task selector: easy, medium, or hard")
    seed: int | None = Field(default=None, description="Deterministic seed for upcoming reset")
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class CloudQueueObservation(Observation):
    """Observation model exposing queue system state to the agent."""

    # Episode context.
    task_id: str = Field(default="easy", description="Active benchmark task")
    sim_time: int = Field(default=0, description="Discrete simulation time step")
    horizon: int = Field(default=0, description="Episode horizon")
    # Per-queue state (lists are indexed by queue).
    queue_lengths: list[int] = Field(default_factory=list, description="Length per queue")
    queue_wait_ema: list[float] = Field(default_factory=list, description="EMA wait time per queue")
    # Per-server state (lists are indexed by server).
    server_busy: list[int] = Field(default_factory=list, description="1 if server is busy, else 0")
    server_remaining_service: list[float] = Field(
        default_factory=list,
        description="Remaining service time per server",
    )
    utilization: list[float] = Field(default_factory=list, description="Rolling utilization by server")
    # Snapshot of the job currently awaiting an admit/reject decision, if any.
    incoming_job_present: bool = Field(default=False, description="Whether a new job is waiting for admission")
    incoming_job_size: float = Field(default=0.0, description="Incoming job estimated size")
    incoming_job_priority: int = Field(default=0, description="Incoming job priority")
    incoming_job_deadline: float = Field(default=0.0, description="Incoming job deadline")
    incoming_job_type: int = Field(default=0, description="Incoming job class/type id")
    # Running episode-level performance metrics.
    sla_violation_rate: float = Field(default=0.0, description="Running SLA violation rate")
    abandonment_rate: float = Field(default=0.0, description="Running abandonment rate")
    throughput_recent: float = Field(default=0.0, description="Completed jobs in current step")
    energy_cost_rate: float = Field(default=0.0, description="Current infrastructure cost rate")
    level: float = Field(default=1.0, description="Difficulty level scalar")
    # Optional extras for richer policies.
    optional_history: list[float] = Field(default_factory=list, description="Compact recent context")
    action_mask: list[int] = Field(default_factory=list, description="Optional valid action hints")
|
openenv.yaml
CHANGED
|
@@ -1,30 +1,30 @@
|
|
| 1 |
-
spec_version: 1
|
| 2 |
-
name: cloud_queue_env
|
| 3 |
-
type: space
|
| 4 |
-
runtime: fastapi
|
| 5 |
-
app: server.app:app
|
| 6 |
-
port: 8000
|
| 7 |
-
|
| 8 |
-
metadata:
|
| 9 |
-
description: >
|
| 10 |
-
A real-world queueing control environment where an agent manages
|
| 11 |
-
cloud request scheduling decisions — admission control, routing,
|
| 12 |
-
dispatching, and dynamic server scaling — under stochastic arrivals
|
| 13 |
-
and service times. Optimizes latency, throughput, SLA compliance,
|
| 14 |
-
fairness, and infrastructure cost across three benchmark tasks
|
| 15 |
-
(Easy / Medium / Hard) with deterministic graders scored in (0, 1).
|
| 16 |
-
tags:
|
| 17 |
-
- openenv
|
| 18 |
-
- reinforcement-learning
|
| 19 |
-
- queueing
|
| 20 |
-
- scheduling
|
| 21 |
-
- cloud-operations
|
| 22 |
-
- multi-objective
|
| 23 |
-
- llm-agent
|
| 24 |
-
difficulty: easy-to-hard
|
| 25 |
-
tasks:
|
| 26 |
-
- easy
|
| 27 |
-
- medium
|
| 28 |
-
- hard
|
| 29 |
-
author: Mrkumar007
|
| 30 |
-
|
|
|
|
| 1 |
+
spec_version: 1
|
| 2 |
+
name: cloud_queue_env
|
| 3 |
+
type: space
|
| 4 |
+
runtime: fastapi
|
| 5 |
+
app: server.app:app
|
| 6 |
+
port: 8000
|
| 7 |
+
|
| 8 |
+
metadata:
|
| 9 |
+
description: >
|
| 10 |
+
A real-world queueing control environment where an agent manages
|
| 11 |
+
cloud request scheduling decisions — admission control, routing,
|
| 12 |
+
dispatching, and dynamic server scaling — under stochastic arrivals
|
| 13 |
+
and service times. Optimizes latency, throughput, SLA compliance,
|
| 14 |
+
fairness, and infrastructure cost across three benchmark tasks
|
| 15 |
+
(Easy / Medium / Hard) with deterministic graders scored in (0, 1).
|
| 16 |
+
tags:
|
| 17 |
+
- openenv
|
| 18 |
+
- reinforcement-learning
|
| 19 |
+
- queueing
|
| 20 |
+
- scheduling
|
| 21 |
+
- cloud-operations
|
| 22 |
+
- multi-objective
|
| 23 |
+
- llm-agent
|
| 24 |
+
difficulty: easy-to-hard
|
| 25 |
+
tasks:
|
| 26 |
+
- easy
|
| 27 |
+
- medium
|
| 28 |
+
- hard
|
| 29 |
+
author: Mrkumar007
|
| 30 |
+
|
openenv_cloud_queue_env.egg-info/PKG-INFO
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: openenv-cloud_queue_env
|
| 3 |
+
Version: 0.1.0
|
| 4 |
+
Summary: Cloud Queue Env environment for OpenEnv
|
| 5 |
+
Requires-Python: >=3.10
|
| 6 |
+
Requires-Dist: openenv-core[core]>=0.2.2
|
| 7 |
+
Provides-Extra: dev
|
| 8 |
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
| 9 |
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
openenv_cloud_queue_env.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
README.md
|
| 2 |
+
__init__.py
|
| 3 |
+
client.py
|
| 4 |
+
inference.py
|
| 5 |
+
models.py
|
| 6 |
+
pyproject.toml
|
| 7 |
+
./__init__.py
|
| 8 |
+
./client.py
|
| 9 |
+
./inference.py
|
| 10 |
+
./models.py
|
| 11 |
+
openenv_cloud_queue_env.egg-info/PKG-INFO
|
| 12 |
+
openenv_cloud_queue_env.egg-info/SOURCES.txt
|
| 13 |
+
openenv_cloud_queue_env.egg-info/dependency_links.txt
|
| 14 |
+
openenv_cloud_queue_env.egg-info/entry_points.txt
|
| 15 |
+
openenv_cloud_queue_env.egg-info/requires.txt
|
| 16 |
+
openenv_cloud_queue_env.egg-info/top_level.txt
|
| 17 |
+
server/__init__.py
|
| 18 |
+
server/app.py
|
| 19 |
+
server/cloud_queue_env_environment.py
|
openenv_cloud_queue_env.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
openenv_cloud_queue_env.egg-info/entry_points.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[console_scripts]
|
| 2 |
+
server = cloud_queue_env.server.app:main
|
openenv_cloud_queue_env.egg-info/requires.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv-core[core]>=0.2.2
|
| 2 |
+
|
| 3 |
+
[dev]
|
| 4 |
+
pytest>=8.0.0
|
| 5 |
+
pytest-cov>=4.0.0
|
openenv_cloud_queue_env.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
cloud_queue_env
|
server/app.py
CHANGED
|
@@ -1,89 +1,89 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the BSD-style license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
"""
|
| 8 |
-
FastAPI application for the Cloud Queue Env Environment.
|
| 9 |
-
|
| 10 |
-
This module creates an HTTP server that exposes the CloudQueueEnvironment
|
| 11 |
-
over HTTP and WebSocket endpoints, compatible with EnvClient.
|
| 12 |
-
|
| 13 |
-
Endpoints:
|
| 14 |
-
- POST /reset: Reset the environment
|
| 15 |
-
- POST /step: Execute an action
|
| 16 |
-
- GET /state: Get current environment state
|
| 17 |
-
- GET /schema: Get action/observation schemas
|
| 18 |
-
- WS /ws: WebSocket endpoint for persistent sessions
|
| 19 |
-
|
| 20 |
-
Usage:
|
| 21 |
-
# Development (with auto-reload):
|
| 22 |
-
uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
|
| 23 |
-
|
| 24 |
-
# Production:
|
| 25 |
-
uvicorn server.app:app --host 0.0.0.0 --port 8000 --workers 4
|
| 26 |
-
|
| 27 |
-
# Or run directly:
|
| 28 |
-
python -m server.app
|
| 29 |
-
"""
|
| 30 |
-
|
| 31 |
-
try:
|
| 32 |
-
from openenv.core.env_server.http_server import create_app
|
| 33 |
-
except Exception as e: # pragma: no cover
|
| 34 |
-
raise ImportError(
|
| 35 |
-
"openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
|
| 36 |
-
) from e
|
| 37 |
-
|
| 38 |
-
try:
|
| 39 |
-
from ..models import CloudQueueAction, CloudQueueObservation
|
| 40 |
-
from .cloud_queue_env_environment import CloudQueueEnvironment
|
| 41 |
-
except ImportError:
|
| 42 |
-
from models import CloudQueueAction, CloudQueueObservation
|
| 43 |
-
from server.cloud_queue_env_environment import CloudQueueEnvironment
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
# Create the app with web interface and README integration
|
| 47 |
-
app = create_app(
|
| 48 |
-
CloudQueueEnvironment,
|
| 49 |
-
CloudQueueAction,
|
| 50 |
-
CloudQueueObservation,
|
| 51 |
-
env_name="cloud_queue_env",
|
| 52 |
-
max_concurrent_envs=1, # increase this number to allow more concurrent WebSocket sessions
|
| 53 |
-
)
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
def main(host: str = "0.0.0.0", port: int = 8000) -> None:
|
| 57 |
-
"""
|
| 58 |
-
Entry point for direct execution via uv run or python -m.
|
| 59 |
-
|
| 60 |
-
This function enables running the server without Docker:
|
| 61 |
-
uv run --project . server
|
| 62 |
-
uv run --project . server --port 8001
|
| 63 |
-
python -m cloud_queue_env.server.app
|
| 64 |
-
|
| 65 |
-
Args:
|
| 66 |
-
host: Host address to bind to (default: "0.0.0.0")
|
| 67 |
-
port: Port number to listen on (default: 8000)
|
| 68 |
-
|
| 69 |
-
For production deployments, consider using uvicorn directly with
|
| 70 |
-
multiple workers:
|
| 71 |
-
uvicorn cloud_queue_env.server.app:app --workers 4
|
| 72 |
-
"""
|
| 73 |
-
import uvicorn
|
| 74 |
-
|
| 75 |
-
uvicorn.run(app, host=host, port=port)
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
def _cli_main() -> None:
|
| 79 |
-
import argparse
|
| 80 |
-
|
| 81 |
-
parser = argparse.ArgumentParser()
|
| 82 |
-
parser.add_argument("--port", type=int, default=8000)
|
| 83 |
-
parser.add_argument("--host", type=str, default="0.0.0.0")
|
| 84 |
-
args = parser.parse_args()
|
| 85 |
-
main(host=args.host, port=args.port)
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
if __name__ == '__main__':
|
| 89 |
-
main()
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
FastAPI application for the Cloud Queue Env Environment.
|
| 9 |
+
|
| 10 |
+
This module creates an HTTP server that exposes the CloudQueueEnvironment
|
| 11 |
+
over HTTP and WebSocket endpoints, compatible with EnvClient.
|
| 12 |
+
|
| 13 |
+
Endpoints:
|
| 14 |
+
- POST /reset: Reset the environment
|
| 15 |
+
- POST /step: Execute an action
|
| 16 |
+
- GET /state: Get current environment state
|
| 17 |
+
- GET /schema: Get action/observation schemas
|
| 18 |
+
- WS /ws: WebSocket endpoint for persistent sessions
|
| 19 |
+
|
| 20 |
+
Usage:
|
| 21 |
+
# Development (with auto-reload):
|
| 22 |
+
uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
|
| 23 |
+
|
| 24 |
+
# Production:
|
| 25 |
+
uvicorn server.app:app --host 0.0.0.0 --port 8000 --workers 4
|
| 26 |
+
|
| 27 |
+
# Or run directly:
|
| 28 |
+
python -m server.app
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
try:
|
| 32 |
+
from openenv.core.env_server.http_server import create_app
|
| 33 |
+
except Exception as e: # pragma: no cover
|
| 34 |
+
raise ImportError(
|
| 35 |
+
"openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
|
| 36 |
+
) from e
|
| 37 |
+
|
| 38 |
+
try:
|
| 39 |
+
from ..models import CloudQueueAction, CloudQueueObservation
|
| 40 |
+
from .cloud_queue_env_environment import CloudQueueEnvironment
|
| 41 |
+
except ImportError:
|
| 42 |
+
from models import CloudQueueAction, CloudQueueObservation
|
| 43 |
+
from server.cloud_queue_env_environment import CloudQueueEnvironment
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# Create the app with web interface and README integration
|
| 47 |
+
app = create_app(
|
| 48 |
+
CloudQueueEnvironment,
|
| 49 |
+
CloudQueueAction,
|
| 50 |
+
CloudQueueObservation,
|
| 51 |
+
env_name="cloud_queue_env",
|
| 52 |
+
max_concurrent_envs=1, # increase this number to allow more concurrent WebSocket sessions
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def main(host: str = "0.0.0.0", port: int = 8000) -> None:
    """Launch the environment server directly, without Docker.

    Typical invocations:
        uv run --project . server
        uv run --project . server --port 8001
        python -m cloud_queue_env.server.app

    Args:
        host: Address the server binds to (default: "0.0.0.0").
        port: TCP port the server listens on (default: 8000).

    For production deployments, prefer running uvicorn directly with
    multiple workers:
        uvicorn cloud_queue_env.server.app:app --workers 4
    """
    # Imported lazily so merely importing this module never requires uvicorn.
    import uvicorn

    uvicorn.run(app, host=host, port=port)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def _cli_main() -> None:
    """Parse --port/--host command-line flags and start the server."""
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--port", type=int, default=8000)
    arg_parser.add_argument("--host", type=str, default="0.0.0.0")
    options = arg_parser.parse_args()
    main(host=options.host, port=options.port)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
if __name__ == '__main__':
    # Route through the CLI wrapper so `python server/app.py --port 8001`
    # honors the --host/--port flags; calling main() directly ignored them.
    _cli_main()
|
server/cloud_queue_env_environment.py
CHANGED
|
@@ -1,762 +1,781 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the BSD-style license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
"""Queue operations environment with deterministic task grading."""
|
| 8 |
-
|
| 9 |
-
import math
|
| 10 |
-
import random
|
| 11 |
-
import hashlib
|
| 12 |
-
from collections import deque
|
| 13 |
-
from dataclasses import dataclass
|
| 14 |
-
from uuid import uuid4
|
| 15 |
-
|
| 16 |
-
from openenv.core.env_server.interfaces import Environment
|
| 17 |
-
from openenv.core.env_server.types import State
|
| 18 |
-
|
| 19 |
-
try:
|
| 20 |
-
from ..models import CloudQueueAction, CloudQueueObservation
|
| 21 |
-
except ImportError:
|
| 22 |
-
from models import CloudQueueAction, CloudQueueObservation
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
@dataclass
|
| 26 |
-
class TaskConfig:
|
| 27 |
-
task_id: str
|
| 28 |
-
horizon: int
|
| 29 |
-
level: float
|
| 30 |
-
queue_count: int
|
| 31 |
-
initial_servers: int
|
| 32 |
-
min_servers: int
|
| 33 |
-
max_servers: int
|
| 34 |
-
arrival_rate: float
|
| 35 |
-
urgent_ratio: float
|
| 36 |
-
service_mean: float
|
| 37 |
-
deadline_base: int
|
| 38 |
-
allow_scaling: bool
|
| 39 |
-
allow_priority: bool
|
| 40 |
-
two_stage: bool
|
| 41 |
-
server_cost: float
|
| 42 |
-
max_queue_size: int
|
| 43 |
-
score_refs: dict[str, float]
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
class CloudQueueEnvironment(Environment):
|
| 47 |
-
"""Deterministic queueing environment with easy/medium/hard benchmark tasks."""
|
| 48 |
-
|
| 49 |
-
SUPPORTS_CONCURRENT_SESSIONS: bool = True
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
self.
|
| 55 |
-
self.
|
| 56 |
-
self.
|
| 57 |
-
self.
|
| 58 |
-
self.
|
| 59 |
-
self.
|
| 60 |
-
self.
|
| 61 |
-
self.
|
| 62 |
-
self.
|
| 63 |
-
self.
|
| 64 |
-
self.
|
| 65 |
-
self.
|
| 66 |
-
self.
|
| 67 |
-
self.
|
| 68 |
-
self.
|
| 69 |
-
self.
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
"
|
| 133 |
-
"
|
| 134 |
-
"
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
self.
|
| 145 |
-
self.
|
| 146 |
-
self.
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
]
|
| 150 |
-
self.
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
"
|
| 159 |
-
"
|
| 160 |
-
"
|
| 161 |
-
"
|
| 162 |
-
"
|
| 163 |
-
"
|
| 164 |
-
"
|
| 165 |
-
"
|
| 166 |
-
"
|
| 167 |
-
"
|
| 168 |
-
"
|
| 169 |
-
"
|
| 170 |
-
"
|
| 171 |
-
"
|
| 172 |
-
"
|
| 173 |
-
"
|
| 174 |
-
"
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
self.
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
return
|
| 211 |
-
|
| 212 |
-
def
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
"
|
| 250 |
-
"
|
| 251 |
-
"
|
| 252 |
-
"
|
| 253 |
-
"
|
| 254 |
-
"
|
| 255 |
-
"
|
| 256 |
-
"
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
self._queues[
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
self.
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
self._incoming_job
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
return
|
| 352 |
-
|
| 353 |
-
def
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
if
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
for
|
| 425 |
-
if
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
return True, "
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
self._metrics["
|
| 493 |
-
self._metrics["
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
"
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
"
|
| 556 |
-
"
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
"
|
| 573 |
-
"
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
)
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
if
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
if
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
},
|
| 667 |
-
"
|
| 668 |
-
"
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
"
|
| 719 |
-
"
|
| 720 |
-
"
|
| 721 |
-
|
| 722 |
-
|
| 723 |
-
|
| 724 |
-
|
| 725 |
-
|
| 726 |
-
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
|
| 737 |
-
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
|
| 743 |
-
|
| 744 |
-
|
| 745 |
-
|
| 746 |
-
|
| 747 |
-
|
| 748 |
-
|
| 749 |
-
"
|
| 750 |
-
"
|
| 751 |
-
|
| 752 |
-
|
| 753 |
-
|
| 754 |
-
|
| 755 |
-
|
| 756 |
-
|
| 757 |
-
|
| 758 |
-
|
| 759 |
-
|
| 760 |
-
|
| 761 |
-
|
| 762 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Queue operations environment with deterministic task grading."""
|
| 8 |
+
|
| 9 |
+
import math
|
| 10 |
+
import random
|
| 11 |
+
import hashlib
|
| 12 |
+
from collections import deque
|
| 13 |
+
from dataclasses import dataclass
|
| 14 |
+
from uuid import uuid4
|
| 15 |
+
|
| 16 |
+
from openenv.core.env_server.interfaces import Environment
|
| 17 |
+
from openenv.core.env_server.types import State
|
| 18 |
+
|
| 19 |
+
try:
|
| 20 |
+
from ..models import CloudQueueAction, CloudQueueObservation
|
| 21 |
+
except ImportError:
|
| 22 |
+
from models import CloudQueueAction, CloudQueueObservation
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@dataclass
class TaskConfig:
    """Static parameters defining one benchmark task (easy / medium / hard)."""

    task_id: str  # benchmark name: "easy", "medium", or "hard"
    horizon: int  # episode length in simulation steps
    level: float  # difficulty scalar surfaced in observations
    queue_count: int  # number of waiting queues
    initial_servers: int  # server pool size at reset
    min_servers: int  # lower bound for scaling actions
    max_servers: int  # upper bound for scaling actions
    arrival_rate: float  # mean job arrivals per step (Poisson rate)
    urgent_ratio: float  # fraction of arrivals flagged urgent
    service_mean: float  # mean service duration
    deadline_base: int  # base deadline assigned to new jobs
    allow_scaling: bool  # whether scale actions are legal
    allow_priority: bool  # whether reprioritize actions are legal
    two_stage: bool  # whether jobs pass through two processing stages
    server_cost: float  # per-step cost charged per server
    max_queue_size: int  # hard cap on queued jobs
    score_refs: dict[str, float]  # reference values used by the task's grader
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class CloudQueueEnvironment(Environment):
|
| 47 |
+
"""Deterministic queueing environment with easy/medium/hard benchmark tasks."""
|
| 48 |
+
|
| 49 |
+
SUPPORTS_CONCURRENT_SESSIONS: bool = True
|
| 50 |
+
# Benchmark-safe default: dispatch decisions should come from the agent.
|
| 51 |
+
ASSISTED_AUTODISPATCH: bool = False
|
| 52 |
+
|
| 53 |
+
    def __init__(self):
        """Build task configs, seed placeholders, and zeroed runtime state."""
        self._task_configs = self._build_task_configs()
        # Active task runs now; pending task/seed take effect on the next reset().
        self._active_task_id = "easy"
        self._pending_task_id = "easy"
        self._pending_seed = 7
        # Named RNG streams are (re)created from the pending seed at reset time.
        self._rng_streams: dict[str, random.Random] = {}
        self._rng_stream_seeds: dict[str, int] = {}
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._sim_time = 0
        # Simulation containers (populated per-task by _reset_runtime_state).
        self._queues: list[deque[dict]] = []
        self._servers: list[dict] = []
        self._incoming_buffer: deque[dict] = deque()
        self._incoming_job: dict | None = None
        self._done = False
        self._wait_ema: list[float] = []
        self._utilization_ema: list[float] = []
        self._metrics: dict[str, float] = {}
        # Short reward history; maxlen bounds it to the most recent 8 steps.
        self._recent_rewards: deque[float] = deque(maxlen=8)
        self._action_trace: list[str] = []
        # Must run last: it reads _task_configs/_active_task_id set above.
        self._reset_runtime_state()
|
| 73 |
+
|
| 74 |
+
    def _build_task_configs(self) -> dict[str, TaskConfig]:
        """Return the fixed easy/medium/hard benchmark task definitions.

        Values are constants by design: graders rely on these exact
        parameters, so they must not vary between runs.
        """
        return {
            # Easy: single queue, single fixed server, no urgent traffic.
            "easy": TaskConfig(
                task_id="easy",
                horizon=150,
                level=1.0,
                queue_count=1,
                initial_servers=1,
                min_servers=1,
                max_servers=1,
                arrival_rate=0.78,
                urgent_ratio=0.0,
                service_mean=1.6,
                deadline_base=10,
                allow_scaling=False,
                allow_priority=False,
                two_stage=False,
                server_cost=0.04,
                max_queue_size=28,
                score_refs={"wait": 6.0, "thr": 70.0, "rej": 0.3, "sla": 0.3},
            ),
            # Medium: two queues with urgent traffic and reprioritization,
            # but a fixed-size server pool.
            "medium": TaskConfig(
                task_id="medium",
                horizon=200,
                level=2.3,
                queue_count=2,
                initial_servers=3,
                min_servers=3,  # scaling disabled on medium — lock to initial_servers
                max_servers=3,  # scaling disabled on medium — lock to initial_servers
                arrival_rate=1.15,
                urgent_ratio=0.28,
                service_mean=1.8,
                deadline_base=8,
                allow_scaling=False,
                allow_priority=True,
                two_stage=False,
                server_cost=0.06,
                max_queue_size=42,
                score_refs={"uw": 7.0, "nw": 10.0, "usla": 0.25, "thr": 125.0, "cost": 14.0},
            ),
            # Hard: heavier load, two-stage processing, and dynamic scaling.
            "hard": TaskConfig(
                task_id="hard",
                horizon=250,
                level=4.0,
                queue_count=2,
                initial_servers=3,
                min_servers=1,
                max_servers=6,
                arrival_rate=1.45,
                urgent_ratio=0.35,
                service_mean=2.2,
                deadline_base=7,
                allow_scaling=True,
                allow_priority=True,
                two_stage=True,
                server_cost=0.1,
                max_queue_size=64,
                score_refs={
                    "e2e": 14.0,
                    "abd": 0.25,
                    "sla": 0.3,
                    "thr": 145.0,
                    "cost": 28.0,
                    "fair": 0.35,
                },
            ),
        }
|
| 141 |
+
|
| 142 |
+
    def _reset_runtime_state(self) -> None:
        """Rebuild all per-episode simulation state for the active task."""
        cfg = self._task_configs[self._active_task_id]
        self._sim_time = 0
        self._done = False
        self._incoming_buffer = deque()
        self._incoming_job = None
        self._action_trace = []
        # Fresh empty queues and an idle server pool sized per the task config.
        self._queues = [deque() for _ in range(cfg.queue_count)]
        self._servers = [
            {"remaining": 0.0, "job": None, "active": True}
            for _ in range(cfg.initial_servers)
        ]
        self._wait_ema = [0.0 for _ in range(cfg.queue_count)]
        # Utilization tracking is sized for max_servers so scale-ups never reindex.
        self._utilization_ema = [0.0 for _ in range(cfg.max_servers)]
        self._recent_rewards.clear()
        # Episode counters/accumulators; all values kept as floats for uniform math.
        self._metrics = {
            "arrivals": 0.0,
            "accepted": 0.0,
            "rejected": 0.0,
            "completed": 0.0,
            "completed_urgent": 0.0,
            "abandoned": 0.0,
            "wait_sum": 0.0,
            "wait_count": 0.0,
            "wait_sum_urgent": 0.0,
            "wait_count_urgent": 0.0,
            "wait_sum_normal": 0.0,
            "wait_count_normal": 0.0,
            "sla_breaches": 0.0,
            "sla_breaches_urgent": 0.0,
            "invalid_actions": 0.0,
            "noop_under_load": 0.0,
            "harmful_scale_down": 0.0,
            "action_cost": 0.0,
            "infra_cost": 0.0,
            "fairness_gap_sum": 0.0,
            "fairness_gap_count": 0.0,
        }
        # Raw wait-time samples retained for grading statistics.
        self._wait_samples_all: list[float] = []
        self._wait_samples_urgent: list[float] = []
        self._wait_samples_normal: list[float] = []
        self._e2e_wait_samples: list[float] = []
|
| 185 |
+
def _init_rng_streams(self, base_seed: int) -> None:
|
| 186 |
+
self._rng_stream_seeds = {
|
| 187 |
+
"arrivals": int(base_seed) + 101,
|
| 188 |
+
"service": int(base_seed) + 211,
|
| 189 |
+
"abandonment": int(base_seed) + 307,
|
| 190 |
+
"exogenous": int(base_seed) + 401,
|
| 191 |
+
}
|
| 192 |
+
self._rng_streams = {
|
| 193 |
+
key: random.Random(seed) for key, seed in self._rng_stream_seeds.items()
|
| 194 |
+
}
|
| 195 |
+
|
| 196 |
+
def _rng(self, stream: str) -> random.Random:
    """Return the named RNG stream; raises KeyError for an unknown stream name."""
    return self._rng_streams[stream]
|
| 198 |
+
|
| 199 |
+
def _sample_poisson(self, lam: float, rng: random.Random) -> int:
|
| 200 |
+
lam = max(0.0, lam)
|
| 201 |
+
if lam == 0.0:
|
| 202 |
+
return 0
|
| 203 |
+
# Knuth algorithm is sufficient for this environment's lambda scale.
|
| 204 |
+
l_term = math.exp(-lam)
|
| 205 |
+
k = 0
|
| 206 |
+
p = 1.0
|
| 207 |
+
while p > l_term:
|
| 208 |
+
k += 1
|
| 209 |
+
p *= rng.random()
|
| 210 |
+
return max(0, k - 1)
|
| 211 |
+
|
| 212 |
+
def _trace_digest(self) -> str:
    """Short, stable fingerprint of (task, seed, action history) for debugging."""
    header = f"task={self._active_task_id}|seed={self._pending_seed}|"
    payload = header + "|".join(self._action_trace)
    digest = hashlib.sha256(payload.encode("utf-8")).hexdigest()
    return digest[:16]
|
| 215 |
+
|
| 216 |
+
def reset(self) -> CloudQueueObservation:
    """Start a new episode using the pending task/seed configuration.

    Falls back to the "easy" task when the pending task id is unknown.
    Returns the initial observation with zero reward and done=False.
    """
    self._active_task_id = self._pending_task_id if self._pending_task_id in self._task_configs else "easy"
    self._init_rng_streams(self._pending_seed)
    self._state = State(episode_id=str(uuid4()), step_count=0)
    self._reset_runtime_state()
    return self._build_observation(reward=0.0, done=False, info={"event": "reset"})
|
| 222 |
+
|
| 223 |
+
def _clamp(self, value: float, lo: float, hi: float) -> float:
|
| 224 |
+
return max(lo, min(hi, value))
|
| 225 |
+
|
| 226 |
+
def _sample_service_time(self, cfg: TaskConfig) -> float:
    """Sample one job's service time from the "service" RNG stream.

    Hard task: with probability 0.22 draws a heavy-tailed lognormal time
    clamped to [1, 12]. All other draws (including the non-heavy hard case)
    use an exponential with mean cfg.service_mean, clamped to [0.5, 10].
    """
    service_rng = self._rng("service")
    if cfg.task_id == "hard":
        # NOTE: this random() draw is consumed even when the heavy branch is
        # not taken — part of the deterministic RNG consumption pattern.
        heavy = service_rng.random() < 0.22
        if heavy:
            return self._clamp(service_rng.lognormvariate(1.2, 0.7), 1.0, 12.0)
    return self._clamp(service_rng.expovariate(1.0 / cfg.service_mean), 0.5, 10.0)
|
| 233 |
+
|
| 234 |
+
def _sample_arrivals(self, cfg: TaskConfig) -> int:
    """Sample this step's arrival count from a (possibly modulated) Poisson.

    The hard task adds a slow sinusoidal demand wave plus uniform jitter
    drawn from the exogenous stream before the Poisson draw, which itself
    consumes only the arrivals stream.
    """
    arrival_rng = self._rng("arrivals")
    exogenous_rng = self._rng("exogenous")
    rate = cfg.arrival_rate
    if cfg.task_id == "hard":
        wave = 0.35 * math.sin((self._sim_time + 1) / 13.0)
        jitter = exogenous_rng.uniform(-0.05, 0.05)
        rate += wave + jitter
    return self._sample_poisson(rate, arrival_rng)
|
| 243 |
+
|
| 244 |
+
def _build_arrival_job(self, cfg: TaskConfig, arrival_rng: random.Random) -> dict:
    """Construct a new arrival job dict.

    Priority 2 (urgent) is drawn with probability cfg.urgent_ratio; urgent
    jobs get a deadline one step tighter than cfg.deadline_base.
    """
    priority = 2 if arrival_rng.random() < cfg.urgent_ratio else 1
    size = self._sample_service_time(cfg)
    return {
        "priority": priority,
        "queue": 0,  # provisional; admission may re-route to another queue
        "created_step": self._state.step_count,
        "wait": 0.0,
        "size": size,
        "remaining": size,
        "deadline": self._state.step_count + cfg.deadline_base - (1 if priority == 2 else 0),
        "type": 1 if priority == 2 else 0,  # mirrors priority for the observation
        "stage": 0,  # first stage of a potential two-stage pipeline
    }
|
| 258 |
+
|
| 259 |
+
def _promote_next_incoming_job(self) -> None:
    """Pull the next buffered arrival into the incoming slot if it is free."""
    if self._incoming_job is not None:
        return
    if not self._incoming_buffer:
        return
    self._incoming_job = self._incoming_buffer.popleft()
|
| 262 |
+
|
| 263 |
+
def _spawn_incoming_job(self, cfg: TaskConfig) -> None:
    """Sample this step's arrivals, buffer them, and expose one incoming job.

    Arrival metrics are only touched when at least one job arrives; the
    incoming slot is (re)filled from the buffer either way.
    """
    arrivals = self._sample_arrivals(cfg)
    arrival_rng = self._rng("arrivals")
    if arrivals > 0:
        for _ in range(arrivals):
            self._incoming_buffer.append(self._build_arrival_job(cfg, arrival_rng))
        self._metrics["arrivals"] += float(arrivals)
    self._promote_next_incoming_job()
|
| 271 |
+
|
| 272 |
+
def _update_wait_and_abandonment(self, cfg: TaskConfig) -> float:
    """Age every queued job by one step; on the hard task impatient jobs may leave.

    Returns the number of jobs abandoned this step (also added to metrics).
    Each queue is rebuilt in order, so FIFO ordering of survivors is kept.
    """
    abandonment_rng = self._rng("abandonment")
    abandoned_this_step = 0.0
    for qi, q in enumerate(self._queues):
        kept: deque[dict] = deque()
        while q:
            job = q.popleft()
            job["wait"] += 1.0
            # Urgent jobs are less patient (+2 steps) than normal jobs (+4).
            patience = cfg.deadline_base + (2 if job["priority"] == 2 else 4)
            if cfg.task_id == "hard" and job["wait"] > patience and abandonment_rng.random() < 0.35:
                abandoned_this_step += 1.0
                continue
            kept.append(job)
        self._queues[qi] = kept
    if abandoned_this_step:
        self._metrics["abandoned"] += abandoned_this_step
    return abandoned_this_step
|
| 289 |
+
|
| 290 |
+
def _complete_job(self, cfg: TaskConfig, job: dict) -> None:
    """Record a finished job, or forward it to stage 1 on two-stage tasks.

    Stage-0 completions on two-stage tasks are re-enqueued (fresh service
    time, stage=1) and do NOT count as completions yet. Final completions
    update throughput, wait, and SLA metrics, split by priority class.
    """
    if cfg.two_stage and job["stage"] == 0:
        forwarded = dict(job)
        forwarded["stage"] = 1
        forwarded["queue"] = min(1, len(self._queues) - 1)
        forwarded["remaining"] = self._sample_service_time(cfg)
        self._queues[forwarded["queue"]].append(forwarded)
        return

    self._metrics["completed"] += 1.0
    # "wait" here is end-to-end steps since creation, not queueing time alone.
    wait = float(self._state.step_count - job["created_step"])
    self._metrics["wait_sum"] += wait
    self._metrics["wait_count"] += 1.0
    self._wait_samples_all.append(wait)
    self._e2e_wait_samples.append(wait)
    if job["priority"] == 2:
        self._metrics["completed_urgent"] += 1.0
        self._metrics["wait_sum_urgent"] += wait
        self._metrics["wait_count_urgent"] += 1.0
        self._wait_samples_urgent.append(wait)
    else:
        self._metrics["wait_sum_normal"] += wait
        self._metrics["wait_count_normal"] += 1.0
        self._wait_samples_normal.append(wait)
    # A job finishing after its deadline counts as an SLA breach.
    if self._state.step_count > job["deadline"]:
        self._metrics["sla_breaches"] += 1.0
        if job["priority"] == 2:
            self._metrics["sla_breaches_urgent"] += 1.0
|
| 318 |
+
|
| 319 |
+
def _process_servers(self, cfg: TaskConfig) -> float:
    """Advance every active server by one time unit.

    Decrements remaining service, completes jobs that reach zero, and updates
    each server's utilization EMA. Returns the number of jobs completed.
    """
    completed_this_step = 0.0
    for si, server in enumerate(self._servers):
        if not server["active"]:
            continue
        if server["remaining"] > 0:
            server["remaining"] = max(0.0, server["remaining"] - 1.0)
        if server["remaining"] <= 0 and server["job"] is not None:
            self._complete_job(cfg, server["job"])
            completed_this_step += 1.0
            server["job"] = None
        # Busy flag is sampled AFTER completion, so the EMA reflects whether
        # the server enters the next step still occupied.
        busy_flag = 1.0 if server["job"] is not None else 0.0
        if si < len(self._utilization_ema):
            self._utilization_ema[si] = 0.9 * self._utilization_ema[si] + 0.1 * busy_flag
    return completed_this_step
|
| 334 |
+
|
| 335 |
+
def _admit_job(self, cfg: TaskConfig, queue_idx: int) -> tuple[bool, str]:
    """Admit the incoming job into queue_idx, rejecting it if that queue is full.

    Returns (valid, note). A full target queue still counts as a VALID action
    (the job is dropped as rejected and the next buffered arrival is
    promoted); only a missing incoming job or an out-of-range queue index is
    reported as invalid.
    """
    if self._incoming_job is None:
        return False, "no_incoming_job"
    if queue_idx < 0 or queue_idx >= len(self._queues):
        return False, "invalid_queue"
    if len(self._queues[queue_idx]) >= cfg.max_queue_size:
        self._metrics["rejected"] += 1.0
        self._incoming_job = None
        self._promote_next_incoming_job()
        return True, "queue_full_rejected"
    # Copy so later mutation of the queued job cannot alias the incoming slot.
    job = dict(self._incoming_job)
    job["queue"] = queue_idx
    self._queues[queue_idx].append(job)
    self._incoming_job = None
    self._metrics["accepted"] += 1.0
    self._promote_next_incoming_job()
    return True, "admitted"
|
| 352 |
+
|
| 353 |
+
def _dispatch(self, queue_idx: int | None) -> tuple[bool, str]:
    """Move the head job of the target queue onto the first idle active server.

    A missing queue index defaults to queue 0. Returns (valid, note).
    """
    target = queue_idx if queue_idx is not None else 0
    if not (0 <= target < len(self._queues)):
        return False, "invalid_dispatch_queue"
    for server in self._servers:
        if not server["active"] or server["job"] is not None:
            continue
        if not self._queues[target]:
            break
        job = self._queues[target].popleft()
        server["job"] = job
        server["remaining"] = job["remaining"]
        return True, "dispatched"
    return False, "no_idle_server_or_empty_queue"
|
| 365 |
+
|
| 366 |
+
def _autodispatch(self) -> None:
    """Assign the head of the first non-empty queue to each idle active server."""
    for server in self._servers:
        if not server["active"]:
            continue
        if server["job"] is not None:
            continue
        source = next((q for q in self._queues if q), None)
        if source is None:
            continue
        job = source.popleft()
        server["job"] = job
        server["remaining"] = job["remaining"]
|
| 375 |
+
|
| 376 |
+
def _apply_action(self, action: CloudQueueAction, cfg: TaskConfig) -> tuple[bool, str]:
    """Apply one agent action to the simulator state.

    Returns (valid, note). "configure_task" is a meta action that only stages
    the task id / seed for the NEXT reset and is legal even after done; all
    other action types are rejected once the episode is finished.
    """
    action_type = (action.action_type or "noop").lower()

    if action_type == "configure_task":
        # Staged only; takes effect at the next reset().
        if action.task_id and action.task_id in self._task_configs:
            self._pending_task_id = action.task_id
        if action.seed is not None:
            self._pending_seed = int(action.seed)
        return True, "configuration_updated_for_next_reset"

    if self._done:
        return False, "episode_already_done"

    if action_type == "admit":
        queue_idx = action.target_queue if action.target_queue is not None else 0
        return self._admit_job(cfg, queue_idx)

    if action_type == "reject":
        if self._incoming_job is None:
            return False, "no_incoming_job"
        self._incoming_job = None
        self._metrics["rejected"] += 1.0
        self._promote_next_incoming_job()
        return True, "rejected"

    if action_type == "route":
        # "route" behaves as admit-with-target-queue.
        queue_idx = action.target_queue if action.target_queue is not None else 0
        return self._admit_job(cfg, queue_idx)

    if action_type == "dispatch":
        return self._dispatch(action.target_queue)

    if action_type == "scale":
        if not cfg.allow_scaling:
            return False, "scaling_not_supported_for_task"
        delta = action.scale_delta if action.scale_delta is not None else 0
        if delta == 0:
            return True, "no_scale_change"
        active_count = sum(1 for s in self._servers if s["active"])
        # Clamp the requested fleet size to the task's [min, max] bounds.
        requested = int(self._clamp(active_count + delta, cfg.min_servers, cfg.max_servers))
        if requested == active_count:
            return True, "scale_clamped_no_change"
        if requested > active_count:
            for _ in range(requested - active_count):
                self._servers.append({"remaining": 0.0, "job": None, "active": True})
                self._utilization_ema.append(0.0)
        else:
            # Scale down only idle servers, newest first; busy servers keep running.
            to_disable = active_count - requested
            for server in reversed(self._servers):
                if to_disable == 0:
                    break
                if server["active"] and server["job"] is None:
                    server["active"] = False
                    to_disable -= 1
        self._metrics["action_cost"] += abs(delta) * 0.35
        return True, "scaled"

    if action_type == "reprioritize":
        if not cfg.allow_priority:
            return False, "reprioritize_not_supported_for_task"
        new_priority = 2 if (action.new_priority or 1) >= 2 else 1
        # Upgrades only the FIRST queued normal-priority job found, then stops.
        for q in self._queues:
            for job in q:
                if job["priority"] == 1:
                    job["priority"] = new_priority
                    return True, "reprioritized"
        return False, "no_eligible_job"

    if action_type == "noop":
        return True, "noop"

    return False, "unknown_action_type"
|
| 448 |
+
|
| 449 |
+
def _percentile(self, values: list[float], p: float) -> float:
    """Nearest-rank percentile (p in [0, 1]) of values; 0.0 for empty input."""
    if not values:
        return 0.0
    ordered = sorted(values)
    last = len(ordered) - 1
    idx = int(self._clamp(round(last * p), 0, last))
    return float(ordered[idx])
|
| 455 |
+
|
| 456 |
+
def _safe_div(self, numerator: float, denominator: float) -> float:
|
| 457 |
+
if denominator <= 0:
|
| 458 |
+
return 0.0
|
| 459 |
+
return numerator / denominator
|
| 460 |
+
|
| 461 |
+
def _current_fairness_gap(self) -> float:
    """Normalized absolute gap between urgent and normal average waits."""
    metrics = self._metrics
    urgent = self._safe_div(metrics["wait_sum_urgent"], metrics["wait_count_urgent"])
    normal = self._safe_div(metrics["wait_sum_normal"], metrics["wait_count_normal"])
    denom = max(1.0, urgent + normal)
    return abs(urgent - normal) / denom
|
| 466 |
+
|
| 467 |
+
def _compute_reward(
    self,
    cfg: TaskConfig,
    action_ok: bool,
    action_type: str,
    action_scale_delta: int,
    completed_step: float,
) -> tuple[float, dict[str, float]]:
    """Compute the dense per-step reward and its named components.

    Weighted blend (0.35 wait, 0.20 throughput, 0.20 SLA, 0.15 cost,
    0.05 fairness, 0.05 safety), clamped to [-1, 1]. Side effects: appends to
    the recent-reward history and accrues invalid-action, noop-under-load,
    harmful-scale-down, infra-cost, and fairness-gap metrics.
    """
    avg_wait = self._safe_div(self._metrics["wait_sum"], self._metrics["wait_count"])
    queue_pressure = sum(len(q) for q in self._queues) / max(1.0, float(cfg.max_queue_size))
    r_wait = -self._clamp(avg_wait / max(cfg.deadline_base, 1), 0.0, 1.5) - 0.15 * self._clamp(queue_pressure, 0.0, 1.5)
    r_throughput = self._clamp(completed_step / max(1.0, float(cfg.initial_servers)), 0.0, 1.0)
    total_decisions = max(1.0, self._metrics["completed"] + self._metrics["abandoned"])
    r_sla = -self._clamp(self._metrics["sla_breaches"] / total_decisions, 0.0, 1.0)
    active_servers = sum(1 for s in self._servers if s["active"])
    r_cost = -self._clamp(active_servers / max(1.0, float(cfg.max_servers)), 0.0, 1.0)
    fairness_gap = self._current_fairness_gap()
    r_fair = -self._clamp(fairness_gap / 0.5, 0.0, 1.0)
    # Safety term: invalid action (-1), idling under load (-0.05),
    # over-rejection (up to -0.2), harmful scale-down (up to -0.25).
    r_safe = 0.0 if action_ok else -1.0
    if not action_ok:
        self._metrics["invalid_actions"] += 1.0
    if action_type == "noop" and self._incoming_job is not None and sum(len(q) for q in self._queues) > 0:
        r_safe -= 0.05
        self._metrics["noop_under_load"] += 1.0

    arrivals = max(1.0, self._metrics["arrivals"])
    rejection_rate = self._safe_div(self._metrics["rejected"], arrivals)
    if arrivals > 10 and rejection_rate > 0.4:
        r_safe -= self._clamp((rejection_rate - 0.4) * 0.4, 0.0, 0.2)

    if action_type == "scale" and action_scale_delta < 0 and queue_pressure > 0.45:
        overload_penalty = self._clamp((queue_pressure - 0.45) * 0.5, 0.0, 0.25)
        r_safe -= overload_penalty
        self._metrics["harmful_scale_down"] += 1.0

    reward = 0.35 * r_wait + 0.20 * r_throughput + 0.20 * r_sla + 0.15 * r_cost + 0.05 * r_fair + 0.05 * r_safe
    reward = self._clamp(reward, -1.0, 1.0)
    self._recent_rewards.append(reward)

    self._metrics["infra_cost"] += active_servers * cfg.server_cost
    self._metrics["fairness_gap_sum"] += fairness_gap
    self._metrics["fairness_gap_count"] += 1.0

    components = {
        "wait": round(r_wait, 4),
        "throughput": round(r_throughput, 4),
        "sla": round(r_sla, 4),
        "cost": round(r_cost, 4),
        "fairness": round(r_fair, 4),
        "safety": round(r_safe, 4),
    }
    return reward, components
|
| 519 |
+
|
| 520 |
+
def _score_task(self, cfg: TaskConfig) -> tuple[float, dict[str, float]]:
    """Compute the end-of-episode score in (0, 1) plus per-component details.

    Each task level weighs a different mix of sub-scores (wait, throughput,
    rejection, SLA, cost, fairness, abandonment) normalized against
    cfg.score_refs. Excessive invalid actions cap the score at 0.4.
    """
    # c01: clamp individual sub-score components to [0, 1] inclusive.
    def c01(value: float) -> float:
        if not math.isfinite(value):
            return 0.0
        return self._clamp(value, 0.0, 1.0)

    # _strict01: final clamp applied only to the episode score.
    # Validator requires score strictly in (0, 1) — never 0.0 or 1.0.
    _SCORE_MIN = 0.001
    _SCORE_MAX = 0.999

    def strict01(value: float) -> float:
        if not math.isfinite(value):
            return _SCORE_MIN
        return self._clamp(value, _SCORE_MIN, _SCORE_MAX)

    completed = self._metrics["completed"]
    arrivals = self._metrics["arrivals"]
    rejected = self._metrics["rejected"]
    avg_wait = self._safe_div(self._metrics["wait_sum"], self._metrics["wait_count"])
    rejection_rate = self._safe_div(rejected, arrivals)
    sla_rate = self._safe_div(self._metrics["sla_breaches"], max(1.0, completed))
    throughput = completed
    fairness_gap = self._safe_div(self._metrics["fairness_gap_sum"], self._metrics["fairness_gap_count"])

    if cfg.task_id == "easy":
        score_wait = c01(1.0 - avg_wait / cfg.score_refs["wait"])
        score_thr = c01(throughput / cfg.score_refs["thr"])
        score_rej = c01(1.0 - rejection_rate / cfg.score_refs["rej"])
        score_sla = c01(1.0 - sla_rate / cfg.score_refs["sla"])
        score = 0.4 * score_wait + 0.3 * score_thr + 0.15 * score_rej + 0.15 * score_sla
        details = {
            "score_wait": round(score_wait, 4),
            "score_throughput": round(score_thr, 4),
            "score_rejection": round(score_rej, 4),
            "score_sla": round(score_sla, 4),
        }
    elif cfg.task_id == "medium":
        # Medium focuses on urgent-class tail latency and SLA compliance.
        p95_u = self._percentile(self._wait_samples_urgent, 0.95)
        p95_n = self._percentile(self._wait_samples_normal, 0.95)
        urgent_sla = self._safe_div(self._metrics["sla_breaches_urgent"], max(1.0, self._metrics["completed_urgent"]))
        s_uw = c01(1.0 - p95_u / cfg.score_refs["uw"])
        s_nw = c01(1.0 - p95_n / cfg.score_refs["nw"])
        s_usla = c01(1.0 - urgent_sla / cfg.score_refs["usla"])
        s_thr = c01(throughput / cfg.score_refs["thr"])
        s_cost = c01(1.0 - self._metrics["action_cost"] / cfg.score_refs["cost"])
        score = 0.35 * s_uw + 0.15 * s_nw + 0.25 * s_usla + 0.15 * s_thr + 0.10 * s_cost
        details = {
            "score_urgent_wait": round(s_uw, 4),
            "score_normal_wait": round(s_nw, 4),
            "score_urgent_sla": round(s_usla, 4),
            "score_throughput": round(s_thr, 4),
            "score_cost": round(s_cost, 4),
        }
    else:
        # Hard adds end-to-end tail latency, abandonment, infra cost, fairness.
        e2e_p95 = self._percentile(self._e2e_wait_samples, 0.95)
        abd_rate = self._safe_div(self._metrics["abandoned"], arrivals)
        s_e2e = c01(1.0 - e2e_p95 / cfg.score_refs["e2e"])
        s_abd = c01(1.0 - abd_rate / cfg.score_refs["abd"])
        s_sla = c01(1.0 - sla_rate / cfg.score_refs["sla"])
        s_thr = c01(throughput / cfg.score_refs["thr"])
        s_cost = c01(1.0 - self._metrics["infra_cost"] / cfg.score_refs["cost"])
        s_fair = c01(1.0 - fairness_gap / cfg.score_refs["fair"])
        score = 0.25 * s_e2e + 0.20 * s_abd + 0.20 * s_sla + 0.15 * s_thr + 0.10 * s_cost + 0.10 * s_fair
        details = {
            "score_e2e_p95": round(s_e2e, 4),
            "score_abandonment": round(s_abd, 4),
            "score_sla": round(s_sla, 4),
            "score_throughput": round(s_thr, 4),
            "score_cost": round(s_cost, 4),
            "score_fairness": round(s_fair, 4),
        }

    if self._metrics["invalid_actions"] > max(3.0, 0.04 * cfg.horizon):
        score = min(score, 0.4)
    # Apply strict open-interval clamp: validator rejects 0.0 and 1.0.
    return strict01(score), details
|
| 598 |
+
|
| 599 |
+
def _compute_action_mask(self, cfg: TaskConfig) -> list[int]:
    """Compute which of the 8 actions are valid right now.

    Slot order (matches CloudQueueAction.action_type):
    0: configure_task — always valid (meta, sets next task/seed)
    1: admit — only if an incoming job is waiting
    2: reject — only if an incoming job is waiting
    3: route — only if an incoming job is waiting
    4: dispatch — only if an idle+active server AND a non-empty queue exist
    5: scale — only if cfg.allow_scaling is True
    6: reprioritize — only if cfg.allow_priority AND a normal-priority job is queued
    7: noop — always valid
    """
    incoming_flag = int(self._incoming_job is not None)

    idle_active_server = any(
        srv["active"] and srv["job"] is None for srv in self._servers
    )
    queued_job_exists = any(queue for queue in self._queues)
    dispatch_flag = int(idle_active_server and queued_job_exists)

    reprioritize_flag = 0
    if cfg.allow_priority:
        normal_job_queued = any(
            item["priority"] == 1 for queue in self._queues for item in queue
        )
        reprioritize_flag = int(normal_job_queued)

    mask = [1]                        # 0: configure_task
    mask.extend([incoming_flag] * 3)  # 1-3: admit / reject / route
    mask.append(dispatch_flag)        # 4: dispatch
    mask.append(int(cfg.allow_scaling))  # 5: scale
    mask.append(reprioritize_flag)    # 6: reprioritize
    mask.append(1)                    # 7: noop
    return mask
|
| 636 |
+
|
| 637 |
+
def _build_observation(self, reward: float, done: bool, info: dict) -> CloudQueueObservation:
    """Assemble the full observation snapshot for the current sim state.

    Side effect: updates each queue's wait EMA. The episode score is only
    computed when done is True; otherwise it is reported as 0.0.
    """
    cfg = self._task_configs[self._active_task_id]
    queue_lengths = [len(q) for q in self._queues]
    # EMA of per-queue mean wait; an empty queue decays the EMA toward 0.
    for i, q in enumerate(self._queues):
        current_mean_wait = 0.0
        if q:
            current_mean_wait = sum(job["wait"] for job in q) / len(q)
        self._wait_ema[i] = 0.8 * self._wait_ema[i] + 0.2 * current_mean_wait

    active_servers = max(1, sum(1 for s in self._servers if s["active"]))
    completed = max(1.0, self._metrics["completed"])
    sla_violation_rate = self._safe_div(self._metrics["sla_breaches"], completed)
    abandonment_rate = self._safe_div(self._metrics["abandoned"], max(1.0, self._metrics["arrivals"]))
    throughput_recent = max(0.0, info.get("completed_this_step", 0.0))
    energy_cost_rate = active_servers * cfg.server_cost

    # Incoming-job fields are zeroed when no job is waiting for a decision.
    incoming = self._incoming_job
    incoming_present = incoming is not None
    incoming_size = float(incoming["size"]) if incoming_present else 0.0
    incoming_priority = int(incoming["priority"]) if incoming_present else 0
    incoming_deadline = float(incoming["deadline"]) if incoming_present else 0.0
    incoming_type = int(incoming["type"]) if incoming_present else 0

    score, score_details = (0.0, {})
    if done:
        score, score_details = self._score_task(cfg)

    metadata = {
        "info": info,
        "reward_components": info.get("reward_components", {}),
        "applied_action": info.get("applied_action", "noop"),
        "seed": int(self._pending_seed),
        "trace_digest": self._trace_digest(),
        "rng_stream_seeds": self._rng_stream_seeds,
        "metrics": {
            "arrivals": self._metrics["arrivals"],
            "accepted": self._metrics["accepted"],
            "rejected": self._metrics["rejected"],
            "completed": self._metrics["completed"],
            "abandoned": self._metrics["abandoned"],
            "invalid_actions": self._metrics["invalid_actions"],
            "harmful_scale_down": self._metrics["harmful_scale_down"],
            "infra_cost": round(self._metrics["infra_cost"], 4),
            "pending_incoming_jobs": float(len(self._incoming_buffer) + (1 if self._incoming_job else 0)),
        },
        "episode_score": round(score, 4),
        "score_details": score_details,
    }

    return CloudQueueObservation(
        task_id=cfg.task_id,
        sim_time=self._sim_time,
        horizon=cfg.horizon,
        queue_lengths=queue_lengths,
        queue_wait_ema=[round(v, 3) for v in self._wait_ema],
        server_busy=[1 if s["job"] is not None and s["active"] else 0 for s in self._servers],
        server_remaining_service=[round(float(s["remaining"]), 3) for s in self._servers],
        utilization=[round(v, 3) for v in self._utilization_ema[: len(self._servers)]],
        incoming_job_present=incoming_present,
        incoming_job_size=round(incoming_size, 3),
        incoming_job_priority=incoming_priority,
        incoming_job_deadline=round(incoming_deadline, 3),
        incoming_job_type=incoming_type,
        sla_violation_rate=round(sla_violation_rate, 4),
        abandonment_rate=round(abandonment_rate, 4),
        throughput_recent=round(throughput_recent, 4),
        energy_cost_rate=round(energy_cost_rate, 4),
        level=cfg.level,
        optional_history=[round(v, 4) for v in list(self._recent_rewards)],
        action_mask=self._compute_action_mask(cfg),
        done=done,
        reward=round(reward, 6),
        metadata=metadata,
    )
|
| 711 |
+
|
| 712 |
+
def step(self, action: CloudQueueAction) -> CloudQueueObservation:  # type: ignore[override]
    """Advance the simulation one tick and apply the agent's action.

    Per-tick order: servers progress -> queued jobs age/abandon -> new
    arrivals spawn -> action applies -> optional auto-dispatch -> reward.
    "configure_task" and steps after done return without advancing time.
    """
    cfg = self._task_configs[self._active_task_id]

    if (action.action_type or "").lower() == "configure_task":
        # Meta action: stage config for the next reset; no time advance.
        ok, note = self._apply_action(action, cfg)
        info = {
            "event": "configure_task",
            "applied_action": action.action_type,
            "valid_action": ok,
            "note": note,
            "completed_this_step": 0.0,
            "debug_trace_id": self._trace_digest(),
        }
        return self._build_observation(reward=0.0, done=self._done, info=info)

    if self._done:
        info = {
            "event": "episode_done",
            "applied_action": action.action_type,
            "valid_action": False,
            "note": "call reset() to start a new episode",
            "completed_this_step": 0.0,
            "reward_components": {},
            "debug_trace_id": self._trace_digest(),
        }
        return self._build_observation(reward=0.0, done=True, info=info)

    self._state.step_count += 1
    self._sim_time += 1

    completed_this_step = self._process_servers(cfg)
    abandoned_this_step = self._update_wait_and_abandonment(cfg)
    self._spawn_incoming_job(cfg)

    action_ok, action_note = self._apply_action(action, cfg)
    # Record the action so episode traces are replay-verifiable via digest.
    action_key = (
        f"{(action.action_type or 'noop').lower()}|"
        f"q={action.target_queue}|s={action.target_server}|"
        f"d={action.scale_delta}|p={action.new_priority}"
    )
    self._action_trace.append(action_key)
    autodispatch_applied = False
    if self.ASSISTED_AUTODISPATCH:
        self._autodispatch()
        autodispatch_applied = True
    reward, reward_components = self._compute_reward(
        cfg,
        action_ok=action_ok,
        action_type=(action.action_type or "noop").lower(),
        action_scale_delta=int(action.scale_delta or 0),
        completed_step=completed_this_step,
    )

    self._done = self._state.step_count >= cfg.horizon
    info = {
        "event": "step",
        "applied_action": action.action_type,
        "valid_action": action_ok,
        "note": action_note,
        "completed_this_step": completed_this_step,
        "abandoned_this_step": abandoned_this_step,
        "autodispatch_applied": autodispatch_applied,
        "reward_components": reward_components,
        "debug_trace_id": self._trace_digest(),
    }
    return self._build_observation(reward=reward, done=self._done, info=info)
|
| 778 |
+
|
| 779 |
+
@property
def state(self) -> State:
    """Current episode State (episode_id and step_count)."""
    return self._state
|