Sizzing committed on
Commit
0f8f2c1
·
verified ·
1 Parent(s): eea2be5

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Dockerfile +8 -5
  2. Makefile +6 -5
  3. README.md +369 -166
  4. __init__.py +7 -2
  5. aws_infra/aws_infra/app.py +122 -0
  6. aws_infra/aws_infra/services/acm.py +15 -0
  7. aws_infra/aws_infra/services/alb.py +32 -0
  8. aws_infra/aws_infra/services/apigateway.py +12 -0
  9. aws_infra/aws_infra/services/apigateway_v1.py +21 -0
  10. aws_infra/aws_infra/services/athena.py +23 -0
  11. aws_infra/aws_infra/services/cloudformation/__init__.py +27 -0
  12. aws_infra/aws_infra/services/cloudwatch.py +24 -0
  13. aws_infra/aws_infra/services/cloudwatch_logs.py +23 -0
  14. aws_infra/aws_infra/services/cognito.py +43 -0
  15. aws_infra/aws_infra/services/dynamodb.py +23 -0
  16. aws_infra/aws_infra/services/ec2.py +68 -0
  17. aws_infra/aws_infra/services/ecs.py +20 -0
  18. aws_infra/aws_infra/services/efs.py +32 -0
  19. aws_infra/aws_infra/services/elasticache.py +26 -0
  20. aws_infra/aws_infra/services/emr.py +31 -0
  21. aws_infra/aws_infra/services/eventbridge.py +23 -0
  22. aws_infra/aws_infra/services/firehose.py +14 -0
  23. aws_infra/aws_infra/services/glue.py +30 -0
  24. aws_infra/aws_infra/services/iam_sts.py +37 -0
  25. aws_infra/aws_infra/services/kinesis.py +19 -0
  26. aws_infra/aws_infra/services/lambda_svc.py +30 -0
  27. aws_infra/aws_infra/services/rds.py +28 -0
  28. aws_infra/aws_infra/services/route53.py +22 -0
  29. aws_infra/aws_infra/services/s3.py +33 -0
  30. aws_infra/aws_infra/services/secretsmanager.py +17 -0
  31. aws_infra/aws_infra/services/ses.py +22 -0
  32. aws_infra/aws_infra/services/ses_v2.py +17 -0
  33. aws_infra/aws_infra/services/sns.py +21 -0
  34. aws_infra/aws_infra/services/sqs.py +17 -0
  35. aws_infra/aws_infra/services/ssm.py +15 -0
  36. aws_infra/aws_infra/services/stepfunctions.py +19 -0
  37. aws_infra/aws_infra/services/waf.py +21 -0
  38. client.py +15 -6
  39. inference-complete.py +13 -14
  40. inference.py +6 -0
  41. models.py +124 -8
  42. pyproject.toml +5 -0
  43. server/app.py +23 -0
  44. server/aws_rl_env_environment.py +122 -23
  45. server/services/aws_backend.py +57 -1
  46. server/services/chaos_engine.py +168 -0
  47. server/services/curriculum.py +58 -18
  48. server/services/drift_engine.py +67 -0
  49. server/services/environment_designer.py +10 -1
  50. server/services/episode_tracker.py +96 -0
Dockerfile CHANGED
@@ -42,16 +42,16 @@ RUN if ! command -v uv >/dev/null 2>&1; then \
42
  # If uv.lock exists, use it; otherwise resolve on the fly
43
  RUN --mount=type=cache,target=/root/.cache/uv \
44
  if [ -f uv.lock ]; then \
45
- uv sync --frozen --no-install-project --no-editable; \
46
  else \
47
- uv sync --no-install-project --no-editable; \
48
  fi
49
 
50
  RUN --mount=type=cache,target=/root/.cache/uv \
51
  if [ -f uv.lock ]; then \
52
- uv sync --frozen --no-editable; \
53
  else \
54
- uv sync --no-editable; \
55
  fi
56
 
57
  # Final runtime stage
@@ -90,7 +90,10 @@ ENV PYTHONPATH="/app/env:$PYTHONPATH"
90
 
91
 
92
  # DEV_MODE=1 enables live reload via --reload flag
93
- ENV DEV_MODE=0
 
 
 
94
 
95
  # Entrypoint: start aws_infra in background, then run the FastAPI server
96
  CMD ["sh", "-c", "aws_infra -d & sleep 2 && uvicorn server.app:app --host 0.0.0.0 --port 8000 $([ \"$DEV_MODE\" = '1' ] && echo '--reload --reload-dir /app/env')"]
 
42
  # If uv.lock exists, use it; otherwise resolve on the fly
43
  RUN --mount=type=cache,target=/root/.cache/uv \
44
  if [ -f uv.lock ]; then \
45
+ uv sync --frozen --extra dev --no-install-project --no-editable; \
46
  else \
47
+ uv sync --extra dev --no-install-project --no-editable; \
48
  fi
49
 
50
  RUN --mount=type=cache,target=/root/.cache/uv \
51
  if [ -f uv.lock ]; then \
52
+ uv sync --frozen --extra dev --no-editable; \
53
  else \
54
+ uv sync --extra dev --no-editable; \
55
  fi
56
 
57
  # Final runtime stage
 
90
 
91
 
92
  # DEV_MODE=1 enables live reload via --reload flag
93
+ ENV DEV_MODE=1
94
+
95
+ ENV API_BASE_URL=https://router.huggingface.co/v1
96
+ ENV MODEL_NAME=Qwen/Qwen2.5-72B-Instruct
97
 
98
  # Entrypoint: start aws_infra in background, then run the FastAPI server
99
  CMD ["sh", "-c", "aws_infra -d & sleep 2 && uvicorn server.app:app --host 0.0.0.0 --port 8000 $([ \"$DEV_MODE\" = '1' ] && echo '--reload --reload-dir /app/env')"]
Makefile CHANGED
@@ -21,9 +21,6 @@ install: ## Install project dependencies
21
  install-dev: ## Install project with dev dependencies
22
  $(UV) sync --frozen --extra dev
23
 
24
- .PHONY: install-train
25
- install-train: ## Install project with training dependencies (trl, torch, peft, etc.)
26
- $(UV) sync --frozen --extra training
27
 
28
  .PHONY: install-all
29
  install-all: ## Install project with all dependencies (dev + training)
@@ -39,7 +36,7 @@ lock: ## Update the lockfile
39
 
40
  .PHONY: run
41
  run: ## Run with MiniStack + FastAPI server (mirrors Docker CMD)
42
- ministack & sleep 2 && $(UV) run uvicorn server.app:app --host $(SERVER_HOST) --port $(SERVER_PORT)
43
 
44
  # ──────────────────────────────────────────────
45
  # Code Quality
@@ -82,7 +79,7 @@ docker-run-dev: ## Run Docker container in dev mode with live reload
82
 
83
  .PHONY: docker-run-detach
84
  docker-run-detach: ## Run Docker container in background
85
- docker run -d --rm -p $(SERVER_PORT):8000 --name $(DOCKER_IMAGE) $(DOCKER_IMAGE):$(DOCKER_TAG)
86
 
87
  .PHONY: docker-stop
88
  docker-stop: ## Stop the running Docker container
@@ -100,6 +97,10 @@ docker-shell: ## Open a shell in the running Docker container
100
  docker-clean: ## Stop and remove all running containers for this image
101
  @docker ps -q --filter ancestor=$(DOCKER_IMAGE):$(DOCKER_TAG) | xargs -r docker rm -f
102
 
 
 
 
 
103
  .PHONY: docker-health
104
  docker-health: ## Check health of the running container
105
  @curl -sf http://localhost:$(SERVER_PORT)/health && echo " OK" || echo " FAIL"
 
21
  install-dev: ## Install project with dev dependencies
22
  $(UV) sync --frozen --extra dev
23
 
 
 
 
24
 
25
  .PHONY: install-all
26
  install-all: ## Install project with all dependencies (dev + training)
 
36
 
37
  .PHONY: run
38
  run: ## Run with MiniStack + FastAPI server (mirrors Docker CMD)
39
+ aws_infra -d & sleep 2 && $(UV) run uvicorn server.app:app --host $(SERVER_HOST) --port $(SERVER_PORT) --reload
40
 
41
  # ──────────────────────────────────────────────
42
  # Code Quality
 
79
 
80
  .PHONY: docker-run-detach
81
  docker-run-detach: ## Run Docker container in background
82
+ docker run -d --rm --name $(DOCKER_IMAGE) -p $(SERVER_PORT):8000 -v $(PWD):/app/env -v /app/env/.venv -e DEV_MODE=1 $(DOCKER_IMAGE):$(DOCKER_TAG)
83
 
84
  .PHONY: docker-stop
85
  docker-stop: ## Stop the running Docker container
 
97
  docker-clean: ## Stop and remove all running containers for this image
98
  @docker ps -q --filter ancestor=$(DOCKER_IMAGE):$(DOCKER_TAG) | xargs -r docker rm -f
99
 
100
+ .PHONY: docker-test
101
+ docker-test: ## Run tests inside the running Docker container
102
+ docker exec $(DOCKER_IMAGE) python -m pytest env/tests -v
103
+
104
  .PHONY: docker-health
105
  docker-health: ## Check health of the running container
106
  @curl -sf http://localhost:$(SERVER_PORT)/health && echo " OK" || echo " FAIL"
README.md CHANGED
@@ -11,19 +11,242 @@ tags:
11
  - openenv
12
  ---
13
 
14
- # AWS RL Environment
15
 
16
- A **Gymnasium-style RL environment** for training LLM agents on real-world AWS cloud operations. The agent sends AWS CLI commands as actions, receives structured observations, and progresses through a **curriculum of 21 tasks** across 5 difficulty tiers — from basic listing to SRE incident response.
17
 
18
- The environment runs a **vendored MiniStack emulator** (34 AWS services, in-memory, zero-cost) inside the same Docker container, so no AWS account is needed.
19
 
20
- ## Key Innovations
21
 
22
- - **Priority-queue curriculum** — Tasks are selected by weakness, novelty, and spaced-repetition schedules instead of random or round-robin sampling
23
- - **Spaced repetition** — Graduated tasks resurface at exponentially increasing intervals (3 -> 6 -> 12 -> ... -> 48 episodes) to prevent catastrophic forgetting
24
- - **Anti-reward-hacking** — Grading verifies ground-truth state in MiniStack, not agent output; partial credit is capped at 0.99; monotonic progress prevents manipulation
25
- - **SRE incident tasks** — Expert-tier tasks provision broken infrastructure, then require the agent to diagnose and fix it
26
- - **Shaped rewards** — Dense reward signals (progress bonuses, failure penalties) in [0.0, 1.0] guide exploration without enabling gaming
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  ## Quick Start
29
 
@@ -46,76 +269,83 @@ result = env.reset()
46
  result = env.step(AwsRlAction(command="aws s3 ls"))
47
  ```
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  ---
50
 
51
  ## Architecture
52
 
53
  ```
54
- ┌─────────────────────────────────────────────────────────
55
- │ Docker Container
56
-
57
  │ ┌─────────────────────┐ ┌────────────────────┐ │
58
- │ │ FastAPI RL Server │ │ MiniStack │ │
59
- │ │ (port 8000) │─────>│ (port 4566) │ │
60
- │ │ │ │ 34 AWS services │ │
61
- │ │ - Environment │ │ In-memory state │ │
62
- │ │ - Curriculum │ │ Reset API │ │
63
- │ │ - Grading Engine │ │ │ │
64
- │ │ - Episode Tracker │ │ │ │
 
65
  │ └─────────────────────┘ └────────────────────┘ │
66
- │ ^ ^
67
- │ | OpenEnv HTTP/WS | AWS CLI calls
68
- └──────────┼─────────────────────────────┼────────────────
69
  | |
70
- RL Agent (client) (internal only)
71
  ```
72
 
73
  ### Episode Lifecycle
74
 
75
- 1. **`reset()`** -- Wipes MiniStack state, selects next task from curriculum, provisions setup commands (if any), returns initial observation
76
- 2. **`step(action)`** -- Validates command (`aws` prefix only), executes against MiniStack, records in tracker, grades with shaped reward, returns observation
77
- 3. **Terminates** when `task_achieved == True` or max steps reached
 
78
 
79
  ---
80
 
 
81
  ## Core Classes
82
 
83
  ### `AwsRlEnvironment`
84
 
85
- [server/aws_rl_env_environment.py](server/aws_rl_env_environment.py) -- Implements the OpenEnv `Environment` interface. Orchestrates all services.
86
 
87
  | Method | Description |
88
  |--------|-------------|
89
  | `reset()` | Wipe infra, select task, provision setup, return initial observation |
90
- | `step(action)` | Execute command, grade, update curriculum, return observation |
91
 
92
  ### `Curriculum`
93
 
94
- [server/services/curriculum.py](server/services/curriculum.py) -- Priority-queue-based task selection with progressive difficulty.
95
 
96
  Selects the next task using a **max-heap scored by**:
97
 
98
  ```
99
  score = (
100
  novelty_bonus # +100 if never attempted (explore first)
101
- + weakness_weight # +50 * (1 - task_success_rate) -- worse tasks get higher priority
102
  + spaced_rep_bonus # +30 if graduated task is "due" for re-test
103
  - recency_penalty # -20 if attempted in last 2 episodes (ensure variety)
104
  )
105
  ```
106
 
107
- | Feature | Detail |
108
- |---------|--------|
109
- | **Per-task mastery** | Sliding-window success rate with exponential decay (0.85^i weighting) |
110
- | **Graduation** | Task is "graduated" when success rate >= mastery_threshold in window |
111
- | **Spaced repetition** | Graduated tasks resurface at doubling intervals (3 -> 6 -> ... -> 48 episodes) |
112
- | **Tier progression** | Advance when tier success rate >= advance_rate after min_episodes |
113
- | **Fast-track** | Skip min_episodes wait after 3 consecutive episodes at >= 90% success |
114
- | **Skill profile** | `get_stats()` returns per-task success rates, weak spots, and due re-tests |
115
-
116
  ### `TaskGrader`
117
 
118
- [server/services/task_grader.py](server/services/task_grader.py) -- Evaluates task completion using a dispatcher pattern. Rewards are always in [0.0, 1.0].
119
 
120
  **Grading strategies by tier:**
121
 
@@ -127,71 +357,101 @@ score = (
127
  | Advanced | Multi-step + services | All steps completed AND all required services touched |
128
  | Expert | State checks | Runs arbitrary AWS CLI commands to assert end-state (ground truth) |
129
 
130
- **Reward shaping:**
131
 
132
- ```
133
- if task_achieved: reward = 1.0
134
- else:
135
- reward = partial_progress * 0.8 # base: scaled to [0.0, 0.8]
136
- if progress_increased: reward += 0.1 # dense signal for advancing
137
- if command_failed: reward *= 0.5 # penalty for errors
138
- reward = clamp(reward, 0.0, 0.99) # never 1.0 without completion
139
- ```
140
 
141
  ### `EpisodeTracker`
142
 
143
- [server/services/episode_tracker.py](server/services/episode_tracker.py) -- Maintains per-episode step history. Parses AWS CLI commands to extract (service, operation, resource) tuples. Tracks credited operations for deduplication and monotonic progress.
144
 
145
  ### `ResourceVerifier`
146
 
147
- [server/services/resource_verifier.py](server/services/resource_verifier.py) -- Queries MiniStack directly to verify ground-truth resource state. Service-specific checks for S3, DynamoDB, Lambda, SQS, SNS, IAM, and API Gateway. Also evaluates `StateCheck` assertions (substring match, JSON path extraction).
148
 
149
  ### `EnvironmentDesigner`
150
 
151
- [server/services/environment_designer.py](server/services/environment_designer.py) -- Provisions initial AWS state via setup commands before the agent acts. Used by SRE/expert tasks to create broken infrastructure the agent must fix.
152
 
153
  ### `AwsBackend`
154
 
155
- [server/services/aws_backend.py](server/services/aws_backend.py) -- Executes AWS CLI commands against MiniStack (`AWS_ENDPOINT_URL=http://localhost:4566`). Provides `reset_environment()` via MiniStack's `/_ministack/reset` endpoint.
156
 
157
  ### `AwsRlEnv` (Client)
158
 
159
- [client.py](client.py) -- OpenEnv HTTP/WebSocket client. Wraps `reset()` and `step()` calls to the server.
160
 
161
  ---
162
 
163
  ## Data Models
164
 
165
- [models.py](models.py) -- All Pydantic models and type aliases.
166
 
167
- ### Action & Observation
168
 
169
  ```python
170
  class AwsRlAction(Action):
171
  command: str # AWS CLI command, e.g. "aws s3 ls"
 
172
 
 
 
 
173
  class AwsRlObservation(Observation):
174
  episode_id: EpisodeID
175
  step_count: StepCount
176
  command_success: bool
177
  command_output: str # stdout from AWS CLI
178
  error: str # stderr if failed
179
- resources: dict[AwsService, dict | list | str]
180
- task: Task | None # current task definition
181
  task_achieved: bool
182
- done: bool
183
- reward: float # shaped reward in [0.0, 1.0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  ```
185
 
186
  ### Task Definitions
187
 
188
  ```python
189
  class Task:
190
- task_id: TaskID # 0-20
191
  difficulty: TaskDifficulty # warmup | beginner | intermediate | advanced | expert
192
  description: str # human-readable goal
193
  success_criteria: SuccessCriteria
194
- setup_commands: list[SetupCommand] # pre-provision for SRE tasks
 
 
 
 
 
 
 
 
 
195
 
196
  class SuccessCriteria:
197
  command_contains: str | None # warmup/beginner
@@ -211,6 +471,7 @@ class TierConfig:
211
  mastery_window: int # sliding window size (default: 10)
212
  mastery_threshold: float # per-task graduation threshold (default: 0.7)
213
  fast_track_rate: float # early promotion threshold (default: 0.9)
 
214
 
215
  class SpacedRepState:
216
  interval: int # episodes until next re-test (3 -> 48)
@@ -219,88 +480,6 @@ class SpacedRepState:
219
 
220
  ---
221
 
222
- ## Task Catalog (21 Tasks)
223
-
224
- ### Warmup (6 tasks) -- Simple listing operations
225
-
226
- | ID | Description | Service |
227
- |----|-------------|---------|
228
- | 0 | List all S3 buckets | S3 |
229
- | 1 | Describe EC2 instances | EC2 |
230
- | 2 | List DynamoDB tables | DynamoDB |
231
- | 3 | List Lambda functions | Lambda |
232
- | 4 | List SQS queues | SQS |
233
- | 5 | List SNS topics | SNS |
234
-
235
- ### Beginner (5 tasks) -- Single-resource creation with verification
236
-
237
- | ID | Description | Verified Resource |
238
- |----|-------------|-------------------|
239
- | 6 | Create an S3 bucket | Bucket exists in MiniStack |
240
- | 7 | Create a DynamoDB table | Table exists |
241
- | 8 | Create an SQS queue | Queue URL resolvable |
242
- | 9 | Create an SNS topic | Topic ARN in list |
243
- | 10 | Create a Lambda function | Function exists |
244
-
245
- ### Intermediate (4 tasks) -- Multi-step workflows
246
-
247
- | ID | Description | Steps |
248
- |----|-------------|-------|
249
- | 11 | Create S3 bucket + upload file | create-bucket, put-object |
250
- | 12 | Create DynamoDB table + insert item | create-table, put-item |
251
- | 13 | Create SNS topic + SQS queue + subscribe | create-topic, create-queue, subscribe |
252
- | 14 | Create IAM role + attach policy | create-role, attach-role-policy |
253
-
254
- ### Advanced (3 tasks) -- Cross-service architectures
255
-
256
- | ID | Description | Services | Steps |
257
- |----|-------------|----------|-------|
258
- | 15 | Lambda + SQS event source pipeline | Lambda, SQS, IAM | 4-5 steps |
259
- | 16 | Serverless API (DynamoDB + Lambda + API Gateway) | DynamoDB, Lambda, API Gateway, IAM | 7 steps |
260
- | 17 | Fan-out notification system (SNS + SQS) | SNS, SQS | 5 steps |
261
-
262
- ### Expert (3 tasks) -- SRE incident response
263
-
264
- | ID | Description | Setup | Fix Required |
265
- |----|-------------|-------|-------------|
266
- | 18 | Fix Lambda missing SQS permissions | Broken role + Lambda + queue | Attach SQS policy, create event source |
267
- | 19 | Enable S3 versioning + lifecycle | Bucket + object | Enable versioning, add lifecycle rule |
268
- | 20 | Fix DynamoDB throttling + alerting | Under-provisioned table + SNS | Scale to 50 RCU/WCU, subscribe SQS |
269
-
270
- Expert tasks use **state checks** (ground-truth AWS CLI assertions) to verify the fix, not just command matching.
271
-
272
- ---
273
-
274
- ## Anti-Reward-Hacking Measures
275
-
276
- | Defense | How it works |
277
- |---------|-------------|
278
- | **Ground-truth verification** | Grader queries MiniStack directly -- agent cannot fake resource state |
279
- | **Deduplication** | `EpisodeTracker.has_executed_operation()` prevents re-earning credit for repeated commands |
280
- | **Invisible grading** | Verification commands run server-side, invisible to the agent's observations |
281
- | **Command allowlisting** | Only commands starting with `aws` are executed; pipes and shell escape are rejected |
282
- | **No credit for read-only** | Running a `state_check` command earns no progress; only mutating `steps` earn credit |
283
- | **Monotonic progress** | `partial_progress` can only increase within an episode |
284
- | **Exact resource names** | `resource_exists` checks the exact name, not just any resource of that type |
285
- | **State checks verify final state** | Expert tasks run actual CLI commands against MiniStack at grading time |
286
-
287
- ---
288
-
289
- ## Supported AWS Services (34)
290
-
291
- | Category | Services |
292
- |----------|----------|
293
- | **Storage & DB** | S3, DynamoDB, RDS, ElastiCache, EFS |
294
- | **Compute** | Lambda, ECS, EC2, Step Functions |
295
- | **Messaging** | SQS, SNS, Kinesis, EventBridge, Firehose |
296
- | **API** | API Gateway v1/v2, ALB/ELBv2 |
297
- | **Security** | IAM, STS, Cognito, ACM, WAF v2, Secrets Manager |
298
- | **Monitoring** | CloudWatch, CloudWatch Logs, SSM |
299
- | **Infrastructure** | CloudFormation, Route53 |
300
- | **Other** | SES, Athena, Glue, EMR |
301
-
302
- ---
303
-
304
  ## Project Structure
305
 
306
  ```
@@ -309,22 +488,48 @@ aws-rl-env/
309
  ├── models.py # Pydantic data models & type aliases
310
  ├── client.py # AwsRlEnv OpenEnv client
311
  ├── inference.py # LLM agent inference script
 
312
  ├── server/
313
  │ ├── app.py # FastAPI application + web UI endpoints
314
  │ ├── aws_rl_env_environment.py # Core RL environment (reset/step)
 
 
 
 
 
315
  │ └── services/
316
  │ ├── aws_backend.py # MiniStack command executor
317
  │ ├── task_grader.py # Grading engine with reward shaping
318
  │ ├── curriculum.py # Curriculum learning manager
319
- │ ├── episode_tracker.py # Per-episode step history
320
  │ ├── resource_verifier.py # Ground-truth state verification
321
  │ ├── environment_designer.py # Setup provisioning for SRE tasks
 
 
 
 
322
  │ └── tasks/
323
- │ ├── warmup.yaml # 6 listing tasks
324
- │ ├── beginner.yaml # 5 creation tasks
325
- │ ├── intermediate.yaml # 4 multi-step tasks
326
- │ ├── advanced.yaml # 3 architecture tasks
327
- ── expert.yaml # 3 SRE incident tasks
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  ├── aws_infra/ # Vendored MiniStack emulator
329
  │ └── aws_infra/
330
  │ ├── app.py # MiniStack ASGI router
@@ -351,19 +556,7 @@ make docker-health # Health check
351
 
352
  ### Local (without Docker)
353
 
354
- ```bash
355
- # Terminal 1: Start MiniStack
356
- pip install ministack
357
- ministack # port 4566
358
-
359
- # Terminal 2: Start RL server
360
- export AWS_ENDPOINT_URL=http://localhost:4566
361
- export AWS_ACCESS_KEY_ID=test
362
- export AWS_SECRET_ACCESS_KEY=test
363
- uv run uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
364
- ```
365
-
366
- Or use the combined Makefile target:
367
 
368
  ```bash
369
  make run # Starts MiniStack + server
@@ -388,9 +581,9 @@ make openenv-push # Push to HuggingFace Spaces
388
  | `AWS_SECRET_ACCESS_KEY` | `test` | AWS credentials (any value works) |
389
  | `AWS_DEFAULT_REGION` | `us-east-1` | AWS region |
390
  | `MAX_STEPS` | `15` | Max steps per episode |
391
- | `API_BASE_URL` | -- | LLM API endpoint (for inference.py) |
392
- | `MODEL_NAME` | -- | LLM model name (for inference.py) |
393
- | `HF_TOKEN` | -- | HuggingFace token (for inference.py) |
394
  | `TEMPERATURE` | `0.7` | LLM sampling temperature |
395
 
396
  ---
@@ -413,3 +606,13 @@ curriculum.get_stats()
413
  # "avg_reward_last_10": 0.65
414
  # }
415
  ```
 
 
 
 
 
 
 
 
 
 
 
11
  - openenv
12
  ---
13
 
14
+ # AWS Cloud CLI and SRE Reinforcement Learning Environment
15
 
16
+ An **OpenEnv RL environment** for training AI agents on real-world AWS cloud operations. The agent sends AWS CLI commands as actions, receives structured observations, and progresses through a **curriculum of 120+ tasks** across 5 difficulty tiers — from basic listing to SRE incident response and security posture auditing.
17
 
18
+ Agents interact with a **realistic AWS shell simulator** — a vendored MiniStack emulator (34 AWS services, in-memory, zero-cost) inside the same Docker container. Every executed command returns the same response as production AWS. The grading system evaluates rewards and penalties based on the **actual AWS infrastructure state** instead of static metrics. No AWS account needed.
19
 
20
+ > **[Try the Playground](https://sizzing-aws-rl-env.hf.space/web)** | **[API Docs](https://sizzing-aws-rl-env.hf.space/docs)** | **[Hugging Face Space](https://huggingface.co/spaces/Sizzing/aws_rl_env)**
21
 
22
+
23
+ ## Task Tiers (100+ Tasks)
24
+
25
+ ### Warmup — 20 tasks
26
+ > List resources — single read-only commands
27
+
28
+ - Run one AWS CLI command to list or describe a resource type
29
+ - S3 buckets, EC2 instances, DynamoDB tables, Lambda functions, RDS, EBS volumes
30
+ - Graded by **command_match** — checks operation + service pair
31
+ - No setup required, no state mutations
32
+
33
+ ### Beginner — 20 tasks
34
+ > Create single resources with verification
35
+
36
+ - Create an S3 bucket, DynamoDB table, SQS queue, or Lambda function
37
+ - Graded by **resource_creation** — verifies the exact resource exists in the AWS Infrastructure Simulator
38
+ - Introduces resource name validation — "my-bucket-2" won't satisfy a check for "my-bucket"
39
+ - First tier where idempotency bonus (+0.02) can be earned
40
+
41
+ ### Intermediate — 20 tasks
42
+ > Multi-step workflows — create, configure, connect
43
+
44
+ - Ordered sequences: create a bucket then enable versioning, create a table then add an item
45
+ - Graded by **multi_step** — validates each step was completed in order
46
+ - Chaos injection begins at **10% probability** — resources may be silently mutated mid-episode
47
+ - Rollback penalty (-0.1) starts to matter with multi-step create/delete patterns
48
+
49
+ ### Advanced — 20 tasks
50
+ > Cross-service architectures spanning multiple AWS services
51
+
52
+ - Wire Lambda to SQS, configure API Gateway with integrations, build event-driven pipelines
53
+ - Graded by **multi_step + services** — all required services must be configured
54
+ - Chaos injection escalates to **20% probability** — DynamoDB throughput, Lambda configs may change
55
+ - Hints cost more: 3 hints = only 61% of max reward (0.85³ decay)
56
+
57
+ ### Expert — 20 tasks
58
+ > SRE incidents, drift detection & security posture audits
59
+
60
+ - Fix overly permissive S3 policies, replace broad IAM inline policies, repair broken infrastructure
61
+ - Graded by **state_checks** — actual CLI commands run against MiniStack at grading time
62
+ - Chaos injection at **30% probability** — maximum perturbation frequency
63
+ - **6 drift detection tasks** — correct infra is provisioned, then 2-3 random mutations applied from a pool
64
+ - Agent must audit environment, discover which resources drifted, and fix only those
65
+ - Drift is randomized per episode — prevents memorization of fix sequences
66
+
67
+ ---
68
+
69
+ ## Features
70
+
71
+ ### 1. Curriculum & Training
72
+
73
+ Adaptive learning system that tracks mastery and selects optimal tasks.
74
+
75
+ #### Progressive Difficulty
76
+ - **What:** The environment organizes 120+ tasks across 5 tiers: Warmup, Beginner, Intermediate, Advanced, and Expert. Tasks progress from simple listing operations to complex SRE incident response and drift detection scenarios.
77
+ - **Why:** Prevents the agent from being overwhelmed by complex tasks early on. Scaffolded difficulty ensures the agent builds foundational skills before tackling multi-service architectures.
78
+ - **How:** The `CurriculumManager` maintains per-agent tier state. Promotion requires meeting a minimum episode count and success rate threshold. A fast-track mechanism allows agents scoring 90%+ on 3 consecutive episodes to skip the minimum wait.
79
+ - **Metrics:** 5 Difficulty Tiers | 120+ Total Tasks | 90% Fast-track Threshold
80
+
81
+ #### Mastery Tracking
82
+ - **What:** Each task independently tracks the agent's performance using a weighted success rate over a sliding window. Tasks "graduate" when performance exceeds the mastery threshold consistently.
83
+ - **Why:** Ensures the agent truly masters a skill before moving on. Prevents lucky single completions from being treated as mastery. Un-graduation catches skill decay.
84
+ - **How:** A `mastery_window` of 10 episodes and `mastery_threshold` of 0.7 (70% success). Minimum 3 attempts required before graduation. Recent results are weighted more heavily using exponential decay (factor 0.85). Graduated tasks can un-graduate if performance drops.
85
+ - **Metrics:** 70% Mastery Threshold | 10 Window Size | 0.85 Decay Factor
86
+
87
+ #### Spaced Repetition
88
+ - **What:** Graduated tasks don't disappear — they resurface at exponentially increasing intervals (3, 6, 12, 24, 48 episodes) for re-testing, earning a +30 priority bonus when due.
89
+ - **Why:** Prevents catastrophic forgetting. The agent must retain skills even as it learns new ones. Exponential spacing is the most efficient retention schedule, borrowed from cognitive science.
90
+ - **How:** Each task tracks a `spaced_rep_interval` starting at 3 episodes. When re-tested and passes, the interval doubles (up to 48). If it fails, the interval resets. `_is_spaced_rep_due()` checks elapsed episodes against the interval.
91
+ - **Metrics:** +30 Spaced Rep Bonus | 3→48 Interval Range | 2x Interval Growth
92
+
93
+ #### Priority Selection
94
+ - **What:** Tasks are ranked by a composite score combining novelty, weakness, spaced repetition due dates, and recency. The highest-scoring task is selected for each episode.
95
+ - **Why:** Optimizes the training curriculum by ensuring the agent explores new tasks, practices weak areas, revisits graduated skills, and maintains variety — all balanced automatically.
96
+ - **How:** `score = novelty_bonus (+100 if never attempted) + weakness_weight (+50 × (1 - success_rate)) + spaced_rep_bonus (+30 if due) - recency_penalty (-20 if attempted in last 2 episodes)`. Uses exponential decay (0.85) to emphasize recent performance.
97
+ - **Metrics:** +100 Novelty Bonus | +50 Max Weakness Weight | -20 Recency Penalty
98
+
99
+ #### Tier Progression
100
+ - **What:** Agents advance through tiers via standard promotion (minimum episodes + success rate) or fast-track (3 consecutive high-scoring episodes). Tiers gate access to increasingly complex task pools.
101
+ - **Why:** Provides structure to the learning process. Standard promotion ensures sufficient exposure; fast-track rewards agents that demonstrate immediate competence.
102
+ - **How:** Standard: complete `min_episodes` at current tier with `success_rate >= advance_rate`. Fast-track: 3 consecutive episodes at >= 90% success bypasses the minimum episode requirement. Un-promotion is not supported — agents cannot drop tiers.
103
+ - **Metrics:** 3 Fast-track Streak | 90% Fast-track Rate | 5 Total Tiers
104
+
105
+ ### 2. Reward Shaping
106
+
107
+ Dense reward signals that encourage operational discipline and real progress.
108
+
109
+ ```
110
+ if task_achieved: reward = 1.0
111
+ else:
112
+ reward = partial_progress * 0.8 # base: scaled to [0.0, 0.8]
113
+ if progress_increased: reward += 0.1 # dense signal for advancing
114
+ if command_failed: reward *= 0.5 # penalty for errors
115
+ reward = clamp(reward, 0.0, 0.99) # never 1.0 without completion
116
+ reward *= 0.85 ** hints_used # hint decay
117
+ if survived_chaos: reward *= 1.05 # chaos survival bonus
118
+ ```
119
+
120
+ #### Rollback Penalty & Idempotency Bonus
121
+ - **What:** Detects create→delete pairs on the same resource (rollbacks) and penalizes them (-0.1 each). Rewards graceful "already exists" handling (+0.02) where the agent retries idempotently.
122
+ - **Why:** First RL environment rewarding operational discipline. In production, create-then-delete cycles are wasteful. Handling "already exists" gracefully is a sign of robust automation.
123
+ - **How:** `EpisodeTracker.detect_rollbacks()` scans command history for paired create/delete operations on the same resource. Idempotency detection looks for commands that fail with "already exists" patterns (BucketAlreadyExists, ResourceInUseException, etc.) followed by successful continuation.
124
+ - **Metrics:** -0.1 Rollback Penalty | +0.02 Idempotency Bonus | Per-pair Detection
125
+
126
+ #### Shaped Reward System
127
+ - **What:** Rewards are carefully shaped: 1.0 for full completion, 0.0-0.8 for partial progress, +0.1 progress bonus for advancing, ×0.5 for failures, capped at 0.99 without completion. Chaos bonus (×1.05) and hint decay (×0.85^n) layer on top.
128
+ - **Why:** Dense reward signal prevents sparse-reward stagnation. The agent gets meaningful feedback on every step, not just at episode end. Capping at 0.99 ensures only real completion earns full credit.
129
+ - **How:** `TaskGrader` dispatches to 5 strategies by tier: `command_match` (warmup), `resource_creation` (beginner), `multi_step` (intermediate), `multi_step+services` (advanced), and `state_checks` (expert). Each returns `partial_progress` which is converted to reward with bonuses/penalties applied.
130
+ - **Metrics:** 1.0 Max Reward | 0.99 Progress Cap | ×1.05 Chaos Bonus
131
+
132
+ #### Multi-Strategy Grading
133
+ - **What:** Five distinct grading strategies, one per tier: `command_match` checks operation+service pairs, `resource_creation` verifies resources exist, `multi_step` validates ordered sequences, advanced adds service coverage, and expert runs `state_checks` against MiniStack.
134
+ - **Why:** Each tier tests fundamentally different skills. A single grading strategy would either be too lenient for beginners or miss the nuance needed for expert SRE tasks.
135
+ - **How:** `TaskGrader.grade()` dispatches based on the task's `grading_strategy` field. Each strategy returns a `GradeResult` with `partial_progress` (0.0-1.0), `completed` flag, and details. Grading is deterministic and fully automated.
136
+ - **Metrics:** 5 Grading Strategies | 100% Automated | Per-tier Selection
137
+
138
+ ### 3. Resilience & Adaptability
139
+
140
+ Features that test agent robustness under unpredictable conditions.
141
+
142
+ #### Progressive Hint System
143
+ - **What:** A 3-level hint system where each level reveals progressively more detail: Level 1 names the AWS services, Level 2 describes the operations, Level 3 gives near-complete command structure. Each hint reduces the final reward by ×0.85.
144
+ - **Why:** Creates an information-reward tradeoff unique in RL. The agent learns to wean off hints over time — initially relying on them for unfamiliar tasks, then solving independently for maximum reward. From GRPO perspective, it creates a natural exploration/exploitation axis within a single episode.
145
+ - **How:** Agent issues special command `aws help --task-hint` as its action (intercepted before reaching MiniStack). Hints auto-generated from `SuccessCriteria` fields (services, steps, operations). Reward decay: `final_reward *= 0.85 ^ hints_used` — 0 hints: 1.0×, 1 hint: 0.85×, 2 hints: 0.72×, 3 hints: 0.61×. Curriculum naturally penalizes hint-dependent agents: lower rewards → slower graduation.
146
+ - **Metrics:** 3 Hint Levels | ×0.85 Decay Per Hint | ~61% Reward with 3 Hints
147
+
148
+ #### Chaos Injection Engine
149
+ - **What:** Silently mutates AWS resource state mid-episode to test agent resilience. Perturbations are scoped to services the current task uses. If the agent completes despite chaos, it earns a ×1.05 bonus.
150
+ - **Why:** Tests whether the agent can handle unexpected state changes — a critical SRE skill. Prevents brittle memorization of exact command sequences. Probability scales with tier difficulty.
151
+ - **How:** `ChaosEngine` selects perturbation templates specific to the services in use (S3 policy changes, DynamoDB throughput modifications, Lambda config alterations, etc.). Resource names are extracted from successful commands via regex. Chaos probability: 10% (Intermediate), 20% (Advanced), 30% (Expert).
152
+ - **Metrics:** ×1.05 Chaos Survival Bonus | 10-30% Probability by Tier | 5 Service Templates
153
+
154
+ #### Drift Detection Tasks
155
+ - **What:** 6 expert-tier tasks where infrastructure is provisioned correctly, then 2-3 random mutations are applied from a pool. The agent must audit, discover drifted resources, and fix only those — without knowing which drifted.
156
+ - **Why:** Randomized per episode, preventing memorization. Tests real SRE audit skills: the agent must reason about desired vs. actual state, not just follow a script.
157
+ - **How:** `DriftEngine` randomly selects 2-3 mutations from a task's `possible_drifts` pool and applies them after setup. Each task defines a `desired_state_spec` (natural language) and `state_checks` (ground truth CLI commands). Examples: S3 versioning/encryption drift, DynamoDB throughput changes, SNS subscription modifications.
158
+ - **Metrics:** 6 Drift Tasks | 2-3 Mutations Per Episode | Random Selection Per Run
159
+
160
+ ### 4. Security Posture Audit
161
+
162
+ Tests *reasoning about configuration state* — the agent must READ and ANALYZE existing infrastructure, not just build things. Unlike SRE tasks (broken functionality), these have *working but insecure* infrastructure.
163
+
164
+ #### Public S3 Bucket Lockdown
165
+ - **What:** A pre-provisioned S3 bucket "public-assets" has an overly permissive bucket policy granting access to any principal (`Principal: *`). The agent must read the policy, identify the vulnerability, and replace it with a restrictive policy allowing only a specific IAM role.
166
+ - **Why:** Tests security reasoning — the infrastructure is functional but insecure. Unlike SRE tasks where things are broken, here the agent must understand what "correct" security posture looks like and make the right judgment call.
167
+ - **How:** Setup creates the bucket with a wide-open policy. State checks verify the new policy denies `Principal: *` and only allows the `app-role` principal to perform `s3:GetObject`.
168
+ - **Metrics:** S3 Target Service | Policy Attack Surface | Expert Tier
169
+
170
+ #### IAM Least Privilege
171
+ - **What:** An IAM role "app-role" has an inline policy with `Action: *` and `Resource: *` — full admin access. The agent must replace it with a least-privilege policy allowing only `dynamodb:GetItem` and `dynamodb:PutItem` on the users table.
172
+ - **Why:** IAM misconfiguration is the #1 cloud security risk. This task tests whether the agent understands permission scoping and can reason about what access an application actually needs vs. what it currently has.
173
+ - **How:** Setup creates the role with a wildcard policy. The agent must craft a replacement policy document with specific actions and resource ARN. State checks verify the policy document matches the expected least-privilege permissions.
174
+ - **Metrics:** IAM Target Service | 2 Allowed Actions | Expert Tier
175
+
176
+ #### Secrets in Lambda Environment
177
+ - **What:** A Lambda function "data-processor" has a database password stored as a plaintext environment variable (`DB_PASSWORD=hunter2`). The agent must create a secret in Secrets Manager, update the Lambda to reference the secret ARN, and remove the plaintext variable.
178
+ - **Why:** Plaintext secrets in environment variables is a critical security anti-pattern. This task combines multiple services (Lambda + Secrets Manager) and tests the agent's ability to perform a safe credential rotation without breaking the function.
179
+ - **How:** Setup creates the Lambda with the plaintext env var. The agent must: (1) create a secret in Secrets Manager, (2) add `SECRET_ARN` env var to Lambda, (3) remove `DB_PASSWORD`. State checks verify all three conditions.
180
+ - **Metrics:** 2 Services Involved | 3 Required Steps | Expert Tier
181
+
182
+ ### 5. Anti-Reward-Hacking (8 Defense Layers)
183
+
184
+ 8 defense layers that prevent the agent from gaming the reward system.
185
+
186
+ #### 1. Ground-Truth Verification via MiniStack
187
+ - **What:** The grader never trusts agent command output. It independently queries MiniStack (the simulated AWS backend) to verify resource state for 20+ services. Even if the agent crafts fake-looking stdout, the grader checks actual state.
188
+ - **Why:** Prevents reward hacking through output fabrication. The agent cannot game the system by producing convincing but fake CLI output — ground truth is always checked server-side.
189
+ - **How:** `ResourceVerifier` has per-service verification methods that query MiniStack directly. For expert tasks, `StateCheck` assertions run actual AWS CLI commands against MiniStack at grading time, checking either `output_contains` (substring) or `json_path` extraction with expected values.
190
+ - **Metrics:** 20+ Verified Services | 100% Server-side | 0 Agent Visibility
191
+
192
+ #### 2. Deduplication
193
+ - **What:** `EpisodeTracker.has_executed_operation()` tracks which (operation, resource) pairs have been credited. Running the same successful command twice does NOT increase `partial_progress`. Progress can only increase, never re-earn.
194
+ - **Why:** Prevents the agent from gaming the reward system by repeating the same command to accumulate credit. Each unique operation earns credit exactly once.
195
+ - **How:** `credit_operation()` records each (operation, resource) pair. Before granting credit, `is_operation_already_credited()` checks if this exact pair was already rewarded. The check is deterministic and happens at grading time.
196
+ - **Metrics:** 1x Credit Per Operation | Exact Match Type | (op, res) Tracking Granularity
197
+
198
+ #### 3. Grader Invisibility
199
+ - **What:** The verification commands run by `ResourceVerifier` are NOT returned in the observation's `command_output`. They happen server-side during grading. The agent cannot observe or mimic them.
200
+ - **Why:** If the agent could see which verification commands the grader runs, it could learn to craft fake outputs that match expected patterns. Keeping grader logic invisible forces the agent to actually perform the task.
201
+ - **How:** `ResourceVerifier` executes AWS CLI commands against MiniStack in a separate execution context. Results are consumed internally by the grading pipeline. The observation returned to the agent only contains output from the agent's own commands.
202
+ - **Metrics:** 0 Grader Cmds Exposed | Server Execution Context | 20+ Hidden Verifications
203
+
204
+ #### 4. Command Allowlisting
205
+ - **What:** Only commands starting with `aws` are executed. Any attempt to run shell commands, pipe to other tools, use redirects, or escape the sandbox is rejected with `success=False`.
206
+ - **Why:** Prevents the agent from escaping the AWS CLI sandbox. Without this, the agent could potentially execute arbitrary shell commands, access the filesystem, or interfere with the environment.
207
+ - **How:** The environment's `step()` method validates the command before execution. Commands not starting with `aws` are immediately rejected.
208
+ - **Metrics:** `aws *` Allowed Pattern | 0 Shell Access | Instant Rejection
209
+
210
+ #### 5. No Verification Reward
211
+ - **What:** If the agent runs a command that matches a `state_check` command exactly (e.g., `aws s3api get-bucket-versioning --bucket app-config-store`), it gets no progress credit. Progress is only earned through `steps` operations (mutating commands), not read-only queries.
212
+ - **Why:** Prevents the agent from gaming progress by running the same verification commands the grader uses. The agent can run read commands to understand state, but only mutation commands earn progress.
213
+ - **How:** During grading, the `TaskGrader` checks if the agent's command matches any `state_check` command. Matching commands are flagged as verification-only and excluded from credit. Only commands matching `steps` operations (create, put, update, delete) earn `partial_progress`.
214
+ - **Metrics:** 0 Credit for Reads | Mutate Rewarded Actions | Exact Match Detection
215
+
216
+ #### 6. Monotonic Progress
217
+ - **What:** `partial_progress` can only increase within an episode. It is clamped to [0.0, 0.99] — reaching 1.0 requires actual task completion. The agent cannot lose progress, but also cannot re-earn it.
218
+ - **Why:** Prevents cycling strategies where the agent creates and destroys resources repeatedly. Combined with deduplication, this ensures steady forward progress.
219
+ - **How:** In `TaskGrader`, `previous_progress` tracks the highest progress seen. New progress is always `max(previous, current)`. Reward is clamped at 0.99 for partial completion, reserving 1.0 exclusively for verified full completion.
220
+ - **Metrics:** 0.99 Max Without Completion | 1.0 Requires Full Completion | max() Progress Function
221
+
222
+ #### 7. Resource Name Validation
223
+ - **What:** For `resource_exists` checks, the verifier matches the exact resource name, not just any resource of that type. Creating "my-test-bucket-2" doesn't satisfy a check for "my-test-bucket".
224
+ - **Why:** Prevents the agent from creating arbitrarily named resources to game the verification system. Forces precise execution of the task requirements.
225
+ - **How:** `ResourceVerifier`'s per-service methods (`verify_s3_bucket`, `verify_dynamodb_table`, etc.) compare against the exact expected resource name from the task definition. Each of the 20+ supported services has its own verification logic.
226
+ - **Metrics:** Exact Name Matching | 20+ Verified Services | 0 Partial Matches
227
+
228
+ #### 8. State Checks Verify Final State
229
+ - **What:** For expert SRE tasks, `state_checks` run actual AWS CLI commands against MiniStack at grading time. The grader verifies the final infrastructure state — not the commands the agent ran.
230
+ - **Why:** The agent cannot fake the state. MiniStack is the ground truth. This decouples "what the agent did" from "what was actually achieved", making reward hacking extremely difficult.
231
+ - **How:** Each expert task defines `state_checks` with command + assertion pairs. Assertions support `output_contains` (substring match on CLI output) and `json_path + expected` (JSON extraction). The grader runs these checks against the live MiniStack state independently of the agent.
232
+ - **Metrics:** CLI Verification Method | 2 Assertion Types | Live State Source
233
+
234
+ ---
235
+
236
+ ## Supported AWS Services (34)
237
+
238
+ | Category | Services |
239
+ |----------|----------|
240
+ | **Storage & DB** | S3, DynamoDB, RDS, ElastiCache, EFS |
241
+ | **Compute** | Lambda, ECS, EC2, Step Functions |
242
+ | **Messaging** | SQS, SNS, Kinesis, EventBridge, Firehose |
243
+ | **API** | API Gateway v1/v2, ALB/ELBv2 |
244
+ | **Security** | IAM, STS, Cognito, ACM, WAF v2, Secrets Manager |
245
+ | **Monitoring** | CloudWatch, CloudWatch Logs, SSM |
246
+ | **Infrastructure** | CloudFormation, Route53 |
247
+ | **Other** | SES, Athena, Glue, EMR |
248
+
249
+ ---
250
 
251
  ## Quick Start
252
 
 
269
  result = env.step(AwsRlAction(command="aws s3 ls"))
270
  ```
271
 
272
+ WebSocket API:
273
+
274
+ ```python
275
+ import websockets, json
276
+
277
+ async with websockets.connect("wss://sizzing-aws-rl-env.hf.space/ws") as ws:
278
+ await ws.send(json.dumps({"type": "reset"}))
279
+ obs = json.loads(await ws.recv())
280
+
281
+ await ws.send(json.dumps({"type": "step", "data": {"command": "aws s3 ls"}}))
282
+ obs = json.loads(await ws.recv())
283
+ ```
284
+
285
  ---
286
 
287
  ## Architecture
288
 
289
  ```
290
+ ┌─────────────────────────────────────────────────────────┐
291
+ │ Docker Container
292
+
293
  │ ┌─────────────────────┐ ┌────────────────────┐ │
294
+ │ │ FastAPI RL Server │ │ AWS Simulator │ │
295
+ │ │ (port 8000) │─────>│ (port 4566) │ │
296
+ │ │ │ │ 34 AWS services │ │
297
+ │ │ - Environment │ │ In-memory state │ │
298
+ │ │ - Curriculum │ │ Reset API │ │
299
+ │ │ - Grading Engine │ │ (Ministack) │ │
300
+ │ │ - Episode Tracker │ │ │ │
301
+ │ │ - Hint Provider │ │ │ │
302
  │ └─────────────────────┘ └────────────────────┘ │
303
+ │ ^ ^
304
+ │ | OpenEnv HTTP/WS | AWS CLI calls
305
+ └──────────┼─────────────────────────────┼────────────────┘
306
  | |
307
+ RL Agent (client, External) (internal only)
308
  ```
309
 
310
  ### Episode Lifecycle
311
 
312
+ 1. **`reset()`** Wipes AWS infrastructure state, selects next task from curriculum, provisions setup commands (if any), returns initial observation
313
+ 2. **`step(action)`** Validates command (`aws` prefix only), executes against MiniStack, records in tracker, grades with shaped reward, returns observation
314
+ 3. **Hint request** Agent sends `aws help --task-hint` to get a progressive hint (costs reward)
315
+ 4. **Terminates** when `task_achieved == True` or max steps reached
316
 
317
  ---
318
 
319
+
320
  ## Core Classes
321
 
322
  ### `AwsRlEnvironment`
323
 
324
+ [server/aws_rl_env_environment.py](server/aws_rl_env_environment.py) Implements the OpenEnv `Environment` interface. Orchestrates all services.
325
 
326
  | Method | Description |
327
  |--------|-------------|
328
  | `reset()` | Wipe infra, select task, provision setup, return initial observation |
329
+ | `step(action)` | Execute command (or intercept hint request), grade, update curriculum, return observation |
330
 
331
  ### `Curriculum`
332
 
333
+ [server/services/curriculum.py](server/services/curriculum.py) Priority-queue-based task selection with progressive difficulty.
334
 
335
  Selects the next task using a **max-heap scored by**:
336
 
337
  ```
338
  score = (
339
  novelty_bonus # +100 if never attempted (explore first)
340
+ + weakness_weight # +50 * (1 - task_success_rate) worse tasks get higher priority
341
  + spaced_rep_bonus # +30 if graduated task is "due" for re-test
342
  - recency_penalty # -20 if attempted in last 2 episodes (ensure variety)
343
  )
344
  ```
345
 
 
 
 
 
 
 
 
 
 
346
  ### `TaskGrader`
347
 
348
+ [server/services/task_grader.py](server/services/task_grader.py) Evaluates task completion using a dispatcher pattern. Rewards are always in [0.0, 1.0].
349
 
350
  **Grading strategies by tier:**
351
 
 
357
  | Advanced | Multi-step + services | All steps completed AND all required services touched |
358
  | Expert | State checks | Runs arbitrary AWS CLI commands to assert end-state (ground truth) |
359
 
360
+ ### `HintProvider`
361
 
362
+ [server/services/hint_provider.py](server/services/hint_provider.py) — Generates progressive hints from `SuccessCriteria` fields.
363
+
364
+ | Hint Level | What it reveals | Example |
365
+ |-----------|----------------|---------|
366
+ | Level 1 | Which AWS services to use | "You'll need IAM and Lambda" |
367
+ | Level 2 | Which operations | "Start with create-role, then put-role-policy" |
368
+ | Level 3 | Near-complete command structure | "Use: aws iam create-role --role-name ..." |
 
369
 
370
  ### `EpisodeTracker`
371
 
372
+ [server/services/episode_tracker.py](server/services/episode_tracker.py) Maintains per-episode step history. Parses AWS CLI commands to extract (service, operation, resource) tuples. Tracks credited operations for deduplication, monotonic progress, and hint usage.
373
 
374
  ### `ResourceVerifier`
375
 
376
+ [server/services/resource_verifier.py](server/services/resource_verifier.py) Queries MiniStack directly to verify ground-truth resource state. Service-specific checks for S3, DynamoDB, Lambda, SQS, SNS, IAM, Secrets Manager, and API Gateway. Also evaluates `StateCheck` assertions (substring match, JSON path extraction).
377
 
378
  ### `EnvironmentDesigner`
379
 
380
+ [server/services/environment_designer.py](server/services/environment_designer.py) Provisions initial AWS state via setup commands before the agent acts. Used by SRE/expert tasks to create broken or insecure infrastructure the agent must fix.
381
 
382
  ### `AwsBackend`
383
 
384
+ [server/services/aws_backend.py](server/services/aws_backend.py) Executes AWS CLI commands against MiniStack (`AWS_ENDPOINT_URL=http://localhost:4566`). Provides `reset_environment()` via MiniStack's `/_ministack/reset` endpoint.
385
 
386
  ### `AwsRlEnv` (Client)
387
 
388
+ [client.py](client.py) OpenEnv HTTP/WebSocket client. Wraps `reset()` and `step()` calls to the server.
389
 
390
  ---
391
 
392
  ## Data Models
393
 
394
+ [models.py](models.py) All Pydantic models and type aliases.
395
 
396
+ ### Action
397
 
398
  ```python
399
  class AwsRlAction(Action):
400
  command: str # AWS CLI command, e.g. "aws s3 ls"
401
+ ```
402
 
403
+ ### Observation
404
+
405
+ ```python
406
  class AwsRlObservation(Observation):
407
  episode_id: EpisodeID
408
  step_count: StepCount
409
  command_success: bool
410
  command_output: str # stdout from AWS CLI
411
  error: str # stderr if failed
412
+ task: TaskInfo | None # masked task definition (hides success criteria)
 
413
  task_achieved: bool
414
+ partial_progress: float # current task progress in [0.0, 1.0]
415
+ hints_used: int # number of hints requested this episode
416
+ hint_text: str # most recent hint text (if any)
417
+ ```
418
+
419
+ ### Environment State
420
+
421
+ ```python
422
+ class AwsRlState(State):
423
+ current_task: Task | None # full task assigned for the episode
424
+ tracker: TrackerState # episode tracker snapshot
425
+ infra_state: dict # AWS infrastructure state keyed by service name
426
+ chaos_occurred: bool # whether chaos was injected this episode
427
+ current_tier: str # agent's current difficulty tier
428
+
429
+ class TrackerState:
430
+ step_count: int # steps taken this episode
431
+ hints_used: int # hints requested this episode
432
+ progress: float # current partial progress [0.0, 1.0]
433
+ commands_executed: list[str] # commands executed this episode
434
+ credited_operations: list[str] # (operation, resource) pairs that earned credit
435
  ```
436
 
437
  ### Task Definitions
438
 
439
  ```python
440
  class Task:
441
+ task_id: TaskID
442
  difficulty: TaskDifficulty # warmup | beginner | intermediate | advanced | expert
443
  description: str # human-readable goal
444
  success_criteria: SuccessCriteria
445
+ setup_commands: list[SetupCommand] # pre-provision for SRE tasks
446
+ desired_state_spec: str | None # natural-language desired end state (drift tasks)
447
+ possible_drifts: list[SetupCommand] # pool of mutations for DriftEngine
448
+
449
+ class TaskInfo:
450
+ """Agent-visible subset of Task — masks success_criteria, setup_commands, and possible_drifts."""
451
+ task_id: TaskID
452
+ difficulty: TaskDifficulty
453
+ description: str
454
+ desired_state_spec: str | None
455
 
456
  class SuccessCriteria:
457
  command_contains: str | None # warmup/beginner
 
471
  mastery_window: int # sliding window size (default: 10)
472
  mastery_threshold: float # per-task graduation threshold (default: 0.7)
473
  fast_track_rate: float # early promotion threshold (default: 0.9)
474
+ chaos_probability: float # probability of chaos injection per step (default: 0.0)
475
 
476
  class SpacedRepState:
477
  interval: int # episodes until next re-test (3 -> 48)
 
480
 
481
  ---
482
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
483
  ## Project Structure
484
 
485
  ```
 
488
  ├── models.py # Pydantic data models & type aliases
489
  ├── client.py # AwsRlEnv OpenEnv client
490
  ├── inference.py # LLM agent inference script
491
+ ├── inference-complete.py # Full inference pipeline with curriculum
492
  ├── server/
493
  │ ├── app.py # FastAPI application + web UI endpoints
494
  │ ├── aws_rl_env_environment.py # Core RL environment (reset/step)
495
+ │ ├── templates/
496
+ │ │ └── index.html # Web playground UI
497
+ │ ├── static/
498
+ │ │ ├── css/style.css # Playground styles
499
+ │ │ └── js/app.js # Playground frontend logic
500
  │ └── services/
501
  │ ├── aws_backend.py # MiniStack command executor
502
  │ ├── task_grader.py # Grading engine with reward shaping
503
  │ ├── curriculum.py # Curriculum learning manager
504
+ │ ├── episode_tracker.py # Per-episode step history & hints
505
  │ ├── resource_verifier.py # Ground-truth state verification
506
  │ ├── environment_designer.py # Setup provisioning for SRE tasks
507
+ │ ├── hint_provider.py # Progressive hint generator
508
+ │ ├── chaos_engine.py # Chaos injection engine
509
+ │ ├── drift_engine.py # Drift detection engine
510
+ │ ├── task_solutions.py # Reference solutions for tasks
511
  │ └── tasks/
512
+ │ ├── warmup.yaml # 20 listing tasks
513
+ │ ├── beginner.yaml # 20 creation tasks
514
+ │ ├── intermediate.yaml # 20 multi-step tasks
515
+ │ ├── advanced.yaml # 20 architecture tasks
516
+ │ ├── expert.yaml # 20 SRE/security tasks
517
+ │ └── drift.yaml # Drift detection tasks
518
+ ├── tests/ # Unit tests for core services
519
+ │ ├── test_aws_rl_env_environment.py
520
+ │ ├── test_drift_engine.py
521
+ │ ├── test_environment_designer.py
522
+ │ ├── test_episode_tracker.py
523
+ │ ├── test_hint_provider.py
524
+ │ ├── test_resource_verifier.py
525
+ │ └── test_task_grader.py
526
+ ├── tests_tasks/ # Integration tests per task tier
527
+ │ ├── test_warmup_tasks.py
528
+ │ ├── test_beginner_tasks.py
529
+ │ ├── test_intermediate_tasks.py
530
+ │ ├── test_advanced_tasks.py
531
+ │ ├── test_expert_tasks.py
532
+ │ └── test_drift_tasks.py
533
  ├── aws_infra/ # Vendored MiniStack emulator
534
  │ └── aws_infra/
535
  │ ├── app.py # MiniStack ASGI router
 
556
 
557
  ### Local (without Docker)
558
 
559
+ Use the combined Makefile target:
 
 
 
 
 
 
 
 
 
 
 
 
560
 
561
  ```bash
562
  make run # Starts MiniStack + server
 
581
  | `AWS_SECRET_ACCESS_KEY` | `test` | AWS credentials (any value works) |
582
  | `AWS_DEFAULT_REGION` | `us-east-1` | AWS region |
583
  | `MAX_STEPS` | `15` | Max steps per episode |
584
+ | `API_BASE_URL` | | LLM API endpoint (for inference.py) |
585
+ | `MODEL_NAME` | | LLM model name (for inference.py) |
586
+ | `HF_TOKEN` | | HuggingFace token (for inference.py) |
587
  | `TEMPERATURE` | `0.7` | LLM sampling temperature |
588
 
589
  ---
 
606
  # "avg_reward_last_10": 0.65
607
  # }
608
  ```
609
+
610
+ ---
611
+
612
+ ## Links
613
+
614
+ - **GitHub**: [github.com/udaykiranpadhy/aws-rl-env](https://github.com/udaykiranpadhy/aws-rl-env)
615
+ - **Hugging Face Space**: [huggingface.co/spaces/Sizzing/aws_rl_env](https://huggingface.co/spaces/Sizzing/aws_rl_env)
616
+ - **API Reference**: [/docs](https://sizzing-aws-rl-env.hf.space/docs)
617
+ - **ReDoc**: [/redoc](https://sizzing-aws-rl-env.hf.space/redoc)
618
+ - **Portfolio**: [portfolio.udaykp.dev](https://portfolio.udaykp.dev)
__init__.py CHANGED
@@ -6,8 +6,13 @@
6
 
7
  """Aws Rl Env Environment."""
8
 
9
- from .client import AwsRlEnv
10
- from .models import AwsRlAction, AwsRlObservation
 
 
 
 
 
11
 
12
  __all__ = [
13
  "AwsRlAction",
 
6
 
7
  """Aws Rl Env Environment."""
8
 
9
+ try:
10
+ from .client import AwsRlEnv
11
+ from .models import AwsRlAction, AwsRlObservation
12
+ except ImportError:
13
+ # When imported directly (e.g. by pytest from rootdir) rather than as
14
+ # part of the aws_rl_env package, relative imports are unavailable.
15
+ pass
16
 
17
  __all__ = [
18
  "AwsRlAction",
aws_infra/aws_infra/app.py CHANGED
@@ -235,6 +235,29 @@ async def app(scope, receive, send):
235
  json.dumps({"reset": "ok"}).encode())
236
  return
237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  if path == "/_ministack/config" and method == "POST":
239
  _ALLOWED_CONFIG_KEYS = {
240
  "athena.ATHENA_ENGINE", "athena.ATHENA_DATA_DIR",
@@ -570,6 +593,105 @@ def _run_init_scripts():
570
  logger.error("Failed to execute init script %s: %s", script_path, e)
571
 
572
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
573
  def _reset_all_state():
574
  """Wipe all in-memory state across every service module, and persisted files if enabled."""
575
  import shutil
 
235
  json.dumps({"reset": "ok"}).encode())
236
  return
237
 
238
+ if path == "/_ministack/state" and method == "GET":
239
+ state = _get_all_state()
240
+ await _send_response(send, 200, {"Content-Type": "application/json"},
241
+ json.dumps(state).encode())
242
+ return
243
+
244
+ if path == "/_ministack/handlers" and method == "GET":
245
+ handlers = _get_all_handlers()
246
+ await _send_response(send, 200, {"Content-Type": "application/json"},
247
+ json.dumps(handlers).encode())
248
+ return
249
+
250
+ if path.startswith("/_ministack/handlers/") and method == "GET":
251
+ service_name = path[len("/_ministack/handlers/"):].strip("/")
252
+ info = _get_service_info(service_name)
253
+ if info is None:
254
+ await _send_response(send, 404, {"Content-Type": "application/json"},
255
+ json.dumps({"error": f"Unknown service: {service_name}"}).encode())
256
+ else:
257
+ await _send_response(send, 200, {"Content-Type": "application/json"},
258
+ json.dumps(info).encode())
259
+ return
260
+
261
  if path == "/_ministack/config" and method == "POST":
262
  _ALLOWED_CONFIG_KEYS = {
263
  "athena.ATHENA_ENGINE", "athena.ATHENA_DATA_DIR",
 
593
  logger.error("Failed to execute init script %s: %s", script_path, e)
594
 
595
 
596
+ def _service_modules() -> list:
597
+ """Return list of (canonical_name, module) for all service modules."""
598
+ from aws_infra.services import iam_sts
599
+ return [
600
+ ("s3", s3), ("sqs", sqs), ("sns", sns), ("dynamodb", dynamodb),
601
+ ("lambda", lambda_svc), ("iam", iam_sts), ("secretsmanager", secretsmanager),
602
+ ("logs", cloudwatch_logs), ("ssm", ssm), ("events", eventbridge),
603
+ ("kinesis", kinesis), ("monitoring", cloudwatch), ("ses", ses),
604
+ ("ses_v2", ses_v2), ("acm", acm), ("wafv2", waf),
605
+ ("states", stepfunctions), ("ecs", ecs), ("rds", rds),
606
+ ("elasticache", elasticache), ("glue", glue), ("athena", athena),
607
+ ("apigateway", apigateway), ("apigateway_v1", apigateway_v1),
608
+ ("firehose", firehose), ("route53", route53), ("cognito", cognito),
609
+ ("ec2", ec2), ("elasticmapreduce", emr), ("elasticloadbalancing", alb),
610
+ ("elasticfilesystem", efs), ("cloudformation", cloudformation),
611
+ ]
612
+
613
+
614
+ # Extra aliases for the /_ministack/handlers/<service> endpoint so users can
615
+ # look up services using common short names (e.g. "lambda", "stepfunctions").
616
+ _HANDLER_LOOKUP_ALIASES = {
617
+ **SERVICE_NAME_ALIASES,
618
+ "lambda": "lambda",
619
+ "iam": "iam",
620
+ "sts": "iam",
621
+ "ses-v2": "ses_v2",
622
+ "sesv2": "ses_v2",
623
+ "apigateway-v1": "apigateway_v1",
624
+ "apigatewayv1": "apigateway_v1",
625
+ "logs": "logs",
626
+ "emr": "elasticmapreduce",
627
+ "alb": "elasticloadbalancing",
628
+ "efs": "elasticfilesystem",
629
+ "cfn": "cloudformation",
630
+ "sf": "states",
631
+ "sfn": "states",
632
+ "cw": "monitoring",
633
+ "cwl": "logs",
634
+ "sm": "secretsmanager",
635
+ "eb": "events",
636
+ "ddb": "dynamodb",
637
+ }
638
+
639
+
640
+ def _resolve_service_module(service_name: str):
641
+ """Resolve a service name (or alias) to its (canonical_name, module) pair."""
642
+ name = service_name.lower().strip()
643
+ canonical = _HANDLER_LOOKUP_ALIASES.get(name, name)
644
+ for svc_name, mod in _service_modules():
645
+ if svc_name == canonical:
646
+ return svc_name, mod
647
+ return None, None
648
+
649
+
650
+ def _get_all_state() -> dict:
651
+ """Collect summary state from every service module."""
652
+ state = {}
653
+ for name, mod in _service_modules():
654
+ if name not in SERVICE_HANDLERS and name not in ("iam", "ses_v2", "apigateway_v1"):
655
+ continue
656
+ try:
657
+ state[name] = mod.get_state()
658
+ except Exception as e:
659
+ logger.warning("get_state() failed for %s: %s", name, e)
660
+ state[name] = {"error": str(e)}
661
+ return {"services": state}
662
+
663
+
664
+ def _get_all_handlers() -> dict:
665
+ """Collect SUPPORTED_ACTIONS from every service module."""
666
+ handlers = {}
667
+ for name, mod in _service_modules():
668
+ if name not in SERVICE_HANDLERS and name not in ("iam", "ses_v2", "apigateway_v1"):
669
+ continue
670
+ actions = getattr(mod, "SUPPORTED_ACTIONS", [])
671
+ handlers[name] = {"actions": actions, "count": len(actions)}
672
+ return {"services": handlers}
673
+
674
+
675
+ def _get_service_info(service_name: str) -> dict | None:
676
+ """Return detailed info for a single service: docstring, actions, and current state."""
677
+ name, mod = _resolve_service_module(service_name)
678
+ if mod is None:
679
+ return None
680
+ docstring = (mod.__doc__ or "").strip()
681
+ actions = getattr(mod, "SUPPORTED_ACTIONS", [])
682
+ try:
683
+ state = mod.get_state()
684
+ except Exception:
685
+ state = {}
686
+ return {
687
+ "service": name,
688
+ "description": docstring,
689
+ "supported_actions": actions,
690
+ "action_count": len(actions),
691
+ "state": state,
692
+ }
693
+
694
+
695
  def _reset_all_state():
696
  """Wipe all in-memory state across every service module, and persisted files if enabled."""
697
  import shutil
aws_infra/aws_infra/services/acm.py CHANGED
@@ -234,5 +234,20 @@ def _resend_validation_email(data):
234
  return json_response({})
235
 
236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  def reset():
238
  _certificates.clear()
 
234
  return json_response({})
235
 
236
 
237
+ SUPPORTED_ACTIONS = [
238
+ "RequestCertificate", "DescribeCertificate", "ListCertificates",
239
+ "DeleteCertificate", "GetCertificate", "ImportCertificate",
240
+ "AddTagsToCertificate", "RemoveTagsFromCertificate",
241
+ "ListTagsForCertificate", "UpdateCertificateOptions",
242
+ "RenewCertificate", "ResendValidationEmail",
243
+ ]
244
+
245
+
246
+ def get_state() -> dict:
247
+ return {
248
+ "certificates": {"count": len(_certificates), "ids": list(_certificates.keys())},
249
+ }
250
+
251
+
252
  def reset():
253
  _certificates.clear()
aws_infra/aws_infra/services/alb.py CHANGED
@@ -1044,6 +1044,38 @@ async def dispatch_request(lb, method, path, headers, body, query_params, port=8
1044
  json.dumps({"message": "No matching ALB rule found"}).encode())
1045
 
1046
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1047
  def reset():
1048
  _lbs.clear()
1049
  _tgs.clear()
 
1044
  json.dumps({"message": "No matching ALB rule found"}).encode())
1045
 
1046
 
1047
+ # ---------------------------------------------------------------------------
1048
+ # Supported Actions
1049
+ # ---------------------------------------------------------------------------
1050
+
1051
+ SUPPORTED_ACTIONS = [
1052
+ "CreateLoadBalancer", "DeleteLoadBalancer", "DescribeLoadBalancers",
1053
+ "ModifyLoadBalancerAttributes", "AddTags", "RemoveTags", "DescribeTags",
1054
+ "CreateTargetGroup", "DeleteTargetGroup", "DescribeTargetGroups",
1055
+ "ModifyTargetGroup", "ModifyTargetGroupAttributes", "CreateListener",
1056
+ "DeleteListener", "DescribeListeners", "ModifyListener", "CreateRule",
1057
+ "DeleteRule", "DescribeRules", "ModifyRule", "RegisterTargets",
1058
+ "DeregisterTargets", "DescribeTargetHealth", "SetRulePriorities",
1059
+ ]
1060
+
1061
+
1062
+ # ---------------------------------------------------------------------------
1063
+ # State
1064
+ # ---------------------------------------------------------------------------
1065
+
1066
+ def get_state() -> dict:
1067
+ return {
1068
+ "load_balancers": {"count": len(_lbs), "names": list(_lbs.keys())},
1069
+ "target_groups": {"count": len(_tgs), "names": list(_tgs.keys())},
1070
+ "listeners": {"count": len(_listeners), "ids": list(_listeners.keys())},
1071
+ "rules": {"count": len(_rules), "ids": list(_rules.keys())},
1072
+ "targets": {"count": sum(len(tgts) for tgts in _targets.values())},
1073
+ "tags": {"count": sum(len(tags) for tags in _tags.values())},
1074
+ "load_balancer_attributes": {"count": sum(len(attrs) for attrs in _lb_attrs.values())},
1075
+ "target_group_attributes": {"count": sum(len(attrs) for attrs in _tg_attrs.values())},
1076
+ }
1077
+
1078
+
1079
  def reset():
1080
  _lbs.clear()
1081
  _tgs.clear()
aws_infra/aws_infra/services/apigateway.py CHANGED
@@ -83,6 +83,18 @@ def _api_arn(api_id: str) -> str:
83
  return f"arn:aws:apigateway:{REGION}::/apis/{api_id}"
84
 
85
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  # ---- Persistence hooks ----
87
 
88
  def get_state() -> dict:
 
83
  return f"arn:aws:apigateway:{REGION}::/apis/{api_id}"
84
 
85
 
86
+ SUPPORTED_ACTIONS = [
87
+ "CreateApi", "GetApis", "GetApi", "UpdateApi", "DeleteApi",
88
+ "CreateRoute", "GetRoutes", "GetRoute", "UpdateRoute", "DeleteRoute",
89
+ "CreateIntegration", "GetIntegrations", "GetIntegration",
90
+ "UpdateIntegration", "DeleteIntegration", "CreateStage", "GetStages",
91
+ "GetStage", "UpdateStage", "DeleteStage", "CreateDeployment",
92
+ "GetDeployments", "GetDeployment", "DeleteDeployment", "GetTags",
93
+ "TagResource", "UntagResource", "CreateAuthorizer", "GetAuthorizers",
94
+ "GetAuthorizer", "UpdateAuthorizer", "DeleteAuthorizer",
95
+ ]
96
+
97
+
98
  # ---- Persistence hooks ----
99
 
100
  def get_state() -> dict:
aws_infra/aws_infra/services/apigateway_v1.py CHANGED
@@ -245,6 +245,27 @@ async def _call_lambda(func_name, event):
245
  return {"statusCode": 200, "body": "Mock response"}, None
246
 
247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  # ---- Persistence hooks ----
249
 
250
  def get_state():
 
245
  return {"statusCode": 200, "body": "Mock response"}, None
246
 
247
 
248
+ SUPPORTED_ACTIONS = [
249
+ "CreateRestApi", "GetRestApis", "GetRestApi", "UpdateRestApi",
250
+ "DeleteRestApi", "GetResources", "GetResource", "CreateResource",
251
+ "UpdateResource", "DeleteResource", "PutMethod", "GetMethod",
252
+ "DeleteMethod", "PutMethodResponse", "GetMethodResponse",
253
+ "DeleteMethodResponse", "PutIntegration", "GetIntegration",
254
+ "DeleteIntegration", "PutIntegrationResponse", "GetIntegrationResponse",
255
+ "DeleteIntegrationResponse", "CreateDeployment", "GetDeployments",
256
+ "GetDeployment", "UpdateDeployment", "DeleteDeployment", "CreateStage",
257
+ "GetStages", "GetStage", "UpdateStage", "DeleteStage",
258
+ "CreateAuthorizer", "GetAuthorizers", "GetAuthorizer",
259
+ "UpdateAuthorizer", "DeleteAuthorizer", "CreateModel", "GetModels",
260
+ "GetModel", "DeleteModel", "GetApiKeys", "CreateApiKey", "GetApiKey",
261
+ "DeleteApiKey", "GetUsagePlans", "CreateUsagePlan", "GetUsagePlan",
262
+ "DeleteUsagePlan", "GetUsagePlanKeys", "CreateUsagePlanKey",
263
+ "DeleteUsagePlanKey", "GetDomainNames", "CreateDomainName",
264
+ "GetDomainName", "DeleteDomainName", "GetTags", "TagResource",
265
+ "UntagResource",
266
+ ]
267
+
268
+
269
  # ---- Persistence hooks ----
270
 
271
  def get_state():
aws_infra/aws_infra/services/athena.py CHANGED
@@ -853,6 +853,29 @@ def _execution_out(ex):
853
  return {k: v for k, v in ex.items() if not k.startswith("_")}
854
 
855
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
856
  def reset():
857
  import time as _time
858
 
 
853
  return {k: v for k, v in ex.items() if not k.startswith("_")}
854
 
855
 
856
+ SUPPORTED_ACTIONS = [
857
+ "StartQueryExecution", "GetQueryExecution", "GetQueryResults", "StopQueryExecution",
858
+ "ListQueryExecutions", "CreateWorkGroup", "DeleteWorkGroup", "GetWorkGroup",
859
+ "ListWorkGroups", "UpdateWorkGroup", "CreateNamedQuery", "DeleteNamedQuery",
860
+ "GetNamedQuery", "ListNamedQueries", "BatchGetNamedQuery", "BatchGetQueryExecution",
861
+ "CreateDataCatalog", "GetDataCatalog", "ListDataCatalogs", "DeleteDataCatalog",
862
+ "UpdateDataCatalog", "CreatePreparedStatement", "GetPreparedStatement",
863
+ "DeletePreparedStatement", "ListPreparedStatements", "GetTableMetadata",
864
+ "ListTableMetadata", "TagResource", "UntagResource", "ListTagsForResource",
865
+ ]
866
+
867
+
868
+ def get_state() -> dict:
869
+ return {
870
+ "workgroups": {"count": len(_workgroups), "names": list(_workgroups.keys())},
871
+ "named_queries": {"count": len(_named_queries), "ids": list(_named_queries.keys())},
872
+ "data_catalogs": {"count": len(_data_catalogs), "names": list(_data_catalogs.keys())},
873
+ "executions": {"count": len(_executions), "ids": list(_executions.keys())},
874
+ "prepared_statements": {"count": len(_prepared_statements), "keys": list(_prepared_statements.keys())},
875
+ "tags": {"count": len(_tags), "arns": list(_tags.keys())},
876
+ }
877
+
878
+
879
  def reset():
880
  import time as _time
881
 
aws_infra/aws_infra/services/cloudformation/__init__.py CHANGED
@@ -54,6 +54,33 @@ async def handle_request(method: str, path: str, headers: dict,
54
  return handler(params)
55
 
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  def reset():
58
  _stacks.clear()
59
  _stack_events.clear()
 
54
  return handler(params)
55
 
56
 
57
+ # ---------------------------------------------------------------------------
58
+ # Supported Actions
59
+ # ---------------------------------------------------------------------------
60
+
61
+ SUPPORTED_ACTIONS = [
62
+ "CreateStack", "UpdateStack", "DeleteStack", "DescribeStacks",
63
+ "ListStacks", "DescribeStackEvents", "DescribeStackResource",
64
+ "DescribeStackResources", "GetTemplate", "ValidateTemplate",
65
+ "ListExports", "CreateChangeSet", "DescribeChangeSet",
66
+ "ExecuteChangeSet", "DeleteChangeSet", "ListChangeSets",
67
+ "GetTemplateSummary",
68
+ ]
69
+
70
+
71
+ # ---------------------------------------------------------------------------
72
+ # State
73
+ # ---------------------------------------------------------------------------
74
+
75
+ def get_state() -> dict:
76
+ return {
77
+ "stacks": {"count": len(_stacks), "names": list(_stacks.keys())},
78
+ "change_sets": {"count": len(_change_sets), "ids": list(_change_sets.keys())},
79
+ "stack_events": {"count": len(_stack_events), "ids": list(_stack_events.keys())},
80
+ "exports": {"count": len(_exports), "names": list(_exports.keys())},
81
+ }
82
+
83
+
84
  def reset():
85
  _stacks.clear()
86
  _stack_events.clear()
aws_infra/aws_infra/services/cloudwatch.py CHANGED
@@ -1382,8 +1382,32 @@ def _error(code, message, status, use_json=False):
1382
  return status, {"Content-Type": "application/xml"}, body
1383
 
1384
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1385
  def reset():
1386
  _alarms.clear()
1387
  _composite_alarms.clear()
1388
  _alarm_history.clear()
1389
  _resource_tags.clear()
 
 
 
1382
  return status, {"Content-Type": "application/xml"}, body
1383
 
1384
 
1385
+ SUPPORTED_ACTIONS = [
1386
+ "PutMetricData", "GetMetricStatistics", "GetMetricData", "ListMetrics",
1387
+ "PutMetricAlarm", "PutCompositeAlarm", "DescribeAlarms",
1388
+ "DescribeAlarmsForMetric", "DescribeAlarmHistory", "DeleteAlarms",
1389
+ "EnableAlarmActions", "DisableAlarmActions", "SetAlarmState",
1390
+ "TagResource", "UntagResource", "ListTagsForResource",
1391
+ "PutDashboard", "GetDashboard", "DeleteDashboards", "ListDashboards",
1392
+ ]
1393
+
1394
+
1395
+ def get_state() -> dict:
1396
+ return {
1397
+ "metrics": {"count": len(_metrics), "names": [f"{ns}:{mn}" for (ns, mn, _), _ in _metrics.items()]},
1398
+ "alarms": {"count": len(_alarms), "names": list(_alarms.keys())},
1399
+ "composite_alarms": {"count": len(_composite_alarms), "names": list(_composite_alarms.keys())},
1400
+ "dashboards": {"count": len(_dashboards), "names": list(_dashboards.keys())},
1401
+ "alarm_history": {"count": len(_alarm_history)},
1402
+ "resource_tags": {"count": len(_resource_tags), "arns": list(_resource_tags.keys())},
1403
+
1404
+ }
1405
+
1406
+
1407
  def reset():
1408
  _alarms.clear()
1409
  _composite_alarms.clear()
1410
  _alarm_history.clear()
1411
  _resource_tags.clear()
1412
+ _dashboards.clear()
1413
+ _metrics.clear()
aws_infra/aws_infra/services/cloudwatch_logs.py CHANGED
@@ -848,6 +848,29 @@ def _stop_query(data):
848
  return json_response({"success": True})
849
 
850
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
851
  def reset():
852
  _log_groups.clear()
853
  _destinations.clear()
 
848
  return json_response({"success": True})
849
 
850
 
851
+ SUPPORTED_ACTIONS = [
852
+ "CreateLogGroup", "DeleteLogGroup", "DescribeLogGroups",
853
+ "CreateLogStream", "DeleteLogStream", "DescribeLogStreams",
854
+ "PutLogEvents", "GetLogEvents", "FilterLogEvents",
855
+ "PutRetentionPolicy", "DeleteRetentionPolicy",
856
+ "PutSubscriptionFilter", "DeleteSubscriptionFilter", "DescribeSubscriptionFilters",
857
+ "TagLogGroup", "UntagLogGroup", "ListTagsLogGroup",
858
+ "TagResource", "UntagResource", "ListTagsForResource",
859
+ "PutDestination", "DeleteDestination", "DescribeDestinations", "PutDestinationPolicy",
860
+ "PutMetricFilter", "DeleteMetricFilter", "DescribeMetricFilters",
861
+ "StartQuery", "GetQueryResults", "StopQuery",
862
+ ]
863
+
864
+
865
+ def get_state() -> dict:
866
+ return {
867
+ "log_groups": {"count": len(_log_groups), "names": list(_log_groups.keys())},
868
+ "destinations": {"count": len(_destinations), "names": list(_destinations.keys())},
869
+ "metric_filters": {"count": len(_metric_filters), "keys": list(_metric_filters.keys())},
870
+ "queries": {"count": len(_queries), "ids": list(_queries.keys())},
871
+ }
872
+
873
+
874
  def reset():
875
  _log_groups.clear()
876
  _destinations.clear()
aws_infra/aws_infra/services/cognito.py CHANGED
@@ -1904,6 +1904,49 @@ def _apply_user_filter(users: list, filter_str: str) -> list:
1904
  return result
1905
 
1906
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1907
  # ===========================================================================
1908
  # RESET
1909
  # ===========================================================================
 
1904
  return result
1905
 
1906
 
1907
+ # ===========================================================================
1908
+ # SUPPORTED ACTIONS
1909
+ # ===========================================================================
1910
+
1911
+ SUPPORTED_ACTIONS = [
1912
+ "CreateUserPool", "DeleteUserPool", "DescribeUserPool", "ListUserPools",
1913
+ "UpdateUserPool", "CreateUserPoolClient", "DeleteUserPoolClient",
1914
+ "DescribeUserPoolClient", "ListUserPoolClients", "UpdateUserPoolClient",
1915
+ "AdminCreateUser", "AdminDeleteUser", "AdminGetUser", "ListUsers",
1916
+ "AdminSetUserPassword", "AdminUpdateUserAttributes", "AdminInitiateAuth",
1917
+ "AdminRespondToAuthChallenge", "InitiateAuth", "RespondToAuthChallenge",
1918
+ "SignUp", "ConfirmSignUp", "ForgotPassword", "ConfirmForgotPassword",
1919
+ "ChangePassword", "GetUser", "UpdateUserAttributes", "DeleteUser",
1920
+ "AdminAddUserToGroup", "AdminRemoveUserFromGroup",
1921
+ "AdminListGroupsForUser", "AdminListUserAuthEvents", "CreateGroup",
1922
+ "DeleteGroup", "GetGroup", "ListGroups", "AdminConfirmSignUp",
1923
+ "AdminDisableUser", "AdminEnableUser", "AdminResetUserPassword",
1924
+ "AdminUserGlobalSignOut", "GlobalSignOut", "RevokeToken",
1925
+ "CreateUserPoolDomain", "DeleteUserPoolDomain", "DescribeUserPoolDomain",
1926
+ "GetUserPoolMfaConfig", "SetUserPoolMfaConfig", "AssociateSoftwareToken",
1927
+ "VerifySoftwareToken", "TagResource", "UntagResource",
1928
+ "ListTagsForResource", "CreateIdentityPool", "DeleteIdentityPool",
1929
+ "DescribeIdentityPool", "ListIdentityPools", "UpdateIdentityPool",
1930
+ "GetId", "GetCredentialsForIdentity", "GetOpenIdToken",
1931
+ "SetIdentityPoolRoles", "GetIdentityPoolRoles", "ListIdentities",
1932
+ "DescribeIdentity", "MergeDeveloperIdentities",
1933
+ "UnlinkDeveloperIdentity", "UnlinkIdentity",
1934
+ ]
1935
+
1936
+
1937
+ # ===========================================================================
1938
+ # STATE
1939
+ # ===========================================================================
1940
+
1941
+ def get_state() -> dict:
1942
+ return {
1943
+ "user_pools": {"count": len(_user_pools), "ids": list(_user_pools.keys())},
1944
+ "identity_pools": {"count": len(_identity_pools), "ids": list(_identity_pools.keys())},
1945
+ "pool_domain_map": {"count": len(_pool_domain_map), "domains": list(_pool_domain_map.keys())},
1946
+ "identity_tags": {"count": len(_identity_tags), "arns": list(_identity_tags.keys())},
1947
+ }
1948
+
1949
+
1950
  # ===========================================================================
1951
  # RESET
1952
  # ===========================================================================
aws_infra/aws_infra/services/dynamodb.py CHANGED
@@ -1801,6 +1801,29 @@ def _diff_attributes(old_item, new_item, return_old=True):
1801
  return result
1802
 
1803
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1804
  def reset():
1805
  with _lock:
1806
  _tables.clear()
 
1801
  return result
1802
 
1803
 
1804
+ SUPPORTED_ACTIONS = [
1805
+ "CreateTable", "DeleteTable", "DescribeTable", "ListTables", "UpdateTable",
1806
+ "PutItem", "GetItem", "DeleteItem", "UpdateItem",
1807
+ "Query", "Scan",
1808
+ "BatchWriteItem", "BatchGetItem",
1809
+ "TransactWriteItems", "TransactGetItems",
1810
+ "DescribeTimeToLive", "UpdateTimeToLive",
1811
+ "DescribeContinuousBackups", "UpdateContinuousBackups",
1812
+ "DescribeEndpoints",
1813
+ "TagResource", "UntagResource", "ListTagsOfResource",
1814
+ ]
1815
+
1816
+
1817
+ def get_state() -> dict:
1818
+ return {
1819
+ "tables": {"count": len(_tables), "names": list(_tables.keys())},
1820
+ "tags": {"count": len(_tags), "names": list(_tags.keys())},
1821
+ "ttl_settings": {"count": len(_ttl_settings), "names": list(_ttl_settings.keys())},
1822
+ "pitr_settings": {"count": len(_pitr_settings), "names": list(_pitr_settings.keys())},
1823
+ "stream_records": {"count": len(_stream_records), "names": list(_stream_records.keys())},
1824
+ }
1825
+
1826
+
1827
  def reset():
1828
  with _lock:
1829
  _tables.clear()
aws_infra/aws_infra/services/ec2.py CHANGED
@@ -2224,6 +2224,74 @@ def _delete_egress_only_igw(params):
2224
  return _xml(200, "DeleteEgressOnlyInternetGatewayResponse", "<returnCode>true</returnCode>")
2225
 
2226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2227
  # ---------------------------------------------------------------------------
2228
  # Reset
2229
  # ---------------------------------------------------------------------------
 
2224
  return _xml(200, "DeleteEgressOnlyInternetGatewayResponse", "<returnCode>true</returnCode>")
2225
 
2226
 
2227
+ # ---------------------------------------------------------------------------
2228
+ # Supported Actions
2229
+ # ---------------------------------------------------------------------------
2230
+
2231
+ SUPPORTED_ACTIONS = [
2232
+ "RunInstances", "TerminateInstances", "DescribeInstances", "StartInstances",
2233
+ "StopInstances", "RebootInstances", "DescribeImages", "CreateSecurityGroup",
2234
+ "DeleteSecurityGroup", "DescribeSecurityGroups",
2235
+ "AuthorizeSecurityGroupIngress", "RevokeSecurityGroupIngress",
2236
+ "AuthorizeSecurityGroupEgress", "RevokeSecurityGroupEgress",
2237
+ "CreateKeyPair", "DeleteKeyPair", "DescribeKeyPairs", "ImportKeyPair",
2238
+ "DescribeVpcs", "DescribeSubnets", "DescribeAvailabilityZones",
2239
+ "CreateVpc", "DeleteVpc", "CreateSubnet", "DeleteSubnet",
2240
+ "CreateInternetGateway", "DeleteInternetGateway",
2241
+ "DescribeInternetGateways", "AttachInternetGateway",
2242
+ "DetachInternetGateway", "AllocateAddress", "ReleaseAddress",
2243
+ "AssociateAddress", "DisassociateAddress", "DescribeAddresses",
2244
+ "CreateTags", "DeleteTags", "DescribeTags", "ModifyVpcAttribute",
2245
+ "ModifySubnetAttribute", "CreateRouteTable", "DeleteRouteTable",
2246
+ "DescribeRouteTables", "AssociateRouteTable", "DisassociateRouteTable",
2247
+ "CreateRoute", "ReplaceRoute", "DeleteRoute", "CreateNetworkInterface",
2248
+ "DeleteNetworkInterface", "DescribeNetworkInterfaces",
2249
+ "AttachNetworkInterface", "DetachNetworkInterface", "CreateVpcEndpoint",
2250
+ "DeleteVpcEndpoints", "DescribeVpcEndpoints", "CreateVolume",
2251
+ "DeleteVolume", "DescribeVolumes", "DescribeVolumeStatus", "AttachVolume",
2252
+ "DetachVolume", "ModifyVolume", "DescribeVolumesModifications",
2253
+ "EnableVolumeIO", "ModifyVolumeAttribute", "DescribeVolumeAttribute",
2254
+ "CreateSnapshot", "DeleteSnapshot", "DescribeSnapshots",
2255
+ "ModifySnapshotAttribute", "DescribeSnapshotAttribute", "CopySnapshot",
2256
+ "CreateNatGateway", "DescribeNatGateways", "DeleteNatGateway",
2257
+ "CreateNetworkAcl", "DescribeNetworkAcls", "DeleteNetworkAcl",
2258
+ "CreateNetworkAclEntry", "DeleteNetworkAclEntry",
2259
+ "ReplaceNetworkAclEntry", "ReplaceNetworkAclAssociation",
2260
+ "CreateFlowLogs", "DescribeFlowLogs", "DeleteFlowLogs",
2261
+ "CreateVpcPeeringConnection", "AcceptVpcPeeringConnection",
2262
+ "DescribeVpcPeeringConnections", "DeleteVpcPeeringConnection",
2263
+ "CreateDhcpOptions", "AssociateDhcpOptions", "DescribeDhcpOptions",
2264
+ "DeleteDhcpOptions", "CreateEgressOnlyInternetGateway",
2265
+ "DescribeEgressOnlyInternetGateways", "DeleteEgressOnlyInternetGateway",
2266
+ ]
2267
+
2268
+
2269
+ # ---------------------------------------------------------------------------
2270
+ # State
2271
+ # ---------------------------------------------------------------------------
2272
+
2273
+ def get_state() -> dict:
2274
+ return {
2275
+ "instances": {"count": len(_instances), "ids": list(_instances.keys())},
2276
+ "security_groups": {"count": len(_security_groups), "ids": list(_security_groups.keys())},
2277
+ "vpcs": {"count": len(_vpcs), "ids": list(_vpcs.keys())},
2278
+ "subnets": {"count": len(_subnets), "ids": list(_subnets.keys())},
2279
+ "volumes": {"count": len(_volumes), "ids": list(_volumes.keys())},
2280
+ "key_pairs": {"count": len(_key_pairs), "names": list(_key_pairs.keys())},
2281
+ "internet_gateways": {"count": len(_internet_gateways), "ids": list(_internet_gateways.keys())},
2282
+ "nat_gateways": {"count": len(_nat_gateways), "ids": list(_nat_gateways.keys())},
2283
+ "route_tables": {"count": len(_route_tables), "ids": list(_route_tables.keys())},
2284
+ "network_interfaces": {"count": len(_network_interfaces), "ids": list(_network_interfaces.keys())},
2285
+ "vpc_endpoints": {"count": len(_vpc_endpoints), "ids": list(_vpc_endpoints.keys())},
2286
+ "snapshots": {"count": len(_snapshots), "ids": list(_snapshots.keys())},
2287
+ "network_acls": {"count": len(_network_acls), "ids": list(_network_acls.keys())},
2288
+ "flow_logs": {"count": len(_flow_logs), "ids": list(_flow_logs.keys())},
2289
+ "vpc_peering": {"count": len(_vpc_peering), "ids": list(_vpc_peering.keys())},
2290
+ "dhcp_options": {"count": len(_dhcp_options), "ids": list(_dhcp_options.keys())},
2291
+ "egress_igws": {"count": len(_egress_igws), "ids": list(_egress_igws.keys())},
2292
+ }
2293
+
2294
+
2295
  # ---------------------------------------------------------------------------
2296
  # Reset
2297
  # ---------------------------------------------------------------------------
aws_infra/aws_infra/services/ecs.py CHANGED
@@ -1229,6 +1229,26 @@ _ACTION_MAP = {
1229
  }
1230
 
1231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1232
  def reset():
1233
  docker_client = _get_docker()
1234
  if docker_client:
 
1229
  }
1230
 
1231
 
1232
+ SUPPORTED_ACTIONS = [
1233
+ "CreateCluster", "DeleteCluster", "DescribeClusters", "ListClusters", "UpdateCluster",
1234
+ "UpdateClusterSettings", "RegisterTaskDefinition", "DeregisterTaskDefinition",
1235
+ "DescribeTaskDefinition", "ListTaskDefinitions", "CreateService", "DeleteService",
1236
+ "DescribeServices", "UpdateService", "ListServices", "RunTask", "StopTask",
1237
+ "DescribeTasks", "ListTasks", "TagResource", "UntagResource", "ListTagsForResource",
1238
+ "ExecuteCommand", "ListAccountSettings", "PutAccountSetting", "CreateCapacityProvider",
1239
+ "DeleteCapacityProvider", "DescribeCapacityProviders", "PutClusterCapacityProviders",
1240
+ ]
1241
+
1242
+
1243
+ def get_state() -> dict:
1244
+ return {
1245
+ "clusters": {"count": len(_clusters), "names": list(_clusters.keys())},
1246
+ "task_definitions": {"count": len(_task_defs), "names": list(_task_defs.keys())},
1247
+ "services": {"count": len(_services), "names": list(_services.keys())},
1248
+ "tasks": {"count": len(_tasks), "ids": list(_tasks.keys())},
1249
+ }
1250
+
1251
+
1252
  def reset():
1253
  docker_client = _get_docker()
1254
  if docker_client:
aws_infra/aws_infra/services/efs.py CHANGED
@@ -497,6 +497,38 @@ def _error(status, code, message):
497
  return status, {"Content-Type": "application/json", "x-amzn-errortype": code}, body
498
 
499
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
500
  # ---------------------------------------------------------------------------
501
  # Reset
502
  # ---------------------------------------------------------------------------
 
497
  return status, {"Content-Type": "application/json", "x-amzn-errortype": code}, body
498
 
499
 
500
+ # ---------------------------------------------------------------------------
501
+ # Supported Actions
502
+ # ---------------------------------------------------------------------------
503
+
504
+ SUPPORTED_ACTIONS = [
505
+ "CreateFileSystem", "DeleteFileSystem", "DescribeFileSystems",
506
+ "DescribeFileSystemPolicy", "PutFileSystemPolicy",
507
+ "DeleteFileSystemPolicy", "CreateMountTarget", "DeleteMountTarget",
508
+ "DescribeMountTargets", "ModifyMountTargetSecurityGroups",
509
+ "CreateAccessPoint", "DeleteAccessPoint", "DescribeAccessPoints",
510
+ "TagResource", "UntagResource", "ListTagsForResource",
511
+ "CreateReplicationConfiguration", "DeleteReplicationConfiguration",
512
+ "DescribeReplicationConfigurations", "PutLifecycleConfiguration",
513
+ "GetLifecycleConfiguration", "PutBackupPolicy", "GetBackupPolicy",
514
+ "DescribeAccountPreferences", "PutAccountPreferences",
515
+ ]
516
+
517
+
518
+ # ---------------------------------------------------------------------------
519
+ # State
520
+ # ---------------------------------------------------------------------------
521
+
522
+ def get_state() -> dict:
523
+ return {
524
+ "file_systems": {"count": len(_file_systems), "ids": list(_file_systems.keys())},
525
+ "mount_targets": {"count": len(_mount_targets), "ids": list(_mount_targets.keys())},
526
+ "access_points": {"count": len(_access_points), "ids": list(_access_points.keys())},
527
+ "lifecycle_configs": {"count": len(_lifecycle_configs), "file_systems": list(_lifecycle_configs.keys())},
528
+ "backup_policies": {"count": len(_backup_policies), "file_systems": list(_backup_policies.keys())},
529
+ }
530
+
531
+
532
  # ---------------------------------------------------------------------------
533
  # Reset
534
  # ---------------------------------------------------------------------------
aws_infra/aws_infra/services/elasticache.py CHANGED
@@ -1266,6 +1266,32 @@ def _error(code, message, status):
1266
  return status, {"Content-Type": "application/xml"}, body
1267
 
1268
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1269
  def reset():
1270
  docker_client = _get_docker()
1271
  if docker_client:
 
1266
  return status, {"Content-Type": "application/xml"}, body
1267
 
1268
 
1269
+ SUPPORTED_ACTIONS = [
1270
+ "CreateCacheCluster", "DeleteCacheCluster", "DescribeCacheClusters", "ModifyCacheCluster",
1271
+ "RebootCacheCluster", "CreateReplicationGroup", "DeleteReplicationGroup",
1272
+ "DescribeReplicationGroups", "ModifyReplicationGroup", "IncreaseReplicaCount",
1273
+ "DecreaseReplicaCount", "CreateCacheSubnetGroup", "DescribeCacheSubnetGroups",
1274
+ "DeleteCacheSubnetGroup", "ModifyCacheSubnetGroup", "CreateCacheParameterGroup",
1275
+ "DescribeCacheParameterGroups", "DeleteCacheParameterGroup", "DescribeCacheParameters",
1276
+ "ModifyCacheParameterGroup", "ResetCacheParameterGroup", "CreateUser", "DescribeUsers",
1277
+ "DeleteUser", "ModifyUser", "CreateUserGroup", "DescribeUserGroups", "DeleteUserGroup",
1278
+ "ModifyUserGroup", "DescribeCacheEngineVersions", "ListTagsForResource",
1279
+ "AddTagsToResource", "RemoveTagsFromResource", "CreateSnapshot", "DeleteSnapshot",
1280
+ "DescribeSnapshots", "DescribeEvents",
1281
+ ]
1282
+
1283
+
1284
+ def get_state() -> dict:
1285
+ return {
1286
+ "clusters": {"count": len(_clusters), "ids": list(_clusters.keys())},
1287
+ "replication_groups": {"count": len(_replication_groups), "ids": list(_replication_groups.keys())},
1288
+ "users": {"count": len(_users), "ids": list(_users.keys())},
1289
+ "subnet_groups": {"count": len(_subnet_groups), "ids": list(_subnet_groups.keys())},
1290
+ "parameter_groups": {"count": len(_param_groups), "ids": list(_param_groups.keys())},
1291
+ "snapshots": {"count": len(_snapshots), "ids": list(_snapshots.keys())},
1292
+ }
1293
+
1294
+
1295
  def reset():
1296
  docker_client = _get_docker()
1297
  if docker_client:
aws_infra/aws_infra/services/emr.py CHANGED
@@ -568,6 +568,37 @@ async def handle_request(method, path, headers, body, query_params):
568
  return handler(data)
569
 
570
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
571
  # ---------------------------------------------------------------------------
572
  # Reset
573
  # ---------------------------------------------------------------------------
 
568
  return handler(data)
569
 
570
 
571
+ # ---------------------------------------------------------------------------
572
+ # Supported Actions
573
+ # ---------------------------------------------------------------------------
574
+
575
+ SUPPORTED_ACTIONS = [
576
+ "CreateCluster", "DescribeCluster", "ListClusters", "TerminateJobFlows",
577
+ "SetTerminationProtection", "AddJobFlowSteps", "DescribeStep",
578
+ "ListSteps", "ModifyInstanceGroups",
579
+ "GetBlockPublicAccessConfiguration",
580
+ "PutBlockPublicAccessConfiguration", "ListInstances",
581
+ "DescribeInstance", "ListBootstrapActions", "GetAutoScalingPolicy",
582
+ "PutAutoScalingPolicy", "RemoveAutoScalingPolicy",
583
+ "ListSecurityConfigurations", "CreateSecurityConfiguration",
584
+ "DeleteSecurityConfiguration", "DescribeSecurityConfiguration",
585
+ "ListStudios", "CreateStudio", "DeleteStudio", "DescribeStudio",
586
+ "ListStudioSessions", "CreateStudioSession", "DeleteStudioSession",
587
+ "GetStudioSessionMapping", "CreateStudioSessionMapping",
588
+ "UpdateStudioSessionMapping", "DeleteStudioSessionMapping",
589
+ ]
590
+
591
+
592
+ # ---------------------------------------------------------------------------
593
+ # State
594
+ # ---------------------------------------------------------------------------
595
+
596
+ def get_state() -> dict:
597
+ return {
598
+ "clusters": {"count": len(_clusters), "ids": list(_clusters.keys())},
599
+ }
600
+
601
+
602
  # ---------------------------------------------------------------------------
603
  # Reset
604
  # ---------------------------------------------------------------------------
aws_infra/aws_infra/services/eventbridge.py CHANGED
@@ -991,6 +991,29 @@ def _update_api_destination(data):
991
  })
992
 
993
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
994
  def reset():
995
  global _event_buses
996
  _rules.clear()
 
991
  })
992
 
993
 
994
+ SUPPORTED_ACTIONS = [
995
+ "CreateEventBus", "DeleteEventBus", "ListEventBuses", "DescribeEventBus",
996
+ "PutRule", "DeleteRule", "ListRules", "DescribeRule", "EnableRule", "DisableRule",
997
+ "PutTargets", "RemoveTargets", "ListTargetsByRule", "PutEvents",
998
+ "TagResource", "UntagResource", "ListTagsForResource",
999
+ "CreateArchive", "DeleteArchive", "DescribeArchive", "ListArchives",
1000
+ "PutPermission", "RemovePermission",
1001
+ "CreateConnection", "DescribeConnection", "DeleteConnection", "ListConnections",
1002
+ "UpdateConnection", "CreateApiDestination", "DescribeApiDestination",
1003
+ "DeleteApiDestination", "ListApiDestinations", "UpdateApiDestination",
1004
+ ]
1005
+
1006
+
1007
+ def get_state() -> dict:
1008
+ return {
1009
+ "event_buses": {"count": len(_event_buses), "names": list(_event_buses.keys())},
1010
+ "rules": {"count": len(_rules), "names": list(_rules.keys())},
1011
+ "archives": {"count": len(_archives), "names": list(_archives.keys())},
1012
+ "connections": {"count": len(_connections), "names": list(_connections.keys())},
1013
+ "api_destinations": {"count": len(_api_destinations), "names": list(_api_destinations.keys())},
1014
+ }
1015
+
1016
+
1017
  def reset():
1018
  global _event_buses
1019
  _rules.clear()
aws_infra/aws_infra/services/firehose.py CHANGED
@@ -41,6 +41,20 @@ _lock = threading.Lock()
41
  _dest_counter = 0
42
 
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  def reset():
45
  global _streams, _dest_counter
46
  with _lock:
 
41
  _dest_counter = 0
42
 
43
 
44
+ SUPPORTED_ACTIONS = [
45
+ "CreateDeliveryStream", "DeleteDeliveryStream", "DescribeDeliveryStream",
46
+ "ListDeliveryStreams", "PutRecord", "PutRecordBatch", "UpdateDestination",
47
+ "StartDeliveryStreamEncryption", "StopDeliveryStreamEncryption",
48
+ "ListTagsForResource", "TagResource", "UntagResource",
49
+ ]
50
+
51
+
52
+ def get_state() -> dict:
53
+ return {
54
+ "delivery_streams": {"count": len(_streams), "names": list(_streams.keys())},
55
+ }
56
+
57
+
58
  def reset():
59
  global _streams, _dest_counter
60
  with _lock:
aws_infra/aws_infra/services/glue.py CHANGED
@@ -1074,6 +1074,36 @@ def _simple_glob_match(pattern, name):
1074
  return fnmatch.fnmatch(name, pattern)
1075
 
1076
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1077
  def reset():
1078
  _databases.clear()
1079
  _tables.clear()
 
1074
  return fnmatch.fnmatch(name, pattern)
1075
 
1076
 
1077
+ SUPPORTED_ACTIONS = [
1078
+ "CreateDatabase", "DeleteDatabase", "GetDatabase", "GetDatabases", "UpdateDatabase",
1079
+ "CreateTable", "DeleteTable", "GetTable", "GetTables", "UpdateTable", "BatchDeleteTable",
1080
+ "CreatePartition", "DeletePartition", "GetPartition", "GetPartitions",
1081
+ "BatchCreatePartition", "BatchGetPartition", "CreatePartitionIndex", "GetPartitionIndexes",
1082
+ "CreateConnection", "DeleteConnection", "GetConnection", "GetConnections",
1083
+ "CreateCrawler", "DeleteCrawler", "GetCrawler", "GetCrawlers", "UpdateCrawler",
1084
+ "StartCrawler", "StopCrawler", "GetCrawlerMetrics", "CreateJob", "DeleteJob", "GetJob",
1085
+ "GetJobs", "UpdateJob", "StartJobRun", "GetJobRun", "GetJobRuns", "BatchStopJobRun",
1086
+ "CreateSecurityConfiguration", "DeleteSecurityConfiguration", "GetSecurityConfiguration",
1087
+ "GetSecurityConfigurations", "ListSecurityConfigurations", "CreateClassifier",
1088
+ "DeleteClassifier", "GetClassifier", "GetClassifiers", "UpdateClassifier",
1089
+ "CreateTrigger", "DeleteTrigger", "GetTrigger", "GetTriggers", "UpdateTrigger",
1090
+ "StartTrigger", "StopTrigger", "CreateWorkflow", "DeleteWorkflow", "GetWorkflow",
1091
+ "GetWorkflows", "UpdateWorkflow", "StartWorkflowRun", "GetWorkflowRun",
1092
+ "GetWorkflowRuns", "GetWorkflowRunProperties", "TagResource", "UntagResource",
1093
+ "ListTagsForResource",
1094
+ ]
1095
+
1096
+
1097
+ def get_state() -> dict:
1098
+ return {
1099
+ "databases": {"count": len(_databases), "names": list(_databases.keys())},
1100
+ "crawlers": {"count": len(_crawlers), "names": list(_crawlers.keys())},
1101
+ "jobs": {"count": len(_jobs), "names": list(_jobs.keys())},
1102
+ "connections": {"count": len(_connections), "names": list(_connections.keys())},
1103
+ "workflows": {"count": len(_workflows), "names": list(_workflows.keys())},
1104
+ }
1105
+
1106
+
1107
  def reset():
1108
  _databases.clear()
1109
  _tables.clear()
aws_infra/aws_infra/services/iam_sts.py CHANGED
@@ -1532,6 +1532,43 @@ _IAM_HANDLERS = {
1532
  }
1533
 
1534
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1535
  def reset():
1536
  _users.clear()
1537
  _roles.clear()
 
1532
  }
1533
 
1534
 
1535
+ SUPPORTED_ACTIONS = [
1536
+ "CreateUser", "GetUser", "ListUsers", "DeleteUser",
1537
+ "CreateRole", "GetRole", "ListRoles", "DeleteRole",
1538
+ "CreatePolicy", "GetPolicy", "GetPolicyVersion", "ListPolicyVersions",
1539
+ "ListPolicies", "DeletePolicy", "CreatePolicyVersion", "DeletePolicyVersion",
1540
+ "AttachRolePolicy", "DetachRolePolicy", "ListAttachedRolePolicies",
1541
+ "PutRolePolicy", "GetRolePolicy", "DeleteRolePolicy", "ListRolePolicies",
1542
+ "AttachUserPolicy", "DetachUserPolicy", "ListAttachedUserPolicies",
1543
+ "PutUserPolicy", "GetUserPolicy", "DeleteUserPolicy", "ListUserPolicies",
1544
+ "CreateAccessKey", "ListAccessKeys", "DeleteAccessKey",
1545
+ "CreateInstanceProfile", "DeleteInstanceProfile", "GetInstanceProfile",
1546
+ "AddRoleToInstanceProfile", "RemoveRoleFromInstanceProfile",
1547
+ "ListInstanceProfiles", "ListInstanceProfilesForRole",
1548
+ "UpdateAssumeRolePolicy",
1549
+ "CreateGroup", "GetGroup", "DeleteGroup", "ListGroups",
1550
+ "AddUserToGroup", "RemoveUserFromGroup", "ListGroupsForUser",
1551
+ "CreateServiceLinkedRole",
1552
+ "CreateOpenIDConnectProvider", "GetOpenIDConnectProvider", "DeleteOpenIDConnectProvider",
1553
+ "TagRole", "UntagRole", "ListRoleTags",
1554
+ "TagUser", "UntagUser", "ListUserTags",
1555
+ "TagPolicy", "UntagPolicy", "ListPolicyTags",
1556
+ "SimulatePrincipalPolicy", "SimulateCustomPolicy",
1557
+ "GetCallerIdentity", "AssumeRole", "GetSessionToken",
1558
+ ]
1559
+
1560
+
1561
+ def get_state() -> dict:
1562
+ return {
1563
+ "users": {"count": len(_users), "names": list(_users.keys())},
1564
+ "roles": {"count": len(_roles), "names": list(_roles.keys())},
1565
+ "policies": {"count": len(_policies), "names": list(_policies.keys())},
1566
+ "instance_profiles": {"count": len(_instance_profiles), "names": list(_instance_profiles.keys())},
1567
+ "groups": {"count": len(_groups), "names": list(_groups.keys())},
1568
+ "oidc_providers": {"count": len(_oidc_providers), "names": list(_oidc_providers.keys())},
1569
+ }
1570
+
1571
+
1572
  def reset():
1573
  _users.clear()
1574
  _roles.clear()
aws_infra/aws_infra/services/kinesis.py CHANGED
@@ -899,6 +899,25 @@ def _stream_desc(stream, shard_ids=None):
899
  }
900
 
901
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
902
  def reset():
903
  _streams.clear()
904
  _shard_iterators.clear()
 
899
  }
900
 
901
 
902
+ SUPPORTED_ACTIONS = [
903
+ "CreateStream", "DeleteStream", "DescribeStream", "DescribeStreamSummary",
904
+ "ListStreams", "PutRecord", "PutRecords", "GetShardIterator", "GetRecords",
905
+ "MergeShards", "SplitShard", "UpdateShardCount", "ListShards",
906
+ "IncreaseStreamRetentionPeriod", "DecreaseStreamRetentionPeriod",
907
+ "AddTagsToStream", "RemoveTagsFromStream", "ListTagsForStream",
908
+ "RegisterStreamConsumer", "DeregisterStreamConsumer", "ListStreamConsumers",
909
+ "DescribeStreamConsumer", "StartStreamEncryption", "StopStreamEncryption",
910
+ "EnableEnhancedMonitoring", "DisableEnhancedMonitoring",
911
+ ]
912
+
913
+
914
+ def get_state() -> dict:
915
+ return {
916
+ "streams": {"count": len(_streams), "names": list(_streams.keys())},
917
+ "consumers": {"count": len(_consumers), "names": list(_consumers.keys())},
918
+ }
919
+
920
+
921
  def reset():
922
  _streams.clear()
923
  _shard_iterators.clear()
aws_infra/aws_infra/services/lambda_svc.py CHANGED
@@ -2531,6 +2531,36 @@ def _list_function_url_configs(func_name: str, query_params: dict):
2531
  return json_response({"FunctionUrlConfigs": configs})
2532
 
2533
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2534
  def reset():
2535
  from aws_infra.core import lambda_runtime
2536
 
 
2531
  return json_response({"FunctionUrlConfigs": configs})
2532
 
2533
 
2534
+ SUPPORTED_ACTIONS = [
2535
+ "CreateFunction", "DeleteFunction", "GetFunction", "GetFunctionConfiguration",
2536
+ "ListFunctions", "Invoke",
2537
+ "UpdateFunctionCode", "UpdateFunctionConfiguration",
2538
+ "PublishVersion", "ListVersionsByFunction",
2539
+ "CreateAlias", "GetAlias", "UpdateAlias", "DeleteAlias", "ListAliases",
2540
+ "AddPermission", "RemovePermission", "GetPolicy",
2541
+ "ListTags", "TagResource", "UntagResource",
2542
+ "PublishLayerVersion", "GetLayerVersion", "GetLayerVersionByArn",
2543
+ "ListLayerVersions", "DeleteLayerVersion", "ListLayers",
2544
+ "AddLayerVersionPermission", "RemoveLayerVersionPermission", "GetLayerVersionPolicy",
2545
+ "CreateEventSourceMapping", "DeleteEventSourceMapping",
2546
+ "GetEventSourceMapping", "ListEventSourceMappings", "UpdateEventSourceMapping",
2547
+ "GetFunctionEventInvokeConfig", "PutFunctionEventInvokeConfig",
2548
+ "PutFunctionConcurrency", "GetFunctionConcurrency", "DeleteFunctionConcurrency",
2549
+ "GetFunctionCodeSigningConfig",
2550
+ "CreateFunctionUrlConfig", "GetFunctionUrlConfig",
2551
+ "UpdateFunctionUrlConfig", "DeleteFunctionUrlConfig", "ListFunctionUrlConfigs",
2552
+ ]
2553
+
2554
+
2555
+ def get_state() -> dict:
2556
+ return {
2557
+ "functions": {"count": len(_functions), "names": list(_functions.keys())},
2558
+ "layers": {"count": len(_layers), "names": list(_layers.keys())},
2559
+ "event_source_mappings": {"count": len(_esms), "ids": list(_esms.keys())},
2560
+ "function_urls": {"count": len(_function_urls), "keys": list(_function_urls.keys())},
2561
+ }
2562
+
2563
+
2564
  def reset():
2565
  from aws_infra.core import lambda_runtime
2566
 
aws_infra/aws_infra/services/rds.py CHANGED
@@ -1972,6 +1972,34 @@ _ACTION_MAP = {
1972
  }
1973
 
1974
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1975
  def reset():
1976
  docker_client = _get_docker()
1977
  if docker_client:
 
1972
  }
1973
 
1974
 
1975
+ SUPPORTED_ACTIONS = [
1976
+ "CreateDBInstance", "DeleteDBInstance", "DescribeDBInstances", "ModifyDBInstance",
1977
+ "StartDBInstance", "StopDBInstance", "RebootDBInstance", "CreateDBCluster",
1978
+ "DeleteDBCluster", "DescribeDBClusters", "ModifyDBCluster", "StartDBCluster",
1979
+ "StopDBCluster", "CreateDBSubnetGroup", "DeleteDBSubnetGroup", "DescribeDBSubnetGroups",
1980
+ "ModifyDBSubnetGroup", "CreateDBParameterGroup", "DeleteDBParameterGroup",
1981
+ "DescribeDBParameterGroups", "DescribeDBParameters", "ModifyDBParameterGroup",
1982
+ "CreateDBClusterParameterGroup", "DescribeDBClusterParameterGroups",
1983
+ "DeleteDBClusterParameterGroup", "DescribeDBClusterParameters",
1984
+ "ModifyDBClusterParameterGroup", "CreateDBSnapshot", "DeleteDBSnapshot",
1985
+ "DescribeDBSnapshots", "CreateDBClusterSnapshot", "DescribeDBClusterSnapshots",
1986
+ "DeleteDBClusterSnapshot", "CreateOptionGroup", "DeleteOptionGroup",
1987
+ "DescribeOptionGroups", "DescribeOptionGroupOptions", "CreateDBInstanceReadReplica",
1988
+ "RestoreDBInstanceFromDBSnapshot", "ListTagsForResource", "AddTagsToResource",
1989
+ "RemoveTagsFromResource", "DescribeDBEngineVersions", "DescribeOrderableDBInstanceOptions",
1990
+ ]
1991
+
1992
+
1993
+ def get_state() -> dict:
1994
+ return {
1995
+ "instances": {"count": len(_instances), "ids": list(_instances.keys())},
1996
+ "clusters": {"count": len(_clusters), "ids": list(_clusters.keys())},
1997
+ "subnet_groups": {"count": len(_subnet_groups), "names": list(_subnet_groups.keys())},
1998
+ "snapshots": {"count": len(_snapshots), "ids": list(_snapshots.keys())},
1999
+ "db_cluster_snapshots": {"count": len(_db_cluster_snapshots), "ids": list(_db_cluster_snapshots.keys())},
2000
+ }
2001
+
2002
+
2003
  def reset():
2004
  docker_client = _get_docker()
2005
  if docker_client:
aws_infra/aws_infra/services/route53.py CHANGED
@@ -45,6 +45,28 @@ _hc_caller_refs: dict = {} # caller_reference -> hc_id
45
  _lock = threading.Lock()
46
 
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  def reset():
49
  global _zones, _records, _changes, _health_checks, _tags, _caller_refs, _hc_caller_refs
50
  with _lock:
 
45
  _lock = threading.Lock()
46
 
47
 
48
+ SUPPORTED_ACTIONS = [
49
+ "CreateHostedZone", "DeleteHostedZone", "ListHostedZones", "GetHostedZone",
50
+ "UpdateHostedZoneComment", "GetChange", "ListResourceRecordSets",
51
+ "ChangeResourceRecordSets", "GetHostedZoneCount", "GetDNSSEC", "CreateHealthCheck",
52
+ "DeleteHealthCheck", "GetHealthCheck", "ListHealthChecks", "UpdateHealthCheckComment",
53
+ "GetHealthCheckStatus", "GetHealthCheckCount", "ChangeTagsForResource",
54
+ "ListTagsForResource", "ListTagsForResources", "CreateQueryLoggingConfig",
55
+ "DeleteQueryLoggingConfig", "ListQueryLoggingConfigs", "GetQueryLoggingConfig",
56
+ "ListHostedZonesByName", "CreateReusableDelegationSet", "DeleteReusableDelegationSet",
57
+ "ListReusableDelegationSets", "GetReusableDelegationSet",
58
+ ]
59
+
60
+
61
+ def get_state() -> dict:
62
+ return {
63
+ "hosted_zones": {"count": len(_zones), "ids": list(_zones.keys())},
64
+ "health_checks": {"count": len(_health_checks), "ids": list(_health_checks.keys())},
65
+ "tags": {"count": len(_tags), "resources": list(_tags.keys())},
66
+ "record_sets": {"count": sum(len(recs) for recs in _records.values())},
67
+ }
68
+
69
+
70
  def reset():
71
  global _zones, _records, _changes, _health_checks, _tags, _caller_refs, _hc_caller_refs
72
  with _lock:
aws_infra/aws_infra/services/s3.py CHANGED
@@ -2718,6 +2718,39 @@ def _load_persisted_data():
2718
  _load_persisted_data()
2719
 
2720
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2721
  def reset():
2722
  """Wipe all in-memory state (used by /_ministack/reset)."""
2723
  global _buckets, _bucket_policies, _bucket_notifications, _bucket_tags
 
2718
  _load_persisted_data()
2719
 
2720
 
2721
+ SUPPORTED_ACTIONS = [
2722
+ "CreateBucket", "DeleteBucket", "ListBuckets", "HeadBucket",
2723
+ "PutObject", "GetObject", "DeleteObject", "HeadObject", "CopyObject",
2724
+ "ListObjectsV1", "ListObjectsV2", "DeleteObjects",
2725
+ "PutObjectTagging", "GetObjectTagging", "DeleteObjectTagging",
2726
+ "ListObjectVersions", "PutBucketVersioning", "GetBucketVersioning",
2727
+ "PutBucketPolicy", "GetBucketPolicy", "DeleteBucketPolicy",
2728
+ "PutBucketNotificationConfiguration", "GetBucketNotificationConfiguration",
2729
+ "PutBucketEncryption", "GetBucketEncryption", "DeleteBucketEncryption",
2730
+ "PutBucketLifecycleConfiguration", "GetBucketLifecycleConfiguration", "DeleteBucketLifecycle",
2731
+ "PutBucketCors", "GetBucketCors", "DeleteBucketCors",
2732
+ "PutBucketAcl", "GetBucketAcl",
2733
+ "PutBucketWebsite", "GetBucketWebsite", "DeleteBucketWebsite",
2734
+ "PutBucketLogging", "GetBucketLogging",
2735
+ "PutBucketAccelerateConfiguration", "GetBucketAccelerateConfiguration",
2736
+ "PutBucketRequestPayment", "GetBucketRequestPayment",
2737
+ "PutObjectLockConfiguration", "GetObjectLockConfiguration",
2738
+ "PutObjectRetention", "GetObjectRetention",
2739
+ "PutObjectLegalHold", "GetObjectLegalHold",
2740
+ "PutBucketReplication", "GetBucketReplication", "DeleteBucketReplication",
2741
+ "CreateMultipartUpload", "UploadPart", "CompleteMultipartUpload",
2742
+ "AbortMultipartUpload", "ListMultipartUploads",
2743
+ "GetBucketLocation",
2744
+ "GetBucketTagging", "PutBucketTagging", "DeleteBucketTagging",
2745
+ ]
2746
+
2747
+
2748
+ def get_state() -> dict:
2749
+ return {
2750
+ "buckets": {"count": len(_buckets), "names": list(_buckets.keys())},
2751
+ }
2752
+
2753
+
2754
  def reset():
2755
  """Wipe all in-memory state (used by /_ministack/reset)."""
2756
  global _buckets, _bucket_policies, _bucket_notifications, _bucket_tags
aws_infra/aws_infra/services/secretsmanager.py CHANGED
@@ -708,6 +708,23 @@ def _validate_resource_policy(data):
708
  })
709
 
710
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
711
  def reset():
712
  _secrets.clear()
713
  _resource_policies.clear()
 
708
  })
709
 
710
 
711
+ SUPPORTED_ACTIONS = [
712
+ "CreateSecret", "GetSecretValue", "ListSecrets", "DeleteSecret",
713
+ "RestoreSecret", "UpdateSecret", "DescribeSecret", "PutSecretValue",
714
+ "TagResource", "UntagResource", "ListSecretVersionIds",
715
+ "RotateSecret", "GetRandomPassword", "ReplicateSecretToRegions",
716
+ "PutResourcePolicy", "GetResourcePolicy", "DeleteResourcePolicy",
717
+ "ValidateResourcePolicy",
718
+ ]
719
+
720
+
721
+ def get_state() -> dict:
722
+ return {
723
+ "secrets": {"count": len(_secrets), "names": list(_secrets.keys())},
724
+ "resource_policies": {"count": len(_resource_policies), "arns": list(_resource_policies.keys())},
725
+ }
726
+
727
+
728
  def reset():
729
  _secrets.clear()
730
  _resource_policies.clear()
aws_infra/aws_infra/services/ses.py CHANGED
@@ -1002,6 +1002,28 @@ def _json_error(code, message, status):
1002
  return _json_response(status, {"__type": code, "message": message})
1003
 
1004
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1005
  def reset():
1006
  _identities.clear()
1007
  _sent_emails.clear()
 
1002
  return _json_response(status, {"__type": code, "message": message})
1003
 
1004
 
1005
+ SUPPORTED_ACTIONS = [
1006
+ "SendEmail", "SendRawEmail", "SendTemplatedEmail", "SendBulkTemplatedEmail",
1007
+ "VerifyEmailIdentity", "VerifyEmailAddress", "VerifyDomainIdentity",
1008
+ "VerifyDomainDkim", "ListIdentities", "GetIdentityVerificationAttributes",
1009
+ "DeleteIdentity", "GetSendQuota", "GetSendStatistics",
1010
+ "ListVerifiedEmailAddresses", "CreateConfigurationSet",
1011
+ "DeleteConfigurationSet", "DescribeConfigurationSet", "ListConfigurationSets",
1012
+ "CreateTemplate", "GetTemplate", "DeleteTemplate", "ListTemplates",
1013
+ "UpdateTemplate", "GetIdentityDkimAttributes", "SetIdentityNotificationTopic",
1014
+ "SetIdentityFeedbackForwardingEnabled",
1015
+ ]
1016
+
1017
+
1018
+ def get_state() -> dict:
1019
+ return {
1020
+ "identities": {"count": len(_identities), "names": list(_identities.keys())},
1021
+ "templates": {"count": len(_templates), "names": list(_templates.keys())},
1022
+ "configuration_sets": {"count": len(_configuration_sets), "names": list(_configuration_sets.keys())},
1023
+ "sent_emails": {"count": len(_sent_emails)},
1024
+ }
1025
+
1026
+
1027
  def reset():
1028
  _identities.clear()
1029
  _sent_emails.clear()
aws_infra/aws_infra/services/ses_v2.py CHANGED
@@ -156,6 +156,23 @@ async def handle_request(method, path, headers, body, query_params):
156
  return _json_err("NotFoundException", f"Unknown SES v2 path: {method} {path}", 404)
157
 
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  def reset():
160
  _identities.clear()
161
  _config_sets.clear()
 
156
  return _json_err("NotFoundException", f"Unknown SES v2 path: {method} {path}", 404)
157
 
158
 
159
+ SUPPORTED_ACTIONS = [
160
+ "SendEmail", "CreateEmailIdentity", "GetEmailIdentity", "DeleteEmailIdentity",
161
+ "ListEmailIdentities", "CreateConfigurationSet", "GetConfigurationSet",
162
+ "DeleteConfigurationSet", "ListConfigurationSets", "GetAccount",
163
+ "ListSuppressedDestinations", "PutAccountSuppressionAttributes",
164
+ "TagResource", "UntagResource", "ListTagsForResource",
165
+ ]
166
+
167
+
168
+ def get_state() -> dict:
169
+ return {
170
+ "identities": {"count": len(_identities), "names": list(_identities.keys())},
171
+ "configuration_sets": {"count": len(_config_sets), "names": list(_config_sets.keys())},
172
+ "tags": {"count": len(_ses_tags), "resources": list(_ses_tags.keys())},
173
+ }
174
+
175
+
176
  def reset():
177
  _identities.clear()
178
  _config_sets.clear()
aws_infra/aws_infra/services/sns.py CHANGED
@@ -950,6 +950,27 @@ def _build_envelope(topic_arn: str, msg_id: str, message: str, subject: str,
950
  return json.dumps({k: v for k, v in envelope.items() if v is not None})
951
 
952
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
953
  def reset():
954
  _topics.clear()
955
  _sub_arn_to_topic.clear()
 
950
  return json.dumps({k: v for k, v in envelope.items() if v is not None})
951
 
952
 
953
+ SUPPORTED_ACTIONS = [
954
+ "CreateTopic", "DeleteTopic", "ListTopics",
955
+ "GetTopicAttributes", "SetTopicAttributes",
956
+ "Subscribe", "Unsubscribe", "ConfirmSubscription",
957
+ "ListSubscriptions", "ListSubscriptionsByTopic",
958
+ "GetSubscriptionAttributes", "SetSubscriptionAttributes",
959
+ "Publish", "PublishBatch",
960
+ "ListTagsForResource", "TagResource", "UntagResource",
961
+ "CreatePlatformApplication", "CreatePlatformEndpoint",
962
+ ]
963
+
964
+
965
+ def get_state() -> dict:
966
+ return {
967
+ "topics": {"count": len(_topics), "names": list(_topics.keys())},
968
+ "platform_applications": {"count": len(_platform_applications), "names": list(_platform_applications.keys())},
969
+ "platform_endpoints": {"count": len(_platform_endpoints), "names": list(_platform_endpoints.keys())},
970
+ "subscriptions": {"count": len(_sub_arn_to_topic), "sub_arn_to_topic": dict(_sub_arn_to_topic.items())},
971
+ }
972
+
973
+
974
  def reset():
975
  _topics.clear()
976
  _sub_arn_to_topic.clear()
aws_infra/aws_infra/services/sqs.py CHANGED
@@ -1231,6 +1231,23 @@ def _url_from_path(path: str) -> str:
1231
  return ""
1232
 
1233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1234
  def reset():
1235
  _queues.clear()
1236
  _queue_name_to_url.clear()
 
1231
  return ""
1232
 
1233
 
1234
+ SUPPORTED_ACTIONS = [
1235
+ "CreateQueue", "DeleteQueue", "ListQueues", "GetQueueUrl",
1236
+ "GetQueueAttributes", "SetQueueAttributes", "PurgeQueue",
1237
+ "SendMessage", "ReceiveMessage", "DeleteMessage",
1238
+ "ChangeMessageVisibility", "ChangeMessageVisibilityBatch",
1239
+ "SendMessageBatch", "DeleteMessageBatch",
1240
+ "ListQueueTags", "TagQueue", "UntagQueue",
1241
+ ]
1242
+
1243
+
1244
+ def get_state() -> dict:
1245
+ return {
1246
+ "queues": {"count": len(_queues), "names": list(_queues.keys())},
1247
+ "queue_name_to_url": dict(_queue_name_to_url),
1248
+ }
1249
+
1250
+
1251
  def reset():
1252
  _queues.clear()
1253
  _queue_name_to_url.clear()
aws_infra/aws_infra/services/ssm.py CHANGED
@@ -488,6 +488,21 @@ def _param_out(param, with_decryption=False):
488
  return out
489
 
490
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
491
  def reset():
492
  _parameters.clear()
493
  _parameter_history.clear()
 
488
  return out
489
 
490
 
491
+ SUPPORTED_ACTIONS = [
492
+ "PutParameter", "GetParameter", "GetParameters", "GetParametersByPath",
493
+ "DeleteParameter", "DeleteParameters", "DescribeParameters",
494
+ "GetParameterHistory", "LabelParameterVersion", "AddTagsToResource",
495
+ "RemoveTagsFromResource", "ListTagsForResource",
496
+ ]
497
+
498
+
499
+ def get_state() -> dict:
500
+ return {
501
+ "parameters": {"count": len(_parameters), "names": list(_parameters.keys())},
502
+ "tags": {"count": len(_tags), "arns": list(_tags.keys())},
503
+ }
504
+
505
+
506
  def reset():
507
  _parameters.clear()
508
  _parameter_history.clear()
aws_infra/aws_infra/services/stepfunctions.py CHANGED
@@ -1786,6 +1786,25 @@ _SERVICE_DISPATCH = {
1786
  }
1787
 
1788
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1789
  def reset():
1790
  _state_machines.clear()
1791
  _executions.clear()
 
1786
  }
1787
 
1788
 
1789
+ SUPPORTED_ACTIONS = [
1790
+ "CreateStateMachine", "DeleteStateMachine", "DescribeStateMachine", "UpdateStateMachine",
1791
+ "ListStateMachines", "StartExecution", "StartSyncExecution", "StopExecution",
1792
+ "DescribeExecution", "DescribeStateMachineForExecution", "ListExecutions",
1793
+ "GetExecutionHistory", "SendTaskSuccess", "SendTaskFailure", "SendTaskHeartbeat",
1794
+ "CreateActivity", "DeleteActivity", "DescribeActivity", "ListActivities",
1795
+ "GetActivityTask", "TagResource", "UntagResource", "ListTagsForResource",
1796
+ ]
1797
+
1798
+
1799
+ def get_state() -> dict:
1800
+ return {
1801
+ "state_machines": {"count": len(_state_machines), "names": list(_state_machines.keys())},
1802
+ "executions": {"count": len(_executions), "arns": list(_executions.keys())},
1803
+ "activities": {"count": len(_activities), "names": list(_activities.keys())},
1804
+ "tags": {"count": len(_tags), "resources": list(_tags.keys())},
1805
+ }
1806
+
1807
+
1808
  def reset():
1809
  _state_machines.clear()
1810
  _executions.clear()
aws_infra/aws_infra/services/waf.py CHANGED
@@ -358,6 +358,27 @@ def _describe_managed_rule_group(data):
358
  })
359
 
360
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  def reset():
362
  _web_acls.clear()
363
  _ip_sets.clear()
 
358
  })
359
 
360
 
361
+ SUPPORTED_ACTIONS = [
362
+ "CreateWebACL", "GetWebACL", "UpdateWebACL", "DeleteWebACL", "ListWebACLs",
363
+ "AssociateWebACL", "DisassociateWebACL", "GetWebACLForResource",
364
+ "ListResourcesForWebACL", "CreateIPSet", "GetIPSet", "UpdateIPSet",
365
+ "DeleteIPSet", "ListIPSets", "CreateRuleGroup", "GetRuleGroup",
366
+ "UpdateRuleGroup", "DeleteRuleGroup", "ListRuleGroups",
367
+ "TagResource", "UntagResource", "ListTagsForResource",
368
+ "CheckCapacity", "DescribeManagedRuleGroup",
369
+ ]
370
+
371
+
372
+ def get_state() -> dict:
373
+ return {
374
+ "web_acls": {"count": len(_web_acls), "ids": list(_web_acls.keys())},
375
+ "ip_sets": {"count": len(_ip_sets), "ids": list(_ip_sets.keys())},
376
+ "rule_groups": {"count": len(_rule_groups), "ids": list(_rule_groups.keys())},
377
+ "associations": {"count": len(_associations), "resources": list(_associations.keys())},
378
+ "waf_tags": {"count": len(_waf_tags), "resources": list(_waf_tags.keys())},
379
+ }
380
+
381
+
382
  def reset():
383
  _web_acls.clear()
384
  _ip_sets.clear()
client.py CHANGED
@@ -10,12 +10,11 @@ from typing import Dict
10
 
11
  from openenv.core import EnvClient
12
  from openenv.core.client_types import StepResult
13
- from openenv.core.env_server.types import State
14
 
15
- from models import AwsRlAction, AwsRlObservation, EpisodeID, StepCount
16
 
17
 
18
- class AwsRlEnv(EnvClient[AwsRlAction, AwsRlObservation, State]):
19
  """
20
  Client for the Aws Rl Env Environment.
21
 
@@ -65,9 +64,19 @@ class AwsRlEnv(EnvClient[AwsRlAction, AwsRlObservation, State]):
65
  done=payload.get("done", False),
66
  )
67
 
68
- def _parse_state(self, payload: Dict) -> State:
69
- """Parse server response into State object."""
70
- return State(
 
 
 
 
 
71
  episode_id=payload.get("episode_id"),
72
  step_count=payload.get("step_count", 0),
 
 
 
 
 
73
  )
 
10
 
11
  from openenv.core import EnvClient
12
  from openenv.core.client_types import StepResult
 
13
 
14
+ from models import AwsRlAction, AwsRlObservation, EpisodeID, StepCount, AwsRlState
15
 
16
 
17
+ class AwsRlEnv(EnvClient[AwsRlAction, AwsRlObservation, AwsRlState]):
18
  """
19
  Client for the Aws Rl Env Environment.
20
 
 
64
  done=payload.get("done", False),
65
  )
66
 
67
+ def _parse_state(self, payload: Dict) -> AwsRlState:
68
+ """Parse server response into AwsRlState object."""
69
+ from models import TrackerState, Task
70
+
71
+ tracker_data = payload.get("tracker", {})
72
+ task_data = payload.get("current_task")
73
+
74
+ return AwsRlState(
75
  episode_id=payload.get("episode_id"),
76
  step_count=payload.get("step_count", 0),
77
+ current_task=Task(**task_data) if task_data else None,
78
+ tracker=TrackerState(**tracker_data) if tracker_data else TrackerState(),
79
+ infra_state=payload.get("infra_state", {}),
80
+ chaos_occurred=payload.get("chaos_occurred", False),
81
+ current_tier=payload.get("current_tier", "warmup"),
82
  )
inference-complete.py CHANGED
@@ -89,6 +89,8 @@ SYSTEM_PROMPT = textwrap.dedent(
89
  - Only send AWS CLI commands (e.g. 'aws s3 ls', 'aws dynamodb create-table ...')
90
  - One command per turn — no pipes, no shell syntax, no chaining
91
  - Reply with ONLY the command, nothing else — no explanations, no quotes
 
 
92
  """
93
  ).strip()
94
 
@@ -165,10 +167,7 @@ def get_model_command(
165
  # ---------------------------------------------------------------------------
166
 
167
 
168
- async def run_episode(
169
- env: AwsRlEnv,
170
- llm_client: OpenAI
171
- ) -> Optional[dict]:
172
  """Run a single episode: reset -> step loop -> return results."""
173
  result = await env.reset()
174
  obs = result.observation
@@ -182,9 +181,9 @@ async def run_episode(
182
  task_desc = task.description
183
  task_id = int(task.task_id)
184
 
185
- print(f"\n{'='*60}")
186
  print(f"Episode {episode_num} -- Task {task_id}: {task_desc} (tier: {tier})")
187
- print(f"\n{'='*60}")
188
 
189
  history: List[str] = []
190
  last_output = obs.command_output
@@ -206,7 +205,6 @@ async def run_episode(
206
  last_reward,
207
  history,
208
  )
209
-
210
 
211
  result = await env.step(AwsRlAction(command=command))
212
  obs = result.observation
@@ -214,21 +212,22 @@ async def run_episode(
214
  reward = result.reward or 0.0
215
  success = obs.command_success
216
  task_achieved = obs.task_achieved
217
- done = result.done
218
 
219
  rewards.append(reward)
220
 
221
  print()
222
- print(f"\n{'-'*60}")
223
  print(
224
- f" [Step {step}] cmd=\"{command}\" command_output={obs.command_output!r} "
225
  f"reward={reward:.2f} command_success={success} achieved={task_achieved}"
226
  )
227
- print(f"\n{'-'*60}")
228
  print()
229
 
230
  status = "OK" if success else "FAIL"
231
- history.append(f"Step {step} [{status}]: {command} [command_output]={obs.command_output!r} [error]={obs.error!r} -> reward={reward:.2f}")
 
 
232
  last_output = obs.command_output
233
  last_error = obs.error
234
  last_reward = reward
@@ -299,9 +298,9 @@ def print_summary(tier_results: dict[str, list]) -> None:
299
  total_passed = 0
300
  total_tasks = 0
301
 
302
- print(f"\n{'='*60}")
303
  print("FINAL RESULTS")
304
- print(f"{'='*60}")
305
 
306
  for tier in ALL_TIERS:
307
  results = tier_results.get(tier, [])
 
89
  - Only send AWS CLI commands (e.g. 'aws s3 ls', 'aws dynamodb create-table ...')
90
  - One command per turn — no pipes, no shell syntax, no chaining
91
  - Reply with ONLY the command, nothing else — no explanations, no quotes
92
+ - If unsure, use 'aws help' to get unstuck, but try to be specific to the service if possible (e.g. 'aws s3 help')
93
+ - When ever you need a hint, use 'aws help --task-hint' to get a task-specific hint (you can use this multiple times for more hints, but hints reduce your reward)
94
  """
95
  ).strip()
96
 
 
167
  # ---------------------------------------------------------------------------
168
 
169
 
170
+ async def run_episode(env: AwsRlEnv, llm_client: OpenAI) -> Optional[dict]:
 
 
 
171
  """Run a single episode: reset -> step loop -> return results."""
172
  result = await env.reset()
173
  obs = result.observation
 
181
  task_desc = task.description
182
  task_id = int(task.task_id)
183
 
184
+ print(f"\n{'=' * 60}")
185
  print(f"Episode {episode_num} -- Task {task_id}: {task_desc} (tier: {tier})")
186
+ print(f"\n{'=' * 60}")
187
 
188
  history: List[str] = []
189
  last_output = obs.command_output
 
205
  last_reward,
206
  history,
207
  )
 
208
 
209
  result = await env.step(AwsRlAction(command=command))
210
  obs = result.observation
 
212
  reward = result.reward or 0.0
213
  success = obs.command_success
214
  task_achieved = obs.task_achieved
 
215
 
216
  rewards.append(reward)
217
 
218
  print()
219
+ print(f"\n{'-' * 60}")
220
  print(
221
+ f' [Step {step}] cmd="{command}" command_output={obs.command_output!r} '
222
  f"reward={reward:.2f} command_success={success} achieved={task_achieved}"
223
  )
224
+ print(f"\n{'-' * 60}")
225
  print()
226
 
227
  status = "OK" if success else "FAIL"
228
+ history.append(
229
+ f"Step {step} [{status}]: {command} [command_output]={obs.command_output!r} [error]={obs.error!r} -> reward={reward:.2f}"
230
+ )
231
  last_output = obs.command_output
232
  last_error = obs.error
233
  last_reward = reward
 
298
  total_passed = 0
299
  total_tasks = 0
300
 
301
+ print(f"\n{'=' * 60}")
302
  print("FINAL RESULTS")
303
+ print(f"{'=' * 60}")
304
 
305
  for tier in ALL_TIERS:
306
  results = tier_results.get(tier, [])
inference.py CHANGED
@@ -54,6 +54,10 @@ load_dotenv() # Load variables from .env file if present
54
 
55
  API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
56
  MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
 
 
 
 
57
  HF_TOKEN = os.getenv("HF_TOKEN")
58
  API_KEY = os.getenv("API_KEY") # Optional if using HF_TOKEN
59
 
@@ -77,6 +81,8 @@ SYSTEM_PROMPT = textwrap.dedent(
77
  - Only send AWS CLI commands (e.g. 'aws s3 ls', 'aws dynamodb create-table ...')
78
  - One command per turn — no pipes, no shell syntax, no chaining
79
  - Reply with ONLY the command, nothing else — no explanations, no quotes
 
 
80
  """
81
  ).strip()
82
 
 
54
 
55
  API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
56
  MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
57
+ if not API_BASE_URL:
58
+ API_BASE_URL = "https://router.huggingface.co/v1"
59
+ if not MODEL_NAME:
60
+ MODEL_NAME = "Qwen/Qwen2.5-72B-Instruct"
61
  HF_TOKEN = os.getenv("HF_TOKEN")
62
  API_KEY = os.getenv("API_KEY") # Optional if using HF_TOKEN
63
 
 
81
  - Only send AWS CLI commands (e.g. 'aws s3 ls', 'aws dynamodb create-table ...')
82
  - One command per turn — no pipes, no shell syntax, no chaining
83
  - Reply with ONLY the command, nothing else — no explanations, no quotes
84
+ - If unsure, use 'aws help' to get unstuck, but try to be specific to the service if possible (e.g. 'aws s3 help')
85
+ - When ever you need a hint, use 'aws help --task-hint' to get a task-specific hint (you can use this multiple times for more hints, but hints reduce your reward)
86
  """
87
  ).strip()
88
 
models.py CHANGED
@@ -3,9 +3,9 @@ Data models for the Aws Rl Env Environment.
3
  """
4
 
5
  from enum import Enum
6
- from typing import NewType, Union
7
 
8
- from openenv.core.env_server.types import Action, Observation
9
  from pydantic import BaseModel, Field
10
 
11
  # ---------------------------------------------------------------------------
@@ -18,6 +18,7 @@ StepCount = NewType("StepCount", int)
18
 
19
 
20
  class AwsService(str, Enum):
 
21
  S3 = "s3"
22
  EC2 = "ec2"
23
  DYNAMODB = "dynamodb"
@@ -26,6 +27,31 @@ class AwsService(str, Enum):
26
  SNS = "sns"
27
  IAM = "iam"
28
  APIGATEWAY = "apigateway"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
 
31
  # ---------------------------------------------------------------------------
@@ -62,6 +88,12 @@ class TierConfig(BaseModel):
62
  le=1.0,
63
  description="Success rate for early promotion after 3 episodes",
64
  )
 
 
 
 
 
 
65
 
66
 
67
  class SpacedRepState(BaseModel):
@@ -169,6 +201,82 @@ class Task(BaseModel):
169
  default_factory=list,
170
  description="Commands to run during reset to set up initial state (e.g. for SRE tasks)",
171
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
 
174
  # ---------------------------------------------------------------------------
@@ -199,13 +307,21 @@ class AwsRlObservation(Observation):
199
  default="", description="Stdout from the executed AWS CLI command"
200
  )
201
  error: str = Field(default="", description="Stderr if the command failed")
202
- resources: dict[AwsService, Union[dict, list, str]] = Field(
203
- default_factory=dict,
204
- description="Current resource state from MiniStack, keyed by service name",
205
- )
206
- task: Task | None = Field(
207
- default=None, description="The task the agent is trying to accomplish"
208
  )
209
  task_achieved: bool = Field(
210
  default=False, description="Whether the task has been achieved"
211
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  """
4
 
5
  from enum import Enum
6
+ from typing import NewType
7
 
8
+ from openenv.core.env_server.types import Action, Observation, State
9
  from pydantic import BaseModel, Field
10
 
11
  # ---------------------------------------------------------------------------
 
18
 
19
 
20
  class AwsService(str, Enum):
21
+ # Core services
22
  S3 = "s3"
23
  EC2 = "ec2"
24
  DYNAMODB = "dynamodb"
 
27
  SNS = "sns"
28
  IAM = "iam"
29
  APIGATEWAY = "apigateway"
30
+ SECRETSMANAGER = "secretsmanager"
31
+ # Compute & containers
32
+ ECS = "ecs"
33
+ # Data & analytics
34
+ RDS = "rds"
35
+ ELASTICACHE = "elasticache"
36
+ ATHENA = "athena"
37
+ GLUE = "glue"
38
+ FIREHOSE = "firehose"
39
+ EMR = "emr"
40
+ # Networking & routing
41
+ APIGATEWAYV2 = "apigatewayv2"
42
+ ROUTE53 = "route53"
43
+ ELBV2 = "elbv2"
44
+ # Storage
45
+ EBS = "ebs"
46
+ EFS = "efs"
47
+ # Identity & config
48
+ COGNITO = "cognito-idp"
49
+ SSM = "ssm"
50
+ EVENTBRIDGE = "events"
51
+ # Monitoring
52
+ CLOUDWATCH = "cloudwatch"
53
+ # Infrastructure as code
54
+ CLOUDFORMATION = "cloudformation"
55
 
56
 
57
  # ---------------------------------------------------------------------------
 
88
  le=1.0,
89
  description="Success rate for early promotion after 3 episodes",
90
  )
91
+ chaos_probability: float = Field(
92
+ default=0.0,
93
+ ge=0.0,
94
+ le=1.0,
95
+ description="Probability of chaos injection per step",
96
+ )
97
 
98
 
99
  class SpacedRepState(BaseModel):
 
201
  default_factory=list,
202
  description="Commands to run during reset to set up initial state (e.g. for SRE tasks)",
203
  )
204
+ desired_state_spec: str | None = Field(
205
+ default=None,
206
+ description="Natural-language specification of the desired end state (shown to agent for drift tasks)",
207
+ )
208
+ possible_drifts: list[SetupCommand] = Field(
209
+ default_factory=list,
210
+ description="Pool of mutations the DriftEngine may randomly apply after setup",
211
+ )
212
+
213
+
214
+ class TaskInfo(BaseModel):
215
+ """Agent-visible subset of Task — masks success_criteria, setup_commands, and possible_drifts."""
216
+
217
+ task_id: TaskID = Field(..., ge=0, description="Unique task identifier")
218
+ difficulty: TaskDifficulty = Field(
219
+ default=TaskDifficulty.WARMUP, description="Task difficulty level"
220
+ )
221
+ description: str = Field(..., description="Human-readable task description")
222
+ desired_state_spec: str | None = Field(
223
+ default=None,
224
+ description="Natural-language specification of the desired end state (shown to agent for drift tasks)",
225
+ )
226
+
227
+ @classmethod
228
+ def from_task(cls, task: Task) -> "TaskInfo":
229
+ """Create a masked TaskInfo from a full Task."""
230
+ return cls(
231
+ task_id=task.task_id,
232
+ difficulty=task.difficulty,
233
+ description=task.description,
234
+ desired_state_spec=task.desired_state_spec,
235
+ )
236
+
237
+
238
+ # ---------------------------------------------------------------------------
239
+ # Environment State
240
+ # ---------------------------------------------------------------------------
241
+
242
+
243
+ class TrackerState(BaseModel):
244
+ """Serializable snapshot of the EpisodeTracker."""
245
+
246
+ step_count: int = Field(default=0, ge=0, description="Steps taken this episode")
247
+ hints_used: int = Field(default=0, ge=0, description="Hints requested this episode")
248
+ progress: float = Field(
249
+ default=0.0, ge=0.0, le=1.0, description="Current partial progress"
250
+ )
251
+ commands_executed: list[str] = Field(
252
+ default_factory=list, description="Commands executed this episode"
253
+ )
254
+ credited_operations: list[str] = Field(
255
+ default_factory=list,
256
+ description="(operation, resource) pairs that earned credit",
257
+ )
258
+
259
+
260
+ class AwsRlState(State):
261
+ """Full environment state including task, tracker, and infrastructure."""
262
+
263
+ current_task: Task | None = Field(
264
+ default=None, description="The task assigned for this episode"
265
+ )
266
+ tracker: TrackerState = Field(
267
+ default_factory=TrackerState,
268
+ description="Episode tracker snapshot",
269
+ )
270
+ infra_state: dict = Field(
271
+ default_factory=dict,
272
+ description="AWS infrastructure state keyed by service name",
273
+ )
274
+ chaos_occurred: bool = Field(
275
+ default=False, description="Whether chaos was injected this episode"
276
+ )
277
+ current_tier: str = Field(
278
+ default="warmup", description="Agent's current difficulty tier"
279
+ )
280
 
281
 
282
  # ---------------------------------------------------------------------------
 
307
  default="", description="Stdout from the executed AWS CLI command"
308
  )
309
  error: str = Field(default="", description="Stderr if the command failed")
310
+ task: TaskInfo | None = Field(
311
+ default=None, description="The task the agent is trying to accomplish (masked)"
 
 
 
 
312
  )
313
  task_achieved: bool = Field(
314
  default=False, description="Whether the task has been achieved"
315
  )
316
+ partial_progress: float = Field(
317
+ default=0.0,
318
+ ge=0.0,
319
+ le=1.0,
320
+ description="Current task progress (0.0 to 1.0)",
321
+ )
322
+ hints_used: int = Field(
323
+ default=0, ge=0, description="Number of hints requested this episode"
324
+ )
325
+ hint_text: str = Field(
326
+ default="", description="Text of the most recently requested hint"
327
+ )
pyproject.toml CHANGED
@@ -49,6 +49,11 @@ include-package-data = true
49
  packages = ["aws_rl_env", "aws_rl_env.server"]
50
  package-dir = { "aws_rl_env" = ".", "aws_rl_env.server" = "server" }
51
 
 
 
 
 
 
52
  [tool.ruff]
53
  exclude = ["aws_infra/"]
54
 
 
49
  packages = ["aws_rl_env", "aws_rl_env.server"]
50
  package-dir = { "aws_rl_env" = ".", "aws_rl_env.server" = "server" }
51
 
52
+ [tool.pytest.ini_options]
53
+ addopts = "--import-mode=importlib"
54
+ testpaths = ["tests"]
55
+ pythonpath = ["."]
56
+
57
  [tool.ruff]
58
  exclude = ["aws_infra/"]
59
 
server/app.py CHANGED
@@ -83,6 +83,29 @@ async def web_reset():
83
  }
84
 
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  @app.post("/web/step", include_in_schema=False)
87
  async def web_step(request: WebStepRequest = Body(...)):
88
  action = AwsRlAction(**request.action)
 
83
  }
84
 
85
 
86
+ @app.get("/web/solution", include_in_schema=False)
87
+ async def web_solution():
88
+ """Return the next solution command for the current task step."""
89
+ if not _env._current_task:
90
+ return {"command": None, "error": "No active task. Start a new episode first."}
91
+
92
+ from server.services.task_solutions import get_next_solution
93
+
94
+ result = get_next_solution(
95
+ task_id=_env._current_task.task_id,
96
+ backend=_env._backend,
97
+ tracker=_env._tracker,
98
+ )
99
+ result["task_id"] = _env._current_task.task_id
100
+ return result
101
+
102
+
103
+ @app.get("/web/state", include_in_schema=False)
104
+ async def web_state():
105
+ """Return the full AwsRlState for the web UI."""
106
+ return _env.state.model_dump()
107
+
108
+
109
  @app.post("/web/step", include_in_schema=False)
110
  async def web_step(request: WebStepRequest = Body(...)):
111
  action = AwsRlAction(**request.action)
server/aws_rl_env_environment.py CHANGED
@@ -18,31 +18,59 @@ from typing import Any, Optional
18
  from uuid import uuid4
19
 
20
  from openenv.core.env_server.interfaces import Environment
21
- from openenv.core.env_server.types import State
22
 
23
- from models import AwsRlAction, AwsRlObservation, EpisodeID, StepCount, Task
 
 
 
 
 
 
 
 
 
24
  from server.services.aws_backend import AwsBackend
 
25
  from server.services.curriculum import Curriculum
26
  from server.services.environment_designer import EnvironmentDesigner
27
  from server.services.episode_tracker import EpisodeTracker
 
28
  from server.services.task_grader import TaskGrader
29
 
30
  logger = logging.getLogger(__name__)
31
 
32
 
33
- class AwsRlEnvironment(Environment[AwsRlAction, AwsRlObservation, State]):
34
  SUPPORTS_CONCURRENT_SESSIONS: bool = True
35
 
36
  def __init__(self) -> None:
37
  print("Initializing AWS RL Environment...")
38
- self._state = State(episode_id=str(uuid4()), step_count=0)
39
  self._backend = AwsBackend()
40
  self._curriculum = Curriculum()
41
  self._grader = TaskGrader(self._backend)
42
  self._designer = EnvironmentDesigner(self._backend)
43
  self._tracker = EpisodeTracker()
 
 
44
  self._current_task: Task | None = None
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  def reset(
47
  self,
48
  seed: Optional[int] = None,
@@ -50,33 +78,29 @@ class AwsRlEnvironment(Environment[AwsRlAction, AwsRlObservation, State]):
50
  **kwargs: Any,
51
  ) -> AwsRlObservation:
52
  self._backend.reset_environment()
53
- self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)
54
  self._tracker.reset()
 
55
  self._current_task = self._curriculum.next_task()
56
 
57
  self._designer.apply(self._current_task)
 
58
 
59
  return AwsRlObservation(
60
  episode_id=EpisodeID(self._state.episode_id or ""),
61
  step_count=StepCount(self._state.step_count),
62
  command_success=True,
63
  command_output="Environment reset. Infra state wiped.",
64
- task=self._current_task,
65
  done=False,
66
  reward=0.0,
67
  )
68
 
69
- def step(
70
- self,
71
- action: AwsRlAction,
72
- timeout_s: Optional[float] = None,
73
- **kwargs: Any,
74
- ) -> AwsRlObservation:
75
- assert self._current_task is not None, "Call reset() before step()"
76
- self._state.step_count += 1
77
 
78
- # Anti-hack: only allow AWS CLI commands
79
- command = action.command.strip()
80
  if not command.startswith("aws "):
81
  return AwsRlObservation(
82
  episode_id=EpisodeID(self._state.episode_id or ""),
@@ -84,22 +108,86 @@ class AwsRlEnvironment(Environment[AwsRlAction, AwsRlObservation, State]):
84
  command_success=False,
85
  command_output="",
86
  error="Only AWS CLI commands (starting with 'aws') are allowed.",
87
- task=self._current_task,
 
 
88
  task_achieved=False,
89
  done=False,
90
  reward=0.0,
91
  )
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  success, stdout, stderr = self._backend.execute_command(command)
94
 
95
  # Record in tracker
96
  latest_step = self._tracker.record_step(command, success, stdout, stderr)
97
 
98
- # Grade the task
99
- task_achieved = False
100
-
101
  grade_result = self._grader.grade(
102
- self._current_task, self._tracker, latest_step
 
 
 
 
103
  )
104
  task_achieved = grade_result.task_achieved
105
  reward = grade_result.reward
@@ -109,18 +197,29 @@ class AwsRlEnvironment(Environment[AwsRlAction, AwsRlObservation, State]):
109
  self._current_task, achieved=True, reward=reward
110
  )
111
 
 
 
 
 
 
 
 
 
 
112
  return AwsRlObservation(
113
  episode_id=EpisodeID(self._state.episode_id or ""),
114
  step_count=StepCount(self._state.step_count),
115
  command_success=success,
116
  command_output=stdout,
117
  error=stderr,
118
- task=self._current_task,
119
  task_achieved=task_achieved,
 
120
  done=task_achieved,
121
  reward=reward,
 
122
  )
123
 
124
  @property
125
- def state(self) -> State:
126
  return self._state
 
18
  from uuid import uuid4
19
 
20
  from openenv.core.env_server.interfaces import Environment
 
21
 
22
+ from models import (
23
+ AwsRlAction,
24
+ AwsRlObservation,
25
+ AwsRlState,
26
+ EpisodeID,
27
+ StepCount,
28
+ Task,
29
+ TaskInfo,
30
+ TrackerState,
31
+ )
32
  from server.services.aws_backend import AwsBackend
33
+ from server.services.chaos_engine import ChaosEngine
34
  from server.services.curriculum import Curriculum
35
  from server.services.environment_designer import EnvironmentDesigner
36
  from server.services.episode_tracker import EpisodeTracker
37
+ from server.services.hint_provider import HintProvider, MAX_HINT_LEVEL
38
  from server.services.task_grader import TaskGrader
39
 
40
  logger = logging.getLogger(__name__)
41
 
42
 
43
+ class AwsRlEnvironment(Environment[AwsRlAction, AwsRlObservation, AwsRlState]):
44
  SUPPORTS_CONCURRENT_SESSIONS: bool = True
45
 
46
  def __init__(self) -> None:
47
  print("Initializing AWS RL Environment...")
48
+ self._state = AwsRlState(episode_id=str(uuid4()), step_count=0)
49
  self._backend = AwsBackend()
50
  self._curriculum = Curriculum()
51
  self._grader = TaskGrader(self._backend)
52
  self._designer = EnvironmentDesigner(self._backend)
53
  self._tracker = EpisodeTracker()
54
+ self._chaos_engine = ChaosEngine(self._backend)
55
+ self._hint_provider = HintProvider()
56
  self._current_task: Task | None = None
57
 
58
+ def _sync_state(self) -> None:
59
+ """Sync internal state to the AwsRlState object."""
60
+ self._state.current_task = self._current_task
61
+ self._state.tracker = TrackerState(
62
+ step_count=self._tracker.step_count,
63
+ hints_used=self._tracker.hints_used,
64
+ progress=self._tracker.previous_progress,
65
+ commands_executed=[s.command for s in self._tracker.command_history],
66
+ credited_operations=[
67
+ f"{op}:{res}" for op, res in self._tracker._credited_operations
68
+ ],
69
+ )
70
+ self._state.chaos_occurred = self._chaos_engine.chaos_occurred
71
+ self._state.current_tier = self._curriculum.current_difficulty.value
72
+ self._state.infra_state = self._backend.get_infra_state()
73
+
74
  def reset(
75
  self,
76
  seed: Optional[int] = None,
 
78
  **kwargs: Any,
79
  ) -> AwsRlObservation:
80
  self._backend.reset_environment()
81
+ self._state = AwsRlState(episode_id=episode_id or str(uuid4()), step_count=0)
82
  self._tracker.reset()
83
+ self._chaos_engine.reset()
84
  self._current_task = self._curriculum.next_task()
85
 
86
  self._designer.apply(self._current_task)
87
+ self._sync_state()
88
 
89
  return AwsRlObservation(
90
  episode_id=EpisodeID(self._state.episode_id or ""),
91
  step_count=StepCount(self._state.step_count),
92
  command_success=True,
93
  command_output="Environment reset. Infra state wiped.",
94
+ task=TaskInfo.from_task(self._current_task) if self._current_task else None,
95
  done=False,
96
  reward=0.0,
97
  )
98
 
99
+ def _intercept_command(self, command: str) -> AwsRlObservation | None:
100
+ """Handle anti-hack validation, hint requests, and help commands.
 
 
 
 
 
 
101
 
102
+ Returns an observation if the command was intercepted, None otherwise.
103
+ """
104
  if not command.startswith("aws "):
105
  return AwsRlObservation(
106
  episode_id=EpisodeID(self._state.episode_id or ""),
 
108
  command_success=False,
109
  command_output="",
110
  error="Only AWS CLI commands (starting with 'aws') are allowed.",
111
+ task=TaskInfo.from_task(self._current_task)
112
+ if self._current_task
113
+ else None,
114
  task_achieved=False,
115
  done=False,
116
  reward=0.0,
117
  )
118
 
119
+ if command == "aws help --task-hint":
120
+ hint_level = self._tracker.record_hint()
121
+ clamped_level = min(hint_level, MAX_HINT_LEVEL)
122
+ assert self._current_task is not None
123
+ hint_text = self._hint_provider.get_hint(self._current_task, clamped_level)
124
+ return AwsRlObservation(
125
+ episode_id=EpisodeID(self._state.episode_id or ""),
126
+ step_count=StepCount(self._state.step_count),
127
+ command_success=True,
128
+ command_output=hint_text,
129
+ task=TaskInfo.from_task(self._current_task)
130
+ if self._current_task
131
+ else None,
132
+ task_achieved=False,
133
+ done=False,
134
+ reward=0.0,
135
+ hints_used=self._tracker.hints_used,
136
+ hint_text=hint_text,
137
+ )
138
+
139
+ parts = command.split()
140
+ if len(parts) == 3 and parts[0] == "aws":
141
+ service_name = None
142
+ if parts[2] == "help":
143
+ service_name = parts[1]
144
+ elif parts[1] == "help":
145
+ service_name = parts[2]
146
+
147
+ if service_name is not None:
148
+ svc_success, help_text = self._backend.get_service_help(service_name)
149
+ return AwsRlObservation(
150
+ episode_id=EpisodeID(self._state.episode_id or ""),
151
+ step_count=StepCount(self._state.step_count),
152
+ command_success=svc_success,
153
+ command_output=help_text if svc_success else "",
154
+ error="" if svc_success else help_text,
155
+ task=TaskInfo.from_task(self._current_task)
156
+ if self._current_task
157
+ else None,
158
+ task_achieved=False,
159
+ done=False,
160
+ reward=0.0,
161
+ )
162
+
163
+ return None
164
+
165
+ def step(
166
+ self,
167
+ action: AwsRlAction,
168
+ timeout_s: Optional[float] = None,
169
+ **kwargs: Any,
170
+ ) -> AwsRlObservation:
171
+ assert self._current_task is not None, "Call reset() before step()"
172
+ self._state.step_count += 1
173
+
174
+ command = action.command.strip()
175
+ intercepted = self._intercept_command(command)
176
+ if intercepted is not None:
177
+ return intercepted
178
+
179
  success, stdout, stderr = self._backend.execute_command(command)
180
 
181
  # Record in tracker
182
  latest_step = self._tracker.record_step(command, success, stdout, stderr)
183
 
184
+ # Grade the task (pass cumulative chaos flag and hint count)
 
 
185
  grade_result = self._grader.grade(
186
+ self._current_task,
187
+ self._tracker,
188
+ latest_step,
189
+ chaos_occurred=self._chaos_engine.chaos_occurred,
190
+ hints_used=self._tracker.hints_used,
191
  )
192
  task_achieved = grade_result.task_achieved
193
  reward = grade_result.reward
 
197
  self._current_task, achieved=True, reward=reward
198
  )
199
 
200
+ # Inject chaos AFTER grading — disrupts state for future steps
201
+ self._chaos_engine.maybe_inject(
202
+ self._current_task,
203
+ self._tracker,
204
+ self._curriculum.chaos_probability,
205
+ )
206
+
207
+ self._sync_state()
208
+
209
  return AwsRlObservation(
210
  episode_id=EpisodeID(self._state.episode_id or ""),
211
  step_count=StepCount(self._state.step_count),
212
  command_success=success,
213
  command_output=stdout,
214
  error=stderr,
215
+ task=TaskInfo.from_task(self._current_task) if self._current_task else None,
216
  task_achieved=task_achieved,
217
+ partial_progress=self._tracker.previous_progress,
218
  done=task_achieved,
219
  reward=reward,
220
+ hints_used=self._tracker.hints_used,
221
  )
222
 
223
  @property
224
+ def state(self) -> AwsRlState:
225
  return self._state
server/services/aws_backend.py CHANGED
@@ -2,6 +2,7 @@
2
 
3
  import logging
4
  import os
 
5
  import subprocess
6
 
7
  import httpx
@@ -27,6 +28,61 @@ class AwsBackend:
27
  logger.warning("Failed to reset MiniStack state: %s", e)
28
  raise
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  def execute_command(self, command: str) -> tuple[bool, str, str]:
31
  """Execute an AWS CLI command against MiniStack.
32
 
@@ -46,7 +102,7 @@ class AwsBackend:
46
 
47
  try:
48
  result = subprocess.run(
49
- command.split(),
50
  capture_output=True,
51
  text=True,
52
  timeout=30,
 
2
 
3
  import logging
4
  import os
5
+ import shlex
6
  import subprocess
7
 
8
  import httpx
 
28
  logger.warning("Failed to reset MiniStack state: %s", e)
29
  raise
30
 
31
+ def get_infra_state(self) -> dict:
32
+ """Fetch current infrastructure state from MiniStack via GET /_ministack/state."""
33
+ try:
34
+ resp = httpx.get(f"{self._aws_infra_url}/_ministack/state", timeout=10)
35
+ resp.raise_for_status()
36
+ return resp.json()
37
+ except httpx.HTTPError as e:
38
+ logger.warning("Failed to fetch MiniStack state: %s", e)
39
+ return {}
40
+
41
+ def get_service_help(self, service_name: str) -> tuple[bool, str]:
42
+ """Fetch service info from MiniStack via GET /_ministack/handlers/<service>.
43
+
44
+ Returns:
45
+ Tuple of (success, formatted_help_text)
46
+ """
47
+ try:
48
+ resp = httpx.get(
49
+ f"{self._aws_infra_url}/_ministack/handlers/{service_name}",
50
+ timeout=10,
51
+ )
52
+ resp.raise_for_status()
53
+ data = resp.json()
54
+ lines = [
55
+ f"SERVICE: {data['service']}",
56
+ "",
57
+ "DESCRIPTION",
58
+ data.get("description", "No description available."),
59
+ "",
60
+ f"AVAILABLE ACTIONS ({data['action_count']}):",
61
+ "",
62
+ ]
63
+ for action in data.get("supported_actions", []):
64
+ lines.append(f" - {action}")
65
+ state = data.get("state", {})
66
+ if state:
67
+ lines.append("")
68
+ lines.append("CURRENT STATE:")
69
+ for resource, info in state.items():
70
+ count = info.get("count", 0)
71
+ names = info.get("names", info.get("ids", info.get("arns", [])))
72
+ lines.append(f" {resource}: {count}")
73
+ if names:
74
+ for n in names[:20]:
75
+ lines.append(f" - {n}")
76
+ if len(names) > 20:
77
+ lines.append(f" ... and {len(names) - 20} more")
78
+ return True, "\n".join(lines)
79
+ except httpx.HTTPStatusError as e:
80
+ if e.response.status_code == 404:
81
+ return False, f"Unknown service: {service_name}"
82
+ return False, f"Failed to fetch service help: {e}"
83
+ except httpx.HTTPError as e:
84
+ return False, f"Failed to fetch service help: {e}"
85
+
86
  def execute_command(self, command: str) -> tuple[bool, str, str]:
87
  """Execute an AWS CLI command against MiniStack.
88
 
 
102
 
103
  try:
104
  result = subprocess.run(
105
+ shlex.split(command),
106
  capture_output=True,
107
  text=True,
108
  timeout=30,
server/services/chaos_engine.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Chaos Injection Engine.
3
+
4
+ Silently mutates AWS state mid-episode to test agent resilience and
5
+ situational awareness. Perturbations are scoped to services the current
6
+ task uses and are selected from a per-service catalog of destructive
7
+ AWS CLI commands.
8
+ """
9
+
10
+ import logging
11
+ import os
12
+ import random
13
+ import re
14
+
15
+ from models import AwsService, Task
16
+ from server.services.aws_backend import AwsBackend
17
+ from server.services.episode_tracker import EpisodeTracker
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # ---------------------------------------------------------------------------
22
+ # Resource-name extraction patterns (from successful AWS CLI commands)
23
+ # ---------------------------------------------------------------------------
24
+
25
+ _RESOURCE_PATTERNS: dict[AwsService, list[re.Pattern[str]]] = {
26
+ AwsService.S3: [
27
+ re.compile(r"aws\s+s3\s+mb\s+s3://([^\s]+)"),
28
+ re.compile(r"aws\s+s3api\s+create-bucket\s+--bucket\s+([^\s]+)"),
29
+ ],
30
+ AwsService.DYNAMODB: [
31
+ re.compile(r"aws\s+dynamodb\s+create-table\s+.*--table-name\s+([^\s]+)"),
32
+ ],
33
+ AwsService.LAMBDA: [
34
+ re.compile(r"aws\s+lambda\s+create-function\s+.*--function-name\s+([^\s]+)"),
35
+ ],
36
+ AwsService.SQS: [
37
+ re.compile(r"aws\s+sqs\s+create-queue\s+.*--queue-name\s+([^\s]+)"),
38
+ ],
39
+ AwsService.IAM: [
40
+ re.compile(
41
+ r"aws\s+iam\s+attach-role-policy\s+.*--role-name\s+([^\s]+)"
42
+ r"\s+.*--policy-arn\s+([^\s]+)"
43
+ ),
44
+ re.compile(
45
+ r"aws\s+iam\s+attach-role-policy\s+.*--policy-arn\s+([^\s]+)"
46
+ r"\s+.*--role-name\s+([^\s]+)"
47
+ ),
48
+ ],
49
+ }
50
+
51
+ # ---------------------------------------------------------------------------
52
+ # Perturbation templates per service
53
+ # ---------------------------------------------------------------------------
54
+
55
+ _PERTURBATION_TEMPLATES: dict[AwsService, list[str]] = {
56
+ AwsService.S3: [
57
+ "aws s3 rb s3://{name} --force",
58
+ ],
59
+ AwsService.DYNAMODB: [
60
+ "aws dynamodb delete-table --table-name {name}",
61
+ ],
62
+ AwsService.LAMBDA: [
63
+ "aws lambda delete-function --function-name {name}",
64
+ ],
65
+ AwsService.SQS: [
66
+ "aws sqs delete-queue --queue-url {name}",
67
+ ],
68
+ AwsService.IAM: [
69
+ "aws iam detach-role-policy --role-name {name} --policy-arn {arn}",
70
+ ],
71
+ }
72
+
73
+
74
+ class ChaosEngine:
75
+ """Silently mutates AWS state mid-episode to test agent resilience."""
76
+
77
+ def __init__(self, backend: AwsBackend) -> None:
78
+ self._backend = backend
79
+ self._enabled = os.environ.get("ENABLE_CHAOS", "true").lower() == "true"
80
+ self._chaos_occurred = False
81
+
82
+ def reset(self) -> None:
83
+ """Reset per-episode chaos state."""
84
+ self._chaos_occurred = False
85
+
86
+ @property
87
+ def chaos_occurred(self) -> bool:
88
+ """Whether chaos was injected at any point during this episode."""
89
+ return self._chaos_occurred
90
+
91
+ def maybe_inject(
92
+ self,
93
+ task: Task,
94
+ tracker: EpisodeTracker,
95
+ probability: float,
96
+ ) -> bool:
97
+ """Roll dice and, if triggered, execute a task-relevant perturbation.
98
+
99
+ Returns True if a perturbation was actually executed.
100
+ """
101
+ if not self._enabled or probability <= 0.0:
102
+ return False
103
+
104
+ if random.random() >= probability:
105
+ return False
106
+
107
+ perturbation = self._select_perturbation(task, tracker)
108
+ if perturbation is None:
109
+ return False
110
+
111
+ logger.info("Chaos injection: %s", perturbation)
112
+ self._backend.execute_command(perturbation)
113
+ self._chaos_occurred = True
114
+ return True
115
+
116
+ # -- Private helpers ------------------------------------------------------
117
+
118
+ def _select_perturbation(
119
+ self,
120
+ task: Task,
121
+ tracker: EpisodeTracker,
122
+ ) -> str | None:
123
+ """Pick a concrete perturbation command scoped to services the task uses."""
124
+ task_services = set(task.success_criteria.services)
125
+ if not task_services:
126
+ return None
127
+
128
+ # Collect all candidate (service, rendered_command) pairs
129
+ candidates: list[str] = []
130
+
131
+ for step in tracker.command_history:
132
+ if not step.success:
133
+ continue
134
+ for service in task_services:
135
+ for pattern in _RESOURCE_PATTERNS.get(service, []):
136
+ match = pattern.search(step.command)
137
+ if not match:
138
+ continue
139
+ templates = _PERTURBATION_TEMPLATES.get(service, [])
140
+ for template in templates:
141
+ rendered = self._render_template(template, match, service)
142
+ if rendered:
143
+ candidates.append(rendered)
144
+
145
+ if not candidates:
146
+ return None
147
+
148
+ return random.choice(candidates)
149
+
150
+ @staticmethod
151
+ def _render_template(
152
+ template: str,
153
+ match: re.Match[str],
154
+ service: AwsService,
155
+ ) -> str | None:
156
+ """Fill a perturbation template from regex match groups."""
157
+ groups = match.groups()
158
+ if not groups:
159
+ return None
160
+
161
+ if service == AwsService.IAM and len(groups) >= 2:
162
+ # IAM patterns capture (role_name, policy_arn) or vice-versa
163
+ # The first pattern has role first, second has arn first
164
+ if "role-name" in template and "policy-arn" in template:
165
+ return template.format(name=groups[0], arn=groups[1])
166
+ return None
167
+
168
+ return template.format(name=groups[0])
server/services/curriculum.py CHANGED
@@ -17,6 +17,7 @@ import logging
17
  import random
18
  from collections import defaultdict
19
  from pathlib import Path
 
20
 
21
  import yaml
22
 
@@ -59,6 +60,7 @@ TIER_CONFIGS: dict[TaskDifficulty, TierConfig] = {
59
  mastery_window=10,
60
  mastery_threshold=0.7,
61
  fast_track_rate=0.9,
 
62
  ),
63
  TaskDifficulty.ADVANCED: TierConfig(
64
  min_episodes=10,
@@ -66,6 +68,7 @@ TIER_CONFIGS: dict[TaskDifficulty, TierConfig] = {
66
  mastery_window=10,
67
  mastery_threshold=0.7,
68
  fast_track_rate=0.9,
 
69
  ),
70
  TaskDifficulty.EXPERT: TierConfig(
71
  min_episodes=0,
@@ -73,6 +76,7 @@ TIER_CONFIGS: dict[TaskDifficulty, TierConfig] = {
73
  mastery_window=10,
74
  mastery_threshold=0.7,
75
  fast_track_rate=1.0,
 
76
  ),
77
  }
78
 
@@ -85,6 +89,11 @@ _TIER_FILES: dict[TaskDifficulty, str] = {
85
  TaskDifficulty.EXPERT: "expert.yaml",
86
  }
87
 
 
 
 
 
 
88
  # ---------------------------------------------------------------------------
89
  # Priority score tuning constants
90
  # ---------------------------------------------------------------------------
@@ -109,8 +118,34 @@ _FAST_TRACK_MIN_EPISODES = 3
109
  # ---------------------------------------------------------------------------
110
 
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  def load_tier(difficulty: TaskDifficulty, tasks_dir: Path = TASKS_DIR) -> list[Task]:
113
- """Load tasks for a single difficulty tier from its YAML file."""
114
  filename = _TIER_FILES.get(difficulty)
115
  if filename is None:
116
  logger.warning("No file mapping for difficulty: %s", difficulty.value)
@@ -124,24 +159,25 @@ def load_tier(difficulty: TaskDifficulty, tasks_dir: Path = TASKS_DIR) -> list[T
124
  with open(filepath) as f:
125
  entries = yaml.safe_load(f) or []
126
 
127
- tasks = [
128
- Task(
129
- task_id=TaskID(entry["task_id"]),
130
- difficulty=difficulty,
131
- description=entry["description"],
132
- success_criteria=SuccessCriteria(**entry.get("success_criteria", {})),
133
- setup_commands=[
134
- SetupCommand(command=cmd)
135
- if isinstance(cmd, str)
136
- else SetupCommand(**cmd)
137
- for cmd in entry.get("setup_commands", [])
138
- ],
 
 
 
 
139
  )
140
- for entry in entries
141
- ]
142
- logger.info(
143
- "Loaded %d %s tasks from %s", len(tasks), difficulty.value, filepath.name
144
- )
145
  return tasks
146
 
147
 
@@ -237,6 +273,10 @@ class Curriculum:
237
  def is_warmup(self) -> bool:
238
  return self.current_difficulty == TaskDifficulty.WARMUP
239
 
 
 
 
 
240
  # -- Public API -----------------------------------------------------------
241
 
242
  def next_task(self) -> Task:
 
17
  import random
18
  from collections import defaultdict
19
  from pathlib import Path
20
+ from typing import Any
21
 
22
  import yaml
23
 
 
60
  mastery_window=10,
61
  mastery_threshold=0.7,
62
  fast_track_rate=0.9,
63
+ chaos_probability=0.1,
64
  ),
65
  TaskDifficulty.ADVANCED: TierConfig(
66
  min_episodes=10,
 
68
  mastery_window=10,
69
  mastery_threshold=0.7,
70
  fast_track_rate=0.9,
71
+ chaos_probability=0.2,
72
  ),
73
  TaskDifficulty.EXPERT: TierConfig(
74
  min_episodes=0,
 
76
  mastery_window=10,
77
  mastery_threshold=0.7,
78
  fast_track_rate=1.0,
79
+ chaos_probability=0.3,
80
  ),
81
  }
82
 
 
89
  TaskDifficulty.EXPERT: "expert.yaml",
90
  }
91
 
92
# Supplementary task files merged into an existing tier.
# Each entry maps a difficulty to extra YAML files (relative to the tasks
# directory) whose tasks are appended when that tier is loaded.
_SUPPLEMENTARY_FILES: dict[TaskDifficulty, list[str]] = {
    TaskDifficulty.EXPERT: ["drift.yaml"],
}
96
+
97
  # ---------------------------------------------------------------------------
98
  # Priority score tuning constants
99
  # ---------------------------------------------------------------------------
 
118
  # ---------------------------------------------------------------------------
119
 
120
 
121
def _parse_task_entries(
    entries: list[dict[str, Any]], difficulty: TaskDifficulty
) -> list[Task]:
    """Convert raw YAML entries into Task models."""

    def as_setup(raw: Any) -> SetupCommand:
        # YAML entries may be either a bare command string or a mapping
        # of SetupCommand fields.
        if isinstance(raw, str):
            return SetupCommand(command=raw)
        return SetupCommand(**raw)

    tasks: list[Task] = []
    for entry in entries:
        tasks.append(
            Task(
                task_id=TaskID(entry["task_id"]),
                difficulty=difficulty,
                description=entry["description"],
                success_criteria=SuccessCriteria(**entry.get("success_criteria", {})),
                setup_commands=[as_setup(c) for c in entry.get("setup_commands", [])],
                desired_state_spec=entry.get("desired_state_spec"),
                possible_drifts=[as_setup(d) for d in entry.get("possible_drifts", [])],
            )
        )
    return tasks
145
+
146
+
147
  def load_tier(difficulty: TaskDifficulty, tasks_dir: Path = TASKS_DIR) -> list[Task]:
148
+ """Load tasks for a single difficulty tier from its YAML file(s)."""
149
  filename = _TIER_FILES.get(difficulty)
150
  if filename is None:
151
  logger.warning("No file mapping for difficulty: %s", difficulty.value)
 
159
  with open(filepath) as f:
160
  entries = yaml.safe_load(f) or []
161
 
162
+ tasks = _parse_task_entries(entries, difficulty)
163
+
164
+ # Load supplementary task files for this tier
165
+ for extra_file in _SUPPLEMENTARY_FILES.get(difficulty, []):
166
+ extra_path = tasks_dir / extra_file
167
+ if not extra_path.exists():
168
+ continue
169
+ with open(extra_path) as f:
170
+ extra_entries = yaml.safe_load(f) or []
171
+ extra_tasks = _parse_task_entries(extra_entries, difficulty)
172
+ tasks.extend(extra_tasks)
173
+ logger.info(
174
+ "Loaded %d supplementary %s tasks from %s",
175
+ len(extra_tasks),
176
+ difficulty.value,
177
+ extra_file,
178
  )
179
+
180
+ logger.info("Loaded %d %s tasks total", len(tasks), difficulty.value)
 
 
 
181
  return tasks
182
 
183
 
 
273
  def is_warmup(self) -> bool:
274
  return self.current_difficulty == TaskDifficulty.WARMUP
275
 
276
    @property
    def chaos_probability(self) -> float:
        """Chaos-injection probability configured for the current tier."""
        return self.tier_config.chaos_probability
279
+
280
  # -- Public API -----------------------------------------------------------
281
 
282
  def next_task(self) -> Task:
server/services/drift_engine.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration Drift Engine.
3
+
4
+ Randomly applies a subset of a task's possible mutations after the correct
5
+ state has been provisioned. This forces the agent to audit and discover
6
+ which resources drifted rather than memorising a fixed solution path.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ import random
13
+
14
+ from models import Task
15
+ from server.services.aws_backend import AwsBackend
16
+
17
logger = logging.getLogger(__name__)

# Default range for how many drifts to apply (inclusive).
_MIN_DRIFTS = 2
_MAX_DRIFTS = 3


class DriftEngine:
    """Selects and applies random configuration drifts for a task."""

    def __init__(self, backend: AwsBackend) -> None:
        self._backend = backend

    def apply_drift(self, task: Task) -> list[str]:
        """Randomly select and execute K of N possible drifts.

        Args:
            task: A task whose ``possible_drifts`` list defines the
                candidate mutations.

        Returns:
            Human-readable descriptions of the drifts that were applied
            (empty list if none).
        """
        candidates = task.possible_drifts
        if not candidates:
            return []

        chosen = random.sample(candidates, self._pick_count(len(candidates)))

        descriptions: list[str] = []
        for mutation in chosen:
            ok, _out, err = self._backend.execute_command(mutation.command)
            label = mutation.description or mutation.command
            if not ok:
                # Best-effort: a failed mutation is logged, not raised.
                logger.warning("Drift command failed: %s — %s", mutation.command, err)
                continue
            logger.info("Drift applied: %s", label)
            descriptions.append(label)

        return descriptions

    @staticmethod
    def _pick_count(pool_size: int) -> int:
        """Determine how many drifts to apply given the pool size."""
        if pool_size <= 1:
            return pool_size
        return random.randint(min(_MIN_DRIFTS, pool_size), min(_MAX_DRIFTS, pool_size))
server/services/environment_designer.py CHANGED
@@ -14,6 +14,7 @@ from pydantic import BaseModel, Field
14
 
15
  from models import SetupCommand, Task
16
  from server.services.aws_backend import AwsBackend
 
17
 
18
  logger = logging.getLogger(__name__)
19
 
@@ -47,6 +48,7 @@ class EnvironmentDesigner:
47
 
48
  def __init__(self, backend: AwsBackend) -> None:
49
  self._backend = backend
 
50
 
51
  def apply(self, task: Task) -> ProvisionResult:
52
  """Apply the task's environment setup to MiniStack.
@@ -61,7 +63,14 @@ class EnvironmentDesigner:
61
  if not task.setup_commands:
62
  return ProvisionResult(resources_created=0)
63
 
64
- return self._apply_cli_commands(task.setup_commands)
 
 
 
 
 
 
 
65
 
66
  # -- Provisioning strategies ----------------------------------------------
67
 
 
14
 
15
  from models import SetupCommand, Task
16
  from server.services.aws_backend import AwsBackend
17
+ from server.services.drift_engine import DriftEngine
18
 
19
  logger = logging.getLogger(__name__)
20
 
 
48
 
49
    def __init__(self, backend: AwsBackend) -> None:
        self._backend = backend
        # Drift engine shares the same backend, so mutations target the
        # same stack the setup commands provisioned.
        self._drift_engine = DriftEngine(backend)
52
 
53
  def apply(self, task: Task) -> ProvisionResult:
54
  """Apply the task's environment setup to MiniStack.
 
63
  if not task.setup_commands:
64
  return ProvisionResult(resources_created=0)
65
 
66
+ result = self._apply_cli_commands(task.setup_commands)
67
+
68
+ # Apply random configuration drifts after provisioning correct state
69
+ if task.possible_drifts:
70
+ applied = self._drift_engine.apply_drift(task)
71
+ logger.info("Applied %d configuration drifts", len(applied))
72
+
73
+ return result
74
 
75
  # -- Provisioning strategies ----------------------------------------------
76
 
server/services/episode_tracker.py CHANGED
@@ -63,6 +63,44 @@ def _command_mentions_resource(command: str, resource: str) -> bool:
63
  return False
64
 
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  class EpisodeTracker:
67
  """Tracks command history within a single episode for grading."""
68
 
@@ -72,12 +110,14 @@ class EpisodeTracker:
72
  self._previous_progress: float = 0.0
73
  # Track which (operation, resource) pairs have been credited
74
  self._credited_operations: set[tuple[str, str | None]] = set()
 
75
 
76
  def reset(self) -> None:
77
  self._history.clear()
78
  self._step_counter = 0
79
  self._previous_progress = 0.0
80
  self._credited_operations.clear()
 
81
 
82
  def record_step(
83
  self, command: str, success: bool, stdout: str, stderr: str
@@ -136,6 +176,15 @@ class EpisodeTracker:
136
  def step_count(self) -> int:
137
  return self._step_counter
138
 
 
 
 
 
 
 
 
 
 
139
  @property
140
  def previous_progress(self) -> float:
141
  return self._previous_progress
@@ -143,3 +192,50 @@ class EpisodeTracker:
143
  @previous_progress.setter
144
  def previous_progress(self, value: float) -> None:
145
  self._previous_progress = value
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  return False
64
 
65
 
66
# Maps create operations to their corresponding delete operations.
# Used by EpisodeTracker.detect_rollbacks to spot a resource that was
# created and later torn down within the same episode.
_CREATE_DELETE_PAIRS: dict[str, str] = {
    "create-bucket": "delete-bucket",
    "create-table": "delete-table",
    "create-function": "delete-function",
    "create-queue": "delete-queue",
    "create-topic": "delete-topic",
    "create-role": "delete-role",
    "create-rest-api": "delete-rest-api",
    "create-secret": "delete-secret",
    "put-bucket-policy": "delete-bucket-policy",
    "attach-role-policy": "detach-role-policy",
}

# stderr substrings indicating a create failed only because the resource
# already exists (consumed by EpisodeTracker.detect_idempotent_retries).
_ALREADY_EXISTS_PATTERNS: list[str] = [
    "already exists",
    "BucketAlreadyExists",
    "BucketAlreadyOwnedByYou",
    "ResourceInUseException",
    "ResourceConflictException",
    "EntityAlreadyExists",
    "QueueNameExists",
    "TopicAlreadyExists",
]
90
+
91
+
92
def _extract_resource_name(command: str) -> str | None:
    """Extract the primary resource name from an AWS CLI command.

    Handles both the ``--flag value`` and ``--flag=value`` spellings.

    Returns:
        The resource name, or None when no known resource flag is present
        (including the empty-command case).
    """
    parts = command.strip().split()
    for i, part in enumerate(parts):
        # "--flag value" form: the name is the following token.
        if part in _RESOURCE_FLAGS and i + 1 < len(parts):
            return parts[i + 1]
        # "--flag=value" form: the name is embedded in the same token.
        # Bug fix: this check previously ran *after* the loop, so it only
        # inspected the final token and raised NameError on empty input.
        for flag in _RESOURCE_FLAGS:
            if part.startswith(f"{flag}="):
                return part.split("=", 1)[1]
    return None
102
+
103
+
104
  class EpisodeTracker:
105
  """Tracks command history within a single episode for grading."""
106
 
 
110
  self._previous_progress: float = 0.0
111
  # Track which (operation, resource) pairs have been credited
112
  self._credited_operations: set[tuple[str, str | None]] = set()
113
+ self._hints_used: int = 0
114
 
115
  def reset(self) -> None:
116
  self._history.clear()
117
  self._step_counter = 0
118
  self._previous_progress = 0.0
119
  self._credited_operations.clear()
120
+ self._hints_used = 0
121
 
122
  def record_step(
123
  self, command: str, success: bool, stdout: str, stderr: str
 
176
  def step_count(self) -> int:
177
  return self._step_counter
178
 
179
+ def record_hint(self) -> int:
180
+ """Record that a hint was used. Returns the new hint level (1-indexed)."""
181
+ self._hints_used += 1
182
+ return self._hints_used
183
+
184
    @property
    def hints_used(self) -> int:
        """Number of hints recorded so far this episode (see record_hint)."""
        return self._hints_used
187
+
188
    @property
    def previous_progress(self) -> float:
        """Most recently stored progress value for this episode (0.0 after reset)."""
        return self._previous_progress
 
192
    @previous_progress.setter
    def previous_progress(self, value: float) -> None:
        # Stored as-is; reset() returns it to 0.0 at episode start.
        self._previous_progress = value
195
+
196
+ def detect_rollbacks(self) -> int:
197
+ """Count create→delete pairs on the same resource (wasteful rollbacks)."""
198
+ # Build a set of (operation, resource) for successful create commands
199
+ creates: list[tuple[str, str]] = []
200
+ for record in self._history:
201
+ if not record.success:
202
+ continue
203
+ _, op = _parse_aws_command(record.command)
204
+ if op is None or op not in _CREATE_DELETE_PAIRS:
205
+ continue
206
+ resource = _extract_resource_name(record.command)
207
+ if resource is not None:
208
+ creates.append((op, resource))
209
+
210
+ rollback_count = 0
211
+ for create_op, resource in creates:
212
+ delete_op = _CREATE_DELETE_PAIRS[create_op]
213
+ for record in self._history:
214
+ if not record.success:
215
+ continue
216
+ _, op = _parse_aws_command(record.command)
217
+ if op == delete_op and _command_mentions_resource(
218
+ record.command, resource
219
+ ):
220
+ rollback_count += 1
221
+ break
222
+
223
+ return rollback_count
224
+
225
+ def detect_idempotent_retries(self) -> int:
226
+ """Count create failures with 'already exists' followed by a successful next step."""
227
+ count = 0
228
+ for i, record in enumerate(self._history):
229
+ if record.success:
230
+ continue
231
+ _, op = _parse_aws_command(record.command)
232
+ if op is None or not op.startswith("create"):
233
+ continue
234
+ # Check stderr for "already exists" patterns
235
+ if not any(pat in record.stderr for pat in _ALREADY_EXISTS_PATTERNS):
236
+ continue
237
+ # Next step must exist and be successful
238
+ if i + 1 < len(self._history) and self._history[i + 1].success:
239
+ count += 1
240
+
241
+ return count