diff --git a/Dockerfile b/Dockerfile index fcf24212b7dffe9ed0ee2789c602dc5dbe5c13e6..3cfb6debf8a839d2a96f1fddc18701342b80461b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -42,16 +42,16 @@ RUN if ! command -v uv >/dev/null 2>&1; then \ # If uv.lock exists, use it; otherwise resolve on the fly RUN --mount=type=cache,target=/root/.cache/uv \ if [ -f uv.lock ]; then \ - uv sync --frozen --no-install-project --no-editable; \ + uv sync --frozen --extra dev --no-install-project --no-editable; \ else \ - uv sync --no-install-project --no-editable; \ + uv sync --extra dev --no-install-project --no-editable; \ fi RUN --mount=type=cache,target=/root/.cache/uv \ if [ -f uv.lock ]; then \ - uv sync --frozen --no-editable; \ + uv sync --frozen --extra dev --no-editable; \ else \ - uv sync --no-editable; \ + uv sync --extra dev --no-editable; \ fi # Final runtime stage @@ -90,7 +90,10 @@ ENV PYTHONPATH="/app/env:$PYTHONPATH" # DEV_MODE=1 enables live reload via --reload flag -ENV DEV_MODE=0 +ENV DEV_MODE=1 + +ENV API_BASE_URL=https://router.huggingface.co/v1 +ENV MODEL_NAME=Qwen/Qwen2.5-72B-Instruct # Entrypoint: start aws_infra in background, then run the FastAPI server CMD ["sh", "-c", "aws_infra -d & sleep 2 && uvicorn server.app:app --host 0.0.0.0 --port 8000 $([ \"$DEV_MODE\" = '1' ] && echo '--reload --reload-dir /app/env')"] \ No newline at end of file diff --git a/Makefile b/Makefile index 9c078d3f8c53f31d98c0036c82857129671b8a11..12528fe4ae00c71da2885c682a3c7742597b312e 100644 --- a/Makefile +++ b/Makefile @@ -21,9 +21,6 @@ install: ## Install project dependencies install-dev: ## Install project with dev dependencies $(UV) sync --frozen --extra dev -.PHONY: install-train -install-train: ## Install project with training dependencies (trl, torch, peft, etc.) 
- $(UV) sync --frozen --extra training .PHONY: install-all install-all: ## Install project with all dependencies (dev + training) @@ -39,7 +36,7 @@ lock: ## Update the lockfile .PHONY: run run: ## Run with MiniStack + FastAPI server (mirrors Docker CMD) - ministack & sleep 2 && $(UV) run uvicorn server.app:app --host $(SERVER_HOST) --port $(SERVER_PORT) + aws_infra -d & sleep 2 && $(UV) run uvicorn server.app:app --host $(SERVER_HOST) --port $(SERVER_PORT) --reload # ────────────────────────────────────────────── # Code Quality @@ -82,7 +79,7 @@ docker-run-dev: ## Run Docker container in dev mode with live reload .PHONY: docker-run-detach docker-run-detach: ## Run Docker container in background - docker run -d --rm -p $(SERVER_PORT):8000 --name $(DOCKER_IMAGE) $(DOCKER_IMAGE):$(DOCKER_TAG) + docker run -d --rm --name $(DOCKER_IMAGE) -p $(SERVER_PORT):8000 -v $(PWD):/app/env -v /app/env/.venv -e DEV_MODE=1 $(DOCKER_IMAGE):$(DOCKER_TAG) .PHONY: docker-stop docker-stop: ## Stop the running Docker container @@ -100,6 +97,10 @@ docker-shell: ## Open a shell in the running Docker container docker-clean: ## Stop and remove all running containers for this image @docker ps -q --filter ancestor=$(DOCKER_IMAGE):$(DOCKER_TAG) | xargs -r docker rm -f +.PHONY: docker-test +docker-test: ## Run tests inside the running Docker container + docker exec $(DOCKER_IMAGE) python -m pytest env/tests -v + .PHONY: docker-health docker-health: ## Check health of the running container @curl -sf http://localhost:$(SERVER_PORT)/health && echo " OK" || echo " FAIL" diff --git a/README.md b/README.md index 324f0b4b83f6cd96536f76ff3427cedc857f7b7d..68dc278b0539d18625063fbf2c0ce75c089b0c9e 100644 --- a/README.md +++ b/README.md @@ -11,19 +11,242 @@ tags: - openenv --- -# AWS RL Environment +# AWS Cloud CLI and SRE Reinforcement Learning Environment -A **Gymnasium-style RL environment** for training LLM agents on real-world AWS cloud operations. 
The agent sends AWS CLI commands as actions, receives structured observations, and progresses through a **curriculum of 21 tasks** across 5 difficulty tiers — from basic listing to SRE incident response. +An **OpenEnv RL environment** for training AI agents on real-world AWS cloud operations. The agent sends AWS CLI commands as actions, receives structured observations, and progresses through a **curriculum of 120+ tasks** across 5 difficulty tiers — from basic listing to SRE incident response and security posture auditing. -The environment runs a **vendored MiniStack emulator** (34 AWS services, in-memory, zero-cost) inside the same Docker container, so no AWS account is needed. +The agents interact with a **real-world AWS Shell simulator** — a vendored MiniStack emulator (34 AWS services, in-memory, zero-cost) inside the same Docker container. The response of every executed command is the same as production AWS. The grading system evaluates rewards and penalties based on the **actual AWS infrastructure state** instead of static metrics. No AWS account needed. -## Key Innovations +> **[Try the Playground](https://sizzing-aws-rl-env.hf.space/web)** | **[API Docs](https://sizzing-aws-rl-env.hf.space/docs)** | **[Hugging Face Space](https://huggingface.co/spaces/Sizzing/aws_rl_env)** -- **Priority-queue curriculum** — Tasks are selected by weakness, novelty, and spaced-repetition schedules instead of random or round-robin sampling -- **Spaced repetition** — Graduated tasks resurface at exponentially increasing intervals (3 -> 6 -> 12 -> ...
-> 48 episodes) to prevent catastrophic forgetting -- **Anti-reward-hacking** — Grading verifies ground-truth state in MiniStack, not agent output; partial credit is capped at 0.99; monotonic progress prevents manipulation -- **SRE incident tasks** — Expert-tier tasks provision broken infrastructure, then require the agent to diagnose and fix it -- **Shaped rewards** — Dense reward signals (progress bonuses, failure penalties) in [0.0, 1.0] guide exploration without enabling gaming + +## Task Tiers (100+ Tasks) + +### Warmup — 20 tasks +> List resources — single read-only commands + +- Run one AWS CLI command to list or describe a resource type +- S3 buckets, EC2 instances, DynamoDB tables, Lambda functions, RDS, EBS volumes +- Graded by **command_match** — checks operation + service pair +- No setup required, no state mutations + +### Beginner — 20 tasks +> Create single resources with verification + +- Create an S3 bucket, DynamoDB table, SQS queue, or Lambda function +- Graded by **resource_creation** — verifies the exact resource exists in the AWS Infrastructure Simulator +- Introduces resource name validation — "my-bucket-2" won't satisfy a check for "my-bucket" +- First tier where idempotency bonus (+0.02) can be earned + +### Intermediate — 20 tasks +> Multi-step workflows — create, configure, connect + +- Ordered sequences: create a bucket then enable versioning, create a table then add an item +- Graded by **multi_step** — validates each step was completed in order +- Chaos injection begins at **10% probability** — resources may be silently mutated mid-episode +- Rollback penalty (-0.1) starts to matter with multi-step create/delete patterns + +### Advanced — 20 tasks +> Cross-service architectures spanning multiple AWS services + +- Wire Lambda to SQS, configure API Gateway with integrations, build event-driven pipelines +- Graded by **multi_step + services** — all required services must be configured +- Chaos injection escalates to **20% probability** — 
DynamoDB throughput, Lambda configs may change +- Hints cost more: 3 hints = only 61% of max reward (0.85³ decay) + +### Expert — 20 tasks +> SRE incidents, drift detection & security posture audits + +- Fix overly permissive S3 policies, replace broad IAM inline policies, repair broken infrastructure +- Graded by **state_checks** — actual CLI commands run against MiniStack at grading time +- Chaos injection at **30% probability** — maximum perturbation frequency +- **6 drift detection tasks** — correct infra is provisioned, then 2-3 random mutations applied from a pool +- Agent must audit environment, discover which resources drifted, and fix only those +- Drift is randomized per episode — prevents memorization of fix sequences + +--- + +## Features + +### 1. Curriculum & Training + +Adaptive learning system that tracks mastery and selects optimal tasks. + +#### Progressive Difficulty +- **What:** The environment organizes 120+ tasks across 5 tiers: Warmup, Beginner, Intermediate, Advanced, and Expert. Tasks progress from simple listing operations to complex SRE incident response and drift detection scenarios. +- **Why:** Prevents the agent from being overwhelmed by complex tasks early on. Scaffolded difficulty ensures the agent builds foundational skills before tackling multi-service architectures. +- **How:** The `CurriculumManager` maintains per-agent tier state. Promotion requires meeting a minimum episode count and success rate threshold. A fast-track mechanism allows agents scoring 90%+ on 3 consecutive episodes to skip the minimum wait. +- **Metrics:** 5 Difficulty Tiers | 120+ Total Tasks | 90% Fast-track Threshold + +#### Mastery Tracking +- **What:** Each task independently tracks the agent's performance using a weighted success rate over a sliding window. Tasks "graduate" when performance exceeds the mastery threshold consistently. +- **Why:** Ensures the agent truly masters a skill before moving on. 
Prevents lucky single completions from being treated as mastery. Un-graduation catches skill decay. +- **How:** A `mastery_window` of 10 episodes and `mastery_threshold` of 0.7 (70% success). Minimum 3 attempts required before graduation. Recent results are weighted more heavily using exponential decay (factor 0.85). Graduated tasks can un-graduate if performance drops. +- **Metrics:** 70% Mastery Threshold | 10 Window Size | 0.85 Decay Factor + +#### Spaced Repetition +- **What:** Graduated tasks don't disappear — they resurface at exponentially increasing intervals (3, 6, 12, 24, 48 episodes) for re-testing, earning a +30 priority bonus when due. +- **Why:** Prevents catastrophic forgetting. The agent must retain skills even as it learns new ones. Exponential spacing is the most efficient retention schedule, borrowed from cognitive science. +- **How:** Each task tracks a `spaced_rep_interval` starting at 3 episodes. When re-tested and passes, the interval doubles (up to 48). If it fails, the interval resets. `_is_spaced_rep_due()` checks elapsed episodes against the interval. +- **Metrics:** +30 Spaced Rep Bonus | 3→48 Interval Range | 2x Interval Growth + +#### Priority Selection +- **What:** Tasks are ranked by a composite score combining novelty, weakness, spaced repetition due dates, and recency. The highest-scoring task is selected for each episode. +- **Why:** Optimizes the training curriculum by ensuring the agent explores new tasks, practices weak areas, revisits graduated skills, and maintains variety — all balanced automatically. +- **How:** `score = novelty_bonus (+100 if never attempted) + weakness_weight (+50 × (1 - success_rate)) + spaced_rep_bonus (+30 if due) - recency_penalty (-20 if attempted in last 2 episodes)`. Uses exponential decay (0.85) to emphasize recent performance. 
+- **Metrics:** +100 Novelty Bonus | +50 Max Weakness Weight | -20 Recency Penalty + +#### Tier Progression +- **What:** Agents advance through tiers via standard promotion (minimum episodes + success rate) or fast-track (3 consecutive high-scoring episodes). Tiers gate access to increasingly complex task pools. +- **Why:** Provides structure to the learning process. Standard promotion ensures sufficient exposure; fast-track rewards agents that demonstrate immediate competence. +- **How:** Standard: complete `min_episodes` at current tier with `success_rate >= advance_rate`. Fast-track: 3 consecutive episodes at >= 90% success bypasses the minimum episode requirement. Un-promotion is not supported — agents cannot drop tiers. +- **Metrics:** 3 Fast-track Streak | 90% Fast-track Rate | 5 Total Tiers + +### 2. Reward Shaping + +Dense reward signals that encourage operational discipline and real progress. + +``` +if task_achieved: reward = 1.0 +else: + reward = partial_progress * 0.8 # base: scaled to [0.0, 0.8] + if progress_increased: reward += 0.1 # dense signal for advancing + if command_failed: reward *= 0.5 # penalty for errors + reward = clamp(reward, 0.0, 0.99) # never 1.0 without completion + reward *= 0.85 ** hints_used # hint decay + if survived_chaos: reward *= 1.05 # chaos survival bonus +``` + +#### Rollback Penalty & Idempotency Bonus +- **What:** Detects create→delete pairs on the same resource (rollbacks) and penalizes them (-0.1 each). Rewards graceful "already exists" handling (+0.02) where the agent retries idempotently. +- **Why:** First RL environment rewarding operational discipline. In production, create-then-delete cycles are wasteful. Handling "already exists" gracefully is a sign of robust automation. +- **How:** `EpisodeTracker.detect_rollbacks()` scans command history for paired create/delete operations on the same resource. 
Idempotency detection looks for commands that fail with "already exists" patterns (BucketAlreadyExists, ResourceInUseException, etc.) followed by successful continuation. +- **Metrics:** -0.1 Rollback Penalty | +0.02 Idempotency Bonus | Per-pair Detection + +#### Shaped Reward System +- **What:** Rewards are carefully shaped: 1.0 for full completion, 0.0-0.8 for partial progress, +0.1 progress bonus for advancing, ×0.5 for failures, capped at 0.99 without completion. Chaos bonus (×1.05) and hint decay (×0.85^n) layer on top. +- **Why:** Dense reward signal prevents sparse-reward stagnation. The agent gets meaningful feedback on every step, not just at episode end. Capping at 0.99 ensures only real completion earns full credit. +- **How:** `TaskGrader` dispatches to 5 strategies by tier: `command_match` (warmup), `resource_creation` (beginner), `multi_step` (intermediate), `multi_step+services` (advanced), and `state_checks` (expert). Each returns `partial_progress` which is converted to reward with bonuses/penalties applied. +- **Metrics:** 1.0 Max Reward | 0.99 Progress Cap | ×1.05 Chaos Bonus + +#### Multi-Strategy Grading +- **What:** Five distinct grading strategies, one per tier: `command_match` checks operation+service pairs, `resource_creation` verifies resources exist, `multi_step` validates ordered sequences, advanced adds service coverage, and expert runs `state_checks` against MiniStack. +- **Why:** Each tier tests fundamentally different skills. A single grading strategy would either be too lenient for beginners or miss the nuance needed for expert SRE tasks. +- **How:** `TaskGrader.grade()` dispatches based on the task's `grading_strategy` field. Each strategy returns a `GradeResult` with `partial_progress` (0.0-1.0), `completed` flag, and details. Grading is deterministic and fully automated. +- **Metrics:** 5 Grading Strategies | 100% Automated | Per-tier Selection + +### 3. 
Resilience & Adaptability + +Features that test agent robustness under unpredictable conditions. + +#### Progressive Hint System +- **What:** A 3-level hint system where each level reveals progressively more detail: Level 1 names the AWS services, Level 2 describes the operations, Level 3 gives near-complete command structure. Each hint reduces the final reward by ×0.85. +- **Why:** Creates an information-reward tradeoff unique in RL. The agent learns to wean off hints over time — initially relying on them for unfamiliar tasks, then solving independently for maximum reward. From GRPO perspective, it creates a natural exploration/exploitation axis within a single episode. +- **How:** Agent issues special command `aws help --task-hint` as its action (intercepted before reaching MiniStack). Hints auto-generated from `SuccessCriteria` fields (services, steps, operations). Reward decay: `final_reward *= 0.85 ^ hints_used` — 0 hints: 1.0×, 1 hint: 0.85×, 2 hints: 0.72×, 3 hints: 0.61×. Curriculum naturally penalizes hint-dependent agents: lower rewards → slower graduation. +- **Metrics:** 3 Hint Levels | ×0.85 Decay Per Hint | ~61% Reward with 3 Hints + +#### Chaos Injection Engine +- **What:** Silently mutates AWS resource state mid-episode to test agent resilience. Perturbations are scoped to services the current task uses. If the agent completes despite chaos, it earns a ×1.05 bonus. +- **Why:** Tests whether the agent can handle unexpected state changes — a critical SRE skill. Prevents brittle memorization of exact command sequences. Probability scales with tier difficulty. +- **How:** `ChaosEngine` selects perturbation templates specific to the services in use (S3 policy changes, DynamoDB throughput modifications, Lambda config alterations, etc.). Resource names are extracted from successful commands via regex. Chaos probability: 10% (Intermediate), 20% (Advanced), 30% (Expert). 
+- **Metrics:** ×1.05 Chaos Survival Bonus | 10-30% Probability by Tier | 5 Service Templates + +#### Drift Detection Tasks +- **What:** 6 expert-tier tasks where infrastructure is provisioned correctly, then 2-3 random mutations are applied from a pool. The agent must audit, discover drifted resources, and fix only those — without knowing which drifted. +- **Why:** Randomized per episode, preventing memorization. Tests real SRE audit skills: the agent must reason about desired vs. actual state, not just follow a script. +- **How:** `DriftEngine` randomly selects 2-3 mutations from a task's `possible_drifts` pool and applies them after setup. Each task defines a `desired_state_spec` (natural language) and `state_checks` (ground truth CLI commands). Examples: S3 versioning/encryption drift, DynamoDB throughput changes, SNS subscription modifications. +- **Metrics:** 6 Drift Tasks | 2-3 Mutations Per Episode | Random Selection Per Run + +### 4. Security Posture Audit + +Tests *reasoning about configuration state* — the agent must READ and ANALYZE existing infrastructure, not just build things. Unlike SRE tasks (broken functionality), these have *working but insecure* infrastructure. + +#### Public S3 Bucket Lockdown +- **What:** A pre-provisioned S3 bucket "public-assets" has an overly permissive bucket policy granting access to any principal (`Principal: *`). The agent must read the policy, identify the vulnerability, and replace it with a restrictive policy allowing only a specific IAM role. +- **Why:** Tests security reasoning — the infrastructure is functional but insecure. Unlike SRE tasks where things are broken, here the agent must understand what "correct" security posture looks like and make the right judgment call. +- **How:** Setup creates the bucket with a wide-open policy. State checks verify the new policy denies `Principal: *` and only allows the `app-role` principal to perform `s3:GetObject`. 
+- **Metrics:** S3 Target Service | Policy Attack Surface | Expert Tier + +#### IAM Least Privilege +- **What:** An IAM role "app-role" has an inline policy with `Action: *` and `Resource: *` — full admin access. The agent must replace it with a least-privilege policy allowing only `dynamodb:GetItem` and `dynamodb:PutItem` on the users table. +- **Why:** IAM misconfiguration is the #1 cloud security risk. This task tests whether the agent understands permission scoping and can reason about what access an application actually needs vs. what it currently has. +- **How:** Setup creates the role with a wildcard policy. The agent must craft a replacement policy document with specific actions and resource ARN. State checks verify the policy document matches the expected least-privilege permissions. +- **Metrics:** IAM Target Service | 2 Allowed Actions | Expert Tier + +#### Secrets in Lambda Environment +- **What:** A Lambda function "data-processor" has a database password stored as a plaintext environment variable (`DB_PASSWORD=hunter2`). The agent must create a secret in Secrets Manager, update the Lambda to reference the secret ARN, and remove the plaintext variable. +- **Why:** Plaintext secrets in environment variables is a critical security anti-pattern. This task combines multiple services (Lambda + Secrets Manager) and tests the agent's ability to perform a safe credential rotation without breaking the function. +- **How:** Setup creates the Lambda with the plaintext env var. The agent must: (1) create a secret in Secrets Manager, (2) add `SECRET_ARN` env var to Lambda, (3) remove `DB_PASSWORD`. State checks verify all three conditions. +- **Metrics:** 2 Services Involved | 3 Required Steps | Expert Tier + +### 5. Anti-Reward-Hacking (8 Defense Layers) + +8 defense layers that prevent the agent from gaming the reward system. + +#### 1. Ground-Truth Verification via MiniStack +- **What:** The grader never trusts agent command output. 
It independently queries MiniStack (the simulated AWS backend) to verify resource state for 20+ services. Even if the agent crafts fake-looking stdout, the grader checks actual state. +- **Why:** Prevents reward hacking through output fabrication. The agent cannot game the system by producing convincing but fake CLI output — ground truth is always checked server-side. +- **How:** `ResourceVerifier` has per-service verification methods that query MiniStack directly. For expert tasks, `StateCheck` assertions run actual AWS CLI commands against MiniStack at grading time, checking either `output_contains` (substring) or `json_path` extraction with expected values. +- **Metrics:** 20+ Verified Services | 100% Server-side | 0 Agent Visibility + +#### 2. Deduplication +- **What:** `EpisodeTracker.has_executed_operation()` tracks which (operation, resource) pairs have been credited. Running the same successful command twice does NOT increase `partial_progress`. Progress can only increase, never re-earn. +- **Why:** Prevents the agent from gaming the reward system by repeating the same command to accumulate credit. Each unique operation earns credit exactly once. +- **How:** `credit_operation()` records each (operation, resource) pair. Before granting credit, `is_operation_already_credited()` checks if this exact pair was already rewarded. The check is deterministic and happens at grading time. +- **Metrics:** 1x Credit Per Operation | Exact Match Type | (op, res) Tracking Granularity + +#### 3. Grader Invisibility +- **What:** The verification commands run by `ResourceVerifier` are NOT returned in the observation's `command_output`. They happen server-side during grading. The agent cannot observe or mimic them. +- **Why:** If the agent could see which verification commands the grader runs, it could learn to craft fake outputs that match expected patterns. Keeping grader logic invisible forces the agent to actually perform the task. 
+- **How:** `ResourceVerifier` executes AWS CLI commands against MiniStack in a separate execution context. Results are consumed internally by the grading pipeline. The observation returned to the agent only contains output from the agent's own commands. +- **Metrics:** 0 Grader Cmds Exposed | Server Execution Context | 20+ Hidden Verifications + +#### 4. Command Allowlisting +- **What:** Only commands starting with `aws` are executed. Any attempt to run shell commands, pipe to other tools, use redirects, or escape the sandbox is rejected with `success=False`. +- **Why:** Prevents the agent from escaping the AWS CLI sandbox. Without this, the agent could potentially execute arbitrary shell commands, access the filesystem, or interfere with the environment. +- **How:** The environment's `step()` method validates the command before execution. Commands not starting with `aws` are immediately rejected. +- **Metrics:** `aws *` Allowed Pattern | 0 Shell Access | Instant Rejection + +#### 5. No Verification Reward +- **What:** If the agent runs a command that matches a `state_check` command exactly (e.g., `aws s3api get-bucket-versioning --bucket app-config-store`), it gets no progress credit. Progress is only earned through `steps` operations (mutating commands), not read-only queries. +- **Why:** Prevents the agent from gaming progress by running the same verification commands the grader uses. The agent can run read commands to understand state, but only mutation commands earn progress. +- **How:** During grading, the `TaskGrader` checks if the agent's command matches any `state_check` command. Matching commands are flagged as verification-only and excluded from credit. Only commands matching `steps` operations (create, put, update, delete) earn `partial_progress`. +- **Metrics:** 0 Credit for Reads | Mutate Rewarded Actions | Exact Match Detection + +#### 6. Monotonic Progress +- **What:** `partial_progress` can only increase within an episode. 
It is clamped to [0.0, 0.99] — reaching 1.0 requires actual task completion. The agent cannot lose progress, but also cannot re-earn it. +- **Why:** Prevents cycling strategies where the agent creates and destroys resources repeatedly. Combined with deduplication, this ensures steady forward progress. +- **How:** In `TaskGrader`, `previous_progress` tracks the highest progress seen. New progress is always `max(previous, current)`. Reward is clamped at 0.99 for partial completion, reserving 1.0 exclusively for verified full completion. +- **Metrics:** 0.99 Max Without Completion | 1.0 Requires Full Completion | max() Progress Function + +#### 7. Resource Name Validation +- **What:** For `resource_exists` checks, the verifier matches the exact resource name, not just any resource of that type. Creating "my-test-bucket-2" doesn't satisfy a check for "my-test-bucket". +- **Why:** Prevents the agent from creating arbitrarily named resources to game the verification system. Forces precise execution of the task requirements. +- **How:** `ResourceVerifier`'s per-service methods (`verify_s3_bucket`, `verify_dynamodb_table`, etc.) compare against the exact expected resource name from the task definition. Each of the 20+ supported services has its own verification logic. +- **Metrics:** Exact Name Matching | 20+ Verified Services | 0 Partial Matches + +#### 8. State Checks Verify Final State +- **What:** For expert SRE tasks, `state_checks` run actual AWS CLI commands against MiniStack at grading time. The grader verifies the final infrastructure state — not the commands the agent ran. +- **Why:** The agent cannot fake the state. MiniStack is the ground truth. This decouples "what the agent did" from "what was actually achieved", making reward hacking extremely difficult. +- **How:** Each expert task defines `state_checks` with command + assertion pairs. Assertions support `output_contains` (substring match on CLI output) and `json_path + expected` (JSON extraction). 
The grader runs these checks against the live MiniStack state independently of the agent. +- **Metrics:** CLI Verification Method | 2 Assertion Types | Live State Source + +--- + +## Supported AWS Services (34) + +| Category | Services | +|----------|----------| +| **Storage & DB** | S3, DynamoDB, RDS, ElastiCache, EFS | +| **Compute** | Lambda, ECS, EC2, Step Functions | +| **Messaging** | SQS, SNS, Kinesis, EventBridge, Firehose | +| **API** | API Gateway v1/v2, ALB/ELBv2 | +| **Security** | IAM, STS, Cognito, ACM, WAF v2, Secrets Manager | +| **Monitoring** | CloudWatch, CloudWatch Logs, SSM | +| **Infrastructure** | CloudFormation, Route53 | +| **Other** | SES, Athena, Glue, EMR | + +--- ## Quick Start @@ -46,76 +269,83 @@ result = env.reset() result = env.step(AwsRlAction(command="aws s3 ls")) ``` +WebSocket API: + +```python +import websockets, json + +async with websockets.connect("wss://sizzing-aws-rl-env.hf.space/ws") as ws: + await ws.send(json.dumps({"type": "reset"})) + obs = json.loads(await ws.recv()) + + await ws.send(json.dumps({"type": "step", "data": {"command": "aws s3 ls"}})) + obs = json.loads(await ws.recv()) +``` + --- ## Architecture ``` -┌──────────────────────────────────────────────────────────┐ -│ Docker Container │ -│ │ +┌─────────────────────────────────────────────────────────┐ +│ Docker Container │ +│ │ │ ┌─────────────────────┐ ┌────────────────────┐ │ -│ │ FastAPI RL Server │ │ MiniStack │ │ -│ │ (port 8000) │─────>│ (port 4566) │ │ -│ │ │ │ 34 AWS services │ │ -│ │ - Environment │ │ In-memory state │ │ -│ │ - Curriculum │ │ Reset API │ │ -│ │ - Grading Engine │ │ │ │ -│ │ - Episode Tracker │ │ │ │ +│ │ FastAPI RL Server │ │ AWS Simulator │ │ +│ │ (port 8000) │─────>│ (port 4566) │ │ +│ │ │ │ 34 AWS services │ │ +│ │ - Environment │ │ In-memory state │ │ +│ │ - Curriculum │ │ Reset API │ │ +│ │ - Grading Engine │ │ (Ministack) │ │ +│ │ - Episode Tracker │ │ │ │ +│ │ - Hint Provider │ │ │ │ │ └─────────────────────┘ 
└────────────────────┘ │ -│ ^ ^ │ -│ | OpenEnv HTTP/WS | AWS CLI calls │ -└──────────┼─────────────────────────────┼─────────────────┘ +│ ^ ^ │ +│ | OpenEnv HTTP/WS | AWS CLI calls │ +└──────────┼─────────────────────────────┼────────────────┘ | | - RL Agent (client) (internal only) + RL Agent (client, External) (internal only) ``` ### Episode Lifecycle -1. **`reset()`** -- Wipes MiniStack state, selects next task from curriculum, provisions setup commands (if any), returns initial observation -2. **`step(action)`** -- Validates command (`aws` prefix only), executes against MiniStack, records in tracker, grades with shaped reward, returns observation -3. **Terminates** when `task_achieved == True` or max steps reached +1. **`reset()`** — Wipes AWS infrastructure state, selects next task from curriculum, provisions setup commands (if any), returns initial observation +2. **`step(action)`** — Validates command (`aws` prefix only), executes against MiniStack, records in tracker, grades with shaped reward, returns observation +3. **Hint request** — Agent sends `aws help --task-hint` to get a progressive hint (costs reward) +4. **Terminates** when `task_achieved == True` or max steps reached --- + ## Core Classes ### `AwsRlEnvironment` -[server/aws_rl_env_environment.py](server/aws_rl_env_environment.py) -- Implements the OpenEnv `Environment` interface. Orchestrates all services. +[server/aws_rl_env_environment.py](server/aws_rl_env_environment.py) — Implements the OpenEnv `Environment` interface. Orchestrates all services.
| Method | Description | |--------|-------------| | `reset()` | Wipe infra, select task, provision setup, return initial observation | -| `step(action)` | Execute command, grade, update curriculum, return observation | +| `step(action)` | Execute command (or intercept hint request), grade, update curriculum, return observation | ### `Curriculum` -[server/services/curriculum.py](server/services/curriculum.py) -- Priority-queue-based task selection with progressive difficulty. +[server/services/curriculum.py](server/services/curriculum.py) — Priority-queue-based task selection with progressive difficulty. Selects the next task using a **max-heap scored by**: ``` score = ( novelty_bonus # +100 if never attempted (explore first) - + weakness_weight # +50 * (1 - task_success_rate) -- worse tasks get higher priority + + weakness_weight # +50 * (1 - task_success_rate) — worse tasks get higher priority + spaced_rep_bonus # +30 if graduated task is "due" for re-test - recency_penalty # -20 if attempted in last 2 episodes (ensure variety) ) ``` -| Feature | Detail | -|---------|--------| -| **Per-task mastery** | Sliding-window success rate with exponential decay (0.85^i weighting) | -| **Graduation** | Task is "graduated" when success rate >= mastery_threshold in window | -| **Spaced repetition** | Graduated tasks resurface at doubling intervals (3 -> 6 -> ... -> 48 episodes) | -| **Tier progression** | Advance when tier success rate >= advance_rate after min_episodes | -| **Fast-track** | Skip min_episodes wait after 3 consecutive episodes at >= 90% success | -| **Skill profile** | `get_stats()` returns per-task success rates, weak spots, and due re-tests | - ### `TaskGrader` -[server/services/task_grader.py](server/services/task_grader.py) -- Evaluates task completion using a dispatcher pattern. Rewards are always in [0.0, 1.0]. +[server/services/task_grader.py](server/services/task_grader.py) — Evaluates task completion using a dispatcher pattern. 
Rewards are always in [0.0, 1.0]. **Grading strategies by tier:** @@ -127,71 +357,101 @@ score = ( | Advanced | Multi-step + services | All steps completed AND all required services touched | | Expert | State checks | Runs arbitrary AWS CLI commands to assert end-state (ground truth) | -**Reward shaping:** +### `HintProvider` -``` -if task_achieved: reward = 1.0 -else: - reward = partial_progress * 0.8 # base: scaled to [0.0, 0.8] - if progress_increased: reward += 0.1 # dense signal for advancing - if command_failed: reward *= 0.5 # penalty for errors - reward = clamp(reward, 0.0, 0.99) # never 1.0 without completion -``` +[server/services/hint_provider.py](server/services/hint_provider.py) — Generates progressive hints from `SuccessCriteria` fields. + +| Hint Level | What it reveals | Example | +|-----------|----------------|---------| +| Level 1 | Which AWS services to use | "You'll need IAM and Lambda" | +| Level 2 | Which operations | "Start with create-role, then put-role-policy" | +| Level 3 | Near-complete command structure | "Use: aws iam create-role --role-name ..." | ### `EpisodeTracker` -[server/services/episode_tracker.py](server/services/episode_tracker.py) -- Maintains per-episode step history. Parses AWS CLI commands to extract (service, operation, resource) tuples. Tracks credited operations for deduplication and monotonic progress. +[server/services/episode_tracker.py](server/services/episode_tracker.py) — Maintains per-episode step history. Parses AWS CLI commands to extract (service, operation, resource) tuples. Tracks credited operations for deduplication, monotonic progress, and hint usage. ### `ResourceVerifier` -[server/services/resource_verifier.py](server/services/resource_verifier.py) -- Queries MiniStack directly to verify ground-truth resource state. Service-specific checks for S3, DynamoDB, Lambda, SQS, SNS, IAM, and API Gateway. Also evaluates `StateCheck` assertions (substring match, JSON path extraction). 
+[server/services/resource_verifier.py](server/services/resource_verifier.py) — Queries MiniStack directly to verify ground-truth resource state. Service-specific checks for S3, DynamoDB, Lambda, SQS, SNS, IAM, Secrets Manager, and API Gateway. Also evaluates `StateCheck` assertions (substring match, JSON path extraction). ### `EnvironmentDesigner` -[server/services/environment_designer.py](server/services/environment_designer.py) -- Provisions initial AWS state via setup commands before the agent acts. Used by SRE/expert tasks to create broken infrastructure the agent must fix. +[server/services/environment_designer.py](server/services/environment_designer.py) — Provisions initial AWS state via setup commands before the agent acts. Used by SRE/expert tasks to create broken or insecure infrastructure the agent must fix. ### `AwsBackend` -[server/services/aws_backend.py](server/services/aws_backend.py) -- Executes AWS CLI commands against MiniStack (`AWS_ENDPOINT_URL=http://localhost:4566`). Provides `reset_environment()` via MiniStack's `/_ministack/reset` endpoint. +[server/services/aws_backend.py](server/services/aws_backend.py) — Executes AWS CLI commands against MiniStack (`AWS_ENDPOINT_URL=http://localhost:4566`). Provides `reset_environment()` via MiniStack's `/_ministack/reset` endpoint. ### `AwsRlEnv` (Client) -[client.py](client.py) -- OpenEnv HTTP/WebSocket client. Wraps `reset()` and `step()` calls to the server. +[client.py](client.py) — OpenEnv HTTP/WebSocket client. Wraps `reset()` and `step()` calls to the server. --- ## Data Models -[models.py](models.py) -- All Pydantic models and type aliases. +[models.py](models.py) — All Pydantic models and type aliases. -### Action & Observation +### Action ```python class AwsRlAction(Action): command: str # AWS CLI command, e.g. 
"aws s3 ls" +``` +### Observation + +```python class AwsRlObservation(Observation): episode_id: EpisodeID step_count: StepCount command_success: bool command_output: str # stdout from AWS CLI error: str # stderr if failed - resources: dict[AwsService, dict | list | str] - task: Task | None # current task definition + task: TaskInfo | None # masked task definition (hides success criteria) task_achieved: bool - done: bool - reward: float # shaped reward in [0.0, 1.0] + partial_progress: float # current task progress in [0.0, 1.0] + hints_used: int # number of hints requested this episode + hint_text: str # most recent hint text (if any) +``` + +### Environment State + +```python +class AwsRlState(State): + current_task: Task | None # full task assigned for the episode + tracker: TrackerState # episode tracker snapshot + infra_state: dict # AWS infrastructure state keyed by service name + chaos_occurred: bool # whether chaos was injected this episode + current_tier: str # agent's current difficulty tier + +class TrackerState: + step_count: int # steps taken this episode + hints_used: int # hints requested this episode + progress: float # current partial progress [0.0, 1.0] + commands_executed: list[str] # commands executed this episode + credited_operations: list[str] # (operation, resource) pairs that earned credit ``` ### Task Definitions ```python class Task: - task_id: TaskID # 0-20 + task_id: TaskID difficulty: TaskDifficulty # warmup | beginner | intermediate | advanced | expert description: str # human-readable goal success_criteria: SuccessCriteria - setup_commands: list[SetupCommand] # pre-provision for SRE tasks + setup_commands: list[SetupCommand] # pre-provision for SRE tasks + desired_state_spec: str | None # natural-language desired end state (drift tasks) + possible_drifts: list[SetupCommand] # pool of mutations for DriftEngine + +class TaskInfo: + """Agent-visible subset of Task — masks success_criteria, setup_commands, and possible_drifts.""" + 
task_id: TaskID + difficulty: TaskDifficulty + description: str + desired_state_spec: str | None class SuccessCriteria: command_contains: str | None # warmup/beginner @@ -211,6 +471,7 @@ class TierConfig: mastery_window: int # sliding window size (default: 10) mastery_threshold: float # per-task graduation threshold (default: 0.7) fast_track_rate: float # early promotion threshold (default: 0.9) + chaos_probability: float # probability of chaos injection per step (default: 0.0) class SpacedRepState: interval: int # episodes until next re-test (3 -> 48) @@ -219,88 +480,6 @@ class SpacedRepState: --- -## Task Catalog (21 Tasks) - -### Warmup (6 tasks) -- Simple listing operations - -| ID | Description | Service | -|----|-------------|---------| -| 0 | List all S3 buckets | S3 | -| 1 | Describe EC2 instances | EC2 | -| 2 | List DynamoDB tables | DynamoDB | -| 3 | List Lambda functions | Lambda | -| 4 | List SQS queues | SQS | -| 5 | List SNS topics | SNS | - -### Beginner (5 tasks) -- Single-resource creation with verification - -| ID | Description | Verified Resource | -|----|-------------|-------------------| -| 6 | Create an S3 bucket | Bucket exists in MiniStack | -| 7 | Create a DynamoDB table | Table exists | -| 8 | Create an SQS queue | Queue URL resolvable | -| 9 | Create an SNS topic | Topic ARN in list | -| 10 | Create a Lambda function | Function exists | - -### Intermediate (4 tasks) -- Multi-step workflows - -| ID | Description | Steps | -|----|-------------|-------| -| 11 | Create S3 bucket + upload file | create-bucket, put-object | -| 12 | Create DynamoDB table + insert item | create-table, put-item | -| 13 | Create SNS topic + SQS queue + subscribe | create-topic, create-queue, subscribe | -| 14 | Create IAM role + attach policy | create-role, attach-role-policy | - -### Advanced (3 tasks) -- Cross-service architectures - -| ID | Description | Services | Steps | -|----|-------------|----------|-------| -| 15 | Lambda + SQS event source pipeline | 
Lambda, SQS, IAM | 4-5 steps | -| 16 | Serverless API (DynamoDB + Lambda + API Gateway) | DynamoDB, Lambda, API Gateway, IAM | 7 steps | -| 17 | Fan-out notification system (SNS + SQS) | SNS, SQS | 5 steps | - -### Expert (3 tasks) -- SRE incident response - -| ID | Description | Setup | Fix Required | -|----|-------------|-------|-------------| -| 18 | Fix Lambda missing SQS permissions | Broken role + Lambda + queue | Attach SQS policy, create event source | -| 19 | Enable S3 versioning + lifecycle | Bucket + object | Enable versioning, add lifecycle rule | -| 20 | Fix DynamoDB throttling + alerting | Under-provisioned table + SNS | Scale to 50 RCU/WCU, subscribe SQS | - -Expert tasks use **state checks** (ground-truth AWS CLI assertions) to verify the fix, not just command matching. - ---- - -## Anti-Reward-Hacking Measures - -| Defense | How it works | -|---------|-------------| -| **Ground-truth verification** | Grader queries MiniStack directly -- agent cannot fake resource state | -| **Deduplication** | `EpisodeTracker.has_executed_operation()` prevents re-earning credit for repeated commands | -| **Invisible grading** | Verification commands run server-side, invisible to the agent's observations | -| **Command allowlisting** | Only commands starting with `aws` are executed; pipes and shell escape are rejected | -| **No credit for read-only** | Running a `state_check` command earns no progress; only mutating `steps` earn credit | -| **Monotonic progress** | `partial_progress` can only increase within an episode | -| **Exact resource names** | `resource_exists` checks the exact name, not just any resource of that type | -| **State checks verify final state** | Expert tasks run actual CLI commands against MiniStack at grading time | - ---- - -## Supported AWS Services (34) - -| Category | Services | -|----------|----------| -| **Storage & DB** | S3, DynamoDB, RDS, ElastiCache, EFS | -| **Compute** | Lambda, ECS, EC2, Step Functions | -| **Messaging** | SQS, 
SNS, Kinesis, EventBridge, Firehose | -| **API** | API Gateway v1/v2, ALB/ELBv2 | -| **Security** | IAM, STS, Cognito, ACM, WAF v2, Secrets Manager | -| **Monitoring** | CloudWatch, CloudWatch Logs, SSM | -| **Infrastructure** | CloudFormation, Route53 | -| **Other** | SES, Athena, Glue, EMR | - ---- - ## Project Structure ``` @@ -309,22 +488,48 @@ aws-rl-env/ ├── models.py # Pydantic data models & type aliases ├── client.py # AwsRlEnv OpenEnv client ├── inference.py # LLM agent inference script +├── inference-complete.py # Full inference pipeline with curriculum ├── server/ │ ├── app.py # FastAPI application + web UI endpoints │ ├── aws_rl_env_environment.py # Core RL environment (reset/step) +│ ├── templates/ +│ │ └── index.html # Web playground UI +│ ├── static/ +│ │ ├── css/style.css # Playground styles +│ │ └── js/app.js # Playground frontend logic │ └── services/ │ ├── aws_backend.py # MiniStack command executor │ ├── task_grader.py # Grading engine with reward shaping │ ├── curriculum.py # Curriculum learning manager -│ ├── episode_tracker.py # Per-episode step history +│ ├── episode_tracker.py # Per-episode step history & hints │ ├── resource_verifier.py # Ground-truth state verification │ ├── environment_designer.py # Setup provisioning for SRE tasks +│ ├── hint_provider.py # Progressive hint generator +│ ├── chaos_engine.py # Chaos injection engine +│ ├── drift_engine.py # Drift detection engine +│ ├── task_solutions.py # Reference solutions for tasks │ └── tasks/ -│ ├── warmup.yaml # 6 listing tasks -│ ├── beginner.yaml # 5 creation tasks -│ ├── intermediate.yaml # 4 multi-step tasks -│ ├── advanced.yaml # 3 architecture tasks -│ └── expert.yaml # 3 SRE incident tasks +│ ├── warmup.yaml # 20 listing tasks +│ ├── beginner.yaml # 20 creation tasks +│ ├── intermediate.yaml # 20 multi-step tasks +│ ├── advanced.yaml # 20 architecture tasks +│ ├── expert.yaml # 20 SRE/security tasks +│ └── drift.yaml # Drift detection tasks +├── tests/ # Unit tests for core 
services +│ ├── test_aws_rl_env_environment.py +│ ├── test_drift_engine.py +│ ├── test_environment_designer.py +│ ├── test_episode_tracker.py +│ ├── test_hint_provider.py +│ ├── test_resource_verifier.py +│ └── test_task_grader.py +├── tests_tasks/ # Integration tests per task tier +│ ├── test_warmup_tasks.py +│ ├── test_beginner_tasks.py +│ ├── test_intermediate_tasks.py +│ ├── test_advanced_tasks.py +│ ├── test_expert_tasks.py +│ └── test_drift_tasks.py ├── aws_infra/ # Vendored MiniStack emulator │ └── aws_infra/ │ ├── app.py # MiniStack ASGI router @@ -351,19 +556,7 @@ make docker-health # Health check ### Local (without Docker) -```bash -# Terminal 1: Start MiniStack -pip install ministack -ministack # port 4566 - -# Terminal 2: Start RL server -export AWS_ENDPOINT_URL=http://localhost:4566 -export AWS_ACCESS_KEY_ID=test -export AWS_SECRET_ACCESS_KEY=test -uv run uvicorn server.app:app --reload --host 0.0.0.0 --port 8000 -``` - -Or use the combined Makefile target: +Use the combined Makefile target: ```bash make run # Starts MiniStack + server @@ -388,9 +581,9 @@ make openenv-push # Push to HuggingFace Spaces | `AWS_SECRET_ACCESS_KEY` | `test` | AWS credentials (any value works) | | `AWS_DEFAULT_REGION` | `us-east-1` | AWS region | | `MAX_STEPS` | `15` | Max steps per episode | -| `API_BASE_URL` | -- | LLM API endpoint (for inference.py) | -| `MODEL_NAME` | -- | LLM model name (for inference.py) | -| `HF_TOKEN` | -- | HuggingFace token (for inference.py) | +| `API_BASE_URL` | — | LLM API endpoint (for inference.py) | +| `MODEL_NAME` | — | LLM model name (for inference.py) | +| `HF_TOKEN` | — | HuggingFace token (for inference.py) | | `TEMPERATURE` | `0.7` | LLM sampling temperature | --- @@ -413,3 +606,13 @@ curriculum.get_stats() # "avg_reward_last_10": 0.65 # } ``` + +--- + +## Links + +- **GitHub**: [github.com/udaykiranpadhy/aws-rl-env](https://github.com/udaykiranpadhy/aws-rl-env) +- **Hugging Face Space**: 
[huggingface.co/spaces/Sizzing/aws_rl_env](https://huggingface.co/spaces/Sizzing/aws_rl_env) +- **API Reference**: [/docs](https://sizzing-aws-rl-env.hf.space/docs) +- **ReDoc**: [/redoc](https://sizzing-aws-rl-env.hf.space/redoc) +- **Portfolio**: [portfolio.udaykp.dev](https://portfolio.udaykp.dev) diff --git a/__init__.py b/__init__.py index 31a9406d7b46adadc21ac1c989f442a8088dfc80..284b08844bad6830fbe6ece62f63732df9bd9b83 100644 --- a/__init__.py +++ b/__init__.py @@ -6,8 +6,13 @@ """Aws Rl Env Environment.""" -from .client import AwsRlEnv -from .models import AwsRlAction, AwsRlObservation +try: + from .client import AwsRlEnv + from .models import AwsRlAction, AwsRlObservation +except ImportError: + # When imported directly (e.g. by pytest from rootdir) rather than as + # part of the aws_rl_env package, relative imports are unavailable. + pass __all__ = [ "AwsRlAction", diff --git a/aws_infra/aws_infra/app.py b/aws_infra/aws_infra/app.py index 8fa44d5577c4c14ffcbc958f35bdf573694f6cfe..0b095e42b2cbde6abd329533589b2606446d0d91 100644 --- a/aws_infra/aws_infra/app.py +++ b/aws_infra/aws_infra/app.py @@ -235,6 +235,29 @@ async def app(scope, receive, send): json.dumps({"reset": "ok"}).encode()) return + if path == "/_ministack/state" and method == "GET": + state = _get_all_state() + await _send_response(send, 200, {"Content-Type": "application/json"}, + json.dumps(state).encode()) + return + + if path == "/_ministack/handlers" and method == "GET": + handlers = _get_all_handlers() + await _send_response(send, 200, {"Content-Type": "application/json"}, + json.dumps(handlers).encode()) + return + + if path.startswith("/_ministack/handlers/") and method == "GET": + service_name = path[len("/_ministack/handlers/"):].strip("/") + info = _get_service_info(service_name) + if info is None: + await _send_response(send, 404, {"Content-Type": "application/json"}, + json.dumps({"error": f"Unknown service: {service_name}"}).encode()) + else: + await _send_response(send, 200, 
{"Content-Type": "application/json"}, + json.dumps(info).encode()) + return + if path == "/_ministack/config" and method == "POST": _ALLOWED_CONFIG_KEYS = { "athena.ATHENA_ENGINE", "athena.ATHENA_DATA_DIR", @@ -570,6 +593,105 @@ def _run_init_scripts(): logger.error("Failed to execute init script %s: %s", script_path, e) +def _service_modules() -> list: + """Return list of (canonical_name, module) for all service modules.""" + from aws_infra.services import iam_sts + return [ + ("s3", s3), ("sqs", sqs), ("sns", sns), ("dynamodb", dynamodb), + ("lambda", lambda_svc), ("iam", iam_sts), ("secretsmanager", secretsmanager), + ("logs", cloudwatch_logs), ("ssm", ssm), ("events", eventbridge), + ("kinesis", kinesis), ("monitoring", cloudwatch), ("ses", ses), + ("ses_v2", ses_v2), ("acm", acm), ("wafv2", waf), + ("states", stepfunctions), ("ecs", ecs), ("rds", rds), + ("elasticache", elasticache), ("glue", glue), ("athena", athena), + ("apigateway", apigateway), ("apigateway_v1", apigateway_v1), + ("firehose", firehose), ("route53", route53), ("cognito", cognito), + ("ec2", ec2), ("elasticmapreduce", emr), ("elasticloadbalancing", alb), + ("elasticfilesystem", efs), ("cloudformation", cloudformation), + ] + + +# Extra aliases for the /_ministack/handlers/ endpoint so users can +# look up services using common short names (e.g. "lambda", "stepfunctions"). 
+_HANDLER_LOOKUP_ALIASES = { + **SERVICE_NAME_ALIASES, + "lambda": "lambda", + "iam": "iam", + "sts": "iam", + "ses-v2": "ses_v2", + "sesv2": "ses_v2", + "apigateway-v1": "apigateway_v1", + "apigatewayv1": "apigateway_v1", + "logs": "logs", + "emr": "elasticmapreduce", + "alb": "elasticloadbalancing", + "efs": "elasticfilesystem", + "cfn": "cloudformation", + "sf": "states", + "sfn": "states", + "cw": "monitoring", + "cwl": "logs", + "sm": "secretsmanager", + "eb": "events", + "ddb": "dynamodb", +} + + +def _resolve_service_module(service_name: str): + """Resolve a service name (or alias) to its (canonical_name, module) pair.""" + name = service_name.lower().strip() + canonical = _HANDLER_LOOKUP_ALIASES.get(name, name) + for svc_name, mod in _service_modules(): + if svc_name == canonical: + return svc_name, mod + return None, None + + +def _get_all_state() -> dict: + """Collect summary state from every service module.""" + state = {} + for name, mod in _service_modules(): + if name not in SERVICE_HANDLERS and name not in ("iam", "ses_v2", "apigateway_v1"): + continue + try: + state[name] = mod.get_state() + except Exception as e: + logger.warning("get_state() failed for %s: %s", name, e) + state[name] = {"error": str(e)} + return {"services": state} + + +def _get_all_handlers() -> dict: + """Collect SUPPORTED_ACTIONS from every service module.""" + handlers = {} + for name, mod in _service_modules(): + if name not in SERVICE_HANDLERS and name not in ("iam", "ses_v2", "apigateway_v1"): + continue + actions = getattr(mod, "SUPPORTED_ACTIONS", []) + handlers[name] = {"actions": actions, "count": len(actions)} + return {"services": handlers} + + +def _get_service_info(service_name: str) -> dict | None: + """Return detailed info for a single service: docstring, actions, and current state.""" + name, mod = _resolve_service_module(service_name) + if mod is None: + return None + docstring = (mod.__doc__ or "").strip() + actions = getattr(mod, "SUPPORTED_ACTIONS", []) + 
try: + state = mod.get_state() + except Exception: + state = {} + return { + "service": name, + "description": docstring, + "supported_actions": actions, + "action_count": len(actions), + "state": state, + } + + def _reset_all_state(): """Wipe all in-memory state across every service module, and persisted files if enabled.""" import shutil diff --git a/aws_infra/aws_infra/services/acm.py b/aws_infra/aws_infra/services/acm.py index 4cb2c2810b98067f1f407cb8afda595eb9b16c5b..6bebeaf12ac794d7b8c12e3b0a4503713f9d9d58 100644 --- a/aws_infra/aws_infra/services/acm.py +++ b/aws_infra/aws_infra/services/acm.py @@ -234,5 +234,20 @@ def _resend_validation_email(data): return json_response({}) +SUPPORTED_ACTIONS = [ + "RequestCertificate", "DescribeCertificate", "ListCertificates", + "DeleteCertificate", "GetCertificate", "ImportCertificate", + "AddTagsToCertificate", "RemoveTagsFromCertificate", + "ListTagsForCertificate", "UpdateCertificateOptions", + "RenewCertificate", "ResendValidationEmail", +] + + +def get_state() -> dict: + return { + "certificates": {"count": len(_certificates), "ids": list(_certificates.keys())}, + } + + def reset(): _certificates.clear() diff --git a/aws_infra/aws_infra/services/alb.py b/aws_infra/aws_infra/services/alb.py index fc6fe83298f90d458fac3b8505f16b473d4df0ca..5625ce2204413617a59fa36248ac5265150336b4 100644 --- a/aws_infra/aws_infra/services/alb.py +++ b/aws_infra/aws_infra/services/alb.py @@ -1044,6 +1044,38 @@ async def dispatch_request(lb, method, path, headers, body, query_params, port=8 json.dumps({"message": "No matching ALB rule found"}).encode()) +# --------------------------------------------------------------------------- +# Supported Actions +# --------------------------------------------------------------------------- + +SUPPORTED_ACTIONS = [ + "CreateLoadBalancer", "DeleteLoadBalancer", "DescribeLoadBalancers", + "ModifyLoadBalancerAttributes", "AddTags", "RemoveTags", "DescribeTags", + "CreateTargetGroup", 
"DeleteTargetGroup", "DescribeTargetGroups", + "ModifyTargetGroup", "ModifyTargetGroupAttributes", "CreateListener", + "DeleteListener", "DescribeListeners", "ModifyListener", "CreateRule", + "DeleteRule", "DescribeRules", "ModifyRule", "RegisterTargets", + "DeregisterTargets", "DescribeTargetHealth", "SetRulePriorities", +] + + +# --------------------------------------------------------------------------- +# State +# --------------------------------------------------------------------------- + +def get_state() -> dict: + return { + "load_balancers": {"count": len(_lbs), "names": list(_lbs.keys())}, + "target_groups": {"count": len(_tgs), "names": list(_tgs.keys())}, + "listeners": {"count": len(_listeners), "ids": list(_listeners.keys())}, + "rules": {"count": len(_rules), "ids": list(_rules.keys())}, + "targets": {"count": sum(len(tgts) for tgts in _targets.values())}, + "tags": {"count": sum(len(tags) for tags in _tags.values())}, + "load_balancer_attributes": {"count": sum(len(attrs) for attrs in _lb_attrs.values())}, + "target_group_attributes": {"count": sum(len(attrs) for attrs in _tg_attrs.values())}, + } + + def reset(): _lbs.clear() _tgs.clear() diff --git a/aws_infra/aws_infra/services/apigateway.py b/aws_infra/aws_infra/services/apigateway.py index fbe1e3bb075c862d579e882c8e7d439f4274e5e7..87c013808b007e8d46517d528c74b853c0f16031 100644 --- a/aws_infra/aws_infra/services/apigateway.py +++ b/aws_infra/aws_infra/services/apigateway.py @@ -83,6 +83,18 @@ def _api_arn(api_id: str) -> str: return f"arn:aws:apigateway:{REGION}::/apis/{api_id}" +SUPPORTED_ACTIONS = [ + "CreateApi", "GetApis", "GetApi", "UpdateApi", "DeleteApi", + "CreateRoute", "GetRoutes", "GetRoute", "UpdateRoute", "DeleteRoute", + "CreateIntegration", "GetIntegrations", "GetIntegration", + "UpdateIntegration", "DeleteIntegration", "CreateStage", "GetStages", + "GetStage", "UpdateStage", "DeleteStage", "CreateDeployment", + "GetDeployments", "GetDeployment", "DeleteDeployment", "GetTags", + 
"TagResource", "UntagResource", "CreateAuthorizer", "GetAuthorizers", + "GetAuthorizer", "UpdateAuthorizer", "DeleteAuthorizer", +] + + # ---- Persistence hooks ---- def get_state() -> dict: diff --git a/aws_infra/aws_infra/services/apigateway_v1.py b/aws_infra/aws_infra/services/apigateway_v1.py index aaaa9fc49f77179f8debbdcfe7a368ffab47aad9..8d4bc0b92e950ef442b3037cdb4e85f3d563406d 100644 --- a/aws_infra/aws_infra/services/apigateway_v1.py +++ b/aws_infra/aws_infra/services/apigateway_v1.py @@ -245,6 +245,27 @@ async def _call_lambda(func_name, event): return {"statusCode": 200, "body": "Mock response"}, None +SUPPORTED_ACTIONS = [ + "CreateRestApi", "GetRestApis", "GetRestApi", "UpdateRestApi", + "DeleteRestApi", "GetResources", "GetResource", "CreateResource", + "UpdateResource", "DeleteResource", "PutMethod", "GetMethod", + "DeleteMethod", "PutMethodResponse", "GetMethodResponse", + "DeleteMethodResponse", "PutIntegration", "GetIntegration", + "DeleteIntegration", "PutIntegrationResponse", "GetIntegrationResponse", + "DeleteIntegrationResponse", "CreateDeployment", "GetDeployments", + "GetDeployment", "UpdateDeployment", "DeleteDeployment", "CreateStage", + "GetStages", "GetStage", "UpdateStage", "DeleteStage", + "CreateAuthorizer", "GetAuthorizers", "GetAuthorizer", + "UpdateAuthorizer", "DeleteAuthorizer", "CreateModel", "GetModels", + "GetModel", "DeleteModel", "GetApiKeys", "CreateApiKey", "GetApiKey", + "DeleteApiKey", "GetUsagePlans", "CreateUsagePlan", "GetUsagePlan", + "DeleteUsagePlan", "GetUsagePlanKeys", "CreateUsagePlanKey", + "DeleteUsagePlanKey", "GetDomainNames", "CreateDomainName", + "GetDomainName", "DeleteDomainName", "GetTags", "TagResource", + "UntagResource", +] + + # ---- Persistence hooks ---- def get_state(): diff --git a/aws_infra/aws_infra/services/athena.py b/aws_infra/aws_infra/services/athena.py index 37bbcaae150f0fcbcce07cee6d27ac7f69b2aa28..3ec4fd50fae0025c541f19e79c04b6f6f2e80689 100644 --- 
a/aws_infra/aws_infra/services/athena.py +++ b/aws_infra/aws_infra/services/athena.py @@ -853,6 +853,29 @@ def _execution_out(ex): return {k: v for k, v in ex.items() if not k.startswith("_")} +SUPPORTED_ACTIONS = [ + "StartQueryExecution", "GetQueryExecution", "GetQueryResults", "StopQueryExecution", + "ListQueryExecutions", "CreateWorkGroup", "DeleteWorkGroup", "GetWorkGroup", + "ListWorkGroups", "UpdateWorkGroup", "CreateNamedQuery", "DeleteNamedQuery", + "GetNamedQuery", "ListNamedQueries", "BatchGetNamedQuery", "BatchGetQueryExecution", + "CreateDataCatalog", "GetDataCatalog", "ListDataCatalogs", "DeleteDataCatalog", + "UpdateDataCatalog", "CreatePreparedStatement", "GetPreparedStatement", + "DeletePreparedStatement", "ListPreparedStatements", "GetTableMetadata", + "ListTableMetadata", "TagResource", "UntagResource", "ListTagsForResource", +] + + +def get_state() -> dict: + return { + "workgroups": {"count": len(_workgroups), "names": list(_workgroups.keys())}, + "named_queries": {"count": len(_named_queries), "ids": list(_named_queries.keys())}, + "data_catalogs": {"count": len(_data_catalogs), "names": list(_data_catalogs.keys())}, + "executions": {"count": len(_executions), "ids": list(_executions.keys())}, + "prepared_statements": {"count": len(_prepared_statements), "keys": list(_prepared_statements.keys())}, + "tags": {"count": len(_tags), "arns": list(_tags.keys())}, + } + + def reset(): import time as _time diff --git a/aws_infra/aws_infra/services/cloudformation/__init__.py b/aws_infra/aws_infra/services/cloudformation/__init__.py index 29db5340001baf6155294f30ef5a0eb21b22c694..11ac2b51e333a0e0b476c70dba6b3c963129a107 100644 --- a/aws_infra/aws_infra/services/cloudformation/__init__.py +++ b/aws_infra/aws_infra/services/cloudformation/__init__.py @@ -54,6 +54,33 @@ async def handle_request(method: str, path: str, headers: dict, return handler(params) +# --------------------------------------------------------------------------- +# Supported Actions +# 
--------------------------------------------------------------------------- + +SUPPORTED_ACTIONS = [ + "CreateStack", "UpdateStack", "DeleteStack", "DescribeStacks", + "ListStacks", "DescribeStackEvents", "DescribeStackResource", + "DescribeStackResources", "GetTemplate", "ValidateTemplate", + "ListExports", "CreateChangeSet", "DescribeChangeSet", + "ExecuteChangeSet", "DeleteChangeSet", "ListChangeSets", + "GetTemplateSummary", +] + + +# --------------------------------------------------------------------------- +# State +# --------------------------------------------------------------------------- + +def get_state() -> dict: + return { + "stacks": {"count": len(_stacks), "names": list(_stacks.keys())}, + "change_sets": {"count": len(_change_sets), "ids": list(_change_sets.keys())}, + "stack_events": {"count": len(_stack_events), "ids": list(_stack_events.keys())}, + "exports": {"count": len(_exports), "names": list(_exports.keys())}, + } + + def reset(): _stacks.clear() _stack_events.clear() diff --git a/aws_infra/aws_infra/services/cloudwatch.py b/aws_infra/aws_infra/services/cloudwatch.py index 28c2bedc49cdf321aebb60e4ad7d43c0239bf0b5..61c21e162b9e37dc90af83dcde2483648ecbed6f 100644 --- a/aws_infra/aws_infra/services/cloudwatch.py +++ b/aws_infra/aws_infra/services/cloudwatch.py @@ -1382,8 +1382,32 @@ def _error(code, message, status, use_json=False): return status, {"Content-Type": "application/xml"}, body +SUPPORTED_ACTIONS = [ + "PutMetricData", "GetMetricStatistics", "GetMetricData", "ListMetrics", + "PutMetricAlarm", "PutCompositeAlarm", "DescribeAlarms", + "DescribeAlarmsForMetric", "DescribeAlarmHistory", "DeleteAlarms", + "EnableAlarmActions", "DisableAlarmActions", "SetAlarmState", + "TagResource", "UntagResource", "ListTagsForResource", + "PutDashboard", "GetDashboard", "DeleteDashboards", "ListDashboards", +] + + +def get_state() -> dict: + return { + "metrics": {"count": len(_metrics), "names": [f"{ns}:{mn}" for (ns, mn, _), _ in _metrics.items()]}, 
+ "alarms": {"count": len(_alarms), "names": list(_alarms.keys())}, + "composite_alarms": {"count": len(_composite_alarms), "names": list(_composite_alarms.keys())}, + "dashboards": {"count": len(_dashboards), "names": list(_dashboards.keys())}, + "alarm_history": {"count": len(_alarm_history)}, + "resource_tags": {"count": len(_resource_tags), "arns": list(_resource_tags.keys())}, + + } + + def reset(): _alarms.clear() _composite_alarms.clear() _alarm_history.clear() _resource_tags.clear() + _dashboards.clear() + _metrics.clear() diff --git a/aws_infra/aws_infra/services/cloudwatch_logs.py b/aws_infra/aws_infra/services/cloudwatch_logs.py index acb259d729ce2916f34d08ea90e83be726872115..4b6a47f07dc8d5174697cac2600d5d40794776e2 100644 --- a/aws_infra/aws_infra/services/cloudwatch_logs.py +++ b/aws_infra/aws_infra/services/cloudwatch_logs.py @@ -848,6 +848,29 @@ def _stop_query(data): return json_response({"success": True}) +SUPPORTED_ACTIONS = [ + "CreateLogGroup", "DeleteLogGroup", "DescribeLogGroups", + "CreateLogStream", "DeleteLogStream", "DescribeLogStreams", + "PutLogEvents", "GetLogEvents", "FilterLogEvents", + "PutRetentionPolicy", "DeleteRetentionPolicy", + "PutSubscriptionFilter", "DeleteSubscriptionFilter", "DescribeSubscriptionFilters", + "TagLogGroup", "UntagLogGroup", "ListTagsLogGroup", + "TagResource", "UntagResource", "ListTagsForResource", + "PutDestination", "DeleteDestination", "DescribeDestinations", "PutDestinationPolicy", + "PutMetricFilter", "DeleteMetricFilter", "DescribeMetricFilters", + "StartQuery", "GetQueryResults", "StopQuery", +] + + +def get_state() -> dict: + return { + "log_groups": {"count": len(_log_groups), "names": list(_log_groups.keys())}, + "destinations": {"count": len(_destinations), "names": list(_destinations.keys())}, + "metric_filters": {"count": len(_metric_filters), "keys": list(_metric_filters.keys())}, + "queries": {"count": len(_queries), "ids": list(_queries.keys())}, + } + + def reset(): _log_groups.clear() 
_destinations.clear() diff --git a/aws_infra/aws_infra/services/cognito.py b/aws_infra/aws_infra/services/cognito.py index f6e51ce59aa66652852cb3895568bedd5608bb14..4a66542f6845d4f046279bf52778bb740dd7369f 100644 --- a/aws_infra/aws_infra/services/cognito.py +++ b/aws_infra/aws_infra/services/cognito.py @@ -1904,6 +1904,49 @@ def _apply_user_filter(users: list, filter_str: str) -> list: return result +# =========================================================================== +# SUPPORTED ACTIONS +# =========================================================================== + +SUPPORTED_ACTIONS = [ + "CreateUserPool", "DeleteUserPool", "DescribeUserPool", "ListUserPools", + "UpdateUserPool", "CreateUserPoolClient", "DeleteUserPoolClient", + "DescribeUserPoolClient", "ListUserPoolClients", "UpdateUserPoolClient", + "AdminCreateUser", "AdminDeleteUser", "AdminGetUser", "ListUsers", + "AdminSetUserPassword", "AdminUpdateUserAttributes", "AdminInitiateAuth", + "AdminRespondToAuthChallenge", "InitiateAuth", "RespondToAuthChallenge", + "SignUp", "ConfirmSignUp", "ForgotPassword", "ConfirmForgotPassword", + "ChangePassword", "GetUser", "UpdateUserAttributes", "DeleteUser", + "AdminAddUserToGroup", "AdminRemoveUserFromGroup", + "AdminListGroupsForUser", "AdminListUserAuthEvents", "CreateGroup", + "DeleteGroup", "GetGroup", "ListGroups", "AdminConfirmSignUp", + "AdminDisableUser", "AdminEnableUser", "AdminResetUserPassword", + "AdminUserGlobalSignOut", "GlobalSignOut", "RevokeToken", + "CreateUserPoolDomain", "DeleteUserPoolDomain", "DescribeUserPoolDomain", + "GetUserPoolMfaConfig", "SetUserPoolMfaConfig", "AssociateSoftwareToken", + "VerifySoftwareToken", "TagResource", "UntagResource", + "ListTagsForResource", "CreateIdentityPool", "DeleteIdentityPool", + "DescribeIdentityPool", "ListIdentityPools", "UpdateIdentityPool", + "GetId", "GetCredentialsForIdentity", "GetOpenIdToken", + "SetIdentityPoolRoles", "GetIdentityPoolRoles", "ListIdentities", + "DescribeIdentity", 
"MergeDeveloperIdentities", + "UnlinkDeveloperIdentity", "UnlinkIdentity", +] + + +# =========================================================================== +# STATE +# =========================================================================== + +def get_state() -> dict: + return { + "user_pools": {"count": len(_user_pools), "ids": list(_user_pools.keys())}, + "identity_pools": {"count": len(_identity_pools), "ids": list(_identity_pools.keys())}, + "pool_domain_map": {"count": len(_pool_domain_map), "domains": list(_pool_domain_map.keys())}, + "identity_tags": {"count": len(_identity_tags), "arns": list(_identity_tags.keys())}, + } + + # =========================================================================== # RESET # =========================================================================== diff --git a/aws_infra/aws_infra/services/dynamodb.py b/aws_infra/aws_infra/services/dynamodb.py index 457154449f074fe794077c067dc3af27e701abda..0d09882c69faf50a826492b8bd6e20c130a22d11 100644 --- a/aws_infra/aws_infra/services/dynamodb.py +++ b/aws_infra/aws_infra/services/dynamodb.py @@ -1801,6 +1801,29 @@ def _diff_attributes(old_item, new_item, return_old=True): return result +SUPPORTED_ACTIONS = [ + "CreateTable", "DeleteTable", "DescribeTable", "ListTables", "UpdateTable", + "PutItem", "GetItem", "DeleteItem", "UpdateItem", + "Query", "Scan", + "BatchWriteItem", "BatchGetItem", + "TransactWriteItems", "TransactGetItems", + "DescribeTimeToLive", "UpdateTimeToLive", + "DescribeContinuousBackups", "UpdateContinuousBackups", + "DescribeEndpoints", + "TagResource", "UntagResource", "ListTagsOfResource", +] + + +def get_state() -> dict: + return { + "tables": {"count": len(_tables), "names": list(_tables.keys())}, + "tags": {"count": len(_tags), "names": list(_tags.keys())}, + "ttl_settings": {"count": len(_ttl_settings), "names": list(_ttl_settings.keys())}, + "pitr_settings": {"count": len(_pitr_settings), "names": list(_pitr_settings.keys())}, + "stream_records": 
{"count": len(_stream_records), "names": list(_stream_records.keys())}, + } + + def reset(): with _lock: _tables.clear() diff --git a/aws_infra/aws_infra/services/ec2.py b/aws_infra/aws_infra/services/ec2.py index dcdb304cf0fe8ad1a58c143e353d4e68ac907f42..023dbe20a29367b4561dc2803824c00a5aee9208 100644 --- a/aws_infra/aws_infra/services/ec2.py +++ b/aws_infra/aws_infra/services/ec2.py @@ -2224,6 +2224,74 @@ def _delete_egress_only_igw(params): return _xml(200, "DeleteEgressOnlyInternetGatewayResponse", "true") +# --------------------------------------------------------------------------- +# Supported Actions +# --------------------------------------------------------------------------- + +SUPPORTED_ACTIONS = [ + "RunInstances", "TerminateInstances", "DescribeInstances", "StartInstances", + "StopInstances", "RebootInstances", "DescribeImages", "CreateSecurityGroup", + "DeleteSecurityGroup", "DescribeSecurityGroups", + "AuthorizeSecurityGroupIngress", "RevokeSecurityGroupIngress", + "AuthorizeSecurityGroupEgress", "RevokeSecurityGroupEgress", + "CreateKeyPair", "DeleteKeyPair", "DescribeKeyPairs", "ImportKeyPair", + "DescribeVpcs", "DescribeSubnets", "DescribeAvailabilityZones", + "CreateVpc", "DeleteVpc", "CreateSubnet", "DeleteSubnet", + "CreateInternetGateway", "DeleteInternetGateway", + "DescribeInternetGateways", "AttachInternetGateway", + "DetachInternetGateway", "AllocateAddress", "ReleaseAddress", + "AssociateAddress", "DisassociateAddress", "DescribeAddresses", + "CreateTags", "DeleteTags", "DescribeTags", "ModifyVpcAttribute", + "ModifySubnetAttribute", "CreateRouteTable", "DeleteRouteTable", + "DescribeRouteTables", "AssociateRouteTable", "DisassociateRouteTable", + "CreateRoute", "ReplaceRoute", "DeleteRoute", "CreateNetworkInterface", + "DeleteNetworkInterface", "DescribeNetworkInterfaces", + "AttachNetworkInterface", "DetachNetworkInterface", "CreateVpcEndpoint", + "DeleteVpcEndpoints", "DescribeVpcEndpoints", "CreateVolume", + "DeleteVolume", 
"DescribeVolumes", "DescribeVolumeStatus", "AttachVolume", + "DetachVolume", "ModifyVolume", "DescribeVolumesModifications", + "EnableVolumeIO", "ModifyVolumeAttribute", "DescribeVolumeAttribute", + "CreateSnapshot", "DeleteSnapshot", "DescribeSnapshots", + "ModifySnapshotAttribute", "DescribeSnapshotAttribute", "CopySnapshot", + "CreateNatGateway", "DescribeNatGateways", "DeleteNatGateway", + "CreateNetworkAcl", "DescribeNetworkAcls", "DeleteNetworkAcl", + "CreateNetworkAclEntry", "DeleteNetworkAclEntry", + "ReplaceNetworkAclEntry", "ReplaceNetworkAclAssociation", + "CreateFlowLogs", "DescribeFlowLogs", "DeleteFlowLogs", + "CreateVpcPeeringConnection", "AcceptVpcPeeringConnection", + "DescribeVpcPeeringConnections", "DeleteVpcPeeringConnection", + "CreateDhcpOptions", "AssociateDhcpOptions", "DescribeDhcpOptions", + "DeleteDhcpOptions", "CreateEgressOnlyInternetGateway", + "DescribeEgressOnlyInternetGateways", "DeleteEgressOnlyInternetGateway", +] + + +# --------------------------------------------------------------------------- +# State +# --------------------------------------------------------------------------- + +def get_state() -> dict: + return { + "instances": {"count": len(_instances), "ids": list(_instances.keys())}, + "security_groups": {"count": len(_security_groups), "ids": list(_security_groups.keys())}, + "vpcs": {"count": len(_vpcs), "ids": list(_vpcs.keys())}, + "subnets": {"count": len(_subnets), "ids": list(_subnets.keys())}, + "volumes": {"count": len(_volumes), "ids": list(_volumes.keys())}, + "key_pairs": {"count": len(_key_pairs), "names": list(_key_pairs.keys())}, + "internet_gateways": {"count": len(_internet_gateways), "ids": list(_internet_gateways.keys())}, + "nat_gateways": {"count": len(_nat_gateways), "ids": list(_nat_gateways.keys())}, + "route_tables": {"count": len(_route_tables), "ids": list(_route_tables.keys())}, + "network_interfaces": {"count": len(_network_interfaces), "ids": list(_network_interfaces.keys())}, + 
"vpc_endpoints": {"count": len(_vpc_endpoints), "ids": list(_vpc_endpoints.keys())}, + "snapshots": {"count": len(_snapshots), "ids": list(_snapshots.keys())}, + "network_acls": {"count": len(_network_acls), "ids": list(_network_acls.keys())}, + "flow_logs": {"count": len(_flow_logs), "ids": list(_flow_logs.keys())}, + "vpc_peering": {"count": len(_vpc_peering), "ids": list(_vpc_peering.keys())}, + "dhcp_options": {"count": len(_dhcp_options), "ids": list(_dhcp_options.keys())}, + "egress_igws": {"count": len(_egress_igws), "ids": list(_egress_igws.keys())}, + } + + # --------------------------------------------------------------------------- # Reset # --------------------------------------------------------------------------- diff --git a/aws_infra/aws_infra/services/ecs.py b/aws_infra/aws_infra/services/ecs.py index fb0c7b204858cbea576b57d332b6afa015677d69..f216fa51d641897c6b7e1c65a13482be9a9f149d 100644 --- a/aws_infra/aws_infra/services/ecs.py +++ b/aws_infra/aws_infra/services/ecs.py @@ -1229,6 +1229,26 @@ _ACTION_MAP = { } +SUPPORTED_ACTIONS = [ + "CreateCluster", "DeleteCluster", "DescribeClusters", "ListClusters", "UpdateCluster", + "UpdateClusterSettings", "RegisterTaskDefinition", "DeregisterTaskDefinition", + "DescribeTaskDefinition", "ListTaskDefinitions", "CreateService", "DeleteService", + "DescribeServices", "UpdateService", "ListServices", "RunTask", "StopTask", + "DescribeTasks", "ListTasks", "TagResource", "UntagResource", "ListTagsForResource", + "ExecuteCommand", "ListAccountSettings", "PutAccountSetting", "CreateCapacityProvider", + "DeleteCapacityProvider", "DescribeCapacityProviders", "PutClusterCapacityProviders", +] + + +def get_state() -> dict: + return { + "clusters": {"count": len(_clusters), "names": list(_clusters.keys())}, + "task_definitions": {"count": len(_task_defs), "names": list(_task_defs.keys())}, + "services": {"count": len(_services), "names": list(_services.keys())}, + "tasks": {"count": len(_tasks), "ids": 
list(_tasks.keys())}, + } + + def reset(): docker_client = _get_docker() if docker_client: diff --git a/aws_infra/aws_infra/services/efs.py b/aws_infra/aws_infra/services/efs.py index 448e47441a27d980863d0d4b087d1de122689071..d95ac78618cfb44da3f353cdfc557eb322706bb7 100644 --- a/aws_infra/aws_infra/services/efs.py +++ b/aws_infra/aws_infra/services/efs.py @@ -497,6 +497,38 @@ def _error(status, code, message): return status, {"Content-Type": "application/json", "x-amzn-errortype": code}, body +# --------------------------------------------------------------------------- +# Supported Actions +# --------------------------------------------------------------------------- + +SUPPORTED_ACTIONS = [ + "CreateFileSystem", "DeleteFileSystem", "DescribeFileSystems", + "DescribeFileSystemPolicy", "PutFileSystemPolicy", + "DeleteFileSystemPolicy", "CreateMountTarget", "DeleteMountTarget", + "DescribeMountTargets", "ModifyMountTargetSecurityGroups", + "CreateAccessPoint", "DeleteAccessPoint", "DescribeAccessPoints", + "TagResource", "UntagResource", "ListTagsForResource", + "CreateReplicationConfiguration", "DeleteReplicationConfiguration", + "DescribeReplicationConfigurations", "PutLifecycleConfiguration", + "GetLifecycleConfiguration", "PutBackupPolicy", "GetBackupPolicy", + "DescribeAccountPreferences", "PutAccountPreferences", +] + + +# --------------------------------------------------------------------------- +# State +# --------------------------------------------------------------------------- + +def get_state() -> dict: + return { + "file_systems": {"count": len(_file_systems), "ids": list(_file_systems.keys())}, + "mount_targets": {"count": len(_mount_targets), "ids": list(_mount_targets.keys())}, + "access_points": {"count": len(_access_points), "ids": list(_access_points.keys())}, + "lifecycle_configs": {"count": len(_lifecycle_configs), "file_systems": list(_lifecycle_configs.keys())}, + "backup_policies": {"count": len(_backup_policies), "file_systems": 
list(_backup_policies.keys())}, + } + + # --------------------------------------------------------------------------- # Reset # --------------------------------------------------------------------------- diff --git a/aws_infra/aws_infra/services/elasticache.py b/aws_infra/aws_infra/services/elasticache.py index 7d46f70247afded7015a6bd74ec2b5c3dee452be..83a45bf8ddb396cf88e52f6b0e582188741f3226 100644 --- a/aws_infra/aws_infra/services/elasticache.py +++ b/aws_infra/aws_infra/services/elasticache.py @@ -1266,6 +1266,32 @@ def _error(code, message, status): return status, {"Content-Type": "application/xml"}, body +SUPPORTED_ACTIONS = [ + "CreateCacheCluster", "DeleteCacheCluster", "DescribeCacheClusters", "ModifyCacheCluster", + "RebootCacheCluster", "CreateReplicationGroup", "DeleteReplicationGroup", + "DescribeReplicationGroups", "ModifyReplicationGroup", "IncreaseReplicaCount", + "DecreaseReplicaCount", "CreateCacheSubnetGroup", "DescribeCacheSubnetGroups", + "DeleteCacheSubnetGroup", "ModifyCacheSubnetGroup", "CreateCacheParameterGroup", + "DescribeCacheParameterGroups", "DeleteCacheParameterGroup", "DescribeCacheParameters", + "ModifyCacheParameterGroup", "ResetCacheParameterGroup", "CreateUser", "DescribeUsers", + "DeleteUser", "ModifyUser", "CreateUserGroup", "DescribeUserGroups", "DeleteUserGroup", + "ModifyUserGroup", "DescribeCacheEngineVersions", "ListTagsForResource", + "AddTagsToResource", "RemoveTagsFromResource", "CreateSnapshot", "DeleteSnapshot", + "DescribeSnapshots", "DescribeEvents", +] + + +def get_state() -> dict: + return { + "clusters": {"count": len(_clusters), "ids": list(_clusters.keys())}, + "replication_groups": {"count": len(_replication_groups), "ids": list(_replication_groups.keys())}, + "users": {"count": len(_users), "ids": list(_users.keys())}, + "subnet_groups": {"count": len(_subnet_groups), "ids": list(_subnet_groups.keys())}, + "parameter_groups": {"count": len(_param_groups), "ids": list(_param_groups.keys())}, + "snapshots": 
{"count": len(_snapshots), "ids": list(_snapshots.keys())}, + } + + def reset(): docker_client = _get_docker() if docker_client: diff --git a/aws_infra/aws_infra/services/emr.py b/aws_infra/aws_infra/services/emr.py index 670b8c925d5d6e162c3146eac864d9c832cf193d..e3599413e849b48d2be996cc9ea24a8984eb7430 100644 --- a/aws_infra/aws_infra/services/emr.py +++ b/aws_infra/aws_infra/services/emr.py @@ -568,6 +568,37 @@ async def handle_request(method, path, headers, body, query_params): return handler(data) +# --------------------------------------------------------------------------- +# Supported Actions +# --------------------------------------------------------------------------- + +SUPPORTED_ACTIONS = [ + "CreateCluster", "DescribeCluster", "ListClusters", "TerminateJobFlows", + "SetTerminationProtection", "AddJobFlowSteps", "DescribeStep", + "ListSteps", "ModifyInstanceGroups", + "GetBlockPublicAccessConfiguration", + "PutBlockPublicAccessConfiguration", "ListInstances", + "DescribeInstance", "ListBootstrapActions", "GetAutoScalingPolicy", + "PutAutoScalingPolicy", "RemoveAutoScalingPolicy", + "ListSecurityConfigurations", "CreateSecurityConfiguration", + "DeleteSecurityConfiguration", "DescribeSecurityConfiguration", + "ListStudios", "CreateStudio", "DeleteStudio", "DescribeStudio", + "ListStudioSessions", "CreateStudioSession", "DeleteStudioSession", + "GetStudioSessionMapping", "CreateStudioSessionMapping", + "UpdateStudioSessionMapping", "DeleteStudioSessionMapping", +] + + +# --------------------------------------------------------------------------- +# State +# --------------------------------------------------------------------------- + +def get_state() -> dict: + return { + "clusters": {"count": len(_clusters), "ids": list(_clusters.keys())}, + } + + # --------------------------------------------------------------------------- # Reset # --------------------------------------------------------------------------- diff --git 
a/aws_infra/aws_infra/services/eventbridge.py b/aws_infra/aws_infra/services/eventbridge.py index 659fb451b867b719cd6cc2abff2fda973db76dc0..2664ff5ec5e3fd2b3bad0f38d0ad6bb66607ba97 100644 --- a/aws_infra/aws_infra/services/eventbridge.py +++ b/aws_infra/aws_infra/services/eventbridge.py @@ -991,6 +991,29 @@ def _update_api_destination(data): }) +SUPPORTED_ACTIONS = [ + "CreateEventBus", "DeleteEventBus", "ListEventBuses", "DescribeEventBus", + "PutRule", "DeleteRule", "ListRules", "DescribeRule", "EnableRule", "DisableRule", + "PutTargets", "RemoveTargets", "ListTargetsByRule", "PutEvents", + "TagResource", "UntagResource", "ListTagsForResource", + "CreateArchive", "DeleteArchive", "DescribeArchive", "ListArchives", + "PutPermission", "RemovePermission", + "CreateConnection", "DescribeConnection", "DeleteConnection", "ListConnections", + "UpdateConnection", "CreateApiDestination", "DescribeApiDestination", + "DeleteApiDestination", "ListApiDestinations", "UpdateApiDestination", +] + + +def get_state() -> dict: + return { + "event_buses": {"count": len(_event_buses), "names": list(_event_buses.keys())}, + "rules": {"count": len(_rules), "names": list(_rules.keys())}, + "archives": {"count": len(_archives), "names": list(_archives.keys())}, + "connections": {"count": len(_connections), "names": list(_connections.keys())}, + "api_destinations": {"count": len(_api_destinations), "names": list(_api_destinations.keys())}, + } + + def reset(): global _event_buses _rules.clear() diff --git a/aws_infra/aws_infra/services/firehose.py b/aws_infra/aws_infra/services/firehose.py index 626f61af42622b6d569dc41b676d9c9ceca744eb..1cfe6203abb1a2325f0017a73e24f935925862d4 100644 --- a/aws_infra/aws_infra/services/firehose.py +++ b/aws_infra/aws_infra/services/firehose.py @@ -41,6 +41,20 @@ _lock = threading.Lock() _dest_counter = 0 +SUPPORTED_ACTIONS = [ + "CreateDeliveryStream", "DeleteDeliveryStream", "DescribeDeliveryStream", + "ListDeliveryStreams", "PutRecord", "PutRecordBatch", 
"UpdateDestination", + "StartDeliveryStreamEncryption", "StopDeliveryStreamEncryption", + "ListTagsForResource", "TagResource", "UntagResource", +] + + +def get_state() -> dict: + return { + "delivery_streams": {"count": len(_streams), "names": list(_streams.keys())}, + } + + def reset(): global _streams, _dest_counter with _lock: diff --git a/aws_infra/aws_infra/services/glue.py b/aws_infra/aws_infra/services/glue.py index 441995dd55c3326f1dca63c5ebad1b4770f42cd6..c231d4c9110ebc7ed0f08b6941855f626cdd903e 100644 --- a/aws_infra/aws_infra/services/glue.py +++ b/aws_infra/aws_infra/services/glue.py @@ -1074,6 +1074,36 @@ def _simple_glob_match(pattern, name): return fnmatch.fnmatch(name, pattern) +SUPPORTED_ACTIONS = [ + "CreateDatabase", "DeleteDatabase", "GetDatabase", "GetDatabases", "UpdateDatabase", + "CreateTable", "DeleteTable", "GetTable", "GetTables", "UpdateTable", "BatchDeleteTable", + "CreatePartition", "DeletePartition", "GetPartition", "GetPartitions", + "BatchCreatePartition", "BatchGetPartition", "CreatePartitionIndex", "GetPartitionIndexes", + "CreateConnection", "DeleteConnection", "GetConnection", "GetConnections", + "CreateCrawler", "DeleteCrawler", "GetCrawler", "GetCrawlers", "UpdateCrawler", + "StartCrawler", "StopCrawler", "GetCrawlerMetrics", "CreateJob", "DeleteJob", "GetJob", + "GetJobs", "UpdateJob", "StartJobRun", "GetJobRun", "GetJobRuns", "BatchStopJobRun", + "CreateSecurityConfiguration", "DeleteSecurityConfiguration", "GetSecurityConfiguration", + "GetSecurityConfigurations", "ListSecurityConfigurations", "CreateClassifier", + "DeleteClassifier", "GetClassifier", "GetClassifiers", "UpdateClassifier", + "CreateTrigger", "DeleteTrigger", "GetTrigger", "GetTriggers", "UpdateTrigger", + "StartTrigger", "StopTrigger", "CreateWorkflow", "DeleteWorkflow", "GetWorkflow", + "GetWorkflows", "UpdateWorkflow", "StartWorkflowRun", "GetWorkflowRun", + "GetWorkflowRuns", "GetWorkflowRunProperties", "TagResource", "UntagResource", + 
"ListTagsForResource", +] + + +def get_state() -> dict: + return { + "databases": {"count": len(_databases), "names": list(_databases.keys())}, + "crawlers": {"count": len(_crawlers), "names": list(_crawlers.keys())}, + "jobs": {"count": len(_jobs), "names": list(_jobs.keys())}, + "connections": {"count": len(_connections), "names": list(_connections.keys())}, + "workflows": {"count": len(_workflows), "names": list(_workflows.keys())}, + } + + def reset(): _databases.clear() _tables.clear() diff --git a/aws_infra/aws_infra/services/iam_sts.py b/aws_infra/aws_infra/services/iam_sts.py index a3627641a17aa80148ec7a6e77ac2aa6c793c401..92ea070646d5aa1d42defcdfcff488c1eedeb1b4 100644 --- a/aws_infra/aws_infra/services/iam_sts.py +++ b/aws_infra/aws_infra/services/iam_sts.py @@ -1532,6 +1532,43 @@ _IAM_HANDLERS = { } +SUPPORTED_ACTIONS = [ + "CreateUser", "GetUser", "ListUsers", "DeleteUser", + "CreateRole", "GetRole", "ListRoles", "DeleteRole", + "CreatePolicy", "GetPolicy", "GetPolicyVersion", "ListPolicyVersions", + "ListPolicies", "DeletePolicy", "CreatePolicyVersion", "DeletePolicyVersion", + "AttachRolePolicy", "DetachRolePolicy", "ListAttachedRolePolicies", + "PutRolePolicy", "GetRolePolicy", "DeleteRolePolicy", "ListRolePolicies", + "AttachUserPolicy", "DetachUserPolicy", "ListAttachedUserPolicies", + "PutUserPolicy", "GetUserPolicy", "DeleteUserPolicy", "ListUserPolicies", + "CreateAccessKey", "ListAccessKeys", "DeleteAccessKey", + "CreateInstanceProfile", "DeleteInstanceProfile", "GetInstanceProfile", + "AddRoleToInstanceProfile", "RemoveRoleFromInstanceProfile", + "ListInstanceProfiles", "ListInstanceProfilesForRole", + "UpdateAssumeRolePolicy", + "CreateGroup", "GetGroup", "DeleteGroup", "ListGroups", + "AddUserToGroup", "RemoveUserFromGroup", "ListGroupsForUser", + "CreateServiceLinkedRole", + "CreateOpenIDConnectProvider", "GetOpenIDConnectProvider", "DeleteOpenIDConnectProvider", + "TagRole", "UntagRole", "ListRoleTags", + "TagUser", "UntagUser", 
"ListUserTags", + "TagPolicy", "UntagPolicy", "ListPolicyTags", + "SimulatePrincipalPolicy", "SimulateCustomPolicy", + "GetCallerIdentity", "AssumeRole", "GetSessionToken", +] + + +def get_state() -> dict: + return { + "users": {"count": len(_users), "names": list(_users.keys())}, + "roles": {"count": len(_roles), "names": list(_roles.keys())}, + "policies": {"count": len(_policies), "names": list(_policies.keys())}, + "instance_profiles": {"count": len(_instance_profiles), "names": list(_instance_profiles.keys())}, + "groups": {"count": len(_groups), "names": list(_groups.keys())}, + "oidc_providers": {"count": len(_oidc_providers), "names": list(_oidc_providers.keys())}, + } + + def reset(): _users.clear() _roles.clear() diff --git a/aws_infra/aws_infra/services/kinesis.py b/aws_infra/aws_infra/services/kinesis.py index 42d09092e9ebf758ce76cb7bcc1e6363df2e3b2e..62a28d74107fe2deb17f85d13924c67ae21a5345 100644 --- a/aws_infra/aws_infra/services/kinesis.py +++ b/aws_infra/aws_infra/services/kinesis.py @@ -899,6 +899,25 @@ def _stream_desc(stream, shard_ids=None): } +SUPPORTED_ACTIONS = [ + "CreateStream", "DeleteStream", "DescribeStream", "DescribeStreamSummary", + "ListStreams", "PutRecord", "PutRecords", "GetShardIterator", "GetRecords", + "MergeShards", "SplitShard", "UpdateShardCount", "ListShards", + "IncreaseStreamRetentionPeriod", "DecreaseStreamRetentionPeriod", + "AddTagsToStream", "RemoveTagsFromStream", "ListTagsForStream", + "RegisterStreamConsumer", "DeregisterStreamConsumer", "ListStreamConsumers", + "DescribeStreamConsumer", "StartStreamEncryption", "StopStreamEncryption", + "EnableEnhancedMonitoring", "DisableEnhancedMonitoring", +] + + +def get_state() -> dict: + return { + "streams": {"count": len(_streams), "names": list(_streams.keys())}, + "consumers": {"count": len(_consumers), "names": list(_consumers.keys())}, + } + + def reset(): _streams.clear() _shard_iterators.clear() diff --git a/aws_infra/aws_infra/services/lambda_svc.py 
b/aws_infra/aws_infra/services/lambda_svc.py index a55db5fe635316d366761f5e81f5e314df807271..a2009b1e9266145847aff9f1b3f3b3afee634e56 100644 --- a/aws_infra/aws_infra/services/lambda_svc.py +++ b/aws_infra/aws_infra/services/lambda_svc.py @@ -2531,6 +2531,36 @@ def _list_function_url_configs(func_name: str, query_params: dict): return json_response({"FunctionUrlConfigs": configs}) +SUPPORTED_ACTIONS = [ + "CreateFunction", "DeleteFunction", "GetFunction", "GetFunctionConfiguration", + "ListFunctions", "Invoke", + "UpdateFunctionCode", "UpdateFunctionConfiguration", + "PublishVersion", "ListVersionsByFunction", + "CreateAlias", "GetAlias", "UpdateAlias", "DeleteAlias", "ListAliases", + "AddPermission", "RemovePermission", "GetPolicy", + "ListTags", "TagResource", "UntagResource", + "PublishLayerVersion", "GetLayerVersion", "GetLayerVersionByArn", + "ListLayerVersions", "DeleteLayerVersion", "ListLayers", + "AddLayerVersionPermission", "RemoveLayerVersionPermission", "GetLayerVersionPolicy", + "CreateEventSourceMapping", "DeleteEventSourceMapping", + "GetEventSourceMapping", "ListEventSourceMappings", "UpdateEventSourceMapping", + "GetFunctionEventInvokeConfig", "PutFunctionEventInvokeConfig", + "PutFunctionConcurrency", "GetFunctionConcurrency", "DeleteFunctionConcurrency", + "GetFunctionCodeSigningConfig", + "CreateFunctionUrlConfig", "GetFunctionUrlConfig", + "UpdateFunctionUrlConfig", "DeleteFunctionUrlConfig", "ListFunctionUrlConfigs", +] + + +def get_state() -> dict: + return { + "functions": {"count": len(_functions), "names": list(_functions.keys())}, + "layers": {"count": len(_layers), "names": list(_layers.keys())}, + "event_source_mappings": {"count": len(_esms), "ids": list(_esms.keys())}, + "function_urls": {"count": len(_function_urls), "keys": list(_function_urls.keys())}, + } + + def reset(): from aws_infra.core import lambda_runtime diff --git a/aws_infra/aws_infra/services/rds.py b/aws_infra/aws_infra/services/rds.py index 
76e2d5b3f1e1565aed62ebbd79b9b77da630d421..0170b163ff5bafe1cf3f109559b2f8affcb8c8b4 100644 --- a/aws_infra/aws_infra/services/rds.py +++ b/aws_infra/aws_infra/services/rds.py @@ -1972,6 +1972,34 @@ _ACTION_MAP = { } +SUPPORTED_ACTIONS = [ + "CreateDBInstance", "DeleteDBInstance", "DescribeDBInstances", "ModifyDBInstance", + "StartDBInstance", "StopDBInstance", "RebootDBInstance", "CreateDBCluster", + "DeleteDBCluster", "DescribeDBClusters", "ModifyDBCluster", "StartDBCluster", + "StopDBCluster", "CreateDBSubnetGroup", "DeleteDBSubnetGroup", "DescribeDBSubnetGroups", + "ModifyDBSubnetGroup", "CreateDBParameterGroup", "DeleteDBParameterGroup", + "DescribeDBParameterGroups", "DescribeDBParameters", "ModifyDBParameterGroup", + "CreateDBClusterParameterGroup", "DescribeDBClusterParameterGroups", + "DeleteDBClusterParameterGroup", "DescribeDBClusterParameters", + "ModifyDBClusterParameterGroup", "CreateDBSnapshot", "DeleteDBSnapshot", + "DescribeDBSnapshots", "CreateDBClusterSnapshot", "DescribeDBClusterSnapshots", + "DeleteDBClusterSnapshot", "CreateOptionGroup", "DeleteOptionGroup", + "DescribeOptionGroups", "DescribeOptionGroupOptions", "CreateDBInstanceReadReplica", + "RestoreDBInstanceFromDBSnapshot", "ListTagsForResource", "AddTagsToResource", + "RemoveTagsFromResource", "DescribeDBEngineVersions", "DescribeOrderableDBInstanceOptions", +] + + +def get_state() -> dict: + return { + "instances": {"count": len(_instances), "ids": list(_instances.keys())}, + "clusters": {"count": len(_clusters), "ids": list(_clusters.keys())}, + "subnet_groups": {"count": len(_subnet_groups), "names": list(_subnet_groups.keys())}, + "snapshots": {"count": len(_snapshots), "ids": list(_snapshots.keys())}, + "db_cluster_snapshots": {"count": len(_db_cluster_snapshots), "ids": list(_db_cluster_snapshots.keys())}, + } + + def reset(): docker_client = _get_docker() if docker_client: diff --git a/aws_infra/aws_infra/services/route53.py b/aws_infra/aws_infra/services/route53.py index 
6b39daed23928f2c59e31c8f25fb293d31e97776..1887bf23cc3700f486577338cb6e42b77e0e3dff 100644 --- a/aws_infra/aws_infra/services/route53.py +++ b/aws_infra/aws_infra/services/route53.py @@ -45,6 +45,28 @@ _hc_caller_refs: dict = {} # caller_reference -> hc_id _lock = threading.Lock() +SUPPORTED_ACTIONS = [ + "CreateHostedZone", "DeleteHostedZone", "ListHostedZones", "GetHostedZone", + "UpdateHostedZoneComment", "GetChange", "ListResourceRecordSets", + "ChangeResourceRecordSets", "GetHostedZoneCount", "GetDNSSEC", "CreateHealthCheck", + "DeleteHealthCheck", "GetHealthCheck", "ListHealthChecks", "UpdateHealthCheckComment", + "GetHealthCheckStatus", "GetHealthCheckCount", "ChangeTagsForResource", + "ListTagsForResource", "ListTagsForResources", "CreateQueryLoggingConfig", + "DeleteQueryLoggingConfig", "ListQueryLoggingConfigs", "GetQueryLoggingConfig", + "ListHostedZonesByName", "CreateReusableDelegationSet", "DeleteReusableDelegationSet", + "ListReusableDelegationSets", "GetReusableDelegationSet", +] + + +def get_state() -> dict: + return { + "hosted_zones": {"count": len(_zones), "ids": list(_zones.keys())}, + "health_checks": {"count": len(_health_checks), "ids": list(_health_checks.keys())}, + "tags": {"count": len(_tags), "resources": list(_tags.keys())}, + "record_sets": {"count": sum(len(recs) for recs in _records.values())}, + } + + def reset(): global _zones, _records, _changes, _health_checks, _tags, _caller_refs, _hc_caller_refs with _lock: diff --git a/aws_infra/aws_infra/services/s3.py b/aws_infra/aws_infra/services/s3.py index a6b6b0d43b9654d4ccd0632fac0840c86a51af68..b0207682bffcca852c698d2ab9c3530c51a3198c 100644 --- a/aws_infra/aws_infra/services/s3.py +++ b/aws_infra/aws_infra/services/s3.py @@ -2718,6 +2718,39 @@ def _load_persisted_data(): _load_persisted_data() +SUPPORTED_ACTIONS = [ + "CreateBucket", "DeleteBucket", "ListBuckets", "HeadBucket", + "PutObject", "GetObject", "DeleteObject", "HeadObject", "CopyObject", + "ListObjectsV1", "ListObjectsV2", 
"DeleteObjects", + "PutObjectTagging", "GetObjectTagging", "DeleteObjectTagging", + "ListObjectVersions", "PutBucketVersioning", "GetBucketVersioning", + "PutBucketPolicy", "GetBucketPolicy", "DeleteBucketPolicy", + "PutBucketNotificationConfiguration", "GetBucketNotificationConfiguration", + "PutBucketEncryption", "GetBucketEncryption", "DeleteBucketEncryption", + "PutBucketLifecycleConfiguration", "GetBucketLifecycleConfiguration", "DeleteBucketLifecycle", + "PutBucketCors", "GetBucketCors", "DeleteBucketCors", + "PutBucketAcl", "GetBucketAcl", + "PutBucketWebsite", "GetBucketWebsite", "DeleteBucketWebsite", + "PutBucketLogging", "GetBucketLogging", + "PutBucketAccelerateConfiguration", "GetBucketAccelerateConfiguration", + "PutBucketRequestPayment", "GetBucketRequestPayment", + "PutObjectLockConfiguration", "GetObjectLockConfiguration", + "PutObjectRetention", "GetObjectRetention", + "PutObjectLegalHold", "GetObjectLegalHold", + "PutBucketReplication", "GetBucketReplication", "DeleteBucketReplication", + "CreateMultipartUpload", "UploadPart", "CompleteMultipartUpload", + "AbortMultipartUpload", "ListMultipartUploads", + "GetBucketLocation", + "GetBucketTagging", "PutBucketTagging", "DeleteBucketTagging", +] + + +def get_state() -> dict: + return { + "buckets": {"count": len(_buckets), "names": list(_buckets.keys())}, + } + + def reset(): """Wipe all in-memory state (used by /_ministack/reset).""" global _buckets, _bucket_policies, _bucket_notifications, _bucket_tags diff --git a/aws_infra/aws_infra/services/secretsmanager.py b/aws_infra/aws_infra/services/secretsmanager.py index f65b12a9d7ea194038f7d5b5f684448600a400ba..5bd4791fa89bcf77f3f99c170b399b94becd90a4 100644 --- a/aws_infra/aws_infra/services/secretsmanager.py +++ b/aws_infra/aws_infra/services/secretsmanager.py @@ -708,6 +708,23 @@ def _validate_resource_policy(data): }) +SUPPORTED_ACTIONS = [ + "CreateSecret", "GetSecretValue", "ListSecrets", "DeleteSecret", + "RestoreSecret", "UpdateSecret", 
"DescribeSecret", "PutSecretValue", + "TagResource", "UntagResource", "ListSecretVersionIds", + "RotateSecret", "GetRandomPassword", "ReplicateSecretToRegions", + "PutResourcePolicy", "GetResourcePolicy", "DeleteResourcePolicy", + "ValidateResourcePolicy", +] + + +def get_state() -> dict: + return { + "secrets": {"count": len(_secrets), "names": list(_secrets.keys())}, + "resource_policies": {"count": len(_resource_policies), "arns": list(_resource_policies.keys())}, + } + + def reset(): _secrets.clear() _resource_policies.clear() diff --git a/aws_infra/aws_infra/services/ses.py b/aws_infra/aws_infra/services/ses.py index f6bf0e248d5ac50d9910f7f0461f1d0a166317d1..df239a478dbf59dbfb9944b53638e2afb072b11b 100644 --- a/aws_infra/aws_infra/services/ses.py +++ b/aws_infra/aws_infra/services/ses.py @@ -1002,6 +1002,28 @@ def _json_error(code, message, status): return _json_response(status, {"__type": code, "message": message}) +SUPPORTED_ACTIONS = [ + "SendEmail", "SendRawEmail", "SendTemplatedEmail", "SendBulkTemplatedEmail", + "VerifyEmailIdentity", "VerifyEmailAddress", "VerifyDomainIdentity", + "VerifyDomainDkim", "ListIdentities", "GetIdentityVerificationAttributes", + "DeleteIdentity", "GetSendQuota", "GetSendStatistics", + "ListVerifiedEmailAddresses", "CreateConfigurationSet", + "DeleteConfigurationSet", "DescribeConfigurationSet", "ListConfigurationSets", + "CreateTemplate", "GetTemplate", "DeleteTemplate", "ListTemplates", + "UpdateTemplate", "GetIdentityDkimAttributes", "SetIdentityNotificationTopic", + "SetIdentityFeedbackForwardingEnabled", +] + + +def get_state() -> dict: + return { + "identities": {"count": len(_identities), "names": list(_identities.keys())}, + "templates": {"count": len(_templates), "names": list(_templates.keys())}, + "configuration_sets": {"count": len(_configuration_sets), "names": list(_configuration_sets.keys())}, + "sent_emails": {"count": len(_sent_emails)}, + } + + def reset(): _identities.clear() _sent_emails.clear() diff --git 
a/aws_infra/aws_infra/services/ses_v2.py b/aws_infra/aws_infra/services/ses_v2.py index 3cd2cffd381eb0e1e0157bebe081b609bf8c1bf6..7ed9c7094f31b81a1b6c592cdbe68232464ab2a5 100644 --- a/aws_infra/aws_infra/services/ses_v2.py +++ b/aws_infra/aws_infra/services/ses_v2.py @@ -156,6 +156,23 @@ async def handle_request(method, path, headers, body, query_params): return _json_err("NotFoundException", f"Unknown SES v2 path: {method} {path}", 404) +SUPPORTED_ACTIONS = [ + "SendEmail", "CreateEmailIdentity", "GetEmailIdentity", "DeleteEmailIdentity", + "ListEmailIdentities", "CreateConfigurationSet", "GetConfigurationSet", + "DeleteConfigurationSet", "ListConfigurationSets", "GetAccount", + "ListSuppressedDestinations", "PutAccountSuppressionAttributes", + "TagResource", "UntagResource", "ListTagsForResource", +] + + +def get_state() -> dict: + return { + "identities": {"count": len(_identities), "names": list(_identities.keys())}, + "configuration_sets": {"count": len(_config_sets), "names": list(_config_sets.keys())}, + "tags": {"count": len(_ses_tags), "resources": list(_ses_tags.keys())}, + } + + def reset(): _identities.clear() _config_sets.clear() diff --git a/aws_infra/aws_infra/services/sns.py b/aws_infra/aws_infra/services/sns.py index 1b724a43b1a9099c7c6262ee2ac2ed4edf8e8f63..6879dac3425f0e3255cecd1b63aa4b24530ec049 100644 --- a/aws_infra/aws_infra/services/sns.py +++ b/aws_infra/aws_infra/services/sns.py @@ -950,6 +950,27 @@ def _build_envelope(topic_arn: str, msg_id: str, message: str, subject: str, return json.dumps({k: v for k, v in envelope.items() if v is not None}) +SUPPORTED_ACTIONS = [ + "CreateTopic", "DeleteTopic", "ListTopics", + "GetTopicAttributes", "SetTopicAttributes", + "Subscribe", "Unsubscribe", "ConfirmSubscription", + "ListSubscriptions", "ListSubscriptionsByTopic", + "GetSubscriptionAttributes", "SetSubscriptionAttributes", + "Publish", "PublishBatch", + "ListTagsForResource", "TagResource", "UntagResource", + "CreatePlatformApplication", 
"CreatePlatformEndpoint", +] + + +def get_state() -> dict: + return { + "topics": {"count": len(_topics), "names": list(_topics.keys())}, + "platform_applications": {"count": len(_platform_applications), "names": list(_platform_applications.keys())}, + "platform_endpoints": {"count": len(_platform_endpoints), "names": list(_platform_endpoints.keys())}, + "subscriptions": {"count": len(_sub_arn_to_topic), "sub_arn_to_topic": dict(_sub_arn_to_topic.items())}, + } + + def reset(): _topics.clear() _sub_arn_to_topic.clear() diff --git a/aws_infra/aws_infra/services/sqs.py b/aws_infra/aws_infra/services/sqs.py index 03012dcfe6d676f88a939b33d85cb6c47474bbc8..2319468ae90adfb4aab4299a80e0e4551999429b 100644 --- a/aws_infra/aws_infra/services/sqs.py +++ b/aws_infra/aws_infra/services/sqs.py @@ -1231,6 +1231,23 @@ def _url_from_path(path: str) -> str: return "" +SUPPORTED_ACTIONS = [ + "CreateQueue", "DeleteQueue", "ListQueues", "GetQueueUrl", + "GetQueueAttributes", "SetQueueAttributes", "PurgeQueue", + "SendMessage", "ReceiveMessage", "DeleteMessage", + "ChangeMessageVisibility", "ChangeMessageVisibilityBatch", + "SendMessageBatch", "DeleteMessageBatch", + "ListQueueTags", "TagQueue", "UntagQueue", +] + + +def get_state() -> dict: + return { + "queues": {"count": len(_queues), "names": list(_queues.keys())}, + "queue_name_to_url": dict(_queue_name_to_url), + } + + def reset(): _queues.clear() _queue_name_to_url.clear() diff --git a/aws_infra/aws_infra/services/ssm.py b/aws_infra/aws_infra/services/ssm.py index cf12bea06ee111fddca58b59435e79b3df642ccf..6a4ad6c05bace2b7ef95b98104aaafe6e9e191d9 100644 --- a/aws_infra/aws_infra/services/ssm.py +++ b/aws_infra/aws_infra/services/ssm.py @@ -488,6 +488,21 @@ def _param_out(param, with_decryption=False): return out +SUPPORTED_ACTIONS = [ + "PutParameter", "GetParameter", "GetParameters", "GetParametersByPath", + "DeleteParameter", "DeleteParameters", "DescribeParameters", + "GetParameterHistory", "LabelParameterVersion", 
"AddTagsToResource", + "RemoveTagsFromResource", "ListTagsForResource", +] + + +def get_state() -> dict: + return { + "parameters": {"count": len(_parameters), "names": list(_parameters.keys())}, + "tags": {"count": len(_tags), "arns": list(_tags.keys())}, + } + + def reset(): _parameters.clear() _parameter_history.clear() diff --git a/aws_infra/aws_infra/services/stepfunctions.py b/aws_infra/aws_infra/services/stepfunctions.py index 0a93d8e52ab1c94c464d713e17a9de1dc6b77f04..020c42773299d795cfd217250aa0298ea2ddd429 100644 --- a/aws_infra/aws_infra/services/stepfunctions.py +++ b/aws_infra/aws_infra/services/stepfunctions.py @@ -1786,6 +1786,25 @@ _SERVICE_DISPATCH = { } +SUPPORTED_ACTIONS = [ + "CreateStateMachine", "DeleteStateMachine", "DescribeStateMachine", "UpdateStateMachine", + "ListStateMachines", "StartExecution", "StartSyncExecution", "StopExecution", + "DescribeExecution", "DescribeStateMachineForExecution", "ListExecutions", + "GetExecutionHistory", "SendTaskSuccess", "SendTaskFailure", "SendTaskHeartbeat", + "CreateActivity", "DeleteActivity", "DescribeActivity", "ListActivities", + "GetActivityTask", "TagResource", "UntagResource", "ListTagsForResource", +] + + +def get_state() -> dict: + return { + "state_machines": {"count": len(_state_machines), "names": list(_state_machines.keys())}, + "executions": {"count": len(_executions), "arns": list(_executions.keys())}, + "activities": {"count": len(_activities), "names": list(_activities.keys())}, + "tags": {"count": len(_tags), "resources": list(_tags.keys())}, + } + + def reset(): _state_machines.clear() _executions.clear() diff --git a/aws_infra/aws_infra/services/waf.py b/aws_infra/aws_infra/services/waf.py index f4dd36aff6b58101a809198668ce16cb680655ad..fb8245551a17b82dca3a89e350f6de8dfe54e1fa 100644 --- a/aws_infra/aws_infra/services/waf.py +++ b/aws_infra/aws_infra/services/waf.py @@ -358,6 +358,27 @@ def _describe_managed_rule_group(data): }) +SUPPORTED_ACTIONS = [ + "CreateWebACL", "GetWebACL", 
"UpdateWebACL", "DeleteWebACL", "ListWebACLs", + "AssociateWebACL", "DisassociateWebACL", "GetWebACLForResource", + "ListResourcesForWebACL", "CreateIPSet", "GetIPSet", "UpdateIPSet", + "DeleteIPSet", "ListIPSets", "CreateRuleGroup", "GetRuleGroup", + "UpdateRuleGroup", "DeleteRuleGroup", "ListRuleGroups", + "TagResource", "UntagResource", "ListTagsForResource", + "CheckCapacity", "DescribeManagedRuleGroup", +] + + +def get_state() -> dict: + return { + "web_acls": {"count": len(_web_acls), "ids": list(_web_acls.keys())}, + "ip_sets": {"count": len(_ip_sets), "ids": list(_ip_sets.keys())}, + "rule_groups": {"count": len(_rule_groups), "ids": list(_rule_groups.keys())}, + "associations": {"count": len(_associations), "resources": list(_associations.keys())}, + "waf_tags": {"count": len(_waf_tags), "resources": list(_waf_tags.keys())}, + } + + def reset(): _web_acls.clear() _ip_sets.clear() diff --git a/client.py b/client.py index c4ef6bf3fc3fde6b7f464128e0ca6a6d8b58cd6e..c2e25e91b3f8439fdf8988cd7e5c88a85a758d44 100644 --- a/client.py +++ b/client.py @@ -10,12 +10,11 @@ from typing import Dict from openenv.core import EnvClient from openenv.core.client_types import StepResult -from openenv.core.env_server.types import State -from models import AwsRlAction, AwsRlObservation, EpisodeID, StepCount +from models import AwsRlAction, AwsRlObservation, EpisodeID, StepCount, AwsRlState -class AwsRlEnv(EnvClient[AwsRlAction, AwsRlObservation, State]): +class AwsRlEnv(EnvClient[AwsRlAction, AwsRlObservation, AwsRlState]): """ Client for the Aws Rl Env Environment. 
@@ -65,9 +64,19 @@ class AwsRlEnv(EnvClient[AwsRlAction, AwsRlObservation, State]): done=payload.get("done", False), ) - def _parse_state(self, payload: Dict) -> State: - """Parse server response into State object.""" - return State( + def _parse_state(self, payload: Dict) -> AwsRlState: + """Parse server response into AwsRlState object.""" + from models import TrackerState, Task + + tracker_data = payload.get("tracker", {}) + task_data = payload.get("current_task") + + return AwsRlState( episode_id=payload.get("episode_id"), step_count=payload.get("step_count", 0), + current_task=Task(**task_data) if task_data else None, + tracker=TrackerState(**tracker_data) if tracker_data else TrackerState(), + infra_state=payload.get("infra_state", {}), + chaos_occurred=payload.get("chaos_occurred", False), + current_tier=payload.get("current_tier", "warmup"), ) diff --git a/inference-complete.py b/inference-complete.py index 7ae7ccefb5b92c6b64bd22368891eb0c7ff128ed..b0eeb0c3cfce4f5b74331990403fa9078d5fcdb3 100644 --- a/inference-complete.py +++ b/inference-complete.py @@ -89,6 +89,8 @@ SYSTEM_PROMPT = textwrap.dedent( - Only send AWS CLI commands (e.g. 'aws s3 ls', 'aws dynamodb create-table ...') - One command per turn — no pipes, no shell syntax, no chaining - Reply with ONLY the command, nothing else — no explanations, no quotes + - If unsure, use 'aws help' to get unstuck, but try to be specific to the service if possible (e.g. 
'aws s3 help') + - When ever you need a hint, use 'aws help --task-hint' to get a task-specific hint (you can use this multiple times for more hints, but hints reduce your reward) """ ).strip() @@ -165,10 +167,7 @@ def get_model_command( # --------------------------------------------------------------------------- -async def run_episode( - env: AwsRlEnv, - llm_client: OpenAI -) -> Optional[dict]: +async def run_episode(env: AwsRlEnv, llm_client: OpenAI) -> Optional[dict]: """Run a single episode: reset -> step loop -> return results.""" result = await env.reset() obs = result.observation @@ -182,9 +181,9 @@ async def run_episode( task_desc = task.description task_id = int(task.task_id) - print(f"\n{'='*60}") + print(f"\n{'=' * 60}") print(f"Episode {episode_num} -- Task {task_id}: {task_desc} (tier: {tier})") - print(f"\n{'='*60}") + print(f"\n{'=' * 60}") history: List[str] = [] last_output = obs.command_output @@ -206,7 +205,6 @@ async def run_episode( last_reward, history, ) - result = await env.step(AwsRlAction(command=command)) obs = result.observation @@ -214,21 +212,22 @@ async def run_episode( reward = result.reward or 0.0 success = obs.command_success task_achieved = obs.task_achieved - done = result.done rewards.append(reward) print() - print(f"\n{'-'*60}") + print(f"\n{'-' * 60}") print( - f" [Step {step}] cmd=\"{command}\" command_output={obs.command_output!r} " + f' [Step {step}] cmd="{command}" command_output={obs.command_output!r} ' f"reward={reward:.2f} command_success={success} achieved={task_achieved}" ) - print(f"\n{'-'*60}") + print(f"\n{'-' * 60}") print() status = "OK" if success else "FAIL" - history.append(f"Step {step} [{status}]: {command} [command_output]={obs.command_output!r} [error]={obs.error!r} -> reward={reward:.2f}") + history.append( + f"Step {step} [{status}]: {command} [command_output]={obs.command_output!r} [error]={obs.error!r} -> reward={reward:.2f}" + ) last_output = obs.command_output last_error = obs.error last_reward = 
reward @@ -299,9 +298,9 @@ def print_summary(tier_results: dict[str, list]) -> None: total_passed = 0 total_tasks = 0 - print(f"\n{'='*60}") + print(f"\n{'=' * 60}") print("FINAL RESULTS") - print(f"{'='*60}") + print(f"{'=' * 60}") for tier in ALL_TIERS: results = tier_results.get(tier, []) diff --git a/inference.py b/inference.py index 847005b593d422e67a45ab721ad15c11f92d65eb..63e4daf5a51e7ecd60b631deb317e5c07d7f9370 100644 --- a/inference.py +++ b/inference.py @@ -54,6 +54,10 @@ load_dotenv() # Load variables from .env file if present API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1") MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct") +if not API_BASE_URL: + API_BASE_URL = "https://router.huggingface.co/v1" +if not MODEL_NAME: + MODEL_NAME = "Qwen/Qwen2.5-72B-Instruct" HF_TOKEN = os.getenv("HF_TOKEN") API_KEY = os.getenv("API_KEY") # Optional if using HF_TOKEN @@ -77,6 +81,8 @@ SYSTEM_PROMPT = textwrap.dedent( - Only send AWS CLI commands (e.g. 'aws s3 ls', 'aws dynamodb create-table ...') - One command per turn — no pipes, no shell syntax, no chaining - Reply with ONLY the command, nothing else — no explanations, no quotes + - If unsure, use 'aws help' to get unstuck, but try to be specific to the service if possible (e.g. 'aws s3 help') + - When ever you need a hint, use 'aws help --task-hint' to get a task-specific hint (you can use this multiple times for more hints, but hints reduce your reward) """ ).strip() diff --git a/models.py b/models.py index 7c6b3360eb69a2d98b3648cb3b6c39d3ebb80d12..24ca2d8078534afb1afba07d9e7f424063e76b2c 100644 --- a/models.py +++ b/models.py @@ -3,9 +3,9 @@ Data models for the Aws Rl Env Environment. 
""" from enum import Enum -from typing import NewType, Union +from typing import NewType -from openenv.core.env_server.types import Action, Observation +from openenv.core.env_server.types import Action, Observation, State from pydantic import BaseModel, Field # --------------------------------------------------------------------------- @@ -18,6 +18,7 @@ StepCount = NewType("StepCount", int) class AwsService(str, Enum): + # Core services S3 = "s3" EC2 = "ec2" DYNAMODB = "dynamodb" @@ -26,6 +27,31 @@ class AwsService(str, Enum): SNS = "sns" IAM = "iam" APIGATEWAY = "apigateway" + SECRETSMANAGER = "secretsmanager" + # Compute & containers + ECS = "ecs" + # Data & analytics + RDS = "rds" + ELASTICACHE = "elasticache" + ATHENA = "athena" + GLUE = "glue" + FIREHOSE = "firehose" + EMR = "emr" + # Networking & routing + APIGATEWAYV2 = "apigatewayv2" + ROUTE53 = "route53" + ELBV2 = "elbv2" + # Storage + EBS = "ebs" + EFS = "efs" + # Identity & config + COGNITO = "cognito-idp" + SSM = "ssm" + EVENTBRIDGE = "events" + # Monitoring + CLOUDWATCH = "cloudwatch" + # Infrastructure as code + CLOUDFORMATION = "cloudformation" # --------------------------------------------------------------------------- @@ -62,6 +88,12 @@ class TierConfig(BaseModel): le=1.0, description="Success rate for early promotion after 3 episodes", ) + chaos_probability: float = Field( + default=0.0, + ge=0.0, + le=1.0, + description="Probability of chaos injection per step", + ) class SpacedRepState(BaseModel): @@ -169,6 +201,82 @@ class Task(BaseModel): default_factory=list, description="Commands to run during reset to set up initial state (e.g. 
for SRE tasks)", ) + desired_state_spec: str | None = Field( + default=None, + description="Natural-language specification of the desired end state (shown to agent for drift tasks)", + ) + possible_drifts: list[SetupCommand] = Field( + default_factory=list, + description="Pool of mutations the DriftEngine may randomly apply after setup", + ) + + +class TaskInfo(BaseModel): + """Agent-visible subset of Task — masks success_criteria, setup_commands, and possible_drifts.""" + + task_id: TaskID = Field(..., ge=0, description="Unique task identifier") + difficulty: TaskDifficulty = Field( + default=TaskDifficulty.WARMUP, description="Task difficulty level" + ) + description: str = Field(..., description="Human-readable task description") + desired_state_spec: str | None = Field( + default=None, + description="Natural-language specification of the desired end state (shown to agent for drift tasks)", + ) + + @classmethod + def from_task(cls, task: Task) -> "TaskInfo": + """Create a masked TaskInfo from a full Task.""" + return cls( + task_id=task.task_id, + difficulty=task.difficulty, + description=task.description, + desired_state_spec=task.desired_state_spec, + ) + + +# --------------------------------------------------------------------------- +# Environment State +# --------------------------------------------------------------------------- + + +class TrackerState(BaseModel): + """Serializable snapshot of the EpisodeTracker.""" + + step_count: int = Field(default=0, ge=0, description="Steps taken this episode") + hints_used: int = Field(default=0, ge=0, description="Hints requested this episode") + progress: float = Field( + default=0.0, ge=0.0, le=1.0, description="Current partial progress" + ) + commands_executed: list[str] = Field( + default_factory=list, description="Commands executed this episode" + ) + credited_operations: list[str] = Field( + default_factory=list, + description="(operation, resource) pairs that earned credit", + ) + + +class AwsRlState(State): 
+ """Full environment state including task, tracker, and infrastructure.""" + + current_task: Task | None = Field( + default=None, description="The task assigned for this episode" + ) + tracker: TrackerState = Field( + default_factory=TrackerState, + description="Episode tracker snapshot", + ) + infra_state: dict = Field( + default_factory=dict, + description="AWS infrastructure state keyed by service name", + ) + chaos_occurred: bool = Field( + default=False, description="Whether chaos was injected this episode" + ) + current_tier: str = Field( + default="warmup", description="Agent's current difficulty tier" + ) # --------------------------------------------------------------------------- @@ -199,13 +307,21 @@ class AwsRlObservation(Observation): default="", description="Stdout from the executed AWS CLI command" ) error: str = Field(default="", description="Stderr if the command failed") - resources: dict[AwsService, Union[dict, list, str]] = Field( - default_factory=dict, - description="Current resource state from MiniStack, keyed by service name", - ) - task: Task | None = Field( - default=None, description="The task the agent is trying to accomplish" + task: TaskInfo | None = Field( + default=None, description="The task the agent is trying to accomplish (masked)" ) task_achieved: bool = Field( default=False, description="Whether the task has been achieved" ) + partial_progress: float = Field( + default=0.0, + ge=0.0, + le=1.0, + description="Current task progress (0.0 to 1.0)", + ) + hints_used: int = Field( + default=0, ge=0, description="Number of hints requested this episode" + ) + hint_text: str = Field( + default="", description="Text of the most recently requested hint" + ) diff --git a/pyproject.toml b/pyproject.toml index 0f06b8eb02f9b3f065bce9de8509430a3d5901c3..ad8794739e53fa86350ce16dc1487be28238c9ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,11 @@ include-package-data = true packages = ["aws_rl_env", "aws_rl_env.server"] 
package-dir = { "aws_rl_env" = ".", "aws_rl_env.server" = "server" } +[tool.pytest.ini_options] +addopts = "--import-mode=importlib" +testpaths = ["tests"] +pythonpath = ["."] + [tool.ruff] exclude = ["aws_infra/"] diff --git a/server/app.py b/server/app.py index ef0619cd5ee1ca272ad3da9187a6d759581bf935..95eaf1a81b4842047b492de2b01f933649464612 100644 --- a/server/app.py +++ b/server/app.py @@ -83,6 +83,29 @@ async def web_reset(): } +@app.get("/web/solution", include_in_schema=False) +async def web_solution(): + """Return the next solution command for the current task step.""" + if not _env._current_task: + return {"command": None, "error": "No active task. Start a new episode first."} + + from server.services.task_solutions import get_next_solution + + result = get_next_solution( + task_id=_env._current_task.task_id, + backend=_env._backend, + tracker=_env._tracker, + ) + result["task_id"] = _env._current_task.task_id + return result + + +@app.get("/web/state", include_in_schema=False) +async def web_state(): + """Return the full AwsRlState for the web UI.""" + return _env.state.model_dump() + + @app.post("/web/step", include_in_schema=False) async def web_step(request: WebStepRequest = Body(...)): action = AwsRlAction(**request.action) diff --git a/server/aws_rl_env_environment.py b/server/aws_rl_env_environment.py index 4b1cf0e7c588e30dd8b49e0635c856904302c0e7..673084878640df0086d0b744e16d561c19eddb92 100644 --- a/server/aws_rl_env_environment.py +++ b/server/aws_rl_env_environment.py @@ -18,31 +18,59 @@ from typing import Any, Optional from uuid import uuid4 from openenv.core.env_server.interfaces import Environment -from openenv.core.env_server.types import State -from models import AwsRlAction, AwsRlObservation, EpisodeID, StepCount, Task +from models import ( + AwsRlAction, + AwsRlObservation, + AwsRlState, + EpisodeID, + StepCount, + Task, + TaskInfo, + TrackerState, +) from server.services.aws_backend import AwsBackend +from server.services.chaos_engine 
import ChaosEngine from server.services.curriculum import Curriculum from server.services.environment_designer import EnvironmentDesigner from server.services.episode_tracker import EpisodeTracker +from server.services.hint_provider import HintProvider, MAX_HINT_LEVEL from server.services.task_grader import TaskGrader logger = logging.getLogger(__name__) -class AwsRlEnvironment(Environment[AwsRlAction, AwsRlObservation, State]): +class AwsRlEnvironment(Environment[AwsRlAction, AwsRlObservation, AwsRlState]): SUPPORTS_CONCURRENT_SESSIONS: bool = True def __init__(self) -> None: print("Initializing AWS RL Environment...") - self._state = State(episode_id=str(uuid4()), step_count=0) + self._state = AwsRlState(episode_id=str(uuid4()), step_count=0) self._backend = AwsBackend() self._curriculum = Curriculum() self._grader = TaskGrader(self._backend) self._designer = EnvironmentDesigner(self._backend) self._tracker = EpisodeTracker() + self._chaos_engine = ChaosEngine(self._backend) + self._hint_provider = HintProvider() self._current_task: Task | None = None + def _sync_state(self) -> None: + """Sync internal state to the AwsRlState object.""" + self._state.current_task = self._current_task + self._state.tracker = TrackerState( + step_count=self._tracker.step_count, + hints_used=self._tracker.hints_used, + progress=self._tracker.previous_progress, + commands_executed=[s.command for s in self._tracker.command_history], + credited_operations=[ + f"{op}:{res}" for op, res in self._tracker._credited_operations + ], + ) + self._state.chaos_occurred = self._chaos_engine.chaos_occurred + self._state.current_tier = self._curriculum.current_difficulty.value + self._state.infra_state = self._backend.get_infra_state() + def reset( self, seed: Optional[int] = None, @@ -50,33 +78,29 @@ class AwsRlEnvironment(Environment[AwsRlAction, AwsRlObservation, State]): **kwargs: Any, ) -> AwsRlObservation: self._backend.reset_environment() - self._state = State(episode_id=episode_id or 
str(uuid4()), step_count=0) + self._state = AwsRlState(episode_id=episode_id or str(uuid4()), step_count=0) self._tracker.reset() + self._chaos_engine.reset() self._current_task = self._curriculum.next_task() self._designer.apply(self._current_task) + self._sync_state() return AwsRlObservation( episode_id=EpisodeID(self._state.episode_id or ""), step_count=StepCount(self._state.step_count), command_success=True, command_output="Environment reset. Infra state wiped.", - task=self._current_task, + task=TaskInfo.from_task(self._current_task) if self._current_task else None, done=False, reward=0.0, ) - def step( - self, - action: AwsRlAction, - timeout_s: Optional[float] = None, - **kwargs: Any, - ) -> AwsRlObservation: - assert self._current_task is not None, "Call reset() before step()" - self._state.step_count += 1 + def _intercept_command(self, command: str) -> AwsRlObservation | None: + """Handle anti-hack validation, hint requests, and help commands. - # Anti-hack: only allow AWS CLI commands - command = action.command.strip() + Returns an observation if the command was intercepted, None otherwise. 
+ """ if not command.startswith("aws "): return AwsRlObservation( episode_id=EpisodeID(self._state.episode_id or ""), @@ -84,22 +108,86 @@ class AwsRlEnvironment(Environment[AwsRlAction, AwsRlObservation, State]): command_success=False, command_output="", error="Only AWS CLI commands (starting with 'aws') are allowed.", - task=self._current_task, + task=TaskInfo.from_task(self._current_task) + if self._current_task + else None, task_achieved=False, done=False, reward=0.0, ) + if command == "aws help --task-hint": + hint_level = self._tracker.record_hint() + clamped_level = min(hint_level, MAX_HINT_LEVEL) + assert self._current_task is not None + hint_text = self._hint_provider.get_hint(self._current_task, clamped_level) + return AwsRlObservation( + episode_id=EpisodeID(self._state.episode_id or ""), + step_count=StepCount(self._state.step_count), + command_success=True, + command_output=hint_text, + task=TaskInfo.from_task(self._current_task) + if self._current_task + else None, + task_achieved=False, + done=False, + reward=0.0, + hints_used=self._tracker.hints_used, + hint_text=hint_text, + ) + + parts = command.split() + if len(parts) == 3 and parts[0] == "aws": + service_name = None + if parts[2] == "help": + service_name = parts[1] + elif parts[1] == "help": + service_name = parts[2] + + if service_name is not None: + svc_success, help_text = self._backend.get_service_help(service_name) + return AwsRlObservation( + episode_id=EpisodeID(self._state.episode_id or ""), + step_count=StepCount(self._state.step_count), + command_success=svc_success, + command_output=help_text if svc_success else "", + error="" if svc_success else help_text, + task=TaskInfo.from_task(self._current_task) + if self._current_task + else None, + task_achieved=False, + done=False, + reward=0.0, + ) + + return None + + def step( + self, + action: AwsRlAction, + timeout_s: Optional[float] = None, + **kwargs: Any, + ) -> AwsRlObservation: + assert self._current_task is not None, "Call reset() 
before step()" + self._state.step_count += 1 + + command = action.command.strip() + intercepted = self._intercept_command(command) + if intercepted is not None: + return intercepted + success, stdout, stderr = self._backend.execute_command(command) # Record in tracker latest_step = self._tracker.record_step(command, success, stdout, stderr) - # Grade the task - task_achieved = False - + # Grade the task (pass cumulative chaos flag and hint count) grade_result = self._grader.grade( - self._current_task, self._tracker, latest_step + self._current_task, + self._tracker, + latest_step, + chaos_occurred=self._chaos_engine.chaos_occurred, + hints_used=self._tracker.hints_used, ) task_achieved = grade_result.task_achieved reward = grade_result.reward @@ -109,18 +197,29 @@ class AwsRlEnvironment(Environment[AwsRlAction, AwsRlObservation, State]): self._current_task, achieved=True, reward=reward ) + # Inject chaos AFTER grading — disrupts state for future steps + self._chaos_engine.maybe_inject( + self._current_task, + self._tracker, + self._curriculum.chaos_probability, + ) + + self._sync_state() + return AwsRlObservation( episode_id=EpisodeID(self._state.episode_id or ""), step_count=StepCount(self._state.step_count), command_success=success, command_output=stdout, error=stderr, - task=self._current_task, + task=TaskInfo.from_task(self._current_task) if self._current_task else None, task_achieved=task_achieved, + partial_progress=self._tracker.previous_progress, done=task_achieved, reward=reward, + hints_used=self._tracker.hints_used, ) @property - def state(self) -> State: + def state(self) -> AwsRlState: return self._state diff --git a/server/services/aws_backend.py b/server/services/aws_backend.py index f05e6cf9f51ca28cea5734b300a690ea79b098b2..09b2d82d51ef9d726616d31e7b69bcf107730952 100644 --- a/server/services/aws_backend.py +++ b/server/services/aws_backend.py @@ -2,6 +2,7 @@ import logging import os +import shlex import subprocess import httpx @@ -27,6 +28,61 @@ 
class AwsBackend: logger.warning("Failed to reset MiniStack state: %s", e) raise + def get_infra_state(self) -> dict: + """Fetch current infrastructure state from MiniStack via GET /_ministack/state.""" + try: + resp = httpx.get(f"{self._aws_infra_url}/_ministack/state", timeout=10) + resp.raise_for_status() + return resp.json() + except httpx.HTTPError as e: + logger.warning("Failed to fetch MiniStack state: %s", e) + return {} + + def get_service_help(self, service_name: str) -> tuple[bool, str]: + """Fetch service info from MiniStack via GET /_ministack/handlers/. + + Returns: + Tuple of (success, formatted_help_text) + """ + try: + resp = httpx.get( + f"{self._aws_infra_url}/_ministack/handlers/{service_name}", + timeout=10, + ) + resp.raise_for_status() + data = resp.json() + lines = [ + f"SERVICE: {data['service']}", + "", + "DESCRIPTION", + data.get("description", "No description available."), + "", + f"AVAILABLE ACTIONS ({data['action_count']}):", + "", + ] + for action in data.get("supported_actions", []): + lines.append(f" - {action}") + state = data.get("state", {}) + if state: + lines.append("") + lines.append("CURRENT STATE:") + for resource, info in state.items(): + count = info.get("count", 0) + names = info.get("names", info.get("ids", info.get("arns", []))) + lines.append(f" {resource}: {count}") + if names: + for n in names[:20]: + lines.append(f" - {n}") + if len(names) > 20: + lines.append(f" ... and {len(names) - 20} more") + return True, "\n".join(lines) + except httpx.HTTPStatusError as e: + if e.response.status_code == 404: + return False, f"Unknown service: {service_name}" + return False, f"Failed to fetch service help: {e}" + except httpx.HTTPError as e: + return False, f"Failed to fetch service help: {e}" + def execute_command(self, command: str) -> tuple[bool, str, str]: """Execute an AWS CLI command against MiniStack. 
@@ -46,7 +102,7 @@ class AwsBackend: try: result = subprocess.run( - command.split(), + shlex.split(command), capture_output=True, text=True, timeout=30, diff --git a/server/services/chaos_engine.py b/server/services/chaos_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..b16574f0ff1e1ac375d59a93f44d150708fead7f --- /dev/null +++ b/server/services/chaos_engine.py @@ -0,0 +1,168 @@ +""" +Chaos Injection Engine. + +Silently mutates AWS state mid-episode to test agent resilience and +situational awareness. Perturbations are scoped to services the current +task uses and are selected from a per-service catalog of destructive +AWS CLI commands. +""" + +import logging +import os +import random +import re + +from models import AwsService, Task +from server.services.aws_backend import AwsBackend +from server.services.episode_tracker import EpisodeTracker + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Resource-name extraction patterns (from successful AWS CLI commands) +# --------------------------------------------------------------------------- + +_RESOURCE_PATTERNS: dict[AwsService, list[re.Pattern[str]]] = { + AwsService.S3: [ + re.compile(r"aws\s+s3\s+mb\s+s3://([^\s]+)"), + re.compile(r"aws\s+s3api\s+create-bucket\s+--bucket\s+([^\s]+)"), + ], + AwsService.DYNAMODB: [ + re.compile(r"aws\s+dynamodb\s+create-table\s+.*--table-name\s+([^\s]+)"), + ], + AwsService.LAMBDA: [ + re.compile(r"aws\s+lambda\s+create-function\s+.*--function-name\s+([^\s]+)"), + ], + AwsService.SQS: [ + re.compile(r"aws\s+sqs\s+create-queue\s+.*--queue-name\s+([^\s]+)"), + ], + AwsService.IAM: [ + re.compile( + r"aws\s+iam\s+attach-role-policy\s+.*--role-name\s+([^\s]+)" + r"\s+.*--policy-arn\s+([^\s]+)" + ), + re.compile( + r"aws\s+iam\s+attach-role-policy\s+.*--policy-arn\s+([^\s]+)" + r"\s+.*--role-name\s+([^\s]+)" + ), + ], +} + +# 
--------------------------------------------------------------------------- +# Perturbation templates per service +# --------------------------------------------------------------------------- + +_PERTURBATION_TEMPLATES: dict[AwsService, list[str]] = { + AwsService.S3: [ + "aws s3 rb s3://{name} --force", + ], + AwsService.DYNAMODB: [ + "aws dynamodb delete-table --table-name {name}", + ], + AwsService.LAMBDA: [ + "aws lambda delete-function --function-name {name}", + ], + AwsService.SQS: [ + "aws sqs delete-queue --queue-url {name}", + ], + AwsService.IAM: [ + "aws iam detach-role-policy --role-name {name} --policy-arn {arn}", + ], +} + + +class ChaosEngine: + """Silently mutates AWS state mid-episode to test agent resilience.""" + + def __init__(self, backend: AwsBackend) -> None: + self._backend = backend + self._enabled = os.environ.get("ENABLE_CHAOS", "true").lower() == "true" + self._chaos_occurred = False + + def reset(self) -> None: + """Reset per-episode chaos state.""" + self._chaos_occurred = False + + @property + def chaos_occurred(self) -> bool: + """Whether chaos was injected at any point during this episode.""" + return self._chaos_occurred + + def maybe_inject( + self, + task: Task, + tracker: EpisodeTracker, + probability: float, + ) -> bool: + """Roll dice and, if triggered, execute a task-relevant perturbation. + + Returns True if a perturbation was actually executed. 
+ """ + if not self._enabled or probability <= 0.0: + return False + + if random.random() >= probability: + return False + + perturbation = self._select_perturbation(task, tracker) + if perturbation is None: + return False + + logger.info("Chaos injection: %s", perturbation) + self._backend.execute_command(perturbation) + self._chaos_occurred = True + return True + + # -- Private helpers ------------------------------------------------------ + + def _select_perturbation( + self, + task: Task, + tracker: EpisodeTracker, + ) -> str | None: + """Pick a concrete perturbation command scoped to services the task uses.""" + task_services = set(task.success_criteria.services) + if not task_services: + return None + + # Collect all candidate (service, rendered_command) pairs + candidates: list[str] = [] + + for step in tracker.command_history: + if not step.success: + continue + for service in task_services: + for pattern in _RESOURCE_PATTERNS.get(service, []): + match = pattern.search(step.command) + if not match: + continue + templates = _PERTURBATION_TEMPLATES.get(service, []) + for template in templates: + rendered = self._render_template(template, match, service) + if rendered: + candidates.append(rendered) + + if not candidates: + return None + + return random.choice(candidates) + + @staticmethod + def _render_template( + template: str, + match: re.Match[str], + service: AwsService, + ) -> str | None: + """Fill a perturbation template from regex match groups.""" + groups = match.groups() + if not groups: + return None + + if service == AwsService.IAM and len(groups) >= 2: + # IAM patterns capture (role_name, policy_arn) or vice-versa + # The first pattern has role first, second has arn first + if "role-name" in template and "policy-arn" in template: + return template.format(name=groups[0], arn=groups[1]) + return None + + return template.format(name=groups[0]) diff --git a/server/services/curriculum.py b/server/services/curriculum.py index 
4de9bbe7c51305d44b81b4c79e7094dc40716d05..ada05d32a433f269c889423dc7d58bde270a416c 100644 --- a/server/services/curriculum.py +++ b/server/services/curriculum.py @@ -17,6 +17,7 @@ import logging import random from collections import defaultdict from pathlib import Path +from typing import Any import yaml @@ -59,6 +60,7 @@ TIER_CONFIGS: dict[TaskDifficulty, TierConfig] = { mastery_window=10, mastery_threshold=0.7, fast_track_rate=0.9, + chaos_probability=0.1, ), TaskDifficulty.ADVANCED: TierConfig( min_episodes=10, @@ -66,6 +68,7 @@ TIER_CONFIGS: dict[TaskDifficulty, TierConfig] = { mastery_window=10, mastery_threshold=0.7, fast_track_rate=0.9, + chaos_probability=0.2, ), TaskDifficulty.EXPERT: TierConfig( min_episodes=0, @@ -73,6 +76,7 @@ TIER_CONFIGS: dict[TaskDifficulty, TierConfig] = { mastery_window=10, mastery_threshold=0.7, fast_track_rate=1.0, + chaos_probability=0.3, ), } @@ -85,6 +89,11 @@ _TIER_FILES: dict[TaskDifficulty, str] = { TaskDifficulty.EXPERT: "expert.yaml", } +# Supplementary task files merged into an existing tier +_SUPPLEMENTARY_FILES: dict[TaskDifficulty, list[str]] = { + TaskDifficulty.EXPERT: ["drift.yaml"], +} + # --------------------------------------------------------------------------- # Priority score tuning constants # --------------------------------------------------------------------------- @@ -109,8 +118,34 @@ _FAST_TRACK_MIN_EPISODES = 3 # --------------------------------------------------------------------------- +def _parse_task_entries( + entries: list[dict[str, Any]], difficulty: TaskDifficulty +) -> list[Task]: + """Convert raw YAML entries into Task models.""" + return [ + Task( + task_id=TaskID(entry["task_id"]), + difficulty=difficulty, + description=entry["description"], + success_criteria=SuccessCriteria(**entry.get("success_criteria", {})), + setup_commands=[ + SetupCommand(command=cmd) + if isinstance(cmd, str) + else SetupCommand(**cmd) + for cmd in entry.get("setup_commands", []) + ], + 
desired_state_spec=entry.get("desired_state_spec"), + possible_drifts=[ + SetupCommand(command=d) if isinstance(d, str) else SetupCommand(**d) + for d in entry.get("possible_drifts", []) + ], + ) + for entry in entries + ] + + def load_tier(difficulty: TaskDifficulty, tasks_dir: Path = TASKS_DIR) -> list[Task]: - """Load tasks for a single difficulty tier from its YAML file.""" + """Load tasks for a single difficulty tier from its YAML file(s).""" filename = _TIER_FILES.get(difficulty) if filename is None: logger.warning("No file mapping for difficulty: %s", difficulty.value) @@ -124,24 +159,25 @@ def load_tier(difficulty: TaskDifficulty, tasks_dir: Path = TASKS_DIR) -> list[T with open(filepath) as f: entries = yaml.safe_load(f) or [] - tasks = [ - Task( - task_id=TaskID(entry["task_id"]), - difficulty=difficulty, - description=entry["description"], - success_criteria=SuccessCriteria(**entry.get("success_criteria", {})), - setup_commands=[ - SetupCommand(command=cmd) - if isinstance(cmd, str) - else SetupCommand(**cmd) - for cmd in entry.get("setup_commands", []) - ], + tasks = _parse_task_entries(entries, difficulty) + + # Load supplementary task files for this tier + for extra_file in _SUPPLEMENTARY_FILES.get(difficulty, []): + extra_path = tasks_dir / extra_file + if not extra_path.exists(): + continue + with open(extra_path) as f: + extra_entries = yaml.safe_load(f) or [] + extra_tasks = _parse_task_entries(extra_entries, difficulty) + tasks.extend(extra_tasks) + logger.info( + "Loaded %d supplementary %s tasks from %s", + len(extra_tasks), + difficulty.value, + extra_file, ) - for entry in entries - ] - logger.info( - "Loaded %d %s tasks from %s", len(tasks), difficulty.value, filepath.name - ) + + logger.info("Loaded %d %s tasks total", len(tasks), difficulty.value) return tasks @@ -237,6 +273,10 @@ class Curriculum: def is_warmup(self) -> bool: return self.current_difficulty == TaskDifficulty.WARMUP + @property + def chaos_probability(self) -> float: + 
return self.tier_config.chaos_probability + # -- Public API ----------------------------------------------------------- def next_task(self) -> Task: diff --git a/server/services/drift_engine.py b/server/services/drift_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..08b9d54e0b5f7e6ce89fcab2625e21a350866b81 --- /dev/null +++ b/server/services/drift_engine.py @@ -0,0 +1,67 @@ +""" +Configuration Drift Engine. + +Randomly applies a subset of a task's possible mutations after the correct +state has been provisioned. This forces the agent to audit and discover +which resources drifted rather than memorising a fixed solution path. +""" + +from __future__ import annotations + +import logging +import random + +from models import Task +from server.services.aws_backend import AwsBackend + +logger = logging.getLogger(__name__) + +# Default range for how many drifts to apply (inclusive). +_MIN_DRIFTS = 2 +_MAX_DRIFTS = 3 + + +class DriftEngine: + """Selects and applies random configuration drifts for a task.""" + + def __init__(self, backend: AwsBackend) -> None: + self._backend = backend + + def apply_drift(self, task: Task) -> list[str]: + """Randomly select and execute K of N possible drifts. + + Args: + task: A task whose ``possible_drifts`` list defines the + candidate mutations. + + Returns: + Human-readable descriptions of the drifts that were applied + (empty list if none). 
+ """ + if not task.possible_drifts: + return [] + + pool = task.possible_drifts + k = self._pick_count(len(pool)) + selected = random.sample(pool, k) + + applied: list[str] = [] + for drift in selected: + success, _stdout, stderr = self._backend.execute_command(drift.command) + label = drift.description or drift.command + if success: + logger.info("Drift applied: %s", label) + applied.append(label) + else: + logger.warning("Drift command failed: %s — %s", drift.command, stderr) + + return applied + + @staticmethod + def _pick_count(pool_size: int) -> int: + """Determine how many drifts to apply given the pool size.""" + if pool_size <= 1: + return pool_size + lo = min(_MIN_DRIFTS, pool_size) + hi = min(_MAX_DRIFTS, pool_size) + return random.randint(lo, hi) diff --git a/server/services/environment_designer.py b/server/services/environment_designer.py index 9259f8b072144d6759610b7d3af4375b3b1ba190..59fa176dfae622021e09db30ebca3727d8448c96 100644 --- a/server/services/environment_designer.py +++ b/server/services/environment_designer.py @@ -14,6 +14,7 @@ from pydantic import BaseModel, Field from models import SetupCommand, Task from server.services.aws_backend import AwsBackend +from server.services.drift_engine import DriftEngine logger = logging.getLogger(__name__) @@ -47,6 +48,7 @@ class EnvironmentDesigner: def __init__(self, backend: AwsBackend) -> None: self._backend = backend + self._drift_engine = DriftEngine(backend) def apply(self, task: Task) -> ProvisionResult: """Apply the task's environment setup to MiniStack. 
@@ -61,7 +63,14 @@ class EnvironmentDesigner: if not task.setup_commands: return ProvisionResult(resources_created=0) - return self._apply_cli_commands(task.setup_commands) + result = self._apply_cli_commands(task.setup_commands) + + # Apply random configuration drifts after provisioning correct state + if task.possible_drifts: + applied = self._drift_engine.apply_drift(task) + logger.info("Applied %d configuration drifts", len(applied)) + + return result # -- Provisioning strategies ---------------------------------------------- diff --git a/server/services/episode_tracker.py b/server/services/episode_tracker.py index f5b313800a585e64293b7674719e255d4e78af00..959978ec7afce05c1d29f74906bca2035c1b37cc 100644 --- a/server/services/episode_tracker.py +++ b/server/services/episode_tracker.py @@ -63,6 +63,44 @@ def _command_mentions_resource(command: str, resource: str) -> bool: return False +# Maps create operations to their corresponding delete operations. +_CREATE_DELETE_PAIRS: dict[str, str] = { + "create-bucket": "delete-bucket", + "create-table": "delete-table", + "create-function": "delete-function", + "create-queue": "delete-queue", + "create-topic": "delete-topic", + "create-role": "delete-role", + "create-rest-api": "delete-rest-api", + "create-secret": "delete-secret", + "put-bucket-policy": "delete-bucket-policy", + "attach-role-policy": "detach-role-policy", +} + +_ALREADY_EXISTS_PATTERNS: list[str] = [ + "already exists", + "BucketAlreadyExists", + "BucketAlreadyOwnedByYou", + "ResourceInUseException", + "ResourceConflictException", + "EntityAlreadyExists", + "QueueNameExists", + "TopicAlreadyExists", +] + + +def _extract_resource_name(command: str) -> str | None: + """Extract the primary resource name from an AWS CLI command.""" + parts = command.strip().split() + for i, part in enumerate(parts): + if part in _RESOURCE_FLAGS and i + 1 < len(parts): + return parts[i + 1] + for flag in _RESOURCE_FLAGS: + if part.startswith(f"{flag}="): + return 
part.split("=", 1)[1] + return None + + class EpisodeTracker: """Tracks command history within a single episode for grading.""" @@ -72,12 +110,14 @@ class EpisodeTracker: self._previous_progress: float = 0.0 # Track which (operation, resource) pairs have been credited self._credited_operations: set[tuple[str, str | None]] = set() + self._hints_used: int = 0 def reset(self) -> None: self._history.clear() self._step_counter = 0 self._previous_progress = 0.0 self._credited_operations.clear() + self._hints_used = 0 def record_step( self, command: str, success: bool, stdout: str, stderr: str @@ -136,6 +176,15 @@ class EpisodeTracker: def step_count(self) -> int: return self._step_counter + def record_hint(self) -> int: + """Record that a hint was used. Returns the new hint level (1-indexed).""" + self._hints_used += 1 + return self._hints_used + + @property + def hints_used(self) -> int: + return self._hints_used + @property def previous_progress(self) -> float: return self._previous_progress @@ -143,3 +192,50 @@ class EpisodeTracker: @previous_progress.setter def previous_progress(self, value: float) -> None: self._previous_progress = value + + def detect_rollbacks(self) -> int: + """Count create→delete pairs on the same resource (wasteful rollbacks).""" + # Build a set of (operation, resource) for successful create commands + creates: list[tuple[str, str]] = [] + for record in self._history: + if not record.success: + continue + _, op = _parse_aws_command(record.command) + if op is None or op not in _CREATE_DELETE_PAIRS: + continue + resource = _extract_resource_name(record.command) + if resource is not None: + creates.append((op, resource)) + + rollback_count = 0 + for create_op, resource in creates: + delete_op = _CREATE_DELETE_PAIRS[create_op] + for record in self._history: + if not record.success: + continue + _, op = _parse_aws_command(record.command) + if op == delete_op and _command_mentions_resource( + record.command, resource + ): + rollback_count += 1 + 
break + + return rollback_count + + def detect_idempotent_retries(self) -> int: + """Count create failures with 'already exists' followed by a successful next step.""" + count = 0 + for i, record in enumerate(self._history): + if record.success: + continue + _, op = _parse_aws_command(record.command) + if op is None or not op.startswith("create"): + continue + # Check stderr for "already exists" patterns + if not any(pat in record.stderr for pat in _ALREADY_EXISTS_PATTERNS): + continue + # Next step must exist and be successful + if i + 1 < len(self._history) and self._history[i + 1].success: + count += 1 + + return count diff --git a/server/services/hint_provider.py b/server/services/hint_provider.py new file mode 100644 index 0000000000000000000000000000000000000000..490c50fcbb6079007b02c1ee80cb99d20d8833b4 --- /dev/null +++ b/server/services/hint_provider.py @@ -0,0 +1,137 @@ +""" +Progressive Hint Provider. + +Generates increasingly specific hints from a task's SuccessCriteria, +creating an information-reward tradeoff: hints help the agent but each +one decays the final reward via 0.85^hints_used. +""" + +import logging + +from models import Task + +logger = logging.getLogger(__name__) + +# Maximum hint level (1-indexed) +MAX_HINT_LEVEL: int = 3 + + +class HintProvider: + """Generates progressive hints from task success criteria.""" + + def get_hint(self, task: Task, level: int) -> str: + """Return a hint for the given level (1–3). + + Level 1: Which AWS services to use. + Level 2: Which operations to perform. + Level 3: Near-complete command structure. 
+ """ + level = max(1, min(level, MAX_HINT_LEVEL)) + + if level == 1: + return self._hint_services(task) + if level == 2: + return self._hint_operations(task) + return self._hint_commands(task) + + # -- Private generators --------------------------------------------------- + + @staticmethod + def _hint_services(task: Task) -> str: + """Level 1: which AWS services are involved.""" + criteria = task.success_criteria + + services: list[str] = [] + if criteria.services: + services = [s.value for s in criteria.services] + elif criteria.steps: + # Infer service from operation names (e.g. "create-bucket" → s3) + for step in criteria.steps: + svc = _infer_service(step.operation) + if svc and svc not in services: + services.append(svc) + elif criteria.operation: + svc = _infer_service(criteria.operation) + if svc: + services = [svc] + + if services: + return f"You'll need these AWS services: {', '.join(services)}" + return "Review the task description for clues about which AWS services to use." + + @staticmethod + def _hint_operations(task: Task) -> str: + """Level 2: which operations to perform.""" + criteria = task.success_criteria + + operations: list[str] = [] + if criteria.steps: + operations = [step.operation for step in criteria.steps] + elif criteria.operation: + operations = [criteria.operation] + + if operations: + return f"Use these operations in order: {', '.join(operations)}" + return "Check the AWS CLI documentation for the relevant service operations." + + @staticmethod + def _hint_commands(task: Task) -> str: + """Level 3: near-complete command structure.""" + criteria = task.success_criteria + + commands: list[str] = [] + if criteria.steps: + for step in criteria.steps: + svc = _infer_service(step.operation) + svc_prefix = f"{svc} " if svc else "" + if step.resource: + commands.append( + f"aws {svc_prefix}{step.operation} ... 
{step.resource}" + ) + else: + commands.append(f"aws {svc_prefix}{step.operation} ...") + elif criteria.operation: + svc = _infer_service(criteria.operation) + svc_prefix = f"{svc} " if svc else "" + resource = "" + if criteria.resource_exists: + resource = f" ... {criteria.resource_exists.name}" + commands.append(f"aws {svc_prefix}{criteria.operation}{resource}") + + if commands: + return "Command structure: " + " → ".join(commands) + return "Refer to the task description and use 'aws help' for syntax." + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +_OPERATION_SERVICE_MAP: dict[str, str] = { + "bucket": "s3api", + "object": "s3api", + "table": "dynamodb", + "function": "lambda", + "layer": "lambda", + "queue": "sqs", + "topic": "sns", + "subscription": "sns", + "role": "iam", + "policy": "iam", + "user": "iam", + "group": "iam", + "rest-api": "apigateway", + "secret": "secretsmanager", + "instance": "ec2", + "security-group": "ec2", + "vpc": "ec2", + "subnet": "ec2", +} + + +def _infer_service(operation: str) -> str | None: + """Best-effort mapping from an operation name to its AWS CLI service prefix.""" + for keyword, service in _OPERATION_SERVICE_MAP.items(): + if keyword in operation: + return service + return None diff --git a/server/services/resource_verifier.py b/server/services/resource_verifier.py index 4c60af63712cb1bb2819d2fb24ba5f3cd2574de2..5196a1c5557e8af760c834c616e040b1d84ff937 100644 --- a/server/services/resource_verifier.py +++ b/server/services/resource_verifier.py @@ -70,8 +70,23 @@ class ResourceVerifier: "lambda": self._check_lambda_function, "sqs": self._check_sqs_queue, "sns": self._check_sns_topic, - "iam": self._check_iam_role, + "iam": self._check_iam_resource, "apigateway": self._check_apigateway, + "secretsmanager": self._check_secretsmanager, + "ecs": self._check_ecs_cluster, + "rds": 
self._check_rds_instance, + "elasticache": self._check_elasticache_cluster, + "route53": self._check_route53_hosted_zone, + "elbv2": self._check_elbv2_load_balancer, + "efs": self._check_efs_filesystem, + "cognito-idp": self._check_cognito_user_pool, + "ssm": self._check_ssm_parameter, + "events": self._check_eventbridge_rule, + "apigatewayv2": self._check_apigatewayv2, + "cloudformation": self._check_cloudformation_stack, + "glue": self._check_glue_database, + "ebs": self._check_ebs_volume, + "firehose": self._check_firehose_stream, } verifier = verifiers.get(service_lower) if verifier is None: @@ -159,10 +174,38 @@ class ResourceVerifier: except (json.JSONDecodeError, TypeError): return False - def _check_iam_role(self, name: str) -> bool: + def _check_iam_resource(self, name: str) -> bool: + """Check for IAM roles, users, and policies by name.""" + # Try role first success, _, _ = self._backend.execute_command( f"aws iam get-role --role-name {name}" ) + if success: + return True + # Try user + success, _, _ = self._backend.execute_command( + f"aws iam get-user --user-name {name}" + ) + if success: + return True + # Try policy (list and match by name) + success, stdout, _ = self._backend.execute_command( + "aws iam list-policies --scope Local --output json" + ) + if success: + try: + data = json.loads(stdout) + policies = data.get("Policies", []) + if any(p.get("PolicyName") == name for p in policies): + return True + except (json.JSONDecodeError, TypeError): + pass + return False + + def _check_secretsmanager(self, name: str) -> bool: + success, _, _ = self._backend.execute_command( + f"aws secretsmanager describe-secret --secret-id {name}" + ) return success def _check_apigateway(self, name: str) -> bool: @@ -177,3 +220,143 @@ class ResourceVerifier: return any(i.get("name") == name for i in items) except (json.JSONDecodeError, TypeError): return False + + def _check_ecs_cluster(self, name: str) -> bool: + success, stdout, _ = self._backend.execute_command( + 
f"aws ecs describe-clusters --clusters {name}" + ) + if not success: + return False + try: + data = json.loads(stdout) + clusters = data.get("clusters", []) + return any( + c.get("clusterName") == name and c.get("status") != "INACTIVE" + for c in clusters + ) + except (json.JSONDecodeError, TypeError): + return False + + def _check_rds_instance(self, name: str) -> bool: + success, _, _ = self._backend.execute_command( + f"aws rds describe-db-instances --db-instance-identifier {name}" + ) + return success + + def _check_elasticache_cluster(self, name: str) -> bool: + success, _, _ = self._backend.execute_command( + f"aws elasticache describe-cache-clusters --cache-cluster-id {name}" + ) + return success + + def _check_route53_hosted_zone(self, name: str) -> bool: + success, stdout, _ = self._backend.execute_command( + "aws route53 list-hosted-zones --output json" + ) + if not success: + return False + try: + data = json.loads(stdout) + zones = data.get("HostedZones", []) + return any(z.get("Name", "").rstrip(".") == name.rstrip(".") for z in zones) + except (json.JSONDecodeError, TypeError): + return False + + def _check_elbv2_load_balancer(self, name: str) -> bool: + success, stdout, _ = self._backend.execute_command( + f"aws elbv2 describe-load-balancers --names {name}" + ) + if not success: + return False + try: + data = json.loads(stdout) + lbs = data.get("LoadBalancers", []) + return any(lb.get("LoadBalancerName") == name for lb in lbs) + except (json.JSONDecodeError, TypeError): + return False + + def _check_efs_filesystem(self, name: str) -> bool: + success, stdout, _ = self._backend.execute_command( + "aws efs describe-file-systems --output json" + ) + if not success: + return False + try: + data = json.loads(stdout) + filesystems = data.get("FileSystems", []) + return any( + fs.get("CreationToken") == name + or any(t.get("Value") == name for t in fs.get("Tags", [])) + for fs in filesystems + ) + except (json.JSONDecodeError, TypeError): + return False + + 
def _check_cognito_user_pool(self, name: str) -> bool: + success, stdout, _ = self._backend.execute_command( + "aws cognito-idp list-user-pools --max-results 60 --output json" + ) + if not success: + return False + try: + data = json.loads(stdout) + pools = data.get("UserPools", []) + return any(p.get("Name") == name for p in pools) + except (json.JSONDecodeError, TypeError): + return False + + def _check_ssm_parameter(self, name: str) -> bool: + success, _, _ = self._backend.execute_command( + f"aws ssm get-parameter --name {name}" + ) + return success + + def _check_eventbridge_rule(self, name: str) -> bool: + success, _, _ = self._backend.execute_command( + f"aws events describe-rule --name {name}" + ) + return success + + def _check_apigatewayv2(self, name: str) -> bool: + success, stdout, _ = self._backend.execute_command( + "aws apigatewayv2 get-apis --output json" + ) + if not success: + return False + try: + data = json.loads(stdout) + items = data.get("Items", []) + return any(i.get("Name") == name for i in items) + except (json.JSONDecodeError, TypeError): + return False + + def _check_cloudformation_stack(self, name: str) -> bool: + success, _, _ = self._backend.execute_command( + f"aws cloudformation describe-stacks --stack-name {name}" + ) + return success + + def _check_glue_database(self, name: str) -> bool: + success, _, _ = self._backend.execute_command( + f"aws glue get-database --name {name}" + ) + return success + + def _check_ebs_volume(self, name: str) -> bool: + success, stdout, _ = self._backend.execute_command( + "aws ec2 describe-volumes --output json" + ) + if not success: + return False + try: + data = json.loads(stdout) + volumes = data.get("Volumes", []) + return len(volumes) > 0 + except (json.JSONDecodeError, TypeError): + return False + + def _check_firehose_stream(self, name: str) -> bool: + success, _, _ = self._backend.execute_command( + f"aws firehose describe-delivery-stream --delivery-stream-name {name}" + ) + return success 
diff --git a/server/services/task_grader.py b/server/services/task_grader.py index 20a9cc0caf44ee0eda2c00d317805e484ca320be..332da9af63fd0f72066a959a63e22dc119069118 100644 --- a/server/services/task_grader.py +++ b/server/services/task_grader.py @@ -42,6 +42,8 @@ class TaskGrader: task: Task, tracker: EpisodeTracker, latest_step: StepRecord, + chaos_occurred: bool = False, + hints_used: int = 0, ) -> GradeResult: criteria = task.success_criteria @@ -58,7 +60,9 @@ class TaskGrader: result = GradeResult(reason="no recognised success_criteria fields") # Compute shaped reward - result.reward = self._compute_reward(result, latest_step, tracker) + result.reward = self._compute_reward( + result, latest_step, tracker, chaos_occurred, hints_used + ) # Update tracker's previous progress (monotonic — never decrease) if result.partial_progress > tracker.previous_progress: @@ -225,10 +229,14 @@ class TaskGrader: result: GradeResult, latest_step: StepRecord, tracker: EpisodeTracker, + chaos_occurred: bool = False, + hints_used: int = 0, ) -> float: - """Compute a shaped reward in [0.0, 1.0].""" + """Compute a shaped reward in [0.0, 1.05].""" if result.task_achieved: - return 1.0 + base = 1.05 if chaos_occurred else 1.0 + # Hint decay: 0.85^hints_used + return base * (0.85**hints_used) # Base: partial progress scaled to 0.0–0.8 range progress_reward = result.partial_progress * 0.8 @@ -242,5 +250,15 @@ class TaskGrader: if not latest_step.success: progress_reward *= 0.5 + # Rollback penalty: wasteful create→delete pairs + progress_reward -= 0.1 * tracker.detect_rollbacks() + + # Idempotency bonus: graceful "already exists" handling + progress_reward += 0.02 * tracker.detect_idempotent_retries() + + # Hint decay: 0.85^hints_used + if hints_used > 0: + progress_reward *= 0.85**hints_used + # Clamp to [0.0, 0.99] — never reach 1.0 without achieving return min(max(progress_reward, 0.0), 0.99) diff --git a/server/services/task_solutions.py b/server/services/task_solutions.py new file 
mode 100644 index 0000000000000000000000000000000000000000..b59fa207c03f0788722fead073b8dbc1e0df81ca --- /dev/null +++ b/server/services/task_solutions.py @@ -0,0 +1,848 @@ +"""Provides step-by-step solution commands for tasks. + +Returns the next command to execute based on how many steps have been completed. +Dynamic IDs are resolved from actual MiniStack state via the backend. +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from server.services.aws_backend import AwsBackend + from server.services.episode_tracker import EpisodeTracker + +_ROLE = "arn:aws:iam::000000000000:role" +_CODE = "--code S3Bucket=dummy,S3Key=dummy.zip" +_SIMPLE_POLICY = """'{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Action":"s3:GetObject","Resource":"*"}]}'""" + + +def _assume(svc: str) -> str: + doc = json.dumps( + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": {"Service": svc}, + "Action": "sts:AssumeRole", + } + ], + } + ) + return f"'{doc}'" + + +# --------------------------------------------------------------------------- +# Static command sequences (loaded once from test files) +# --------------------------------------------------------------------------- +_static_cache: dict[int, list[str]] | None = None + + +def _load_static() -> dict[int, list[str]]: + global _static_cache + if _static_cache is not None: + return _static_cache + + import importlib.util + + solutions: dict[int, list[str]] = {} + tests_dir = Path(__file__).resolve().parent.parent.parent / "tests_tasks" + + for fname, var in [ + ("test_warmup_tasks.py", "WARMUP_COMMANDS"), + ("test_beginner_tasks.py", "BEGINNER_COMMANDS"), + ("test_intermediate_tasks.py", "INTERMEDIATE_COMMANDS"), + ("test_expert_tasks.py", "EXPERT_COMMANDS"), + ]: + fpath = tests_dir / fname + if not fpath.exists(): + continue + spec = importlib.util.spec_from_file_location(fname.replace(".py", ""), fpath) 
+ if spec is None or spec.loader is None: + continue + mod = importlib.util.module_from_spec(spec) + try: + spec.loader.exec_module(mod) + except Exception: + continue + cmds_map = getattr(mod, var, {}) + for tid, cmd in cmds_map.items(): + if isinstance(cmd, str): + solutions[tid] = [cmd] + elif isinstance(cmd, list): + solutions[tid] = [c for c in cmd if isinstance(c, str)] + + _static_cache = solutions + return solutions + + +# --------------------------------------------------------------------------- +# Advanced tasks — full command sequences with dynamic ID resolution +# --------------------------------------------------------------------------- + + +def _advanced_commands(task_id: int, backend: AwsBackend, step: int) -> list[str]: + """Return the full ordered command list for an advanced task. + + Some commands depend on outputs from prior steps. We execute discovery + commands against MiniStack to resolve dynamic IDs. + """ + a = _assume + if task_id == 15: + return [ + f"aws iam create-role --role-name processor-role --assume-role-policy-document {a('lambda.amazonaws.com')}", + f"aws lambda create-function --function-name processor --runtime python3.12 --handler index.handler --role {_ROLE}/processor-role {_CODE}", + "aws sqs create-queue --queue-name work-items", + "aws lambda create-event-source-mapping --function-name processor --event-source-arn arn:aws:sqs:us-east-1:000000000000:work-items --batch-size 10", + ] + + if task_id == 16: + cmds = [ + "aws dynamodb create-table --table-name products --key-schema AttributeName=product_id,KeyType=HASH --attribute-definitions AttributeName=product_id,AttributeType=S --billing-mode PAY_PER_REQUEST", + f"aws iam create-role --role-name product-api-role --assume-role-policy-document {a('lambda.amazonaws.com')}", + f"aws lambda create-function --function-name product-api --runtime python3.12 --handler index.handler --role {_ROLE}/product-api-role {_CODE}", + "aws apigateway create-rest-api --name products-api", + 
] + # Steps 5+ need dynamic IDs — resolve from MiniStack + if step >= 4: + ok, out, _ = backend.execute_command("aws apigateway get-rest-apis") + api_id = "UNKNOWN" + try: + for item in json.loads(out).get("items", []): + if item.get("name") == "products-api": + api_id = item["id"] + break + except Exception: + pass + cmds.append(f"aws apigateway get-resources --rest-api-id {api_id}") + + if step >= 5: + ok2, out2, _ = backend.execute_command( + f"aws apigateway get-resources --rest-api-id {api_id}" + ) + root_id = "UNKNOWN" + try: + for item in json.loads(out2).get("items", []): + if item.get("path") == "/": + root_id = item["id"] + break + except Exception: + pass + cmds.append( + f"aws apigateway create-resource --rest-api-id {api_id} --parent-id {root_id} --path-part products" + ) + + if step >= 6: + ok3, out3, _ = backend.execute_command( + f"aws apigateway get-resources --rest-api-id {api_id}" + ) + res_id = "UNKNOWN" + try: + for item in json.loads(out3).get("items", []): + if item.get("pathPart") == "products": + res_id = item["id"] + break + except Exception: + pass + cmds.append( + f"aws apigateway put-method --rest-api-id {api_id} --resource-id {res_id} --http-method GET --authorization-type NONE" + ) + + if step >= 7: + cmds.append( + f"aws apigateway put-integration --rest-api-id {api_id} --resource-id {res_id} --http-method GET --type AWS_PROXY --integration-http-method POST --uri arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:000000000000:function:product-api/invocations" + ) + return cmds + + if task_id == 17: + return [ + "aws sns create-topic --name order-events", + "aws sqs create-queue --queue-name shipping-queue", + "aws sqs create-queue --queue-name billing-queue", + "aws sns subscribe --topic-arn arn:aws:sns:us-east-1:000000000000:order-events --protocol sqs --notification-endpoint arn:aws:sqs:us-east-1:000000000000:shipping-queue", + "aws sns subscribe --topic-arn 
arn:aws:sns:us-east-1:000000000000:order-events --protocol sqs --notification-endpoint arn:aws:sqs:us-east-1:000000000000:billing-queue", + 'aws sns publish --topic-arn arn:aws:sns:us-east-1:000000000000:order-events --message "test order event"', + ] + + if task_id == 87: + return [ + "aws s3api create-bucket --bucket image-uploads", + f"aws iam create-role --role-name image-resizer-role --assume-role-policy-document {a('lambda.amazonaws.com')}", + f"aws lambda create-function --function-name image-resizer --runtime python3.12 --handler index.handler --role {_ROLE}/image-resizer-role {_CODE}", + """aws s3api put-bucket-notification-configuration --bucket image-uploads --notification-configuration '{"LambdaFunctionConfigurations":[{"LambdaFunctionArn":"arn:aws:lambda:us-east-1:000000000000:function:image-resizer","Events":["s3:ObjectCreated:*"]}]}'""", + 'aws events put-rule --name image-upload-rule --schedule-expression "rate(1 hour)"', + "aws events put-targets --rule image-upload-rule --targets Id=1,Arn=arn:aws:lambda:us-east-1:000000000000:function:image-resizer", + ] + + if task_id == 88: + cmds = [ + f"aws iam create-role --role-name ecs-exec-role --assume-role-policy-document {a('ecs-tasks.amazonaws.com')}", + """aws ecs register-task-definition --family web-app-task --container-definitions '[{"name":"web","image":"nginx","memory":256,"cpu":128}]' --requires-compatibilities FARGATE --network-mode awsvpc --cpu 256 --memory 512""", + "aws ecs create-cluster --cluster-name web-cluster", + "aws elbv2 create-target-group --name web-tg --protocol HTTP --port 80 --vpc-id vpc-00000001 --target-type ip", + "aws elbv2 create-load-balancer --name web-alb --subnets subnet-00000001 subnet-00000002", + 'aws ec2 create-security-group --group-name ecs-sg --description "ECS tasks"', + ] + if step >= 6: + tg_arn = lb_arn = "UNKNOWN" + ok, out, _ = backend.execute_command( + "aws elbv2 describe-target-groups --names web-tg" + ) + try: + tg_arn = 
json.loads(out)["TargetGroups"][0]["TargetGroupArn"] + except Exception: + pass + ok, out, _ = backend.execute_command( + "aws elbv2 describe-load-balancers --names web-alb" + ) + try: + lb_arn = json.loads(out)["LoadBalancers"][0]["LoadBalancerArn"] + except Exception: + pass + cmds.append( + f"aws elbv2 create-listener --load-balancer-arn {lb_arn} --protocol HTTP --port 80 --default-actions Type=forward,TargetGroupArn={tg_arn}" + ) + if step >= 7: + cmds.append( + f"aws ecs create-service --cluster web-cluster --service-name web-service --task-definition web-app-task --desired-count 1 --launch-type FARGATE --network-configuration awsvpcConfiguration={{subnets=[subnet-00000001],securityGroups=[sg-00000001]}} --load-balancers targetGroupArn={tg_arn},containerName=web,containerPort=80" + ) + return cmds + + if task_id == 89: + return [ + "aws dynamodb create-table --table-name orders --key-schema AttributeName=order_id,KeyType=HASH --attribute-definitions AttributeName=order_id,AttributeType=S --billing-mode PAY_PER_REQUEST", + "aws sqs create-queue --queue-name order-queue", + "aws sns create-topic --name order-notifications", + "aws sns subscribe --topic-arn arn:aws:sns:us-east-1:000000000000:order-notifications --protocol sqs --notification-endpoint arn:aws:sqs:us-east-1:000000000000:order-queue", + f"aws iam create-role --role-name order-processor-role --assume-role-policy-document {a('lambda.amazonaws.com')}", + f"aws lambda create-function --function-name order-processor --runtime python3.12 --handler index.handler --role {_ROLE}/order-processor-role {_CODE}", + "aws lambda create-event-source-mapping --function-name order-processor --event-source-arn arn:aws:sqs:us-east-1:000000000000:order-queue --batch-size 10", + ] + + if task_id == 90: + return [ + 'aws rds create-db-subnet-group --db-subnet-group-name db-subnets --db-subnet-group-description "DB subnets" --subnet-ids subnet-00000001 subnet-00000002', + "aws rds create-db-instance --db-instance-identifier 
app-db --engine mysql --db-instance-class db.t3.micro --master-username admin --master-user-password Password123", + """aws secretsmanager create-secret --name db-credentials --secret-string '{"username":"admin","password":"Password123"}'""", + f"aws iam create-role --role-name secret-rotator-role --assume-role-policy-document {a('lambda.amazonaws.com')}", + f"aws lambda create-function --function-name secret-rotator --runtime python3.12 --handler index.handler --role {_ROLE}/secret-rotator-role {_CODE}", + ] + + if task_id == 91: + cmds = [ + 'aws ec2 create-security-group --group-name web-sg --description "HTTP access"', + "aws elbv2 create-target-group --name frontend-tg --protocol HTTP --port 80 --vpc-id vpc-00000001 --target-type ip", + "aws elbv2 create-load-balancer --name frontend-alb --subnets subnet-00000001 subnet-00000002", + ] + if step >= 3: + tg_arn = lb_arn = "UNKNOWN" + ok, out, _ = backend.execute_command( + "aws elbv2 describe-target-groups --names frontend-tg" + ) + try: + tg_arn = json.loads(out)["TargetGroups"][0]["TargetGroupArn"] + except Exception: + pass + ok, out, _ = backend.execute_command( + "aws elbv2 describe-load-balancers --names frontend-alb" + ) + try: + lb_arn = json.loads(out)["LoadBalancers"][0]["LoadBalancerArn"] + except Exception: + pass + cmds.append( + f"aws elbv2 create-listener --load-balancer-arn {lb_arn} --protocol HTTP --port 80 --default-actions Type=forward,TargetGroupArn={tg_arn}" + ) + if step >= 4: + cmds.append( + "aws route53 create-hosted-zone --name example.internal --caller-reference ref-91" + ) + if step >= 5: + hz_id = "UNKNOWN" + ok, out, _ = backend.execute_command("aws route53 list-hosted-zones") + try: + for hz in json.loads(out).get("HostedZones", []): + if "example.internal" in hz.get("Name", ""): + hz_id = hz["Id"].split("/")[-1] + break + except Exception: + pass + batch = json.dumps( + { + "Changes": [ + { + "Action": "CREATE", + "ResourceRecordSet": { + "Name": "example.internal", + "Type": "A", 
+ "TTL": 300, + "ResourceRecords": [{"Value": "1.2.3.4"}], + }, + } + ] + } + ) + cmds.append( + f"aws route53 change-resource-record-sets --hosted-zone-id {hz_id} --change-batch '{batch}'" + ) + return cmds + + if task_id == 92: + cmds = ["aws cognito-idp create-user-pool --pool-name app-users"] + if step >= 1: + pool_id = "UNKNOWN" + ok, out, _ = backend.execute_command( + "aws cognito-idp list-user-pools --max-results 10" + ) + try: + for p in json.loads(out).get("UserPools", []): + if "app-users" in p.get("Name", ""): + pool_id = p["Id"] + break + except Exception: + pass + cmds.append( + f"aws cognito-idp create-user-pool-client --user-pool-id {pool_id} --client-name app-client" + ) + cmds.append( + f"aws iam create-role --role-name auth-handler-role --assume-role-policy-document {a('lambda.amazonaws.com')}" + ) + cmds.append( + f"aws lambda create-function --function-name auth-handler --runtime python3.12 --handler index.handler --role {_ROLE}/auth-handler-role {_CODE}" + ) + cmds.append( + "aws apigatewayv2 create-api --name auth-api --protocol-type HTTP" + ) + if step >= 5: + api_id = "UNKNOWN" + ok, out, _ = backend.execute_command("aws apigatewayv2 get-apis") + try: + for item in json.loads(out).get("Items", []): + if item.get("Name") == "auth-api": + api_id = item["ApiId"] + break + except Exception: + pass + cmds.append( + f"aws apigatewayv2 create-authorizer --api-id {api_id} --authorizer-type JWT --name cognito-auth --identity-source $request.header.Authorization --jwt-configuration Issuer=https://cognito-idp.us-east-1.amazonaws.com/{pool_id},Audience={pool_id}" + ) + return cmds + + if task_id == 93: + return [ + "aws s3api create-bucket --bucket cfn-templates", + "aws s3api put-object --bucket cfn-templates --key template.yaml --content-type application/x-yaml", + f"aws iam create-role --role-name cfn-deploy-role --assume-role-policy-document {a('cloudformation.amazonaws.com')}", + """aws cloudformation create-stack --stack-name app-stack 
--template-body '{"AWSTemplateFormatVersion":"2010-09-09","Resources":{}}'""", + ] + + if task_id == 94: + return [ + "aws s3api create-bucket --bucket data-lake-raw", + "aws s3api create-bucket --bucket data-lake-processed", + f"aws iam create-role --role-name glue-etl-role --assume-role-policy-document {a('glue.amazonaws.com')}", + """aws glue create-database --database-input '{"Name":"analytics-db"}'""", + f"""aws glue create-crawler --name raw-data-crawler --role {_ROLE}/glue-etl-role --database-name analytics-db --targets '{{"S3Targets":[{{"Path":"s3://data-lake-raw/"}}]}}'""", + ] + + if task_id == 95: + return [ + "aws s3api create-bucket --bucket event-archive", + f"aws iam create-role --role-name firehose-delivery-role --assume-role-policy-document {a('firehose.amazonaws.com')}", + "aws firehose create-delivery-stream --delivery-stream-name event-stream --s3-destination-configuration RoleARN=arn:aws:iam::000000000000:role/firehose-delivery-role,BucketARN=arn:aws:s3:::event-archive", + "aws firehose put-record --delivery-stream-name event-stream --record Data=dGVzdCBldmVudA==", + ] + + if task_id == 96: + return [ + f"aws iam create-role --role-name db-cleanup-role --assume-role-policy-document {a('lambda.amazonaws.com')}", + f"aws lambda create-function --function-name db-cleanup --runtime python3.12 --handler index.handler --role {_ROLE}/db-cleanup-role {_CODE}", + 'aws events put-rule --name nightly-cleanup --schedule-expression "cron(0 0 * * ? 
*)"', + "aws events put-targets --rule nightly-cleanup --targets Id=1,Arn=arn:aws:lambda:us-east-1:000000000000:function:db-cleanup", + "aws lambda add-permission --function-name db-cleanup --statement-id events-invoke --action lambda:InvokeFunction --principal events.amazonaws.com --source-arn arn:aws:events:us-east-1:000000000000:rule/nightly-cleanup", + ] + + if task_id == 97: + return [ + "aws ssm put-parameter --name app-config-db-host --type String --value db.internal.local", + "aws ssm put-parameter --name app-config-api-key --type String --value sk-test-123", + f"aws iam create-role --role-name config-reader-role --assume-role-policy-document {a('lambda.amazonaws.com')}", + f"aws lambda create-function --function-name config-reader --runtime python3.12 --handler index.handler --role {_ROLE}/config-reader-role {_CODE}", + 'aws events put-rule --name config-refresh --schedule-expression "rate(1 hour)"', + "aws events put-targets --rule config-refresh --targets Id=1,Arn=arn:aws:lambda:us-east-1:000000000000:function:config-reader", + ] + + if task_id == 98: + cmds = [ + 'aws ec2 create-security-group --group-name cache-sg --description "Redis access"' + ] + if step >= 1: + sg_id = "UNKNOWN" + ok, out, _ = backend.execute_command( + "aws ec2 describe-security-groups --group-names cache-sg" + ) + try: + sg_id = json.loads(out)["SecurityGroups"][0]["GroupId"] + except Exception: + pass + cmds.append( + f"aws ec2 authorize-security-group-ingress --group-id {sg_id} --protocol tcp --port 6379 --cidr 10.0.0.0/16" + ) + cmds.append( + 'aws elasticache create-cache-subnet-group --cache-subnet-group-name cache-subnets --cache-subnet-group-description "subnets" --subnet-ids subnet-00000001' + ) + cmds.append( + f"aws elasticache create-cache-cluster --cache-cluster-id session-store --engine redis --cache-node-type cache.t3.micro --num-cache-nodes 1 --security-group-ids {sg_id}" + ) + cmds.append( + f"aws iam create-policy --policy-name cache-access --policy-document 
{_SIMPLE_POLICY}" + ) + return cmds + + if task_id == 99: + cmds = [ + 'aws ec2 create-security-group --group-name efs-sg --description "NFS access"' + ] + if step >= 1: + sg_id = "UNKNOWN" + ok, out, _ = backend.execute_command( + "aws ec2 describe-security-groups --group-names efs-sg" + ) + try: + sg_id = json.loads(out)["SecurityGroups"][0]["GroupId"] + except Exception: + pass + cmds.append( + f"aws ec2 authorize-security-group-ingress --group-id {sg_id} --protocol tcp --port 2049 --cidr 10.0.0.0/16" + ) + cmds.append("aws efs create-file-system --creation-token shared-fs") + if step >= 3: + fs_id = "UNKNOWN" + ok, out, _ = backend.execute_command("aws efs describe-file-systems") + try: + for fs in json.loads(out).get("FileSystems", []): + if fs.get("CreationToken") == "shared-fs": + fs_id = fs["FileSystemId"] + break + except Exception: + pass + cmds.append( + f"aws efs create-mount-target --file-system-id {fs_id} --subnet-id subnet-00000001 --security-groups {sg_id}" + ) + cmds.append( + f"aws iam create-policy --policy-name efs-access --policy-document {_SIMPLE_POLICY}" + ) + return cmds + + if task_id == 100: + return [ + "aws s3api create-bucket --bucket emr-logs", + "aws s3api create-bucket --bucket emr-output", + f"aws iam create-role --role-name emr-service-role --assume-role-policy-document {a('elasticmapreduce.amazonaws.com')}", + "aws iam create-instance-profile --instance-profile-name emr-ec2-profile", + "aws emr create-cluster --name analytics-cluster --release-label emr-6.15.0 --instance-type m5.xlarge --instance-count 1", + ] + + if task_id == 101: + cmds = [ + "aws dynamodb create-table --table-name user-activity --key-schema AttributeName=user_id,KeyType=HASH --attribute-definitions AttributeName=user_id,AttributeType=S --billing-mode PAY_PER_REQUEST --stream-specification StreamEnabled=true,StreamViewType=NEW_AND_OLD_IMAGES", + "aws sqs create-queue --queue-name activity-dlq", + f"aws iam create-role --role-name activity-processor-role 
--assume-role-policy-document {a('lambda.amazonaws.com')}", + f"aws lambda create-function --function-name activity-processor --runtime python3.12 --handler index.handler --role {_ROLE}/activity-processor-role {_CODE}", + ] + if step >= 4: + stream_arn = "UNKNOWN" + ok, out, _ = backend.execute_command( + "aws dynamodb describe-table --table-name user-activity" + ) + try: + stream_arn = json.loads(out)["Table"]["LatestStreamArn"] + except Exception: + pass + cmds.append( + f"aws lambda create-event-source-mapping --function-name activity-processor --event-source-arn {stream_arn} --starting-position LATEST" + ) + return cmds + + if task_id == 102: + return [ + "aws sns create-topic --name system-alerts", + "aws sqs create-queue --queue-name alert-archive", + "aws sns subscribe --topic-arn arn:aws:sns:us-east-1:000000000000:system-alerts --protocol sqs --notification-endpoint arn:aws:sqs:us-east-1:000000000000:alert-archive", + f"aws iam create-role --role-name alert-handler-role --assume-role-policy-document {a('lambda.amazonaws.com')}", + f"aws lambda create-function --function-name alert-handler --runtime python3.12 --handler index.handler --role {_ROLE}/alert-handler-role {_CODE}", + "aws sns subscribe --topic-arn arn:aws:sns:us-east-1:000000000000:system-alerts --protocol lambda --notification-endpoint arn:aws:lambda:us-east-1:000000000000:function:alert-handler", + 'aws sns publish --topic-arn arn:aws:sns:us-east-1:000000000000:system-alerts --message "test alert"', + ] + + if task_id == 103: + cmds = [ + "aws dynamodb create-table --table-name tasks-table --key-schema AttributeName=task_id,KeyType=HASH --attribute-definitions AttributeName=task_id,AttributeType=S --billing-mode PAY_PER_REQUEST", + f"aws iam create-role --role-name tasks-api-role --assume-role-policy-document {a('lambda.amazonaws.com')}", + f"aws lambda create-function --function-name tasks-api-handler --runtime python3.12 --handler index.handler --role {_ROLE}/tasks-api-role {_CODE}", + "aws 
apigatewayv2 create-api --name tasks-api --protocol-type HTTP", + ] + if step >= 4: + api_id = "UNKNOWN" + ok, out, _ = backend.execute_command("aws apigatewayv2 get-apis") + try: + for item in json.loads(out).get("Items", []): + if item.get("Name") == "tasks-api": + api_id = item["ApiId"] + break + except Exception: + pass + cmds.append( + f"aws apigatewayv2 create-integration --api-id {api_id} --integration-type AWS_PROXY --integration-uri arn:aws:lambda:us-east-1:000000000000:function:tasks-api-handler --payload-format-version 2.0" + ) + cmds.append( + f'aws apigatewayv2 create-route --api-id {api_id} --route-key "GET /tasks"' + ) + return cmds + + if task_id == 104: + _spolicy = json.dumps( + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Deny", + "Principal": "*", + "Action": "s3:PutObject", + "Resource": "arn:aws:s3:::secure-input/*", + "Condition": { + "StringNotEquals": { + "s3:x-amz-server-side-encryption": "AES256" + } + }, + } + ], + } + ) + return [ + "aws s3api create-bucket --bucket secure-input", + "aws s3api create-bucket --bucket secure-output", + f"aws s3api put-bucket-policy --bucket secure-input --policy '{_spolicy}'", + f"aws iam create-role --role-name data-transformer-role --assume-role-policy-document {a('lambda.amazonaws.com')}", + f"aws lambda create-function --function-name data-transformer --runtime python3.12 --handler index.handler --role {_ROLE}/data-transformer-role {_CODE}", + ] + + if task_id == 105: + cmds = [ + "aws secretsmanager create-secret --name third-party-api-key --secret-string sk-live-abc123", + f"aws iam create-role --role-name external-caller-role --assume-role-policy-document {a('lambda.amazonaws.com')}", + f"aws lambda create-function --function-name external-caller --runtime python3.12 --handler index.handler --role {_ROLE}/external-caller-role {_CODE}", + "aws apigateway create-rest-api --name external-api", + ] + if step >= 4: + api_id = "UNKNOWN" + ok, out, _ = backend.execute_command("aws 
apigateway get-rest-apis") + try: + for item in json.loads(out).get("items", []): + if item.get("name") == "external-api": + api_id = item["id"] + break + except Exception: + pass + ok2, out2, _ = backend.execute_command( + f"aws apigateway get-resources --rest-api-id {api_id}" + ) + root_id = "UNKNOWN" + try: + for item in json.loads(out2).get("items", []): + if item.get("path") == "/": + root_id = item["id"] + break + except Exception: + pass + cmds.append( + f"aws apigateway create-resource --rest-api-id {api_id} --parent-id {root_id} --path-part call" + ) + if step >= 5: + res_id = "UNKNOWN" + ok3, out3, _ = backend.execute_command( + f"aws apigateway get-resources --rest-api-id {api_id}" + ) + try: + for item in json.loads(out3).get("items", []): + if item.get("pathPart") == "call": + res_id = item["id"] + break + except Exception: + pass + cmds.append( + f"aws apigateway put-method --rest-api-id {api_id} --resource-id {res_id} --http-method GET --authorization-type NONE" + ) + cmds.append( + f"aws apigateway put-integration --rest-api-id {api_id} --resource-id {res_id} --http-method GET --type AWS_PROXY --integration-http-method POST --uri arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:000000000000:function:external-caller/invocations" + ) + return cmds + + if task_id == 106: + return [ + f"aws iam create-role --role-name batch-task-role --assume-role-policy-document {a('ecs-tasks.amazonaws.com')}", + "aws ecs create-cluster --cluster-name batch-cluster", + """aws ecs register-task-definition --family batch-job --container-definitions '[{"name":"batch","image":"python:3.12","memory":256,"cpu":128}]' --requires-compatibilities FARGATE --network-mode awsvpc --cpu 256 --memory 512""", + 'aws ec2 create-security-group --group-name batch-sg --description "Batch SG"', + "aws ecs run-task --cluster batch-cluster --task-definition batch-job --launch-type FARGATE --network-configuration 
awsvpcConfiguration={subnets=[subnet-00000001],securityGroups=[sg-00000001]}", + ] + + if task_id == 107: + return [ + "aws s3api create-bucket --bucket query-results", + "aws s3api create-bucket --bucket analytics-data", + """aws glue create-database --database-input '{"Name":"web-analytics"}'""", + f"aws iam create-policy --policy-name athena-access --policy-document {_SIMPLE_POLICY}", + "aws athena create-work-group --name analytics-team --configuration ResultConfiguration={OutputLocation=s3://query-results/}", + ] + + if task_id == 108: + return [ + "aws s3api create-bucket --bucket lambda-artifacts", + "aws s3api put-object --bucket lambda-artifacts --key function.zip --content-type application/zip", + f"aws iam create-role --role-name cfn-lambda-role --assume-role-policy-document {a('cloudformation.amazonaws.com')}", + f"aws iam create-role --role-name lambda-exec-role --assume-role-policy-document {a('lambda.amazonaws.com')}", + """aws cloudformation create-stack --stack-name lambda-stack --template-body '{"AWSTemplateFormatVersion":"2010-09-09","Resources":{}}'""", + ] + + return [] + + +# --------------------------------------------------------------------------- +# Expert tasks with dynamic IDs +# --------------------------------------------------------------------------- + + +def _expert_dynamic_command( + task_id: int, backend: AwsBackend, step: int, static_cmds: list[str] +) -> list[str]: + """Append dynamically resolved commands for expert tasks that need runtime IDs.""" + cmds = list(static_cmds) + + if task_id == 114: + # Route53 zone-id from setup + ok, out, _ = backend.execute_command("aws route53 list-hosted-zones") + zone_id = "UNKNOWN" + try: + for hz in json.loads(out).get("HostedZones", []): + if "example.com" in hz.get("Name", ""): + zone_id = hz["Id"].split("/")[-1] + break + except Exception: + pass + change_batch = json.dumps( + { + "Changes": [ + { + "Action": "UPSERT", + "ResourceRecordSet": { + "Name": "api.example.com", + "Type": "A", 
+ "TTL": 300, + "ResourceRecords": [{"Value": "10.0.1.50"}], + }, + } + ] + } + ) + cmds.append( + f"aws route53 change-resource-record-sets --hosted-zone-id {zone_id} --change-batch '{change_batch}'" + ) + + elif task_id == 115: + ok, out, _ = backend.execute_command( + "aws elbv2 describe-target-groups --names web-targets" + ) + tg_arn = "UNKNOWN" + try: + tg_arn = json.loads(out)["TargetGroups"][0]["TargetGroupArn"] + except Exception: + pass + cmds.append( + f"aws elbv2 modify-target-group --target-group-arn {tg_arn} --health-check-path /health --health-check-port 80 --health-check-interval-seconds 15 --healthy-threshold-count 2" + ) + + elif task_id == 126: + ok, out, _ = backend.execute_command( + "aws cognito-idp list-user-pools --max-results 10" + ) + pool_id = "UNKNOWN" + try: + for pool in json.loads(out).get("UserPools", []): + if "customer-auth" in pool.get("Name", ""): + pool_id = pool["Id"] + break + except Exception: + pass + policies = json.dumps( + { + "PasswordPolicy": { + "MinimumLength": 12, + "RequireUppercase": True, + "RequireLowercase": True, + "RequireNumbers": True, + "RequireSymbols": True, + "TemporaryPasswordValidityDays": 1, + } + } + ) + cmds.append( + f"aws cognito-idp update-user-pool --user-pool-id {pool_id} --policies '{policies}'" + ) + + return cmds + + +# --------------------------------------------------------------------------- +# Intermediate tasks with dynamic follow-ups +# --------------------------------------------------------------------------- + + +def _intermediate_dynamic( + task_id: int, backend: AwsBackend, step: int, static_cmds: list[str] +) -> list[str]: + """Resolve dynamic follow-up commands for intermediate tasks.""" + cmds = list(static_cmds) + + if task_id == 76 and step >= 1: + ok, out, _ = backend.execute_command( + "aws cognito-idp list-user-pools --max-results 10" + ) + pool_id = "UNKNOWN" + try: + for pool in json.loads(out).get("UserPools", []): + if "app-users" in pool.get("Name", ""): + pool_id = 
pool["Id"] + break + except Exception: + pass + cmds.append( + f"aws cognito-idp create-user-pool-client --user-pool-id {pool_id} --client-name web-app-client" + ) + + elif task_id == 78 and step >= 1: + ok, out, _ = backend.execute_command("aws ec2 describe-volumes") + vol_id = "UNKNOWN" + try: + for vol in json.loads(out).get("Volumes", []): + vol_id = vol["VolumeId"] + break + except Exception: + pass + cmds.append( + f"aws ec2 create-tags --resources {vol_id} --tags Key=Name,Value=data-volume" + ) + + elif task_id == 82 and step >= 1: + ok, out, _ = backend.execute_command("aws apigatewayv2 get-apis") + api_id = "UNKNOWN" + try: + for api in json.loads(out).get("Items", []): + if "products-api" in api.get("Name", ""): + api_id = api["ApiId"] + break + except Exception: + pass + cmds.append( + f'aws apigatewayv2 create-route --api-id {api_id} --route-key "GET /products-api"' + ) + + elif task_id == 84 and step >= 1: + ok, out, _ = backend.execute_command( + "aws sqs get-queue-url --queue-name task-queue" + ) + queue_url = "UNKNOWN" + try: + queue_url = json.loads(out)["QueueUrl"] + except Exception: + pass + cmds.append( + f"""aws sqs send-message --queue-url {queue_url} --message-body '{{"task":"process","id":"task-queue-001"}}'""" + ) + + return cmds + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +_ADVANCED_IDS = { + 15, + 16, + 17, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, +} +_INTERMEDIATE_DYNAMIC_IDS = {76, 78, 82, 84} +_EXPERT_DYNAMIC_IDS = {114, 115, 126} + + +def get_next_solution( + task_id: int, + backend: AwsBackend, + tracker: EpisodeTracker, +) -> dict: + """Return the next solution command for the given task. 
+ + Returns: + {"command": str | None, "step": int, "total_steps": int} + """ + step = tracker.step_count + + # Advanced: fully dynamic command sequences + if task_id in _ADVANCED_IDS: + cmds = _advanced_commands(task_id, backend, step) + if step < len(cmds): + return {"command": cmds[step], "step": step + 1, "total_steps": len(cmds)} + return {"command": None, "step": step, "total_steps": len(cmds)} + + # Load static commands + static = _load_static() + base_cmds = static.get(task_id, []) + + # Intermediate with dynamic follow-ups + if task_id in _INTERMEDIATE_DYNAMIC_IDS: + cmds = _intermediate_dynamic(task_id, backend, step, base_cmds) + if step < len(cmds): + return {"command": cmds[step], "step": step + 1, "total_steps": len(cmds)} + return {"command": None, "step": step, "total_steps": len(cmds)} + + # Expert with dynamic IDs + if task_id in _EXPERT_DYNAMIC_IDS: + cmds = _expert_dynamic_command(task_id, backend, step, base_cmds) + if step < len(cmds): + return {"command": cmds[step], "step": step + 1, "total_steps": len(cmds)} + return {"command": None, "step": step, "total_steps": len(cmds)} + + # Default: static commands + if step < len(base_cmds): + return { + "command": base_cmds[step], + "step": step + 1, + "total_steps": len(base_cmds), + } + return {"command": None, "step": step, "total_steps": len(base_cmds)} diff --git a/server/services/tasks/advanced.yaml b/server/services/tasks/advanced.yaml index d4f3913701821c835c4e5c859832779ba2701c26..35ccb995290c02cacbcae41cf8136038b0cad27e 100644 --- a/server/services/tasks/advanced.yaml +++ b/server/services/tasks/advanced.yaml @@ -58,3 +58,530 @@ - operation: subscribe - operation: subscribe - operation: publish + +- task_id: 87 + description: > + Build an event-driven image processing pipeline. 
Create an S3 bucket + 'image-uploads', create an IAM execution role for Lambda, create a + Lambda function 'image-resizer' with the execution role, then + configure an S3 event notification to trigger the Lambda on object + creation using the events service. + success_criteria: + services: + - s3 + - iam + - lambda + - events + steps: + - operation: create-bucket + resource: image-uploads + - operation: create-role + - operation: create-function + resource: image-resizer + - operation: put-bucket-notification-configuration + resource: image-uploads + - operation: put-rule + - operation: put-targets + +- task_id: 88 + description: > + Deploy a containerized microservice behind a load balancer. Create an + IAM role for ECS task execution, register an ECS task definition + 'web-app-task', create an ECS cluster 'web-cluster', create a target + group 'web-tg' on port 80, create an application load balancer + 'web-alb', and create an ECS service 'web-service' attached to the + load balancer. + success_criteria: + services: + - iam + - ecs + - elbv2 + - ec2 + steps: + - operation: create-role + - operation: register-task-definition + resource: web-app-task + - operation: create-cluster + resource: web-cluster + - operation: create-target-group + resource: web-tg + - operation: create-load-balancer + resource: web-alb + - operation: create-listener + - operation: create-service + resource: web-service + +- task_id: 89 + description: > + Create an asynchronous order processing system. Create a DynamoDB + table 'orders', create an SQS queue 'order-queue', create an SNS + topic 'order-notifications', subscribe the SQS queue to the SNS + topic, create an IAM role for Lambda, and create a Lambda function + 'order-processor' with the SQS queue as an event source. 
+ success_criteria: + services: + - dynamodb + - sqs + - sns + - lambda + steps: + - operation: create-table + resource: orders + - operation: create-queue + resource: order-queue + - operation: create-topic + resource: order-notifications + - operation: subscribe + - operation: create-role + - operation: create-function + resource: order-processor + - operation: create-event-source-mapping + +- task_id: 90 + description: > + Set up a secure database with rotated credentials. Create an RDS + subnet group 'db-subnets', create an RDS MySQL instance 'app-db', + store the database credentials in Secrets Manager as 'db-credentials', + create an IAM role for Lambda, and create a Lambda function + 'secret-rotator' to handle credential rotation. + success_criteria: + services: + - rds + - secretsmanager + - iam + - lambda + steps: + - operation: create-db-subnet-group + resource: db-subnets + - operation: create-db-instance + resource: app-db + - operation: create-secret + resource: db-credentials + - operation: create-role + - operation: create-function + resource: secret-rotator + +- task_id: 91 + description: > + Build a DNS-routed load-balanced web tier. Create a VPC security + group 'web-sg' allowing HTTP traffic, create a target group + 'frontend-tg' on port 80, create an application load balancer + 'frontend-alb', create a listener on port 80, create a Route53 + hosted zone 'example.internal', and add an alias record pointing + to the load balancer. + success_criteria: + services: + - ec2 + - elbv2 + - route53 + steps: + - operation: create-security-group + resource: web-sg + - operation: create-target-group + resource: frontend-tg + - operation: create-load-balancer + resource: frontend-alb + - operation: create-listener + - operation: create-hosted-zone + resource: example.internal + - operation: change-resource-record-sets + +- task_id: 92 + description: > + Deploy a Cognito-authenticated HTTP API. 
Create a Cognito user pool + 'app-users', create a user pool client 'app-client', create an IAM + role for Lambda, create a Lambda function 'auth-handler', create an + HTTP API 'auth-api' using API Gateway v2, and attach a JWT authorizer + backed by the Cognito user pool. + success_criteria: + services: + - cognito-idp + - iam + - lambda + - apigatewayv2 + steps: + - operation: create-user-pool + resource: app-users + - operation: create-user-pool-client + resource: app-client + - operation: create-role + - operation: create-function + resource: auth-handler + - operation: create-api + resource: auth-api + - operation: create-authorizer + +- task_id: 93 + description: > + Set up infrastructure-as-code deployment via CloudFormation. Create + an S3 bucket 'cfn-templates' to store templates, upload a template + object to the bucket, create an IAM role 'cfn-deploy-role' for + CloudFormation execution, and create a CloudFormation stack + 'app-stack' using the uploaded template and IAM role. + success_criteria: + services: + - s3 + - iam + - cloudformation + steps: + - operation: create-bucket + resource: cfn-templates + - operation: put-object + - operation: create-role + resource: cfn-deploy-role + - operation: create-stack + resource: app-stack + +- task_id: 94 + description: > + Build an ETL pipeline with AWS Glue. Create an S3 bucket + 'data-lake-raw' for raw data, create a second S3 bucket + 'data-lake-processed' for processed output, create an IAM role + 'glue-etl-role' for Glue execution, create a Glue database + 'analytics-db', and create a Glue crawler 'raw-data-crawler' + targeting the raw data bucket. 
+ success_criteria: + services: + - s3 + - iam + - glue + steps: + - operation: create-bucket + resource: data-lake-raw + - operation: create-bucket + resource: data-lake-processed + - operation: create-role + resource: glue-etl-role + - operation: create-database + resource: analytics-db + - operation: create-crawler + resource: raw-data-crawler + +- task_id: 95 + description: > + Create a real-time data ingestion pipeline with Kinesis Firehose. + Create an S3 bucket 'event-archive' as the delivery destination, + create an IAM role 'firehose-delivery-role' with S3 write + permissions, create a Firehose delivery stream 'event-stream' + delivering to the S3 bucket, and put a test record into the stream. + success_criteria: + services: + - s3 + - iam + - firehose + steps: + - operation: create-bucket + resource: event-archive + - operation: create-role + resource: firehose-delivery-role + - operation: create-delivery-stream + resource: event-stream + - operation: put-record + +- task_id: 96 + description: > + Build a scheduled Lambda maintenance job using EventBridge. Create + an IAM role for Lambda execution, create a Lambda function + 'db-cleanup' using the execution role, create an EventBridge rule + 'nightly-cleanup' with a cron schedule, add the Lambda function as + the rule target, and grant EventBridge permission to invoke the + Lambda. + success_criteria: + services: + - iam + - lambda + - events + steps: + - operation: create-role + - operation: create-function + resource: db-cleanup + - operation: put-rule + resource: nightly-cleanup + - operation: put-targets + - operation: add-permission + +- task_id: 97 + description: > + Deploy a parameter-driven Lambda using Systems Manager. 
Create + SSM parameters 'app-config-db-host' and 'app-config-api-key' + to store application configuration, create an IAM role with SSM + read permissions for Lambda, create a Lambda function + 'config-reader' that reads the parameters at runtime, and create + an EventBridge rule to invoke it on a schedule. + success_criteria: + services: + - ssm + - iam + - lambda + - events + steps: + - operation: put-parameter + resource: app-config-db-host + - operation: put-parameter + resource: app-config-api-key + - operation: create-role + - operation: create-function + resource: config-reader + - operation: put-rule + - operation: put-targets + +- task_id: 98 + description: > + Provision an ElastiCache cluster with network access. Create a VPC + security group 'cache-sg' allowing inbound Redis traffic on port + 6379, create a cache subnet group 'cache-subnets', create an + ElastiCache Redis cluster 'session-store' in the subnet group with + the security group, and create an IAM policy for application access. + success_criteria: + services: + - ec2 + - elasticache + - iam + steps: + - operation: create-security-group + resource: cache-sg + - operation: authorize-security-group-ingress + - operation: create-cache-subnet-group + resource: cache-subnets + - operation: create-cache-cluster + resource: session-store + - operation: create-policy + +- task_id: 99 + description: > + Set up a shared file system for EC2 instances. Create a VPC security + group 'efs-sg' allowing NFS traffic on port 2049, create an EFS + file system with a creation token 'shared-fs', create a mount target + in a subnet with the security group, and create an IAM policy + granting EFS access to EC2 instances. 
+ success_criteria: + services: + - ec2 + - efs + - iam + steps: + - operation: create-security-group + resource: efs-sg + - operation: authorize-security-group-ingress + - operation: create-file-system + resource: shared-fs + - operation: create-mount-target + - operation: create-policy + +- task_id: 100 + description: > + Launch an EMR cluster for big data processing. Create an S3 bucket + 'emr-logs' for cluster logs, create an S3 bucket 'emr-output' for + job output, create an IAM role 'emr-service-role' for the EMR + service, create an IAM instance profile 'emr-ec2-profile' for + cluster nodes, and run a cluster 'analytics-cluster' with Spark. + success_criteria: + services: + - s3 + - iam + - emr + steps: + - operation: create-bucket + resource: emr-logs + - operation: create-bucket + resource: emr-output + - operation: create-role + resource: emr-service-role + - operation: create-instance-profile + resource: emr-ec2-profile + - operation: create-cluster + resource: analytics-cluster + +- task_id: 101 + description: > + Build a DynamoDB stream processing pipeline. Create a DynamoDB + table 'user-activity' with streams enabled, create an SQS dead + letter queue 'activity-dlq', create an IAM role for Lambda, create + a Lambda function 'activity-processor', and create an event source + mapping from the DynamoDB stream to the Lambda function. + success_criteria: + services: + - dynamodb + - sqs + - iam + - lambda + steps: + - operation: create-table + resource: user-activity + - operation: create-queue + resource: activity-dlq + - operation: create-role + - operation: create-function + resource: activity-processor + - operation: create-event-source-mapping + +- task_id: 102 + description: > + Create a multi-target SNS notification pipeline. 
Create an SNS topic + 'system-alerts', create an SQS queue 'alert-archive' and subscribe + it to the topic, create an IAM role for Lambda, create a Lambda + function 'alert-handler' and subscribe it to the same topic, and + publish a test alert message. + success_criteria: + services: + - sns + - sqs + - iam + - lambda + steps: + - operation: create-topic + resource: system-alerts + - operation: create-queue + resource: alert-archive + - operation: subscribe + - operation: create-role + - operation: create-function + resource: alert-handler + - operation: subscribe + - operation: publish + +- task_id: 103 + description: > + Deploy a serverless CRUD API with DynamoDB and API Gateway v2. + Create a DynamoDB table 'tasks-table', create an IAM role for + Lambda with DynamoDB permissions, create a Lambda function + 'tasks-api-handler', create an HTTP API 'tasks-api' using API + Gateway v2, create an integration with the Lambda, and create + a route for GET /tasks. + success_criteria: + services: + - dynamodb + - iam + - lambda + - apigatewayv2 + steps: + - operation: create-table + resource: tasks-table + - operation: create-role + - operation: create-function + resource: tasks-api-handler + - operation: create-api + resource: tasks-api + - operation: create-integration + - operation: create-route + +- task_id: 104 + description: > + Set up a secure S3 data pipeline with encryption. Create an S3 + bucket 'secure-input', create a second S3 bucket 'secure-output', + put a bucket policy enforcing encryption on 'secure-input', create + an IAM role for Lambda, and create a Lambda function 'data-transformer' + that reads from input and writes to output. 
+ success_criteria: + services: + - s3 + - iam + - lambda + steps: + - operation: create-bucket + resource: secure-input + - operation: create-bucket + resource: secure-output + - operation: put-bucket-policy + resource: secure-input + - operation: create-role + - operation: create-function + resource: data-transformer + +- task_id: 105 + description: > + Build a secrets-backed Lambda API. Store an API key in Secrets + Manager as 'third-party-api-key', create an IAM role with Secrets + Manager read access, create a Lambda function 'external-caller' + that retrieves the secret at runtime, create an API Gateway REST + API 'external-api', create a resource and method, and integrate + with the Lambda. + success_criteria: + services: + - secretsmanager + - iam + - lambda + - apigateway + steps: + - operation: create-secret + resource: third-party-api-key + - operation: create-role + - operation: create-function + resource: external-caller + - operation: create-rest-api + resource: external-api + - operation: create-resource + - operation: put-method + - operation: put-integration + +- task_id: 106 + description: > + Deploy a containerized batch processor with ECS Fargate. Create an + IAM role 'batch-task-role' for ECS task execution, create an ECS + cluster 'batch-cluster', register a task definition 'batch-job' + with Fargate compatibility, create a security group 'batch-sg', + and run a standalone task in the cluster. + success_criteria: + services: + - iam + - ecs + - ec2 + steps: + - operation: create-role + resource: batch-task-role + - operation: create-cluster + resource: batch-cluster + - operation: register-task-definition + resource: batch-job + - operation: create-security-group + resource: batch-sg + - operation: run-task + +- task_id: 107 + description: > + Create an Athena analytics workspace. 
Create an S3 bucket + 'query-results' for Athena output, create an S3 bucket + 'analytics-data' for source data, create a Glue database + 'web-analytics', create an IAM policy for Athena access, and + create an Athena workgroup 'analytics-team' configured to use + the results bucket. + success_criteria: + services: + - s3 + - glue + - iam + - athena + steps: + - operation: create-bucket + resource: query-results + - operation: create-bucket + resource: analytics-data + - operation: create-database + resource: web-analytics + - operation: create-policy + - operation: create-work-group + resource: analytics-team + +- task_id: 108 + description: > + Build a CloudFormation-managed Lambda stack with artifact storage. + Create an S3 bucket 'lambda-artifacts' for deployment packages, + upload a Lambda zip package to the bucket, create an IAM role + 'cfn-lambda-role' for CloudFormation, create an IAM role + 'lambda-exec-role' for the Lambda function, and create a + CloudFormation stack 'lambda-stack' referencing the S3 artifact. + success_criteria: + services: + - s3 + - iam + - cloudformation + steps: + - operation: create-bucket + resource: lambda-artifacts + - operation: put-object + - operation: create-role + resource: cfn-lambda-role + - operation: create-role + resource: lambda-exec-role + - operation: create-stack + resource: lambda-stack diff --git a/server/services/tasks/beginner.yaml b/server/services/tasks/beginner.yaml index 8c8dee4904574bf89a63f62139537d9ec7ddc73d..7040c08ee5cdb4039fbfc81607b72c42bd612a4f 100644 --- a/server/services/tasks/beginner.yaml +++ b/server/services/tasks/beginner.yaml @@ -42,3 +42,183 @@ resource_exists: service: lambda name: hello-world + +- task_id: 46 + description: Create an IAM role named 'lambda-exec-role' with an assume role policy that allows the Lambda service to assume it. 
+  success_criteria:
+    command_contains: iam
+    operation: create-role
+    resource_exists:
+      service: iam
+      name: lambda-exec-role
+
+- task_id: 47
+  description: Create a secret in Secrets Manager named 'db-credentials' with the value '{"username":"admin","password":"secret123"}'.
+  success_criteria:
+    command_contains: secretsmanager
+    operation: create-secret
+    resource_exists:
+      service: secretsmanager
+      name: db-credentials
+
+- task_id: 48
+  description: Create an ECS cluster named 'web-cluster'.
+  success_criteria:
+    command_contains: ecs
+    operation: create-cluster
+    resource_exists:
+      service: ecs
+      name: web-cluster
+
+- task_id: 49
+  description: Create an RDS DB instance named 'app-database' with engine 'mysql', instance class 'db.t3.micro', master username 'admin', and master password 'Password123'.
+  success_criteria:
+    command_contains: rds
+    operation: create-db-instance
+    resource_exists:
+      service: rds
+      name: app-database
+
+- task_id: 50
+  description: Create an ElastiCache cluster named 'session-cache' with engine 'redis' and cache node type 'cache.t3.micro'.
+  success_criteria:
+    command_contains: elasticache
+    operation: create-cache-cluster
+    resource_exists:
+      service: elasticache
+      name: session-cache
+
+- task_id: 51
+  description: Create a Route53 hosted zone for the domain 'example.internal'.
+  success_criteria:
+    command_contains: route53
+    operation: create-hosted-zone
+    resource_exists:
+      service: route53
+      name: example.internal
+
+- task_id: 52
+  description: Create an Application Load Balancer named 'web-alb' with subnets 'subnet-00000001' and 'subnet-00000002'.
+  success_criteria:
+    command_contains: elbv2
+    operation: create-load-balancer
+    resource_exists:
+      service: elbv2
+      name: web-alb
+
+- task_id: 53
+  description: Create an EBS volume of 20 GiB in availability zone 'us-east-1a'.
+  # NOTE(review): the success_criteria for this task expect a resource named
+  # 'us-east-1a-volume', but this description never asks the agent to name or
+  # tag the volume. Either the backend must auto-name EBS volumes by AZ, or
+  # the description should additionally instruct tagging the volume with
+  # Key=Name,Value=us-east-1a-volume — confirm before shipping.
+ success_criteria: + command_contains: ec2 + operation: create-volume + resource_exists: + service: ebs + name: us-east-1a-volume + +- task_id: 54 + description: Create an EFS file system with a creation token of 'shared-storage'. + success_criteria: + command_contains: efs + operation: create-file-system + resource_exists: + service: efs + name: shared-storage + +- task_id: 55 + description: Create a Cognito user pool named 'app-users'. + success_criteria: + command_contains: cognito-idp + operation: create-user-pool + resource_exists: + service: cognito-idp + name: app-users + +- task_id: 56 + description: Create an SSM parameter named '/config/app/database-url' of type 'String' with value 'mysql://localhost:3306/mydb'. + success_criteria: + command_contains: ssm + operation: put-parameter + resource_exists: + service: ssm + name: /config/app/database-url + +- task_id: 57 + description: Create an EventBridge rule named 'daily-cleanup' with a schedule expression of 'rate(1 day)'. + success_criteria: + command_contains: events + operation: put-rule + resource_exists: + service: events + name: daily-cleanup + +- task_id: 58 + description: Create a CloudFormation stack named 'vpc-stack' using the template URL 'https://s3.amazonaws.com/templates/vpc.yaml'. + success_criteria: + command_contains: cloudformation + operation: create-stack + resource_exists: + service: cloudformation + name: vpc-stack + +- task_id: 59 + description: Create an API Gateway REST API named 'orders-api'. + success_criteria: + command_contains: apigateway + operation: create-rest-api + resource_exists: + service: apigateway + name: orders-api + +- task_id: 60 + description: Create an API Gateway V2 HTTP API named 'payments-api' with protocol type 'HTTP'. 
+ success_criteria: + command_contains: apigatewayv2 + operation: create-api + resource_exists: + service: apigatewayv2 + name: payments-api + +- task_id: 61 + description: Create a Glue database named 'analytics-db' in the default Glue catalog. + success_criteria: + command_contains: glue + operation: create-database + resource_exists: + service: glue + name: analytics-db + +- task_id: 62 + description: Create a Kinesis Firehose delivery stream named 'log-stream' with a direct put source. + success_criteria: + command_contains: firehose + operation: create-delivery-stream + resource_exists: + service: firehose + name: log-stream + +- task_id: 63 + description: Create an IAM policy named 's3-read-policy' that allows s3:GetObject on all resources. + success_criteria: + command_contains: iam + operation: create-policy + resource_exists: + service: iam + name: s3-read-policy + +- task_id: 64 + description: Create an IAM user named 'deploy-bot'. + success_criteria: + command_contains: iam + operation: create-user + resource_exists: + service: iam + name: deploy-bot + +- task_id: 65 + description: Create a Lambda function named 'data-processor' using the python3.12 runtime with handler 'index.handler' and role 'arn:aws:iam::000000000000:role/lambda-exec-role', using --zip-file fileb:///tmp/dummy.zip. + success_criteria: + command_contains: lambda + operation: create-function + resource_exists: + service: lambda + name: data-processor diff --git a/server/services/tasks/drift.yaml b/server/services/tasks/drift.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de832013b98a1d6ade6fcad1e971863e4d14583b --- /dev/null +++ b/server/services/tasks/drift.yaml @@ -0,0 +1,546 @@ +# Configuration Drift Detection Tasks (Expert Tier) +# +# Each task provisions correct infrastructure via setup_commands, then the +# DriftEngine randomly applies a subset of possible_drifts. 
The agent must +# audit the environment, discover which resources drifted, and fix only those. + +- task_id: 24 + description: > + The following infrastructure should exist: S3 bucket 'config-store' with + versioning enabled, a lifecycle rule named 'expire-old' that expires + non-current object versions after 90 days, and server-side encryption + using AES256. DynamoDB table 'sessions' with provisioned throughput of + 100 RCU and 100 WCU. Some resources may have drifted from the desired + specification. Audit the current state and fix any configuration that + does not match. + desired_state_spec: > + S3 bucket 'config-store': versioning=Enabled, lifecycle rule 'expire-old' + expiring non-current versions after 90 days, SSE with AES256. + DynamoDB table 'sessions': 100 RCU, 100 WCU. + setup_commands: + - aws s3api create-bucket --bucket config-store + - >- + aws s3api put-bucket-versioning --bucket config-store + --versioning-configuration Status=Enabled + - >- + aws s3api put-bucket-lifecycle-configuration --bucket config-store + --lifecycle-configuration '{"Rules":[{"ID":"expire-old","Status":"Enabled","NoncurrentVersionExpiration":{"NoncurrentDays":90},"Filter":{"Prefix":""}}]}' + - >- + aws s3api put-bucket-encryption --bucket config-store + --server-side-encryption-configuration '{"Rules":[{"ApplyServerSideEncryptionByDefault":{"SSEAlgorithm":"AES256"}}]}' + - >- + aws dynamodb create-table --table-name sessions + --attribute-definitions AttributeName=id,AttributeType=S + --key-schema AttributeName=id,KeyType=HASH + --provisioned-throughput ReadCapacityUnits=100,WriteCapacityUnits=100 + possible_drifts: + - command: >- + aws s3api put-bucket-versioning --bucket config-store + --versioning-configuration Status=Suspended + description: Versioning disabled on 'config-store' + - command: >- + aws s3api delete-bucket-lifecycle --bucket config-store + description: Lifecycle rule removed from 'config-store' + - command: >- + aws s3api delete-bucket-encryption --bucket 
config-store + description: Encryption removed from 'config-store' + - command: >- + aws dynamodb update-table --table-name sessions + --provisioned-throughput ReadCapacityUnits=5,WriteCapacityUnits=100 + description: DynamoDB RCU reduced to 5 + - command: >- + aws dynamodb update-table --table-name sessions + --provisioned-throughput ReadCapacityUnits=100,WriteCapacityUnits=5 + description: DynamoDB WCU reduced to 5 + success_criteria: + services: + - s3 + - dynamodb + state_checks: + - command: aws s3api get-bucket-versioning --bucket config-store + output_contains: "Enabled" + - command: aws s3api get-bucket-lifecycle-configuration --bucket config-store + output_contains: "expire-old" + - command: aws s3api get-bucket-encryption --bucket config-store + output_contains: "AES256" + - command: aws dynamodb describe-table --table-name sessions + json_path: "$.Table.ProvisionedThroughput.ReadCapacityUnits" + expected: 100 + - command: aws dynamodb describe-table --table-name sessions + json_path: "$.Table.ProvisionedThroughput.WriteCapacityUnits" + expected: 100 + +- task_id: 25 + description: > + The following infrastructure should exist: SNS topic 'ops-alerts' with + an SQS queue 'ops-inbox' subscribed to it. IAM role 'ops-automation' + with the AmazonSNSFullAccess and AmazonSQSFullAccess policies attached. + Lambda function 'alert-handler' using the 'ops-automation' role. Some + resources may have drifted. Audit and fix. + desired_state_spec: > + SNS topic 'ops-alerts' with SQS subscription 'ops-inbox'. + IAM role 'ops-automation' with AmazonSNSFullAccess and AmazonSQSFullAccess. + Lambda 'alert-handler' using role 'ops-automation'. 
+ setup_commands: + - aws sns create-topic --name ops-alerts + - aws sqs create-queue --queue-name ops-inbox + - >- + aws sns subscribe --topic-arn arn:aws:sns:us-east-1:000000000000:ops-alerts + --protocol sqs + --notification-endpoint arn:aws:sqs:us-east-1:000000000000:ops-inbox + - >- + aws iam create-role --role-name ops-automation + --assume-role-policy-document '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"Service":"lambda.amazonaws.com"},"Action":"sts:AssumeRole"}]}' + - >- + aws iam attach-role-policy --role-name ops-automation + --policy-arn arn:aws:iam::aws:policy/AmazonSNSFullAccess + - >- + aws iam attach-role-policy --role-name ops-automation + --policy-arn arn:aws:iam::aws:policy/AmazonSQSFullAccess + - >- + aws lambda create-function --function-name alert-handler + --runtime python3.12 --handler index.handler + --role arn:aws:iam::000000000000:role/ops-automation + --code S3Bucket=dummy,S3Key=dummy.zip + possible_drifts: + - command: >- + aws iam detach-role-policy --role-name ops-automation + --policy-arn arn:aws:iam::aws:policy/AmazonSNSFullAccess + description: SNS policy detached from 'ops-automation' + - command: >- + aws iam detach-role-policy --role-name ops-automation + --policy-arn arn:aws:iam::aws:policy/AmazonSQSFullAccess + description: SQS policy detached from 'ops-automation' + - command: aws lambda delete-function --function-name alert-handler + description: Lambda 'alert-handler' deleted + success_criteria: + services: + - sns + - sqs + - iam + - lambda + state_checks: + - command: aws sns list-subscriptions-by-topic --topic-arn arn:aws:sns:us-east-1:000000000000:ops-alerts + output_contains: "ops-inbox" + - command: aws iam list-attached-role-policies --role-name ops-automation + output_contains: "SNSFullAccess" + - command: aws iam list-attached-role-policies --role-name ops-automation + output_contains: "SQSFullAccess" + - command: aws lambda get-function --function-name alert-handler + output_contains: 
"alert-handler" + +- task_id: 128 + description: > + The following infrastructure should exist: IAM role 'api-executor' with + AmazonDynamoDBFullAccess and AWSLambdaBasicExecutionRole policies attached. + Lambda function 'api-handler' with 256MB memory, 30s timeout, runtime + python3.12, and environment variable APP_ENV=production. Some resources + may have drifted. Audit the current state and fix any configuration that + does not match. + desired_state_spec: > + IAM role 'api-executor': AmazonDynamoDBFullAccess and AWSLambdaBasicExecutionRole attached. + Lambda 'api-handler': 256MB memory, 30s timeout, python3.12, env APP_ENV=production. + setup_commands: + - >- + aws iam create-role --role-name api-executor + --assume-role-policy-document '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"Service":"lambda.amazonaws.com"},"Action":"sts:AssumeRole"}]}' + - >- + aws iam attach-role-policy --role-name api-executor + --policy-arn arn:aws:iam::aws:policy/AmazonDynamoDBFullAccess + - >- + aws iam attach-role-policy --role-name api-executor + --policy-arn arn:aws:iam::aws:policy/AWSLambdaBasicExecutionRole + - >- + aws lambda create-function --function-name api-handler + --runtime python3.12 --handler index.handler + --role arn:aws:iam::000000000000:role/api-executor + --code S3Bucket=dummy,S3Key=dummy.zip + --memory-size 256 --timeout 30 + --environment '{"Variables":{"APP_ENV":"production"}}' + possible_drifts: + - command: >- + aws iam detach-role-policy --role-name api-executor + --policy-arn arn:aws:iam::aws:policy/AmazonDynamoDBFullAccess + description: DynamoDB policy detached from 'api-executor' + - command: >- + aws lambda update-function-configuration --function-name api-handler + --memory-size 128 + description: Lambda memory changed from 256MB to 128MB + - command: >- + aws lambda update-function-configuration --function-name api-handler + --timeout 3 + description: Lambda timeout changed from 30s to 3s + - command: >- + aws lambda 
update-function-configuration --function-name api-handler + --environment '{"Variables":{}}' + description: Environment variables removed from 'api-handler' + - command: >- + aws lambda update-function-configuration --function-name api-handler + --runtime python3.9 + description: Lambda runtime changed from python3.12 to python3.9 + success_criteria: + services: + - iam + - lambda + state_checks: + - command: aws iam list-attached-role-policies --role-name api-executor + output_contains: "DynamoDBFullAccess" + - command: aws iam list-attached-role-policies --role-name api-executor + output_contains: "LambdaBasicExecutionRole" + - command: aws lambda get-function-configuration --function-name api-handler + json_path: "$.MemorySize" + expected: 256 + - command: aws lambda get-function-configuration --function-name api-handler + json_path: "$.Timeout" + expected: 30 + - command: aws lambda get-function-configuration --function-name api-handler + json_path: "$.Runtime" + expected: "python3.12" + - command: aws lambda get-function-configuration --function-name api-handler + output_contains: "APP_ENV" + +- task_id: 129 + description: > + The following infrastructure should exist: RDS instance 'app-db' with + instance class db.t3.micro, engine mysql, multi-AZ enabled, and 7-day + backup retention. Secrets Manager secret 'app-db/credentials' with + description 'Database credentials for app-db'. Some resources may have + drifted. Audit the current state and fix any configuration that does + not match. + desired_state_spec: > + RDS 'app-db': db.t3.micro, mysql, multi-AZ enabled, 7-day backup retention. + Secret 'app-db/credentials': description 'Database credentials for app-db'. 
+ setup_commands: + - >- + aws rds create-db-instance --db-instance-identifier app-db + --db-instance-class db.t3.micro --engine mysql + --master-username admin --master-user-password SecurePass123 + --multi-az --backup-retention-period 7 + - >- + aws secretsmanager create-secret --name app-db/credentials + --description 'Database credentials for app-db' + --secret-string '{"username":"admin","password":"SecurePass123"}' + possible_drifts: + - command: >- + aws rds modify-db-instance --db-instance-identifier app-db + --no-multi-az --apply-immediately + description: Multi-AZ disabled on 'app-db' + - command: >- + aws rds modify-db-instance --db-instance-identifier app-db + --backup-retention-period 1 --apply-immediately + description: Backup retention changed from 7 days to 1 day + - command: >- + aws rds modify-db-instance --db-instance-identifier app-db + --db-instance-class db.t3.small --apply-immediately + description: Instance class changed from db.t3.micro to db.t3.small + - command: >- + aws secretsmanager update-secret --secret-id app-db/credentials + --description '' + description: Description removed from secret 'app-db/credentials' + success_criteria: + services: + - rds + - secretsmanager + state_checks: + - command: aws rds describe-db-instances --db-instance-identifier app-db + json_path: "$.DBInstances[0].MultiAZ" + expected: true + - command: aws rds describe-db-instances --db-instance-identifier app-db + json_path: "$.DBInstances[0].BackupRetentionPeriod" + expected: 7 + - command: aws rds describe-db-instances --db-instance-identifier app-db + json_path: "$.DBInstances[0].DBInstanceClass" + expected: "db.t3.micro" + - command: aws secretsmanager describe-secret --secret-id app-db/credentials + output_contains: "Database credentials for app-db" + +- task_id: 131 + description: > + The following infrastructure should exist: ECS cluster 'web-cluster', + task definition 'web-task' (family web-task, container 'app' using + nginx:latest on port 80), ECS 
service 'web-service' with desired count 3. + IAM role 'ecs-task-role' with AmazonS3ReadOnlyAccess attached. Some + resources may have drifted. Audit the current state and fix any + configuration that does not match. + desired_state_spec: > + ECS cluster 'web-cluster', task definition 'web-task' (nginx:latest, port 80), + service 'web-service' desired count 3. + IAM role 'ecs-task-role': AmazonS3ReadOnlyAccess attached. + setup_commands: + - aws ecs create-cluster --cluster-name web-cluster + - >- + aws iam create-role --role-name ecs-task-role + --assume-role-policy-document '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"Service":"ecs-tasks.amazonaws.com"},"Action":"sts:AssumeRole"}]}' + - >- + aws iam attach-role-policy --role-name ecs-task-role + --policy-arn arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess + - >- + aws ecs register-task-definition --family web-task + --container-definitions '[{"name":"app","image":"nginx:latest","portMappings":[{"containerPort":80}],"memory":256}]' + --task-role-arn arn:aws:iam::000000000000:role/ecs-task-role + - >- + aws ecs create-service --cluster web-cluster + --service-name web-service --task-definition web-task + --desired-count 3 + possible_drifts: + - command: >- + aws ecs update-service --cluster web-cluster + --service web-service --desired-count 0 + description: Service desired count changed from 3 to 0 + - command: >- + aws iam detach-role-policy --role-name ecs-task-role + --policy-arn arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess + description: S3ReadOnlyAccess policy detached from 'ecs-task-role' + - command: >- + aws ecs update-service --cluster web-cluster + --service web-service --task-definition web-task + --desired-count 1 + description: Service desired count changed from 3 to 1 + success_criteria: + services: + - ecs + - iam + state_checks: + - command: aws ecs describe-services --cluster web-cluster --services web-service + json_path: "$.services[0].desiredCount" + expected: 3 + - 
command: aws iam list-attached-role-policies --role-name ecs-task-role + output_contains: "S3ReadOnlyAccess" + - command: aws iam get-role --role-name ecs-task-role + output_contains: "ecs-task-role" + - command: aws ecs describe-clusters --clusters web-cluster + output_contains: "web-cluster" + +- task_id: 133 + description: > + The following infrastructure should exist: SSM parameter '/app/db-host' + (type String, value 'db.example.com'), SSM parameter '/app/db-port' + (type String, value '5432'). Lambda function 'config-reader' with 128MB + memory and 10s timeout. Some resources may have drifted. Audit the + current state and fix any configuration that does not match. + desired_state_spec: > + SSM '/app/db-host': String, 'db.example.com'. + SSM '/app/db-port': String, '5432'. + Lambda 'config-reader': 128MB memory, 10s timeout. + setup_commands: + - >- + aws ssm put-parameter --name /app/db-host + --type String --value db.example.com + - >- + aws ssm put-parameter --name /app/db-port + --type String --value 5432 + - >- + aws iam create-role --role-name config-reader-role + --assume-role-policy-document '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"Service":"lambda.amazonaws.com"},"Action":"sts:AssumeRole"}]}' + - >- + aws lambda create-function --function-name config-reader + --runtime python3.12 --handler index.handler + --role arn:aws:iam::000000000000:role/config-reader-role + --code S3Bucket=dummy,S3Key=dummy.zip + --memory-size 128 --timeout 10 + possible_drifts: + - command: >- + aws ssm put-parameter --name /app/db-host + --type String --value localhost --overwrite + description: SSM '/app/db-host' value changed to 'localhost' + - command: >- + aws ssm put-parameter --name /app/db-port + --type String --value 3306 --overwrite + description: SSM '/app/db-port' value changed to '3306' + - command: >- + aws lambda update-function-configuration --function-name config-reader + --memory-size 512 + description: Lambda memory changed from 
128MB to 512MB + - command: >- + aws lambda update-function-configuration --function-name config-reader + --timeout 60 + description: Lambda timeout changed from 10s to 60s + - command: aws ssm delete-parameter --name /app/db-port + description: SSM parameter '/app/db-port' deleted + success_criteria: + services: + - ssm + - lambda + state_checks: + - command: aws ssm get-parameter --name /app/db-host + output_contains: "db.example.com" + - command: aws ssm get-parameter --name /app/db-port + output_contains: "5432" + - command: aws lambda get-function-configuration --function-name config-reader + json_path: "$.MemorySize" + expected: 128 + - command: aws lambda get-function-configuration --function-name config-reader + json_path: "$.Timeout" + expected: 10 + +- task_id: 134 + description: > + The following infrastructure should exist: EventBridge rule + 'nightly-cleanup' with schedule expression 'rate(1 day)' in enabled + state, targeting Lambda function 'cleanup-handler'. Lambda + 'cleanup-handler' with 256MB memory and 300s timeout. Some resources + may have drifted. Audit the current state and fix any configuration + that does not match. + desired_state_spec: > + EventBridge rule 'nightly-cleanup': schedule 'rate(1 day)', ENABLED. + Lambda 'cleanup-handler': 256MB memory, 300s timeout, target of rule. 
+ setup_commands: + - >- + aws iam create-role --role-name cleanup-handler-role + --assume-role-policy-document '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"Service":"lambda.amazonaws.com"},"Action":"sts:AssumeRole"}]}' + - >- + aws lambda create-function --function-name cleanup-handler + --runtime python3.12 --handler index.handler + --role arn:aws:iam::000000000000:role/cleanup-handler-role + --code S3Bucket=dummy,S3Key=dummy.zip + --memory-size 256 --timeout 300 + - >- + aws events put-rule --name nightly-cleanup + --schedule-expression 'rate(1 day)' --state ENABLED + - >- + aws events put-targets --rule nightly-cleanup + --targets '[{"Id":"cleanup-target","Arn":"arn:aws:lambda:us-east-1:000000000000:function:cleanup-handler"}]' + possible_drifts: + - command: aws events disable-rule --name nightly-cleanup + description: EventBridge rule 'nightly-cleanup' disabled + - command: >- + aws events put-rule --name nightly-cleanup + --schedule-expression 'rate(7 days)' --state ENABLED + description: Schedule changed from 'rate(1 day)' to 'rate(7 days)' + - command: >- + aws events remove-targets --rule nightly-cleanup + --ids cleanup-target + description: Lambda target removed from rule 'nightly-cleanup' + - command: >- + aws lambda update-function-configuration --function-name cleanup-handler + --timeout 30 + description: Lambda timeout changed from 300s to 30s + - command: >- + aws lambda update-function-configuration --function-name cleanup-handler + --memory-size 128 + description: Lambda memory changed from 256MB to 128MB + success_criteria: + services: + - events + - lambda + state_checks: + - command: aws events describe-rule --name nightly-cleanup + output_contains: "ENABLED" + - command: aws events describe-rule --name nightly-cleanup + output_contains: "rate(1 day)" + - command: aws events list-targets-by-rule --rule nightly-cleanup + output_contains: "cleanup-handler" + - command: aws lambda get-function-configuration --function-name 
cleanup-handler + json_path: "$.MemorySize" + expected: 256 + - command: aws lambda get-function-configuration --function-name cleanup-handler + json_path: "$.Timeout" + expected: 300 + +- task_id: 135 + description: > + The following infrastructure should exist: S3 bucket 'analytics-raw' with + versioning enabled and AES256 server-side encryption. Firehose delivery + stream 'clickstream-firehose' delivering to 'analytics-raw' with prefix + 'raw/' and buffer size of 5 MiB. Some resources may have drifted. Audit + the current state and fix any configuration that does not match. + desired_state_spec: > + S3 'analytics-raw': versioning=Enabled, SSE with AES256. + Firehose 'clickstream-firehose': destination analytics-raw, prefix 'raw/', + buffer 5 MiB. + setup_commands: + - aws s3api create-bucket --bucket analytics-raw + - >- + aws s3api put-bucket-versioning --bucket analytics-raw + --versioning-configuration Status=Enabled + - >- + aws s3api put-bucket-encryption --bucket analytics-raw + --server-side-encryption-configuration '{"Rules":[{"ApplyServerSideEncryptionByDefault":{"SSEAlgorithm":"AES256"}}]}' + - >- + aws firehose create-delivery-stream --delivery-stream-name clickstream-firehose + --s3-destination-configuration '{"RoleARN":"arn:aws:iam::000000000000:role/firehose-role","BucketARN":"arn:aws:s3:::analytics-raw","Prefix":"raw/","BufferingHints":{"SizeInMBs":5,"IntervalInSeconds":300}}' + possible_drifts: + - command: >- + aws s3api put-bucket-versioning --bucket analytics-raw + --versioning-configuration Status=Suspended + description: Versioning suspended on 'analytics-raw' + - command: aws s3api delete-bucket-encryption --bucket analytics-raw + description: Encryption removed from 'analytics-raw' + success_criteria: + services: + - firehose + - s3 + state_checks: + - command: aws s3api get-bucket-versioning --bucket analytics-raw + output_contains: "Enabled" + - command: aws s3api get-bucket-encryption --bucket analytics-raw + output_contains: "AES256" + 
- command: aws firehose describe-delivery-stream --delivery-stream-name clickstream-firehose + output_contains: "raw/" + - command: aws firehose describe-delivery-stream --delivery-stream-name clickstream-firehose + output_contains: "analytics-raw" +- task_id: 139 + description: > + The following infrastructure should exist: DynamoDB table 'users' with + provisioned throughput of 50 RCU and 50 WCU. DynamoDB table 'transactions' + with provisioned throughput of 100 RCU and 100 WCU, and a global secondary + index 'date-index' on the 'date' attribute provisioned at 100 RCU / 100 WCU. + Some resources may have drifted from the desired specification. Audit the + current state and fix any configuration that does not match. + desired_state_spec: > + DynamoDB 'users': 50 RCU, 50 WCU. + DynamoDB 'transactions': 100 RCU, 100 WCU, GSI 'date-index' at 100 RCU / 100 WCU. + setup_commands: + - >- + aws dynamodb create-table --table-name users + --attribute-definitions AttributeName=id,AttributeType=S + --key-schema AttributeName=id,KeyType=HASH + --provisioned-throughput ReadCapacityUnits=50,WriteCapacityUnits=50 + - >- + aws dynamodb create-table --table-name transactions + --attribute-definitions AttributeName=id,AttributeType=S AttributeName=date,AttributeType=S + --key-schema AttributeName=id,KeyType=HASH + --provisioned-throughput ReadCapacityUnits=100,WriteCapacityUnits=100 + --global-secondary-indexes '[{"IndexName":"date-index","KeySchema":[{"AttributeName":"date","KeyType":"HASH"}],"Projection":{"ProjectionType":"ALL"},"ProvisionedThroughput":{"ReadCapacityUnits":100,"WriteCapacityUnits":100}}]' + possible_drifts: + - command: >- + aws dynamodb update-table --table-name users + --provisioned-throughput ReadCapacityUnits=5,WriteCapacityUnits=50 + description: Users table RCU reduced to 5 + - command: >- + aws dynamodb update-table --table-name users + --provisioned-throughput ReadCapacityUnits=50,WriteCapacityUnits=5 + description: Users table WCU reduced to 5 + - 
command: >- + aws dynamodb update-table --table-name transactions + --provisioned-throughput ReadCapacityUnits=10,WriteCapacityUnits=100 + description: Transactions table RCU reduced to 10 + - command: >- + aws dynamodb update-table --table-name transactions + --provisioned-throughput ReadCapacityUnits=100,WriteCapacityUnits=10 + description: Transactions table WCU reduced to 10 + - command: >- + aws dynamodb update-table --table-name transactions + --global-secondary-index-updates '[{"Update":{"IndexName":"date-index","ProvisionedThroughput":{"ReadCapacityUnits":5,"WriteCapacityUnits":5}}}]' + description: GSI 'date-index' throughput reduced to 5 RCU / 5 WCU + success_criteria: + services: + - dynamodb + state_checks: + - command: aws dynamodb describe-table --table-name users + json_path: "$.Table.ProvisionedThroughput.ReadCapacityUnits" + expected: 50 + - command: aws dynamodb describe-table --table-name users + json_path: "$.Table.ProvisionedThroughput.WriteCapacityUnits" + expected: 50 + - command: aws dynamodb describe-table --table-name transactions + json_path: "$.Table.ProvisionedThroughput.ReadCapacityUnits" + expected: 100 + - command: aws dynamodb describe-table --table-name transactions + json_path: "$.Table.ProvisionedThroughput.WriteCapacityUnits" + expected: 100 + - command: aws dynamodb describe-table --table-name transactions + json_path: "$.Table.GlobalSecondaryIndexes[0].ProvisionedThroughput.ReadCapacityUnits" + expected: 100 + + diff --git a/server/services/tasks/expert.yaml b/server/services/tasks/expert.yaml index b09f1f4aa4da03ebad638e7ddf5a8c6ebba9f3dd..cbf3d8555a3c8cd560fa0f99b7e450b403a6d4e8 100644 --- a/server/services/tasks/expert.yaml +++ b/server/services/tasks/expert.yaml @@ -15,7 +15,7 @@ aws lambda create-function --function-name order-processor --runtime python3.12 --handler index.handler --role arn:aws:iam::000000000000:role/broken-lambda-role - --zip-file fileb:///tmp/dummy.zip + --code S3Bucket=dummy,S3Key=dummy.zip - aws sqs 
create-queue --queue-name incoming-orders success_criteria: services: @@ -41,9 +41,7 @@ expires non-current object versions after 30 days. setup_commands: - aws s3api create-bucket --bucket app-config-store - - >- - aws s3api put-object --bucket app-config-store - --key config/app.json --body /dev/null + - aws s3api put-object --bucket app-config-store --key config/app.json success_criteria: services: - s3 @@ -95,3 +93,686 @@ resource: ops-alert-inbox - operation: subscribe resource: ops-alerts + +- task_id: 21 + description: > + Security Audit: An S3 bucket 'public-assets' has an overly permissive + bucket policy that grants access to any principal ('*'). Review the + current policy, identify the vulnerability, and replace it with a + restrictive policy that only allows the 'app-role' IAM role to perform + s3:GetObject on the bucket's objects. + setup_commands: + - aws s3api create-bucket --bucket public-assets + - >- + aws s3api put-bucket-policy --bucket public-assets + --policy '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":"*","Action":"s3:*","Resource":["arn:aws:s3:::public-assets","arn:aws:s3:::public-assets/*"]}]}' + success_criteria: + services: + - s3 + state_checks: + - command: aws s3api get-bucket-policy --bucket public-assets --output json + output_contains: "app-role" + - command: aws s3api get-bucket-policy --bucket public-assets --output json + output_contains: "s3:GetObject" + steps: + - operation: get-bucket-policy + resource: public-assets + - operation: put-bucket-policy + resource: public-assets + +- task_id: 22 + description: > + Security Audit: An IAM role 'app-role' has an inline policy 'app-access' + with overly broad permissions (Action: '*', Resource: '*'). Replace the + policy with a least-privilege version that only allows 'dynamodb:GetItem' + and 'dynamodb:PutItem' on the 'users' table in us-east-1. 
+ setup_commands: + - >- + aws iam create-role --role-name app-role + --assume-role-policy-document '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"Service":"lambda.amazonaws.com"},"Action":"sts:AssumeRole"}]}' + - >- + aws iam put-role-policy --role-name app-role + --policy-name app-access + --policy-document '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Action":"*","Resource":"*"}]}' + success_criteria: + services: + - iam + state_checks: + - command: >- + aws iam get-role-policy --role-name app-role + --policy-name app-access --output json + output_contains: "dynamodb:GetItem" + - command: >- + aws iam get-role-policy --role-name app-role + --policy-name app-access --output json + output_contains: "dynamodb:PutItem" + - command: >- + aws iam get-role-policy --role-name app-role + --policy-name app-access --output json + output_contains: "users" + steps: + - operation: get-role-policy + resource: app-role + - operation: put-role-policy + resource: app-role + +- task_id: 23 + description: > + Security Audit: A Lambda function 'data-processor' has a database + password stored as a plaintext environment variable (DB_PASSWORD=hunter2). + Create a secret in Secrets Manager named 'data-processor/db-password' + containing the password, update the Lambda configuration to add a + SECRET_ARN environment variable pointing to the secret, and remove the + plaintext DB_PASSWORD variable. 
+ setup_commands: + - >- + aws iam create-role --role-name data-processor-role + --assume-role-policy-document '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"Service":"lambda.amazonaws.com"},"Action":"sts:AssumeRole"}]}' + - >- + aws lambda create-function --function-name data-processor + --runtime python3.12 --handler index.handler + --role arn:aws:iam::000000000000:role/data-processor-role + --code S3Bucket=dummy,S3Key=dummy.zip + --environment Variables={DB_PASSWORD=hunter2} + success_criteria: + services: + - secretsmanager + - lambda + state_checks: + - command: >- + aws secretsmanager describe-secret + --secret-id data-processor/db-password + output_contains: "data-processor/db-password" + - command: >- + aws lambda get-function-configuration + --function-name data-processor --output json + output_contains: "SECRET_ARN" + steps: + - operation: create-secret + resource: data-processor/db-password + - operation: update-function-configuration + resource: data-processor + +- task_id: 109 + description: > + SRE Incident: A Lambda function 'payment-webhook' has a timeout of 3 + seconds, causing frequent timeouts when calling a slow downstream API. + The CloudWatch alarm 'payment-webhook-errors' that should monitor + invocation errors does not exist. Update the function timeout to 30 + seconds and create a CloudWatch alarm named 'payment-webhook-errors' + that triggers when the Errors metric exceeds 5 over a 60-second period. 
+ setup_commands: + - >- + aws iam create-role --role-name payment-webhook-role + --assume-role-policy-document '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"Service":"lambda.amazonaws.com"},"Action":"sts:AssumeRole"}]}' + - >- + aws lambda create-function --function-name payment-webhook + --runtime python3.12 --handler index.handler + --role arn:aws:iam::000000000000:role/payment-webhook-role + --code S3Bucket=dummy,S3Key=dummy.zip + --timeout 3 + success_criteria: + services: + - lambda + - cloudwatch + state_checks: + - command: aws lambda get-function-configuration --function-name payment-webhook + json_path: "$.Timeout" + expected: 30 + - command: aws cloudwatch describe-alarms --alarm-names payment-webhook-errors + output_contains: "payment-webhook-errors" + - command: aws cloudwatch describe-alarms --alarm-names payment-webhook-errors + output_contains: "Errors" + steps: + - operation: update-function-configuration + resource: payment-webhook + - operation: put-metric-alarm + resource: payment-webhook-errors + +- task_id: 110 + description: > + SRE Incident: An ECS service 'api-service' in cluster 'prod-cluster' has + its desired count set to 0 after an accidental scale-down. The task + definition 'api-task' exists but the service's IAM role 'ecs-service-role' + is missing the required ECS policy. Attach the AmazonECS_FullAccess policy + to the role and update the service desired count to 3. 
+ setup_commands: + - aws ecs create-cluster --cluster-name prod-cluster + - >- + aws iam create-role --role-name ecs-service-role + --assume-role-policy-document '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"Service":"ecs.amazonaws.com"},"Action":"sts:AssumeRole"}]}' + - >- + aws ecs register-task-definition --family api-task + --container-definitions '[{"name":"api","image":"nginx:latest","memory":256,"cpu":128,"essential":true}]' + - >- + aws ecs create-service --cluster prod-cluster + --service-name api-service --task-definition api-task + --desired-count 0 + success_criteria: + services: + - ecs + - iam + state_checks: + - command: aws ecs describe-services --cluster prod-cluster --services api-service + json_path: "$.services[0].desiredCount" + expected: 3 + - command: aws iam list-attached-role-policies --role-name ecs-service-role + output_contains: "ECS" + steps: + - operation: attach-role-policy + resource: ecs-service-role + - operation: update-service + resource: api-service + +- task_id: 111 + description: > + SRE Incident: An RDS instance 'analytics-db' is in stopped state after + a maintenance window and needs to be started. Additionally, its security + group 'analytics-db-sg' only allows inbound access from 0.0.0.0/0 on + port 3306, which is a security risk. Create a new security group + 'analytics-db-sg-fixed' in VPC 'vpc-12345' that restricts MySQL access + to the private subnet CIDR 10.0.1.0/24 and modify the RDS instance + to use the new security group. 
+ setup_commands: + - >- + aws ec2 create-security-group --group-name analytics-db-sg + --description "Overly permissive DB security group" + - >- + aws ec2 authorize-security-group-ingress --group-name analytics-db-sg + --protocol tcp --port 3306 --cidr 0.0.0.0/0 + - >- + aws rds create-db-instance --db-instance-identifier analytics-db + --db-instance-class db.t3.micro --engine mysql + --master-username admin --master-user-password temppass123 + - aws rds stop-db-instance --db-instance-identifier analytics-db + success_criteria: + services: + - rds + - ec2 + state_checks: + - command: aws rds describe-db-instances --db-instance-identifier analytics-db + output_contains: "available" + - command: aws ec2 describe-security-groups --group-names analytics-db-sg-fixed + output_contains: "10.0.1.0/24" + steps: + - operation: start-db-instance + resource: analytics-db + - operation: create-security-group + resource: analytics-db-sg-fixed + - operation: authorize-security-group-ingress + resource: analytics-db-sg-fixed + - operation: modify-db-instance + resource: analytics-db + +- task_id: 113 + description: > + SRE Incident: An SQS queue 'order-processing' has messages accumulating + in its dead-letter queue 'order-processing-dlq'. Investigation shows the + visibility timeout on the main queue is only 5 seconds, causing messages + to be re-delivered before processing completes. Update the visibility + timeout on 'order-processing' to 120 seconds and set the redrive policy + to allow a maximum receive count of 5 before sending to the DLQ. 
+ setup_commands: + - aws sqs create-queue --queue-name order-processing-dlq + - >- + aws sqs create-queue --queue-name order-processing + --attributes VisibilityTimeout=5 + success_criteria: + services: + - sqs + state_checks: + - command: >- + aws sqs get-queue-attributes + --queue-url http://localhost:4566/000000000000/order-processing + --attribute-names VisibilityTimeout + json_path: "$.Attributes.VisibilityTimeout" + expected: "120" + - command: >- + aws sqs get-queue-attributes + --queue-url http://localhost:4566/000000000000/order-processing + --attribute-names RedrivePolicy + output_contains: "order-processing-dlq" + - command: >- + aws sqs get-queue-attributes + --queue-url http://localhost:4566/000000000000/order-processing + --attribute-names RedrivePolicy + output_contains: "maxReceiveCount" + steps: + - operation: set-queue-attributes + resource: order-processing + +- task_id: 114 + description: > + SRE Incident: A Route53 hosted zone 'example.com' has an A record for + 'api.example.com' pointing to the old IP address '10.0.0.99'. The + application has been migrated to a new server at '10.0.1.50'. Update + the A record for 'api.example.com' to point to the new IP address + '10.0.1.50' with a TTL of 300 seconds. 
+ setup_commands: + - aws route53 create-hosted-zone --name example.com --caller-reference ref-001 + - >- + aws route53 change-resource-record-sets --hosted-zone-id zone-001 + --change-batch '{"Changes":[{"Action":"CREATE","ResourceRecordSet":{"Name":"api.example.com","Type":"A","TTL":60,"ResourceRecords":[{"Value":"10.0.0.99"}]}}]}' + success_criteria: + services: + - route53 + state_checks: + - command: aws route53 list-resource-record-sets --hosted-zone-id zone-001 + output_contains: "10.0.1.50" + - command: aws route53 list-resource-record-sets --hosted-zone-id zone-001 + output_contains: "api.example.com" + steps: + - operation: change-resource-record-sets + resource: api.example.com + +- task_id: 115 + description: > + SRE Incident: An Application Load Balancer 'web-alb' has a target group + 'web-targets' with a health check misconfigured to use path '/healthz' + on port 8080, but the application serves health checks on path '/health' + on port 80. All targets are showing as unhealthy. Fix the health check + configuration on the target group to use the correct path '/health' and + port 80, with a healthy threshold of 2 and interval of 15 seconds. 
+ setup_commands: + - >- + aws elbv2 create-load-balancer --name web-alb + --type application --subnets subnet-aaa subnet-bbb + - >- + aws elbv2 create-target-group --name web-targets + --protocol HTTP --port 80 --vpc-id vpc-12345 + --health-check-path /healthz --health-check-port 8080 + --health-check-interval-seconds 60 --healthy-threshold-count 5 + success_criteria: + services: + - elbv2 + state_checks: + - command: aws elbv2 describe-target-groups --names web-targets + output_contains: "/health" + - command: aws elbv2 describe-target-groups --names web-targets + json_path: "$.TargetGroups[0].HealthCheckPort" + expected: "80" + - command: aws elbv2 describe-target-groups --names web-targets + json_path: "$.TargetGroups[0].HealthyThresholdCount" + expected: 2 + steps: + - operation: modify-target-group + resource: web-targets + +- task_id: 116 + description: > + Security Audit: A Lambda function 'public-api-handler' has a resource + policy that allows any AWS account to invoke it (Principal: '*'). This + is a critical security vulnerability. Remove the overly permissive + policy statement 'open-access' and add a new statement 'restricted-access' + that only allows invocation from the API Gateway service principal + 'apigateway.amazonaws.com' with a source ARN condition. 
+ setup_commands: + - >- + aws iam create-role --role-name public-api-role + --assume-role-policy-document '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"Service":"lambda.amazonaws.com"},"Action":"sts:AssumeRole"}]}' + - >- + aws lambda create-function --function-name public-api-handler + --runtime python3.12 --handler index.handler + --role arn:aws:iam::000000000000:role/public-api-role + --code S3Bucket=dummy,S3Key=dummy.zip + - >- + aws lambda add-permission --function-name public-api-handler + --statement-id open-access --action lambda:InvokeFunction + --principal '*' + success_criteria: + services: + - lambda + - iam + state_checks: + - command: aws lambda get-policy --function-name public-api-handler + output_contains: "restricted-access" + - command: aws lambda get-policy --function-name public-api-handler + output_contains: "apigateway.amazonaws.com" + steps: + - operation: remove-permission + resource: public-api-handler + - operation: add-permission + resource: public-api-handler + +- task_id: 117 + description: > + Security Audit: An S3 bucket 'data-lake-raw' contains sensitive customer + data but has no server-side encryption configured. Enable default + server-side encryption on the bucket using AES256 (SSE-S3). Also add + a bucket policy that denies any PutObject request that does not include + server-side encryption headers. 
+ setup_commands: + - aws s3api create-bucket --bucket data-lake-raw + - aws s3api put-object --bucket data-lake-raw --key customers/data.csv + success_criteria: + services: + - s3 + state_checks: + - command: aws s3api get-bucket-encryption --bucket data-lake-raw + output_contains: "AES256" + - command: aws s3api get-bucket-policy --bucket data-lake-raw --output json + output_contains: "s3:x-amz-server-side-encryption" + - command: aws s3api get-bucket-policy --bucket data-lake-raw --output json + output_contains: "Deny" + steps: + - operation: put-bucket-encryption + resource: data-lake-raw + - operation: put-bucket-policy + resource: data-lake-raw + +- task_id: 118 + description: > + Security Audit: A DynamoDB table 'financial-transactions' stores + sensitive payment data but does not have point-in-time recovery (PITR) + enabled. Additionally, the table lacks a TTL configuration for + automatic cleanup of old records. Enable continuous backups (PITR) on + the table and configure TTL on the 'expiry_timestamp' attribute. + setup_commands: + - >- + aws dynamodb create-table --table-name financial-transactions + --attribute-definitions AttributeName=tx_id,AttributeType=S + --key-schema AttributeName=tx_id,KeyType=HASH + --provisioned-throughput ReadCapacityUnits=10,WriteCapacityUnits=10 + success_criteria: + services: + - dynamodb + state_checks: + - command: >- + aws dynamodb describe-continuous-backups + --table-name financial-transactions + output_contains: "ENABLED" + - command: >- + aws dynamodb describe-time-to-live + --table-name financial-transactions + output_contains: "expiry_timestamp" + steps: + - operation: update-continuous-backups + resource: financial-transactions + - operation: update-time-to-live + resource: financial-transactions + +- task_id: 119 + description: > + Security Audit: An SSM parameter '/app/database/password' stores a + database password as a plain String type instead of SecureString. 
Create + a new SecureString parameter '/app/database/password-secure' with the + same value 'SuperSecret123', then create a Secrets Manager secret + 'app/database-credentials' to provide rotation capability for the + credential. + setup_commands: + - >- + aws ssm put-parameter --name /app/database/password + --value SuperSecret123 --type String + success_criteria: + services: + - ssm + - secretsmanager + state_checks: + - command: aws ssm get-parameter --name /app/database/password-secure + output_contains: "SecureString" + - command: >- + aws secretsmanager describe-secret + --secret-id app/database-credentials + output_contains: "app/database-credentials" + steps: + - operation: put-parameter + resource: /app/database/password-secure + - operation: create-secret + resource: app/database-credentials + +- task_id: 120 + description: > + Security Audit: An IAM user 'deploy-bot' has an overly permissive + inline policy 'admin-access' granting full admin rights and an + attached managed policy 'arn:aws:iam::aws:policy/IAMFullAccess' that + is unnecessary. Detach the managed policy, delete the overly broad + inline policy, and replace it with a policy named 'deploy-only' that + restricts permissions to 's3:PutObject' and 'codedeploy:*' on all + resources. 
+ setup_commands: + - aws iam create-user --user-name deploy-bot + - >- + aws iam attach-user-policy --user-name deploy-bot + --policy-arn arn:aws:iam::aws:policy/IAMFullAccess + - >- + aws iam put-user-policy --user-name deploy-bot + --policy-name admin-access + --policy-document '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Action":"*","Resource":"*"}]}' + success_criteria: + services: + - iam + state_checks: + - command: aws iam get-user-policy --user-name deploy-bot --policy-name deploy-only + output_contains: "s3:PutObject" + - command: aws iam get-user-policy --user-name deploy-bot --policy-name deploy-only + output_contains: "codedeploy:*" + steps: + - operation: detach-user-policy + resource: deploy-bot + - operation: delete-user-policy + resource: deploy-bot + - operation: put-user-policy + resource: deploy-bot + +- task_id: 121 + description: > + SRE Incident: An EventBridge rule 'nightly-etl-trigger' that should + invoke a Lambda function 'etl-runner' every night at 2 AM UTC is + currently disabled and has no targets configured. The Lambda function + exists but the rule was never properly set up. Enable the rule, set + its schedule expression to 'cron(0 2 * * ? *)', and add the Lambda + function as its target. 
+ setup_commands: + - >- + aws iam create-role --role-name etl-runner-role + --assume-role-policy-document '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"Service":"lambda.amazonaws.com"},"Action":"sts:AssumeRole"}]}' + - >- + aws lambda create-function --function-name etl-runner + --runtime python3.12 --handler index.handler + --role arn:aws:iam::000000000000:role/etl-runner-role + --code S3Bucket=dummy,S3Key=dummy.zip + - >- + aws events put-rule --name nightly-etl-trigger + --schedule-expression 'rate(1 day)' --state DISABLED + success_criteria: + services: + - events + - lambda + state_checks: + - command: aws events describe-rule --name nightly-etl-trigger + output_contains: "ENABLED" + - command: aws events describe-rule --name nightly-etl-trigger + output_contains: "cron(0 2 * * ? *)" + - command: aws events list-targets-by-rule --rule nightly-etl-trigger + output_contains: "etl-runner" + steps: + - operation: put-rule + resource: nightly-etl-trigger + - operation: put-targets + resource: nightly-etl-trigger + +- task_id: 122 + description: > + SRE Incident: A Kinesis Firehose delivery stream 'clickstream-delivery' + is writing to S3 bucket 'clickstream-archive' but using the wrong + prefix 'raw/' instead of the required 'clickstream/year=!{timestamp:yyyy}/month=!{timestamp:MM}/'. + The S3 bucket exists but the delivery stream prefix needs to be corrected. + Delete the misconfigured delivery stream and recreate it with the + correct S3 prefix configuration pointing to the 'clickstream-archive' bucket. 
+ setup_commands: + - aws s3api create-bucket --bucket clickstream-archive + - >- + aws firehose create-delivery-stream + --delivery-stream-name clickstream-delivery + --s3-destination-configuration + RoleARN=arn:aws:iam::000000000000:role/firehose-role,BucketARN=arn:aws:s3:::clickstream-archive,Prefix=raw/ + success_criteria: + services: + - firehose + - s3 + state_checks: + - command: aws firehose describe-delivery-stream --delivery-stream-name clickstream-delivery + output_contains: "clickstream-archive" + - command: aws firehose describe-delivery-stream --delivery-stream-name clickstream-delivery + output_contains: "clickstream/year=" + steps: + - operation: delete-delivery-stream + resource: clickstream-delivery + - operation: create-delivery-stream + resource: clickstream-delivery + +- task_id: 123 + description: > + SRE Incident: An SNS topic 'order-notifications' is experiencing failed + deliveries to its SQS subscriber, and there is no dead-letter queue + configured on the subscription to capture failed messages. Create an + SQS queue 'order-notifications-dlq' to serve as the DLQ, then update + the existing subscription's redrive policy to send failed messages to + the DLQ. Also set the SQS queue's message retention period to 14 days + (1209600 seconds). 
+ setup_commands: + - aws sns create-topic --name order-notifications + - aws sqs create-queue --queue-name order-subscriber + - >- + aws sns subscribe --topic-arn arn:aws:sns:us-east-1:000000000000:order-notifications + --protocol sqs + --notification-endpoint arn:aws:sqs:us-east-1:000000000000:order-subscriber + success_criteria: + services: + - sns + - sqs + state_checks: + - command: >- + aws sqs get-queue-attributes + --queue-url http://localhost:4566/000000000000/order-notifications-dlq + --attribute-names MessageRetentionPeriod + json_path: "$.Attributes.MessageRetentionPeriod" + expected: "1209600" + - command: >- + aws sns list-subscriptions-by-topic + --topic-arn arn:aws:sns:us-east-1:000000000000:order-notifications + output_contains: "order-subscriber" + steps: + - operation: create-queue + resource: order-notifications-dlq + - operation: set-queue-attributes + resource: order-notifications-dlq + - operation: set-subscription-attributes + +- task_id: 124 + description: > + Security Audit: An EFS file system 'shared-data' was created without + encryption at rest. Since EFS encryption cannot be enabled after creation, + create a new encrypted EFS file system with the tag Name='shared-data-encrypted' + and creation token 'shared-data-encrypted'. Also create a mount target + security group 'efs-mount-sg' that only allows NFS traffic (port 2049) + from the application subnet CIDR 10.0.2.0/24. 
+ setup_commands: + - >- + aws efs create-file-system --creation-token shared-data + --no-encrypted --tags Key=Name,Value=shared-data + success_criteria: + services: + - efs + - ec2 + state_checks: + - command: aws efs describe-file-systems + output_contains: "shared-data-encrypted" + - command: aws ec2 describe-security-groups --group-names efs-mount-sg + output_contains: "2049" + - command: aws ec2 describe-security-groups --group-names efs-mount-sg + output_contains: "10.0.2.0/24" + steps: + - operation: create-file-system + resource: shared-data-encrypted + - operation: create-security-group + resource: efs-mount-sg + - operation: authorize-security-group-ingress + resource: efs-mount-sg + +- task_id: 125 + description: > + SRE Incident: A Glue ETL job 'daily-transform' is failing because its + script location points to a non-existent S3 path + 's3://glue-scripts-bucket/old/transform.py'. The correct script has been + uploaded to 's3://glue-scripts-bucket/scripts/daily-transform.py'. Update + the Glue job to reference the correct script location. Also ensure the + S3 bucket 'glue-scripts-bucket' exists and contains an object at the + correct key path. 
+ setup_commands: + - aws s3api create-bucket --bucket glue-scripts-bucket + - aws s3api put-object --bucket glue-scripts-bucket --key scripts/daily-transform.py + - >- + aws glue create-job --name daily-transform + --role arn:aws:iam::000000000000:role/glue-role + --command '{"Name":"glueetl","ScriptLocation":"s3://glue-scripts-bucket/old/transform.py","PythonVersion":"3"}' + success_criteria: + services: + - glue + - s3 + state_checks: + - command: aws glue get-job --job-name daily-transform + output_contains: "scripts/daily-transform.py" + - command: >- + aws s3api head-object --bucket glue-scripts-bucket + --key scripts/daily-transform.py + output_contains: "ContentLength" + steps: + - operation: update-job + resource: daily-transform + +- task_id: 126 + description: > + Security Audit: A Cognito user pool 'customer-auth' has a dangerously + weak password policy allowing minimum length of 6 with no requirements + for uppercase, numbers, or symbols. Update the password policy to + require a minimum length of 12, and require uppercase letters, lowercase + letters, numbers, and symbols. Also set the temporary password validity + to 1 day. + setup_commands: + - >- + aws cognito-idp create-user-pool --pool-name customer-auth + --policies '{"PasswordPolicy":{"MinimumLength":6,"RequireUppercase":false,"RequireLowercase":false,"RequireNumbers":false,"RequireSymbols":false,"TemporaryPasswordValidityDays":7}}' + success_criteria: + services: + - cognito-idp + state_checks: + - command: aws cognito-idp describe-user-pool --user-pool-id us-east-1_customer-auth + output_contains: "MinimumLength" + - command: aws cognito-idp describe-user-pool --user-pool-id us-east-1_customer-auth + output_contains: "RequireUppercase" + steps: + - operation: update-user-pool + resource: customer-auth + +- task_id: 127 + description: > + SRE Incident: A CloudFormation stack 'legacy-infra' is stuck in + ROLLBACK_COMPLETE state after a failed update. 
The stack contains + an S3 bucket 'legacy-data-bucket' with important data that must be + preserved. Create a new S3 bucket 'legacy-data-backup' to serve as + a backup destination, then delete the failed CloudFormation stack + to allow redeployment. Finally, create a new stack 'legacy-infra-v2' + using a template that provisions a DynamoDB table 'legacy-config'. + setup_commands: + - aws s3api create-bucket --bucket legacy-data-bucket + - aws s3api put-object --bucket legacy-data-bucket --key important/data.json + - >- + aws cloudformation create-stack --stack-name legacy-infra + --template-body '{"AWSTemplateFormatVersion":"2010-09-09","Resources":{"Bucket":{"Type":"AWS::S3::Bucket","Properties":{"BucketName":"legacy-data-bucket"}}}}' + success_criteria: + services: + - cloudformation + - s3 + state_checks: + - command: aws s3api head-bucket --bucket legacy-data-backup + output_contains: "" + - command: aws cloudformation describe-stacks --stack-name legacy-infra-v2 + output_contains: "legacy-infra-v2" + steps: + - operation: create-bucket + resource: legacy-data-backup + - operation: delete-stack + resource: legacy-infra + - operation: create-stack + resource: legacy-infra-v2 diff --git a/server/services/tasks/intermediate.yaml b/server/services/tasks/intermediate.yaml index f9710234cc15579fa8236f5704f1cde39879968b..49373f1dd5846f3b5488a9c491676f7e11cd522b 100644 --- a/server/services/tasks/intermediate.yaml +++ b/server/services/tasks/intermediate.yaml @@ -41,3 +41,259 @@ resource: lambda-exec-role - operation: attach-role-policy resource: lambda-exec-role + +- task_id: 66 + description: > + Create an S3 bucket named 'app-assets', then create an IAM policy named + 'app-assets-read-policy' that grants s3:GetObject access to the bucket. 
+ success_criteria: + steps: + - operation: create-bucket + resource: app-assets + - operation: create-policy + resource: app-assets-read-policy + +- task_id: 67 + description: > + Create a DynamoDB table named 'user-sessions' with partition key 'session_id' (S), + then create an S3 bucket named 'session-exports' for exporting table data. + success_criteria: + steps: + - operation: create-table + resource: user-sessions + - operation: create-bucket + resource: session-exports + +- task_id: 68 + description: > + Create an IAM role named 'data-processor-role' with an assume-role policy + for Lambda, then create a Lambda function named 'data-processor' using that role + with runtime python3.12 and handler index.handler using --zip-file fileb:///tmp/dummy.zip. + success_criteria: + steps: + - operation: create-role + resource: data-processor-role + - operation: create-function + resource: data-processor + +- task_id: 69 + description: > + Create an SQS queue named 'order-events', then create an SNS topic named + 'order-notifications' and subscribe the queue to the topic using the sqs protocol. + success_criteria: + steps: + - operation: create-queue + resource: order-events + - operation: create-topic + resource: order-notifications + - operation: subscribe + resource: order-notifications + +- task_id: 70 + description: > + Create a secret in Secrets Manager named 'db-credentials' with a JSON value + containing username and password fields, then create an IAM role named + 'secret-reader-role' with an assume-role policy for Lambda. 
+ success_criteria: + steps: + - operation: create-secret + resource: db-credentials + - operation: create-role + resource: secret-reader-role + +- task_id: 71 + description: > + Create an SSM parameter named '/app/config/db-host' with type String and + value 'db.internal.local', then create a Lambda function named 'config-loader' + with runtime python3.12 and handler index.handler using --zip-file fileb:///tmp/dummy.zip + and role arn:aws:iam::000000000000:role/lambda-exec-role. + success_criteria: + steps: + - operation: put-parameter + resource: /app/config/db-host + - operation: create-function + resource: config-loader + +- task_id: 72 + description: > + Create a Lambda function named 'scheduled-task' with runtime python3.12, + handler index.handler, role arn:aws:iam::000000000000:role/lambda-exec-role, + and --zip-file fileb:///tmp/dummy.zip. Then create an EventBridge rule named + 'every-five-minutes' with a schedule expression of rate(5 minutes) and add the + Lambda function as a target. + success_criteria: + steps: + - operation: create-function + resource: scheduled-task + - operation: put-rule + resource: every-five-minutes + - operation: put-targets + resource: every-five-minutes + +- task_id: 73 + description: > + Create an IAM role named 'ecs-task-role' with an assume-role policy for + ecs-tasks.amazonaws.com, then attach the AmazonS3ReadOnlyAccess managed + policy to it. + success_criteria: + steps: + - operation: create-role + resource: ecs-task-role + - operation: attach-role-policy + resource: ecs-task-role + +- task_id: 74 + description: > + Create a secret in Secrets Manager named 'rds-master-password' with a + JSON value containing host, port, username, and password fields. Then create + an RDS DB instance named 'app-database' with engine mysql, db-instance-class + db.t3.micro, and master credentials. 
+ success_criteria: + steps: + - operation: create-secret + resource: rds-master-password + - operation: create-db-instance + resource: app-database + +- task_id: 75 + description: > + Create an Application Load Balancer target group named 'web-targets' with + protocol HTTP, port 80, and VPC. Then create a Route 53 hosted zone for + 'app.example.com'. + success_criteria: + steps: + - operation: create-target-group + resource: web-targets + - operation: create-hosted-zone + resource: app.example.com + +- task_id: 76 + description: > + Create a Cognito user pool named 'app-users', then create a user pool + client named 'web-app-client' in that user pool. + success_criteria: + steps: + - operation: create-user-pool + resource: app-users + - operation: create-user-pool-client + resource: web-app-client + +- task_id: 77 + description: > + Create an EFS file system with a creation token 'app-storage', then create + a security group named 'efs-mount-sg' with a description allowing NFS access + for mounting the file system. + success_criteria: + steps: + - operation: create-file-system + resource: app-storage + - operation: create-security-group + resource: efs-mount-sg + +- task_id: 78 + description: > + Create an EBS volume of 20 GiB in availability zone us-east-1a with type gp3, + then tag the volume with Name 'data-volume' using create-tags. + success_criteria: + steps: + - operation: create-volume + resource: data-volume + - operation: create-tags + resource: data-volume + +- task_id: 79 + description: > + Create an ElastiCache subnet group named 'cache-subnets' with a description + and subnet IDs, then create an ElastiCache cluster named 'session-cache' with + engine redis, cache-node-type cache.t3.micro, and num-cache-nodes 1. 
+ success_criteria: + steps: + - operation: create-cache-subnet-group + resource: cache-subnets + - operation: create-cache-cluster + resource: session-cache + +- task_id: 80 + description: > + Create a Glue database named 'analytics-db' in the Glue Data Catalog, + then create a Glue crawler named 'raw-data-crawler' targeting an S3 path + with the analytics-db as the target database. + success_criteria: + steps: + - operation: create-database + resource: analytics-db + - operation: create-crawler + resource: raw-data-crawler + +- task_id: 81 + description: > + Create a CloudFormation stack named 'vpc-stack' using a template URL or + template body that defines a simple VPC resource, then describe the stack + to verify it was created successfully. + success_criteria: + steps: + - operation: create-stack + resource: vpc-stack + - operation: describe-stacks + resource: vpc-stack + +- task_id: 82 + description: > + Create an HTTP API in API Gateway V2 named 'products-api' with protocol-type + HTTP, then create a route with route-key 'GET /products' on that API. + success_criteria: + steps: + - operation: create-api + resource: products-api + - operation: create-route + resource: products-api + +- task_id: 83 + description: > + Create an S3 bucket named 'firehose-delivery', then create a Kinesis + Firehose delivery stream named 'event-stream' with an S3 destination + configuration pointing to the firehose-delivery bucket. + success_criteria: + steps: + - operation: create-bucket + resource: firehose-delivery + - operation: create-delivery-stream + resource: event-stream + +- task_id: 84 + description: > + Create an SQS queue named 'task-queue' with a visibility timeout of 60 + seconds, then send a message to the queue with a body containing a JSON + payload representing a processing task. 
+ success_criteria: + steps: + - operation: create-queue + resource: task-queue + - operation: send-message + resource: task-queue + +- task_id: 85 + description: > + Create a DynamoDB table named 'products' with partition key 'product_id' (S) + and sort key 'category' (S), then put an item into the table with product_id + 'P001', category 'electronics', and name 'Wireless Mouse'. + success_criteria: + steps: + - operation: create-table + resource: products + - operation: put-item + resource: products + +- task_id: 86 + description: > + Create an IAM role named 'firehose-delivery-role' with an assume-role policy + for firehose.amazonaws.com, then create an IAM policy named 's3-write-policy' + granting s3:PutObject access and attach it to the role. + success_criteria: + steps: + - operation: create-role + resource: firehose-delivery-role + - operation: create-policy + resource: s3-write-policy + - operation: attach-role-policy + resource: firehose-delivery-role diff --git a/server/services/tasks/warmup.yaml b/server/services/tasks/warmup.yaml index 6f3f7206aa0f0833ed4312781f734b6eee45f830..fa1987c9630fb3e547f2a27346948a3c0c5ce3a6 100644 --- a/server/services/tasks/warmup.yaml +++ b/server/services/tasks/warmup.yaml @@ -33,3 +33,117 @@ success_criteria: command_contains: sns operation: list-topics + +- task_id: 27 + description: List all IAM users in the environment. + success_criteria: + command_contains: iam + operation: list-users + +- task_id: 28 + description: List all secrets stored in Secrets Manager. + success_criteria: + command_contains: secretsmanager + operation: list-secrets + +- task_id: 29 + description: List all ECS clusters in the environment. + success_criteria: + command_contains: ecs + operation: list-clusters + +- task_id: 30 + description: Describe all RDS database instances in the environment. 
+ success_criteria: + command_contains: rds + operation: describe-db-instances + +- task_id: 31 + description: Describe all ElastiCache clusters in the environment. + success_criteria: + command_contains: elasticache + operation: describe-cache-clusters + +- task_id: 32 + description: List all Athena named queries in the environment. + success_criteria: + command_contains: athena + operation: list-named-queries + +- task_id: 33 + description: List all Glue databases in the data catalog. + success_criteria: + command_contains: glue + operation: get-databases + +- task_id: 34 + description: List all Kinesis Firehose delivery streams. + success_criteria: + command_contains: firehose + operation: list-delivery-streams + +- task_id: 35 + description: List all EMR clusters in the environment. + success_criteria: + command_contains: emr + operation: list-clusters + +- task_id: 36 + description: List all HTTP APIs in API Gateway V2. + success_criteria: + command_contains: apigatewayv2 + operation: get-apis + +- task_id: 37 + description: List all Route 53 hosted zones in the environment. + success_criteria: + command_contains: route53 + operation: list-hosted-zones + +- task_id: 38 + description: Describe all Application Load Balancers in the environment. + success_criteria: + command_contains: elbv2 + operation: describe-load-balancers + +- task_id: 39 + description: Describe all EBS volumes in the environment. + success_criteria: + command_contains: ec2 + operation: describe-volumes + +- task_id: 40 + description: Describe all EFS file systems in the environment. + success_criteria: + command_contains: efs + operation: describe-file-systems + +- task_id: 41 + description: List all Cognito user pools in the environment. + success_criteria: + command_contains: cognito-idp + operation: list-user-pools + +- task_id: 42 + description: Describe all SSM parameters in the environment. 
+ success_criteria: + command_contains: ssm + operation: describe-parameters + +- task_id: 43 + description: List all EventBridge rules in the environment. + success_criteria: + command_contains: events + operation: list-rules + +- task_id: 44 + description: List all CloudFormation stacks in the environment. + success_criteria: + command_contains: cloudformation + operation: list-stacks + +- task_id: 45 + description: List all REST APIs in API Gateway. + success_criteria: + command_contains: apigateway + operation: get-rest-apis diff --git a/server/static/css/style.css b/server/static/css/style.css index f2f652d99baf2d85f84d03b9ff3dcc2eb647dcb5..9baf3c4ad400d76d3572f473aee967b06aff6c7e 100644 --- a/server/static/css/style.css +++ b/server/static/css/style.css @@ -7,7 +7,7 @@ --text-muted: #5f6368; --accent-color: #202124; --accent-hover: #000000; - --border-color: #dadce0; + --border-color: #9aa0a6; --grid-dot: #a8adb3; --nav-height: 72px; --blue-accent: #1a73e8; @@ -22,6 +22,10 @@ font-family: 'Google Sans', 'Roboto', system-ui, -apple-system, sans-serif; } +html { + font-size: 18px; +} + body { background-color: var(--bg-color); color: var(--text-main); @@ -57,8 +61,11 @@ nav { transform: translateX(-50%); width: 100%; height: var(--nav-height); - background: rgba(255, 255, 255, 0.95); - border-bottom: 1px solid var(--border-color); + background: rgba(255, 255, 255, 0.55); + backdrop-filter: blur(16px) saturate(180%); + -webkit-backdrop-filter: blur(16px) saturate(180%); + border-bottom: 1px solid rgba(0, 0, 0, 0.12); + box-shadow: 0 2px 12px rgba(0, 0, 0, 0.08); display: flex; align-items: center; justify-content: center; @@ -68,14 +75,16 @@ nav { nav.scrolled { top: 16px; - width: calc(100% - 32px); - max-width: 900px; + width: max-content; + max-width: calc(100% - 32px); height: 56px; border-radius: 28px; - border: 1px solid var(--border-color); - box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06); - background: rgba(255, 255, 
255, 0.98); - padding: 0 1rem; + border: 1px solid rgba(0, 0, 0, 0.1); + box-shadow: 0 4px 16px rgba(0, 0, 0, 0.1), 0 1px 4px rgba(0, 0, 0, 0.06); + background: rgba(255, 255, 255, 0.5); + backdrop-filter: blur(16px) saturate(180%); + -webkit-backdrop-filter: blur(16px) saturate(180%); + padding: 0 1.5rem; } .nav-links { @@ -175,17 +184,14 @@ nav.scrolled { opacity: 1; } -.typing-cursor-inline::after { - content: '|'; +.typing-cursor { + display: inline-block; + width: 0; + overflow: visible; color: var(--blue-accent); font-weight: 300; - margin-left: 2px; - position: relative; - top: -2px; -} - -.typing-cursor-inline.blinking::after { animation: blink 1s step-start infinite; + pointer-events: none; } @keyframes blink { @@ -334,7 +340,7 @@ nav.scrolled { font-size: 1rem; letter-spacing: 0.5px; color: var(--text-main); - font-weight: 500; + font-weight: 600; text-transform: uppercase; } @@ -349,6 +355,7 @@ nav.scrolled { border-radius: 24px; padding: 2.5rem; margin-bottom: 2rem; + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.08); transition: box-shadow 0.2s ease, border-color 0.2s ease; position: relative; overflow: hidden; @@ -389,15 +396,26 @@ nav.scrolled { .card h3 { font-size: 1.4rem; - font-weight: 500; + font-weight: 600; margin-bottom: 0.5rem; } +.card p, +.card li { + font-weight: 450; +} + +.cta-card { + border: 1.5px solid var(--border-color); + box-shadow: 0 2px 12px rgba(0, 0, 0, 0.06); +} + .minimal-card { background: var(--surface-color); border: 1px solid var(--border-color); border-radius: 16px; padding: 1.5rem; + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.08); transition: all 0.2s ease; height: 100%; position: relative; @@ -452,6 +470,7 @@ nav.scrolled { border: 1px solid transparent; padding: 0.375rem 1rem; border-radius: 16px; + font-weight: 450; font-size: 1rem; color: var(--text-main); font-weight: 400; @@ -555,6 +574,50 @@ nav.scrolled { } /* ===== Code block ===== */ +.code-header { + display: flex; + align-items: center; + justify-content: 
space-between; + margin-bottom: 0.75rem; +} + +.copy-btn { + display: inline-flex; + align-items: center; + gap: 0.4rem; + background: #f1f3f4; + border: 1px solid var(--border-color); + border-radius: 8px; + padding: 0.4rem 0.75rem; + font-size: 0.8rem; + font-family: 'Google Sans', 'Roboto', sans-serif; + color: var(--text-muted); + cursor: pointer; + transition: all 0.2s ease; +} + +.copy-btn:hover { + background: #e8eaed; + color: var(--text-main); +} + +.copy-btn.copied { + background: #e6f4ea; + border-color: #34a853; + color: #137333; +} + +/* Syntax highlighting */ +.code-block span { + font-family: inherit; + font-size: inherit; +} +.hl-keyword { color: #1a73e8; font-weight: 500; } +.hl-string { color: #137333; } +.hl-comment { color: #9aa0a6; font-style: italic; } +.hl-builtin { color: #7627bb; } +.hl-punct { color: #5f6368; } + .code-block { background: #f8f9fa; border: 1px solid var(--border-color); @@ -569,19 +632,11 @@ nav.scrolled { } /* ===== Playground ===== */ -.playground-grid { +.pg-row-2col { display: grid; - grid-template-columns: 300px 1fr; - gap: 2rem; - align-items: start; -} - -.playground-sidebar { - position: sticky; - top: calc(var(--nav-height) + 40px); - display: flex; - flex-direction: column; + grid-template-columns: 280px 1fr; gap: 1rem; + align-items: start; } .card-label { @@ -616,185 +671,713 @@ nav.scrolled { color: #9aa0a6; } -/* Task box */ -.task-box { - border-radius: 24px; - padding: 2rem; - border: 1px solid var(--border-color); - border-left: 4px solid var(--border-color); - min-height: 80px; +.cmd-input:disabled { + background: #f1f3f4; + color: #9aa0a6; + cursor: not-allowed; +} + +.btn-secondary:disabled { + background: #f1f3f4; + color: #9aa0a6; + cursor: not-allowed; + border-color: var(--border-color); + box-shadow: none; +} + +/* State box */ +.state-info { display: flex; flex-direction: column; - justify-content: center; - transition: border-color 0.2s ease; + gap: 0.75rem; } -.task-box.empty { - text-align: 
center; - color: var(--text-muted); +.state-row { + display: flex; + align-items: center; + justify-content: space-between; + gap: 0.75rem; } -.task-box .task-badge { - display: inline-block; - padding: 0.15rem 0.9rem; - border-radius: 12px; - font-size: 0.75rem; - font-weight: 600; - text-transform: uppercase; - letter-spacing: 0.3px; - margin-right: 0.5rem; +.state-label { + font-size: 0.9rem; + color: var(--text-main); + font-weight: 500; } -.task-meta { - color: var(--text-muted); - font-size: 0.85rem; +/* Solution button */ +.btn-solution { + background: #fef7e0; + color: #b05a00; + padding: 0.6rem 1.5rem; + border-radius: 50px; + font-weight: 500; + font-size: 0.95rem; + transition: all 0.2s ease; + display: inline-flex; + align-items: center; + justify-content: center; + gap: 0.5rem; + border: 1px solid #f9ab00; + cursor: pointer; + font-family: 'Google Sans', 'Roboto', sans-serif; } -.task-desc { - color: var(--text-main); - font-size: 1rem; - line-height: 1.5; - margin-top: 0.75rem; +.btn-solution:hover { + background: #f9ab00; + color: #fff; } -/* Status bar */ -.status-bar { - font-size: 0.9rem; - padding: 0.75rem 1.25rem; +.btn-solution:disabled { + background: #f1f3f4; + border-color: var(--border-color); + color: #9aa0a6; + cursor: not-allowed; +} + +/* Solution panel */ +.solution-panel { border-radius: 16px; - background: #f8f9fa; - border: 1px solid var(--border-color); - border-left: 3px solid var(--border-color); - min-height: 40px; - color: var(--text-muted); + padding: 1.25rem; + background: #fffbeb; + border: 1px solid #f9ab00; + border-left: 4px solid #f9ab00; } -.status-bar.success { - border-left-color: #34a853; - background: #e6f4ea; - color: #137333; +.solution-header { + display: flex; + align-items: center; + justify-content: space-between; + margin-bottom: 0.75rem; } -.status-bar.error { - border-left-color: #ea4335; - background: #fce8e6; - color: #c5221f; +.solution-commands { + display: flex; + flex-direction: column; + gap: 0.5rem; 
} -.status-bar.info { - border-left-color: var(--blue-accent); - background: #e8f0fe; - color: #174ea6; +.solution-cmd { + display: flex; + align-items: flex-start; + gap: 0.75rem; + background: #fff; + border: 1px solid #f0e6c8; + border-radius: 10px; + padding: 0.75rem 1rem; } -/* Output box */ -.output-box { - background: #f8f9fa; - border: 1px solid var(--border-color); - border-radius: 16px; - padding: 1.25rem; +.solution-step { + min-width: 24px; + height: 24px; + border-radius: 50%; + background: #f9ab00; + color: #fff; + display: flex; + align-items: center; + justify-content: center; + font-size: 0.75rem; + font-weight: 600; + flex-shrink: 0; + margin-top: 0.1rem; +} + +.solution-cmd code { font-family: 'Google Sans Mono', 'SF Mono', 'Fira Code', 'Consolas', monospace; font-size: 0.85rem; - white-space: pre-wrap; - word-break: break-word; - min-height: 100px; - max-height: 280px; - overflow-y: auto; color: var(--text-main); - line-height: 1.6; + word-break: break-all; + line-height: 1.5; } -/* Log table */ -.log-table { - width: 100%; - border-collapse: collapse; - font-size: 0.9rem; +.solution-cmd.is-note { + background: #fff8e1; + border-style: dashed; } -.log-table th { - text-align: left; - color: var(--text-muted); - font-weight: 500; - padding: 0.75rem 1rem; - border-bottom: 1px solid var(--border-color); - font-size: 0.8rem; - text-transform: uppercase; - letter-spacing: 1px; +.solution-cmd.is-note code { + color: #b05a00; + font-style: italic; + font-family: 'Google Sans', 'Roboto', sans-serif; + font-size: 0.9rem; } -.log-table td { - padding: 0.6rem 1rem; - border-bottom: 1px solid #f1f3f4; - color: var(--text-main); +.solution-cmd.is-note .solution-step { + background: #e0a800; } -.log-table .cmd { - font-family: 'Google Sans Mono', 'SF Mono', 'Fira Code', 'Consolas', monospace; - font-size: 0.8rem; +.solution-commands-scroll { + max-height: 150px; + overflow-y: auto; } -.log-table .yes { - color: #34a853; +.state-value { + font-size: 0.95rem; 
font-weight: 500; + color: var(--text-main); } -.log-table .no { - color: #ea4335; - font-weight: 500; +.progress-bar-container { + flex: 1; + max-width: 120px; + height: 8px; + background: #f1f3f4; + border-radius: 4px; + overflow: hidden; } -.log-empty { - color: var(--text-muted); - text-align: center; - padding: 2rem; - font-size: 0.9rem; +.progress-bar-fill { + height: 100%; + background: var(--blue-accent); + border-radius: 4px; + transition: width 0.4s ease; } -/* Spinner */ -.spinner { - display: inline-block; - width: 14px; - height: 14px; - border: 2px solid var(--border-color); - border-top-color: var(--blue-accent); - border-radius: 50%; - animation: spin 0.6s linear infinite; - vertical-align: middle; - margin-right: 6px; +/* Infrastructure tiles */ +.infra-tiles { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(90px, 1fr)); + gap: 0.75rem; } -@keyframes spin { - to { - transform: rotate(360deg); - } +.infra-tile { + aspect-ratio: 1; + border: 1px solid var(--border-color); + border-radius: 14px; + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + gap: 0.3rem; + cursor: pointer; + transition: all 0.2s ease; + position: relative; + padding: 0.5rem; } -/* Animations */ -.animate-up { - opacity: 0; - transform: translateY(30px); - transition: opacity 0.8s cubic-bezier(0.16, 1, 0.3, 1), transform 0.8s cubic-bezier(0.16, 1, 0.3, 1); +.infra-tile:hover { + border-color: var(--blue-accent); + box-shadow: 0 2px 8px rgba(26, 115, 232, 0.12); + transform: translateY(-2px); } -.animate-up.visible { - opacity: 1; - transform: translateY(0); +.infra-tile.has-resources { + border-color: var(--blue-accent); + background: rgba(26, 115, 232, 0.04); } -/* Footer */ -footer { - text-align: center; - padding: 3rem 0 2rem 0; - border-top: 1px solid var(--border-color); +.infra-tile-icon { + width: 32px; + height: 32px; + display: flex; + align-items: center; + justify-content: center; + color: var(--text-muted); 
} -footer p { - font-size: 1rem; - margin-bottom: 0; +.infra-tile.has-resources .infra-tile-icon { + color: var(--blue-accent); +} + +.infra-tile-icon svg { + width: 24px; + height: 24px; + stroke: currentColor; + fill: none; + stroke-width: 1.5; +} + +.infra-tile-name { + font-size: 0.6rem; + text-transform: uppercase; + letter-spacing: 0.2px; + color: var(--text-muted); + font-weight: 600; + text-align: center; + line-height: 1.2; + max-width: 100%; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.infra-tile.has-resources .infra-tile-name { + color: var(--blue-accent); +} + +.infra-tile-badge { + position: absolute; + top: -6px; + right: -6px; + min-width: 20px; + height: 20px; + border-radius: 10px; + background: var(--blue-accent); + color: #fff; + font-size: 0.7rem; + font-weight: 600; + display: flex; + align-items: center; + justify-content: center; + padding: 0 5px; +} + +/* Log scroll */ +.log-scroll { + max-height: 250px; + overflow-y: auto; +} + +.log-table tbody tr { + cursor: pointer; + transition: background 0.15s ease; +} + +.log-table tbody tr:hover { + background: #f8f9fa; +} + +/* Infra modal */ +#infra-modal .modal-container, +#log-modal .modal-container { + max-width: 700px; +} + +#infra-modal, +#log-modal { + position: fixed; + inset: 0; + background: rgba(0, 0, 0, 0.4); + z-index: 2000; + display: none; + opacity: 0; + transition: opacity 0.3s ease; + backdrop-filter: blur(8px); + -webkit-backdrop-filter: blur(8px); + overflow-y: auto; + padding: 4rem 1rem; +} + +#infra-modal.open, +#log-modal.open { + display: block; + opacity: 1; +} + +.infra-res-group { + border: 1px solid var(--border-color); + border-radius: 12px; + margin-bottom: 0.75rem; + overflow: hidden; +} + +.infra-res-header { + display: flex; + align-items: center; + justify-content: space-between; + padding: 0.75rem 1rem; + cursor: pointer; + transition: background 0.15s ease; + user-select: none; +} + +.infra-res-header:hover { + background: #f8f9fa; 
+} + +.infra-res-title { + font-size: 0.95rem; + font-weight: 500; + color: var(--text-main); + text-transform: capitalize; +} + +.infra-res-count { + font-size: 0.85rem; + color: var(--text-muted); + background: #f1f3f4; + padding: 0.15rem 0.6rem; + border-radius: 8px; +} + +.infra-res-body { + display: none; + padding: 0 1rem 0.75rem; + border-top: 1px solid var(--border-color); +} + +.infra-res-body.open { + display: block; +} + +.infra-res-item { + font-size: 0.85rem; + font-family: 'Google Sans Mono', monospace; + color: var(--text-main); + padding: 0.35rem 0; + border-bottom: 1px solid #f1f3f4; +} + +.infra-res-item:last-child { + border-bottom: none; +} + +.chaos-active { + color: #ea4335; + font-weight: 500; +} + +.chaos-inactive { + color: var(--text-muted); +} + +.state-episode-id { + font-size: 0.7rem; + word-break: break-all; +} + +/* Task box */ +.task-box { + border-radius: 24px; + padding: 2rem; + border: 1px solid var(--border-color); + border-left: 4px solid var(--border-color); + min-height: 80px; + display: flex; + flex-direction: column; + justify-content: center; + transition: border-color 0.2s ease; +} + +.task-box.empty { + text-align: center; + color: var(--text-muted); +} + +.task-box .task-badge { + display: inline-block; + padding: 0.15rem 0.9rem; + border-radius: 12px; + font-size: 0.75rem; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.3px; + margin-right: 0.5rem; +} + +.task-meta { + color: var(--text-muted); + font-size: 0.85rem; +} + +.task-desc { + color: var(--text-main); + font-size: 1rem; + line-height: 1.5; + margin-top: 0.75rem; +} + +/* Status bar */ +.status-bar { + font-size: 0.9rem; + padding: 0.75rem 1.25rem; + border-radius: 16px; + background: #f8f9fa; + border: 1px solid var(--border-color); + border-left: 3px solid var(--border-color); + min-height: 40px; + color: var(--text-muted); +} + +.status-bar.success { + border-left-color: #34a853; + background: #e6f4ea; + color: #137333; +} + 
+.status-bar.error { + border-left-color: #ea4335; + background: #fce8e6; + color: #c5221f; +} + +.status-bar.info { + border-left-color: var(--blue-accent); + background: #e8f0fe; + color: #174ea6; +} + +/* Output box */ +.output-box { + background: #f8f9fa; + border: 1px solid var(--border-color); + border-radius: 16px; + padding: 1.25rem; + font-family: 'Google Sans Mono', 'SF Mono', 'Fira Code', 'Consolas', monospace; + font-size: 0.85rem; + white-space: pre-wrap; + word-break: break-word; + min-height: 100px; + max-height: 280px; + overflow-y: auto; + color: var(--text-main); + line-height: 1.6; +} + +/* Log table */ +.log-table { + width: 100%; + border-collapse: collapse; + font-size: 0.9rem; +} + +.log-table th { + text-align: left; + color: var(--text-muted); + font-weight: 500; + padding: 0.75rem 1rem; + border-bottom: 1px solid var(--border-color); + font-size: 0.8rem; + text-transform: uppercase; + letter-spacing: 1px; +} + +.log-table td { + padding: 0.6rem 1rem; + border-bottom: 1px solid #f1f3f4; + color: var(--text-main); +} + +.log-table .cmd { + font-family: 'Google Sans Mono', 'SF Mono', 'Fira Code', 'Consolas', monospace; + font-size: 0.8rem; +} + +.log-table .yes { + color: #34a853; + font-weight: 500; +} + +.log-table .no { + color: #ea4335; + font-weight: 500; +} + +.log-empty { + color: var(--text-muted); + text-align: center; + padding: 2rem; + font-size: 0.9rem; +} + +/* Spinner */ +.spinner { + display: inline-block; + width: 14px; + height: 14px; + border: 2px solid var(--border-color); + border-top-color: var(--blue-accent); + border-radius: 50%; + animation: spin 0.6s linear infinite; + vertical-align: middle; + margin-right: 6px; +} + +@keyframes spin { + to { + transform: rotate(360deg); + } +} + +/* Animations */ +.animate-up { + opacity: 0; + transform: translateY(30px); + transition: opacity 0.8s cubic-bezier(0.16, 1, 0.3, 1), transform 0.8s cubic-bezier(0.16, 1, 0.3, 1); +} + +.animate-up.visible { + opacity: 1; + transform: 
translateY(0); +} + +/* ===== Timeline ===== */ +.timeline { + border-left: 2px dashed var(--border-color); + padding-left: 2.5rem; + margin-left: 0.5rem; +} + +.timeline-item { + position: relative; + margin-bottom: 3rem; +} + +.timeline-item:last-child { + margin-bottom: 0; +} + +.timeline-item::before { + content: ''; + position: absolute; + left: -2.85rem; + top: 0.35rem; + width: 12px; + height: 12px; + background: var(--dot-bg, var(--surface-color)); + border: 2.5px solid var(--dot-color, var(--border-color)); + border-radius: 50%; + transition: all 0.2s ease; +} + +.timeline-item.active::before { + background: var(--dot-color, var(--blue-accent)); + border-color: var(--dot-color, var(--blue-accent)); + box-shadow: 0 0 0 4px var(--dot-bg, rgba(26, 115, 232, 0.1)); +} + +.timeline-header { + display: flex; + justify-content: space-between; + align-items: baseline; + margin-bottom: 0.25rem; + flex-wrap: wrap; + gap: 0.5rem; +} + +.role-title { + font-size: 1.3rem; + color: var(--text-main); + font-weight: 600; +} + +.date-badge { + color: var(--text-muted); + font-size: 0.95rem; + font-weight: 450; +} + +.timeline-subtitle { + color: var(--text-muted); + font-size: 1rem; + font-weight: 450; + margin-bottom: 0.75rem; +} + +.timeline-points { + list-style: none; + padding: 0; + margin: 0; +} + +.timeline-points li { + position: relative; + padding: 0.35rem 0 0.35rem 1.25rem; + color: var(--text-muted); + font-size: 0.95rem; + font-weight: 450; + line-height: 1.5; +} + +.timeline-points li::before { + content: '\2022'; + position: absolute; + left: 0.15rem; + color: var(--dot-color, var(--blue-accent)); + font-weight: bold; + font-size: 1.1rem; + line-height: 1.4; +} + +.timeline-points li strong { + color: var(--text-main); + font-weight: 600; +} + +/* Footer */ +footer { + padding: 4rem 2rem 2rem; + border-top: 1px solid var(--border-color); + max-width: 1200px; + margin: 0 auto; +} + +.footer-content { + display: grid; + grid-template-columns: 2fr 1fr 1fr 1fr; 
+ gap: 2.5rem; + margin-bottom: 3rem; +} + +.footer-brand h3 { + font-size: 1.2rem; + font-weight: 600; + color: var(--text-main); + margin-bottom: 0.75rem; +} + +.footer-brand p { + font-size: 0.9rem; + color: var(--text-muted); + font-weight: 400; + line-height: 1.6; + max-width: 300px; +} + +.footer-links-group h4 { + font-size: 0.85rem; + font-weight: 600; + color: var(--text-main); + text-transform: uppercase; + letter-spacing: 0.5px; + margin-bottom: 1rem; +} + +.footer-links-group ul { + list-style: none; + padding: 0; + margin: 0; +} + +.footer-links-group li { + margin-bottom: 0.6rem; +} + +.footer-links-group a { + font-size: 0.9rem; + font-weight: 450; + color: var(--text-muted); + transition: color 0.2s ease; +} + +.footer-links-group a:hover { + color: var(--text-main); +} + +.footer-bottom { + border-top: 1px solid var(--border-color); + padding-top: 1.5rem; + text-align: center; +} + +.footer-bottom p { + font-size: 0.85rem; + color: var(--text-muted); + font-weight: 400; + margin-bottom: 0; } /* ===== Responsive ===== */ @media (max-width: 768px) { + .footer-content { + grid-template-columns: 1fr 1fr; + gap: 2rem; + } + + .footer-brand { + grid-column: 1 / -1; + } + .hero h1 { font-size: 3rem; } @@ -841,14 +1424,10 @@ footer p { grid-template-columns: 1fr; } - .playground-grid { + .pg-row-2col { grid-template-columns: 1fr; } - .playground-sidebar { - position: static; - } - .nav-links { display: none; } @@ -865,4 +1444,225 @@ footer p { color: var(--text-main); font-size: 1.1rem; } + + .modal-grid { + grid-template-columns: 1fr !important; + } +} + +/* ===== Feature Chips ===== */ +.feature-chips { + display: flex; + flex-direction: column; + gap: 0.75rem; +} + +.feature-chip { + display: flex; + align-items: center; + gap: 1rem; + padding: 1rem 1.25rem; + border: 1px solid var(--border-color); + border-radius: 16px; + cursor: pointer; + transition: all 0.2s ease; + position: relative; + overflow: hidden; +} + +.feature-chip::before { + content: ''; 
+ position: absolute; + inset: 0; + border-radius: inherit; + background: radial-gradient(400px circle at var(--mouse-x, 0) var(--mouse-y, 0), rgba(26, 115, 232, 0.06), transparent 40%); + opacity: 0; + transition: opacity 0.3s ease; + pointer-events: none; +} + +.feature-chip:hover { + border-color: var(--blue-accent); + box-shadow: 0 2px 8px rgba(26, 115, 232, 0.12); + transform: translateX(4px); +} + +.feature-chip:hover::before { + opacity: 1; +} + +.feature-chip-icon { + width: 40px; + height: 40px; + min-width: 40px; + border-radius: 12px; + background: #e8f0fe; + color: var(--blue-accent); + display: flex; + align-items: center; + justify-content: center; + font-size: 18px; + transition: all 0.2s ease; +} + +.feature-chip:hover .feature-chip-icon { + background: var(--blue-accent); + color: white; +} + +.feature-chip div { + flex: 1; + min-width: 0; +} + +.feature-chip strong { + display: block; + font-size: 1rem; + font-weight: 500; + color: var(--text-main); + margin-bottom: 0.15rem; +} + +.feature-chip span { + font-size: 0.9rem; + color: var(--text-muted); +} + +.feature-chip code { + background: #f1f3f4; + padding: 0.1rem 0.4rem; + border-radius: 4px; + font-size: 0.85rem; + font-family: 'Google Sans Mono', 'SF Mono', monospace; +} + +.feature-chip-arrow { + color: var(--border-color); + transition: all 0.2s ease; + flex-shrink: 0; +} + +.feature-chip:hover .feature-chip-arrow { + color: var(--blue-accent); + transform: translateX(2px); +} + +/* ===== Feature Modal ===== */ +#feature-modal { + position: fixed; + inset: 0; + background: rgba(0, 0, 0, 0.4); + z-index: 2000; + display: none; + opacity: 0; + transition: opacity 0.3s ease; + backdrop-filter: blur(8px); + -webkit-backdrop-filter: blur(8px); + overflow-y: auto; + padding: 4rem 1rem; +} + +#feature-modal.open { + display: block; + opacity: 1; +} + +.modal-container { + max-width: 900px; + margin: 0 auto; + background: #fff; + border-radius: 32px; + padding: 3rem; + border: 1px solid 
var(--border-color); + box-shadow: 0 10px 40px rgba(0, 0, 0, 0.05); + position: relative; +} + +.close-modal { + position: absolute; + top: 2rem; + right: 2rem; + width: 44px; + height: 44px; + border-radius: 50%; + background: #f1f3f4; + display: flex; + align-items: center; + justify-content: center; + cursor: pointer; + border: none; + font-size: 1.5rem; + color: var(--text-muted); + transition: all 0.2s ease; +} + +.close-modal:hover { + background: #e8eaed; + transform: scale(1.1); +} + +.modal-container h2 { + font-size: 1.8rem; + margin-bottom: 1.5rem; + padding-right: 3rem; +} + +.modal-grid { + display: grid; + grid-template-columns: 1.5fr 1fr; + gap: 3rem; + margin-top: 1rem; +} + +.modal-section { + margin-bottom: 1rem; +} + +.modal-label { + font-size: 0.8rem; + text-transform: uppercase; + letter-spacing: 1px; + color: var(--blue-accent); + font-weight: 700; + margin-bottom: 0.5rem; + display: block; +} + +.modal-section p { + font-size: 1rem; + line-height: 1.7; + margin-bottom: 1.5rem; +} + +.diag-container { + background: #f8f9fa; + border-radius: 20px; + padding: 1.5rem; + border: 1px solid var(--border-color); + margin-top: 0.5rem; +} + +.diag-container svg { + width: 100%; + height: auto; +} + +.perf-card { + background: #e8f0fe; + border-radius: 16px; + padding: 1rem; + margin-bottom: 0.75rem; + border: 1px solid rgba(26, 115, 232, 0.1); +} + +.perf-val { + font-size: 1.5rem; + font-weight: 500; + color: var(--blue-accent); + display: block; +} + +.perf-label { + font-size: 0.85rem; + color: var(--text-muted); } \ No newline at end of file diff --git a/server/static/img/aws/acm.svg b/server/static/img/aws/acm.svg new file mode 100644 index 0000000000000000000000000000000000000000..f7388f8a4edde84177ae71f19b5d5651a284ab61 --- /dev/null +++ b/server/static/img/aws/acm.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_AWS-Certificate-Manager_64 + Created with Sketch. 
+ + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/apigateway.svg b/server/static/img/aws/apigateway.svg new file mode 100644 index 0000000000000000000000000000000000000000..19247ff8f7b9975cec985c19548f01490e9dce6a --- /dev/null +++ b/server/static/img/aws/apigateway.svg @@ -0,0 +1,18 @@ + + + Icon-Architecture/64/Arch_ Amazon-API-Gateway_64 + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/apigateway_v1.svg b/server/static/img/aws/apigateway_v1.svg new file mode 100644 index 0000000000000000000000000000000000000000..19247ff8f7b9975cec985c19548f01490e9dce6a --- /dev/null +++ b/server/static/img/aws/apigateway_v1.svg @@ -0,0 +1,18 @@ + + + Icon-Architecture/64/Arch_ Amazon-API-Gateway_64 + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/apigatewayv2.svg b/server/static/img/aws/apigatewayv2.svg new file mode 100644 index 0000000000000000000000000000000000000000..19247ff8f7b9975cec985c19548f01490e9dce6a --- /dev/null +++ b/server/static/img/aws/apigatewayv2.svg @@ -0,0 +1,18 @@ + + + Icon-Architecture/64/Arch_ Amazon-API-Gateway_64 + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/athena.svg b/server/static/img/aws/athena.svg new file mode 100644 index 0000000000000000000000000000000000000000..fc175feff497be4978d12ecabbaac1f661c3e982 --- /dev/null +++ b/server/static/img/aws/athena.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_Amazon-Athena_64 + Created with Sketch. + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/cloudformation.svg b/server/static/img/aws/cloudformation.svg new file mode 100644 index 0000000000000000000000000000000000000000..c2271fc7a217fb96b26ba40cbc6808641a1a7e14 --- /dev/null +++ b/server/static/img/aws/cloudformation.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_AWS-CloudFormation_64 + Created with Sketch. 
+ + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/cloudwatch.svg b/server/static/img/aws/cloudwatch.svg new file mode 100644 index 0000000000000000000000000000000000000000..103369a3f08fab8b237ed85a5424f32ff278416b --- /dev/null +++ b/server/static/img/aws/cloudwatch.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_Amazon-CloudWatch_64 + Created with Sketch. + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/cognito-idp.svg b/server/static/img/aws/cognito-idp.svg new file mode 100644 index 0000000000000000000000000000000000000000..d9a808e39a9badaf180fb011f9ad23db90f1b6eb --- /dev/null +++ b/server/static/img/aws/cognito-idp.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_Amazon-Cognito_64 + Created with Sketch. + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/dynamodb.svg b/server/static/img/aws/dynamodb.svg new file mode 100644 index 0000000000000000000000000000000000000000..bd4f2c30f503aadc4fd8548d514416004b6f8cb3 --- /dev/null +++ b/server/static/img/aws/dynamodb.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_Amazon-DynamoDB_64 + Created with Sketch. + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/ebs.svg b/server/static/img/aws/ebs.svg new file mode 100644 index 0000000000000000000000000000000000000000..f5d7ce369f161ba58cb210f66670583c564601ae --- /dev/null +++ b/server/static/img/aws/ebs.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_Amazon-Elastic-Block-Store_64 + Created with Sketch. + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/ec2.svg b/server/static/img/aws/ec2.svg new file mode 100644 index 0000000000000000000000000000000000000000..14f083fd6d532bb146b0f893d3b7665142369888 --- /dev/null +++ b/server/static/img/aws/ec2.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_Amazon-EC2_64 + Created with Sketch. 
+ + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/ecs.svg b/server/static/img/aws/ecs.svg new file mode 100644 index 0000000000000000000000000000000000000000..768dfc18034d51be00b12c8531d3201dfd34a5e0 --- /dev/null +++ b/server/static/img/aws/ecs.svg @@ -0,0 +1,18 @@ + + + Icon-Architecture/64/Arch_Amazon-ECS-Anywhere_64 + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/efs.svg b/server/static/img/aws/efs.svg new file mode 100644 index 0000000000000000000000000000000000000000..55dbf7954edcbded5f72c5db745efa2bb378bb4c --- /dev/null +++ b/server/static/img/aws/efs.svg @@ -0,0 +1,18 @@ + + + Icon-Architecture/64/Arch_Amazon-EFS_64 + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/elasticache.svg b/server/static/img/aws/elasticache.svg new file mode 100644 index 0000000000000000000000000000000000000000..640f35820081bb9ce6413b22073b8bc090c55546 --- /dev/null +++ b/server/static/img/aws/elasticache.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_Amazon-ElastiCache_64 + Created with Sketch. + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/elasticfilesystem.svg b/server/static/img/aws/elasticfilesystem.svg new file mode 100644 index 0000000000000000000000000000000000000000..55dbf7954edcbded5f72c5db745efa2bb378bb4c --- /dev/null +++ b/server/static/img/aws/elasticfilesystem.svg @@ -0,0 +1,18 @@ + + + Icon-Architecture/64/Arch_Amazon-EFS_64 + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/elasticloadbalancing.svg b/server/static/img/aws/elasticloadbalancing.svg new file mode 100644 index 0000000000000000000000000000000000000000..b79b920f8b85c1fc7be06aef6c4e878c44cabbb7 --- /dev/null +++ b/server/static/img/aws/elasticloadbalancing.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_Elastic-Load-Balancing_64 + Created with Sketch. 
+ + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/elasticmapreduce.svg b/server/static/img/aws/elasticmapreduce.svg new file mode 100644 index 0000000000000000000000000000000000000000..e1338a27a3d4660d8cf27d0bebda8cb08a574322 --- /dev/null +++ b/server/static/img/aws/elasticmapreduce.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_Amazon-EMR_64 + Created with Sketch. + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/elbv2.svg b/server/static/img/aws/elbv2.svg new file mode 100644 index 0000000000000000000000000000000000000000..b79b920f8b85c1fc7be06aef6c4e878c44cabbb7 --- /dev/null +++ b/server/static/img/aws/elbv2.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_Elastic-Load-Balancing_64 + Created with Sketch. + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/emr.svg b/server/static/img/aws/emr.svg new file mode 100644 index 0000000000000000000000000000000000000000..b494fc21e9c7313b872e2fe756099927aa64c67d --- /dev/null +++ b/server/static/img/aws/emr.svg @@ -0,0 +1,20 @@ + + + + Icon-Architecture/64/Arch_AWS-Fargate_64 + Created with Sketch. + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/events.svg b/server/static/img/aws/events.svg new file mode 100644 index 0000000000000000000000000000000000000000..469a4d346a2f9ade49d5d7c74294967dae381da7 --- /dev/null +++ b/server/static/img/aws/events.svg @@ -0,0 +1,16 @@ + + + Icon-Architecture/64/Arch_Amazon-EventBridge_64 + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/firehose.svg b/server/static/img/aws/firehose.svg new file mode 100644 index 0000000000000000000000000000000000000000..0aaaca7fb869432b08697502132d423b968494a8 --- /dev/null +++ b/server/static/img/aws/firehose.svg @@ -0,0 +1,20 @@ + + + + Icon-Architecture/64/Arch_Amazon-Kinesis-Firehose_64 + Created with Sketch. 
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/glue.svg b/server/static/img/aws/glue.svg new file mode 100644 index 0000000000000000000000000000000000000000..59100ad18ee195c21757d85decc57018304c8be5 --- /dev/null +++ b/server/static/img/aws/glue.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_AWS-Glue_64 + Created with Sketch. + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/iam.svg b/server/static/img/aws/iam.svg new file mode 100644 index 0000000000000000000000000000000000000000..f3be42b31a048fd0a5fbf75901831f6356b65fb3 --- /dev/null +++ b/server/static/img/aws/iam.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_AWS-Single-Sign-On_64 + Created with Sketch. + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/kinesis.svg b/server/static/img/aws/kinesis.svg new file mode 100644 index 0000000000000000000000000000000000000000..622cae9e0418d154a7eeafe131360430135a3fa9 --- /dev/null +++ b/server/static/img/aws/kinesis.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_Amazon-Kinesis_64 + Created with Sketch. + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/lambda.svg b/server/static/img/aws/lambda.svg new file mode 100644 index 0000000000000000000000000000000000000000..496ef0e723895ee99b0635daa65ba4039434bdc8 --- /dev/null +++ b/server/static/img/aws/lambda.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_AWS-Lambda_64 + Created with Sketch. + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/logs.svg b/server/static/img/aws/logs.svg new file mode 100644 index 0000000000000000000000000000000000000000..103369a3f08fab8b237ed85a5424f32ff278416b --- /dev/null +++ b/server/static/img/aws/logs.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_Amazon-CloudWatch_64 + Created with Sketch. 
+ + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/monitoring.svg b/server/static/img/aws/monitoring.svg new file mode 100644 index 0000000000000000000000000000000000000000..0c75225312aa507852bce85e55e89e8c9d3e6a4a --- /dev/null +++ b/server/static/img/aws/monitoring.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_AWS-Cloud-Trail_64 + Created with Sketch. + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/rds.svg b/server/static/img/aws/rds.svg new file mode 100644 index 0000000000000000000000000000000000000000..245d23725ab51c62d8708cc188a8c62929ee2816 --- /dev/null +++ b/server/static/img/aws/rds.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_Amazon-RDS_64 + Created with Sketch. + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/route53.svg b/server/static/img/aws/route53.svg new file mode 100644 index 0000000000000000000000000000000000000000..dbf747c06c3f629afcbd4832c6b1dd48923cd1db --- /dev/null +++ b/server/static/img/aws/route53.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_Amazon-Route-53_64 + Created with Sketch. + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/s3.svg b/server/static/img/aws/s3.svg new file mode 100644 index 0000000000000000000000000000000000000000..77b8dea23ffffb8fae924bcb1860324b15413ec1 --- /dev/null +++ b/server/static/img/aws/s3.svg @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/server/static/img/aws/secretsmanager.svg b/server/static/img/aws/secretsmanager.svg new file mode 100644 index 0000000000000000000000000000000000000000..558b227b83e8bc84c65075938bc4c5571670dc16 --- /dev/null +++ b/server/static/img/aws/secretsmanager.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_AWS-Secrets-Manager_64 + Created with Sketch. 
+ + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/ses.svg b/server/static/img/aws/ses.svg new file mode 100644 index 0000000000000000000000000000000000000000..c907bb3b7216858056936e1a7b2729b449c87d55 --- /dev/null +++ b/server/static/img/aws/ses.svg @@ -0,0 +1,16 @@ + + + Icon-Architecture/64/Arch_Amazon-Simple-Email-Service_64 + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/ses_v2.svg b/server/static/img/aws/ses_v2.svg new file mode 100644 index 0000000000000000000000000000000000000000..7f79d9053a5154da381055229609ce312d26e215 --- /dev/null +++ b/server/static/img/aws/ses_v2.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_Amazon-WorkMail_64 + Created with Sketch. + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/sns.svg b/server/static/img/aws/sns.svg new file mode 100644 index 0000000000000000000000000000000000000000..70dcd1149a92f665a2a77281a1be70eea8d40143 --- /dev/null +++ b/server/static/img/aws/sns.svg @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git a/server/static/img/aws/sqs.svg b/server/static/img/aws/sqs.svg new file mode 100644 index 0000000000000000000000000000000000000000..25f277c1d72af2bb8f6833e8ff73326797bf2692 --- /dev/null +++ b/server/static/img/aws/sqs.svg @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git a/server/static/img/aws/ssm.svg b/server/static/img/aws/ssm.svg new file mode 100644 index 0000000000000000000000000000000000000000..70f9e8cd9a1124c0568ffd36ca00377998629c25 --- /dev/null +++ b/server/static/img/aws/ssm.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_AWS-Systems-Manager_64 + Created with Sketch. 
+ + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/states.svg b/server/static/img/aws/states.svg new file mode 100644 index 0000000000000000000000000000000000000000..db8765056cccb7f89fe340eabeca775007a5ee91 --- /dev/null +++ b/server/static/img/aws/states.svg @@ -0,0 +1,18 @@ + + + + Icon-Architecture/64/Arch_AWS-Step-Functions_64 + Created with Sketch. + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/img/aws/wafv2.svg b/server/static/img/aws/wafv2.svg new file mode 100644 index 0000000000000000000000000000000000000000..224b2f9a66fd3472bafa80cf99c3d81b63dad11b --- /dev/null +++ b/server/static/img/aws/wafv2.svg @@ -0,0 +1,18 @@ + + + Icon-Architecture/64/Arch_AWS-WAF_64 + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/static/js/app.js b/server/static/js/app.js index f5c5f3dbd4ce603a0bfd38cfd4006356671b5be0..6e22ea5c88fa8fc66aa4c1b7dc453bb96f3957b1 100644 --- a/server/static/js/app.js +++ b/server/static/js/app.js @@ -75,47 +75,64 @@ document.addEventListener('mousemove', e => { }, { passive: true }); /* ===== Typewriter — character-by-character reveal ===== */ -function typewrite(el, delay) { +function typewrite(el, delay, speed) { + speed = speed || 30; const text = el.textContent; el.innerHTML = ''; - el.classList.add('typing-cursor-inline'); const chars = []; for (const ch of text) { const span = document.createElement('span'); span.classList.add('char'); - span.textContent = ch === ' ' ? 
'\u00A0' : ch; + span.textContent = ch; el.appendChild(span); chars.push(span); } - chars.forEach((span, i) => { - setTimeout(() => { - span.classList.add('visible'); - // Last char: switch to blinking cursor - if (i === chars.length - 1) { - el.classList.add('blinking'); - } - }, delay + i * 30); + // Insert a real cursor element that moves with the text + const cursor = document.createElement('span'); + cursor.classList.add('typing-cursor'); + cursor.textContent = '|'; + + return new Promise(resolve => { + chars.forEach((span, i) => { + setTimeout(() => { + span.classList.add('visible'); + // Move cursor right after the latest visible char + span.after(cursor); + if (i === chars.length - 1) { + resolve(); + } + }, delay + i * speed); + }); + if (chars.length === 0) resolve(); }); } -// Typewrite hero elements -document.querySelectorAll('.hero .type-animate').forEach((el, i) => { - typewrite(el, 300 + i * 600); -}); +// Typewrite hero elements sequentially: subtitle starts after title finishes +(async function () { + const heroTitle = document.getElementById('hero-title'); + const heroSub = document.getElementById('hero-subtitle'); + + // Hide subtitle until its turn + if (heroSub) heroSub.style.visibility = 'hidden'; + + if (heroTitle) { + await typewrite(heroTitle, 300); + heroTitle.querySelector('.typing-cursor')?.remove(); + } + + if (heroSub) { + heroSub.style.visibility = 'visible'; + await typewrite(heroSub, 200, 12); + heroSub.querySelector('.typing-cursor')?.remove(); + } -// Fade in hero CTA after typewriter completes -const heroTitle = document.getElementById('hero-title'); -const heroSub = document.getElementById('hero-subtitle'); -if (heroTitle && heroSub) { - const titleLen = heroTitle.textContent.replace(/\u00A0/g, ' ').length; - const subLen = heroSub.textContent.replace(/\u00A0/g, ' ').length; - const totalDelay = 300 + titleLen * 30 + 600 + subLen * 30 + 200; + // Fade in hero CTA after both animations complete setTimeout(() => { 
document.querySelectorAll('.hero-fade-up').forEach(el => el.classList.add('visible')); - }, totalDelay); -} + }, 200); +})(); /* ===== Intersection Observer — fade-up on scroll ===== */ const observer = new IntersectionObserver(entries => { @@ -140,6 +157,183 @@ const COLOR_BG = { }; let stepCount = 0; +// Services that have official AWS SVG files in /static/img/aws/ +const SVC_IMG_FILES = ['s3', 'sqs', 'sns', 'lambda', 'dynamodb', 'iam', 'ec2', 'rds', 'cloudformation', 'cloudwatch', 'route53', 'apigateway', 'apigateway_v1', 'elasticache', 'elbv2', 'events', 'ssm', 'cognito-idp', 'glue', 'firehose', 'athena', 'emr', 'efs', 'ebs', 'kinesis', 'logs', 'monitoring', 'ses', 'ses_v2', 'acm', 'wafv2', 'states', 'secretsmanager', 'ecs', 'elasticmapreduce', 'elasticloadbalancing', 'elasticfilesystem']; + +const DEFAULT_ICON = ''; + +function _svcIconHtml(svc) { + if (SVC_IMG_FILES.includes(svc)) { + return '' + svc + ''; + } + return '' + DEFAULT_ICON + ''; +} + +// Cache infra data for modal drill-down +let _lastInfraServices = {}; + +async function refreshState() { + try { + const res = await fetch('/web/state'); + const state = await res.json(); + + // Update sidebar stats + document.getElementById('stateSteps').textContent = state.tracker ? state.tracker.step_count : '0'; + document.getElementById('stateHints').textContent = state.tracker ? state.tracker.hints_used : '0'; + const chaosEl = document.getElementById('stateChaos'); + if (state.chaos_occurred) { + chaosEl.textContent = 'Active'; + chaosEl.className = 'state-value chaos-active'; + } else { + chaosEl.textContent = 'None'; + chaosEl.className = 'state-value chaos-inactive'; + } + + // Render infra tiles + const grid = document.getElementById('infraGrid'); + const services = state.infra_state && state.infra_state.services ? state.infra_state.services : {}; + _lastInfraServices = services; + const svcKeys = Object.keys(services); + if (svcKeys.length === 0) { + grid.innerHTML = '

No data.

'; + return; + } + + let html = ''; + for (const svc of svcKeys) { + const data = services[svc]; + let totalCount = 0; + for (const [, resData] of Object.entries(data)) { + if (resData && typeof resData === 'object') { + if (typeof resData.count === 'number') { + totalCount += resData.count; + } else if (Array.isArray(resData)) { + totalCount += resData.length; + } else { + // Nested object keyed by ID (e.g. apigateway_v1 rest_apis) + const keys = Object.keys(resData); + if (keys.length > 0) totalCount += keys.length; + } + } + } + const hasRes = totalCount > 0; + html += '
' + + (hasRes ? '' + totalCount + '' : '') + + '
' + _svcIconHtml(svc) + '
' + + '' + escHtml(svc) + '' + + '
'; + } + grid.className = 'infra-tiles'; + grid.innerHTML = html; + } catch (e) { + // Silent fail + } +} + +// Infra modal +function _renderResItems(obj) { + // Renders items for the modal body — handles arrays, {count,names}, and nested objects + if (!obj || typeof obj !== 'object') return '
' + escHtml(String(obj)) + '
'; + if (Array.isArray(obj)) { + return obj.map(function (item) { return '
' + escHtml(String(item)) + '
'; }).join(''); + } + // Has {count, names/ids} pattern + if (typeof obj.count === 'number') { + var items = obj.names || obj.ids || []; + return items.map(function (item) { return '
' + escHtml(String(item)) + '
'; }).join('') || + '
Empty (' + obj.count + ')
'; + } + // Nested keyed object — render each key as a sub-item + var keys = Object.keys(obj); + if (keys.length === 0) return ''; + var out = ''; + for (var k of keys) { + var val = obj[k]; + if (val && typeof val === 'object' && !Array.isArray(val)) { + // Show key with a summary + var name = val.name || val.Name || val.id || val.Id || k; + var detail = val.description || val.engine || val.runtime || val.protocol || ''; + out += '
' + escHtml(String(name)) + '' + + (detail ? ' \u2014 ' + escHtml(String(detail)) + '' : '') + + '
'; + } else { + out += '
' + escHtml(k + ': ' + JSON.stringify(val)) + '
'; + } + } + return out; +} + +function _countResources(resData) { + if (!resData || typeof resData !== 'object') return 0; + if (typeof resData.count === 'number') return resData.count; + if (Array.isArray(resData)) return resData.length; + return Object.keys(resData).length; +} + +function openInfraModal(svc) { + const data = _lastInfraServices[svc]; + if (!data) return; + document.getElementById('infra-modal-title').textContent = svc.toUpperCase(); + const body = document.getElementById('infra-modal-body'); + let html = ''; + for (const [resType, resData] of Object.entries(data)) { + if (!resData || typeof resData !== 'object') continue; + var count = _countResources(resData); + const groupId = 'infra-g-' + svc + '-' + resType.replace(/[^a-z0-9]/gi, ''); + html += '
' + + '
' + + '' + escHtml(resType.replace(/_/g, ' ')) + '' + + '' + count + '' + + '
'; + var itemsHtml = _renderResItems(resData); + if (itemsHtml) { + html += '
' + itemsHtml + '
'; + } + html += '
'; + } + body.innerHTML = html || '

No resources in this service.

'; + document.getElementById('infra-modal').classList.add('open'); + document.body.style.overflow = 'hidden'; +} + +function closeInfraModal() { + document.getElementById('infra-modal').classList.remove('open'); + document.body.style.overflow = ''; +} + +// Command log modal +let _logEntries = []; + +function openLogModal(index) { + const entry = _logEntries[index]; + if (!entry) return; + document.getElementById('log-modal-title').textContent = 'Step #' + entry.step; + document.getElementById('log-modal-cmd').textContent = entry.command; + document.getElementById('log-modal-status').innerHTML = entry.success + ? 'Success' + : 'Failed'; + document.getElementById('log-modal-reward').textContent = (entry.reward >= 0 ? '+' : '') + entry.reward.toFixed(2); + document.getElementById('log-modal-output').textContent = entry.output || 'No output'; + document.getElementById('log-modal').classList.add('open'); + document.body.style.overflow = 'hidden'; +} + +function closeLogModal() { + document.getElementById('log-modal').classList.remove('open'); + document.body.style.overflow = ''; +} + +// Close modals on Escape / backdrop click +document.addEventListener('keydown', function (e) { + if (e.key === 'Escape') { closeInfraModal(); closeLogModal(); } +}); +['infra-modal', 'log-modal'].forEach(function (id) { + var el = document.getElementById(id); + if (el) el.addEventListener('click', function (e) { + if (e.target.id === id) { closeInfraModal(); closeLogModal(); } + }); +}); + function setStatus(msg, type) { const bar = document.getElementById('statusBar'); bar.className = 'status-bar ' + (type || ''); @@ -147,8 +341,10 @@ function setStatus(msg, type) { } function setLoading(btn, loading) { - btn.disabled = loading; - if (loading) btn.dataset.orig = btn.textContent; + if (loading) { + btn.disabled = true; + btn.dataset.orig = btn.textContent; + } btn.innerHTML = loading ? 
'' + (btn.dataset.orig || '') : (btn.dataset.orig || btn.textContent); @@ -190,13 +386,30 @@ async function resetEnv() { document.getElementById('outputBox').textContent = obs.command_output || ''; document.getElementById('logBody').innerHTML = 'No commands executed yet'; + _logEntries = []; + // Enable command controls + document.getElementById('cmdInput').disabled = false; + document.getElementById('runBtn').disabled = false; + delete document.getElementById('runBtn').dataset.ended; + document.getElementById('solutionBtn').disabled = false; + document.getElementById('solutionBtn').innerHTML = ' Show Solution'; + document.getElementById('solutionPanel').style.display = 'none'; + document.getElementById('solutionCommands').innerHTML = ''; document.getElementById('cmdInput').value = ''; document.getElementById('cmdInput').focus(); + + // Update state box + document.getElementById('stateTier').textContent = task ? task.difficulty : '\u2014'; + document.getElementById('stateEpisode').textContent = obs.episode_id || '1'; + document.getElementById('stateProgress').style.width = '0%'; + document.getElementById('stateReward').textContent = '0.00'; setStatus('New episode started. Difficulty: ' + (task ? escHtml(task.difficulty) : 'unknown') + '', 'info'); + refreshState(); } catch (e) { setStatus('Reset failed: ' + escHtml(e.message), 'error'); } finally { setLoading(btn, false); + btn.disabled = false; } } @@ -229,10 +442,13 @@ async function runCmd() { document.getElementById('outputBox').textContent = output; const tbody = document.getElementById('logBody'); - if (stepCount === 1) tbody.innerHTML = ''; - const tr = document.createElement('tr'); + if (stepCount === 1) { tbody.innerHTML = ''; _logEntries = []; } const reward = (obs.reward != null ? obs.reward : (data.reward || 0)); - const displayCmd = cmd.length > 50 ? cmd.slice(0, 47) + '...' 
: cmd; + const logIdx = _logEntries.length; + _logEntries.push({ step: stepCount, command: cmd, success: obs.command_success, reward: reward, output: output }); + const tr = document.createElement('tr'); + tr.onclick = function () { openLogModal(logIdx); }; + const displayCmd = cmd.length > 60 ? cmd.slice(0, 57) + '...' : cmd; tr.innerHTML = '' + stepCount + '' + '' + escHtml(displayCmd) + '' + @@ -240,19 +456,38 @@ async function runCmd() { '' + (reward >= 0 ? '+' : '') + Number(reward).toFixed(2) + ''; tbody.appendChild(tr); + // Update state box + const progress = obs.partial_progress != null ? obs.partial_progress : 0; + document.getElementById('stateProgress').style.width = (progress * 100) + '%'; + const cumReward = parseFloat(document.getElementById('stateReward').textContent) + reward; + document.getElementById('stateReward').textContent = cumReward.toFixed(2); + if (obs.task_achieved) { setStatus('Task completed! Step ' + obs.step_count + ', reward: +' + Number(reward).toFixed(2) + '. Click New Episode for the next task.', 'success'); + document.getElementById('cmdInput').disabled = true; + document.getElementById('runBtn').disabled = true; + document.getElementById('runBtn').dataset.ended = '1'; + document.getElementById('solutionBtn').disabled = true; } else if (data.done) { setStatus('Episode ended. Click New Episode to try again.', 'error'); + document.getElementById('cmdInput').disabled = true; + document.getElementById('runBtn').disabled = true; + document.getElementById('runBtn').dataset.ended = '1'; + document.getElementById('solutionBtn').disabled = true; } else { setStatus('Step ' + obs.step_count + ' — ' + (obs.command_success ? 'Command succeeded.' : 'Command failed.'), obs.command_success ? 
'info' : 'error'); } + refreshState(); input.value = ''; input.focus(); } catch (e) { setStatus('Request failed: ' + escHtml(e.message), 'error'); } finally { setLoading(btn, false); + // Re-enable if episode is still active (not disabled by completion/done handlers above) + if (!btn.dataset.ended) { + btn.disabled = false; + } } } diff --git a/server/templates/index.html b/server/templates/index.html index eef633db08b34b07a8f6edab48e4f029eeb220df..b8611f6382e3535bdcc9151eb05729fc1914ade1 100644 --- a/server/templates/index.html +++ b/server/templates/index.html @@ -14,10 +14,11 @@ @@ -25,8 +26,15 @@
-

AWS RL Environment

-

Train AI agents on real AWS cloud tasks

+

AWS Cloud CLI and SRE Reinforcement Learning Environment

+

Train AI agents on real AWS cloud operations. The agents interact with + a real-world AWS Shell simulator. The agents send AWS CLI + commands as actions, receive structured observations, and progress through a set of curriculum tasks + across 5 difficulty tiers — from basic listing to SRE incident response. The response of the executed command is + perfectly the same as the production one. The grading system evaluates the rewards and penalties based on the + AWS Infrastructure's state instead of static metrics. The output is then used to guide the agent's learning + process. It applies a lot of advanced grading, teaching and anti-reward-hacking techniques, which are discussed in + detail in the Features section.

Try the Playground @@ -62,9 +70,13 @@

Learn AWS by doing.

An OpenEnv-compliant RL environment where agents execute real AWS CLI commands against a simulated cloud - powered by MiniStack. A progressive curriculum tracks mastery and adapts difficulty in real-time.

+ infra in real time. The simulator is inspired by the MiniStack AWS simulator. A lot of further refinements were + performed on top of the MiniStack simulator to achieve a more realistic and comprehensive learning experience. + A progressive curriculum tracks mastery and adapts difficulty in real-time. +

S3 + ALB EC2 DynamoDB Lambda @@ -84,7 +96,7 @@
- +
@@ -92,40 +104,97 @@
- Tiers + Tasks
-
-
- Warmup - 0 – 5 - List resources — S3 buckets, EC2 instances, DynamoDB tables -
-
- Beginner - 6 – 10 - Create basic resources — buckets, tables, queues, functions -
-
- Intermediate - 11 – 14 - Multi-step operations — create, configure, and connect -
-
- Advanced - 15 – 17 - Cross-service architectures — Lambda + SQS, API Gateway -
-
- Expert - 18 – 20 - SRE incidents — diagnose and fix broken infrastructure +
+
+ +
+
+
Warmup
+
20 tasks
+
+

List resources — single read-only commands

+
    +
  • Run one AWS CLI command to list or describe a resource type
  • +
  • S3 buckets, EC2 instances, DynamoDB tables, Lambda functions, RDS, EBS volumes
  • +
  • Graded by command_match — checks operation + service pair
  • +
  • No setup required, no state mutations
  • +
+
+ +
+
+
Beginner
+
20 tasks
+
+

Create single resources with verification

+
    +
  • Create an S3 bucket, DynamoDB table, SQS queue, or Lambda function
  • +
  • Graded by resource_creation — verifies the exact resource exists in AWS + Infrastructure Simulator +
  • +
  • Introduces resource name validation — "my-bucket-2" won't satisfy a check for "my-bucket"
  • +
  • First tier where idempotency bonus (+0.02) can be earned
  • +
+
+ +
+
+
Intermediate
+
20 tasks
+
+

Multi-step workflows — create, configure, connect

+
    +
  • Ordered sequences: create a bucket then enable versioning, create a table then add an item
  • +
  • Graded by multi_step — validates each step was completed in order
  • +
  • Chaos injection begins at 10% probability — resources may be silently mutated + mid-episode
  • +
  • Rollback penalty (-0.1) starts to matter with multi-step create/delete patterns
  • +
+
+ +
+
+
Advanced
+
20 tasks
+
+

Cross-service architectures spanning multiple AWS services

+
    +
  • Wire Lambda to SQS, configure API Gateway with integrations, build event-driven pipelines
  • +
  • Graded by multi_step + services — all required services must be configured +
  • +
  • Chaos injection escalates to 20% probability — DynamoDB throughput, Lambda + configs may change
  • +
  • Hints cost more: 3 hints = only 61% of max reward (0.85³ decay)
  • +
+
+ +
+
+
Expert
+
20 tasks
+
+

SRE incidents & drift detection — diagnose and fix

+
    +
  • Fix overly permissive S3 policies, replace broad IAM inline policies, repair broken infra
  • +
  • Graded by state_checks — actual CLI commands run against MiniStack at grading + time
  • +
  • Chaos injection at 30% probability — maximum perturbation frequency
  • +
  • 6 drift detection tasks — correct infra is provisioned, then 2-3 random + mutations applied from a pool
  • +
  • Agent must audit environment, discover which resources drifted, and fix only those
  • +
  • Drift is randomized per episode — prevents memorization of fix sequences
  • +
+
+
- +
@@ -137,28 +206,290 @@ Features
-
-
-
-

Progressive Difficulty

-

Start easy, advance as you master each tier automatically.

+ + +
+

Curriculum & Training

+

Adaptive learning system that tracks mastery and selects optimal tasks.

+
+
+ +
+ Progressive Difficulty + 5 tiers from warmup to expert SRE +
+ + + +
+
+ +
+ Mastery Tracking + Per-task graduation with sustained performance +
+ + + +
+
+ +
+ Spaced Repetition + Graduated tasks resurface at increasing intervals +
+ + + +
+
+ +
+ Priority Selection + Novelty, weakness, and recency scoring +
+ + + +
+
+ +
+ Tier Progression + Standard promotion and fast-track system +
+ + + +
-
-
-

Mastery Tracking

-

Per-task graduation with sustained performance tracking.

+
+ + +
+

Reward Shaping

+

Dense reward signals that encourage operational discipline and real progress.

+
+
+ +
+ Rollback Penalty & Idempotency Bonus + Operational discipline rewards +
+ + + +
+
+ 📈 +
+ Shaped Reward System + Progress bonus, failure penalty, clamped rewards +
+ + + +
+
+ +
+ Multi-Strategy Grading + 5 grading strategies across tiers +
+ + + +
+
+
+ + +
+

Resilience & Adaptability

+

Features that test agent robustness under unpredictable conditions.

+
+
+ 💡 +
+ Progressive Hint System + 3-level hints with reward decay +
+ + + +
+
+ +
+ Chaos Injection Engine + Silent mid-episode perturbations +
+ + + +
+
+ 🔍 +
+ Drift Detection Tasks + Randomized config drift per episode +
+ + + +
-
-
-

Spaced Repetition

-

Graduated tasks resurface at increasing intervals.

+
+ + +
+

Security Posture Audit

+

Tests reasoning about configuration state — working but insecure infrastructure the agent must + analyze and harden.

+
+
+ 🔒 +
+ Public S3 Bucket Lockdown + Detect & fix overly permissive bucket policies +
+ + + +
+
+ 🛡 +
+ IAM Least Privilege + Replace wildcard policies with scoped permissions +
+ + + +
+
+ 🔐 +
+ Secrets in Lambda Environment + Move plaintext credentials to Secrets Manager +
+ + + +
-
-
-

Priority Selection

-

Focuses on your weakest skills for efficient learning.

+
+ + +
+

Anti-Reward-Hacking

+

8 defense layers that prevent the agent from gaming the reward system.

+
+
+ 🔎 +
+ Ground-Truth Verification + MiniStack queries for 20+ services +
+ + + +
+
+ 🛡 +
+ Command Allowlisting + Only aws CLI commands allowed +
+ + + +
+
+ 🚫 +
+ Deduplication + No reward for repeated commands +
+ + + +
+
+ 👁 +
+ Grader Invisibility + Verification commands hidden from agent +
+ + + +
+
+ 🔍 +
+ No Verification Reward + Read-only commands earn zero progress +
+ + + +
+
+ +
+ Monotonic Progress + Progress can only increase, never re-earn +
+ + + +
+
+ 🎯 +
+ Resource Name Validation + Exact name match required +
+ + + +
+
+ +
+ State Checks + Verify final state, not command history +
+ + + +
+
@@ -173,38 +504,54 @@ API
-
-
-

WebSocket

-
import websockets, json +
+
+
+

WebSocket

+ +
+
import websockets, json async with websockets.connect( - "wss://your-space.hf.space/ws" + "wss://sizzing-aws-rl-env.hf.space/ws" ) as ws: + # Reset environment await ws.send(json.dumps({ "type": "reset" })) obs = json.loads(await ws.recv()) + # Execute a command await ws.send(json.dumps({ "type": "step", "data": {"command": "aws s3 ls"} })) obs = json.loads(await ws.recv())
-
-

Python Client

-
from aws_rl_env import ( - AwsRlEnv, AwsRlAction - ) - - with AwsRlEnv.from_env( - "your-hf-user/aws-rl-env" +
+
+

Python Client

+ +
+
from aws_rl_env import AwsRlEnv, AwsRlAction + + async with AwsRlEnv.from_env( + "sizzing/aws-rl-env" ) as env: result = await env.step( - AwsRlAction( - command="aws s3 ls" - ) + AwsRlAction(command="aws s3 ls") )
@@ -224,54 +571,81 @@ Play
-
+
- -
-
+ +
+
Controls
-
+
+

Click New Episode to start

+

The curriculum assigns a task matching + your skill level

+
+
+ + +
+
Command - - + + +
-
- Services -
- S3 - EC2 - DynamoDB - Lambda - SQS - SNS - IAM - API GW +
+
Ready.
+
- -
-
-

Click New Episode to start

-

The curriculum assigns a task matching - your skill level

+ +
+
+ State +
+
Tier
+
Episode
+
Progress +
+
+
+
+
Reward0.00
+
Steps0
+
Hints0
+
Chaos
+
- -
Ready.
- -
+
Output
No output yet.
+
-
- Command Log + +
+ Command Log +
@@ -290,17 +664,956 @@ + +
+ AWS Environment +
+

Start an episode to see live infrastructure + state.

+
+
+ + + + + + + + + + + +
+ +
+ +
+ +
+ + +
+
-

AWS RL Environment — An OpenEnv environment powered by MiniStack

+ +
+ \ No newline at end of file diff --git a/tests/test_aws_rl_env_environment.py b/tests/test_aws_rl_env_environment.py new file mode 100644 index 0000000000000000000000000000000000000000..8e8ff51564443721c0c14c7aa5ec867fb806410c --- /dev/null +++ b/tests/test_aws_rl_env_environment.py @@ -0,0 +1,413 @@ +"""Unit tests for AwsRlEnvironment — tests reset/step lifecycle and edge cases. + +All external dependencies (AwsBackend, Curriculum, TaskGrader, etc.) are mocked +so tests run without MiniStack. + +Run: + docker exec python -m pytest env/tests/test_aws_rl_env_environment.py -v +""" + +from unittest.mock import patch + +import pytest + +from models import ( + AwsRlAction, + AwsRlObservation, + Task, + TaskID, + TaskDifficulty, + SuccessCriteria, +) +from server.services.task_grader import GradeResult + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +_DUMMY_TASK = Task( + task_id=TaskID(1), + difficulty=TaskDifficulty.WARMUP, + description="List S3 buckets", + success_criteria=SuccessCriteria(command_contains="s3", operation="ls"), +) + + +def _make_env(): + """Create an AwsRlEnvironment with all dependencies mocked.""" + with ( + patch("server.aws_rl_env_environment.AwsBackend") as MockBackend, + patch("server.aws_rl_env_environment.Curriculum") as MockCurriculum, + patch("server.aws_rl_env_environment.TaskGrader") as MockGrader, + patch("server.aws_rl_env_environment.EnvironmentDesigner") as MockDesigner, + patch("server.aws_rl_env_environment.ChaosEngine") as MockChaos, + patch("server.aws_rl_env_environment.HintProvider") as MockHint, + ): + from server.aws_rl_env_environment import AwsRlEnvironment + + env = AwsRlEnvironment() + + # Grab mock instances + backend = MockBackend.return_value + curriculum = MockCurriculum.return_value + grader = MockGrader.return_value + designer = MockDesigner.return_value + chaos = MockChaos.return_value + 
hint = MockHint.return_value + + # Default behaviors + curriculum.next_task.return_value = _DUMMY_TASK + curriculum.chaos_probability = 0.0 + backend.execute_command.return_value = (True, "output", "") + chaos.chaos_occurred = False + grader.grade.return_value = GradeResult( + task_achieved=False, partial_progress=0.0, reward=0.0, reason="not done" + ) + + return env, backend, curriculum, grader, designer, chaos, hint + + +# =================================================================== +# reset() +# =================================================================== + + +class TestReset: + def test_returns_observation(self) -> None: + env, *_ = _make_env() + obs = env.reset() + assert isinstance(obs, AwsRlObservation) + + def test_resets_backend(self) -> None: + env, backend, *_ = _make_env() + env.reset() + backend.reset_environment.assert_called_once() + + def test_gets_next_task_from_curriculum(self) -> None: + env, _, curriculum, *_ = _make_env() + env.reset() + curriculum.next_task.assert_called_once() + + def test_applies_designer(self) -> None: + env, _, _, _, designer, *_ = _make_env() + env.reset() + designer.apply.assert_called_once_with(_DUMMY_TASK) + + def test_obs_contains_task(self) -> None: + env, *_ = _make_env() + obs = env.reset() + assert obs.task == _DUMMY_TASK + + def test_obs_step_count_zero(self) -> None: + env, *_ = _make_env() + obs = env.reset() + assert obs.step_count == 0 + + def test_obs_not_done(self) -> None: + env, *_ = _make_env() + obs = env.reset() + assert obs.done is False + assert obs.reward == 0.0 + + def test_obs_command_output_is_reset_message(self) -> None: + env, *_ = _make_env() + obs = env.reset() + assert "reset" in obs.command_output.lower() + + def test_custom_episode_id(self) -> None: + env, *_ = _make_env() + obs = env.reset(episode_id="my-ep-123") + assert obs.episode_id == "my-ep-123" + + def test_auto_episode_id(self) -> None: + env, *_ = _make_env() + obs = env.reset() + assert len(obs.episode_id) > 0 # 
UUID generated + + def test_resets_chaos_engine(self) -> None: + env, _, _, _, _, chaos, _ = _make_env() + env.reset() + chaos.reset.assert_called_once() + + def test_consecutive_resets_get_fresh_state(self) -> None: + env, backend, *_ = _make_env() + obs1 = env.reset() + obs2 = env.reset() + assert obs1.episode_id != obs2.episode_id + assert backend.reset_environment.call_count == 2 + + +# =================================================================== +# step() — non-AWS command rejection +# =================================================================== + + +class TestStepRejection: + def test_non_aws_command_rejected(self) -> None: + env, *_ = _make_env() + env.reset() + obs = env.step(AwsRlAction(command="ls -la")) + assert not obs.command_success + assert "Only AWS CLI" in obs.error + assert obs.reward == 0.0 + assert not obs.task_achieved + + def test_empty_command_rejected(self) -> None: + env, *_ = _make_env() + env.reset() + obs = env.step(AwsRlAction(command="")) + assert not obs.command_success + + def test_whitespace_only_rejected(self) -> None: + env, *_ = _make_env() + env.reset() + obs = env.step(AwsRlAction(command=" ")) + assert not obs.command_success + + def test_shell_injection_rejected(self) -> None: + env, *_ = _make_env() + env.reset() + obs = env.step(AwsRlAction(command="rm -rf / && aws s3 ls")) + assert not obs.command_success + + def test_rejected_command_increments_step_count(self) -> None: + env, *_ = _make_env() + env.reset() + obs = env.step(AwsRlAction(command="not-aws")) + assert obs.step_count == 1 + + +# =================================================================== +# step() — hint system +# =================================================================== + + +class TestStepHints: + def test_hint_request_returns_hint_text(self) -> None: + env, _, _, _, _, _, hint = _make_env() + hint.get_hint.return_value = "Try using s3" + env.reset() + obs = env.step(AwsRlAction(command="aws help --task-hint")) + assert 
obs.command_output == "Try using s3" + assert obs.hint_text == "Try using s3" + assert obs.command_success is True + + def test_hint_increments_hints_used(self) -> None: + env, _, _, _, _, _, hint = _make_env() + hint.get_hint.return_value = "hint" + env.reset() + obs1 = env.step(AwsRlAction(command="aws help --task-hint")) + assert obs1.hints_used == 1 + obs2 = env.step(AwsRlAction(command="aws help --task-hint")) + assert obs2.hints_used == 2 + + def test_hint_not_achieved(self) -> None: + env, _, _, _, _, _, hint = _make_env() + hint.get_hint.return_value = "hint" + env.reset() + obs = env.step(AwsRlAction(command="aws help --task-hint")) + assert not obs.task_achieved + assert obs.done is False + assert obs.reward == 0.0 + + def test_hint_does_not_call_backend(self) -> None: + env, backend, _, _, _, _, hint = _make_env() + hint.get_hint.return_value = "hint" + env.reset() + backend.execute_command.reset_mock() + env.step(AwsRlAction(command="aws help --task-hint")) + backend.execute_command.assert_not_called() + + def test_hint_does_not_grade(self) -> None: + env, _, _, grader, _, _, hint = _make_env() + hint.get_hint.return_value = "hint" + env.reset() + env.step(AwsRlAction(command="aws help --task-hint")) + grader.grade.assert_not_called() + + +# =================================================================== +# step() — normal AWS command execution +# =================================================================== + + +class TestStepExecution: + def test_executes_command_on_backend(self) -> None: + env, backend, *_ = _make_env() + env.reset() + backend.execute_command.reset_mock() + env.step(AwsRlAction(command="aws s3 ls")) + backend.execute_command.assert_called_once_with("aws s3 ls") + + def test_returns_stdout(self) -> None: + env, backend, *_ = _make_env() + backend.execute_command.return_value = (True, "bucket-list", "") + env.reset() + obs = env.step(AwsRlAction(command="aws s3 ls")) + assert obs.command_output == "bucket-list" + assert 
obs.command_success is True + + def test_returns_stderr_on_failure(self) -> None: + env, backend, *_ = _make_env() + backend.execute_command.return_value = (False, "", "access denied") + env.reset() + obs = env.step(AwsRlAction(command="aws s3 ls")) + assert obs.command_success is False + assert obs.error == "access denied" + + def test_step_count_increments(self) -> None: + env, *_ = _make_env() + env.reset() + obs1 = env.step(AwsRlAction(command="aws s3 ls")) + obs2 = env.step(AwsRlAction(command="aws s3 ls")) + obs3 = env.step(AwsRlAction(command="aws s3 ls")) + assert obs1.step_count == 1 + assert obs2.step_count == 2 + assert obs3.step_count == 3 + + def test_strips_command_whitespace(self) -> None: + env, backend, *_ = _make_env() + env.reset() + backend.execute_command.reset_mock() + env.step(AwsRlAction(command=" aws s3 ls ")) + backend.execute_command.assert_called_once_with("aws s3 ls") + + +# =================================================================== +# step() — grading +# =================================================================== + + +class TestStepGrading: + def test_grades_after_execution(self) -> None: + env, _, _, grader, *_ = _make_env() + env.reset() + env.step(AwsRlAction(command="aws s3 ls")) + grader.grade.assert_called_once() + + def test_passes_chaos_flag_to_grader(self) -> None: + env, _, _, grader, _, chaos, _ = _make_env() + chaos.chaos_occurred = True + env.reset() + env.step(AwsRlAction(command="aws s3 ls")) + _, kwargs = grader.grade.call_args + assert kwargs["chaos_occurred"] is True + + def test_passes_hints_used_to_grader(self) -> None: + env, _, _, grader, _, _, hint = _make_env() + hint.get_hint.return_value = "h" + env.reset() + env.step(AwsRlAction(command="aws help --task-hint")) + env.step(AwsRlAction(command="aws s3 ls")) + _, kwargs = grader.grade.call_args + assert kwargs["hints_used"] == 1 + + def test_achieved_sets_done_true(self) -> None: + env, _, _, grader, *_ = _make_env() + grader.grade.return_value 
= GradeResult( + task_achieved=True, partial_progress=1.0, reward=1.0, reason="done" + ) + env.reset() + obs = env.step(AwsRlAction(command="aws s3 ls")) + assert obs.task_achieved is True + assert obs.done is True + assert obs.reward == 1.0 + + def test_not_achieved_keeps_done_false(self) -> None: + env, _, _, grader, *_ = _make_env() + grader.grade.return_value = GradeResult( + task_achieved=False, partial_progress=0.3, reward=0.2, reason="partial" + ) + env.reset() + obs = env.step(AwsRlAction(command="aws s3 ls")) + assert obs.task_achieved is False + assert obs.done is False + assert obs.reward == 0.2 + + def test_achieved_records_in_curriculum(self) -> None: + env, _, curriculum, grader, *_ = _make_env() + grader.grade.return_value = GradeResult( + task_achieved=True, partial_progress=1.0, reward=1.0, reason="done" + ) + env.reset() + env.step(AwsRlAction(command="aws s3 ls")) + curriculum.record_result.assert_called_once_with( + _DUMMY_TASK, achieved=True, reward=1.0 + ) + + def test_not_achieved_does_not_record(self) -> None: + env, _, curriculum, grader, *_ = _make_env() + grader.grade.return_value = GradeResult( + task_achieved=False, partial_progress=0.0, reward=0.0, reason="no" + ) + env.reset() + env.step(AwsRlAction(command="aws s3 ls")) + curriculum.record_result.assert_not_called() + + +# =================================================================== +# step() — chaos injection +# =================================================================== + + +class TestStepChaos: + def test_chaos_injected_after_grading(self) -> None: + env, _, curriculum, grader, _, chaos, _ = _make_env() + env.reset() + env.step(AwsRlAction(command="aws s3 ls")) + # Chaos should be called after grading + chaos.maybe_inject.assert_called_once() + + def test_chaos_receives_probability(self) -> None: + env, _, curriculum, _, _, chaos, _ = _make_env() + curriculum.chaos_probability = 0.25 + env.reset() + env.step(AwsRlAction(command="aws s3 ls")) + args = 
chaos.maybe_inject.call_args + assert args[0][2] == 0.25 # third positional arg is probability + + def test_chaos_not_called_on_hint(self) -> None: + env, _, _, _, _, chaos, hint = _make_env() + hint.get_hint.return_value = "h" + env.reset() + env.step(AwsRlAction(command="aws help --task-hint")) + chaos.maybe_inject.assert_not_called() + + def test_chaos_not_called_on_rejected_command(self) -> None: + env, _, _, _, _, chaos, _ = _make_env() + env.reset() + env.step(AwsRlAction(command="not-aws")) + chaos.maybe_inject.assert_not_called() + + +# =================================================================== +# step() without reset +# =================================================================== + + +class TestStepWithoutReset: + def test_raises_without_reset(self) -> None: + env, *_ = _make_env() + # Don't call reset — _current_task is None + with pytest.raises(AssertionError, match="reset"): + env.step(AwsRlAction(command="aws s3 ls")) + + +# =================================================================== +# state property +# =================================================================== + + +class TestState: + def test_state_has_episode_id(self) -> None: + env, *_ = _make_env() + env.reset(episode_id="ep-1") + assert env.state.episode_id == "ep-1" + + def test_state_step_count_tracks(self) -> None: + env, *_ = _make_env() + env.reset() + assert env.state.step_count == 0 + env.step(AwsRlAction(command="aws s3 ls")) + assert env.state.step_count == 1 diff --git a/tests/test_drift_engine.py b/tests/test_drift_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..7a296460e92da1e7e41f77985c8d945f5632e470 --- /dev/null +++ b/tests/test_drift_engine.py @@ -0,0 +1,146 @@ +"""Unit tests for DriftEngine — tests drift selection and application logic. 
+Run:
+    docker exec <container> python -m pytest env/tests/test_drift_engine.py -v
engine.apply_drift(task) + assert len(applied) <= 2 + + def test_selected_drifts_are_unique(self, engine: DriftEngine) -> None: + task = _task_with_drifts(5) + for _ in range(20): + applied = engine.apply_drift(task) + assert len(applied) == len(set(applied)) + + def test_failed_drift_not_in_applied( + self, engine: DriftEngine, mock_backend: MagicMock + ) -> None: + mock_backend.execute_command.return_value = (False, "", "error") + task = _task_with_drifts(1) + applied = engine.apply_drift(task) + assert len(applied) == 0 + + def test_partial_failure_only_returns_successful( + self, engine: DriftEngine, mock_backend: MagicMock + ) -> None: + task = _task_with_drifts(2) + mock_backend.execute_command.side_effect = [ + (True, "", ""), + (False, "", "fail"), + ] + applied = engine.apply_drift(task) + assert len(applied) == 1 + + def test_uses_description_as_label(self, engine: DriftEngine) -> None: + task = Task( + task_id=TaskID(1), + difficulty=TaskDifficulty.EXPERT, + description="t", + success_criteria=SuccessCriteria(), + possible_drifts=[ + SetupCommand(command="aws test", description="My drift label"), + ], + ) + applied = engine.apply_drift(task) + assert applied == ["My drift label"] + + def test_uses_command_as_fallback_label(self, engine: DriftEngine) -> None: + task = Task( + task_id=TaskID(1), + difficulty=TaskDifficulty.EXPERT, + description="t", + success_criteria=SuccessCriteria(), + possible_drifts=[SetupCommand(command="aws fallback-cmd")], + ) + applied = engine.apply_drift(task) + assert applied == ["aws fallback-cmd"] + + +# =================================================================== +# _pick_count +# =================================================================== + + +class TestPickCount: + def test_zero_pool(self) -> None: + assert DriftEngine._pick_count(0) == 0 + + def test_one_pool(self) -> None: + assert DriftEngine._pick_count(1) == 1 + + def test_two_pool_returns_two(self) -> None: + # pool_size=2: lo=min(2,2)=2, hi=min(3,2)=2 
+Run:
+    docker exec <container> python -m pytest env/tests/test_environment_designer.py -v
EnvironmentDesigner, mock_backend: MagicMock + ) -> None: + designer.apply(_task()) + mock_backend.execute_command.assert_not_called() + + +# =================================================================== +# apply — CLI commands +# =================================================================== + + +class TestApplyCliCommands: + def test_all_succeed( + self, designer: EnvironmentDesigner, mock_backend: MagicMock + ) -> None: + task = _task( + setup_commands=[ + SetupCommand(command="aws s3api create-bucket --bucket a"), + SetupCommand(command="aws s3api create-bucket --bucket b"), + ] + ) + result = designer.apply(task) + assert result.success + assert result.resources_created == 2 + assert result.method == ProvisionMethod.CLI_COMMANDS + assert mock_backend.execute_command.call_count == 2 + + def test_failure_recorded_in_errors( + self, designer: EnvironmentDesigner, mock_backend: MagicMock + ) -> None: + mock_backend.execute_command.side_effect = [ + (True, "", ""), + (False, "", "bucket already exists"), + ] + task = _task( + setup_commands=[ + SetupCommand(command="aws s3api create-bucket --bucket a"), + SetupCommand(command="aws s3api create-bucket --bucket a"), + ] + ) + result = designer.apply(task) + assert not result.success + assert result.resources_created == 1 + assert len(result.errors) == 1 + assert "bucket already exists" in result.errors[0] + + def test_ignore_failure_continues( + self, designer: EnvironmentDesigner, mock_backend: MagicMock + ) -> None: + mock_backend.execute_command.side_effect = [ + (False, "", "already exists"), + (True, "", ""), + ] + task = _task( + setup_commands=[ + SetupCommand(command="cmd1", ignore_failure=True), + SetupCommand(command="cmd2"), + ] + ) + result = designer.apply(task) + assert result.success # ignored failure doesn't count + assert result.resources_created == 1 + assert len(result.errors) == 0 + + def test_multiple_failures( + self, designer: EnvironmentDesigner, mock_backend: MagicMock + ) -> None: 
+ mock_backend.execute_command.return_value = (False, "", "err") + task = _task( + setup_commands=[ + SetupCommand(command="cmd1"), + SetupCommand(command="cmd2"), + SetupCommand(command="cmd3"), + ] + ) + result = designer.apply(task) + assert not result.success + assert result.resources_created == 0 + assert len(result.errors) == 3 + + def test_commands_executed_in_order( + self, designer: EnvironmentDesigner, mock_backend: MagicMock + ) -> None: + task = _task( + setup_commands=[ + SetupCommand(command="first"), + SetupCommand(command="second"), + SetupCommand(command="third"), + ] + ) + designer.apply(task) + calls = [c.args[0] for c in mock_backend.execute_command.call_args_list] + assert calls == ["first", "second", "third"] + + +# =================================================================== +# apply — drift integration +# =================================================================== + + +class TestApplyWithDrifts: + def test_drifts_applied_after_setup( + self, designer: EnvironmentDesigner, mock_backend: MagicMock + ) -> None: + task = _task( + setup_commands=[SetupCommand(command="setup-cmd")], + possible_drifts=[SetupCommand(command="drift-cmd", description="d")], + ) + with patch.object( + designer._drift_engine, "apply_drift", return_value=["d"] + ) as mock_drift: + result = designer.apply(task) + mock_drift.assert_called_once_with(task) + assert result.success + + def test_no_drifts_skips_drift_engine(self, designer: EnvironmentDesigner) -> None: + task = _task(setup_commands=[SetupCommand(command="cmd")]) + with patch.object(designer._drift_engine, "apply_drift") as mock_drift: + designer.apply(task) + mock_drift.assert_not_called() + + +# =================================================================== +# ProvisionResult model +# =================================================================== + + +class TestProvisionResult: + def test_defaults(self) -> None: + r = ProvisionResult() + assert r.success is True + assert r.method == 
ProvisionMethod.CLI_COMMANDS + assert r.resources_created == 0 + assert r.errors == [] diff --git a/tests/test_episode_tracker.py b/tests/test_episode_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..4bfcedc617425b37c25b467e6648c6c598745825 --- /dev/null +++ b/tests/test_episode_tracker.py @@ -0,0 +1,457 @@ +"""Unit tests for the EpisodeTracker — command history, rollback detection, and grading helpers. + +These are pure unit tests that do not require MiniStack or Docker. + +Run: + python -m pytest tests/test_episode_tracker.py -v +""" + +from server.services.episode_tracker import ( + EpisodeTracker, + StepRecord, + _command_mentions_resource, + _extract_resource_name, + _parse_aws_command, +) + + +# --------------------------------------------------------------------------- +# _parse_aws_command +# --------------------------------------------------------------------------- + + +class TestParseAwsCommand: + def test_standard_command(self) -> None: + assert _parse_aws_command("aws s3api create-bucket --bucket foo") == ( + "s3api", + "create-bucket", + ) + + def test_simple_service(self) -> None: + assert _parse_aws_command("aws iam list-roles") == ("iam", "list-roles") + + def test_too_few_parts(self) -> None: + assert _parse_aws_command("aws s3") == (None, None) + + def test_not_aws(self) -> None: + assert _parse_aws_command("gcloud compute instances list") == (None, None) + + def test_empty_string(self) -> None: + assert _parse_aws_command("") == (None, None) + + def test_leading_whitespace(self) -> None: + assert _parse_aws_command(" aws lambda list-functions") == ( + "lambda", + "list-functions", + ) + + +# --------------------------------------------------------------------------- +# _command_mentions_resource +# --------------------------------------------------------------------------- + + +class TestCommandMentionsResource: + def test_flag_match(self) -> None: + assert _command_mentions_resource( + "aws s3api create-bucket 
--bucket my-bucket", "my-bucket" + ) + + def test_flag_value_syntax(self) -> None: + assert _command_mentions_resource( + "aws dynamodb describe-table --table-name=orders", "orders" + ) + + def test_function_name_flag(self) -> None: + assert _command_mentions_resource( + "aws lambda invoke --function-name processor /dev/null", "processor" + ) + + def test_arn_word_boundary(self) -> None: + assert _command_mentions_resource( + "aws lambda create-event-source-mapping " + "--event-source-arn arn:aws:sqs:us-east-1:000000000000:my-queue", + "my-queue", + ) + + def test_no_match(self) -> None: + assert not _command_mentions_resource( + "aws s3api create-bucket --bucket other-bucket", "my-bucket" + ) + + def test_different_resource_no_match(self) -> None: + assert not _command_mentions_resource( + "aws s3api create-bucket --bucket test-bucket", "prod-bucket" + ) + + def test_role_name(self) -> None: + assert _command_mentions_resource( + "aws iam attach-role-policy --role-name my-role " + "--policy-arn arn:aws:iam::aws:policy/ReadOnly", + "my-role", + ) + + +# --------------------------------------------------------------------------- +# _extract_resource_name +# --------------------------------------------------------------------------- + + +class TestExtractResourceName: + def test_bucket(self) -> None: + assert _extract_resource_name("aws s3api create-bucket --bucket demo") == "demo" + + def test_table_name_equals(self) -> None: + assert ( + _extract_resource_name("aws dynamodb describe-table --table-name=users") + == "users" + ) + + def test_no_resource_flag(self) -> None: + assert _extract_resource_name("aws sts get-caller-identity") is None + + def test_first_flag_wins(self) -> None: + cmd = "aws s3api put-object --bucket first --name second" + assert _extract_resource_name(cmd) == "first" + + +# --------------------------------------------------------------------------- +# EpisodeTracker — record_step & basic properties +# 
--------------------------------------------------------------------------- + + +class TestRecordStep: + def test_returns_step_record(self) -> None: + t = EpisodeTracker() + step = t.record_step("aws s3 ls", True, "buckets...", "") + assert isinstance(step, StepRecord) + assert step.command == "aws s3 ls" + assert step.success is True + assert step.step_number == 0 + + def test_increments_step_counter(self) -> None: + t = EpisodeTracker() + t.record_step("aws s3 ls", True, "", "") + t.record_step("aws ec2 describe-instances", True, "", "") + assert t.step_count == 2 + + def test_command_history(self) -> None: + t = EpisodeTracker() + t.record_step("cmd1", True, "", "") + t.record_step("cmd2", False, "", "err") + assert len(t.command_history) == 2 + assert t.command_history[0].command == "cmd1" + assert t.command_history[1].success is False + + def test_history_is_copy(self) -> None: + t = EpisodeTracker() + t.record_step("cmd", True, "", "") + history = t.command_history + history.clear() + assert t.step_count == 1 # internal state not affected + + +# --------------------------------------------------------------------------- +# EpisodeTracker — reset +# --------------------------------------------------------------------------- + + +class TestReset: + def test_clears_all_state(self) -> None: + t = EpisodeTracker() + t.record_step("aws s3 ls", True, "", "") + t.credit_operation("ls", None) + t.record_hint() + t.previous_progress = 0.5 + + t.reset() + + assert t.step_count == 0 + assert t.command_history == [] + assert t.hints_used == 0 + assert t.previous_progress == 0.0 + assert not t.is_operation_already_credited("ls", None) + + +# --------------------------------------------------------------------------- +# EpisodeTracker — has_executed_operation +# --------------------------------------------------------------------------- + + +class TestHasExecutedOperation: + def test_matches_successful_command(self) -> None: + t = EpisodeTracker() + t.record_step("aws s3api 
create-bucket --bucket demo", True, "", "") + assert t.has_executed_operation("create-bucket") + + def test_ignores_failed_command(self) -> None: + t = EpisodeTracker() + t.record_step("aws s3api create-bucket --bucket demo", False, "", "err") + assert not t.has_executed_operation("create-bucket") + + def test_matches_with_resource(self) -> None: + t = EpisodeTracker() + t.record_step("aws s3api create-bucket --bucket demo", True, "", "") + assert t.has_executed_operation("create-bucket", "demo") + + def test_wrong_resource(self) -> None: + t = EpisodeTracker() + t.record_step("aws s3api create-bucket --bucket demo", True, "", "") + assert not t.has_executed_operation("create-bucket", "other") + + def test_wrong_operation(self) -> None: + t = EpisodeTracker() + t.record_step("aws s3api create-bucket --bucket demo", True, "", "") + assert not t.has_executed_operation("delete-bucket") + + def test_resource_none_matches_any(self) -> None: + t = EpisodeTracker() + t.record_step("aws dynamodb create-table --table-name orders", True, "", "") + assert t.has_executed_operation("create-table") + assert t.has_executed_operation("create-table", "orders") + + def test_empty_history(self) -> None: + assert not EpisodeTracker().has_executed_operation("anything") + + +# --------------------------------------------------------------------------- +# EpisodeTracker — has_used_service +# --------------------------------------------------------------------------- + + +class TestHasUsedService: + def test_exact_service(self) -> None: + t = EpisodeTracker() + t.record_step("aws sqs create-queue --queue-name q1", True, "", "") + assert t.has_used_service("sqs") + + def test_substring_match(self) -> None: + t = EpisodeTracker() + t.record_step("aws s3api create-bucket --bucket b", True, "", "") + assert t.has_used_service("s3") # "s3" in "s3api" + + def test_ignores_failed(self) -> None: + t = EpisodeTracker() + t.record_step("aws iam list-roles", False, "", "err") + assert not 
t.has_used_service("iam") + + def test_no_match(self) -> None: + t = EpisodeTracker() + t.record_step("aws s3 ls", True, "", "") + assert not t.has_used_service("lambda") + + def test_non_aws_command(self) -> None: + t = EpisodeTracker() + t.record_step("echo hello", True, "hello", "") + assert not t.has_used_service("echo") + + +# --------------------------------------------------------------------------- +# EpisodeTracker — credit_operation / is_operation_already_credited +# --------------------------------------------------------------------------- + + +class TestCreditedOperations: + def test_not_credited_by_default(self) -> None: + t = EpisodeTracker() + assert not t.is_operation_already_credited("create-bucket", "demo") + + def test_credit_and_check(self) -> None: + t = EpisodeTracker() + t.credit_operation("create-bucket", "demo") + assert t.is_operation_already_credited("create-bucket", "demo") + + def test_different_resource_not_credited(self) -> None: + t = EpisodeTracker() + t.credit_operation("create-bucket", "demo") + assert not t.is_operation_already_credited("create-bucket", "other") + + def test_none_resource(self) -> None: + t = EpisodeTracker() + t.credit_operation("list-buckets", None) + assert t.is_operation_already_credited("list-buckets", None) + assert not t.is_operation_already_credited("list-buckets", "demo") + + +# --------------------------------------------------------------------------- +# EpisodeTracker — hints +# --------------------------------------------------------------------------- + + +class TestHints: + def test_initial_zero(self) -> None: + assert EpisodeTracker().hints_used == 0 + + def test_record_hint_increments(self) -> None: + t = EpisodeTracker() + assert t.record_hint() == 1 + assert t.record_hint() == 2 + assert t.hints_used == 2 + + def test_reset_clears_hints(self) -> None: + t = EpisodeTracker() + t.record_hint() + t.reset() + assert t.hints_used == 0 + + +# 
--------------------------------------------------------------------------- +# EpisodeTracker — previous_progress +# --------------------------------------------------------------------------- + + +class TestPreviousProgress: + def test_default_zero(self) -> None: + assert EpisodeTracker().previous_progress == 0.0 + + def test_setter(self) -> None: + t = EpisodeTracker() + t.previous_progress = 0.75 + assert t.previous_progress == 0.75 + + +# --------------------------------------------------------------------------- +# EpisodeTracker — detect_rollbacks +# --------------------------------------------------------------------------- + + +class TestDetectRollbacks: + def test_no_rollbacks(self) -> None: + t = EpisodeTracker() + t.record_step("aws s3api create-bucket --bucket demo", True, "", "") + assert t.detect_rollbacks() == 0 + + def test_create_then_delete(self) -> None: + t = EpisodeTracker() + t.record_step("aws s3api create-bucket --bucket demo", True, "", "") + t.record_step("aws s3api delete-bucket --bucket demo", True, "", "") + assert t.detect_rollbacks() == 1 + + def test_failed_delete_not_counted(self) -> None: + t = EpisodeTracker() + t.record_step("aws s3api create-bucket --bucket demo", True, "", "") + t.record_step("aws s3api delete-bucket --bucket demo", False, "", "err") + assert t.detect_rollbacks() == 0 + + def test_different_resource_not_counted(self) -> None: + t = EpisodeTracker() + t.record_step("aws s3api create-bucket --bucket a", True, "", "") + t.record_step("aws s3api delete-bucket --bucket b", True, "", "") + assert t.detect_rollbacks() == 0 + + def test_multiple_rollbacks(self) -> None: + t = EpisodeTracker() + t.record_step("aws s3api create-bucket --bucket a", True, "", "") + t.record_step("aws s3api delete-bucket --bucket a", True, "", "") + t.record_step("aws dynamodb create-table --table-name t1", True, "", "") + t.record_step("aws dynamodb delete-table --table-name t1", True, "", "") + assert t.detect_rollbacks() == 2 + + def 
test_attach_detach_role_policy(self) -> None: + t = EpisodeTracker() + t.record_step( + "aws iam attach-role-policy --role-name r1 " + "--policy-arn arn:aws:iam::aws:policy/ReadOnly", + True, + "", + "", + ) + t.record_step( + "aws iam detach-role-policy --role-name r1 " + "--policy-arn arn:aws:iam::aws:policy/ReadOnly", + True, + "", + "", + ) + assert t.detect_rollbacks() == 1 + + def test_failed_create_not_tracked(self) -> None: + t = EpisodeTracker() + t.record_step("aws s3api create-bucket --bucket demo", False, "", "err") + t.record_step("aws s3api delete-bucket --bucket demo", True, "", "") + assert t.detect_rollbacks() == 0 + + +# --------------------------------------------------------------------------- +# EpisodeTracker — detect_idempotent_retries +# --------------------------------------------------------------------------- + + +class TestDetectIdempotentRetries: + def test_no_retries(self) -> None: + t = EpisodeTracker() + t.record_step("aws s3api create-bucket --bucket demo", True, "", "") + assert t.detect_idempotent_retries() == 0 + + def test_already_exists_then_success(self) -> None: + t = EpisodeTracker() + t.record_step( + "aws s3api create-bucket --bucket demo", + False, + "", + "BucketAlreadyOwnedByYou", + ) + t.record_step("aws s3api put-object --bucket demo --key f", True, "", "") + assert t.detect_idempotent_retries() == 1 + + def test_already_exists_no_followup(self) -> None: + t = EpisodeTracker() + t.record_step( + "aws s3api create-bucket --bucket demo", + False, + "", + "BucketAlreadyExists", + ) + # No next step + assert t.detect_idempotent_retries() == 0 + + def test_already_exists_followed_by_failure(self) -> None: + t = EpisodeTracker() + t.record_step( + "aws sqs create-queue --queue-name q", + False, + "", + "QueueNameExists", + ) + t.record_step("aws sqs send-message --queue-url q", False, "", "err") + assert t.detect_idempotent_retries() == 0 + + def test_generic_already_exists(self) -> None: + t = EpisodeTracker() + 
t.record_step( + "aws lambda create-function --function-name fn", + False, + "", + "Resource already exists", + ) + t.record_step("aws lambda invoke --function-name fn", True, "", "") + assert t.detect_idempotent_retries() == 1 + + def test_non_create_failure_ignored(self) -> None: + t = EpisodeTracker() + t.record_step( + "aws s3api delete-bucket --bucket demo", + False, + "", + "BucketAlreadyExists", # nonsensical but tests the guard + ) + t.record_step("aws s3 ls", True, "", "") + assert t.detect_idempotent_retries() == 0 + + def test_multiple_retries(self) -> None: + t = EpisodeTracker() + t.record_step( + "aws s3api create-bucket --bucket a", + False, + "", + "BucketAlreadyExists", + ) + t.record_step("aws s3api put-object --bucket a --key f", True, "", "") + t.record_step( + "aws sqs create-queue --queue-name q", + False, + "", + "QueueNameExists", + ) + t.record_step("aws sqs send-message --queue-url q", True, "", "") + assert t.detect_idempotent_retries() == 2 diff --git a/tests/test_hint_provider.py b/tests/test_hint_provider.py new file mode 100644 index 0000000000000000000000000000000000000000..0a49ab1056025a47e3f8e56078eeaf22a2156584 --- /dev/null +++ b/tests/test_hint_provider.py @@ -0,0 +1,232 @@ +"""Unit tests for HintProvider — tests progressive hint generation. 
+Run:
+    docker exec <container> python -m pytest env/tests/test_hint_provider.py -v
hint.count("s3api") == 1 + + +# =================================================================== +# Level 2: Operation hints +# =================================================================== + + +class TestHintOperations: + def test_from_steps(self, provider: HintProvider) -> None: + task = _task( + SuccessCriteria( + steps=[ + StepCriteria(operation="create-table", resource="t"), + StepCriteria(operation="put-item", resource="t"), + ] + ) + ) + hint = provider.get_hint(task, 2) + assert "create-table" in hint + assert "put-item" in hint + assert "in order" in hint.lower() + + def test_from_single_operation(self, provider: HintProvider) -> None: + task = _task(SuccessCriteria(operation="list-buckets")) + hint = provider.get_hint(task, 2) + assert "list-buckets" in hint + + def test_no_operations_fallback(self, provider: HintProvider) -> None: + task = _task(SuccessCriteria()) + hint = provider.get_hint(task, 2) + assert "documentation" in hint.lower() + + +# =================================================================== +# Level 3: Command structure hints +# =================================================================== + + +class TestHintCommands: + def test_from_steps_with_resource(self, provider: HintProvider) -> None: + task = _task( + SuccessCriteria( + steps=[ + StepCriteria(operation="create-bucket", resource="my-bucket"), + ] + ) + ) + hint = provider.get_hint(task, 3) + assert "create-bucket" in hint + assert "my-bucket" in hint + assert "aws" in hint + + def test_from_steps_without_resource(self, provider: HintProvider) -> None: + task = _task( + SuccessCriteria( + steps=[ + StepCriteria(operation="create-role"), + ] + ) + ) + hint = provider.get_hint(task, 3) + assert "create-role" in hint + assert "..." 
in hint + + def test_from_operation_with_resource_exists(self, provider: HintProvider) -> None: + task = _task( + SuccessCriteria( + operation="create-bucket", + resource_exists=ResourceExistsCheck(service="s3", name="data-bucket"), + ) + ) + hint = provider.get_hint(task, 3) + assert "create-bucket" in hint + assert "data-bucket" in hint + + def test_multi_step_uses_arrow_separator(self, provider: HintProvider) -> None: + task = _task( + SuccessCriteria( + steps=[ + StepCriteria(operation="create-bucket", resource="b"), + StepCriteria(operation="put-object", resource="b"), + ] + ) + ) + hint = provider.get_hint(task, 3) + assert "→" in hint + + def test_no_commands_fallback(self, provider: HintProvider) -> None: + task = _task(SuccessCriteria()) + hint = provider.get_hint(task, 3) + assert "help" in hint.lower() + + +# =================================================================== +# Level clamping +# =================================================================== + + +class TestLevelClamping: + def test_level_zero_clamped_to_one(self, provider: HintProvider) -> None: + task = _task(SuccessCriteria(services=["s3"])) + hint = provider.get_hint(task, 0) + assert "s3" in hint # level 1 output + + def test_negative_level_clamped(self, provider: HintProvider) -> None: + task = _task(SuccessCriteria(services=["s3"])) + hint = provider.get_hint(task, -5) + assert "s3" in hint + + def test_level_above_max_clamped(self, provider: HintProvider) -> None: + task = _task(SuccessCriteria(operation="create-bucket")) + hint = provider.get_hint(task, 99) + # Should return level 3 (command structure) + assert "create-bucket" in hint + + def test_max_hint_level_is_three(self) -> None: + assert MAX_HINT_LEVEL == 3 + + +# =================================================================== +# _infer_service helper +# =================================================================== + + +class TestInferService: + @pytest.mark.parametrize( + "operation,expected", + [ + 
("create-bucket", "s3api"), + ("put-object", "s3api"), + ("create-table", "dynamodb"), + ("create-function", "lambda"), + ("create-queue", "sqs"), + ("create-topic", "sns"), + ("create-role", "iam"), + ("create-policy", "iam"), + ("create-user", "iam"), + ("create-rest-api", "apigateway"), + ("create-secret", "secretsmanager"), + ("describe-instances", "ec2"), + ("create-security-group", "iam"), # "group" keyword matches iam before ec2 + ], + ) + def test_known_operations(self, operation: str, expected: str) -> None: + assert _infer_service(operation) == expected + + def test_unknown_operation_returns_none(self) -> None: + assert _infer_service("unknown-operation") is None + + def test_empty_operation_returns_none(self) -> None: + assert _infer_service("") is None diff --git a/tests/test_resource_verifier.py b/tests/test_resource_verifier.py new file mode 100644 index 0000000000000000000000000000000000000000..0bc496590a033e33fe225d0d73220b10bfee3a1d --- /dev/null +++ b/tests/test_resource_verifier.py @@ -0,0 +1,541 @@ +"""Unit tests for ResourceVerifier — resource existence checks, state checks, and JSON path extraction. + +Uses a mock AwsBackend so tests run without MiniStack/Docker. 
+ +Run: + python -m pytest tests/test_resource_verifier.py -v +""" + +from __future__ import annotations + +import json +from unittest.mock import MagicMock + + +from server.services.aws_backend import AwsBackend +from server.services.resource_verifier import ResourceVerifier, _extract_json_path + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _mock_backend(responses: dict[str, tuple[bool, str, str]]) -> AwsBackend: + """Create a mock AwsBackend that returns preset responses keyed by substring match.""" + backend = MagicMock(spec=AwsBackend) + + def execute(cmd: str) -> tuple[bool, str, str]: + for pattern, result in responses.items(): + if pattern in cmd: + return result + return (False, "", "unknown command") + + backend.execute_command.side_effect = execute + return backend + + +# --------------------------------------------------------------------------- +# _extract_json_path +# --------------------------------------------------------------------------- + + +class TestExtractJsonPath: + def test_simple_dot_path(self) -> None: + data = {"Table": {"Name": "orders"}} + assert _extract_json_path(data, "$.Table.Name") == "orders" + + def test_nested_numeric(self) -> None: + data = {"Table": {"ProvisionedThroughput": {"ReadCapacityUnits": 50}}} + assert ( + _extract_json_path(data, "$.Table.ProvisionedThroughput.ReadCapacityUnits") + == 50 + ) + + def test_array_index(self) -> None: + data = {"Rules": [{"ID": "first"}, {"ID": "second"}]} + assert _extract_json_path(data, "$.Rules[0].ID") == "first" + assert _extract_json_path(data, "$.Rules[1].ID") == "second" + + def test_array_index_out_of_bounds(self) -> None: + data = {"Rules": [{"ID": "only"}]} + assert _extract_json_path(data, "$.Rules[5].ID") is None + + def test_wildcard_array(self) -> None: + data = {"Buckets": [{"Name": "a"}, {"Name": "b"}]} + assert _extract_json_path(data, 
"$.Buckets[].Name") == ["a", "b"] + + def test_wildcard_no_remaining(self) -> None: + data = {"Items": [1, 2, 3]} + assert _extract_json_path(data, "$.Items[]") == [1, 2, 3] + + def test_missing_key(self) -> None: + assert _extract_json_path({"a": 1}, "$.b.c") is None + + def test_none_data(self) -> None: + assert _extract_json_path(None, "$.foo") is None + + def test_non_dict_intermediate(self) -> None: + data = {"a": "string_not_dict"} + assert _extract_json_path(data, "$.a.b") is None + + def test_services_nested_path(self) -> None: + data = {"services": [{"desiredCount": 3}]} + assert _extract_json_path(data, "$.services[0].desiredCount") == 3 + + def test_attributes_path(self) -> None: + data = {"Attributes": {"VisibilityTimeout": "120"}} + assert _extract_json_path(data, "$.Attributes.VisibilityTimeout") == "120" + + +# --------------------------------------------------------------------------- +# ResourceVerifier.check_state +# --------------------------------------------------------------------------- + + +class TestCheckState: + def test_output_contains_pass(self) -> None: + backend = _mock_backend({"list-attached": (True, "AmazonSQSFullAccess", "")}) + v = ResourceVerifier(backend) + assert v.check_state( + {"command": "aws iam list-attached-role-policies", "output_contains": "SQS"} + ) + + def test_output_contains_fail(self) -> None: + backend = _mock_backend({"list-attached": (True, "AmazonS3ReadOnly", "")}) + v = ResourceVerifier(backend) + assert not v.check_state( + {"command": "aws iam list-attached-role-policies", "output_contains": "SQS"} + ) + + def test_command_fails(self) -> None: + backend = _mock_backend({"describe": (False, "", "not found")}) + v = ResourceVerifier(backend) + assert not v.check_state( + {"command": "aws describe-something", "output_contains": "ok"} + ) + + def test_empty_command(self) -> None: + backend = _mock_backend({}) + v = ResourceVerifier(backend) + assert not v.check_state({"command": ""}) + assert not 
v.check_state({}) + + def test_json_path_expected(self) -> None: + stdout = json.dumps( + {"Table": {"ProvisionedThroughput": {"ReadCapacityUnits": 50}}} + ) + backend = _mock_backend({"describe-table": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert v.check_state( + { + "command": "aws dynamodb describe-table --table-name t", + "json_path": "$.Table.ProvisionedThroughput.ReadCapacityUnits", + "expected": 50, + } + ) + + def test_json_path_string_comparison(self) -> None: + stdout = json.dumps({"Attributes": {"VisibilityTimeout": "120"}}) + backend = _mock_backend({"get-queue": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert v.check_state( + { + "command": "aws sqs get-queue-attributes", + "json_path": "$.Attributes.VisibilityTimeout", + "expected": "120", + } + ) + + def test_json_path_mismatch(self) -> None: + stdout = json.dumps( + {"Table": {"ProvisionedThroughput": {"ReadCapacityUnits": 5}}} + ) + backend = _mock_backend({"describe-table": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert not v.check_state( + { + "command": "aws dynamodb describe-table", + "json_path": "$.Table.ProvisionedThroughput.ReadCapacityUnits", + "expected": 50, + } + ) + + def test_json_path_invalid_json(self) -> None: + backend = _mock_backend({"describe": (True, "not-json{", "")}) + v = ResourceVerifier(backend) + assert not v.check_state( + { + "command": "aws describe-something", + "json_path": "$.foo", + "expected": "bar", + } + ) + + def test_both_output_contains_and_json_path(self) -> None: + stdout = json.dumps({"Timeout": 30, "FunctionName": "payment-webhook"}) + backend = _mock_backend({"get-function": (True, stdout, "")}) + v = ResourceVerifier(backend) + # Both checks must pass + assert v.check_state( + { + "command": "aws lambda get-function-configuration", + "output_contains": "payment-webhook", + "json_path": "$.Timeout", + "expected": 30, + } + ) + + def test_output_contains_pass_json_path_fail(self) -> None: + stdout = 
json.dumps({"Timeout": 3, "FunctionName": "payment-webhook"}) + backend = _mock_backend({"get-function": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert not v.check_state( + { + "command": "aws lambda get-function-configuration", + "output_contains": "payment-webhook", + "json_path": "$.Timeout", + "expected": 30, + } + ) + + def test_only_json_path_no_expected_still_passes(self) -> None: + # json_path without expected is not evaluated + backend = _mock_backend({"cmd": (True, '{"a":1}', "")}) + v = ResourceVerifier(backend) + assert v.check_state({"command": "aws cmd", "json_path": "$.a"}) + + +# --------------------------------------------------------------------------- +# ResourceVerifier.resource_exists — service verifiers +# --------------------------------------------------------------------------- + + +class TestResourceExistsS3: + def test_bucket_exists(self) -> None: + stdout = json.dumps({"Buckets": [{"Name": "my-bucket"}, {"Name": "other"}]}) + backend = _mock_backend({"list-buckets": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("s3", "my-bucket") + + def test_bucket_missing(self) -> None: + stdout = json.dumps({"Buckets": [{"Name": "other"}]}) + backend = _mock_backend({"list-buckets": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert not v.resource_exists("s3", "my-bucket") + + def test_list_fails(self) -> None: + backend = _mock_backend({"list-buckets": (False, "", "err")}) + v = ResourceVerifier(backend) + assert not v.resource_exists("s3", "demo") + + +class TestResourceExistsDynamoDB: + def test_table_exists(self) -> None: + backend = _mock_backend({"describe-table": (True, "{}", "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("dynamodb", "orders") + + def test_table_missing(self) -> None: + backend = _mock_backend({"describe-table": (False, "", "not found")}) + v = ResourceVerifier(backend) + assert not v.resource_exists("dynamodb", "orders") + + +class 
TestResourceExistsLambda: + def test_function_exists(self) -> None: + backend = _mock_backend({"get-function": (True, "{}", "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("lambda", "processor") + + def test_function_missing(self) -> None: + backend = _mock_backend({"get-function": (False, "", "not found")}) + v = ResourceVerifier(backend) + assert not v.resource_exists("lambda", "processor") + + +class TestResourceExistsSQS: + def test_queue_exists(self) -> None: + backend = _mock_backend({"get-queue-url": (True, "http://...", "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("sqs", "my-queue") + + def test_queue_missing(self) -> None: + backend = _mock_backend({"get-queue-url": (False, "", "not found")}) + v = ResourceVerifier(backend) + assert not v.resource_exists("sqs", "my-queue") + + +class TestResourceExistsSNS: + def test_topic_exists(self) -> None: + stdout = json.dumps( + {"Topics": [{"TopicArn": "arn:aws:sns:us-east-1:000000000000:alerts"}]} + ) + backend = _mock_backend({"list-topics": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("sns", "alerts") + + def test_topic_missing(self) -> None: + stdout = json.dumps( + {"Topics": [{"TopicArn": "arn:aws:sns:us-east-1:000000000000:other"}]} + ) + backend = _mock_backend({"list-topics": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert not v.resource_exists("sns", "alerts") + + +class TestResourceExistsIAM: + def test_role_exists(self) -> None: + backend = _mock_backend({"get-role": (True, "{}", "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("iam", "my-role") + + def test_user_exists(self) -> None: + backend = _mock_backend( + {"get-role": (False, "", ""), "get-user": (True, "{}", "")} + ) + v = ResourceVerifier(backend) + assert v.resource_exists("iam", "deploy-bot") + + def test_policy_exists(self) -> None: + stdout = json.dumps({"Policies": [{"PolicyName": "my-policy"}]}) + backend = _mock_backend( + { + 
"get-role": (False, "", ""), + "get-user": (False, "", ""), + "list-policies": (True, stdout, ""), + } + ) + v = ResourceVerifier(backend) + assert v.resource_exists("iam", "my-policy") + + def test_iam_not_found(self) -> None: + backend = _mock_backend( + { + "get-role": (False, "", ""), + "get-user": (False, "", ""), + "list-policies": (True, json.dumps({"Policies": []}), ""), + } + ) + v = ResourceVerifier(backend) + assert not v.resource_exists("iam", "ghost") + + +class TestResourceExistsSecretsManager: + def test_secret_exists(self) -> None: + backend = _mock_backend({"describe-secret": (True, "{}", "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("secretsmanager", "db-creds") + + def test_secret_missing(self) -> None: + backend = _mock_backend({"describe-secret": (False, "", "not found")}) + v = ResourceVerifier(backend) + assert not v.resource_exists("secretsmanager", "db-creds") + + +class TestResourceExistsApiGateway: + def test_api_exists(self) -> None: + stdout = json.dumps({"items": [{"name": "my-api"}]}) + backend = _mock_backend({"get-rest-apis": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("apigateway", "my-api") + + def test_api_missing(self) -> None: + stdout = json.dumps({"items": []}) + backend = _mock_backend({"get-rest-apis": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert not v.resource_exists("apigateway", "my-api") + + +class TestResourceExistsECS: + def test_cluster_exists_active(self) -> None: + stdout = json.dumps({"clusters": [{"clusterName": "prod", "status": "ACTIVE"}]}) + backend = _mock_backend({"describe-clusters": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("ecs", "prod") + + def test_cluster_inactive(self) -> None: + stdout = json.dumps( + {"clusters": [{"clusterName": "prod", "status": "INACTIVE"}]} + ) + backend = _mock_backend({"describe-clusters": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert not 
v.resource_exists("ecs", "prod") + + def test_cluster_not_found(self) -> None: + backend = _mock_backend({"describe-clusters": (False, "", "")}) + v = ResourceVerifier(backend) + assert not v.resource_exists("ecs", "prod") + + +class TestResourceExistsRDS: + def test_instance_exists(self) -> None: + backend = _mock_backend({"describe-db-instances": (True, "{}", "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("rds", "my-db") + + def test_instance_missing(self) -> None: + backend = _mock_backend({"describe-db-instances": (False, "", "not found")}) + v = ResourceVerifier(backend) + assert not v.resource_exists("rds", "my-db") + + +class TestResourceExistsElastiCache: + def test_cluster_exists(self) -> None: + backend = _mock_backend({"describe-cache-clusters": (True, "{}", "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("elasticache", "session-cache") + + +class TestResourceExistsRoute53: + def test_zone_exists(self) -> None: + stdout = json.dumps({"HostedZones": [{"Name": "example.com."}]}) + backend = _mock_backend({"list-hosted-zones": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("route53", "example.com") + + def test_zone_trailing_dot_normalized(self) -> None: + stdout = json.dumps({"HostedZones": [{"Name": "example.com."}]}) + backend = _mock_backend({"list-hosted-zones": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("route53", "example.com.") + + +class TestResourceExistsELBv2: + def test_lb_exists(self) -> None: + stdout = json.dumps({"LoadBalancers": [{"LoadBalancerName": "web-alb"}]}) + backend = _mock_backend({"describe-load-balancers": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("elbv2", "web-alb") + + def test_lb_missing(self) -> None: + backend = _mock_backend({"describe-load-balancers": (False, "", "not found")}) + v = ResourceVerifier(backend) + assert not v.resource_exists("elbv2", "web-alb") + + +class 
TestResourceExistsEFS: + def test_fs_by_creation_token(self) -> None: + stdout = json.dumps( + {"FileSystems": [{"CreationToken": "app-storage", "Tags": []}]} + ) + backend = _mock_backend({"describe-file-systems": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("efs", "app-storage") + + def test_fs_by_tag(self) -> None: + stdout = json.dumps( + { + "FileSystems": [ + { + "CreationToken": "token-123", + "Tags": [{"Key": "Name", "Value": "shared-data"}], + } + ] + } + ) + backend = _mock_backend({"describe-file-systems": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("efs", "shared-data") + + def test_fs_missing(self) -> None: + stdout = json.dumps({"FileSystems": []}) + backend = _mock_backend({"describe-file-systems": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert not v.resource_exists("efs", "nonexistent") + + +class TestResourceExistsCognito: + def test_pool_exists(self) -> None: + stdout = json.dumps({"UserPools": [{"Name": "customer-auth"}]}) + backend = _mock_backend({"list-user-pools": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("cognito-idp", "customer-auth") + + def test_pool_missing(self) -> None: + stdout = json.dumps({"UserPools": []}) + backend = _mock_backend({"list-user-pools": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert not v.resource_exists("cognito-idp", "customer-auth") + + +class TestResourceExistsSSM: + def test_param_exists(self) -> None: + backend = _mock_backend({"get-parameter": (True, "{}", "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("ssm", "/app/config") + + +class TestResourceExistsEventBridge: + def test_rule_exists(self) -> None: + backend = _mock_backend({"describe-rule": (True, "{}", "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("events", "nightly-etl") + + +class TestResourceExistsApiGatewayV2: + def test_api_exists(self) -> None: + stdout = json.dumps({"Items": 
[{"Name": "products-api"}]}) + backend = _mock_backend({"get-apis": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("apigatewayv2", "products-api") + + def test_api_missing(self) -> None: + stdout = json.dumps({"Items": []}) + backend = _mock_backend({"get-apis": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert not v.resource_exists("apigatewayv2", "products-api") + + +class TestResourceExistsCloudFormation: + def test_stack_exists(self) -> None: + backend = _mock_backend({"describe-stacks": (True, "{}", "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("cloudformation", "vpc-stack") + + +class TestResourceExistsGlue: + def test_database_exists(self) -> None: + backend = _mock_backend({"get-database": (True, "{}", "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("glue", "analytics-db") + + +class TestResourceExistsEBS: + def test_volume_exists(self) -> None: + stdout = json.dumps({"Volumes": [{"VolumeId": "vol-123"}]}) + backend = _mock_backend({"describe-volumes": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("ebs", "data-volume") + + def test_no_volumes(self) -> None: + stdout = json.dumps({"Volumes": []}) + backend = _mock_backend({"describe-volumes": (True, stdout, "")}) + v = ResourceVerifier(backend) + assert not v.resource_exists("ebs", "data-volume") + + +class TestResourceExistsFirehose: + def test_stream_exists(self) -> None: + backend = _mock_backend({"describe-delivery-stream": (True, "{}", "")}) + v = ResourceVerifier(backend) + assert v.resource_exists("firehose", "event-stream") + + +class TestResourceExistsUnknownService: + def test_unknown_service(self) -> None: + backend = _mock_backend({}) + v = ResourceVerifier(backend) + assert not v.resource_exists("unknown-service", "name") + + +class TestResourceExistsInvalidJson: + def test_s3_bad_json(self) -> None: + backend = _mock_backend({"list-buckets": (True, "not-json", "")}) + v = 
ResourceVerifier(backend)
+        assert not v.resource_exists("s3", "demo")
+
+    def test_sns_bad_json(self) -> None:
+        backend = _mock_backend({"list-topics": (True, "{bad", "")})
+        v = ResourceVerifier(backend)
+        assert not v.resource_exists("sns", "alerts")
diff --git a/tests/test_task_grader.py b/tests/test_task_grader.py
new file mode 100644
index 0000000000000000000000000000000000000000..f8419cf0d4671e329ea095c1f7f9a67ee2ca7478
--- /dev/null
+++ b/tests/test_task_grader.py
@@ -0,0 +1,648 @@
+"""Unit tests for TaskGrader — tests all grading strategies and reward shaping.
+
+These tests mock AwsBackend/ResourceVerifier so they run without MiniStack.
+
+Run:
+    uv run pytest tests/test_task_grader.py -v
+    docker exec <container-name> python -m pytest env/tests/test_task_grader.py -v
+"""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from models import (
+    SuccessCriteria,
+    Task,
+    TaskID,
+    TaskDifficulty,
+    ResourceExistsCheck,
+    StepCriteria,
+    StateCheck,
+)
+from server.services.task_grader import TaskGrader
+from server.services.episode_tracker import EpisodeTracker, StepRecord
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def mock_backend() -> MagicMock:
+    return MagicMock()
+
+
+@pytest.fixture
+def grader(mock_backend: MagicMock) -> TaskGrader:
+    return TaskGrader(mock_backend)
+
+
+@pytest.fixture
+def tracker() -> EpisodeTracker:
+    return EpisodeTracker()
+
+
+def _step(command: str, success: bool = True) -> StepRecord:
+    return StepRecord(
+        command=command, success=success, stdout="", stderr="", step_number=0
+    )
+
+
+def _task(
+    criteria: SuccessCriteria, difficulty: TaskDifficulty = TaskDifficulty.WARMUP
+) -> Task:
+    return Task(
+        task_id=TaskID(999),
+        difficulty=difficulty,
+        description="test task",
+        success_criteria=criteria,
+    )
+
+
+# 
=================================================================== +# _grade_command_match (warmup tier) +# =================================================================== + + +class TestGradeCommandMatch: + def test_correct_command_achieves( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = SuccessCriteria(command_contains="s3", operation="ls") + step = _step("aws s3 ls") + tracker.record_step(step.command, step.success, "", "") + result = grader.grade(_task(criteria), tracker, step) + assert result.task_achieved + assert result.reward == 1.0 + + def test_wrong_service_fails( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = SuccessCriteria(command_contains="s3", operation="ls") + step = _step("aws ec2 describe-instances") + tracker.record_step(step.command, step.success, "", "") + result = grader.grade(_task(criteria), tracker, step) + assert not result.task_achieved + + def test_wrong_operation_fails( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = SuccessCriteria(command_contains="s3", operation="ls") + step = _step("aws s3 mb s3://bucket") + tracker.record_step(step.command, step.success, "", "") + result = grader.grade(_task(criteria), tracker, step) + assert not result.task_achieved + + def test_failed_command_not_achieved( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = SuccessCriteria(command_contains="s3", operation="ls") + step = _step("aws s3 ls", success=False) + tracker.record_step(step.command, step.success, "", "") + result = grader.grade(_task(criteria), tracker, step) + assert not result.task_achieved + + def test_case_insensitive( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = SuccessCriteria(command_contains="S3", operation="LS") + step = _step("aws s3 ls") + tracker.record_step(step.command, step.success, "", "") + result = grader.grade(_task(criteria), tracker, step) + assert 
result.task_achieved + + +# =================================================================== +# _grade_resource_creation (beginner tier) +# =================================================================== + + +class TestGradeResourceCreation: + def test_resource_exists_achieves( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = SuccessCriteria( + command_contains="s3api", + operation="create-bucket", + resource_exists=ResourceExistsCheck(service="s3", name="my-bucket"), + ) + step = _step("aws s3api create-bucket --bucket my-bucket") + tracker.record_step(step.command, step.success, "", "") + + with patch.object(grader._verifier, "resource_exists", return_value=True): + result = grader.grade( + _task(criteria, TaskDifficulty.BEGINNER), tracker, step + ) + assert result.task_achieved + assert result.reward == 1.0 + assert result.partial_progress == 1.0 + + def test_resource_missing_but_cmd_ok_gives_partial( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = SuccessCriteria( + command_contains="s3api", + operation="create-bucket", + resource_exists=ResourceExistsCheck(service="s3", name="my-bucket"), + ) + step = _step("aws s3api create-bucket --bucket my-bucket") + tracker.record_step(step.command, step.success, "", "") + + with patch.object(grader._verifier, "resource_exists", return_value=False): + result = grader.grade( + _task(criteria, TaskDifficulty.BEGINNER), tracker, step + ) + assert not result.task_achieved + assert result.partial_progress == 0.5 + + def test_wrong_command_and_no_resource_gives_zero( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = SuccessCriteria( + command_contains="s3api", + operation="create-bucket", + resource_exists=ResourceExistsCheck(service="s3", name="my-bucket"), + ) + step = _step("aws sts get-caller-identity") + tracker.record_step(step.command, step.success, "", "") + + with patch.object(grader._verifier, "resource_exists", 
return_value=False): + result = grader.grade( + _task(criteria, TaskDifficulty.BEGINNER), tracker, step + ) + assert not result.task_achieved + assert result.partial_progress == 0.0 + + +# =================================================================== +# _grade_multi_step (intermediate/advanced tier) +# =================================================================== + + +class TestGradeMultiStep: + def test_all_steps_completed_achieves( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = SuccessCriteria( + steps=[ + StepCriteria(operation="create-bucket", resource="data"), + StepCriteria(operation="put-object", resource="data"), + ] + ) + tracker.record_step("aws s3api create-bucket --bucket data", True, "", "") + step = tracker.record_step( + "aws s3api put-object --bucket data --key f", True, "", "" + ) + result = grader.grade( + _task(criteria, TaskDifficulty.INTERMEDIATE), tracker, step + ) + assert result.task_achieved + assert result.reward == 1.0 + + def test_partial_steps_gives_progress( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = SuccessCriteria( + steps=[ + StepCriteria(operation="create-bucket", resource="data"), + StepCriteria(operation="put-object", resource="data"), + ] + ) + step = tracker.record_step( + "aws s3api create-bucket --bucket data", True, "", "" + ) + result = grader.grade( + _task(criteria, TaskDifficulty.INTERMEDIATE), tracker, step + ) + assert not result.task_achieved + assert result.partial_progress == 0.5 + + def test_ordered_stops_at_first_missing( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = SuccessCriteria( + steps=[ + StepCriteria(operation="create-table", resource="orders"), + StepCriteria(operation="put-item", resource="orders"), + StepCriteria(operation="query", resource="orders"), + ] + ) + # Skip step 2, do step 1 and 3 + tracker.record_step( + "aws dynamodb create-table --table-name orders", True, "", "" + ) + step = 
tracker.record_step( + "aws dynamodb query --table-name orders", True, "", "" + ) + result = grader.grade( + _task(criteria, TaskDifficulty.INTERMEDIATE), tracker, step + ) + assert not result.task_achieved + # Only 1/3 completed because step 2 is missing and ordering is enforced + assert result.partial_progress == pytest.approx(1 / 3) + + def test_services_required_must_be_met( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = SuccessCriteria( + services=["iam", "lambda"], + steps=[ + StepCriteria(operation="create-role"), + StepCriteria(operation="create-function", resource="my-fn"), + ], + ) + tracker.record_step("aws iam create-role --role-name r", True, "", "") + step = tracker.record_step( + "aws lambda create-function --function-name my-fn", True, "", "" + ) + result = grader.grade(_task(criteria, TaskDifficulty.ADVANCED), tracker, step) + assert result.task_achieved + + def test_missing_service_prevents_achievement( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = SuccessCriteria( + services=["iam", "lambda", "sqs"], + steps=[ + StepCriteria(operation="create-role"), + StepCriteria(operation="create-function", resource="my-fn"), + ], + ) + tracker.record_step("aws iam create-role --role-name r", True, "", "") + step = tracker.record_step( + "aws lambda create-function --function-name my-fn", True, "", "" + ) + result = grader.grade(_task(criteria, TaskDifficulty.ADVANCED), tracker, step) + assert not result.task_achieved # sqs service never used + + def test_empty_steps_not_achieved( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = SuccessCriteria(steps=[]) + step = _step("aws s3 ls") + tracker.record_step(step.command, step.success, "", "") + result = grader.grade( + _task(criteria, TaskDifficulty.INTERMEDIATE), tracker, step + ) + assert not result.task_achieved + + def test_failed_command_not_counted( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + 
criteria = SuccessCriteria( + steps=[ + StepCriteria(operation="create-bucket", resource="data"), + ] + ) + step = tracker.record_step( + "aws s3api create-bucket --bucket data", False, "", "error" + ) + result = grader.grade( + _task(criteria, TaskDifficulty.INTERMEDIATE), tracker, step + ) + assert not result.task_achieved + + +# =================================================================== +# _grade_state_checks (expert tier) +# =================================================================== + + +class TestGradeStateChecks: + def test_all_checks_pass_achieves( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = SuccessCriteria( + services=["s3"], + state_checks=[ + StateCheck( + command="aws s3api get-bucket-versioning --bucket b", + output_contains="Enabled", + ), + ], + ) + step = tracker.record_step( + "aws s3api put-bucket-versioning --bucket b", True, "", "" + ) + + with patch.object(grader._verifier, "check_state", return_value=True): + result = grader.grade(_task(criteria, TaskDifficulty.EXPERT), tracker, step) + assert result.task_achieved + + def test_failing_check_prevents_achievement( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = SuccessCriteria( + services=["s3"], + state_checks=[ + StateCheck(command="cmd1", output_contains="x"), + StateCheck(command="cmd2", output_contains="y"), + ], + ) + step = tracker.record_step("aws s3 ls", True, "", "") + + with patch.object(grader._verifier, "check_state", side_effect=[True, False]): + result = grader.grade(_task(criteria, TaskDifficulty.EXPERT), tracker, step) + assert not result.task_achieved + assert result.partial_progress > 0 # partial credit for 1/2 checks + + def test_services_required_for_state_checks( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = SuccessCriteria( + services=["s3", "dynamodb"], + state_checks=[ + StateCheck(command="cmd1", output_contains="ok"), + ], + ) + # Only use s3, not dynamodb + 
step = tracker.record_step("aws s3 ls", True, "", "") + + with patch.object(grader._verifier, "check_state", return_value=True): + result = grader.grade(_task(criteria, TaskDifficulty.EXPERT), tracker, step) + assert not result.task_achieved # dynamodb service not used + + def test_steps_give_partial_progress( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = SuccessCriteria( + services=["s3"], + state_checks=[ + StateCheck(command="cmd1", output_contains="ok"), + ], + steps=[ + StepCriteria(operation="create-bucket", resource="b"), + StepCriteria(operation="put-object", resource="b"), + ], + ) + tracker.record_step("aws s3api create-bucket --bucket b", True, "", "") + step = tracker.record_step( + "aws s3api put-object --bucket b --key k", True, "", "" + ) + + with patch.object(grader._verifier, "check_state", return_value=True): + result = grader.grade(_task(criteria, TaskDifficulty.EXPERT), tracker, step) + assert result.task_achieved + # Progress: 2/2 steps * 0.7 + 1/1 checks * 0.3 = 1.0 + assert result.partial_progress == 1.0 + + def test_no_state_checks_not_achieved( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = SuccessCriteria( + services=["s3"], + state_checks=[], + ) + step = tracker.record_step("aws s3 ls", True, "", "") + # state_checks dispatch requires non-empty; but empty list means 0 checks + # The grader returns state_checks dispatch with all_checks_pass=False + result = grader.grade(_task(criteria, TaskDifficulty.EXPERT), tracker, step) + # Empty state_checks => no criteria matched => falls through to command_match or empty + assert not result.task_achieved + + +# =================================================================== +# _compute_reward (reward shaping) +# =================================================================== + + +class TestComputeReward: + def test_achieved_gives_1_0( + self, grader: TaskGrader, tracker: EpisodeTracker + ) -> None: + criteria = 
# ===================================================================
# _compute_reward (reward shaping)
# ===================================================================


class TestComputeReward:
    """Reward shaping on top of the base grade: chaos bonus, hint decay,
    failure halving, progress-delta bonus, rollback penalty, retry bonus.

    The expected values below pin the shaping constants exposed by the
    visible assertions: achieved = 1.0, chaos bonus x1.05, hint decay
    x0.85 per hint (compounding), progress weight 0.8 with +0.1 delta
    bonus, rollback penalty -0.1 each, ceiling 0.99 when unachieved.
    """

    def test_achieved_gives_1_0(
        self, grader: TaskGrader, tracker: EpisodeTracker
    ) -> None:
        """A matching command on an achieved task earns the full 1.0 reward."""
        criteria = SuccessCriteria(command_contains="s3", operation="ls")
        step = _step("aws s3 ls")
        tracker.record_step(step.command, step.success, "", "")
        result = grader.grade(_task(criteria), tracker, step)
        assert result.reward == 1.0

    def test_chaos_bonus(self, grader: TaskGrader, tracker: EpisodeTracker) -> None:
        """Succeeding while chaos occurred adds a 5% bonus: 1.0 * 1.05."""
        criteria = SuccessCriteria(command_contains="s3", operation="ls")
        step = _step("aws s3 ls")
        tracker.record_step(step.command, step.success, "", "")
        result = grader.grade(_task(criteria), tracker, step, chaos_occurred=True)
        assert result.reward == 1.05

    def test_hint_decay_on_achieved(
        self, grader: TaskGrader, tracker: EpisodeTracker
    ) -> None:
        """One hint multiplies the achieved reward by 0.85."""
        criteria = SuccessCriteria(command_contains="s3", operation="ls")
        step = _step("aws s3 ls")
        tracker.record_step(step.command, step.success, "", "")
        result = grader.grade(_task(criteria), tracker, step, hints_used=1)
        assert result.reward == pytest.approx(0.85)

    def test_hint_decay_on_achieved_stacks(
        self, grader: TaskGrader, tracker: EpisodeTracker
    ) -> None:
        """Hint decay compounds multiplicatively: three hints => 0.85 ** 3."""
        criteria = SuccessCriteria(command_contains="s3", operation="ls")
        step = _step("aws s3 ls")
        tracker.record_step(step.command, step.success, "", "")
        result = grader.grade(_task(criteria), tracker, step, hints_used=3)
        assert result.reward == pytest.approx(0.85**3)

    def test_chaos_plus_hints(
        self, grader: TaskGrader, tracker: EpisodeTracker
    ) -> None:
        """Chaos bonus and hint decay combine multiplicatively."""
        criteria = SuccessCriteria(command_contains="s3", operation="ls")
        step = _step("aws s3 ls")
        tracker.record_step(step.command, step.success, "", "")
        result = grader.grade(
            _task(criteria), tracker, step, chaos_occurred=True, hints_used=2
        )
        assert result.reward == pytest.approx(1.05 * 0.85**2)

    def test_failed_command_halves_reward(
        self, grader: TaskGrader, tracker: EpisodeTracker
    ) -> None:
        """A failed, non-matching command earns nothing at all."""
        criteria = SuccessCriteria(command_contains="s3", operation="ls")
        step = _step("aws ec2 describe-instances", success=False)
        tracker.record_step(step.command, step.success, "", "")
        result = grader.grade(_task(criteria), tracker, step)
        # Not achieved, no progress, failed command => 0.0 * 0.5 = 0.0
        assert result.reward == 0.0

    def test_progress_bonus_for_advancing(
        self, grader: TaskGrader, tracker: EpisodeTracker
    ) -> None:
        """Advancing a multi-step task earns the progress share plus a delta bonus."""
        criteria = SuccessCriteria(
            steps=[
                StepCriteria(operation="create-bucket", resource="b"),
                StepCriteria(operation="put-object", resource="b"),
            ]
        )
        # First step — progress goes from 0.0 to 0.5
        step = tracker.record_step("aws s3api create-bucket --bucket b", True, "", "")
        result = grader.grade(
            _task(criteria, TaskDifficulty.INTERMEDIATE), tracker, step
        )
        # partial_progress=0.5, progress_delta > 0 => +0.1 bonus
        assert result.reward == pytest.approx(0.5 * 0.8 + 0.1)

    def test_no_bonus_for_same_progress(
        self, grader: TaskGrader, tracker: EpisodeTracker
    ) -> None:
        """Repeating a completed step keeps the progress share but no delta bonus."""
        criteria = SuccessCriteria(
            steps=[
                StepCriteria(operation="create-bucket", resource="b"),
                StepCriteria(operation="put-object", resource="b"),
            ]
        )
        step = tracker.record_step("aws s3api create-bucket --bucket b", True, "", "")
        # First grade sets previous_progress
        grader.grade(_task(criteria, TaskDifficulty.INTERMEDIATE), tracker, step)
        # Second grade with same command — no progress advancement
        step2 = tracker.record_step("aws s3api create-bucket --bucket b", True, "", "")
        result = grader.grade(
            _task(criteria, TaskDifficulty.INTERMEDIATE), tracker, step2
        )
        # No progress delta bonus
        assert result.reward == pytest.approx(0.5 * 0.8)

    def test_reward_clamped_below_1(
        self, grader: TaskGrader, tracker: EpisodeTracker
    ) -> None:
        """An unachieved task can never receive the full 1.0 reward."""
        criteria = SuccessCriteria(command_contains="xyz", operation="nope")
        step = _step("aws s3 ls")
        tracker.record_step(step.command, step.success, "", "")
        result = grader.grade(_task(criteria), tracker, step)
        assert result.reward <= 0.99

    def test_rollback_penalty(
        self, grader: TaskGrader, tracker: EpisodeTracker
    ) -> None:
        """Each create/delete rollback pair subtracts 0.1 from the reward."""
        criteria = SuccessCriteria(
            steps=[
                StepCriteria(operation="create-bucket", resource="b"),
                StepCriteria(operation="put-object", resource="b"),
            ]
        )
        # Create then delete (rollback)
        tracker.record_step("aws s3api create-bucket --bucket b", True, "", "")
        tracker.record_step("aws s3api delete-bucket --bucket b", True, "", "")
        step = tracker.record_step("aws s3api create-bucket --bucket b", True, "", "")
        result = grader.grade(
            _task(criteria, TaskDifficulty.INTERMEDIATE), tracker, step
        )
        # 2 rollbacks detected (both create-bucket commands pair with delete-bucket)
        base = 0.5 * 0.8 + 0.1  # progress + delta bonus
        expected = base - 0.1 * 2  # 2 rollback penalties
        assert result.reward == pytest.approx(expected)

    def test_idempotent_retry_bonus(
        self, grader: TaskGrader, tracker: EpisodeTracker
    ) -> None:
        """A recoverable 'already exists' failure does not push the reward negative."""
        criteria = SuccessCriteria(
            steps=[
                StepCriteria(operation="create-bucket", resource="b"),
                StepCriteria(operation="put-object", resource="b"),
            ]
        )
        # Failed create with "already exists", then successful next step
        tracker.record_step(
            "aws s3api create-bucket --bucket b", False, "", "BucketAlreadyOwnedByYou"
        )
        step = tracker.record_step(
            "aws s3api put-object --bucket b --key k", True, "", ""
        )
        result = grader.grade(
            _task(criteria, TaskDifficulty.INTERMEDIATE), tracker, step
        )
        # Only put-object counted (create-bucket failed), so 0/2 completed (ordered, first fails)
        # But idempotent retry gives +0.02
        # Actually: step 1 (create-bucket) failed, so has_executed_operation won't find it
        # Ordered: stops at step 1 (not found). progress = 0/2 = 0.0
        # progress_reward = 0.0 * 0.8 + 0.1 (delta bonus if first time) + 0.02 (idempotent)
        # Actually delta: 0.0 - 0.0 = 0, no bonus. Also success=True on latest.
        assert result.reward >= 0.0
# ===================================================================
# Dispatch logic
# ===================================================================


class TestDispatch:
    """Grading-strategy selection follows a fixed priority order:
    state_checks > steps > resource_exists > command match."""

    def test_state_checks_takes_priority(
        self, grader: TaskGrader, tracker: EpisodeTracker
    ) -> None:
        """state_checks present => uses _grade_state_checks even if steps also present."""
        crit = SuccessCriteria(
            services=["s3"],
            state_checks=[StateCheck(command="cmd", output_contains="ok")],
            steps=[StepCriteria(operation="create-bucket", resource="b")],
        )
        latest = tracker.record_step(
            "aws s3api create-bucket --bucket b", True, "", ""
        )
        with patch.object(grader._verifier, "check_state", return_value=True):
            outcome = grader.grade(
                _task(crit, TaskDifficulty.EXPERT), tracker, latest
            )
        assert "state_checks" in outcome.reason

    def test_steps_over_resource_exists(
        self, grader: TaskGrader, tracker: EpisodeTracker
    ) -> None:
        """steps present => uses _grade_multi_step even if resource_exists also set."""
        crit = SuccessCriteria(
            steps=[StepCriteria(operation="create-bucket", resource="b")],
            resource_exists=ResourceExistsCheck(service="s3", name="b"),
        )
        latest = tracker.record_step(
            "aws s3api create-bucket --bucket b", True, "", ""
        )
        outcome = grader.grade(
            _task(crit, TaskDifficulty.INTERMEDIATE), tracker, latest
        )
        assert "multi_step" in outcome.reason

    def test_resource_exists_over_command_match(
        self, grader: TaskGrader, tracker: EpisodeTracker
    ) -> None:
        """resource_exists present => uses _grade_resource_creation."""
        crit = SuccessCriteria(
            command_contains="s3api",
            operation="create-bucket",
            resource_exists=ResourceExistsCheck(service="s3", name="b"),
        )
        latest = _step("aws s3api create-bucket --bucket b")
        tracker.record_step(latest.command, latest.success, "", "")
        with patch.object(grader._verifier, "resource_exists", return_value=True):
            outcome = grader.grade(
                _task(crit, TaskDifficulty.BEGINNER), tracker, latest
            )
        assert "resource_creation" in outcome.reason

    def test_no_criteria_gives_zero(
        self, grader: TaskGrader, tracker: EpisodeTracker
    ) -> None:
        """A SuccessCriteria with nothing set grades as not achieved."""
        crit = SuccessCriteria()
        latest = _step("aws s3 ls")
        tracker.record_step(latest.command, latest.success, "", "")
        outcome = grader.grade(_task(crit), tracker, latest)
        assert not outcome.task_achieved
        assert "no recognised" in outcome.reason
# ===================================================================
# Progress monotonicity
# ===================================================================


class TestProgressMonotonicity:
    """previous_progress is a high-water mark: it may rise but never fall."""

    def test_previous_progress_never_decreases(
        self, grader: TaskGrader, tracker: EpisodeTracker
    ) -> None:
        crit = SuccessCriteria(
            steps=[
                StepCriteria(operation="create-bucket", resource="b"),
                StepCriteria(operation="put-object", resource="b"),
            ]
        )
        # Completing step 1 of 2 lifts recorded progress to 0.5.
        first = tracker.record_step(
            "aws s3api create-bucket --bucket b", True, "", ""
        )
        grader.grade(_task(crit, TaskDifficulty.INTERMEDIATE), tracker, first)
        assert tracker.previous_progress == 0.5

        # An unrelated command leaves step 2 incomplete (still 0.5 progress);
        # the stored high-water mark must NOT drop.
        second = tracker.record_step("aws sts get-caller-identity", True, "", "")
        grader.grade(_task(crit, TaskDifficulty.INTERMEDIATE), tracker, second)
        assert tracker.previous_progress == 0.5
"""Tests for advanced-tier tasks — verifies multi-service, multi-step grading.

Advanced tasks require the agent to execute ordered commands across multiple AWS
services. The grader checks both step completion and service usage via the
EpisodeTracker.

Run inside Docker:
    docker exec aws-rl-env python -m pytest tests_tasks/test_advanced_tasks.py -v
"""

import json

import pytest
import yaml
from pathlib import Path

from models import SuccessCriteria, Task, TaskID, TaskDifficulty, SetupCommand
from server.services.aws_backend import AwsBackend
from server.services.task_grader import TaskGrader
from server.services.episode_tracker import EpisodeTracker

# Location of the advanced-tier task definitions, resolved relative to this
# test file (repo_root/server/services/tasks/advanced.yaml).
TASKS_FILE = (
    Path(__file__).resolve().parent.parent
    / "server"
    / "services"
    / "tasks"
    / "advanced.yaml"
)

# Shared command fragments reused across task scripts below:
# a dummy Lambda code location, the fake account's IAM role ARN prefix,
# and a minimal single-statement IAM policy (already shell-quoted).
_LAMBDA_CODE = "--code S3Bucket=dummy,S3Key=dummy.zip"
_ROLE = "arn:aws:iam::000000000000:role"
_SIMPLE_POLICY = '\'{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Action":"s3:GetObject","Resource":"*"}]}\''


def _run(backend: AwsBackend, cmd: str) -> tuple[str, bool, str, str]:
    """Execute a command and return (cmd, success, stdout, stderr)."""
    success, stdout, stderr = backend.execute_command(cmd)
    return (cmd, success, stdout, stderr)


def _assume(service: str) -> str:
    """Build an assume-role-policy-document JSON for a given AWS service.

    The returned document is wrapped in single quotes so it can be spliced
    directly into a shell command line.
    """
    doc = json.dumps(
        {
            "Version": "2012-10-17",
            "Statement": [
                {
                    "Effect": "Allow",
                    "Principal": {"Service": service},
                    "Action": "sts:AssumeRole",
                }
            ],
        }
    )
    return f"'{doc}'"
+ """ + R: list[tuple[str, bool, str, str]] = [] + run = lambda cmd: R.append(_run(backend, cmd)) or R[-1] # noqa: E731 + + if task_id == 15: + run( + f"aws iam create-role --role-name processor-role --assume-role-policy-document {_assume('lambda.amazonaws.com')}" + ) + run( + f"aws lambda create-function --function-name processor --runtime python3.12 --handler index.handler --role {_ROLE}/processor-role {_LAMBDA_CODE}" + ) + run("aws sqs create-queue --queue-name work-items") + run( + "aws lambda create-event-source-mapping --function-name processor --event-source-arn arn:aws:sqs:us-east-1:000000000000:work-items --batch-size 10" + ) + + elif task_id == 16: + run( + "aws dynamodb create-table --table-name products --key-schema AttributeName=product_id,KeyType=HASH --attribute-definitions AttributeName=product_id,AttributeType=S --billing-mode PAY_PER_REQUEST" + ) + run( + f"aws iam create-role --role-name product-api-role --assume-role-policy-document {_assume('lambda.amazonaws.com')}" + ) + run( + f"aws lambda create-function --function-name product-api --runtime python3.12 --handler index.handler --role {_ROLE}/product-api-role {_LAMBDA_CODE}" + ) + _, _, api_out, _ = run("aws apigateway create-rest-api --name products-api") + api_id = json.loads(api_out)["id"] + _, _, res_list, _ = run(f"aws apigateway get-resources --rest-api-id {api_id}") + root_id = next( + i["id"] for i in json.loads(res_list)["items"] if i["path"] == "/" + ) + _, _, res_out, _ = run( + f"aws apigateway create-resource --rest-api-id {api_id} --parent-id {root_id} --path-part products" + ) + res_id = json.loads(res_out)["id"] + run( + f"aws apigateway put-method --rest-api-id {api_id} --resource-id {res_id} --http-method GET --authorization-type NONE" + ) + run( + f"aws apigateway put-integration --rest-api-id {api_id} --resource-id {res_id} --http-method GET --type AWS_PROXY --integration-http-method POST --uri 
arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:000000000000:function:product-api/invocations" + ) + + elif task_id == 17: + run("aws sns create-topic --name order-events") + run("aws sqs create-queue --queue-name shipping-queue") + run("aws sqs create-queue --queue-name billing-queue") + run( + "aws sns subscribe --topic-arn arn:aws:sns:us-east-1:000000000000:order-events --protocol sqs --notification-endpoint arn:aws:sqs:us-east-1:000000000000:shipping-queue" + ) + run( + "aws sns subscribe --topic-arn arn:aws:sns:us-east-1:000000000000:order-events --protocol sqs --notification-endpoint arn:aws:sqs:us-east-1:000000000000:billing-queue" + ) + run( + 'aws sns publish --topic-arn arn:aws:sns:us-east-1:000000000000:order-events --message "test order event"' + ) + + elif task_id == 87: + run("aws s3api create-bucket --bucket image-uploads") + run( + f"aws iam create-role --role-name image-resizer-role --assume-role-policy-document {_assume('lambda.amazonaws.com')}" + ) + run( + f"aws lambda create-function --function-name image-resizer --runtime python3.12 --handler index.handler --role {_ROLE}/image-resizer-role {_LAMBDA_CODE}" + ) + run( + 'aws s3api put-bucket-notification-configuration --bucket image-uploads --notification-configuration \'{"LambdaFunctionConfigurations":[{"LambdaFunctionArn":"arn:aws:lambda:us-east-1:000000000000:function:image-resizer","Events":["s3:ObjectCreated:*"]}]}\'' + ) + run( + 'aws events put-rule --name image-upload-rule --schedule-expression "rate(1 hour)"' + ) + run( + "aws events put-targets --rule image-upload-rule --targets Id=1,Arn=arn:aws:lambda:us-east-1:000000000000:function:image-resizer" + ) + + elif task_id == 88: + run( + f"aws iam create-role --role-name ecs-exec-role --assume-role-policy-document {_assume('ecs-tasks.amazonaws.com')}" + ) + run( + 'aws ecs register-task-definition --family web-app-task --container-definitions 
\'[{"name":"web","image":"nginx","memory":256,"cpu":128}]\' --requires-compatibilities FARGATE --network-mode awsvpc --cpu 256 --memory 512' + ) + run("aws ecs create-cluster --cluster-name web-cluster") + _, _, tg_out, _ = run( + "aws elbv2 create-target-group --name web-tg --protocol HTTP --port 80 --vpc-id vpc-00000001 --target-type ip" + ) + tg_arn = json.loads(tg_out)["TargetGroups"][0]["TargetGroupArn"] + _, _, lb_out, _ = run( + "aws elbv2 create-load-balancer --name web-alb --subnets subnet-00000001 subnet-00000002" + ) + lb_arn = json.loads(lb_out)["LoadBalancers"][0]["LoadBalancerArn"] + run( + 'aws ec2 create-security-group --group-name ecs-sg --description "ECS tasks"' + ) + run( + f"aws elbv2 create-listener --load-balancer-arn {lb_arn} --protocol HTTP --port 80 --default-actions Type=forward,TargetGroupArn={tg_arn}" + ) + run( + f"aws ecs create-service --cluster web-cluster --service-name web-service --task-definition web-app-task --desired-count 1 --launch-type FARGATE --network-configuration awsvpcConfiguration={{subnets=[subnet-00000001],securityGroups=[sg-00000001]}} --load-balancers targetGroupArn={tg_arn},containerName=web,containerPort=80" + ) + + elif task_id == 89: + run( + "aws dynamodb create-table --table-name orders --key-schema AttributeName=order_id,KeyType=HASH --attribute-definitions AttributeName=order_id,AttributeType=S --billing-mode PAY_PER_REQUEST" + ) + run("aws sqs create-queue --queue-name order-queue") + run("aws sns create-topic --name order-notifications") + run( + "aws sns subscribe --topic-arn arn:aws:sns:us-east-1:000000000000:order-notifications --protocol sqs --notification-endpoint arn:aws:sqs:us-east-1:000000000000:order-queue" + ) + run( + f"aws iam create-role --role-name order-processor-role --assume-role-policy-document {_assume('lambda.amazonaws.com')}" + ) + run( + f"aws lambda create-function --function-name order-processor --runtime python3.12 --handler index.handler --role {_ROLE}/order-processor-role 
{_LAMBDA_CODE}" + ) + run( + "aws lambda create-event-source-mapping --function-name order-processor --event-source-arn arn:aws:sqs:us-east-1:000000000000:order-queue --batch-size 10" + ) + + elif task_id == 90: + run( + 'aws rds create-db-subnet-group --db-subnet-group-name db-subnets --db-subnet-group-description "DB subnets" --subnet-ids subnet-00000001 subnet-00000002' + ) + run( + "aws rds create-db-instance --db-instance-identifier app-db --engine mysql --db-instance-class db.t3.micro --master-username admin --master-user-password Password123" + ) + run( + 'aws secretsmanager create-secret --name db-credentials --secret-string \'{"username":"admin","password":"Password123"}\'' + ) + run( + f"aws iam create-role --role-name secret-rotator-role --assume-role-policy-document {_assume('lambda.amazonaws.com')}" + ) + run( + f"aws lambda create-function --function-name secret-rotator --runtime python3.12 --handler index.handler --role {_ROLE}/secret-rotator-role {_LAMBDA_CODE}" + ) + + elif task_id == 91: + run( + 'aws ec2 create-security-group --group-name web-sg --description "HTTP access"' + ) + _, _, tg_out, _ = run( + "aws elbv2 create-target-group --name frontend-tg --protocol HTTP --port 80 --vpc-id vpc-00000001 --target-type ip" + ) + tg_arn = json.loads(tg_out)["TargetGroups"][0]["TargetGroupArn"] + _, _, lb_out, _ = run( + "aws elbv2 create-load-balancer --name frontend-alb --subnets subnet-00000001 subnet-00000002" + ) + lb_arn = json.loads(lb_out)["LoadBalancers"][0]["LoadBalancerArn"] + run( + f"aws elbv2 create-listener --load-balancer-arn {lb_arn} --protocol HTTP --port 80 --default-actions Type=forward,TargetGroupArn={tg_arn}" + ) + _, _, hz_out, _ = run( + "aws route53 create-hosted-zone --name example.internal --caller-reference ref-91" + ) + hz_id = json.loads(hz_out)["HostedZone"]["Id"].split("/")[-1] + batch = json.dumps( + { + "Changes": [ + { + "Action": "CREATE", + "ResourceRecordSet": { + "Name": "example.internal", + "Type": "A", + "TTL": 
300, + "ResourceRecords": [{"Value": "1.2.3.4"}], + }, + } + ] + } + ) + run( + f"aws route53 change-resource-record-sets --hosted-zone-id {hz_id} --change-batch '{batch}'" + ) + + elif task_id == 92: + _, _, pool_out, _ = run( + "aws cognito-idp create-user-pool --pool-name app-users" + ) + pool_id = json.loads(pool_out)["UserPool"]["Id"] + run( + f"aws cognito-idp create-user-pool-client --user-pool-id {pool_id} --client-name app-client" + ) + run( + f"aws iam create-role --role-name auth-handler-role --assume-role-policy-document {_assume('lambda.amazonaws.com')}" + ) + run( + f"aws lambda create-function --function-name auth-handler --runtime python3.12 --handler index.handler --role {_ROLE}/auth-handler-role {_LAMBDA_CODE}" + ) + _, _, api_out, _ = run( + "aws apigatewayv2 create-api --name auth-api --protocol-type HTTP" + ) + api_id = json.loads(api_out)["ApiId"] + run( + f"aws apigatewayv2 create-authorizer --api-id {api_id} --authorizer-type JWT --name cognito-auth --identity-source $request.header.Authorization --jwt-configuration Issuer=https://cognito-idp.us-east-1.amazonaws.com/{pool_id},Audience={pool_id}" + ) + + elif task_id == 93: + run("aws s3api create-bucket --bucket cfn-templates") + run( + "aws s3api put-object --bucket cfn-templates --key template.yaml --content-type application/x-yaml" + ) + run( + f"aws iam create-role --role-name cfn-deploy-role --assume-role-policy-document {_assume('cloudformation.amazonaws.com')}" + ) + run( + 'aws cloudformation create-stack --stack-name app-stack --template-body \'{"AWSTemplateFormatVersion":"2010-09-09","Resources":{}}\'' + ) + + elif task_id == 94: + run("aws s3api create-bucket --bucket data-lake-raw") + run("aws s3api create-bucket --bucket data-lake-processed") + run( + f"aws iam create-role --role-name glue-etl-role --assume-role-policy-document {_assume('glue.amazonaws.com')}" + ) + run('aws glue create-database --database-input \'{"Name":"analytics-db"}\'') + run( + f'aws glue create-crawler 
--name raw-data-crawler --role {_ROLE}/glue-etl-role --database-name analytics-db --targets \'{{"S3Targets":[{{"Path":"s3://data-lake-raw/"}}]}}\'' + ) + + elif task_id == 95: + run("aws s3api create-bucket --bucket event-archive") + run( + f"aws iam create-role --role-name firehose-delivery-role --assume-role-policy-document {_assume('firehose.amazonaws.com')}" + ) + run( + "aws firehose create-delivery-stream --delivery-stream-name event-stream --s3-destination-configuration RoleARN=arn:aws:iam::000000000000:role/firehose-delivery-role,BucketARN=arn:aws:s3:::event-archive" + ) + run( + "aws firehose put-record --delivery-stream-name event-stream --record Data=dGVzdCBldmVudA==" + ) + + elif task_id == 96: + run( + f"aws iam create-role --role-name db-cleanup-role --assume-role-policy-document {_assume('lambda.amazonaws.com')}" + ) + run( + f"aws lambda create-function --function-name db-cleanup --runtime python3.12 --handler index.handler --role {_ROLE}/db-cleanup-role {_LAMBDA_CODE}" + ) + run( + 'aws events put-rule --name nightly-cleanup --schedule-expression "cron(0 0 * * ? 
*)"' + ) + run( + "aws events put-targets --rule nightly-cleanup --targets Id=1,Arn=arn:aws:lambda:us-east-1:000000000000:function:db-cleanup" + ) + run( + "aws lambda add-permission --function-name db-cleanup --statement-id events-invoke --action lambda:InvokeFunction --principal events.amazonaws.com --source-arn arn:aws:events:us-east-1:000000000000:rule/nightly-cleanup" + ) + + elif task_id == 97: + run( + "aws ssm put-parameter --name app-config-db-host --type String --value db.internal.local" + ) + run( + "aws ssm put-parameter --name app-config-api-key --type String --value sk-test-123" + ) + run( + f"aws iam create-role --role-name config-reader-role --assume-role-policy-document {_assume('lambda.amazonaws.com')}" + ) + run( + f"aws lambda create-function --function-name config-reader --runtime python3.12 --handler index.handler --role {_ROLE}/config-reader-role {_LAMBDA_CODE}" + ) + run( + 'aws events put-rule --name config-refresh --schedule-expression "rate(1 hour)"' + ) + run( + "aws events put-targets --rule config-refresh --targets Id=1,Arn=arn:aws:lambda:us-east-1:000000000000:function:config-reader" + ) + + elif task_id == 98: + _, _, sg_out, _ = run( + 'aws ec2 create-security-group --group-name cache-sg --description "Redis access"' + ) + sg_id = json.loads(sg_out)["GroupId"] + run( + f"aws ec2 authorize-security-group-ingress --group-id {sg_id} --protocol tcp --port 6379 --cidr 10.0.0.0/16" + ) + run( + 'aws elasticache create-cache-subnet-group --cache-subnet-group-name cache-subnets --cache-subnet-group-description "subnets" --subnet-ids subnet-00000001' + ) + run( + f"aws elasticache create-cache-cluster --cache-cluster-id session-store --engine redis --cache-node-type cache.t3.micro --num-cache-nodes 1 --security-group-ids {sg_id}" + ) + run( + f"aws iam create-policy --policy-name cache-access --policy-document {_SIMPLE_POLICY}" + ) + + elif task_id == 99: + _, _, sg_out, _ = run( + 'aws ec2 create-security-group --group-name efs-sg 
--description "NFS access"' + ) + sg_id = json.loads(sg_out)["GroupId"] + run( + f"aws ec2 authorize-security-group-ingress --group-id {sg_id} --protocol tcp --port 2049 --cidr 10.0.0.0/16" + ) + _, _, efs_out, _ = run("aws efs create-file-system --creation-token shared-fs") + fs_id = json.loads(efs_out)["FileSystemId"] + run( + f"aws efs create-mount-target --file-system-id {fs_id} --subnet-id subnet-00000001 --security-groups {sg_id}" + ) + run( + f"aws iam create-policy --policy-name efs-access --policy-document {_SIMPLE_POLICY}" + ) + + elif task_id == 100: + run("aws s3api create-bucket --bucket emr-logs") + run("aws s3api create-bucket --bucket emr-output") + run( + f"aws iam create-role --role-name emr-service-role --assume-role-policy-document {_assume('elasticmapreduce.amazonaws.com')}" + ) + run("aws iam create-instance-profile --instance-profile-name emr-ec2-profile") + run( + "aws emr create-cluster --name analytics-cluster --release-label emr-6.15.0 --instance-type m5.xlarge --instance-count 1" + ) + + elif task_id == 101: + _, _, table_out, _ = run( + "aws dynamodb create-table --table-name user-activity --key-schema AttributeName=user_id,KeyType=HASH --attribute-definitions AttributeName=user_id,AttributeType=S --billing-mode PAY_PER_REQUEST --stream-specification StreamEnabled=true,StreamViewType=NEW_AND_OLD_IMAGES" + ) + stream_arn = ( + json.loads(table_out) + .get("TableDescription", {}) + .get( + "LatestStreamArn", + "arn:aws:dynamodb:us-east-1:000000000000:table/user-activity/stream/dummy", + ) + ) + run("aws sqs create-queue --queue-name activity-dlq") + run( + f"aws iam create-role --role-name activity-processor-role --assume-role-policy-document {_assume('lambda.amazonaws.com')}" + ) + run( + f"aws lambda create-function --function-name activity-processor --runtime python3.12 --handler index.handler --role {_ROLE}/activity-processor-role {_LAMBDA_CODE}" + ) + run( + f"aws lambda create-event-source-mapping --function-name activity-processor 
--event-source-arn {stream_arn} --starting-position LATEST" + ) + + elif task_id == 102: + run("aws sns create-topic --name system-alerts") + run("aws sqs create-queue --queue-name alert-archive") + run( + "aws sns subscribe --topic-arn arn:aws:sns:us-east-1:000000000000:system-alerts --protocol sqs --notification-endpoint arn:aws:sqs:us-east-1:000000000000:alert-archive" + ) + run( + f"aws iam create-role --role-name alert-handler-role --assume-role-policy-document {_assume('lambda.amazonaws.com')}" + ) + run( + f"aws lambda create-function --function-name alert-handler --runtime python3.12 --handler index.handler --role {_ROLE}/alert-handler-role {_LAMBDA_CODE}" + ) + run( + "aws sns subscribe --topic-arn arn:aws:sns:us-east-1:000000000000:system-alerts --protocol lambda --notification-endpoint arn:aws:lambda:us-east-1:000000000000:function:alert-handler" + ) + run( + 'aws sns publish --topic-arn arn:aws:sns:us-east-1:000000000000:system-alerts --message "test alert"' + ) + + elif task_id == 103: + run( + "aws dynamodb create-table --table-name tasks-table --key-schema AttributeName=task_id,KeyType=HASH --attribute-definitions AttributeName=task_id,AttributeType=S --billing-mode PAY_PER_REQUEST" + ) + run( + f"aws iam create-role --role-name tasks-api-role --assume-role-policy-document {_assume('lambda.amazonaws.com')}" + ) + run( + f"aws lambda create-function --function-name tasks-api-handler --runtime python3.12 --handler index.handler --role {_ROLE}/tasks-api-role {_LAMBDA_CODE}" + ) + _, _, api_out, _ = run( + "aws apigatewayv2 create-api --name tasks-api --protocol-type HTTP" + ) + api_id = json.loads(api_out)["ApiId"] + run( + f"aws apigatewayv2 create-integration --api-id {api_id} --integration-type AWS_PROXY --integration-uri arn:aws:lambda:us-east-1:000000000000:function:tasks-api-handler --payload-format-version 2.0" + ) + run(f'aws apigatewayv2 create-route --api-id {api_id} --route-key "GET /tasks"') + + elif task_id == 104: + run("aws s3api 
create-bucket --bucket secure-input") + run("aws s3api create-bucket --bucket secure-output") + policy = json.dumps( + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Deny", + "Principal": "*", + "Action": "s3:PutObject", + "Resource": "arn:aws:s3:::secure-input/*", + "Condition": { + "StringNotEquals": { + "s3:x-amz-server-side-encryption": "AES256" + } + }, + } + ], + } + ) + run(f"aws s3api put-bucket-policy --bucket secure-input --policy '{policy}'") + run( + f"aws iam create-role --role-name data-transformer-role --assume-role-policy-document {_assume('lambda.amazonaws.com')}" + ) + run( + f"aws lambda create-function --function-name data-transformer --runtime python3.12 --handler index.handler --role {_ROLE}/data-transformer-role {_LAMBDA_CODE}" + ) + + elif task_id == 105: + run( + "aws secretsmanager create-secret --name third-party-api-key --secret-string sk-live-abc123" + ) + run( + f"aws iam create-role --role-name external-caller-role --assume-role-policy-document {_assume('lambda.amazonaws.com')}" + ) + run( + f"aws lambda create-function --function-name external-caller --runtime python3.12 --handler index.handler --role {_ROLE}/external-caller-role {_LAMBDA_CODE}" + ) + _, _, api_out, _ = run("aws apigateway create-rest-api --name external-api") + api_id = json.loads(api_out)["id"] + _, _, res_list, _ = run(f"aws apigateway get-resources --rest-api-id {api_id}") + root_id = next( + i["id"] for i in json.loads(res_list)["items"] if i["path"] == "/" + ) + _, _, res_out, _ = run( + f"aws apigateway create-resource --rest-api-id {api_id} --parent-id {root_id} --path-part call" + ) + res_id = json.loads(res_out)["id"] + run( + f"aws apigateway put-method --rest-api-id {api_id} --resource-id {res_id} --http-method GET --authorization-type NONE" + ) + run( + f"aws apigateway put-integration --rest-api-id {api_id} --resource-id {res_id} --http-method GET --type AWS_PROXY --integration-http-method POST --uri 
arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:000000000000:function:external-caller/invocations" + ) + + elif task_id == 106: + run( + f"aws iam create-role --role-name batch-task-role --assume-role-policy-document {_assume('ecs-tasks.amazonaws.com')}" + ) + run("aws ecs create-cluster --cluster-name batch-cluster") + run( + 'aws ecs register-task-definition --family batch-job --container-definitions \'[{"name":"batch","image":"python:3.12","memory":256,"cpu":128}]\' --requires-compatibilities FARGATE --network-mode awsvpc --cpu 256 --memory 512' + ) + run( + 'aws ec2 create-security-group --group-name batch-sg --description "Batch SG"' + ) + run( + "aws ecs run-task --cluster batch-cluster --task-definition batch-job --launch-type FARGATE --network-configuration awsvpcConfiguration={subnets=[subnet-00000001],securityGroups=[sg-00000001]}" + ) + + elif task_id == 107: + run("aws s3api create-bucket --bucket query-results") + run("aws s3api create-bucket --bucket analytics-data") + run('aws glue create-database --database-input \'{"Name":"web-analytics"}\'') + run( + f"aws iam create-policy --policy-name athena-access --policy-document {_SIMPLE_POLICY}" + ) + run( + "aws athena create-work-group --name analytics-team --configuration ResultConfiguration={OutputLocation=s3://query-results/}" + ) + + elif task_id == 108: + run("aws s3api create-bucket --bucket lambda-artifacts") + run( + "aws s3api put-object --bucket lambda-artifacts --key function.zip --content-type application/zip" + ) + run( + f"aws iam create-role --role-name cfn-lambda-role --assume-role-policy-document {_assume('cloudformation.amazonaws.com')}" + ) + run( + f"aws iam create-role --role-name lambda-exec-role --assume-role-policy-document {_assume('lambda.amazonaws.com')}" + ) + run( + 'aws cloudformation create-stack --stack-name lambda-stack --template-body \'{"AWSTemplateFormatVersion":"2010-09-09","Resources":{}}\'' + ) + + return R + + +# All task IDs 
from the YAML +ALL_TASK_IDS = [ + 15, + 16, + 17, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, +] + + +@pytest.fixture +def backend() -> AwsBackend: + b = AwsBackend() + b.reset_environment() + return b + + +@pytest.fixture +def grader(backend: AwsBackend) -> TaskGrader: + return TaskGrader(backend) + + +@pytest.fixture(scope="module") +def advanced_tasks() -> list[dict]: + with open(TASKS_FILE) as f: + return yaml.safe_load(f) + + +def _build_task(entry: dict) -> Task: + return Task( + task_id=TaskID(entry["task_id"]), + difficulty=TaskDifficulty.ADVANCED, + description=entry["description"], + success_criteria=SuccessCriteria(**entry.get("success_criteria", {})), + setup_commands=[ + SetupCommand(command=cmd) if isinstance(cmd, str) else SetupCommand(**cmd) + for cmd in entry.get("setup_commands", []) + ], + ) + + +def test_all_advanced_tasks_have_commands(advanced_tasks: list[dict]) -> None: + """Every advanced task in the YAML must have a corresponding test.""" + missing = [t["task_id"] for t in advanced_tasks if t["task_id"] not in ALL_TASK_IDS] + assert not missing, f"No test commands mapped for task_ids: {missing}" + + +@pytest.mark.parametrize( + "task_id", ALL_TASK_IDS, ids=[f"task_{t}" for t in ALL_TASK_IDS] +) +def test_advanced_task_commands_execute(task_id: int, backend: AwsBackend) -> None: + """All commands must execute successfully against MiniStack.""" + results = _execute_task(task_id, backend) + for i, (cmd, success, stdout, stderr) in enumerate(results): + assert success, ( + f"Command {i + 1}/{len(results)} failed for task {task_id}.\n" + f" Command: {cmd}\n" + f" Stderr: {stderr}" + ) + + +@pytest.mark.parametrize( + "task_id", ALL_TASK_IDS, ids=[f"task_{t}" for t in ALL_TASK_IDS] +) +def test_advanced_task_grading( + task_id: int, + advanced_tasks: list[dict], + backend: AwsBackend, + grader: TaskGrader, +) -> None: + """Execute full sequence and 
verify grader marks task as achieved.""" + entry = next((t for t in advanced_tasks if t["task_id"] == task_id), None) + assert entry is not None, f"task_id {task_id} not found in advanced.yaml" + + task = _build_task(entry) + results = _execute_task(task_id, backend) + + tracker = EpisodeTracker() + for cmd, success, stdout, stderr in results: + step = tracker.record_step(cmd, success, stdout, stderr) + + result = grader.grade(task, tracker, step) + + all_cmds = [r[0] for r in results] + assert result.task_achieved, ( + f"Task {task_id} not achieved.\n" + f" Description: {entry['description']}\n" + f" Commands: {all_cmds}\n" + f" Reason: {result.reason}\n" + f" Reward: {result.reward}" + ) + assert result.reward == 1.0, f"Expected reward=1.0, got {result.reward}" + + +@pytest.mark.parametrize( + "task_id", ALL_TASK_IDS, ids=[f"task_{t}_partial" for t in ALL_TASK_IDS] +) +def test_advanced_task_partial_gives_no_completion( + task_id: int, + advanced_tasks: list[dict], + backend: AwsBackend, + grader: TaskGrader, +) -> None: + """Executing only the first command should not achieve a multi-step task.""" + entry = next((t for t in advanced_tasks if t["task_id"] == task_id), None) + assert entry is not None + + steps = entry.get("success_criteria", {}).get("steps", []) + if len(steps) < 2: + pytest.skip("Single-step task") + + task = _build_task(entry) + + # Run only the first command + results = _execute_task(task_id, backend) + cmd, success, stdout, stderr = results[0] + tracker = EpisodeTracker() + step = tracker.record_step(cmd, success, stdout, stderr) + result = grader.grade(task, tracker, step) + + assert not result.task_achieved, ( + f"Task {task_id} should NOT be achieved with only the first command.\n" + f" Command: {cmd}\n Reason: {result.reason}" + ) + assert result.reward < 1.0 diff --git a/tests_tasks/test_beginner_tasks.py b/tests_tasks/test_beginner_tasks.py new file mode 100644 index 
0000000000000000000000000000000000000000..90fa6efb24df0bdf60791c90790fbf17658f0b9f --- /dev/null +++ b/tests_tasks/test_beginner_tasks.py @@ -0,0 +1,232 @@ +"""Tests for beginner-tier tasks — verifies resource creation and grading. + +Beginner tasks require the agent to create a specific AWS resource. The grader +checks both command matching AND that the resource actually exists in MiniStack +via the ResourceVerifier. + +Each test resets MiniStack, runs the correct create command, and asserts the +grader returns task_achieved=True with reward=1.0. + +Run inside Docker: + docker exec aws-rl-env python -m pytest tests/test_beginner_tasks.py -v +""" + +import pytest +import yaml +from pathlib import Path + +from models import SuccessCriteria, Task, TaskID, TaskDifficulty, SetupCommand +from server.services.aws_backend import AwsBackend +from server.services.task_grader import TaskGrader +from server.services.episode_tracker import EpisodeTracker + +TASKS_FILE = ( + Path(__file__).resolve().parent.parent + / "server" + / "services" + / "tasks" + / "beginner.yaml" +) + +# Mapping of task_id -> correct AWS CLI command to create the resource +BEGINNER_COMMANDS: dict[int, str] = { + 6: "aws s3api create-bucket --bucket my-test-bucket", + 7: ( + "aws dynamodb create-table --table-name users " + "--key-schema AttributeName=user_id,KeyType=HASH " + "--attribute-definitions AttributeName=user_id,AttributeType=S " + "--billing-mode PAY_PER_REQUEST" + ), + 8: "aws sqs create-queue --queue-name task-queue", + 9: "aws sns create-topic --name notifications", + 10: ( + "aws lambda create-function --function-name hello-world " + "--runtime python3.12 --role arn:aws:iam::000000000000:role/lambda-role " + "--handler index.handler --code S3Bucket=dummy,S3Key=dummy.zip" + ), + 46: ( + "aws iam create-role --role-name lambda-exec-role " + '--assume-role-policy-document \'{"Version":"2012-10-17",' + '"Statement":[{"Effect":"Allow","Principal":{"Service":"lambda.amazonaws.com"},' + 
'"Action":"sts:AssumeRole"}]}\'' + ), + 47: ( + "aws secretsmanager create-secret --name db-credentials " + '--secret-string \'{"username":"admin","password":"secret123"}\'' + ), + 48: "aws ecs create-cluster --cluster-name web-cluster", + 49: ( + "aws rds create-db-instance --db-instance-identifier app-database " + "--engine mysql --db-instance-class db.t3.micro " + "--master-username admin --master-user-password Password123" + ), + 50: ( + "aws elasticache create-cache-cluster --cache-cluster-id session-cache " + "--engine redis --cache-node-type cache.t3.micro --num-cache-nodes 1" + ), + 51: ( + "aws route53 create-hosted-zone --name example.internal " + "--caller-reference unique-ref-123" + ), + 52: ( + "aws elbv2 create-load-balancer --name web-alb " + "--subnets subnet-00000001 subnet-00000002" + ), + 53: "aws ec2 create-volume --size 20 --availability-zone us-east-1a", + 54: "aws efs create-file-system --creation-token shared-storage", + 55: "aws cognito-idp create-user-pool --pool-name app-users", + 56: ( + "aws ssm put-parameter --name /config/app/database-url " + "--type String --value mysql://localhost:3306/mydb" + ), + 57: 'aws events put-rule --name daily-cleanup --schedule-expression "rate(1 day)"', + 58: ( + "aws cloudformation create-stack --stack-name vpc-stack " + '--template-body \'{"AWSTemplateFormatVersion":"2010-09-09","Resources":{}}\'' + ), + 59: "aws apigateway create-rest-api --name orders-api", + 60: "aws apigatewayv2 create-api --name payments-api --protocol-type HTTP", + 61: 'aws glue create-database --database-input \'{"Name":"analytics-db"}\'', + 62: "aws firehose create-delivery-stream --delivery-stream-name log-stream", + 63: ( + "aws iam create-policy --policy-name s3-read-policy " + '--policy-document \'{"Version":"2012-10-17",' + '"Statement":[{"Effect":"Allow","Action":"s3:GetObject","Resource":"*"}]}\'' + ), + 64: "aws iam create-user --user-name deploy-bot", + 65: ( + "aws lambda create-function --function-name data-processor 
" + "--runtime python3.12 --handler index.handler " + "--role arn:aws:iam::000000000000:role/lambda-exec-role " + "--code S3Bucket=dummy,S3Key=dummy.zip" + ), +} + + +@pytest.fixture(scope="module") +def backend() -> AwsBackend: + return AwsBackend() + + +@pytest.fixture(scope="module") +def grader(backend: AwsBackend) -> TaskGrader: + return TaskGrader(backend) + + +@pytest.fixture(scope="module") +def beginner_tasks() -> list[dict]: + with open(TASKS_FILE) as f: + return yaml.safe_load(f) + + +def _build_task(entry: dict) -> Task: + """Build a Task model from a raw YAML entry.""" + return Task( + task_id=TaskID(entry["task_id"]), + difficulty=TaskDifficulty.BEGINNER, + description=entry["description"], + success_criteria=SuccessCriteria(**entry.get("success_criteria", {})), + setup_commands=[ + SetupCommand(command=cmd) if isinstance(cmd, str) else SetupCommand(**cmd) + for cmd in entry.get("setup_commands", []) + ], + ) + + +def test_all_beginner_tasks_have_commands(beginner_tasks: list[dict]) -> None: + """Every beginner task in the YAML must have a corresponding test command.""" + missing = [ + t["task_id"] for t in beginner_tasks if t["task_id"] not in BEGINNER_COMMANDS + ] + assert not missing, f"No test command mapped for task_ids: {missing}" + + +@pytest.mark.parametrize( + "task_id", + sorted(BEGINNER_COMMANDS.keys()), + ids=[f"task_{tid}" for tid in sorted(BEGINNER_COMMANDS.keys())], +) +def test_beginner_task_command_executes( + task_id: int, + backend: AwsBackend, +) -> None: + """The create command must execute successfully against MiniStack.""" + backend.reset_environment() + cmd = BEGINNER_COMMANDS[task_id] + success, stdout, stderr = backend.execute_command(cmd) + assert success, ( + f"Command failed for task {task_id}.\n Command: {cmd}\n Stderr: {stderr}" + ) + + +@pytest.mark.parametrize( + "task_id", + sorted(BEGINNER_COMMANDS.keys()), + ids=[f"task_{tid}" for tid in sorted(BEGINNER_COMMANDS.keys())], +) +def test_beginner_task_grading( + 
task_id: int, + beginner_tasks: list[dict], + backend: AwsBackend, + grader: TaskGrader, +) -> None: + """Create the resource and verify the grader marks the task as achieved.""" + entry = next((t for t in beginner_tasks if t["task_id"] == task_id), None) + assert entry is not None, f"task_id {task_id} not found in beginner.yaml" + + # Reset MiniStack for a clean slate + backend.reset_environment() + + task = _build_task(entry) + cmd = BEGINNER_COMMANDS[task_id] + + # Execute the create command + success, stdout, stderr = backend.execute_command(cmd) + assert success, ( + f"Command failed for task {task_id}.\n Command: {cmd}\n Stderr: {stderr}" + ) + + # Grade the step + tracker = EpisodeTracker() + step = tracker.record_step(cmd, success, stdout, stderr) + result = grader.grade(task, tracker, step) + + assert result.task_achieved, ( + f"Task {task_id} not achieved.\n" + f" Description: {entry['description']}\n" + f" Command: {cmd}\n" + f" Reason: {result.reason}\n" + f" Reward: {result.reward}" + ) + assert result.reward == 1.0, f"Expected reward=1.0, got {result.reward}" + + +@pytest.mark.parametrize( + "task_id", + sorted(BEGINNER_COMMANDS.keys()), + ids=[f"task_{tid}_wrong_cmd" for tid in sorted(BEGINNER_COMMANDS.keys())], +) +def test_beginner_task_rejects_wrong_command( + task_id: int, + beginner_tasks: list[dict], + backend: AwsBackend, + grader: TaskGrader, +) -> None: + """A wrong command should not achieve a beginner task.""" + entry = next((t for t in beginner_tasks if t["task_id"] == task_id), None) + assert entry is not None, f"task_id {task_id} not found in beginner.yaml" + + backend.reset_environment() + task = _build_task(entry) + + # Use a deliberately wrong command (list instead of create) + wrong_cmd = "aws sts get-caller-identity" + success, stdout, stderr = backend.execute_command(wrong_cmd) + tracker = EpisodeTracker() + step = tracker.record_step(wrong_cmd, success, stdout, stderr) + result = grader.grade(task, tracker, step) + + assert not 
result.task_achieved, ( + f"Task {task_id} should NOT be achieved with wrong command '{wrong_cmd}'" + ) + assert result.reward < 1.0 diff --git a/tests_tasks/test_drift_tasks.py b/tests_tasks/test_drift_tasks.py new file mode 100644 index 0000000000000000000000000000000000000000..8fd4386c79bc918eb7d58732c90154410ee3551e --- /dev/null +++ b/tests_tasks/test_drift_tasks.py @@ -0,0 +1,213 @@ +"""Tests for drift detection tasks (expert tier) — verifies setup and state checks. + +Drift tasks provision correct infrastructure via setup_commands, then the agent +must audit and fix any drifts. This test verifies that: +1. All setup_commands execute successfully against MiniStack +2. After setup (no drift applied), all state_checks pass +3. The grader marks the task as achieved when state is correct + +Run inside Docker: + docker exec python -m pytest tests/test_drift_tasks.py -v +""" + +import json + +import pytest +import yaml +from pathlib import Path + +from models import SuccessCriteria, Task, TaskID, TaskDifficulty, SetupCommand +from server.services.aws_backend import AwsBackend +from server.services.task_grader import TaskGrader +from server.services.episode_tracker import EpisodeTracker +from server.services.resource_verifier import ResourceVerifier + +TASKS_FILE = ( + Path(__file__).resolve().parent.parent + / "server" + / "services" + / "tasks" + / "drift.yaml" +) + + +@pytest.fixture(scope="module") +def all_drift_tasks() -> list[dict]: + with open(TASKS_FILE) as f: + return yaml.safe_load(f) + + +@pytest.fixture +def backend() -> AwsBackend: + b = AwsBackend() + b.reset_environment() + return b + + +@pytest.fixture +def grader(backend: AwsBackend) -> TaskGrader: + return TaskGrader(backend) + + +def _build_task(entry: dict) -> Task: + return Task( + task_id=TaskID(entry["task_id"]), + difficulty=TaskDifficulty.EXPERT, + description=entry["description"], + success_criteria=SuccessCriteria(**entry.get("success_criteria", {})), + setup_commands=[ + 
SetupCommand(command=cmd) if isinstance(cmd, str) else SetupCommand(**cmd) + for cmd in entry.get("setup_commands", []) + ], + desired_state_spec=entry.get("desired_state_spec"), + possible_drifts=[ + SetupCommand(command=d["command"]) + if isinstance(d, dict) + else SetupCommand(command=d) + for d in entry.get("possible_drifts", []) + ], + ) + + +def _get_task_ids(tasks: list[dict]) -> list[int]: + return [t["task_id"] for t in tasks] + + +# Load task IDs at import time for parametrize +with open(TASKS_FILE) as _f: + _ALL_ENTRIES = yaml.safe_load(_f) + _TASK_IDS = [t["task_id"] for t in _ALL_ENTRIES] + + +# --------------------------------------------------------------------------- +# Test 1: All setup_commands execute successfully +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("task_id", _TASK_IDS, ids=[f"task_{t}" for t in _TASK_IDS]) +def test_drift_setup_commands_execute( + task_id: int, + all_drift_tasks: list[dict], + backend: AwsBackend, +) -> None: + """Every setup_command must succeed against MiniStack.""" + backend.reset_environment() + entry = next(t for t in all_drift_tasks if t["task_id"] == task_id) + setup_cmds = entry.get("setup_commands", []) + + for i, cmd in enumerate(setup_cmds): + success, stdout, stderr = backend.execute_command(cmd) + assert success, ( + f"Setup command {i + 1}/{len(setup_cmds)} failed for task {task_id}.\n" + f" Command: {cmd}\n" + f" Stderr: {stderr}" + ) + + +# --------------------------------------------------------------------------- +# Test 2: After setup, all state_checks pass (no drift applied) +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("task_id", _TASK_IDS, ids=[f"task_{t}" for t in _TASK_IDS]) +def test_drift_state_checks_pass_after_setup( + task_id: int, + all_drift_tasks: list[dict], + backend: AwsBackend, +) -> None: + """After running setup_commands, all state_checks must pass.""" + 
backend.reset_environment() + entry = next(t for t in all_drift_tasks if t["task_id"] == task_id) + verifier = ResourceVerifier(backend) + + # Run setup + for cmd in entry.get("setup_commands", []): + backend.execute_command(cmd) + + # Verify each state_check + state_checks = entry.get("success_criteria", {}).get("state_checks", []) + for i, check in enumerate(state_checks): + passed = verifier.check_state(check) + assert passed, ( + f"State check {i + 1}/{len(state_checks)} failed for task {task_id}.\n" + f" Check: {json.dumps(check, indent=2)}" + ) + + +# --------------------------------------------------------------------------- +# Test 3: Grader marks task as achieved after setup + fix commands +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("task_id", _TASK_IDS, ids=[f"task_{t}" for t in _TASK_IDS]) +def test_drift_grading_after_setup( + task_id: int, + all_drift_tasks: list[dict], + backend: AwsBackend, + grader: TaskGrader, +) -> None: + """The grader should mark the task as achieved when state is correct.""" + backend.reset_environment() + entry = next(t for t in all_drift_tasks if t["task_id"] == task_id) + task = _build_task(entry) + + # Run setup commands and record them as the agent's "fix" actions. + # Commands are only run once — the tracker records the initial successful + # provisioning, which satisfies both the state_checks and services requirements. 
+ tracker = EpisodeTracker() + for cmd in entry.get("setup_commands", []): + success, stdout, stderr = backend.execute_command(cmd) + step = tracker.record_step(cmd, success, stdout, stderr) + + result = grader.grade(task, tracker, step) + + assert result.task_achieved, ( + f"Task {task_id} not achieved.\n" + f" Description: {entry['description']}\n" + f" Reason: {result.reason}\n" + f" Reward: {result.reward}" + ) + + +# --------------------------------------------------------------------------- +# Test 4: Each possible drift breaks at least one state_check +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("task_id", _TASK_IDS, ids=[f"task_{t}" for t in _TASK_IDS]) +def test_drift_mutations_break_state( + task_id: int, + all_drift_tasks: list[dict], + backend: AwsBackend, +) -> None: + """Applying each drift mutation should cause at least one state_check to fail.""" + entry = next(t for t in all_drift_tasks if t["task_id"] == task_id) + verifier = ResourceVerifier(backend) + state_checks = entry.get("success_criteria", {}).get("state_checks", []) + drifts = entry.get("possible_drifts", []) + + if not drifts: + pytest.skip("No possible drifts defined") + + for drift in drifts: + drift_cmd = drift["command"] if isinstance(drift, dict) else drift + drift_desc = ( + drift.get("description", drift_cmd) + if isinstance(drift, dict) + else drift_cmd + ) + + # Fresh setup + backend.reset_environment() + for cmd in entry.get("setup_commands", []): + backend.execute_command(cmd) + + # Apply drift + backend.execute_command(drift_cmd) + + # At least one state_check should now fail + all_pass = all(verifier.check_state(check) for check in state_checks) + assert not all_pass, ( + f"Drift did not break any state_check for task {task_id}.\n" + f" Drift: {drift_desc}" + ) diff --git a/tests_tasks/test_expert_tasks.py b/tests_tasks/test_expert_tasks.py new file mode 100644 index 
0000000000000000000000000000000000000000..bd07dfcfe8adc3a9f5a40a2d938ac54996315433 --- /dev/null +++ b/tests_tasks/test_expert_tasks.py @@ -0,0 +1,812 @@ +"""Tests for expert-tier tasks — verifies SRE incident resolution and security audit grading. + +Expert tasks require setup commands to provision initial (broken/vulnerable) state, +then the agent must diagnose and fix issues via multi-step AWS CLI commands. +The grader uses state_checks as ground truth for task completion. + +Each test resets MiniStack, provisions the setup state, executes the solution +command sequence, and asserts the grader returns task_achieved=True with reward=1.0. + +Run inside Docker: + docker exec -w /app/env aws-rl-env python -m pytest tests/test_expert_tasks.py -v +""" + +import json +import re + +import pytest +import yaml +from pathlib import Path + +from models import SuccessCriteria, Task, TaskID, TaskDifficulty, SetupCommand +from server.services.aws_backend import AwsBackend +from server.services.task_grader import TaskGrader +from server.services.episode_tracker import EpisodeTracker + +TASKS_FILE = ( + Path(__file__).resolve().parent.parent + / "server" + / "services" + / "tasks" + / "expert.yaml" +) + +# --------------------------------------------------------------------------- +# Solution commands for each expert task — ordered list of AWS CLI commands +# that resolve the SRE incident or pass the security audit. +# Diagnostic commands (list/describe) are included where needed to satisfy +# the services requirement in grading. 
+# --------------------------------------------------------------------------- + +EXPERT_COMMANDS: dict[int, list[str]] = { + # -- Task 18: SRE — Lambda missing SQS permissions + event source mapping -- + 18: [ + "aws sqs get-queue-url --queue-name incoming-orders", + ( + "aws iam attach-role-policy --role-name broken-lambda-role " + "--policy-arn arn:aws:iam::aws:policy/AmazonSQSFullAccess" + ), + ( + "aws lambda create-event-source-mapping " + "--function-name order-processor " + "--event-source-arn arn:aws:sqs:us-east-1:000000000000:incoming-orders " + "--batch-size 10" + ), + ], + # -- Task 19: SRE — S3 versioning + lifecycle rule ------------------------- + 19: [ + ( + "aws s3api put-bucket-versioning --bucket app-config-store " + "--versioning-configuration Status=Enabled" + ), + ( + "aws s3api put-bucket-lifecycle-configuration --bucket app-config-store " + "--lifecycle-configuration " + '\'{"Rules":[{"ID":"cleanup-old-versions","Status":"Enabled",' + '"NoncurrentVersionExpiration":{"NoncurrentDays":30},' + '"Filter":{"Prefix":""}}]}\'' + ), + ], + # -- Task 20: SRE — DynamoDB throughput + SNS subscription ----------------- + 20: [ + ( + "aws dynamodb update-table --table-name session-store " + "--provisioned-throughput ReadCapacityUnits=50,WriteCapacityUnits=50" + ), + "aws sqs create-queue --queue-name ops-alert-inbox", + ( + "aws sns subscribe " + "--topic-arn arn:aws:sns:us-east-1:000000000000:ops-alerts " + "--protocol sqs " + "--notification-endpoint arn:aws:sqs:us-east-1:000000000000:ops-alert-inbox" + ), + ], + # -- Task 21: Security — Replace overly permissive S3 bucket policy -------- + 21: [ + "aws s3api get-bucket-policy --bucket public-assets", + ( + "aws s3api put-bucket-policy --bucket public-assets " + "--policy " + '\'{"Version":"2012-10-17","Statement":[{"Effect":"Allow",' + '"Principal":{"AWS":"arn:aws:iam::000000000000:role/app-role"},' + '"Action":"s3:GetObject",' + '"Resource":"arn:aws:s3:::public-assets/*"}]}\'' + ), + ], + # -- Task 
22: Security — Replace overly broad IAM inline policy ------------ + 22: [ + "aws iam get-role-policy --role-name app-role --policy-name app-access", + ( + "aws iam put-role-policy --role-name app-role " + "--policy-name app-access " + "--policy-document " + '\'{"Version":"2012-10-17","Statement":[{"Effect":"Allow",' + '"Action":["dynamodb:GetItem","dynamodb:PutItem"],' + '"Resource":"arn:aws:dynamodb:us-east-1:000000000000:table/users"}]}\'' + ), + ], + # -- Task 23: Security — Move plaintext password to Secrets Manager -------- + 23: [ + ( + "aws secretsmanager create-secret " + "--name data-processor/db-password " + "--secret-string hunter2" + ), + ( + "aws lambda update-function-configuration " + "--function-name data-processor " + "--environment " + "Variables={SECRET_ARN=arn:aws:secretsmanager:us-east-1:000000000000:secret:data-processor/db-password}" + ), + ], + # -- Task 109: SRE — Lambda timeout + CloudWatch alarm --------------------- + 109: [ + ( + "aws lambda update-function-configuration " + "--function-name payment-webhook --timeout 30" + ), + ( + "aws cloudwatch put-metric-alarm --alarm-name payment-webhook-errors " + "--metric-name Errors --namespace AWS/Lambda --statistic Sum " + "--period 60 --evaluation-periods 1 --threshold 5 " + "--comparison-operator GreaterThanThreshold " + "--dimensions Name=FunctionName,Value=payment-webhook" + ), + ], + # -- Task 110: SRE — ECS service role policy + desired count --------------- + 110: [ + ( + "aws iam attach-role-policy --role-name ecs-service-role " + "--policy-arn arn:aws:iam::aws:policy/AmazonECS_FullAccess" + ), + ( + "aws ecs update-service --cluster prod-cluster " + "--service api-service --desired-count 3" + ), + ], + # -- Task 111: SRE — Start RDS + fix security group ----------------------- + 111: [ + "aws rds start-db-instance --db-instance-identifier analytics-db", + ( + "aws ec2 create-security-group --group-name analytics-db-sg-fixed " + '--description "Restricted MySQL access"' + ), + # 
authorize-security-group-ingress resolved dynamically (needs group-id) + ( + "aws rds modify-db-instance --db-instance-identifier analytics-db " + "--vpc-security-group-ids analytics-db-sg-fixed" + ), + ], + # -- Task 113: SRE — SQS visibility timeout (redrive resolved dynamically) - + 113: [ + ( + "aws sqs set-queue-attributes " + "--queue-url http://localhost:4566/000000000000/order-processing " + "--attributes VisibilityTimeout=120" + ), + # RedrivePolicy resolved dynamically (JSON format issue with shorthand) + ], + # -- Task 114: SRE — Route53 DNS record update (zone-id from setup) -------- + 114: [ + # change-resource-record-sets resolved dynamically (needs zone ID) + ], + # -- Task 115: SRE — ALB target group health check fix (DYNAMIC) ----------- + 115: [ + # Resolved dynamically after setup — needs target group ARN + ], + # -- Task 116: Security — Lambda resource policy fix ----------------------- + 116: [ + "aws iam list-roles", + ( + "aws lambda remove-permission " + "--function-name public-api-handler " + "--statement-id open-access" + ), + ( + "aws lambda add-permission " + "--function-name public-api-handler " + "--statement-id restricted-access " + "--action lambda:InvokeFunction " + "--principal apigateway.amazonaws.com " + "--source-arn arn:aws:execute-api:us-east-1:000000000000:*" + ), + ], + # -- Task 117: Security — S3 encryption + deny unencrypted uploads --------- + 117: [ + ( + "aws s3api put-bucket-encryption --bucket data-lake-raw " + "--server-side-encryption-configuration " + '\'{"Rules":[{"ApplyServerSideEncryptionByDefault":' + '{"SSEAlgorithm":"AES256"}}]}\'' + ), + ( + "aws s3api put-bucket-policy --bucket data-lake-raw " + "--policy " + '\'{"Version":"2012-10-17","Statement":[{"Effect":"Deny",' + '"Principal":"*","Action":"s3:PutObject",' + '"Resource":"arn:aws:s3:::data-lake-raw/*",' + '"Condition":{"StringNotEquals":' + '{"s3:x-amz-server-side-encryption":"AES256"}}}]}\'' + ), + ], + # -- Task 118: Security — DynamoDB PITR + TTL 
------------------------------ + 118: [ + ( + "aws dynamodb update-continuous-backups " + "--table-name financial-transactions " + "--point-in-time-recovery-specification PointInTimeRecoveryEnabled=true" + ), + ( + "aws dynamodb update-time-to-live " + "--table-name financial-transactions " + "--time-to-live-specification Enabled=true,AttributeName=expiry_timestamp" + ), + ], + # -- Task 119: Security — SSM SecureString + Secrets Manager --------------- + 119: [ + ( + "aws ssm put-parameter --name /app/database/password-secure " + "--value SuperSecret123 --type SecureString" + ), + ( + "aws secretsmanager create-secret " + "--name app/database-credentials " + "--secret-string " + '\'{"username":"admin","password":"SuperSecret123"}\'' + ), + ], + # -- Task 120: Security — IAM user managed + inline policy fix ------------ + 120: [ + ( + "aws iam detach-user-policy --user-name deploy-bot " + "--policy-arn arn:aws:iam::aws:policy/IAMFullAccess" + ), + ( + "aws iam delete-user-policy --user-name deploy-bot " + "--policy-name admin-access" + ), + ( + "aws iam put-user-policy --user-name deploy-bot " + "--policy-name deploy-only " + "--policy-document " + '\'{"Version":"2012-10-17","Statement":[{"Effect":"Allow",' + '"Action":["s3:PutObject","codedeploy:*"],' + '"Resource":"*"}]}\'' + ), + ], + # -- Task 121: SRE — EventBridge rule enable + Lambda target --------------- + 121: [ + "aws lambda get-function --function-name etl-runner", + ( + "aws events put-rule --name nightly-etl-trigger " + '--schedule-expression "cron(0 2 * * ? 
*)" ' + "--state ENABLED" + ), + ( + "aws events put-targets --rule nightly-etl-trigger " + "--targets Id=1,Arn=arn:aws:lambda:us-east-1:000000000000:function:etl-runner" + ), + ], + # -- Task 122: SRE — Firehose delivery stream prefix fix ------------------- + 122: [ + "aws s3api head-bucket --bucket clickstream-archive", + ( + "aws firehose delete-delivery-stream " + "--delivery-stream-name clickstream-delivery" + ), + ( + "aws firehose create-delivery-stream " + "--delivery-stream-name clickstream-delivery " + "--s3-destination-configuration " + '\'{"RoleARN":"arn:aws:iam::000000000000:role/firehose-role",' + '"BucketARN":"arn:aws:s3:::clickstream-archive",' + '"Prefix":"clickstream/year=!{timestamp:yyyy}/month=!{timestamp:MM}/"}\'' + ), + ], + # -- Task 123: SRE — SNS subscription DLQ + retention (DYNAMIC) ------------ + 123: [ + "aws sqs create-queue --queue-name order-notifications-dlq", + ( + "aws sqs set-queue-attributes " + "--queue-url http://localhost:4566/000000000000/order-notifications-dlq " + "--attributes MessageRetentionPeriod=1209600" + ), + # Dynamic: set-subscription-attributes resolved after setup + ], + # -- Task 124: Security — Encrypted EFS + NFS security group --------------- + 124: [ + ( + "aws efs create-file-system --creation-token shared-data-encrypted " + "--encrypted --tags Key=Name,Value=shared-data-encrypted" + ), + ( + "aws ec2 create-security-group --group-name efs-mount-sg " + '--description "NFS access for EFS"' + ), + # authorize-security-group-ingress resolved dynamically (needs group-id) + ], + # -- Task 125: SRE — Glue job script location fix -------------------------- + 125: [ + ( + "aws s3api head-object --bucket glue-scripts-bucket " + "--key scripts/daily-transform.py" + ), + ( + "aws glue update-job --job-name daily-transform " + "--job-update " + '\'{"Role":"arn:aws:iam::000000000000:role/glue-role",' + '"Command":{"Name":"glueetl",' + '"ScriptLocation":"s3://glue-scripts-bucket/scripts/daily-transform.py",' + 
'"PythonVersion":"3"}}\'' + ), + ], + # -- Task 126: Security — Cognito password policy fix (pool-id dynamic) ---- + 126: [ + # update-user-pool resolved dynamically (needs pool ID from setup) + ], + # -- Task 127: SRE — CloudFormation stack recovery ------------------------- + 127: [ + "aws s3api create-bucket --bucket legacy-data-backup", + "aws cloudformation delete-stack --stack-name legacy-infra", + ( + "aws cloudformation create-stack --stack-name legacy-infra-v2 " + "--template-body " + '\'{"AWSTemplateFormatVersion":"2010-09-09","Resources":{"Table":' + '{"Type":"AWS::DynamoDB::Table","Properties":{"TableName":"legacy-config",' + '"AttributeDefinitions":[{"AttributeName":"id","AttributeType":"S"}],' + '"KeySchema":[{"AttributeName":"id","KeyType":"HASH"}],' + '"BillingMode":"PAY_PER_REQUEST"}}}}\'' + ), + ], +} + +# Tasks that need dynamic command resolution from setup state +_DYNAMIC_TASK_IDS = {111, 113, 114, 115, 123, 124, 126} + +# --------------------------------------------------------------------------- +# MiniStack Compatibility — patching setup commands +# --------------------------------------------------------------------------- + + +def _patch_setup_command(cmd: str, state: dict[str, str]) -> str: + """Patch setup commands for MiniStack compatibility.""" + # Replace hardcoded Route53 zone-001 with tracked zone ID + if "zone-001" in cmd and "route53_zone_id" in state: + cmd = cmd.replace("zone-001", state["route53_zone_id"]) + + # Replace --group-name with --group-id for authorize-security-group-ingress + if "authorize-security-group-ingress" in cmd: + for key, val in state.items(): + if key.startswith("sg_"): + group_name = key[3:] + if f"--group-name {group_name}" in cmd: + cmd = cmd.replace( + f"--group-name {group_name}", + f"--group-id {val}", + ) + + return cmd + + +def _track_state(cmd: str, stdout: str, state: dict[str, str]) -> None: + """Track dynamic IDs from command outputs for subsequent commands.""" + try: + data = 
json.loads(stdout) if stdout.strip() else {} + except json.JSONDecodeError: + return + + # Track Route53 hosted zone ID + if "create-hosted-zone" in cmd and isinstance(data, dict): + hz = data.get("HostedZone", {}) + zone_id = hz.get("Id", "") + if "/" in zone_id: + zone_id = zone_id.split("/")[-1] + if zone_id: + state["route53_zone_id"] = zone_id + + # Track security group IDs + if "create-security-group" in cmd and isinstance(data, dict): + group_id = data.get("GroupId", "") + if group_id: + match = re.search(r"--group-name\s+(\S+)", cmd) + if match: + state[f"sg_{match.group(1)}"] = group_id + + # Track Cognito user pool ID + if "create-user-pool" in cmd and isinstance(data, dict): + pool = data.get("UserPool", {}) + pool_id = pool.get("Id", "") + if pool_id: + state["cognito_pool_id"] = pool_id + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _execute_setup( + task_entry: dict, backend: AwsBackend +) -> tuple[list[tuple[str, bool, str, str]], dict[str, str]]: + """Execute setup commands with patching; return results and tracked state.""" + results: list[tuple[str, bool, str, str]] = [] + state: dict[str, str] = {} + + for cmd in task_entry.get("setup_commands", []): + cmd = _patch_setup_command(cmd, state) + success, stdout, stderr = backend.execute_command(cmd) + results.append((cmd, success, stdout, stderr)) + if success: + _track_state(cmd, stdout, state) + + return results, state + + +def _resolve_dynamic_commands( + task_id: int, backend: AwsBackend, state: dict[str, str] +) -> list[str]: + """Generate commands that depend on dynamic IDs from setup state.""" + if task_id == 111: + # authorize-security-group-ingress needs group-id + sg_id = state.get("sg_analytics-db-sg-fixed", "") + if not sg_id: + # Try to get it from the create output + _, stdout, _ = backend.execute_command( + "aws ec2 describe-security-groups 
--group-names analytics-db-sg-fixed" + ) + try: + data = json.loads(stdout) + sg_id = data["SecurityGroups"][0]["GroupId"] + except (json.JSONDecodeError, KeyError, IndexError): + sg_id = "" + return [ + f"aws ec2 authorize-security-group-ingress " + f"--group-id {sg_id} " + f"--protocol tcp --port 3306 --cidr 10.0.1.0/24" + ] + + if task_id == 113: + # RedrivePolicy needs JSON format to avoid shorthand parsing issues + redrive = json.dumps( + { + "deadLetterTargetArn": "arn:aws:sqs:us-east-1:000000000000:order-processing-dlq", + "maxReceiveCount": "5", + } + ) + attrs = json.dumps({"RedrivePolicy": redrive}) + return [ + f"aws sqs set-queue-attributes " + f"--queue-url http://localhost:4566/000000000000/order-processing " + f"--attributes '{attrs}'" + ] + + if task_id == 114: + # Route53 zone-id from setup + zone_id = state.get("route53_zone_id", "zone-001") + change_batch = json.dumps( + { + "Changes": [ + { + "Action": "UPSERT", + "ResourceRecordSet": { + "Name": "api.example.com", + "Type": "A", + "TTL": 300, + "ResourceRecords": [{"Value": "10.0.1.50"}], + }, + } + ] + } + ) + return [ + f"aws route53 change-resource-record-sets " + f"--hosted-zone-id {zone_id} " + f"--change-batch '{change_batch}'" + ] + + if task_id == 115: + # Need target group ARN for modify-target-group + success, stdout, _ = backend.execute_command( + "aws elbv2 describe-target-groups --names web-targets" + ) + try: + data = json.loads(stdout) + tg_arn = data["TargetGroups"][0]["TargetGroupArn"] + except (json.JSONDecodeError, KeyError, IndexError): + tg_arn = "unknown" + return [ + f"aws elbv2 modify-target-group --target-group-arn {tg_arn} " + f"--health-check-path /health --health-check-port 80 " + f"--health-check-interval-seconds 15 --healthy-threshold-count 2" + ] + + if task_id == 123: + # Need subscription ARN for set-subscription-attributes + success, stdout, _ = backend.execute_command( + "aws sns list-subscriptions-by-topic " + "--topic-arn 
arn:aws:sns:us-east-1:000000000000:order-notifications" + ) + try: + data = json.loads(stdout) + sub_arn = data["Subscriptions"][0]["SubscriptionArn"] + except (json.JSONDecodeError, KeyError, IndexError): + sub_arn = "unknown" + redrive = json.dumps( + { + "deadLetterTargetArn": "arn:aws:sqs:us-east-1:000000000000:order-notifications-dlq" + } + ) + return [ + f"aws sns set-subscription-attributes --subscription-arn {sub_arn} " + f"--attribute-name RedrivePolicy " + f"--attribute-value '{redrive}'" + ] + + if task_id == 124: + # authorize-security-group-ingress needs group-id + sg_id = state.get("sg_efs-mount-sg", "") + if not sg_id: + _, stdout, _ = backend.execute_command( + "aws ec2 describe-security-groups --group-names efs-mount-sg" + ) + try: + data = json.loads(stdout) + sg_id = data["SecurityGroups"][0]["GroupId"] + except (json.JSONDecodeError, KeyError, IndexError): + sg_id = "" + return [ + f"aws ec2 authorize-security-group-ingress " + f"--group-id {sg_id} " + f"--protocol tcp --port 2049 --cidr 10.0.2.0/24" + ] + + if task_id == 126: + # Cognito user-pool-id from setup + pool_id = state.get("cognito_pool_id", "us-east-1_customer-auth") + policies = json.dumps( + { + "PasswordPolicy": { + "MinimumLength": 12, + "RequireUppercase": True, + "RequireLowercase": True, + "RequireNumbers": True, + "RequireSymbols": True, + "TemporaryPasswordValidityDays": 1, + } + } + ) + return [ + f"aws cognito-idp update-user-pool " + f"--user-pool-id {pool_id} " + f"--policies '{policies}'" + ] + + return [] + + +def _execute_all_commands( + task_id: int, backend: AwsBackend, state: dict[str, str] | None = None +) -> list[tuple[str, bool, str, str]]: + """Execute static + dynamic solution commands, return all (cmd, ok, out, err).""" + if state is None: + state = {} + + static_cmds = EXPERT_COMMANDS[task_id] + results: list[tuple[str, bool, str, str]] = [] + + for cmd in static_cmds: + success, stdout, stderr = backend.execute_command(cmd) + results.append((cmd, success, 
stdout, stderr)) + # Track security group IDs from solution commands too + if success: + _track_state(cmd, stdout, state) + + if task_id in _DYNAMIC_TASK_IDS: + extra_cmds = _resolve_dynamic_commands(task_id, backend, state) + for cmd in extra_cmds: + success, stdout, stderr = backend.execute_command(cmd) + results.append((cmd, success, stdout, stderr)) + + return results + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="module") +def backend() -> AwsBackend: + return AwsBackend() + + +@pytest.fixture(scope="module") +def grader(backend: AwsBackend) -> TaskGrader: + return TaskGrader(backend) + + +@pytest.fixture(scope="module") +def expert_tasks() -> list[dict]: + with open(TASKS_FILE) as f: + return yaml.safe_load(f) + + +def _build_task(entry: dict, state: dict[str, str] | None = None) -> Task: + """Build a Task model, patching state_check commands with dynamic IDs.""" + task = Task( + task_id=TaskID(entry["task_id"]), + difficulty=TaskDifficulty.EXPERT, + description=entry["description"], + success_criteria=SuccessCriteria(**entry.get("success_criteria", {})), + setup_commands=[ + SetupCommand(command=cmd) if isinstance(cmd, str) else SetupCommand(**cmd) + for cmd in entry.get("setup_commands", []) + ], + ) + + # Patch state_check commands with dynamic IDs from setup + if state: + for check in task.success_criteria.state_checks: + if "route53_zone_id" in state and "zone-001" in check.command: + check.command = check.command.replace( + "zone-001", state["route53_zone_id"] + ) + if "cognito_pool_id" in state: + pool_id = state["cognito_pool_id"] + check.command = check.command.replace( + "us-east-1_customer-auth", pool_id + ) + + return task + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + 
+def test_all_expert_tasks_have_commands(expert_tasks: list[dict]) -> None:
+    """Every expert task in the YAML must have a corresponding test command sequence."""
+    missing = [
+        t["task_id"] for t in expert_tasks if t["task_id"] not in EXPERT_COMMANDS
+    ]
+    assert not missing, f"No test commands mapped for task_ids: {missing}"
+
+
+@pytest.mark.parametrize(
+    "task_id",
+    sorted(EXPERT_COMMANDS.keys()),
+    ids=[f"task_{tid}" for tid in sorted(EXPERT_COMMANDS.keys())],
+)
+def test_expert_task_setup_executes(
+    task_id: int,
+    expert_tasks: list[dict],
+    backend: AwsBackend,
+) -> None:
+    """All setup commands must execute successfully to provision initial state."""
+    entry = next((t for t in expert_tasks if t["task_id"] == task_id), None)
+    assert entry is not None, f"task_id {task_id} not found in expert.yaml"
+
+    backend.reset_environment()
+    results, _ = _execute_setup(entry, backend)
+    for i, (cmd, success, stdout, stderr) in enumerate(results):
+        assert success, (
+            f"Setup command {i + 1}/{len(results)} failed for task {task_id}.\n"
+            f" Command: {cmd}\n"
+            f" Stderr: {stderr}"
+        )
+
+
+@pytest.mark.parametrize(
+    "task_id",
+    sorted(EXPERT_COMMANDS.keys()),
+    ids=[f"task_{tid}" for tid in sorted(EXPERT_COMMANDS.keys())],
+)
+def test_expert_task_commands_execute(
+    task_id: int,
+    expert_tasks: list[dict],
+    backend: AwsBackend,
+) -> None:
+    """All solution commands must execute successfully after setup."""
+    entry = next((t for t in expert_tasks if t["task_id"] == task_id), None)
+    assert entry is not None
+
+    backend.reset_environment()
+    _, state = _execute_setup(entry, backend)
+    results = _execute_all_commands(task_id, backend, state)
+    for i, (cmd, success, stdout, stderr) in enumerate(results):
+        assert success, (
+            f"Command {i + 1}/{len(results)} failed for task {task_id}.\n"
+            f" Command: {cmd}\n"
+            f" Stderr: {stderr}"
+        )
+
+
+@pytest.mark.parametrize(
+    "task_id",
+    sorted(EXPERT_COMMANDS.keys()),
+    ids=[f"task_{tid}" for tid in sorted(EXPERT_COMMANDS.keys())],
+)
+def test_expert_task_grading(
+    task_id: int,
+    expert_tasks: list[dict],
+    backend: AwsBackend,
+    grader: TaskGrader,
+) -> None:
+    """Execute setup + full solution and verify the grader marks the task as achieved."""
+    entry = next((t for t in expert_tasks if t["task_id"] == task_id), None)
+    assert entry is not None, f"task_id {task_id} not found in expert.yaml"
+
+    backend.reset_environment()
+    _, state = _execute_setup(entry, backend)
+    task = _build_task(entry, state)
+    results = _execute_all_commands(task_id, backend, state)
+
+    tracker = EpisodeTracker()
+    for cmd, success, stdout, stderr in results:
+        step = tracker.record_step(cmd, success, stdout, stderr)
+
+    result = grader.grade(task, tracker, step)
+
+    all_cmds = [r[0] for r in results]
+    assert result.task_achieved, (
+        f"Task {task_id} not achieved.\n"
+        f" Description: {entry['description']}\n"
+        f" Commands: {all_cmds}\n"
+        f" Reason: {result.reason}\n"
+        f" Reward: {result.reward}"
+    )
+    assert result.reward == 1.0, f"Expected reward=1.0, got {result.reward}"
+
+
+@pytest.mark.parametrize(
+    "task_id",
+    sorted(EXPERT_COMMANDS.keys()),
+    ids=[f"task_{tid}_setup_only" for tid in sorted(EXPERT_COMMANDS.keys())],
+)
+def test_expert_task_setup_only_gives_no_completion(
+    task_id: int,
+    expert_tasks: list[dict],
+    backend: AwsBackend,
+    grader: TaskGrader,
+) -> None:
+    """Running only setup commands (no agent fix actions) should not achieve the task."""
+    entry = next((t for t in expert_tasks if t["task_id"] == task_id), None)
+    assert entry is not None
+
+    backend.reset_environment()
+    _, state = _execute_setup(entry, backend)
+    task = _build_task(entry, state)
+
+    # Agent does a no-op command to produce a StepRecord
+    tracker = EpisodeTracker()
+    success, stdout, stderr = backend.execute_command("aws sts get-caller-identity")
+    step = tracker.record_step("aws sts get-caller-identity", success, stdout, stderr)
+
+    result = grader.grade(task, tracker, step)
+
+    assert not result.task_achieved, (
+        f"Task {task_id} should NOT be achieved with only setup + no-op.\n"
+        f" Reason: {result.reason}"
+    )
+    assert result.reward < 1.0
+
+
+@pytest.mark.parametrize(
+    "task_id",
+    sorted(EXPERT_COMMANDS.keys()),
+    ids=[f"task_{tid}_partial" for tid in sorted(EXPERT_COMMANDS.keys())],
+)
+def test_expert_task_partial_gives_no_completion(
+    task_id: int,
+    expert_tasks: list[dict],
+    backend: AwsBackend,
+    grader: TaskGrader,
+) -> None:
+    """Executing only the first solution command should not achieve a multi-step task."""
+    entry = next((t for t in expert_tasks if t["task_id"] == task_id), None)
+    assert entry is not None
+
+    state_checks = entry.get("success_criteria", {}).get("state_checks", [])
+    if len(state_checks) < 2:
+        pytest.skip("Single state-check task — partial test not applicable")
+
+    static_cmds = EXPERT_COMMANDS[task_id]
+    if len(static_cmds) < 1:
+        pytest.skip("No static commands — dynamic-only task")
+
+    backend.reset_environment()
+    _, state = _execute_setup(entry, backend)
+    task = _build_task(entry, state)
+
+    cmd = static_cmds[0]
+    success, stdout, stderr = backend.execute_command(cmd)
+    tracker = EpisodeTracker()
+    step = tracker.record_step(cmd, success, stdout, stderr)
+    result = grader.grade(task, tracker, step)
+
+    assert not result.task_achieved, (
+        f"Task {task_id} should NOT be achieved with only the first command.\n"
+        f" Command: {cmd}\n"
+        f" Reason: {result.reason}"
+    )
+    assert result.reward < 1.0
diff --git a/tests_tasks/test_intermediate_tasks.py b/tests_tasks/test_intermediate_tasks.py
new file mode 100644
index 0000000000000000000000000000000000000000..1031b8f7284b0764a9bfe2a2b838f69ec2c8d97b
--- /dev/null
+++ b/tests_tasks/test_intermediate_tasks.py
@@ -0,0 +1,476 @@
+"""Tests for intermediate-tier tasks — verifies multi-step command sequences and grading.
+The grader checks that each step's operation + resource has been executed successfully +via the EpisodeTracker. + +Each test resets MiniStack, executes the full command sequence, and asserts the grader +returns task_achieved=True with reward=1.0. + +Run inside Docker: + docker exec aws-rl-env python -m pytest tests/test_intermediate_tasks.py -v +""" + +import json + +import pytest +import yaml +from pathlib import Path + +from models import SuccessCriteria, Task, TaskID, TaskDifficulty, SetupCommand +from server.services.aws_backend import AwsBackend +from server.services.task_grader import TaskGrader +from server.services.episode_tracker import EpisodeTracker + +TASKS_FILE = ( + Path(__file__).resolve().parent.parent + / "server" + / "services" + / "tasks" + / "intermediate.yaml" +) + +# Mapping of task_id -> ordered list of AWS CLI commands to complete the task +INTERMEDIATE_COMMANDS: dict[int, list[str]] = { + 11: [ + "aws s3api create-bucket --bucket data-pipeline", + "aws s3api put-object --bucket data-pipeline --key test.txt --content-type text/plain", + ], + 12: [ + ( + "aws dynamodb create-table --table-name orders " + "--key-schema AttributeName=order_id,KeyType=HASH " + "--attribute-definitions AttributeName=order_id,AttributeType=S " + "--billing-mode PAY_PER_REQUEST" + ), + ( + "aws dynamodb put-item --table-name orders " + '--item \'{"order_id":{"S":"001"},"status":{"S":"pending"}}\'' + ), + ], + 13: [ + "aws sns create-topic --name alerts", + "aws sqs create-queue --queue-name alert-inbox", + ( + "aws sns subscribe --topic-arn arn:aws:sns:us-east-1:000000000000:alerts " + "--protocol sqs " + "--notification-endpoint arn:aws:sqs:us-east-1:000000000000:alert-inbox" + ), + ], + 14: [ + ( + "aws iam create-role --role-name lambda-exec-role " + "--assume-role-policy-document " + '\'{"Version":"2012-10-17","Statement":[{"Effect":"Allow",' + '"Principal":{"Service":"lambda.amazonaws.com"},"Action":"sts:AssumeRole"}]}\'' + ), + ( + "aws iam 
attach-role-policy --role-name lambda-exec-role " + "--policy-arn arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" + ), + ], + 66: [ + "aws s3api create-bucket --bucket app-assets", + ( + "aws iam create-policy --policy-name app-assets-read-policy " + "--policy-document " + '\'{"Version":"2012-10-17","Statement":[{"Effect":"Allow",' + '"Action":"s3:GetObject","Resource":"arn:aws:s3:::app-assets/*"}]}\'' + ), + ], + 67: [ + ( + "aws dynamodb create-table --table-name user-sessions " + "--key-schema AttributeName=session_id,KeyType=HASH " + "--attribute-definitions AttributeName=session_id,AttributeType=S " + "--billing-mode PAY_PER_REQUEST" + ), + "aws s3api create-bucket --bucket session-exports", + ], + 68: [ + ( + "aws iam create-role --role-name data-processor-role " + "--assume-role-policy-document " + '\'{"Version":"2012-10-17","Statement":[{"Effect":"Allow",' + '"Principal":{"Service":"lambda.amazonaws.com"},"Action":"sts:AssumeRole"}]}\'' + ), + ( + "aws lambda create-function --function-name data-processor " + "--runtime python3.12 --handler index.handler " + "--role arn:aws:iam::000000000000:role/data-processor-role " + "--code S3Bucket=dummy,S3Key=dummy.zip" + ), + ], + 69: [ + "aws sqs create-queue --queue-name order-events", + "aws sns create-topic --name order-notifications", + ( + "aws sns subscribe " + "--topic-arn arn:aws:sns:us-east-1:000000000000:order-notifications " + "--protocol sqs " + "--notification-endpoint arn:aws:sqs:us-east-1:000000000000:order-events" + ), + ], + 70: [ + ( + "aws secretsmanager create-secret --name db-credentials " + '--secret-string \'{"username":"admin","password":"secret123"}\'' + ), + ( + "aws iam create-role --role-name secret-reader-role " + "--assume-role-policy-document " + '\'{"Version":"2012-10-17","Statement":[{"Effect":"Allow",' + '"Principal":{"Service":"lambda.amazonaws.com"},"Action":"sts:AssumeRole"}]}\'' + ), + ], + 71: [ + ( + "aws ssm put-parameter --name /app/config/db-host " + 
"--type String --value db.internal.local" + ), + ( + "aws lambda create-function --function-name config-loader " + "--runtime python3.12 --handler index.handler " + "--role arn:aws:iam::000000000000:role/lambda-exec-role " + "--code S3Bucket=dummy,S3Key=dummy.zip" + ), + ], + 72: [ + ( + "aws lambda create-function --function-name scheduled-task " + "--runtime python3.12 --handler index.handler " + "--role arn:aws:iam::000000000000:role/lambda-exec-role " + "--code S3Bucket=dummy,S3Key=dummy.zip" + ), + 'aws events put-rule --name every-five-minutes --schedule-expression "rate(5 minutes)"', + ( + "aws events put-targets --rule every-five-minutes " + "--targets Id=1,Arn=arn:aws:lambda:us-east-1:000000000000:function:scheduled-task" + ), + ], + 73: [ + ( + "aws iam create-role --role-name ecs-task-role " + "--assume-role-policy-document " + '\'{"Version":"2012-10-17","Statement":[{"Effect":"Allow",' + '"Principal":{"Service":"ecs-tasks.amazonaws.com"},"Action":"sts:AssumeRole"}]}\'' + ), + ( + "aws iam attach-role-policy --role-name ecs-task-role " + "--policy-arn arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess" + ), + ], + 74: [ + ( + "aws secretsmanager create-secret --name rds-master-password " + "--secret-string " + '\'{"host":"db.local","port":"3306","username":"admin","password":"secret"}\'' + ), + ( + "aws rds create-db-instance --db-instance-identifier app-database " + "--engine mysql --db-instance-class db.t3.micro " + "--master-username admin --master-user-password secret" + ), + ], + 75: [ + ( + "aws elbv2 create-target-group --name web-targets " + "--protocol HTTP --port 80 --vpc-id vpc-00000001" + ), + ( + "aws route53 create-hosted-zone --name app.example.com " + "--caller-reference unique-ref-75" + ), + ], + 76: [ + "aws cognito-idp create-user-pool --pool-name app-users", + # second command placeholder — needs dynamic user-pool-id (see DYNAMIC_TASKS) + ], + 77: [ + "aws efs create-file-system --creation-token app-storage", + ( + "aws ec2 
create-security-group --group-name efs-mount-sg " + '--description "Allow NFS access for EFS mount"' + ), + ], + 78: [ + "aws ec2 create-volume --size 20 --availability-zone us-east-1a --volume-type gp3 " + "--tag-specifications ResourceType=volume,Tags=[{Key=Name,Value=data-volume}]", + # second command placeholder — needs dynamic volume-id (see DYNAMIC_TASKS) + ], + 79: [ + ( + "aws elasticache create-cache-subnet-group " + "--cache-subnet-group-name cache-subnets " + '--cache-subnet-group-description "Cache subnets" ' + "--subnet-ids subnet-00000001 subnet-00000002" + ), + ( + "aws elasticache create-cache-cluster --cache-cluster-id session-cache " + "--engine redis --cache-node-type cache.t3.micro --num-cache-nodes 1" + ), + ], + 80: [ + 'aws glue create-database --database-input \'{"Name":"analytics-db"}\'', + ( + "aws glue create-crawler --name raw-data-crawler " + "--role arn:aws:iam::000000000000:role/glue-role " + "--database-name analytics-db " + '--targets \'{"S3Targets":[{"Path":"s3://data-bucket/raw/"}]}\'' + ), + ], + 81: [ + ( + "aws cloudformation create-stack --stack-name vpc-stack " + '--template-body \'{"AWSTemplateFormatVersion":"2010-09-09","Resources":{}}\'' + ), + "aws cloudformation describe-stacks --stack-name vpc-stack", + ], + 82: [ + "aws apigatewayv2 create-api --name products-api --protocol-type HTTP", + # second command placeholder — needs dynamic api-id (see DYNAMIC_TASKS) + ], + 83: [ + "aws s3api create-bucket --bucket firehose-delivery", + ( + "aws firehose create-delivery-stream --delivery-stream-name event-stream " + "--s3-destination-configuration " + "RoleARN=arn:aws:iam::000000000000:role/firehose-role," + "BucketARN=arn:aws:s3:::firehose-delivery" + ), + ], + 84: [ + "aws sqs create-queue --queue-name task-queue", + # second command placeholder — needs dynamic queue-url (see DYNAMIC_TASKS) + ], + 85: [ + ( + "aws dynamodb create-table --table-name products " + "--key-schema AttributeName=product_id,KeyType=HASH " + 
"AttributeName=category,KeyType=RANGE " + "--attribute-definitions AttributeName=product_id,AttributeType=S " + "AttributeName=category,AttributeType=S " + "--billing-mode PAY_PER_REQUEST" + ), + ( + "aws dynamodb put-item --table-name products " + '--item \'{"product_id":{"S":"P001"},"category":{"S":"electronics"},' + '"name":{"S":"Wireless Mouse"}}\'' + ), + ], + 86: [ + ( + "aws iam create-role --role-name firehose-delivery-role " + "--assume-role-policy-document " + '\'{"Version":"2012-10-17","Statement":[{"Effect":"Allow",' + '"Principal":{"Service":"firehose.amazonaws.com"},"Action":"sts:AssumeRole"}]}\'' + ), + ( + "aws iam create-policy --policy-name s3-write-policy " + "--policy-document " + '\'{"Version":"2012-10-17","Statement":[{"Effect":"Allow",' + '"Action":"s3:PutObject","Resource":"*"}]}\'' + ), + ( + "aws iam attach-role-policy --role-name firehose-delivery-role " + "--policy-arn arn:aws:iam::000000000000:policy/s3-write-policy" + ), + ], +} + + +def _resolve_dynamic_commands(task_id: int, outputs: list[str]) -> list[str]: + """Generate additional commands for tasks that need dynamic IDs from prior outputs. + + Returns extra commands to append after the static ones have run. 
+ """ + if task_id == 76: + # create-user-pool-client needs the user-pool-id from create-user-pool output + data = json.loads(outputs[0]) + pool_id = data["UserPool"]["Id"] + return [ + f"aws cognito-idp create-user-pool-client --user-pool-id {pool_id} " + f"--client-name web-app-client" + ] + if task_id == 78: + # create-tags needs the volume-id from create-volume output + data = json.loads(outputs[0]) + vol_id = data["VolumeId"] + return [ + f"aws ec2 create-tags --resources {vol_id} " + f"--tags Key=Name,Value=data-volume" + ] + if task_id == 82: + # create-route needs the api-id from create-api output + data = json.loads(outputs[0]) + api_id = data["ApiId"] + return [ + f"aws apigatewayv2 create-route --api-id {api_id} " + f'--route-key "GET /products-api"' + ] + if task_id == 84: + # send-message needs the queue-url from create-queue output + data = json.loads(outputs[0]) + queue_url = data["QueueUrl"] + return [ + f"aws sqs send-message --queue-url {queue_url} " + f'--message-body \'{{"task":"process","id":"task-queue-001"}}\'' + ] + return [] + + +# Tasks that have placeholder entries and need dynamic command resolution +_DYNAMIC_TASK_IDS = {76, 78, 82, 84} + + +def _execute_all_commands( + task_id: int, backend: AwsBackend +) -> list[tuple[str, bool, str, str]]: + """Execute static commands, resolve dynamic follow-ups, return all (cmd, ok, out, err).""" + static_cmds = INTERMEDIATE_COMMANDS[task_id] + results: list[tuple[str, bool, str, str]] = [] + + for cmd in static_cmds: + success, stdout, stderr = backend.execute_command(cmd) + results.append((cmd, success, stdout, stderr)) + + if task_id in _DYNAMIC_TASK_IDS: + outputs = [r[2] for r in results] + extra_cmds = _resolve_dynamic_commands(task_id, outputs) + for cmd in extra_cmds: + success, stdout, stderr = backend.execute_command(cmd) + results.append((cmd, success, stdout, stderr)) + + return results + + +@pytest.fixture(scope="module") +def backend() -> AwsBackend: + return AwsBackend() + + 
+@pytest.fixture(scope="module")
+def grader(backend: AwsBackend) -> TaskGrader:
+    return TaskGrader(backend)
+
+
+@pytest.fixture(scope="module")
+def intermediate_tasks() -> list[dict]:
+    with open(TASKS_FILE) as f:
+        return yaml.safe_load(f)
+
+
+def _build_task(entry: dict) -> Task:
+    """Build a Task model from a raw YAML entry."""
+    return Task(
+        task_id=TaskID(entry["task_id"]),
+        difficulty=TaskDifficulty.INTERMEDIATE,
+        description=entry["description"],
+        success_criteria=SuccessCriteria(**entry.get("success_criteria", {})),
+        setup_commands=[
+            SetupCommand(command=cmd) if isinstance(cmd, str) else SetupCommand(**cmd)
+            for cmd in entry.get("setup_commands", [])
+        ],
+    )
+
+
+def test_all_intermediate_tasks_have_commands(intermediate_tasks: list[dict]) -> None:
+    """Every intermediate task in the YAML must have a corresponding test command sequence."""
+    missing = [
+        t["task_id"]
+        for t in intermediate_tasks
+        if t["task_id"] not in INTERMEDIATE_COMMANDS
+    ]
+    assert not missing, f"No test commands mapped for task_ids: {missing}"
+
+
+@pytest.mark.parametrize(
+    "task_id",
+    sorted(INTERMEDIATE_COMMANDS.keys()),
+    ids=[f"task_{tid}" for tid in sorted(INTERMEDIATE_COMMANDS.keys())],
+)
+def test_intermediate_task_commands_execute(
+    task_id: int,
+    backend: AwsBackend,
+) -> None:
+    """All commands in the sequence must execute successfully against MiniStack."""
+    backend.reset_environment()
+    results = _execute_all_commands(task_id, backend)
+    for i, (cmd, success, stdout, stderr) in enumerate(results):
+        assert success, (
+            f"Command {i + 1}/{len(results)} failed for task {task_id}.\n"
+            f" Command: {cmd}\n"
+            f" Stderr: {stderr}"
+        )
+
+
+@pytest.mark.parametrize(
+    "task_id",
+    sorted(INTERMEDIATE_COMMANDS.keys()),
+    ids=[f"task_{tid}" for tid in sorted(INTERMEDIATE_COMMANDS.keys())],
+)
+def test_intermediate_task_grading(
+    task_id: int,
+    intermediate_tasks: list[dict],
+    backend: AwsBackend,
+    grader: TaskGrader,
+) -> None:
+    """Execute the full 
command sequence and verify the grader marks the task as achieved.""" + entry = next((t for t in intermediate_tasks if t["task_id"] == task_id), None) + assert entry is not None, f"task_id {task_id} not found in intermediate.yaml" + + backend.reset_environment() + task = _build_task(entry) + results = _execute_all_commands(task_id, backend) + + tracker = EpisodeTracker() + for cmd, success, stdout, stderr in results: + step = tracker.record_step(cmd, success, stdout, stderr) + + result = grader.grade(task, tracker, step) + + all_cmds = [r[0] for r in results] + assert result.task_achieved, ( + f"Task {task_id} not achieved.\n" + f" Description: {entry['description']}\n" + f" Commands: {all_cmds}\n" + f" Reason: {result.reason}\n" + f" Reward: {result.reward}" + ) + assert result.reward == 1.0, f"Expected reward=1.0, got {result.reward}" + + +@pytest.mark.parametrize( + "task_id", + sorted(INTERMEDIATE_COMMANDS.keys()), + ids=[f"task_{tid}_partial" for tid in sorted(INTERMEDIATE_COMMANDS.keys())], +) +def test_intermediate_task_partial_gives_no_completion( + task_id: int, + intermediate_tasks: list[dict], + backend: AwsBackend, + grader: TaskGrader, +) -> None: + """Executing only the first command of a multi-step task should not achieve it.""" + entry = next((t for t in intermediate_tasks if t["task_id"] == task_id), None) + assert entry is not None + + steps = entry.get("success_criteria", {}).get("steps", []) + if len(steps) < 2: + pytest.skip("Single-step task — partial test not applicable") + + backend.reset_environment() + task = _build_task(entry) + + cmd = INTERMEDIATE_COMMANDS[task_id][0] + success, stdout, stderr = backend.execute_command(cmd) + tracker = EpisodeTracker() + step = tracker.record_step(cmd, success, stdout, stderr) + result = grader.grade(task, tracker, step) + + assert not result.task_achieved, ( + f"Task {task_id} should NOT be achieved with only the first command.\n" + f" Command: {cmd}\n" + f" Reason: {result.reason}" + ) + assert 
"""Tests for warmup-tier tasks — verifies every task executes and grades correctly.

Each test sends the correct AWS CLI command for a warmup task against MiniStack
and asserts the grader returns task_achieved=True with reward=1.0.

Run inside Docker:
    docker exec aws-rl-env python -m pytest tests_tasks/test_warmup_tasks.py -v
"""

import pytest
import yaml
from pathlib import Path

from models import SuccessCriteria, Task, TaskID, TaskDifficulty, SetupCommand
from server.services.aws_backend import AwsBackend
from server.services.task_grader import TaskGrader
from server.services.episode_tracker import EpisodeTracker

# Path to the warmup task definitions, resolved relative to this test file so
# the suite works no matter which directory pytest is invoked from.
TASKS_FILE = (
    Path(__file__).resolve().parent.parent
    / "server"
    / "services"
    / "tasks"
    / "warmup.yaml"
)

# Mapping of task_id -> the correct AWS CLI command for that warmup task.
# Task IDs are non-contiguous on purpose: they mirror the IDs in warmup.yaml.
WARMUP_COMMANDS: dict[int, str] = {
    0: "aws s3 ls",
    1: "aws ec2 describe-instances",
    2: "aws dynamodb list-tables",
    3: "aws lambda list-functions",
    4: "aws sqs list-queues",
    5: "aws sns list-topics",
    27: "aws iam list-users",
    28: "aws secretsmanager list-secrets",
    29: "aws ecs list-clusters",
    30: "aws rds describe-db-instances",
    31: "aws elasticache describe-cache-clusters",
    32: "aws athena list-named-queries",
    33: "aws glue get-databases",
    34: "aws firehose list-delivery-streams",
    35: "aws emr list-clusters",
    36: "aws apigatewayv2 get-apis",
    37: "aws route53 list-hosted-zones",
    38: "aws elbv2 describe-load-balancers",
    39: "aws ec2 describe-volumes",
    40: "aws efs describe-file-systems",
    41: "aws cognito-idp list-user-pools --max-results 10",
    42: "aws ssm describe-parameters",
    43: "aws events list-rules",
    44: "aws cloudformation list-stacks",
    45: "aws apigateway get-rest-apis",
}


@pytest.fixture(scope="module")
def backend() -> AwsBackend:
    """Single MiniStack backend shared by every test in this module."""
    return AwsBackend()


@pytest.fixture(scope="module")
def grader(backend: AwsBackend) -> TaskGrader:
    """Grader bound to the shared backend."""
    return TaskGrader(backend)


@pytest.fixture(scope="module")
def warmup_tasks() -> list[dict]:
    """Raw warmup task entries loaded once from warmup.yaml."""
    with open(TASKS_FILE) as f:
        return yaml.safe_load(f)


def _build_task(entry: dict) -> Task:
    """Build a Task model from a raw YAML entry.

    setup_commands may be plain strings or dicts in the YAML; both forms are
    normalized into SetupCommand instances here.
    """
    return Task(
        task_id=TaskID(entry["task_id"]),
        difficulty=TaskDifficulty.WARMUP,
        description=entry["description"],
        success_criteria=SuccessCriteria(**entry.get("success_criteria", {})),
        setup_commands=[
            SetupCommand(command=cmd) if isinstance(cmd, str) else SetupCommand(**cmd)
            for cmd in entry.get("setup_commands", [])
        ],
    )


def test_all_warmup_tasks_have_commands(warmup_tasks: list[dict]) -> None:
    """Every warmup task in the YAML must have a corresponding test command."""
    missing = [
        t["task_id"] for t in warmup_tasks if t["task_id"] not in WARMUP_COMMANDS
    ]
    assert not missing, f"No test command mapped for task_ids: {missing}"


@pytest.mark.parametrize(
    "task_id",
    sorted(WARMUP_COMMANDS.keys()),
    ids=[f"task_{tid}" for tid in sorted(WARMUP_COMMANDS.keys())],
)
def test_warmup_task_grading(
    task_id: int,
    warmup_tasks: list[dict],
    backend: AwsBackend,
    grader: TaskGrader,
) -> None:
    """Send the correct command for a warmup task and verify it grades as achieved."""
    entry = next((t for t in warmup_tasks if t["task_id"] == task_id), None)
    assert entry is not None, f"task_id {task_id} not found in warmup.yaml"

    task = _build_task(entry)
    cmd = WARMUP_COMMANDS[task_id]

    # Execute against MiniStack
    success, stdout, stderr = backend.execute_command(cmd)
    assert success, f"Command failed: {cmd}\nstderr: {stderr}"

    # Grade the step; a fresh tracker per test keeps episodes independent.
    tracker = EpisodeTracker()
    step = tracker.record_step(cmd, success, stdout, stderr)
    result = grader.grade(task, tracker, step)

    assert result.task_achieved, (
        f"Task {task_id} not achieved.\n"
        f"  Command: {cmd}\n"
        f"  Reason: {result.reason}\n"
        f"  Reward: {result.reward}"
    )
    assert result.reward == 1.0, f"Expected reward=1.0, got {result.reward}"


@pytest.mark.parametrize(
    "task_id",
    sorted(WARMUP_COMMANDS.keys()),
    ids=[f"task_{tid}_wrong_cmd" for tid in sorted(WARMUP_COMMANDS.keys())],
)
def test_warmup_task_rejects_wrong_command(
    task_id: int,
    warmup_tasks: list[dict],
    backend: AwsBackend,
    grader: TaskGrader,
) -> None:
    """A wrong command should not achieve a warmup task."""
    entry = next((t for t in warmup_tasks if t["task_id"] == task_id), None)
    assert entry is not None, f"task_id {task_id} not found in warmup.yaml"

    task = _build_task(entry)

    # Use a deliberately wrong command (different service); no warmup task
    # in WARMUP_COMMANDS targets sts, so this must never satisfy the grader.
    wrong_cmd = "aws sts get-caller-identity"

    success, stdout, stderr = backend.execute_command(wrong_cmd)
    tracker = EpisodeTracker()
    step = tracker.record_step(wrong_cmd, success, stdout, stderr)
    result = grader.grade(task, tracker, step)

    assert not result.task_achieved, (
        f"Task {task_id} should NOT be achieved with wrong command '{wrong_cmd}'"
    )
    assert result.reward < 1.0
#!/usr/bin/env bash
#
# validate-submission.sh — OpenEnv Submission Validator
#
# Checks that your HF Space is live, Docker image builds, and openenv validate passes.
#
# Prerequisites:
#   - Docker:       https://docs.docker.com/get-docker/
#   - openenv-core: pip install openenv-core
#   - curl (usually pre-installed)
#
# Run:
#   curl -fsSL https://raw.githubusercontent.com/<org>/<repo>/main/scripts/validate-submission.sh | bash -s -- <ping_url> [repo_dir]
#
# Or download and run locally:
#   chmod +x validate-submission.sh
#   ./validate-submission.sh <ping_url> [repo_dir]
#
# Arguments:
#   ping_url   Your HuggingFace Space URL (e.g. https://your-space.hf.space)
#   repo_dir   Path to your repo (default: current directory)
#
# Examples:
#   ./validate-submission.sh https://sizzing-aws-rl-env.hf.space
#   ./validate-submission.sh https://sizzing-aws-rl-env.hf.space ./my-repo
#

# -u: error on undefined variables; pipefail: a pipeline fails if any stage fails.
# -e is deliberately omitted — every step inspects its own exit status.
set -uo pipefail

DOCKER_BUILD_TIMEOUT=600

# Use ANSI colors only when stdout is a terminal, so piped/logged output stays clean.
if [ -t 1 ]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[1;33m'
    BOLD='\033[1m'
    NC='\033[0m'
else
    RED='' GREEN='' YELLOW='' BOLD='' NC=''
fi

# Run a command with a time limit, portably:
# prefer GNU `timeout`, then `gtimeout` (macOS coreutils), else a
# background-watcher fallback that kills the command after $secs seconds.
run_with_timeout() {
    local secs="$1"; shift
    if command -v timeout &>/dev/null; then
        timeout "$secs" "$@"
    elif command -v gtimeout &>/dev/null; then
        gtimeout "$secs" "$@"
    else
        "$@" &
        local pid=$!
        ( sleep "$secs" && kill "$pid" 2>/dev/null ) &
        local watcher=$!
        wait "$pid" 2>/dev/null
        local rc=$?
        # Reap the watcher so it cannot kill an unrelated PID later.
        kill "$watcher" 2>/dev/null
        wait "$watcher" 2>/dev/null
        return $rc
    fi
}

# mktemp wrapper that honors TMPDIR and degrades to plain `mktemp`
# on platforms where the template form is unsupported.
portable_mktemp() {
    local prefix="${1:-validate}"
    mktemp "${TMPDIR:-/tmp}/${prefix}-XXXXXX" 2>/dev/null || mktemp
}

# Temp files registered here are removed on exit (the guarded expansion
# keeps `set -u` happy when the array is empty).
CLEANUP_FILES=()
cleanup() { rm -f "${CLEANUP_FILES[@]+"${CLEANUP_FILES[@]}"}"; }
trap cleanup EXIT

PING_URL="${1:-}"
REPO_DIR="${2:-.}"

if [ -z "$PING_URL" ]; then
    printf "Usage: %s <ping_url> [repo_dir]\n" "$0"
    printf "\n"
    printf "  ping_url   Your HuggingFace Space URL (e.g. https://your-space.hf.space)\n"
    printf "  repo_dir   Path to your repo (default: current directory)\n"
    exit 1
fi

# Canonicalize the repo path; fail early if it does not exist.
if ! REPO_DIR="$(cd "$REPO_DIR" 2>/dev/null && pwd)"; then
    printf "Error: directory '%s' not found\n" "${2:-.}"
    exit 1
fi
# Strip a trailing slash so "$PING_URL/reset" never doubles the separator.
PING_URL="${PING_URL%/}"
export PING_URL
PASS=0

log()  { printf "[%s] %b\n" "$(date -u +%H:%M:%S)" "$*"; }
pass() { log "${GREEN}PASSED${NC} -- $1"; PASS=$((PASS + 1)); }
fail() { log "${RED}FAILED${NC} -- $1"; }
hint() { printf "       ${YELLOW}Hint:${NC} %b\n" "$1"; }
stop_at() {
    printf "\n"
    printf "${RED}${BOLD}Validation stopped at %s.${NC} Fix the above before continuing.\n" "$1"
    exit 1
}

printf "\n"
printf "${BOLD}========================================${NC}\n"
printf "${BOLD}  OpenEnv Submission Validator${NC}\n"
printf "${BOLD}========================================${NC}\n"
log "Repo:     $REPO_DIR"
log "Ping URL: $PING_URL"
printf "\n"

log "${BOLD}Step 1/3: Pinging HF Space${NC} ($PING_URL/reset) ..."

CURL_OUTPUT=$(portable_mktemp "validate-curl")
CLEANUP_FILES+=("$CURL_OUTPUT")
# Capture the response body in $CURL_OUTPUT and the HTTP status via -w.
# curl's own stderr is discarded (it must not clobber the body file);
# "000" is the sentinel for "no connection at all".
HTTP_CODE=$(curl -s -o "$CURL_OUTPUT" -w "%{http_code}" -X POST \
    -H "Content-Type: application/json" -d '{}' \
    "$PING_URL/reset" --max-time 30 2>/dev/null || printf "000")

if [ "$HTTP_CODE" = "200" ]; then
    pass "HF Space is live and responds to /reset"
elif [ "$HTTP_CODE" = "000" ]; then
    fail "HF Space not reachable (connection failed or timed out)"
    hint "Check your network connection and that the Space is running."
    hint "Try: curl -s -o /dev/null -w '%%{http_code}' -X POST $PING_URL/reset"
    stop_at "Step 1"
else
    fail "HF Space /reset returned HTTP $HTTP_CODE (expected 200)"
    hint "Make sure your Space is running and the URL is correct."
    hint "Try opening $PING_URL in your browser first."
    stop_at "Step 1"
fi

log "${BOLD}Step 2/3: Running docker build${NC} ..."

if ! command -v docker &>/dev/null; then
    fail "docker command not found"
    hint "Install Docker: https://docs.docker.com/get-docker/"
    stop_at "Step 2"
fi

# Accept a Dockerfile at the repo root or under server/.
if [ -f "$REPO_DIR/Dockerfile" ]; then
    DOCKER_CONTEXT="$REPO_DIR"
elif [ -f "$REPO_DIR/server/Dockerfile" ]; then
    DOCKER_CONTEXT="$REPO_DIR/server"
else
    fail "No Dockerfile found in repo root or server/ directory"
    stop_at "Step 2"
fi

log "  Found Dockerfile in $DOCKER_CONTEXT"

BUILD_OK=false
BUILD_OUTPUT=$(run_with_timeout "$DOCKER_BUILD_TIMEOUT" docker build "$DOCKER_CONTEXT" 2>&1) && BUILD_OK=true

if [ "$BUILD_OK" = true ]; then
    pass "Docker build succeeded"
else
    fail "Docker build failed (timeout=${DOCKER_BUILD_TIMEOUT}s)"
    # Show only the tail — that is where docker reports the failing step.
    printf "%s\n" "$BUILD_OUTPUT" | tail -20
    stop_at "Step 2"
fi

log "${BOLD}Step 3/3: Running openenv validate${NC} ..."

if ! command -v openenv &>/dev/null; then
    fail "openenv command not found"
    hint "Install it: pip install openenv-core"
    stop_at "Step 3"
fi

VALIDATE_OK=false
VALIDATE_OUTPUT=$(cd "$REPO_DIR" && openenv validate 2>&1) && VALIDATE_OK=true

if [ "$VALIDATE_OK" = true ]; then
    pass "openenv validate passed"
    [ -n "$VALIDATE_OUTPUT" ] && log "  $VALIDATE_OUTPUT"
else
    fail "openenv validate failed"
    printf "%s\n" "$VALIDATE_OUTPUT"
    stop_at "Step 3"
fi

printf "\n"
printf "${BOLD}========================================${NC}\n"
printf "${GREEN}${BOLD}  All 3/3 checks passed!${NC}\n"
printf "${GREEN}${BOLD}  Your submission is ready to submit.${NC}\n"
printf "${BOLD}========================================${NC}\n"
printf "\n"

exit 0