Spaces:
Running
Running
| """Unit tests for the EpisodeTracker — command history, rollback detection, and grading helpers. | |
| These are pure unit tests that do not require MiniStack or Docker. | |
| Run: | |
| python -m pytest tests/test_episode_tracker.py -v | |
| """ | |
| from server.services.episode_tracker import ( | |
| EpisodeTracker, | |
| StepRecord, | |
| _command_mentions_resource, | |
| _extract_resource_name, | |
| _parse_aws_command, | |
| ) | |
| # --------------------------------------------------------------------------- | |
| # _parse_aws_command | |
| # --------------------------------------------------------------------------- | |
| class TestParseAwsCommand: | |
| def test_standard_command(self) -> None: | |
| assert _parse_aws_command("aws s3api create-bucket --bucket foo") == ( | |
| "s3api", | |
| "create-bucket", | |
| ) | |
| def test_simple_service(self) -> None: | |
| assert _parse_aws_command("aws iam list-roles") == ("iam", "list-roles") | |
| def test_too_few_parts(self) -> None: | |
| assert _parse_aws_command("aws s3") == (None, None) | |
| def test_not_aws(self) -> None: | |
| assert _parse_aws_command("gcloud compute instances list") == (None, None) | |
| def test_empty_string(self) -> None: | |
| assert _parse_aws_command("") == (None, None) | |
| def test_leading_whitespace(self) -> None: | |
| assert _parse_aws_command(" aws lambda list-functions") == ( | |
| "lambda", | |
| "list-functions", | |
| ) | |
| # --------------------------------------------------------------------------- | |
| # _command_mentions_resource | |
| # --------------------------------------------------------------------------- | |
| class TestCommandMentionsResource: | |
| def test_flag_match(self) -> None: | |
| assert _command_mentions_resource( | |
| "aws s3api create-bucket --bucket my-bucket", "my-bucket" | |
| ) | |
| def test_flag_value_syntax(self) -> None: | |
| assert _command_mentions_resource( | |
| "aws dynamodb describe-table --table-name=orders", "orders" | |
| ) | |
| def test_function_name_flag(self) -> None: | |
| assert _command_mentions_resource( | |
| "aws lambda invoke --function-name processor /dev/null", "processor" | |
| ) | |
| def test_arn_word_boundary(self) -> None: | |
| assert _command_mentions_resource( | |
| "aws lambda create-event-source-mapping " | |
| "--event-source-arn arn:aws:sqs:us-east-1:000000000000:my-queue", | |
| "my-queue", | |
| ) | |
| def test_no_match(self) -> None: | |
| assert not _command_mentions_resource( | |
| "aws s3api create-bucket --bucket other-bucket", "my-bucket" | |
| ) | |
| def test_different_resource_no_match(self) -> None: | |
| assert not _command_mentions_resource( | |
| "aws s3api create-bucket --bucket test-bucket", "prod-bucket" | |
| ) | |
| def test_role_name(self) -> None: | |
| assert _command_mentions_resource( | |
| "aws iam attach-role-policy --role-name my-role " | |
| "--policy-arn arn:aws:iam::aws:policy/ReadOnly", | |
| "my-role", | |
| ) | |
| # --------------------------------------------------------------------------- | |
| # _extract_resource_name | |
| # --------------------------------------------------------------------------- | |
| class TestExtractResourceName: | |
| def test_bucket(self) -> None: | |
| assert _extract_resource_name("aws s3api create-bucket --bucket demo") == "demo" | |
| def test_table_name_equals(self) -> None: | |
| assert ( | |
| _extract_resource_name("aws dynamodb describe-table --table-name=users") | |
| == "users" | |
| ) | |
| def test_no_resource_flag(self) -> None: | |
| assert _extract_resource_name("aws sts get-caller-identity") is None | |
| def test_first_flag_wins(self) -> None: | |
| cmd = "aws s3api put-object --bucket first --name second" | |
| assert _extract_resource_name(cmd) == "first" | |
| # --------------------------------------------------------------------------- | |
| # EpisodeTracker — record_step & basic properties | |
| # --------------------------------------------------------------------------- | |
| class TestRecordStep: | |
| def test_returns_step_record(self) -> None: | |
| t = EpisodeTracker() | |
| step = t.record_step("aws s3 ls", True, "buckets...", "") | |
| assert isinstance(step, StepRecord) | |
| assert step.command == "aws s3 ls" | |
| assert step.success is True | |
| assert step.step_number == 0 | |
| def test_increments_step_counter(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("aws s3 ls", True, "", "") | |
| t.record_step("aws ec2 describe-instances", True, "", "") | |
| assert t.step_count == 2 | |
| def test_command_history(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("cmd1", True, "", "") | |
| t.record_step("cmd2", False, "", "err") | |
| assert len(t.command_history) == 2 | |
| assert t.command_history[0].command == "cmd1" | |
| assert t.command_history[1].success is False | |
| def test_history_is_copy(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("cmd", True, "", "") | |
| history = t.command_history | |
| history.clear() | |
| assert t.step_count == 1 # internal state not affected | |
| # --------------------------------------------------------------------------- | |
| # EpisodeTracker — reset | |
| # --------------------------------------------------------------------------- | |
| class TestReset: | |
| def test_clears_all_state(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("aws s3 ls", True, "", "") | |
| t.credit_operation("ls", None) | |
| t.record_hint() | |
| t.previous_progress = 0.5 | |
| t.reset() | |
| assert t.step_count == 0 | |
| assert t.command_history == [] | |
| assert t.hints_used == 0 | |
| assert t.previous_progress == 0.0 | |
| assert not t.is_operation_already_credited("ls", None) | |
| # --------------------------------------------------------------------------- | |
| # EpisodeTracker — has_executed_operation | |
| # --------------------------------------------------------------------------- | |
| class TestHasExecutedOperation: | |
| def test_matches_successful_command(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("aws s3api create-bucket --bucket demo", True, "", "") | |
| assert t.has_executed_operation("create-bucket") | |
| def test_ignores_failed_command(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("aws s3api create-bucket --bucket demo", False, "", "err") | |
| assert not t.has_executed_operation("create-bucket") | |
| def test_matches_with_resource(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("aws s3api create-bucket --bucket demo", True, "", "") | |
| assert t.has_executed_operation("create-bucket", "demo") | |
| def test_wrong_resource(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("aws s3api create-bucket --bucket demo", True, "", "") | |
| assert not t.has_executed_operation("create-bucket", "other") | |
| def test_wrong_operation(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("aws s3api create-bucket --bucket demo", True, "", "") | |
| assert not t.has_executed_operation("delete-bucket") | |
| def test_resource_none_matches_any(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("aws dynamodb create-table --table-name orders", True, "", "") | |
| assert t.has_executed_operation("create-table") | |
| assert t.has_executed_operation("create-table", "orders") | |
| def test_empty_history(self) -> None: | |
| assert not EpisodeTracker().has_executed_operation("anything") | |
| # --------------------------------------------------------------------------- | |
| # EpisodeTracker — has_used_service | |
| # --------------------------------------------------------------------------- | |
| class TestHasUsedService: | |
| def test_exact_service(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("aws sqs create-queue --queue-name q1", True, "", "") | |
| assert t.has_used_service("sqs") | |
| def test_substring_match(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("aws s3api create-bucket --bucket b", True, "", "") | |
| assert t.has_used_service("s3") # "s3" in "s3api" | |
| def test_ignores_failed(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("aws iam list-roles", False, "", "err") | |
| assert not t.has_used_service("iam") | |
| def test_no_match(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("aws s3 ls", True, "", "") | |
| assert not t.has_used_service("lambda") | |
| def test_non_aws_command(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("echo hello", True, "hello", "") | |
| assert not t.has_used_service("echo") | |
| # --------------------------------------------------------------------------- | |
| # EpisodeTracker — credit_operation / is_operation_already_credited | |
| # --------------------------------------------------------------------------- | |
| class TestCreditedOperations: | |
| def test_not_credited_by_default(self) -> None: | |
| t = EpisodeTracker() | |
| assert not t.is_operation_already_credited("create-bucket", "demo") | |
| def test_credit_and_check(self) -> None: | |
| t = EpisodeTracker() | |
| t.credit_operation("create-bucket", "demo") | |
| assert t.is_operation_already_credited("create-bucket", "demo") | |
| def test_different_resource_not_credited(self) -> None: | |
| t = EpisodeTracker() | |
| t.credit_operation("create-bucket", "demo") | |
| assert not t.is_operation_already_credited("create-bucket", "other") | |
| def test_none_resource(self) -> None: | |
| t = EpisodeTracker() | |
| t.credit_operation("list-buckets", None) | |
| assert t.is_operation_already_credited("list-buckets", None) | |
| assert not t.is_operation_already_credited("list-buckets", "demo") | |
| # --------------------------------------------------------------------------- | |
| # EpisodeTracker — hints | |
| # --------------------------------------------------------------------------- | |
| class TestHints: | |
| def test_initial_zero(self) -> None: | |
| assert EpisodeTracker().hints_used == 0 | |
| def test_record_hint_increments(self) -> None: | |
| t = EpisodeTracker() | |
| assert t.record_hint() == 1 | |
| assert t.record_hint() == 2 | |
| assert t.hints_used == 2 | |
| def test_reset_clears_hints(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_hint() | |
| t.reset() | |
| assert t.hints_used == 0 | |
| # --------------------------------------------------------------------------- | |
| # EpisodeTracker — previous_progress | |
| # --------------------------------------------------------------------------- | |
| class TestPreviousProgress: | |
| def test_default_zero(self) -> None: | |
| assert EpisodeTracker().previous_progress == 0.0 | |
| def test_setter(self) -> None: | |
| t = EpisodeTracker() | |
| t.previous_progress = 0.75 | |
| assert t.previous_progress == 0.75 | |
| # --------------------------------------------------------------------------- | |
| # EpisodeTracker — detect_rollbacks | |
| # --------------------------------------------------------------------------- | |
| class TestDetectRollbacks: | |
| def test_no_rollbacks(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("aws s3api create-bucket --bucket demo", True, "", "") | |
| assert t.detect_rollbacks() == 0 | |
| def test_create_then_delete(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("aws s3api create-bucket --bucket demo", True, "", "") | |
| t.record_step("aws s3api delete-bucket --bucket demo", True, "", "") | |
| assert t.detect_rollbacks() == 1 | |
| def test_failed_delete_not_counted(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("aws s3api create-bucket --bucket demo", True, "", "") | |
| t.record_step("aws s3api delete-bucket --bucket demo", False, "", "err") | |
| assert t.detect_rollbacks() == 0 | |
| def test_different_resource_not_counted(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("aws s3api create-bucket --bucket a", True, "", "") | |
| t.record_step("aws s3api delete-bucket --bucket b", True, "", "") | |
| assert t.detect_rollbacks() == 0 | |
| def test_multiple_rollbacks(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("aws s3api create-bucket --bucket a", True, "", "") | |
| t.record_step("aws s3api delete-bucket --bucket a", True, "", "") | |
| t.record_step("aws dynamodb create-table --table-name t1", True, "", "") | |
| t.record_step("aws dynamodb delete-table --table-name t1", True, "", "") | |
| assert t.detect_rollbacks() == 2 | |
| def test_attach_detach_role_policy(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step( | |
| "aws iam attach-role-policy --role-name r1 " | |
| "--policy-arn arn:aws:iam::aws:policy/ReadOnly", | |
| True, | |
| "", | |
| "", | |
| ) | |
| t.record_step( | |
| "aws iam detach-role-policy --role-name r1 " | |
| "--policy-arn arn:aws:iam::aws:policy/ReadOnly", | |
| True, | |
| "", | |
| "", | |
| ) | |
| assert t.detect_rollbacks() == 1 | |
| def test_failed_create_not_tracked(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("aws s3api create-bucket --bucket demo", False, "", "err") | |
| t.record_step("aws s3api delete-bucket --bucket demo", True, "", "") | |
| assert t.detect_rollbacks() == 0 | |
| # --------------------------------------------------------------------------- | |
| # EpisodeTracker — detect_idempotent_retries | |
| # --------------------------------------------------------------------------- | |
| class TestDetectIdempotentRetries: | |
| def test_no_retries(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step("aws s3api create-bucket --bucket demo", True, "", "") | |
| assert t.detect_idempotent_retries() == 0 | |
| def test_already_exists_then_success(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step( | |
| "aws s3api create-bucket --bucket demo", | |
| False, | |
| "", | |
| "BucketAlreadyOwnedByYou", | |
| ) | |
| t.record_step("aws s3api put-object --bucket demo --key f", True, "", "") | |
| assert t.detect_idempotent_retries() == 1 | |
| def test_already_exists_no_followup(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step( | |
| "aws s3api create-bucket --bucket demo", | |
| False, | |
| "", | |
| "BucketAlreadyExists", | |
| ) | |
| # No next step | |
| assert t.detect_idempotent_retries() == 0 | |
| def test_already_exists_followed_by_failure(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step( | |
| "aws sqs create-queue --queue-name q", | |
| False, | |
| "", | |
| "QueueNameExists", | |
| ) | |
| t.record_step("aws sqs send-message --queue-url q", False, "", "err") | |
| assert t.detect_idempotent_retries() == 0 | |
| def test_generic_already_exists(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step( | |
| "aws lambda create-function --function-name fn", | |
| False, | |
| "", | |
| "Resource already exists", | |
| ) | |
| t.record_step("aws lambda invoke --function-name fn", True, "", "") | |
| assert t.detect_idempotent_retries() == 1 | |
| def test_non_create_failure_ignored(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step( | |
| "aws s3api delete-bucket --bucket demo", | |
| False, | |
| "", | |
| "BucketAlreadyExists", # nonsensical but tests the guard | |
| ) | |
| t.record_step("aws s3 ls", True, "", "") | |
| assert t.detect_idempotent_retries() == 0 | |
| def test_multiple_retries(self) -> None: | |
| t = EpisodeTracker() | |
| t.record_step( | |
| "aws s3api create-bucket --bucket a", | |
| False, | |
| "", | |
| "BucketAlreadyExists", | |
| ) | |
| t.record_step("aws s3api put-object --bucket a --key f", True, "", "") | |
| t.record_step( | |
| "aws sqs create-queue --queue-name q", | |
| False, | |
| "", | |
| "QueueNameExists", | |
| ) | |
| t.record_step("aws sqs send-message --queue-url q", True, "", "") | |
| assert t.detect_idempotent_retries() == 2 | |