"""
Shared Pydantic models and Enums for the BreachOS environment.

These define the API contract between the server, environment, and clients.
"""
import re
from enum import Enum
from typing import Optional

from pydantic import BaseModel, Field, field_validator
class StrategyType(str, Enum):
    """Closed set of attack strategy identifiers.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase wire value (e.g. ``StrategyType.ROLEPLAY == "roleplay"``).
    """

    ROLEPLAY = "roleplay"
    HYPOTHETICAL = "hypothetical"
    INJECTION = "injection"
    PERSONA_SWITCH = "persona_switch"
    AUTHORITY_CLAIM = "authority_claim"
    ENCODING = "encoding"
    MULTI_TURN = "multi_turn"
class TargetCategory(str, Enum):
    """Closed set of target category identifiers.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase wire value (e.g. ``TargetCategory.PRIVACY == "privacy"``).
    """

    PRIVACY = "privacy"
    MISINFORMATION = "misinformation"
    HARMFUL_INSTRUCTIONS = "harmful_instructions"
    MANIPULATION = "manipulation"
    ILLEGAL_ACTIVITY = "illegal_activity"
class AttackAction(BaseModel):
    """A single attack action: strategy, target category, intensity, framing.

    ``intensity`` must lie in [0.0, 1.0] and ``framing`` may be at most 500
    characters long; both limits are enforced by the ``Field`` constraints.
    """

    strategy_type: StrategyType
    target_category: TargetCategory
    intensity: float = Field(ge=0.0, le=1.0)
    framing: str = Field(max_length=500)

    # The decorators are required: without them Pydantic treats this as an
    # ordinary method and never invokes it during validation.
    @field_validator("framing")
    @classmethod
    def sanitize_framing(cls, v: str) -> str:
        """Replace known script-injection markers in ``framing``.

        Each occurrence of ``<script>``, ``javascript:`` or ``onerror=`` is
        replaced with ``[FILTERED]``, then surrounding whitespace is stripped.
        Matching is case-insensitive because HTML tag names, attribute names
        and URL schemes are themselves case-insensitive.

        Args:
            v: The raw ``framing`` value.

        Returns:
            The filtered, stripped string.
        """
        dangerous = ["<script>", "javascript:", "onerror="]
        for pattern in dangerous:
            # re.escape keeps the markers literal (no regex metacharacters).
            v = re.sub(re.escape(pattern), "[FILTERED]", v, flags=re.IGNORECASE)
        return v.strip()
class AutoAttackRequest(BaseModel):
    """Request model carrying only a strategy type and a target category.

    It declares the same two enum fields as ``AttackAction`` but has no
    ``intensity`` or ``framing`` field.
    """

    strategy_type: StrategyType
    target_category: TargetCategory
class RedTeamObservation(BaseModel):
    """Observation payload embedded in ``StepResult`` and ``ResetResponse``.

    The three score fields are constrained to [0.0, 1.0] and ``turn`` must be
    non-negative; the remaining fields are unconstrained.
    """

    defender_response: str
    # Scores: each validated to lie within [0.0, 1.0].
    defense_score: float = Field(ge=0.0, le=1.0)
    attack_success_estimate: float = Field(ge=0.0, le=1.0)
    novelty_score: float = Field(ge=0.0, le=1.0)
    # Turn counter; validated to be >= 0.
    turn: int = Field(ge=0)
    episode_done: bool
    feedback: str
    episode_id: str
class EpisodeState(BaseModel):
    """Snapshot of an episode: identifier, turn counters and active flag.

    No range constraints are declared on the integer fields here, so any
    integer value is accepted by validation.
    """

    episode_id: str
    turn: int
    max_turns: int
    attacks_so_far: int
    is_active: bool
class StepResult(BaseModel):
    """Pairs a ``RedTeamObservation`` with a float reward.

    ``reward`` carries no range constraint.
    """

    observation: RedTeamObservation
    reward: float
class ResetResponse(BaseModel):
    """Pairs a ``RedTeamObservation`` with an episode identifier.

    NOTE(review): ``observation`` also has its own ``episode_id`` field;
    presumably the two are expected to match — confirm against the server.
    """

    observation: RedTeamObservation
    episode_id: str