Spaces:

Rayugacodes
/

Breach-OS

Sleeping

App Files Files Community

Breach-OS / models.py

pi9605

updated inference.py

4df57fe about 1 month ago

raw

history blame contribute delete

2.1 kB

	"""
	Shared Pydantic models and Enums for the BreachOS environment.
	These define the API contract between the server, environment, and clients.
	"""
	from pydantic import BaseModel, Field, field_validator
	from typing import Optional
	from enum import Enum


	class StrategyType(str, Enum):
	    """Closed set of attack strategy identifiers.

	    Members are also ``str`` instances, so each one compares equal to its
	    lowercase wire value (for example ``StrategyType.ROLEPLAY == "roleplay"``)
	    and can be looked up from that value with ``StrategyType("roleplay")``.
	    """

	    ROLEPLAY = "roleplay"
	    HYPOTHETICAL = "hypothetical"
	    INJECTION = "injection"
	    PERSONA_SWITCH = "persona_switch"
	    AUTHORITY_CLAIM = "authority_claim"
	    ENCODING = "encoding"
	    MULTI_TURN = "multi_turn"


	class TargetCategory(str, Enum):
	    """Closed set of target category identifiers.

	    Members are also ``str`` instances, so each one compares equal to its
	    lowercase wire value (for example ``TargetCategory.PRIVACY == "privacy"``)
	    and can be looked up from that value with ``TargetCategory("privacy")``.
	    """

	    PRIVACY = "privacy"
	    MISINFORMATION = "misinformation"
	    HARMFUL_INSTRUCTIONS = "harmful_instructions"
	    MANIPULATION = "manipulation"
	    ILLEGAL_ACTIVITY = "illegal_activity"


	class AttackAction(BaseModel):
	    """Request model describing one attack action.

	    Combines a strategy and a target category with a bounded intensity and
	    a length-limited free-text framing.  The framing is passed through
	    ``sanitize_framing`` during validation.
	    """

	    strategy_type: StrategyType
	    target_category: TargetCategory
	    # Must lie in the closed interval [0.0, 1.0].
	    intensity: float = Field(ge=0.0, le=1.0)
	    # At most 500 characters as submitted (checked on the raw input).
	    framing: str = Field(max_length=500)

	    @field_validator("framing")
	    @classmethod
	    def sanitize_framing(cls, v: str) -> str:
	        """Replace script-injection markers in ``framing`` and trim it.

	        Every occurrence of ``<script>``, ``javascript:`` or ``onerror=``
	        is replaced with ``[FILTERED]``.  Matching is case-insensitive so
	        that variants such as ``<SCRIPT>`` or ``JavaScript:`` cannot slip
	        past the filter.

	        Args:
	            v: The framing text being validated.

	        Returns:
	            The filtered text with leading/trailing whitespace removed.
	        """
	        # Function-scope import: the module's import block has no ``re``.
	        import re

	        dangerous = ("<script>", "javascript:", "onerror=")
	        # One alternation of escaped literals -> a single pass over the text.
	        pattern = "|".join(re.escape(token) for token in dangerous)
	        filtered = re.sub(pattern, "[FILTERED]", v, flags=re.IGNORECASE)
	        return filtered.strip()


	class AutoAttackRequest(BaseModel):
	    """Request model carrying only a strategy and a target category.

	    Unlike ``AttackAction`` it has no ``intensity`` or ``framing`` fields.
	    NOTE(review): presumably the server fills those in for "auto" attacks;
	    confirm against the endpoint that consumes this model.
	    """

	    strategy_type: StrategyType
	    target_category: TargetCategory


	class RedTeamObservation(BaseModel):
	    """Observation payload for one turn of an episode.

	    The three score fields are validated to lie in [0.0, 1.0] and ``turn``
	    must be non-negative; the remaining fields are unconstrained beyond
	    their declared types.
	    """

	    defender_response: str
	    # Each score below must lie in the closed interval [0.0, 1.0].
	    defense_score: float = Field(ge=0.0, le=1.0)
	    attack_success_estimate: float = Field(ge=0.0, le=1.0)
	    novelty_score: float = Field(ge=0.0, le=1.0)
	    # Non-negative turn counter.
	    turn: int = Field(ge=0)
	    episode_done: bool
	    feedback: str
	    episode_id: str


	class EpisodeState(BaseModel):
	    """Snapshot of an episode's bookkeeping counters and status flag.

	    No range constraints are declared on the integer fields here; only
	    their types are validated.
	    """

	    episode_id: str
	    turn: int
	    max_turns: int
	    attacks_so_far: int
	    is_active: bool


	class StepResult(BaseModel):
	    """Result of a step: an observation paired with a scalar reward.

	    ``reward`` is an unconstrained float (no bounds are declared).
	    """

	    observation: RedTeamObservation
	    reward: float


	class ResetResponse(BaseModel):
	    """Response to a reset: an observation plus an episode identifier.

	    NOTE(review): ``observation`` also carries its own ``episode_id``;
	    presumably the two are expected to match -- confirm with the server
	    code that builds this response.
	    """

	    observation: RedTeamObservation
	    episode_id: str