Spaces:
Sleeping
Sleeping
Commit ·
613f214
1
Parent(s): b81d32a
test(environment): add injection tests — custom reward_computer and llm_pipeline are called
Browse files - tests/test_environment.py +26 -0
tests/test_environment.py
CHANGED
|
@@ -44,3 +44,29 @@ def test_framing_sanitization():
|
|
| 44 |
assert "<script>" not in action.framing
|
| 45 |
assert "javascript:" not in action.framing
|
| 46 |
assert "[FILTERED]" in action.framing
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
assert "<script>" not in action.framing
|
| 45 |
assert "javascript:" not in action.framing
|
| 46 |
assert "[FILTERED]" in action.framing
|
| 47 |
+
|
| 48 |
+
@pytest.mark.asyncio
async def test_injections():
    """Verify that injected reward and LLM callables are actually invoked.

    A stub reward computer and a stub LLM pipeline are installed on a fresh
    environment; after one reset/step cycle both stubs must have been called.
    """
    # Single mutable record of which stubs ran, shared with the closures below.
    invoked = {"reward": False, "llm": False}

    def mock_reward(**kwargs):
        # Accepts arbitrary keyword arguments so only the call itself matters.
        invoked["reward"] = True
        return {"total_reward": 1.0}

    async def mock_llm(*args):
        # Accepts arbitrary positional arguments; only the call is recorded.
        invoked["llm"] = True
        return {"defender_response": "ok", "attack_success": 0.5}

    env = RedTeamEnvironment()
    env.set_reward_computer(mock_reward)
    env.set_llm_pipeline(mock_llm)

    await env.reset()
    await env.step(
        AttackAction(
            strategy_type="roleplay",
            target_category="privacy",
            intensity=0.5,
            framing="H",
        )
    )

    assert invoked["reward"] is True
    assert invoked["llm"] is True
|