# fineprint-env/server/tasks.py
# vigneshmoovendhan's picture
# Fine Print RL final
# 0b6a889
"""Task definitions for the FinePrint OpenEnv environment.
Defines three progressively harder policy compliance scenarios:
1. quote_accuracy (EASY) - Quote policies correctly with no drift.
2. drift_detection (MEDIUM) - Detect and adapt to policy changes.
3. compliance_storm (HARD) - Full compliance under heavy drift.
Each task configures the environment with specific workflows, drift
parameters, and grading criteria.
"""
from __future__ import annotations
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
# Every task identifier accepted by ``get_task``, listed from easiest to
# hardest.
TASK_IDS: list[str] = [
    "quote_accuracy",    # EASY
    "drift_detection",   # MEDIUM
    "compliance_storm",  # HARD
]
def get_task(task_id: str) -> dict:
    """Look up the builder for *task_id* and return the configuration it builds.

    Parameters
    ----------
    task_id : str
        One of the identifiers listed in ``TASK_IDS``.

    Returns
    -------
    dict
        A freshly built task configuration with keys:
            task_id            – str
            description        – human-readable problem statement
            workflows          – list[str] workflow names to run
            max_versions       – int how many policy versions are available
            drift_probability  – float
            silent_drift_ratio – float
            max_steps          – int agent step budget

    Raises
    ------
    ValueError
        If *task_id* has no registered builder. The message lists the
        valid identifiers.
    """
    # Dispatch table: task identifier -> zero-argument builder function.
    registry = {
        "quote_accuracy": _build_quote_accuracy,
        "drift_detection": _build_drift_detection,
        "compliance_storm": _build_compliance_storm,
    }

    # Guard clause: reject unknown identifiers before building anything.
    if task_id not in registry:
        valid_ids = ", ".join(TASK_IDS)
        raise ValueError(f"Unknown task_id {task_id!r}. Valid IDs: {valid_ids}")

    make_config = registry[task_id]
    return make_config()
# ---------------------------------------------------------------------------
# Task 1 — quote_accuracy (EASY)
# ---------------------------------------------------------------------------
def _build_quote_accuracy() -> dict:
    """Build the EASY task: two workflows, zero drift.

    Tests basic policy quoting accuracy. Only one policy version is
    available and both drift parameters are ``0.0``.

    Returns
    -------
    dict
        Task configuration with keys ``task_id``, ``description``,
        ``workflows``, ``max_versions``, ``drift_probability``,
        ``silent_drift_ratio`` and ``max_steps``.
    """
    return {
        "task_id": "quote_accuracy",
        # The description is text shown at runtime. The dash in the second
        # paragraph is a real em dash (U+2014); it was previously stored as
        # mis-decoded UTF-8 bytes and rendered as garbage characters.
        "description": (
            "You are a customer service agent. Handle the following customer "
            "workflows by quoting company policies accurately.\n\n"
            "There are NO policy changes during this task — just quote "
            "the current policies correctly.\n\n"
            "Workflows: shopping checkout, product return\n\n"
            "Tips:\n"
            "- Use 'view_policies' to see current policy values\n"
            "- Use 'quote_policy' with the correct field path and value\n"
            "- Use 'respond_to_user' for non-policy messages\n"
            "- Use 'take_action' to process workflow steps\n"
            "- Use 'submit' when all workflows are complete"
        ),
        "workflows": ["shop", "return"],
        "max_versions": 1,
        "drift_probability": 0.0,
        "silent_drift_ratio": 0.0,
        "max_steps": 20,
    }
# ---------------------------------------------------------------------------
# Task 2 — drift_detection (MEDIUM)
# ---------------------------------------------------------------------------
def _build_drift_detection() -> dict:
    """Build the MEDIUM task: three workflows with moderate drift.

    Tests drift detection. Four policy versions are available, the drift
    probability is 0.30 and the silent drift ratio is 0.50.

    Returns
    -------
    dict
        Task configuration with keys ``task_id``, ``description``,
        ``workflows``, ``max_versions``, ``drift_probability``,
        ``silent_drift_ratio`` and ``max_steps``.
    """
    # The description is four paragraphs separated by blank lines; the last
    # paragraph is the newline-separated list of tips.
    situation = (
        "You are a customer service agent handling multiple workflows. "
        "Company policies may change mid-conversation without warning."
    )
    objective = (
        "You must detect when policies have changed and adapt your "
        "responses accordingly. Quoting stale policies is penalized."
    )
    workflow_summary = (
        "Workflows: shopping checkout, product return, subscription signup"
    )
    tips = "\n".join(
        [
            "Tips:",
            "- Use 'request_verification' periodically to check for updates",
            "- Watch for system notifications about policy changes",
            "- After verification, re-read policies with 'view_policies'",
            "- Stale policy quotes (from old versions) incur heavy penalties",
            "- Use 'submit' when all workflows are complete",
        ]
    )
    description = "\n\n".join([situation, objective, workflow_summary, tips])

    config = {
        "task_id": "drift_detection",
        "description": description,
        "workflows": ["shop", "return", "subscribe"],
        "max_versions": 4,
        "drift_probability": 0.30,
        "silent_drift_ratio": 0.50,
        "max_steps": 30,
    }
    return config
# ---------------------------------------------------------------------------
# Task 3 — compliance_storm (HARD)
# ---------------------------------------------------------------------------
def _build_compliance_storm() -> dict:
    """Build the HARD task: five workflows with aggressive silent drift.

    Full compliance test. Eight policy versions are available, the drift
    probability is 0.50 and the silent drift ratio is 0.80.

    Returns
    -------
    dict
        Task configuration with keys ``task_id``, ``description``,
        ``workflows``, ``max_versions``, ``drift_probability``,
        ``silent_drift_ratio`` and ``max_steps``.
    """
    return {
        "task_id": "compliance_storm",
        # The description is text shown at runtime. The dash in the second
        # paragraph is a real em dash (U+2014); it was previously stored as
        # mis-decoded UTF-8 bytes and rendered as garbage characters.
        "description": (
            "You are a customer service agent under extreme conditions. "
            "Handle ALL customer workflows while policies change frequently "
            "and silently. Critical policy fields may change without notice.\n\n"
            "This task tests your ability to maintain compliance under "
            "pressure — balancing workflow progress with policy freshness.\n\n"
            "Workflows: shopping, returns, subscriptions, bookings, complaints\n\n"
            "Tips:\n"
            "- Policies change frequently and often silently (no notification)\n"
            "- Verify policies before every quote when possible\n"
            "- Severity of errors: HIGH for stale quotes, CRITICAL for "
            "scope changes\n"
            "- The policy version may jump multiple versions at once\n"
            "- Balance speed (workflow completion) with accuracy (compliance)\n"
            "- Use 'submit' when all workflows are complete"
        ),
        "workflows": ["shop", "return", "subscribe", "book", "complain"],
        "max_versions": 8,
        "drift_probability": 0.50,
        "silent_drift_ratio": 0.80,
        "max_steps": 45,
    }