modelbuilderhq committed on
Commit
2ade2c6
·
verified ·
1 Parent(s): 35d990a

Upload folder using huggingface_hub

Browse files
graders/__init__.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Task-specific graders for the SupportDesk environment."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Optional
7
+
8
+ from models import SupportCaseProgress, SupportDeskObservation
9
+
10
+
11
def _as_case(obj) -> SupportCaseProgress:
    """Return the ``SupportCaseProgress`` to grade for *obj*.

    A ``SupportCaseProgress`` is returned unchanged; any other object must
    expose a ``case`` attribute, whose value is returned.

    Raises:
        TypeError: if *obj* is neither a ``SupportCaseProgress`` nor an
            object with a ``case`` attribute.
    """
    if not isinstance(obj, SupportCaseProgress):
        if not hasattr(obj, "case"):
            raise TypeError(f"Unsupported object for grading: {type(obj)}")
        return obj.case  # type: ignore[attr-defined]
    return obj
18
+
19
+
20
@dataclass
class GradeBreakdown:
    """Outcome of grading a single case.

    Attributes are documented inline below. ``completed_milestones`` may be
    omitted (or passed as ``None``); it is then replaced by a fresh empty
    list so that instances never share one list object.
    """

    # Final score reported for the case.
    score: float
    # Short human-readable label for the evaluation that produced the score.
    message: str
    # Penalty name -> amount recorded by the grader.
    penalties: dict[str, float]
    # ``None`` is only a construction-time placeholder; see __post_init__.
    completed_milestones: Optional[list[str]] = None

    @property
    def total_score(self) -> float:
        """Alias for :attr:`score`."""
        return self.score

    def __post_init__(self) -> None:
        """Normalize a missing ``completed_milestones`` to a new empty list."""
        if self.completed_milestones is None:
            # The dataclass is not frozen, so ordinary assignment is enough.
            self.completed_milestones = []
34
+
35
+
36
+ def _clamp(v: float) -> float:
37
+ return max(0.01, min(0.99, v))
38
+
39
+
40
def grade_task_id(task_id: str, observation: SupportDeskObservation | SupportCaseProgress) -> GradeBreakdown:
    """Grade *observation* with the grader registered for *task_id*.

    The observation is first normalized with ``_as_case``. An unrecognized
    *task_id* does not raise: it yields a 0.01 score with an
    ``unknown_task`` penalty.
    """
    case = _as_case(observation)
    graders_by_task = (
        ("billing_refund_easy", BillingRefundEasyGrader),
        ("account_takeover_medium", AccountTakeoverMediumGrader),
        ("api_incident_hard", ApiIncidentHardGrader),
        ("regulated_export_exception_hard", RegulatedExportExceptionHardGrader),
    )
    for known_id, grader_cls in graders_by_task:
        if task_id == known_id:
            return grader_cls().score(case)
    return GradeBreakdown(0.01, "Unknown task", {"unknown_task": 1.0})
51
+
52
+
53
def grade_case(task_or_id, observation) -> GradeBreakdown:
    """Grade *observation* for a task given as a spec object or as an id.

    *task_or_id* may be any object with a ``task_id`` attribute; anything
    else is converted with ``str()`` and used as the task id.
    """
    if hasattr(task_or_id, "task_id"):
        resolved_id = task_or_id.task_id
    else:
        resolved_id = str(task_or_id)
    normalized = _as_case(observation)
    return grade_task_id(resolved_id, normalized)
58
+
59
+
60
class BillingRefundEasyGrader:
    """Keyword-based grader for the ``billing_refund_easy`` task.

    Starts from 1.0 and subtracts a penalty for each failed check on the
    customer reply, the internal note and the case status.
    """

    def score(self, case: SupportCaseProgress) -> GradeBreakdown:
        """Return the score breakdown for *case*.

        Checks (all text comparisons are case-insensitive substring tests):
        the reply exists and mentions "refund", the internal note exists and
        mentions "duplicate", and the status is "resolved". The result is
        rounded to two decimals and clamped by ``_clamp``.
        """
        reply_text = (case.reply or "").lower()
        note_text = (case.internal_note or "").lower()
        penalties: dict[str, float] = {}

        if not reply_text:
            penalties["no_reply"] = 0.55
        elif "refund" not in reply_text:
            penalties["missing_refund"] = 0.25

        if not note_text:
            penalties["no_note"] = 0.2
        elif "duplicate" not in note_text:
            penalties["note_missing_duplicate"] = 0.2

        if case.status != "resolved":
            penalties["status_not_resolved"] = 0.1

        final = round(1.0 - sum(penalties.values()), 2)
        return GradeBreakdown(_clamp(final), "Billing refund evaluation", penalties)

    def grade(self, case: SupportCaseProgress) -> float:
        """Return only the numeric score from :meth:`score`."""
        breakdown = self.score(case)
        return breakdown.score
89
+
90
+
91
class AccountTakeoverMediumGrader:
    """Keyword-based grader for the ``account_takeover_medium`` task."""

    def score(self, case: SupportCaseProgress) -> GradeBreakdown:
        """Return the score breakdown for *case*.

        The lower-cased reply must contain "lock", "verify" and "ownership"
        (0.2 penalty each; 0.4 if there is no reply at all), and the status
        must be "escalated" or "waiting_on_customer" (0.2 penalty otherwise).
        The result is rounded to two decimals and clamped by ``_clamp``.
        """
        # NOTE(review): the starting score is 0.2 and every penalty is at
        # least 0.2, so the clamped result is either 0.2 (no penalties) or
        # 0.01. BillingRefundEasyGrader starts from 1.0 -- confirm this cap
        # is intentional.
        base_score = 0.2
        penalties: dict[str, float] = {}

        reply_text = (case.reply or "").lower()
        if not reply_text:
            penalties["no_reply"] = 0.4
        else:
            keyword_checks = (
                ("lock", "missing_lock"),
                ("verify", "missing_verify"),
                ("ownership", "missing_ownership"),
            )
            for keyword, penalty_name in keyword_checks:
                if keyword not in reply_text:
                    penalties[penalty_name] = 0.2

        accepted_statuses = ("escalated", "waiting_on_customer")
        if case.status not in accepted_statuses:
            penalties["wrong_status"] = 0.2

        final = round(base_score - sum(penalties.values()), 2)
        return GradeBreakdown(_clamp(final), "Account takeover evaluation", penalties)

    def grade(self, case: SupportCaseProgress) -> float:
        """Return only the numeric score from :meth:`score`."""
        breakdown = self.score(case)
        return breakdown.score
116
+
117
+
118
class ApiIncidentHardGrader:
    """Keyword-based grader for the ``api_incident_hard`` task."""

    @staticmethod
    def _mentions_request_ids(reply: str) -> bool:
        """Return True when the lower-cased *reply* refers to request IDs.

        The reply must contain "request" and an "id"/"ids" token that is
        either standalone (not glued to other letters or digits) or directly
        attached to "request" (e.g. "requestid"). A plain substring test for
        "id" is not used because it is satisfied by unrelated words such as
        "incident", "provide" or "identify".
        """
        import re  # local import: this module has no top-level ``re`` import

        if "request" not in reply:
            return False
        id_token = r"(?:(?<![a-z0-9])|request)ids?(?![a-z0-9])"
        return re.search(id_token, reply) is not None

    def score(self, case: SupportCaseProgress) -> GradeBreakdown:
        """Return the score breakdown for *case*.

        The lower-cased reply must mention "status" (0.15), request IDs
        (0.2) and contain "escalat" (0.2); a missing reply costs 0.4. The
        queue must be "platform_engineering" (0.15 otherwise). The result is
        rounded to two decimals and clamped by ``_clamp``.
        """
        penalties: dict[str, float] = {}
        # NOTE(review): starting from 0.2 caps the best possible result at
        # 0.2, and any penalty drops it to 0.05 or 0.01.
        # BillingRefundEasyGrader starts from 1.0 -- confirm this is intended.
        score = 0.2

        reply = (case.reply or "").lower()
        if reply:
            if "status" not in reply:
                penalties["missing_status_page"] = 0.15
            if not self._mentions_request_ids(reply):
                penalties["missing_request_ids"] = 0.2
            if "escalat" not in reply:
                penalties["missing_escalation"] = 0.2
        else:
            penalties["no_reply"] = 0.4

        if case.queue != "platform_engineering":
            penalties["wrong_queue"] = 0.15

        score -= sum(penalties.values())
        score = round(score, 2)
        return GradeBreakdown(_clamp(score), "API incident evaluation", penalties)

    def grade(self, case: SupportCaseProgress) -> float:
        """Return only the numeric score from :meth:`score`."""
        return self.score(case).score
143
+
144
+
145
class RegulatedExportExceptionHardGrader:
    """Keyword-based grader for the ``regulated_export_exception_hard`` task."""

    def score(self, case: SupportCaseProgress) -> GradeBreakdown:
        """Return the score breakdown for *case*.

        The lower-cased reply must contain "compliance" (0.2), either
        "cannot promise" or "not promise" (0.2), and both "recipient" and
        "identity" (0.15); a missing reply costs 0.4. The status must be
        "waiting_on_customer" (0.15 otherwise). The result is rounded to two
        decimals and clamped by ``_clamp``.
        """
        # NOTE(review): starting from 0.2 caps the best possible result at
        # 0.2, and any penalty drops it to 0.05 or 0.01.
        # BillingRefundEasyGrader starts from 1.0 -- confirm this is intended.
        base_score = 0.2
        penalties: dict[str, float] = {}

        reply_text = (case.reply or "").lower()
        if not reply_text:
            penalties["no_reply"] = 0.4
        else:
            if "compliance" not in reply_text:
                penalties["missing_compliance"] = 0.2

            declines_promise = any(
                phrase in reply_text for phrase in ("cannot promise", "not promise")
            )
            if not declines_promise:
                penalties["missing_no_promise"] = 0.2

            asks_recipient_identity = "recipient" in reply_text and "identity" in reply_text
            if not asks_recipient_identity:
                penalties["missing_recipient"] = 0.15

        if case.status != "waiting_on_customer":
            penalties["wrong_status"] = 0.15

        final = round(base_score - sum(penalties.values()), 2)
        return GradeBreakdown(_clamp(final), "Regulated export evaluation", penalties)

    def grade(self, case: SupportCaseProgress) -> float:
        """Return only the numeric score from :meth:`score`."""
        breakdown = self.score(case)
        return breakdown.score
pyproject.toml CHANGED
@@ -38,4 +38,4 @@ server = "server.app:main"
38
 
39
  [tool.setuptools]
40
  include-package-data = true
41
- packages = ["server"]
 
38
 
39
  [tool.setuptools]
40
  include-package-data = true
41
+ packages = ["server", "tasks", "graders"]
server/app.py CHANGED
@@ -14,10 +14,57 @@ try:
14
  except ImportError:
15
  try:
16
  from openenv_core.env_server import http_server as openenv_http_server
17
- except Exception as e: # pragma: no cover
18
- raise ImportError(
19
- "openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
20
- ) from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  from models import SupportDeskAction, SupportDeskObservation, SupportDeskState
23
  from server.supportdesk_environment import SupportDeskEnvironment
 
14
  except ImportError:
15
  try:
16
  from openenv_core.env_server import http_server as openenv_http_server
17
+ except Exception:
18
+ # Minimal fallback for test runs when openenv is unavailable.
19
+ from pydantic import BaseModel, ValidationError as _PydValidationError
20
+ from fastapi import FastAPI
21
+
22
+ class _ResetRequest(BaseModel):
23
+ seed: int | None = None
24
+ episode_id: str | None = None
25
+ task_id: str | None = None
26
+ timeout_s: float | None = None
27
+
28
+ class _StepRequest(BaseModel):
29
+ action: dict
30
+ timeout_s: float | None = None
31
+ episode_id: str | None = None
32
+
33
+ def _deserialize_action(data, ActionCls):
34
+ return ActionCls.model_validate(data)
35
+
36
+ def _create_app(env_cls, action_cls, obs_cls, env_name: str = "env", max_concurrent_envs: int = 1):
37
+ app = FastAPI()
38
+
39
+ @app.post("/reset")
40
+ def _reset(req: _ResetRequest = _ResetRequest()):
41
+ env = env_cls()
42
+ kwargs = req.model_dump(exclude_none=True)
43
+ obs = env.reset(**kwargs)
44
+ return {"observation": obs.model_dump(), "reward": obs.reward, "done": obs.done}
45
+
46
+ @app.post("/step")
47
+ def _step(req: _StepRequest):
48
+ env = env_cls()
49
+ action = _deserialize_action(req.action, action_cls)
50
+ obs = env.step(action, timeout_s=req.timeout_s, episode_id=req.episode_id)
51
+ return {"observation": obs.model_dump(), "reward": obs.reward, "done": obs.done}
52
+
53
+ @app.get("/state")
54
+ def _state():
55
+ env = env_cls()
56
+ return env.state.model_dump()
57
+
58
+ return app
59
+
60
+ class _Shim:
61
+ ResetRequest = _ResetRequest
62
+ StepRequest = _StepRequest
63
+ ValidationError = _PydValidationError
64
+ deserialize_action = staticmethod(_deserialize_action)
65
+ create_app = staticmethod(_create_app)
66
+
67
+ openenv_http_server = _Shim()
68
 
69
  from models import SupportDeskAction, SupportDeskObservation, SupportDeskState
70
  from server.supportdesk_environment import SupportDeskEnvironment
tasks/__init__.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Task registry for the SupportDesk environment (per-task modules)."""
2
+
3
+ from tasks.base import (
4
+ ALL_ISSUE_TYPES,
5
+ ALL_PRIORITIES,
6
+ ALL_QUEUES,
7
+ ALL_STATUSES,
8
+ SupportTaskSpec,
9
+ )
10
+ from tasks.billing_refund_easy import TASK as BILLING_REFUND_EASY
11
+ from tasks.account_takeover_medium import TASK as ACCOUNT_TAKEOVER_MEDIUM
12
+ from tasks.api_incident_hard import TASK as API_INCIDENT_HARD
13
+ from tasks.regulated_export_exception_hard import TASK as REGULATED_EXPORT_EXCEPTION_HARD
14
+
15
# Every bundled task spec, indexed by its ``task_id``.
TASKS: dict[str, SupportTaskSpec] = {
    spec.task_id: spec
    for spec in [
        BILLING_REFUND_EASY,
        ACCOUNT_TAKEOVER_MEDIUM,
        API_INCIDENT_HARD,
        REGULATED_EXPORT_EXCEPTION_HARD,
    ]
}
24
+
25
+
26
def get_task(task_id: str) -> SupportTaskSpec:
    """Return the task spec registered under *task_id*.

    Raises:
        KeyError: if *task_id* is not a key of ``TASKS``; the message lists
            the registered ids to make the mistake easy to spot.
    """
    try:
        return TASKS[task_id]
    except KeyError:
        known = ", ".join(sorted(TASKS))
        raise KeyError(f"Unknown task_id {task_id!r}; known task ids: {known}") from None
28
+
29
+
30
def list_task_ids() -> list[str]:
    """Return the registered task ids as a new list, in registry order."""
    return [task_id for task_id in TASKS]
32
+
33
+
34
+ __all__ = [
35
+ "SupportTaskSpec",
36
+ "ALL_QUEUES",
37
+ "ALL_PRIORITIES",
38
+ "ALL_STATUSES",
39
+ "ALL_ISSUE_TYPES",
40
+ "TASKS",
41
+ "get_task",
42
+ "list_task_ids",
43
+ ]
tasks/account_takeover_medium.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from tasks.base import SupportTaskSpec, KnowledgeSnippet, SupportTicket
4
+
5
# Medium-difficulty scenario: an enterprise customer reports that their admin
# email was changed and they lost access. Expected handling per the gold
# values: trust_and_safety queue, urgent priority, escalated status.
TASK = SupportTaskSpec(
    task_id="account_takeover_medium",
    difficulty="medium",
    title="Account takeover recovery",
    objective=(
        "Identify an account compromise, lock the account, start recovery, "
        "and communicate next steps without leaking security-sensitive details."
    ),
    ticket=SupportTicket(
        customer_name="Anjali Menon",
        customer_tier="enterprise",
        company="Saffron Logistics",
        subject="Someone changed our admin email",
        body=(
            "Our main admin email was changed and we lost access. "
            "We’re seeing unfamiliar login alerts from overseas. Urgent help needed."
        ),
        region="us-east-1",
        affected_users=240,
        sla_minutes_remaining=120,
    ),
    knowledge_base=(
        KnowledgeSnippet(
            article_id="kb-compromise-protocol",
            title="Account compromise protocol",
            content=(
                "Immediately lock the account, invalidate active sessions, require admin re-verification, "
                "and collect proof of ownership (billing zip, last 4 of card, admin ID)."
            ),
        ),
        KnowledgeSnippet(
            article_id="kb-enterprise-sla",
            title="Enterprise SLA",
            content="Enterprise incidents must be contained within 2 hours. Provide status updates every 30 minutes.",
        ),
    ),
    gold_queue="trust_and_safety",
    gold_priority="urgent",
    gold_issue_type="account_compromise",
    gold_status="escalated",
    gold_resolution_code="security_lock_and_verify",
    required_requested_fields=("ownership_proof", "billing_zip", "last4_card"),
    required_reply_markers=(
        ("locked", "account"),
        ("invalidate", "sessions"),
        ("verify", "ownership"),
        ("sla", "updates"),
    ),
    required_note_markers=(("account locked",), ("ownership proof requested",)),
    risk_flags=("security_incident", "sla_breach"),
    follow_up_outcome="partial",
    follow_up_message="Customer provided billing zip but not card last4 yet.",
    follow_up_provided_fields=("billing_zip",),
    # Uses the same field name as required_requested_fields ("last4_card");
    # the previous value "card_last4" matched no requested field.
    follow_up_wrong_fields=("last4_card",),
    sla_step_cost=18,
    # NOTE(review): "security_ops" is not listed in tasks.base.ALL_QUEUES --
    # confirm the environment can actually route a case to this queue.
    over_escalation_queues=("security_ops",),
    under_escalation_deadline_step=4,
    max_steps=7,
)
tasks/api_incident_hard.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from tasks.base import SupportTaskSpec, KnowledgeSnippet, SupportTicket
4
+
5
# Hard scenario: an enterprise customer reports createOrder timeouts and 500s
# across regions. Expected handling per the gold values:
# platform_engineering queue, urgent priority, escalated status.
TASK = SupportTaskSpec(
    task_id="api_incident_hard",
    difficulty="hard",
    title="Production API incident triage",
    objective=(
        "Triage a production API latency/5xx incident affecting multiple customers; "
        "collect diagnostics, apply runbook mitigations, and escalate to platform engineering appropriately."
    ),
    ticket=SupportTicket(
        customer_name="Marco Alvarez",
        customer_tier="enterprise",
        company="Northwind Labs",
        subject="API timeouts for createOrder",
        body=(
            "Since 3 hours ago, createOrder calls are timing out or returning 500s across regions. "
            "We rolled back our last deploy and still see issues. Need RCA and mitigation ASAP."
        ),
        region="us-west-2",
        affected_users=4200,
        sla_minutes_remaining=90,
    ),
    knowledge_base=(
        KnowledgeSnippet(
            article_id="kb-api-runbook",
            title="API latency/5xx runbook",
            content=(
                "Capture request IDs, time window, regions, and payload samples. "
                "Check current status page and incident channel. "
                "If multiple regions impacted, escalate to platform_engineering and set customer expectations."
            ),
        ),
        KnowledgeSnippet(
            article_id="kb-status-page",
            title="Status page policy",
            content="If 2+ enterprise customers report API errors, post a preliminary status within 15 minutes.",
        ),
    ),
    gold_queue="platform_engineering",
    gold_priority="urgent",
    gold_issue_type="production_incident",
    gold_status="escalated",
    gold_resolution_code="runbook_investigation",
    required_requested_fields=("request_ids", "time_window", "regions", "payload_sample"),
    required_reply_markers=(
        ("acknowledge", "incident"),
        ("collect", "request ids"),
        ("status", "page"),
        ("escalate", "platform"),
    ),
    required_note_markers=(
        ("status page",),
        ("platform escalation",),
        ("request ids",),
    ),
    risk_flags=("sla_breach", "p1_incident"),
    follow_up_outcome="partial",
    follow_up_message="Platform team investigating elevated DB latency; ETA 20 minutes.",
    follow_up_provided_fields=("request_ids", "time_window"),
    follow_up_wrong_fields=("payload_sample",),
    sla_step_cost=20,
    over_escalation_queues=(),
    under_escalation_deadline_step=3,
    max_steps=8,
)
tasks/base.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Shared task structures for SupportDesk."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Literal
7
+
8
+ from models import KnowledgeSnippet, SupportTicket
9
+
10
# Known queue names; every bundled task's gold_queue is one of these.
ALL_QUEUES = [
    "billing_ops",
    "trust_and_safety",
    "platform_engineering",
    "compliance_ops",
    "general_support",
]
# Known priority labels; every bundled task's gold_priority is one of these.
ALL_PRIORITIES = ["low", "normal", "high", "urgent"]
# Known case statuses; every bundled task's gold_status is one of these.
ALL_STATUSES = ["new", "waiting_on_customer", "resolved", "escalated"]
# Known issue types; every bundled task's gold_issue_type is one of these.
ALL_ISSUE_TYPES = [
    "duplicate_charge",
    "account_compromise",
    "production_incident",
    "regulated_exception",
    "general_question",
]
26
+
27
+
28
@dataclass(frozen=True)
class SupportTaskSpec:
    """Immutable description of one SupportDesk task scenario.

    Bundles the ticket and knowledge-base snippets for a scenario with its
    "gold" triage values, required markers/fields and follow-up settings.
    How each field is consumed is decided by code outside this module; the
    comments below only describe what the declarations themselves show.
    """

    # Identity and human-readable description of the scenario.
    task_id: str
    difficulty: Literal["easy", "medium", "hard"]
    title: str
    objective: str
    # Scenario content.
    ticket: SupportTicket
    knowledge_base: tuple[KnowledgeSnippet, ...]
    # "Gold" triage values -- presumably the expected answers; confirm against the environment.
    gold_queue: str
    gold_priority: str
    gold_issue_type: str
    gold_status: str
    gold_resolution_code: str
    # Required field names and marker groups (each marker group is a tuple of strings).
    required_requested_fields: tuple[str, ...]
    required_reply_markers: tuple[tuple[str, ...], ...]
    required_note_markers: tuple[tuple[str, ...], ...]
    # Optional settings; every field from here on has a default.
    forbidden_reply_markers: tuple[str, ...] = ()
    risk_flags: tuple[str, ...] = ()
    follow_up_outcome: Literal["none", "partial", "complete", "incorrect"] = "none"
    follow_up_message: str = ""
    follow_up_provided_fields: tuple[str, ...] = ()
    follow_up_wrong_fields: tuple[str, ...] = ()
    sla_step_cost: int = 15
    over_escalation_queues: tuple[str, ...] = ()
    # None means no deadline step is configured.
    under_escalation_deadline_step: int | None = None
    max_steps: int = 6
54
+
55
+
56
+ __all__ = [
57
+ "SupportTaskSpec",
58
+ "KnowledgeSnippet",
59
+ "SupportTicket",
60
+ "ALL_QUEUES",
61
+ "ALL_PRIORITIES",
62
+ "ALL_STATUSES",
63
+ "ALL_ISSUE_TYPES",
64
+ ]
tasks/billing_refund_easy.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from tasks.base import SupportTaskSpec, KnowledgeSnippet, SupportTicket
4
+
5
# Easy scenario: a pro-tier customer reports being charged again after
# canceling. Expected handling per the gold values: billing_ops queue,
# high priority, resolved status.
TASK = SupportTaskSpec(
    task_id="billing_refund_easy",
    difficulty="easy",
    title="Duplicate charge refund triage",
    objective=(
        "Triage a duplicate-charge billing ticket, send the correct customer response, "
        "and close the case only if no further customer information is required."
    ),
    ticket=SupportTicket(
        customer_name="Riya Shah",
        customer_tier="pro",
        company="PixelNorth Studio",
        subject="Charged twice after I canceled",
        body=(
            "I canceled our Pro annual workspace yesterday, but my card was charged again "
            "this morning and I still see the old invoice. We only had one workspace, "
            "so this looks like a duplicate charge. Please fix it quickly."
        ),
        region="ap-south-1",
        affected_users=12,
        sla_minutes_remaining=240,
    ),
    knowledge_base=(
        KnowledgeSnippet(
            article_id="kb-refund-dup",
            title="Refund duplicate charges",
            content=(
                "If a customer is double-charged, verify the duplicate invoice IDs, refund the extra charge, "
                "and send a confirmation summarizing the refund amount and timeline."
            ),
        ),
        KnowledgeSnippet(
            article_id="kb-pro-refund-policy",
            title="Pro tier refund policy",
            content=(
                "Pro annual refunds are prorated to the current billing month; processing takes 5-7 business days. "
                "Provide the refund reference ID in your reply."
            ),
        ),
    ),
    gold_queue="billing_ops",
    gold_priority="high",
    gold_issue_type="duplicate_charge",
    gold_status="resolved",
    gold_resolution_code="refund_processed",
    required_requested_fields=("invoice_ids",),
    required_reply_markers=(
        ("refund", "processed"),
        ("reference", "id"),
        ("timeline",),
    ),
    required_note_markers=(("duplicate charge",), ("refund issued",)),
    risk_flags=("sla_breach", "finance_close_risk"),
    follow_up_outcome="complete",
    follow_up_message="Customer confirms the refund hit their card.",
    # NOTE(review): "refund_reference" is not one of required_requested_fields
    # ("invoice_ids") -- confirm this is the intended field name.
    follow_up_provided_fields=("refund_reference",),
    follow_up_wrong_fields=(),
    sla_step_cost=12,
    over_escalation_queues=(),
    under_escalation_deadline_step=None,
    max_steps=6,
)
tasks/regulated_export_exception_hard.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from tasks.base import SupportTaskSpec, KnowledgeSnippet, SupportTicket
4
+
5
# Hard scenario: a pro-tier customer requests an exception to export model
# outputs to a restricted country. Expected handling per the gold values:
# compliance_ops queue, high priority, waiting_on_customer status.
TASK = SupportTaskSpec(
    task_id="regulated_export_exception_hard",
    difficulty="hard",
    title="Regulated export exception review",
    objective=(
        "Handle a compliance exception request for export-controlled data; gather required attestations, "
        "route to compliance, and avoid promising approval."
    ),
    ticket=SupportTicket(
        customer_name="Lena Fischer",
        customer_tier="pro",
        company="Helio Analytics",
        subject="Export of model outputs to restricted country",
        body=(
            "We need to send model outputs to a contractor in a restricted country for 2 weeks. "
            "Requesting an exception. We will use VPN + access controls."
        ),
        region="eu-central-1",
        affected_users=35,
        sla_minutes_remaining=180,
    ),
    knowledge_base=(
        KnowledgeSnippet(
            article_id="kb-export-policy",
            title="Export control policy",
            content=(
                "Exceptions require compliance review. Collect justification, data sensitivity, duration, "
                "recipient identity, and technical safeguards. Do not promise approval."
            ),
        ),
        KnowledgeSnippet(
            article_id="kb-temp-access",
            title="Temporary access rules",
            content="Temporary access must be time-bounded and audited; VPN alone is insufficient without logging.",
        ),
    ),
    gold_queue="compliance_ops",
    gold_priority="high",
    gold_issue_type="regulated_exception",
    gold_status="waiting_on_customer",
    gold_resolution_code="compliance_review_pending",
    required_requested_fields=("justification", "data_type", "recipient_identity", "duration", "safeguards"),
    required_reply_markers=(
        ("cannot promise", "approval"),
        ("compliance", "review"),
        ("collect", "recipient"),
        ("time-bound", "access"),
    ),
    required_note_markers=(
        ("exception request",),
        ("awaiting compliance",),
    ),
    risk_flags=("legal",),
    follow_up_outcome="incorrect",
    follow_up_message="Customer insists VPN is sufficient and did not provide recipient identity.",
    follow_up_provided_fields=("duration",),
    follow_up_wrong_fields=("safeguards",),
    sla_step_cost=14,
    # NOTE(review): "legal_ops" is not listed in tasks.base.ALL_QUEUES --
    # confirm the environment can actually route a case to this queue.
    over_escalation_queues=("legal_ops",),
    under_escalation_deadline_step=4,
    max_steps=8,
)