Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- Dockerfile +1 -1
- README.md +11 -9
- __init__.py +49 -2
- client.py +37 -2
- graders.py +168 -11
- inference.py +6 -6
- main.py +1 -1
- models.py +121 -2
- openenv.yaml +1 -1
- openenv_compat.py +76 -0
- policies.py +84 -0
- pyproject.toml +3 -3
- server/__init__.py +5 -1
- server/app.py +185 -13
- server/supportdesk_environment.py +544 -2
- tasks.py +404 -2
- tests/test_supportdesk.py +9 -9
Dockerfile
CHANGED
|
@@ -80,4 +80,4 @@ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
|
| 80 |
|
| 81 |
# Run the FastAPI server
|
| 82 |
# The module path is constructed to work with this repo's package layout.
|
| 83 |
-
CMD ["sh", "-c", "cd /app/env && uvicorn
|
|
|
|
| 80 |
|
| 81 |
# Run the FastAPI server
|
| 82 |
# The module path is constructed to work with this repo's package layout.
|
| 83 |
+
CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
|
README.md
CHANGED
|
@@ -303,15 +303,17 @@ Examples:
|
|
| 303 |
|-- pyproject.toml
|
| 304 |
|-- Dockerfile
|
| 305 |
|-- uv.lock
|
| 306 |
-
|--
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
| |-- __init__.py
|
| 308 |
-
| |--
|
| 309 |
-
|
|
| 310 |
-
| |-- policies.py
|
| 311 |
-
| |-- tasks.py
|
| 312 |
-
| `-- server
|
| 313 |
-
| |-- app.py
|
| 314 |
-
| `-- supportdesk_environment.py
|
| 315 |
|-- tests
|
| 316 |
| `-- test_supportdesk.py
|
| 317 |
`-- examples
|
|
@@ -344,7 +346,7 @@ python -m openenv.cli validate .
|
|
| 344 |
Start the local server:
|
| 345 |
|
| 346 |
```bash
|
| 347 |
-
python -m
|
| 348 |
```
|
| 349 |
|
| 350 |
Or use the entrypoint:
|
|
|
|
| 303 |
|-- pyproject.toml
|
| 304 |
|-- Dockerfile
|
| 305 |
|-- uv.lock
|
| 306 |
+
|-- __init__.py
|
| 307 |
+
|-- client.py
|
| 308 |
+
|-- graders.py
|
| 309 |
+
|-- models.py
|
| 310 |
+
|-- openenv_compat.py
|
| 311 |
+
|-- policies.py
|
| 312 |
+
|-- tasks.py
|
| 313 |
+
|-- server
|
| 314 |
| |-- __init__.py
|
| 315 |
+
| |-- app.py
|
| 316 |
+
| `-- supportdesk_environment.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
|-- tests
|
| 318 |
| `-- test_supportdesk.py
|
| 319 |
`-- examples
|
|
|
|
| 346 |
Start the local server:
|
| 347 |
|
| 348 |
```bash
|
| 349 |
+
python -m server.app
|
| 350 |
```
|
| 351 |
|
| 352 |
Or use the entrypoint:
|
__init__.py
CHANGED
|
@@ -1,3 +1,50 @@
|
|
| 1 |
-
"""
|
| 2 |
|
| 3 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""SupportDesk OpenEnv environment package (flat layout)."""
|
| 2 |
|
| 3 |
+
from client import SupportDeskEnv
|
| 4 |
+
from graders import (
|
| 5 |
+
AccountTakeoverMediumGrader,
|
| 6 |
+
ApiIncidentHardGrader,
|
| 7 |
+
BillingRefundEasyGrader,
|
| 8 |
+
GradeBreakdown,
|
| 9 |
+
RegulatedExportExceptionHardGrader,
|
| 10 |
+
grade_case,
|
| 11 |
+
grade_task_id,
|
| 12 |
+
)
|
| 13 |
+
from models import (
|
| 14 |
+
ActionHistoryEntry,
|
| 15 |
+
KnowledgeSnippet,
|
| 16 |
+
SupportCaseProgress,
|
| 17 |
+
SupportDeskAction,
|
| 18 |
+
SupportDeskObservation,
|
| 19 |
+
SupportDeskState,
|
| 20 |
+
SupportTicket,
|
| 21 |
+
)
|
| 22 |
+
from policies import default_note, default_reply, heuristic_action
|
| 23 |
+
from server.supportdesk_environment import SupportDeskEnvironment
|
| 24 |
+
from tasks import TASKS, SupportTaskSpec, get_task, list_task_ids
|
| 25 |
+
|
| 26 |
+
__all__ = [
|
| 27 |
+
"ActionHistoryEntry",
|
| 28 |
+
"GradeBreakdown",
|
| 29 |
+
"KnowledgeSnippet",
|
| 30 |
+
"SupportCaseProgress",
|
| 31 |
+
"SupportDeskAction",
|
| 32 |
+
"SupportDeskEnv",
|
| 33 |
+
"SupportDeskEnvironment",
|
| 34 |
+
"SupportDeskObservation",
|
| 35 |
+
"SupportDeskState",
|
| 36 |
+
"SupportTaskSpec",
|
| 37 |
+
"SupportTicket",
|
| 38 |
+
"TASKS",
|
| 39 |
+
"default_note",
|
| 40 |
+
"default_reply",
|
| 41 |
+
"get_task",
|
| 42 |
+
"grade_case",
|
| 43 |
+
"grade_task_id",
|
| 44 |
+
"heuristic_action",
|
| 45 |
+
"list_task_ids",
|
| 46 |
+
"AccountTakeoverMediumGrader",
|
| 47 |
+
"ApiIncidentHardGrader",
|
| 48 |
+
"BillingRefundEasyGrader",
|
| 49 |
+
"RegulatedExportExceptionHardGrader",
|
| 50 |
+
]
|
client.py
CHANGED
|
@@ -1,3 +1,38 @@
|
|
| 1 |
-
"""
|
| 2 |
|
| 3 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""HTTP client for interacting with a deployed SupportDesk environment."""
|
| 2 |
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from models import SupportDeskAction, SupportDeskObservation, SupportDeskState
|
| 6 |
+
from openenv_compat import EnvClient, StepResult
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def _validate(model_cls, payload):
|
| 10 |
+
if hasattr(model_cls, "model_validate"):
|
| 11 |
+
return model_cls.model_validate(payload)
|
| 12 |
+
return model_cls(**payload) # pragma: no cover - pydantic v1 fallback
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class SupportDeskEnv(EnvClient[SupportDeskAction, SupportDeskObservation, SupportDeskState]):
    """Typed client for a SupportDesk OpenEnv server (local or deployed)."""

    def _step_payload(self, action: SupportDeskAction) -> dict:
        """Serialize ``action`` into the JSON-compatible dict sent to the server."""

        # Prefer ``model_dump`` when the action exposes it, else fall back to ``dict``.
        try:
            serializer = action.model_dump
        except AttributeError:
            serializer = action.dict
        return serializer()

    def _parse_state(self, payload) -> SupportDeskState:
        """Validate a state payload into a ``SupportDeskState``."""
        return _validate(SupportDeskState, payload)

    def _parse_reset(self, payload) -> SupportDeskObservation:
        """Validate a reset payload into a ``SupportDeskObservation``."""
        return _validate(SupportDeskObservation, payload)

    def _parse_result(self, payload) -> StepResult[SupportDeskObservation]:
        """Turn a step payload into a ``StepResult`` carrying a typed observation.

        Reads the ``observation``, ``reward`` and ``done`` keys of ``payload``;
        a missing key raises ``KeyError``.
        """
        parsed_observation = _validate(SupportDeskObservation, payload["observation"])
        # OpenEnv StepResult only accepts observation/reward/done in this runtime.
        return StepResult(
            observation=parsed_observation,
            reward=payload["reward"],
            done=payload["done"],
        )
|
graders.py
CHANGED
|
@@ -1,14 +1,171 @@
|
|
| 1 |
-
"""
|
| 2 |
-
|
| 3 |
-
from
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
__all__ = [
|
| 14 |
"AccountTakeoverMediumGrader",
|
|
|
|
| 1 |
+
"""Deterministic graders and reward helpers for SupportDesk."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import re
|
| 6 |
+
from dataclasses import dataclass
|
| 7 |
+
|
| 8 |
+
from models import SupportCaseProgress
|
| 9 |
+
from tasks import SupportTaskSpec, get_task
|
| 10 |
+
|
| 11 |
+
STRICT_SCORE_EPSILON = 0.01
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@dataclass(frozen=True)
class GradeBreakdown:
    """Immutable per-component score report for one graded case."""

    # Overall score followed by the individual component scores.
    total_score: float
    queue_score: float
    priority_score: float
    issue_type_score: float
    requested_fields_score: float
    reply_score: float
    note_score: float
    status_score: float
    resolution_score: float
    # Names of the components the grader counted as completed.
    completed_milestones: tuple[str, ...]
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _normalize(text: str | None) -> str:
|
| 31 |
+
if not text:
|
| 32 |
+
return ""
|
| 33 |
+
normalized = text.lower().replace("-", " ")
|
| 34 |
+
return re.sub(r"[^a-z0-9\s]", " ", normalized)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _marker_group_score(text: str | None, marker_groups: tuple[tuple[str, ...], ...]) -> float:
    """Return the fraction of marker groups that ``text`` satisfies.

    A group is satisfied when at least one of its markers, after
    normalization, occurs as a substring of the normalized text.

    Args:
        text: Free text to inspect; may be ``None``.
        marker_groups: Groups of alternative markers.

    Returns:
        ``1.0`` when there are no groups, ``0.0`` when the text is blank after
        normalization, otherwise ``satisfied_groups / len(marker_groups)``.
    """
    if not marker_groups:
        return 1.0

    haystack = _normalize(text)
    if not haystack.strip():
        return 0.0

    satisfied = 0
    for group in marker_groups:
        # A marker that normalizes to blank would be a substring of any text
        # and silently satisfy its group, so such markers are ignored.
        needles = [needle for needle in map(_normalize, group) if needle.strip()]
        if any(needle in haystack for needle in needles):
            satisfied += 1
    return satisfied / len(marker_groups)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _requested_fields_score(case: SupportCaseProgress, task: SupportTaskSpec) -> float:
|
| 53 |
+
required = set(task.required_requested_fields)
|
| 54 |
+
requested = set(case.requested_fields)
|
| 55 |
+
|
| 56 |
+
if not required:
|
| 57 |
+
return 1.0 if not requested else 0.0
|
| 58 |
+
if not requested:
|
| 59 |
+
return 0.0
|
| 60 |
+
|
| 61 |
+
matched = len(required.intersection(requested))
|
| 62 |
+
extras = len(requested.difference(required))
|
| 63 |
+
raw = matched / len(required)
|
| 64 |
+
penalty = min(0.25, extras * 0.05)
|
| 65 |
+
return max(0.0, raw - penalty)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def _reply_penalty(case: SupportCaseProgress, task: SupportTaskSpec) -> float:
    """Return 0.5 if the reply contains any forbidden marker, else 0.0.

    Both the reply and each marker are normalized before the substring check.
    A reply that normalizes to the empty string is never penalized.
    """
    reply_text = _normalize(case.reply)
    if not reply_text:
        return 0.0
    for marker in task.forbidden_reply_markers:
        if _normalize(marker) in reply_text:
            return 0.5
    return 0.0
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def _strict_open_unit_interval(score: float) -> float:
    """Clamp ``score`` into [STRICT_SCORE_EPSILON, 1 - STRICT_SCORE_EPSILON].

    This keeps final task scores strictly inside (0, 1) for evaluator compatibility.
    """

    lower_bound = STRICT_SCORE_EPSILON
    upper_bound = 1.0 - STRICT_SCORE_EPSILON
    raised = score if score > lower_bound else lower_bound
    return raised if raised < upper_bound else upper_bound
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def grade_case(task: SupportTaskSpec, case: SupportCaseProgress) -> GradeBreakdown:
    """Deterministically grade ``case`` against the gold values in ``task``.

    Exact-match components score 1.0 or 0.0; requested fields, reply and note
    can be fractional. The weighted total is clamped strictly inside (0, 1)
    and rounded to four decimals.
    """

    def _exact(actual, expected) -> float:
        # Binary component: full credit only on equality with the gold value.
        return 1.0 if actual == expected else 0.0

    queue_score = _exact(case.queue, task.gold_queue)
    priority_score = _exact(case.priority, task.gold_priority)
    issue_type_score = _exact(case.issue_type, task.gold_issue_type)
    requested_fields_score = _requested_fields_score(case, task)
    reply_coverage = _marker_group_score(case.reply, task.required_reply_markers)
    reply_score = max(0.0, reply_coverage - _reply_penalty(case, task))
    note_score = _marker_group_score(case.internal_note, task.required_note_markers)
    status_score = _exact(case.status, task.gold_status)
    resolution_score = _exact(case.resolution_code, task.gold_resolution_code)

    # (component score, weight) pairs; accumulated left to right.
    weighted_components = (
        (queue_score, 0.15),
        (priority_score, 0.10),
        (issue_type_score, 0.10),
        (requested_fields_score, 0.15),
        (reply_score, 0.25),
        (note_score, 0.10),
        (status_score, 0.10),
        (resolution_score, 0.05),
    )
    weighted_total = 0.0
    for component, weight in weighted_components:
        weighted_total += component * weight

    # Binary components count when non-zero; fractional ones need >= 0.99.
    milestone_checks = (
        ("queue", bool(queue_score)),
        ("priority", bool(priority_score)),
        ("issue_type", bool(issue_type_score)),
        ("requested_fields", requested_fields_score >= 0.99),
        ("reply", reply_score >= 0.99),
        ("internal_note", note_score >= 0.99),
        ("status", bool(status_score)),
        ("resolution_code", bool(resolution_score)),
    )
    completed = tuple(name for name, reached in milestone_checks if reached)

    return GradeBreakdown(
        total_score=round(_strict_open_unit_interval(weighted_total), 4),
        queue_score=queue_score,
        priority_score=priority_score,
        issue_type_score=issue_type_score,
        requested_fields_score=round(requested_fields_score, 4),
        reply_score=round(reply_score, 4),
        note_score=round(note_score, 4),
        status_score=status_score,
        resolution_score=resolution_score,
        completed_milestones=completed,
    )
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def grade_task_id(task_id: str, case: SupportCaseProgress) -> GradeBreakdown:
    """Look up the task for ``task_id`` with ``get_task`` and grade ``case`` against it."""

    task_spec = get_task(task_id)
    return grade_case(task_spec, case)
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
class _TaskSpecificGrader:
    """Base for importable, task-bound graders used for validator task discovery.

    Subclasses set ``task_id``; instances are callable and return the total score.
    """

    task_id: str = ""

    def grade(self, case: SupportCaseProgress) -> float:
        """Return the total score of ``case`` for this grader's ``task_id``."""
        breakdown = grade_task_id(self.task_id, case)
        return breakdown.total_score

    def __call__(self, case: SupportCaseProgress) -> float:
        """Delegate to ``grade`` so an instance can be used as a function."""
        return self.grade(case)
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
class BillingRefundEasyGrader(_TaskSpecificGrader):
    """Grader bound to the ``billing_refund_easy`` task."""

    task_id = "billing_refund_easy"
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
class AccountTakeoverMediumGrader(_TaskSpecificGrader):
    """Grader bound to the ``account_takeover_medium`` task."""

    task_id = "account_takeover_medium"
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
class ApiIncidentHardGrader(_TaskSpecificGrader):
    """Grader bound to the ``api_incident_hard`` task."""

    task_id = "api_incident_hard"
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
class RegulatedExportExceptionHardGrader(_TaskSpecificGrader):
    """Grader bound to the ``regulated_export_exception_hard`` task."""

    task_id = "regulated_export_exception_hard"
|
| 168 |
+
|
| 169 |
|
| 170 |
__all__ = [
|
| 171 |
"AccountTakeoverMediumGrader",
|
inference.py
CHANGED
|
@@ -14,12 +14,12 @@ try:
|
|
| 14 |
except ImportError: # pragma: no cover - local fallback mode
|
| 15 |
OpenAI = None # type: ignore[assignment]
|
| 16 |
|
| 17 |
-
from
|
| 18 |
-
from
|
| 19 |
-
from
|
| 20 |
-
from
|
| 21 |
-
from
|
| 22 |
-
from
|
| 23 |
|
| 24 |
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
|
| 25 |
MODEL_NAME = os.getenv("MODEL_NAME") or "Qwen/Qwen2.5-72B-Instruct"
|
|
|
|
| 14 |
except ImportError: # pragma: no cover - local fallback mode
|
| 15 |
OpenAI = None # type: ignore[assignment]
|
| 16 |
|
| 17 |
+
from client import SupportDeskEnv
|
| 18 |
+
from graders import grade_case
|
| 19 |
+
from models import SupportDeskAction, SupportDeskObservation
|
| 20 |
+
from policies import heuristic_action
|
| 21 |
+
from server.supportdesk_environment import SupportDeskEnvironment
|
| 22 |
+
from tasks import get_task, list_task_ids
|
| 23 |
|
| 24 |
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
|
| 25 |
MODEL_NAME = os.getenv("MODEL_NAME") or "Qwen/Qwen2.5-72B-Instruct"
|
main.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
-
from
|
| 6 |
|
| 7 |
|
| 8 |
def main() -> None:
|
|
|
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
+
from server.app import app, main as _run_server
|
| 6 |
|
| 7 |
|
| 8 |
def main() -> None:
|
models.py
CHANGED
|
@@ -1,3 +1,122 @@
|
|
| 1 |
-
"""
|
| 2 |
|
| 3 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Typed models for the SupportDesk OpenEnv environment."""
|
| 2 |
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from typing import Literal
|
| 6 |
+
|
| 7 |
+
from pydantic import BaseModel, Field
|
| 8 |
+
|
| 9 |
+
from openenv_compat import Action, Observation, State
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class KnowledgeSnippet(BaseModel):
    """A policy or runbook excerpt made available to the agent during triage."""

    # All three fields are required strings.
    article_id: str
    title: str
    content: str
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class SupportTicket(BaseModel):
    """The inbound support ticket that serves as static task input."""

    # Required ticket fields.
    customer_name: str
    customer_tier: Literal["free", "pro", "enterprise"]
    company: str
    subject: str
    body: str
    region: str

    # Optional context; absent values default to None.
    affected_users: int | None = None
    sla_minutes_remaining: int | None = None
    business_impact: str | None = None

    # List fields default to a fresh empty list per instance.
    secondary_concerns: list[str] = Field(default_factory=list)
    attachments: list[str] = Field(default_factory=list)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class ActionHistoryEntry(BaseModel):
    """One concise trace entry shown in observations and state dumps."""

    step: int
    operation: str
    summary: str
    # Defaults to 0.0 when not supplied.
    reward_delta: float = 0.0
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class CustomerFollowUp(BaseModel):
    """A scripted customer response that arrives after a request for more information."""

    # Restricted to the listed literals; starts as "none".
    status: Literal["none", "pending", "partial", "complete", "incorrect"] = "none"
    message: str | None = None
    # Both lists default to a fresh empty list per instance.
    provided_fields: list[str] = Field(default_factory=list)
    wrong_fields: list[str] = Field(default_factory=list)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
class SupportCaseProgress(BaseModel):
    """The mutable case state that graders score against."""

    # Classification values; None until set.
    queue: str | None = None
    priority: str | None = None
    issue_type: str | None = None

    # Status starts as "new"; the resolution code is None until set.
    status: str = "new"
    resolution_code: str | None = None

    # Information request, customer reply and internal note.
    requested_fields: list[str] = Field(default_factory=list)
    reply: str | None = None
    internal_note: str | None = None

    # Defaults to a fresh CustomerFollowUp (whose status defaults to "none").
    customer_follow_up: CustomerFollowUp = Field(default_factory=CustomerFollowUp)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
class SupportDeskAction(Action):
    """A single structured action the agent may take on a step."""

    # The only required field; must be one of the listed operations.
    operation: Literal["classify", "request_info", "draft_reply", "add_internal_note", "submit", "wait"]

    # Optional arguments; each defaults to None or an empty list.
    queue: str | None = None
    priority: str | None = None
    issue_type: str | None = None
    status: str | None = None
    resolution_code: str | None = None
    requested_fields: list[str] = Field(default_factory=list)
    reply: str | None = None
    internal_note: str | None = None
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
class SupportDeskObservation(Observation):
    """The observation returned to the agent after reset and after each step."""

    # Task description.
    task_id: str
    difficulty: Literal["easy", "medium", "hard"]
    objective: str
    ticket: SupportTicket
    knowledge_base: list[KnowledgeSnippet]

    # Lists of available values for queue, priority, status and issue type.
    available_queues: list[str]
    available_priorities: list[str]
    available_statuses: list[str]
    available_issue_types: list[str]

    # Case progress and workflow information.
    case: SupportCaseProgress
    current_sla_minutes_remaining: int | None = None
    workflow_stage: str
    required_next_actions: list[str] = Field(default_factory=list)
    risk_flags: list[str] = Field(default_factory=list)
    action_history: list[ActionHistoryEntry] = Field(default_factory=list)
    feedback: str = ""
    remaining_steps: int = 0
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
class SupportDeskState(State):
    """The environment state returned by the OpenEnv ``state()`` API."""

    # Episode identity and bookkeeping.
    episode_id: str | None = None
    task_id: str
    difficulty: Literal["easy", "medium", "hard"]
    step_count: int = 0
    reward: float = 0.0
    done: bool = False
    current_score: float = 0.0
    max_steps: int = 0

    # Case progress and workflow information.
    case: SupportCaseProgress
    current_sla_minutes_remaining: int | None = None
    workflow_stage: str
    required_next_actions: list[str] = Field(default_factory=list)
    risk_flags: list[str] = Field(default_factory=list)
    action_history: list[ActionHistoryEntry] = Field(default_factory=list)
    completed_milestones: list[str] = Field(default_factory=list)
    last_feedback: str = ""
|
openenv.yaml
CHANGED
|
@@ -3,7 +3,7 @@ name: HyperBrickCaseOps
|
|
| 3 |
env_name: supportdesk_env
|
| 4 |
type: space
|
| 5 |
runtime: fastapi
|
| 6 |
-
app:
|
| 7 |
port: 8000
|
| 8 |
description: Enterprise support operations environment with SLA pressure, business-impact aware triage, and primary-vs-secondary issue prioritization.
|
| 9 |
tasks:
|
|
|
|
| 3 |
env_name: supportdesk_env
|
| 4 |
type: space
|
| 5 |
runtime: fastapi
|
| 6 |
+
app: server.app:app
|
| 7 |
port: 8000
|
| 8 |
description: Enterprise support operations environment with SLA pressure, business-impact aware triage, and primary-vs-secondary issue prioritization.
|
| 9 |
tasks:
|
openenv_compat.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Compatibility helpers for environments where openenv-core is not installed."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from dataclasses import dataclass, field
|
| 6 |
+
from typing import Any, Generic, TypeVar
|
| 7 |
+
|
| 8 |
+
from pydantic import BaseModel
|
| 9 |
+
|
| 10 |
+
A = TypeVar("A")
|
| 11 |
+
O = TypeVar("O")
|
| 12 |
+
S = TypeVar("S")
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
OPENENV_AVAILABLE = True
|
| 16 |
+
|
| 17 |
+
# Resolve the OpenEnv base types. Try the ``openenv.core`` layout first, then
# the ``openenv_core`` layout; if neither imports, define minimal local
# stand-ins and record that OpenEnv is unavailable.
try:
    from openenv.core.client_types import StepResult  # type: ignore
    from openenv.core.env_client import EnvClient  # type: ignore
    from openenv.core.env_server.interfaces import Environment  # type: ignore
    from openenv.core.env_server.types import Action, Observation, State  # type: ignore
    from openenv.core.env_server.types import EnvironmentMetadata  # type: ignore
except ImportError:
    try:
        from openenv_core.client_types import StepResult  # type: ignore
        from openenv_core.http_env_client import HTTPEnvClient as EnvClient  # type: ignore
        from openenv_core.env_server.interfaces import Environment  # type: ignore
        from openenv_core.env_server.types import Action, Observation, State  # type: ignore
        from openenv_core.env_server.types import EnvironmentMetadata  # type: ignore
    except ImportError:
        OPENENV_AVAILABLE = False

        class Action(BaseModel):
            """Fallback Action base type for local import-only workflows."""

        class Observation(BaseModel):
            """Fallback Observation base type for local import-only workflows."""

            reward: float = 0.0
            done: bool = False

        class State(BaseModel):
            """Fallback State base type for local import-only workflows."""

        class Environment(Generic[A, O, S]):
            """Minimal base class used for local unit tests and import-based demos."""

            def __init__(self) -> None:
                super().__init__()

        class EnvironmentMetadata(BaseModel):
            """Fallback metadata model used when OpenEnv is absent."""

            name: str
            description: str
            readme_content: str | None = None
            version: str | None = None
            author: str | None = None

        @dataclass
        class StepResult(Generic[O]):
            """Fallback step result for local-only client compatibility."""

            observation: O
            reward: float
            done: bool
            info: dict[str, Any] = field(default_factory=dict)

        class EnvClient(Generic[A, O, S]):
            """Placeholder client that fails only when actually used."""

            def __init__(self, *args, **kwargs) -> None:
                # Importing the module stays possible; only instantiating the
                # client fails. The hint uses ``python -m pip`` because the
                # ``py`` launcher exists only on Windows.
                raise ImportError(
                    "SupportDeskEnv requires openenv-core to be installed. "
                    "Run `python -m pip install openenv-core` to use the HTTP client."
                )
|
policies.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Reusable policy helpers for local baselines and training examples."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from models import SupportDeskAction, SupportDeskObservation
|
| 6 |
+
from tasks import get_task
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def default_reply(task_id: str) -> str:
    """Return the canned customer reply for ``task_id``.

    Three task ids have dedicated replies; any other id gets the
    active-incident reply.
    """

    canned_replies = (
        (
            "billing_refund_easy",
            "Thanks for flagging the duplicate charge. I have started the refund for the extra "
            "charge, and the funds usually appear within 5-7 business days.",
        ),
        (
            "account_takeover_medium",
            "We have escalated this to our trust team. Please reset your password, scan your "
            "device for malware, and reply with your workspace_id, last successful login time, "
            "and billing email so we can verify the account safely.",
        ),
        (
            "regulated_export_exception_hard",
            "We cannot provide a bypass or temporary unlock yet. Our compliance team is running "
            "a compliance review, and we need your tenant_region, dpa_amendment_id, and "
            "legal_contact_email to continue that review.",
        ),
    )
    for known_id, reply_text in canned_replies:
        if task_id == known_id:
            return reply_text

    # No dedicated reply: use the incident response.
    return (
        "We are treating this as an active incident and our on-call engineering team is engaged. "
        "Please send the affected request IDs, UTC timestamps, and the impacted region so we can "
        "speed up the investigation."
    )
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def default_note(task_id: str) -> str:
    """Return the canned internal note for ``task_id``.

    Three task ids have dedicated notes; any other id gets the
    EU data residency incident note.
    """

    canned_notes = (
        (
            "billing_refund_easy",
            "Duplicate charge confirmed from attached invoice; refund approved.",
        ),
        (
            "account_takeover_medium",
            "Suspicious login alert reported and customer is locked out.",
        ),
        (
            "regulated_export_exception_hard",
            "Audit-driven export exception request tied to an EU residency policy block; "
            "customer asked for a manual bypass before legal approval.",
        ),
    )
    for known_id, note_text in canned_notes:
        if task_id == known_id:
            return note_text

    # No dedicated note: use the incident note.
    return "EU data residency rollout hit intermittent HTTP 500s and the customer launches tonight."
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def heuristic_action(observation: SupportDeskObservation) -> SupportDeskAction:
    """Choose the next baseline action from the task's gold values.

    The first unmet condition decides the action, checked in this order:
    classify, request_info, wait, draft_reply, add_internal_note, submit.
    """

    spec = get_task(observation.task_id)
    progress = observation.case

    # 1. Classify while any of queue / priority / issue type is unset.
    classification = (progress.queue, progress.priority, progress.issue_type)
    if any(value is None for value in classification):
        return SupportDeskAction(
            operation="classify",
            queue=spec.gold_queue,
            priority=spec.gold_priority,
            issue_type=spec.gold_issue_type,
        )

    # 2. Request the required fields until the requested set matches them.
    wanted_fields = spec.required_requested_fields
    if wanted_fields and sorted(progress.requested_fields) != sorted(wanted_fields):
        return SupportDeskAction(
            operation="request_info",
            requested_fields=list(wanted_fields),
        )

    # 3. Wait while the customer follow-up is pending.
    if progress.customer_follow_up.status == "pending":
        return SupportDeskAction(operation="wait")

    # 4. Draft the customer reply, then 5. add the internal note.
    if not progress.reply:
        return SupportDeskAction(operation="draft_reply", reply=default_reply(observation.task_id))

    if not progress.internal_note:
        return SupportDeskAction(operation="add_internal_note", internal_note=default_note(observation.task_id))

    # 6. Nothing left to do: submit with the gold status and resolution code.
    return SupportDeskAction(
        operation="submit",
        status=spec.gold_status,
        resolution_code=spec.gold_resolution_code,
    )
|
pyproject.toml
CHANGED
|
@@ -33,9 +33,9 @@ dev = [
|
|
| 33 |
|
| 34 |
[project.scripts]
|
| 35 |
# Server entry point - enables running via: uv run --project . server
|
| 36 |
-
# or: python -m
|
| 37 |
-
server = "
|
| 38 |
|
| 39 |
[tool.setuptools]
|
| 40 |
include-package-data = true
|
| 41 |
-
packages = ["
|
|
|
|
| 33 |
|
| 34 |
[project.scripts]
|
| 35 |
# Server entry point - enables running via: uv run --project . server
|
| 36 |
+
# or: python -m server.app
|
| 37 |
+
server = "server.app:main"
|
| 38 |
|
| 39 |
[tool.setuptools]
|
| 40 |
include-package-data = true
|
| 41 |
+
packages = ["server"]
|
server/__init__.py
CHANGED
|
@@ -1 +1,5 @@
|
|
| 1 |
-
"""Server package for the SupportDesk OpenEnv environment."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Server package for the SupportDesk OpenEnv environment."""
|
| 2 |
+
|
| 3 |
+
from server.supportdesk_environment import SupportDeskEnvironment
|
| 4 |
+
|
| 5 |
+
__all__ = ["SupportDeskEnvironment"]
|
server/app.py
CHANGED
|
@@ -1,33 +1,205 @@
|
|
| 1 |
-
"""FastAPI
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
import os
|
|
|
|
| 6 |
|
| 7 |
import uvicorn
|
|
|
|
|
|
|
| 8 |
|
| 9 |
try:
|
| 10 |
-
from openenv.core.env_server
|
| 11 |
-
except ImportError:
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
-
from
|
| 15 |
-
from
|
|
|
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
app = create_app(
|
| 18 |
SupportDeskEnvironment,
|
| 19 |
-
|
| 20 |
-
|
| 21 |
env_name="supportdesk_env",
|
|
|
|
| 22 |
)
|
| 23 |
|
| 24 |
|
| 25 |
-
|
| 26 |
-
""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
-
|
| 29 |
-
uvicorn.run("supportdesk_env.server.app:app", host="0.0.0.0", port=port)
|
| 30 |
|
| 31 |
|
| 32 |
-
if __name__ ==
|
| 33 |
main()
|
|
|
|
| 1 |
+
"""FastAPI application for the SupportDesk environment."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
import os
|
| 6 |
+
from typing import Any
|
| 7 |
|
| 8 |
import uvicorn
|
| 9 |
+
from fastapi import Body, HTTPException
|
| 10 |
+
from fastapi.routing import APIRoute
|
| 11 |
|
| 12 |
try:
|
| 13 |
+
from openenv.core.env_server import http_server as openenv_http_server
|
| 14 |
+
except ImportError:
|
| 15 |
+
try:
|
| 16 |
+
from openenv_core.env_server import http_server as openenv_http_server
|
| 17 |
+
except Exception as e: # pragma: no cover
|
| 18 |
+
raise ImportError(
|
| 19 |
+
"openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
|
| 20 |
+
) from e
|
| 21 |
|
| 22 |
+
from models import SupportDeskAction, SupportDeskObservation, SupportDeskState
|
| 23 |
+
from server.supportdesk_environment import SupportDeskEnvironment
|
| 24 |
+
from tasks import TASKS
|
| 25 |
|
| 26 |
+
# Bind the default OpenEnv /state route to the full typed state model.
|
| 27 |
+
openenv_http_server.State = SupportDeskState
|
| 28 |
+
create_app = openenv_http_server.create_app
|
| 29 |
+
|
| 30 |
+
# Create the app with web interface and README integration.
|
| 31 |
app = create_app(
|
| 32 |
SupportDeskEnvironment,
|
| 33 |
+
SupportDeskAction,
|
| 34 |
+
SupportDeskObservation,
|
| 35 |
env_name="supportdesk_env",
|
| 36 |
+
max_concurrent_envs=1, # increase this number to allow more concurrent WebSocket sessions
|
| 37 |
)
|
| 38 |
|
| 39 |
|
| 40 |
+
TASK_GRADER_PATHS = {
|
| 41 |
+
"billing_refund_easy": "graders:BillingRefundEasyGrader",
|
| 42 |
+
"account_takeover_medium": "graders:AccountTakeoverMediumGrader",
|
| 43 |
+
"api_incident_hard": "graders:ApiIncidentHardGrader",
|
| 44 |
+
"regulated_export_exception_hard": "graders:RegulatedExportExceptionHardGrader",
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _replace_route(path: str, methods: set[str]) -> None:
    """Drop generated routes for *path* so a score-aware handler can replace them.

    A route is removed only when it is an ``APIRoute``, its path equals
    *path*, and *methods* is a subset of the route's own methods.  The
    filtered list is assigned back to ``app.router.routes``.
    """

    def _is_target(candidate: object) -> bool:
        # All three conditions must hold for a route to be dropped.
        if not isinstance(candidate, APIRoute):
            return False
        if candidate.path != path:
            return False
        return methods.issubset(set(candidate.methods or set()))

    kept = []
    for candidate in app.router.routes:
        if not _is_target(candidate):
            kept.append(candidate)
    app.router.routes = kept
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def _score_response(env: SupportDeskEnvironment, observation: SupportDeskObservation) -> dict[str, Any]:
|
| 63 |
+
"""Return the standard OpenEnv shape plus an explicit top-level score."""
|
| 64 |
+
|
| 65 |
+
return {
|
| 66 |
+
"observation": observation.model_dump(),
|
| 67 |
+
"reward": observation.reward,
|
| 68 |
+
"done": observation.done,
|
| 69 |
+
"score": env.state.current_score,
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
_replace_route("/reset", {"POST"})
|
| 74 |
+
_replace_route("/step", {"POST"})
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
@app.post("/reset")
async def reset_with_score(
    request: openenv_http_server.ResetRequest = Body(default_factory=openenv_http_server.ResetRequest),
) -> dict[str, Any]:
    """Reset a fresh environment and return the first observation with its score.

    Only the fields the client explicitly set on the request are forwarded
    to ``SupportDeskEnvironment.reset``.  The environment instance is closed
    before the handler returns, whether or not the reset succeeded.
    """

    environment = SupportDeskEnvironment()
    try:
        reset_kwargs = request.model_dump(exclude_unset=True)
        return _score_response(environment, environment.reset(**reset_kwargs))
    finally:
        environment.close()
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
@app.post("/step")
async def step_with_score(request: openenv_http_server.StepRequest) -> dict[str, Any]:
    """Execute one step and return the observation plus a top-level score.

    Raises:
        HTTPException: 422 when the action payload fails deserialisation;
            404 when ``SupportDeskEnvironment.step`` raises ``ValueError``
            (it does so for an unknown ``episode_id``).
    """

    try:
        action = openenv_http_server.deserialize_action(request.action, SupportDeskAction)
    except openenv_http_server.ValidationError as exc:
        raise HTTPException(status_code=422, detail=exc.errors()) from exc

    env = SupportDeskEnvironment()
    try:
        # Forward only the fields the client set; the action is passed separately.
        kwargs = request.model_dump(exclude_unset=True, exclude={"action"})
        try:
            observation = env.step(action, **kwargs)
        except ValueError as exc:
            # Same mapping as the /episodes/{episode_id}/... routes, instead of
            # letting the error surface as an unhandled 500.
            raise HTTPException(status_code=404, detail=str(exc)) from exc
        return _score_response(env, observation)
    finally:
        env.close()
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
@app.get("/tasks")
def list_tasks() -> dict[str, Any]:
    """Return the task catalog: environment info plus one entry per task in ``TASKS``.

    Each entry carries the task's identifiers, grader path (looked up in
    ``TASK_GRADER_PATHS``), gold classification fields and a small slice of
    ticket context.
    """

    def describe(task) -> dict[str, Any]:
        ticket = task.ticket
        return {
            "task_id": task.task_id,
            "grader": TASK_GRADER_PATHS[task.task_id],
            "title": task.title,
            "difficulty": task.difficulty,
            "objective": task.objective,
            "max_steps": task.max_steps,
            "gold_issue_type": task.gold_issue_type,
            "gold_queue": task.gold_queue,
            "gold_priority": task.gold_priority,
            "ticket_context": {
                "customer_tier": ticket.customer_tier,
                "region": ticket.region,
                "affected_users": ticket.affected_users,
                "sla_minutes_remaining": ticket.sla_minutes_remaining,
            },
        }

    environment_info = {
        "name": "supportdesk_env",
        "version": "0.1.0",
        "grader_type": "deterministic",
        "score_range": [0.0, 1.0],
    }
    return {
        "environment": environment_info,
        "total_tasks": len(TASKS),
        "tasks": [describe(task) for task in TASKS.values()],
    }
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
@app.get("/episodes/{episode_id}/state", response_model=SupportDeskState)
def get_episode_state(episode_id: str) -> SupportDeskState:
    """Return the stored state of one episode, addressed by id.

    Raises:
        HTTPException: 404 when ``state_for_episode`` raises ``ValueError``
            for an unknown ``episode_id``.
    """

    try:
        snapshot = SupportDeskEnvironment.state_for_episode(episode_id)
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return snapshot
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
@app.post("/episodes/{episode_id}/step")
def step_episode(
    episode_id: str,
    payload: dict[str, Any] = Body(...),
) -> dict[str, Any]:
    """Step an explicitly addressed episode without relying on sticky request context.

    The body must contain an ``action`` object; an optional ``timeout_s`` is
    forwarded to ``SupportDeskEnvironment.step``.

    Raises:
        HTTPException: 422 when ``action`` is missing, is not an object, or
            fails ``SupportDeskAction`` validation; 404 when the episode is
            unknown.
    """

    action_payload = payload.get("action")
    if not isinstance(action_payload, dict):
        raise HTTPException(status_code=422, detail="Request body must include an 'action' object.")

    # Validate on its own: pydantic's ValidationError is a ValueError, so sharing
    # one handler with env.step() would report a malformed action as 404.
    try:
        action = SupportDeskAction.model_validate(action_payload)
    except ValueError as exc:
        raise HTTPException(status_code=422, detail=str(exc)) from exc

    env = SupportDeskEnvironment()
    try:
        observation = env.step(action, timeout_s=payload.get("timeout_s"), episode_id=episode_id)
        score = SupportDeskEnvironment.state_for_episode(episode_id).current_score
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    finally:
        # Mirrors the /reset and /step handlers, which always close the instance.
        env.close()

    return {
        "observation": observation.model_dump(),
        "reward": observation.reward,
        "done": observation.done,
        "score": score,
    }
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def main(host: str = "0.0.0.0", port: int = 8000) -> None:
    """
    Entry point for direct execution via uv run or python -m.

    This function enables running the server without Docker:
        uv run --project . server
        uv run --project . server --port 8001
        python -m server.app

    ``--host`` and ``--port`` given on the command line override the
    function arguments, so the console-script form above honours them.

    Args:
        host: Host address to bind to (default: "0.0.0.0")
        port: Port number to listen on (default: 8000)

    For production deployments, consider using uvicorn directly with
    multiple workers:
        uvicorn server.app:app --workers 4
    """

    import argparse

    parser = argparse.ArgumentParser(
        description="Run the SupportDesk environment server.",
        allow_abbrev=False,
    )
    parser.add_argument("--host", default=host, help="Host address to bind to.")
    parser.add_argument("--port", type=int, default=port, help="Port number to listen on.")
    # parse_known_args leaves unrelated command-line arguments alone.
    args, _unknown = parser.parse_known_args()

    uvicorn.run("server.app:app", host=args.host, port=args.port)
|
|
|
|
| 202 |
|
| 203 |
|
| 204 |
+
if __name__ == '__main__':
|
| 205 |
main()
|
server/supportdesk_environment.py
CHANGED
|
@@ -1,3 +1,545 @@
|
|
| 1 |
-
"""
|
| 2 |
|
| 3 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""SupportDesk environment implementation."""
|
| 2 |
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
import threading
|
| 7 |
+
import uuid
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from typing import ClassVar
|
| 10 |
+
|
| 11 |
+
from graders import grade_case
|
| 12 |
+
from models import (
|
| 13 |
+
ActionHistoryEntry,
|
| 14 |
+
CustomerFollowUp,
|
| 15 |
+
SupportCaseProgress,
|
| 16 |
+
SupportDeskAction,
|
| 17 |
+
SupportDeskObservation,
|
| 18 |
+
SupportDeskState,
|
| 19 |
+
)
|
| 20 |
+
from openenv_compat import Environment, EnvironmentMetadata
|
| 21 |
+
from tasks import (
|
| 22 |
+
ALL_ISSUE_TYPES,
|
| 23 |
+
ALL_PRIORITIES,
|
| 24 |
+
ALL_QUEUES,
|
| 25 |
+
ALL_STATUSES,
|
| 26 |
+
SupportTaskSpec,
|
| 27 |
+
get_task,
|
| 28 |
+
list_task_ids,
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class SupportDeskEnvironment(
|
| 33 |
+
Environment[SupportDeskAction, SupportDeskObservation, SupportDeskState]
|
| 34 |
+
):
|
| 35 |
+
"""A realistic customer support triage environment with dense rewards."""
|
| 36 |
+
|
| 37 |
+
_state_lock: ClassVar[threading.RLock] = threading.RLock()
|
| 38 |
+
_episode_store: ClassVar[dict[str, SupportDeskState]] = {}
|
| 39 |
+
_episode_task_ids: ClassVar[dict[str, str]] = {}
|
| 40 |
+
_latest_episode_id: ClassVar[str | None] = None
|
| 41 |
+
_shared_reset_counter: ClassVar[int] = 0
|
| 42 |
+
|
| 43 |
+
def __init__(self, task_id: str | None = None):
    """Create an environment bound to one task with a blank case.

    The task is the first truthy value among ``task_id``, the
    ``SUPPORTDESK_TASK_ID`` environment variable and the first id from
    ``list_task_ids()``.  ``_explicit_task_id`` records whether either of
    the first two was supplied (``reset`` reads it).
    """
    super().__init__()

    env_task_id = os.getenv("SUPPORTDESK_TASK_ID")
    self._explicit_task_id = not (task_id is None and env_task_id is None)
    self.task: SupportTaskSpec = get_task(task_id or env_task_id or list_task_ids()[0])

    # Per-episode bookkeeping starts from zero.
    self._max_steps = self.task.max_steps
    self._step_count = 0
    self._reward_total = 0.0
    self._done = False
    self._last_feedback = ""
    self._history: list[ActionHistoryEntry] = []
    self._case = SupportCaseProgress()
    self._episode_id: str | None = None
    self._current_sla_minutes_remaining = self.task.ticket.sla_minutes_remaining

    # Grade the blank case for the starting score and milestones.
    baseline = grade_case(self.task, self._case)
    self._score = baseline.total_score
    self._completed_milestones = list(baseline.completed_milestones)
|
| 61 |
+
|
| 62 |
+
@classmethod
def _build_initial_state(cls, task: SupportTaskSpec, episode_id: str) -> SupportDeskState:
    """Return the step-zero ``SupportDeskState`` for *task* under *episode_id*.

    The score and milestones come from grading a blank
    ``SupportCaseProgress``; the stage is ``"intake"`` and the only
    required next action is ``"classify"``.
    """
    blank_case = SupportCaseProgress()
    baseline = grade_case(task, blank_case)
    return SupportDeskState(
        episode_id=episode_id,
        task_id=task.task_id,
        difficulty=task.difficulty,
        step_count=0,
        reward=0.0,
        done=False,
        current_score=baseline.total_score,
        max_steps=task.max_steps,
        case=blank_case,
        current_sla_minutes_remaining=task.ticket.sla_minutes_remaining,
        workflow_stage="intake",
        required_next_actions=["classify"],
        risk_flags=[],
        action_history=[],
        completed_milestones=list(baseline.completed_milestones),
        last_feedback="New case loaded. Review the ticket and policy snippets before acting.",
    )
|
| 84 |
+
|
| 85 |
+
@classmethod
|
| 86 |
+
def _extract_episode_id(cls, episode_id: str | None = None, **kwargs) -> str | None:
|
| 87 |
+
if episode_id:
|
| 88 |
+
return episode_id
|
| 89 |
+
for key in ("episode_id", "request_id"):
|
| 90 |
+
value = kwargs.get(key)
|
| 91 |
+
if isinstance(value, str) and value:
|
| 92 |
+
return value
|
| 93 |
+
return None
|
| 94 |
+
|
| 95 |
+
def _load_episode(self, episode_id: str | None = None, **kwargs) -> None:
|
| 96 |
+
resolved_episode_id = self._extract_episode_id(episode_id, **kwargs) or self.__class__._latest_episode_id
|
| 97 |
+
if not resolved_episode_id:
|
| 98 |
+
return
|
| 99 |
+
|
| 100 |
+
episode_state = self.__class__._episode_store.get(resolved_episode_id)
|
| 101 |
+
if episode_state is None:
|
| 102 |
+
raise ValueError(
|
| 103 |
+
f"Unknown episode_id '{resolved_episode_id}'. Call reset() first or provide a valid episode_id."
|
| 104 |
+
)
|
| 105 |
+
|
| 106 |
+
task = get_task(self.__class__._episode_task_ids.get(resolved_episode_id, episode_state.task_id))
|
| 107 |
+
self.task = task
|
| 108 |
+
self._max_steps = episode_state.max_steps
|
| 109 |
+
self._step_count = episode_state.step_count
|
| 110 |
+
self._reward_total = episode_state.reward
|
| 111 |
+
self._done = episode_state.done
|
| 112 |
+
self._last_feedback = episode_state.last_feedback
|
| 113 |
+
self._history = [entry.model_copy(deep=True) for entry in episode_state.action_history]
|
| 114 |
+
self._case = episode_state.case.model_copy(deep=True)
|
| 115 |
+
self._episode_id = resolved_episode_id
|
| 116 |
+
self._score = episode_state.current_score
|
| 117 |
+
self._completed_milestones = list(episode_state.completed_milestones)
|
| 118 |
+
self._current_sla_minutes_remaining = episode_state.current_sla_minutes_remaining
|
| 119 |
+
|
| 120 |
+
def _persist_episode(self) -> None:
    """Write this instance's current episode into the class-level store.

    Does nothing when no episode id is set.  Otherwise a fresh
    ``SupportDeskState`` snapshot is stored, the episode's task id is
    recorded, and the episode becomes the class-wide latest one.
    """
    episode_id = self._episode_id
    if episode_id is None:
        return

    registry = self.__class__
    snapshot = SupportDeskState(
        episode_id=episode_id,
        task_id=self.task.task_id,
        difficulty=self.task.difficulty,
        step_count=self._step_count,
        reward=round(self._reward_total, 4),
        done=self._done,
        current_score=round(self._score, 4),
        max_steps=self._max_steps,
        case=self._case.model_copy(deep=True),
        current_sla_minutes_remaining=self._current_sla_minutes_remaining,
        workflow_stage=self._workflow_stage(),
        required_next_actions=self._required_next_actions(),
        risk_flags=self._risk_flags(),
        action_history=[entry.model_copy(deep=True) for entry in self._history],
        completed_milestones=list(self._completed_milestones),
        last_feedback=self._last_feedback,
    )
    registry._episode_store[episode_id] = snapshot
    registry._episode_task_ids[episode_id] = self.task.task_id
    registry._latest_episode_id = episode_id
|
| 143 |
+
|
| 144 |
+
@property
def state(self) -> SupportDeskState:
    """Return a ``SupportDeskState`` snapshot, refreshed from the episode store.

    Under the class-level lock, ``_load_episode()`` is called with no
    arguments before the snapshot is assembled from instance fields.
    """
    with self.__class__._state_lock:
        self._load_episode()
        fields = dict(
            episode_id=self._episode_id,
            task_id=self.task.task_id,
            difficulty=self.task.difficulty,
            step_count=self._step_count,
            reward=round(self._reward_total, 4),
            done=self._done,
            current_score=round(self._score, 4),
            max_steps=self._max_steps,
            case=self._case.model_copy(deep=True),
            current_sla_minutes_remaining=self._current_sla_minutes_remaining,
            workflow_stage=self._workflow_stage(),
            required_next_actions=self._required_next_actions(),
            risk_flags=self._risk_flags(),
            action_history=[entry.model_copy(deep=True) for entry in self._history],
            completed_milestones=list(self._completed_milestones),
            last_feedback=self._last_feedback,
        )
        return SupportDeskState(**fields)
|
| 166 |
+
|
| 167 |
+
def reset(
    self,
    seed: int | None = None,
    episode_id: str | None = None,
    **kwargs,
) -> SupportDeskObservation:
    """Start a new episode and return its first observation.

    Without an explicit task (constructor argument or environment
    variable), tasks rotate round-robin through ``list_task_ids()`` via a
    class-wide counter.  The episode id is ``episode_id`` when given,
    otherwise ``"<task_id>-<8 hex chars>"``.  The initial state is stored
    class-wide, marked as latest and loaded onto this instance.  ``seed``
    and ``kwargs`` are accepted but not used here.
    """
    registry = self.__class__
    with registry._state_lock:
        if not self._explicit_task_id:
            task_ids = list_task_ids()
            position = registry._shared_reset_counter % len(task_ids)
            registry._shared_reset_counter += 1
            self.task = get_task(task_ids[position])
        self._max_steps = self.task.max_steps

        new_id = episode_id or f"{self.task.task_id}-{uuid.uuid4().hex[:8]}"
        self._episode_id = new_id
        registry._episode_store[new_id] = registry._build_initial_state(self.task, new_id)
        registry._episode_task_ids[new_id] = self.task.task_id
        registry._latest_episode_id = new_id

        self._load_episode(new_id)
        return self._build_observation(reward=0.0, done=False)
|
| 187 |
+
|
| 188 |
+
def step(
    self,
    action: SupportDeskAction,
    timeout_s: float | None = None,
    episode_id: str | None = None,
    **kwargs,
) -> SupportDeskObservation:
    """Apply one action to the addressed episode and return the observation.

    After ``_load_episode`` selects the episode, the action is applied and
    the step counter, follow-up events and SLA advance.  The reward is the
    change in ``grade_case`` score plus ``_process_bonus`` and
    ``_action_penalty``, rounded to four decimals.  The episode ends on
    ``submit`` or when the step budget is used up.  Stepping a finished
    episode returns a ``-0.05`` observation and persists nothing.
    ``timeout_s`` is accepted but not used here.
    """
    with self.__class__._state_lock:
        self._load_episode(episode_id, **kwargs)

        if self._done:
            return self._build_observation(
                reward=-0.05,
                done=True,
                feedback="Episode already finished. Call reset() before taking more actions.",
            )

        # Capture grade and stage before mutating so the reward is a delta.
        grade_before = grade_case(self.task, self._case)
        stage_before = self._workflow_stage()

        self._apply_action(action)
        self._step_count += 1
        self._advance_external_events(action)
        self._degrade_sla()

        grade_after = grade_case(self.task, self._case)
        score_after = grade_after.total_score
        reward = score_after - grade_before.total_score
        reward += self._process_bonus(action, stage_before, score_after)
        reward += self._action_penalty(action, score_after, grade_before.total_score)
        reward = round(reward, 4)

        self._score = score_after
        self._completed_milestones = list(grade_after.completed_milestones)

        submitted = action.operation == "submit"
        if submitted:
            self._done = True
            self._last_feedback = (
                "Case submitted. Final deterministic grade is "
                f"{score_after:.2f}."
            )
        elif self._step_count >= self._max_steps:
            self._done = True
            self._last_feedback = (
                f"Reached max steps ({self._max_steps}). Final deterministic grade is "
                f"{score_after:.2f}."
            )
        else:
            self._last_feedback = self._build_feedback(grade_after, reward)

        self._reward_total = round(self._reward_total + reward, 4)
        history_entry = ActionHistoryEntry(
            step=self._step_count,
            operation=action.operation,
            summary=self._summarize_action(action),
            reward_delta=reward,
        )
        self._history.append(history_entry)
        self._persist_episode()

        return self._build_observation(reward=reward, done=self._done)
|
| 252 |
+
|
| 253 |
+
@classmethod
def state_for_episode(cls, episode_id: str) -> SupportDeskState:
    """Return a deep copy of the stored state for *episode_id*.

    Raises:
        ValueError: when no state is stored under *episode_id*.
    """
    with cls._state_lock:
        stored = cls._episode_store.get(episode_id)
        if stored is not None:
            # Hand out a copy so callers cannot mutate the shared store.
            return stored.model_copy(deep=True)
        raise ValueError(f"Unknown episode_id '{episode_id}'. Call reset() first.")
|
| 260 |
+
|
| 261 |
+
def close(self) -> None:
    """Do nothing; kept so callers can always pair an instance with ``close()``."""
    return None
|
| 263 |
+
|
| 264 |
+
def get_metadata(self) -> EnvironmentMetadata:
    """Return the environment's name, description, version, author and README text.

    The README is read from ``README.md`` one directory above this file's
    directory; ``readme_content`` is ``None`` when that file does not exist.
    """
    readme_path = Path(__file__).resolve().parents[1] / "README.md"
    if readme_path.exists():
        readme_content = readme_path.read_text(encoding="utf-8")
    else:
        readme_content = None

    description = (
        "A policy-heavy enterprise operations desk with deterministic grading, delayed "
        "customer follow-ups, SLA pressure, escalation tradeoffs, and sharper cross-functional triage."
    )
    return EnvironmentMetadata(
        name="supportdesk_env",
        description=description,
        readme_content=readme_content,
        version="0.1.0",
        author="HyperBrick",
    )
|
| 279 |
+
|
| 280 |
+
def _apply_action(self, action: SupportDeskAction) -> None:
|
| 281 |
+
if action.operation == "classify":
|
| 282 |
+
if action.queue is not None:
|
| 283 |
+
self._case.queue = action.queue
|
| 284 |
+
if action.priority is not None:
|
| 285 |
+
self._case.priority = action.priority
|
| 286 |
+
if action.issue_type is not None:
|
| 287 |
+
self._case.issue_type = action.issue_type
|
| 288 |
+
return
|
| 289 |
+
|
| 290 |
+
if action.operation == "request_info":
|
| 291 |
+
if action.requested_fields:
|
| 292 |
+
merged = {item for item in self._case.requested_fields}
|
| 293 |
+
merged.update(action.requested_fields)
|
| 294 |
+
self._case.requested_fields = sorted(merged)
|
| 295 |
+
if self.task.follow_up_outcome != "none" and self._case.customer_follow_up.status == "none":
|
| 296 |
+
self._case.customer_follow_up = CustomerFollowUp(status="pending")
|
| 297 |
+
return
|
| 298 |
+
|
| 299 |
+
if action.operation == "draft_reply":
|
| 300 |
+
if action.reply is not None:
|
| 301 |
+
self._case.reply = action.reply
|
| 302 |
+
return
|
| 303 |
+
|
| 304 |
+
if action.operation == "add_internal_note":
|
| 305 |
+
if action.internal_note is not None:
|
| 306 |
+
self._case.internal_note = action.internal_note
|
| 307 |
+
return
|
| 308 |
+
|
| 309 |
+
if action.operation == "submit":
|
| 310 |
+
if action.status is not None:
|
| 311 |
+
self._case.status = action.status
|
| 312 |
+
if action.resolution_code is not None:
|
| 313 |
+
self._case.resolution_code = action.resolution_code
|
| 314 |
+
|
| 315 |
+
def _advance_external_events(self, action: SupportDeskAction) -> None:
|
| 316 |
+
if self._case.customer_follow_up.status == "pending" and action.operation == "wait":
|
| 317 |
+
self._case.customer_follow_up = CustomerFollowUp(
|
| 318 |
+
status=self.task.follow_up_outcome,
|
| 319 |
+
message=self.task.follow_up_message or None,
|
| 320 |
+
provided_fields=list(self.task.follow_up_provided_fields),
|
| 321 |
+
wrong_fields=list(self.task.follow_up_wrong_fields),
|
| 322 |
+
)
|
| 323 |
+
|
| 324 |
+
def _degrade_sla(self) -> None:
|
| 325 |
+
if self._current_sla_minutes_remaining is None:
|
| 326 |
+
return
|
| 327 |
+
self._current_sla_minutes_remaining = max(
|
| 328 |
+
0,
|
| 329 |
+
self._current_sla_minutes_remaining - self.task.sla_step_cost,
|
| 330 |
+
)
|
| 331 |
+
|
| 332 |
+
def _action_penalty(
|
| 333 |
+
self,
|
| 334 |
+
action: SupportDeskAction,
|
| 335 |
+
current_score: float,
|
| 336 |
+
previous_score: float,
|
| 337 |
+
) -> float:
|
| 338 |
+
penalty = 0.0
|
| 339 |
+
if current_score <= previous_score:
|
| 340 |
+
penalty -= 0.03
|
| 341 |
+
penalty -= self._mixed_action_penalty(action)
|
| 342 |
+
penalty -= self._escalation_tradeoff_penalty()
|
| 343 |
+
if action.operation == "draft_reply" and not action.reply:
|
| 344 |
+
penalty -= 0.03
|
| 345 |
+
if action.operation == "request_info" and not action.requested_fields:
|
| 346 |
+
penalty -= 0.03
|
| 347 |
+
if action.operation == "add_internal_note" and not action.internal_note:
|
| 348 |
+
penalty -= 0.03
|
| 349 |
+
if action.operation == "classify" and not any(
|
| 350 |
+
[action.queue, action.priority, action.issue_type, action.status, action.resolution_code]
|
| 351 |
+
):
|
| 352 |
+
penalty -= 0.03
|
| 353 |
+
if action.operation == "wait" and self._case.customer_follow_up.status != "pending":
|
| 354 |
+
penalty -= 0.02
|
| 355 |
+
if action.operation == "submit" and self._required_next_actions():
|
| 356 |
+
penalty -= 0.08
|
| 357 |
+
if (
|
| 358 |
+
self.task.under_escalation_deadline_step is not None
|
| 359 |
+
and self._step_count >= self.task.under_escalation_deadline_step
|
| 360 |
+
and (self._case.queue != self.task.gold_queue or self._case.priority != self.task.gold_priority)
|
| 361 |
+
):
|
| 362 |
+
penalty -= 0.04
|
| 363 |
+
if self._current_sla_minutes_remaining is not None and self._current_sla_minutes_remaining <= 15:
|
| 364 |
+
penalty -= 0.02
|
| 365 |
+
return round(penalty, 4)
|
| 366 |
+
|
| 367 |
+
def _build_feedback(self, grade, reward: float) -> str:
|
| 368 |
+
return (
|
| 369 |
+
f"Reward delta {reward:+.2f}. Current score {grade.total_score:.2f}. "
|
| 370 |
+
f"SLA remaining: {self._current_sla_minutes_remaining if self._current_sla_minutes_remaining is not None else 'n/a'} minutes. "
|
| 371 |
+
f"Stage: {self._workflow_stage()}. "
|
| 372 |
+
f"Customer follow-up: {self._case.customer_follow_up.status}. "
|
| 373 |
+
f"Next actions: {', '.join(self._required_next_actions()) or 'none'}. "
|
| 374 |
+
f"Completed milestones: {', '.join(grade.completed_milestones) or 'none yet'}."
|
| 375 |
+
)
|
| 376 |
+
|
| 377 |
+
def _summarize_action(self, action: SupportDeskAction) -> str:
|
| 378 |
+
parts = [action.operation]
|
| 379 |
+
if action.queue:
|
| 380 |
+
parts.append(f"queue={action.queue}")
|
| 381 |
+
if action.priority:
|
| 382 |
+
parts.append(f"priority={action.priority}")
|
| 383 |
+
if action.issue_type:
|
| 384 |
+
parts.append(f"issue_type={action.issue_type}")
|
| 385 |
+
if action.status:
|
| 386 |
+
parts.append(f"status={action.status}")
|
| 387 |
+
if action.resolution_code:
|
| 388 |
+
parts.append(f"resolution={action.resolution_code}")
|
| 389 |
+
if action.requested_fields:
|
| 390 |
+
parts.append(f"requested={','.join(action.requested_fields)}")
|
| 391 |
+
if action.reply:
|
| 392 |
+
parts.append("reply=yes")
|
| 393 |
+
if action.internal_note:
|
| 394 |
+
parts.append("note=yes")
|
| 395 |
+
return " | ".join(parts)
|
| 396 |
+
|
| 397 |
+
def _build_observation(
    self,
    reward: float,
    done: bool,
    feedback: str | None = None,
) -> SupportDeskObservation:
    """Assemble the observation for the current task and case.

    ``feedback`` falls back to the last stored feedback when empty or
    ``None``.  ``remaining_steps`` is never negative.  The case and the
    history entries are deep-copied into the observation.
    """
    task = self.task
    steps_left = max(self._max_steps - self._step_count, 0)
    fields = dict(
        task_id=task.task_id,
        difficulty=task.difficulty,
        objective=task.objective,
        ticket=task.ticket,
        knowledge_base=list(task.knowledge_base),
        available_queues=list(ALL_QUEUES),
        available_priorities=list(ALL_PRIORITIES),
        available_statuses=list(ALL_STATUSES),
        available_issue_types=list(ALL_ISSUE_TYPES),
        case=self._case.model_copy(deep=True),
        current_sla_minutes_remaining=self._current_sla_minutes_remaining,
        workflow_stage=self._workflow_stage(),
        required_next_actions=self._required_next_actions(),
        risk_flags=self._risk_flags(),
        action_history=[entry.model_copy(deep=True) for entry in self._history],
        feedback=feedback or self._last_feedback,
        remaining_steps=steps_left,
        reward=reward,
        done=done,
    )
    return SupportDeskObservation(**fields)
|
| 424 |
+
|
| 425 |
+
def _workflow_stage(self) -> str:
    """Return the name of the workflow stage the case is currently in.

    The checks run in pipeline order and the first unmet requirement
    decides the stage: ``closed`` -> ``intake`` -> ``verification`` ->
    ``awaiting_customer`` -> ``follow_up_review`` ->
    ``customer_communication`` -> ``internal_handoff`` ->
    ``final_resolution`` -> ``ready_to_submit``.
    """
    if self._done:
        return "closed"

    case, spec = self._case, self.task

    # Intake lasts until queue, priority and issue type are all set.
    if any(value is None for value in (case.queue, case.priority, case.issue_type)):
        return "intake"

    # Verification only applies to tasks that require requested fields; the
    # comparison ignores ordering.
    wanted = spec.required_requested_fields
    if wanted and sorted(case.requested_fields) != sorted(wanted):
        return "verification"

    follow_up_state = case.customer_follow_up.status
    if follow_up_state == "pending":
        return "awaiting_customer"
    if follow_up_state == "partial" or follow_up_state == "incorrect":
        return "follow_up_review"

    if not case.reply:
        return "customer_communication"
    if not case.internal_note:
        return "internal_handoff"

    outcome_matches = (
        case.status == spec.gold_status
        and case.resolution_code == spec.gold_resolution_code
    )
    return "ready_to_submit" if outcome_matches else "final_resolution"
|
| 443 |
+
|
| 444 |
+
def _required_next_actions(self) -> list[str]:
    """List the operation names still outstanding for the current case.

    Classification, information requests and waiting on the customer are
    blocking: while one of them is outstanding it is the only entry
    returned. After that, the result lists every remaining step among
    ``draft_reply``, ``add_internal_note`` and ``submit``, in that order.
    An empty list means nothing is left to do.
    """
    case, spec = self._case, self.task

    fully_classified = not (
        case.queue is None or case.priority is None or case.issue_type is None
    )
    if not fully_classified:
        return ["classify"]

    wanted = spec.required_requested_fields
    if wanted and sorted(case.requested_fields) != sorted(wanted):
        return ["request_info"]

    if case.customer_follow_up.status == "pending":
        return ["wait"]

    outcome_matches = (
        case.status == spec.gold_status
        and case.resolution_code == spec.gold_resolution_code
    )
    # (operation, still outstanding?) pairs, in reporting order.
    checks = (
        ("draft_reply", not case.reply),
        ("add_internal_note", not case.internal_note),
        ("submit", not outcome_matches),
    )
    return [operation for operation, outstanding in checks if outstanding]
|
| 459 |
+
|
| 460 |
+
def _risk_flags(self) -> list[str]:
    """Return the de-duplicated, alphabetically sorted risk flags.

    Starts from the task's static ``risk_flags`` and adds flags derived
    from the live state: remaining SLA minutes, number of affected users,
    presence of secondary concerns, and the customer follow-up status.
    """
    flags = set(self.task.risk_flags)

    sla_left = self._current_sla_minutes_remaining
    if sla_left is not None and sla_left <= 30:
        flags.add("sla_breach_risk")

    ticket = self.task.ticket
    if ticket.affected_users and ticket.affected_users >= 1000:
        flags.add("high_customer_impact")
    if ticket.secondary_concerns:
        flags.add("secondary_issue_present")

    # Only these two follow-up statuses add a flag of their own.
    follow_up_flag = {
        "partial": "customer_reply_incomplete",
        "incorrect": "customer_reply_irrelevant",
    }.get(self._case.customer_follow_up.status)
    if follow_up_flag is not None:
        flags.add(follow_up_flag)

    return sorted(flags)
|
| 473 |
+
|
| 474 |
+
def _process_bonus(
    self,
    action: SupportDeskAction,
    previous_stage: str,
    current_score: float,
) -> float:
    """Compute the small process-quality bonus earned by ``action``.

    Args:
        action: The action that was just applied; only ``operation`` is read.
        previous_stage: Workflow stage name before the action was applied.
        current_score: Score after the action; a ``request_info`` action
            only earns its bonus when this is positive.

    Returns:
        The accumulated bonus rounded to four decimal places. At most one
        stage-progress bonus, one operation-specific bonus and one
        urgent-routing bonus can apply.
    """
    ordered_stages = (
        "intake",
        "verification",
        "awaiting_customer",
        "follow_up_review",
        "customer_communication",
        "internal_handoff",
        "final_resolution",
        "ready_to_submit",
        "closed",
    )
    rank_of = {name: position for position, name in enumerate(ordered_stages)}

    total = 0.0

    # Reward forward movement through the workflow; unknown stage names
    # rank as 0, the same as "intake".
    stage_now = self._workflow_stage()
    if rank_of.get(stage_now, 0) > rank_of.get(previous_stage, 0):
        total += 0.02

    case, spec, operation = self._case, self.task, action.operation

    if operation == "classify":
        # Correct queue and priority on the very first step.
        first_step = self._step_count == 1
        if first_step and case.queue == spec.gold_queue and case.priority == spec.gold_priority:
            total += 0.03
    elif operation == "request_info":
        if current_score > 0 and spec.required_requested_fields:
            total += 0.02
    elif operation == "wait":
        # Waiting pays off only once a customer follow-up has arrived.
        if case.customer_follow_up.status in {"partial", "complete", "incorrect"}:
            total += 0.02
    elif operation == "submit":
        if not self._required_next_actions():
            total += 0.03

    # Urgent tasks routed to the gold queue within two steps, SLA still running.
    sla_left = self._current_sla_minutes_remaining
    sla_running = sla_left is not None and sla_left > 0
    if (
        sla_running
        and spec.gold_priority == "urgent"
        and self._step_count <= 2
        and case.queue == spec.gold_queue
    ):
        total += 0.02

    return round(total, 4)
|
| 508 |
+
|
| 509 |
+
def _mixed_action_penalty(self, action: SupportDeskAction) -> float:
    """Penalise an action that fills in fields unrelated to its operation.

    Each populated field outside the operation's allowed set costs 0.02,
    capped at 0.06. ``None``, empty lists and empty strings count as
    unpopulated.

    Raises:
        KeyError: If ``action.operation`` is not one of the known operations.
    """
    permitted_by_operation = {
        "classify": {"queue", "priority", "issue_type"},
        "request_info": {"requested_fields"},
        "draft_reply": {"reply"},
        "add_internal_note": {"internal_note"},
        "submit": {"status", "resolution_code"},
        "wait": set(),
    }
    payload = {
        "queue": action.queue,
        "priority": action.priority,
        "issue_type": action.issue_type,
        "status": action.status,
        "resolution_code": action.resolution_code,
        "requested_fields": action.requested_fields,
        "reply": action.reply,
        "internal_note": action.internal_note,
    }
    permitted = permitted_by_operation[action.operation]

    def is_blank(value) -> bool:
        # Unset, or an empty list / empty string.
        return value is None or (isinstance(value, (list, str)) and not value)

    stray_count = sum(
        1
        for field_name, value in payload.items()
        if field_name not in permitted and not is_blank(value)
    )
    return min(0.06, stray_count * 0.02)
|
| 540 |
+
|
| 541 |
+
def _escalation_tradeoff_penalty(self) -> float:
    """Return 0.06 when the case sits in an over-escalation queue, else 0.0.

    The penalty applies only if the case's current queue is listed in the
    task's ``over_escalation_queues`` and is not the task's gold queue.
    """
    routed_to = self._case.queue
    spec = self.task
    over_escalated = (
        routed_to in spec.over_escalation_queues and routed_to != spec.gold_queue
    )
    return round(0.06 if over_escalated else 0.0, 4)
|
tasks.py
CHANGED
|
@@ -1,3 +1,405 @@
|
|
| 1 |
-
"""
|
| 2 |
|
| 3 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Task registry for the SupportDesk environment."""
|
| 2 |
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from dataclasses import dataclass
|
| 6 |
+
from typing import Literal
|
| 7 |
+
|
| 8 |
+
from models import KnowledgeSnippet, SupportTicket
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# Closed vocabularies for the environment. They are kept as lists and used
# as the source for the "available_*" choice lists that consumers copy.

# Queue names a ticket can be routed to.
ALL_QUEUES = [
    "billing_ops",
    "trust_and_safety",
    "platform_engineering",
    "compliance_ops",
    "general_support",
]
# Priority labels.
# NOTE(review): listed from lowest to highest, which looks intentional --
# confirm before relying on the ordering.
ALL_PRIORITIES = ["low", "normal", "high", "urgent"]
# Case status values.
ALL_STATUSES = ["new", "waiting_on_customer", "resolved", "escalated"]
# Issue-type labels used when classifying a ticket.
ALL_ISSUE_TYPES = [
    "duplicate_charge",
    "account_compromise",
    "production_incident",
    "regulated_exception",
    "general_question",
]
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@dataclass(frozen=True)
class SupportTaskSpec:
    """Immutable definition of a single support triage task.

    Bundles what the agent is shown (ticket, knowledge base, objective) with
    the expected ("gold") outcome and the tuning knobs for one scenario.
    Instances are frozen, so a spec cannot be modified after construction.
    """

    # --- Identity and framing -------------------------------------------
    task_id: str
    difficulty: Literal["easy", "medium", "hard"]
    title: str
    objective: str

    # --- Material presented with the task --------------------------------
    ticket: SupportTicket
    knowledge_base: tuple[KnowledgeSnippet, ...]

    # --- Expected ("gold") outcome ---------------------------------------
    gold_queue: str
    gold_priority: str
    gold_issue_type: str
    gold_status: str
    gold_resolution_code: str
    # Field names the agent must request; an empty tuple means none.
    required_requested_fields: tuple[str, ...]
    # NOTE(review): each inner tuple looks like a group of acceptable
    # alternative phrasings, with one match needed per group -- confirm
    # against the grader.
    required_reply_markers: tuple[tuple[str, ...], ...]
    required_note_markers: tuple[tuple[str, ...], ...]
    # Phrases that presumably must not appear in the customer reply.
    forbidden_reply_markers: tuple[str, ...] = ()

    # Static risk flags attached to the task.
    risk_flags: tuple[str, ...] = ()

    # --- Simulated customer follow-up ------------------------------------
    # NOTE(review): presumably the outcome/message delivered after the agent
    # waits on the customer; "none" means no follow-up -- confirm in the
    # environment.
    follow_up_outcome: Literal["none", "partial", "complete", "incorrect"] = "none"
    follow_up_message: str = ""
    follow_up_provided_fields: tuple[str, ...] = ()
    follow_up_wrong_fields: tuple[str, ...] = ()

    # --- Pressure and escalation tuning ----------------------------------
    # NOTE(review): presumably SLA minutes consumed per step -- confirm.
    sla_step_cost: int = 15
    # Queues that count as over-escalation for this task.
    over_escalation_queues: tuple[str, ...] = ()
    # NOTE(review): presumably the latest step by which the case must be
    # routed correctly; None disables the check -- confirm.
    under_escalation_deadline_step: int | None = None
    # NOTE(review): presumably the episode step budget -- confirm.
    max_steps: int = 6
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
# Registry of every task, keyed by task_id. Insertion order is the order
# returned when the registry is iterated.
TASKS: dict[str, SupportTaskSpec] = {
    # Easy: duplicate charge with an invoice attached; no customer follow-up
    # is needed and the case is expected to end resolved.
    "billing_refund_easy": SupportTaskSpec(
        task_id="billing_refund_easy",
        difficulty="easy",
        title="Duplicate charge refund triage",
        objective=(
            "Triage a duplicate-charge billing ticket, send the correct customer response, "
            "and close the case only if no further customer information is required."
        ),
        ticket=SupportTicket(
            customer_name="Riya Shah",
            customer_tier="pro",
            company="PixelNorth Studio",
            subject="Charged twice after I canceled",
            body=(
                "I canceled our Pro annual workspace yesterday, but my card was charged again "
                "this morning and I still see the old invoice. We only had one workspace, "
                "so this looks like a duplicate charge. Please fix it quickly."
            ),
            region="ap-south-1",
            affected_users=12,
            sla_minutes_remaining=240,
            business_impact="Finance ops are blocked from closing the monthly books until the duplicate invoice is fixed.",
            secondary_concerns=["The customer also wants confirmation that the canceled workspace will stay deactivated."],
            attachments=["invoice_7741.pdf"],
        ),
        knowledge_base=(
            KnowledgeSnippet(
                article_id="KB-101",
                title="Duplicate charges and same-day cancellations",
                content=(
                    "If a customer reports a duplicate charge and the subscription is already "
                    "canceled, route the ticket to billing_ops with high priority. Billing can "
                    "approve the refund immediately without requesting extra information when an "
                    "invoice is attached."
                ),
            ),
            KnowledgeSnippet(
                article_id="KB-102",
                title="Refund communication checklist",
                content=(
                    "Customer replies for approved duplicate-charge refunds must confirm that a "
                    "refund is being processed, mention the duplicate charge, and set the "
                    "expectation that funds typically appear within 5-7 business days."
                ),
            ),
            KnowledgeSnippet(
                article_id="KB-103",
                title="When to close a billing case",
                content=(
                    "Close the case as resolved only after the refund path is clear and no more "
                    "customer details are needed."
                ),
            ),
        ),
        gold_queue="billing_ops",
        gold_priority="high",
        gold_issue_type="duplicate_charge",
        gold_status="resolved",
        gold_resolution_code="refund_approved",
        required_requested_fields=(),
        required_reply_markers=(
            ("refund", "refunded", "reimburse"),
            ("duplicate charge", "charged twice", "double charge"),
            ("5-7 business days", "5 to 7 business days", "within 7 business days"),
        ),
        required_note_markers=(
            ("duplicate charge", "double charge"),
            ("refund", "refund approved"),
        ),
        forbidden_reply_markers=("chargeback", "security team"),
        risk_flags=("finance_close_risk", "avoid_unnecessary_back_and_forth"),
        over_escalation_queues=("trust_and_safety", "platform_engineering", "compliance_ops"),
        sla_step_cost=10,
        max_steps=6,
    ),
    # Medium: possible account takeover; verification details must be
    # requested and the scripted follow-up is only partial.
    "account_takeover_medium": SupportTaskSpec(
        task_id="account_takeover_medium",
        difficulty="medium",
        title="Suspicious login recovery triage",
        objective=(
            "Handle a potential account-compromise case, request the missing verification "
            "details, communicate safe next steps, and keep the case open until the customer replies. "
            "The agent must protect account safety without promising an unsafe immediate unlock."
        ),
        ticket=SupportTicket(
            customer_name="Marcus Lee",
            customer_tier="pro",
            company="Northline Analytics",
            subject="Locked out after strange login alert",
            body=(
                "Our workspace admin got a login alert from a country none of us have visited, "
                "and now I can't get back into the account. Please unlock it ASAP. The billing "
                "email is still ours, but I'm worried someone got in."
            ),
            region="us-east-1",
            affected_users=34,
            sla_minutes_remaining=90,
            business_impact="The admin is locked out of the analytics workspace ahead of the Monday executive review.",
            secondary_concerns=["The customer wants the account unlocked immediately, but the verification flow cannot be skipped."],
            attachments=[],
        ),
        knowledge_base=(
            KnowledgeSnippet(
                article_id="SEC-201",
                title="Account compromise routing",
                content=(
                    "Potential account-takeover reports route to trust_and_safety with urgent "
                    "priority. Do not resolve the case immediately."
                ),
            ),
            KnowledgeSnippet(
                article_id="SEC-202",
                title="Verification details before unlock",
                content=(
                    "Before access can be restored, ask the customer for the workspace_id, the "
                    "last successful login time, and the billing email on file. Keep the status "
                    "waiting_on_customer until the details arrive."
                ),
            ),
            KnowledgeSnippet(
                article_id="SEC-203",
                title="Customer response checklist",
                content=(
                    "Security replies should tell the customer to reset their password, scan "
                    "their device for malware, and explain that the trust team is reviewing the case."
                ),
            ),
        ),
        gold_queue="trust_and_safety",
        gold_priority="urgent",
        gold_issue_type="account_compromise",
        gold_status="waiting_on_customer",
        gold_resolution_code="verification_needed",
        required_requested_fields=("workspace_id", "last_successful_login", "billing_email"),
        required_reply_markers=(
            ("reset your password", "change your password"),
            ("scan", "malware", "device check"),
            ("trust team", "security team", "trust and safety"),
        ),
        required_note_markers=(
            ("suspicious login", "strange login"),
            ("locked out", "can't get back", "cannot get back"),
        ),
        risk_flags=("unsafe_unlock_request", "identity_verification_required"),
        follow_up_outcome="partial",
        follow_up_message=(
            "Customer follow-up: workspace_id=ws_9021 and billing email confirmed, "
            "but they could not provide the last successful login time yet."
        ),
        follow_up_provided_fields=("workspace_id", "billing_email"),
        sla_step_cost=18,
        under_escalation_deadline_step=2,
        max_steps=7,
    ),
    # Hard: live production 5xx incident with a distracting compliance
    # question; expected to be escalated rather than resolved.
    "api_incident_hard": SupportTaskSpec(
        task_id="api_incident_hard",
        difficulty="hard",
        title="Production API incident escalation",
        objective=(
            "Triage a high-pressure enterprise incident, ask for the right diagnostics, notify "
            "the customer that engineering is engaged, and escalate instead of resolving. "
            "The agent must prioritize the outage over a tempting secondary compliance question."
        ),
        ticket=SupportTicket(
            customer_name="Asha Verma",
            customer_tier="enterprise",
            company="Kairo Health",
            subject="EU rollout blocked by intermittent 500s",
            body=(
                "We're launching our EU workspace tonight. Since enabling EU data residency we "
                "see intermittent HTTP 500 responses from /v1/exports in production. Our "
                "compliance lead is also asking whether this affects the audit trail, but the "
                "main issue is the outage. We need help immediately."
            ),
            region="eu-west-1",
            affected_users=1800,
            sla_minutes_remaining=25,
            business_impact="A production launch and a customer-facing compliance review are both at risk tonight if the outage persists.",
            secondary_concerns=["The compliance lead is asking whether audit trails are affected, but the live outage is the primary incident."],
            attachments=["error_screenshot.png"],
        ),
        knowledge_base=(
            KnowledgeSnippet(
                article_id="INC-301",
                title="Production availability incidents",
                content=(
                    "Any active production 5xx incident for a paying customer routes to "
                    "platform_engineering with urgent priority and should be escalated, not resolved."
                ),
            ),
            KnowledgeSnippet(
                article_id="INC-302",
                title="Minimum diagnostics for API incidents",
                content=(
                    "Before engineering can investigate, request concrete examples including "
                    "request_ids, UTC timestamps, and the affected region."
                ),
            ),
            KnowledgeSnippet(
                article_id="INC-303",
                title="Customer communication during an incident",
                content=(
                    "The reply should acknowledge an incident, say the on-call engineering team "
                    "is engaged, and ask for the diagnostics needed to speed investigation."
                ),
            ),
            KnowledgeSnippet(
                article_id="INC-304",
                title="Primary issue triage rule",
                content=(
                    "When a production outage appears alongside a secondary compliance or audit "
                    "question, resolve the live outage first and avoid treating the secondary "
                    "question as the primary queue-driving issue."
                ),
            ),
        ),
        gold_queue="platform_engineering",
        gold_priority="urgent",
        gold_issue_type="production_incident",
        gold_status="escalated",
        gold_resolution_code="incident_opened",
        required_requested_fields=("request_ids", "timestamp_utc", "region"),
        required_reply_markers=(
            ("incident", "outage", "investigating"),
            ("on-call", "engineering team", "engineering is engaged"),
            ("request id", "request_ids"),
            ("utc", "timestamp"),
        ),
        required_note_markers=(
            ("eu data residency", "eu rollout"),
            ("500", "http 500"),
            ("launch tonight", "tonight"),
        ),
        risk_flags=("sev1_launch_risk", "secondary_issue_distraction", "engineering_escalation_required"),
        follow_up_outcome="complete",
        follow_up_message=(
            "Customer follow-up: request_ids=req_991, req_998; UTC timestamps=2026-04-01T19:20Z, "
            "2026-04-01T19:27Z; region=eu-west-1."
        ),
        follow_up_provided_fields=("request_ids", "timestamp_utc", "region"),
        sla_step_cost=25,
        under_escalation_deadline_step=2,
        max_steps=8,
    ),
    # Hard: regulated export exception; routing to engineering counts as
    # over-escalation and the scripted follow-up supplies the wrong details.
    "regulated_export_exception_hard": SupportTaskSpec(
        task_id="regulated_export_exception_hard",
        difficulty="hard",
        title="Regulated export exception triage",
        objective=(
            "Handle a regulated export-access request where the customer wants a manual bypass. "
            "The agent must avoid escalating to engineering just because an export error is mentioned, "
            "route to compliance operations, request the right legal details, and keep the case open "
            "until the required approvals are supplied."
        ),
        ticket=SupportTicket(
            customer_name="Dr. Elena Kovacs",
            customer_tier="enterprise",
            company="NorthSea Clinics",
            subject="Need urgent export unlock before audit review",
            body=(
                "Our audit team needs tonight's export package, but after enabling EU residency the "
                "export workflow now shows a policy block. Can you temporarily disable the restriction "
                "or give us a manual bypass just for this audit? We are under pressure and can share "
                "screenshots if needed."
            ),
            region="eu-central-1",
            affected_users=240,
            sla_minutes_remaining=55,
            business_impact="A regulated healthcare audit review may slip tonight if the export path remains blocked.",
            secondary_concerns=[
                "The customer suggests an engineering workaround, but legal approval is the real gating factor."
            ],
            attachments=["policy_block.png"],
        ),
        knowledge_base=(
            KnowledgeSnippet(
                article_id="CMP-401",
                title="Export policy exceptions",
                content=(
                    "Requests to bypass region or export restrictions route to compliance_ops. "
                    "Do not promise a manual engineering workaround before approvals are verified."
                ),
            ),
            KnowledgeSnippet(
                article_id="CMP-402",
                title="Minimum legal details for export review",
                content=(
                    "Ask for the tenant_region, dpa_amendment_id, and legal_contact_email before "
                    "compliance can review an export exception."
                ),
            ),
            KnowledgeSnippet(
                article_id="CMP-403",
                title="Customer response rules for regulated exceptions",
                content=(
                    "Replies must explain that no temporary bypass can be granted yet, mention the "
                    "compliance review, and request the required legal approval details."
                ),
            ),
        ),
        gold_queue="compliance_ops",
        gold_priority="high",
        gold_issue_type="regulated_exception",
        gold_status="waiting_on_customer",
        gold_resolution_code="legal_approval_required",
        required_requested_fields=("tenant_region", "dpa_amendment_id", "legal_contact_email"),
        required_reply_markers=(
            # NOTE(review): the third alternative uses a typographic
            # apostrophe (U+2019); a reply typed with a plain ASCII "can't"
            # would not match it -- confirm whether that is intended.
            ("no temporary bypass", "cannot provide a bypass", "can’t provide a bypass"),
            ("compliance review", "compliance team"),
            ("tenant_region", "tenant region"),
            ("dpa_amendment_id", "dpa amendment", "amendment id"),
        ),
        required_note_markers=(
            ("audit", "audit review"),
            ("eu residency", "policy block"),
            ("manual bypass", "workaround"),
        ),
        forbidden_reply_markers=("engineering workaround", "disable the restriction", "temporary unlock approved"),
        risk_flags=("regulated_data_risk", "unsafe_shortcut_pressure", "over_escalation_risk"),
        follow_up_outcome="incorrect",
        follow_up_message=(
            "Customer follow-up: sent a screenshot and export job ID, but did not include the DPA "
            "amendment ID or legal contact."
        ),
        follow_up_wrong_fields=("screenshot", "job_id"),
        sla_step_cost=16,
        over_escalation_queues=("platform_engineering",),
        max_steps=8,
    ),
}
|
| 390 |
+
|
| 391 |
+
|
| 392 |
+
def get_task(task_id: str) -> SupportTaskSpec:
    """Look up a task definition by id.

    Args:
        task_id: Key of the task in ``TASKS``.

    Returns:
        The registered ``SupportTaskSpec``.

    Raises:
        ValueError: If ``task_id`` is not registered. The message lists the
            valid ids in sorted order and the original ``KeyError`` is
            chained as the cause.
    """
    try:
        spec = TASKS[task_id]
    except KeyError as missing:  # pragma: no cover - defensive
        known_ids = ", ".join(sorted(TASKS))
        message = f"Unknown task_id '{task_id}'. Valid task ids: {known_ids}"
        raise ValueError(message) from missing
    return spec
|
| 400 |
+
|
| 401 |
+
|
| 402 |
+
def list_task_ids() -> list[str]:
    """Return every registered task id as a new list.

    Ids come back in the registry's insertion order, which gives a stable
    evaluation order across runs.
    """
    return [*TASKS]
|
tests/test_supportdesk.py
CHANGED
|
@@ -10,10 +10,10 @@ try:
|
|
| 10 |
except RuntimeError:
|
| 11 |
TestClient = None # type: ignore[assignment]
|
| 12 |
|
| 13 |
-
from
|
| 14 |
-
from
|
| 15 |
-
from
|
| 16 |
-
from
|
| 17 |
|
| 18 |
|
| 19 |
def test_all_tasks_are_registered():
|
|
@@ -90,13 +90,13 @@ def test_grade_is_bounded_between_zero_and_one():
|
|
| 90 |
|
| 91 |
|
| 92 |
def test_task_specific_graders_are_importable_and_clamped():
|
| 93 |
-
from
|
| 94 |
AccountTakeoverMediumGrader,
|
| 95 |
ApiIncidentHardGrader,
|
| 96 |
BillingRefundEasyGrader,
|
| 97 |
RegulatedExportExceptionHardGrader,
|
| 98 |
)
|
| 99 |
-
from
|
| 100 |
|
| 101 |
case = SupportCaseProgress()
|
| 102 |
scores = [
|
|
@@ -176,7 +176,7 @@ def test_follow_up_arrives_after_wait():
|
|
| 176 |
|
| 177 |
@pytest.mark.skipif(TestClient is None, reason="httpx is not installed for FastAPI TestClient")
|
| 178 |
def test_http_reset_step_state_are_session_consistent():
|
| 179 |
-
from
|
| 180 |
|
| 181 |
client = TestClient(app)
|
| 182 |
|
|
@@ -219,7 +219,7 @@ def test_http_reset_step_state_are_session_consistent():
|
|
| 219 |
|
| 220 |
@pytest.mark.skipif(TestClient is None, reason="httpx is not installed for FastAPI TestClient")
|
| 221 |
def test_http_explicit_episode_helpers_work():
|
| 222 |
-
from
|
| 223 |
|
| 224 |
client = TestClient(app)
|
| 225 |
|
|
@@ -256,7 +256,7 @@ def test_http_explicit_episode_helpers_work():
|
|
| 256 |
|
| 257 |
@pytest.mark.skipif(TestClient is None, reason="httpx is not installed for FastAPI TestClient")
|
| 258 |
def test_http_tasks_include_truthy_grader_field():
|
| 259 |
-
from
|
| 260 |
|
| 261 |
client = TestClient(app)
|
| 262 |
|
|
|
|
| 10 |
except RuntimeError:
|
| 11 |
TestClient = None # type: ignore[assignment]
|
| 12 |
|
| 13 |
+
from graders import grade_case
|
| 14 |
+
from models import SupportCaseProgress, SupportDeskAction
|
| 15 |
+
from server.supportdesk_environment import SupportDeskEnvironment
|
| 16 |
+
from tasks import get_task, list_task_ids
|
| 17 |
|
| 18 |
|
| 19 |
def test_all_tasks_are_registered():
|
|
|
|
| 90 |
|
| 91 |
|
| 92 |
def test_task_specific_graders_are_importable_and_clamped():
|
| 93 |
+
from graders import (
|
| 94 |
AccountTakeoverMediumGrader,
|
| 95 |
ApiIncidentHardGrader,
|
| 96 |
BillingRefundEasyGrader,
|
| 97 |
RegulatedExportExceptionHardGrader,
|
| 98 |
)
|
| 99 |
+
from models import SupportCaseProgress
|
| 100 |
|
| 101 |
case = SupportCaseProgress()
|
| 102 |
scores = [
|
|
|
|
| 176 |
|
| 177 |
@pytest.mark.skipif(TestClient is None, reason="httpx is not installed for FastAPI TestClient")
|
| 178 |
def test_http_reset_step_state_are_session_consistent():
|
| 179 |
+
from server.app import app
|
| 180 |
|
| 181 |
client = TestClient(app)
|
| 182 |
|
|
|
|
| 219 |
|
| 220 |
@pytest.mark.skipif(TestClient is None, reason="httpx is not installed for FastAPI TestClient")
|
| 221 |
def test_http_explicit_episode_helpers_work():
|
| 222 |
+
from server.app import app
|
| 223 |
|
| 224 |
client = TestClient(app)
|
| 225 |
|
|
|
|
| 256 |
|
| 257 |
@pytest.mark.skipif(TestClient is None, reason="httpx is not installed for FastAPI TestClient")
|
| 258 |
def test_http_tasks_include_truthy_grader_field():
|
| 259 |
+
from server.app import app
|
| 260 |
|
| 261 |
client = TestClient(app)
|
| 262 |
|