DevanshuDon committed on
Commit
d09534e
·
verified ·
1 Parent(s): 722231e

Upload 8 files

Browse files
server/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """ExecAssist server package."""
server/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (245 Bytes). View file
 
server/__pycache__/app.cpython-312.pyc ADDED
Binary file (11.4 kB). View file
 
server/__pycache__/data.cpython-312.pyc ADDED
Binary file (20.6 kB). View file
 
server/__pycache__/models.cpython-312.pyc ADDED
Binary file (4.21 kB). View file
 
server/app.py ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ app.py — Executive Assistant OpenEnv Environment
3
+
4
+ FastAPI server + environment logic for email and calendar management.
5
+ """
6
+
7
+ import sys
8
+ import os
9
+ from pathlib import Path
10
+
11
+ # Ensure server/ directory is on the path
12
+ sys.path.insert(0, str(Path(__file__).parent))
13
+
14
+ from fastapi import FastAPI, HTTPException
15
+ from models import AssistantAction, AssistantObservation, AssistantState
16
+ from typing import Optional
17
+ import statistics
18
+
19
+ # Import scoring functions from data.py (teammate will implement these)
20
+ from data import (
21
+ generate_scenario,
22
+ compute_email_quality,
23
+ check_scheduling_correctness,
24
+ compute_conflict_resolution,
25
+ apply_penalties,
26
+ TASK_DEFINITIONS,
27
+ )
28
+
29
+
30
+ # ============================================================
31
+ # THE ENVIRONMENT CLASS
32
+ # ============================================================
33
+
34
class ExecAssistEnv:
    """Single-step email/calendar environment.

    An episode is ``reset(task)`` followed by one successfully scored
    ``step(action)``. Malformed actions receive a negative reward but leave
    the episode open so the agent can retry.
    """

    # Calendar actions accepted by step(); anything else is rejected with a penalty.
    VALID_CALENDAR_ACTIONS = ("book", "propose_alternatives", "reschedule", "decline")

    def __init__(self):
        self.current_scenario = None
        self.calendar_state = None  # not read anywhere in this class
        self.episode_done = False
        self.steps_taken = 0
        self.total_score = 0.0
        self.current_task = None
        self.seed = 42  # passed to generate_scenario on every reset

    def reset(self, task: str = "easy"):
        """Start a new episode for ``task`` and return the initial observation.

        Raises:
            ValueError: if ``task`` is not a key of ``TASK_DEFINITIONS``.
        """
        if task not in TASK_DEFINITIONS:
            raise ValueError(f"Unknown task: {task}. Choose from: easy, medium, hard")

        self.current_task = task
        self.episode_done = False
        self.steps_taken = 0
        self.total_score = 0.0

        self.current_scenario = generate_scenario(task, seed=self.seed)

        return {
            "observation": self._build_observation(),
            "reward": 0.0,
            "done": False,
            "info": {
                "task": task,
                "scenario_id": self.current_scenario.get("id", "unknown"),
            },
        }

    def step(self, action: dict):
        """Score the agent's action and finish the episode.

        Args:
            action: mapping with the fields of ``AssistantAction``.

        Returns:
            Dict with ``observation``, ``reward``, ``done`` and ``info``.
            Invalid actions return a negative reward with ``done=False``.
        """
        # Without a prior reset() there is no scenario or task to score against;
        # the scoring code below would fail on None, so answer explicitly.
        if self.current_scenario is None or self.current_task is None:
            return {
                "observation": {"message": "No active episode. Call /reset to start."},
                "reward": 0.0,
                "done": False,
                "info": {"error": "episode_not_started"},
            }

        if self.episode_done:
            return {
                "observation": {"message": "Episode is done. Call /reset to start again."},
                "reward": 0.0,
                "done": True,
                "info": {"total_score": self.total_score},
            }

        # Parse action. Broad catch is deliberate: any construction failure is
        # reported back to the agent as a penalised, non-terminal step.
        try:
            assistant_action = AssistantAction(**action)
        except Exception as e:
            return {
                "observation": {"message": f"Invalid action format: {str(e)}"},
                "reward": -0.5,
                "done": False,
                "info": {"error": "invalid_action_format"},
            }

        # Validate basic action structure
        if not assistant_action.email_reply or not assistant_action.email_reply.strip():
            return {
                "observation": {"message": "Empty email reply. Penalty applied."},
                "reward": -0.3,
                "done": False,
                "info": {"error": "empty_email_reply"},
            }

        if assistant_action.calendar_action not in self.VALID_CALENDAR_ACTIONS:
            return {
                "observation": {"message": f"Invalid calendar_action: {assistant_action.calendar_action}"},
                "reward": -0.2,
                "done": False,
                "info": {"error": "invalid_calendar_action"},
            }

        # Plain-dict view of the action for the scoring helpers in data.py.
        action_dict = assistant_action.dict()
        meeting_details_dict = (
            assistant_action.meeting_details.dict() if assistant_action.meeting_details else None
        )

        email_score = compute_email_quality(assistant_action.email_reply, self.current_scenario)
        scheduling_result = check_scheduling_correctness(meeting_details_dict, self.current_scenario)
        conflict_score = compute_conflict_resolution(action_dict, self.current_scenario)
        penalty = apply_penalties(action_dict, self.current_scenario)

        # Combine scores using the per-task weights
        weights = TASK_DEFINITIONS[self.current_task]["reward_weights"]
        total_reward = (
            weights["email"] * email_score +
            weights["scheduling"] * scheduling_result["score"] +
            weights["conflict"] * conflict_score
        )

        # Penalties are subtracted, then the result is clamped to [0, 1].
        total_reward = max(0.0, min(1.0, total_reward - penalty))

        self.total_score = total_reward
        self.episode_done = True
        self.steps_taken += 1

        return {
            "observation": self._build_completion_message(assistant_action, total_reward),
            "reward": round(total_reward, 4),
            "done": True,
            "info": {
                "email_score": round(email_score, 4),
                "scheduling_score": round(scheduling_result["score"], 4),
                "conflict_score": round(conflict_score, 4),
                "penalty": round(penalty, 4),
                "scheduling_checks": scheduling_result.get("checks", {}),
            },
        }

    def _build_observation(self) -> dict:
        """Build what the agent sees for the current scenario and task."""
        scenario = self.current_scenario
        task_def = TASK_DEFINITIONS[self.current_task]

        return {
            "task": self.current_task,
            "description": task_def["description"],
            "emails": scenario["emails"],
            "calendar": scenario["calendar"],
            "contacts": scenario.get("contacts", {}),
            "action_required": task_def["action_required"],
        }

    def _build_completion_message(self, action: "AssistantAction", score: float) -> dict:
        """Build the feedback observation returned after a scored step."""
        if score >= 0.9:
            message = f"Excellent work! Score: {score:.2f}"
        elif score >= 0.7:
            message = f"Good response. Score: {score:.2f}"
        elif score >= 0.5:
            message = f"Acceptable. Score: {score:.2f}"
        else:
            message = f"Needs improvement. Score: {score:.2f}"

        # Echo at most the first 100 characters of the reply.
        if len(action.email_reply) > 100:
            email_preview = action.email_reply[:100] + "..."
        else:
            email_preview = action.email_reply

        return {
            "message": message,
            "email_sent": email_preview,
            "calendar_action": action.calendar_action,
        }

    def state(self):
        """Return a snapshot of the episode bookkeeping."""
        return {
            "current_task": self.current_task,
            "emails_pending": len(self.current_scenario.get("emails", [])) if self.current_scenario else 0,
            "episode_done": self.episode_done,
            "steps_taken": self.steps_taken,
            "total_score": self.total_score,
        }
202
+
203
+
204
+ # ============================================================
205
+ # FASTAPI SERVER
206
+ # ============================================================
207
+
208
# FastAPI application object; the title/description/version below are passed
# straight to the FastAPI constructor.
app = FastAPI(
    title="ExecAssist Environment",
    description=(
        "An OpenEnv environment where AI agents learn to manage email and calendar "
        "for a busy executive. Agents must draft professional replies, schedule meetings, "
        "and resolve conflicts."
    ),
    version="1.0.0"
)

# One module-level environment instance, shared by every endpoint below.
env = ExecAssistEnv()
219
+
220
+
221
@app.post("/reset")
def reset(task: str = "easy"):
    """Start a new episode for ``task`` and return the initial observation.

    Raises:
        HTTPException: 400 when ``task`` is not a key of ``TASK_DEFINITIONS``.
    """
    # env.reset() raises ValueError for an unknown task, which would otherwise
    # reach the client as an opaque HTTP 500. Reject it here as a client error.
    if task not in TASK_DEFINITIONS:
        raise HTTPException(
            status_code=400,
            detail=f"Unknown task: {task}. Choose from: {', '.join(TASK_DEFINITIONS)}",
        )
    return env.reset(task)
224
+
225
+
226
@app.post("/step")
def step(action: AssistantAction):
    """Forward the request body, as a plain dict, to the shared environment's ``step``."""
    payload = action.dict()
    return env.step(payload)
229
+
230
+
231
@app.get("/state")
def state():
    """Return the shared environment's current bookkeeping snapshot."""
    snapshot = env.state()
    return snapshot
234
+
235
+
236
@app.get("/tasks")
def tasks():
    """List every task with its description, required action and reward weights."""
    exposed_fields = ("description", "action_required", "reward_weights")
    catalogue = {}
    for task_name, td in TASK_DEFINITIONS.items():
        catalogue[task_name] = {field: td[field] for field in exposed_fields}
    return catalogue
246
+
247
+
248
@app.get("/health")
def health():
    """Liveness probe; always reports a healthy status."""
    return dict(status="healthy")
251
+
252
+
253
+ # ============================================================
254
+ # OPENENV REQUIRED ENDPOINTS
255
+ # ============================================================
256
+
257
@app.get("/metadata")
def metadata():
    """Return the environment's name, description, version, author and task list."""
    summary = (
        "Executive Assistant environment where AI agents learn to manage email "
        "and calendar for busy professionals. Agents must balance professionalism, "
        "scheduling correctness, and conflict resolution."
    )
    info = {}
    info["name"] = "exec-assist"
    info["description"] = summary
    info["version"] = "1.0.0"
    info["author"] = "Gang-gay"
    info["tasks"] = ["easy", "medium", "hard"]
    return info
271
+
272
+
273
@app.get("/schema")
def schema():
    """Return JSON-schema-style descriptions of the action, observation and state."""
    action_schema = {
        "type": "object",
        "properties": {
            "email_reply": {"type": "string"},
            "calendar_action": {
                "type": "string",
                "enum": ["book", "propose_alternatives", "reschedule", "decline"],
            },
            "meeting_details": {"type": "object"},
        },
        "required": ["email_reply", "calendar_action"],
    }

    observation_schema = {
        "type": "object",
        "properties": {
            "task": {"type": "string"},
            "emails": {"type": "array"},
            "calendar": {"type": "object"},
            "contacts": {"type": "object"},
        },
    }

    state_schema = {
        "type": "object",
        "properties": {
            "current_task": {"type": "string"},
            "emails_pending": {"type": "integer"},
            "episode_done": {"type": "boolean"},
            "steps_taken": {"type": "integer"},
            "total_score": {"type": "number"},
        },
    }

    return {
        "action": action_schema,
        "observation": observation_schema,
        "state": state_schema,
    }
306
+
307
+
308
@app.post("/mcp")
async def mcp_endpoint(request_body: Optional[dict] = None):
    """Minimal MCP JSON-RPC endpoint.

    Answers ``initialize`` and ``tools/list``. Any other method, or a missing
    body, gets an empty ``result``. The response ``id`` echoes the request's
    ``id`` and defaults to 1 when none is supplied.
    """
    # A None default replaces the original shared mutable ``{}`` default.
    request_body = request_body or {}
    method = request_body.get("method", "")
    req_id = request_body.get("id", 1)

    if method == "initialize":
        result = {
            "protocolVersion": "2024-11-05",
            "serverInfo": {"name": "exec-assist", "version": "1.0.0"},
            "capabilities": {"tools": {"listChanged": False}},
        }
    elif method == "tools/list":
        result = {
            "tools": [
                {
                    "name": "reset",
                    "description": "Start new episode (easy/medium/hard)",
                    "inputSchema": {
                        "type": "object",
                        "properties": {"task": {"type": "string", "enum": ["easy", "medium", "hard"]}},
                    },
                },
                {
                    "name": "step",
                    "description": "Submit email reply and calendar action",
                    "inputSchema": {
                        "type": "object",
                        "properties": {
                            "email_reply": {"type": "string"},
                            "calendar_action": {"type": "string"},
                            "meeting_details": {"type": "object"},
                        },
                        "required": ["email_reply", "calendar_action"],
                    },
                },
                {"name": "state", "description": "Get current state", "inputSchema": {"type": "object"}},
            ],
        }
    else:
        result = {}

    # Single place that builds the JSON-RPC 2.0 envelope.
    return {"jsonrpc": "2.0", "id": req_id, "result": result}
358
+
359
+
360
def main():
    """Serve ``app`` with uvicorn on all interfaces, port 8000."""
    # Imported here, so uvicorn is only needed when the module is run as a script.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)


if __name__ == "__main__":
    main()
server/data.py ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ data.py — ExecAssist Environment Data & Scoring
3
+
4
+ Contains:
5
+ - Scenario templates for easy/medium/hard tasks
6
+ - Reward functions (email quality, scheduling correctness, conflict resolution)
7
+ - Anti-reward hacking penalties
8
+ - Helper functions for time/calendar logic
9
+ """
10
+
11
+ import random
12
+ import os
13
+ from datetime import datetime, timedelta
14
+ from typing import Dict, List, Optional
15
+ from openai import OpenAI
16
+
17
+ # ============================================================
18
+ # TASK DEFINITIONS
19
+ # ============================================================
20
+
21
# Per-difficulty task metadata. Each entry carries a human-readable
# description, the action the agent is expected to take, and the weights for
# the three score components ("email", "scheduling", "conflict").
TASK_DEFINITIONS = {
    "easy": {
        "description": (
            "Simple meeting request with clear calendar availability. "
            "Draft professional reply and book the meeting."
        ),
        "action_required": "Send email confirmation and book meeting in available slot",
        "reward_weights": {
            "email": 0.5,
            "scheduling": 0.5,
            "conflict": 0.0,
        },
    },
    "medium": {
        "description": (
            "Scheduling conflict — requested time is already booked. "
            "Identify conflict, propose 2-3 alternatives, explain professionally."
        ),
        "action_required": "Send email with alternative times and explain conflict",
        "reward_weights": {
            "email": 0.3,
            "scheduling": 0.3,
            "conflict": 0.4,
        },
    },
    "hard": {
        "description": (
            "Multi-party coordination with priority conflicts. "
            "3 emails requesting meetings, prioritize and reschedule."
        ),
        "action_required": "Coordinate multiple meetings, prioritize, and reschedule",
        "reward_weights": {
            "email": 0.34,
            "scheduling": 0.33,
            "conflict": 0.33,
        },
    },
}
59
+
60
+
61
+ # ============================================================
62
+ # SCENARIO DATA POOLS
63
+ # ============================================================
64
+
65
# Pool of meeting subjects the scenario generators draw from.
MEETING_TOPICS = [
    "Q2 roadmap review",
    "Budget planning session",
    "Project status update",
    "Team sync",
    "1-on-1 check-in",
    "Client presentation prep",
    "Sprint retrospective",
    "Product demo",
    "Strategy discussion",
    "Performance review",
]
77
+
78
# Pool of (display name, email address) pairs used as email senders.
SENDER_NAMES = [
    ("John Smith", "john.smith@company.com"),
    ("Sarah Johnson", "sarah.johnson@company.com"),
    ("Michael Chen", "michael.chen@company.com"),
    ("Emily Rodriguez", "emily.rodriguez@company.com"),
    ("David Kim", "david.kim@company.com"),
    ("Lisa Wang", "lisa.wang@company.com"),
    ("James Anderson", "james.anderson@company.com"),
    ("Maria Garcia", "maria.garcia@company.com"),
]
88
+
89
+
90
+ # ============================================================
91
+ # SCENARIO GENERATION
92
+ # ============================================================
93
+
94
def generate_scenario(task: str, seed: int = None) -> dict:
    """
    Build the scenario for one task difficulty.

    Args:
        task: "easy", "medium" or "hard".
        seed: when given, seeds the random generator used for the random
            choices; when None an unseeded generator is used.

    Returns:
        The dict produced by the matching ``_generate_*_scenario`` helper.

    Raises:
        ValueError: if ``task`` is not one of the three difficulties.
    """
    rng = random.Random(seed) if seed is not None else random.Random()

    builders = (
        ("easy", _generate_easy_scenario),
        ("medium", _generate_medium_scenario),
        ("hard", _generate_hard_scenario),
    )
    for difficulty, build in builders:
        if task == difficulty:
            return build(rng)

    raise ValueError(f"Unknown task: {task}")
120
+
121
+
122
def _generate_easy_scenario(rng: random.Random) -> dict:
    """Generate a simple meeting request with clear availability.

    Args:
        rng: random generator used to pick the sender and the topic.

    Returns:
        Scenario dict with one email, two existing meetings, the sender's
        contact entry and ``has_conflict=False``.
    """
    sender_name, sender_email = rng.choice(SENDER_NAMES)
    topic = rng.choice(MEETING_TOPICS)

    # Monday 9 AM. The previous anchor, 2026-04-28, is a Tuesday, which put
    # every meeting one weekday later than the scenario text describes.
    base_date = datetime(2026, 4, 27, 9, 0)

    existing_meetings = [
        {
            "id": "mtg_001",
            "participants": ["alex.chen@company.com", "team@company.com"],
            "start_time": (base_date + timedelta(hours=1)).isoformat(),  # Monday 10 AM
            "end_time": (base_date + timedelta(hours=2)).isoformat(),  # Monday 11 AM
            "subject": "Team standup",
            "priority": "normal",
        },
        {
            "id": "mtg_002",
            "participants": ["alex.chen@company.com", "client@external.com"],
            "start_time": (base_date + timedelta(days=1, hours=5)).isoformat(),  # Tuesday 2 PM
            "end_time": (base_date + timedelta(days=1, hours=6, minutes=30)).isoformat(),  # Tuesday 3:30 PM
            "subject": "Client call",
            "priority": "high",
        },
    ]

    email_body = f"Hi Alex,\n\nCan we meet sometime next week to discuss {topic.lower()}? 30 minutes should be enough. I'm flexible on timing.\n\nBest,\n{sender_name}"

    return {
        "id": "easy_001",
        "task": "easy",
        "emails": [
            {
                "sender": sender_email,
                "subject": f"Meeting request: {topic}",
                "body": email_body,
                # NOTE: wall-clock time, so it is not reproducible from the seed.
                "timestamp": datetime.now().isoformat(),
                "priority": "normal",
            }
        ],
        "calendar": {
            "existing_meetings": existing_meetings,
            "working_hours": {
                "monday": "9-17",
                "tuesday": "9-17",
                "wednesday": "9-17",
                "thursday": "9-17",
                "friday": "9-16",
            },
            "executive_name": "Alex Chen",
        },
        "contacts": {
            sender_email: {
                "name": sender_name,
                "email": sender_email,
                "timezone": "America/Los_Angeles",
                "title": "Senior Manager",
            }
        },
        "expected_behavior": "Book meeting in open slot",
        "has_conflict": False,
    }
185
+
186
+
187
def _generate_medium_scenario(rng: random.Random) -> dict:
    """Generate a scenario whose requested time collides with a booked meeting.

    Args:
        rng: random generator used to pick the sender and the topic.

    Returns:
        Scenario dict with one urgent email, two existing meetings, the
        sender's contact entry and ``has_conflict=True``.
    """
    sender_name, sender_email = rng.choice(SENDER_NAMES)
    topic = rng.choice(MEETING_TOPICS)

    # Monday 9 AM. The previous anchor, 2026-04-28, is a Tuesday, so the
    # "Monday 2-4pm" conflict named in the email was actually booked on Tuesday.
    base_date = datetime(2026, 4, 27, 9, 0)

    # Conflict: Monday 2-4 PM is already booked
    conflict_start = base_date + timedelta(hours=5)
    conflict_end = base_date + timedelta(hours=7)

    existing_meetings = [
        {
            "id": "mtg_001",
            "participants": ["alex.chen@company.com", "board@company.com"],
            "start_time": conflict_start.isoformat(),
            "end_time": conflict_end.isoformat(),
            "subject": "Board meeting",
            "priority": "high",
        },
        {
            "id": "mtg_002",
            "participants": ["alex.chen@company.com", "manager@company.com"],
            "start_time": (base_date + timedelta(days=1, hours=0)).isoformat(),  # Tuesday 9 AM
            "end_time": (base_date + timedelta(days=1, hours=1)).isoformat(),  # Tuesday 10 AM
            "subject": "1-on-1 with manager",
            "priority": "normal",
        },
    ]

    email_body = f"Hi Alex,\n\nWe need to discuss {topic.lower()}. I'm available Monday 2-4pm or Tuesday morning. Can we make this work? It's fairly urgent.\n\nThanks,\n{sender_name}"

    return {
        "id": "medium_001",
        "task": "medium",
        "emails": [
            {
                "sender": sender_email,
                "subject": f"Urgent: {topic}",
                "body": email_body,
                # NOTE: wall-clock time, so it is not reproducible from the seed.
                "timestamp": datetime.now().isoformat(),
                "priority": "high",
            }
        ],
        "calendar": {
            "existing_meetings": existing_meetings,
            "working_hours": {
                "monday": "9-17",
                "tuesday": "9-17",
                "wednesday": "9-17",
                "thursday": "9-17",
                "friday": "9-16",
            },
            "executive_name": "Alex Chen",
        },
        "contacts": {
            sender_email: {
                "name": sender_name,
                "email": sender_email,
                "timezone": "America/Los_Angeles",
                "title": "Director",
            }
        },
        "expected_behavior": "Identify conflict, propose Tuesday 10-11 AM as alternative",
        "has_conflict": True,
    }
254
+
255
+
256
def _generate_hard_scenario(rng: random.Random) -> dict:
    """Generate a multi-party scenario with three emails and priority conflicts.

    Args:
        rng: random generator used to sample three senders and three topics.

    Returns:
        Scenario dict with three competing emails, two existing meetings, a
        contact entry per sender and ``has_conflict=True``.
    """
    senders = rng.sample(SENDER_NAMES, 3)
    topics = rng.sample(MEETING_TOPICS, 3)

    # Monday 9 AM. The previous anchor, 2026-04-28, is a Tuesday, which
    # contradicted the "Monday"/"Wed" labels used below and in the emails.
    base_date = datetime(2026, 4, 27, 9, 0)

    # Existing calendar — Monday 2-3 PM blocked with team sync
    existing_meetings = [
        {
            "id": "mtg_001",
            "participants": ["alex.chen@company.com", "team@company.com"],
            "start_time": (base_date + timedelta(hours=5)).isoformat(),  # Monday 2 PM
            "end_time": (base_date + timedelta(hours=6)).isoformat(),  # Monday 3 PM
            "subject": "Team sync",
            "priority": "normal",
        },
        {
            "id": "mtg_002",
            "participants": ["alex.chen@company.com", "exec@company.com"],
            "start_time": (base_date + timedelta(days=2, hours=2)).isoformat(),  # Wed 11 AM
            "end_time": (base_date + timedelta(days=2, hours=3)).isoformat(),  # Wed 12 PM
            "subject": "Executive review",
            "priority": "high",
        },
    ]

    # Three competing email requests
    emails = [
        {
            "sender": senders[0][1],
            "subject": f"Meeting: {topics[0]}",
            "body": (
                f"Hi Alex,\n\nCan we meet Monday 2:30-3:30 PM to discuss {topics[0].lower()}? "
                f"I'd really appreciate your input.\n\nThanks,\n{senders[0][0]}"
            ),
            # NOTE: wall-clock time, so it is not reproducible from the seed.
            "timestamp": datetime.now().isoformat(),
            "priority": "normal",
        },
        {
            "sender": senders[1][1],
            "subject": f"URGENT: {topics[1]}",
            "body": (
                f"Alex,\n\nWe need to discuss {topics[1].lower()} ASAP. "
                f"Monday afternoon works for me — ideally 2-3 PM. "
                f"This is time-sensitive and high priority.\n\nBest,\n{senders[1][0]}"
            ),
            "timestamp": datetime.now().isoformat(),
            "priority": "high",
        },
        {
            "sender": senders[2][1],
            "subject": f"{topics[2]} discussion",
            "body": (
                f"Hi Alex,\n\nCan we sync on {topics[2].lower()} sometime this week? "
                f"I'm flexible — any 30-minute slot works for me.\n\nThanks,\n{senders[2][0]}"
            ),
            "timestamp": datetime.now().isoformat(),
            "priority": "normal",
        },
    ]

    # One contact entry per sender, keyed by email address.
    contacts = {
        sender[1]: {
            "name": sender[0],
            "email": sender[1],
            "timezone": "America/Los_Angeles",
            "title": "Manager",
        }
        for sender in senders
    }

    return {
        "id": "hard_001",
        "task": "hard",
        "emails": emails,
        "calendar": {
            "existing_meetings": existing_meetings,
            "working_hours": {
                "monday": "9-17",
                "tuesday": "9-17",
                "wednesday": "9-17",
                "thursday": "9-17",
                "friday": "9-16",
            },
            "executive_name": "Alex Chen",
        },
        "contacts": contacts,
        "expected_behavior": (
            "Prioritize URGENT email (sender 2). Book that meeting. "
            "Propose alternatives to sender 1 (conflicts with urgent). "
            "Offer flexible times to sender 3."
        ),
        "has_conflict": True,
    }
352
+
353
+
354
+ # ============================================================
355
+ # REWARD FUNCTION 1: EMAIL QUALITY
356
+ # ============================================================
357
+
358
def compute_email_quality(reply: str, scenario: dict) -> float:
    """
    Score email quality using rule-based checks plus an LLM judge.

    Components:
    - Politeness (15%)
    - Greeting (5%) and closing (5%)
    - Sufficient detail (15%)
    - Not overly uncertain (10%)
    - Professional tone (10%)
    - LLM judge for nuance (40%)

    Args:
        reply: the agent's drafted email text.
        scenario: accepted for a uniform scorer signature; not read here.

    Returns:
        Score between 0.0 and 1.0.
    """
    import re  # local import: this module does not import re at top level

    reply = reply or ""
    reply_lower = reply.lower()
    score = 0.0

    # Rule 1: Politeness markers (15%)
    if any(phrase in reply_lower for phrase in ("thank you", "thanks", "appreciate")):
        score += 0.15

    # Rule 2: Proper greeting (5%) and closing (5%)
    # Match the greeting as a whole word: the old substring test for "hi "
    # fired inside "this "/"which " and missed "Hi," with no trailing space.
    if re.search(r"\b(?:hi|hello|dear)\b", reply_lower):
        score += 0.05
    if any(closing in reply_lower for closing in ("best", "regards", "sincerely", "thanks,")):
        score += 0.05

    # Rule 3: Sufficient detail (15%)
    word_count = len(reply.split())
    if word_count >= 20:
        score += 0.15
    elif word_count >= 10:
        score += 0.08

    # Rule 4: Not overly uncertain (10%): at most two question marks
    if reply.count("?") <= 2:
        score += 0.10

    # Rule 5: Professional tone — no negative phrases (10%)
    negative_phrases = ("can't", "won't", "impossible", "sorry but no", "unfortunately not", "no way")
    if not any(neg in reply_lower for neg in negative_phrases):
        score += 0.10

    # Rule 6: LLM-as-judge for nuance (40%)
    score += _llm_judge_professionalism(reply) * 0.40

    return min(1.0, score)
408
+
409
+
410
def _fallback_professionalism_score(reply: str) -> float:
    """Heuristic used when no LLM is available: 0.7 for two or more sentences and 50+ chars, else 0.4."""
    sentences = [s.strip() for s in reply.split('.') if s.strip()]
    if len(sentences) >= 2 and len(reply) >= 50:
        return 0.7
    return 0.4


def _extract_unit_score(text):
    """Return the first decimal number in ``text`` clamped to [0, 1], or None if there is none."""
    import re  # local import: this module does not import re at top level

    # Only plain decimals match. "nan" and "inf" are rejected here; float()
    # accepted them and the old clamp turned both into a perfect 1.0.
    match = re.search(r"[-+]?(?:\d+\.\d*|\.\d+|\d+)", text or "")
    if match is None:
        return None
    return max(0.0, min(1.0, float(match.group())))


def _llm_judge_professionalism(reply: str) -> float:
    """
    LLM-as-judge for email professionalism via an OpenAI-compatible API.

    The key is read from HF_TOKEN or API_KEY, the endpoint from API_BASE_URL
    and the model from MODEL_NAME. With no key, or on any API error, the
    sentence/length heuristic is used. A response without a number scores 0.5.

    Returns:
        Score between 0.0 and 1.0.
    """
    api_key = os.getenv("HF_TOKEN") or os.getenv("API_KEY")

    # Fallback if no API key
    if not api_key:
        return _fallback_professionalism_score(reply)

    try:
        client = OpenAI(
            base_url=os.getenv("API_BASE_URL", "https://openrouter.ai/api/v1"),
            api_key=api_key,
        )

        prompt = f"""Rate the professionalism of this email reply on a scale of 0.0 to 1.0.

Email reply:
\"\"\"{reply}\"\"\"

Criteria:
- Clear and concise
- Professional tone
- No typos or grammar errors
- Appropriate level of formality
- Addresses the request directly

Respond with ONLY a single decimal number between 0.0 and 1.0. No explanation, just the number."""

        response = client.chat.completions.create(
            model=os.getenv("MODEL_NAME", "nvidia/nemotron-3-super-120b-a12b:free"),
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,
            max_tokens=10,
        )

        # content can be None on an empty completion; treat that as "no number".
        parsed = _extract_unit_score(response.choices[0].message.content)
        return 0.5 if parsed is None else parsed

    except Exception as e:
        # Best effort by design: scoring must not fail because the judge is down.
        print(f"LLM judge error: {e}")
        return _fallback_professionalism_score(reply)
471
+
472
+
473
+ # ============================================================
474
+ # REWARD FUNCTION 2: SCHEDULING CORRECTNESS
475
+ # ============================================================
476
+
477
# Keys used by calendar["working_hours"], indexed by datetime.weekday().
_WEEKDAY_KEYS = ("monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday")


def _strip_tz(moment: datetime) -> datetime:
    """Drop tzinfo so agent-supplied times compare against the naive calendar times."""
    # NOTE(review): this keeps the wall-clock value and ignores the offset;
    # confirm that is the intended reading of offset-qualified input.
    return moment.replace(tzinfo=None)


def _working_window(calendar: dict, weekday_index: int):
    """Return (open_hour, close_hour) for a weekday, or None for a non-working day."""
    hours = calendar.get("working_hours")
    if not hours:
        return (9, 17)  # historical default when the scenario defines no hours
    spec = hours.get(_WEEKDAY_KEYS[weekday_index])
    if spec is None:
        return None  # day not listed, e.g. a weekend
    try:
        opens, closes = (int(part) for part in str(spec).split("-", 1))
    except ValueError:
        return (9, 17)  # unreadable spec: fall back rather than fail scoring
    return (opens, closes)


def check_scheduling_correctness(meeting_details: Optional[dict], scenario: dict) -> dict:
    """
    Verify a proposed meeting against the scenario calendar with hard checks.

    Checks: a meeting was provided, it overlaps no existing meeting, it lies
    inside the working hours of its weekday, and it lasts 15 to 120 minutes.

    Args:
        meeting_details: dict with ISO ``start_time``/``end_time``, or None.
        scenario: scenario dict; ``scenario["calendar"]`` must hold
            ``existing_meetings`` and may hold ``working_hours``.

    Returns:
        ``{"checks": {name: bool}, "score": float}``, where score is the
        fraction of passed checks. The score is 0.0 with no meeting and 0.25
        when the times cannot be parsed.
    """
    if not meeting_details:
        return {
            "checks": {
                "meeting_provided": False,
                "no_double_booking": False,
                "within_working_hours": False,
                "appropriate_duration": False,
            },
            "score": 0.0,
        }

    calendar = scenario["calendar"]
    existing_meetings = calendar["existing_meetings"]

    results = {
        "meeting_provided": True,
        "no_double_booking": True,
        "within_working_hours": True,
        "appropriate_duration": True,
    }

    # Parse meeting times
    try:
        meeting_start = _strip_tz(datetime.fromisoformat(meeting_details["start_time"]))
        meeting_end = _strip_tz(datetime.fromisoformat(meeting_details["end_time"]))
    except (KeyError, ValueError, TypeError):
        return {
            "checks": {
                "meeting_provided": True,
                "no_double_booking": False,
                "within_working_hours": False,
                "appropriate_duration": False,
            },
            "score": 0.25,  # Some credit for trying
        }

    # Check 1: No double booking
    for existing in existing_meetings:
        try:
            existing_start = _strip_tz(datetime.fromisoformat(existing["start_time"]))
            existing_end = _strip_tz(datetime.fromisoformat(existing["end_time"]))
        except (KeyError, ValueError, TypeError):
            continue  # skip calendar entries that cannot be read
        # Two intervals overlap when each starts before the other ends.
        if meeting_start < existing_end and existing_start < meeting_end:
            results["no_double_booking"] = False
            break

    # Check 2: Within the working hours configured for that weekday
    window = _working_window(calendar, meeting_start.weekday())
    if window is None or meeting_start.date() != meeting_end.date():
        # A non-working day, or a meeting that runs past midnight.
        results["within_working_hours"] = False
    else:
        opens, closes = window
        start_minutes = meeting_start.hour * 60 + meeting_start.minute
        end_minutes = meeting_end.hour * 60 + meeting_end.minute
        if start_minutes < opens * 60 or end_minutes > closes * 60:
            results["within_working_hours"] = False

    # Check 3: Appropriate duration (15 min to 2 hours)
    duration_minutes = (meeting_end - meeting_start).total_seconds() / 60
    if not (15 <= duration_minutes <= 120):
        results["appropriate_duration"] = False

    # Compute overall score
    score = sum(results.values()) / len(results)

    return {
        "checks": results,
        "score": score,
    }
557
+
558
+
559
+ # ============================================================
560
+ # REWARD FUNCTION 3: CONFLICT RESOLUTION
561
+ # ============================================================
562
+
563
def compute_conflict_resolution(action: dict, scenario: dict) -> float:
    """
    Rate how the agent dealt with a scheduling conflict, if the scenario has one.

    With a conflict: credit for choosing propose_alternatives/reschedule
    (0.4) or book (0.1), 0.2 per proposed alternative up to 0.4, and 0.2 when
    the email text mentions the conflict. Without a conflict: 1.0 for book,
    0.5 for propose_alternatives, 0.3 otherwise.

    Returns score 0.0 to 1.0.
    """
    chosen = action.get("calendar_action", "")

    if not scenario.get("has_conflict", False):
        # Nothing to resolve: booking directly is the ideal move.
        if chosen == "book":
            return min(1.0, 1.0)
        if chosen == "propose_alternatives":
            return min(1.0, 0.5)  # Partial credit
        return min(1.0, 0.3)

    total = 0.0

    # Did the agent pick an action that acknowledges the conflict?
    if chosen in ["propose_alternatives", "reschedule"]:
        total += 0.4
    elif chosen == "book":
        total += 0.1

    # Reward offered alternatives, 0.2 each, capped at 0.4.
    details = action.get("meeting_details") or {}
    offered = details.get("proposed_alternatives", []) or []
    if offered:
        total += min(0.4, len(offered) * 0.2)

    # Does the reply text talk about the conflict at all?
    body = action.get("email_reply", "").lower()
    mentions = ["conflict", "already booked", "unavailable", "scheduled", "occupied", "another meeting"]
    for keyword in mentions:
        if keyword in body:
            total += 0.2
            break

    return min(1.0, total)
605
+
606
+
607
+ # ============================================================
608
+ # ANTI-REWARD HACKING: PENALTIES
609
+ # ============================================================
610
+
611
def apply_penalties(action: dict, scenario: dict) -> float:
    """
    Add up deductions for behaviours that look like reward hacking.

    The scenario argument is accepted but not consulted.

    Returns penalty amount (0.0 = no penalty, higher = worse), capped at 1.0.
    """
    text = action.get("email_reply", "")
    lowered = text.lower()
    tokens = lowered.split()
    deduction = 0.0

    # 1. Lazy response: fewer than 30 characters once trimmed.
    too_short = len(text.strip()) < 30
    if too_short:
        deduction += 0.3

    # 2. "book" without any meeting details to back it up.
    booked_without_details = (
        action.get("calendar_action", "") == "book" and not action.get("meeting_details")
    )
    if booked_without_details:
        deduction += 0.4

    # 3. Boilerplate phrases.
    boilerplate = (
        "as per your request",
        "please find attached",
        "hope this helps",
        "let me know if you have any questions",
        "do not hesitate to contact",
    )
    for phrase in boilerplate:
        if phrase in lowered:
            deduction += 0.10
            break

    # 4. Rambling: more than 200 words.
    if len(text.split()) > 200:
        deduction += 0.15

    # 5. Repetition: under 40% unique words in a reply longer than 20 words.
    if len(tokens) > 20 and len(set(tokens)) / len(tokens) < 0.4:
        deduction += 0.20

    return min(1.0, deduction)
654
+
655
+
656
+ # ============================================================
657
+ # HELPER FUNCTIONS
658
+ # ============================================================
659
+
660
def parse_time_slot(time_str: str) -> Optional[datetime]:
    """Convert an ISO-format string to a datetime; return None if it cannot be parsed."""
    try:
        parsed = datetime.fromisoformat(time_str)
    except (ValueError, TypeError):
        parsed = None
    return parsed
666
+
667
+
668
def format_time_slot(dt: datetime) -> str:
    """Render a datetime as weekday, month and day, then 12-hour time, e.g. "Monday, April 27 at 02:00 PM"."""
    return f"{dt:%A, %B %d at %I:%M %p}"
server/models.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ models.py — Typed Pydantic models for Executive Assistant Environment
3
+
4
+ Defines Action, Observation, and State types used by the OpenEnv spec.
5
+ """
6
+
7
+ from pydantic import BaseModel
8
+ from typing import List, Optional, Dict
9
+
10
+
11
+ # ============================================================
12
+ # ACTION — what the agent sends
13
+ # ============================================================
14
+
15
class TimeSlot(BaseModel):
    """Proposed meeting time.

    A start/end pair with an optional free-text note. Times are carried as
    plain strings; this model does not parse or validate them.
    """
    start_time: str  # ISO format: "2026-04-28T14:00:00"
    end_time: str  # presumably the same ISO format as start_time -- TODO confirm
    note: Optional[str] = None  # e.g., "This works better for all attendees"
20
+
21
+
22
class MeetingDetails(BaseModel):
    """Complete meeting information.

    ``participants``, ``start_time``, ``end_time`` and ``subject`` are
    required; ``location`` and ``proposed_alternatives`` have defaults.
    """
    participants: List[str]
    start_time: str  # stored as a plain string; not parsed by this model
    end_time: str
    subject: str
    location: Optional[str] = "Conference Room A"  # default when no location is supplied
    proposed_alternatives: Optional[List[TimeSlot]] = None  # optional list of alternative slots
30
+
31
+
32
class AssistantAction(BaseModel):
    """Agent's response to email scenario.

    Combines the drafted reply text, the chosen calendar action and,
    optionally, the meeting the action refers to.
    """
    email_reply: str  # Draft response to sender
    # Expected values: "book" | "propose_alternatives" | "reschedule" | "decline".
    # Typed as a plain str, so the set is not enforced by this model.
    calendar_action: str
    meeting_details: Optional[MeetingDetails] = None  # may be omitted
37
+
38
+
39
+ # ============================================================
40
+ # OBSERVATION — what the agent sees
41
+ # ============================================================
42
+
43
class Meeting(BaseModel):
    """Existing calendar meeting.

    All fields except ``priority`` are required.
    """
    id: str
    participants: List[str]
    start_time: str  # stored as a plain string; not parsed by this model
    end_time: str
    subject: str
    # Expected values: "low" | "normal" | "high" (plain str, not enforced here).
    priority: str = "normal"
51
+
52
+
53
class ContactInfo(BaseModel):
    """Contact metadata.

    ``name`` and ``email`` are required; ``timezone`` and ``title`` have
    defaults.
    """
    name: str
    email: str  # plain str; no e-mail format validation is declared here
    timezone: str = "America/Los_Angeles"  # looks like an IANA zone name -- TODO confirm
    title: Optional[str] = None
59
+
60
+
61
class EmailInbox(BaseModel):
    """Incoming email request.

    Represents a single message: who sent it, its subject and body, when it
    arrived, and a priority label.
    """
    sender: str
    subject: str
    body: str
    timestamp: str  # stored as a plain string; format is not enforced here
    priority: str = "normal"  # defaults to "normal" when not supplied
68
+
69
+
70
class CalendarState(BaseModel):
    """Current calendar state.

    Holds the meetings already on the calendar, the working-hours mapping
    and the name of the executive the calendar belongs to.
    """
    existing_meetings: List[Meeting]
    working_hours: Dict[str, str]  # {"monday": "9-17", ...}
    executive_name: str = "Alex Chen"  # default executive name
75
+
76
+
77
class AssistantObservation(BaseModel):
    """What the agent receives after reset() or step().

    Every field is optional and defaults to ``None``, so an observation may
    carry any subset of them.
    """
    task: Optional[str] = None
    description: Optional[str] = None
    emails: Optional[List[EmailInbox]] = None
    calendar: Optional[CalendarState] = None
    contacts: Optional[Dict[str, ContactInfo]] = None  # key meaning not shown here -- TODO confirm
    action_required: Optional[str] = None
    message: Optional[str] = None
86
+
87
+
88
+ # ============================================================
89
+ # STATE — current environment state
90
+ # ============================================================
91
+
92
class AssistantState(BaseModel):
    """Current state of the environment.

    All fields have defaults, so ``AssistantState()`` describes a fresh
    environment: no task, nothing pending, not done, zero steps and score.
    """
    current_task: Optional[str] = None
    emails_pending: int = 0
    episode_done: bool = False
    steps_taken: int = 0
    total_score: float = 0.0