Spaces:

Mmanikandan
/

SupportFlowAI

Running

App Files Files Community

Mmanikandan committed 14 days ago

Commit

c74d5fa

1 Parent(s): 4e9153c

phase 2 fix

Browse files

Files changed (4) hide show

inference.py +6 -4
openenv.yaml +9 -18
server/environment.py +49 -9
server/grader.py +53 -0

inference.py CHANGED Viewed

@@ -648,7 +648,8 @@ def run_inference(config: Optional[Dict[str, str]] = None) -> None:
         reset_data = reset_response.json()
         observation = reset_data.get("observation", {})
-        task_name = observation.get("email_id", "email_workflow")
         email_subject = observation.get("subject", "")
         email_body = observation.get("body", "")
         customer_history = observation.get("customer_history", "")
@@ -761,9 +762,10 @@ def run_inference(config: Optional[Dict[str, str]] = None) -> None:
             # Log step
             log_step(step_num, action_str, reward, done, None)
-        # Prepare final metrics
-        # CRITICAL FIX: Use the environment's official cumulative reward instead of manual summation
-        normalized_score = step_data.get("info", {}).get("total_reward", sum(rewards))
         # Clamp just in case, though the environment already does this
         normalized_score = min(max(normalized_score, 0.0), 1.0)

         reset_data = reset_response.json()
         observation = reset_data.get("observation", {})
+        info = reset_data.get("info", {})
+        task_name = info.get("task_id", observation.get("email_id", "email_workflow"))
         email_subject = observation.get("subject", "")
         email_body = observation.get("body", "")
         customer_history = observation.get("customer_history", "")
             # Log step
             log_step(step_num, action_str, reward, done, None)
+        # PHASE 2 REQUIREMENT: Use the programmatic grader's score if available
+        # Fallback to total_reward or manual sum for robust reporting
+        final_info = step_data.get("info", {})
+        normalized_score = final_info.get("score", final_info.get("total_reward", sum(rewards)))
         # Clamp just in case, though the environment already does this
         normalized_score = min(max(normalized_score, 0.0), 1.0)

openenv.yaml CHANGED Viewed

@@ -157,35 +157,26 @@ reward:
       description: "Escalation bonus or penalty for appropriate decision"
 tasks:
-  - id: email_001
-    name: Easy Email
     difficulty: easy
-    description: >
-      Clear billing issue. Straightforward double-charge complaint
-      from good customer. Requires correct classification and
-      appropriate urgency response.
     ground_truth:
       category: billing
       priority: high
-  - id: email_002
-    name: Medium Email
     difficulty: medium
-    description: >
-      Technical issue with app. Requires interpretation of problem
-      and prioritization judgment. Customer history is important context.
     ground_truth:
       category: tech
       priority: medium
-  - id: email_003
-    name: Hard Email
     difficulty: hard
-    description: >
-      Emotional complaint from enterprise customer. Requires nuanced
-      understanding of tone, prior history, and business impact.
-      Response must show empathy and urgency. Failure to prioritize
-      properly could lead to business loss.
     ground_truth:
       category: complaint
       priority: high

       description: "Escalation bonus or penalty for appropriate decision"
 tasks:
+  - id: easy_refund
+    name: Easy Refund Task
     difficulty: easy
+    description: Handle a straightforward billing refund request for a duplicate charge.
     ground_truth:
       category: billing
       priority: high
+  - id: medium_tech
+    name: Medium Tech Task
     difficulty: medium
+    description: Resolve a technical issue regarding app crashes and provide instructions.
     ground_truth:
       category: tech
       priority: medium
+  - id: hard_escalation
+    name: Hard Escalation Task
     difficulty: hard
+    description: Handle a high-value enterprise complaint that requires escalation or financial compensation.
     ground_truth:
       category: complaint
       priority: high

server/environment.py CHANGED Viewed

@@ -18,9 +18,16 @@ from models import (
 from .grader import (
     calculate_step_reward, grade_workflow_completion,
     analyze_customer_sentiment, extract_urgency_indicators,
-    check_escalation_requirement
 )
 def search_knowledge_base(query: str):
     if "refund" in query.lower():
         return {
@@ -320,7 +327,19 @@ class CustomerSupportEnv:
         if not self.task_queue:
             self.task_queue = self._load_tasks()
-        self.current_task = self._prepare_task_data(self.task_queue.pop(0))
         self.episode_count += 1
         # Initialize workflow state
@@ -362,6 +381,7 @@ class CustomerSupportEnv:
                 "episode_id": self.current_state.episode_id,
                 "difficulty": self.current_task.get("difficulty", "unknown"),
                 "email_id": self.current_task["id"],
                 "workflow_step": 0,
                 "max_steps": 5
             }
@@ -502,19 +522,39 @@ class CustomerSupportEnv:
             reward_breakdown["escalation_bonus"] = escalation_bonus
             reward_breakdown.update(completion_breakdown)
         return {
             "observation": observation,
             "reward": step_reward,
             "done": done,
-            "info": {
-                "workflow_state": self.workflow_state.copy(),
-                "total_reward": self.current_state.total_reward,
-                "reward_breakdown": reward_breakdown,
-                "step_count": self.current_state.step_count,
-                "episode_complete": done
-            }
         }
     def _is_episode_complete(self) -> bool:
         """
         Check if the current episode is complete.

 from .grader import (
     calculate_step_reward, grade_workflow_completion,
     analyze_customer_sentiment, extract_urgency_indicators,
+    check_escalation_requirement, refund_grader, tech_grader, escalation_grader
 )
+# Mandatory Task definitions for OpenEnv validation
+TASKS = [
+    {"task_id": "easy_refund", "email_id": "email_001", "difficulty": "easy"},
+    {"task_id": "medium_tech", "email_id": "email_002", "difficulty": "medium"},
+    {"task_id": "hard_escalation", "email_id": "email_003", "difficulty": "hard"},
+]
 def search_knowledge_base(query: str):
     if "refund" in query.lower():
         return {
         if not self.task_queue:
             self.task_queue = self._load_tasks()
+        # Phase 2 Fix: Wrap selection in structured TASKS context
+        # We cycle through the 3 mandatory tasks for consistent evaluation
+        task_idx = self.episode_count % len(TASKS)
+        selected_task_metadata = TASKS[task_idx]
+        task_id = selected_task_metadata["task_id"]
+        # Find corresponding email data in current queue or database
+        # For simplicity in this fix, we search task_queue for matching email_id
+        email_data = next((t for t in self.task_queue if t["id"] == selected_task_metadata["email_id"]), self.task_queue[0])
+        self.current_task = self._prepare_task_data(email_data)
+        self.current_task["task_id"] = task_id # Attach mandatory task_id
         self.episode_count += 1
         # Initialize workflow state
                 "episode_id": self.current_state.episode_id,
                 "difficulty": self.current_task.get("difficulty", "unknown"),
                 "email_id": self.current_task["id"],
+                "task_id": self.current_task.get("task_id"),
                 "workflow_step": 0,
                 "max_steps": 5
             }
             reward_breakdown["escalation_bonus"] = escalation_bonus
             reward_breakdown.update(completion_breakdown)
+        info = {
+            "workflow_state": self.workflow_state.copy(),
+            "total_reward": self.current_state.total_reward,
+            "reward_breakdown": reward_breakdown,
+            "step_count": self.current_state.step_count,
+            "episode_complete": done,
+            "task_id": self.current_task.get("task_id")
+        }
+        # PHASE 2 REQUIREMENT: Return score only at end
+        if done:
+            info["score"] = self.compute_score()
         return {
             "observation": observation,
             "reward": step_reward,
             "done": done,
+            "info": info
         }
+    def compute_score(self) -> float:
+        """ Programmatic score computation for OpenEnv validation. """
+        task_id = self.current_task.get("task_id")
+        if task_id == "easy_refund":
+            return refund_grader(self.workflow_state)
+        elif task_id == "medium_tech":
+            return tech_grader(self.workflow_state)
+        elif task_id == "hard_escalation":
+            return escalation_grader(self.workflow_state)
+        return 0.0
     def _is_episode_complete(self) -> bool:
         """
         Check if the current episode is complete.

server/grader.py CHANGED Viewed

@@ -702,3 +702,56 @@ def check_escalation_requirement(email_task: Dict[str, Any], state: Dict[str, An
         bonus = 0.1   # Bonus for correct escalation
     return penalty, bonus

         bonus = 0.1   # Bonus for correct escalation
     return penalty, bonus
def refund_grader(state: Dict[str, Any]) -> float:
    """Score the final workflow state for the ``easy_refund`` task.

    Points are awarded independently for each matching field:

    * ``classification == "billing"``: 0.3
    * ``priority == "high"``: 0.2
    * ``strategy == "offer_refund"``: 0.3
    * ``response`` is text mentioning "refund" (case-insensitive): 0.2

    Args:
        state: Workflow state mapping. A missing or empty state scores 0.0.

    Returns:
        The accumulated score, capped at 1.0.
    """
    if not state:
        return 0.0

    score = 0.0
    if state.get("classification") == "billing":
        score += 0.3
    if state.get("priority") == "high":
        score += 0.2
    if state.get("strategy") == "offer_refund":
        score += 0.3
    response = state.get("response")
    # Only text can mention a refund; a non-string payload must not crash
    # grading at the end of an episode.
    if isinstance(response, str) and "refund" in response.lower():
        score += 0.2
    return min(score, 1.0)
def tech_grader(state: Dict[str, Any]) -> float:
    """Score the final workflow state for the ``medium_tech`` task.

    Points are awarded independently for each matching field:

    * ``classification == "tech"``: 0.3
    * ``priority`` is ``"medium"`` or ``"high"``: 0.2
    * ``strategy`` is ``"auto_resolve"`` or ``"request_more_info"``: 0.3
    * ``response`` is text longer than 20 characters: 0.2

    Args:
        state: Workflow state mapping. A missing or empty state scores 0.0.

    Returns:
        The accumulated score, capped at 1.0.
    """
    if not state:
        return 0.0

    score = 0.0
    if state.get("classification") == "tech":
        score += 0.3
    if state.get("priority") in ("medium", "high"):
        score += 0.2
    if state.get("strategy") in ("auto_resolve", "request_more_info"):
        score += 0.3
    response = state.get("response")
    # Length is only meaningful for text; non-string values (numbers, dicts,
    # lists) earn nothing instead of raising or being counted by item.
    if isinstance(response, str) and len(response) > 20:
        score += 0.2
    return min(score, 1.0)
def escalation_grader(state: Dict[str, Any]) -> float:
    """Score the final workflow state for the ``hard_escalation`` task.

    Each satisfied check contributes its weight; the sum is capped at 1.0:

    * ``classification == "complaint"``: 0.2
    * ``priority == "high"``: 0.2
    * ``strategy`` is ``"escalate_to_human"`` or ``"offer_refund"``: 0.3
    * a truthy ``escalation`` entry is present: 0.3
    """
    checks = (
        (state.get("classification") == "complaint", 0.2),
        (state.get("priority") == "high", 0.2),
        (state.get("strategy") in ["escalate_to_human", "offer_refund"], 0.3),
        # Any truthy escalation payload counts.
        (bool(state.get("escalation")), 0.3),
    )
    total = 0.0
    for passed, weight in checks:
        if passed:
            total += weight
    return min(total, 1.0)