Spaces:

Eliot0110
/

Travel_Assistant

Sleeping

App Files Files Community

Eliot0110 committed on Aug 5

Commit

b180c39

1 Parent(s): 240c11f

fix: prompt and loosen intent_classifier

Browse files

Files changed (2) hide show

modules/info_extractor.py +41 -39
modules/travel_assistant.py +10 -2

modules/info_extractor.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import json
-import re  # 导入正则表达式模块
 from utils.logger import log
 from .ai_model import AIModel
@@ -9,18 +9,17 @@ class InfoExtractor:
         self.prompt_template = self._build_prompt_template()
     def _build_prompt_template(self) -> str:
-        # --- 重点更新：使用更强大、更明确的Prompt ---
-        return """你是一个专门用于从文本中提取结构化旅行信息的AI助理。
-你的唯一任务是分析用户提供的文本，并严格按照指定的JSON格式输出提取的信息。
-**输出要求:**
-1.  **严格的JSON格式**: 输出必须是一个单一、完整、有效的JSON对象。
-2.  **禁止任何额外文本**: 不要在JSON对象前后添加任何解释、注释、Markdown标记或任何其他文字。
-3.  **遵循指定结构**: JSON的键和层级结构必须与下方定义的格式完全一致。
-4.  **处理缺失信息**: 如果用户输入中没有提到某个字段，请将该字段的值设为 null。
-5.  **处理无关输入**: 如果用户输入是简单的问候或与旅行无关，请返回一个空的JSON对象 `{{}}`。
-**JSON输出格式定义:**
 ```json
 {{
   "destination": {{
@@ -38,19 +37,18 @@ class InfoExtractor:
 ```
 **示例:**
-用户输入: "我想去巴黎玩一个星期，预算大概是经济型的"
-你的输出:
 ```json
 {{
   "destination": {{
-    "name": "巴黎"
   }},
   "duration": {{
-    "days": 7
   }},
   "budget": {{
-    "type": "economy",
     "amount": null,
     "currency": null
   }}
@@ -58,41 +56,27 @@ class InfoExtractor:
 ```
 ---
-现在，请处理以下用户输入。
 **用户输入:**
-```
-{user_message}
-```
-**你的输出:**
 """
     def extract(self, message: str) -> dict:
-        """
-        使用LLM从用户消息中提取结构化信息。
-        """
         log.info(f"🧠 使用LLM开始提取信息，消息: '{message}'")
-        # 1. 构建完整的Prompt
         prompt = self.prompt_template.format(user_message=message)
-        # 2. 调用AI模型生成结果
         raw_response = self.ai_model.generate(prompt)
         if not raw_response:
             log.error("❌ LLM模型没有返回任何内容。")
             return {}
-        # --- 重点更新：使用更稳健的JSON解析逻辑 ---
         try:
-            # 优先使用正则表达式从 ```json ... ``` 代码块中提取
             match = re.search(r'```json\s*(\{.*?\})\s*```', raw_response, re.DOTALL)
             if match:
                 json_str = match.group(1)
             else:
-                # 如果正则没匹配到，就粗暴地寻找第一个'{'和最后一个'}'
                 start_index = raw_response.find('{')
                 end_index = raw_response.rfind('}')
                 if start_index != -1 and end_index != -1 and end_index > start_index:
@@ -106,10 +90,28 @@ class InfoExtractor:
             log.error(f"❌ 无法解析LLM返回的JSON: '{raw_response}'. 错误: {e}")
             return {}
-        # 4. 清理和格式化提取出的数据
-        final_info = {
-            key: value for key, value in extracted_data.items() if value and any(v is not None for v in value.values())
-        }
-        log.info(f"📊 LLM最终提取结果: {list(final_info.keys())}")
         return final_info

 import json
+import re
 from utils.logger import log
 from .ai_model import AIModel
         self.prompt_template = self._build_prompt_template()
     def _build_prompt_template(self) -> str:
+        # --- 重点更新：使用更严格的指令和结构化示例 ---
+        return """你的任务是且仅是作为文本解析器。
+严格分析用户输入，并以一个纯净、无注释的JSON对象格式返回。
+**核心规则:**
+1.  **绝对禁止** 在JSON之外添加任何文本、注释、解释或Markdown标记。你的输出必须从 `{{` 开始，到 `}}` 结束。
+2.  **必须严格遵守** 下方定义的嵌套JSON结构。不要创造新的键，也不要改变层级。
+3.  如果信息未提供，对应的键值必须为 `null`，而不是省略该键。
+4.  如果用户输入与旅行无关（如 "你好"），必须返回一个空的JSON对象: `{{}}`。
+**强制JSON输出结构:**
 ```json
 {{
   "destination": {{
 ```
 **示例:**
+- 用户输入: "我想去柏林玩3天"
+- 你的输出:
 ```json
 {{
   "destination": {{
+    "name": "柏林"
   }},
   "duration": {{
+    "days": 3
   }},
   "budget": {{
+    "type": null,
     "amount": null,
     "currency": null
   }}
 ```
 ---
 **用户输入:**
+`{user_message}`
+**你的输出 (必须是纯JSON):**
 """
     def extract(self, message: str) -> dict:
         log.info(f"🧠 使用LLM开始提取信息，消息: '{message}'")
         prompt = self.prompt_template.format(user_message=message)
         raw_response = self.ai_model.generate(prompt)
         if not raw_response:
             log.error("❌ LLM模型没有返回任何内容。")
             return {}
+        json_str = ""
         try:
             match = re.search(r'```json\s*(\{.*?\})\s*```', raw_response, re.DOTALL)
             if match:
                 json_str = match.group(1)
             else:
                 start_index = raw_response.find('{')
                 end_index = raw_response.rfind('}')
                 if start_index != -1 and end_index != -1 and end_index > start_index:
             log.error(f"❌ 无法解析LLM返回的JSON: '{raw_response}'. 错误: {e}")
             return {}
+        # --- 重点更新：使用更健壮、更安全的逻辑来清理数据 ---
+        final_info = {}
+        # 安全地处理 'destination'
+        destination_data = extracted_data.get("destination")
+        if isinstance(destination_data, dict) and destination_data.get("name"):
+            final_info["destination"] = {"name": destination_data["name"]}
+        # 安全地处理 'duration'
+        duration_data = extracted_data.get("duration")
+        if isinstance(duration_data, dict) and duration_data.get("days"):
+            try:
+                final_info["duration"] = {"days": int(duration_data["days"])}
+            except (ValueError, TypeError):
+                log.warning(f"⚠️ 无法将duration days '{duration_data.get('days')}' 转换为整数。")
+        # 安全地处理 'budget'
+        budget_data = extracted_data.get("budget")
+        if isinstance(budget_data, dict):
+            # 只要budget对象里有任何非null的值，就把它加进来
+            if any(v is not None for v in budget_data.values()):
+                final_info["budget"] = budget_data
+        log.info(f"📊 LLM最终提取结果 (安全处理后): {list(final_info.keys())}")
         return final_info

modules/travel_assistant.py CHANGED Viewed

@@ -46,10 +46,18 @@ class TravelAssistant:
             log.info(f"✅ 设置persona: {persona_info['name']}")
         # 3. 意图识别 (前置守卫)
-        intent = self.intent_classifier.classify(message)
-        log.info(f"🔍 用户意图识别结果: '{intent}'")
         extracted_info = {}
         # 4.: 根据意图进行逻辑分流
         if intent == 'PROVIDING_TRAVEL_INFO':

             log.info(f"✅ 设置persona: {persona_info['name']}")
         # 3. 意图识别 (前置守卫)
+        raw_intent = self.intent_classifier.classify(message)
+        log.info(f"🔍 用户意图识别结果: '{raw_intent}'")
         extracted_info = {}
+        intent = 'OTHER'
+        if 'PROVIDING_TRAVEL_INFO' in raw_intent:
+            intent = 'PROVIDING_TRAVEL_INFO'
+        elif 'GREETING' in raw_intent:
+            intent = 'GREETING'
+        log.info(f"✅ 解析后用户意图: '{intent}'")
         # 4.: 根据意图进行逻辑分流
         if intent == 'PROVIDING_TRAVEL_INFO':