Spaces:

Eliot0110
/

Travel_Assistant

Sleeping

App Files Files Community

Eliot0110 committed on Aug 6

Commit

e86b23a

1 Parent(s): 2b453dd

improve: context management

Browse files

Files changed (1) hide show

modules/info_extractor.py +27 -9

modules/info_extractor.py CHANGED Viewed

@@ -3,6 +3,7 @@ import re
 from utils.logger import log
 import jieba
 from typing import List, Tuple
 class InfoExtractor:
     def __init__(self):
@@ -285,17 +286,23 @@ class InfoExtractor:
             'weekend': 2, 'week': 7, 'month': 30, 'vacation': 7, 'holiday': 3
         }
-    def extract(self, user_message: str) -> dict:
         # 输入验证
         if not user_message or not isinstance(user_message, str):
             log.warning("⚠️ 收到无效的用户消息")
-            return {}
         if len(user_message.strip()) < 2:
             log.warning("⚠️ 用户消息过短，跳过信息提取")
-            return {}
         log.info(f"🛠️ 使用分词策略提取信息：'{user_message[:50]}...'")
         # 1. 智能分词
@@ -303,25 +310,36 @@ class InfoExtractor:
         log.info(f"📝 分词结果：{tokens}")
         # 2. 基于分词进行信息提取
-        result = {}
         # 提取目的地信息
         destination_info = self._extract_destination_from_tokens(tokens)
         if destination_info:
-            result["destination"] = destination_info
         # 提取时长信息
         duration_info = self._extract_duration_from_tokens(tokens)
         if duration_info:
-            result["duration"] = duration_info
         # 提取预算信息
         budget_info = self._extract_budget_from_tokens(tokens)
         if budget_info:
-            result["budget"] = budget_info
-        log.info(f"📊 分词提取结果: {result}")
-        return result
     def _tokenize_message(self, text: str) -> list:
         """智能分词，支持中英文混合"""

 from utils.logger import log
 import jieba
 from typing import List, Tuple
+import copy
 class InfoExtractor:
     def __init__(self):
             'weekend': 2, 'week': 7, 'month': 30, 'vacation': 7, 'holiday': 3
         }
+    def extract(self, user_message: str,existing_info: dict = None) -> dict:
         # 输入验证
         if not user_message or not isinstance(user_message, str):
             log.warning("⚠️ 收到无效的用户消息")
+            return existing_info or {}
         if len(user_message.strip()) < 2:
             log.warning("⚠️ 用户消息过短，跳过信息提取")
+            return existing_info or {}
+        if existing_info:
+            log.info(f"接收到上下文信息，将在此基础上更新: {existing_info}")
+            result = copy.deepcopy(existing_info)
+        else:
+            result = {}
         log.info(f"🛠️ 使用分词策略提取信息：'{user_message[:50]}...'")
         # 1. 智能分词
         log.info(f"📝 分词结果：{tokens}")
         # 2. 基于分词进行信息提取
+        newly_extracted_info = {}
         # 提取目的地信息
         destination_info = self._extract_destination_from_tokens(tokens)
         if destination_info:
+            newly_extracted_info["destination"] = destination_info
         # 提取时长信息
         duration_info = self._extract_duration_from_tokens(tokens)
         if duration_info:
+            newly_extracted_info["duration"] = duration_info
         # 提取预算信息
         budget_info = self._extract_budget_from_tokens(tokens)
         if budget_info:
+            newly_extracted_info["budget"] = budget_info
+        log.info(f"📊 分词提取结果: {newly_extracted_info}")
+        return newly_extracted_info
+    def _merge_info(self, new_info: dict, existing_info: dict) -> dict:
+        for key, value in new_info.items():
+            # 如果新旧信息中同一个键的值都是字典，则递归深入合并
+            if isinstance(value, dict) and key in existing_info and isinstance(existing_info[key], dict):
+                self._merge_info(value, existing_info[key])
+            else:
+                # 否则，直接用新信息覆盖或添加
+                existing_info[key] = value
+        return existing_info
     def _tokenize_message(self, text: str) -> list:
         """智能分词，支持中英文混合"""