Eliot0110 committed on
Commit
e86b23a
·
1 Parent(s): 2b453dd

improve: context management

Browse files
Files changed (1) hide show
  1. modules/info_extractor.py +27 -9
modules/info_extractor.py CHANGED
@@ -3,6 +3,7 @@ import re
3
  from utils.logger import log
4
  import jieba
5
  from typing import List, Tuple
 
6
 
7
  class InfoExtractor:
8
  def __init__(self):
@@ -285,17 +286,23 @@ class InfoExtractor:
285
  'weekend': 2, 'week': 7, 'month': 30, 'vacation': 7, 'holiday': 3
286
  }
287
 
288
- def extract(self, user_message: str) -> dict:
289
 
290
  # 输入验证
291
  if not user_message or not isinstance(user_message, str):
292
  log.warning("⚠️ 收到无效的用户消息")
293
- return {}
294
 
295
  if len(user_message.strip()) < 2:
296
  log.warning("⚠️ 用户消息过短,跳过信息提取")
297
- return {}
298
 
 
 
 
 
 
 
299
  log.info(f"🛠️ 使用分词策略提取信息:'{user_message[:50]}...'")
300
 
301
  # 1. 智能分词
@@ -303,25 +310,36 @@ class InfoExtractor:
303
  log.info(f"📝 分词结果:{tokens}")
304
 
305
  # 2. 基于分词进行信息提取
306
- result = {}
307
 
308
  # 提取目的地信息
309
  destination_info = self._extract_destination_from_tokens(tokens)
310
  if destination_info:
311
- result["destination"] = destination_info
312
 
313
  # 提取时长信息
314
  duration_info = self._extract_duration_from_tokens(tokens)
315
  if duration_info:
316
- result["duration"] = duration_info
317
 
318
  # 提取预算信息
319
  budget_info = self._extract_budget_from_tokens(tokens)
320
  if budget_info:
321
- result["budget"] = budget_info
322
 
323
- log.info(f"📊 分词提取结果: {result}")
324
- return result
 
 
 
 
 
 
 
 
 
 
 
325
 
326
  def _tokenize_message(self, text: str) -> list:
327
  """智能分词,支持中英文混合"""
 
3
  from utils.logger import log
4
  import jieba
5
  from typing import List, Tuple
6
+ import copy
7
 
8
  class InfoExtractor:
9
  def __init__(self):
 
286
  'weekend': 2, 'week': 7, 'month': 30, 'vacation': 7, 'holiday': 3
287
  }
288
 
289
+ def extract(self, user_message: str,existing_info: dict = None) -> dict:
290
 
291
  # 输入验证
292
  if not user_message or not isinstance(user_message, str):
293
  log.warning("⚠️ 收到无效的用户消息")
294
+ return existing_info or {}
295
 
296
  if len(user_message.strip()) < 2:
297
  log.warning("⚠️ 用户消息过短,跳过信息提取")
298
+ return existing_info or {}
299
 
300
+ if existing_info:
301
+ log.info(f"接收到上下文信息,将在此基础上更新: {existing_info}")
302
+ result = copy.deepcopy(existing_info)
303
+ else:
304
+ result = {}
305
+
306
  log.info(f"🛠️ 使用分词策略提取信息:'{user_message[:50]}...'")
307
 
308
  # 1. 智能分词
 
310
  log.info(f"📝 分词结果:{tokens}")
311
 
312
  # 2. 基于分词进行信息提取
313
+ newly_extracted_info = {}
314
 
315
  # 提取目的地信息
316
  destination_info = self._extract_destination_from_tokens(tokens)
317
  if destination_info:
318
+ newly_extracted_info["destination"] = destination_info
319
 
320
  # 提取时长信息
321
  duration_info = self._extract_duration_from_tokens(tokens)
322
  if duration_info:
323
+ newly_extracted_info["duration"] = duration_info
324
 
325
  # 提取预算信息
326
  budget_info = self._extract_budget_from_tokens(tokens)
327
  if budget_info:
328
+ newly_extracted_info["budget"] = budget_info
329
 
330
+ log.info(f"📊 分词提取结果: {newly_extracted_info}")
331
+ return newly_extracted_info
332
+
333
+ def _merge_info(self, new_info: dict, existing_info: dict) -> dict:
334
+
335
+ for key, value in new_info.items():
336
+ # 如果新旧信息中同一个键的值都是字典,则递归深入合并
337
+ if isinstance(value, dict) and key in existing_info and isinstance(existing_info[key], dict):
338
+ self._merge_info(value, existing_info[key])
339
+ else:
340
+ # 否则,直接用新信息覆盖或添加
341
+ existing_info[key] = value
342
+ return existing_info
343
 
344
  def _tokenize_message(self, text: str) -> list:
345
  """智能分词,支持中英文混合"""