Eliot0110 committed on
Commit
7f130dd
·
1 Parent(s): e86b23a

improve: budget mapping

Browse files
modules/info_extractor.py CHANGED
@@ -754,14 +754,14 @@ class InfoExtractor:
754
  return result
755
 
756
  def _extract_budget_from_tokens(self, tokens: list) -> dict:
757
- """从tokens中提取预算信息"""
758
- result = {}
759
 
760
- # 1. 查找金额
 
 
761
  for i, token in enumerate(tokens):
762
  amount = None
763
- currency = "RMB" # 默认货币
764
-
765
  # 处理包含货币的token "2000欧", "5000元"
766
  currency_patterns = [
767
  (r'(\d+(?:\.\d+)?)欧(?:元)?', 'EUR'),
@@ -783,27 +783,26 @@ class InfoExtractor:
783
  (r'(\d+(?:\.\d+)?)gbp', 'GBP'),
784
  (r'(\d+(?:\.\d+)?)chf', 'CHF'),
785
  ]
786
-
787
  for pattern, curr in currency_patterns:
788
  match = re.search(pattern, token.lower())
789
  if match:
790
  amount = float(match.group(1))
791
  currency = curr
792
  break
793
-
794
  # 处理纯数字token(需要查看上下文)
795
- if not amount and re.match(r'^\d+(?:\.\d+)?$', token):
796
  number = float(token)
797
-
798
  # 检查前面的token是否有预算相关词汇
799
  budget_indicators = ['预算', '花费', '费用', '成本', '开销', '支出', '总共', '一共', 'budget', 'cost', 'spend']
800
  has_budget_context = False
801
-
802
  if i > 0 and tokens[i-1] in budget_indicators:
803
  has_budget_context = True
804
  elif i > 1 and tokens[i-2] in budget_indicators:
805
  has_budget_context = True
806
-
807
  # 检查后面是否有货币单位
808
  if i < len(tokens) - 1:
809
  next_token = tokens[i + 1].lower()
@@ -813,28 +812,16 @@ class InfoExtractor:
813
  '瑞郎': 'CHF', '日元': 'JPY', '韩元': 'KRW',
814
  'rmb': 'RMB', 'usd': 'USD', 'eur': 'EUR', 'gbp': 'GBP', 'chf': 'CHF'
815
  }
816
-
817
  if next_token in currency_units:
818
  amount = number
819
  currency = currency_units[next_token]
820
- has_budget_context = True
821
-
822
- # 如果有预算上下文但没有明确货币单位,根据数字大小推断
823
- if has_budget_context and not amount:
824
- if number < 100: # 可能是欧元或美元
825
- # 查看是否有欧洲城市上下文
826
- has_european_context = any(self._normalize_city_name(t) for t in tokens)
827
- if has_european_context:
828
- currency = 'EUR'
829
- else:
830
- currency = 'USD'
831
- else:
832
- currency = 'RMB' # 大数字更可能是人民币
833
- amount = number
834
 
 
 
 
835
  # 处理万、千等单位
836
  if amount:
837
- # 检查是否有万、千修饰符
838
  if i > 0:
839
  prev_token = tokens[i-1]
840
  if '万' in prev_token or 'w' in prev_token.lower():
@@ -847,96 +834,50 @@ class InfoExtractor:
847
  amount *= 10000
848
  elif '千' in next_token or 'k' in next_token.lower():
849
  amount *= 1000
850
-
851
  if amount > 0:
852
  result["amount"] = int(amount)
853
- result["currency"] = currency
 
854
  break
855
-
856
- # 2. 查找预算类型
857
  budget_type_keywords = {
858
- 'economy': [
859
- '经济', '便宜', '省钱', '实惠', '节省', '穷游', '学生', '青年',
860
- '预算有限', '钱不多', '不贵', '划算', '性价比', '背包客',
861
- '简单', '基础', '低成本', '节约', 'budget', 'cheap', 'economy', 'affordable'
862
- ],
863
- 'comfortable': [
864
- '舒适', '中等', '适中', '一般', '标准', '普通', '正常', '常规',
865
- '中档', '中级', '合理', '平均', '中间档次', 'comfortable', 'standard', 'moderate'
866
- ],
867
- 'luxury': [
868
- '豪华', '奢华', '高端', '顶级', '精品', '奢侈', '贵族', '皇家',
869
- '贵一点', '不差钱', '任性', '土豪', '有钱', '五星', 'VIP',
870
- 'luxury', 'premium', 'high-end', 'expensive', 'fancy'
871
- ]
872
  }
873
-
 
 
 
874
  for token in tokens:
875
  token_lower = token.lower()
876
  for budget_type, keywords in budget_type_keywords.items():
877
- if any(keyword in token_lower for keyword in keywords):
878
- result["type"] = budget_type
879
-
880
- # 找到第一个匹配的关键词作为描述
881
- for keyword in keywords:
882
- if keyword in token_lower:
883
- result["description"] = keyword if len(keyword) > 2 else token
884
- break
885
- break
886
- if result.get("type"):
887
- break
888
-
889
- # 3. 如果有金额但没有类型,根据金额推断类型
890
- if result.get("amount") and not result.get("type"):
891
- amount = result["amount"]
892
- currency = result.get("currency", "RMB")
893
-
894
- # 根据欧洲旅行成本设置阈值
895
- if currency == "EUR":
896
- if amount < 1500: # 总预算
897
- result["type"] = "economy"
898
- result["description"] = "经济预算"
899
- elif amount < 4000:
900
- result["type"] = "comfortable"
901
- result["description"] = "舒适预算"
902
- else:
903
- result["type"] = "luxury"
904
- result["description"] = "豪华预算"
905
- elif currency == "USD":
906
- if amount < 2000:
907
- result["type"] = "economy"
908
- result["description"] = "经济预算"
909
- elif amount < 5000:
910
- result["type"] = "comfortable"
911
- result["description"] = "舒适预算"
912
- else:
913
- result["type"] = "luxury"
914
- result["description"] = "豪华预算"
915
- elif currency == "RMB":
916
- if amount < 8000:
917
- result["type"] = "economy"
918
- result["description"] = "经济预算"
919
- elif amount < 20000:
920
- result["type"] = "comfortable"
921
- result["description"] = "舒适预算"
922
- else:
923
- result["type"] = "luxury"
924
- result["description"] = "豪华预算"
925
-
926
- # 4. 处理中文数字金额
927
  chinese_money_mapping = {
928
  '一千': 1000, '两千': 2000, '三千': 3000, '四千': 4000, '五千': 5000,
929
  '六千': 6000, '七千': 7000, '八千': 8000, '九千': 9000,
930
  '一万': 10000, '两万': 20000, '三万': 30000, '四万': 40000, '五万': 50000
931
  }
932
-
933
  if not result.get("amount"):
934
  for token in tokens:
935
  if token in chinese_money_mapping:
936
  result["amount"] = chinese_money_mapping[token]
937
- result["currency"] = "RMB"
938
  break
939
-
940
  return result
941
 
942
  # 保持向后兼容的验证方法
 
754
  return result
755
 
756
  def _extract_budget_from_tokens(self, tokens: list) -> dict:
 
 
757
 
758
+ result = {}
759
+
760
+ # 1. 查找金额和货币单位(不再有默认值)
761
  for i, token in enumerate(tokens):
762
  amount = None
763
+ currency = None # <--- 修改点:不再预设 "RMB",初始为 None
764
+
765
  # 处理包含货币的token "2000欧", "5000元"
766
  currency_patterns = [
767
  (r'(\d+(?:\.\d+)?)欧(?:元)?', 'EUR'),
 
783
  (r'(\d+(?:\.\d+)?)gbp', 'GBP'),
784
  (r'(\d+(?:\.\d+)?)chf', 'CHF'),
785
  ]
786
+
787
  for pattern, curr in currency_patterns:
788
  match = re.search(pattern, token.lower())
789
  if match:
790
  amount = float(match.group(1))
791
  currency = curr
792
  break
793
+
794
  # 处理纯数字token(需要查看上下文)
795
+ if not amount and re.match(r'^\d+(?:\.\d+)?$', token):
796
  number = float(token)
797
+
798
  # 检查前面的token是否有预算相关词汇
799
  budget_indicators = ['预算', '花费', '费用', '成本', '开销', '支出', '总共', '一共', 'budget', 'cost', 'spend']
800
  has_budget_context = False
 
801
  if i > 0 and tokens[i-1] in budget_indicators:
802
  has_budget_context = True
803
  elif i > 1 and tokens[i-2] in budget_indicators:
804
  has_budget_context = True
805
+
806
  # 检查后面是否有货币单位
807
  if i < len(tokens) - 1:
808
  next_token = tokens[i + 1].lower()
 
812
  '瑞郎': 'CHF', '日元': 'JPY', '韩元': 'KRW',
813
  'rmb': 'RMB', 'usd': 'USD', 'eur': 'EUR', 'gbp': 'GBP', 'chf': 'CHF'
814
  }
 
815
  if next_token in currency_units:
816
  amount = number
817
  currency = currency_units[next_token]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
818
 
819
+ if has_budget_context and not currency:
820
+ amount = number
821
+
822
  # 处理万、千等单位
823
  if amount:
824
+ # (这部分逻辑保持不变)
825
  if i > 0:
826
  prev_token = tokens[i-1]
827
  if '万' in prev_token or 'w' in prev_token.lower():
 
834
  amount *= 10000
835
  elif '千' in next_token or 'k' in next_token.lower():
836
  amount *= 1000
837
+
838
  if amount > 0:
839
  result["amount"] = int(amount)
840
+ if currency: # 只有当识别到货币时才赋值
841
+ result["currency"] = currency
842
  break
843
+
844
+ # 2. 查找预算类型(此部分逻辑与金额完全无关)
845
  budget_type_keywords = {
846
+ 'economy': ['经济', '便宜', '省钱', '实惠', '节省', '穷游', '学生', '背包客', '预算有限', '性价比', 'budget', 'cheap'],
847
+ 'comfortable': ['舒适', '中等', '适中', '标准', '普通', '中档', '合理', 'comfortable', 'standard'],
848
+ 'luxury': ['豪华', '奢华', '高端', '顶级', '精品', '不差钱', '任性', '土豪', '五星', 'luxury', 'premium']
 
 
 
 
 
 
 
 
 
 
 
849
  }
850
+
851
+ # 查找最能代表预算类型的关键词
852
+ found_type_keyword = ""
853
+ found_type = ""
854
  for token in tokens:
855
  token_lower = token.lower()
856
  for budget_type, keywords in budget_type_keywords.items():
857
+ for keyword in keywords:
858
+ if keyword in token_lower:
859
+ # 优先选择更长的、更具体的关键词作为描述
860
+ if len(keyword) > len(found_type_keyword):
861
+ found_type_keyword = keyword
862
+ found_type = budget_type
863
+
864
+ if found_type:
865
+ result["type"] = found_type
866
+ result["description"] = found_type_keyword # 使用找到的最匹配的关键词作为描述
867
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
868
  chinese_money_mapping = {
869
  '一千': 1000, '两千': 2000, '三千': 3000, '四千': 4000, '五千': 5000,
870
  '六千': 6000, '七千': 7000, '八千': 8000, '九千': 9000,
871
  '一万': 10000, '两万': 20000, '三万': 30000, '四万': 40000, '五万': 50000
872
  }
873
+
874
  if not result.get("amount"):
875
  for token in tokens:
876
  if token in chinese_money_mapping:
877
  result["amount"] = chinese_money_mapping[token]
878
+ # 这里同样不设置默认货币
879
  break
880
+
881
  return result
882
 
883
  # 保持向后兼容的验证方法
modules/response_generator.py CHANGED
@@ -80,6 +80,9 @@ class ResponseGenerator:
80
  acknowledgement = self._generate_vivid_acknowledgement(extracted_info, session_state)
81
  if acknowledgement:
82
  response_parts.append(acknowledgement)
 
 
 
83
 
84
  # 2. 检查是否需要询问下一个信息
85
  next_question = self._get_dynamic_next_question(session_state)
@@ -108,30 +111,63 @@ class ResponseGenerator:
108
 
109
  def _generate_vivid_acknowledgement(self, extracted_info: dict, session_state: SessionManager) -> str:
110
  """
111
- 【新增实现】根据最新提取的信息,生成一句生动的、非模板化的确认语。
 
112
  """
 
113
  if "destination" in extracted_info and extracted_info["destination"]:
114
- dest_name = extracted_info["destination"]['name']
 
115
 
116
- # 尝试使用您定义的城市特色描述,让回复更精彩
 
 
117
  if dest_name in self.city_descriptions:
118
  feature = random.choice(self.city_descriptions[dest_name])
119
  return f"{dest_name}!一个绝佳的选择,那可是著名的'{feature}'。目的地已为您记录。"
120
  else:
121
- dest_country = extracted_info["destination"][0]['country']
122
- return f"好的,目的地已确认为 {dest_country} {dest_name}!一个充满魅力的地方。"
 
 
 
 
123
 
 
124
  if "duration" in extracted_info and extracted_info["duration"]:
125
- days = extracted_info["duration"]
126
- return f"了解,{days} 天的行程,时间很充裕,可以深度体验了!"
 
127
 
 
128
  if "budget" in extracted_info and extracted_info["budget"]:
129
- amount = extracted_info["budget"]
130
- return f"预算 {amount} 已记录,我会为您规划性价比最高的方案。"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
- # 如果没有提取到新信息,返回空字符串
 
 
 
 
 
133
  return ""
134
 
 
135
  def _get_dynamic_next_question(self, session_state: SessionManager) -> str:
136
 
137
  if not session_state.get('destination'):
 
80
  acknowledgement = self._generate_vivid_acknowledgement(extracted_info, session_state)
81
  if acknowledgement:
82
  response_parts.append(acknowledgement)
83
+ # 如果确认信息本身已经是一个问题(比如追问货币),就直接返回,避免再问下一个问题
84
+ if acknowledgement.strip().endswith(('?', '?')):
85
+ return " ".join(response_parts)
86
 
87
  # 2. 检查是否需要询问下一个信息
88
  next_question = self._get_dynamic_next_question(session_state)
 
111
 
112
  def _generate_vivid_acknowledgement(self, extracted_info: dict, session_state: SessionManager) -> str:
113
  """
114
+ 【更新版本】根据最新提取的信息,生成一句生动的、非模板化的确认语。
115
+ 此版本已重构,以处理更复杂的预算逻辑,并实现主动询问。
116
  """
117
+ # --- 目的地确认 ---
118
  if "destination" in extracted_info and extracted_info["destination"]:
119
+ dest_info = extracted_info["destination"]
120
+ dest_name = dest_info.get('name')
121
 
122
+ if not dest_name: return "" # 如果没有提取到有效名称,则不作回复
123
+
124
+ # 优先使用预设的城市描述,让回复更生动
125
  if dest_name in self.city_descriptions:
126
  feature = random.choice(self.city_descriptions[dest_name])
127
  return f"{dest_name}!一个绝佳的选择,那可是著名的'{feature}'。目的地已为您记录。"
128
  else:
129
+ # 修复了原代码中从列表获取国家信息的bug
130
+ dest_country = dest_info.get('country')
131
+ if dest_country:
132
+ return f"好的,目的地已确认为 {dest_country} 的 {dest_name}!一个充满魅力的地方。"
133
+ else:
134
+ return f"好的,目的地 {dest_name} 已记录!听起来是个很棒的地方。"
135
 
136
+ # --- 旅行时长确认 ---
137
  if "duration" in extracted_info and extracted_info["duration"]:
138
+ # 使用 .get() 增加代码健壮性
139
+ duration_description = extracted_info["duration"].get('description', '一段美好的时光')
140
+ return f"了解,{duration_description}的行程,时间很充裕,可以深度体验了!"
141
 
142
+ # --- 预算确认(核心修改部分) ---
143
  if "budget" in extracted_info and extracted_info["budget"]:
144
+ budget_dict = extracted_info["budget"]
145
+ amount = budget_dict.get("amount")
146
+ currency = budget_dict.get("currency")
147
+ budget_type_desc = budget_dict.get("description") # 例如:“穷游”
148
+
149
+ # 场景一:用户提供了金额,但没说货币单位 -> 主动追问货币
150
+ if amount and not currency:
151
+ return f"收到,您的预算是 {amount}。请问这是以什么货币计算的呢?(例如:人民币、欧元、美元)"
152
+
153
+ # 场景二:用户提供了完整的金额和货币 -> 优雅地确认
154
+ if amount and currency:
155
+ if budget_type_desc:
156
+ # 例如,用户说:“我预算5000元,想穷游”
157
+ return f"好的,您「{budget_type_desc}」的预算({amount} {currency})已为您记录,我会为您规划性价比最高的方案。"
158
+ else:
159
+ # 例如,用户说:“我预算5000元”
160
+ return f"好的,预算 {amount} {currency} 已为您记录,我会为您规划性价比最高的方案。"
161
 
162
+ # 场景三:用户只提了预算类型,没说金额 -> 只确认风格
163
+ if budget_type_desc and not amount:
164
+ # 例如,用户说:“我这次想穷游”
165
+ return f"了解,您偏爱「{budget_type_desc}」的旅行方式,我会按这个风格为您规划。"
166
+
167
+ # 如果没有提取到任何新信息,返回空字符串
168
  return ""
169
 
170
+
171
  def _get_dynamic_next_question(self, session_state: SessionManager) -> str:
172
 
173
  if not session_state.get('destination'):