Spaces:
Sleeping
Sleeping
improve: budget mapping
Browse files- modules/info_extractor.py +40 -99
- modules/response_generator.py +46 -10
modules/info_extractor.py
CHANGED
|
@@ -754,14 +754,14 @@ class InfoExtractor:
|
|
| 754 |
return result
|
| 755 |
|
| 756 |
def _extract_budget_from_tokens(self, tokens: list) -> dict:
|
| 757 |
-
"""从tokens中提取预算信息"""
|
| 758 |
-
result = {}
|
| 759 |
|
| 760 |
-
|
|
|
|
|
|
|
| 761 |
for i, token in enumerate(tokens):
|
| 762 |
amount = None
|
| 763 |
-
currency = "RMB"
|
| 764 |
-
|
| 765 |
# 处理包含货币的token "2000欧", "5000元"
|
| 766 |
currency_patterns = [
|
| 767 |
(r'(\d+(?:\.\d+)?)欧(?:元)?', 'EUR'),
|
|
@@ -783,27 +783,26 @@ class InfoExtractor:
|
|
| 783 |
(r'(\d+(?:\.\d+)?)gbp', 'GBP'),
|
| 784 |
(r'(\d+(?:\.\d+)?)chf', 'CHF'),
|
| 785 |
]
|
| 786 |
-
|
| 787 |
for pattern, curr in currency_patterns:
|
| 788 |
match = re.search(pattern, token.lower())
|
| 789 |
if match:
|
| 790 |
amount = float(match.group(1))
|
| 791 |
currency = curr
|
| 792 |
break
|
| 793 |
-
|
| 794 |
# 处理纯数字token(需要查看上下文)
|
| 795 |
-
if not amount and re.match(r'
|
| 796 |
number = float(token)
|
| 797 |
-
|
| 798 |
# 检查前面的token是否有预算相关词汇
|
| 799 |
budget_indicators = ['预算', '花费', '费用', '成本', '开销', '支出', '总共', '一共', 'budget', 'cost', 'spend']
|
| 800 |
has_budget_context = False
|
| 801 |
-
|
| 802 |
if i > 0 and tokens[i-1] in budget_indicators:
|
| 803 |
has_budget_context = True
|
| 804 |
elif i > 1 and tokens[i-2] in budget_indicators:
|
| 805 |
has_budget_context = True
|
| 806 |
-
|
| 807 |
# 检查后面是否有货币单位
|
| 808 |
if i < len(tokens) - 1:
|
| 809 |
next_token = tokens[i + 1].lower()
|
|
@@ -813,28 +812,16 @@ class InfoExtractor:
|
|
| 813 |
'瑞郎': 'CHF', '日元': 'JPY', '韩元': 'KRW',
|
| 814 |
'rmb': 'RMB', 'usd': 'USD', 'eur': 'EUR', 'gbp': 'GBP', 'chf': 'CHF'
|
| 815 |
}
|
| 816 |
-
|
| 817 |
if next_token in currency_units:
|
| 818 |
amount = number
|
| 819 |
currency = currency_units[next_token]
|
| 820 |
-
has_budget_context = True
|
| 821 |
-
|
| 822 |
-
# 如果有预算上下文但没有明确货币单位,根据数字大小推断
|
| 823 |
-
if has_budget_context and not amount:
|
| 824 |
-
if number < 100: # 可能是欧元或美元
|
| 825 |
-
# 查看是否有欧洲城市上下文
|
| 826 |
-
has_european_context = any(self._normalize_city_name(t) for t in tokens)
|
| 827 |
-
if has_european_context:
|
| 828 |
-
currency = 'EUR'
|
| 829 |
-
else:
|
| 830 |
-
currency = 'USD'
|
| 831 |
-
else:
|
| 832 |
-
currency = 'RMB' # 大数字更可能是人民币
|
| 833 |
-
amount = number
|
| 834 |
|
|
|
|
|
|
|
|
|
|
| 835 |
# 处理万、千等单位
|
| 836 |
if amount:
|
| 837 |
-
#
|
| 838 |
if i > 0:
|
| 839 |
prev_token = tokens[i-1]
|
| 840 |
if '万' in prev_token or 'w' in prev_token.lower():
|
|
@@ -847,96 +834,50 @@ class InfoExtractor:
|
|
| 847 |
amount *= 10000
|
| 848 |
elif '千' in next_token or 'k' in next_token.lower():
|
| 849 |
amount *= 1000
|
| 850 |
-
|
| 851 |
if amount > 0:
|
| 852 |
result["amount"] = int(amount)
|
| 853 |
-
|
|
|
|
| 854 |
break
|
| 855 |
-
|
| 856 |
-
# 2.
|
| 857 |
budget_type_keywords = {
|
| 858 |
-
'economy': [
|
| 859 |
-
|
| 860 |
-
|
| 861 |
-
'简单', '基础', '低成本', '节约', 'budget', 'cheap', 'economy', 'affordable'
|
| 862 |
-
],
|
| 863 |
-
'comfortable': [
|
| 864 |
-
'舒适', '中等', '适中', '一般', '标准', '普通', '正常', '常规',
|
| 865 |
-
'中档', '中级', '合理', '平均', '中间档次', 'comfortable', 'standard', 'moderate'
|
| 866 |
-
],
|
| 867 |
-
'luxury': [
|
| 868 |
-
'豪华', '奢华', '高端', '顶级', '精品', '奢侈', '贵族', '皇家',
|
| 869 |
-
'贵一点', '不差钱', '任性', '土豪', '有钱', '五星', 'VIP',
|
| 870 |
-
'luxury', 'premium', 'high-end', 'expensive', 'fancy'
|
| 871 |
-
]
|
| 872 |
}
|
| 873 |
-
|
|
|
|
|
|
|
|
|
|
| 874 |
for token in tokens:
|
| 875 |
token_lower = token.lower()
|
| 876 |
for budget_type, keywords in budget_type_keywords.items():
|
| 877 |
-
|
| 878 |
-
|
| 879 |
-
|
| 880 |
-
|
| 881 |
-
|
| 882 |
-
|
| 883 |
-
|
| 884 |
-
|
| 885 |
-
|
| 886 |
-
|
| 887 |
-
|
| 888 |
-
|
| 889 |
-
# 3. 如果有金额但没有类型,根据金额推断类型
|
| 890 |
-
if result.get("amount") and not result.get("type"):
|
| 891 |
-
amount = result["amount"]
|
| 892 |
-
currency = result.get("currency", "RMB")
|
| 893 |
-
|
| 894 |
-
# 根据欧洲旅行成本设置阈值
|
| 895 |
-
if currency == "EUR":
|
| 896 |
-
if amount < 1500: # 总预算
|
| 897 |
-
result["type"] = "economy"
|
| 898 |
-
result["description"] = "经济预算"
|
| 899 |
-
elif amount < 4000:
|
| 900 |
-
result["type"] = "comfortable"
|
| 901 |
-
result["description"] = "舒适预算"
|
| 902 |
-
else:
|
| 903 |
-
result["type"] = "luxury"
|
| 904 |
-
result["description"] = "豪华预算"
|
| 905 |
-
elif currency == "USD":
|
| 906 |
-
if amount < 2000:
|
| 907 |
-
result["type"] = "economy"
|
| 908 |
-
result["description"] = "经济预算"
|
| 909 |
-
elif amount < 5000:
|
| 910 |
-
result["type"] = "comfortable"
|
| 911 |
-
result["description"] = "舒适预算"
|
| 912 |
-
else:
|
| 913 |
-
result["type"] = "luxury"
|
| 914 |
-
result["description"] = "豪华预算"
|
| 915 |
-
elif currency == "RMB":
|
| 916 |
-
if amount < 8000:
|
| 917 |
-
result["type"] = "economy"
|
| 918 |
-
result["description"] = "经济预算"
|
| 919 |
-
elif amount < 20000:
|
| 920 |
-
result["type"] = "comfortable"
|
| 921 |
-
result["description"] = "舒适预算"
|
| 922 |
-
else:
|
| 923 |
-
result["type"] = "luxury"
|
| 924 |
-
result["description"] = "豪华预算"
|
| 925 |
-
|
| 926 |
-
# 4. 处理中文数字金额
|
| 927 |
chinese_money_mapping = {
|
| 928 |
'一千': 1000, '两千': 2000, '三千': 3000, '四千': 4000, '五千': 5000,
|
| 929 |
'六千': 6000, '七千': 7000, '八千': 8000, '九千': 9000,
|
| 930 |
'一万': 10000, '两万': 20000, '三万': 30000, '四万': 40000, '五万': 50000
|
| 931 |
}
|
| 932 |
-
|
| 933 |
if not result.get("amount"):
|
| 934 |
for token in tokens:
|
| 935 |
if token in chinese_money_mapping:
|
| 936 |
result["amount"] = chinese_money_mapping[token]
|
| 937 |
-
|
| 938 |
break
|
| 939 |
-
|
| 940 |
return result
|
| 941 |
|
| 942 |
# 保持向后兼容的验证方法
|
|
|
|
| 754 |
return result
|
| 755 |
|
| 756 |
def _extract_budget_from_tokens(self, tokens: list) -> dict:
|
|
|
|
|
|
|
| 757 |
|
| 758 |
+
result = {}
|
| 759 |
+
|
| 760 |
+
# 1. 查找金额和货币单位(不再有默认值)
|
| 761 |
for i, token in enumerate(tokens):
|
| 762 |
amount = None
|
| 763 |
+
currency = None # <--- 修改点:不再预设 "RMB",初始为 None
|
| 764 |
+
|
| 765 |
# 处理包含货币的token "2000欧", "5000元"
|
| 766 |
currency_patterns = [
|
| 767 |
(r'(\d+(?:\.\d+)?)欧(?:元)?', 'EUR'),
|
|
|
|
| 783 |
(r'(\d+(?:\.\d+)?)gbp', 'GBP'),
|
| 784 |
(r'(\d+(?:\.\d+)?)chf', 'CHF'),
|
| 785 |
]
|
| 786 |
+
|
| 787 |
for pattern, curr in currency_patterns:
|
| 788 |
match = re.search(pattern, token.lower())
|
| 789 |
if match:
|
| 790 |
amount = float(match.group(1))
|
| 791 |
currency = curr
|
| 792 |
break
|
| 793 |
+
|
| 794 |
# 处理纯数字token(需要查看上下文)
|
| 795 |
+
if not amount and re.match(r'^\\d+(?:\\.\\d+)?$', token):
|
| 796 |
number = float(token)
|
| 797 |
+
|
| 798 |
# 检查前面的token是否有预算相关词汇
|
| 799 |
budget_indicators = ['预算', '花费', '费用', '成本', '开销', '支出', '总共', '一共', 'budget', 'cost', 'spend']
|
| 800 |
has_budget_context = False
|
|
|
|
| 801 |
if i > 0 and tokens[i-1] in budget_indicators:
|
| 802 |
has_budget_context = True
|
| 803 |
elif i > 1 and tokens[i-2] in budget_indicators:
|
| 804 |
has_budget_context = True
|
| 805 |
+
|
| 806 |
# 检查后面是否有货币单位
|
| 807 |
if i < len(tokens) - 1:
|
| 808 |
next_token = tokens[i + 1].lower()
|
|
|
|
| 812 |
'瑞郎': 'CHF', '日元': 'JPY', '韩元': 'KRW',
|
| 813 |
'rmb': 'RMB', 'usd': 'USD', 'eur': 'EUR', 'gbp': 'GBP', 'chf': 'CHF'
|
| 814 |
}
|
|
|
|
| 815 |
if next_token in currency_units:
|
| 816 |
amount = number
|
| 817 |
currency = currency_units[next_token]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 818 |
|
| 819 |
+
if has_budget_context and not currency:
|
| 820 |
+
amount = number
|
| 821 |
+
|
| 822 |
# 处理万、千等单位
|
| 823 |
if amount:
|
| 824 |
+
# (这部分逻辑保持不变)
|
| 825 |
if i > 0:
|
| 826 |
prev_token = tokens[i-1]
|
| 827 |
if '万' in prev_token or 'w' in prev_token.lower():
|
|
|
|
| 834 |
amount *= 10000
|
| 835 |
elif '千' in next_token or 'k' in next_token.lower():
|
| 836 |
amount *= 1000
|
| 837 |
+
|
| 838 |
if amount > 0:
|
| 839 |
result["amount"] = int(amount)
|
| 840 |
+
if currency: # 只有当识别到货币时才赋值
|
| 841 |
+
result["currency"] = currency
|
| 842 |
break
|
| 843 |
+
|
| 844 |
+
# 2. 查找预算类型(此部分逻辑与金额完全无关)
|
| 845 |
budget_type_keywords = {
|
| 846 |
+
'economy': ['经济', '便宜', '省钱', '实惠', '节省', '穷游', '学生', '背包客', '预算有限', '性价比', 'budget', 'cheap'],
|
| 847 |
+
'comfortable': ['舒适', '中等', '适中', '标准', '普通', '中档', '合理', 'comfortable', 'standard'],
|
| 848 |
+
'luxury': ['豪华', '奢华', '高端', '顶级', '精品', '不差钱', '任性', '土豪', '五星', 'luxury', 'premium']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 849 |
}
|
| 850 |
+
|
| 851 |
+
# 查找最能代表预算类型的关键词
|
| 852 |
+
found_type_keyword = ""
|
| 853 |
+
found_type = ""
|
| 854 |
for token in tokens:
|
| 855 |
token_lower = token.lower()
|
| 856 |
for budget_type, keywords in budget_type_keywords.items():
|
| 857 |
+
for keyword in keywords:
|
| 858 |
+
if keyword in token_lower:
|
| 859 |
+
# 优先选择更长的、更具体的关键词作为描述
|
| 860 |
+
if len(keyword) > len(found_type_keyword):
|
| 861 |
+
found_type_keyword = keyword
|
| 862 |
+
found_type = budget_type
|
| 863 |
+
|
| 864 |
+
if found_type:
|
| 865 |
+
result["type"] = found_type
|
| 866 |
+
result["description"] = found_type_keyword # 使用找到的最匹配的关键词作为描述
|
| 867 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 868 |
chinese_money_mapping = {
|
| 869 |
'一千': 1000, '两千': 2000, '三千': 3000, '四千': 4000, '五千': 5000,
|
| 870 |
'六千': 6000, '七千': 7000, '八千': 8000, '九千': 9000,
|
| 871 |
'一万': 10000, '两万': 20000, '三万': 30000, '四万': 40000, '五万': 50000
|
| 872 |
}
|
| 873 |
+
|
| 874 |
if not result.get("amount"):
|
| 875 |
for token in tokens:
|
| 876 |
if token in chinese_money_mapping:
|
| 877 |
result["amount"] = chinese_money_mapping[token]
|
| 878 |
+
# 这里同样不设置默认货币
|
| 879 |
break
|
| 880 |
+
|
| 881 |
return result
|
| 882 |
|
| 883 |
# 保持向后兼容的验证方法
|
modules/response_generator.py
CHANGED
|
@@ -80,6 +80,9 @@ class ResponseGenerator:
|
|
| 80 |
acknowledgement = self._generate_vivid_acknowledgement(extracted_info, session_state)
|
| 81 |
if acknowledgement:
|
| 82 |
response_parts.append(acknowledgement)
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
# 2. 检查是否需要询问下一个信息
|
| 85 |
next_question = self._get_dynamic_next_question(session_state)
|
|
@@ -108,30 +111,63 @@ class ResponseGenerator:
|
|
| 108 |
|
| 109 |
def _generate_vivid_acknowledgement(self, extracted_info: dict, session_state: SessionManager) -> str:
|
| 110 |
"""
|
| 111 |
-
|
|
|
|
| 112 |
"""
|
|
|
|
| 113 |
if "destination" in extracted_info and extracted_info["destination"]:
|
| 114 |
-
|
|
|
|
| 115 |
|
| 116 |
-
#
|
|
|
|
|
|
|
| 117 |
if dest_name in self.city_descriptions:
|
| 118 |
feature = random.choice(self.city_descriptions[dest_name])
|
| 119 |
return f"{dest_name}!一个绝佳的选择,那可是著名的'{feature}'。目的地已为您记录。"
|
| 120 |
else:
|
| 121 |
-
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
|
|
|
| 124 |
if "duration" in extracted_info and extracted_info["duration"]:
|
| 125 |
-
|
| 126 |
-
|
|
|
|
| 127 |
|
|
|
|
| 128 |
if "budget" in extracted_info and extracted_info["budget"]:
|
| 129 |
-
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
return ""
|
| 134 |
|
|
|
|
| 135 |
def _get_dynamic_next_question(self, session_state: SessionManager) -> str:
|
| 136 |
|
| 137 |
if not session_state.get('destination'):
|
|
|
|
| 80 |
acknowledgement = self._generate_vivid_acknowledgement(extracted_info, session_state)
|
| 81 |
if acknowledgement:
|
| 82 |
response_parts.append(acknowledgement)
|
| 83 |
+
# 如果确认信息本身已经是一个问题(比如追问货币),就直接返回,避免再问下一个问题
|
| 84 |
+
if acknowledgement.strip().endswith(('?', '?')):
|
| 85 |
+
return " ".join(response_parts)
|
| 86 |
|
| 87 |
# 2. 检查是否需要询问下一个信息
|
| 88 |
next_question = self._get_dynamic_next_question(session_state)
|
|
|
|
| 111 |
|
| 112 |
def _generate_vivid_acknowledgement(self, extracted_info: dict, session_state: SessionManager) -> str:
|
| 113 |
"""
|
| 114 |
+
【更新版本】根据最新提取的信息,生成一句生动的、非模板化的确认语。
|
| 115 |
+
此版本已重构,以处理更复杂的预算逻辑,并实现主动询问。
|
| 116 |
"""
|
| 117 |
+
# --- 目的地确认 ---
|
| 118 |
if "destination" in extracted_info and extracted_info["destination"]:
|
| 119 |
+
dest_info = extracted_info["destination"]
|
| 120 |
+
dest_name = dest_info.get('name')
|
| 121 |
|
| 122 |
+
if not dest_name: return "" # 如果没有提取到有效名称,则不作回复
|
| 123 |
+
|
| 124 |
+
# 优先使用预设的城市描述,让回复更生动
|
| 125 |
if dest_name in self.city_descriptions:
|
| 126 |
feature = random.choice(self.city_descriptions[dest_name])
|
| 127 |
return f"{dest_name}!一个绝佳的选择,那可是著名的'{feature}'。目的地已为您记录。"
|
| 128 |
else:
|
| 129 |
+
# 修复了原代码中从列表获取国家信息的bug
|
| 130 |
+
dest_country = dest_info.get('country')
|
| 131 |
+
if dest_country:
|
| 132 |
+
return f"好的,目的地已确认为 {dest_country} 的 {dest_name}!一个充满魅力的地方。"
|
| 133 |
+
else:
|
| 134 |
+
return f"好的,目的地 {dest_name} 已记录!听起来是个很棒的地方。"
|
| 135 |
|
| 136 |
+
# --- 旅行时长确认 ---
|
| 137 |
if "duration" in extracted_info and extracted_info["duration"]:
|
| 138 |
+
# 使用 .get() 增加代码健壮性
|
| 139 |
+
duration_description = extracted_info["duration"].get('description', '一段美好的时光')
|
| 140 |
+
return f"了解,{duration_description}的行程,时间很充裕,可以深度体验了!"
|
| 141 |
|
| 142 |
+
# --- 预算确认(核心修改部分) ---
|
| 143 |
if "budget" in extracted_info and extracted_info["budget"]:
|
| 144 |
+
budget_dict = extracted_info["budget"]
|
| 145 |
+
amount = budget_dict.get("amount")
|
| 146 |
+
currency = budget_dict.get("currency")
|
| 147 |
+
budget_type_desc = budget_dict.get("description") # 例如:“穷游”
|
| 148 |
+
|
| 149 |
+
# 场景一:用户提供了金额,但没说货币单位 -> 主动追问货币
|
| 150 |
+
if amount and not currency:
|
| 151 |
+
return f"收到,您的预算是 {amount}。请问这是以什么货币计算的呢?(例如:人民币、欧元、美元)"
|
| 152 |
+
|
| 153 |
+
# 场景二:用户提供了完整的金额和货币 -> 优雅地确认
|
| 154 |
+
if amount and currency:
|
| 155 |
+
if budget_type_desc:
|
| 156 |
+
# 例如,用户说:“我预算5000元,想穷游”
|
| 157 |
+
return f"好的,您「{budget_type_desc}」的预算({amount} {currency})已为您记录,我会为您规划性价比最高的方案。"
|
| 158 |
+
else:
|
| 159 |
+
# 例如,用户说:“我预算5000元”
|
| 160 |
+
return f"好的,预算 {amount} {currency} 已为您记录,我会为您规划性价比最高的方案。"
|
| 161 |
|
| 162 |
+
# 场景三:用户只提了预算类型,没说金额 -> 只确认风格
|
| 163 |
+
if budget_type_desc and not amount:
|
| 164 |
+
# 例如,用户说:“我这次想穷游”
|
| 165 |
+
return f"了解,您偏爱「{budget_type_desc}」的旅行方式,我会按这个风格为您规划。"
|
| 166 |
+
|
| 167 |
+
# 如果没有提取到任何新信息,返回空字符串
|
| 168 |
return ""
|
| 169 |
|
| 170 |
+
|
| 171 |
def _get_dynamic_next_question(self, session_state: SessionManager) -> str:
|
| 172 |
|
| 173 |
if not session_state.get('destination'):
|