Spaces:

Eliot0110
/

Travel_Assistant

Sleeping

App Files Community

Eliot0110 committed on Aug 6

Commit

7f130dd

1 Parent(s): e86b23a

improve: budget mapping

Browse files

Files changed (2) hide show

modules/info_extractor.py +40 -99
modules/response_generator.py +46 -10

modules/info_extractor.py CHANGED Viewed

@@ -754,14 +754,14 @@ class InfoExtractor:
         return result
     def _extract_budget_from_tokens(self, tokens: list) -> dict:
-        """从tokens中提取预算信息"""
-        result = {}
-        # 1. 查找金额
         for i, token in enumerate(tokens):
             amount = None
-            currency = "RMB"  # 默认货币
             # 处理包含货币的token "2000欧", "5000元"
             currency_patterns = [
                 (r'(\d+(?:\.\d+)?)欧(?:元)?', 'EUR'),
@@ -783,27 +783,26 @@ class InfoExtractor:
                 (r'(\d+(?:\.\d+)?)gbp', 'GBP'),
                 (r'(\d+(?:\.\d+)?)chf', 'CHF'),
             ]
             for pattern, curr in currency_patterns:
                 match = re.search(pattern, token.lower())
                 if match:
                     amount = float(match.group(1))
                     currency = curr
                     break
             # 处理纯数字token（需要查看上下文）
-            if not amount and re.match(r'^\d+(?:\.\d+)?$', token):
                 number = float(token)
                 # 检查前面的token是否有预算相关词汇
                 budget_indicators = ['预算', '花费', '费用', '成本', '开销', '支出', '总共', '一共', 'budget', 'cost', 'spend']
                 has_budget_context = False
                 if i > 0 and tokens[i-1] in budget_indicators:
                     has_budget_context = True
                 elif i > 1 and tokens[i-2] in budget_indicators:
                     has_budget_context = True
                 # 检查后面是否有货币单位
                 if i < len(tokens) - 1:
                     next_token = tokens[i + 1].lower()
@@ -813,28 +812,16 @@ class InfoExtractor:
                         '瑞郎': 'CHF', '日元': 'JPY', '韩元': 'KRW',
                         'rmb': 'RMB', 'usd': 'USD', 'eur': 'EUR', 'gbp': 'GBP', 'chf': 'CHF'
                     }
                     if next_token in currency_units:
                         amount = number
                         currency = currency_units[next_token]
-                        has_budget_context = True
-                # 如果有预算上下文但没有明确货币单位，根据数字大小推断
-                if has_budget_context and not amount:
-                    if number < 100:  # 可能是欧元或美元
-                        # 查看是否有欧洲城市上下文
-                        has_european_context = any(self._normalize_city_name(t) for t in tokens)
-                        if has_european_context:
-                            currency = 'EUR'
-                        else:
-                            currency = 'USD'
-                    else:
-                        currency = 'RMB'  # 大数字更可能是人民币
-                    amount = number
             # 处理万、千等单位
             if amount:
-                # 检查是否有万、千修饰符
                 if i > 0:
                     prev_token = tokens[i-1]
                     if '万' in prev_token or 'w' in prev_token.lower():
@@ -847,96 +834,50 @@ class InfoExtractor:
                         amount *= 10000
                     elif '千' in next_token or 'k' in next_token.lower():
                         amount *= 1000
                 if amount > 0:
                     result["amount"] = int(amount)
-                    result["currency"] = currency
                     break
-        # 2. 查找预算类型
         budget_type_keywords = {
-            'economy': [
-                '经济', '便宜', '省钱', '实惠', '节省', '穷游', '学生', '青年',
-                '预算有限', '钱不多', '不贵', '划算', '性价比', '背包客',
-                '简单', '基础', '低成本', '节约', 'budget', 'cheap', 'economy', 'affordable'
-            ],
-            'comfortable': [
-                '舒适', '中等', '适中', '一般', '标准', '普通', '正常', '常规',
-                '中档', '中级', '合理', '平均', '中间档次', 'comfortable', 'standard', 'moderate'
-            ],
-            'luxury': [
-                '豪华', '奢华', '高端', '顶级', '精品', '奢侈', '贵族', '皇家',
-                '贵一点', '不差钱', '任性', '土豪', '有钱', '五星', 'VIP',
-                'luxury', 'premium', 'high-end', 'expensive', 'fancy'
-            ]
         }
         for token in tokens:
             token_lower = token.lower()
             for budget_type, keywords in budget_type_keywords.items():
-                if any(keyword in token_lower for keyword in keywords):
-                    result["type"] = budget_type
-                    # 找到第一个匹配的关键词作为描述
-                    for keyword in keywords:
-                        if keyword in token_lower:
-                            result["description"] = keyword if len(keyword) > 2 else token
-                            break
-                    break
-            if result.get("type"):
-                break
-        # 3. 如果有金额但没有类型，根据金额推断类型
-        if result.get("amount") and not result.get("type"):
-            amount = result["amount"]
-            currency = result.get("currency", "RMB")
-            # 根据欧洲旅行成本设置阈值
-            if currency == "EUR":
-                if amount < 1500:  # 总预算
-                    result["type"] = "economy"
-                    result["description"] = "经济预算"
-                elif amount < 4000:
-                    result["type"] = "comfortable"
-                    result["description"] = "舒适预算"
-                else:
-                    result["type"] = "luxury"
-                    result["description"] = "豪华预算"
-            elif currency == "USD":
-                if amount < 2000:
-                    result["type"] = "economy"
-                    result["description"] = "经济预算"
-                elif amount < 5000:
-                    result["type"] = "comfortable"
-                    result["description"] = "舒适预算"
-                else:
-                    result["type"] = "luxury"
-                    result["description"] = "豪华预算"
-            elif currency == "RMB":
-                if amount < 8000:
-                    result["type"] = "economy"
-                    result["description"] = "经济预算"
-                elif amount < 20000:
-                    result["type"] = "comfortable"
-                    result["description"] = "舒适预算"
-                else:
-                    result["type"] = "luxury"
-                    result["description"] = "豪华预算"
-        # 4. 处理中文数字金额
         chinese_money_mapping = {
             '一千': 1000, '两千': 2000, '三千': 3000, '四千': 4000, '五千': 5000,
             '六千': 6000, '七千': 7000, '八千': 8000, '九千': 9000,
             '一万': 10000, '两万': 20000, '三万': 30000, '四万': 40000, '五万': 50000
         }
         if not result.get("amount"):
             for token in tokens:
                 if token in chinese_money_mapping:
                     result["amount"] = chinese_money_mapping[token]
-                    result["currency"] = "RMB"
                     break
         return result
     # 保持向后兼容的验证方法

         return result
     def _extract_budget_from_tokens(self, tokens: list) -> dict:
+        result = {}
+        # 1. 查找金额和货币单位（不再有默认值）
         for i, token in enumerate(tokens):
             amount = None
+            currency = None  # <--- 修改点：不再预设 "RMB"，初始为 None
             # 处理包含货币的token "2000欧", "5000元"
             currency_patterns = [
                 (r'(\d+(?:\.\d+)?)欧(?:元)?', 'EUR'),
                 (r'(\d+(?:\.\d+)?)gbp', 'GBP'),
                 (r'(\d+(?:\.\d+)?)chf', 'CHF'),
             ]
             for pattern, curr in currency_patterns:
                 match = re.search(pattern, token.lower())
                 if match:
                     amount = float(match.group(1))
                     currency = curr
                     break
             # 处理纯数字token（需要查看上下文）
+            if not amount and re.match(r'^\\d+(?:\\.\\d+)?$', token):
                 number = float(token)
                 # 检查前面的token是否有预算相关词汇
                 budget_indicators = ['预算', '花费', '费用', '成本', '开销', '支出', '总共', '一共', 'budget', 'cost', 'spend']
                 has_budget_context = False
                 if i > 0 and tokens[i-1] in budget_indicators:
                     has_budget_context = True
                 elif i > 1 and tokens[i-2] in budget_indicators:
                     has_budget_context = True
                 # 检查后面是否有货币单位
                 if i < len(tokens) - 1:
                     next_token = tokens[i + 1].lower()
                         '瑞郎': 'CHF', '日元': 'JPY', '韩元': 'KRW',
                         'rmb': 'RMB', 'usd': 'USD', 'eur': 'EUR', 'gbp': 'GBP', 'chf': 'CHF'
                     }
                     if next_token in currency_units:
                         amount = number
                         currency = currency_units[next_token]
+                if has_budget_context and not currency:
+                    amount = number
             # 处理万、千等单位
             if amount:
+                # (这部分逻辑保持不变)
                 if i > 0:
                     prev_token = tokens[i-1]
                     if '万' in prev_token or 'w' in prev_token.lower():
                         amount *= 10000
                     elif '千' in next_token or 'k' in next_token.lower():
                         amount *= 1000
                 if amount > 0:
                     result["amount"] = int(amount)
+                    if currency: # 只有当识别到货币时才赋值
+                        result["currency"] = currency
                     break
+        # 2. 查找预算类型（此部分逻辑与金额完全无关）
         budget_type_keywords = {
+            'economy': ['经济', '便宜', '省钱', '实惠', '节省', '穷游', '学生', '背包客', '预算有限', '性价比', 'budget', 'cheap'],
+            'comfortable': ['舒适', '中等', '适中', '标准', '普通', '中档', '合理', 'comfortable', 'standard'],
+            'luxury': ['豪华', '奢华', '高端', '顶级', '精品', '不差钱', '任性', '土豪', '五星', 'luxury', 'premium']
         }
+        # 查找最能代表预算类型的关键词
+        found_type_keyword = ""
+        found_type = ""
         for token in tokens:
             token_lower = token.lower()
             for budget_type, keywords in budget_type_keywords.items():
+                for keyword in keywords:
+                    if keyword in token_lower:
+                        # 优先选择更长的、更具体的关键词作为描述
+                        if len(keyword) > len(found_type_keyword):
+                            found_type_keyword = keyword
+                            found_type = budget_type
+        if found_type:
+            result["type"] = found_type
+            result["description"] = found_type_keyword # 使用找到的最匹配的关键词作为描述
         chinese_money_mapping = {
             '一千': 1000, '两千': 2000, '三千': 3000, '四千': 4000, '五千': 5000,
             '六千': 6000, '七千': 7000, '八千': 8000, '九千': 9000,
             '一万': 10000, '两万': 20000, '三万': 30000, '四万': 40000, '五万': 50000
         }
         if not result.get("amount"):
             for token in tokens:
                 if token in chinese_money_mapping:
                     result["amount"] = chinese_money_mapping[token]
+                    # 这里同样不设置默认货币
                     break
         return result
     # 保持向后兼容的验证方法

modules/response_generator.py CHANGED Viewed

@@ -80,6 +80,9 @@ class ResponseGenerator:
             acknowledgement = self._generate_vivid_acknowledgement(extracted_info, session_state)
             if acknowledgement:
                 response_parts.append(acknowledgement)
             # 2. 检查是否需要询问下一个信息
             next_question = self._get_dynamic_next_question(session_state)
@@ -108,30 +111,63 @@ class ResponseGenerator:
     def _generate_vivid_acknowledgement(self, extracted_info: dict, session_state: SessionManager) -> str:
         """
-        【新增实现】根据最新提取的信息，生成一句生动的、非模板化的确认语。
         """
         if "destination" in extracted_info and extracted_info["destination"]:
-            dest_name = extracted_info["destination"]['name']
-            # 尝试使用您定义的城市特色描述，让回复更精彩
             if dest_name in self.city_descriptions:
                 feature = random.choice(self.city_descriptions[dest_name])
                 return f"{dest_name}！一个绝佳的选择，那可是著名的'{feature}'。目的地已为您记录。"
             else:
-                dest_country = extracted_info["destination"][0]['country']
-                return f"好的，目的地已确认为 {dest_country} 的 {dest_name}！一个充满魅力的地方。"
         if "duration" in extracted_info and extracted_info["duration"]:
-            days = extracted_info["duration"]
-            return f"了解，{days} 天的行程，时间很充裕，可以深度体验了！"
         if "budget" in extracted_info and extracted_info["budget"]:
-            amount = extracted_info["budget"]
-            return f"预算 {amount} 已记录，我会为您规划性价比最高的方案。"
-        # 如果没有提取到新信息，返回空字符串
         return ""
     def _get_dynamic_next_question(self, session_state: SessionManager) -> str:
         if not session_state.get('destination'):

             acknowledgement = self._generate_vivid_acknowledgement(extracted_info, session_state)
             if acknowledgement:
                 response_parts.append(acknowledgement)
+                # 如果确认信息本身已经是一个问题（比如追问货币），就直接返回，避免再问下一个问题
+                if acknowledgement.strip().endswith(('?', '？')):
+                    return " ".join(response_parts)
             # 2. 检查是否需要询问下一个信息
             next_question = self._get_dynamic_next_question(session_state)
     def _generate_vivid_acknowledgement(self, extracted_info: dict, session_state: SessionManager) -> str:
         """
+        【更新版本】根据最新提取的信息，生成一句生动的、非模板化的确认语。
+        此版本已重构，以处理更复杂的预算逻辑，并实现主动询问。
         """
+        # --- 目的地确认 ---
         if "destination" in extracted_info and extracted_info["destination"]:
+            dest_info = extracted_info["destination"]
+            dest_name = dest_info.get('name')
+            if not dest_name: return "" # 如果没有提取到有效名称，则不作回复
+            # 优先使用预设的城市描述，让回复更生动
             if dest_name in self.city_descriptions:
                 feature = random.choice(self.city_descriptions[dest_name])
                 return f"{dest_name}！一个绝佳的选择，那可是著名的'{feature}'。目的地已为您记录。"
             else:
+                # 修复了原代码中从列表获取国家信息的bug
+                dest_country = dest_info.get('country')
+                if dest_country:
+                    return f"好的，目的地已确认为 {dest_country} 的 {dest_name}！一个充满魅力的地方。"
+                else:
+                    return f"好的，目的地 {dest_name} 已记录！听起来是个很棒的地方。"
+        # --- 旅行时长确认 ---
         if "duration" in extracted_info and extracted_info["duration"]:
+            # 使用 .get() 增加代码健壮性
+            duration_description = extracted_info["duration"].get('description', '一段美好的时光')
+            return f"了解，{duration_description}的行程，时间很充裕，可以深度体验了！"
+        # --- 预算确认（核心修改部分） ---
         if "budget" in extracted_info and extracted_info["budget"]:
+            budget_dict = extracted_info["budget"]
+            amount = budget_dict.get("amount")
+            currency = budget_dict.get("currency")
+            budget_type_desc = budget_dict.get("description") # 例如：“穷游”
+            # 场景一：用户提供了金额，但没说货币单位 -> 主动追问货币
+            if amount and not currency:
+                return f"收到，您的预算是 {amount}。请问这是以什么货币计算的呢？（例如：人民币、欧元、美元）"
+            # 场景二：用户提供了完整的金额和货币 -> 优雅地确认
+            if amount and currency:
+                if budget_type_desc:
+                    # 例如，用户说：“我预算5000元，想穷游”
+                    return f"好的，您「{budget_type_desc}」的预算（{amount} {currency}）已为您记录，我会为您规划性价比最高的方案。"
+                else:
+                    # 例如，用户说：“我预算5000元”
+                    return f"好的，预算 {amount} {currency} 已为您记录，我会为您规划性价比最高的方案。"
+            # 场景三：用户只提了预算类型，没说金额 -> 只确认风格
+            if budget_type_desc and not amount:
+                # 例如，用户说：“我这次想穷游”
+                return f"了解，您偏爱「{budget_type_desc}」的旅行方式，我会按这个风格为您规划。"
+        # 如果没有提取到任何新信息，返回空字符串
         return ""
     def _get_dynamic_next_question(self, session_state: SessionManager) -> str:
         if not session_state.get('destination'):