minhvtt committed on
Commit
d52b84f
·
verified ·
1 Parent(s): 8679400

Update agent_service.py

Browse files
Files changed (1) hide show
  1. agent_service.py +490 -470
agent_service.py CHANGED
@@ -1,470 +1,490 @@
1
- """
2
- Agent Service - Central Brain for Sales & Feedback Agents
3
- Manages LLM conversation loop with native tool calling
4
- """
5
- from typing import Dict, Any, List, Optional
6
- import os
7
- import json
8
- from tools_service import ToolsService
9
-
10
-
11
- class AgentService:
12
- """
13
- Manages the conversation loop between User -> LLM -> Tools -> Response
14
- Uses native tool calling via HuggingFace Inference API
15
- """
16
-
17
- def __init__(
18
- self,
19
- tools_service: ToolsService,
20
- embedding_service,
21
- qdrant_service,
22
- advanced_rag,
23
- hf_token: str,
24
- feedback_tracking=None # Optional feedback tracking
25
- ):
26
- self.tools_service = tools_service
27
- self.embedding_service = embedding_service
28
- self.qdrant_service = qdrant_service
29
- self.advanced_rag = advanced_rag
30
- self.hf_token = hf_token
31
- self.feedback_tracking = feedback_tracking
32
-
33
- # Load system prompts
34
- self.prompts = self._load_prompts()
35
-
36
- def _load_prompts(self) -> Dict[str, str]:
37
- """Load system prompts from files"""
38
- prompts = {}
39
- prompts_dir = "prompts"
40
-
41
- for mode in ["sales_agent", "feedback_agent"]:
42
- filepath = os.path.join(prompts_dir, f"{mode}.txt")
43
- try:
44
- with open(filepath, 'r', encoding='utf-8') as f:
45
- prompts[mode] = f.read()
46
- print(f"✓ Loaded prompt: {mode}")
47
- except Exception as e:
48
- print(f"⚠️ Error loading {mode} prompt: {e}")
49
- prompts[mode] = ""
50
-
51
- return prompts
52
-
53
- def _get_native_tools(self, mode: str = "sales") -> List[Dict]:
54
- """
55
- Get tools formatted for native tool calling API.
56
- Returns OpenAI-compatible tool definitions.
57
- """
58
- common_tools = [
59
- {
60
- "type": "function",
61
- "function": {
62
- "name": "search_events",
63
- "description": "Tìm kiếm sự kiện phù hợp theo từ khóa, vibe, hoặc thời gian.",
64
- "parameters": {
65
- "type": "object",
66
- "properties": {
67
- "query": {"type": "string", "description": "Từ khóa tìm kiếm (VD: 'nhạc rock', 'hài kịch')"},
68
- "vibe": {"type": "string", "description": "Vibe/Mood (VD: 'chill', 'sôi động', 'hẹn hò')"},
69
- "time": {"type": "string", "description": "Thời gian (VD: 'cuối tuần này', 'tối nay')"}
70
- }
71
- }
72
- }
73
- },
74
- {
75
- "type": "function",
76
- "function": {
77
- "name": "get_event_details",
78
- "description": "Lấy thông tin chi tiết (giá, địa điểm, thời gian) của sự kiện.",
79
- "parameters": {
80
- "type": "object",
81
- "properties": {
82
- "event_id": {"type": "string", "description": "ID của sự kiện (MongoDB ID)"}
83
- },
84
- "required": ["event_id"]
85
- }
86
- }
87
- }
88
- ]
89
-
90
- sales_tools = [
91
- {
92
- "type": "function",
93
- "function": {
94
- "name": "save_lead",
95
- "description": "Lưu thông tin khách hàng quan tâm (Lead).",
96
- "parameters": {
97
- "type": "object",
98
- "properties": {
99
- "email": {"type": "string", "description": "Email address"},
100
- "phone": {"type": "string", "description": "Phone number"},
101
- "interest": {"type": "string", "description": "What they're interested in"}
102
- }
103
- }
104
- }
105
- }
106
- ]
107
-
108
- feedback_tools = [
109
- {
110
- "type": "function",
111
- "function": {
112
- "name": "get_purchased_events",
113
- "description": "Kiểm tra lịch sử các sự kiện user đã mua vé hoặc tham gia.",
114
- "parameters": {
115
- "type": "object",
116
- "properties": {
117
- "user_id": {"type": "string", "description": "ID của user"}
118
- },
119
- "required": ["user_id"]
120
- }
121
- }
122
- },
123
- {
124
- "type": "function",
125
- "function": {
126
- "name": "save_feedback",
127
- "description": "Lưu đánh giá/feedback của user về sự kiện.",
128
- "parameters": {
129
- "type": "object",
130
- "properties": {
131
- "event_id": {"type": "string", "description": "ID sự kiện"},
132
- "rating": {"type": "integer", "description": "Số sao đánh giá (1-5)"},
133
- "comment": {"type": "string", "description": "Nội dung nhận xét"}
134
- },
135
- "required": ["event_id", "rating"]
136
- }
137
- }
138
- }
139
- ]
140
-
141
- if mode == "feedback":
142
- return common_tools + feedback_tools
143
- else:
144
- return common_tools + sales_tools
145
-
146
- async def chat(
147
- self,
148
- user_message: str,
149
- conversation_history: List[Dict],
150
- mode: str = "sales", # "sales" or "feedback"
151
- user_id: Optional[str] = None,
152
- access_token: Optional[str] = None, # For authenticated API calls
153
- max_iterations: int = 3
154
- ) -> Dict[str, Any]:
155
- """
156
- Main conversation loop with native tool calling
157
-
158
- Args:
159
- user_message: User's input
160
- conversation_history: Previous messages [{"role": "user", "content": ...}, ...]
161
- mode: "sales" or "feedback"
162
- user_id: User ID (for feedback mode to check purchase history)
163
- access_token: JWT token for authenticated API calls
164
- max_iterations: Maximum tool call iterations to prevent infinite loops
165
-
166
- Returns:
167
- {
168
- "message": "Bot response",
169
- "tool_calls": [...], # List of tools called (for debugging)
170
- "mode": mode
171
- }
172
- """
173
- print(f"\n🤖 Agent Mode: {mode}")
174
- print(f"👤 User Message: {user_message}")
175
- print(f"🔑 Auth Info:")
176
- print(f" - User ID: {user_id}")
177
- print(f" - Access Token: {'✅ Received' if access_token else '❌ None'}")
178
-
179
- # Store user_id and access_token for tool calls
180
- self.current_user_id = user_id
181
- self.current_access_token = access_token
182
- if access_token:
183
- print(f" - Stored access_token for tools: {access_token[:20]}...")
184
- if user_id:
185
- print(f" - Stored user_id for tools: {user_id}")
186
-
187
- # Select system prompt (without tool instructions - native tools handle this)
188
- system_prompt = self._get_system_prompt(mode)
189
-
190
- # Get native tools for this mode
191
- tools = self._get_native_tools(mode)
192
-
193
- # Build conversation context
194
- messages = self._build_messages(system_prompt, conversation_history, user_message)
195
-
196
- # Agentic loop: LLM may call tools multiple times
197
- tool_calls_made = []
198
- current_response = None
199
-
200
- for iteration in range(max_iterations):
201
- print(f"\n🔄 Iteration {iteration + 1}")
202
-
203
- # Call LLM with native tools
204
- llm_result = await self._call_llm_with_tools(messages, tools)
205
-
206
- # Check if this is a final text response or a tool call
207
- if llm_result["type"] == "text":
208
- current_response = llm_result["content"]
209
- print(f"🧠 LLM Final Response: {current_response[:200]}...")
210
- break
211
-
212
- elif llm_result["type"] == "tool_calls":
213
- # Process each tool call
214
- for tool_call in llm_result["tool_calls"]:
215
- tool_name = tool_call["function"]["name"]
216
- arguments = json.loads(tool_call["function"]["arguments"])
217
-
218
- print(f"🔧 Tool Called: {tool_name}")
219
- print(f" Arguments: {arguments}")
220
-
221
- # Auto-inject real user_id for get_purchased_events
222
- if tool_name == 'get_purchased_events' and self.current_user_id:
223
- print(f"🔄 Auto-injecting real user_id: {self.current_user_id}")
224
- arguments['user_id'] = self.current_user_id
225
-
226
- # Execute tool
227
- tool_result = await self.tools_service.execute_tool(
228
- tool_name,
229
- arguments,
230
- access_token=self.current_access_token
231
- )
232
-
233
- # Record tool call
234
- tool_calls_made.append({
235
- "function": tool_name,
236
- "arguments": arguments,
237
- "result": tool_result
238
- })
239
-
240
- # Handle RAG search specially
241
- if isinstance(tool_result, dict) and tool_result.get("action") == "run_rag_search":
242
- tool_result = await self._execute_rag_search(tool_result["query"])
243
-
244
- # Add assistant's tool call to messages
245
- messages.append({
246
- "role": "assistant",
247
- "content": None,
248
- "tool_calls": [{
249
- "id": tool_call.get("id", f"call_{iteration}"),
250
- "type": "function",
251
- "function": {
252
- "name": tool_name,
253
- "arguments": json.dumps(arguments)
254
- }
255
- }]
256
- })
257
-
258
- # Add tool result to messages
259
- messages.append({
260
- "role": "tool",
261
- "tool_call_id": tool_call.get("id", f"call_{iteration}"),
262
- "content": self._format_tool_result({"result": tool_result})
263
- })
264
-
265
- elif llm_result["type"] == "error":
266
- print(f"⚠️ LLM Error: {llm_result['content']}")
267
- current_response = "Xin lỗi, tôi đang gặp chút vấn đề kỹ thuật. Bạn thử lại sau nhé!"
268
- break
269
-
270
- # Get final response if we hit max iterations
271
- final_response = current_response or "Tôi cần thêm thông tin để hỗ trợ bạn."
272
-
273
- return {
274
- "message": final_response,
275
- "tool_calls": tool_calls_made,
276
- "mode": mode
277
- }
278
-
279
- def _get_system_prompt(self, mode: str) -> str:
280
- """Get system prompt for selected mode (without tool instructions)"""
281
- prompt_key = f"{mode}_agent" if mode in ["sales", "feedback"] else "sales_agent"
282
- return self.prompts.get(prompt_key, "")
283
-
284
- def _build_messages(
285
- self,
286
- system_prompt: str,
287
- history: List[Dict],
288
- user_message: str
289
- ) -> List[Dict]:
290
- """Build messages array for LLM"""
291
- messages = [{"role": "system", "content": system_prompt}]
292
-
293
- # Add conversation history
294
- messages.extend(history)
295
-
296
- # Add current user message
297
- messages.append({"role": "user", "content": user_message})
298
-
299
- return messages
300
-
301
- async def _call_llm_with_tools(self, messages: List[Dict], tools: List[Dict]) -> Dict:
302
- """
303
- Call HuggingFace LLM with native tool calling support
304
-
305
- Returns:
306
- {"type": "text", "content": "..."} for text responses
307
- {"type": "tool_calls", "tool_calls": [...]} for tool call requests
308
- {"type": "error", "content": "..."} for errors
309
- """
310
- try:
311
- from huggingface_hub import AsyncInferenceClient
312
-
313
- # Create async client - Qwen2.5 works on default HuggingFace API
314
- client = AsyncInferenceClient(token=self.hf_token)
315
-
316
- # Call HF API with chat completion and native tools
317
- # Qwen2.5-72B-Instruct: Best for Vietnamese - state-of-the-art performance
318
- response = await client.chat_completion(
319
- messages=messages,
320
- model="Qwen/Qwen2.5-72B-Instruct", # Best for Vietnamese + tool calling
321
- max_tokens=1024, # Increased to prevent truncation
322
- temperature=0.7,
323
- tools=tools,
324
- tool_choice="auto" # Let model decide when to use tools
325
- )
326
-
327
- # Check if the model made tool calls
328
- message = response.choices[0].message
329
-
330
- if message.tool_calls:
331
- print(f"🔧 Native tool calls detected: {len(message.tool_calls)}")
332
- return {
333
- "type": "tool_calls",
334
- "tool_calls": [
335
- {
336
- "id": tc.id,
337
- "function": {
338
- "name": tc.function.name,
339
- "arguments": tc.function.arguments
340
- }
341
- }
342
- for tc in message.tool_calls
343
- ]
344
- }
345
- else:
346
- # Regular text response
347
- return {
348
- "type": "text",
349
- "content": message.content or ""
350
- }
351
-
352
- except Exception as e:
353
- print(f"⚠️ LLM Call Error: {e}")
354
- return {
355
- "type": "error",
356
- "content": str(e)
357
- }
358
-
359
- def _format_tool_result(self, tool_result: Dict) -> str:
360
- """Format tool result for feeding back to LLM"""
361
- result = tool_result.get("result", {})
362
-
363
- # Special handling for purchased events list
364
- if isinstance(result, list):
365
- print(f"\n🔍 Formatting {len(result)} items for LLM")
366
- if not result:
367
- return "Không tìm thấy dữ liệu nào phù hợp."
368
-
369
- # Format each event clearly
370
- formatted_events = []
371
- for i, event in enumerate(result, 1):
372
- # Handle both object/dict and string results
373
- if isinstance(event, str):
374
- formatted_events.append(f"{i}. {event}")
375
- continue
376
-
377
- event_info = []
378
- event_info.append(f"Event {i}:")
379
-
380
- # Extract key fields
381
- if 'eventName' in event:
382
- event_info.append(f" Name: {event['eventName']}")
383
- if 'eventCode' in event:
384
- event_info.append(f" Code: {event['eventCode']}")
385
- if '_id' in event:
386
- event_info.append(f" ID: {event['_id']}")
387
- if 'startTimeEventTime' in event:
388
- event_info.append(f" Date: {event['startTimeEventTime']}")
389
- # Handle RAG result payload structure
390
- if 'texts' in event: # Flat text from RAG
391
- event_info.append(f" Content: {event['texts']}")
392
- if 'id_use' in event:
393
- event_info.append(f" ID: {event['id_use']}")
394
-
395
- formatted_events.append("\n".join(event_info))
396
-
397
- formatted = "Tool Results:\n\n" + "\n\n".join(formatted_events)
398
- # print(f"📤 Sending to LLM:\n{formatted}") # Reduce noise
399
- return formatted
400
-
401
- # Default formatting for other results
402
- if isinstance(result, dict):
403
- # Pretty print key info
404
- formatted = []
405
- for key, value in result.items():
406
- if key not in ["success", "error"]:
407
- formatted.append(f"{key}: {value}")
408
- return "\n".join(formatted) if formatted else json.dumps(result)
409
-
410
- return str(result)
411
-
412
- async def _execute_rag_search(self, query_params: Dict) -> str:
413
- """
414
- Execute RAG search for event discovery
415
- Called when LLM wants to search_events
416
- """
417
- query = query_params.get("query", "")
418
- vibe = query_params.get("vibe", "")
419
- time = query_params.get("time", "")
420
-
421
- # Build search query
422
- search_text = f"{query} {vibe} {time}".strip()
423
-
424
- print(f"🔍 RAG Search Query: '{search_text}'")
425
-
426
- if not search_text:
427
- return "Vui lòng cung cấp từ khóa tìm kiếm."
428
-
429
- # Use embedding + qdrant
430
- embedding = self.embedding_service.encode_text(search_text)
431
- results = self.qdrant_service.search(
432
- query_embedding=embedding,
433
- limit=5
434
- )
435
-
436
- print(f"📊 RAG Results Count: {len(results)}")
437
-
438
- # Fallback if no results and query was complex
439
- if not results and (query and vibe):
440
- print(f"⚠️ No results for combined query. Retrying with just 'vibe': {vibe}")
441
- search_text = vibe
442
- embedding = self.embedding_service.encode_text(search_text)
443
- results = self.qdrant_service.search(
444
- query_embedding=embedding,
445
- limit=5
446
- )
447
- print(f"📊 Retry Results Count: {len(results)}")
448
-
449
- # Format results
450
- formatted = []
451
- for i, result in enumerate(results, 1):
452
- # Result is a dict with keys: id, score, payload
453
- payload = result.get("payload", {})
454
- texts = payload.get("texts", [])
455
- text = texts[0] if texts else ""
456
- event_id = payload.get("id_use", "")
457
-
458
- if not text:
459
- continue
460
-
461
- # Clean and truncate text for context window
462
- clean_text = text.replace("\n", " ").strip()
463
- formatted.append(f"Event Found: {clean_text[:300]}... (ID: {event_id})")
464
-
465
- if not formatted:
466
- print("❌ RAG Search returned 0 usable results")
467
- return "SYSTEM_MESSAGE: Không tìm thấy sự kiện nào trong cơ sở dữ liệu phù hợp với yêu cầu. Hãy báo lại cho khách hàng: 'Hiện tại mình chưa tìm thấy sự kiện nào phù hợp với yêu cầu này, bạn thử đổi tiêu chí xem sao nhé?'"
468
-
469
- print(f"✅ Returning {len(formatted)} events to LLM")
470
- return "\n\n".join(formatted)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agent Service - Central Brain for Sales & Feedback Agents
3
+ Manages LLM conversation loop with native tool calling
4
+ """
5
+ from typing import Dict, Any, List, Optional
6
+ import os
7
+ import json
8
+ from tools_service import ToolsService
9
+
10
+
11
+ class AgentService:
12
+ """
13
+ Manages the conversation loop between User -> LLM -> Tools -> Response
14
+ Uses native tool calling via HuggingFace Inference API
15
+ """
16
+
def __init__(
    self,
    tools_service: ToolsService,
    embedding_service,
    qdrant_service,
    advanced_rag,
    hf_token: str,
    feedback_tracking=None  # Optional feedback tracking
):
    """Wire up collaborating services and preload the system prompts.

    Args:
        tools_service: Executor for native tool calls.
        embedding_service: Text-embedding provider used by RAG search.
        qdrant_service: Vector store queried during RAG search.
        advanced_rag: Advanced RAG pipeline collaborator.
        hf_token: HuggingFace API token for chat-completion calls.
        feedback_tracking: Optional feedback-tracking collaborator.
    """
    # Hold references to every collaborator; the only work done here is
    # reading the prompt files from disk.
    self.tools_service = tools_service
    self.embedding_service = embedding_service
    self.qdrant_service = qdrant_service
    self.advanced_rag = advanced_rag
    self.hf_token = hf_token
    self.feedback_tracking = feedback_tracking

    # System prompts are loaded once at construction time.
    self.prompts = self._load_prompts()
35
+
36
+ def _load_prompts(self) -> Dict[str, str]:
37
+ """Load system prompts from files"""
38
+ prompts = {}
39
+ prompts_dir = "prompts"
40
+
41
+ for mode in ["sales_agent", "feedback_agent"]:
42
+ filepath = os.path.join(prompts_dir, f"{mode}.txt")
43
+ try:
44
+ with open(filepath, 'r', encoding='utf-8') as f:
45
+ prompts[mode] = f.read()
46
+ print(f"✓ Loaded prompt: {mode}")
47
+ except Exception as e:
48
+ print(f"⚠️ Error loading {mode} prompt: {e}")
49
+ prompts[mode] = ""
50
+
51
+ return prompts
52
+
53
+ def _get_native_tools(self, mode: str = "sales") -> List[Dict]:
54
+ """
55
+ Get tools formatted for native tool calling API.
56
+ Returns OpenAI-compatible tool definitions.
57
+ """
58
+ common_tools = [
59
+ {
60
+ "type": "function",
61
+ "function": {
62
+ "name": "search_events",
63
+ "description": "Tìm kiếm sự kiện phù hợp theo từ khóa, vibe, hoặc thời gian.",
64
+ "parameters": {
65
+ "type": "object",
66
+ "properties": {
67
+ "query": {"type": "string", "description": "Từ khóa tìm kiếm (VD: 'nhạc rock', 'hài kịch')"},
68
+ "vibe": {"type": "string", "description": "Vibe/Mood (VD: 'chill', 'sôi động', 'hẹn hò')"},
69
+ "time": {"type": "string", "description": "Thời gian (VD: 'cuối tuần này', 'tối nay')"}
70
+ }
71
+ }
72
+ }
73
+ },
74
+ {
75
+ "type": "function",
76
+ "function": {
77
+ "name": "get_event_details",
78
+ "description": "Lấy thông tin chi tiết (giá, địa điểm, thời gian) của sự kiện.",
79
+ "parameters": {
80
+ "type": "object",
81
+ "properties": {
82
+ "event_id": {"type": "string", "description": "ID của sự kiện (MongoDB ID)"}
83
+ },
84
+ "required": ["event_id"]
85
+ }
86
+ }
87
+ }
88
+ ]
89
+
90
+ sales_tools = [
91
+ {
92
+ "type": "function",
93
+ "function": {
94
+ "name": "save_lead",
95
+ "description": "Lưu thông tin khách hàng quan tâm (Lead).",
96
+ "parameters": {
97
+ "type": "object",
98
+ "properties": {
99
+ "email": {"type": "string", "description": "Email address"},
100
+ "phone": {"type": "string", "description": "Phone number"},
101
+ "interest": {"type": "string", "description": "What they're interested in"}
102
+ }
103
+ }
104
+ }
105
+ }
106
+ ]
107
+
108
+ feedback_tools = [
109
+ {
110
+ "type": "function",
111
+ "function": {
112
+ "name": "get_purchased_events",
113
+ "description": "Kiểm tra lịch sử các sự kiện user đã mua vé hoặc tham gia.",
114
+ "parameters": {
115
+ "type": "object",
116
+ "properties": {
117
+ "user_id": {"type": "string", "description": "ID của user"}
118
+ },
119
+ "required": ["user_id"]
120
+ }
121
+ }
122
+ },
123
+ {
124
+ "type": "function",
125
+ "function": {
126
+ "name": "save_feedback",
127
+ "description": "Lưu đánh giá/feedback của user về sự kiện.",
128
+ "parameters": {
129
+ "type": "object",
130
+ "properties": {
131
+ "event_id": {"type": "string", "description": "ID sự kiện"},
132
+ "rating": {"type": "integer", "description": "Số sao đánh giá (1-5)"},
133
+ "comment": {"type": "string", "description": "Nội dung nhận xét"}
134
+ },
135
+ "required": ["event_id", "rating"]
136
+ }
137
+ }
138
+ }
139
+ ]
140
+
141
+ if mode == "feedback":
142
+ return common_tools + feedback_tools
143
+ else:
144
+ return common_tools + sales_tools
145
+
async def chat(
    self,
    user_message: str,
    conversation_history: List[Dict],
    mode: str = "sales",  # "sales" or "feedback"
    user_id: Optional[str] = None,
    access_token: Optional[str] = None,  # For authenticated API calls
    max_iterations: int = 3
) -> Dict[str, Any]:
    """
    Main conversation loop with native tool calling.

    Args:
        user_message: User's input.
        conversation_history: Previous messages [{"role": "user", "content": ...}, ...]
        mode: "sales" or "feedback".
        user_id: User ID (for feedback mode to check purchase history).
        access_token: JWT token for authenticated API calls.
        max_iterations: Maximum tool call iterations to prevent infinite loops.

    Returns:
        {
            "message": "Bot response",
            "tool_calls": [...],  # List of tools called (for debugging)
            "mode": mode
        }
    """
    print(f"\n🤖 Agent Mode: {mode}")
    print(f"👤 User Message: {user_message}")
    print(f"🔑 Auth Info:")
    print(f" - User ID: {user_id}")
    print(f" - Access Token: {'✅ Received' if access_token else '❌ None'}")

    # Store identity for tool execution (auto-injection below).
    self.current_user_id = user_id
    self.current_access_token = access_token
    if access_token:
        print(f" - Stored access_token for tools: {access_token[:20]}...")
    if user_id:
        print(f" - Stored user_id for tools: {user_id}")

    # System prompt carries no tool instructions - native tools handle this.
    system_prompt = self._get_system_prompt(mode)
    tools = self._get_native_tools(mode)
    messages = self._build_messages(system_prompt, conversation_history, user_message)

    # Agentic loop: the LLM may call tools several times before answering.
    tool_calls_made: List[Dict] = []
    current_response: Optional[str] = None

    for iteration in range(max_iterations):
        print(f"\n🔄 Iteration {iteration + 1}")

        llm_result = await self._call_llm_with_tools(messages, tools)

        if llm_result["type"] == "text":
            # Final answer - leave the loop.
            current_response = llm_result["content"]
            print(f"🧠 LLM Final Response: {current_response[:200]}...")
            break

        elif llm_result["type"] == "tool_calls":
            for tool_call in llm_result["tool_calls"]:
                tool_name = tool_call["function"]["name"]
                call_id = tool_call.get("id", f"call_{iteration}")

                # FIX: malformed JSON arguments from the model used to
                # raise and abort the entire chat turn; fall back to {}.
                try:
                    arguments = json.loads(tool_call["function"]["arguments"])
                except (json.JSONDecodeError, TypeError) as e:
                    print(f"⚠️ Bad tool arguments for {tool_name}: {e}")
                    arguments = {}

                print(f"🔧 Tool Called: {tool_name}")
                print(f" Arguments: {arguments}")

                # Auto-inject the authenticated user's id so the model
                # cannot query another user's purchase history.
                if tool_name == 'get_purchased_events' and self.current_user_id:
                    print(f"🔄 Auto-injecting real user_id: {self.current_user_id}")
                    arguments['user_id'] = self.current_user_id

                tool_result = await self.tools_service.execute_tool(
                    tool_name,
                    arguments,
                    access_token=self.current_access_token
                )

                # FIX: run the RAG search BEFORE recording the call, so the
                # debug trace contains the real search output rather than the
                # internal {"action": "run_rag_search"} placeholder.
                if isinstance(tool_result, dict) and tool_result.get("action") == "run_rag_search":
                    tool_result = await self._execute_rag_search(tool_result["query"])

                # Record the executed call for debugging/telemetry.
                tool_calls_made.append({
                    "function": tool_name,
                    "arguments": arguments,
                    "result": tool_result
                })

                # Echo the call and its result back into the transcript so
                # the next LLM turn can reason over the tool output.
                messages.append({
                    "role": "assistant",
                    "content": None,
                    "tool_calls": [{
                        "id": call_id,
                        "type": "function",
                        "function": {
                            "name": tool_name,
                            "arguments": json.dumps(arguments)
                        }
                    }]
                })
                messages.append({
                    "role": "tool",
                    "tool_call_id": call_id,
                    "content": self._format_tool_result({"result": tool_result})
                })

        elif llm_result["type"] == "error":
            print(f"⚠️ LLM Error: {llm_result['content']}")
            current_response = "Xin lỗi, tôi đang gặp chút vấn đề kỹ thuật. Bạn thử lại sau nhé!"
            break

    # Fallback wording if we exhausted max_iterations without a text reply.
    final_response = current_response or "Tôi cần thêm thông tin để hỗ trợ bạn."

    return {
        "message": final_response,
        "tool_calls": tool_calls_made,
        "mode": mode
    }
278
+
279
+ def _get_system_prompt(self, mode: str) -> str:
280
+ """Get system prompt for selected mode (without tool instructions)"""
281
+ prompt_key = f"{mode}_agent" if mode in ["sales", "feedback"] else "sales_agent"
282
+ return self.prompts.get(prompt_key, "")
283
+
284
+ def _build_messages(
285
+ self,
286
+ system_prompt: str,
287
+ history: List[Dict],
288
+ user_message: str
289
+ ) -> List[Dict]:
290
+ """Build messages array for LLM"""
291
+ messages = [{"role": "system", "content": system_prompt}]
292
+
293
+ # Add conversation history
294
+ messages.extend(history)
295
+
296
+ # Add current user message
297
+ messages.append({"role": "user", "content": user_message})
298
+
299
+ return messages
300
+
301
+ async def _call_llm_with_tools(self, messages: List[Dict], tools: List[Dict]) -> Dict:
302
+ """
303
+ Call HuggingFace LLM with native tool calling support
304
+
305
+ Returns:
306
+ {"type": "text", "content": "..."} for text responses
307
+ {"type": "tool_calls", "tool_calls": [...]} for tool call requests
308
+ {"type": "error", "content": "..."} for errors
309
+ """
310
+ try:
311
+ from huggingface_hub import AsyncInferenceClient
312
+
313
+ # Create async client - Qwen2.5 works on default HuggingFace API
314
+ client = AsyncInferenceClient(token=self.hf_token)
315
+
316
+ # Call HF API with chat completion and native tools
317
+ # Qwen2.5-72B-Instruct: Best for Vietnamese - state-of-the-art performance
318
+ response = await client.chat_completion(
319
+ messages=messages,
320
+ model="Qwen/Qwen2.5-72B-Instruct", # Best for Vietnamese + tool calling
321
+ max_tokens=1024, # Increased to prevent truncation
322
+ temperature=0.7,
323
+ tools=tools,
324
+ tool_choice="auto" # Let model decide when to use tools
325
+ )
326
+
327
+ # Check if the model made tool calls
328
+ message = response.choices[0].message
329
+
330
+ if message.tool_calls:
331
+ print(f"🔧 Native tool calls detected: {len(message.tool_calls)}")
332
+ return {
333
+ "type": "tool_calls",
334
+ "tool_calls": [
335
+ {
336
+ "id": tc.id,
337
+ "function": {
338
+ "name": tc.function.name,
339
+ "arguments": tc.function.arguments
340
+ }
341
+ }
342
+ for tc in message.tool_calls
343
+ ]
344
+ }
345
+ else:
346
+ # Regular text response
347
+ return {
348
+ "type": "text",
349
+ "content": message.content or ""
350
+ }
351
+
352
+ except Exception as e:
353
+ print(f"⚠️ LLM Call Error: {e}")
354
+ return {
355
+ "type": "error",
356
+ "content": str(e)
357
+ }
358
+
359
+ def _format_tool_result(self, tool_result: Dict) -> str:
360
+ """Format tool result for feeding back to LLM"""
361
+ result = tool_result.get("result", {})
362
+
363
+ # Special handling for purchased events list
364
+ if isinstance(result, list):
365
+ print(f"\n🔍 Formatting {len(result)} items for LLM")
366
+ if not result:
367
+ return "Không tìm thấy dữ liệu nào phù hợp."
368
+
369
+ # Format each event clearly
370
+ formatted_events = []
371
+ for i, event in enumerate(result, 1):
372
+ # Handle both object/dict and string results
373
+ if isinstance(event, str):
374
+ formatted_events.append(f"{i}. {event}")
375
+ continue
376
+
377
+ event_info = []
378
+ event_info.append(f"Event {i}:")
379
+
380
+ # Extract key fields
381
+ if 'eventName' in event:
382
+ event_info.append(f" Name: {event['eventName']}")
383
+ if 'eventCode' in event:
384
+ event_info.append(f" Code: {event['eventCode']}")
385
+ if '_id' in event:
386
+ event_info.append(f" ID: {event['_id']}")
387
+ if 'startTimeEventTime' in event:
388
+ event_info.append(f" Date: {event['startTimeEventTime']}")
389
+ # Handle RAG result payload structure
390
+ if 'texts' in event: # Flat text from RAG
391
+ event_info.append(f" Content: {event['texts']}")
392
+ if 'id_use' in event:
393
+ event_info.append(f" ID: {event['id_use']}")
394
+
395
+ formatted_events.append("\n".join(event_info))
396
+
397
+ formatted = "Tool Results:\n\n" + "\n\n".join(formatted_events)
398
+ # print(f"📤 Sending to LLM:\n{formatted}") # Reduce noise
399
+ return formatted
400
+
401
+ # Default formatting for other results
402
+ if isinstance(result, dict):
403
+ # Pretty print key info
404
+ formatted = []
405
+ for key, value in result.items():
406
+ if key not in ["success", "error"]:
407
+ formatted.append(f"{key}: {value}")
408
+ return "\n".join(formatted) if formatted else json.dumps(result)
409
+
410
+ return str(result)
411
+
412
+ async def _execute_rag_search(self, query_params: Dict) -> str:
413
+ """
414
+ Execute RAG search with Multi-Stage Fallback Strategy
415
+ Called when LLM wants to search_events
416
+ """
417
+ query = query_params.get("query", "").strip()
418
+ vibe = query_params.get("vibe", "").strip()
419
+ time = query_params.get("time", "").strip()
420
+
421
+ # Strategy: Try specific -> broad
422
+ search_strategies = []
423
+
424
+ # 1. Full combination (Specific)
425
+ if query or vibe:
426
+ full_query = f"{query} {vibe} {time}".strip()
427
+ search_strategies.append(("Full Context", full_query))
428
+
429
+ # 2. Main keyword only (Broad) - Critical for terms like "rượu"
430
+ if query and len(search_strategies) > 0 and query != full_query:
431
+ search_strategies.append(("Keyword Only", query))
432
+
433
+ # 3. Vibe only (Fallback)
434
+ if vibe and len(search_strategies) > 0 and vibe != full_query:
435
+ search_strategies.append(("Vibe Only", vibe))
436
+
437
+ print(f"🔍 RAG Search Plan: {[s[0] for s in search_strategies]}")
438
+
439
+ final_results = []
440
+ seen_ids = set()
441
+
442
+ for strategy_name, search_text in search_strategies:
443
+ if not search_text:
444
+ continue
445
+
446
+ print(f"👉 Trying strategy: {strategy_name} ('{search_text}')")
447
+
448
+ # Use embedding + qdrant
449
+ embedding = self.embedding_service.encode_text(search_text)
450
+ results = self.qdrant_service.search(
451
+ query_embedding=embedding,
452
+ limit=5
453
+ )
454
+
455
+ # Deduplicate and add results
456
+ count = 0
457
+ for res in results:
458
+ doc_id = res['id']
459
+ if doc_id not in seen_ids:
460
+ seen_ids.add(doc_id)
461
+ final_results.append(res)
462
+ count += 1
463
+
464
+ print(f" Found {count} new results (Total: {len(final_results)})")
465
+
466
+ # If we have enough results, stop
467
+ if len(final_results) >= 5:
468
+ break
469
+
470
+ # Format results
471
+ formatted = []
472
+ for i, result in enumerate(final_results[:5], 1): # Limit to top 5
473
+ payload = result.get("payload", {})
474
+ texts = payload.get("texts", [])
475
+ text = texts[0] if texts else ""
476
+ event_id = payload.get("id_use", "")
477
+
478
+ if not text:
479
+ continue
480
+
481
+ # Clean and truncate text for context window
482
+ clean_text = text.replace("\n", " ").strip()
483
+ formatted.append(f"Event Found: {clean_text[:300]}... (ID: {event_id})")
484
+
485
+ if not formatted:
486
+ print("❌ RAG Search returned 0 usable results after all strategies")
487
+ return "SYSTEM_MESSAGE: Không tìm thấy sự kiện nào trong cơ sở dữ liệu phù hợp với yêu cầu. Hãy báo lại cho khách hàng: 'Hiện tại mình chưa tìm thấy sự kiện nào phù hợp với yêu cầu này, bạn thử đổi tiêu chí xem sao nhé?'"
488
+
489
+ print(f"✅ Returning {len(formatted)} events to LLM")
490
+ return "\n\n".join(formatted)