minhvtt committed on
Commit
d52b84f
·
verified ·
1 Parent(s): 8679400

Update agent_service.py

Browse files
Files changed (1) hide show
  1. agent_service.py +490 -470
agent_service.py CHANGED
@@ -1,470 +1,490 @@
1
- """
2
- Agent Service - Central Brain for Sales & Feedback Agents
3
- Manages LLM conversation loop with native tool calling
4
- """
5
- from typing import Dict, Any, List, Optional
6
- import os
7
- import json
8
- from tools_service import ToolsService
9
-
10
-
11
- class AgentService:
12
- """
13
- Manages the conversation loop between User -> LLM -> Tools -> Response
14
- Uses native tool calling via HuggingFace Inference API
15
- """
16
-
17
- def __init__(
18
- self,
19
- tools_service: ToolsService,
20
- embedding_service,
21
- qdrant_service,
22
- advanced_rag,
23
- hf_token: str,
24
- feedback_tracking=None # Optional feedback tracking
25
- ):
26
- self.tools_service = tools_service
27
- self.embedding_service = embedding_service
28
- self.qdrant_service = qdrant_service
29
- self.advanced_rag = advanced_rag
30
- self.hf_token = hf_token
31
- self.feedback_tracking = feedback_tracking
32
-
33
- # Load system prompts
34
- self.prompts = self._load_prompts()
35
-
36
- def _load_prompts(self) -> Dict[str, str]:
37
- """Load system prompts from files"""
38
- prompts = {}
39
- prompts_dir = "prompts"
40
-
41
- for mode in ["sales_agent", "feedback_agent"]:
42
- filepath = os.path.join(prompts_dir, f"{mode}.txt")
43
- try:
44
- with open(filepath, 'r', encoding='utf-8') as f:
45
- prompts[mode] = f.read()
46
- print(f"✓ Loaded prompt: {mode}")
47
- except Exception as e:
48
- print(f"⚠️ Error loading {mode} prompt: {e}")
49
- prompts[mode] = ""
50
-
51
- return prompts
52
-
53
- def _get_native_tools(self, mode: str = "sales") -> List[Dict]:
54
- """
55
- Get tools formatted for native tool calling API.
56
- Returns OpenAI-compatible tool definitions.
57
- """
58
- common_tools = [
59
- {
60
- "type": "function",
61
- "function": {
62
- "name": "search_events",
63
- "description": "Tìm kiếm sự kiện phù hợp theo từ khóa, vibe, hoặc thời gian.",
64
- "parameters": {
65
- "type": "object",
66
- "properties": {
67
- "query": {"type": "string", "description": "Từ khóa tìm kiếm (VD: 'nhạc rock', 'hài kịch')"},
68
- "vibe": {"type": "string", "description": "Vibe/Mood (VD: 'chill', 'sôi động', 'hẹn hò')"},
69
- "time": {"type": "string", "description": "Thời gian (VD: 'cuối tuần này', 'tối nay')"}
70
- }
71
- }
72
- }
73
- },
74
- {
75
- "type": "function",
76
- "function": {
77
- "name": "get_event_details",
78
- "description": "Lấy thông tin chi tiết (giá, địa điểm, thời gian) của sự kiện.",
79
- "parameters": {
80
- "type": "object",
81
- "properties": {
82
- "event_id": {"type": "string", "description": "ID của sự kiện (MongoDB ID)"}
83
- },
84
- "required": ["event_id"]
85
- }
86
- }
87
- }
88
- ]
89
-
90
- sales_tools = [
91
- {
92
- "type": "function",
93
- "function": {
94
- "name": "save_lead",
95
- "description": "Lưu thông tin khách hàng quan tâm (Lead).",
96
- "parameters": {
97
- "type": "object",
98
- "properties": {
99
- "email": {"type": "string", "description": "Email address"},
100
- "phone": {"type": "string", "description": "Phone number"},
101
- "interest": {"type": "string", "description": "What they're interested in"}
102
- }
103
- }
104
- }
105
- }
106
- ]
107
-
108
- feedback_tools = [
109
- {
110
- "type": "function",
111
- "function": {
112
- "name": "get_purchased_events",
113
- "description": "Kiểm tra lịch sử các sự kiện user đã mua vé hoặc tham gia.",
114
- "parameters": {
115
- "type": "object",
116
- "properties": {
117
- "user_id": {"type": "string", "description": "ID của user"}
118
- },
119
- "required": ["user_id"]
120
- }
121
- }
122
- },
123
- {
124
- "type": "function",
125
- "function": {
126
- "name": "save_feedback",
127
- "description": "Lưu đánh giá/feedback của user về sự kiện.",
128
- "parameters": {
129
- "type": "object",
130
- "properties": {
131
- "event_id": {"type": "string", "description": "ID sự kiện"},
132
- "rating": {"type": "integer", "description": "Số sao đánh giá (1-5)"},
133
- "comment": {"type": "string", "description": "Nội dung nhận xét"}
134
- },
135
- "required": ["event_id", "rating"]
136
- }
137
- }
138
- }
139
- ]
140
-
141
- if mode == "feedback":
142
- return common_tools + feedback_tools
143
- else:
144
- return common_tools + sales_tools
145
-
146
- async def chat(
147
- self,
148
- user_message: str,
149
- conversation_history: List[Dict],
150
- mode: str = "sales", # "sales" or "feedback"
151
- user_id: Optional[str] = None,
152
- access_token: Optional[str] = None, # For authenticated API calls
153
- max_iterations: int = 3
154
- ) -> Dict[str, Any]:
155
- """
156
- Main conversation loop with native tool calling
157
-
158
- Args:
159
- user_message: User's input
160
- conversation_history: Previous messages [{"role": "user", "content": ...}, ...]
161
- mode: "sales" or "feedback"
162
- user_id: User ID (for feedback mode to check purchase history)
163
- access_token: JWT token for authenticated API calls
164
- max_iterations: Maximum tool call iterations to prevent infinite loops
165
-
166
- Returns:
167
- {
168
- "message": "Bot response",
169
- "tool_calls": [...], # List of tools called (for debugging)
170
- "mode": mode
171
- }
172
- """
173
- print(f"\n🤖 Agent Mode: {mode}")
174
- print(f"👤 User Message: {user_message}")
175
- print(f"🔑 Auth Info:")
176
- print(f" - User ID: {user_id}")
177
- print(f" - Access Token: {'✅ Received' if access_token else '❌ None'}")
178
-
179
- # Store user_id and access_token for tool calls
180
- self.current_user_id = user_id
181
- self.current_access_token = access_token
182
- if access_token:
183
- print(f" - Stored access_token for tools: {access_token[:20]}...")
184
- if user_id:
185
- print(f" - Stored user_id for tools: {user_id}")
186
-
187
- # Select system prompt (without tool instructions - native tools handle this)
188
- system_prompt = self._get_system_prompt(mode)
189
-
190
- # Get native tools for this mode
191
- tools = self._get_native_tools(mode)
192
-
193
- # Build conversation context
194
- messages = self._build_messages(system_prompt, conversation_history, user_message)
195
-
196
- # Agentic loop: LLM may call tools multiple times
197
- tool_calls_made = []
198
- current_response = None
199
-
200
- for iteration in range(max_iterations):
201
- print(f"\n🔄 Iteration {iteration + 1}")
202
-
203
- # Call LLM with native tools
204
- llm_result = await self._call_llm_with_tools(messages, tools)
205
-
206
- # Check if this is a final text response or a tool call
207
- if llm_result["type"] == "text":
208
- current_response = llm_result["content"]
209
- print(f"🧠 LLM Final Response: {current_response[:200]}...")
210
- break
211
-
212
- elif llm_result["type"] == "tool_calls":
213
- # Process each tool call
214
- for tool_call in llm_result["tool_calls"]:
215
- tool_name = tool_call["function"]["name"]
216
- arguments = json.loads(tool_call["function"]["arguments"])
217
-
218
- print(f"🔧 Tool Called: {tool_name}")
219
- print(f" Arguments: {arguments}")
220
-
221
- # Auto-inject real user_id for get_purchased_events
222
- if tool_name == 'get_purchased_events' and self.current_user_id:
223
- print(f"🔄 Auto-injecting real user_id: {self.current_user_id}")
224
- arguments['user_id'] = self.current_user_id
225
-
226
- # Execute tool
227
- tool_result = await self.tools_service.execute_tool(
228
- tool_name,
229
- arguments,
230
- access_token=self.current_access_token
231
- )
232
-
233
- # Record tool call
234
- tool_calls_made.append({
235
- "function": tool_name,
236
- "arguments": arguments,
237
- "result": tool_result
238
- })
239
-
240
- # Handle RAG search specially
241
- if isinstance(tool_result, dict) and tool_result.get("action") == "run_rag_search":
242
- tool_result = await self._execute_rag_search(tool_result["query"])
243
-
244
- # Add assistant's tool call to messages
245
- messages.append({
246
- "role": "assistant",
247
- "content": None,
248
- "tool_calls": [{
249
- "id": tool_call.get("id", f"call_{iteration}"),
250
- "type": "function",
251
- "function": {
252
- "name": tool_name,
253
- "arguments": json.dumps(arguments)
254
- }
255
- }]
256
- })
257
-
258
- # Add tool result to messages
259
- messages.append({
260
- "role": "tool",
261
- "tool_call_id": tool_call.get("id", f"call_{iteration}"),
262
- "content": self._format_tool_result({"result": tool_result})
263
- })
264
-
265
- elif llm_result["type"] == "error":
266
- print(f"⚠️ LLM Error: {llm_result['content']}")
267
- current_response = "Xin lỗi, tôi đang gặp chút vấn đề kỹ thuật. Bạn thử lại sau nhé!"
268
- break
269
-
270
- # Get final response if we hit max iterations
271
- final_response = current_response or "Tôi cần thêm thông tin để hỗ trợ bạn."
272
-
273
- return {
274
- "message": final_response,
275
- "tool_calls": tool_calls_made,
276
- "mode": mode
277
- }
278
-
279
- def _get_system_prompt(self, mode: str) -> str:
280
- """Get system prompt for selected mode (without tool instructions)"""
281
- prompt_key = f"{mode}_agent" if mode in ["sales", "feedback"] else "sales_agent"
282
- return self.prompts.get(prompt_key, "")
283
-
284
- def _build_messages(
285
- self,
286
- system_prompt: str,
287
- history: List[Dict],
288
- user_message: str
289
- ) -> List[Dict]:
290
- """Build messages array for LLM"""
291
- messages = [{"role": "system", "content": system_prompt}]
292
-
293
- # Add conversation history
294
- messages.extend(history)
295
-
296
- # Add current user message
297
- messages.append({"role": "user", "content": user_message})
298
-
299
- return messages
300
-
301
- async def _call_llm_with_tools(self, messages: List[Dict], tools: List[Dict]) -> Dict:
302
- """
303
- Call HuggingFace LLM with native tool calling support
304
-
305
- Returns:
306
- {"type": "text", "content": "..."} for text responses
307
- {"type": "tool_calls", "tool_calls": [...]} for tool call requests
308
- {"type": "error", "content": "..."} for errors
309
- """
310
- try:
311
- from huggingface_hub import AsyncInferenceClient
312
-
313
- # Create async client - Qwen2.5 works on default HuggingFace API
314
- client = AsyncInferenceClient(token=self.hf_token)
315
-
316
- # Call HF API with chat completion and native tools
317
- # Qwen2.5-72B-Instruct: Best for Vietnamese - state-of-the-art performance
318
- response = await client.chat_completion(
319
- messages=messages,
320
- model="Qwen/Qwen2.5-72B-Instruct", # Best for Vietnamese + tool calling
321
- max_tokens=1024, # Increased to prevent truncation
322
- temperature=0.7,
323
- tools=tools,
324
- tool_choice="auto" # Let model decide when to use tools
325
- )
326
-
327
- # Check if the model made tool calls
328
- message = response.choices[0].message
329
-
330
- if message.tool_calls:
331
- print(f"🔧 Native tool calls detected: {len(message.tool_calls)}")
332
- return {
333
- "type": "tool_calls",
334
- "tool_calls": [
335
- {
336
- "id": tc.id,
337
- "function": {
338
- "name": tc.function.name,
339
- "arguments": tc.function.arguments
340
- }
341
- }
342
- for tc in message.tool_calls
343
- ]
344
- }
345
- else:
346
- # Regular text response
347
- return {
348
- "type": "text",
349
- "content": message.content or ""
350
- }
351
-
352
- except Exception as e:
353
- print(f"⚠️ LLM Call Error: {e}")
354
- return {
355
- "type": "error",
356
- "content": str(e)
357
- }
358
-
359
- def _format_tool_result(self, tool_result: Dict) -> str:
360
- """Format tool result for feeding back to LLM"""
361
- result = tool_result.get("result", {})
362
-
363
- # Special handling for purchased events list
364
- if isinstance(result, list):
365
- print(f"\n🔍 Formatting {len(result)} items for LLM")
366
- if not result:
367
- return "Không tìm thấy dữ liệu nào phù hợp."
368
-
369
- # Format each event clearly
370
- formatted_events = []
371
- for i, event in enumerate(result, 1):
372
- # Handle both object/dict and string results
373
- if isinstance(event, str):
374
- formatted_events.append(f"{i}. {event}")
375
- continue
376
-
377
- event_info = []
378
- event_info.append(f"Event {i}:")
379
-
380
- # Extract key fields
381
- if 'eventName' in event:
382
- event_info.append(f" Name: {event['eventName']}")
383
- if 'eventCode' in event:
384
- event_info.append(f" Code: {event['eventCode']}")
385
- if '_id' in event:
386
- event_info.append(f" ID: {event['_id']}")
387
- if 'startTimeEventTime' in event:
388
- event_info.append(f" Date: {event['startTimeEventTime']}")
389
- # Handle RAG result payload structure
390
- if 'texts' in event: # Flat text from RAG
391
- event_info.append(f" Content: {event['texts']}")
392
- if 'id_use' in event:
393
- event_info.append(f" ID: {event['id_use']}")
394
-
395
- formatted_events.append("\n".join(event_info))
396
-
397
- formatted = "Tool Results:\n\n" + "\n\n".join(formatted_events)
398
- # print(f"📤 Sending to LLM:\n{formatted}") # Reduce noise
399
- return formatted
400
-
401
- # Default formatting for other results
402
- if isinstance(result, dict):
403
- # Pretty print key info
404
- formatted = []
405
- for key, value in result.items():
406
- if key not in ["success", "error"]:
407
- formatted.append(f"{key}: {value}")
408
- return "\n".join(formatted) if formatted else json.dumps(result)
409
-
410
- return str(result)
411
-
412
- async def _execute_rag_search(self, query_params: Dict) -> str:
413
- """
414
- Execute RAG search for event discovery
415
- Called when LLM wants to search_events
416
- """
417
- query = query_params.get("query", "")
418
- vibe = query_params.get("vibe", "")
419
- time = query_params.get("time", "")
420
-
421
- # Build search query
422
- search_text = f"{query} {vibe} {time}".strip()
423
-
424
- print(f"🔍 RAG Search Query: '{search_text}'")
425
-
426
- if not search_text:
427
- return "Vui lòng cung cấp từ khóa tìm kiếm."
428
-
429
- # Use embedding + qdrant
430
- embedding = self.embedding_service.encode_text(search_text)
431
- results = self.qdrant_service.search(
432
- query_embedding=embedding,
433
- limit=5
434
- )
435
-
436
- print(f"📊 RAG Results Count: {len(results)}")
437
-
438
- # Fallback if no results and query was complex
439
- if not results and (query and vibe):
440
- print(f"⚠️ No results for combined query. Retrying with just 'vibe': {vibe}")
441
- search_text = vibe
442
- embedding = self.embedding_service.encode_text(search_text)
443
- results = self.qdrant_service.search(
444
- query_embedding=embedding,
445
- limit=5
446
- )
447
- print(f"📊 Retry Results Count: {len(results)}")
448
-
449
- # Format results
450
- formatted = []
451
- for i, result in enumerate(results, 1):
452
- # Result is a dict with keys: id, score, payload
453
- payload = result.get("payload", {})
454
- texts = payload.get("texts", [])
455
- text = texts[0] if texts else ""
456
- event_id = payload.get("id_use", "")
457
-
458
- if not text:
459
- continue
460
-
461
- # Clean and truncate text for context window
462
- clean_text = text.replace("\n", " ").strip()
463
- formatted.append(f"Event Found: {clean_text[:300]}... (ID: {event_id})")
464
-
465
- if not formatted:
466
- print("❌ RAG Search returned 0 usable results")
467
- return "SYSTEM_MESSAGE: Không tìm thấy sự kiện nào trong cơ sở dữ liệu phù hợp với yêu cầu. Hãy báo lại cho khách hàng: 'Hiện tại mình chưa tìm thấy sự kiện nào phù hợp với yêu cầu này, bạn thử đổi tiêu chí xem sao nhé?'"
468
-
469
- print(f"✅ Returning {len(formatted)} events to LLM")
470
- return "\n\n".join(formatted)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agent Service - Central Brain for Sales & Feedback Agents
3
+ Manages LLM conversation loop with native tool calling
4
+ """
5
+ from typing import Dict, Any, List, Optional
6
+ import os
7
+ import json
8
+ from tools_service import ToolsService
9
+
10
+
11
+ class AgentService:
12
+ """
13
+ Manages the conversation loop between User -> LLM -> Tools -> Response
14
+ Uses native tool calling via HuggingFace Inference API
15
+ """
16
+
def __init__(
    self,
    tools_service: ToolsService,
    embedding_service,
    qdrant_service,
    advanced_rag,
    hf_token: str,
    feedback_tracking=None  # Optional feedback tracking
):
    """Wire up collaborating services and preload the system prompts.

    Args:
        tools_service: Executor for native tool calls.
        embedding_service: Text-embedding provider used by RAG search.
        qdrant_service: Vector store queried during RAG search.
        advanced_rag: Advanced RAG pipeline collaborator.
        hf_token: HuggingFace API token for chat-completion calls.
        feedback_tracking: Optional feedback-tracking collaborator.
    """
    # Hold references to every collaborator; the only work done here is
    # reading the prompt files from disk.
    self.tools_service = tools_service
    self.embedding_service = embedding_service
    self.qdrant_service = qdrant_service
    self.advanced_rag = advanced_rag
    self.hf_token = hf_token
    self.feedback_tracking = feedback_tracking

    # System prompts are loaded once at construction time.
    self.prompts = self._load_prompts()
35
+
36
+ def _load_prompts(self) -> Dict[str, str]:
37
+ """Load system prompts from files"""
38
+ prompts = {}
39
+ prompts_dir = "prompts"
40
+
41
+ for mode in ["sales_agent", "feedback_agent"]:
42
+ filepath = os.path.join(prompts_dir, f"{mode}.txt")
43
+ try:
44
+ with open(filepath, 'r', encoding='utf-8') as f:
45
+ prompts[mode] = f.read()
46
+ print(f"✓ Loaded prompt: {mode}")
47
+ except Exception as e:
48
+ print(f"⚠️ Error loading {mode} prompt: {e}")
49
+ prompts[mode] = ""
50
+
51
+ return prompts
52
+
53
+ def _get_native_tools(self, mode: str = "sales") -> List[Dict]:
54
+ """
55
+ Get tools formatted for native tool calling API.
56
+ Returns OpenAI-compatible tool definitions.
57
+ """
58
+ common_tools = [
59
+ {
60
+ "type": "function",
61
+ "function": {
62
+ "name": "search_events",
63
+ "description": "Tìm kiếm sự kiện phù hợp theo từ khóa, vibe, hoặc thời gian.",
64
+ "parameters": {
65
+ "type": "object",
66
+ "properties": {
67
+ "query": {"type": "string", "description": "Từ khóa tìm kiếm (VD: 'nhạc rock', 'hài kịch')"},
68
+ "vibe": {"type": "string", "description": "Vibe/Mood (VD: 'chill', 'sôi động', 'hẹn hò')"},
69
+ "time": {"type": "string", "description": "Thời gian (VD: 'cuối tuần này', 'tối nay')"}
70
+ }
71
+ }
72
+ }
73
+ },
74
+ {
75
+ "type": "function",
76
+ "function": {
77
+ "name": "get_event_details",
78
+ "description": "Lấy thông tin chi tiết (giá, địa điểm, thời gian) của sự kiện.",
79
+ "parameters": {
80
+ "type": "object",
81
+ "properties": {
82
+ "event_id": {"type": "string", "description": "ID của sự kiện (MongoDB ID)"}
83
+ },
84
+ "required": ["event_id"]
85
+ }
86
+ }
87
+ }
88
+ ]
89
+
90
+ sales_tools = [
91
+ {
92
+ "type": "function",
93
+ "function": {
94
+ "name": "save_lead",
95
+ "description": "Lưu thông tin khách hàng quan tâm (Lead).",
96
+ "parameters": {
97
+ "type": "object",
98
+ "properties": {
99
+ "email": {"type": "string", "description": "Email address"},
100
+ "phone": {"type": "string", "description": "Phone number"},
101
+ "interest": {"type": "string", "description": "What they're interested in"}
102
+ }
103
+ }
104
+ }
105
+ }
106
+ ]
107
+
108
+ feedback_tools = [
109
+ {
110
+ "type": "function",
111
+ "function": {
112
+ "name": "get_purchased_events",
113
+ "description": "Kiểm tra lịch sử các sự kiện user đã mua vé hoặc tham gia.",
114
+ "parameters": {
115
+ "type": "object",
116
+ "properties": {
117
+ "user_id": {"type": "string", "description": "ID của user"}
118
+ },
119
+ "required": ["user_id"]
120
+ }
121
+ }
122
+ },
123
+ {
124
+ "type": "function",
125
+ "function": {
126
+ "name": "save_feedback",
127
+ "description": "Lưu đánh giá/feedback của user về sự kiện.",
128
+ "parameters": {
129
+ "type": "object",
130
+ "properties": {
131
+ "event_id": {"type": "string", "description": "ID sự kiện"},
132
+ "rating": {"type": "integer", "description": "Số sao đánh giá (1-5)"},
133
+ "comment": {"type": "string", "description": "Nội dung nhận xét"}
134
+ },
135
+ "required": ["event_id", "rating"]
136
+ }
137
+ }
138
+ }
139
+ ]
140
+
141
+ if mode == "feedback":
142
+ return common_tools + feedback_tools
143
+ else:
144
+ return common_tools + sales_tools
145
+
async def chat(
    self,
    user_message: str,
    conversation_history: List[Dict],
    mode: str = "sales",  # "sales" or "feedback"
    user_id: Optional[str] = None,
    access_token: Optional[str] = None,  # For authenticated API calls
    max_iterations: int = 3
) -> Dict[str, Any]:
    """
    Main conversation loop with native tool calling.

    Args:
        user_message: User's input.
        conversation_history: Previous messages [{"role": "user", "content": ...}, ...]
        mode: "sales" or "feedback".
        user_id: User ID (for feedback mode to check purchase history).
        access_token: JWT token for authenticated API calls.
        max_iterations: Maximum tool call iterations to prevent infinite loops.

    Returns:
        {
            "message": "Bot response",
            "tool_calls": [...],  # List of tools called (for debugging)
            "mode": mode
        }
    """
    print(f"\n🤖 Agent Mode: {mode}")
    print(f"👤 User Message: {user_message}")
    print(f"🔑 Auth Info:")
    print(f" - User ID: {user_id}")
    print(f" - Access Token: {'✅ Received' if access_token else '❌ None'}")

    # Store identity for tool execution (auto-injection below).
    self.current_user_id = user_id
    self.current_access_token = access_token
    if access_token:
        print(f" - Stored access_token for tools: {access_token[:20]}...")
    if user_id:
        print(f" - Stored user_id for tools: {user_id}")

    # System prompt carries no tool instructions - native tools handle this.
    system_prompt = self._get_system_prompt(mode)
    tools = self._get_native_tools(mode)
    messages = self._build_messages(system_prompt, conversation_history, user_message)

    # Agentic loop: the LLM may call tools several times before answering.
    tool_calls_made: List[Dict] = []
    current_response: Optional[str] = None

    for iteration in range(max_iterations):
        print(f"\n🔄 Iteration {iteration + 1}")

        llm_result = await self._call_llm_with_tools(messages, tools)

        if llm_result["type"] == "text":
            # Final answer - leave the loop.
            current_response = llm_result["content"]
            print(f"🧠 LLM Final Response: {current_response[:200]}...")
            break

        elif llm_result["type"] == "tool_calls":
            for tool_call in llm_result["tool_calls"]:
                tool_name = tool_call["function"]["name"]
                call_id = tool_call.get("id", f"call_{iteration}")

                # FIX: malformed JSON arguments from the model used to
                # raise and abort the entire chat turn; fall back to {}.
                try:
                    arguments = json.loads(tool_call["function"]["arguments"])
                except (json.JSONDecodeError, TypeError) as e:
                    print(f"⚠️ Bad tool arguments for {tool_name}: {e}")
                    arguments = {}

                print(f"🔧 Tool Called: {tool_name}")
                print(f" Arguments: {arguments}")

                # Auto-inject the authenticated user's id so the model
                # cannot query another user's purchase history.
                if tool_name == 'get_purchased_events' and self.current_user_id:
                    print(f"🔄 Auto-injecting real user_id: {self.current_user_id}")
                    arguments['user_id'] = self.current_user_id

                tool_result = await self.tools_service.execute_tool(
                    tool_name,
                    arguments,
                    access_token=self.current_access_token
                )

                # FIX: run the RAG search BEFORE recording the call, so the
                # debug trace contains the real search output rather than the
                # internal {"action": "run_rag_search"} placeholder.
                if isinstance(tool_result, dict) and tool_result.get("action") == "run_rag_search":
                    tool_result = await self._execute_rag_search(tool_result["query"])

                # Record the executed call for debugging/telemetry.
                tool_calls_made.append({
                    "function": tool_name,
                    "arguments": arguments,
                    "result": tool_result
                })

                # Echo the call and its result back into the transcript so
                # the next LLM turn can reason over the tool output.
                messages.append({
                    "role": "assistant",
                    "content": None,
                    "tool_calls": [{
                        "id": call_id,
                        "type": "function",
                        "function": {
                            "name": tool_name,
                            "arguments": json.dumps(arguments)
                        }
                    }]
                })
                messages.append({
                    "role": "tool",
                    "tool_call_id": call_id,
                    "content": self._format_tool_result({"result": tool_result})
                })

        elif llm_result["type"] == "error":
            print(f"⚠️ LLM Error: {llm_result['content']}")
            current_response = "Xin lỗi, tôi đang gặp chút vấn đề kỹ thuật. Bạn thử lại sau nhé!"
            break

    # Fallback wording if we exhausted max_iterations without a text reply.
    final_response = current_response or "Tôi cần thêm thông tin để hỗ trợ bạn."

    return {
        "message": final_response,
        "tool_calls": tool_calls_made,
        "mode": mode
    }
278
+
279
+ def _get_system_prompt(self, mode: str) -> str:
280
+ """Get system prompt for selected mode (without tool instructions)"""
281
+ prompt_key = f"{mode}_agent" if mode in ["sales", "feedback"] else "sales_agent"
282
+ return self.prompts.get(prompt_key, "")
283
+
284
+ def _build_messages(
285
+ self,
286
+ system_prompt: str,
287
+ history: List[Dict],
288
+ user_message: str
289
+ ) -> List[Dict]:
290
+ """Build messages array for LLM"""
291
+ messages = [{"role": "system", "content": system_prompt}]
292
+
293
+ # Add conversation history
294
+ messages.extend(history)
295
+
296
+ # Add current user message
297
+ messages.append({"role": "user", "content": user_message})
298
+
299
+ return messages
300
+
301
+ async def _call_llm_with_tools(self, messages: List[Dict], tools: List[Dict]) -> Dict:
302
+ """
303
+ Call HuggingFace LLM with native tool calling support
304
+
305
+ Returns:
306
+ {"type": "text", "content": "..."} for text responses
307
+ {"type": "tool_calls", "tool_calls": [...]} for tool call requests
308
+ {"type": "error", "content": "..."} for errors
309
+ """
310
+ try:
311
+ from huggingface_hub import AsyncInferenceClient
312
+
313
+ # Create async client - Qwen2.5 works on default HuggingFace API
314
+ client = AsyncInferenceClient(token=self.hf_token)
315
+
316
+ # Call HF API with chat completion and native tools
317
+ # Qwen2.5-72B-Instruct: Best for Vietnamese - state-of-the-art performance
318
+ response = await client.chat_completion(
319
+ messages=messages,
320
+ model="Qwen/Qwen2.5-72B-Instruct", # Best for Vietnamese + tool calling
321
+ max_tokens=1024, # Increased to prevent truncation
322
+ temperature=0.7,
323
+ tools=tools,
324
+ tool_choice="auto" # Let model decide when to use tools
325
+ )
326
+
327
+ # Check if the model made tool calls
328
+ message = response.choices[0].message
329
+
330
+ if message.tool_calls:
331
+ print(f"🔧 Native tool calls detected: {len(message.tool_calls)}")
332
+ return {
333
+ "type": "tool_calls",
334
+ "tool_calls": [
335
+ {
336
+ "id": tc.id,
337
+ "function": {
338
+ "name": tc.function.name,
339
+ "arguments": tc.function.arguments
340
+ }
341
+ }
342
+ for tc in message.tool_calls
343
+ ]
344
+ }
345
+ else:
346
+ # Regular text response
347
+ return {
348
+ "type": "text",
349
+ "content": message.content or ""
350
+ }
351
+
352
+ except Exception as e:
353
+ print(f"⚠️ LLM Call Error: {e}")
354
+ return {
355
+ "type": "error",
356
+ "content": str(e)
357
+ }
358
+
359
+ def _format_tool_result(self, tool_result: Dict) -> str:
360
+ """Format tool result for feeding back to LLM"""
361
+ result = tool_result.get("result", {})
362
+
363
+ # Special handling for purchased events list
364
+ if isinstance(result, list):
365
+ print(f"\n🔍 Formatting {len(result)} items for LLM")
366
+ if not result:
367
+ return "Không tìm thấy dữ liệu nào phù hợp."
368
+
369
+ # Format each event clearly
370
+ formatted_events = []
371
+ for i, event in enumerate(result, 1):
372
+ # Handle both object/dict and string results
373
+ if isinstance(event, str):
374
+ formatted_events.append(f"{i}. {event}")
375
+ continue
376
+
377
+ event_info = []
378
+ event_info.append(f"Event {i}:")
379
+
380
+ # Extract key fields
381
+ if 'eventName' in event:
382
+ event_info.append(f" Name: {event['eventName']}")
383
+ if 'eventCode' in event:
384
+ event_info.append(f" Code: {event['eventCode']}")
385
+ if '_id' in event:
386
+ event_info.append(f" ID: {event['_id']}")
387
+ if 'startTimeEventTime' in event:
388
+ event_info.append(f" Date: {event['startTimeEventTime']}")
389
+ # Handle RAG result payload structure
390
+ if 'texts' in event: # Flat text from RAG
391
+ event_info.append(f" Content: {event['texts']}")
392
+ if 'id_use' in event:
393
+ event_info.append(f" ID: {event['id_use']}")
394
+
395
+ formatted_events.append("\n".join(event_info))
396
+
397
+ formatted = "Tool Results:\n\n" + "\n\n".join(formatted_events)
398
+ # print(f"📤 Sending to LLM:\n{formatted}") # Reduce noise
399
+ return formatted
400
+
401
+ # Default formatting for other results
402
+ if isinstance(result, dict):
403
+ # Pretty print key info
404
+ formatted = []
405
+ for key, value in result.items():
406
+ if key not in ["success", "error"]:
407
+ formatted.append(f"{key}: {value}")
408
+ return "\n".join(formatted) if formatted else json.dumps(result)
409
+
410
+ return str(result)
411
+
412
+ async def _execute_rag_search(self, query_params: Dict) -> str:
413
+ """
414
+ Execute RAG search with Multi-Stage Fallback Strategy
415
+ Called when LLM wants to search_events
416
+ """
417
+ query = query_params.get("query", "").strip()
418
+ vibe = query_params.get("vibe", "").strip()
419
+ time = query_params.get("time", "").strip()
420
+
421
+ # Strategy: Try specific -> broad
422
+ search_strategies = []
423
+
424
+ # 1. Full combination (Specific)
425
+ if query or vibe:
426
+ full_query = f"{query} {vibe} {time}".strip()
427
+ search_strategies.append(("Full Context", full_query))
428
+
429
+ # 2. Main keyword only (Broad) - Critical for terms like "rượu"
430
+ if query and len(search_strategies) > 0 and query != full_query:
431
+ search_strategies.append(("Keyword Only", query))
432
+
433
+ # 3. Vibe only (Fallback)
434
+ if vibe and len(search_strategies) > 0 and vibe != full_query:
435
+ search_strategies.append(("Vibe Only", vibe))
436
+
437
+ print(f"🔍 RAG Search Plan: {[s[0] for s in search_strategies]}")
438
+
439
+ final_results = []
440
+ seen_ids = set()
441
+
442
+ for strategy_name, search_text in search_strategies:
443
+ if not search_text:
444
+ continue
445
+
446
+ print(f"👉 Trying strategy: {strategy_name} ('{search_text}')")
447
+
448
+ # Use embedding + qdrant
449
+ embedding = self.embedding_service.encode_text(search_text)
450
+ results = self.qdrant_service.search(
451
+ query_embedding=embedding,
452
+ limit=5
453
+ )
454
+
455
+ # Deduplicate and add results
456
+ count = 0
457
+ for res in results:
458
+ doc_id = res['id']
459
+ if doc_id not in seen_ids:
460
+ seen_ids.add(doc_id)
461
+ final_results.append(res)
462
+ count += 1
463
+
464
+ print(f" Found {count} new results (Total: {len(final_results)})")
465
+
466
+ # If we have enough results, stop
467
+ if len(final_results) >= 5:
468
+ break
469
+
470
+ # Format results
471
+ formatted = []
472
+ for i, result in enumerate(final_results[:5], 1): # Limit to top 5
473
+ payload = result.get("payload", {})
474
+ texts = payload.get("texts", [])
475
+ text = texts[0] if texts else ""
476
+ event_id = payload.get("id_use", "")
477
+
478
+ if not text:
479
+ continue
480
+
481
+ # Clean and truncate text for context window
482
+ clean_text = text.replace("\n", " ").strip()
483
+ formatted.append(f"Event Found: {clean_text[:300]}... (ID: {event_id})")
484
+
485
+ if not formatted:
486
+ print("❌ RAG Search returned 0 usable results after all strategies")
487
+ return "SYSTEM_MESSAGE: Không tìm thấy sự kiện nào trong cơ sở dữ liệu phù hợp với yêu cầu. Hãy báo lại cho khách hàng: 'Hiện tại mình chưa tìm thấy sự kiện nào phù hợp với yêu cầu này, bạn thử đổi tiêu chí xem sao nhé?'"
488
+
489
+ print(f"✅ Returning {len(formatted)} events to LLM")
490
+ return "\n\n".join(formatted)