Spaces:
Sleeping
Sleeping
| import openai | |
| import os | |
| import dirtyjson as json | |
| from typing import Dict, Any, Optional, Tuple | |
| from enum import Enum | |
| from pydantic import BaseModel | |
class IntentType(str, Enum):
    """Closed set of intents a user message can be classified into.

    Members also subclass ``str``, so each one compares equal to its
    lowercase wire value (for example ``IntentType.QUERY == "query"``).
    """

    # Record a business transaction (a purchase or a sale).
    TRANSACTION = "transaction"
    # Retrieve or analyze structured data.
    QUERY = "query"
    # Look up contextual or unstructured information.
    SEMANTIC_SEARCH = "semantic_search"
    # Store a general note or reminder.
    GENERAL_INFO = "general_info"
class IntentResult(BaseModel):
    """Outcome of classifying a single user message."""

    # The intent category assigned to the message.
    intent: IntentType
    # How sure the classifier is about ``intent``.
    confidence: float
    # Short explanation of why this intent was chosen.
    reasoning: str
    # Optional free-text note about entities spotted in the message.
    entities_hint: Optional[str] = None
class IntentClassifier:
    """Classify chatbot messages into an ``IntentType`` via the OpenAI chat API.

    The model is asked to answer with a small JSON object. Whenever the API
    call fails or the answer cannot be used, a keyword-based heuristic
    produces the result instead, so ``classify_intent`` always returns an
    ``IntentResult`` rather than raising.
    """

    _MODEL = "gpt-4o-mini"

    # Confidence used when the model's answer carries no usable number.
    _DEFAULT_CONFIDENCE = 0.5

    # NOTE: the prompt text is sent to the model verbatim; edit with care.
    _SYSTEM_PROMPT = """You are an expert intent classifier for a business chatbot that handles sales, purchases, and general information storage.
Given a user message, classify it into one of these intents:
1. **QUERY**: User wants to retrieve or analyze STRUCTURED data from SQL database tables
- Examples: "How many USB drives did we buy?" (counts from purchases table)
- Examples: "What's the total value of all sales?" (sum from sales table)
- Examples: "Show me recent transactions" (list from transactions table)
- Examples: "List all customers" (data from customers table)
- Key indicators: Asking for counts, totals, lists, recent data from business transactions
- Must be answerable from structured database tables (purchases, sales, customers, suppliers, products)
2. **SEMANTIC_SEARCH**: User wants to find contextual information, tasks, or unstructured data
- Examples: "What does Mark need to do?" (searching for task/context info)
- Examples: "Find events related to supplier meetings" (contextual search)
- Examples: "When do I have the meeting with George?" (calendar/scheduling info)
- Examples: "Show me similar purchases to this one" (similarity search)
- Examples: "What did we discuss in the last meeting?" (meeting notes/context)
- Key indicators: Questions about tasks, meetings, discussions, or contextual information
- Information that would NOT be in structured database tables
3. **TRANSACTION**: User wants to record a business transaction (purchase or sale)
- Examples: "Add a purchase of 20 USB drives from TechMart at €5 each"
- Examples: "Sold 10 laptops to John Smith at €800 each"
- Contains: product names, quantities, suppliers/customers, prices
- Action: Recording new business data
4. **GENERAL_INFO**: User wants to store general business information or notes
- It cannot be a question.
- Examples: "Meeting with new supplier scheduled for next week"
- Examples: "Remember to check inventory levels before next order"
- Examples: "Mark needs to call the supplier tomorrow"
- Contains: notes, reminders, general business information, task assignments
Return your response in this exact JSON format:
{
"intent": "transaction|query|semantic_search|general_info",
"confidence": 0.0-1.0,
"reasoning": "Brief explanation of why you chose this intent",
"entities_hint": "Optional: Key entities you detected (for transaction intent)"
}
Be precise and consider context carefully. If unsure, choose the most likely intent and indicate lower confidence."""

    # Ordered (regex, intent) rules for the keyword fallback; first match wins.
    # Keywords are anchored at the start of a word so that short ones such as
    # "to" no longer match inside "total", "tomorrow" or "customers". Query and
    # search cues are tested before the transaction cues because the latter
    # include very common words ("from", "to") that also occur in questions.
    _FALLBACK_RULES = (
        (
            r"\b(?:how many|total|list all|recent transactions|count)",
            IntentType.QUERY,
        ),
        (
            r"\b(?:similar|like|related|about|need to do|meeting|discuss|task)",
            IntentType.SEMANTIC_SEARCH,
        ),
        (
            # "from"/"to" must be whole words; currency symbols match anywhere.
            r"\b(?:purchase|buy|sold|sale)|\b(?:from|to)\b|[€$]",
            IntentType.TRANSACTION,
        ),
    )

    _INTENT_DESCRIPTIONS = {
        IntentType.TRANSACTION: "Recording a business transaction (purchase or sale)",
        IntentType.QUERY: "Retrieving or analyzing data from the database",
        IntentType.SEMANTIC_SEARCH: "Finding similar events or information",
        IntentType.GENERAL_INFO: "Storing general business information or notes",
    }

    def __init__(self, api_key: Optional[str] = None):
        """Create the OpenAI client used for classification.

        Args:
            api_key: OpenAI API key. When omitted or empty, the value of the
                ``OPENAI_API_KEY`` environment variable is used instead.
        """
        self.client = openai.OpenAI(
            api_key=api_key or os.getenv('OPENAI_API_KEY')
        )

    def classify_intent(self, user_message: str) -> IntentResult:
        """Classify one user message.

        Args:
            user_message: The raw text typed by the user.

        Returns:
            The model's classification, or the keyword-based fallback result
            when the API call fails or its answer cannot be parsed.
        """
        user_prompt = f'Classify the intent of this user message: "{user_message}"'
        try:
            response = self.client.chat.completions.create(
                model=self._MODEL,
                messages=[
                    {"role": "system", "content": self._SYSTEM_PROMPT},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=0.1,
                max_tokens=300,
            )
            # ``content`` can be None; treat that like an empty answer so it
            # is reported as a parsing failure instead of an AttributeError.
            response_text = (response.choices[0].message.content or "").strip()
        except Exception as e:
            # Deliberate catch-all boundary: classification is best effort and
            # must not propagate API or network errors to the caller.
            print(f"Error in intent classification: {e}")
            return self._fallback_classification(user_message, str(e))

        return self._parse_response(response_text, user_message)

    def _parse_response(self, response_text: str, user_message: str) -> IntentResult:
        """Turn the model's raw answer into an ``IntentResult``.

        Falls back to keyword classification when the answer is not parseable
        or names an unknown intent.
        """
        payload = self._extract_json_object(response_text)
        try:
            # ``json`` is the lenient parser imported at the top of the file.
            result_dict = json.loads(payload)
            intent_value = str(result_dict.get("intent") or "").strip().lower()
            if intent_value not in {member.value for member in IntentType}:
                print(f"Invalid intent value: {intent_value}")
                return self._fallback_classification(
                    user_message, f"Invalid intent: {intent_value}"
                )

            entities_hint = result_dict.get("entities_hint")
            return IntentResult(
                intent=IntentType(intent_value),
                confidence=self._coerce_confidence(result_dict.get("confidence")),
                # Coerce to str so a null or non-string value cannot discard
                # an otherwise valid classification.
                reasoning=str(result_dict.get("reasoning") or "No reasoning provided"),
                entities_hint=None if entities_hint is None else str(entities_hint),
            )
        except Exception as e:
            # Covers parser errors and a non-object payload (no ``.get``).
            print(f"JSON parsing error: {e}")
            print(f"Raw response: {payload}")
            return self._fallback_classification(
                user_message, f"JSON parsing failed: {str(e)}"
            )

    @staticmethod
    def _extract_json_object(text: str) -> str:
        """Return the outermost ``{...}`` span of *text*.

        This drops markdown code fences as well as any prose before or after
        the object. When no such span exists the stripped text is returned
        unchanged so the parser can report the failure.
        """
        start = text.find("{")
        end = text.rfind("}")
        if start != -1 and end > start:
            return text[start:end + 1]
        return text.strip()

    @classmethod
    def _coerce_confidence(cls, raw: Any) -> float:
        """Convert the model's confidence to a float clamped to [0.0, 1.0].

        Missing, null, non-numeric or NaN values yield the default confidence
        instead of invalidating the whole answer.
        """
        try:
            value = float(raw)
        except (TypeError, ValueError):
            return cls._DEFAULT_CONFIDENCE
        if value != value:  # NaN is the only value not equal to itself
            return cls._DEFAULT_CONFIDENCE
        return min(1.0, max(0.0, value))

    def _fallback_classification(self, user_message: str, error_info: str) -> IntentResult:
        """Classify with keyword rules when the model's answer is unavailable.

        The lowercased message is tested against the fallback rules in order
        (query, then semantic search, then transaction); the first match wins
        with confidence 0.6. A message matching no rule is treated as general
        information with confidence 0.5. This is a coarse heuristic: notes
        that contain the word "to" or "from" still land on TRANSACTION.

        Args:
            user_message: The message being classified.
            error_info: Why the fallback was needed; its first 100 characters
                are included in the returned reasoning.
        """
        # Local import keeps this block self-contained with respect to the
        # file's import section.
        import re

        text = (user_message or "").lower()

        intent = IntentType.GENERAL_INFO
        confidence = 0.5
        for pattern, matched_intent in self._FALLBACK_RULES:
            if re.search(pattern, text):
                intent = matched_intent
                confidence = 0.6
                break

        return IntentResult(
            intent=intent,
            confidence=confidence,
            reasoning=f"Fallback classification due to API error: {str(error_info)[:100]}",
            entities_hint=None,
        )

    def get_intent_description(self, intent: IntentType) -> str:
        """Return a human-readable description of *intent*.

        Unknown values yield ``"Unknown intent type"``.
        """
        return self._INTENT_DESCRIPTIONS.get(intent, "Unknown intent type")

    def batch_classify(self, messages: list[str]) -> list[IntentResult]:
        """Classify several messages, returning results in input order.

        Messages are processed sequentially, one ``classify_intent`` call
        (and therefore one API request) per message.
        """
        return [self.classify_intent(message) for message in messages]