Spaces:
Running
Running
# filepath: c:\Users\Dell\Monil\Apps\code\Projects\space-songporter\OCR\nlp_service.py | |
import json | |
from model_setup import zero_shot, ner # Assuming model_setup.py exists and is correct | |
from utils import parse_entities # Assuming utils.py exists and is correct | |
from config import CATEGORY_KEYWORDS, QUERY_KEYWORDS # Import categories and query keywords from config | |
def _match_category(intent: str, text_lower: str, item_lower: str) -> str:
    """Pick the category for a non-query message.

    The intents "income" and "investment" map directly to their own
    categories. For every other intent, the first entry of
    ``CATEGORY_KEYWORDS`` (other than "Income"/"Investment") that has a
    keyword occurring as a substring of the lowercased text or the lowercased
    item wins. "Misc" is returned when nothing matches.
    """
    if intent == "income":
        return "Income"
    if intent == "investment":
        return "Investment"

    for cat, keywords in CATEGORY_KEYWORDS.items():
        # Income/Investment are decided by intent only, never by keywords.
        if cat in ("Income", "Investment"):
            continue
        if any(kw in text_lower or (item_lower and kw in item_lower) for kw in keywords):
            return cat  # Stop after first match
    return "Misc"


def analyze_text(text: str) -> dict:
    """
    Analyzes the input text for intent, entities, and category.

    Args:
        text: The input text string.

    Returns:
        A dictionary whose "status" key is one of:

        * "failed" - the input was empty/whitespace-only, or intent
          classification raised; "message" (and "error" for classifier
          failures) describe the problem.
        * "fallback_required" - the classified intent is "query" or one of
          ``QUERY_KEYWORDS`` occurs in the text; includes "original_text" and
          "classified_intent" (always "query").
        * "success" - includes "type" (the intent), "category", "amount",
          "currency" and "item". The last three are None when entity
          extraction failed.
    """
    # Whitespace-only input carries no information for the classifier, so it
    # is rejected the same way as an empty string.
    if not text or (isinstance(text, str) and not text.strip()):
        return {
            "status": "failed",
            "message": "Input text cannot be empty."
        }

    print(f"NLP Service: Processing text: {text}")

    # Step 1: Intent classification
    try:
        candidate_labels = ["expense", "investment", "query", "limit-setting", "income", "other"]
        intent_result = zero_shot(text, candidate_labels=candidate_labels)
        # The first label/score pair is used as the predicted intent.
        intent = intent_result["labels"][0]
        score = intent_result["scores"][0]
        print(f"NLP Service: Intent classification: {intent} (Score: {score:.2f})")
    except Exception as e:
        print(f"NLP Service: Error during intent classification: {e}")
        return {
            "status": "failed",
            "message": "Intent classification failed",
            "error": str(e)
        }

    # Step 2: Check for Query Keywords or Query Intent
    text_lower = text.lower()
    is_query_keyword_present = any(kw in text_lower for kw in QUERY_KEYWORDS)

    if intent == "query" or is_query_keyword_present:
        if is_query_keyword_present and intent != "query":
            print(f"NLP Service: Query keyword detected, overriding initial intent '{intent}'. Fallback triggered.")
        else:
            print(f"NLP Service: Intent classified as '{intent}' or query keyword found. Fallback route triggered.")
        # Return fallback status
        return {
            "status": "fallback_required",
            "message": "Intent requires further processing (query detected).",
            "original_text": text,
            "classified_intent": "query"  # Standardize to query if fallback is triggered
        }

    # Step 3: Entity extraction (for non-fallback intents)
    try:
        entities = ner(text)
        print(f"NLP Service: NER entities: {entities}")
        # The original text is forwarded as 'full_text' alongside the entities.
        amount, currency, item = parse_entities(entities, full_text=text)
        print(f"NLP Service: Parsed entities: Amount={amount}, Currency={currency}, Item={item}")
    except Exception as e:
        # Best effort: a failed extraction still yields a "success" response,
        # just with empty entity fields.
        print(f"NLP Service: Error during entity extraction: {e}")
        amount, currency, item = None, None, None  # Default to None on error
        # Log the traceback for debugging
        import traceback
        traceback.print_exc()

    # Step 4: Category matching using config.py
    item_lower = item.lower() if item else ""
    category = _match_category(intent, text_lower, item_lower)

    # NOTE: a former "refine intent from category" step was removed here. It
    # could never fire, because keyword matching skips the Income/Investment
    # categories, so the category only takes those values when the intent
    # already agrees.

    print(f"NLP Service: Assigned category: {category}")

    # Final successful response structure
    return {
        "status": "success",
        "type": intent,
        "category": category,
        "amount": amount,
        "currency": currency,
        "item": item
    }
# Manual smoke test: run this module directly to print the analysis of a few
# representative messages (expenses, income, investments and queries).
if __name__ == "__main__":
    sample_messages = (
        "spent 5 eur on coffee",
        "how much did I spend last month",
        "salary credited 50000",
        "invested 1000 in stocks",
        "paid 20 usd for lunch",
        "got groceries for 50 dollars",
        "what was my total spending on food?",
        "received 200 GBP deposit",
    )

    for message in sample_messages:
        print(f"\n--- Testing: '{message}' ---")
        analysis = analyze_text(message)
        # Pretty-print the result dictionary for easy reading.
        print(json.dumps(analysis, indent=2))