# filepath: c:\Users\Dell\Monil\Apps\code\Projects\space-songporter\OCR\nlp_service.py
import json
import traceback
from model_setup import zero_shot, ner  # zero-shot classification and NER pipelines (assumed to exist in model_setup.py)
from utils import parse_entities        # extracts (amount, currency, item) from NER output
from config import CATEGORY_KEYWORDS, QUERY_KEYWORDS  # category keyword map and query trigger words
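
# Illustrative only: the rough shapes this module assumes for its imports; the real
# definitions live in config.py and model_setup.py and may differ.
#   CATEGORY_KEYWORDS = {"Food": ["coffee", "lunch", "groceries"], "Income": ["salary"], ...}
#   QUERY_KEYWORDS    = ["how much", "total", "spending"]
#   zero_shot(text, candidate_labels=[...]) -> {"labels": [...], "scores": [...]}
#   ner(text) -> list of entity dicts, consumed by parse_entities() -> (amount, currency, item)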

def analyze_text(text: str) -> dict:
    """
    Analyzes the input text for intent, entities, and category.

    Args:
        text: The input text string.

    Returns:
        A dictionary with a "status" of "success", "fallback_required", or "failed".
        On success it also contains "type" (intent), "category", "amount",
        "currency", and "item"; on failure it contains an error message.
    """
    if not text:
        return {
            "status": "failed",
            "message": "Input text cannot be empty."
        }

    print(f"NLP Service: Processing text: {text}")

    # Step 1: Intent classification
    try:
        candidate_labels = ["expense", "investment", "query", "limit-setting", "income", "other"]
        intent_result = zero_shot(text, candidate_labels=candidate_labels)
        intent = intent_result["labels"][0]
        score = intent_result["scores"][0]
        print(f"NLP Service: Intent classification: {intent} (Score: {score:.2f})")
    except Exception as e:
        print(f"NLP Service: Error during intent classification: {e}")
        return {
            "status": "failed",
            "message": "Intent classification failed",
            "error": str(e)
        }

    # Step 2: Check for Query Keywords or Query Intent
    text_lower = text.lower()
    is_query_keyword_present = any(kw in text_lower for kw in QUERY_KEYWORDS)

    if intent == "query" or is_query_keyword_present:
        if is_query_keyword_present and intent != "query":
            print(f"NLP Service: Query keyword detected, overriding initial intent '{intent}'. Fallback triggered.")
        else:
            print(f"NLP Service: Intent classified as '{intent}' or query keyword found. Fallback route triggered.")
        
        # Return fallback status
        return {
            "status": "fallback_required",
            "message": "Intent requires further processing (query detected).",
            "original_text": text,
            "classified_intent": "query" # Standardize to query if fallback is triggered
        }
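
    # Illustrative fallback payload for a query such as "how much did I spend last month":
    #   {"status": "fallback_required",
    #    "message": "Intent requires further processing (query detected).",
    #    "original_text": "how much did I spend last month",
    #    "classified_intent": "query"}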

    # Step 3: Entity extraction (for non-fallback intents)
    try:
        entities = ner(text)
        print(f"NLP Service: NER entities: {entities}")
        # Pass the original text so parse_entities can use it as context for amounts and currency
        amount, currency, item = parse_entities(entities, full_text=text)
        print(f"NLP Service: Parsed entities: Amount={amount}, Currency={currency}, Item={item}")
    except Exception as e:
        print(f"NLP Service: Error during entity extraction: {e}")
        # Proceed with partial data instead of failing the whole request
        amount, currency, item = None, None, None  # Default to None on error
        # Log the full traceback for debugging
        traceback.print_exc()

    # Step 4: Category matching using config.py
    category = "Misc" # Default
    item_lower = item.lower() if item else ""

    # Check intent first for Income/Investment categories
    if intent == "income":
        category = "Income"
    elif intent == "investment":
        category = "Investment"
    else: # Only check keywords if not already classified as Income/Investment by intent
        for cat, keywords in CATEGORY_KEYWORDS.items():
            # Skip Income/Investment keywords here as intent handles them
            if cat in ["Income", "Investment"]:
                continue
            if any(kw in text_lower or (item_lower and kw in item_lower) for kw in keywords):
                category = cat
                break # Stop after first match

    # Refine intent if the keyword-based category disagrees with the classifier
    if intent != "income" and category == "Income":
        print("NLP Service: Correcting intent to 'income' based on keywords/category.")
        intent = "income"
    elif intent != "investment" and category == "Investment":
        print("NLP Service: Correcting intent to 'investment' based on keywords/category.")
        intent = "investment"
    # If intent is expense/other, make sure the category was not left as Income/Investment
    elif category in ["Income", "Investment"] and intent not in ["income", "investment"]:
        category = "Misc"  # Revert category if intent doesn't match

    print(f"NLP Service: Assigned category: {category}")

    # Final successful response structure
    return {
        "status": "success",
        "type": intent,
        "category": category,
        "amount": amount,
        "currency": currency,
        "item": item
    }

# Example usage (for testing nlp_service.py directly)
if __name__ == '__main__':
    test_cases = [
        "spent 5 eur on coffee",
        "how much did I spend last month",
        "salary credited 50000",
        "invested 1000 in stocks",
        "paid 20 usd for lunch",
        "got groceries for 50 dollars",
        "what was my total spending on food?",
        "received 200 GBP deposit"
    ]
    for case in test_cases:
        print(f"\n--- Testing: '{case}' ---")
        result = analyze_text(case)
        print(json.dumps(result, indent=2))