File size: 2,061 Bytes
07b50c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import re

# --- NLP Configuration ---
# Known currency symbols; append further symbols here as needed.
CURRENCY_SYMBOLS = [
    "₹",  # Indian rupee
    "$",  # dollar
    "€",  # euro
    "£",  # pound sterling
]

# Fallback pattern for monetary values, used when spaCy misses a MONEY entity.
#
# Three alternatives are tried in order; capture groups are numbered:
#   1, 2 -> currency marker BEFORE the amount  ("$12.50", "rs 500")
#   3, 4 -> currency marker AFTER the amount   ("500 rupees", "100$", "500rs")
#   5    -> standalone number with no currency marker attached
#
# Notes on the amount sub-pattern `\d[\d,]*(?:\.\d{1,2})?`:
#   * it must start with a digit, so stray punctuation such as the comma in
#     "milk,bread" is never reported as an amount;
#   * thousands separators and up to two decimal places are accepted.
FALLBACK_AMOUNT_REGEX = re.compile(
    # Groups 1-2: symbol or currency code, optional space, then the amount.
    r'([\$€£₹]|\b(?:rs|usd|eur|gbp))\s?(\d[\d,]*(?:\.\d{1,2})?)\b'
    # Groups 3-4: amount, optional space, then a symbol or a currency word.
    # A symbol needs no trailing \b (it is not a word character); the (?!\d)
    # keeps it from stealing the prefix of a following amount ("2 $5").
    # Currency words keep a trailing \b so "100 rsvp" is not read as rupees,
    # but need no leading \b so attached units such as "500rs" are found.
    r'|\b(\d[\d,]*(?:\.\d{1,2})?)\s?'
    r'([\$€£₹](?!\d)|(?:rupees|rs|dollars|euros|pounds|usd|eur|gbp)\b)'
    # Group 5: a bare number with no currency marker.
    r'|\b(\d[\d,]*(?:\.\d{1,2})?)\b',
    re.IGNORECASE,
)

# Category name -> keywords associated with that category.
# No "Misc" entry is listed: it can serve as the default when nothing matches.
CATEGORY_KEYWORDS = {
    "Coffee": [
        "coffee", "latte", "cappuccino", "starbucks", "cafe",
        "café", "espresso", "mocha", "ccd",
    ],
    "Food": [
        "food", "meal", "lunch", "dinner", "snack",
        "restaurant", "dining", "sandwich", "burger", "pizza",
    ],
    "Groceries": [
        "groceries", "supermarket", "vegetables", "milk",
        "market", "zepto", "blinkit", "bigbasket",
    ],
    "Entertainment": [
        "movie", "cinema", "concert", "game",
        "netflix", "spotify", "tickets", "fun",
    ],
    # Travel and transport keywords are merged into a single category.
    "Transport": [
        "travel", "taxi", "flight", "train", "bus", "uber", "ola", "fuel",
        "gas", "lyft", "cab", "ticket", "metro", "auto", "rickshaw", "commute",
    ],
    "Shopping": [
        "shop", "shopping", "clothes", "electronics", "mall",
        "amazon", "flipkart", "purchase", "order", "store",
    ],
    "Utilities": [
        "utility", "utilities", "bill", "electricity",
        "water", "internet", "phone", "recharge",
    ],
    "Rent": ["rent", "lease"],
    # Money coming in rather than going out.
    "Income": ["salary", "received", "credited", "deposit", "income"],
    "Investment": [
        "invest", "stock", "shares", "mutual fund", "sip", "investment",
    ],
}

# Helper phrases for intent detection that suggest a query.
# (Less critical when a zero-shot classifier is used, but still useful hints.)
QUERY_KEYWORDS = [
    "how much",
    "show me",
    "list",
    "what are",
    "total",
    "summary",
    "spending",
    "history",
    "report",
    "biggest",
    "view",
]
# Verbs that often accompany a statement about spending money.
ADD_EXPENSE_VERBS = [
    "spent",
    "bought",
    "paid",
    "cost",
    "charged",
    "expensed",
    "got",
    "had",
]