import streamlit as st
import pdfplumber
import pandas as pd

# Keyword lists used to assign a spending category to a transaction
# description. Matching is case-insensitive and substring-based.
CATEGORY_MAPPING = {
    "Groceries": ["Walmart", "Kroger", "Whole Foods", "Costco", "Trader Joe", "Safeway"],
    "Dining": ["McDonald's", "Starbucks", "Chipotle", "Subway", "Domino", "Pizza", "Burger", "Restaurant"],
    "Utilities": ["Verizon", "AT&T", "T-Mobile", "Sprint", "Comcast", "Xfinity", "Con Edison", "Electric", "Water", "Gas"],
    "Rent": ["Apartment", "Rent", "Landlord", "Lease"],
    "Entertainment": ["Netflix", "Spotify", "Amazon Prime", "Hulu", "Disney", "Cinema"],
    "Transport": ["Uber", "Lyft", "MetroCard", "Gas Station", "Shell", "Chevron"],
    "Healthcare": ["Pharmacy", "CVS", "Walgreens", "Doctor", "Hospital", "Dental"],
    "Shopping": ["Amazon", "Best Buy", "Target", "Walmart", "Ebay", "Retail"],
    "Other": [],
}

# Column order of the transactions DataFrame built by process_pdf().
_TRANSACTION_COLUMNS = ["Date", "Description", "Amount"]


def classify_transaction(description):
    """Return the spending category whose keyword best matches *description*.

    The description is converted with ``str()`` and lower-cased, then every
    keyword in ``CATEGORY_MAPPING`` is tested as a case-insensitive substring.
    The longest matching keyword wins, so a specific keyword such as
    "Gas Station" (Transport) is not shadowed by a shorter one such as
    "Gas" (Utilities). When several matching keywords share the same length,
    the category listed first in ``CATEGORY_MAPPING`` is kept.

    Args:
        description: Transaction description; any value accepted by ``str()``.

    Returns:
        The matching category name, or "Other" when no keyword matches.
    """
    text = str(description).lower()
    best_category = "Other"
    best_length = 0
    for category, keywords in CATEGORY_MAPPING.items():
        for keyword in keywords:
            # Strict ">" keeps the earlier category when lengths tie.
            if len(keyword) > best_length and keyword.lower() in text:
                best_category = category
                best_length = len(keyword)
    return best_category


def _parse_transaction_line(line):
    """Split one statement line into a (date, description, amount) tuple.

    The first whitespace-separated token is taken as the date, the last as
    the amount, and everything in between as the description. "$" and ","
    are removed from the amount token so values like "$1,234.56" can be
    converted to a number later. Returns ``None`` for lines with fewer than
    three tokens, which cannot hold all three fields.

    NOTE(review): this layout is a heuristic; adjust it to the actual
    statement format being processed.
    """
    tokens = line.split()
    if len(tokens) < 3:
        return None
    amount = tokens[-1].replace("$", "").replace(",", "")
    return tokens[0], " ".join(tokens[1:-1]), amount


def process_pdf(file):
    """Extract transactions from a PDF statement and categorize them.

    Args:
        file: Path or file-like object passed to ``pdfplumber.open``, or
            ``None`` when nothing was uploaded.

    Returns:
        A ``(transactions, category_summary)`` tuple of DataFrames.
        ``transactions`` has the columns Date, Description, Amount and
        Category; ``category_summary`` holds the summed Amount per Category.
        When *file* is ``None`` an error is shown via ``st.error`` and
        ``(None, None)`` is returned, so callers can always unpack two values.
    """
    if file is None:
        st.error("No file uploaded.")
        return None, None

    # Extract text once per page; pages without extractable text are skipped.
    with pdfplumber.open(file) as pdf:
        page_texts = (page.extract_text() for page in pdf.pages)
        text = "\n".join(page_text for page_text in page_texts if page_text)

    # Only lines containing at least one digit are transaction candidates.
    rows = []
    for line in text.splitlines():
        if not any(char.isdigit() for char in line):
            continue
        parsed = _parse_transaction_line(line)
        if parsed is not None:
            rows.append(parsed)

    df = pd.DataFrame(rows, columns=_TRANSACTION_COLUMNS)

    # Amounts that are not valid numbers become NaN and are ignored by sum().
    df["Amount"] = pd.to_numeric(df["Amount"], errors="coerce")

    df["Category"] = df["Description"].apply(classify_transaction)

    # Summarize total spending per category.
    category_summary = df.groupby("Category")["Amount"].sum().reset_index()

    return df, category_summary


# Streamlit UI
st.title("📄 Credit Card Statement Classifier")
st.write("Upload a **PDF bank/credit card statement**, and this app will categorize transactions and show your spending summary.")

uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])

if uploaded_file is not None:
    st.success("✅ File uploaded successfully!")

    # Process and display transactions
    df_result, category_summary = process_pdf(uploaded_file)

    if df_result is not None:
        if df_result.empty:
            # Nothing parseable was found; say so instead of showing empty tables.
            st.warning("No transactions could be extracted from this PDF.")
        else:
            st.write("### 📊 Classified Transactions:")
            st.dataframe(df_result)  # Detailed transactions

            st.write("### 💰 Spending Summary by Category:")
            st.dataframe(category_summary)  # Spending summary