receipt_scanner / code.py
lisstasy's picture
Update code.py
b4fcc21 verified
######################################################################
### SET UP ###
######################################################################
import numpy as np
import cv2
import pandas as pd
import json
from pydantic import BaseModel, Field
from typing import List, Union
from datetime import date, time,datetime
from enum import Enum
from paddleocr import PaddleOCR
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
import uuid
import os
import plotly.express as px
import plotly.graph_objects as go
import base64
from PIL import Image
from io import BytesIO
######################################################################
### EXMAPLES ###
######################################################################
example_cat_1= {
"store": "HiperDino",
"address": "9238-SD Bernardo de la torre",
"city": "Tafira Baja",
"phone": "928493638",
"receipt_no": "2024/923813-00060866",
"date": "15/04/2024",
"time": "16:01",
"items": [
{"name": "FRESA TARINA 500 GR", "unit": 1, "price": 1.59, "amount": 1.59, "category": "fruits"},
{"name": "HIPERDINO ACEITUNA R/ANCHOA LATA 350", "unit": 1, "price": 0.95, "amount": 0.95, "category": "canned_goods"},
{"name": "DESPERADOS CERVEZA TOQUE TEQUILA BOT", "unit": 1, "price": 1.05, "amount": 1.05, "category": "beverages"},
{"name": "HIPERDINO CENTRO JAMON SERRANO BODEG", "unit": 0.310, "price": 13.62, "amount": 4.22, "category": "protein_foods"},
{"name": "MONTESANO JAMON COCIDO SELECCION KG", "unit": 0.308, "price": 8.74, "amount": 2.15, "category": "protein_foods"}
],
"total": 9.96,
"number_items": 5,
"payment_method": "tarjeta"
}
example_cat_2 = {
"store": "SPAR TAFIRA",
"address": "C/. Bruno Naranjo DIAZ 9A-9B",
"city": "Tafira Baja",
"phone": "928 351 616",
"receipt_no": "014\\002-18965",
"date": "06/04/2024",
"time": "15:23",
"items": [
{"name": "CLIPPER MANZ.1.5L.", "unit": 1, "price": 1.49, "amount": 1.49, "category": "beverages"},
{"name": "PLATANO PRIMERA GR", "unit": 1.40, "price": 1.99, "amount": 2.79, "category": "fruits"},
{"name": "MANZANA PINK LADY GR", "unit": 1, "price": 2.99, "amount": 2.99, "category": "fruits"},
{"name": "SALSA.BARI.PES.GEN.1", "unit": 1, "price": 3.10, "amount": 3.10, "category": "condiments"},
{"name": "GOFIO B.LUGAR MIL.FU", "unit": 1, "price": 1.85, "amount": 1.85, "category": "grains"},
{"name": "ZUM.DISF.D.SIMON PIN", "unit": 1, "price": 1.75, "amount": 1.75, "category": "beverages"},
{"name": "LECHE.GRNJ.FLR.UHT.", "unit": 1, "price": 1.15, "amount": 1.15, "category": "dairy"}
],
"total": 15.12,
"number_items": 7,
"payment_method": "tarjeta"
}
example_cat_4 = {
"store": "MERCADONA",
"address": "AVDA. PINTOR FELO MONZON (C.C. 7 PALMAS) S/N",
"city": "35019 LAS PALMAS DE GRAN CANARIA",
"phone": "928411755",
"receipt_no": "2185-013-6970Z2",
"date": "03/04/2024",
"time": "21:22",
"items": [
{ "name": "DETERG HIPO COLONIA", "unit": 1, "price": 3.30, "amount": 3.30, "category": "household"},
{ "name": "SOLOMILLO POLLO CONG", "unit": 3, "price": 4.50, "amount": 13.50, "category": "protein_foods"},
{ "name": "JAMONCITO BARBACOA", "unit": 1, "price": 2.32, "amount": 2.32, "category": "protein_foods"},
{ "name": "JAMONCITO BARBACOA", "unit": 1, "price": 2.76, "amount": 2.76, "category": "protein_foods"},
{ "name": "NUEZ NATURAL", "unit": 1, "price": 2.00, "amount": 2.00, "category": "nuts_and_seeds"},
{ "name": "QUESU COTIAGE", "unit": 2, "price": 1.25, "amount": 2.50, "category": "dairy"},
{ "name": "POLLO ENTERO LIMPIO", "unit": 1, "price": 6.52, "amount": 6.52, "category": "protein_foods"},
{ "name": "PAPEL VEGETAL 30H", "unit": 1, "price": 1.70, "amount": 1.70, "category": "household"},
{ "name": "BEBIDA AVELLANAS", "unit": 1, "price": 1.30, "amount": 1.30, "category": "beverages"},
{ "name": "INFUSION DORMIR", "unit": 1, "price": 1.05, "amount": 1.05, "category": "beverages"},
{ "name": "LECHE DE COCO", "unit": 1, "price": 1.40, "amount": 1.40, "category": "beverages"},
{ "name": "QUESO UNTAR LIGHT", "unit": 1, "price": 1.35, "amount": 1.35, "category": "dairy"},
{ "name": "RULITO CABRA", "unit": 1, "price": 2.45, "amount": 2.45, "category": "dairy"},
{ "name": "GRIEGO LIGERO", "unit": 1, "price": 1.65, "amount": 1.65, "category": "dairy"},
{ "name": "BOLSA PLASTICO", "unit": 1, "price": 0.15, "amount": 0.15, "category": "household"}
],
"total": 43.95,
"number_items": 15,
"payment_method": "tarjeta"
}
receipt_texts_1 = [
'HiperDino',
'Las mcjores precios de Canarias',
'DINOSOL SUPERMERCADOS. S.L',
'C.I.F.B61742565',
'9238-SD BERNARD0 DE LA T0RRE',
'Te1éfono:928493638',
'Centro Vend. Documento',
'Fecha',
'Hora',
'9238 7868352024/923813-0006086615/04/2024 16:01',
'ARTICULO',
'IMPORTE',
'FRESA TARRINA 500 GR',
'1,59',
'HIPERDINO ACEITUNA R/ANCHOA LATA 350',
'0,95',
'DESPERADOS CERVEZA TOQUE TEQUILA BOT',
'1,05',
'HIPERDINO CENTRO JAMON SERRANO BODEG',
'0.310x13,62€/kg',
'4,22',
'MONTESANO JAMON COCIDO SELECCION KG',
'0,308 x 8,74 €/kg',
'Dto.0,54€',
'2,15',
'Total Articulos: 5',
'TOTAL COMPRA:',
'9,96',
'Detalle de pagos',
'EFECTIVO',
'0,00',
'TARJETA CREDITO',
'9,96',
'EMPLEAD0:12789.TICKET_P.E.203659',
'HORA:160142',
'FECHA-15/04/2024',
'IMP0RTE9,96',
'TARJETAxxxxxxxx*xxx5597',
'087663',
'CAPTURA CHIP / AUTORIZACION:',
'LABEL: Mastercard',
'ARC: 00',
'ATC:004F',
'AID:A0000000041010',
'AUTENTICACION: Contact1ess EMV',
'DCC INTERNACIONAL/REDSYS PCI',
'COM. PE: 154197156',
'TER. PE: 00000001',
'SES. PE:15042024001'
]
receipt_texts_2 = [
'SPAR TAFIRA',
'C/.BRUNO NARANJO DIAZ9A-B',
'TLF.:928351616-FAX:928351004',
'NIFB02868248',
'SUPERMERCAD0S DABEL2021,S.L',
'TAFIRA BAJA',
'FACTURA SIMPLIFICADA',
'Nro.014002-18965',
'Fecha:06-04-202415:23',
'Cajerc:10074',
'CANT.',
'PVP IMPORTE',
'DESCRIPCION',
'1,49',
'1,49',
'CLIPPER MANZ.1.5L.',
'1',
'1,40',
'1,99',
'PLATANO PRIMERA GRAN',
'2,79',
'2,99',
'2.99',
'MANZANA PINK LADY GR',
'3,10',
'3,10',
'SALSA.BARI.PES.GEN.1',
'1,85',
'1,85',
'GOFIO B.LUGAR MIL.FU',
'1',
'1,75',
'1,75',
'ZUM.DISF.D.SIMON PIN',
'1',
'1,15',
'1,15',
'LECHE.GRNJ.FLR.UHT.',
'1',
'Lineas : 7',
'Total F',
'15,12',
'"TARJETA',
'15.12',
'Entregado',
'Cambio',
'0,00',
'Operacion',
': VENTA',
'06/04/202415:24',
'Fecha',
'Comercio',
'249060518',
'ARC',
'00',
'A0000000031010',
'AID',
'Visa DEBIT',
'App Labe1',
'************761',
'Tarjeta',
'15,12EUR',
'Importe',
'-Copia para al'
]
receipt_texts_4 = [
'S.A.',
'MERCADONA.',
'A-46103834',
'AVDA. PINTOR FELO MONZON (C.C. 7 PALMAS)',
'S/N',
'35019 LAS PALMAS DE GRAN CANARIA',
'928411755',
'TELEFONO:',
'03/04/202421:220P:144041',
'FACTURA SIMPLIFICADA:2185-013-6970Z2',
'Imp.)',
'P.Unit',
'Descripción',
'3,30',
'1 DETERG HIPO COLONIA',
'13,50',
'4,50',
'3 SOLOMILLO POLLO CONG',
'2,32',
'1 JAMONCITO BARBACOA',
'2,76',
'1 JAMONCITO BARBACOA',
'2,00',
'1 NUEZ NATURAL',
'1,25',
'2,50',
'2 QUESU COTIAGE',
'6,52',
'1 POLLO ENTERO LIMPIO',
'1,70',
'1 PAPEL VEGETAL 30H',
'1.30',
'1 BEBIDA AVELLANAS',
'1,05',
'1 INFUSION DORMIR',
'1,40',
'1 LECHE DE COCO',
'1,35',
'1 QUESO UNTAR LIGHT',
'1 RULITO CABRA',
'2,45',
'1 GRIEGO LIGERO',
'1,65',
'1 BOLSA PLASTICO',
'0,15',
'TOTAL @)',
'43,95',
'TARJETA BANCARIA',
'43,95',
'COMERCIANTE MINORISTA',
'TARJBANCARIA',
'******915',
'N.C072850332',
'AUT:1LPOXG',
'AIDA0000000041010',
'ARC:3030',
')',
'Importe43,95',
'DEBIT MASTERCARD'
]
######################################################################
### SCHEMA ###
######################################################################
class ProductCategory(str, Enum):
fruits = 'fruits'
vegetables = 'vegetables'
protein_foods = 'protein_foods'
seafood = 'seafood'
dairy = 'dairy'
grains = 'grains'
nuts_and_seeds = 'nuts_and_seeds'
sweets = 'sweets'
spices = 'spices'
beverages = 'beverages'
snacks = 'snacks'
condiments = 'condiments'
frozen_foods = 'frozen_foods'
bakery = 'bakery'
canned_goods = 'canned_goods'
household = 'household'
personal_care = 'personal_care'
pet_supplies = 'pet_supplies'
other = 'other'
class ItemInfo(BaseModel):
name: str = Field(..., description="Name of the item")
unit: float = Field(..., description="Quantity of the item")
price: float = Field(..., description="Price per unit of the item")
amount: float = Field(..., description="Total amount for the item")
category: ProductCategory = Field(..., description="Category of the item")
class PaymentMethodEnum(str, Enum):
tarjeta = 'tarjeta'
efectivo = 'efectivo'
class ReceiptInfo(BaseModel):
store: str = Field(..., description="Store name")
address: str = Field(..., description="Address of the store")
city: str = Field(..., description="City where the store is located")
phone: str = Field(..., description="Phone number of the store")
receipt_no: str = Field(..., description="Receipt number")
date: str = Field(..., description="Date of the receipt in DD/MM/YYYY format")
time: str = Field(..., description="Time of the transaction")
items: List[ItemInfo] = Field(..., description="List of items purchased")
total: float = Field(..., description="Total amount of the receipt")
number_items: int = Field(..., description="Number of items in the receipt")
payment_method: PaymentMethodEnum = Field(..., description="Payment method used")
######################################################################
### FUNCTIONS ###
######################################################################
# Directory to store user-specific data
DATA_STORAGE_DIR = 'user_data'
# Ensure the data storage directory exists
os.makedirs(DATA_STORAGE_DIR, exist_ok=True)
def get_model(api_key):
model = ChatOpenAI(model="gpt-3.5-turbo-0125", api_key=api_key)
structured_llm = model.with_structured_output(ReceiptInfo, method="json_mode")
return structured_llm
def get_user_data_file(session_id):
return os.path.join(DATA_STORAGE_DIR, f'{session_id}.csv')
def ensure_user_data_file_exists(session_id):
data_file = get_user_data_file(session_id)
if not os.path.exists(data_file):
empty_df = pd.DataFrame(columns=[
'store', 'address', 'city', 'phone', 'receipt_no', 'date', 'time',
'total', 'number_items', 'payment_method', 'week', 'month', 'name',
'unit', 'price', 'amount', 'category'
])
empty_df.to_csv(data_file, index=False)
def ensure_numeric_columns(df, columns):
for column in columns:
df[column] = pd.to_numeric(df[column], errors='coerce')
return df
def ensure_category(df):
categories = [
'fruits', 'vegetables', 'protein_foods', 'seafood', 'dairy', 'grains', 'nuts_and_seeds',
'sweets', 'spices', 'beverages', 'snacks', 'condiments', 'frozen_foods', 'bakery',
'canned_goods', 'household', 'personal_care', 'pet_supplies', 'other'
]
df['category'] = df['category'].apply(lambda x: x if x in categories else 'other')
return df
def parse_dates(date_str):
date_formats = [
"%d/%m/%Y", # Day/Month/Year
"%Y-%m-%d", # Year-Month-Day
"%m/%d/%Y", # Month/Day/Year
"%d-%m-%Y", # Day-Month-Year
"%m-%d-%Y", # Month-Day-Year
"%Y/%m/%d", # Year/Month/Day
"%Y.%m.%d", # Year.Month.Day
"%d.%m.%Y", # Day.Month.Year
"%m.%d.%Y" # Month.Day.Year
]
for fmt in date_formats:
try:
return pd.to_datetime(date_str, format=fmt)
except ValueError:
continue
return pd.to_datetime(date_str, errors='coerce')
# def image_ocr(image):
# img = cv2.imread(image)
# # Perform OCR
# paddleocr = PaddleOCR(lang="es",ocr_version="PP-OCRv4",show_log = False, use_gpu=True)
# result = paddleocr.ocr(img, cls=True)
# result = result[0]
# text = [line[1][0] for line in result]
# return text
def image_ocr(image_files):
paddleocr = PaddleOCR(lang="es", ocr_version="PP-OCRv4", show_log=False, use_gpu=True)
all_texts = []
for image_file in image_files:
img = cv2.imread(image_file)
result = paddleocr.ocr(img, cls=True)
result = result[0]
text = [line[1][0] for line in result]
all_texts.append(" ".join(text))
return all_texts
def structured_output(texts, api_key):
# Initialize the model
structured_llm = get_model(api_key)
examples_cat = [
{"input": f"{receipt_texts_1}", "output": f"{example_cat_1}"},
{"input": f"{receipt_texts_2}", "output": f"{example_cat_2}"},
{"input": f"{receipt_texts_4}", "output": f"{example_cat_4}"}
]
example_prompt = ChatPromptTemplate.from_messages(
[
("human", "{input}"),
("ai", "{output}"),
])
few_shot_prompt_cat = FewShotChatMessagePromptTemplate(
example_prompt=example_prompt,
examples=examples_cat,
)
few_shot_prompt_cat.format()
system_message_cat = """You are POS receipt data expert, parse, detect, recognize and convert the receipt OCR image result into structure receipt data object.
Next, assign a category to each item. Don't make up value not in the Input. Output must be a well-formed JSON object.```json
"""
final_prompt_cat = ChatPromptTemplate.from_messages(
[
("system", "{system_message}"),
few_shot_prompt_cat,
("human", "{input}"),
])
chain = final_prompt_cat | structured_llm
all_data = []
for text in texts:
data = chain.invoke({"system_message":system_message_cat,"input": text})
all_data.append(data)
# Transform the data to dataframe
all_dfs = []
for data in all_data:
store_df = pd.DataFrame([{
'store': data.get('store', None),
'address': data.get('address', None),
'city': data.get('city', None),
'phone': data.get('phone', None),
'receipt_no': data.get('receipt_no', None),
#'date': pd.to_datetime(data.get('date', None), format='%d/%m/%Y'),
'date': parse_dates(data.get('date', None)),
'time': data.get('time', None),
'total': pd.to_numeric(data.get('total', None), errors='coerce'),
'number_items': pd.to_numeric(data.get('number_items', None), errors='coerce'),
'payment_method': data.get('payment_method', None)
}])
#if 'date' in store_df.columns:
store_df['date'] = pd.to_datetime(store_df['date'], errors='coerce')
store_df['week'] = store_df['date'].dt.isocalendar().week
store_df['month'] = store_df['date'].dt.strftime('%B')
store_df['date'] = store_df['date'].dt.strftime('%d/%m/%Y')
# Ensure the 'month' column is in the correct order
month_order = [
"January", "February", "March", "April", "May", "June",
"July", "August", "September", "October", "November", "December"
]
store_df['month'] = pd.Categorical(store_df['month'], categories=month_order, ordered=True)
# Transform items data to dataframe
items_df = pd.DataFrame(data.get('items', []))
# Fill 'unit' column with 1 if it is missing
if 'unit' not in items_df.columns:
items_df['unit'] = 1
else:
items_df['unit'] = pd.to_numeric(items_df['unit'], errors='coerce').fillna(1)
for column in ['price', 'amount']:
items_df[column] = pd.to_numeric(items_df[column], errors='coerce')
items_df = ensure_category(items_df)
# Concatenate store and items DataFrames
df = pd.concat([store_df] * len(items_df), ignore_index=True)
df = pd.concat([df, items_df], axis=1)
all_dfs.append(df)
return pd.concat(all_dfs, ignore_index=True)
# def update_combined_df(corrected_df, combined_df=None):
# # If there's an existing combined DataFrame, concatenate new data
# if combined_df is not None:
# combined_df = pd.concat([combined_df, corrected_df], ignore_index=True)
# else:
# combined_df = corrected_df
# return combined_df
def update_combined_df(new_data, session_id):
# Ensure user data file exists
ensure_user_data_file_exists(session_id)
# Load the user's historical data
data_file = get_user_data_file(session_id)
combined_df = pd.read_csv(data_file)
# Append the new data
new_data = new_data.dropna(how='all')
combined_df = pd.concat([combined_df, new_data], ignore_index=True)
# Ensure numeric columns and category column
columns_to_ensure_numeric = ['total', 'number_items', 'unit', 'price', 'amount']
combined_df = ensure_numeric_columns(combined_df, columns_to_ensure_numeric)
combined_df = ensure_category(combined_df)
# Save the updated combined data back to the user's file
combined_df.to_csv(data_file, index=False)
receipt_count = combined_df['receipt_no'].nunique()
return combined_df, receipt_count
def image_to_df(image_files, api_key):
text = image_ocr(image_files)
# Convert OCR text to structured output
df = structured_output(text, api_key)
return df
def initialize_session():
return str(uuid.uuid4())
######################################################################
### VISUALIZATION FUNCTIONS ###
######################################################################
categories = [
'protein_foods', 'dairy', 'fruits', 'vegetables', 'grains', 'nuts_and_seeds',
'beverages', 'snacks', 'condiments', 'frozen_foods', 'bakery', 'canned_goods',
'household', 'personal_care', 'pet_supplies', 'other', 'sweets', 'spices', 'seafood'
]
colors = [
'#87c293','#6074ab','#6b9acf','#8bbde6','#aae0f3','#c8eded',
'#d18b79','#dbac8c','#d18b79','#dbac8c','#e6cfa1','#e7ebbc',
'#b2dba0','#70a18f ','#637c8f', '#949da8','#b56e75','#c98f8f', '#edd5ca'
]
color_map = {category: colors[i % len(colors)] for i, category in enumerate(categories)}
def visualize_expenses_vs_budget(combined_df, selected_month, budget=500):
if combined_df is not None and not combined_df.empty:
expenses_per_month = combined_df.groupby('month')['amount'].sum().reset_index()
filtered_data = expenses_per_month[expenses_per_month['month'] == selected_month]
total = filtered_data['amount'].sum()
percent_budget_left = float(100 - (total / budget) * 100)
labels = ['Expenses', 'Remaining Budget']
values = [total, budget - total]
fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=0.5, hoverinfo='label+value', textinfo='none')])
fig.update_traces(marker=dict(colors=['rgba(0, 0, 0, 0)', '#80ced6']), sort=False)
fig.add_annotation(
text=f'<b>{percent_budget_left:.0f}%<b>',
x=0.5,
y=0.53,
showarrow=False,
font=dict(size=35),
align='center'
)
fig.add_annotation(
text='of Budget left',
x=0.5,
y=0.4,
showarrow=False,
font=dict(size=14),
align='center'
)
fig.update_layout(
title=dict(text=f'<b>Expenses vs. Budget in {selected_month}</b>',
x=0.5,
font=dict(size=16, color='Grey', family='Arial, sans-serif')),
showlegend=False,
width=500,
height=500
)
return fig
else:
return None
def visualize_budget_tracking(combined_df, expenses_per_month, budget=500):
if combined_df is not None and not combined_df.empty:
expenses_per_month = combined_df.groupby('month')['amount'].sum().reset_index()
expenses_per_month['percentage_expenses'] = (expenses_per_month['amount'] / budget) * 100
expenses_per_month['percentage_budget'] = 100 - expenses_per_month['percentage_expenses']
fig = go.Figure()
fig.add_trace(go.Bar(
x=expenses_per_month['month'],
y=expenses_per_month['percentage_expenses'],
name='Actual Expenses',
marker_color='#eeac99',
text=round(expenses_per_month['amount'], 0),
hovertemplate='<b>%{y:.0f}%<br>Expenses: %{text} EUR<b>',
textposition='inside',
insidetextanchor='middle',
textfont_size=14
))
fig.add_trace(go.Bar(
x=expenses_per_month['month'],
y=expenses_per_month['percentage_budget'],
name='Remaining Budget',
marker_color='#80ced6',
text=round(budget - round(expenses_per_month['amount']), 0),
hovertemplate='<b>%{y:.0f}%<br>Remaining Budget: %{text} EUR<b>',
textposition='inside',
insidetextanchor='middle',
textfont_size=14
))
fig.update_layout(
title=dict(text='<b>Budget Tracking: Expenses vs. Budget per Month</b>',
x=0.5,
font=dict(size=16, color='Grey', family='Arial, sans-serif')),
xaxis_title='',
yaxis=dict(title='Percentage', zeroline=False, showgrid=False),
plot_bgcolor='rgb(242,242,242)',
showlegend=True,
legend=dict(
orientation="h",
yanchor="bottom",
y=-0.2),
barmode='relative',
height=500,
width=700
#width=len(expenses_per_month['month'])*300
)
return fig
else:
return None
def visualize_pie_chart(combined_df, selected_month):
if combined_df is not None and not combined_df.empty:
expenses_per_month_category = combined_df.groupby(['month', 'category'])['amount'].sum().reset_index()
filtered_data = expenses_per_month_category[expenses_per_month_category['month'] == selected_month]
fig = px.pie(filtered_data, values='amount', names='category',
hole=0.4,
color='category',
#color_discrete_sequence=px.colors.qualitative.Light24,
color_discrete_map=color_map,
labels={'amount': 'Expenses', 'category': 'Category'},
width=500,
height=500,
)
fig.update_traces(textinfo='percent',
insidetextorientation='radial',
textposition='inside',
hovertemplate="<b>Category: %{customdata}<br>Expenses: %{value} EUR<b>",
customdata=expenses_per_month_category['category'])
fig.update_layout(
showlegend=False,
plot_bgcolor='rgb(242,242,242)',
title=dict(text=f'<b>Category Expenses in {selected_month}</b>',
x=0.5,
font=dict(size=16, color='Grey', family='Arial, sans-serif'))
)
return fig
else:
return None
def visualize_category_expenses(combined_df):
if combined_df is not None and not combined_df.empty:
expenses_per_month_category = combined_df.groupby(['month', 'category'])['amount'].sum().reset_index()
fig = px.bar(expenses_per_month_category, x='month', y='amount', color='category',
barmode='stack',
#color_discrete_sequence=px.colors.qualitative.Light24,
color_discrete_map=color_map,
labels={'amount': 'Expenses', 'category': 'Category'},
height=500,
width=700
#width=expenses_per_month_category['month'].nunique()*300
)
fig.update_traces(hovertemplate="<b>Expenses: %{y} EUR<b>")
fig.update_layout(
xaxis=dict(showticklabels=True, title='', showgrid=False),
yaxis=dict(zeroline=False, showgrid=False),
plot_bgcolor='rgb(242,242,242)',
title=dict(text='<b>Category expenses per month</b>', x=0.5, font=dict(size=16, color='Grey', family='Arial, sans-serif'))
)
return fig
else:
return None
def visualize_price_distribution(combined_df):
if combined_df is not None and not combined_df.empty:
fig = px.box(combined_df, x='category', y='price', color='category',
title='Price Distribution by Category',
#color_discrete_sequence=px.colors.qualitative.Light24,
color_discrete_map=color_map,
height=500,
width=500
)
fig.update_layout(
xaxis=dict(showticklabels=False, title=''),
yaxis=dict(zeroline=False),
#paper_bgcolor='rgb(233,233,233)',
plot_bgcolor='rgb(242,242,242)',
showlegend=False,
title=dict(text='<b>Price Distribution by Category<b>',
x=0.5, font=dict(size=16, color='Grey', family='Arial, sans-serif'))
)
return fig
else:
return None
def visualize_trend_expenses(combined_df):
if combined_df is not None and not combined_df.empty:
expenses_over_time_category = combined_df.groupby(['date', 'category'])['amount'].sum().reset_index()
fig = px.line(expenses_over_time_category, x='date', y='amount', color='category',
labels={'amount': 'Expenses', 'category': 'Category', 'date': 'Date'},
#color_discrete_sequence=px.colors.qualitative.Light24,
color_discrete_map=color_map,
text=expenses_over_time_category['category'],
width=700,
height=500
)
fig.update_traces(mode="markers+lines",
hovertemplate="<b>%{text}: <br>Expenses: %{y} EUR </br> %{x}")
fig.update_layout(
xaxis=dict(showticklabels=True, title='', showgrid=False),
yaxis=dict(zeroline=False, showgrid=False),
plot_bgcolor='#f0efef',
title=dict(text='<b>Trends in expenses over time<b>',
x=0.5,
font=dict(size=16, color='Grey', family='Arial, sans-serif')),
)
return fig
else:
return None
def visualize_all(combined_df, selected_month, budget):
fig1 = visualize_expenses_vs_budget(combined_df, selected_month, budget)
fig2 = visualize_budget_tracking(combined_df, selected_month, budget)
fig3 = visualize_pie_chart(combined_df,selected_month)
fig4 = visualize_category_expenses(combined_df)
fig5 = visualize_price_distribution(combined_df)
fig6 = visualize_trend_expenses(combined_df)
return fig1, fig2, fig3, fig4, fig5, fig6