# PDFExtractor/application/services/streamlit_function.py
# Vela
# Added multiple file upload functionality
# 22481bd
import streamlit as st
from typing import Union, List
import pandas as pd
from io import BytesIO
import json
import os
from openpyxl import load_workbook
from application.utils import logger
# Rebinds the name `logger` from the imported `application.utils.logger` module
# to the object returned by its get_logger(); the functions below log through it.
logger = logger.get_logger()
# Defaults consumed by config_homepage() when it calls st.set_page_config().
PAGE_TITLE = "PDF Extractor"
PAGE_LAYOUT = "wide"
# Disabled page settings (icon, help/bug-report link, "About" text).
# NOTE(review): the link and description appear to come from a different
# project - confirm whether they should be updated or removed.
# PAGE_ICON = "src/frontend/images/page_icon.jpg"
# GITHUB_LINK = "https://github.com/Vela-Test1993/yuvabe-care-companion-ai"
# ABOUT_US = "An AI-powered assistant for personalized healthcare guidance."
def config_homepage(page_title=PAGE_TITLE):
    """
    Apply the page-level Streamlit configuration for the app.

    Calls ``st.set_page_config`` with the given title, the module-level
    ``PAGE_LAYOUT`` and a collapsed sidebar. The call is skipped when the key
    ``"page_config_set"`` is already present in ``st.session_state``; this
    function itself never stores that key, so the skip only happens if other
    code sets it.

    The page icon and custom menu items are currently disabled.

    Args:
        page_title (str): The title displayed on the browser tab
            (default is PAGE_TITLE).

    Example:
        >>> config_homepage("My Custom App")
    """
    # Guard clause: some other part of the app marked the page as configured.
    if "page_config_set" in st.session_state:
        return

    page_settings = {
        "page_title": page_title,
        "layout": PAGE_LAYOUT,
        "initial_sidebar_state": "collapsed",
    }
    st.set_page_config(**page_settings)
    # The "page_config_set" flag is deliberately not written here (the
    # assignment was left disabled in the original implementation).
def upload_file(
    file_types: Union[str, List[str]] = "pdf",
    # U+1F4E4 (outbox tray emoji) is written as an escape so the label cannot
    # be garbled again by a wrong file encoding.
    label: str = "\U0001F4E4 Upload a file",
    help_text: str = "Upload your file for processing.",
    allow_multiple: bool = True,
):
    """
    Render a Streamlit file uploader followed by a "Submit" button.

    Args:
        file_types (str or list): Allowed file type(s), e.g., "pdf" or ["pdf", "docx"].
            A single string is wrapped into a one-element list.
        label (str): Label displayed above the uploader.
        help_text (str): Tooltip help text.
        allow_multiple (bool): Allow multiple file uploads.

    Returns:
        Whatever ``st.file_uploader`` returns for the current run: a list of
        uploaded file objects when ``allow_multiple`` is True, otherwise a
        single file object (or None when nothing has been uploaded).

    Side effects:
        When the "Submit" button is pressed, the current uploader value is
        stored in ``st.session_state.pdf_file``. The value is returned
        regardless of whether the button was pressed.
    """
    # st.file_uploader expects a list of extensions; accept a bare string too.
    if isinstance(file_types, str):
        file_types = [file_types]

    uploaded_files = st.file_uploader(
        label=label,
        type=file_types,
        help=help_text,
        accept_multiple_files=allow_multiple,
    )

    # Persist the selection across reruns only once the user confirms it.
    if st.button("Submit"):
        st.session_state.pdf_file = uploaded_files

    return uploaded_files
def _next_free_column(workbook_path: str, sheet_name: str) -> int:
    """Return the 0-based column index just right of the sheet's existing data."""
    book = load_workbook(workbook_path)
    try:
        if sheet_name not in book.sheetnames:
            return 0
        sheet = book[sheet_name]
        # openpyxl reports max_row == max_column == 1 for a sheet without any
        # cells, which would otherwise leave column A blank.
        if (
            sheet.max_row == 1
            and sheet.max_column == 1
            and sheet.cell(row=1, column=1).value is None
        ):
            return 0
        # max_column is 1-based while pandas' startcol is 0-based, so this
        # value already points at the first unused column.
        return sheet.max_column
    finally:
        book.close()


def export_results_to_excel(
    results: dict,
    sheet_name: str,
    filename: str = "output.xlsx",
    column: Union[str, None] = None,
) -> Union[BytesIO, None]:
    """
    Export one entry of a results dictionary as a two-column Excel table.

    The dictionary stored under ``results[column]`` is turned into a table
    with the headers ``column`` and ``"Value"``. The table is written to
    ``data/<filename>``: a new workbook is created if the file does not exist;
    otherwise the table is overlaid on ``sheet_name`` starting at the first
    row, in the first column to the right of the sheet's existing data.
    An in-memory workbook holding only the new table is returned.

    Args:
        results (dict): The data to export.
        sheet_name (str): The sheet name to write to.
        filename (str): The Excel file name (with or without '.xlsx').
        column (str): Key in ``results`` whose value (a dict) is exported; also
            used as the header of the first column. A key missing from
            ``results`` yields an empty table.

    Returns:
        BytesIO: In-memory Excel file for Streamlit download, or None when
        ``results`` is empty, the selected value is not a dict, or any error
        occurs while writing (the error is logged).
    """
    try:
        if not results:
            logger.error("Results object is None or empty.")
            return None

        filename = filename if filename.endswith(".xlsx") else f"{filename}.xlsx"
        data = results.get(column, {})
        logger.info(f"Exporting data for column '{column}' to {filename}")

        if not isinstance(data, dict):
            logger.error(f"Expected dictionary for column '{column}', but got {type(data)}")
            return None

        df = pd.DataFrame(list(data.items()), columns=[column, "Value"])
        df.fillna(0, inplace=True)

        os.makedirs("data", exist_ok=True)
        physical_path = os.path.join("data", filename)

        if os.path.exists(physical_path):
            start_column = _next_free_column(physical_path, sheet_name)
            # 'overlay' keeps the existing cells and writes the new table next to them.
            with pd.ExcelWriter(
                physical_path, engine='openpyxl', mode='a', if_sheet_exists='overlay'
            ) as writer:
                df.to_excel(
                    writer,
                    sheet_name=sheet_name,
                    index=False,
                    header=True,
                    startrow=0,
                    startcol=start_column,
                )
        else:
            with pd.ExcelWriter(physical_path, engine='openpyxl', mode='w') as writer:
                df.to_excel(writer, sheet_name=sheet_name, index=False, header=True, startrow=0)

        # The download stream contains only the table exported by this call.
        output_stream = BytesIO()
        with pd.ExcelWriter(output_stream, engine='openpyxl') as writer:
            df.to_excel(writer, sheet_name=sheet_name, index=False)
        output_stream.seek(0)

        logger.info(f"Data exported to {filename} successfully.")
        return output_stream
    except Exception as e:
        # Best-effort boundary for the UI: report the failure and signal it with None.
        logger.error(f"Error creating Excel export: {e}")
        return None