# boardpac_chat_app_test / fileUpload.py
# Boardpac/theekshanas
# upload files again
# 39de480
"""
Streamlit page for uploading files to be indexed for chatting with private data
08/17/2023
D.M. Theekshana Samaradiwakara
"""
import os
import streamlit as st
from streamlit.logger import get_logger
from io import StringIO
logger = get_logger(__name__)
from dataPipeline import DataPipeline
def initialize_session_state():
    """Seed ``st.session_state`` with a default for every key this page uses.

    Keys that already exist are left untouched, so values stored earlier in
    the session are never overwritten.
    """
    defaults = {
        "data_index": None,
        "published_year": 2023,
        "is_parameters_changed": False,
        "is_input_validated": False,
    }
    # Collect the absent keys first, then fill in only those.
    missing = [name for name in defaults if name not in st.session_state]
    for name in missing:
        st.session_state[name] = defaults[name]
def update_parameters_change():
    """Widget ``on_change`` callback: flag that a sidebar parameter was edited."""
    st.session_state["is_parameters_changed"] = True
def validate_index():
    """Check that a non-blank persist directory name is stored in the session.

    Shows an error and stops the script run when ``data_index`` is missing,
    empty or whitespace-only; otherwise echoes the name back to the user.
    """
    directory_name = st.session_state["data_index"]
    # ``None``/"" short-circuit before ``strip`` is ever called.
    has_name = bool(directory_name and directory_name.strip())
    if not has_name:
        st.error("Empty index directory name!")
        st.stop()
    st.info(f"file persist directory name: {directory_name}")
def validate_files(uploaded_file):
    """Check that at least one file was uploaded.

    Args:
        uploaded_file: Value returned by the file uploader widget (a
            collection of files, since multiple uploads are accepted).

    Shows an error and stops the script run when nothing was uploaded;
    otherwise reports how many files were received.
    """
    nothing_uploaded = not uploaded_file
    if nothing_uploaded:
        st.error("No uploaded files to process!")
        st.stop()
    file_count = len(uploaded_file)
    st.info(f"No of files uploaded : {file_count}")
def validate_published_year():
    """Check that a published year is stored in the session.

    Shows an error and stops the script run when the stored year is falsy
    (``None`` or ``0``); otherwise echoes the year back to the user.
    """
    year = st.session_state["published_year"]
    if not year:
        st.error("Invalid year!")
        st.stop()
    st.info(f"file published year : {year}")
def validate_inputs(uploaded_file):
    """Run every input check in order: index name, published year, files.

    Args:
        uploaded_file: Value returned by the file uploader widget.

    Returns:
        ``True`` once all checks have run. A failing check calls
        ``st.stop()`` itself, so this function only returns on success.
    """
    checks = (
        validate_index,
        validate_published_year,
        lambda: validate_files(uploaded_file),
    )
    for run_check in checks:
        run_check()
    return True
def process_files(uploaded_files, data_index):
    """Load the uploaded files into documents through ``DataPipeline``.

    Args:
        uploaded_files: Files returned by ``st.file_uploader``.
        data_index: Name of the persist directory. Currently unused because
            the processing/persisting steps below are still disabled.

    Any exception raised while loading is reported in the UI with
    ``st.error`` and logged with its traceback; it is not re-raised.
    """
    try:
        st.info(uploaded_files)
        data_pipe = DataPipeline()
        documents = data_pipe.load_streamlit_documents(
            uploaded_files, st.session_state.published_year
        )
        # Disabled pipeline steps, kept for reference until they are re-enabled:
        # documents = data_pipe.add_metadata(documents, "year", st.session_state.published_year)
        # process_docs = data_pipe.process_documents(documents)
        # st.success("files successfully processed!")
        # data_pipe.persist_documents(data_index, process_docs)
        # st.success("files successfully stored!")
    except Exception as e:
        # The UI only shows the message; keep the full traceback in the logs
        # so failures can actually be diagnosed.
        logger.exception("Failed to process uploaded files")
        st.error(str(e))
#sidebar function
def sidebar():
    """Render the sidebar parameter widgets and sync them into session state.

    Draws a text input for the persist directory name and a number input for
    the published year. Both widgets call ``update_parameters_change`` when
    edited; when that flag is set, the widget values are copied into
    ``st.session_state.data_index`` / ``st.session_state.published_year`` and
    the flag is cleared. The currently stored values are then displayed.
    """
    from datetime import date  # local import: only needed for the year bound

    with st.sidebar:
        st.subheader("Data indexing parameters")

        persist_index_name = st.text_input(
            label="file persist directory name",
            placeholder="enter index name",
            key="persist_index_name",
            help="name of the directory which processed files need to persisted.",
            on_change=update_parameters_change,
        )

        publish_year = st.number_input(
            label="published year",
            min_value=1950,
            value=2023,
            # A fixed 2025 cap would reject files published in later years;
            # keep 2025 as the floor and let the bound follow the calendar.
            max_value=max(2025, date.today().year),
            key="publish_year",
            help="year of the files are published.",
            on_change=update_parameters_change,
        )

        if st.session_state.is_parameters_changed:
            st.session_state.data_index = persist_index_name
            st.session_state.published_year = publish_year
            st.session_state.is_parameters_changed = False

        st.info(f"file persist directory name: {st.session_state.data_index}")
        st.info(f"file published year : {st.session_state.published_year}")
#main function
def main():
    """Build the upload page: sidebar, file uploader, validate/process buttons.

    Flow: the user sets the parameters in the sidebar, uploads files and
    clicks "validate". A successful validation reveals the "process" button,
    which hands the files to ``process_files``.
    """
    st.set_page_config(page_title="upload files to databse", page_icon="📖")
    st.header("📖Boardpac chat App")

    initialize_session_state()
    sidebar()

    uploaded_file = st.file_uploader(
        "Upload your filess here and click on 'Process'",
        key="uploaded_file",
        accept_multiple_files=True,
        help="Upload files here!",
    )

    col1, col2 = st.columns(2)
    with col1:
        if st.button("validate"):
            if validate_inputs(uploaded_file):
                st.session_state.is_input_validated = True

    with col2:
        if st.session_state.is_input_validated and st.button("process"):
            # Consume the validation flag up front: each validation allows a
            # single processing run, and a failed re-check below (which calls
            # st.stop()) then hides the button until the user validates again.
            st.session_state.is_input_validated = False
            # Inputs may have changed since "validate" was clicked (files
            # removed, index name cleared), so check them again right here.
            validate_inputs(uploaded_file)
            with st.spinner("Indexing document... This may take a while⏳"):
                process_files(uploaded_file, st.session_state.data_index)
# Script entry point: build the page only when this file is executed directly
# (not when it is imported as a module).
if __name__ == "__main__":
    main()