"""
Python Backend API to chat with private data

08/17/2023
D.M. Theekshana Samaradiwakara

Streamlit page that collects an index (persist directory) name, a published
year and a set of uploaded files, validates them, and hands the files to
``DataPipeline`` for loading.
"""

import os
from datetime import date
from io import StringIO

import streamlit as st
from streamlit.logger import get_logger

from dataPipeline import DataPipeline

logger = get_logger(__name__)

# Bounds and default for the "published year" input. The upper bound follows
# the calendar so files published after 2025 are not rejected; it never drops
# below the previously hard-coded 2025.
MIN_PUBLISHED_YEAR = 1950
DEFAULT_PUBLISHED_YEAR = 2023
MAX_PUBLISHED_YEAR = max(2025, date.today().year)


def initialize_session_state():
    """Seed ``st.session_state`` with defaults for keys that are not set yet.

    Existing values are left untouched so that state survives Streamlit
    reruns.
    """
    session_defaults = {
        "data_index": None,
        "published_year": DEFAULT_PUBLISHED_YEAR,
        "is_parameters_changed": False,
        "is_input_validated": False,
    }
    for key, default in session_defaults.items():
        if key not in st.session_state:
            st.session_state[key] = default


def invalidate_validation():
    """Mark the current inputs as not validated.

    Used as a widget ``on_change`` callback so that a validation result
    obtained for earlier inputs cannot unlock processing of changed ones.
    """
    st.session_state.is_input_validated = False


def update_parameters_change():
    """Widget callback: flag that a sidebar parameter was edited.

    Also clears the validation flag, because the previously validated
    parameters no longer match what the user entered.
    """
    st.session_state.is_parameters_changed = True
    invalidate_validation()


def validate_index():
    """Stop the script run with an error if the index name is empty or blank."""
    index = st.session_state.data_index
    if (not index) or (not index.strip()):
        st.error("Empty index directory name!")
        st.stop()
    st.info(f"file persist directory name: {index}")


def validate_files(uploaded_file):
    """Stop the script run with an error if no files were uploaded.

    Args:
        uploaded_file: the value returned by the file uploader widget
            (a list, because multiple files are accepted).
    """
    if not uploaded_file:
        st.error("No uploaded files to process!")
        st.stop()
    st.info(f"No of files uploaded : {len(uploaded_file)}")


def validate_published_year():
    """Stop the script run with an error if the published year is unset/zero."""
    if not st.session_state.published_year:
        st.error("Invalid year!")
        st.stop()
    st.info(f"file published year : {st.session_state.published_year}")


def validate_inputs(uploaded_file):
    """Run all input validations in order.

    Each validator calls ``st.stop()`` on failure, so this function only
    returns when every check passed.

    Returns:
        True when all validations succeeded.
    """
    validate_index()
    validate_published_year()
    validate_files(uploaded_file)
    return True


def process_files(uploaded_files, data_index):
    """Load the uploaded files through ``DataPipeline``.

    Any exception is logged with its traceback and shown to the user as a
    Streamlit error instead of crashing the page.

    Args:
        uploaded_files: files returned by the file uploader widget.
        data_index: name of the persist directory. Currently unused because
            the persist step is not enabled yet (see TODO below).
    """
    try:
        # Shows the raw uploaded-file objects on the page.
        st.info(uploaded_files)

        data_pipe = DataPipeline()
        data_pipe.load_streamlit_documents(
            uploaded_files, st.session_state.published_year
        )
        # TODO: the follow-up steps were left disabled by the original
        # author: add_metadata(documents, "year", ...),
        # process_documents(documents) and
        # persist_documents(data_index, processed_docs), each followed by a
        # success message. Re-enable once the pipeline supports them.
    except Exception as e:
        # Top-level UI boundary: keep the traceback in the log, show the
        # message to the user.
        logger.exception("Failed to process uploaded files")
        st.error(str(e))


# sidebar function
def sidebar():
    """Render the sidebar inputs and sync them into session state.

    The widget values are copied to ``data_index`` / ``published_year`` only
    after one of the widgets reported a change; the change flag is then
    cleared.
    """
    with st.sidebar:
        st.subheader("Data indexing parameters")

        persist_index_name = st.text_input(
            label="file persist directory name",
            placeholder="enter index name",
            key="persist_index_name",
            help="name of the directory which processed files need to persisted.",
            on_change=update_parameters_change,
        )

        publish_year = st.number_input(
            label="published year",
            min_value=MIN_PUBLISHED_YEAR,
            value=DEFAULT_PUBLISHED_YEAR,
            max_value=MAX_PUBLISHED_YEAR,
            key="publish_year",
            help="year of the files are published.",
            on_change=update_parameters_change,
        )

        if st.session_state.is_parameters_changed:
            st.session_state.data_index = persist_index_name
            st.session_state.published_year = publish_year
            st.session_state.is_parameters_changed = False

        st.info(f"file persist directory name: {st.session_state.data_index}")
        st.info(f"file published year : {st.session_state.published_year}")


# main function
def main():
    """Build the page: sidebar, file uploader, and validate/process buttons."""
    st.set_page_config(page_title="upload files to databse", page_icon="📖")
    st.header("📖Boardpac chat App")

    initialize_session_state()
    sidebar()

    uploaded_file = st.file_uploader(
        "Upload your filess here and click on 'Process'",
        key="uploaded_file",
        accept_multiple_files=True,
        help="Upload files here!",
        # A changed file selection must be validated again before processing.
        on_change=invalidate_validation,
    )

    col1, col2 = st.columns(2)
    with col1:
        if st.button("validate"):
            # Clear first: a failing validator calls st.stop(), which would
            # otherwise leave an earlier successful result in place.
            st.session_state.is_input_validated = False
            if validate_inputs(uploaded_file):
                st.session_state.is_input_validated = True

    with col2:
        if st.session_state.is_input_validated and st.button("process"):
            with st.spinner("Indexing document... This may take a while⏳"):
                process_files(uploaded_file, st.session_state.data_index)
            # Require a fresh validation before the next processing run.
            st.session_state.is_input_validated = False


if __name__ == "__main__":
    main()