Spaces:
Runtime error
Runtime error
File size: 4,306 Bytes
39de480 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
"""
Python Backend API to chat with private data
08/17/2023
D.M. Theekshana Samaradiwakara
"""
import os
import streamlit as st
from streamlit.logger import get_logger
from io import StringIO
logger = get_logger(__name__)
from dataPipeline import DataPipeline
def initialize_session_state():
    """Seed ``st.session_state`` with a default for every key this page uses.

    Keys that are already present are left untouched, so only missing keys
    are created.
    """
    defaults = {
        "data_index": None,
        "published_year": 2023,
        "is_parameters_changed": False,
        "is_input_validated": False,
    }
    for name, default in defaults.items():
        if name in st.session_state:
            continue
        st.session_state[name] = default
def update_parameters_change():
    """Flag in session state that a parameter widget value has changed."""
    st.session_state["is_parameters_changed"] = True
def validate_index():
    """Check that session state holds a non-blank index directory name.

    Shows an error and calls ``st.stop()`` when the name is missing or
    whitespace-only; otherwise echoes the name as an info message.
    """
    index = st.session_state["data_index"]
    is_blank = not (index and index.strip())
    if is_blank:
        st.error("Empty index directory name!")
        st.stop()
    st.info(f"file persist directory name: {index}")
def validate_files(uploaded_file):
    """Check that at least one file was uploaded.

    Shows an error and calls ``st.stop()`` when ``uploaded_file`` is falsy;
    otherwise reports how many files were uploaded.
    """
    nothing_uploaded = not uploaded_file
    if nothing_uploaded:
        st.error("No uploaded files to process!")
        st.stop()
    st.info(f"No of files uploaded : {len(uploaded_file)}")
def validate_published_year():
    """Check that session state holds a truthy published year.

    Shows an error and calls ``st.stop()`` when the year is falsy; otherwise
    echoes the year as an info message.
    """
    year_missing = not st.session_state["published_year"]
    if year_missing:
        st.error("Invalid year!")
        st.stop()
    st.info(f"file published year : {st.session_state.published_year}")
def validate_inputs(uploaded_file):
    """Run the index, year and file checks in that order.

    Each check reports its own failure through ``st.error`` / ``st.stop()``.

    Returns:
        ``True`` once all three checks have returned.
    """
    for parameter_check in (validate_index, validate_published_year):
        parameter_check()
    validate_files(uploaded_file)
    return True
def process_files(uploaded_files, data_index):
    """Load the uploaded files through ``DataPipeline``.

    Args:
        uploaded_files: Files from the Streamlit uploader; passed unchanged
            to ``DataPipeline.load_streamlit_documents`` together with the
            published year held in session state.
        data_index: Name of the persist directory. Currently unused because
            the persist step below is commented out; kept so existing
            callers keep working.

    Any exception raised while loading is caught here: the message is shown
    in the UI and the full traceback is written to the module logger, so a
    failure does not crash the page but can still be diagnosed.
    """
    try:
        st.info(uploaded_files)
        data_pipe = DataPipeline()
        # `documents` is the input of the disabled steps below.
        documents = data_pipe.load_streamlit_documents(
            uploaded_files, st.session_state.published_year
        )
        # NOTE: the remaining pipeline steps are disabled in this version.
        # documents = data_pipe.add_metadata(documents, "year", st.session_state.published_year)
        # process_docs = data_pipe.process_documents(documents)
        # st.success("files successfully processed!")
        # data_pipe.persist_documents(data_index, process_docs)
        # st.success("files successfully stored!")
    except Exception as e:
        # UI boundary: keep the page alive, but record the traceback that
        # `str(e)` alone would discard.
        logger.exception("Failed to process uploaded files")
        st.error(str(e))
#sidebar function
def sidebar():
    """Render the indexing-parameter widgets in the sidebar.

    When the ``is_parameters_changed`` flag is set, the current widget values
    are copied into ``data_index`` / ``published_year`` in session state and
    the flag is cleared. The stored values are then echoed as info messages.
    """
    state = st.session_state
    with st.sidebar:
        st.subheader("Data indexing parameters")

        persist_index_name = st.text_input(
            label="file persist directory name",
            placeholder="enter index name",
            key="persist_index_name",
            help="name of the directory which processed files need to persisted.",
            on_change=update_parameters_change,
        )

        publish_year = st.number_input(
            label="published year",
            min_value=1950,
            value=2023,
            max_value=2025,
            key="publish_year",
            help="year of the files are published.",
            on_change=update_parameters_change,
        )

        if state.is_parameters_changed:
            state.data_index = persist_index_name
            state.published_year = publish_year
            state.is_parameters_changed = False

        # NOTE(review): nesting reconstructed; these two messages are assumed
        # to be shown on every run, not only after a change - confirm.
        st.info(f"file persist directory name: {st.session_state.data_index}")
        st.info(f"file published year : {st.session_state.published_year}")
#main function
def main():
    """Build the page: header, sidebar, uploader and the two action buttons.

    "validate" runs the input checks and, on success, sets
    ``is_input_validated``. "process" is only rendered while that flag is
    set; clicking it runs ``process_files`` and clears the flag again.
    """
    st.set_page_config(page_title="upload files to databse", page_icon="📖")
    st.header("📖Boardpac chat App")

    initialize_session_state()
    sidebar()

    uploaded_file = st.file_uploader(
        "Upload your filess here and click on 'Process'",
        key="uploaded_file",
        accept_multiple_files=True,
        help="Upload files here!",
    )

    state = st.session_state
    validate_column, process_column = st.columns(2)

    with validate_column:
        if st.button("validate") and validate_inputs(uploaded_file):
            state.is_input_validated = True

    with process_column:
        # The process button is only created once validation has succeeded.
        if state.is_input_validated and st.button("process"):
            with st.spinner("Indexing document... This may take a while⏳"):
                process_files(uploaded_file, state.data_index)
            # NOTE(review): this only rebinds the local name; it does not
            # clear the uploader widget - confirm intent.
            uploaded_file = None
            state.is_input_validated = False
# Build the page only when this file is run as the main script.
if __name__ == "__main__":
    main()
|