File size: 4,306 Bytes
39de480
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
"""
Python Backend API to chat with private data  

08/17/2023
D.M. Theekshana Samaradiwakara
"""

import os
import streamlit as st
from streamlit.logger import get_logger
from io import StringIO

# Module-level logger obtained from Streamlit's logging helper, named after this module.
logger = get_logger(__name__)

from dataPipeline import DataPipeline

def initialize_session_state():
    """Seed ``st.session_state`` with a default for every key this page uses.

    Keys that already exist in the session state are left untouched; only
    missing keys receive their default value.
    """
    defaults = {
        "data_index": None,
        "published_year": 2023,
        "is_parameters_changed": False,
        "is_input_validated": False,
    }

    # Work out which keys are absent first, then fill just those in.
    missing_keys = [key for key in defaults if key not in st.session_state]
    for key in missing_keys:
        st.session_state[key] = defaults[key]

def update_parameters_change():
    """Mark the sidebar parameters as changed.

    Registered as the ``on_change`` callback of the sidebar widgets.
    """
    st.session_state["is_parameters_changed"] = True

def validate_index():
    """Check that a non-blank index directory name is stored in the session.

    Shows an error and calls ``st.stop()`` when the name is missing, empty
    or whitespace-only; otherwise echoes the name in an info box.
    """
    index_name = st.session_state.data_index
    has_name = bool(index_name and index_name.strip())
    if not has_name:
        st.error("Empty index directory name!")
        st.stop()

    st.info(f"file persist directory name: {index_name}")

def validate_files(uploaded_file):
    """Check that at least one file was uploaded.

    Args:
        uploaded_file: Value returned by the file uploader widget; a falsy
            value (``None`` or an empty collection) counts as "no files".

    Shows an error and calls ``st.stop()`` when nothing was uploaded;
    otherwise reports how many files were received.
    """
    nothing_uploaded = not uploaded_file
    if nothing_uploaded:
        st.error("No uploaded files to process!")
        st.stop()

    file_count = len(uploaded_file)
    st.info(f"No of files uploaded       : {file_count}")

def validate_published_year():
    """Check that a truthy published year is stored in the session.

    Shows an error and calls ``st.stop()`` when the stored year is falsy;
    otherwise echoes the year in an info box.
    """
    year = st.session_state.published_year
    if not year:
        st.error("Invalid year!")
        st.stop()

    st.info(f"file published year        : {year}")

def validate_inputs(uploaded_file) -> bool:
    """Run every input check in turn and report success.

    The checks run in a fixed order: index name, published year, then the
    uploaded files. Each check calls ``st.stop()`` itself on failure, so
    reaching the ``return`` means none of them reported a problem.

    Args:
        uploaded_file: Value returned by the file uploader widget, forwarded
            to ``validate_files``.

    Returns:
        Always ``True``.
    """
    # Session-state based checks first ...
    validate_index()
    validate_published_year()
    # ... then the one that needs the uploader's value.
    validate_files(uploaded_file)
    return True


def process_files(uploaded_files, data_index):
    """Load the uploaded files through ``DataPipeline`` and surface failures.

    Args:
        uploaded_files: Files handed over by the caller; passed unchanged to
            ``DataPipeline.load_streamlit_documents`` together with the
            published year stored in the session state.
        data_index: Name of the persist directory. Currently unused because
            the persist step below is commented out.

    Returns:
        None. Any ``Exception`` raised while loading is shown in the UI via
        ``st.error`` and logged with its traceback; it is not re-raised.
    """
    try:
        st.info(uploaded_files)
        dataPipe = DataPipeline()

        documents = dataPipe.load_streamlit_documents(uploaded_files, st.session_state.published_year)

        # NOTE(review): the remaining pipeline steps are disabled in the
        # original source — presumably work in progress; confirm before
        # re-enabling.
        # documents = dataPipe.add_metadata(documents, "year", st.session_state.published_year)
        # process_docs = dataPipe.process_documents(documents)
        # st.success("files successfully processed!")

        # dataPipe.persist_documents(data_index, process_docs)
        # st.success("files successfully stored!")

    except Exception as e:
        # Keep the full traceback in the logs; the UI only shows the message.
        logger.exception("Failed to process uploaded files")
        st.error(str(e))
                    

#sidebar function
def sidebar():
    """Render the sidebar parameter widgets and sync them into session state.

    Draws a text input for the persist directory name and a number input for
    the published year. Both widgets set ``is_parameters_changed`` through
    their ``on_change`` callback; when that flag is set, the current widget
    values are copied into ``data_index`` / ``published_year``, the flag is
    cleared, and the new values are echoed in info boxes.

    Changing a parameter also clears ``is_input_validated``, because an
    earlier validation only covered the previous values.
    """
    with st.sidebar:
        st.subheader("Data indexing parameters")

        persist_index_name = st.text_input(
            label="file persist directory name",
            placeholder="enter index name",
            key="persist_index_name",
            help="name of the directory which processed files need to persisted.",
            on_change=update_parameters_change,
        )

        publish_year = st.number_input(
            label="published year",
            min_value=1950,
            value=2023,
            max_value=2025,
            key="publish_year",
            help="year of the files are published.",
            on_change=update_parameters_change,
        )

        if st.session_state.is_parameters_changed:
            st.session_state.data_index = persist_index_name
            st.session_state.published_year = publish_year
            st.session_state.is_parameters_changed = False
            # The stored values just changed, so any earlier successful
            # validation no longer applies; require validating again before
            # the "process" step is offered.
            st.session_state.is_input_validated = False
            st.info(f"file persist directory name: {st.session_state.data_index}")
            st.info(f"file published year        : {st.session_state.published_year}")


#main function
def main():
    """Build the upload page: sidebar, file uploader, validate and process buttons.

    Flow:
      1. Configure the page and make sure all session-state keys exist.
      2. Render the sidebar parameters and the multi-file uploader.
      3. "validate" runs ``validate_inputs`` and records success in
         ``st.session_state.is_input_validated``.
      4. "process" is only offered while that flag is set; after a processing
         attempt the flag is cleared so inputs must be validated again.
    """
    st.set_page_config(page_title="upload files to databse", page_icon="📖")  # , layout="wide")
    st.header("📖Boardpac chat App")

    initialize_session_state()

    sidebar()

    uploaded_file = st.file_uploader(
        "Upload your filess here and click on 'Process'",
        key="uploaded_file",
        accept_multiple_files=True,
        help="Upload files here!",
    )

    col1, col2 = st.columns(2)

    with col1:
        if st.button("validate"):
            # Reset before validating: the validators call st.stop() on
            # failure, so without this an earlier successful validation would
            # stay recorded and "process" would remain available for inputs
            # that just failed the checks.
            st.session_state.is_input_validated = False
            if validate_inputs(uploaded_file):
                st.session_state.is_input_validated = True

    with col2:
        if st.session_state.is_input_validated:
            if st.button("process"):
                with st.spinner("Indexing document... This may take a while⏳"):
                    process_files(uploaded_file, st.session_state.data_index)
                    # Require a fresh validation before the next run.
                    st.session_state.is_input_validated = False


# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()