elia-waefler committed
Commit 7d56215
1 Parent(s): a232b2b

added mock functionality

Files changed (1)
  1. app.py +23 -25
app.py CHANGED
@@ -1,17 +1,17 @@
+import time
 import streamlit as st
 from dotenv import load_dotenv
 from PyPDF2 import PdfReader
-from langchain import embeddings
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
 from langchain.vectorstores import FAISS
-from langchain.vectorstores import faiss
 from langchain.chat_models import ChatOpenAI
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 import os
 import pickle
 from datetime import datetime
+from backend.generate_metadata import extract_metadata, ingest
 
 
 css = '''
@@ -111,12 +111,16 @@ def handle_userinput(user_question):
             print(message)
             # Display AI response
             st.write(bot_template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
+
+            # THIS DOESNT WORK, SOMEONE PLS FIX
             # Display source document information if available in the message
             if hasattr(message, 'source') and message.source:
                 st.write(f"Source Document: {message.source}", unsafe_allow_html=True)
 
 
+
 def safe_vec_store():
+    # USE VECTARA INSTEAD
     os.makedirs('vectorstore', exist_ok=True)
     filename = 'vectores' + datetime.now().strftime('%Y%m%d%H%M') + '.pkl'
     file_path = os.path.join('vectorstore', filename)
@@ -127,18 +131,22 @@ def safe_vec_store():
         pickle.dump(vector_store, f)
 
 
-
 def main():
     load_dotenv()
     st.set_page_config(page_title="Doc Verify RAG", page_icon=":hospital:")
     st.write(css, unsafe_allow_html=True)
-
+    st.session_state.classify = False
     st.subheader("Your documents")
-    pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
+    pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=not st.session_state.classify)
     filenames = [file.name for file in pdf_docs if file is not None]
 
     if st.button("Process"):
         with st.spinner("Processing"):
+            if st.session_state.classify:
+                # THE CLASSIFICATION APP
+                plain_text_doc = ingest(pdf_docs)
+
+            # NORMAL RAG
             loaded_vec_store = None
             for filename in filenames:
                 if ".pkl" in filename:
@@ -156,7 +164,12 @@ def main():
                 st.warning("merged to existing")
             st.session_state.vectorstore = vec
             st.session_state.conversation = get_conversation_chain(vec)
-            st.success("data loaded")
+            st.success("data loaded")
+            if st.session_state.classify:
+                # THE CLASSIFICATION APP
+                classification_result = extract_metadata(plain_text_doc)
+                st.write(classification_result)
+
 
     if "conversation" not in st.session_state:
         st.session_state.conversation = None
@@ -176,31 +189,16 @@ def main():
 
     if st.button("Process Classification"):
         with st.spinner("Processing"):
-            loaded_vec_store = None
-            for filename in filenames:
-                if ".pkl" in filename:
-                    file_path = os.path.join('vectorstore', filename)
-                    with open(file_path, 'rb') as f:
-                        loaded_vec_store = pickle.load(f)
-            raw_text = get_pdf_text(pdf_docs)
-            text_chunks = get_text_chunks(raw_text)
-            vec = get_vectorstore(text_chunks)
-            if loaded_vec_store:
-                vec.merge_from(loaded_vec_store)
-                st.warning("loaded vectorstore")
-            if "vectorstore" in st.session_state:
-                vec.merge_from(st.session_state.vectorstore)
-                st.warning("merged to existing")
-            st.session_state.vectorstore = vec
-            st.session_state.conversation = get_conversation_chain(vec)
-            st.success("data loaded")
+            st.session_state.classify = True
+            time.sleep(3)
+
 
     # Save and Load Embeddings
     if st.button("Save Embeddings"):
         if "vectorstore" in st.session_state:
            safe_vec_store()
            # st.session_state.vectorstore.save_local("faiss_index")
-           st.sidebar.success("safed")
+           st.sidebar.success("saved")
        else:
            st.sidebar.warning("No embeddings to save. Please process documents first.")
 
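A note for readers tracing the new mock flow: backend/generate_metadata.py is imported at the top of the new app.py but is not included in this commit, so the shape of ingest and extract_metadata can only be inferred from their call sites. The sketch below is a hypothetical stand-in for that module (the function names come from the import; the return values and field names are assumptions), intended only to make the mocked classification path concrete.

```python
# Hypothetical stand-in for backend/generate_metadata.py (not part of this commit).
# Signatures are inferred from app.py: ingest(pdf_docs) -> plain text,
# extract_metadata(plain_text_doc) -> something st.write() can display.
from PyPDF2 import PdfReader


def ingest(pdf_docs):
    """Return the concatenated plain text of the uploaded PDF file(s)."""
    docs = pdf_docs if isinstance(pdf_docs, list) else [pdf_docs]
    pages = []
    for doc in docs:
        reader = PdfReader(doc)  # Streamlit's UploadedFile is file-like, so PdfReader accepts it
        pages.extend(page.extract_text() or "" for page in reader.pages)
    return "\n".join(pages)


def extract_metadata(plain_text_doc):
    """Mock classification result; the real backend would presumably call an LLM here."""
    return {
        "document_type": "unclassified",        # placeholder values only
        "num_characters": len(plain_text_doc),
    }
```

Note also that main() re-assigns st.session_state.classify = False near the top of every Streamlit rerun, while "Process Classification" only flips it to True inside its own run, so the classification branch under "Process" does not appear to survive across reruns; that seems to be the "mock" part of this commit.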
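The "Process" handler and safe_vec_store() revolve around one pattern: build a FAISS vector store from text chunks, persist it, reload it, and combine stores with merge_from. The sketch below shows that round trip in isolation. It uses the save_local/load_local route that app.py leaves commented out rather than pickling the store, and it swaps LangChain's FakeEmbeddings in for the app's OpenAIEmbeddings so it runs without an API key; it is an illustration of the pattern with made-up texts, not code from this repository.

```python
# Minimal sketch of the save / reload / merge pattern behind the "Process" handler
# and safe_vec_store(). FakeEmbeddings replaces OpenAIEmbeddings; the texts are made up.
from langchain.embeddings.fake import FakeEmbeddings
from langchain.vectorstores import FAISS

embeddings = FakeEmbeddings(size=1352)               # random vectors, illustration only
store_a = FAISS.from_texts(["chunk one", "chunk two"], embeddings)
store_b = FAISS.from_texts(["chunk three"], embeddings)

store_a.save_local("faiss_index")                    # the route app.py leaves commented out
loaded = FAISS.load_local("faiss_index", embeddings)

store_b.merge_from(loaded)                           # same call as vec.merge_from(loaded_vec_store)
print(store_b.index.ntotal)                          # 3 vectors after the merge
```

save_local writes the raw faiss index plus a small pickle of the docstore, which avoids having to pickle the whole vector store object the way safe_vec_store() currently does.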