SiraH committed
Commit
7d6627d
1 Parent(s): 1e2ad9e

change to upload from streamlit

Files changed (1): app.py (+56 -50)
app.py CHANGED
@@ -201,19 +201,28 @@ def main():
     data = []
     # DB_FAISS_UPLOAD_PATH = "vectorstores/db_faiss"
     st.header("DOCUMENT QUESTION ANSWERING IS2")
-    directory = "data"
-    data_dir = UploadDoc(directory).create_document()
-    data.extend(data_dir)
+    # directory = "data"
+    # data_dir = UploadDoc(directory).create_document()
+    # data.extend(data_dir)
 
-    #create vector from upload
-    if len(data) > 0 :
-        sp_docs = split_docs(documents = data)
-        st.write(f"This document have {len(sp_docs)} chunks")
+    # #create vector from upload
+    # if len(data) > 0 :
+    #     sp_docs = split_docs(documents = data)
+    #     st.write(f"This document have {len(sp_docs)} chunks")
+    #     embeddings = load_embeddings()
+    #     with st.spinner('Wait for create vector'):
+    #         db = FAISS.from_documents(sp_docs, embeddings)
+    #         # db.save_local(DB_FAISS_UPLOAD_PATH)
+    #         # st.write(f"Your model is already store in {DB_FAISS_UPLOAD_PATH}")
+    uploaded_file = st.file_uploader('Choose your .pdf file', type="pdf")
+    print(uploaded_file)
+    if uploaded_file is not None:
         embeddings = load_embeddings()
-        with st.spinner('Wait for create vector'):
-            db = FAISS.from_documents(sp_docs, embeddings)
-            # db.save_local(DB_FAISS_UPLOAD_PATH)
-            # st.write(f"Your model is already store in {DB_FAISS_UPLOAD_PATH}")
+        pdf_reader = PdfReader(uploaded_file)
+        text = ""
+        for page in pdf_reader.pages:
+            text += page.extract_text()
+        db = FAISS.from_texts(text, embeddings)
 
     llm = load_llama2_llamaCpp()
     qa_prompt = set_custom_prompt()
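
This hunk is the heart of the commit: the FAISS index is now built from a Streamlit upload instead of a local `data` directory. One bug worth flagging: `FAISS.from_texts(text, embeddings)` is handed a single concatenated string where a list of strings is expected, so depending on the embedding backend it either embeds the PDF character by character or fails outright. A minimal sketch of the likely intent, splitting the text into chunks first; the splitter settings and the `HuggingFaceEmbeddings` stand-in are illustrative assumptions, not the app's actual `load_embeddings()` or `split_docs()`:

from pypdf import PdfReader  # app.py may import PdfReader from PyPDF2 instead
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

def build_index_from_upload(uploaded_file):
    # Extract text page by page; extract_text() can come back empty on scanned pages
    pdf_reader = PdfReader(uploaded_file)
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    # Chunk the text so each embedding covers a passage, not a single character
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_text(text)
    embeddings = HuggingFaceEmbeddings()  # stand-in for the app's load_embeddings()
    # Attach a source per chunk: FAISS.from_texts stores no metadata by default,
    # so the later i.metadata['source'] lookup would raise KeyError without this
    metadatas = [{"source": uploaded_file.name}] * len(chunks)
    return FAISS.from_texts(chunks, embeddings, metadatas=metadatas)

The metadata detail matters downstream: the second hunk's "See the related documents" expander reads `i.metadata['source']` from every retrieved document.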
@@ -221,52 +230,49 @@ def main():
     #memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
     doc_chain = load_qa_chain(llm, chain_type="stuff", prompt = qa_prompt)
     question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
-    qa_chain = ConversationalRetrievalChain(
-        retriever =db.as_retriever(search_type="similarity_score_threshold", search_kwargs={'k':3, "score_threshold": 0.7}),
-        question_generator=question_generator,
-        #condense_question_prompt=CONDENSE_QUESTION_PROMPT,
-        combine_docs_chain=doc_chain,
-        return_source_documents=True,
-        memory = memory,
-        #get_chat_history=lambda h :h
-        )
-
-    for message in st.session_state.messages:
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
+    if db is not None :
+        qa_chain = ConversationalRetrievalChain(
+            retriever =db.as_retriever(search_type="similarity_score_threshold", search_kwargs={'k':3, "score_threshold": 0.7}),
+            question_generator=question_generator,
+            #condense_question_prompt=CONDENSE_QUESTION_PROMPT,
+            combine_docs_chain=doc_chain,
+            return_source_documents=True,
+            memory = memory,
+            #get_chat_history=lambda h :h
+            )
 
-    # Accept user input
-    if query := st.chat_input("What is up?"):
-        # Display user message in chat message container
-        with st.chat_message("user"):
-            st.markdown(query)
-        # Add user message to chat history
-        st.session_state.messages.append({"role": "user", "content": query})
+        for message in st.session_state.messages:
+            with st.chat_message(message["role"]):
+                st.markdown(message["content"])
 
-        start = time.time()
+        # Accept user input
+        if query := st.chat_input("What is up?"):
+            # Display user message in chat message container
+            with st.chat_message("user"):
+                st.markdown(query)
+            # Add user message to chat history
+            st.session_state.messages.append({"role": "user", "content": query})
 
-        response = qa_chain({'question': query})
+            start = time.time()
 
-        url_list = set([i.metadata['source'] for i in response['source_documents']])
-        #print(f"condensed quesion : {question_generator.run({'chat_history': response['chat_history'], 'question' : query})}")
+            response = qa_chain({'question': query})
 
-        with st.chat_message("assistant"):
-            st.markdown(response['answer'])
-
-        end = time.time()
-        st.write("Respone time:",int(end-start),"sec")
-
-        # Add assistant response to chat history
-        st.session_state.messages.append({"role": "assistant", "content": response['answer']})
+            url_list = set([i.metadata['source'] for i in response['source_documents']])
+            #print(f"condensed quesion : {question_generator.run({'chat_history': response['chat_history'], 'question' : query})}")
+
+            with st.chat_message("assistant"):
+                st.markdown(response['answer'])
 
-        with st.expander("See the related documents"):
-            for count, url in enumerate(url_list):
-                #url_reg = regex_source(url)
-                st.write(str(count+1)+":", url)
+            end = time.time()
+            st.write("Respone time:",int(end-start),"sec")
+
+            # Add assistant response to chat history
+            st.session_state.messages.append({"role": "assistant", "content": response['answer']})
 
-    # view_messages = st.expander("View the message contents in session state")
-    # with view_messages:
-    #     view_messages.json(st.session_state.langchain_messages)
+            with st.expander("See the related documents"):
+                for count, url in enumerate(url_list):
+                    #url_reg = regex_source(url)
+                    st.write(str(count+1)+":", url)
 
     clear_button = st.button("Start new convo")
    if clear_button :
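
Two more things to watch in this hunk. The chain still receives `memory = memory` while the `ConversationBufferMemory` line above it stays commented out, so `memory` must be defined elsewhere in app.py or this raises a NameError. And once `return_source_documents=True` is combined with memory, the memory object needs `output_key="answer"` so it knows which of the chain's two outputs to record. A self-contained sketch of equivalent wiring through LangChain's `from_llm` constructor, assuming `llm` and `db` from the surrounding code (note that `from_llm` uses LangChain's default prompts rather than the app's `set_custom_prompt()`):

from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# output_key picks which chain output (answer vs. source_documents) goes into history
memory = ConversationBufferMemory(
    memory_key="chat_history", return_messages=True, output_key="answer"
)
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=db.as_retriever(
        search_type="similarity_score_threshold",
        # a 0.7 threshold can return zero documents for off-topic queries,
        # which leaves the "stuff" chain answering with no context
        search_kwargs={"k": 3, "score_threshold": 0.7},
    ),
    memory=memory,
    return_source_documents=True,
)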
 
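
Finally, both the message-replay loop and the appends assume `st.session_state.messages` already exists, and Streamlit session state starts empty in a fresh session. The "Start new convo" button suggests app.py resets it elsewhere; if not, the usual guard (an assumed addition, not part of this diff) is:

# Initialize chat history once per session, before the replay loop runs
if "messages" not in st.session_state:
    st.session_state.messages = []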