SiraH committed on
Commit
559a5f6
1 Parent(s): 78238f1

add file path method

Browse files
Files changed (1) hide show
  1. app.py +23 -25
app.py CHANGED
@@ -203,29 +203,15 @@ def load_embeddings():
203
 
204
  def main():
205
  data = []
 
206
  msgs = StreamlitChatMessageHistory(key="langchain_messages")
207
  print(msgs)
208
  if "messages" not in st.session_state:
209
  st.session_state.messages = []
210
 
211
- # DB_FAISS_UPLOAD_PATH = "vectorstores/db_faiss"
212
- st.header("DOCUMENT QUESTION ANSWERING IS2")
213
- directory = "data"
214
- data_dir = UploadDoc(directory).create_document()
215
- data.extend(data_dir)
216
-
217
- #create vector from upload
218
- #if len(data) > 0 :
219
- sp_docs = split_docs(documents = data)
220
- st.write(f"This document have {len(sp_docs)} chunks")
221
- embeddings = load_embeddings()
222
- # with st.spinner('Wait for create vector'):
223
- db = FAISS.from_documents(sp_docs, embeddings)
224
- # db.save_local(DB_FAISS_UPLOAD_PATH)
225
- # st.write(f"Your model is already store in {DB_FAISS_UPLOAD_PATH}")
226
-
227
  llm = load_llama2_llamaCpp()
228
  qa_prompt = set_custom_prompt()
 
229
  #memory = ConversationBufferWindowMemory(k = 0, return_messages=True, input_key= 'question', output_key='answer', memory_key="chat_history")
230
  #memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
231
  #doc_chain = load_qa_chain(llm, chain_type="stuff", prompt = qa_prompt)
@@ -233,16 +219,28 @@ def main():
233
  #embeddings = load_embeddings()
234
 
235
 
236
- # uploaded_file = st.file_uploader('Choose your .pdf file', type="pdf")
237
- # print(uploaded_file)
238
- # if uploaded_file is not None:
239
- # pdf_reader = PdfReader(uploaded_file)
240
- # text = ""
241
- # for page in pdf_reader.pages:
242
- # text += page.extract_text()
243
- # print(text)
244
- # db = FAISS.from_texts(text, embeddings)
 
 
 
 
 
 
 
 
 
 
245
 
 
 
246
  memory = ConversationBufferMemory(memory_key="chat_history",
247
  return_messages=True,
248
  input_key="query",
 
203
 
204
  def main():
205
  data = []
206
+ sp_docs_list = []
207
  msgs = StreamlitChatMessageHistory(key="langchain_messages")
208
  print(msgs)
209
  if "messages" not in st.session_state:
210
  st.session_state.messages = []
211
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  llm = load_llama2_llamaCpp()
213
  qa_prompt = set_custom_prompt()
214
+ embeddings = load_embeddings()
215
  #memory = ConversationBufferWindowMemory(k = 0, return_messages=True, input_key= 'question', output_key='answer', memory_key="chat_history")
216
  #memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
217
  #doc_chain = load_qa_chain(llm, chain_type="stuff", prompt = qa_prompt)
 
219
  #embeddings = load_embeddings()
220
 
221
 
222
+ uploaded_file = st.file_uploader('Choose your .pdf file', type="pdf")
223
+ if uploaded_file is not None :
224
+ with NamedTemporaryFile(dir='PDF', suffix='.pdf', delete=False) as f:
225
+ f.write(uploaded_file.getbuffer())
226
+ print(f.name)
227
+ #filename = f.name
228
+ loader = PyPDFLoader(f.name)
229
+ pages = loader.load_and_split()
230
+ data.extend(pages)
231
+ #st.write(pages)
232
+ f.close()
233
+ os.unlink(f.name)
234
+ os.path.exists(f.name)
235
+ if len(data) > 0 :
236
+ embeddings = load_embeddings()
237
+ sp_docs = split_docs(documents = data)
238
+ st.write(f"This document have {len(sp_docs)} chunks")
239
+ st.write(sp_docs)
240
+ sp_docs_list.extend(sp_docs)
241
 
242
+ st.write(sp_docs_list)
243
+ db = FAISS.from_documents(sp_docs_list, embeddings)
244
  memory = ConversationBufferMemory(memory_key="chat_history",
245
  return_messages=True,
246
  input_key="query",