Geraldine committed on
Commit
4b10f41
1 Parent(s): f7ab30c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -6
app.py CHANGED
@@ -21,10 +21,69 @@ llm = HuggingFaceHub(repo_id=model ,
21
  "temperature":0.2})
22
  langchain.llm_cache = InMemoryCache()
23
 
24
- def predict(input_file):
25
- return "ok"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- input_file = gr.File(label="Upload PDF file")
28
- output_text = gr.Textbox(label="test")
29
- demo = gr.Interface(fn=predict, inputs=[input_file], outputs=output_text)
30
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  "temperature":0.2})
22
  langchain.llm_cache = InMemoryCache()
23
 
24
def build_context(files, urls):
    """Build the retrieval context from uploaded files and/or online PDF URLs.

    Loads PDF / DOCX / PPT(X) documents, splits them into overlapping chunks,
    embeds them into a Chroma vector store, and (re)initialises the
    module-level ``qa_chain`` used by ``respond``.

    Args:
        files: iterable of uploaded file objects with a ``.name`` path
            attribute, or ``None`` when nothing was uploaded.
        urls: comma-separated string of online PDF URLs; may be ``None``
            or the empty string when no URLs were entered.

    Returns:
        The status string ``"ready"`` once the chain has been built.
    """
    documents = []
    if files is not None:
        for file in files:
            path = file.name
            # Pick the loader matching the file extension; anything else
            # is silently skipped (same behavior as the original chain of
            # elif branches, which had no catch-all).
            if path.endswith('.pdf'):
                loader = PyPDFLoader(path)
            elif path.endswith('.docx'):
                loader = Docx2txtLoader(path)
            elif path.endswith(('.ppt', '.pptx')):
                loader = UnstructuredPowerPointLoader(path)
            else:
                continue
            documents.extend(loader.load())
    # BUG FIX: the original condition was `(urls is not None) | (urls != "")`.
    # Bitwise `|` does not short-circuit, and when urls is None the second
    # operand (None != "") is True, so the branch ran and crashed on
    # None.split(). Both conditions must hold, joined with `and`.
    if urls is not None and urls != "":
        for url in urls.split(sep=","):
            loader = OnlinePDFLoader(url)
            documents.extend(loader.load())
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunked_documents = text_splitter.split_documents(documents)
    vectordb = Chroma.from_documents(
        documents=chunked_documents,
        embedding=embeddings
    )
    # Expose the chain at module level so respond() can reach it.
    global qa_chain
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vectordb.as_retriever(search_kwargs={'k': 7}),
        chain_type="stuff",
        #return_source_documents=True
    )
    return "ready"
59
 
60
def loading():
    """Return the transient status text shown while the context is built."""
    status_text = "Loading..."
    return status_text
62
+
63
def respond(message, chat_history):
    """Run the user's query through the global qa_chain and update history.

    Appends the (question, answer) pair to ``chat_history`` in place and
    returns an empty string (clearing the input box) plus the history.
    """
    query_output = qa_chain({"query": message})
    answer = query_output["result"]
    chat_history.append((message, answer))
    # Brief pause before the UI refresh (kept from the original behavior).
    time.sleep(2)
    return "", chat_history
68
+
69
# Gradio UI: document/URL inputs and a load button on top, chatbot below.
# NOTE(review): the diff this was recovered from stripped all indentation;
# the column nesting below is a reconstruction — confirm against the app.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Local PDF uploads, restricted to .pdf files.
            pdf_docs = gr.Files(label="Load pdf files", file_types=['.pdf'], type="file")
        with gr.Column():
            # Comma-separated online PDF URLs (parsed by build_context).
            urls = gr.Textbox(label="Enter one of multiple online pdf urls (comma separated if multiple)", value=None)
    with gr.Row():
        load_context = gr.Button("Load documents and urls")
    with gr.Row():
        # Read-only status box driven by loading() / build_context().
        loading_status = gr.Textbox(label="Status", placeholder="", interactive=False)
    with gr.Row():
        with gr.Column():
            hg_chatbot = gr.Chatbot()
            msg = gr.Textbox(label="User message")
            clear = gr.ClearButton([msg, hg_chatbot])
    # Two handlers on the same click: the first flips the status to
    # "Loading...", the second builds the retrieval context and writes
    # "ready"; both bypass the queue so the status updates promptly.
    load_context.click(loading, None, loading_status, queue=False)
    load_context.click(build_context, inputs=[pdf_docs, urls], outputs=[loading_status], queue=False)
    msg.submit(respond, [msg, hg_chatbot], [msg, hg_chatbot])

demo.queue(concurrency_count=3)
demo.launch(debug=True)