Geraldine committed on
Commit
4b10f41
1 Parent(s): f7ab30c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -6
app.py CHANGED
@@ -21,10 +21,69 @@ llm = HuggingFaceHub(repo_id=model ,
21
  "temperature":0.2})
22
  langchain.llm_cache = InMemoryCache()
23
 
24
- def predict(input_file):
25
- return "ok"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- input_file = gr.File(label="Upload PDF file")
28
- output_text = gr.Textbox(label="test")
29
- demo = gr.Interface(fn=predict, inputs=[input_file], outputs=output_text)
30
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  "temperature":0.2})
22
  langchain.llm_cache = InMemoryCache()
23
 
24
def build_context(files, urls):
    """Build the retrieval context from uploaded files and/or online PDF URLs.

    Loads PDF / DOCX / PPT(X) documents, splits them into overlapping chunks,
    embeds them into a Chroma vector store, and (re)initialises the
    module-level ``qa_chain`` used by ``respond``.

    Args:
        files: iterable of uploaded file objects with a ``.name`` path
            attribute, or ``None`` when nothing was uploaded.
        urls: comma-separated string of online PDF URLs; may be ``None``
            or the empty string when no URLs were entered.

    Returns:
        The status string ``"ready"`` once the chain has been built.
    """
    documents = []
    if files is not None:
        for file in files:
            path = file.name
            # Pick the loader matching the file extension; anything else
            # is silently skipped (same behavior as the original chain of
            # elif branches, which had no catch-all).
            if path.endswith('.pdf'):
                loader = PyPDFLoader(path)
            elif path.endswith('.docx'):
                loader = Docx2txtLoader(path)
            elif path.endswith(('.ppt', '.pptx')):
                loader = UnstructuredPowerPointLoader(path)
            else:
                continue
            documents.extend(loader.load())
    # BUG FIX: the original condition was `(urls is not None) | (urls != "")`.
    # Bitwise `|` does not short-circuit, and when urls is None the second
    # operand (None != "") is True, so the branch ran and crashed on
    # None.split(). Both conditions must hold, joined with `and`.
    if urls is not None and urls != "":
        for url in urls.split(sep=","):
            loader = OnlinePDFLoader(url)
            documents.extend(loader.load())
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunked_documents = text_splitter.split_documents(documents)
    vectordb = Chroma.from_documents(
        documents=chunked_documents,
        embedding=embeddings
    )
    # Expose the chain at module level so respond() can reach it.
    global qa_chain
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vectordb.as_retriever(search_kwargs={'k': 7}),
        chain_type="stuff",
        #return_source_documents=True
    )
    return "ready"
59
 
60
def loading():
    """Return the transient status text shown while the context is built."""
    status_text = "Loading..."
    return status_text
62
+
63
def respond(message, chat_history):
    """Run the user's query through the global qa_chain and update history.

    Appends the (question, answer) pair to ``chat_history`` in place and
    returns an empty string (clearing the input box) plus the history.
    """
    query_output = qa_chain({"query": message})
    answer = query_output["result"]
    chat_history.append((message, answer))
    # Brief pause before the UI refresh (kept from the original behavior).
    time.sleep(2)
    return "", chat_history
68
+
69
# Gradio UI: document/URL inputs and a load button on top, chatbot below.
# NOTE(review): the diff this was recovered from stripped all indentation;
# the column nesting below is a reconstruction — confirm against the app.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Local PDF uploads, restricted to .pdf files.
            pdf_docs = gr.Files(label="Load pdf files", file_types=['.pdf'], type="file")
        with gr.Column():
            # Comma-separated online PDF URLs (parsed by build_context).
            urls = gr.Textbox(label="Enter one of multiple online pdf urls (comma separated if multiple)", value=None)
    with gr.Row():
        load_context = gr.Button("Load documents and urls")
    with gr.Row():
        # Read-only status box driven by loading() / build_context().
        loading_status = gr.Textbox(label="Status", placeholder="", interactive=False)
    with gr.Row():
        with gr.Column():
            hg_chatbot = gr.Chatbot()
            msg = gr.Textbox(label="User message")
            clear = gr.ClearButton([msg, hg_chatbot])
    # Two handlers on the same click: the first flips the status to
    # "Loading...", the second builds the retrieval context and writes
    # "ready"; both bypass the queue so the status updates promptly.
    load_context.click(loading, None, loading_status, queue=False)
    load_context.click(build_context, inputs=[pdf_docs, urls], outputs=[loading_status], queue=False)
    msg.submit(respond, [msg, hg_chatbot], [msg, hg_chatbot])

demo.queue(concurrency_count=3)
demo.launch(debug=True)