Spaces:

sabazo
/

docs_qachat

Runtime error

App Files Community

sabazo committed on Nov 14, 2023

Commit

080bbc9

1 Parent(s): 60d09de

changed from pdf to url loader

Browse files

Files changed (1) hide show

app.py +15 -22

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ from langchain.text_splitter import CharacterTextSplitter
 text_splitter = CharacterTextSplitter(chunk_size=350, chunk_overlap=0)
 from langchain.llms import HuggingFaceHub
-flan_ul2 = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature":0.1, "max_new_tokens":300})
 from langchain.embeddings import HuggingFaceHubEmbeddings
 embeddings = HuggingFaceHubEmbeddings()
@@ -14,17 +14,19 @@ embeddings = HuggingFaceHubEmbeddings()
 from langchain.vectorstores import Chroma
 from langchain.chains import RetrievalQA
-def loading_pdf():
-    return "Loading..."
-def pdf_changes(pdf_doc):
-    loader = OnlinePDFLoader(pdf_doc.name)
-    documents = loader.load()
-    texts = text_splitter.split_documents(documents)
-    db = Chroma.from_documents(texts, embeddings)
-    retriever = db.as_retriever()
-    global qa
-    qa = RetrievalQA.from_chain_type(llm=flan_ul2, chain_type="stuff", retriever=retriever, return_source_documents=True)
-    return "Ready"
 def add_text(history, text):
     history = history + [(text, None)]
@@ -57,19 +59,10 @@ title = """
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.HTML(title)
-        with gr.Column():
-            pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
-            with gr.Row():
-                langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
-                load_pdf = gr.Button("Load pdf to langchain")
         chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
         with gr.Row():
             question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
-    load_pdf.click(loading_pdf, None, langchain_status, queue=False)
-    load_pdf.click(pdf_changes, pdf_doc, langchain_status, queue=False)
     question.submit(add_text, [chatbot, question], [chatbot, question]).then(
         bot, chatbot, chatbot
     )

 text_splitter = CharacterTextSplitter(chunk_size=350, chunk_overlap=0)
 from langchain.llms import HuggingFaceHub
+model_id = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature":0.1, "max_new_tokens":300})
 from langchain.embeddings import HuggingFaceHubEmbeddings
 embeddings = HuggingFaceHubEmbeddings()
 from langchain.vectorstores import Chroma
 from langchain.chains import RetrievalQA
+from langchain.document_loaders import WebBaseLoader
+web_links = ["https://www.databricks.com/","https://help.databricks.com","https://databricks.com/try-databricks","https://help.databricks.com/s/","https://docs.databricks.com","https://kb.databricks.com/","http://docs.databricks.com/getting-started/index.html","http://docs.databricks.com/introduction/index.html","http://docs.databricks.com/getting-started/tutorials/index.html","http://docs.databricks.com/release-notes/index.html","http://docs.databricks.com/ingestion/index.html","http://docs.databricks.com/exploratory-data-analysis/index.html","http://docs.databricks.com/data-preparation/index.html","http://docs.databricks.com/data-sharing/index.html","http://docs.databricks.com/marketplace/index.html","http://docs.databricks.com/workspace-index.html","http://docs.databricks.com/machine-learning/index.html","http://docs.databricks.com/sql/index.html","http://docs.databricks.com/delta/index.html","http://docs.databricks.com/dev-tools/index.html","http://docs.databricks.com/integrations/index.html","http://docs.databricks.com/administration-guide/index.html","http://docs.databricks.com/security/index.html","http://docs.databricks.com/data-governance/index.html","http://docs.databricks.com/lakehouse-architecture/index.html","http://docs.databricks.com/reference/api.html","http://docs.databricks.com/resources/index.html","http://docs.databricks.com/whats-coming.html","http://docs.databricks.com/archive/index.html","http://docs.databricks.com/lakehouse/index.html","http://docs.databricks.com/getting-started/quick-start.html","http://docs.databricks.com/getting-started/etl-quick-start.html","http://docs.databricks.com/getting-started/lakehouse-e2e.html","http://docs.databricks.com/getting-started/free-training.html","http://docs.databricks.com/sql/language-manual/index.html","http://docs.databricks.com/error-messages/index.html","http://www.apache.org/","https://databricks.com/privacy-policy","https://databricks.com/terms-of-use"]
+loader = WebBaseLoader(web_links)
+documents = loader.load()
+texts = text_splitter.split_documents(documents)
+db = Chroma.from_documents(texts, embeddings)
+retriever = db.as_retriever()
+global qa
+qa = RetrievalQA.from_chain_type(llm=model_id, chain_type="stuff", retriever=retriever, return_source_documents=True)
 def add_text(history, text):
     history = history + [(text, None)]
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
+        gr.HTML(title)
         chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
         with gr.Row():
             question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
     question.submit(add_text, [chatbot, question], [chatbot, question]).then(
         bot, chatbot, chatbot
     )