sabazo committed on
Commit
080bbc9
1 Parent(s): 60d09de

changed from pdf to url loader

Browse files
Files changed (1) hide show
  1. app.py +15 -22
app.py CHANGED
@@ -6,7 +6,7 @@ from langchain.text_splitter import CharacterTextSplitter
6
  text_splitter = CharacterTextSplitter(chunk_size=350, chunk_overlap=0)
7
 
8
  from langchain.llms import HuggingFaceHub
9
- flan_ul2 = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature":0.1, "max_new_tokens":300})
10
 
11
  from langchain.embeddings import HuggingFaceHubEmbeddings
12
  embeddings = HuggingFaceHubEmbeddings()
@@ -14,17 +14,19 @@ embeddings = HuggingFaceHubEmbeddings()
14
  from langchain.vectorstores import Chroma
15
 
16
  from langchain.chains import RetrievalQA
17
- def loading_pdf():
18
- return "Loading..."
19
- def pdf_changes(pdf_doc):
20
- loader = OnlinePDFLoader(pdf_doc.name)
21
- documents = loader.load()
22
- texts = text_splitter.split_documents(documents)
23
- db = Chroma.from_documents(texts, embeddings)
24
- retriever = db.as_retriever()
25
- global qa
26
- qa = RetrievalQA.from_chain_type(llm=flan_ul2, chain_type="stuff", retriever=retriever, return_source_documents=True)
27
- return "Ready"
 
 
28
 
29
  def add_text(history, text):
30
  history = history + [(text, None)]
@@ -57,19 +59,10 @@ title = """
57
 
58
  with gr.Blocks(css=css) as demo:
59
  with gr.Column(elem_id="col-container"):
60
- gr.HTML(title)
61
-
62
- with gr.Column():
63
- pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
64
- with gr.Row():
65
- langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
66
- load_pdf = gr.Button("Load pdf to langchain")
67
-
68
  chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
69
  with gr.Row():
70
  question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
71
- load_pdf.click(loading_pdf, None, langchain_status, queue=False)
72
- load_pdf.click(pdf_changes, pdf_doc, langchain_status, queue=False)
73
  question.submit(add_text, [chatbot, question], [chatbot, question]).then(
74
  bot, chatbot, chatbot
75
  )
 
6
  text_splitter = CharacterTextSplitter(chunk_size=350, chunk_overlap=0)
7
 
8
  from langchain.llms import HuggingFaceHub
9
+ model_id = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature":0.1, "max_new_tokens":300})
10
 
11
  from langchain.embeddings import HuggingFaceHubEmbeddings
12
  embeddings = HuggingFaceHubEmbeddings()
 
14
  from langchain.vectorstores import Chroma
15
 
16
  from langchain.chains import RetrievalQA
17
+
18
+ from langchain.document_loaders import WebBaseLoader
19
+
20
+ web_links = ["https://www.databricks.com/","https://help.databricks.com","https://databricks.com/try-databricks","https://help.databricks.com/s/","https://docs.databricks.com","https://kb.databricks.com/","http://docs.databricks.com/getting-started/index.html","http://docs.databricks.com/introduction/index.html","http://docs.databricks.com/getting-started/tutorials/index.html","http://docs.databricks.com/release-notes/index.html","http://docs.databricks.com/ingestion/index.html","http://docs.databricks.com/exploratory-data-analysis/index.html","http://docs.databricks.com/data-preparation/index.html","http://docs.databricks.com/data-sharing/index.html","http://docs.databricks.com/marketplace/index.html","http://docs.databricks.com/workspace-index.html","http://docs.databricks.com/machine-learning/index.html","http://docs.databricks.com/sql/index.html","http://docs.databricks.com/delta/index.html","http://docs.databricks.com/dev-tools/index.html","http://docs.databricks.com/integrations/index.html","http://docs.databricks.com/administration-guide/index.html","http://docs.databricks.com/security/index.html","http://docs.databricks.com/data-governance/index.html","http://docs.databricks.com/lakehouse-architecture/index.html","http://docs.databricks.com/reference/api.html","http://docs.databricks.com/resources/index.html","http://docs.databricks.com/whats-coming.html","http://docs.databricks.com/archive/index.html","http://docs.databricks.com/lakehouse/index.html","http://docs.databricks.com/getting-started/quick-start.html","http://docs.databricks.com/getting-started/etl-quick-start.html","http://docs.databricks.com/getting-started/lakehouse-e2e.html","http://docs.databricks.com/getting-started/free-training.html","http://docs.databricks.com/sql/language-manual/index.html","http://docs.databricks.com/error-messages/index.html","http://www.apache.org/","https://databricks.com/privacy-policy","https://databricks.com/terms-of-use"]
21
+ loader = WebBaseLoader(web_links)
22
+ documents = loader.load()
23
+
24
+ texts = text_splitter.split_documents(documents)
25
+ db = Chroma.from_documents(texts, embeddings)
26
+ retriever = db.as_retriever()
27
+ global qa
28
+ qa = RetrievalQA.from_chain_type(llm=model_id, chain_type="stuff", retriever=retriever, return_source_documents=True)
29
+
30
 
31
  def add_text(history, text):
32
  history = history + [(text, None)]
 
59
 
60
  with gr.Blocks(css=css) as demo:
61
  with gr.Column(elem_id="col-container"):
62
+ gr.HTML(title)
 
 
 
 
 
 
 
63
  chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
64
  with gr.Row():
65
  question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
 
 
66
  question.submit(add_text, [chatbot, question], [chatbot, question]).then(
67
  bot, chatbot, chatbot
68
  )