Spaces:

ubermenchh
/

chat-with-arxiv

Sleeping

App Files Community

ubermenchh committed on Nov 30, 2023

Commit

742e3f8

•

1 Parent(s): 4eb428c

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -21

app.py CHANGED Viewed

@@ -1,30 +1,28 @@
 import arxiv
 import gradio as gr
-from langchain.document_loaders import OnlinePDFLoader
-from langchain.text_splitter import CharacterTextSplitter
 from langchain.llms import HuggingFaceHub
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.vectorstores import Chroma
-from langchain.chains import RetrievalQA
-repo_id = 'mistralai/Mistral-7B-v0.1'
-client = arxiv.Client()
 def loading_paper(): return 'Loading...'
 def paper_changes(paper_id):
     paper = next(arxiv.Client().results(arxiv.Search(id_list=[paper_id])))
-    loader = OnlinePDFLoader(paper.download_pdf())
-    documents = loader.load()
-    text_splitter = CharacterTextSplitter(chunk_size=1024, chunk_overlap=0)
-    texts = text_splitter.split_documents(documents)
-    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
-    db = Chroma.from_documents(texts, embeddings, persist_directory="chroma_db")
-    retriever = db.as_retriever()
-    llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={'temperature': 0.5, 'max_new_tokens': 2096})
-    global qa
-    qa = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=retriever, return_source_documents=True)
-    return 'Ready!!'
 def add_text(history, text):
     history = history + [(text, None)]
@@ -32,12 +30,12 @@ def add_text(history, text):
 def bot(history):
     response = infer(history[-1][0])
-    history[-1][1] = response['result']
     return history
 def infer(question):
-    result = qa({'query': question})
-    return result
 with gr.Blocks(theme='WeixuanYuan/Soft_dark') as demo:
     with gr.Column():

 import arxiv
 import gradio as gr
+from llama_index import (
+    VectorStoreIndex,
+    ServiceContext,
+    SimpleDirectoryReader,
+    Document
+)
 from langchain.llms import HuggingFaceHub
+from llama_index.llms import LangChainLLM
+repo_id = 'HuggingFaceH4/zephyr-7b-beta'
 def loading_paper(): return 'Loading...'
 def paper_changes(paper_id):
     paper = next(arxiv.Client().results(arxiv.Search(id_list=[paper_id])))
+    docs = SimpleDirectoryReader(input_files=[paper.download_pdf()]).load_data()
+    doc = Document(text='\n\n'.join([doc.text for doc in docs]))
+    llm = LangChainLLM(llm=HuggingFaceHub(repo_id=repo_id, model_kwargs={'temperature': 0.3}))
+    service_context = ServiceContext.from_defaults(llm=llm, embed_model="local:BAAI/bge-small-en-v1.5")
+    index = VectorStoreIndex.from_documents([doc], service_context=service_context)
+    global query_engine
+    query_engine = index.as_query_engine()
+    return 'Ready!!!'
 def add_text(history, text):
     history = history + [(text, None)]
 def bot(history):
     response = infer(history[-1][0])
+    history[-1][1] = response
     return history
 def infer(question):
+    response = query_engine.query(question)
+    return response
 with gr.Blocks(theme='WeixuanYuan/Soft_dark') as demo:
     with gr.Column():