Manglik-R committed on
Commit
5e80009
1 Parent(s): 0d88d8a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -27
app.py CHANGED
@@ -1,15 +1,20 @@
1
  import gradio as gr
2
- from langchain.document_loaders import OnlinePDFLoader
 
3
  from langchain.text_splitter import CharacterTextSplitter
4
- from langchain.embeddings import HuggingFaceHubEmbeddings
5
- from langchain.vectorstores import FAISS
6
- from langchain.llms import HuggingFaceHub
7
- from langchain.chains import RetrievalQA
8
  from datasets import load_dataset
9
  import os
 
 
 
 
 
 
 
10
 
11
- key = os.environ.get('RLS')
12
- os.environ["HUGGINGFACEHUB_API_TOKEN"] = key
13
 
14
  import sentence_transformers
15
  import faiss
@@ -19,29 +24,29 @@ def loading_pdf():
19
 
20
  def pdf_changes(pdf_doc):
21
 
22
- loader = OnlinePDFLoader(pdf_doc.name)
23
- pages = loader.load_and_split()
24
- text_splitter = CharacterTextSplitter(
25
- chunk_size=350,
26
- chunk_overlap=0,
27
- )
28
- docs = text_splitter.split_documents(pages)
29
  embeddings = HuggingFaceHubEmbeddings()
30
- db = FAISS.from_documents(docs, embeddings)
31
- llm = HuggingFaceHub(repo_id="google/flan-ul2", model_kwargs={"temperature":0.1, "max_new_tokens":300})
32
- global qa
33
- qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=db.as_retriever())
34
- return "Ready"
35
 
36
- def book_changes(book):
37
- db = FAISS.load_local( book , embeddings = HuggingFaceHubEmbeddings() )
38
- llm=HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.1, "max_new_tokens":250})
 
 
 
 
 
39
  global qa
40
- qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=db.as_retriever(), return_source_documents=True)
 
 
 
 
41
  return "Ready"
42
 
43
-
44
-
45
  def add_text(history, text):
46
  history = history + [(text, None)]
47
  return history, ""
@@ -74,12 +79,10 @@ with gr.Blocks(css=css) as demo:
74
  with gr.Column():
75
  pdf_doc = gr.File(label="Load a PDF", file_types=['.pdf'], type="file")
76
  load_pdf = gr.Button("Load PDF")
77
- Books = gr.Dropdown(label="Books", choices=[("Harry Potter and the Philosopher's Stone","Book1")] )
78
  langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
79
  chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
80
  question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
81
  submit_btn = gr.Button("Send message")
82
- Books.change(book_changes, inputs=[Books], outputs=[langchain_status], queue=False)
83
  load_pdf.click(pdf_changes, inputs=[pdf_doc], outputs=[langchain_status], queue=False)
84
  question.submit(add_text, [chatbot, question], [chatbot, question]).then(
85
  bot, chatbot, chatbot
 
1
  import gradio as gr
2
+ from langchain.llms import Replicate
3
+ from langchain.vectorstores import Pinecone
4
  from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.document_loaders import PyPDFLoader
6
+ from langchain.embeddings import HuggingFaceEmbeddings
7
+ from langchain.chains import ConversationalRetrievalChain
 
8
  from datasets import load_dataset
9
  import os
10
+ import pinecone
11
+
12
+
13
+ key = os.environ.get('API')
14
+ yeh = os.environ.get('pineapi')
15
+ os.environ["REPLICATE_API_TOKEN"] = key
16
+ pinecone.init(api_key=yeh, environment='gcp-starter')
17
 
 
 
18
 
19
  import sentence_transformers
20
  import faiss
 
24
 
25
  def pdf_changes(pdf_doc):
26
 
27
+ loader = PyPDFLoader(pdf_doc.name)
28
+ documents = loader.load()
29
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
30
+ texts = text_splitter.split_documents(documents)
31
+
 
 
32
  embeddings = HuggingFaceHubEmbeddings()
 
 
 
 
 
33
 
34
+ index_name = "chatbot"
35
+ index = pinecone.Index(index_name)
36
+ vectordb = Pinecone.from_documents(texts, embeddings, index_name=index_name)
37
+
38
+ llm = Replicate(
39
+ model="a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5",
40
+ input={"temperature": 0.2, "max_length": 3000, "length_penalty":1.5, "num_beams":3}
41
+ )
42
  global qa
43
+ qa = ConversationalRetrievalChain.from_llm(
44
+ llm,
45
+ vectordb.as_retriever(search_kwargs={'k': 2}),
46
+ return_source_documents=True
47
+ )
48
  return "Ready"
49
 
 
 
50
  def add_text(history, text):
51
  history = history + [(text, None)]
52
  return history, ""
 
79
  with gr.Column():
80
  pdf_doc = gr.File(label="Load a PDF", file_types=['.pdf'], type="file")
81
  load_pdf = gr.Button("Load PDF")
 
82
  langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
83
  chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
84
  question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
85
  submit_btn = gr.Button("Send message")
 
86
  load_pdf.click(pdf_changes, inputs=[pdf_doc], outputs=[langchain_status], queue=False)
87
  question.submit(add_text, [chatbot, question], [chatbot, question]).then(
88
  bot, chatbot, chatbot