Rehman1603's picture
Update app.py
1a342d1 verified
raw
history blame
1.78 kB
import os
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains.question_answering import load_qa_chain
import gradio as gr
from langchain.embeddings import HuggingFaceEmbeddings
from langchain import HuggingFaceHub
def submitYourDocument(doc):
    """Extract the text of a PDF and split it into overlapping chunks.

    Args:
        doc: The PDF to read; it is passed unchanged to ``PdfReader``.

    Returns:
        The list of text chunks produced by
        ``CharacterTextSplitter.split_text`` for the concatenated page text.
    """
    reader = PdfReader(doc)

    # Concatenate the text of every page. Pages for which extract_text()
    # yields nothing (None or an empty string) are skipped.
    page_texts = (page.extract_text() for page in reader.pages)
    raw_text = "".join(text for text in page_texts if text)

    # Split on newlines into chunks of up to 1000 units as measured by
    # len(), with a 200-unit overlap between consecutive chunks.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return text_splitter.split_text(raw_text)
def main(doc, prompt):
    """Answer a question about an uploaded PDF document.

    The document is split into text chunks, the chunks are embedded and
    indexed with FAISS, the chunks most similar to the question are
    retrieved, and a "stuff" question-answering chain is run over them
    using the ``google/flan-t5-xxl`` model from the Hugging Face Hub.

    Args:
        doc: The uploaded PDF, forwarded to ``submitYourDocument``.
        prompt: The question to answer about the document.

    Returns:
        The answer produced by the chain, or a short explanatory message
        when no document was supplied, the question is empty, or no text
        could be extracted from the document.
    """
    # Validate the inputs before any model or index is built, so that a
    # missing upload or blank question gives a readable message instead of
    # an exception from deep inside the PDF/embedding libraries.
    if doc is None:
        return "Please upload a PDF document first."
    if not prompt or not prompt.strip():
        return "Please type a question about the uploaded document."

    chunks = submitYourDocument(doc)
    if not chunks:
        # Nothing to index (e.g. a PDF with no extractable text); stop here
        # rather than hand an empty list to the vector store.
        return "No readable text could be extracted from the uploaded document."

    embeddings = HuggingFaceEmbeddings()
    db = FAISS.from_texts(chunks, embeddings)
    llm = HuggingFaceHub(
        repo_id="google/flan-t5-xxl",
        model_kwargs={"temperature": 1, "max_length": 512},
    )
    chain = load_qa_chain(llm, chain_type="stuff")

    # Retrieve the chunks most similar to the question and let the chain
    # answer using only those chunks as context.
    docs = db.similarity_search(prompt)
    return chain.run(input_documents=docs, question=prompt)
# Gradio UI: a file upload plus a question box feed `main`, and the answer is
# shown in a single text box. The examples pre-fill both inputs.
interface = gr.Interface(
    fn=main,
    inputs=[
        gr.File(label="Upload file"),
        gr.components.Textbox(label="Type Question Related to Uploaded Document"),
    ],
    outputs=gr.components.Textbox(label="Answer.."),
    examples=[
        ["FYP_Proposal.pdf", "what is the summary of attached document?"],
        ["FYP_Proposal.pdf", "who is Mukesh Ambani?"],
        ["FYP_Proposal.pdf", "what is the title of document?"],
    ],
)

# Start the app as soon as the module is executed.
interface.launch(debug=True)