"""Gradio app that answers questions about an uploaded PDF document.

The PDF text is extracted, split into overlapping chunks, embedded into an
in-memory FAISS index, and the chunks most similar to the question are passed
to a "stuff" question-answering chain backed by ``google/flan-t5-xxl``.
"""
import os
from functools import lru_cache

from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains.question_answering import load_qa_chain
import gradio as gr
from langchain.embeddings import HuggingFaceEmbeddings
from langchain import HuggingFaceHub

# Chunking parameters handed to CharacterTextSplitter.
_CHUNK_SEPARATOR = "\n"
_CHUNK_SIZE = 1000
_CHUNK_OVERLAP = 200

# Settings for the model that generates the answers.
_LLM_REPO_ID = "google/flan-t5-xxl"
_LLM_KWARGS = {"temperature": 1, "max_length": 512}


def submitYourDocument(doc):
    """Extract the text of a PDF and split it into overlapping chunks.

    Args:
        doc: The PDF to read; handed unchanged to ``PdfReader``.

    Returns:
        The list of text chunks produced by ``CharacterTextSplitter``.
        Pages for which ``extract_text()`` returns nothing are skipped.
    """
    reader = PdfReader(doc)

    # Collect the per-page text and join once rather than growing a string
    # with ``+=``; pages are concatenated with no separator.
    page_texts = (page.extract_text() for page in reader.pages)
    raw_text = "".join(text for text in page_texts if text)

    text_splitter = CharacterTextSplitter(
        separator=_CHUNK_SEPARATOR,
        chunk_size=_CHUNK_SIZE,
        chunk_overlap=_CHUNK_OVERLAP,
        length_function=len,
    )
    return text_splitter.split_text(raw_text)


@lru_cache(maxsize=None)
def _get_embeddings():
    """Return a shared HuggingFaceEmbeddings instance, built on first use."""
    return HuggingFaceEmbeddings()


@lru_cache(maxsize=None)
def _get_llm():
    """Return a shared HuggingFaceHub client, built on first use."""
    return HuggingFaceHub(repo_id=_LLM_REPO_ID, model_kwargs=dict(_LLM_KWARGS))


def main(doc, prompt: str) -> str:
    """Answer ``prompt`` using the content of the uploaded PDF ``doc``.

    Args:
        doc: The uploaded file as supplied by the Gradio ``File`` input, or
            ``None`` when nothing was uploaded.
        prompt: The user's question about the document.

    Returns:
        The answer produced by the question-answering chain, or a short
        explanatory message when there is no document or no text to search.
    """
    if doc is None:
        # The form was submitted without a file; there is nothing to read.
        return "Please upload a PDF document."

    chunks = submitYourDocument(doc)
    if not chunks:
        # E.g. a scanned PDF with no text layer: there is nothing to index,
        # so stop here instead of building a vector store from zero chunks.
        return "No extractable text was found in the uploaded document."

    # The index is rebuilt per request because every upload may differ; the
    # embedding model and LLM client are reused across requests.
    db = FAISS.from_texts(chunks, _get_embeddings())
    chain = load_qa_chain(_get_llm(), chain_type="stuff")

    docs = db.similarity_search(prompt)
    return chain.run(input_documents=docs, question=prompt)


interface = gr.Interface(
    fn=main,
    inputs=[
        gr.File(label="Upload file"),
        gr.components.Textbox(label="Type Question Related to Uploaded Document"),
    ],
    outputs=gr.components.Textbox(label="Answer.."),
    examples=[
        ["FYP_Proposal.pdf", "what is the summary of attached document?"],
        ["FYP_Proposal.pdf", "who is Mukesh Ambani?"],
        ["FYP_Proposal.pdf", "what is the title of document?"],
    ],
)

# Launched at module level, as in the original script.
interface.launch(debug=True)