# Shared by the indexing path (get_vector_store) and the query path
# (user_input) so the two can never drift apart.
_EMBEDDING_MODEL = "models/embedding-001"
_FAISS_INDEX_DIR = "faiss_index"


def get_pdf_text(pdf_docs):
    """Return the concatenated text of every page of every PDF in *pdf_docs*.

    Args:
        pdf_docs: Iterable of objects accepted by ``PdfReader`` (here, the
            files returned by the Streamlit uploader).

    Returns:
        One string with all page texts joined in document/page order; an
        empty string when *pdf_docs* is empty.
    """
    # `or ""` guards against pages for which extract_text() yields None
    # (e.g. image-only pages on some reader versions), which would otherwise
    # raise TypeError during concatenation.
    return "".join(
        page.extract_text() or ""
        for pdf in pdf_docs
        for page in PdfReader(pdf).pages
    )


def get_text_chunks(text, chunk_size=10000, chunk_overlap=1000):
    """Split *text* into overlapping chunks for embedding.

    Args:
        text: The raw text to split.
        chunk_size: Maximum size of each chunk. The default suits long
            documents; chunk_size=1000 with chunk_overlap=200 is a better fit
            for shorter PDFs.
        chunk_overlap: Amount of text shared between neighbouring chunks.

    Returns:
        The list of chunks produced by ``RecursiveCharacterTextSplitter``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)


def get_vector_store(text_chunks):
    """Embed *text_chunks* and persist the resulting FAISS index to disk.

    The index is written to the local ``faiss_index`` directory, where
    ``user_input`` later loads it from. Nothing is returned.

    Args:
        text_chunks: List of text chunks to embed.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model=_EMBEDDING_MODEL)
    vector_store = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    vector_store.save_local(_FAISS_INDEX_DIR)


def get_conversation_chain():
    """Build the "stuff" question-answering chain backed by Gemini.

    Returns:
        A chain created by ``load_qa_chain`` whose prompt expects the
        variables ``context`` (filled from the input documents) and ``query``
        (the user's question).
    """
    # The template text is kept verbatim; it is split across literals only
    # to keep source lines short.
    prompt_template = (
        "Answer the query as detailed as possible from the provided context, "
        "make sure to provide all the details, if answeris not in the provided "
        'context, just say, "Answer is not available in the provided documents", '
        "don't provide the wrong answer:\n {context}? \n Query: {query}? \n"
        "\n Answer: "
    )
    model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "query"],
    )
    return load_qa_chain(model, chain_type="stuff", prompt=prompt)


def user_input(user_question):
    """Answer *user_question* from the saved index and render the reply.

    Loads the FAISS index from disk, retrieves the most similar chunks, runs
    them through the QA chain and writes the answer to the Streamlit page.
    If no index has been built yet, a warning is shown instead.

    Args:
        user_question: The question typed by the user.
    """
    from pathlib import Path

    # Asking a question before any PDF has been processed would otherwise
    # fail when loading a non-existent index.
    if not Path(_FAISS_INDEX_DIR).is_dir():
        st.warning("Please upload and process your PDFs before asking a question.")
        return

    embeddings = GoogleGenerativeAIEmbeddings(model=_EMBEDDING_MODEL)

    # NOTE(security): allow_dangerous_deserialization unpickles the stored
    # index. That is acceptable only because the index is produced locally by
    # get_vector_store; never point this at an index from an untrusted source.
    new_db = FAISS.load_local(
        _FAISS_INDEX_DIR,
        embeddings,
        allow_dangerous_deserialization=True,
    )
    docs = new_db.similarity_search(user_question)

    chain = get_conversation_chain()

    # The key must match the prompt's input variable ("query"); passing
    # "question" makes the chain reject the call for a missing input key.
    response = chain(
        {"input_documents": docs, "query": user_question},
        return_only_outputs=True,
    )

    print(response)
    st.write("Reply: ", response["output_text"])


def main():
    """Render the Streamlit page: question box plus PDF upload sidebar."""
    st.set_page_config(page_title="PDF Chatbot")
    st.header("PDF Chatbot made with ❤")

    user_question = st.text_input("Ask a question about your documents:")
    if user_question:
        user_input(user_question)

    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here and click on 'Process'",
            accept_multiple_files=True,
        )
        if st.button("Submit & Process"):
            if not pdf_docs:
                # Nothing uploaded: there is no text to index.
                st.warning("Please upload at least one PDF before processing.")
            else:
                with st.spinner("Ruko Padh raha hu..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    # An index cannot be built from zero chunks (e.g. scanned
                    # PDFs with no extractable text), so skip indexing then.
                    if text_chunks:
                        get_vector_store(text_chunks)
                if text_chunks:
                    st.success("Saare documents padh liya. Ab swaal pucho 😤")
                else:
                    st.error("No extractable text was found in the uploaded PDFs.")


if __name__ == '__main__':
    main()