import os
import asyncio

import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
import google.generativeai as genai
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv

# Workaround for the event loop error the Google client libraries raise
# when Streamlit runs this script in a worker thread.
asyncio.set_event_loop(asyncio.new_event_loop())

load_dotenv()

# Set the Google application credentials.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials.json"

# Get the API key from environment variables (the credential path is already set).
api_key = os.getenv("GOOGLE_API_KEY")
if api_key is None:
    st.error("GOOGLE_API_KEY environment variable not set. Please check your secrets.")
else:
    # Configure the Generative AI API key.
    genai.configure(api_key=api_key)

    # Define the generation configuration.
    generation_config = {
        "temperature": 0.4,
        "top_p": 0.95,
        "top_k": 64,
        "max_output_tokens": 8192,
        "response_mime_type": "text/plain",
    }

    # Initialize a Generative AI model with the generation configuration.
    # (The QA chain below builds its own ChatGoogleGenerativeAI instance.)
    model = genai.GenerativeModel(
        model_name="gemini-1.5-flash-latest",
        generation_config=generation_config,
    )


def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF."""
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() returns None for image-only pages.
            text += page.extract_text() or ""
    return text


def get_text_chunks(text):
    """Split the raw text into overlapping chunks for embedding."""
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
    return text_splitter.split_text(text)


def get_vector_store(text_chunks):
    """Embed the chunks and persist a FAISS index to the faiss_index directory."""
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    vector_store.save_local("faiss_index")
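# A minimal sketch (not part of the app flow) of how the index persisted by
# get_vector_store() can be reloaded and queried on its own. The
# allow_dangerous_deserialization flag is required by recent LangChain
# releases because load_local() unpickles the saved docstore; older releases
# do not accept the argument.
#
#   embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
#   db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
#   docs = db.similarity_search("What is this PDF about?", k=4)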
def get_conversational_chain():
    prompt_template = """
    You are a helpful and informative AI assistant. Your primary goal is to answer questions based on the provided context, which consists of user-uploaded PDF documents.

    Instructions:
    1. First, thoroughly analyze the content of the user-uploaded PDFs to extract information relevant to the question.
    2. If the answer can be fully derived from the PDF content, provide it directly, ensuring accuracy and completeness.
    3. If the PDF content is insufficient to answer the question fully:
       a. Use your knowledge and reasoning abilities to provide the best possible answer.
       b. Clearly indicate when you are supplementing the PDF content with your own knowledge by starting the answer with "Based on my knowledge..." or a similar phrase.
    4. If you are uncertain about the answer, or if the PDFs and your knowledge do not provide enough information to form a conclusive answer, state clearly that the information is not sufficient for a definitive answer.
    5. Avoid fabricating information or making assumptions that cannot be supported by the PDF content or reliable external knowledge.

    Uploaded PDF Content:\n{context}

    Question: \n{question}

    Answer:
    """
    model = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", temperature=0.3)
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
    return chain


# Cache the FAISS index to avoid reprocessing unless the uploaded PDFs change.
# st.cache_resource is used instead of st.cache_data because the returned
# FAISS store wraps a native index that cannot be pickled by the data cache.
@st.cache_resource(show_spinner=False)
def process_pdfs(pdf_docs):
    if pdf_docs:
        with st.spinner("Processing PDF files..."):
            raw_text = get_pdf_text(pdf_docs)
            text_chunks = get_text_chunks(raw_text)
            get_vector_store(text_chunks)
        st.success("PDFs processed and ready for queries.")
        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        # load_local() unpickles the saved docstore, so recent LangChain
        # releases require this explicit opt-in.
        return FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    return None


async def user_input(user_question):
    """Retrieve relevant chunks from the saved index and answer the question."""
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)
    chain = get_conversational_chain()
    response = chain(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )
    st.write("Reply: ", response["output_text"])


def main():
    st.set_page_config(page_title="Chat PDF")
    st.header("Chat with PDF using Gemini💁")

    # Streamlit reruns this script on every interaction, so the index handle
    # must live in session state; a plain local variable would reset to None
    # before the user ever asks a question.
    if "faiss_index" not in st.session_state:
        st.session_state.faiss_index = None

    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on the Submit & Process Button",
            accept_multiple_files=True,
        )
        if st.button("Submit & Process"):
            st.session_state.faiss_index = process_pdfs(pdf_docs)

    user_question = st.text_input("Ask a Question from the PDF Files")
    if user_question:
        if st.session_state.faiss_index is not None:
            asyncio.run(user_input(user_question))
        else:
            st.warning("Please upload and process PDFs first.")


if __name__ == "__main__":
    main()
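# Usage sketch (assumptions, not from the original source: the script is saved
# as app.py, and a .env file alongside it defines GOOGLE_API_KEY):
#
#   pip install streamlit PyPDF2 langchain langchain-google-genai \
#       google-generativeai faiss-cpu python-dotenv
#   streamlit run app.py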