# DocWizard: a Streamlit app that answers questions about uploaded PDF files
# using Retrieval-Augmented Generation (FAISS retrieval + Google Gemini).
#
# NOTE: this header is a comment rather than a module docstring so that it can
# never be rendered into the page by Streamlit.

import os

import google.generativeai as genai
import streamlit as st
from dotenv import load_dotenv
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from PyPDF2 import PdfReader

# Directory (relative to the working directory) where the FAISS index is saved.
FAISS_INDEX_DIR = "faiss_index"
# Google Generative AI model identifiers used for embedding and answering.
EMBEDDING_MODEL = "models/embedding-001"
CHAT_MODEL = "gemini-pro"
# Default splitter settings, in characters.
DEFAULT_CHUNK_SIZE = 10000
DEFAULT_CHUNK_OVERLAP = 1000

MISSING_API_KEY_MESSAGE = "Google API key is missing. Please add it to the .env file."

# Load environment variables from .env file
load_dotenv()

# Fetch the Google API key from the .env file
api_key = os.getenv("GOOGLE_API_KEY")

# Set the page configuration for the Streamlit app.
# This must remain the first Streamlit command executed by the script.
st.set_page_config(page_title="DocWizard Instant Insights and Analysis", layout="wide")

# Header and Instructions
st.markdown("""
## Document Intelligence Explorer 🤖

This chatbot utilizes the Retrieval-Augmented Generation (RAG) framework with Google's Generative AI model Gemini-PRO. It processes uploaded PDF documents by segmenting them into chunks, creating a searchable vector store, and generating precise answers to your questions. This method ensures high-quality, contextually relevant responses for an efficient user experience.

### How It Works

1. **Upload Your Documents**: You can upload multiple PDF files simultaneously for comprehensive analysis.

2. **Ask a Question**: After processing the documents, type your question related to the content of your uploaded documents for a detailed answer.
""")


def get_pdf_text(pdf_docs) -> str:
    """
    Extract text from uploaded PDF documents.

    Args:
        pdf_docs: Iterable of PDF sources accepted by ``PdfReader`` (here, the
            files returned by ``st.file_uploader``).

    Returns:
        The text of every page of every document, concatenated in order.
        Pages for which ``extract_text()`` returns nothing are skipped.
    """
    page_texts = []
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)
    # Join once instead of growing a string with += inside the loop.
    return "".join(page_texts)


def get_text_chunks(text: str, chunk_size: int = DEFAULT_CHUNK_SIZE,
                    chunk_overlap: int = DEFAULT_CHUNK_OVERLAP) -> list:
    """
    Split text into manageable chunks for processing.

    Args:
        text: The full text to split.
        chunk_size: Maximum chunk length passed to the splitter.
        chunk_overlap: Overlap between consecutive chunks passed to the splitter.

    Returns:
        The list of chunks produced by ``RecursiveCharacterTextSplitter``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)


def get_vector_store(text_chunks, api_key) -> bool:
    """
    Create and save a FAISS vector store from text chunks.

    The index is written to ``FAISS_INDEX_DIR``. Success and failure are both
    reported in the Streamlit UI.

    Args:
        text_chunks: List of text chunks to embed.
        api_key: Google API key used by the embedding model.

    Returns:
        True if the index was created and saved, False otherwise.
    """
    # Scanned/image-only PDFs yield no text; report that explicitly instead of
    # surfacing an opaque error from the embedding/FAISS call.
    if not text_chunks:
        st.error("Error creating FAISS index: no extractable text was found in the uploaded PDF files.")
        return False

    try:
        embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL, google_api_key=api_key)
        vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
        vector_store.save_local(FAISS_INDEX_DIR)
    except Exception as e:  # UI boundary: show any failure to the user.
        st.error(f"Error creating FAISS index: {e}")
        return False

    st.success("FAISS index created and saved successfully.")
    return True


def get_conversational_chain(api_key):
    """
    Set up the conversational chain using the Gemini-PRO model.

    Args:
        api_key: Google API key used by the chat model.

    Returns:
        A "stuff" question-answering chain built with ``load_qa_chain`` whose
        prompt takes ``context`` and ``question`` variables.
    """
    prompt_template = """
    Answer the question as detailed as possible from the provided context. If the answer is not in the provided context,
    say "Answer is not available in the context". Do not provide incorrect information.\n\n
    Context:\n{context}\n
    Question:\n{question}\n
    Answer:
    """
    model = ChatGoogleGenerativeAI(model=CHAT_MODEL, temperature=0.3, google_api_key=api_key)
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    return load_qa_chain(model, chain_type="stuff", prompt=prompt)


def user_input(user_question, api_key):
    """
    Handle user input and generate a response from the chatbot.

    Loads the saved FAISS index, retrieves the chunks most similar to the
    question, runs the QA chain over them and writes the answer to the page.
    Every failure is reported with ``st.error`` instead of being raised.

    Args:
        user_question: The question typed by the user.
        api_key: Google API key used for embeddings and the chat model.
    """
    if not api_key:
        st.error(MISSING_API_KEY_MESSAGE)
        return

    if not os.path.isdir(FAISS_INDEX_DIR):
        st.error(
            "Error loading FAISS index or generating response: no processed documents found. "
            "Please upload PDF files and click 'Submit & Process' first."
        )
        return

    try:
        embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL, google_api_key=api_key)
        # SECURITY NOTE: allow_dangerous_deserialization=True unpickles data
        # from disk. This is acceptable only because the index is written
        # locally by get_vector_store; never point this at an untrusted index.
        new_db = FAISS.load_local(FAISS_INDEX_DIR, embeddings, allow_dangerous_deserialization=True)
        docs = new_db.similarity_search(user_question)
        chain = get_conversational_chain(api_key)
        response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
    except Exception as e:  # UI boundary: index, network and API errors all land here.
        st.error(f"Error loading FAISS index or generating response: {e}")
        return

    st.write("Reply:", response["output_text"])


def main():
    """
    Main function to run the Streamlit app.

    Renders the question box in the main area and the PDF upload/processing
    controls in the sidebar.
    """
    st.header("AI Assistant 🤖")

    user_question = st.text_input("Ask a Question from the PDF Files", key="user_question")

    if st.button("Generate Text", key="generate_button"):  # Add a button to generate text
        if user_question:  # Trigger user input function only if there's a question
            with st.spinner("Generating result..."):  # Display spinner while generating
                user_input(user_question, api_key)
        else:
            st.warning("Please enter a question first.")

    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on the Submit & Process Button",
            accept_multiple_files=True,
            key="pdf_uploader",
        )

        if st.button("Submit & Process", key="process_button"):
            if not api_key:
                st.error(MISSING_API_KEY_MESSAGE)
                return

            if not pdf_docs:
                st.error("No PDF files uploaded. Please upload at least one PDF file to proceed.")
                return

            with st.spinner("Processing..."):
                try:
                    raw_text = get_pdf_text(pdf_docs)
                except Exception as e:  # Corrupted or encrypted PDFs must not crash the app.
                    st.error(f"Error reading PDF files: {e}")
                    return
                text_chunks = get_text_chunks(raw_text)
                index_created = get_vector_store(text_chunks, api_key)

            # Only claim success when the index was actually built and saved.
            if index_created:
                st.success("Processing complete. You can now ask questions based on the uploaded documents.")


if __name__ == "__main__":
    main()