from dotenv import load_dotenv

load_dotenv()

import os
from datetime import datetime, timedelta

import docx2txt
import google.generativeai as genai
import streamlit as st
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from PyPDF2 import PdfReader

genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Session-state defaults
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []
if "messages" not in st.session_state:
    st.session_state["messages"] = []
if "level" not in st.session_state:
    st.session_state["level"] = "Beginner"

# Seed the conversation with a greeting if the chat is empty
if not st.session_state["messages"]:
    st.session_state["messages"].append(
        {"role": "assistant", "content": "Ask Me Anything About The Uploaded Pdfs"}
    )

prompt_template = """
Answer the question as detailed as possible from the provided context. Make sure
to provide all the details. If the answer is not in the provided context, just say
"answer is not available in the context"; don't provide a wrong answer.

Context:
{context}

Question:
{question}

Answer:
"""

model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)


def get_file_text(files):
    """Extract raw text from uploaded PDF, DOCX, or plain-text files."""
    text = []
    for file in files:
        if file.type == "application/pdf":
            pdf_reader = PdfReader(file)
            for page in pdf_reader.pages:
                # extract_text() returns None for image-only pages
                text.append(page.extract_text() or "")
        elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            text.append(docx2txt.process(file))
        elif file.type == "text/plain":
            # Streamlit's UploadedFile is already an open binary buffer
            text.append(file.read().decode("utf-8"))
        else:
            st.error(f"Unsupported file type: {file.type}")
    return text


def format_timestamp(timestamp):
    """Render a timestamp as 'today', 'yesterday', or a full date-time string."""
    now = datetime.now()
    if timestamp.date() == now.date():
        return "today"
    elif timestamp.date() == (now - timedelta(days=1)).date():
        return "yesterday"
    else:
        return timestamp.strftime("%Y-%m-%d %H:%M:%S")


def main():
    c1, c2, c3 = st.columns([1, 2, 1])
    c1.image("Logo.jpg", width=130)
    c2.title("Chat Your Pdfs")

    chat_container = st.container()

    with chat_container:
        for msg in st.session_state.messages:
            if msg["role"] == "user":
                # The original chat-bubble markup did not survive extraction;
                # plain right/left-aligned divs are assumed as a stand-in.
                st.markdown(
                    f'<div style="text-align: right;">{msg["content"]}</div>',
                    unsafe_allow_html=True,
                )
            else:
                st.markdown(
                    f'<div style="text-align: left;">{msg["content"]}</div>',
                    unsafe_allow_html=True,
                )

    document1 = st.sidebar.file_uploader(
        "Document 1 (question)", accept_multiple_files=True, key="document1"
    )
    document = get_file_text(document1)

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=100000, chunk_overlap=200)
    context = ", ".join(map(str, document))
    text_chunks = text_splitter.split_text(context)

    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

    # Build the FAISS index once per upload; the original looped over the
    # chunks and rebuilt the full index for every chunk.
    if text_chunks:
        vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
        vector_store.save_local("faiss_index")

    c2.markdown("##")

    if user_question := st.chat_input():
        with chat_container:
            # Echo the new question right away; the full history is redrawn
            # on the rerun below.
            st.markdown(
                f'<div style="text-align: right;">{user_question}</div>',
                unsafe_allow_html=True,
            )
        # allow_dangerous_deserialization is required by recent
        # langchain_community releases; drop it on older versions that
        # lack the parameter.
        new_db = FAISS.load_local(
            "faiss_index", embeddings, allow_dangerous_deserialization=True
        )
        docs = new_db.similarity_search(user_question)
        response_text = chain(
            {"input_documents": docs, "question": user_question},
            return_only_outputs=False,
        )

        timestamp = datetime.now()
        st.session_state["chat_history"].append(("You", user_question, timestamp))
        st.session_state["chat_history"].append(
            ("Bot", response_text["output_text"], timestamp)
        )
        st.session_state["messages"].append({"role": "user", "content": user_question})
        st.session_state["messages"].append(
            {"role": "assistant", "content": response_text["output_text"]}
        )
        st.experimental_rerun()  # use st.rerun() on Streamlit >= 1.27

    st.sidebar.title("Chat History")
    for role, content, timestamp in st.session_state["chat_history"]:
        if role == "You":
            st.sidebar.write(f"{content} ({format_timestamp(timestamp)})")

    st.sidebar.button("Clear Chat History", on_click=clear_chat_history)


def clear_chat_history():
    st.session_state["messages"] = [
        {"role": "assistant", "content": "Ask Me Anything About The Uploaded Pdfs"}
    ]
    st.session_state["chat_history"] = []
    # No explicit rerun here: Streamlit reruns automatically after a
    # button callback, so an explicit call is unnecessary.