import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_community.llms import HuggingFaceHub, CTransformers


def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page in the uploaded PDFs."""
    text = ""
    try:
        for pdf in pdf_docs:
            pdf_reader = PdfReader(pdf)
            for page in pdf_reader.pages:
                # extract_text() returns None for pages with no extractable
                # text (e.g. scanned images), so guard against concatenating None
                text += page.extract_text() or ""
    except Exception as e:
        st.error(f"Error reading PDFs: {e}")
    return text


def get_text_chunks(text):
    """Split the raw text into newline-separated chunks of ~800 characters."""
    try:
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=800,
            chunk_overlap=0,
            length_function=len,
        )
        chunks = text_splitter.split_text(text)
    except Exception as e:
        st.error(f"Error splitting text into chunks: {e}")
        chunks = []
    return chunks


def get_vectorstore(text_chunks):
    """Embed the chunks and index them in an in-memory FAISS store."""
    try:
        # all-MiniLM-L6-v2 is a plain sentence-transformers model, so it
        # belongs with HuggingFaceEmbeddings rather than the instructor-tuned
        # HuggingFaceInstructEmbeddings wrapper
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    except Exception as e:
        st.error(f"Error creating vector store: {e}")
        vectorstore = None
    return vectorstore


def get_Hub_llm():
    """Load zephyr-7b-beta through the Hugging Face Hub inference API."""
    try:
        llm = HuggingFaceHub(
            repo_id="HuggingFaceH4/zephyr-7b-beta",
            # task is a constructor argument, not a generation parameter
            task="text-generation",
            model_kwargs={
                "temperature": 0.1,
                "max_length": 2048,
                "top_k": 50,
                "num_return_sequences": 3,
                "top_p": 0.95,
            },
        )
    except Exception as e:
        st.error(f"Error loading Hub LLM: {e}")
        llm = None
    return llm


def get_local_llm():
    """Load a quantized Llama 2 model from disk via ctransformers."""
    try:
        llm = CTransformers(
            model="C:/llama-2-7b-chat.ggmlv3.q4_0.bin",
            model_type="llama",
            # generation settings go in the config dict; context_length stands
            # in for the original max_length, which ctransformers does not
            # recognize
            config={
                "max_new_tokens": 1024,
                "context_length": 4096,
                "temperature": 0.1,
            },
        )
    except Exception as e:
        st.error(f"Error loading local LLM: {e}")
        llm = None
    return llm


def get_conversation_chain(vectorstore, llm):
    """Wire the retriever, LLM, and chat memory into a retrieval chain."""
    try:
        if vectorstore is None:
            # ConversationalRetrievalChain requires a retriever, so there is
            # no fallback path without a vector store
            st.error("Cannot create a conversation chain without a vector store.")
            return None
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
            input_key="question",
            output_key="answer",
        )
        conversation_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            chain_type="stuff",
            verbose=True,
            # search_type is an as_retriever() argument, not a search_kwargs key
            retriever=vectorstore.as_retriever(
                search_type="similarity", search_kwargs={"k": 3}
            ),
            memory=memory,
            output_key="answer",
            return_source_documents=False,
        )
    except Exception as e:
        st.error(f"Error creating conversation chain: {e}")
        conversation_chain = None
    return conversation_chain


def handle_userinput(user_question):
    if st.session_state.conversation is None:
        st.error("Conversation chain is not initialized.")
        return
    try:
        response = st.session_state.conversation({"question": user_question})
        st.session_state.chat_history = response["chat_history"]
        # memory stores alternating turns, so even indices are user messages
        # and odd indices are assistant messages
        for i, message in enumerate(st.session_state.chat_history):
            if i % 2 == 0:
                with st.chat_message("user"):
                    st.write(message.content)
            else:
                with st.chat_message("assistant"):
                    st.write(message.content)
    except Exception as e:
        st.error(f"Error handling user input: {e}")


def main():
    # load_dotenv() pulls HUGGINGFACEHUB_API_TOKEN (needed by HuggingFaceHub)
    # from a local .env file into the environment
    load_dotenv()
    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with multiple PDFs :books:")
    user_question = st.chat_input("Ask a question about your documents:")
    if user_question:
        handle_userinput(user_question)

    with st.sidebar:
st.subheader("Your documents") pdf_docs = st.file_uploader( "Upload your PDFs here and click on 'Process'", accept_multiple_files=True) if st.button("Process"): with st.spinner("Processing"): try: # get pdf text raw_text = get_pdf_text(pdf_docs) # get the text chunks text_chunks = get_text_chunks(raw_text) if not text_chunks: st.error("No text found in the PDFs or text splitting failed.") return # create vector store vectorstore = get_vectorstore(text_chunks) if not vectorstore: st.error("Failed to create vector store.") return # create llm llm = get_Hub_llm() if not llm: st.error("Failed to load LLM.") return # create conversation chain st.session_state.conversation = get_conversation_chain(vectorstore, llm) if not st.session_state.conversation: st.error("Failed to create conversation chain.") except Exception as e: st.error(f"An error occurred during processing: {e}") if __name__ == '__main__': main()