# app.py — "Chat with multiple PDFs" Streamlit app
# Author: Adrian73 — commit 685f404 (verified)
import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import HuggingFaceHub, ctransformers
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF.

    Args:
        pdf_docs: iterable of uploaded file-like objects accepted by PdfReader.

    Returns:
        str: all extracted page text joined together; "" if nothing was read
        (errors are reported to the UI via st.error, not raised).
    """
    text = ""
    try:
        for pdf in pdf_docs:
            pdf_reader = PdfReader(pdf)
            for page in pdf_reader.pages:
                # extract_text() returns None for pages with no extractable
                # text (e.g. scanned images); `or ""` avoids a TypeError on +=.
                text += page.extract_text() or ""
    except Exception as e:
        st.error(f"Error reading PDFs: {e}")
    return text
def get_text_chunks(text):
    """Split raw text into newline-separated chunks of at most 800 characters.

    Returns the list of chunks, or [] if splitting fails (the error is shown
    in the UI via st.error).
    """
    try:
        splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=800,
            chunk_overlap=0,
            length_function=len,
        )
        return splitter.split_text(text)
    except Exception as e:
        st.error(f"Error splitting text into chunks: {e}")
        return []
def get_vectorstore(text_chunks):
    """Embed the text chunks and index them in an in-memory FAISS store.

    Args:
        text_chunks: list of strings to embed and index.

    Returns:
        FAISS vector store, or None on failure (error shown via st.error).
    """
    try:
        # all-MiniLM-L6-v2 is a plain sentence-transformers model, not an
        # INSTRUCTOR model, so HuggingFaceEmbeddings is the matching wrapper
        # (HuggingFaceInstructEmbeddings expects instruction-tuned encoders
        # such as hkunlp/instructor-*).
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    except Exception as e:
        st.error(f"Error creating vector store: {e}")
        vectorstore = None
    return vectorstore
def get_Hub_llm():
    """Instantiate the zephyr-7b-beta model hosted on the Hugging Face Hub.

    Returns the HuggingFaceHub LLM, or None on failure (error shown via
    st.error).
    """
    generation_kwargs = {
        "temperature": 0.1,
        "max_length": 2048,
        "top_k": 50,
        "num_return_sequences": 3,
        "task": "text-generation",
        "top_p": 0.95,
    }
    try:
        return HuggingFaceHub(
            repo_id="HuggingFaceH4/zephyr-7b-beta",
            model_kwargs=generation_kwargs,
        )
    except Exception as e:
        st.error(f"Error loading Hub LLM: {e}")
        return None
def get_local_llm(model_path="C:/llama-2-7b-chat.ggmlv3.q4_0.bin"):
    """Load a local GGML Llama-2 chat model through ctransformers.

    Args:
        model_path: filesystem path to the GGML weights file. Defaults to the
            original hard-coded location for backward compatibility; callers
            on other machines can now pass their own path.

    Returns:
        The CTransformers LLM, or None on failure (error shown via st.error).
    """
    try:
        llm = ctransformers.CTransformers(
            model=model_path,
            model_type="llama",
            max_new_tokens=1024,
            max_length=4096,
            temperature=0.1,
        )
    except Exception as e:
        st.error(f"Error loading local LLM: {e}")
        llm = None
    return llm
def get_conversation_chain(vectorstore, llm):
    """Build a ConversationalRetrievalChain with buffered chat memory.

    Args:
        vectorstore: FAISS store used as the retriever. A retriever is
            mandatory for ConversationalRetrievalChain, so a falsy value
            means no chain can be built.
        llm: the language model that powers the chain.

    Returns:
        ConversationalRetrievalChain, or None on failure (error shown via
        st.error).
    """
    try:
        memory = ConversationBufferMemory(
            memory_key='chat_history',
            return_messages=True,
            input_key="question",
            output_key="answer")
        if not vectorstore:
            # The original else-branch called from_llm() without a retriever,
            # which always raised inside from_llm and fell through to the
            # except handler returning None. Fail explicitly instead.
            st.error("Error creating conversation chain: no vector store available.")
            return None
        conversation_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            chain_type="stuff",
            verbose=True,
            retriever=vectorstore.as_retriever(search_kwargs={"k": 3, "search_type": "similarity"}),
            memory=memory,
            output_key='answer',
            return_source_documents=False
        )
    except Exception as e:
        st.error(f"Error creating conversation chain: {e}")
        conversation_chain = None
    return conversation_chain
def handle_userinput(user_question):
    """Run the question through the conversation chain and render the chat.

    Messages at even indices in the chain's chat history are treated as the
    user's turns, odd indices as the assistant's.
    """
    if st.session_state.conversation is None:
        st.error("Conversation chain is not initialized.")
        return
    try:
        response = st.session_state.conversation({'question': user_question})
        st.session_state.chat_history = response['chat_history']
        for turn, message in enumerate(st.session_state.chat_history):
            role = "User" if turn % 2 == 0 else "assistant"
            with st.chat_message(role):
                st.write(message.content)
    except Exception as e:
        st.error(f"Error handling user input: {e}")
def _process_documents(pdf_docs):
    """Run the ingestion pipeline: PDFs -> text -> chunks -> store -> chain.

    Stores the resulting chain in st.session_state.conversation. On an early
    failure the previous conversation (if any) is left untouched; each stage
    reports its own error via st.error.
    """
    raw_text = get_pdf_text(pdf_docs)
    text_chunks = get_text_chunks(raw_text)
    if not text_chunks:
        st.error("No text found in the PDFs or text splitting failed.")
        return
    vectorstore = get_vectorstore(text_chunks)
    if not vectorstore:
        st.error("Failed to create vector store.")
        return
    llm = get_Hub_llm()
    if not llm:
        st.error("Failed to load LLM.")
        return
    st.session_state.conversation = get_conversation_chain(vectorstore, llm)
    if not st.session_state.conversation:
        st.error("Failed to create conversation chain.")


def main():
    """Streamlit entry point: page setup, chat input, and PDF-upload sidebar."""
    load_dotenv()
    st.set_page_config(page_title="Chat with multiple PDFs",
                       page_icon=":books:")

    # Initialise session state on the first run of the script.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with multiple PDFs ")
    user_question = st.chat_input("Ask a question about your documents:")
    if user_question:
        handle_userinput(user_question)

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
        if st.button("Process"):
            with st.spinner("Processing"):
                try:
                    st.session_state.conversation = None if False else None  # no-op removed below
                except Exception:
                    pass
                try:
                    _process_documents(pdf_docs)
                except Exception as e:
                    st.error(f"An error occurred during processing: {e}")


if __name__ == '__main__':
    main()