Spaces:
Runtime error
Runtime error
File size: 6,164 Bytes
5fb6640 dc96a4b cd806c1 f93ef66 cd806c1 48e1cc8 5fb6640 cd806c1 5bf270c cd806c1 5fb6640 cd806c1 5bf270c cd806c1 5fb6640 cd806c1 5bf270c cd806c1 082b45c dc96a4b 5bf270c d33c6e8 6f67feb d33c6e8 5bf270c cd806c1 5bf270c cd806c1 5bf270c cd806c1 dc96a4b 5bf270c c51d774 5bf270c 685f404 5bf270c a74bf92 9fe6076 5bf270c 685f404 5bf270c a74bf92 9fe6076 5bf270c cd806c1 5851d1b 5bf270c cd806c1 5bf270c cd806c1 dc96a4b cd806c1 22449d9 5f56d74 cd806c1 5bf270c 7f10941 5bf270c 7f10941 5bf270c cd806c1 f4dfa92 cd806c1 5bf270c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import HuggingFaceHub, ctransformers
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of objects accepted by ``PdfReader`` (for example
            uploaded files). ``None`` or an empty iterable yields ``""``.

    Returns:
        The text of all pages, joined in input order. A PDF that fails to
        parse is reported through ``st.error`` and does not prevent the
        remaining PDFs from being read.
    """
    page_texts = []
    for pdf in pdf_docs or []:
        # One try per document: a corrupt upload must not discard the others.
        try:
            reader = PdfReader(pdf)
            for page in reader.pages:
                # Fall back to "" so a page without extractable text can
                # never break the final join with a non-string value.
                page_texts.append(page.extract_text() or "")
        except Exception as e:
            st.error(f"Error reading PDFs: {e}")
    return "".join(page_texts)
def get_text_chunks(text):
    """Split ``text`` into chunks with a newline-based ``CharacterTextSplitter``.

    The splitter is configured with a chunk size of 800 (measured with
    ``len``) and no overlap between consecutive chunks.

    Args:
        text: The raw text to split.

    Returns:
        The list of chunks, or an empty list if splitting raised; in that
        case the error is shown with ``st.error``.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 800,
        "chunk_overlap": 0,
        "length_function": len,
    }
    try:
        return CharacterTextSplitter(**splitter_options).split_text(text)
    except Exception as e:
        st.error(f"Error splitting text into chunks: {e}")
        return []
def get_vectorstore(text_chunks):
    """Embed the text chunks and index them in a FAISS vector store.

    Args:
        text_chunks: List of strings to embed.

    Returns:
        The FAISS vector store, or ``None`` when there is nothing to embed or
        when embedding/indexing fails (the error is shown with ``st.error``).
    """
    if not text_chunks:
        # Report this directly instead of surfacing an opaque failure from
        # inside the FAISS construction.
        st.error("Error creating vector store: no text chunks to embed")
        return None
    try:
        # all-MiniLM-L6-v2 is a plain sentence-transformers model, so it is
        # loaded with the sentence-transformers wrapper rather than the
        # Instructor-specific one. Imported here so this function does not
        # depend on an edit to the module's import block.
        from langchain_community.embeddings import HuggingFaceEmbeddings

        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    except Exception as e:
        st.error(f"Error creating vector store: {e}")
        return None
def get_Hub_llm():
    """Create the Hugging Face Hub LLM for ``HuggingFaceH4/zephyr-7b-beta``.

    Returns:
        The ``HuggingFaceHub`` instance, or ``None`` if construction raised
        (the error is shown with ``st.error``).
    """
    try:
        return HuggingFaceHub(
            repo_id="HuggingFaceH4/zephyr-7b-beta",
            # ``task`` is a field of the wrapper itself; inside model_kwargs
            # it would be sent along as if it were a generation parameter.
            task="text-generation",
            model_kwargs={
                "temperature": 0.1,
                "max_length": 2048,
                "top_k": 50,
                "num_return_sequences": 3,
                "top_p": 0.95,
            },
        )
    except Exception as e:
        st.error(f"Error loading Hub LLM: {e}")
        return None
def get_local_llm(model_path="C:/llama-2-7b-chat.ggmlv3.q4_0.bin"):
    """Load a local GGML Llama model through the ``CTransformers`` wrapper.

    Args:
        model_path: Location of the model file. Defaults to the path that was
            previously hard-coded, so existing zero-argument calls behave
            the same way.

    Returns:
        The ``CTransformers`` LLM, or ``None`` if loading raised (the error
        is shown with ``st.error``).
    """
    try:
        return ctransformers.CTransformers(
            model=model_path,
            model_type="llama",
            # The LangChain wrapper takes generation settings through
            # ``config``, not as constructor keywords.
            # NOTE(review): the original passed max_length=4096, which is not
            # a ctransformers setting; context_length is assumed to be the
            # intent -- confirm.
            config={
                "max_new_tokens": 1024,
                "context_length": 4096,
                "temperature": 0.1,
            },
        )
    except Exception as e:
        st.error(f"Error loading local LLM: {e}")
        return None
def get_conversation_chain(vectorstore, llm):
    """Build a ``ConversationalRetrievalChain`` with buffered chat memory.

    Args:
        vectorstore: Vector store used to create the retriever (top 3
            results by similarity search).
        llm: Language model that answers the questions.

    Returns:
        The conversation chain, or ``None`` when an input is missing or
        chain construction raises (the error is shown with ``st.error``).
    """
    # from_llm() requires a retriever, so a chain cannot be built without a
    # vector store; report that explicitly instead of attempting a call that
    # can only raise.
    if vectorstore is None or llm is None:
        missing = "vector store" if vectorstore is None else "LLM"
        st.error(f"Error creating conversation chain: no {missing} available")
        return None

    try:
        memory = ConversationBufferMemory(
            memory_key='chat_history',
            return_messages=True,
            input_key="question",
            output_key="answer",
        )
        # search_type is an argument of as_retriever() itself; search_kwargs
        # carries only the options forwarded to the search call.
        retriever = vectorstore.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 3},
        )
        return ConversationalRetrievalChain.from_llm(
            llm=llm,
            chain_type="stuff",
            verbose=True,
            retriever=retriever,
            memory=memory,
            output_key='answer',
            return_source_documents=False,
        )
    except Exception as e:
        st.error(f"Error creating conversation chain: {e}")
        return None
def handle_userinput(user_question):
    """Send the question to the conversation chain and render the chat.

    Shows an error and returns if no chain is stored in session state.
    Otherwise the chain's ``chat_history`` is saved to session state and
    every message is rendered, with even positions shown as "User" and odd
    positions as "assistant".

    Args:
        user_question: The question text entered by the user.
    """
    chain = st.session_state.conversation
    if chain is None:
        st.error("Conversation chain is not initialized.")
        return

    try:
        result = chain({'question': user_question})
        st.session_state.chat_history = result['chat_history']
        for position, message in enumerate(st.session_state.chat_history):
            speaker = "assistant" if position % 2 else "User"
            with st.chat_message(speaker):
                st.write(message.content)
    except Exception as e:
        st.error(f"Error handling user input: {e}")
def _process_documents(pdf_docs):
    """Run the PDF -> chunks -> vector store -> LLM -> chain pipeline.

    Each stage reports its own failure with ``st.error`` and stops the
    pipeline; the resulting chain is stored in
    ``st.session_state.conversation``.
    """
    try:
        # Extract and split the text of the uploaded PDFs.
        raw_text = get_pdf_text(pdf_docs)
        text_chunks = get_text_chunks(raw_text)
        if not text_chunks:
            st.error("No text found in the PDFs or text splitting failed.")
            return

        # Index the chunks.
        vectorstore = get_vectorstore(text_chunks)
        if not vectorstore:
            st.error("Failed to create vector store.")
            return

        # Load the language model.
        llm = get_Hub_llm()
        if not llm:
            st.error("Failed to load LLM.")
            return

        # Wire everything into the conversation chain.
        st.session_state.conversation = get_conversation_chain(vectorstore, llm)
        if not st.session_state.conversation:
            st.error("Failed to create conversation chain.")
    except Exception as e:
        st.error(f"An error occurred during processing: {e}")


def main():
    """Render the Streamlit page: chat input plus the PDF upload sidebar."""
    load_dotenv()
    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")

    # Make sure both session slots exist before anything reads them.
    for key in ("conversation", "chat_history"):
        if key not in st.session_state:
            st.session_state[key] = None

    st.header("Chat with multiple PDFs ")
    user_question = st.chat_input("Ask a question about your documents:")
    if user_question:
        handle_userinput(user_question)

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here and click on 'Process'",
            accept_multiple_files=True,
        )
        if st.button("Process"):
            with st.spinner("Processing"):
                _process_documents(pdf_docs)
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()
|