import streamlit as st
import os
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceInstructEmbeddings, HuggingFaceEmbeddings, CohereEmbeddings
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.chat_models import ChatCohere
from langchain_community.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from htmlTemplates import css, bot_template, user_template
from langchain_community.llms import HuggingFaceHub, HuggingFaceTextGenInference

import torch
import transformers
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoTokenizer
from torch import cuda, bfloat16

import langchain

langchain.verbose = False
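
# Configuration is read from a .env file via load_dotenv() in main(). The code below reads
# HUGGINGFACEHUB_API_TOKEN and INFERENCE_URL explicitly; the OpenAI and Cohere clients are
# presumably configured through their usual environment variables (e.g. OPENAI_API_KEY,
# COHERE_API_KEY), which are not referenced directly here.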


def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF."""
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() can return None for pages without extractable text
            text += page.extract_text() or ""
    return text


def get_text_chunks(text):
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=500,
        chunk_overlap=100,
        length_function=len
    )
    chunks = text_splitter.split_text(text)
    return chunks


def get_vectorstore(text_chunks, selected_embedding):
    print('Selected Embedding: ' + selected_embedding)
    if selected_embedding == 'OpenAI':
        embeddings = OpenAIEmbeddings()
    elif selected_embedding == 'Instructor-xl':
        embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
    elif selected_embedding == 'Cohere-multilingual-v3.0':
        embeddings = CohereEmbeddings(model="embed-multilingual-v3.0")
    else:
        raise ValueError(f"Unknown embedding: {selected_embedding}")

    # Embed the chunks, persist the index so it can be reloaded later, and return it.
    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    vectorstore.save_local("faiss_index")
    return vectorstore


def load_vectorstore(selected_embedding):
    print('Selected Embedding: ' + selected_embedding)
    if selected_embedding == 'OpenAI':
        embeddings = OpenAIEmbeddings()
    elif selected_embedding == 'Instructor-xl':
        embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
    elif selected_embedding == 'Cohere-multilingual-v3.0':
        embeddings = CohereEmbeddings(model="embed-multilingual-v3.0")
    else:
        raise ValueError(f"Unknown embedding: {selected_embedding}")

    # Reload the index saved by get_vectorstore(); the same embedding model must be selected.
    vectorstore = FAISS.load_local("faiss_index", embeddings)
    return vectorstore
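
# Note: newer langchain_community releases may require passing
# allow_dangerous_deserialization=True to FAISS.load_local, since the saved index uses
# pickle; only the locally written "faiss_index" folder is loaded here.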


def get_conversation_chain(vectorstore, selected_llm, selected_temperature):
    print('Selected LLM: ' + selected_llm)
    print('Selected Temperature: ' + str(selected_temperature))

    if selected_llm == 'GPT 3.5':
        openai_model = "gpt-3.5-turbo"
        llm = ChatOpenAI(model=openai_model, temperature=selected_temperature)
    elif selected_llm == 'Llama2 local':
        # Run Llama 2 locally through a transformers pipeline.
        model_id = 'meta-llama/Llama-2-7b-chat-hf'
        hf_auth = os.environ.get("HUGGINGFACEHUB_API_TOKEN")

        model_config = transformers.AutoConfig.from_pretrained(
            model_id,
            token=hf_auth
        )

        device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

        if 'cuda' in device:
            # Quantize to 4-bit (NF4) so the 7B model fits in GPU memory.
            bnb_config = transformers.BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type='nf4',
                bnb_4bit_use_double_quant=True,
                bnb_4bit_compute_dtype=bfloat16
            )

            model = transformers.AutoModelForCausalLM.from_pretrained(
                model_id,
                trust_remote_code=True,
                config=model_config,
                quantization_config=bnb_config,
                device_map='auto',
                token=hf_auth
            )
        else:
            # No GPU available: load the model unquantized on CPU.
            model = transformers.AutoModelForCausalLM.from_pretrained(
                model_id,
                trust_remote_code=True,
                config=model_config,
                device_map='auto',
                token=hf_auth
            )

        model.eval()
        print(f"Model loaded on {device}")

        tokenizer = transformers.AutoTokenizer.from_pretrained(
            model_id,
            token=hf_auth
        )

        pipeline = transformers.pipeline(
            torch_dtype=torch.float32,
            model=model,
            tokenizer=tokenizer,
            return_full_text=True,
            task='text-generation',
            temperature=selected_temperature,
            max_new_tokens=512,
            repetition_penalty=1.1
        )

        # Wrap the transformers pipeline so it can be used as a LangChain LLM.
        llm = HuggingFacePipeline(pipeline=pipeline)

    elif selected_llm == 'Llama2 inference':
        llm = HuggingFaceTextGenInference(
            inference_server_url=os.environ.get("INFERENCE_URL"),
            max_new_tokens=50,
            timeout=1200,
            temperature=selected_temperature
        )

    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True, output_key='answer')

    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
        return_source_documents=True,
        verbose=True,
    )

    return conversation_chain


def handle_userinput(user_question):
    response = st.session_state.conversation.invoke({'question': user_question})

    answer = response.get("answer")
    sources = response.get("source_documents", [])

    with st.expander("Sources"):
        st.write(str(sources))

    st.session_state.chat_history = response['chat_history']

    # chat_history alternates user and assistant messages, so render them in turn.
    for i, message in enumerate(st.session_state.chat_history):
        if i % 2 == 0:
            st.write(user_template.replace(
                "{{MSG}}", message.content), unsafe_allow_html=True)
        else:
            st.write(bot_template.replace(
                "{{MSG}}", message.content), unsafe_allow_html=True)


def main():
    load_dotenv()
    st.set_page_config(page_title="VerAi",
                       page_icon=":books:")
    st.write(css, unsafe_allow_html=True)

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            "Upload your new PDFs here and click on 'Process' or load the last upload by clicking on 'Load'",
            accept_multiple_files=True)

        selected_embedding = st.radio("Which Embedding?", ["Cohere-multilingual-v3.0", "OpenAI", "Instructor-xl"])
        selected_llm = st.radio("Which LLM?", ["GPT 3.5", "Llama2 local", "Llama2 inference"])
        selected_temperature = st.slider('Temperature?', 0.0, 1.0, 0.1)

        if st.button("Process"):
            with st.spinner("Processing"):
                # Extract, chunk, and embed the uploaded PDFs, then build the chat chain.
                raw_text = get_pdf_text(pdf_docs)
                text_chunks = get_text_chunks(raw_text)
                vectorstore = get_vectorstore(text_chunks, selected_embedding)
                st.session_state.conversation = get_conversation_chain(
                    vectorstore, selected_llm, selected_temperature)

        if st.button("Load"):
            with st.spinner("Processing"):
                # Reuse the FAISS index saved by a previous "Process" run.
                vectorstore = load_vectorstore(selected_embedding)
                st.session_state.conversation = get_conversation_chain(
                    vectorstore, selected_llm, selected_temperature)

    if st.session_state.conversation:
        st.header("VerAi :books:")
        user_question = st.text_input("Ask a question below")
        if user_question:
            handle_userinput(user_question)


if __name__ == '__main__':
    main()