# # Load in packages
# +
import os
# Need to overwrite version of gradio present in Huggingface spaces as it doesn't have like buttons/avatars (Oct 2023)
#os.system("pip uninstall -y gradio")
os.system("pip install gradio==3.42.0")
from typing import TypeVar
from langchain.embeddings import HuggingFaceEmbeddings#, HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
import gradio as gr
from transformers import AutoTokenizer
# Alternative model sources
from ctransformers import AutoModelForCausalLM
PandasDataFrame = TypeVar('pd.core.frame.DataFrame')
# Disable cuda devices if necessary
#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
#from chatfuncs.chatfuncs import *
import chatfuncs.ingest as ing
## Load preset embeddings, vectorstore, and model
embeddings_name = "BAAI/bge-base-en-v1.5"
def load_embeddings(embeddings_name = "BAAI/bge-base-en-v1.5"):
#if embeddings_name == "hkunlp/instructor-large":
# embeddings_func = HuggingFaceInstructEmbeddings(model_name=embeddings_name,
# embed_instruction="Represent the paragraph for retrieval: ",
# query_instruction="Represent the question for retrieving supporting documents: "
# )
#else:
embeddings_func = HuggingFaceEmbeddings(model_name=embeddings_name)
global embeddings
embeddings = embeddings_func
return embeddings
def get_faiss_store(faiss_vstore_folder,embeddings):
import zipfile
with zipfile.ZipFile(faiss_vstore_folder + '/' + faiss_vstore_folder + '.zip', 'r') as zip_ref:
zip_ref.extractall(faiss_vstore_folder)
faiss_vstore = FAISS.load_local(folder_path=faiss_vstore_folder, embeddings=embeddings)
os.remove(faiss_vstore_folder + "/index.faiss")
os.remove(faiss_vstore_folder + "/index.pkl")
global vectorstore
vectorstore = faiss_vstore
return vectorstore
import chatfuncs.chatfuncs as chatf
chatf.embeddings = load_embeddings(embeddings_name)
chatf.vectorstore = get_faiss_store(faiss_vstore_folder="faiss_embedding",embeddings=globals()["embeddings"])
def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_device=None):
print("Loading model")
# Default values inside the function
if gpu_config is None:
gpu_config = chatf.gpu_config
if cpu_config is None:
cpu_config = chatf.cpu_config
if torch_device is None:
torch_device = chatf.torch_device
if model_type == "Mistral Open Orca (larger, slow)":
if torch_device == "cuda":
gpu_config.update_gpu(gpu_layers)
else:
gpu_config.update_gpu(gpu_layers)
cpu_config.update_gpu(gpu_layers)
print("Loading with", cpu_config.gpu_layers, "model layers sent to GPU.")
print(vars(gpu_config))
print(vars(cpu_config))
try:
#model = AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
#model = AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
model = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
#model = AutoModelForCausalLM.from_pretrained('TheBloke/MistralLite-7B-GGUF', model_type='mistral', model_file='mistrallite.Q4_K_M.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
except:
#model = AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
#model = AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
model = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
#model = AutoModelForCausalLM.from_pretrained('TheBloke/MistralLite-7B-GGUF', model_type='mistral', model_file='mistrallite.Q4_K_M.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
tokenizer = []
if model_type == "Flan Alpaca (small, fast)":
# Huggingface chat model
hf_checkpoint = 'declare-lab/flan-alpaca-large'#'declare-lab/flan-alpaca-base' # # #
def create_hf_model(model_name):
from transformers import AutoModelForSeq2SeqLM, AutoModelForCausalLM
if torch_device == "cuda":
if "flan" in model_name:
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto")
else:
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
else:
if "flan" in model_name:
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
else:
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = chatf.context_length)
return model, tokenizer, model_type
model, tokenizer, model_type = create_hf_model(model_name = hf_checkpoint)
chatf.model = model
chatf.tokenizer = tokenizer
chatf.model_type = model_type
load_confirmation = "Finished loading model: " + model_type
print(load_confirmation)
return model_type, load_confirmation, model_type
# Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
#model_type = "Mistral Open Orca (larger, slow)"
#load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
model_type = "Flan Alpaca (small, fast)"
load_model(model_type, 0, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):
print(f"> Total split documents: {len(docs_out)}")
print(docs_out)
vectorstore_func = FAISS.from_documents(documents=docs_out, embedding=embeddings)
chatf.vectorstore = vectorstore_func
out_message = "Document processing complete"
return out_message, vectorstore_func
# Gradio chat
block = gr.Blocks(theme = gr.themes.Base())#css=".gradio-container {background-color: black}")
with block:
ingest_text = gr.State()
ingest_metadata = gr.State()
ingest_docs = gr.State()
model_type_state = gr.State(model_type)
embeddings_state = gr.State(globals()["embeddings"])
vectorstore_state = gr.State(globals()["vectorstore"])
model_state = gr.State() # chatf.model (gives error)
tokenizer_state = gr.State() # chatf.tokenizer (gives error)
chat_history_state = gr.State()
instruction_prompt_out = gr.State()
gr.Markdown("
Chat with Misbahuddin Classroom
")
gr.Markdown("Ask any question of pharmacology of 10 drugs. However, there are some limitations.")
with gr.Row():
current_source = gr.Textbox(label="Current data source(s)", value="Lambeth_2030-Our_Future_Our_Lambeth.pdf", scale = 10)
current_model = gr.Textbox(label="Current model", value=model_type, scale = 3)
with gr.Tab("Chatbot"):
with gr.Row():
#chat_height = 500
chatbot = gr.Chatbot(avatar_images=('user.jfif', 'bot.jpg'),bubble_full_width = False, scale = 1) # , height=chat_height
with gr.Accordion("Open this tab to see the source paragraphs used to generate the answer", open = False):
sources = gr.HTML(value = "Source paragraphs with the most relevant text will appear here", scale = 1) # , height=chat_height
with gr.Row():
message = gr.Textbox(
label="Enter your question here",
lines=1,
)
with gr.Row():
submit = gr.Button(value="Send message", variant="secondary", scale = 1)
clear = gr.Button(value="Clear chat", variant="secondary", scale=0)
stop = gr.Button(value="Stop generating", variant="secondary", scale=0)
examples_set = gr.Radio(label="Examples for the Lambeth Borough Plan",
#value = "What were the five pillars of the previous borough plan?",
choices=["What were the five pillars of the previous borough plan?",
"What is the vision statement for Lambeth?",
"What are the commitments for Lambeth?",
"What are the 2030 outcomes for Lambeth?"])
current_topic = gr.Textbox(label="Feature currently disabled - Keywords related to current conversation topic.", placeholder="Keywords related to the conversation topic will appear here")
with gr.Tab("Load in a different file to chat with"):
with gr.Accordion("PDF file", open = False):
in_pdf = gr.File(label="Upload pdf", file_count="multiple", file_types=['.pdf'])
load_pdf = gr.Button(value="Load in file", variant="secondary", scale=0)
with gr.Accordion("Web page", open = False):
with gr.Row():
in_web = gr.Textbox(label="Enter web page url")
in_div = gr.Textbox(label="(Advanced) Web page div for text extraction", value="p", placeholder="p")
load_web = gr.Button(value="Load in webpage", variant="secondary", scale=0)
with gr.Accordion("CSV/Excel file", open = False):
in_csv = gr.File(label="Upload CSV/Excel file", file_count="multiple", file_types=['.csv', '.xlsx'])
in_text_column = gr.Textbox(label="Enter column name where text is stored")
load_csv = gr.Button(value="Load in CSV/Excel file", variant="secondary", scale=0)
ingest_embed_out = gr.Textbox(label="File/web page preparation progress")
with gr.Tab("Advanced features"):
out_passages = gr.Slider(minimum=1, value = 2, maximum=10, step=1, label="Choose number of passages to retrieve from the document. Numbers greater than 2 may lead to increased hallucinations or input text being truncated.")
temp_slide = gr.Slider(minimum=0.1, value = 0.1, maximum=1, step=0.1, label="Choose temperature setting for response generation.")
with gr.Row():
model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca (small, fast)", choices = ["Flan Alpaca (small, fast)", "Mistral Open Orca (larger, slow)"])
change_model_button = gr.Button(value="Load model", scale=0)
with gr.Accordion("Choose number of model layers to send to GPU (WARNING: please don't modify unless you are sure you have a GPU).", open = False):
gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU.", value=0, minimum=0, maximum=5, step = 1, visible=True)
load_text = gr.Text(label="Load status")
gr.HTML(
"This app is based on the models Flan Alpaca and Mistral Open Orca. It powered by Gradio, Transformers, Ctransformers, and Langchain."
)
examples_set.change(fn=chatf.update_message, inputs=[examples_set], outputs=[message])
change_model_button.click(fn=chatf.turn_off_interactivity, inputs=[message, chatbot], outputs=[message, chatbot], queue=False).\
then(fn=load_model, inputs=[model_choice, gpu_layer_choice], outputs = [model_type_state, load_text, current_model]).\
then(lambda: chatf.restore_interactivity(), None, [message], queue=False).\
then(chatf.clear_chat, inputs=[chat_history_state, sources, message, current_topic], outputs=[chat_history_state, sources, message, current_topic]).\
then(lambda: None, None, chatbot, queue=False)
# Load in a pdf
load_pdf_click = load_pdf.click(ing.parse_file, inputs=[in_pdf], outputs=[ingest_text, current_source]).\
then(ing.text_to_docs, inputs=[ingest_text], outputs=[ingest_docs]).\
then(docs_to_faiss_save, inputs=[ingest_docs], outputs=[ingest_embed_out, vectorstore_state]).\
then(chatf.hide_block, outputs = [examples_set])
# Load in a webpage
load_web_click = load_web.click(ing.parse_html, inputs=[in_web, in_div], outputs=[ingest_text, ingest_metadata, current_source]).\
then(ing.html_text_to_docs, inputs=[ingest_text, ingest_metadata], outputs=[ingest_docs]).\
then(docs_to_faiss_save, inputs=[ingest_docs], outputs=[ingest_embed_out, vectorstore_state]).\
then(chatf.hide_block, outputs = [examples_set])
# Load in a csv/excel file
load_csv_click = load_csv.click(ing.parse_csv_or_excel, inputs=[in_csv, in_text_column], outputs=[ingest_text, current_source]).\
then(ing.csv_excel_text_to_docs, inputs=[ingest_text, in_text_column], outputs=[ingest_docs]).\
then(docs_to_faiss_save, inputs=[ingest_docs], outputs=[ingest_embed_out, vectorstore_state]).\
then(chatf.hide_block, outputs = [examples_set])
# Load in a webpage
# Click/enter to send message action
response_click = submit.click(chatf.create_full_prompt, inputs=[message, chat_history_state, current_topic, vectorstore_state, embeddings_state, model_type_state, out_passages], outputs=[chat_history_state, sources, instruction_prompt_out], queue=False, api_name="retrieval").\
then(chatf.turn_off_interactivity, inputs=[message, chatbot], outputs=[message, chatbot], queue=False).\
then(chatf.produce_streaming_answer_chatbot, inputs=[chatbot, instruction_prompt_out, model_type_state, temp_slide], outputs=chatbot)
response_click.then(chatf.highlight_found_text, [chatbot, sources], [sources]).\
then(chatf.add_inputs_answer_to_history,[message, chatbot, current_topic], [chat_history_state, current_topic]).\
then(lambda: chatf.restore_interactivity(), None, [message], queue=False)
response_enter = message.submit(chatf.create_full_prompt, inputs=[message, chat_history_state, current_topic, vectorstore_state, embeddings_state, model_type_state, out_passages], outputs=[chat_history_state, sources, instruction_prompt_out], queue=False).\
then(chatf.turn_off_interactivity, inputs=[message, chatbot], outputs=[message, chatbot], queue=False).\
then(chatf.produce_streaming_answer_chatbot, [chatbot, instruction_prompt_out, model_type_state, temp_slide], chatbot)
response_enter.then(chatf.highlight_found_text, [chatbot, sources], [sources]).\
then(chatf.add_inputs_answer_to_history,[message, chatbot, current_topic], [chat_history_state, current_topic]).\
then(lambda: chatf.restore_interactivity(), None, [message], queue=False)
# Stop box
stop.click(fn=None, inputs=None, outputs=None, cancels=[response_click, response_enter])
# Clear box
clear.click(chatf.clear_chat, inputs=[chat_history_state, sources, message, current_topic], outputs=[chat_history_state, sources, message, current_topic])
clear.click(lambda: None, None, chatbot, queue=False)
# Thumbs up or thumbs down voting function
chatbot.like(chatf.vote, [chat_history_state, instruction_prompt_out, model_type_state], None)
block.queue(concurrency_count=1).launch(debug=True)
# -