|
import gradio as gr |
|
from gpt4all import GPT4All |
|
from huggingface_hub import hf_hub_download |
|
|
|
# Model configuration: quantized OpenChat 3.5 weights in GGUF format.
repo_id = "TheBloke/openchat_3.5-GGUF"

model_name = "openchat_3.5.Q4_K_M.gguf"

# Download the weights into ./models (a no-op if already cached there).
# NOTE(review): local_dir_use_symlinks is deprecated/ignored in recent
# huggingface_hub releases — confirm the pinned version still accepts it.
hf_hub_download(repo_id=repo_id, filename=model_name, local_dir="models", local_dir_use_symlinks=False)

# Load the model from the local directory on CPU, never re-downloading.
# BUGFIX: the original line read "model = model = GPT4All(...)" — a
# harmless but accidental duplicated assignment.
model = GPT4All(model_name, "models", allow_download=False, device="cpu")

# Mistral-style instruction template with no built-in system prompt;
# predict() builds the full conversation prompt manually, so disable
# GPT4All's internal chat-session handling.
model.config["promptTemplate"] = "[INST] {0} [/INST]"

model.config["systemPrompt"] = ""

model._is_chat_session_activated = False

# NOTE(review): currently unused (the UI slider caps at 2048 on its own);
# kept so any external references keep working.
max_new_tokens = 2048
|
|
|
def predict(input, instruction, history, temperature, top_p, top_k, max_tokens):
    """Generate one assistant reply for *input*, continuing *history*.

    Args:
        input: The user's latest message.
        instruction: System-style instruction text from the UI; prepended
            to the prompt when non-empty.
        history: List of prior ``[user, assistant]`` turn pairs, or a
            falsy value (treated as an empty history).
        temperature, top_p, top_k, max_tokens: Sampling parameters
            forwarded to ``model.generate``.

    Returns:
        Tuple ``(result, input, history)``: the generated text, the echoed
        input, and the history extended with the new turn.
    """
    history = history or []

    # Rebuild the whole conversation in the model's prompt template.
    formatted_input = "<s>"

    # BUGFIX: `instruction` was accepted (and wired into the UI) but
    # silently ignored. Prepend it as a leading system-style segment.
    if instruction:
        formatted_input += instruction + "\n"

    for user_message, assistant_message in history:
        formatted_input += model.config["promptTemplate"].format(user_message)
        formatted_input += assistant_message + "</s>"

    formatted_input += model.config["promptTemplate"].format(input)

    result = model.generate(
        formatted_input,
        temp=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        top_k=top_k,
        streaming=False,
    )

    history = history + [[input, result]]

    return (result, input, history)
|
|
|
def clear_history():
    """Return a fresh, empty chat history (bound to the clear button)."""
    print(">>> HISTORY CLEARED!")
    empty_history = []
    return empty_history
|
|
|
# Build the Gradio interface: chat panel on the left, sampling sliders on
# the right, output pane below, then launch with queuing enabled.
with gr.Blocks() as demo:
    # Header banner.
    with gr.Row(variant="panel"):
        gr.Markdown("A AI model test demo.")

    with gr.Row():
        # Left column: conversation transcript, inputs, and buttons.
        with gr.Column():
            history = gr.Chatbot(label="History", elem_id="chatbot")
            input = gr.Textbox(label="Input", value="Hi.", lines=2)
            instruction = gr.Textbox(label="Instruction", value="You are an AI chatbot.", lines=4)
            run = gr.Button("▶")
            clear = gr.Button("🗑️")

        # Right column: sampling hyper-parameter sliders.
        with gr.Column():
            temperature = gr.Slider(minimum=0, maximum=2, value=1, step=0.01, interactive=True, label="Temperature")
            top_p = gr.Slider(minimum=0.01, maximum=0.99, value=0.95, step=0.01, interactive=True, label="Top P")
            top_k = gr.Slider(minimum=1, maximum=2048, value=50, step=1, interactive=True, label="Top K")
            max_tokens = gr.Slider(minimum=1, maximum=2048, value=32, step=64, interactive=True, label="Max New Tokens")

    # Output pane.
    with gr.Row():
        with gr.Column():
            output = gr.Textbox(label="Output", value="", lines=50)

    # Wire the buttons to their handlers.
    run.click(
        predict,
        inputs=[input, instruction, history, temperature, top_p, top_k, max_tokens],
        outputs=[output, input, history],
    )
    clear.click(clear_history, [], history)

demo.queue(concurrency_count=500, api_open=True).launch(show_api=True)