"""Gradio demo that chats with a local GGUF model through GPT4All.

On start-up the script downloads the model file from the Hugging Face Hub
into ``models/``, loads it on CPU, builds the UI and launches the app.
"""

import gradio as gr
from gpt4all import GPT4All
from huggingface_hub import hf_hub_download

repo_id = "TheBloke/openchat_3.5-GGUF"
model_name = "openchat_3.5.Q4_K_M.gguf"

# Fetch the model file into ./models as a real file (no symlink into the
# hub cache) so GPT4All can load it from that directory below.
hf_hub_download(
    repo_id=repo_id,
    filename=model_name,
    local_dir="models",
    local_dir_use_symlinks=False,
)

# allow_download=False: only use the file that was just placed in ./models.
model = GPT4All(model_name, "models", allow_download=False, device="cpu")

# The prompt is assembled by hand in predict(), so override the template
# and clear the system prompt.
model.config["promptTemplate"] = "[INST] {0} [/INST]"
model.config["systemPrompt"] = ""
# NOTE(review): private attribute of GPT4All; presumably keeps generate()
# from applying its own chat-session handling -- confirm against the
# installed gpt4all version.
model._is_chat_session_activated = False

# Upper bound offered by the "Max New Tokens" slider.
max_new_tokens = 2048


def _build_prompt(template, history, message):
    """Return the full prompt for *message* given the previous turns.

    Each ``(user, assistant)`` pair in *history* is rendered as the
    formatted user turn immediately followed by the assistant reply; the
    new *message* is appended as a final formatted user turn.
    """
    turns = [
        # A missing assistant reply (None) is rendered as an empty string
        # instead of raising TypeError on concatenation.
        template.format(user_message) + (assistant_message or "")
        for user_message, assistant_message in history
    ]
    turns.append(template.format(message))
    return "".join(turns)


def predict(input, instruction, history, temperature, top_p, top_k, max_tokens):
    """Generate a reply to *input* and append the exchange to *history*.

    Args:
        input: The new user message.
        instruction: Text of the "Instruction" box.
            NOTE(review): currently not used when building the prompt --
            confirm whether it should be injected as a system prompt.
        history: List of ``[user, assistant]`` pairs, or a falsy value
            when there is no conversation yet.
        temperature: Passed to ``model.generate`` as ``temp``.
        top_p: Passed to ``model.generate`` as ``top_p``.
        top_k: Passed to ``model.generate`` as ``top_k``.
        max_tokens: Passed to ``model.generate`` as ``max_tokens``.

    Returns:
        A ``(result, input, history)`` tuple: the generated text, the
        unchanged user message, and a new history list that ends with
        ``[input, result]``.
    """
    history = history or []

    prompt = _build_prompt(model.config["promptTemplate"], history, input)

    result = model.generate(
        prompt,
        temp=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        top_k=top_k,
        streaming=False,
    )

    # Build a new list rather than mutating the one that was passed in.
    history = history + [[input, result]]
    return (result, input, history)


def clear_history():
    """Log the reset and return an empty history list."""
    print(">>> HISTORY CLEARED!")
    return []


with gr.Blocks() as demo:
    with gr.Row(variant="panel"):
        gr.Markdown("A AI model test demo.")

    with gr.Row():
        # Conversation, prompt inputs and action buttons.
        with gr.Column():
            history = gr.Chatbot(label="History", elem_id="chatbot")
            input = gr.Textbox(label="Input", value="Hi.", lines=2)
            instruction = gr.Textbox(
                label="Instruction",
                value="You are an AI chatbot.",
                lines=4,
            )
            run = gr.Button("▶")
            clear = gr.Button("🗑️")

        # Sampling parameters.
        with gr.Column():
            temperature = gr.Slider(
                minimum=0,
                maximum=2,
                value=1,
                step=0.01,
                interactive=True,
                label="Temperature",
            )
            top_p = gr.Slider(
                minimum=0.01,
                maximum=0.99,
                value=0.95,
                step=0.01,
                interactive=True,
                label="Top P",
            )
            top_k = gr.Slider(
                minimum=1,
                maximum=2048,
                value=50,
                step=1,
                interactive=True,
                label="Top K",
            )
            # NOTE(review): the default of 32 is not on the
            # minimum + k * step grid (1, 65, 129, ...) -- confirm the
            # intended step.
            max_tokens = gr.Slider(
                minimum=1,
                maximum=max_new_tokens,
                value=32,
                step=64,
                interactive=True,
                label="Max New Tokens",
            )

    with gr.Row():
        with gr.Column():
            output = gr.Textbox(label="Output", value="", lines=50)

    run.click(
        predict,
        inputs=[input, instruction, history, temperature, top_p, top_k, max_tokens],
        outputs=[output, input, history],
    )
    clear.click(clear_history, [], history)

# NOTE(review): `concurrency_count` is accepted by Gradio 3.x `queue()`;
# confirm it is still valid for the Gradio version this app is pinned to.
demo.queue(concurrency_count=500, api_open=True).launch(show_api=True)