import gradio as gr
from gpt4all import GPT4All
from huggingface_hub import hf_hub_download
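
# Download the quantized GGUF weights from the Hugging Face Hub into ./models.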
repo_id = "TheBloke/openchat_3.5-GGUF"
model_name = "openchat_3.5.Q4_K_M.gguf"
hf_hub_download(repo_id=repo_id, filename=model_name, local_dir="models", local_dir_use_symlinks=False)
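
# Load the local weights on CPU; allow_download=False stops gpt4all from fetching its own copy.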
model = GPT4All(model_name, "models", allow_download=False, device="cpu")
| model.config["promptTemplate"] = "[INST] {0} [/INST]" | |
| model.config["systemPrompt"] = "" | |
model._is_chat_session_activated = False

max_new_tokens = 2048  # upper bound for the "Max New Tokens" slider
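
# Rebuild the whole conversation into one prompt, then run a single generation.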
def predict(input, instruction, history, temperature, top_p, top_k, max_tokens):
    history = history or []
    formatted_input = "<s>"
    # Fold the instruction in as a system-style preamble before the first turn.
    if instruction:
        formatted_input += instruction + "\n"
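    # Replay earlier turns in the prompt template so the model sees the full context.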
    for user_message, assistant_message in history:
        formatted_input += model.config["promptTemplate"].format(user_message)
        formatted_input += assistant_message + "</s>"
    formatted_input += model.config["promptTemplate"].format(input)
    result = model.generate(
        formatted_input,
        temp=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        top_k=top_k,
        streaming=False,
    )
    history = history + [[input, result]]
    return result, input, history
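
# Clear-button handler: wipe the stored conversation.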
def clear_history():
    print(">>> HISTORY CLEARED!")
    return []
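
# UI layout: chat history and inputs on the left, sampling controls on the right.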
with gr.Blocks() as demo:
    with gr.Row(variant="panel"):
        gr.Markdown("An AI model test demo.")
    with gr.Row():
        with gr.Column():
            history = gr.Chatbot(label="History", elem_id="chatbot")
            input = gr.Textbox(label="Input", value="Hi.", lines=2)
            instruction = gr.Textbox(label="Instruction", value="You are an AI chatbot.", lines=4)
            run = gr.Button("▶")
            clear = gr.Button("🗑️")
        with gr.Column():
            temperature = gr.Slider(minimum=0, maximum=2, value=1, step=0.01, interactive=True, label="Temperature")
            top_p = gr.Slider(minimum=0.01, maximum=0.99, value=0.95, step=0.01, interactive=True, label="Top P")
            top_k = gr.Slider(minimum=1, maximum=2048, value=50, step=1, interactive=True, label="Top K")
            max_tokens = gr.Slider(minimum=1, maximum=max_new_tokens, value=32, step=1, interactive=True, label="Max New Tokens")
    with gr.Row():
        with gr.Column():
            output = gr.Textbox(label="Output", value="", lines=50)
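
    # Wire the buttons to their handlers inside the Blocks context.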
    run.click(predict, inputs=[input, instruction, history, temperature, top_p, top_k, max_tokens], outputs=[output, input, history])
    clear.click(clear_history, [], history)
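
# Queue incoming requests and launch with the API endpoints exposed.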
demo.queue(concurrency_count=500, api_open=True).launch(show_api=True)