Barry

Paused

File size: 2,343 Bytes

a25c1ff
c49c1a9
 
 
 
 
 
 
 
 
9c95f36
 
c49c1a9
9c95f36
 
c49c1a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ace2f35
 
0f3f2f2
c49c1a9
 
 
 
 
 
 
 
 
 
 
 
 
 
ace2f35
c49c1a9
 
 
 
ace2f35
c49c1a9
ace2f35
c49c1a9

import gradio as gr
import os
from pathlib import Path
import argparse
from huggingface_hub import snapshot_download


# Model selection: `repo_name` is the Hugging Face Hub repository and
# `model_file` is the GGUF file to pull from it. Previously used candidates
# are kept below, commented out, so they can be swapped back in.

# repo_name = "TheBloke/Mistral-7B-v0.1-GGUF"
# model_file = "mistral-7b-v0.1.Q6_K.gguf"

# repo_name = 'HumanityFTW/so_rude'
# model_file = "mistral-comedy-2.0-ckpt-600.Q6_K.gguf"

repo_name = 'TheBloke/OpenHermes-2.5-Mistral-7B-GGUF'
model_file = "openhermes-2.5-mistral-7b.Q4_K_M.gguf"

# `allow_patterns` restricts the download to the selected file, and
# `local_dir="."` targets the working directory so the relative model path
# used below resolves.
print('Fetching model:', repo_name, model_file)
snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_file)
print('Done fetching model:')

DEFAULT_MODEL_PATH = model_file

from llama_cpp import Llama

# NOTE: the former `model_type="mistral"` argument was dropped. It is a
# ctransformers-style option, not a documented `llama_cpp.Llama` parameter,
# and was only being absorbed by the constructor's catch-all keyword args.
llm = Llama(model_path=DEFAULT_MODEL_PATH)


def predict(input, chatbot, max_length, top_p, temperature, history):
    """Stream a completion for ``input`` into the chat display.

    A new ``(input, "")`` turn is appended to ``chatbot`` and ``input`` is
    appended to ``history``. Each streamed chunk extends the reply text of
    that last turn, and the updated ``(chatbot, history)`` pair is yielded.
    Once the stream ends, the full reply is appended to ``history`` and the
    pair is yielded one final time.

    Only ``input`` is passed to the model as the prompt; ``history`` is
    recorded but not fed back as context.

    Args:
        input: Prompt text sent to the model.
        chatbot: List of ``(user, reply)`` turns; mutated in place.
        max_length: Forwarded to the model as ``max_tokens``.
        top_p: Forwarded to the model as ``top_p``.
        temperature: Forwarded to the model as ``temperature``.
        history: Flat list of prompts and replies; mutated in place.

    Yields:
        The ``(chatbot, history)`` pair after every chunk, and once more
        after the reply has been recorded in ``history``.
    """
    chatbot.append((input, ""))
    history.append(input)

    sampling = {
        "stream": True,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_length,
    }

    reply = ""
    for chunk in llm(input, **sampling):
        reply = reply + chunk['choices'][0]['text']
        # Rebuild the last turn: keep its user text, swap in the longer reply.
        user_turn = chatbot[-1][0]
        chatbot[-1] = (user_turn, reply)
        yield chatbot, history

    history.append(reply)
    yield chatbot, history


def reset_user_input():
    """Return a Gradio update that sets the target component's value to ""."""
    cleared = gr.update(value="")
    return cleared


def reset_state():
    """Return a pair of new, empty lists: one for the chat, one for history."""
    empty_chat = []
    empty_history = []
    return empty_chat, empty_history


# Page layout: a title, the chat display, then a row with the prompt box and
# submit button on the left and the sampling controls on the right.
with gr.Blocks() as demo:
    gr.HTML("""<h1 align="center">So Rude</h1>""")

    chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column(scale=4):
            user_input = gr.Textbox(
                show_label=False,
                placeholder="Input...",
                lines=8,
                elem_id="user_input",
            )
            submitBtn = gr.Button(
                "Submit",
                variant="primary",
                elem_id="submit_btn",
            )
        with gr.Column(scale=1):
            max_length = gr.Slider(
                minimum=0,
                maximum=256,
                value=64,
                step=1.0,
                label="Maximum Length",
                interactive=True,
            )
            top_p = gr.Slider(
                minimum=0,
                maximum=1,
                value=0.7,
                step=0.01,
                label="Top P",
                interactive=True,
            )
            temperature = gr.Slider(
                minimum=0,
                maximum=2.0,
                value=0.95,
                step=0.01,
                label="Temperature",
                interactive=True,
            )
            emptyBtn = gr.Button("Clear History")

    # Per-session list that predict() appends prompts and replies to.
    history = gr.State([])

    # Submit registers two listeners: one streams the model reply into the
    # chat, the other blanks the prompt box.
    submitBtn.click(
        fn=predict,
        inputs=[user_input, chatbot, max_length, top_p, temperature, history],
        outputs=[chatbot, history],
        show_progress=True,
    )
    submitBtn.click(
        fn=reset_user_input,
        inputs=[],
        outputs=[user_input],
    )

    # Clearing replaces both the visible chat and the stored history.
    emptyBtn.click(
        fn=reset_state,
        outputs=[chatbot, history],
        show_progress=True,
    )

demo.queue().launch(share=False, inbrowser=True)