import gradio as gr
import os

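# Rebuild llama-cpp-python with cuBLAS support so generation can run on the GPU.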
os.environ["CMAKE_ARGS"] = "-DLLAMA_CUBLAS=on"
os.system('CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install --upgrade --force-reinstall llama-cpp-python --no-cache-dir')

# Download the quantized GGUF model from Hugging Face if it is not already present
# (the resolve/ URL fetches the file itself rather than the repository web page).
model_file = "leo-mistral-hessianai-7b-chat.Q4_K_M.gguf"
if not os.path.isfile(model_file):
    os.system("wget -c https://huggingface.co/TheBloke/Leo-Mistral-Hessianai-7B-Chat-GGUF/resolve/main/leo-mistral-hessianai-7b-chat.Q4_K_M.gguf")

DEFAULT_MODEL_PATH = model_file

from llama_cpp import Llama

# Load the quantized GGUF model.
llm = Llama(model_path=model_file)
# Override the EOS token id for this model.
llm._token_eos = 7


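# Stream the model's reply token by token, yielding partial chatbot state so the UI updates live.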
def predict(input, chatbot, max_length, top_p, temperature, history):
    chatbot.append((input, ""))
    response = ""
    history.append(input)

    for output in llm(input, stream=True, temperature=temperature, top_p=top_p, max_tokens=max_length, stop=["<|im_end|>"]):
        piece = output['choices'][0]['text']
        response += piece
        chatbot[-1] = (chatbot[-1][0], response)

        yield chatbot, history

    history.append(response)
    yield chatbot, history


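# Clear the input textbox after a message is submitted.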
def reset_user_input():
    return gr.update(value="")


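# Clear both the visible chat and the stored history.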
def reset_state():
    return [], []


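# Build the Gradio UI: a chatbot pane, an input box, and generation-parameter sliders.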
with gr.Blocks() as demo:
    gr.HTML("""<h1 align="center">Leo Mistral Hessianai 7B Chat by llama-cpp-python</h1>""")

    chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column(scale=4):
            user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=8)
            submitBtn = gr.Button("Submit", variant="primary")
        with gr.Column(scale=1):
            max_length = gr.Slider(0, 32768, value=2048, step=1.0, label="Maximum Length", interactive=True)
            top_p = gr.Slider(0, 1, value=0.7, step=0.01, label="Top P", interactive=True)
            temperature = gr.Slider(0, 1, value=0.95, step=0.01, label="Temperature", interactive=True)
            emptyBtn = gr.Button("Clear History")

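    # Per-session conversation history (alternating user and assistant strings).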
    history = gr.State([])

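    # On submit: stream the reply into the chatbot, then clear the input box.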
    submitBtn.click(
        predict, [user_input, chatbot, max_length, top_p, temperature, history], [chatbot, history], show_progress=True
    )
    submitBtn.click(reset_user_input, [], [user_input])

    emptyBtn.click(reset_state, outputs=[chatbot, history], show_progress=True)

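# queue() is required for streaming (generator) handlers; launch locally without a public share link.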
demo.queue().launch(share=False, inbrowser=True)