Testtrial1

Runtime error

File size: 2,252 Bytes

3592be6
a9594eb
461e953
dacf472
 
a9e99a7
9d6e71f
3592be6
a9e99a7
 
 
 
3592be6
a9e99a7
 
f38b63a
dacf472
 
f38b63a
 
dacf472
 
 
 
 
f38b63a
dacf472
f38b63a
 
 
a9e99a7
 
 
dacf472
 
 
a9e99a7
 
dacf472
 
a9e99a7
dacf472
 
 
 
 
 
 
 
f38b63a
dacf472
 
d2c9268

import gradio as gr
import os
import wget
from llama_cpp import Llama
import random
# Location of the quantized WizardLM 7B weights that this app serves.
url = 'https://huggingface.co/TheBloke/WizardLM-7B-uncensored-GGML/resolve/main/WizardLM-7B-uncensored.ggmlv3.q4_0.bin'
# Reuse a copy that is already on disk instead of downloading the whole model
# again on every start; the local name is the last path component of the URL.
filename = os.path.basename(url)
if not os.path.exists(filename):
    filename = wget.download(url, out=filename)
# A new random seed per process so that separate runs sample differently.
# The upper bound is 2**31 - 1 so the value fits a signed 32-bit integer
# (assumed to be the seed width used by the backend -- confirm for the
# pinned llama-cpp-python version).
llm2 = Llama(model_path=filename, seed=random.randint(1, 2**31 - 1))
# Eleven shades of the custom primary palette, passed positionally to
# gr.themes.Color in the order listed here.
_PRIMARY_SHADES = (
    "#ededed",
    "#fee2e2",
    "#fecaca",
    "#fca5a5",
    "#f87171",
    "#ef4444",
    "#dc2626",
    "#b91c1c",
    "#991b1b",
    "#7f1d1d",
    "#6c1e1e",
)
# Soft base theme with the custom primary palette and the built-in "red"
# neutral hue.
theme = gr.themes.Soft(
    primary_hue=gr.themes.Color(*_PRIMARY_SHADES),
    neutral_hue="red",
)
# Centered HTML heading rendered at the top of the page.
title = '<h1 align="center">Chat with awesome WizardLM 7b model!</h1><br>'
with gr.Blocks(theme=theme) as demo:
    # Page heading followed by a short disclaimer about the model's answers.
    gr.HTML(value=title)
    gr.HTML(
        value=(
            "This model is awesome for its size! "
            "It is only 20th the size of Chatgpt but is around 90% as good as Chatgpt. "
            "However, please don't rely on WizardLM to provide 100% true information "
            "as it might be wrong sometimes. "
        )
    )
    # Conversation transcript, the message input box, and a button that
    # clears both of them.
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton(components=[msg, chatbot])

    def user(user_message, history):
        """Record a submitted message as a new, unanswered chat turn.

        Args:
            user_message: Text taken from the input box.
            history: Current chat transcript, a list of [user, bot] pairs.

        Returns:
            A 2-tuple: an update that empties the input box (leaving it
            interactive), and a new transcript list with
            ``[user_message, None]`` appended. ``history`` itself is not
            mutated.
        """
        cleared_box = gr.update(value="", interactive=True)
        pending_turn = [user_message, None]
        return cleared_box, history + [pending_turn]

    def bot(history):
        """Stream the model's reply to the newest user message.

        The prompt is the last user message followed by the
        ``"\\n\\n### Response:"`` marker. The reply is written into the last
        transcript entry piece by piece, and the transcript is yielded after
        each piece so the chat window updates while the model is generating.

        Args:
            history: Chat transcript, a list of [user, bot] pairs. The last
                pair must hold the user message to answer; its bot slot is
                overwritten in place.

        Yields:
            ``history`` after each newly decoded piece of text.
        """
        # Function-scope import keeps this block self-contained.
        import codecs

        user_message = history[-1][0]
        # Tokenize the whole prompt in one call. Tokenizing the message and
        # the response marker separately and concatenating the results puts
        # a second beginning-of-sequence token in the middle of the prompt
        # (llama-cpp-python's tokenize() prepends one by default -- confirm
        # for the pinned version).
        prompt_tokens = llm2.tokenize(user_message.encode() + b"\n\n### Response:")

        # One token may carry only part of a multi-byte UTF-8 character, so
        # decoding each token's bytes on its own can raise UnicodeDecodeError.
        # The incremental decoder buffers incomplete sequences until the
        # remaining bytes arrive.
        decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")

        history[-1][1] = ""
        eos_token = llm2.token_eos()
        generated = llm2.generate(
            prompt_tokens, top_k=50, top_p=0.73, temp=0.72, repeat_penalty=1.1
        )
        for count, token in enumerate(generated, start=1):
            # Stop at end-of-sequence, or once the 500th token is reached.
            if count >= 500 or token == eos_token:
                break
            piece = decoder.decode(llm2.detokenize([token]))
            if not piece:
                # Still waiting for the rest of a multi-byte character.
                continue
            history[-1][1] += piece
            yield history

        # Flush bytes left over from a character that was cut off when
        # generation stopped; they are rendered as a replacement character.
        leftover = decoder.decode(b"", final=True)
        if leftover:
            history[-1][1] += leftover
            yield history

    # Submitting the textbox first runs `user` outside the queue, then
    # streams the reply from `bot` into the chat window.
    submitted = msg.submit(
        fn=user,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
    )
    response = submitted.then(fn=bot, inputs=chatbot, outputs=chatbot)
    # After the reply has finished, mark the textbox as interactive.
    response.then(
        fn=lambda: gr.update(interactive=True),
        inputs=None,
        outputs=[msg],
        queue=False,
    )
    gr.HTML(value="Thanks for checking out this app!")

# Turn on request queuing, then start the server with debug output enabled.
demo.queue().launch(debug=True)