import random

import gradio as gr
import torch
from huggingface_hub import InferenceClient
# Serverless Inference API client for the hosted Mixtral-8x7B-Instruct model;
# generation runs remotely on Hugging Face, not on this machine.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
def format_prompt(message, history):
    """Assemble a Mixtral-instruct prompt from the chat history.

    Each past (user, bot) turn is rendered as ``[INST] user [/INST] bot</s> ``
    and the new ``message`` is appended as a final open ``[INST]`` turn.
    """
    segments = ["<s>"]
    for past_user, past_bot in history:
        segments.append(f"[INST] {past_user} [/INST]")
        segments.append(f" {past_bot}</s> ")
    segments.append(f"[INST] {message} [/INST]")
    return "".join(segments)
def generate(
    prompt, history, max_new_tokens, temperature, repetition_penalty, top_p, top_k, seed,
):
    """Stream a Mixtral completion for ``prompt`` given the chat ``history``.

    Yields the accumulated output text after each streamed token so Gradio
    can render it incrementally.

    A ``seed`` of 0 means "pick a random seed"; any other value is used as-is.
    """
    # Fix: ``random`` was previously used here without ever being imported,
    # so seed == 0 raised a NameError instead of picking a random seed.
    if seed == 0:
        seed = random.randint(1, 100000)
    # Hoisted out of the duplicated if/else branches. NOTE(review): this only
    # seeds the *local* torch RNG; generation happens on the remote endpoint,
    # so the seed is also forwarded in generate_kwargs below to actually
    # influence sampling.
    torch.manual_seed(seed)
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=seed,  # forward to the inference endpoint for reproducible sampling
    )
    formatted_prompt = format_prompt(prompt, history)
    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = ""
    for response in stream:
        output += response.token.text
        yield output
    return output
# Extra Gradio controls appended to the ChatInterface; order must match the
# extra parameters of ``generate`` (max_new_tokens, temperature,
# repetition_penalty, top_p, top_k, seed).
additional_inputs = [
    gr.Slider(
        label="Max new tokens",
        value=1000,
        minimum=100,
        maximum=32768,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens, controls how long is the output",
    ),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens, making the AI repeat less itself",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Top-k",
        value=1,
        minimum=0,
        maximum=100,
        step=1,
        interactive=True,
        info="Higher k means more diverse outputs by considering a range of tokens",
    ),
    gr.Number(
        label="Seed",
        value=42,
        # Fix: was minimum=1, which made it impossible to enter the 0 that
        # both this info text and ``generate`` treat as "random seed".
        minimum=0,
        info="Use an integer starting point to initiate the generation process, put 0 for a random",
    ),
]
# Build the chat UI and launch it. ``share=True`` opens a public tunnel URL;
# ``show_api=False`` hides the auto-generated API docs page.
# Fixes: removed a stray trailing "|" scrape artifact that broke the syntax,
# and corrected typos/grammar in the user-facing description.
gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
    additional_inputs=additional_inputs,
    title="Mixtral 8x7b Instruct v0.1",
    description="Chatbot Hugging Face space made by [Nick088](https://linktr.ee/Nick088) with customizable options for model: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1<br>If you get an error, Max New Tokens may be set too high or your prompt may be too long; reduce one of them",
).launch(show_api=False, share=True)