Spaces:

ColeGuion
/

Mistral8x7B

Running

File size: 3,876 Bytes

958557f
 
 
4c80952
958557f
25b1a79
3fb1de6
25b1a79
3fb1de6
25b1a79
 
 
 
ca29043
25b1a79
 
 
 
2915268
c49d633
3fb1de6
c0cb522
 
 
b102992
c0cb522
 
3fb1de6
c0cb522
 
3fb1de6
c0cb522
 
 
 
 
716b126
bdc235c
 
9d59ab3
958557f
3fb1de6
958557f
3fb1de6
e53bb43
111aa32
9d59ab3
 
3fb1de6
5e37f93
 
7e14f2f
958557f
 
 
 
 
 
 
 
111aa32
958557f
3fb1de6
46bd600
 
 
9d59ab3
958557f
 
c49d633
 
aee8870
42f0387
958557f
 
 
 
 
c49d633
958557f

from huggingface_hub import InferenceClient
import gradio as gr

# Module-level inference client shared by every request; it targets the
# Mixtral 8x7B instruct model through huggingface_hub's InferenceClient.
client = InferenceClient(model="mistralai/Mixtral-8x7B-Instruct-v0.1")

# Formats the prompt to hold all of the past messages
def format_prompt(message, history):
    """Build an instruct-style prompt from the chat history and a new message.

    The prompt starts with ``<s>``. Every past turn is rendered as
    ``[INST] user [/INST] bot</s> `` and the new message is appended as a
    final ``[INST] ... [/INST]`` segment.

    Args:
        message: The new user message to append at the end of the prompt.
        history: Iterable of ``(user_prompt, bot_response)`` pairs.

    Returns:
        The assembled prompt string.
    """
    segments = ["<s>"]

    # Replay each earlier exchange in order so the prompt carries the context.
    for user_turn, bot_turn in history:
        segments.append(f"[INST] {user_turn} [/INST]")
        segments.append(f" {bot_turn}</s> ")

    segments.append(f"[INST] {message} [/INST]")
    return "".join(segments)


# Used for grammatical error correction (GEC); does not track the actual chat history
def format_prompt_grammar(message, history):
    """Build a few-shot grammar-correction prompt for a single sentence.

    Two fixed example corrections are placed before the new sentence, each
    wrapped in the same instruction text. The ``history`` argument is accepted
    but not used: the fixed examples stand in for the conversation.

    Args:
        message: The sentence to be corrected.
        history: Ignored.

    Returns:
        The assembled prompt string.
    """
    # Instruction text placed in front of every sentence.
    instruction = "Correct any grammatical errors in the following sentence and provide the corrected version:\n\nSentence:"

    # Fixed (incorrect sentence, corrected sentence) demonstrations.
    few_shot_pairs = (
        ("It is my friends house in England.", "It is my friend's house in England."),
        ("Every girl must bring their books to school.", "Every girl must bring her books to school."),
    )

    def as_instruction(sentence):
        return f"[INST] {instruction} {sentence} [/INST]"

    demonstrations = "".join(
        f"{as_instruction(wrong)} {right}</s> \n" for wrong, right in few_shot_pairs
    )
    return f"<s>{demonstrations}{as_instruction(message)}"



def generate(prompt, history, system_prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
    """Stream a model completion for the chat UI.

    This is a generator: each step yields the text accumulated so far, so the
    caller can redraw the reply as it grows.

    Args:
        prompt: The latest user message.
        history: Past ``(user, bot)`` turns, forwarded to ``format_prompt``.
        system_prompt: Optional text placed in front of the user message.
            When empty or ``None`` the message is sent on its own.
        temperature: Sampling temperature; values below 0.01 are raised to 0.01.
        max_new_tokens: Upper bound on generated tokens; coerced to an int of
            at least 1.
        top_p: Nucleus-sampling threshold, coerced to float.
        repetition_penalty: Repetition penalty, coerced to float.

    Yields:
        The generated text so far, extended by one token per step.

    Returns:
        The final accumulated text (exposed as the generator's return value).
    """
    # UI widgets may hand over ints, floats or numeric strings; normalise all
    # sampling parameters once, up front.
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)
    repetition_penalty = float(repetition_penalty)
    # The "Max new tokens" slider allows 0; always request at least one token
    # because text-generation backends are expected to reject a non-positive
    # budget.
    max_new_tokens = max(int(max_new_tokens), 1)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,  # fixed seed: identical inputs give reproducible samples
    )

    # Only prepend the system prompt when there is one, so an empty textbox
    # does not inject a stray leading space (or the text "None") into the turn.
    user_message = f"{system_prompt} {prompt}" if system_prompt else prompt

    # Grammar-correction alternative:
    # formatted_prompt = format_prompt_grammar(f"Corrected Sentence: {prompt}", history)
    formatted_prompt = format_prompt(user_message, history)
    print("\nPROMPT: \n\t" + formatted_prompt)

    # Generate text from the HF inference endpoint, token by token.
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""

    for response in stream:
        # Skip special tokens (e.g. the end-of-sequence marker) so they are
        # neither shown to the user nor stored in the history that is later
        # replayed into the prompt.
        if getattr(response.token, "special", False):
            continue
        output += response.token.text
        yield output
    return output



# Extra controls shown with the chat box. Their order matches the parameters
# of `generate` after (prompt, history): system_prompt, temperature,
# max_new_tokens, top_p, repetition_penalty.
additional_inputs = [
    gr.Textbox(
        label="System Prompt",
        value="",
        max_lines=1,
        interactive=True,
    ),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=256,
        minimum=0,
        maximum=1048,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    ),
]

# Sample prompts offered in the UI.
_example_prompts = [
    'Give me the grammatically correct version of the sentence: "We shood buy an car"',
    "Give me an example exam question testing students on square roots on basic integers",
    "Would this block of HTML code run?\n```\n\n```",
]
# Alternative grammar-only samples, currently disabled:
#examples += ["I have been to New York last summer.", "We shood buy an car.", "People is coming to my party.", "She is more taller.", "Their were lot of sheeps.", "I want to speak English good.", "I must to buy a new cartoon book."]

# Each example row is the prompt followed by five None placeholders.
examples = [[text, *([None] * 5)] for text in _example_prompts]


# Chat display widget: panel layout, copy and like buttons on, label and
# share button off.
chat_window = gr.Chatbot(
    show_label=False,
    show_share_button=False,
    show_copy_button=True,
    likeable=True,
    layout="panel",
)

# Wire the streaming `generate` function, the extra controls and the sample
# prompts into one chat interface.
demo = gr.ChatInterface(
    fn=generate,
    chatbot=chat_window,
    additional_inputs=additional_inputs,
    title="My Mistral Space",
    examples=examples,
    concurrency_limit=20,
)

# Start serving the app with the API page hidden.
demo.launch(show_api=False)