from huggingface_hub import InferenceClient
import gradio as gr
import os

# InferenceClient accepts either a full Inference API URL or a bare model id.
API_URL = {
    "Mistral": "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1",
    "Codestral": "mistralai/Codestral-22B-v0.1"
}

# Both variables must be set in the environment (e.g. as Space secrets); os.environ raises KeyError if they are missing.
HF_TOKEN = os.environ['HF_TOKEN']
Hinglish_Prompt = os.environ['Hinglish_Prompt']

mistralClient = InferenceClient(
    model=API_URL["Mistral"],
    headers={"Authorization": f"Bearer {HF_TOKEN}"},
)

codestralClient = InferenceClient(
    model=API_URL["Codestral"],
    headers={"Authorization": f"Bearer {HF_TOKEN}"},
)

def format_prompt(message, history, enable_hinglish=False):
    prompt = "<s>"
    # Prepend the Hinglish system prompt once, if enabled and not already present in the history
    if enable_hinglish and not any("[INST] You are a Hinglish LLM." in user_prompt for user_prompt, bot_response in history):
        prompt += Hinglish_Prompt

    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt
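
# Illustrative example (the messages below are hypothetical): with
# history = [("Hi", "Hello!")] and message = "Tell me a joke", format_prompt
# returns a string shaped like:
#   <s>[INST] Hi [/INST] Hello!</s> [INST] Tell me a joke [/INST]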

def generate(prompt, history, model = "Mistral", enable_hinglish=False, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
    # Sanitize generation arguments
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
        
    top_p = float(top_p)
    
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    # Select the client based on the chosen model
    client = mistralClient if model == "Mistral" else codestralClient
    
    formatted_prompt = format_prompt(prompt, history, enable_hinglish)
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""
    for response in stream:
        output += response.token.text
        yield output
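
# Hedged usage sketch: generate() is a generator, so it can also be consumed
# directly outside Gradio for a quick smoke test, for example:
#     for partial in generate("Write a haiku about Python", history=[]):
#         print(partial)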

additional_inputs=[
    gr.Dropdown(
        choices = ["Mistral","Codestral"],
        value = "Mistral",
        label = "Model to be used",
        interactive=True,
        info = "Mistral for general-purpose chatting and codestral for code related task (Supports 80+ languages)"
    ),
    gr.Checkbox(
        label="Hinglish",
        value=False,
        interactive=True,
        info="Enables the MistralTalk to talk in Hinglish (Combination of Hindi and English)",
    ),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=256,
        minimum=0,
        maximum=1024,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    ),
]

css = """
  #mkd {
    height: 500px;
    overflow: auto;
    border: 1px solid #ccc;
  }
"""

with gr.Blocks(css=css) as demo:
    gr.HTML("<h1><center>MistralTalk🗣️<h1><center>")
    gr.HTML("<h3><center>In this demo, you can chat with <a href='https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1'>Mixtral-8x7B</a> model. 💬<h3><center>")
    gr.HTML("<h3><center>Learn more about the model <a href='https://huggingface.co/docs/transformers/main/model_doc/mistral'>here</a>. 📚<h3><center>")
    gr.ChatInterface(
        generate,
        additional_inputs=additional_inputs,
        theme=gr.themes.Soft(),
        examples=[["What is the secret to life?"], ["How does the universe work?"], ["What can you do?"],
                  ["What is quantum mechanics?"], ["Do you believe in an afterlife?"],
                  ["Java function to check if a URL is valid or not."]],
    )

demo.queue(max_size=100).launch(debug=True)