import gradio as gr
import torch
from huggingface_hub import Repository
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, GenerationConfig

# Clone the model repository from the Hugging Face Hub.
# Note: Repository is deprecated in recent huggingface_hub releases.
model_name = "adi1193/mistral-postv6"
repository = Repository(model_name, clone_from=model_name)
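
# A non-deprecated alternative (a sketch, assuming a recent huggingface_hub;
# not what this Space originally used):
#
#     from huggingface_hub import snapshot_download
#     model_path = snapshot_download(model_name)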
# Load the PEFT adapter together with its base model from the local clone.
model_path = repository.local_dir
model = AutoPeftModelForCausalLM.from_pretrained(
    model_path,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="cpu",
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Default decoding settings. With top_k=1 the sampler always picks the most
# likely token, so this is effectively greedy decoding despite do_sample=True.
generation_config = GenerationConfig(
    do_sample=True,
    top_k=1,
    temperature=0.1,
    max_new_tokens=100,
    pad_token_id=tokenizer.eos_token_id,
)
def generate_response(message, history, temperature=0.9, max_new_tokens=256,
                      top_p=0.95, repetition_penalty=1.2, enable_hinglish=False):
    # gr.ChatInterface calls this with (message, history, *additional_inputs);
    # the extra values are accepted here, but the fixed config above is used.
    input_str = "###Human: " + message + "###Assistant: "
    # Keep inputs on the model's device (the original hardcoded "cuda",
    # which crashes on this CPU-only setup).
    inputs = tokenizer(input_str, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, generation_config=generation_config)
    return tokenizer.decode(outputs[0], skip_special_tokens=True).replace(input_str, "")
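
# A sketch of wiring the slider values into decoding instead of the fixed
# config (an illustration under the same globals, not the original behavior):
#
#     def generate_with_settings(message, temperature, max_new_tokens, top_p, repetition_penalty):
#         config = GenerationConfig(
#             do_sample=True,
#             temperature=max(float(temperature), 1e-2),  # guard against temperature == 0
#             top_p=float(top_p),
#             repetition_penalty=float(repetition_penalty),
#             max_new_tokens=int(max_new_tokens),
#             pad_token_id=tokenizer.eos_token_id,
#         )
#         inputs = tokenizer("###Human: " + message + "###Assistant: ", return_tensors="pt").to(model.device)
#         outputs = model.generate(**inputs, generation_config=config)
#         return tokenizer.decode(outputs[0], skip_special_tokens=True)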
# def format_prompt(message, history, enable_hinglish=False):
#     prompt = "<s>"
#     # Adding the Hinglish prompt
#     if enable_hinglish and not any("[INST] You are a Hinglish LLM." in user_prompt for user_prompt, bot_response in history):
#         prompt += Hinglish_Prompt
#     for user_prompt, bot_response in history:
#         prompt += f"[INST] {user_prompt} [/INST]"
#         prompt += f" {bot_response} "
#     prompt += f"[INST] {message} [/INST]"
#     return prompt

# def generate(prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0, enable_hinglish=False):
#     temperature = float(temperature)
#     if temperature < 1e-2:
#         temperature = 1e-2
#     top_p = float(top_p)
#     generate_kwargs = {
#         "model": model,
#         "tokenizer": tokenizer,
#         "max_length": max_new_tokens + len(tokenizer.encode(prompt)),
#         "temperature": temperature,
#         "top_p": top_p,
#         "repetition_penalty": repetition_penalty,
#         "do_sample": True,
#         "seed": 42,
#     }
#     formatted_prompt = format_prompt(prompt, history, enable_hinglish)
#     input_ids = tokenizer.encode(formatted_prompt, return_tensors="pt")
#     output = model.generate(input_ids, **generate_kwargs)
#     return tokenizer.decode(output[0], skip_special_tokens=True)
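
# Extra controls shown under the chat box. gr.ChatInterface passes each
# component's value to the chat function after (message, history), in the
# order listed here.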
additional_inputs = [
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=256,
        minimum=0,
        maximum=1024,
        step=64,
        interactive=True,
        info="The maximum number of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    ),
    gr.Checkbox(
        label="Hinglish",
        value=False,
        interactive=True,
        info="Enables MistralTalk to respond in Hinglish (a mix of Hindi and English)",
    ),
]
css = """
#mkd {
height: 500px;
overflow: auto;
border: 1px solid #ccc;
}
"""
with gr.Blocks(css=css) as demo:
    gr.HTML("<h1><center>MistralTalk🗣️</center></h1>")
    gr.HTML("<h3><center>In this demo, you can chat with the <a href='https://huggingface.co/adi1193/mistral-postv6'>mistral-postv6</a> model, a fine-tuned Mistral. 💬</center></h3>")
    gr.HTML("<h3><center>Learn more about the base model <a href='https://huggingface.co/docs/transformers/main/model_doc/mistral'>here</a>. 📚</center></h3>")
    gr.ChatInterface(
        generate_response,
        additional_inputs=additional_inputs,
        theme=gr.themes.Soft(),
        examples=[
            ["What is the interest?"],
            ["How does the universe work?"],
            ["What can you do?"],
            ["What is quantum mechanics?"],
            ["Do you believe in an afterlife?"],
        ],
    )
if __name__ == "__main__":
    demo.launch()