import gradio as gr
from functools import lru_cache
from transformers import pipeline


# Assumption: caching loaded pipelines (the original reloaded the model on every
# request) keeps repeat generations fast at the cost of holding models in memory.
@lru_cache(maxsize=None)
def load_model(model_name):
    return pipeline("text-generation", model=model_name, device="cpu")


# Per-model decoding settings. In transformers, passing penalty_alpha together
# with top_k selects contrastive-search decoding.
GENERATION_SETTINGS = {
    "Felladrin/Pythia-31M-Chat-v1": {"penalty_alpha": 0.5, "top_k": 2, "repetition_penalty": 1.0016},
    "Felladrin/Llama-68M-Chat-v1": {"penalty_alpha": 0.5, "top_k": 4, "repetition_penalty": 1.043},
    "Felladrin/Smol-Llama-101M-Chat-v1": {"penalty_alpha": 0.5, "top_k": 4, "repetition_penalty": 1.105},
    "Felladrin/Llama-160M-Chat-v1": {"penalty_alpha": 0.5, "top_k": 4, "repetition_penalty": 1.01},
    "Felladrin/TinyMistral-248M-SFT-v4": {"penalty_alpha": 0.5, "top_k": 5, "repetition_penalty": 1.001},
}

# Fallback sampling settings for any model without tuned settings above.
DEFAULT_SAMPLING_SETTINGS = {
    "do_sample": True,
    "temperature": 0.7,
    "top_k": 35,
    "top_p": 0.5,
    "repetition_penalty": 1.176,
}


def generate(
    model_name,
    system_input,
    user_initial_message,
    assistant_initial_message,
    user_input,
):
    pipe = load_model(model_name)

    # Build the conversation and render it with the model's own chat template.
    messages = [
        {"role": "system", "content": system_input},
        {"role": "user", "content": user_initial_message},
        {"role": "assistant", "content": assistant_initial_message},
        {"role": "user", "content": user_input},
    ]
    prompt = pipe.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    settings = GENERATION_SETTINGS.get(model_name, DEFAULT_SAMPLING_SETTINGS)
    outputs = pipe(prompt, max_length=1024, use_cache=True, **settings)

    # The text-generation pipeline returns the prompt followed by the completion.
    return outputs[0]["generated_text"]
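

# A quick way to exercise generate() without the UI. This call is hypothetical
# (it just reuses the UI defaults below) and is not part of the original app:
#
#   reply = generate(
#       "Felladrin/Llama-160M-Chat-v1",
#       "You are a highly knowledgeable and friendly assistant.",
#       "Hey! Got a question for you!",
#       "Sure! What's it?",
#       "Can you list some potential applications for quantum computing?",
#   )
#   print(reply)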


model_choices = [
    "Felladrin/Llama-160M-Chat-v1",
    "Felladrin/Minueza-32M-Chat",
    "Felladrin/Smol-Llama-101M-Chat-v1",
    "Felladrin/TinyMistral-248M-SFT-v4",
    "Felladrin/Pythia-31M-Chat-v1",
    "Felladrin/Llama-68M-Chat-v1",
]

g = gr.Interface(
    fn=generate,
    inputs=[
        gr.components.Dropdown(choices=model_choices, label="Model", value=model_choices[0], interactive=True),
        gr.components.Textbox(lines=2, label="System Message", value="You are a highly knowledgeable and friendly assistant. Your goal is to understand and respond to user inquiries with clarity. Your interactions are always respectful, helpful, and focused on delivering the most accurate information to the user."),
        gr.components.Textbox(lines=2, label="User Initial Message", value="Hey! Got a question for you!"),
        gr.components.Textbox(lines=2, label="Assistant Initial Message", value="Sure! What's it?"),
        gr.components.Textbox(lines=2, label="User Message", value="Can you list some potential applications for quantum computing?"),
    ],
    outputs=[gr.Textbox(lines=24, label="Output")],
    title="A place to try out text-generation models fine-tuned by Felladrin",
    concurrency_limit=1,
)
g.launch(max_threads=2)
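
# On Hugging Face Spaces this file runs as app.py and launch() serves the UI;
# to try it locally, run `python app.py` and open the local URL Gradio prints.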