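"""Gradio demo for chatting with Felladrin's small text-generation models.

Builds a transformers text-generation pipeline for the selected model, renders
the conversation with the tokenizer's chat template, and generates a reply
using per-model decoding settings.
"""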
import gradio as gr
from transformers import pipeline

def load_model(model_name):
    # Build a CPU text-generation pipeline for the selected model.
    # Note: a new pipeline is created on every request; nothing is cached.
    return pipeline("text-generation", model=model_name, device="cpu")

def generate(
    model_name,
    system_input,
    user_initial_message,
    assistant_initial_message,
    user_input,
):
    pipe = load_model(model_name)

    # Assemble the conversation: system prompt, a short priming exchange,
    # and the actual user message.
    message_template = [
        {"role": "system", "content": system_input},
        {"role": "user", "content": user_initial_message},
        {"role": "assistant", "content": assistant_initial_message},
        {"role": "user", "content": user_input},
    ]

    # Render the conversation with the model's chat template, appending the
    # generation prompt so the model replies as the assistant.
    prompt = pipe.tokenizer.apply_chat_template(message_template, tokenize=False, add_generation_prompt=True)
    
    # Per-model decoding settings: these checkpoints use contrastive search
    # (penalty_alpha + top_k) with a model-specific repetition penalty.
    contrastive_search_settings = {
        "Felladrin/Pythia-31M-Chat-v1": {"top_k": 2, "repetition_penalty": 1.0016},
        "Felladrin/Llama-68M-Chat-v1": {"top_k": 4, "repetition_penalty": 1.043},
        "Felladrin/Smol-Llama-101M-Chat-v1": {"top_k": 4, "repetition_penalty": 1.105},
        "Felladrin/Llama-160M-Chat-v1": {"top_k": 4, "repetition_penalty": 1.01},
        "Felladrin/TinyMistral-248M-SFT-v4": {"top_k": 5, "repetition_penalty": 1.001},
    }

    if model_name in contrastive_search_settings:
        outputs = pipe(prompt, max_length=1024, use_cache=True, penalty_alpha=0.5, **contrastive_search_settings[model_name])
    else:
        # Remaining models (e.g. Minueza-32M-Chat) use plain sampling instead.
        outputs = pipe(prompt, max_length=1024, do_sample=True, temperature=0.7, top_k=35, top_p=0.5, repetition_penalty=1.176)

    return outputs[0]["generated_text"]

model_choices = [
    "Felladrin/Llama-160M-Chat-v1",
    "Felladrin/Minueza-32M-Chat",
    "Felladrin/Smol-Llama-101M-Chat-v1",
    "Felladrin/TinyMistral-248M-SFT-v4",
    "Felladrin/Pythia-31M-Chat-v1",
    "Felladrin/Llama-68M-Chat-v1"
]

# Web UI: model picker plus editable system, priming, and user messages.
g = gr.Interface(
    fn=generate,
    inputs=[
        gr.components.Dropdown(choices=model_choices, label="Model", value=model_choices[0], interactive=True),
        gr.components.Textbox(lines=2, label="System Message", value="You are a highly knowledgeable and friendly assistant. Your goal is to understand and respond to user inquiries with clarity. Your interactions are always respectful, helpful, and focused on delivering the most accurate information to the user."),
        gr.components.Textbox(lines=2, label="User Initial Message", value="Hey! Got a question for you!"),
        gr.components.Textbox(lines=2, label="Assistant Initial Message", value="Sure! What's it?"),
        gr.components.Textbox(lines=2, label="User Message", value="Can you list some potential applications for quantum computing?"),
    ],
    outputs=[gr.Textbox(lines=24, label="Output")],
    title="A place to try out text-generation models fine-tuned by Felladrin",
    concurrency_limit=1
)

# Launch the demo with a small worker thread pool.
g.launch(max_threads=2)