Update app.py
app.py CHANGED
@@ -9,19 +9,18 @@ def generate(
     template_name,
     user_input,
     temperature=0.4,
-    top_p=0.
-    top_k=
+    top_p=0.25,
+    top_k=7,
     max_new_tokens=256,
 ):
     pipe = load_model(model_name)
-
-    if template_name == "Falcon 1B Template":
+    if template_name == "User-Assistant":
         message_template = [
             {"role": "user", "content": "Hello!"},
-            {"role": "assistant", "content": "
+            {"role": "assistant", "content": "Hi! How can I assist you today?"},
             {"role": "user", "content": user_input},
         ]
-    else:
+    else:
         message_template = [
             {
                 "role": "system",
@@ -30,15 +29,14 @@ def generate(
             {"role": "user", "content": user_input},
         ]
 
-    # Set tokenize correctly. Otherwise ticking the box breaks it.
     prompt = pipe.tokenizer.apply_chat_template(message_template, tokenize=False, add_generation_prompt=True)
     outputs = pipe(prompt, max_new_tokens=max_new_tokens, do_sample=True,
                    temperature=temperature, top_k=top_k, top_p=top_p, repetition_penalty=1.10)
     return outputs[0]["generated_text"]
 
-model_choices = ["
-template_choices = ["
-
+model_choices = ["Felladrin/Pythia-31M-Chat-v1", "Felladrin/Llama-160M-Chat-v1", "Felladrin/Smol-Llama-101M-Chat-v1", "Felladrin/TinyMistral-248M-SFT-v4"]
+template_choices = ["System-User-Assistant", "User-Assistant"]
+
 g = gr.Interface(
     fn=generate,
     inputs=[
@@ -46,13 +44,13 @@ g = gr.Interface(
         gr.components.Dropdown(choices=template_choices, label="Template", value=template_choices[0], interactive=True),
         gr.components.Textbox(lines=2, label="Prompt", value="How many planets are in our solar system?"),
         gr.components.Slider(minimum=0, maximum=1, value=0.4, label="Temperature"),
-        gr.components.Slider(minimum=0, maximum=1, value=0.
-        gr.components.Slider(minimum=0, maximum=100, step=1, value=
+        gr.components.Slider(minimum=0, maximum=1, value=0.25, label="Top p"),
+        gr.components.Slider(minimum=0, maximum=100, step=1, value=7, label="Top k"),
         gr.components.Slider(minimum=1, maximum=1024, step=1, value=256, label="Max tokens"),
     ],
     outputs=[gr.Textbox(lines=10, label="Output")],
-    title="
-    description="
+    title="Chat with Felladrin's LLMs",
+    description="Note that the inference happens on free-tier hardware, which may lead to slower outputs during periods of high demand.",
     concurrency_limit=1
 )
 
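The hunks above call a load_model helper whose definition sits outside the changed lines. As a rough sketch of what such a helper could look like (the pipeline task and the caching are assumptions, not part of this commit), a cached transformers text-generation pipeline would provide the .tokenizer attribute that generate() relies on:

# Hypothetical sketch of the load_model helper assumed by generate();
# its real definition lives elsewhere in app.py, outside this diff.
from functools import lru_cache
from transformers import pipeline

@lru_cache(maxsize=4)  # assumption: one cached pipeline per entry in model_choices
def load_model(model_name):
    # A "text-generation" pipeline exposes .tokenizer, which is what
    # generate() uses for apply_chat_template().
    return pipeline("text-generation", model=model_name)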
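The prompt handed to the pipeline is built with apply_chat_template using tokenize=False and add_generation_prompt=True, which renders the message list into the model's own chat format as a plain string and appends the marker for a fresh assistant turn. To inspect that rendering outside the app, with one of the model IDs from the diff (the exact output depends on the chat template bundled with the model):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Felladrin/Llama-160M-Chat-v1")
messages = [
    {"role": "user", "content": "Hello!"},
    {"role": "assistant", "content": "Hi! How can I assist you today?"},
    {"role": "user", "content": "How many planets are in our solar system?"},
]
# tokenize=False returns the flat prompt string instead of token IDs;
# add_generation_prompt=True appends the assistant-turn marker.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)

As a side note on the new defaults, top_p=0.25 and top_k=7 restrict sampling to a small nucleus of high-probability tokens, which tends to keep very small models on-topic at the cost of output variety.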
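Nothing in the diff launches the interface, so the Space presumably starts it further down in app.py. A typical closing line, shown here only as an assumption:

# Assumed closing line, not part of this diff: start the Gradio app.
g.launch()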