Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -11,12 +11,10 @@ def generate(
|
|
11 |
temperature=0.4,
|
12 |
top_p=0.25,
|
13 |
top_k=7,
|
14 |
-
max_new_tokens=256,
|
15 |
repetition_penalty=1.0,
|
|
|
16 |
):
|
17 |
pipe = load_model(model_name)
|
18 |
-
if model_name == "Felladrin/Pythia-31M-Chat-v1":
|
19 |
-
repetition_penalty=1.0016
|
20 |
message_template = [
|
21 |
{
|
22 |
"role": "system",
|
@@ -42,12 +40,11 @@ g = gr.Interface(
|
|
42 |
gr.components.Slider(minimum=0, maximum=1, value=0.4, label="Temperature"),
|
43 |
gr.components.Slider(minimum=0, maximum=1, value=0.25, label="Top p"),
|
44 |
gr.components.Slider(minimum=0, maximum=100, step=1, value=7, label="Top k"),
|
|
|
45 |
gr.components.Slider(minimum=1, maximum=1024, step=1, value=256, label="Max tokens"),
|
46 |
-
gr.components.Slider(minimum=1.0, maximum=2.0, step=0.001, value=1.0, label="Repetition Penalty"),
|
47 |
],
|
48 |
outputs=[gr.Textbox(lines=10, label="Output")],
|
49 |
title="Chat with models fine-tuned by Felladrin",
|
50 |
-
description="Note that the inference runs on CPU only, which may lead to slower outputs during periods of high demand.",
|
51 |
concurrency_limit=1
|
52 |
)
|
53 |
|
|
|
11 |
temperature=0.4,
|
12 |
top_p=0.25,
|
13 |
top_k=7,
|
|
|
14 |
repetition_penalty=1.0,
|
15 |
+
max_new_tokens=256,
|
16 |
):
|
17 |
pipe = load_model(model_name)
|
|
|
|
|
18 |
message_template = [
|
19 |
{
|
20 |
"role": "system",
|
|
|
40 |
gr.components.Slider(minimum=0, maximum=1, value=0.4, label="Temperature"),
|
41 |
gr.components.Slider(minimum=0, maximum=1, value=0.25, label="Top p"),
|
42 |
gr.components.Slider(minimum=0, maximum=100, step=1, value=7, label="Top k"),
|
43 |
+
gr.components.Slider(minimum=1.0, maximum=1.5, step=0.001, value=1.0016, label="Repetition Penalty"),
|
44 |
gr.components.Slider(minimum=1, maximum=1024, step=1, value=256, label="Max tokens"),
|
|
|
45 |
],
|
46 |
outputs=[gr.Textbox(lines=10, label="Output")],
|
47 |
title="Chat with models fine-tuned by Felladrin",
|
|
|
48 |
concurrency_limit=1
|
49 |
)
|
50 |
|