ModelsPlayground

Running

Felladrin committed on Jan 7, 2024

Commit

2590ebf

1 Parent(s): cbfc805

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,12 +11,10 @@ def generate(
     temperature=0.4,
     top_p=0.25,
     top_k=7,
-    max_new_tokens=256,
     repetition_penalty=1.0,
 ):
     pipe = load_model(model_name)
-    if model_name == "Felladrin/Pythia-31M-Chat-v1":
-        repetition_penalty=1.0016
     message_template = [
         {
             "role": "system",
@@ -42,12 +40,11 @@ g = gr.Interface(
         gr.components.Slider(minimum=0, maximum=1, value=0.4, label="Temperature"),
         gr.components.Slider(minimum=0, maximum=1, value=0.25, label="Top p"),
         gr.components.Slider(minimum=0, maximum=100, step=1, value=7, label="Top k"),
         gr.components.Slider(minimum=1, maximum=1024, step=1, value=256, label="Max tokens"),
-        gr.components.Slider(minimum=1.0, maximum=2.0, step=0.001, value=1.0, label="Repetition Penalty"),
     ],
     outputs=[gr.Textbox(lines=10, label="Output")],
     title="Chat with models fine-tuned by Felladrin",
-    description="Note that the inference runs on CPU only, which may lead to slower outputs during periods of high demand.",
     concurrency_limit=1
 )

     temperature=0.4,
     top_p=0.25,
     top_k=7,
     repetition_penalty=1.0,
+    max_new_tokens=256,
 ):
     pipe = load_model(model_name)
     message_template = [
         {
             "role": "system",
         gr.components.Slider(minimum=0, maximum=1, value=0.4, label="Temperature"),
         gr.components.Slider(minimum=0, maximum=1, value=0.25, label="Top p"),
         gr.components.Slider(minimum=0, maximum=100, step=1, value=7, label="Top k"),
+        gr.components.Slider(minimum=1.0, maximum=1.5, step=0.001, value=1.0016, label="Repetition Penalty"),
         gr.components.Slider(minimum=1, maximum=1024, step=1, value=256, label="Max tokens"),
     ],
     outputs=[gr.Textbox(lines=10, label="Output")],
     title="Chat with models fine-tuned by Felladrin",
     concurrency_limit=1
 )