Mistral-Nemo

Running on Zero

vilarin committed 17 days ago

Commit

4ed884e

•

1 Parent(s): 10efa15

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -43,14 +43,23 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = model.eval()
 @spaces.GPU()
-def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
     for resp, history in model.stream_chat(
         tokenizer,
         query = message,
         history = history,
         max_new_tokens = max_new_tokens,
-        do_sample = True if temperature == 0 else False,
         top_p = top_p,
         top_k = top_k,
         temperature = temperature,
@@ -80,7 +89,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
             ),
             gr.Slider(
                 minimum=128,
-                maximum=2048,
                 step=1,
                 value=1024,
                 label="Max New Tokens",
@@ -90,7 +99,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
                 minimum=0.0,
                 maximum=1.0,
                 step=0.1,
-                value=0.8,
                 label="top_p",
                 render=False,
             ),
@@ -106,7 +115,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
                 minimum=0.0,
                 maximum=2.0,
                 step=0.1,
-                value=1.0,
                 label="Repetition penalty",
                 render=False,
             ),

 model = model.eval()
 @spaces.GPU()
+def stream_chat(
+    message: str,
+    history: list,
+    temperature: float = 0.8,
+    max_new_tokens: int = 1024,
+    top_p: float = 1.0,
+    top_k: int = 20,
+    penalty: float = 1.2
+):
+    print(f'message: {message}')
+    print(f'history: {history}')
     for resp, history in model.stream_chat(
         tokenizer,
         query = message,
         history = history,
         max_new_tokens = max_new_tokens,
+        do_sample = False if temperature == 0 else True,
         top_p = top_p,
         top_k = top_k,
         temperature = temperature,
             ),
             gr.Slider(
                 minimum=128,
+                maximum=8192,
                 step=1,
                 value=1024,
                 label="Max New Tokens",
                 minimum=0.0,
                 maximum=1.0,
                 step=0.1,
+                value=1.0,
                 label="top_p",
                 render=False,
             ),
                 minimum=0.0,
                 maximum=2.0,
                 step=0.1,
+                value=1.2,
                 label="Repetition penalty",
                 render=False,
             ),