gemma-2-9b-it

Runtime error

App Files Files Community

ehristoforu committed on Jul 21

Commit

c2dd1cb

•

1 Parent(s): bd77505

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -63

app.py CHANGED Viewed

@@ -39,10 +39,10 @@ model.config.sliding_window = 4096
 model.eval()
-@spaces.GPU(duration=50)
 def generate(
     message: str,
-    system_prompt: str,
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
     top_p: float = 0.9,
@@ -50,7 +50,13 @@ def generate(
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
     conversation = []
-    conversation.append({"role": "system", "content": system_prompt})
     conversation.append({"role": "user", "content": message})
     input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
@@ -79,68 +85,54 @@ def generate(
         outputs.append(text)
         yield "".join(outputs)
-message = gr.Textbox(
-    label="Message",
-    max_lines=5,
-    lines=2,
-    interactive=True,
-)
-system_prompt = gr.Textbox(
-    label="System prompt",
-    max_lines=5,
-    lines=2,
-    interactive=True,
-)
-max_tokens = gr.Slider(
-    label="Max new tokens",
-    minimum=1,
-    maximum=MAX_MAX_NEW_TOKENS,
-    step=1,
-        value=DEFAULT_MAX_NEW_TOKENS,
-)
-temperature = gr.Slider(
-    label="Temperature",
-    minimum=0.1,
-    maximum=4.0,
-    step=0.1,
-    value=0.6,
-)
-top_p = gr.Slider(
-    label="Top-p (nucleus sampling)",
-    minimum=0.05,
-    maximum=1.0,
-    step=0.05,
-    value=0.9,
-)
-top_k = gr.Slider(
-    label="Top-k",
-    minimum=1,
-    maximum=1000,
-    step=1,
-    value=50,
-)
-repeat_penalty = gr.Slider(
-    label="Repetition penalty",
-    minimum=1.0,
-    maximum=2.0,
-    step=0.05,
-    value=1.2,
-)
-output = gr.Textbox(
-    label="Output",
-    max_lines=16,
-    lines=10,
-    interactive=True,
-)
-chat_interface = gr.Interface(
     fn=generate,
-    inputs=[message, system_prompt, max_tokens, temperature, top_p, top_k, repeat_penalty],
-    outputs=output,
-    api_name="/run",
 )
 with gr.Blocks(css="style.css", fill_height=True) as demo:

 model.eval()
+@spaces.GPU(duration=90)
 def generate(
     message: str,
+    chat_history: list[tuple[str, str]],
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
     top_p: float = 0.9,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
     conversation = []
+    for user, assistant in chat_history:
+        conversation.extend(
+            [
+                {"role": "user", "content": user},
+                {"role": "assistant", "content": assistant},
+            ]
+        )
     conversation.append({"role": "user", "content": message})
     input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
         outputs.append(text)
         yield "".join(outputs)
+chat_interface = gr.ChatInterface(
     fn=generate,
+    additional_inputs=[
+        gr.Slider(
+            label="Max new tokens",
+            minimum=1,
+            maximum=MAX_MAX_NEW_TOKENS,
+            step=1,
+            value=DEFAULT_MAX_NEW_TOKENS,
+        ),
+        gr.Slider(
+            label="Temperature",
+            minimum=0.1,
+            maximum=4.0,
+            step=0.1,
+            value=0.6,
+        ),
+        gr.Slider(
+            label="Top-p (nucleus sampling)",
+            minimum=0.05,
+            maximum=1.0,
+            step=0.05,
+            value=0.9,
+        ),
+        gr.Slider(
+            label="Top-k",
+            minimum=1,
+            maximum=1000,
+            step=1,
+            value=50,
+        ),
+        gr.Slider(
+            label="Repetition penalty",
+            minimum=1.0,
+            maximum=2.0,
+            step=0.05,
+            value=1.2,
+        ),
+    ],
+    stop_btn=None,
+    examples=[
+        ["Hello there! How are you doing?"],
+        ["Can you explain briefly to me what is the Python programming language?"],
+        ["Explain the plot of Cinderella in a sentence."],
+        ["How many hours does it take a man to eat a Helicopter?"],
+        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
+    ],
 )
 with gr.Blocks(css="style.css", fill_height=True) as demo: