Spaces:

zubairsamo
/

transformers_streaming

Sleeping

App Files Files Community

joaogante HF staff committed on Apr 4, 2023

Commit

fdb003a

•

1 Parent(s): d509568

revert to simpler textbox

Browse files

Files changed (1) hide show

app.py +36 -67

app.py CHANGED Viewed

@@ -14,28 +14,9 @@ model = AutoModelForSeq2SeqLM.from_pretrained(model_id, load_in_8bit=True, devic
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-def run_generation(user_text, top_p, temperature, top_k, max_new_tokens, use_history, history):
-    if history is None:
-        history = []
-    history.append([user_text, ""])
-    # Get the model and tokenizer, and tokenize the user text. If `use_history` is True, we use the chatbot history
-    if use_history:
-        user_name, assistant_name, sep = "User: ", "Assistant: ", "\n"
-        past = []
-        for data in history:
-            user_data, model_data = data
-            if not user_data.startswith(user_name):
-                user_data = user_name + user_data
-            if not model_data.startswith(sep + assistant_name):
-                model_data = sep + assistant_name + model_data
-            past.append(user_data + model_data.rstrip() + sep)
-            text_input = "".join(past)
-    else:
-        text_input = user_text
-    model_inputs = tokenizer([text_input], return_tensors="pt").to(torch_device)
     # Start generation on a separate thread, so that we don't block the UI. The text is pulled from the streamer
     # in the main thread.
@@ -52,69 +33,57 @@ def run_generation(user_text, top_p, temperature, top_k, max_new_tokens, use_his
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
-    # Pull the generated text from the streamer, and update the chatbot.
     for new_text in streamer:
-        history[-1][1] += new_text
-        yield history
-    return history
 def reset_textbox():
     return gr.update(value='')
-with gr.Blocks(
-    css="""#col_container {width: 1000px; margin-left: auto; margin-right: auto;}
-    #chatbot {height: 520px; overflow: auto;}"""
-) as demo:
-    with gr.Column(elem_id="col_container"):
-        duplicate_link = "https://huggingface.co/spaces/joaogante/chatbot_transformers_streaming?duplicate=true"
-        gr.Markdown(
-            "# 🤗 Transformers 🔥Streaming🔥 on Gradio\n"
-            "This demo showcases the use of the "
-            "[streaming feature](https://huggingface.co/docs/transformers/main/en/generation_strategies#streaming) "
-            "of 🤗 Transformers with Gradio to generate text in real-time, as a chatbot. It uses "
-            f"[{model_id}](https://huggingface.co/{model_id}), "
-            "loaded in 8-bit quantized form.\n\n"
-            f"Feel free to [duplicate this Space]({duplicate_link}) to try your own models or use this space as a "
-            "template! 💛"
-        )
-        chatbot = gr.Chatbot(elem_id='chatbot', label="Chat history")
-        user_text = gr.Textbox(
-            placeholder="Write an email about an alpaca that likes flan",
-            label="Type an input and press Enter"
-        )
-        with gr.Row():
             button_submit = gr.Button(value="Submit")
-            button_clear = gr.Button(value="Clear chat history")
-        with gr.Accordion("Generation Parameters", open=False):
-            use_history = gr.Checkbox(value=False, label="Use chat history as prompt")
             max_new_tokens = gr.Slider(
                 minimum=1, maximum=1000, value=250, step=1, interactive=True, label="Max New Tokens",
             )
             top_p = gr.Slider(
                 minimum=0, maximum=1.0, value=0.95, step=0.05, interactive=True, label="Top-p (nucleus sampling)",
             )
-            temperature = gr.Slider(
-                minimum=0, maximum=5.0, value=0.8, step=0.1, interactive=True, label="Temperature (set to 0 for Greedy Decoding)",
-            )
             top_k = gr.Slider(
                 minimum=1, maximum=50, value=50, step=1, interactive=True, label="Top-k",
             )
-    user_text.submit(
-        run_generation,
-        [user_text, top_p, temperature, top_k, max_new_tokens, use_history, chatbot],
-        chatbot
-    )
-    button_submit.click(
-        run_generation,
-        [user_text, top_p, temperature, top_k, max_new_tokens, use_history, chatbot],
-        chatbot
-    )
-    button_clear.click(reset_textbox, [], [chatbot])
     demo.queue(max_size=32).launch(enable_queue=True)

 tokenizer = AutoTokenizer.from_pretrained(model_id)
+def run_generation(user_text, top_p, temperature, top_k, max_new_tokens):
+    # Get the model and tokenizer, and tokenize the user text.
+    model_inputs = tokenizer([user_text], return_tensors="pt").to(torch_device)
     # Start generation on a separate thread, so that we don't block the UI. The text is pulled from the streamer
     # in the main thread.
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
+    # Pull the generated text from the streamer, and update the model output.
+    model_output = ""
     for new_text in streamer:
+        model_output += new_text
+        yield model_output
+    return model_output
 def reset_textbox():
     return gr.update(value='')
+with gr.Blocks() as demo:
+    duplicate_link = "https://huggingface.co/spaces/joaogante/transformers_streaming?duplicate=true"
+    gr.Markdown(
+        "# 🤗 Transformers 🔥Streaming🔥 on Gradio\n"
+        "This demo showcases the use of the "
+        "[streaming feature](https://huggingface.co/docs/transformers/main/en/generation_strategies#streaming) "
+        "of 🤗 Transformers with Gradio to generate text in real-time. It uses "
+        f"[{model_id}](https://huggingface.co/{model_id}), "
+        "loaded in 8-bit quantized form.\n\n"
+        f"Feel free to [duplicate this Space]({duplicate_link}) to try your own models or use this space as a "
+        "template! 💛"
+    )
+    with gr.Row():
+        with gr.Column(scale=4):
+            user_text = gr.Textbox(
+                placeholder="Write an email about an alpaca that likes flan",
+                label="User input"
+            )
+            model_output = gr.Textbox(
+                label="Model output", lines=10, read_only=True
+            )
             button_submit = gr.Button(value="Submit")
+        with gr.Column(scale=1):
             max_new_tokens = gr.Slider(
                 minimum=1, maximum=1000, value=250, step=1, interactive=True, label="Max New Tokens",
             )
             top_p = gr.Slider(
                 minimum=0, maximum=1.0, value=0.95, step=0.05, interactive=True, label="Top-p (nucleus sampling)",
             )
             top_k = gr.Slider(
                 minimum=1, maximum=50, value=50, step=1, interactive=True, label="Top-k",
             )
+            temperature = gr.Slider(
+                minimum=0, maximum=5.0, value=0.8, step=0.1, interactive=True, label="Temperature (0 = Greedy Decoding)",
+            )
+    user_text.submit(run_generation, [user_text, top_p, temperature, top_k, max_new_tokens], model_output)
+    button_submit.click(run_generation, [user_text, top_p, temperature, top_k, max_new_tokens], model_output)
     demo.queue(max_size=32).launch(enable_queue=True)