Spaces:

zubairsamo
/

transformers_streaming

Sleeping

App Files Files Community

joaogante HF staff committed on Apr 4, 2023

Commit

8445393

•

1 Parent(s): 5c81752

Updated chatbot

Browse files

Files changed (2) hide show

app.py +89 -34
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -1,46 +1,101 @@
-import gradio as gr
 from threading import Thread
-from transformers import AutoModelForCausalLM, AutoTokenizer, IteratorStreamer
-# Global variable loading
-tokenizer = AutoTokenizer.from_pretrained("gpt2")
-print("Loading the model...")
-model = AutoModelForCausalLM.from_pretrained("gpt2")
-print("Done!")
-# Gradio app
-with gr.Blocks() as demo:
-    def user(user_message, history):
-        return "", history + [[user_message, None]]
-    chatbot = gr.Chatbot()
-    msg = gr.Textbox()
-    clear = gr.Button("Clear")
-    def update_chatbot(history):
-        user_query = history[-1][0]
-        history[-1][1] = ""
-        model_inputs = tokenizer([user_query], return_tensors="pt")
-        # Start generation on a separate thread, so that we don't block the UI. The text is pulled from the streamer
-        # in the main thread.
-        streamer = IteratorStreamer(tokenizer)
-        generate_kwargs = dict(model_inputs, streamer=streamer, max_new_tokens=200, do_sample=True)
-        t = Thread(target=model.generate, kwargs=generate_kwargs)
-        t.start()
-        # Pull the generated text from the streamer, and update the chatbot.
-        for new_text in streamer:
-            history[-1][1] += new_text
-            yield history
-        return history
-    msg.submit(user, [msg, chatbot], [msg, chatbot]).then(
-        update_chatbot, chatbot, chatbot
     )
-    clear.click(lambda: None, None, chatbot)
-demo.queue()
-demo.launch()

 from threading import Thread
+from functools import lru_cache
+import gradio as gr
+from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM, TextIteratorStreamer
+@lru_cache(maxsize=1)  # hold on to the most recently requested checkpoint only
+def get_model_and_tokenizer(model_id):
+    """Load the model and tokenizer for `model_id`.
+
+    The checkpoint's config decides which auto-class is used: encoder-decoder
+    checkpoints are loaded with `AutoModelForSeq2SeqLM`, everything else with
+    `AutoModelForCausalLM`. Returns a `(model, tokenizer)` tuple.
+    """
+    config = AutoConfig.from_pretrained(model_id)
+    model_cls = AutoModelForSeq2SeqLM if config.is_encoder_decoder else AutoModelForCausalLM
+    model = model_cls.from_pretrained(model_id)
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    return model, tokenizer
+def run_generation(model_id, user_text, top_p, temperature, top_k, chat_counter, max_new_tokens, history):
+    """Stream a reply to `user_text` into the chat history.
+
+    This is a generator: it appends a new `[user_text, ""]` turn to `history`,
+    starts `model.generate` on a background thread, and yields the updated
+    history each time the streamer produces a new piece of text.
+
+    Args:
+        model_id: Hub repo id passed to `get_model_and_tokenizer`.
+        user_text: the prompt for this turn.
+        top_p, temperature, top_k: sampling parameters forwarded to `generate`.
+            A temperature of 0 (or below) disables sampling (greedy decoding).
+        chat_counter: not used by this function.
+        max_new_tokens: forwarded to `generate`.
+        history: list of `[user, bot]` turns, or None for an empty chat.
+
+    Yields:
+        The history list, whose last turn grows as text is generated.
+    """
+    if history is None:
+        history = []
+    # `append` must be *called*; subscripting it (`append[[...]]`) raises TypeError.
+    history.append([user_text, ""])
+    # Get the model and tokenizer, and tokenize the user text.
+    model, tokenizer = get_model_and_tokenizer(model_id)
+    model_inputs = tokenizer([user_text], return_tensors="pt")
+    # Start generation on a separate thread, so that we don't block the UI. The text is pulled from the streamer
+    # in the main thread.
+    streamer = TextIteratorStreamer(tokenizer)
+    generate_kwargs = dict(
+        model_inputs,
+        streamer=streamer,
+        max_new_tokens=max_new_tokens,
+    )
+    if temperature > 0:
+        generate_kwargs.update(
+            do_sample=True,
+            top_p=top_p,
+            # transformers' temperature warper requires a strictly positive float.
+            temperature=float(temperature),
+            top_k=top_k,
+        )
+    else:
+        # The UI labels temperature 0 as greedy decoding; sampling with a non-positive
+        # temperature is rejected by transformers, so turn sampling off instead.
+        generate_kwargs.update(do_sample=False)
+    t = Thread(target=model.generate, kwargs=generate_kwargs)
+    t.start()
+    # Pull the generated text from the streamer, and update the chatbot.
+    for new_text in streamer:
+        history[-1][1] += new_text
+        yield history
+    return history
+def reset_textbox():
+    """Return a Gradio update that empties the target textbox."""
+    cleared = gr.update(value='')
+    return cleared
+title = """<h1 align="center">🔥Transformers + Gradio 🚀Streaming🚀</h1>"""
+# Build the UI: model selector, chat window, prompt box, and sampling controls, then wire
+# both "press Enter" and the button to the streaming generation function.
+with gr.Blocks(
+    css="""#col_container {width: 1000px; margin-left: auto; margin-right: auto;}
+    #chatbot {height: 520px; overflow: auto;}"""
+) as demo:
+    gr.HTML(title)
+    demo_link = "https://huggingface.co/spaces/joaogante/chatbot_transformers_streaming"
+    img_src = "https://bit.ly/3gLdBN6"
+    button_desc = "Duplicate the Space to bypass queues, add hardware resources, or to use this demo as a template!"
+    gr.HTML(f'''<center><a href="{demo_link}?duplicate=true"><img src="{img_src}" alt="Duplicate Space"></a>{button_desc}</center>''')
+    with gr.Column(elem_id="col_container"):
+        model_id = gr.Textbox(value='EleutherAI/pythia-410m', label="🤗 Hub Model repo")
+        chatbot = gr.Chatbot(elem_id='chatbot')
+        user_text = gr.Textbox(placeholder="Is pineapple a pizza topping?", label="Type an input and press Enter")
+        button = gr.Button()
+        # run_generation takes a `chat_counter` argument between `top_k` and `max_new_tokens`.
+        # This hidden component fills that positional slot so the remaining inputs line up
+        # with the parameters they are meant for.
+        chat_counter = gr.Number(value=0, visible=False, precision=0)
+        with gr.Accordion("Parameters", open=False):
+            top_p = gr.Slider(
+                minimum=0, maximum=1.0, value=1.0, step=0.05, interactive=True, label="Top-p (nucleus sampling)",
+            )
+            temperature = gr.Slider(
+                minimum=0, maximum=5.0, value=1.0, step=0.1, interactive=True, label="Temperature (set to 0 for Greedy Decoding)",
+            )
+            top_k = gr.Slider(
+                minimum=1, maximum=50, value=50, step=1, interactive=True, label="Top-k",
+            )
+            max_new_tokens = gr.Slider(
+                minimum=1, maximum=1000, value=100, step=1, interactive=True, label="Max New Tokens",
+            )
+    # Inputs are matched to run_generation's parameters by position, so the order here must
+    # follow its signature exactly. One list is shared by both triggers to keep them in sync.
+    generation_inputs = [model_id, user_text, top_p, temperature, top_k, chat_counter, max_new_tokens, chatbot]
+    # run_generation yields a single value (the chat history), so exactly one output is declared.
+    user_text.submit(
+        run_generation,
+        generation_inputs,
+        [chatbot]
+    )
+    button.click(
+        run_generation,
+        generation_inputs,
+        [chatbot]
     )
+    # Clear the prompt box once the message has been submitted.
+    button.click(reset_textbox, [], [user_text])
+    user_text.submit(reset_textbox, [], [user_text])
+    demo.queue().launch()

requirements.txt CHANGED Viewed

	@@ -1,2 +1,2 @@
1	torch
2	- git+https://github.com/gante/transformers.git@streamer_iterator # transformers from dev branch


1	torch
2	+ git+https://github.com/huggingface/transformers.git # transformers from main (TextIteratorStreamer will be added in v4.28)