Commit 2d2dd9a
Author: joaogante (HF staff)
1 parent: 8445393

visual tweaks

Files changed (2):
  1. .gitignore +169 -0
  2. app.py +30 -28
.gitignore ADDED
@@ -0,0 +1,169 @@
+# Initially taken from Github's Python gitignore file
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# tests and logs
+tests/fixtures/cached_*_text.txt
+logs/
+lightning_logs/
+lang_code_data/
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# vscode
+.vs
+.vscode
+
+# Pycharm
+.idea
+
+# TF code
+tensorflow_code
+
+# Models
+proc_data
+
+# examples
+runs
+/runs_old
+/wandb
+/examples/runs
+/examples/**/*.args
+/examples/rag/sweep
+
+# data
+/data
+serialization_dir
+
+# emacs
+*.*~
+debug.env
+
+# vim
+.*.swp
+
+#ctags
+tags
+
+# pre-commit
+.pre-commit*
+
+# .lock
+*.lock
+
+# DS_Store (MacOS)
+.DS_Store
+
+# ruff
+.ruff_cache
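
As an aside, if you want to check which paths these patterns would ignore without asking git itself, a small sketch using the third-party `pathspec` package could look like the following (an assumption on my part: `pathspec` reimplements gitignore-style matching, but git remains the authority on ignore semantics):

```python
# Sketch: test candidate paths against the .gitignore added above.
# Assumes `pip install pathspec`; "gitwildmatch" is pathspec's gitignore-style syntax.
import pathspec

with open(".gitignore") as fh:
    spec = pathspec.PathSpec.from_lines("gitwildmatch", fh)

for path in ["app.py", "__pycache__/app.cpython-310.pyc", "wandb/latest-run", ".DS_Store"]:
    status = "ignored" if spec.match_file(path) else "kept"
    print(f"{path}: {status}")
```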
app.py CHANGED
@@ -1,10 +1,14 @@
 from threading import Thread
 from functools import lru_cache
 
+import torch
 import gradio as gr
 from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM, TextIteratorStreamer
 
 
+torch_device = "cuda" if torch.cuda.is_available() else "cpu"
+
+
 @lru_cache(maxsize=1)  # only cache the latest model
 def get_model_and_tokenizer(model_id):
     config = AutoConfig.from_pretrained(model_id)
@@ -14,21 +18,22 @@ def get_model_and_tokenizer(model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
 
     tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = model.to(torch_device)
     return model, tokenizer
 
 
-def run_generation(model_id, user_text, top_p, temperature, top_k, chat_counter, max_new_tokens, history):
+def run_generation(model_id, user_text, top_p, temperature, top_k, max_new_tokens, history):
     if history is None:
         history = []
-    history.append[[user_text, ""]]
+    history.append([user_text, ""])
 
     # Get the model and tokenizer, and tokenize the user text.
     model, tokenizer = get_model_and_tokenizer(model_id)
-    model_inputs = tokenizer([user_text], return_tensors="pt")
+    model_inputs = tokenizer([user_text], return_tensors="pt").to(torch_device)
 
     # Start generation on a separate thread, so that we don't block the UI. The text is pulled from the streamer
     # in the main thread.
-    streamer = TextIteratorStreamer(tokenizer)
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         model_inputs,
         streamer=streamer,
@@ -52,26 +57,32 @@ def reset_textbox():
     return gr.update(value='')
 
 
-title = """<h1 align="center">🔥Transformers + Gradio 🚀Streaming🚀</h1>"""
-
-
 with gr.Blocks(
     css="""#col_container {width: 1000px; margin-left: auto; margin-right: auto;}
     #chatbot {height: 520px; overflow: auto;}"""
 ) as demo:
-    gr.HTML(title)
-    demo_link = "https://huggingface.co/spaces/joaogante/chatbot_transformers_streaming"
-    img_src = "https://bit.ly/3gLdBN6"
-    button_desc = "Duplicate the Space to bypass queues, add hardware resources, or to use this demo as a template!"
-    gr.HTML(f'''<center><a href="{demo_link}?duplicate=true"><img src="{img_src}" alt="Duplicate Space"></a>{button_desc}</center>''')
-
     with gr.Column(elem_id="col_container"):
+        demo_link = "https://huggingface.co/spaces/joaogante/chatbot_transformers_streaming"
+        gr.Markdown(
+            f"""
+            # 🤗 Transformers Gradio 🔥Streaming🔥
+            This demo showcases how to use the streaming feature of 🤗 Transformers with Gradio to generate text in real-time.
+            ⚠️ [Duplicate this Space]({demo_link}) if ⚠️
+            - You want to use a large model (> 1GB). Otherwise, this public space will become slow for others 💛
+            - You want to build your own app, using this demo as a template 🚀
+            - You want to bypass the queue and/or add hardware resources 👾
+            """
+        )
+
         model_id = gr.Textbox(value='EleutherAI/pythia-410m', label="🤗 Hub Model repo")
-        chatbot = gr.Chatbot(elem_id='chatbot')
+        chatbot = gr.Chatbot(elem_id='chatbot', label="Message history")
         user_text = gr.Textbox(placeholder="Is pineapple a pizza topping?", label="Type an input and press Enter")
-        button = gr.Button()
+        button = gr.Button(value="Clear message history")
 
-        with gr.Accordion("Parameters", open=False):
+        with gr.Accordion("Generation Parameters", open=False):
+            max_new_tokens = gr.Slider(
+                minimum=1, maximum=1000, value=100, step=1, interactive=True, label="Max New Tokens",
+            )
             top_p = gr.Slider(
                 minimum=0, maximum=1.0, value=1.0, step=0.05, interactive=True, label="Top-p (nucleus sampling)",
             )
@@ -81,21 +92,12 @@ with gr.Blocks(
             top_k = gr.Slider(
                 minimum=1, maximum=50, value=50, step=1, interactive=True, label="Top-k",
            )
-            max_new_tokens = gr.Slider(
-                minimum=1, maximum=1000, value=100, step=1, interactive=True, label="Max New Tokens",
-            )
 
        user_text.submit(
            run_generation,
-            [model_id, user_text, top_p, temperature, top_k, max_new_tokens, chatbot, chatbot],
-            [chatbot, chatbot]
-        )
-        button.click(
-            run_generation,
-            [model_id, user_text, top_p, temperature, top_k, max_new_tokens, chatbot, chatbot],
-            [chatbot, chatbot]
+            [model_id, user_text, top_p, temperature, top_k, max_new_tokens, chatbot],
+            chatbot
        )
        button.click(reset_textbox, [], [user_text])
-        user_text.submit(reset_textbox, [], [user_text])
 
-demo.queue().launch()
+demo.queue(max_size=32).launch()
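
The hunks above elide the part of `run_generation` that actually launches the worker thread and consumes the stream. For context, below is a minimal standalone sketch of the same pattern, assuming the standard `TextIteratorStreamer` usage; the prompt string and the final loop body are illustrative, not copied from the app:

```python
from functools import lru_cache
from threading import Thread

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

torch_device = "cuda" if torch.cuda.is_available() else "cpu"


@lru_cache(maxsize=1)  # as in the diff: keep only the most recently requested model in memory
def get_model_and_tokenizer(model_id):
    model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    return model, tokenizer


model, tokenizer = get_model_and_tokenizer("EleutherAI/pythia-410m")
model_inputs = tokenizer(["Is pineapple a pizza topping?"], return_tensors="pt").to(torch_device)

# The streamer receives tokens from generate() and re-exposes them as an iterator of
# decoded text chunks; skip_prompt drops the echoed input from the streamed output.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generate_kwargs = dict(model_inputs, streamer=streamer, max_new_tokens=100, do_sample=True)

# generate() blocks until generation finishes, so it runs on a worker thread while the
# main thread pulls partial text from the streamer. This is what keeps the UI responsive.
thread = Thread(target=model.generate, kwargs=generate_kwargs)
thread.start()

generated_text = ""
for new_text in streamer:
    generated_text += new_text  # a Gradio handler would update history[-1][1] and yield here
    print(new_text, end="", flush=True)
thread.join()
```

In the Gradio version, yielding the updated history on every chunk is what makes the Chatbot repaint incrementally, even though `generate()` itself is a single blocking call.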