Spaces:

fxmarty
/

bettertransformer-demo

Running

Felix Marty committed on Nov 21, 2022

Commit

590064e

1 Parent(s): b23ba47

add timeout

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,15 +1,29 @@
 import gradio as gr
-from backend import (get_message_single, get_message_spam, send_single,
-                     send_spam)
-from defaults import (ADDRESS_BETTERTRANSFORMER, ADDRESS_VANILLA,
-                      defaults_bt_single, defaults_bt_spam,
-                      defaults_vanilla_single, defaults_vanilla_spam)
 with gr.Blocks() as demo:
     gr.Markdown(
         """
-    Let's try out TorchServe + BetterTransformer! This is some longer description This is some longer description This is some longer description")
     ## Inference using...
     """

 import gradio as gr
+from backend import get_message_single, get_message_spam, send_single, send_spam
+from defaults import (
+    ADDRESS_BETTERTRANSFORMER,
+    ADDRESS_VANILLA,
+    defaults_bt_single,
+    defaults_bt_spam,
+    defaults_vanilla_single,
+    defaults_vanilla_spam,
+)
 with gr.Blocks() as demo:
     gr.Markdown(
         """
+    Let's try out TorchServe + BetterTransformer!
+    BetterTransformer is a feature made available with PyTorch 1.13, allowing the use of a fastpath execution for encoder attention blocks.
+    As a one-liner, you can use BetterTransformer with compatible Transformers models:
+    ```
+    better_model = BetterTransformer.transform(model)
+    ```
+    This Space is a demo of an **end-to-end** deployment of PyTorch eager-mode models, both with and without BetterTransformer. The goal is to see what the server-side and client-side benefits of using BetterTransformer are.
     ## Inference using...
     """

backend.py CHANGED Viewed

@@ -2,8 +2,12 @@ import json
 from datasets import load_dataset
-from defaults import (ADDRESS_BETTERTRANSFORMER, ADDRESS_VANILLA, HEADERS,
-                       SPAM_N_REQUESTS)
 from utils import ElapsedFuturesSession
 data = load_dataset("glue", "sst2", split="validation")
@@ -67,8 +71,9 @@ SESSION = ElapsedFuturesSession()
 def send_single(input_model_vanilla, address: str):
     assert address in [ADDRESS_VANILLA, ADDRESS_BETTERTRANSFORMER]
     promise = SESSION.post(
-        address, headers=HEADERS, data=input_model_vanilla.encode("utf-8")
     )
     try:
@@ -110,7 +115,11 @@ def send_spam(address: str):
     for i in range(SPAM_N_REQUESTS):
         input_data = inp[i]["sentence"].encode("utf-8")
-        promises.append(SESSION.post(address, headers=HEADERS, data=input_data))
     for promise in promises:
         try:

 from datasets import load_dataset
+from defaults import (
+    ADDRESS_BETTERTRANSFORMER,
+    ADDRESS_VANILLA,
+    HEADERS,
+    SPAM_N_REQUESTS,
+)
 from utils import ElapsedFuturesSession
 data = load_dataset("glue", "sst2", split="validation")
 def send_single(input_model_vanilla, address: str):
     assert address in [ADDRESS_VANILLA, ADDRESS_BETTERTRANSFORMER]
+    # a response should not take more than 10 s, so time out beyond that
     promise = SESSION.post(
+        address, headers=HEADERS, data=input_model_vanilla.encode("utf-8"), timeout=10
     )
     try:
     for i in range(SPAM_N_REQUESTS):
         input_data = inp[i]["sentence"].encode("utf-8")
+        # a response should not take more than 15 s, so time out beyond that
+        promises.append(
+            SESSION.post(address, headers=HEADERS, data=input_data, timeout=15)
+        )
     for promise in promises:
         try: