philschmid HF staff committed on
Commit
5afebd5
1 Parent(s): a5cf62b

make requests async and in parallel

Browse files
Files changed (1) hide show
  1. app.py +39 -15
app.py CHANGED
@@ -1,19 +1,23 @@
1
  import os
2
-
3
- import gradio as gr
4
  import requests
 
5
 
6
- TOKEN = os.environ.get("API_TOKEN")
7
-
8
- UL2_API_URL = "https://api-inference.huggingface.co/models/google/flan-ul2"
9
- FLAN_API_URL = "https://api-inference.huggingface.co/models/google/flan-t5-xxl"
10
 
11
- headers = {"Authorization": f"Bearer {TOKEN}"}
12
- MAX_NEW_TOKENS = 256
 
 
 
 
13
 
14
def query(text, api_url):
    """POST *text* to a Hugging Face Inference API endpoint.

    Sends the prompt with the module-level ``MAX_NEW_TOKENS`` generation
    limit and the shared auth ``headers``, and returns the decoded JSON
    response body.
    """
    payload = {"inputs": text, "parameters": {"max_new_tokens": MAX_NEW_TOKENS}}
    resp = requests.post(api_url, headers=headers, json=payload)
    return resp.json()
 
 
 
17
 
18
 
19
  examples = [
@@ -35,10 +39,30 @@ Q: A juggler can juggle 16 balls. Half of the balls are golf balls, and half of
35
  title = "Flan UL2 vs Flan T5 XXL"
36
  description = "This demo compares [Flan-T5-xxl](https://huggingface.co/google/flan-t5-xxl) and [Flan-UL2](https://huggingface.co/google/flan-ul2). Learn more about these models in their model card!"
37
 
38
def inference(text):
    """Query both model endpoints sequentially for *text*.

    Returns a two-element list: the generated text from flan-ul2 followed
    by the generated text from flan-t5-xxl.
    """
    outputs = []
    for url in (UL2_API_URL, FLAN_API_URL):
        outputs.append(query(text, api_url=url)[0]["generated_text"])
    return outputs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  io = gr.Interface(
44
  inference,
1
  import os
2
+ import asyncio
3
+ from concurrent.futures import ThreadPoolExecutor
4
  import requests
5
+ import gradio as gr
6
 
 
 
 
 
7
 
8
+ MAX_NEW_TOKENS = 128
9
+ TOKEN = os.environ.get("API_TOKEN",None)
10
+ URLS = [
11
+ "https://api-inference.huggingface.co/models/google/flan-ul2",
12
+ "https://api-inference.huggingface.co/models/google/flan-t5-xxl"
13
+ ]
14
 
15
def fetch(session, text, api_url):
    """POST *text* to one Inference API endpoint via *session*.

    Returns ``(model_name, decoded_json)`` on success and
    ``(model_name, None)`` on a non-200 response, so callers can always
    unpack the result as a 2-tuple.
    """
    # The last path segment of the endpoint URL is the model name,
    # e.g. ".../models/google/flan-ul2" -> "flan-ul2".
    model = api_url.split("/")[-1]
    response = session.post(
        api_url,
        json={"inputs": text, "parameters": {"max_new_tokens": MAX_NEW_TOKENS}},
    )
    if response.status_code != 200:
        # BUG FIX: the original returned a bare ``None`` here, which made
        # the caller's ``(model, response)`` unpacking raise TypeError
        # whenever a request failed. Keep the tuple shape instead.
        return model, None
    return model, response.json()
21
 
22
 
23
  examples = [
39
  title = "Flan UL2 vs Flan T5 XXL"
40
  description = "This demo compares [Flan-T5-xxl](https://huggingface.co/google/flan-t5-xxl) and [Flan-UL2](https://huggingface.co/google/flan-ul2). Learn more about these models in their model card!"
41
 
42
async def inference(text):
    """Query both model endpoints concurrently and collect their outputs.

    Returns a two-element list ``[flan_ul2_output, flan_t5_xxl_output]``;
    an element stays ``None`` when the corresponding request failed.
    """
    with ThreadPoolExecutor(max_workers=2) as executor:
        with requests.Session() as session:
            session.headers = {"Authorization": f"Bearer {TOKEN}"}
            # Run the blocking ``session.post`` calls in worker threads so
            # both requests execute in parallel instead of back to back.
            # ``get_running_loop`` replaces the deprecated use of
            # ``get_event_loop`` inside a coroutine.
            loop = asyncio.get_running_loop()
            tasks = [
                loop.run_in_executor(executor, fetch, session, text, url)
                # BUG FIX: the original iterated over the undefined name
                # ``urls`` (NameError at runtime); the module-level
                # constant is ``URLS``.
                for url in URLS
            ]

            # Keep a fixed slot per model so the output order is stable
            # regardless of which request finishes first.
            responses = [None, None]
            for result in await asyncio.gather(*tasks):
                # Tolerate both failure shapes from ``fetch``:
                # a bare None or a ``(model, None)`` tuple.
                if result is None:
                    continue
                model, response = result
                if response is None:
                    continue
                if model == "flan-ul2":
                    responses[0] = response
                elif model == "flan-t5-xxl":
                    responses[1] = response
            return responses
66
 
67
  io = gr.Interface(
68
  inference,