playgrdstar committed
Commit 3909801
1 Parent(s): 8639338

Add check truncation

Files changed (1)
  1. app.py +54 -8
app.py CHANGED
@@ -84,7 +84,6 @@ def load_and_generate(model_name, prompt):
 
     return gen_text.replace("<pad>", "").replace("</s>", "")
 
-
 ### This code for the inference api ###
 
 def generate_from_api(query, model_name, temperature, max_tokens):
@@ -102,15 +101,52 @@ def generate_from_api(query, model_name, temperature, max_tokens):
     response = requests.post(model_api_url, headers=headers, json=payload)
     return response.json()[0]['generated_text']
 
+def generate_from_api_check(query, model_name, temperature, max_tokens):
+    headers = {f"Authorization": f"Bearer {HF_READ_API_KEY}",
+               "wait_for_model": "true",
+               "temperature": str(temperature),
+               "max_tokens": str(max_tokens),
+               "max_time": str(120)}
+
+    model_api_url = f"https://api-inference.huggingface.co/models/{model_name}"
+
+    payload = {"inputs": query}
+    response = requests.post(model_api_url, headers=headers, json=payload)
+    while response.status_code != 200:
+        response = requests.post(model_api_url, headers=headers, json=payload)
+
+    max_times = 20
+    gen_text = response.json()[0]['generated_text']
+    while maybe_is_truncated(gen_text) and max_times > 0:
+        headers = {f"Authorization": f"Bearer {HF_READ_API_KEY}",
+                   "wait_for_model": "true",
+                   "temperature": str(temperature),
+                   "max_tokens": str(max_tokens + len(gen_text)),
+                   "max_time": str(120)}
+        payload = {"inputs": query + ' ' + gen_text}
+        response = requests.post(model_api_url, headers=headers, json=payload)
+        while response.status_code != 200:
+            response = requests.post(model_api_url, headers=headers, json=payload)
+        gen_text = response.json()[0]['generated_text']
+        max_times -= 1
+
+    return gen_text
+
+
 with gr.Blocks(css='style.css') as demo:
     gr.HTML("""
     <div style="text-align: center; max-width: 1240px; margin: 0 auto;">
       <h1 style="font-weight: 200; font-size: 20px; margin-bottom:8px; margin-top:0px;">
         Different Strokes (Prompts) for Different Folks (LLMs)
       </h1>
+      <hr style="margin-bottom:5px; margin-top:5px;">
       <h4 style="font-weight: 50; font-size: 14px; margin-bottom:0px; margin-top:0px;">
-        After reading <a href="https://github.com/dair-ai/Prompt-Engineering-Guide">Prompt Engineering Guide</a>, which is an excellent guide on prompts for large language models (LLMs), specifically OpenAI's LLMs, I was interested in seeing the results with for other LLMs. Hence, did up a simple demonstration of different prompts for different popular LLMs of different sizes. The prompt examples are taken from the Prompt Engineering Guide, and the LLMs that you can select below are all available on Hugging Face. If you are interested in comparing them with the prompts from OpenAI's model, you can refer to the writeup in the <a href="https://github.com/dair-ai/Prompt-Engineering-Guide">Prompt Engineering Guide</a> itself.
+        After reading the <a href="https://github.com/dair-ai/Prompt-Engineering-Guide">Prompt Engineering Guide</a>, which is a good guide when starting to learn about prompts for large language models (LLMs), specifically OpenAI's LLMs, I was interested in seeing the results for other LLMs. Hence, I put together a simple demonstration of different prompts for different popular LLMs of different sizes. The prompt examples are taken from the Prompt Engineering Guide, and the LLMs that you can select below are all available on Hugging Face. If you are interested in comparing them with the prompts from OpenAI's model, you can refer to the writeup in the <a href="https://github.com/dair-ai/Prompt-Engineering-Guide">Prompt Engineering Guide</a> itself.
       </h4>
+      <hr style="margin-bottom:5px; margin-top:5px;">
+      <h5 style="font-weight: 50; font-size: 12px; margin-bottom:0px; margin-top:0px;">
+        Note: Larger models will take a while, especially on the first run.
+      </h5>
     </div>
     """)
 
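Note: the new generate_from_api_check relies on a helper, maybe_is_truncated, which is defined elsewhere in app.py and is not part of this diff. A minimal sketch of what such a heuristic could look like (the body below is an assumption, not the app's actual implementation), treating output that does not end in terminal punctuation as truncated:

    def maybe_is_truncated(text):
        # Hypothetical heuristic: output that does not end with sentence-final
        # punctuation is assumed to have been cut off mid-generation.
        return not text.rstrip().endswith(('.', '!', '?', '"'))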
@@ -132,7 +168,14 @@ with gr.Blocks(css='style.css') as demo:
         )
 
         max_tokens = gr.Slider(
-            10, 250, step=1, value=100, label="Max. Tokens (in Output)",
+            10, 250, step=1, value=100, label="Max. tokens (in output)",
+        ).style(
+            container=False,
+        )
+
+        check_truncated = gr.Checkbox(
+            label="Check for truncated output",
+            value=False,
         ).style(
             container=False,
         )
@@ -142,7 +185,7 @@ with gr.Blocks(css='style.css') as demo:
             label="Enter your prompt",
             show_label=False,
             # max_lines=2,
-            placeholder="Select your prompt below",
+            placeholder="Select your prompt from the examples below",
         ).style(
             container=False,
         )
@@ -150,7 +193,7 @@ with gr.Blocks(css='style.css') as demo:
 
     with gr.Row():
         output=gr.Textbox(
-            label="LLM Output",
+            label="LLM output",
             show_label=True)
 
     gr.HTML("""
@@ -222,7 +265,10 @@ with gr.Blocks(css='style.css') as demo:
                 inputs=[prompt])
 
     # process.click(load_and_generate, inputs=[model_name, prompt], outputs=[output])
-    process.click(generate_from_api, inputs=[prompt, model_name, temperature, max_tokens], outputs=[output])
+    if check_truncated:
+        process.click(generate_from_api_check, inputs=[prompt, model_name, temperature, max_tokens], outputs=[output])
+    else:
+        process.click(generate_from_api, inputs=[prompt, model_name, temperature, max_tokens], outputs=[output])
 
-# demo.launch(server_port=8080)
-demo.launch()
+demo.launch(server_port=8080)
+# demo.launch()
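Note: the "if check_truncated:" branch in the last hunk runs once, while the Blocks UI is being built; at that point check_truncated is a gr.Checkbox component object, which is always truthy, so the generate_from_api_check handler is always registered and toggling the checkbox at run time has no effect. One way to honor the live checkbox value is to pass the component as an input to a small dispatcher; a sketch (the dispatch function is hypothetical, not in the commit):

    def dispatch(prompt, model_name, temperature, max_tokens, check_truncated):
        # check_truncated arrives here as the checkbox's current boolean value,
        # so the generation path is chosen per click, not at build time.
        if check_truncated:
            return generate_from_api_check(prompt, model_name, temperature, max_tokens)
        return generate_from_api(prompt, model_name, temperature, max_tokens)

    process.click(dispatch,
                  inputs=[prompt, model_name, temperature, max_tokens, check_truncated],
                  outputs=[output])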
 
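Note: both "while response.status_code != 200:" loops in generate_from_api_check retry immediately and without limit, so a persistent API error (a bad token, a missing model) would spin forever while hammering the endpoint. A bounded retry with a short pause is safer; a sketch under the same requests-based setup (post_with_retry is a hypothetical helper, not in the commit):

    import time
    import requests

    def post_with_retry(url, headers, payload, retries=10, delay=2.0):
        # Hypothetical bounded retry: try at most `retries` times, sleeping
        # briefly between attempts, instead of looping on non-200 forever.
        for _ in range(retries):
            response = requests.post(url, headers=headers, json=payload)
            if response.status_code == 200:
                return response
            time.sleep(delay)
        response.raise_for_status()  # surface the last HTTP error to the caller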