Commit 3909801 (parent: 8639338)
playgrdstar committed: Add check truncation

app.py: CHANGED
@@ -84,7 +84,6 @@ def load_and_generate(model_name, prompt):
 
     return gen_text.replace("<pad>", "").replace("</s>", "")
 
-
 ### This code for the inference api ###
 
 def generate_from_api(query, model_name, temperature, max_tokens):
@@ -102,15 +101,52 @@ def generate_from_api(query, model_name, temperature, max_tokens):
     response = requests.post(model_api_url, headers=headers, json=payload)
     return response.json()[0]['generated_text']
 
+def generate_from_api_check(query, model_name, temperature, max_tokens):
+    headers = {f"Authorization": f"Bearer {HF_READ_API_KEY}",
+               "wait_for_model": "true",
+               "temperature": str(temperature),
+               "max_tokens": str(max_tokens),
+               "max_time": str(120)}
+
+    model_api_url = f"https://api-inference.huggingface.co/models/{model_name}"
+
+    payload = {"inputs": query}
+    response = requests.post(model_api_url, headers=headers, json=payload)
+    while response.status_code != 200:
+        response = requests.post(model_api_url, headers=headers, json=payload)
+
+    max_times = 20
+    gen_text = response.json()[0]['generated_text']
+    while maybe_is_truncated(gen_text) and max_times > 0:
+        headers = {f"Authorization": f"Bearer {HF_READ_API_KEY}",
+                   "wait_for_model": "true",
+                   "temperature": str(temperature),
+                   "max_tokens": str(max_tokens + len(gen_text)),
+                   "max_time": str(120)}
+        payload = {"inputs": query + ' ' + gen_text}
+        response = requests.post(model_api_url, headers=headers, json=payload)
+        while response.status_code != 200:
+            response = requests.post(model_api_url, headers=headers, json=payload)
+        gen_text = response.json()[0]['generated_text']
+        max_times -= 1
+
+    return gen_text
+
+
 with gr.Blocks(css='style.css') as demo:
     gr.HTML("""
     <div style="text-align: center; max-width: 1240px; margin: 0 auto;">
     <h1 style="font-weight: 200; font-size: 20px; margin-bottom:8px; margin-top:0px;">
     Different Strokes (Prompts) for Different Folks (LLMs)
     </h1>
+    <hr style="margin-bottom:5px; margin-top:5px;">
     <h4 style="font-weight: 50; font-size: 14px; margin-bottom:0px; margin-top:0px;">
-    After reading <a href="https://github.com/dair-ai/Prompt-Engineering-Guide">Prompt Engineering Guide</a>, which is
+    After reading <a href="https://github.com/dair-ai/Prompt-Engineering-Guide">Prompt Engineering Guide</a>, which is a good guide when starting to learn about prompts for large language models (LLMs), specifically OpenAI's LLMs, I was interested in seeing the results for other LLMs. Hence, I did up a simple demonstration of different prompts for different popular LLMs of different sizes. The prompt examples are taken from the Prompt Engineering Guide, and the LLMs that you can select below are all available on Hugging Face. If you are interested in comparing them with the prompts from OpenAI's model, you can refer to the writeup in the <a href="https://github.com/dair-ai/Prompt-Engineering-Guide">Prompt Engineering Guide</a> itself.
     </h4>
+    <hr style="margin-bottom:5px; margin-top:5px;">
+    <h5 style="font-weight: 50; font-size: 12px; margin-bottom:0px; margin-top:0px;">
+    Note: Larger models will take a while, especially on the first run.
+    </h5>
     </div>
     """)
 
@@ -132,7 +168,14 @@ with gr.Blocks(css='style.css') as demo:
     )
 
     max_tokens = gr.Slider(
-        10, 250, step=1, value=100, label="Max.
+        10, 250, step=1, value=100, label="Max. tokens (in output)",
+    ).style(
+        container=False,
+    )
+
+    check_truncated = gr.Checkbox(
+        label="Check for truncated output",
+        value=False,
     ).style(
         container=False,
     )
@@ -142,7 +185,7 @@ with gr.Blocks(css='style.css') as demo:
         label="Enter your prompt",
         show_label=False,
         # max_lines=2,
-        placeholder="Select your prompt below",
+        placeholder="Select your prompt from the examples below",
     ).style(
         container=False,
    )
@@ -150,7 +193,7 @@ with gr.Blocks(css='style.css') as demo:
 
     with gr.Row():
         output=gr.Textbox(
-            label="LLM
+            label="LLM output",
             show_label=True)
 
     gr.HTML("""
@@ -222,7 +265,10 @@ with gr.Blocks(css='style.css') as demo:
         inputs=[prompt])
 
     # process.click(load_and_generate, inputs=[model_name, prompt], outputs=[output])
-
+    if check_truncated:
+        process.click(generate_from_api_check, inputs=[prompt, model_name, temperature, max_tokens], outputs=[output])
+    else:
+        process.click(generate_from_api, inputs=[prompt, model_name, temperature, max_tokens], outputs=[output])
 
-
-demo.launch()
+demo.launch(server_port=8080)
+# demo.launch()
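The helper maybe_is_truncated used by generate_from_api_check is defined elsewhere in app.py and does not appear in this diff. For readers of the commit, a minimal sketch of what such a check could look like, assuming a simple "no sentence-final punctuation" heuristic (the regex and body below are illustrative guesses, not the Space's actual code):

    import re

    def maybe_is_truncated(text):
        # Illustrative guess: call the output truncated when it does not end
        # with ., !, or ?, optionally followed by a closing quote or bracket.
        return not re.search(r"[.!?]['\")\]]?\s*$", text)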
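Both request functions put wait_for_model, temperature, max_tokens, and max_time into the HTTP headers. The hosted Inference API documents these as part of the JSON body instead (generation settings under "parameters", loading behaviour under "options"), so as written they are likely ignored by the server; the f prefix on the "Authorization" key is also a no-op, and max_tokens + len(gen_text) adds a character count to a token budget. A sketch of the documented request shape, assuming the text-generation task (parameter names would need checking against the specific models used here):

    headers = {"Authorization": f"Bearer {HF_READ_API_KEY}"}
    payload = {
        "inputs": query,
        "parameters": {
            "temperature": temperature,      # sampling temperature
            "max_new_tokens": max_tokens,    # budget for generated tokens only
            "max_time": 120.0,               # seconds the server may spend
        },
        "options": {"wait_for_model": True}, # block until the model is loaded
    }
    response = requests.post(model_api_url, headers=headers, json=payload)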
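Finally, the new if check_truncated: branch runs once at build time, when check_truncated is the gr.Checkbox component object itself; a component is always truthy, so generate_from_api is never wired to the button and the checkbox state is never read. One standard Gradio fix is to pass the checkbox as an extra input and branch on its live value inside a dispatcher (generate_dispatch is a made-up name for illustration):

    def generate_dispatch(query, model_name, temperature, max_tokens, check):
        # check arrives as the checkbox's boolean value at click time
        fn = generate_from_api_check if check else generate_from_api
        return fn(query, model_name, temperature, max_tokens)

    process.click(generate_dispatch,
                  inputs=[prompt, model_name, temperature, max_tokens, check_truncated],
                  outputs=[output])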