Spaces:
Sleeping
Sleeping
File size: 20,743 Bytes
b308128 937be2f cd41e3a cd1760d 7f877a9 2baca0d 738a5f6 94f53fc 224700e 94f53fc 21090d3 04fc021 1941971 e5f657f 1941971 6204d1b 1941971 8c76c4e b2e68d1 2baca0d b2e68d1 2baca0d f0030c7 c15f723 adf3921 c15f723 3c712c1 b8c87a6 adf3921 6cdbf20 3036c83 230d9ba 3036c83 6cdbf20 b8c87a6 3036c83 6cdbf20 94f53fc 3036c83 94f53fc b8c87a6 adf3921 b8c87a6 ee8b9cc a2797f8 94f53fc b8c87a6 cfed1ed b8c87a6 94f53fc 68440cc 94f53fc 02f452d 3c712c1 337ccf9 17678fc adf3921 17678fc adf3921 17678fc adf3921 17678fc adf3921 17678fc adf3921 17678fc adf3921 e1ba54a 17678fc 0743d21 b8c87a6 adf3921 b8c87a6 adf3921 b8c87a6 adf3921 b8c87a6 adf3921 b8c87a6 adf3921 b8c87a6 adf3921 b8c87a6 6cdbf20 b8c87a6 6cdbf20 b8c87a6 6cdbf20 b8c87a6 49c7ae8 0743d21 adf3921 0743d21 adf3921 0743d21 adf3921 0743d21 adf3921 0743d21 adf3921 0743d21 adf3921 0743d21 b8c87a6 f0030c7 70a526c f0030c7 67b28a6 70a526c f0030c7 70a526c f0030c7 204e065 f0030c7 3348cb9 4fee723 3348cb9 4fee723 3348cb9 88d9942 3348cb9 b8c87a6 3348cb9 5160266 3348cb9 5160266 3348cb9 5160266 3348cb9 f122880 b8c87a6 e7e5931 337ccf9 17678fc b8c87a6 70a526c e870f88 21a6ab6 b8c87a6 f690e36 e870f88 0743d21 b8c87a6 f690e36 e870f88 0743d21 b8c87a6 f690e36 e02ba6b b8c87a6 e3103da 17678fc f690e36 8d07073 0ce110a 70a526c 8d07073 70a526c 8d07073 469b565 b8c87a6 337ccf9 967d013 e02ba6b 967d013 e02ba6b 967d013 f690e36 8c76c4e 25203d2 4fee723 edb0bcd b8c87a6 94f53fc 35e0ec8 8c245db |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 |
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import lftk
import spacy
import time
import os
import openai
# Load the Vicuna 7B model and tokenizer.
# These module-level objects are created once at import time and are reused by
# vicuna_respond() and vicuna_strategies_respond().
vicuna_tokenizer = AutoTokenizer.from_pretrained("lmsys/vicuna-7b-v1.3")
vicuna_model = AutoModelForCausalLM.from_pretrained("lmsys/vicuna-7b-v1.3")
# Load the LLaMA 7b model and tokenizer (the "llama-2-7b-chat-hf" checkpoint).
# Reused by llama_respond() and llama_strategies_respond().
llama_tokenizer = AutoTokenizer.from_pretrained("daryl149/llama-2-7b-chat-hf")
llama_model = AutoModelForCausalLM.from_pretrained("daryl149/llama-2-7b-chat-hf")
def update_api_key(new_key):
    """Store the user's OpenAI API key for later chat requests.

    The key is written to the ``OPENAI_API_TOKEN`` environment variable and
    assigned to ``openai.api_key``.

    Args:
        new_key: Key text coming from the UI textbox. ``None`` is treated as
            an empty string and surrounding whitespace is removed.
    """
    print("update_api_key ran")
    # os.environ only accepts str values, so normalise a missing value first
    # instead of letting the assignment raise TypeError.
    cleaned_key = (new_key or "").strip()
    os.environ['OPENAI_API_TOKEN'] = cleaned_key
    openai.api_key = cleaned_key
def chat(system_prompt, user_prompt, model = 'gpt-3.5-turbo', temperature = 0, verbose = False):
    """Send one system/user message pair to the OpenAI chat completion API.

    Args:
        system_prompt: Content of the ``system`` message.
        user_prompt: Content of the ``user`` message.
        model: Model name passed to the API.
        temperature: Sampling temperature passed to the API.
        verbose: When truthy, print both prompts and the reply.

    Returns:
        The message content of the first choice in the API response.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    completion = openai.ChatCompletion.create(
        temperature=temperature,
        model=model,
        messages=conversation,
    )
    reply = completion['choices'][0]['message']['content']
    if not verbose:
        return reply
    print('System prompt:', system_prompt)
    print('User prompt:', user_prompt)
    print('GPT response:', reply)
    return reply
def format_chat_prompt(message, chat_history, max_convo_length):
    """Render recent chat turns plus the new message as a single prompt.

    Each past turn becomes ``"\\nUser: <user>\\nAssistant: <bot>"`` and the new
    message is appended as ``"\\nUser: <message>\\nAssistant:"`` so the model
    continues from the assistant's side.

    Args:
        message: The new user message.
        chat_history: Sequence of ``(user_message, bot_message)`` pairs, oldest
            first. ``None`` is treated as an empty history.
        max_convo_length: Maximum number of most recent turns to include. A
            value of zero or less includes no history.

    Returns:
        The formatted prompt string.
    """
    history = chat_history or []
    # A plain ``history[-0:]`` would return the *entire* history, so the
    # non-positive case has to be handled explicitly.
    recent_turns = history[-max_convo_length:] if max_convo_length > 0 else []
    pieces = [
        f"\nUser: {user_message}\nAssistant: {bot_message}"
        for user_message, bot_message in recent_turns
    ]
    pieces.append(f"\nUser: {message}\nAssistant:")
    return "".join(pieces)
def gpt_respond(have_key, tab_name, message, chat_history, max_convo_length = 10):
    """Ask GPT to list the selected linguistic entity found in ``message``.

    When ``have_key`` equals ``"No"`` nothing is sent and the history is
    returned unchanged. Otherwise the prompt (history plus new message) is
    printed, sent through ``chat``, and the ``(message, reply)`` pair is
    appended to ``chat_history`` in place.

    Returns:
        A tuple ``("", chat_history)``.
    """
    if have_key != "No":
        context_prompt = format_chat_prompt(message, chat_history, max_convo_length)
        print('Prompt + Context:')
        print(context_prompt)
        instruction = f'''Generate the output only for the assistant. Output any <{tab_name}> in the following sentence one per line.'''
        reply = chat(system_prompt=instruction, user_prompt=context_prompt)
        chat_history.append((message, reply))
    return "", chat_history
def vicuna_respond(tab_name, message, chat_history):
    """Ask the Vicuna model to list the selected linguistic entity in ``message``.

    The prompt is tokenised with ``vicuna_tokenizer``, completed by
    ``vicuna_model.generate`` and decoded; the prompt text is then removed
    from the decoded output. The ``(prompt, reply)`` pair is appended to
    ``chat_history`` in place, and the prompt and outputs are printed.

    Returns:
        A tuple ``(tab_name, "", chat_history)``.
    """
    prompt_text = f'''Output any {tab_name} in the following sentence one per line: "{message}"'''
    print('Vicuna Ling Ents Fn - Prompt + Context:')
    print(prompt_text)

    prompt_ids = vicuna_tokenizer.encode(prompt_text, return_tensors="pt")
    generated_ids = vicuna_model.generate(
        prompt_ids,
        do_sample=True,
        max_length=1024,
        num_beams=5,
        no_repeat_ngram_size=2,
    )
    decoded_text = vicuna_tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    print(decoded_text)

    # The decoded text may contain the prompt itself; keep only the remainder.
    reply = decoded_text.replace(prompt_text, '')
    print(reply)

    chat_history.append((prompt_text, reply))
    time.sleep(2)
    return tab_name, "", chat_history
def llama_respond(tab_name, message, chat_history):
    """Ask the LLaMA model to list the selected linguistic entity in ``message``.

    The prompt is tokenised with ``llama_tokenizer``, completed by
    ``llama_model.generate`` and decoded; the prompt text is then removed
    from the decoded output. The ``(prompt, reply)`` pair is appended to
    ``chat_history`` in place.

    Returns:
        A tuple ``(tab_name, "", chat_history)``.
    """
    prompt_text = f'''Output any {tab_name} in the following sentence one per line: "{message}"'''

    prompt_ids = llama_tokenizer.encode(prompt_text, return_tensors="pt")
    generated_ids = llama_model.generate(
        prompt_ids,
        do_sample=True,
        max_length=1024,
        num_beams=5,
        no_repeat_ngram_size=2,
    )
    decoded_text = llama_tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    # The decoded text may contain the prompt itself; keep only the remainder.
    reply = decoded_text.replace(prompt_text, '')

    chat_history.append((prompt_text, reply))
    time.sleep(2)
    return tab_name, "", chat_history
def gpt_strategies_respond(have_key, strategy, task_name, task_ling_ent, message, chat_history, max_convo_length = 10):
    """Run one prompting strategy for a task through GPT.

    When ``have_key`` equals ``"No"`` nothing is sent. Otherwise a system
    prompt is chosen from ``task_name`` ("POS Tagging" / "Chunking") and
    ``strategy`` ("S1" / "S2" / "S3"); any other combination leaves the system
    prompt empty. "S2" and "S3" use the same prompt text for a given task.
    The ``(message, reply)`` pair is appended to ``chat_history`` in place.

    Returns:
        A tuple ``("", chat_history)``.
    """
    if have_key == "No":
        return "", chat_history

    system_text = ""
    if strategy == "S1":
        # Strategy 1 uses the same entity-listing prompt for both tasks.
        if task_name in ("POS Tagging", "Chunking"):
            system_text = f'''Generate the output only for the assistant. Output any {task_ling_ent} in the following sentence one per line: "{message}"'''
    elif strategy in ("S2", "S3"):
        if task_name == "POS Tagging":
            system_text = f'''POS tag the following sentence using Universal POS tag set: "{message}"'''
        elif task_name == "Chunking":
            system_text = f'''Chunk the following sentence in CoNLL 2000 format with BIO tags: "{message}"'''

    context_prompt = format_chat_prompt(message, chat_history, max_convo_length)
    print('Prompt + Context:')
    print(context_prompt)
    reply = chat(system_prompt=system_text, user_prompt=context_prompt)
    chat_history.append((message, reply))
    return "", chat_history
def vicuna_strategies_respond(strategy, task_name, task_ling_ent, message, chat_history):
    """Run one prompting strategy for a task through the Vicuna model.

    The prompt is chosen from ``task_name`` ("POS Tagging" / "Chunking") and
    ``strategy`` ("S1" / "S2" / "S3"); any other combination leaves the prompt
    empty. "S2" and "S3" use the same prompt text for a given task. The
    decoded output has the prompt text removed, and the ``(prompt, reply)``
    pair is appended to ``chat_history`` in place.

    Returns:
        A tuple ``(task_name, "", chat_history)``.
    """
    prompt_text = ""
    if strategy == "S1":
        # Strategy 1 uses the same entity-listing prompt for both tasks.
        if task_name in ("POS Tagging", "Chunking"):
            prompt_text = f'''Output any {task_ling_ent} in the following sentence one per line: "{message}"'''
    elif strategy in ("S2", "S3"):
        if task_name == "POS Tagging":
            prompt_text = f'''POS tag the following sentence using Universal POS tag set: "{message}"'''
        elif task_name == "Chunking":
            prompt_text = f'''Chunk the following sentence in CoNLL 2000 format with BIO tags: "{message}"'''

    print('Vicuna Strategy Fn - Prompt + Context:')
    print(prompt_text)

    prompt_ids = vicuna_tokenizer.encode(prompt_text, return_tensors="pt")
    generated_ids = vicuna_model.generate(
        prompt_ids,
        do_sample=True,
        max_length=1024,
        num_beams=5,
        no_repeat_ngram_size=2,
    )
    decoded_text = vicuna_tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    print(decoded_text)

    # The decoded text may contain the prompt itself; keep only the remainder.
    reply = decoded_text.replace(prompt_text, '')
    print(reply)

    chat_history.append((prompt_text, reply))
    time.sleep(2)
    return task_name, "", chat_history
def llama_strategies_respond(strategy, task_name, task_ling_ent, message, chat_history):
    """Run one prompting strategy for a task through the LLaMA model.

    The prompt is chosen from ``task_name`` ("POS Tagging" / "Chunking") and
    ``strategy`` ("S1" / "S2" / "S3"); any other combination leaves the prompt
    empty. "S2" and "S3" use the same prompt text for a given task. The
    decoded output has the prompt text removed, and the ``(prompt, reply)``
    pair is appended to ``chat_history`` in place.

    Returns:
        A tuple ``(task_name, "", chat_history)``.
    """
    prompt_text = ""
    if strategy == "S1":
        # Strategy 1 uses the same entity-listing prompt for both tasks.
        if task_name in ("POS Tagging", "Chunking"):
            prompt_text = f'''Output any {task_ling_ent} in the following sentence one per line: "{message}"'''
    elif strategy in ("S2", "S3"):
        if task_name == "POS Tagging":
            prompt_text = f'''POS tag the following sentence using Universal POS tag set: "{message}"'''
        elif task_name == "Chunking":
            prompt_text = f'''Chunk the following sentence in CoNLL 2000 format with BIO tags: "{message}"'''

    prompt_ids = llama_tokenizer.encode(prompt_text, return_tensors="pt")
    generated_ids = llama_model.generate(
        prompt_ids,
        do_sample=True,
        max_length=1024,
        num_beams=5,
        no_repeat_ngram_size=2,
    )
    decoded_text = llama_tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    # The decoded text may contain the prompt itself; keep only the remainder.
    reply = decoded_text.replace(prompt_text, '')

    chat_history.append((prompt_text, reply))
    time.sleep(2)
    return task_name, "", chat_history
def interface():
    """Build the two comparison tabs and wire their event handlers.

    Tab "Linguistic Entities" sends one prompt to GPT-3.5, LLaMA and Vicuna
    and asks each to list a chosen linguistic entity. Tab "CoreNLP" runs a
    POS-tagging or chunking task through the same three models with three
    prompting strategies each. The function creates Gradio components only;
    it is invoked from inside the module's ``gr.Blocks`` context.

    NOTE(review): the source this was restored from had lost its indentation,
    so which components sit inside each ``gr.Row`` is reconstructed — confirm
    the layout. The heading glyphs in the Markdown text also look like
    mis-decoded emoji; confirm against the original file encoding.
    """

    def update_textbox(prompt):
        """Echo the submitted prompt so it can be shown in an output box."""
        return prompt

    with gr.Tab("Linguistic Entities"):
        gr.Markdown("""
## π Step-By-Step Instructions
- Enter a sentence for three models to process (Vicuna-7b, LLaMA-7b and GPT-3.5).
- If you own an OpenAI API key, select 'Yes' in the dropdown. If you don't own one, select 'No'.
- If you selected 'Yes', enter your OpenAI API Key [Link to your OpenAI keys](https://platform.openai.com/api-keys).
- If you selected 'No', leave the 'OpenAI Key' field blank and continue with the rest.
- Select a Linguistic Entity from the Dropdown.
- Click 'Submit' to send your inputs to the models.
- To enter a new prompt, scroll to the bottom and click 'Clear' to start again.
### β³ After you click 'Submit', the models will take a couple seconds to process your inputs.
### π€ Then, the models will output the linguistic entity found in your prompt based on your selection!
Note: If you get an 'Error' in the gpt-3.5 model, check the following:
- Check that you entered your key correctly without any extra characters.
- If you used a free key, it means you exceeded your quota from the free API Key.
""")
        # Inputs
        ling_ents_prompt = gr.Textbox(show_label=False, placeholder="Write a prompt and press enter")
        with gr.Row():
            # Will activate after getting API key
            have_key2 = gr.Dropdown(["Yes", "No"], label="Do you own an API Key?", scale=0.5)
            ling_ents_apikey_input = gr.Textbox(label="Open AI Key", placeholder="Enter your Openai key here", type="password")
        linguistic_entities = gr.Dropdown(["Noun", "Determiner", "Noun phrase", "Verb phrase", "Dependent clause", "T-units"], label="Linguistic Entity")
        ling_ents_btn = gr.Button(value="Submit")
        # Outputs
        user_prompt_1 = gr.Textbox(label="Original prompt")
        with gr.Row():
            gpt_ling_ents_chatbot = gr.Chatbot(label="gpt-3.5")
            llama_ling_ents_chatbot = gr.Chatbot(label="llama-7b")
            vicuna_ling_ents_chatbot = gr.Chatbot(label="vicuna-7b")
        clear = gr.ClearButton(components=[ling_ents_prompt, ling_ents_apikey_input, have_key2, linguistic_entities,
                                           vicuna_ling_ents_chatbot, llama_ling_ents_chatbot, gpt_ling_ents_chatbot,])
        # Event Handler for API Key
        ling_ents_btn.click(update_api_key, inputs=ling_ents_apikey_input)
        # Show user's original prompt. This must be wired to this tab's own
        # button and prompt box: the second tab's button does not exist yet.
        ling_ents_btn.click(fn=update_textbox, inputs=ling_ents_prompt, outputs=user_prompt_1, api_name="ling_ents_btn")
        # Event Handler for GPT 3.5 Chatbot
        ling_ents_btn.click(gpt_respond, inputs=[have_key2, linguistic_entities, ling_ents_prompt, gpt_ling_ents_chatbot],
                            outputs=[ling_ents_prompt, gpt_ling_ents_chatbot])
        # Event Handler for LLaMA Chatbot
        ling_ents_btn.click(llama_respond, inputs=[linguistic_entities, ling_ents_prompt, llama_ling_ents_chatbot],
                            outputs=[linguistic_entities, ling_ents_prompt, llama_ling_ents_chatbot])
        # Event Handler for Vicuna Chatbot
        ling_ents_btn.click(vicuna_respond, inputs=[linguistic_entities, ling_ents_prompt, vicuna_ling_ents_chatbot],
                            outputs=[linguistic_entities, ling_ents_prompt, vicuna_ling_ents_chatbot])

    with gr.Tab("CoreNLP"):
        with gr.Row():
            gr.Markdown("""
## π Step-By-Step Instructions
- Enter a sentence for three models to process (Vicuna-7b, LLaMA-7b and GPT-3.5).
- If you own an OpenAI API key, select 'Yes' in the dropdown. If you don't own one, select 'No'.
- If you selected 'Yes', enter your OpenAI API Key [Link to your OpenAI keys](https://platform.openai.com/api-keys).
- If you selected 'No', leave the 'OpenAI Key' field blank and continue with the rest.
- Select a Task from the Dropdown.
- Select a Linguistic Entity from the Dropdown.
- Click 'Submit' to send your inputs to the models.
- To enter a new prompt, scroll to the bottom and click 'Clear' to start again.
### β³ After you click 'Submit', the models will take a couple seconds to process your inputs.
### π€ Then, the models will output the POS Tagging or Chunking in your prompt with three different strategies based on your selections!
Note: If you get an 'Error' in the gpt-3.5 model, check the following:
- Check that you entered your key correctly without any extra characters.
- If you used a free key, it means you exceeded your quota from the free API Key.
""")
            gr.Markdown("""
### π οΈ How each Strategy works
- Strategy 1 - QA-Based Prompting
- The model is prompted with a question-answer format. The input consists of a question, and the model generates a response based on the understanding of the question and its knowledge.
- Strategy 2 - Instruction-Based Prompting
- Involves providing the model with explicit instructions on how to generate a response. Instead of relying solely on context or previous knowledge, the instructions guide the model in generating content that aligns with specific criteria.
- Strategy 3 - Structured Prompting
- Involves presenting information to the model in a structured format, often with defined sections or categories. The model then generates responses following the given structure.
""")
        # Inputs
        task_prompt = gr.Textbox(show_label=False, placeholder="Write a prompt and press enter")
        with gr.Row():
            have_key = gr.Dropdown(["Yes", "No"], label="Do you own an API Key?", scale=0.5)
            task_apikey_input = gr.Textbox(label="Open AI Key", placeholder="Enter your OpenAI key here", type="password", visible=True)
        task = gr.Dropdown(["POS Tagging", "Chunking"], label="Task")
        task_linguistic_entities = gr.Dropdown(["Noun", "Determiner", "Noun phrase", "Verb phrase", "Dependent clause", "T-units"], label="Linguistic Entity For Strategy 1")
        task_btn = gr.Button(value="Submit")
        # Outputs
        user_prompt_2 = gr.Textbox(label="Original prompt", )
        # The hidden Markdown components carry the strategy id ("S1"/"S2"/"S3")
        # so it can be passed to the handlers as an ordinary input.
        gr.Markdown("### Strategy 1 - QA-Based Prompting")
        strategy1 = gr.Markdown("S1", visible=False)
        with gr.Row():
            gpt_S1_chatbot = gr.Chatbot(label="gpt-3.5")
            llama_S1_chatbot = gr.Chatbot(label="llama-7b")
            vicuna_S1_chatbot = gr.Chatbot(label="vicuna-7b")
        gr.Markdown("### Strategy 2 - Instruction-Based Prompting")
        strategy2 = gr.Markdown("S2", visible=False)
        with gr.Row():
            gpt_S2_chatbot = gr.Chatbot(label="gpt-3.5")
            llama_S2_chatbot = gr.Chatbot(label="llama-7b")
            vicuna_S2_chatbot = gr.Chatbot(label="vicuna-7b")
        gr.Markdown("### Strategy 3 - Structured Prompting")
        strategy3 = gr.Markdown("S3", visible=False)
        with gr.Row():
            gpt_S3_chatbot = gr.Chatbot(label="gpt-3.5")
            llama_S3_chatbot = gr.Chatbot(label="llama-7b")
            vicuna_S3_chatbot = gr.Chatbot(label="vicuna-7b")
        clear_all = gr.ClearButton(components=[task_prompt, task_apikey_input, have_key, task, task_linguistic_entities,
                                               vicuna_S1_chatbot, llama_S1_chatbot, gpt_S1_chatbot,
                                               vicuna_S2_chatbot, llama_S2_chatbot, gpt_S2_chatbot,
                                               vicuna_S3_chatbot, llama_S3_chatbot, gpt_S3_chatbot])
        # Event Handler for API Key
        task_btn.click(update_api_key, inputs=task_apikey_input)
        # Show user's original prompt: copy the prompt box into the
        # "Original prompt" box (reading the output box itself would be a no-op).
        task_btn.click(fn=update_textbox, inputs=task_prompt, outputs=user_prompt_2, api_name="task_btn")
        # Event Handler for GPT 3.5 Chatbot POS/Chunk, user must submit api key before submitting the prompt
        task_btn.click(gpt_strategies_respond, inputs=[have_key, strategy1, task, task_linguistic_entities, task_prompt, gpt_S1_chatbot],
                       outputs=[task_prompt, gpt_S1_chatbot])
        task_btn.click(gpt_strategies_respond, inputs=[have_key, strategy2, task, task_linguistic_entities, task_prompt, gpt_S2_chatbot],
                       outputs=[task_prompt, gpt_S2_chatbot])
        task_btn.click(gpt_strategies_respond, inputs=[have_key, strategy3, task, task_linguistic_entities, task_prompt, gpt_S3_chatbot],
                       outputs=[task_prompt, gpt_S3_chatbot])
        # Event Handler for LLaMA Chatbot POS/Chunk
        task_btn.click(llama_strategies_respond, inputs=[strategy1, task, task_linguistic_entities, task_prompt, llama_S1_chatbot],
                       outputs=[task, task_prompt, llama_S1_chatbot])
        task_btn.click(llama_strategies_respond, inputs=[strategy2, task, task_linguistic_entities, task_prompt, llama_S2_chatbot],
                       outputs=[task, task_prompt, llama_S2_chatbot])
        task_btn.click(llama_strategies_respond, inputs=[strategy3, task, task_linguistic_entities, task_prompt, llama_S3_chatbot],
                       outputs=[task, task_prompt, llama_S3_chatbot])
        # Event Handlers for Vicuna Chatbot POS/Chunk
        task_btn.click(vicuna_strategies_respond, inputs=[strategy1, task, task_linguistic_entities, task_prompt, vicuna_S1_chatbot],
                       outputs=[task, task_prompt, vicuna_S1_chatbot])
        task_btn.click(vicuna_strategies_respond, inputs=[strategy2, task, task_linguistic_entities, task_prompt, vicuna_S2_chatbot],
                       outputs=[task, task_prompt, vicuna_S2_chatbot])
        task_btn.click(vicuna_strategies_respond, inputs=[strategy3, task, task_linguistic_entities, task_prompt, vicuna_S3_chatbot],
                       outputs=[task, task_prompt, vicuna_S3_chatbot])
# Assemble the page: a title banner followed by the tabbed interface.
# ``demo`` stays a module-level name so it can be referenced after import.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# Assessing the Articulate
## A Comparative Analysis of the Core Linguistic Knowledge in Large Language Models
""")
    # load interface
    interface()

# Start the web server only when this file is executed as a script, so that
# importing the module does not launch it as a side effect.
if __name__ == "__main__":
    demo.launch()
|