Spaces:
Sleeping
Sleeping
import gradio as gr | |
import pandas as pd | |
from utils.json_loader import JsonDataLoader | |
from utils.metrics import accuracy | |
from utils.openai import chat_completion | |
def simon_says_helper(message, prompt=None):
    """Send a single message to the chat model and return its reply.

    The request is pinned to the ``gpt-3.5-turbo`` model at temperature 0.

    Args:
        message: Passed to ``chat_completion`` as its first positional
            argument.
        prompt: Optional prompt forwarded unchanged through the ``prompt``
            keyword.

    Returns:
        Whatever ``chat_completion`` returns for this request.
    """
    return chat_completion(
        message,
        prompt=prompt,
        model="gpt-3.5-turbo",
        temperature=0,
    )
def gradio_chat_completion(prompt, difficulty):
    """Score a candidate prompt against the validation set for one difficulty.

    Loads the examples for ``difficulty`` from ``data/validation.json``, asks
    the model for a reply to every example using ``prompt``, and compares the
    replies with the expected responses.

    Args:
        prompt: Prompt text passed to ``simon_says_helper`` for every example.
        difficulty: Category handed to ``JsonDataLoader.load_data``; the UI
            dropdown offers "easy", "medium" and "hard".

    Returns:
        A ``(accuracy_score, df)`` tuple: the value returned by ``accuracy``
        and a DataFrame with one row per example. The columns are ordered
        "Correct", "Input", "Target", "Prediction" so they line up with the
        headers declared on the results table in the UI.
    """
    loader = JsonDataLoader(filepath="data/validation.json")
    inputs, targets = loader.load_data(category=difficulty)

    # One model call per example. Each input dict is expanded into keyword
    # arguments, so its keys must match simon_says_helper's parameters.
    predictions = [simon_says_helper(**input_, prompt=prompt) for input_ in inputs]

    # Extract the expected responses once and reuse them for both the
    # accuracy score and the table, instead of re-reading `targets` each time.
    expected = [target["response"] for target in targets]
    accuracy_score = accuracy(predictions, expected)

    df = pd.DataFrame(
        {
            "Correct": [
                predicted == wanted
                for predicted, wanted in zip(predictions, expected)
            ],
            "Input": [input_["message"] for input_ in inputs],
            "Target": expected,
            "Prediction": predictions,
        }
    )
    return accuracy_score, df
# Build the two-tab UI: a static description of the game, and a "Play" tab
# that scores a user-written prompt with gradio_chat_completion.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Simon Says
        Create a prompt that gets 100% accuracy on 'easy', 'medium', and 'hard' modes!
        """
    )

    with gr.Tab("Description"):
        gr.Markdown(
            """
            **Model:** gpt-3.5-turbo
            **Temperature:** 0
            #### Allowed Commands
            - :: jumps ::
            - :: sticks out tongue ::
            - :: makes a funny face ::
            - :: runs in place ::
            - :: stomps feet ::
            - :: hops on one foot ::
            - :: wiggles fingers ::
            - :: moos like a cow ::
            - :: touches toes ::
            - :: claps hands ::
            - :: sits down ::
            #### Rules
            - If Simon directs the LLM to do any of the allowed commands, the LLM should do it.
            - If Simon does not say so, the LLM should respond with ":: does nothing ::"
            - If the user directs the LLM to do any other command, the LLM should respond with ":: does nothing ::"
            """
        )

    with gr.Tab("Play"):
        # Inputs: which validation subset to run and the prompt under test.
        difficulty_dropdown = gr.Dropdown(
            ["easy", "medium", "hard"], label="Difficulty"
        )
        prompt_box = gr.Textbox(
            label="Prompt", value="Always reply with :: does nothing ::", lines=10
        )
        btn = gr.Button(value="Submit")

        gr.Markdown(
            """
            ## Results
            """
        )

        # Outputs: read-only widgets filled in by the click handler below.
        accuracy_box = gr.Textbox(label="Accuracy", interactive=False)
        results_table = gr.Dataframe(
            headers=["Correct", "Input", "Target", "Prediction"],
            col_count=(4, "fixed"),
            interactive=False,
            wrap=True,
        )

        # The handler receives (prompt, difficulty) in the order listed in
        # `inputs` and its two return values fill `outputs` in order.
        btn.click(
            gradio_chat_completion,
            inputs=[prompt_box, difficulty_dropdown],
            outputs=[accuracy_box, results_table],
        )

demo.launch()