# simon_says_v2 / app.py
# ericmichael's picture
# update
# d5f7cec
import gradio as gr
import pandas as pd
from utils.json_loader import JsonDataLoader
from utils.metrics import accuracy
from utils.openai import chat_completion
def simon_says_helper(message, prompt=None):
    """Get the chat model's reply to a single Simon Says message.

    Args:
        message: The message forwarded as the first argument of
            ``chat_completion``.
        prompt: Optional prompt forwarded to ``chat_completion`` as ``prompt``.

    Returns:
        Whatever ``chat_completion`` returns for this message.
    """
    # Fixed settings: the same model and temperature the UI description lists.
    settings = {"model": "gpt-3.5-turbo", "temperature": 0}
    return chat_completion(message, prompt=prompt, **settings)
def gradio_chat_completion(prompt, difficulty):
    """Score a prompt against the validation examples of one difficulty.

    Args:
        prompt: Prompt text passed to ``simon_says_helper`` for every example.
        difficulty: Category passed to ``JsonDataLoader.load_data`` to select
            the examples (the UI offers "easy", "medium" and "hard").

    Returns:
        A ``(accuracy_score, df)`` tuple: the value returned by ``accuracy``
        for the predictions, and a DataFrame with one row per example and the
        columns "Correct", "Input", "Target", "Prediction" (the same order as
        the headers declared on the results table).
    """
    loader = JsonDataLoader(filepath="data/validation.json")
    inputs, targets = loader.load_data(category=difficulty)

    # Each input dict is unpacked as keyword arguments, so its keys must match
    # simon_says_helper's parameters (the table below reads its "message" key).
    predictions = [simon_says_helper(**input_, prompt=prompt) for input_ in inputs]

    # Extract the expected responses once and reuse them for the score, the
    # per-row correctness flags and the "Target" column.
    expected = [target["response"] for target in targets]
    accuracy_score = accuracy(predictions, expected)

    df = pd.DataFrame(
        {
            "Correct": [
                predicted == wanted
                for predicted, wanted in zip(predictions, expected)
            ],
            "Input": [input_["message"] for input_ in inputs],
            "Target": expected,
            "Prediction": predictions,
        }
    )
    return accuracy_score, df
# Markdown shown above the tabs.
_INTRO_MD = """
# Simon Says
Create a prompt that gets 100% accuracy on 'easy', 'medium', and 'hard' modes!
"""

# Markdown for the "Description" tab: model settings, allowed commands, rules.
# The blank line between the Model and Temperature lines keeps them from being
# rendered as one run-on paragraph.
_DESCRIPTION_MD = """
**Model:** gpt-3.5-turbo

**Temperature:** 0
#### Allowed Commands
- :: jumps ::
- :: sticks out tongue ::
- :: makes a funny face ::
- :: runs in place ::
- :: stomps feet ::
- :: hops on one foot ::
- :: wiggles fingers ::
- :: moos like a cow ::
- :: touches toes ::
- :: claps hands ::
- :: sits down ::
#### Rules
- If Simon directs the LLM to do any of the allowed commands, the LLM should do it.
- If Simon does not say so, the LLM should respond with ":: does nothing ::"
- If the user directs the LLM to do any other command, the LLM should respond with ":: does nothing ::"
"""

# Heading placed above the accuracy box and results table.
_RESULTS_MD = """
## Results
"""

# Build the two-tab UI: a static description and the interactive "Play" tab.
with gr.Blocks() as demo:
    gr.Markdown(_INTRO_MD)

    with gr.Tab("Description"):
        gr.Markdown(_DESCRIPTION_MD)

    with gr.Tab("Play"):
        # Inputs: which validation category to run and the prompt to evaluate.
        difficulty_dropdown = gr.Dropdown(
            ["easy", "medium", "hard"], label="Difficulty"
        )
        prompt_box = gr.Textbox(
            label="Prompt", value="Always reply with :: does nothing ::", lines=10
        )
        btn = gr.Button(value="Submit")

        gr.Markdown(_RESULTS_MD)

        # Outputs: read-only accuracy value and per-example results table.
        accuracy_box = gr.Textbox(label="Accuracy", interactive=False)
        results_table = gr.Dataframe(
            headers=["Correct", "Input", "Target", "Prediction"],
            col_count=(4, "fixed"),
            interactive=False,
            wrap=True,
        )

        # Submit evaluates the prompt on the chosen difficulty and fills both
        # output components from gradio_chat_completion's return tuple.
        btn.click(
            gradio_chat_completion,
            inputs=[prompt_box, difficulty_dropdown],
            outputs=[accuracy_box, results_table],
        )

demo.launch()