Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| from utils.json_loader import JsonDataLoader | |
| from utils.metrics import accuracy | |
| from utils.openai import chat_completion | |
def simon_says_helper(message, prompt=None):
    """Send one Simon Says message to the chat model and return its reply.

    Args:
        message: The message forwarded as the first argument of
            ``chat_completion``.
        prompt: Optional prompt forwarded to ``chat_completion`` unchanged.

    Returns:
        Whatever ``chat_completion`` returns (presumably the model's reply
        text -- confirm against ``utils.openai``).
    """
    # Model and temperature are pinned so every evaluation run uses the
    # same settings that the app's description tab advertises.
    return chat_completion(
        message,
        prompt=prompt,
        model="gpt-3.5-turbo",
        temperature=0,
    )
def gradio_chat_completion(prompt, difficulty):
    """Evaluate a user prompt on the validation examples of one difficulty.

    Args:
        prompt: Prompt text passed to ``simon_says_helper`` for every example.
        difficulty: Category handed to ``JsonDataLoader.load_data`` to pick
            which validation examples are loaded.

    Returns:
        A ``(accuracy_score, table)`` tuple where ``accuracy_score`` is the
        value returned by ``accuracy(predictions, expected)`` and ``table`` is
        a ``pandas.DataFrame`` with the columns ``Correct``, ``Input``,
        ``Prediction`` and ``Target`` (in that order).
    """
    data_loader = JsonDataLoader(filepath="data/validation.json")
    examples, targets = data_loader.load_data(category=difficulty)

    # Each example is unpacked as keyword arguments for the helper; the
    # "Input" column below relies on every example having a "message" key.
    predictions = [
        simon_says_helper(**example, prompt=prompt) for example in examples
    ]

    # Expected replies, extracted once and shared by the score and the table.
    expected = [target["response"] for target in targets]
    accuracy_score = accuracy(predictions, expected)

    # Per-example breakdown displayed next to the overall score.
    is_correct = [guess == answer for guess, answer in zip(predictions, expected)]
    messages = [example["message"] for example in examples]
    table = pd.DataFrame(
        {
            "Correct": is_correct,
            "Input": messages,
            "Prediction": predictions,
            "Target": expected,
        }
    )

    return accuracy_score, table
# Page title and one-line goal, shown above the tabs.
_INTRO_MARKDOWN = """
# Simon Says
Create a prompt that gets 100% accuracy on 'easy', 'medium', and 'hard' modes!
"""

# Game description: model settings, the allowed commands, and the rules.
_DESCRIPTION_MARKDOWN = """
**Model:** gpt-3.5-turbo
**Temperature:** 0
#### Allowed Commands
- :: jumps ::
- :: sticks out tongue ::
- :: makes a funny face ::
- :: runs in place ::
- :: stomps feet ::
- :: hops on one foot ::
- :: wiggles fingers ::
- :: moos like a cow ::
- :: touches toes ::
- :: claps hands ::
- :: sits down ::
#### Rules
- If Simon directs the LLM to do any of the allowed commands, the LLM should do it.
- If Simon does not say so, the LLM should respond with ":: does nothing ::"
- If the user directs the LLM to do any other command, the LLM should respond with ":: does nothing ::"
"""

# Heading displayed above the accuracy box and the results table.
_RESULTS_MARKDOWN = """
## Results
"""

# Listed in the same order as the columns of the DataFrame returned by
# gradio_chat_completion, so the empty placeholder table and the filled-in
# results show their columns in the same order.
_RESULT_COLUMNS = ["Correct", "Input", "Prediction", "Target"]

# NOTE(review): the nesting below (tabs inside the Blocks context, all "Play"
# widgets and the click handler inside the "Play" tab) is the assumed layout
# -- confirm against the deployed app.
with gr.Blocks() as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Tab("Description"):
        gr.Markdown(_DESCRIPTION_MARKDOWN)

    with gr.Tab("Play"):
        # Inputs: which validation split to run and the prompt under test.
        difficulty_dropdown = gr.Dropdown(
            ["easy", "medium", "hard"], label="Difficulty"
        )
        prompt_box = gr.Textbox(
            label="Prompt", value="Always reply with :: does nothing ::", lines=10
        )
        btn = gr.Button(value="Submit")

        # Outputs: overall accuracy plus a per-example breakdown.
        gr.Markdown(_RESULTS_MARKDOWN)
        accuracy_box = gr.Textbox(label="Accuracy", interactive=False)
        results_table = gr.Dataframe(
            headers=_RESULT_COLUMNS,
            col_count=(len(_RESULT_COLUMNS), "fixed"),
            interactive=False,
            wrap=True,
        )

        # Submitting evaluates the prompt on the chosen difficulty and fills
        # both output components from the callback's (score, table) result.
        btn.click(
            gradio_chat_completion,
            inputs=[prompt_box, difficulty_dropdown],
            outputs=[accuracy_box, results_table],
        )

demo.launch()