Spaces:
Runtime error
Runtime error
File size: 4,562 Bytes
ae6a8c0 de1d92a 91143ec de1d92a 91143ec de1d92a 91143ec 01022c9 91143ec dfba357 91143ec dfba357 91143ec 01022c9 91143ec de1d92a 1a73201 de1d92a 2ec5af9 09056a8 abedf13 de1d92a abedf13 de1d92a 91143ec de1d92a 7e3fbab de1d92a 1a73201 7e3fbab de1d92a 7e3fbab de1d92a 7e3fbab abedf13 7e3fbab 63eddff 09056a8 1a73201 7e3fbab de1d92a 1a73201 7e3fbab de1d92a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
import gradio as gr
from Sample import sample_random_entry
from Config import TOPICS
import pandas as pd
import os
from threading import Lock
# Guards the response CSV files: append_to_csv holds this lock around its
# "does the file already have content?" check and the write that follows.
lock = Lock()
# Metadata of the most recently sampled entry. sample_and_display rebinds it
# and evaluate_guess reads it to score and log the user's guess.
# NOTE(review): this is a single module-level value, so it looks like it is
# shared by every session served from this process — confirm that is intended.
info_dict = {}
def append_to_csv(output_path, row_data, header_names):
    """Append one row to a CSV file, creating the file and its folder if needed.

    Args:
        output_path: Path of the CSV file to write.
        row_data: Mapping of column name to value for the new row.
        header_names: Column names, in the order they appear in the file.

    The header line is written only when the file is missing or empty. The
    module-level ``lock`` is held across the existence check and the write so
    two threads cannot both see an empty file and each emit a header.
    """
    # Lay the row out in header order for both branches; relying on the dict's
    # own key order could put appended values under the wrong column.
    row_frame = pd.DataFrame([row_data], columns=header_names)

    with lock:
        # pandas refuses to write into a directory that does not exist yet.
        parent_dir = os.path.dirname(output_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        has_content = os.path.exists(output_path) and os.path.getsize(output_path) > 0
        if has_content:
            print(f"Appending to {output_path}")
            row_frame.to_csv(output_path, mode='a', header=False, index=False)
        else:
            print(f"Writing to {output_path}")
            row_frame.to_csv(output_path, mode='w', header=True, index=False)
def sample_and_display(topic):
    """Draw a new entry and return the values that reset the display.

    Args:
        topic: Topic to sample from; a falsy value samples without a topic.

    Returns:
        A 5-tuple ``('', question, evaluation_card, '', '')``. The empty
        strings blank out the evaluation output, model name and completion.

    Side effect: rebinds the module-level ``info_dict`` to the metadata of
    the newly sampled entry.
    """
    global info_dict
    if topic:
        sampled = sample_random_entry(topic=topic)
    else:
        sampled = sample_random_entry()
    shown, info_dict = sampled
    return '', shown['qa'], shown['card'], '', ''
def evaluate_guess(reasoning, correctness, confidence, topic):
    """Score the user's prediction against the sampled entry and log it.

    Args:
        reasoning: Free-text justification typed by the user (may be empty).
        correctness: The radio selection, "Correct" or "Incorrect"; ``None``
            when nothing has been selected yet.
        confidence: Value of the confidence slider.
        topic: Currently selected topic; names the folder the response is
            written under.

    Returns:
        A ``(evaluation_response, actual_model, completion_text)`` tuple.
        When no selection was made, a prompt message and two empty strings
        are returned and nothing is written to disk.
    """
    # Without a selection there is no guess to score; logging it would record
    # a wrong answer the user never gave.
    if correctness is None:
        return "Please select Correct or Incorrect before submitting.", '', ''

    correct_answer = 'Correct' if info_dict['correctness'] else 'Incorrect'
    guessed_right = correctness == correct_answer
    evaluation_response = "Correct" if guessed_right else "Incorrect"

    actual_model = info_dict.get('model', 'Unknown Model')
    actual_completion = info_dict.get('completion', 'No completion available.')
    # 'verdict' is rendered as a letter by offsetting from 'A' (0 -> 'A').
    completion_text = f"Completion: {actual_completion}\n\nChoice: {chr(info_dict.get('verdict', 0) + 65)}"

    # NOTE(review): topic is whatever the dropdown holds at submit time; if it
    # was changed after sampling, the row lands under a different topic than
    # the entry came from — confirm whether that can happen in practice.
    output_path = f'responses/mmlu/{topic}/response.csv'
    header_names = ['index', 'model', 'reasoning', 'correctness', 'confidence']
    entry = {
        'index': info_dict.get('index', -1),
        'model': actual_model,
        'reasoning': reasoning,
        'correctness': guessed_right,
        'confidence': confidence,
    }
    append_to_csv(output_path, entry, header_names)
    return evaluation_response, actual_model, completion_text
# Initial sampling: draw one entry up front so the widgets below have values
# to display when the page first loads.
# TOPICS['mmlu'] is indexed positionally; its first element is the default topic.
initial_topic = TOPICS['mmlu'][0]
correct_text, question_text, evaluation_card_text, model_name, completion_text = sample_and_display(initial_topic)
with gr.Blocks() as app:
    # Topic selector; its current value is handed to both callbacks.
    topic = gr.Dropdown(
        choices=TOPICS['mmlu'],
        label="Select Topic",
        value=initial_topic,
    )

    # NOTE(review): which widgets belong to which column is assumed from the
    # statement order — confirm against the intended layout.
    with gr.Row():
        # Read-only panels describing the sampled entry.
        with gr.Column(scale=2):
            evaluation_card = gr.Textbox(
                value=evaluation_card_text,
                label="Evaluation Card",
                interactive=False,
            )
            model = gr.Textbox(
                value=model_name,
                label="Model",
                interactive=False,
            )
            completion = gr.Textbox(
                value=completion_text,
                label="Model's Completion",
                interactive=False,
            )

        # The question plus the user's inputs and the result of their guess.
        with gr.Column(scale=1):
            question = gr.Textbox(
                value=question_text,
                label="Question",
                interactive=False,
            )
            reasoning = gr.Textbox(
                lines=5,
                placeholder="Your reasoning (optional)",
            )
            correctness = gr.Radio(
                choices=["Correct", "Incorrect"],
                label="I believe the model will answer this question",
            )
            confidence = gr.Slider(
                minimum=1,
                maximum=5,
                step=1,
                value=3,
                label="Confidence",
            )
            output_text = gr.Textbox(
                value=correct_text,
                label="Evaluation Output",
                interactive=False,
            )
            submit_button = gr.Button("Submit")
            next_button = gr.Button("Next Entry")

    # Submit scores the guess and fills in the model name and completion.
    submit_button.click(
        fn=evaluate_guess,
        inputs=[reasoning, correctness, confidence, topic],
        outputs=[output_text, model, completion],
    )
    # Next Entry samples a fresh question and blanks the result fields.
    next_button.click(
        fn=sample_and_display,
        inputs=[topic],
        outputs=[output_text, question, evaluation_card, model, completion],
    )

app.launch()
|