# Spaces: Runtime error  (hosting-page status banner captured with this file; not part of the program)
import gradio as gr | |
from Sample import sample_random_entry | |
from Config import TOPICS | |
import pandas as pd | |
import os | |
from threading import Lock | |
# Guards the CSV file access in append_to_csv so that writes do not interleave.
lock = Lock()
# Metadata of the most recently sampled entry: assigned by sample_and_display
# and read by evaluate_guess. Being a module-level global, there is a single
# copy per process.
info_dict = {}
def append_to_csv(output_path, row_data, header_names):
    """Append one row to a CSV file, creating the file (and its folder) if needed.

    The header row is written only when the file does not exist yet or is
    empty; otherwise the row is appended without a header. The whole
    operation runs while holding the module-level ``lock``.

    Args:
        output_path: Path of the CSV file. Missing parent directories are
            created.
        row_data: Mapping from column name to value for the single row.
        header_names: Column names, in the order they are written to the file.
    """
    with lock:
        # to_csv refuses to write into a directory that does not exist, so
        # make sure the target folder is there before the first write.
        parent_dir = os.path.dirname(output_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        has_rows = os.path.exists(output_path) and os.path.getsize(output_path) > 0
        if has_rows:
            print(f"Appending to {output_path}")
        else:
            print(f"Writing to {output_path}")

        # Always order the columns by header_names so appended rows line up
        # with the header regardless of the key order in row_data.
        df = pd.DataFrame([row_data], columns=header_names)
        df.to_csv(
            output_path,
            mode='a' if has_rows else 'w',
            header=not has_rows,
            index=False,
        )
def sample_and_display(topic):
    """Sample a new entry and return the values used to refresh the interface.

    When ``topic`` is truthy it is forwarded to ``sample_random_entry``;
    otherwise ``sample_random_entry`` is called without arguments. The second
    element of its result is stored in the module-level ``info_dict``.

    Returns:
        A 5-tuple ``(evaluation_output, question, evaluation_card, model,
        completion)`` in which the evaluation output, model and completion
        are empty strings and the other two come from the ``'qa'`` and
        ``'card'`` keys of the sampled display dict.
    """
    global info_dict
    if topic:
        shown, info_dict = sample_random_entry(topic=topic)
    else:
        shown, info_dict = sample_random_entry()

    question = shown['qa']
    card = shown['card']
    # The first, fourth and fifth slots are blank so the previous evaluation
    # result, model name and completion are cleared.
    return '', question, card, '', ''
def evaluate_guess(reasoning, correctness, confidence, topic):
    """Score the user's prediction, record it in a CSV file and reveal the answer.

    The prediction is compared against the ``'correctness'`` flag of the
    module-level ``info_dict``, and one row is written to
    ``responses/mmlu/<topic>/response.csv`` via ``append_to_csv``.

    Args:
        reasoning: Free-text reasoning entered by the user.
        correctness: The user's prediction, ``"Correct"`` or ``"Incorrect"``.
            Any other value (for example ``None`` when nothing was selected)
            is rejected without recording anything.
        confidence: Confidence value chosen by the user.
        topic: Topic name; used as a directory component of the CSV path.

    Returns:
        A 3-tuple ``(evaluation_response, model_name, completion_text)``.
        For a rejected submission the first element is a prompt asking the
        user to choose an option and the other two are empty strings.
    """
    # Without this guard an unanswered submission would be scored as a wrong
    # guess, written to the CSV and would reveal the model's answer.
    if correctness not in ("Correct", "Incorrect"):
        return "Please select 'Correct' or 'Incorrect' before submitting.", '', ''

    correct_answer = 'Correct' if info_dict['correctness'] else 'Incorrect'
    print(correctness)
    print(correct_answer)
    guessed_right = correctness == correct_answer
    evaluation_response = "Correct" if guessed_right else "Incorrect"

    actual_model = info_dict.get('model', 'Unknown Model')
    actual_completion = info_dict.get('completion', 'No completion available.')
    # 'verdict' is mapped to a letter by offsetting from ord('A') == 65,
    # so 0 -> 'A', 1 -> 'B', and so on.
    choice_letter = chr(info_dict.get('verdict', 0) + 65)
    completion_text = f"Completion: {actual_completion}\n\nChoice: {choice_letter}"

    entry = {
        'index': info_dict.get('index', -1),
        'model': actual_model,
        'reasoning': reasoning,
        'correctness': guessed_right,
        'confidence': confidence,
    }
    header_names = ['index', 'model', 'reasoning', 'correctness', 'confidence']
    output_path = f'responses/mmlu/{topic}/response.csv'
    append_to_csv(output_path, entry, header_names)

    return evaluation_response, actual_model, completion_text
# Sample one entry up front so the interface opens with a question loaded.
mmlu_topics = TOPICS['mmlu']
initial_topic = mmlu_topics[0]  # the first 'mmlu' topic is the default selection
(
    correct_text,
    question_text,
    evaluation_card_text,
    model_name,
    completion_text,
) = sample_and_display(initial_topic)

# NOTE(review): the indentation of this layout was lost in the captured
# source; the nesting below is a reconstruction - confirm which column the
# output box and the buttons belong to.
with gr.Blocks() as app:
    topic = gr.Dropdown(choices=mmlu_topics, label="Select Topic", value=initial_topic)

    with gr.Row():
        # Wider column: read-only information about the evaluated model.
        with gr.Column(scale=2):
            evaluation_card = gr.Textbox(
                value=evaluation_card_text,
                label="Evaluation Card",
                interactive=False,
            )
            model = gr.Textbox(value=model_name, label="Model", interactive=False)
            completion = gr.Textbox(
                value=completion_text,
                label="Model's Completion",
                interactive=False,
            )

        # Narrower column: the question and the user's inputs.
        with gr.Column(scale=1):
            question = gr.Textbox(value=question_text, label="Question", interactive=False)
            reasoning = gr.Textbox(lines=5, placeholder="Your reasoning (optional)")
            correctness = gr.Radio(
                choices=["Correct", "Incorrect"],
                label="I believe the model will answer this question",
            )
            confidence = gr.Slider(minimum=0, maximum=10, step=1, label="Confidence")
            output_text = gr.Textbox(
                value=correct_text,
                label="Evaluation Output",
                interactive=False,
            )
            submit_button = gr.Button("Submit")
            next_button = gr.Button("Next Entry")

    # Submit scores the guess and reveals the model name and completion.
    submit_button.click(
        fn=evaluate_guess,
        inputs=[reasoning, correctness, confidence, topic],
        outputs=[output_text, model, completion],
    )
    # Next Entry samples a new question for the selected topic and clears
    # the evaluation output, model and completion boxes.
    next_button.click(
        fn=sample_and_display,
        inputs=[topic],
        outputs=[output_text, question, evaluation_card, model, completion],
    )

app.launch()