Spaces:
Sleeping
Sleeping
File size: 4,227 Bytes
ae6a8c0 de1d92a 91143ec de1d92a 91143ec de1d92a 91143ec 01022c9 91143ec 01022c9 91143ec de1d92a 91143ec abedf13 de1d92a abedf13 de1d92a 91143ec de1d92a 7e3fbab de1d92a 7e3fbab de1d92a 7e3fbab de1d92a 7e3fbab abedf13 7e3fbab 63eddff 7e3fbab de1d92a 7e3fbab de1d92a 7e3fbab de1d92a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import gradio as gr
from Sample import sample_random_entry
from Config import TOPICS
import pandas as pd
import os
from threading import Lock
# Guards the check-then-write sequence in append_to_csv.
lock = Lock()
# Metadata of the currently displayed entry: assigned by sample_and_display,
# read by evaluate_guess.
info_dict = {}


def append_to_csv(output_path, row_data, header_names):
    """Append one row to the CSV file at ``output_path``.

    The file (and any missing parent directory) is created on first use and
    gets a header line; later calls append a single data line.

    Args:
        output_path: Path of the CSV file to write.
        row_data: Mapping of column name to value for the new row.
        header_names: Column names, in the order they appear in the file.
            Keys of ``row_data`` not listed here are dropped; listed columns
            missing from ``row_data`` are written as empty cells.
    """
    # Always project the row onto header_names so that appended rows use the
    # same column order as the header, whatever the dict's key order is.
    frame = pd.DataFrame([row_data], columns=header_names)

    # Hold the lock across the existence check and the write so the two
    # cannot be interleaved with another call.
    with lock:
        parent_dir = os.path.dirname(output_path)
        if parent_dir:
            # to_csv does not create directories; without this the very
            # first write into a new folder raises OSError.
            os.makedirs(parent_dir, exist_ok=True)

        if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
            print(f"Appending to {output_path}")
            # File already has content (and therefore a header): data only.
            frame.to_csv(output_path, mode='a', header=False, index=False)
        else:
            print(f"Writing to {output_path}")
            # Missing or empty file: start it with the header line.
            frame.to_csv(output_path, mode='w', header=True, index=False)
def sample_and_display(topic):
    """Sample a new entry and return the values for the four display fields.

    When ``topic`` is truthy it is forwarded to ``sample_random_entry``;
    otherwise the sampler is called without arguments. The second element
    of the sampler's result replaces the module-level ``info_dict``.

    Returns:
        A tuple ``(question_text, evaluation_card_text, model_name,
        completion_text)``; the last two are empty strings so the
        corresponding fields are cleared.
    """
    global info_dict

    if topic:
        sampled = sample_random_entry(topic=topic)
    else:
        sampled = sample_random_entry()
    display_dict, info_dict = sampled

    # Model name and completion are blanked for a freshly sampled entry.
    return display_dict['qa'], display_dict['card'], '', ''
def evaluate_guess(reasoning, correctness, confidence, topic):
    """Score the user's prediction for the current entry and log it to CSV.

    Args:
        reasoning: Free-text justification entered by the user.
        correctness: Selected radio label predicting whether the model
            answers the question correctly. ``None`` (nothing selected) or
            an unrecognized label never counts as a right guess.
        confidence: Confidence value chosen by the user.
        topic: Topic name; used as the sub-directory of the response file.

    Returns:
        A tuple ``(evaluation_response, actual_model, completion_text)``:
        ``"Correct"``/``"Incorrect"`` for the user's guess, the model name
        stored in ``info_dict``, and the text revealing the completion and
        the chosen answer letter.
    """
    # The radio in the UI offers "Correct"/"Incorrect"; the adverb forms are
    # accepted as well. Comparing the raw label against only
    # "Correctly"/"Incorrectly" can never match what the radio emits.
    label_to_prediction = {
        'Correct': True,
        'Correctly': True,
        'Incorrect': False,
        'Incorrectly': False,
    }
    predicted = label_to_prediction.get(correctness)
    model_was_correct = bool(info_dict['correctness'])
    guess_is_right = predicted is not None and predicted == model_was_correct
    evaluation_response = "Correct" if guess_is_right else "Incorrect"

    # info_dict is the one stored by the latest sample_and_display call.
    actual_model = info_dict.get('model', 'Unknown Model')
    actual_completion = info_dict.get('completion', 'No completion available.')

    # 65 is ord('A'): verdict 0 -> 'A', 1 -> 'B', ...
    choice_letter = chr(info_dict.get('verdict', 0) + 65)
    completion_text = f"Completion: {actual_completion}\n\nChoice: {choice_letter}"

    output_path = f'responses/mmlu/{topic}/response.csv'
    entry = {
        'index': info_dict.get('index', -1),
        'model': actual_model,
        'reasoning': reasoning,
        'correctness': guess_is_right,
        'confidence': confidence,
    }
    header_names = ['index', 'model', 'reasoning', 'correctness', 'confidence']
    append_to_csv(output_path, entry, header_names)

    return evaluation_response, actual_model, completion_text
# Sample one entry up front so the interface opens with content.
initial_topic = TOPICS['mmlu'][0]  # first entry of TOPICS['mmlu']
(
    question_text,
    evaluation_card_text,
    model_name,
    completion_text,
) = sample_and_display(initial_topic)

# NOTE(review): the nesting of the layout blocks below follows the order of
# the statements; confirm the placement of the output box and buttons.
with gr.Blocks() as app:
    topic = gr.Dropdown(
        choices=TOPICS['mmlu'],
        label="Select Topic",
        value=initial_topic,
    )

    with gr.Row():
        # Wider column: read-only information about the sampled entry.
        with gr.Column(scale=2):
            evaluation_card = gr.Textbox(
                value=evaluation_card_text,
                label="Evaluation Card",
                interactive=False,
            )
            model = gr.Textbox(
                value=model_name,
                label="Model",
                interactive=False,
            )
            completion = gr.Textbox(
                value=completion_text,
                label="Model's Completion",
                interactive=False,
            )

        # Narrower column: the question and the user's inputs.
        with gr.Column(scale=1):
            question = gr.Textbox(
                value=question_text,
                label="Question",
                interactive=False,
            )
            reasoning = gr.Textbox(
                lines=5,
                placeholder="Your reasoning (optional)",
            )
            correctness = gr.Radio(
                choices=["Correct", "Incorrect"],
                label="I believe the model will answer this question",
            )
            confidence = gr.Slider(
                minimum=0,
                maximum=10,
                step=1,
                label="Confidence",
            )
            output_text = gr.Text(label="Evaluation Output")
            submit_button = gr.Button("Submit")
            next_button = gr.Button("Next Entry")

    # Submit scores the guess and reveals the model name and completion.
    submit_button.click(
        fn=evaluate_guess,
        inputs=[reasoning, correctness, confidence, topic],
        outputs=[output_text, model, completion],
    )
    # Next Entry samples a new entry for the selected topic and clears the
    # model/completion fields.
    next_button.click(
        fn=sample_and_display,
        inputs=[topic],
        outputs=[question, evaluation_card, model, completion],
    )

app.launch()
|