import os
from threading import Lock

import gradio as gr
import pandas as pd

from Sample import sample_random_entry
from Config import TOPICS

# Guards concurrent writes to the response CSV files.
lock = Lock()

# Hidden metadata (model, completion, verdict, ...) for the currently
# displayed entry; populated by sample_and_display.
info_dict = {}


def append_to_csv(output_path, row_data, header_names):
    # Acquire the lock before accessing the file so concurrent submissions
    # cannot interleave writes.
    with lock:
        # Make sure the target directory exists before writing.
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
            print(f"Appending to {output_path}")
            # File exists and is not empty: append the row without headers.
            df = pd.DataFrame([row_data], columns=header_names)
            df.to_csv(output_path, mode='a', header=False, index=False)
        else:
            print(f"Writing to {output_path}")
            # File does not exist or is empty: write the row with headers.
            df = pd.DataFrame([row_data], columns=header_names)
            df.to_csv(output_path, mode='w', header=True, index=False)


def sample_and_display(topic):
    # Sample a new entry, restricted to the selected topic if one is given.
    global info_dict
    display_dict, info_dict = sample_random_entry(topic=topic) if topic else sample_random_entry()
    question_text = display_dict['qa']
    evaluation_card_text = display_dict['card']
    model_name = ''       # Clear the model name until the next guess is submitted
    completion_text = ''  # Clear the completion text as well
    return question_text, evaluation_card_text, model_name, completion_text


def evaluate_guess(reasoning, correctness, confidence, topic):
    global info_dict
    # Ground truth: did the model actually answer this question correctly?
    correct_answer = 'Correctly' if info_dict['correctness'] else 'Incorrectly'
    evaluation_response = "Correct" if correctness == correct_answer else "Incorrect"

    # info_dict was populated by sample_and_display for the current entry.
    actual_model = info_dict.get('model', 'Unknown Model')
    actual_completion = info_dict.get('completion', 'No completion available.')

    # Reveal the model's completion and its chosen answer (0 -> A, 1 -> B, ...).
    completion_text = f"Completion: {actual_completion}\n\nChoice: {chr(info_dict.get('verdict', 0) + 65)}"

    # Persist the user's guess for this question.
    question_index = info_dict.get('index', -1)
    output_path = f'responses/mmlu/{topic}/response.csv'
    entry = {
        'index': question_index,
        'model': actual_model,
        'reasoning': reasoning,
        'correctness': correctness == correct_answer,
        'confidence': confidence,
    }
    header_names = ['index', 'model', 'reasoning', 'correctness', 'confidence']
    append_to_csv(output_path, entry, header_names)

    return evaluation_response, actual_model, completion_text
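
# Assumptions about the imported helpers (they are not defined in this file):
# - Sample.sample_random_entry(topic=...) is expected to return a pair
#   (display_dict, info_dict): display_dict with the keys 'qa' and 'card',
#   info_dict with 'index', 'model', 'completion', 'verdict' (0-based answer
#   index), and 'correctness' (bool).
# - Config.TOPICS is expected to map a benchmark name (e.g. 'mmlu') to a list
#   of topic strings.
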
# Initial sampling
initial_topic = TOPICS['mmlu'][0]  # Default to the first MMLU topic
question_text, evaluation_card_text, model_name, completion_text = sample_and_display(initial_topic)

with gr.Blocks() as app:
    topic = gr.Dropdown(choices=TOPICS['mmlu'], label="Select Topic", value=initial_topic)
    with gr.Row():
        with gr.Column(scale=2):
            evaluation_card = gr.Textbox(value=evaluation_card_text, label="Evaluation Card", interactive=False)
            model = gr.Textbox(value=model_name, label="Model", interactive=False)
            completion = gr.Textbox(value=completion_text, label="Model's Completion", interactive=False)
        with gr.Column(scale=1):
            question = gr.Textbox(value=question_text, label="Question", interactive=False)
            reasoning = gr.Textbox(lines=5, label="Reasoning", placeholder="Your reasoning (optional)")
            correctness = gr.Radio(choices=["Correctly", "Incorrectly"], label="I believe the model will answer this question")
            confidence = gr.Slider(minimum=0, maximum=10, step=1, label="Confidence")
            output_text = gr.Text(label="Evaluation Output")
            submit_button = gr.Button("Submit")
            next_button = gr.Button("Next Entry")

    # Submitting a guess reveals whether it was right, plus the model and its completion.
    submit_button.click(
        fn=evaluate_guess,
        inputs=[reasoning, correctness, confidence, topic],
        outputs=[output_text, model, completion],
    )
    # "Next Entry" samples a fresh question and clears the revealed fields.
    next_button.click(
        fn=sample_and_display,
        inputs=[topic],
        outputs=[question, evaluation_card, model, completion],
    )

app.launch()
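
# Each submitted guess is appended to responses/mmlu/<topic>/response.csv with
# the columns index, model, reasoning, correctness, confidence. An illustrative
# row (values are hypothetical):
#   index,model,reasoning,correctness,confidence
#   42,gpt-3.5-turbo,"Looks like straightforward recall",True,7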