"""Collect per-model evaluation accuracies and export them as one CSV.

Two sources are merged into a single table:

* every ``*.json`` file found in ``data/raw-eval-outputs`` (one file per
  locally evaluated model), and
* the hard-coded API-model results imported from ``api_results``.

The resulting table is written to ``data/csv/models_data.csv``.
"""
import json
import os
import sys

import pandas as pd

# Make the modules stored in the data/api-results directory importable.
sys.path.append(os.path.abspath('data/api-results'))

# Now import the API results (must come after the sys.path tweak above).
from api_results import (  # noqa: E402
    gpt4,
    gpt4o,
    gpt35turbo,
    claude_opus,
    gemini_15_pro,
    gemini_pro_1,
    gemini_15_flash,
)
from models_info import model_info  # noqa: E402

directory = 'data/raw-eval-outputs'
output_csv = 'data/csv/models_data.csv'

# Columns that should appear first in the exported CSV, in this order.
# Columns from this list that are absent from the data are skipped.
_LEADING_COLUMNS = [
    "T",
    "Model",
    "b4bqa",
    "b4b",
    "medmcqa_g2b",
    "medmcqa_orig_filtered",
    "medmcqa_diff",
    "medqa_4options_g2b",
    "medqa_4options_orig_filtered",
    "medqa_diff",
]


def model_hyperlink(link, model_name):
    """Return the text stored in the "Model" column for one model.

    NOTE(review): ``link`` is accepted but never used, so the returned value
    is just the model name and nothing is actually clickable. It looks like
    link markup was intended here -- confirm the expected format (HTML or
    Markdown) with the consumer of the CSV before changing the output.
    """
    return f'{model_name}'


def make_clickable_names(df):
    """Rewrite ``df["Model"]`` in place via :func:`model_hyperlink`.

    Expects the frame to have "Link" and "Model" columns. Returns the same
    DataFrame object that was passed in.
    """
    df["Model"] = df.apply(
        lambda row: model_hyperlink(row["Link"], row["Model"]), axis=1
    )
    return df


def _load_eval_rows(results_dir):
    """Build one row dict per ``*.json`` result file in ``results_dir``."""
    rows = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # CSV row order reproducible from run to run.
    for filename in sorted(os.listdir(results_dir)):
        if not filename.endswith(".json"):
            continue

        filepath = os.path.join(results_dir, filename)
        with open(filepath, 'r', encoding='utf-8') as f:
            json_data = json.load(f)

        # The model name is the file name without its "_results.json" part;
        # it is also the lookup key into model_info.
        model_name = filename.replace("_results.json", "")

        # Extract the accuracy values as percentages with two decimals.
        row = {'Model': model_name}
        for key, value in json_data['results'].items():
            row[key] = round(value['acc,none'] * 100, 2)

        # Add the tuning type and link to the row.
        row['T'] = model_info[model_name]['tuning']
        row['Link'] = model_info[model_name]['link']
        rows.append(row)
    return rows


def _api_row(model_name, results):
    """Map one API model's result dict onto the local result column names."""
    return {
        'Model': model_name,
        # A missing 'b4bqa' score is reported as 0.0 rather than left blank.
        'b4bqa': round(results.get('b4bqa', 0) * 100, 2),
        'medmcqa_g2b': round(results['medmcqa_g2b'] * 100, 2),
        'medmcqa_orig_filtered': round(results['medmcqa_og'] * 100, 2),
        'medqa_4options_g2b': round(results['medqa_g2b'] * 100, 2),
        'medqa_4options_orig_filtered': round(results['medqa_og'] * 100, 2),
        'T': model_info[model_name]['tuning'],
        'Link': model_info[model_name]['link'],
    }


# Rows from the locally evaluated models.
data = _load_eval_rows(directory)

# Prepare the API results for integration.
api_models = {
    'GPT-4': gpt4,
    'GPT-4o': gpt4o,
    'GPT-3.5 Turbo': gpt35turbo,
    'Claude Opus': claude_opus,
    'Gemini 1.5 Pro': gemini_15_pro,
    'Gemini Pro 1': gemini_pro_1,
    'Gemini 1.5 Flash': gemini_15_flash,
}
data.extend(_api_row(name, results) for name, results in api_models.items())

# Create DataFrame from the collected data.
df = pd.DataFrame(data)
df = make_clickable_names(df)
df.drop(columns=["Link"], inplace=True)

# Calculate differences between specific evaluation metrics.
df['medmcqa_diff'] = (df['medmcqa_g2b'] - df['medmcqa_orig_filtered']).round(2)
df['medqa_diff'] = (
    df['medqa_4options_g2b'] - df['medqa_4options_orig_filtered']
).round(2)

# Reorder columns: preferred columns first, then everything else in its
# existing order. Preferred columns that are absent (e.g. "b4b" when no
# result file reports it) are skipped instead of raising a KeyError.
cols = [col for col in _LEADING_COLUMNS if col in df.columns] + [
    col for col in df.columns if col not in _LEADING_COLUMNS
]
df = df[cols]

# Save DataFrame to CSV, creating the output directory if needed.
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
df.to_csv(output_csv, index=False)

print(f"DataFrame saved to {output_csv}")