File size: 1,778 Bytes
cab81c9
 
 
 
 
 
 
 
 
866e1ae
cab81c9
 
 
 
 
 
 
 
 
 
 
866e1ae
 
 
cab81c9
 
 
866e1ae
 
cab81c9
 
 
 
866e1ae
 
 
cab81c9
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import gradio as gr
import json
import os
import pandas as pd
from glob import glob

# Root directory that load_results() searches recursively for evaluation JSON files.
eval_results_dir = "eval_results/"  # Directory containing evaluation results

def load_results(results_dir=None) -> pd.DataFrame:
    """Collect per-model evaluation metrics from JSON result files.

    Every ``*.json`` file found recursively under ``results_dir`` is parsed.
    The model name is read from ``config_general["model_name"]`` (falling back
    to ``"Unknown"``).  For each task under ``results`` that carries an
    ``extractive_match`` value, a ``"<task> (Match)"`` entry is recorded, plus
    a ``"<task> (StdErr)"`` entry when ``extractive_match_stderr`` is present.
    Metrics from several files naming the same model are merged; when two
    files report the same task, the file that sorts last by path wins.

    Files that cannot be read, are not valid JSON, or lack the expected
    structure are reported on stdout and skipped without contributing
    anything.

    Args:
        results_dir: Directory to scan.  Defaults to the module-level
            ``eval_results_dir`` when omitted or ``None``.

    Returns:
        A DataFrame with a ``Model`` column plus one column per recorded
        metric.
    """
    if results_dir is None:
        results_dir = eval_results_dir

    # glob() makes no ordering promise; sorting keeps merge precedence and
    # column order reproducible across machines and runs.
    pattern = os.path.join(results_dir, "**", "*.json")
    json_files = sorted(glob(pattern, recursive=True))

    records = {}
    for file_path in json_files:
        # Deliberately best-effort: one malformed file must not take the
        # whole leaderboard down, so any per-file failure is logged and skipped.
        try:
            # JSON is UTF-8 by specification; do not depend on the locale default.
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            model_name = data["config_general"].get("model_name", "Unknown")
            results = data.get("results", {})

            # Gather into a scratch dict first so that a file failing part-way
            # through never leaves a half-filled record behind.
            metrics = {}
            for task, task_data in results.items():
                if not isinstance(task_data, dict) or "extractive_match" not in task_data:
                    continue
                metrics[f"{task} (Match)"] = task_data["extractive_match"]
                # The stderr is optional: its absence must not discard the
                # match value or the remaining tasks in this file.
                if "extractive_match_stderr" in task_data:
                    metrics[f"{task} (StdErr)"] = task_data["extractive_match_stderr"]

            records.setdefault(model_name, {}).update(metrics)
        except Exception as e:
            print(f"Error reading {file_path}: {e}")

    # Convert to DataFrame: model names become the index, then a "Model" column.
    df = pd.DataFrame.from_dict(records, orient="index").reset_index()
    df.rename(columns={"index": "Model"}, inplace=True)
    return df

def leaderboard():
    """Return the current results table by re-running load_results().

    Used both as the initial value of the Dataframe component and as the
    click handler of the refresh button.
    """
    return load_results()

# Build the UI: a title, a short description, the results table and a
# button that reloads the table from disk.
with gr.Blocks() as demo:
    gr.Markdown("# 📊 Evaluation Leaderboard")
    gr.Markdown("This leaderboard displays evaluation results from JSON files in `eval_results/`.")

    # The callable itself (not its result) is passed as the component value,
    # and the same callable is wired to the refresh button below.
    results_table = gr.Dataframe(leaderboard)
    refresh_button = gr.Button("🔄 Refresh")
    refresh_button.click(leaderboard, outputs=[results_table])

demo.launch()