File size: 1,385 Bytes
f98b171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dcc2472
f98b171
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import os
import json

import tyro
import pandas as pd

# One row per evaluated task: (harness task name, metric key to read from the
# task's result JSON, short column name used in the output CSV).
# Row order matters: it fixes the column order of the aggregated table.
_TASK_TABLE = (
    ("arc_challenge", "acc_norm", "arc"),
    ("hellaswag", "acc_norm", "hellaswag"),
    ("truthfulqa_mc", "mc2", "truthfulqa"),
)

# Maps each task name to the metric key extracted from its results.
TASK_METRICS = {task: metric for task, metric, _ in _TASK_TABLE}

# Maps each task name to the column header used for it in the CSV.
TASK_SHORT_NAMES = {task: short for task, _, short in _TASK_TABLE}


def main(data_dir: str, out_file: str = "score.csv") -> None:
    """Aggregate results from lm-evaluation-harness into a CSV file.

    For every model directory and every task in `TASK_METRICS`, the file
    `<data_dir>/<model_dir>/<task>.json` is read, the configured metric is
    taken from `results[<task>][<metric>]`, multiplied by 100 and stored in
    the column named by `TASK_SHORT_NAMES`. The table is written with one row
    per model and a leading `model` column.

    Args:
        data_dir: The directory containing the results. Model names are
            expected to be the immediate subdirectories of `data_dir`.
        out_file: The path to the output CSV file. (Default: `score.csv`)

    Raises:
        FileNotFoundError: If `data_dir` or one of the per-task JSON files
            does not exist.
        KeyError: If a result file lacks the expected task or metric entry.
    """
    # `os.listdir` returns entries in arbitrary order; sort them so the rows
    # of the resulting CSV are reproducible across runs and machines.
    model_dirs = sorted(
        entry
        for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )

    df = pd.DataFrame(columns=list(TASK_SHORT_NAMES.values()))
    for model_dir in model_dirs:
        # Keep the last two "--"-separated components and join them with "/"
        # (e.g. "a--org--name" -> "org/name"; presumably an "org/model"
        # identifier -- verify against how the directories are produced).
        model_name = "/".join(model_dir.split("--")[-2:])
        for task, metric in TASK_METRICS.items():
            result_path = os.path.join(data_dir, model_dir, f"{task}.json")
            # Use a context manager so the handle is closed deterministically.
            with open(result_path, encoding="utf-8") as result_file:
                results = json.load(result_file)
            score = float(results["results"][task][metric]) * 100.0
            # Assigning through `.loc` creates the model's row on first use.
            df.loc[model_name, TASK_SHORT_NAMES[task]] = score
    df = df.reset_index().rename(columns={"index": "model"})

    # Write the CSV file, creating the parent directory if one was given.
    if dirname := os.path.dirname(out_file):
        os.makedirs(dirname, exist_ok=True)
    df.to_csv(out_file, index=False)

# Script entry point: when run directly (not imported), hand `main` to
# `tyro.cli` so it can be invoked from the command line.
if __name__ == "__main__":
    tyro.cli(main)