Spaces:
Running
Running
File size: 1,385 Bytes
f98b171 dcc2472 f98b171 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
import os
import json
import tyro
import pandas as pd
# Maps each evaluated task name to the metric key that `main` reads from that
# task's result file (looked up as `results["results"][task][metric]`).
# The keys also determine which `<task>.json` files `main` opens per model.
TASK_METRICS = {
    "arc_challenge": "acc_norm",
    "hellaswag": "acc_norm",
    "truthfulqa_mc": "mc2",
}
# Maps each task name (same keys as `TASK_METRICS`) to the column name used
# for that task's score in the DataFrame / CSV produced by `main`.
TASK_SHORT_NAMES = {
    "arc_challenge": "arc",
    "hellaswag": "hellaswag",
    "truthfulqa_mc": "truthfulqa",
}
def main(data_dir: str, out_file: str = "score.csv") -> None:
    """Aggregate results from lm-evaluation-harness into a CSV file.

    Every immediate subdirectory of `data_dir` is treated as one model. For
    each model, one `<task>.json` file is read per key of `TASK_METRICS`, the
    configured metric is taken from `results -> <task> -> <metric>`, scaled to
    a percentage (multiplied by 100) and stored under the task's short name.
    The resulting table has one row per model, a leading `model` column, and
    one column per entry of `TASK_SHORT_NAMES`.

    Args:
        data_dir: The directory containing the results. Model names are
            expected to be the immediate subdirectories of `data_dir`.
        out_file: The path to the output CSV file. (Default: `score.csv`)

    Raises:
        FileNotFoundError: If `data_dir` or an expected `<task>.json` file
            does not exist.
        KeyError: If a result file lacks the expected
            `results -> <task> -> <metric>` entry.
    """
    # Sorted so the CSV row order is reproducible; `os.listdir` returns
    # entries in arbitrary order.
    models = sorted(
        entry
        for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )

    df = pd.DataFrame(columns=list(TASK_SHORT_NAMES.values()))
    for model_dir in models:
        # The model name is the last two "--"-separated parts of the directory
        # name joined by "/" (e.g. "a--org--name" -> "org/name"); a directory
        # name without "--" is used unchanged.
        model_name = "/".join(model_dir.split("--")[-2:])
        for task, metric in TASK_METRICS.items():
            result_path = os.path.join(data_dir, model_dir, f"{task}.json")
            # Context manager guarantees the handle is closed even if the
            # JSON is malformed.
            with open(result_path, encoding="utf-8") as result_file:
                results = json.load(result_file)
            score = float(results["results"][task][metric]) * 100.0
            df.loc[model_name, TASK_SHORT_NAMES[task]] = score

    # Move the model names from the index into a regular "model" column.
    df = df.reset_index().rename(columns={"index": "model"})

    # Write the CSV file, creating the parent directory when one is given.
    if dirname := os.path.dirname(out_file):
        os.makedirs(dirname, exist_ok=True)
    df.to_csv(out_file, index=False)
# Script entry point: only when executed directly (not on import), hand `main`
# to `tyro.cli` — presumably exposing `data_dir` / `out_file` as command-line
# arguments; confirm against the tyro documentation.
if __name__ == "__main__":
    tyro.cli(main)
|