David Pomerenke
Only run tasks for which there is no result yet
2f9dee1
raw
history blame
1.45 kB
import asyncio
import json
import numpy as np
import pandas as pd
from tqdm.asyncio import tqdm_asyncio
from languages import languages
from models import models
from tasks import tasks
# ===== config =====
# Number of sentence indices (0 .. n_sentences - 1) passed to each task in evaluate().
n_sentences = 10
# Only the first n_languages rows of `languages` are evaluated.
n_languages = 40
# Only the first n_models entries of models["id"] are evaluated.
n_models = 25
# ===== run evaluation and aggregate results =====
async def evaluate():
    """Run all evaluations that have no stored result yet and persist everything.

    Previously stored rows are read from ``results.json``. Every combination of
    task, sentence index, language and model (limited by ``n_sentences``,
    ``n_languages`` and ``n_models``) that has no matching row there is
    scheduled and awaited concurrently. Old and new rows are then written back
    to ``results.json``; ``models`` and ``languages`` are dumped to
    ``models.json`` and ``languages.json``.

    Returns:
        The merged results as a ``pandas.DataFrame`` (old rows followed by the
        newly computed ones).
    """
    print("running evaluations")
    key_columns = ["model", "bcp_47", "task", "sentence_nr"]
    try:
        old_results = pd.read_json("results.json")
    except FileNotFoundError:
        # First run: there is no results file yet, so nothing is done.
        old_results = pd.DataFrame(columns=key_columns)

    # Build the lookup of finished combinations once instead of filtering the
    # whole frame for every candidate combination. An empty frame (e.g. a
    # results file containing "[]") has no key columns to select from.
    if old_results.empty:
        done = set()
    else:
        done = set(old_results[key_columns].itertuples(index=False, name=None))

    pending = [
        task(model, lang.bcp_47, i)
        for task_name, task in tasks.items()
        for i in range(n_sentences)
        for lang in languages.iloc[:n_languages].itertuples()
        for model in models["id"].iloc[:n_models]
        if (model, lang.bcp_47, task_name, i) not in done
    ]
    grouped = await tqdm_asyncio.gather(*pending, miniters=1)
    # Each awaited task yields a group (iterable) of result rows; flatten them.
    new_results = pd.DataFrame([row for group in grouped for row in group])

    # Skip empty frames so concat neither warns nor disturbs column dtypes.
    frames = [frame for frame in (old_results, new_results) if not frame.empty]
    results = pd.concat(frames, ignore_index=True) if frames else old_results

    args = dict(orient="records", indent=2, force_ascii=False)
    results.to_json("results.json", **args)
    pd.DataFrame(models).to_json("models.json", **args)
    pd.DataFrame(languages).to_json("languages.json", **args)
    return results
# Script entry point: run the evaluate() coroutine to completion via asyncio.run.
if __name__ == "__main__":
    results = asyncio.run(evaluate())