Spaces:
Runtime error
Runtime error
#!/usr/bin/env python3
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.repocard import metadata_load
# Metric names that are removed from the set of displayed metrics.
METRICS_TO_NOT_DISPLAY = {"ser"}
# Ids of models whose card metadata has no "language" entry; filled while
# the model cards are parsed.
NO_LANGUAGE_MODELS = []
# Download the README.md of every model tagged "robust-speech-event" and
# parse its metadata block.
api = HfApi()
models = api.list_models(filter="robust-speech-event")
model_ids = [x.modelId for x in models]

# model_id -> parsed README metadata
metadatas = {}
for model_id in model_ids:
    readme_path = hf_hub_download(model_id, filename="README.md")
    metadatas[model_id] = metadata_load(readme_path)
all_model_results = {}
# model_id
#   - dataset
#       - metric
model_language_map = {}
# model_id: list of languages
for model_id, metadata in metadatas.items():
    # `metadata` can be None when the README has no YAML metadata block;
    # such a model has no language either, so it is recorded and skipped
    # instead of crashing the membership test below.
    if not metadata or "language" not in metadata:
        NO_LANGUAGE_MODELS.append(model_id)
        continue

    # Normalise a single language to a one-element list.
    lang = metadata["language"]
    model_language_map[model_id] = lang if isinstance(lang, list) else [lang]

    if "model-index" not in metadata:
        # Known language but no reported results.
        all_model_results[model_id] = None
        continue

    # NOTE(review): only the first "model-index" entry is read, and a
    # malformed entry (missing "results", "dataset" or "metrics") still raises.
    result_dict = {}
    for result in metadata["model-index"][0]["results"]:
        dataset = result["dataset"]["type"]
        # A metric without a "value" key is kept with value None.
        result_dict[dataset] = {x["type"]: x.get("value") for x in result["metrics"]}
    all_model_results[model_id] = result_dict
# get all datasets and all metrics reported by any model
# (set.update avoids the quadratic list concatenation of sum(lists, []))
all_datasets = set()
all_metrics = set()
for metric_result in all_model_results.values():
    if metric_result is None:
        # model card without a "model-index" section
        continue
    all_datasets.update(metric_result)
    for dataset_metrics in metric_result.values():
        all_metrics.update(dataset_metrics)
all_metrics -= METRICS_TO_NOT_DISPLAY

# get all languages
all_langs = set()
for model_langs in model_language_map.values():
    all_langs.update(model_langs)
def _normalize_metric_value(value):
    """Turn a raw metric value from a model card into a rounded number.

    * Strings are parsed with ``float()`` after removing any ``%`` sign;
      strings that still cannot be parsed give ``None``.
    * Floats below 1.0 are multiplied by 100.
    * Other ints/floats are used as they are.
    * Anything else (``None``, lists, dicts, ...) gives ``None``.

    Numbers are rounded to two decimals.
    """
    if isinstance(value, str):
        try:
            # "12.3%" -> 12.3; the stripped string must actually be used.
            return round(float(value.replace("%", "")), 2)
        except ValueError:
            return None
    if not isinstance(value, (int, float)):
        return None
    if isinstance(value, float) and value < 1.0:
        # assuming that WER is given in 0.13 format
        value = 100 * value
    return round(value, 2)


# get results table (one table for each dataset, metric)
all_datasets_results = {}
pandas_datasets = {}
for dataset in all_datasets:
    all_datasets_results[dataset] = {}
    pandas_datasets[dataset] = {}
    for metric in all_metrics:
        all_datasets_results[dataset][metric] = {}
        for lang in all_langs:
            # model_id -> normalised value, for models of this language that
            # report this metric on this dataset
            results = {}
            for model_id, model_result in all_model_results.items():
                is_relevant = (
                    lang in model_language_map[model_id]
                    and model_result is not None
                    and dataset in model_result
                    and metric in model_result[dataset]
                )
                if not is_relevant:
                    continue
                results[model_id] = _normalize_metric_value(model_result[dataset][metric])

            # Ascending by value; models without a usable value go last.
            results = dict(sorted(results.items(), key=lambda item: (item[1] is None, item[1])))
            all_datasets_results[dataset][metric][lang] = [f"{k}: {v}" for k, v in results.items()]

        # One row per language; rows with fewer models are padded with "".
        data = all_datasets_results[dataset][metric]
        data_frame = pd.DataFrame.from_dict(data, orient="index")
        data_frame.fillna("", inplace=True)
        pandas_datasets[dataset][metric] = data_frame