File size: 2,048 Bytes
1394a88
af2acd4
 
c671908
af2acd4
 
 
 
d658c8a
af2acd4
 
 
 
 
 
1edd506
af2acd4
 
1394a88
326ac2a
 
 
1edd506
 
 
1394a88
 
 
af2acd4
 
1394a88
2f16764
af2acd4
2f16764
 
 
 
 
 
 
c671908
 
1edd506
1394a88
 
c671908
1394a88
c671908
 
 
 
 
 
1edd506
c671908
1394a88
1edd506
1394a88
1edd506
 
1394a88
c671908
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import copy
from dataclasses import dataclass

import streamlit as st
from huggingface_hub import DatasetFilter, HfApi
from huggingface_hub.hf_api import DatasetInfo


@dataclass(frozen=True, eq=True)
class EvaluationInfo:
    """Immutable identifier for one evaluation configuration.

    frozen=True (with eq=True) makes instances hashable, so they can be
    compared and looked up when checking whether a given model has already
    been evaluated on a task/dataset/split/metrics combination.
    """

    # Evaluation task name (e.g. as stored in the dataset card's eval_info).
    task: str
    # Hub model id being evaluated.
    model: str
    # Hub dataset id the model is evaluated on.
    dataset_name: str
    # Dataset configuration name.
    dataset_config: str
    # Dataset split (e.g. "test").
    dataset_split: str
    # Always populated with a frozenset (see create_evaluation_info) so the
    # dataclass stays hashable.
    metrics: frozenset


def create_evaluation_info(dataset_info: DatasetInfo) -> "EvaluationInfo | None":
    """Build an ``EvaluationInfo`` from a dataset repo's card metadata.

    Args:
        dataset_info: Hub metadata object; its ``cardData`` is expected to
            contain an ``eval_info`` mapping whose keys match the
            ``EvaluationInfo`` fields.

    Returns:
        An ``EvaluationInfo``, or ``None`` when the dataset has no card data.

    Raises:
        KeyError: if the card data lacks an ``eval_info`` section (callers
            are expected to catch and skip such datasets).
    """
    if dataset_info.cardData is None:
        return None
    # Shallow-copy so we don't mutate the metadata dict attached to the
    # caller's DatasetInfo object when popping keys below.
    metadata = dict(dataset_info.cardData["eval_info"])
    metadata.pop("col_mapping", None)
    # TODO(lewtun): populate dataset cards with metric info
    # Use a frozenset so the resulting EvaluationInfo stays hashable.
    metadata["metrics"] = frozenset(metadata.get("metrics", ()))
    return EvaluationInfo(**metadata)


def get_evaluation_infos():
    """Collect ``EvaluationInfo`` entries for all datasets owned by the
    ``autoevaluate`` organization on the Hugging Face Hub.

    Best-effort scan: any error while parsing a single dataset is printed
    and the dataset is skipped, so one malformed card cannot abort the run.

    Returns:
        list[EvaluationInfo]: one entry per dataset with usable card data.
    """
    evaluation_infos = []
    dataset_filter = DatasetFilter(author="autoevaluate")
    for dset in HfApi().list_datasets(filter=dataset_filter, full=True):
        try:
            info = create_evaluation_info(dset)
        except Exception as e:
            print(f"Error processing dataset {dset}: {e}")
            continue
        # create_evaluation_info returns None for datasets without card
        # data; drop those so callers get a clean list of EvaluationInfo.
        if info is not None:
            evaluation_infos.append(info)
    return evaluation_infos


def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_split, metrics):
    """Remove models that already have an evaluation for this exact configuration.

    For each model, an ``EvaluationInfo`` key is built from the supplied
    configuration and checked against the evaluations already present on the
    Hub; matches are announced via ``st.info`` and removed.

    Args:
        models: list of Hub model ids; mutated in place (matched models are
            removed) and also returned for convenience.
        task / dataset_name / dataset_config / dataset_split: configuration
            of the pending evaluation job.
        metrics: iterable of metric names for the pending job.

    Returns:
        The (mutated) ``models`` list with already-evaluated models removed.
    """
    # A set gives O(1) membership tests; EvaluationInfo is a frozen
    # dataclass and therefore hashable.
    evaluation_infos = set(get_evaluation_infos())
    # Hoist the frozenset conversion out of the loop — it is loop-invariant.
    frozen_metrics = frozenset(metrics)

    # Iterate over a shallow copy so removing from `models` is safe.
    for model in copy.copy(models):
        candidate = EvaluationInfo(
            task=task,
            model=model,
            dataset_name=dataset_name,
            dataset_config=dataset_config,
            dataset_split=dataset_split,
            metrics=frozen_metrics,
        )
        if candidate in evaluation_infos:
            st.info(
                f"Model [`{model}`](https://huggingface.co/{model}) has already been evaluated on this configuration. \
                    This model will be excluded from the evaluation job..."
            )
            models.remove(model)

    return models