lewtun committed
Commit 1394a88
Parent(s): 4677a77

Refactor evaluation logic

Files changed (1):
  1. evaluation.py +14 -15
evaluation.py CHANGED
@@ -1,3 +1,4 @@
+import copy
 from dataclasses import dataclass
 
 import streamlit as st
@@ -5,7 +6,7 @@ from huggingface_hub import DatasetFilter, HfApi
 from huggingface_hub.hf_api import DatasetInfo
 
 
-@dataclass(frozen=True, eq=True)
+@dataclass(frozen=True, eq=True, unsafe_hash=True)
 class EvaluationInfo:
     task: str
     model: str
@@ -15,30 +16,29 @@ class EvaluationInfo:
     metrics: set
 
 
-def compute_evaluation_id(dataset_info: DatasetInfo) -> int:
+def create_evaluation_info(dataset_info: DatasetInfo) -> int:
     if dataset_info.cardData is not None:
         metadata = dataset_info.cardData["eval_info"]
         metadata.pop("col_mapping", None)
         # TODO(lewtun): populate dataset cards with metric info
         if "metrics" not in metadata:
             metadata["metrics"] = frozenset()
-        metadata["metrics"] = frozenset(metadata["metrics"])
-        evaluation_info = EvaluationInfo(**metadata)
-        return hash(evaluation_info)
-    else:
-        return None
+        else:
+            metadata["metrics"] = frozenset(metadata["metrics"])
+        return EvaluationInfo(**metadata)
 
 
-def get_evaluation_ids():
+def get_evaluation_infos():
     filt = DatasetFilter(author="autoevaluate")
     evaluation_datasets = HfApi().list_datasets(filter=filt, full=True)
-    return [compute_evaluation_id(dset) for dset in evaluation_datasets]
+    return [create_evaluation_info(dset) for dset in evaluation_datasets]
 
 
 def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_split, metrics):
-    evaluation_ids = get_evaluation_ids()
+    evaluation_infos = get_evaluation_infos()
+    models_to_filter = copy.copy(models)
 
-    for idx, model in enumerate(models):
+    for model in models_to_filter:
         evaluation_info = EvaluationInfo(
             task=task,
             model=model,
@@ -47,12 +47,11 @@ def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_
             dataset_split=dataset_split,
             metrics=frozenset(metrics),
         )
-        candidate_id = hash(evaluation_info)
-        if candidate_id in evaluation_ids:
+        if evaluation_info in evaluation_infos:
             st.info(
-                f"Model `{model}` has already been evaluated on this configuration. \
+                f"Model [`{model}`](https://huggingface.co/{model}) has already been evaluated on this configuration. \
                 This model will be excluded from the evaluation job..."
            )
-            models.pop(idx)
+            models.remove(model)
 
     return models
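
Reviewer's note (not part of the commit): the key behavioral fix is in `filter_evaluated_models`. The old code called `models.pop(idx)` while iterating over `models` with `enumerate`, which skips the element that slides into the freed index, so consecutive already-evaluated models could escape filtering. The refactor iterates over a shallow copy and mutates the original list instead. A minimal sketch with hypothetical model names illustrating the difference:

```python
# Sketch only -- hypothetical data, not from this commit.
import copy

already_evaluated = {"model-a", "model-b"}

# Old approach: pop() during enumerate() skips the element that
# shifts into the freed slot, so "model-b" is never checked.
models = ["model-a", "model-b", "model-c"]
for idx, model in enumerate(models):
    if model in already_evaluated:
        models.pop(idx)
print(models)  # ['model-b', 'model-c'] -- "model-b" slipped through

# New approach: iterate over a shallow copy, mutate the original.
models = ["model-a", "model-b", "model-c"]
for model in copy.copy(models):
    if model in already_evaluated:
        models.remove(model)
print(models)  # ['model-c']
```

Membership testing also changed from comparing precomputed hashes to direct value equality (`evaluation_info in evaluation_infos`), which relies on the dataclass-generated `__eq__`; the added `unsafe_hash=True` forces a generated `__hash__`, though with `frozen=True` and `eq=True` the dataclass already gets one, so this mostly makes the intent explicit. One leftover: `create_evaluation_info` still carries the old `-> int` annotation even though it now returns an `EvaluationInfo` (or `None` when `cardData` is missing).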