Tristan Thrush committed on
Commit
888432c
1 Parent(s): 79668b2

added enforcement for known metric ranges

Browse files
Files changed (3) hide show
  1. app.py +13 -2
  2. ascending_metrics.py +0 -10
  3. utils.py +38 -0
app.py CHANGED
@@ -2,7 +2,7 @@ import pandas as pd
2
  import streamlit as st
3
  from huggingface_hub import HfApi, hf_hub_download
4
  from huggingface_hub.repocard import metadata_load
5
- from ascending_metrics import ascending_metrics
6
  import numpy as np
7
  from st_aggrid import AgGrid, GridOptionsBuilder, JsCode
8
  from os.path import exists
@@ -46,6 +46,7 @@ def parse_metrics_rows(meta, only_verified=False):
46
  if "config" in result["dataset"]:
47
  row["config"] = result["dataset"]["config"]
48
  no_results = True
 
49
  for metric in result["metrics"]:
50
  name = metric["type"].lower().strip()
51
 
@@ -64,10 +65,16 @@ def parse_metrics_rows(meta, only_verified=False):
64
  if "verified" in metric and metric["verified"]:
65
  no_results = False
66
  row[name] = value
 
 
 
67
  else:
68
  no_results = False
69
  row[name] = value
70
- if no_results:
 
 
 
71
  continue
72
  yield row
73
 
@@ -199,6 +206,10 @@ if len(dataset_df) > 0:
199
  "Want to beat the leaderboard? Don't see your model here? Simply request an automatic evaluation [here](https://huggingface.co/spaces/autoevaluate/model-evaluator)."
200
  )
201
 
 
 
 
 
202
  # Make the default metric appear right after model names
203
  cols = dataset_df.columns.tolist()
204
  cols.remove(sorting_metric)
2
  import streamlit as st
3
  from huggingface_hub import HfApi, hf_hub_download
4
  from huggingface_hub.repocard import metadata_load
5
+ from utils import ascending_metrics, metric_ranges
6
  import numpy as np
7
  from st_aggrid import AgGrid, GridOptionsBuilder, JsCode
8
  from os.path import exists
46
  if "config" in result["dataset"]:
47
  row["config"] = result["dataset"]["config"]
48
  no_results = True
49
+ incorrect_results = False
50
  for metric in result["metrics"]:
51
  name = metric["type"].lower().strip()
52
 
65
  if "verified" in metric and metric["verified"]:
66
  no_results = False
67
  row[name] = value
68
+ if name in metric_ranges:
69
+ if value < metric_ranges[name][0] or value > metric_ranges[name][1]:
70
+ incorrect_results = True
71
  else:
72
  no_results = False
73
  row[name] = value
74
+ if name in metric_ranges:
75
+ if value < metric_ranges[name][0] or value > metric_ranges[name][1]:
76
+ incorrect_results = True
77
+ if no_results or incorrect_results:
78
  continue
79
  yield row
80
 
206
  "Want to beat the leaderboard? Don't see your model here? Simply request an automatic evaluation [here](https://huggingface.co/spaces/autoevaluate/model-evaluator)."
207
  )
208
 
209
+ st.markdown(
210
+ "Note: if you do not see your self-reported results here, ensure that your results are in the expected range for all metrics. E.g., accuracy is 0-1, not 0-100."
211
+ )
212
+
213
  # Make the default metric appear right after model names
214
  cols = dataset_df.columns.tolist()
215
  cols.remove(sorting_metric)
ascending_metrics.py DELETED
@@ -1,10 +0,0 @@
1
- ascending_metrics = {
2
- "wer",
3
- "cer",
4
- "loss",
5
- "mae",
6
- "mahalanobis",
7
- "mse",
8
- "perplexity",
9
- "ter",
10
- }
 
 
 
 
 
 
 
 
 
 
utils.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ascending_metrics = {
2
+ "wer",
3
+ "cer",
4
+ "loss",
5
+ "mae",
6
+ "mahalanobis",
7
+ "mse",
8
+ "perplexity",
9
+ "ter",
10
+ }
11
+
12
+ metric_ranges = {
13
+ "accuracy": (0,1),
14
+ "precision": (0,1),
15
+ "recall": (0,1),
16
+ "f1": (0,1),
17
+ "macro f1": (0,1),
18
+ "micro f1": (0,1),
19
+ "cer": (0,1),
20
+ "wer": (0,1),
21
+ "pearson": (-1, 1),
22
+ "matthews_correlation": (-1, 1),
23
+ "spearmanr": (-1, 1),
24
+ "google_bleu": (0, 1),
25
+ "precision@10": (0, 1),
26
+ "mae": (0, 1),
27
+ "mauve": (0, 1),
28
+ "frontier_integral": (0, 1),
29
+ "mean_iou": (0, 1),
30
+ "mean_accuracy": (0, 1),
31
+ "overall_accuracy": (0, 1),
32
+ "meteor": (0, 1),
33
+ "mse": (0, 1),
34
+ "perplexity": (0, float("inf")),
35
+ "rogue1": (0, 1),
36
+ "rogue2": (0, 1),
37
+ "sari": (0, 100),
38
+ }