lewtun HF staff committed on
Commit
b727941
1 Parent(s): 212696e

Update leaderboard with real data

Browse files
Files changed (1) hide show
  1. app.py +39 -15
app.py CHANGED
@@ -4,42 +4,65 @@ from pathlib import Path
4
  import pandas as pd
5
  import requests
6
  import streamlit as st
 
7
  from dotenv import load_dotenv
8
 
9
  if Path(".env").is_file():
10
  load_dotenv(".env")
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  def download_submissions():
14
- auth_token = os.getenv("HF_HUB_TOKEN")
15
- header = {"Authorization": "Bearer " + auth_token}
16
  response = requests.get("http://huggingface.co/api/datasets", headers=header)
17
  all_datasets = response.json()
18
 
19
  submissions = []
20
 
21
  for dataset in all_datasets:
22
- is_benchmark = any([t for t in dataset["tags"] if t.split(":")[1] == "neelalex/raft-test"])
23
- if is_benchmark:
24
  submissions.append(dataset)
 
25
 
26
- for submission in submissions:
 
 
 
 
 
 
27
  submission_id = submission["id"]
28
  response = requests.get(
29
  f"http://huggingface.co/api/datasets/{submission_id}?full=true",
30
  headers=header,
31
  )
32
  data = response.json()
33
- submission_data = {"Team Name": [data["card_data"]["submission_dataset"].split("/")[0]]}
34
- for task in data["card_data"]["model-index"]:
35
- for task_name, splits in task.items():
36
- for split in splits:
37
- if split["split"] == "test":
38
- for metric in split["metrics"]:
39
- if metric["name"] == "f1":
40
- submission_data[task_name] = [metric["value"]]
 
 
 
 
41
 
42
- return pd.DataFrame(submission_data)
43
 
44
 
45
  ###########
@@ -47,6 +70,7 @@ def download_submissions():
47
  ###########
48
  st.set_page_config(layout="wide")
49
  st.title("RAFT Leaderboard")
50
- df = download_submissions()
 
51
  # hack to remove index column from https://github.com/streamlit/streamlit/issues/641
52
  st.table(df.assign(hack="").set_index("hack"))
4
  import pandas as pd
5
  import requests
6
  import streamlit as st
7
+ from datasets import get_dataset_config_names
8
  from dotenv import load_dotenv
9
 
10
  if Path(".env").is_file():
11
  load_dotenv(".env")
12
 
13
# Hub API token, read from the environment (optionally populated from .env).
auth_token = os.getenv("HF_HUB_TOKEN")
# Only send an Authorization header when a token is configured; concatenating
# "Bearer " with None would raise TypeError at import time.
header = {"Authorization": "Bearer " + auth_token} if auth_token else {}

# Config names of the "ought/raft" dataset; used as the leaderboard's task columns.
TASKS = get_dataset_config_names("ought/raft")
17
+
18
+
19
def extract_tags(dataset):
    """Parse a dataset's ``key:value`` tags into a dictionary.

    Args:
        dataset: Mapping of dataset metadata. Its ``"tags"`` entry, when
            present, is expected to be a list of strings.

    Returns:
        dict mapping the text before the first colon of each tag to the text
        after it. Tags without a colon are skipped, a missing or ``None``
        ``"tags"`` entry yields an empty dict, and on duplicate keys the last
        tag wins.
    """
    tags = {}
    for tag in dataset.get("tags") or []:
        # partition never raises, unlike unpacking the result of split(":", 1)
        # on a tag that has no colon.
        key, separator, value = tag.partition(":")
        if not separator:
            continue
        tags[key] = value
    return tags
25
+
26
 
27
def download_submissions():
    """Return the Hub datasets tagged as RAFT benchmark evaluations.

    Lists datasets through the Hub API and keeps those whose tags contain
    ``benchmark:ought/raft`` and ``type:evaluation``.

    Returns:
        list of dataset metadata dicts, in the order the API returned them.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    # HTTPS, so the bearer token in `header` is never sent in cleartext.
    response = requests.get(
        "https://huggingface.co/api/datasets",
        headers=header,
        timeout=30,
    )
    # Fail loudly instead of iterating over an error payload.
    response.raise_for_status()
    all_datasets = response.json()

    submissions = []
    for dataset in all_datasets:
        tags = extract_tags(dataset)
        if tags.get("benchmark") == "ought/raft" and tags.get("type") == "evaluation":
            submissions.append(dataset)
    return submissions
38
 
39
+
40
def format_submissions(submissions):
    """Build the leaderboard table from a list of submission datasets.

    For each selected submission the full dataset metadata is fetched from the
    Hub API, and the first metric of every task listed under
    ``card_data["results"]`` is taken as that task's score.

    Args:
        submissions: List of dataset metadata dicts, each with an ``"id"`` key.

    Returns:
        pandas.DataFrame with a ``Submission`` column, an ``Overall`` column
        (row-wise mean of the task columns) and one column per name in
        ``TASKS``. Tasks a submission does not report are left as NaN; task
        names not in ``TASKS`` are ignored.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    columns = ["Submission", *TASKS]
    rows = []

    # TODO(lewtun): delete / filter all the junk repos from development
    # The following picks the latest submissions which adhere to the model card schema
    for submission in submissions[-2:]:
        submission_id = submission["id"]
        # HTTPS, so the bearer token in `header` is never sent in cleartext.
        response = requests.get(
            f"https://huggingface.co/api/datasets/{submission_id}?full=true",
            headers=header,
            timeout=30,
        )
        response.raise_for_status()
        card_data = response.json()["card_data"]

        # One dict per submission keeps every score attached to its own row,
        # even when a submission omits or repeats a task.
        row = {"Submission": card_data["submission_dataset"]}
        for task in card_data["results"]:
            task_data = task["task"]
            row[task_data["name"]] = task_data["metrics"][0]["value"]
        rows.append(row)

    # `columns` fixes the column order and drops keys that are not known tasks.
    df = pd.DataFrame(rows, columns=columns)
    df.insert(1, "Overall", df[TASKS].mean(axis=1))

    return df
66
 
67
 
68
  ###########
70
  ###########
71
st.set_page_config(layout="wide")
st.title("RAFT Leaderboard")

# Collect the benchmark submissions, then turn them into the score table.
submissions = download_submissions()
df = format_submissions(submissions)

# hack to remove index column from https://github.com/streamlit/streamlit/issues/641
indexless_df = df.assign(hack="").set_index("hack")
st.table(indexless_df)