Quentin Gallouédec committed on
Commit
6d58c89
•
1 Parent(s): a925279
Files changed (2) hide show
  1. app.py +19 -8
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import json
2
  import os
3
 
@@ -6,12 +7,12 @@ import numpy as np
6
  import pandas as pd
7
  from apscheduler.schedulers.background import BackgroundScheduler
8
  from huggingface_hub import HfApi
 
 
9
 
10
  from src.backend import backend_routine
11
- import glob
12
  from src.logging import configure_root_logger, setup_logger
13
 
14
-
15
  configure_root_logger()
16
  logger = setup_logger(__name__)
17
 
@@ -119,6 +120,14 @@ ALL_ENV_IDS = {
119
  }
120
 
121
 
 
 
 
 
 
 
 
 
122
  def get_leaderboard_df():
123
  dir_path = API.snapshot_download(repo_id=RESULTS_REPO, repo_type="dataset")
124
  pattern = os.path.join(dir_path, "**", "results_*.json")
@@ -134,7 +143,7 @@ def get_leaderboard_df():
134
  env_ids = list(report["results"].keys())
135
  assert len(env_ids) == 1, "Only one environment supported for the moment"
136
  row["env_id"] = env_ids[0]
137
- row["mean_episodic_return"] = np.mean(report["results"][env_ids[0]]["episodic_returns"])
138
  data.append(row)
139
 
140
  df = pd.DataFrame(data) # create DataFrame
@@ -144,7 +153,7 @@ def get_leaderboard_df():
144
 
145
  def select_env(df: pd.DataFrame, env_id: str):
146
  df = df[df["env_id"] == env_id]
147
- df = df.sort_values("mean_episodic_return", ascending=False)
148
  df["ranking"] = np.arange(1, len(df) + 1)
149
  return df
150
 
@@ -159,7 +168,7 @@ def format_df(df: pd.DataFrame):
159
  df.loc[index, "model_id"] = f"[{model_id}](https://huggingface.co/{user_id}/{model_id})"
160
 
161
  # Keep only the relevant columns
162
- df = df[["ranking", "user_id", "model_id", "mean_episodic_return"]]
163
  return df.values.tolist()
164
 
165
 
@@ -201,9 +210,11 @@ def refresh_winners():
201
  # # Or in HTML:
202
  # outputs.append(f'<h3>🏆 <a href="https://huggingface.co/{model}">{model}</a> 🏆</h3>')
203
  else:
204
- outputs.append(f"""## {env_id}
 
205
 
206
- ### 🤷‍♂️ No winner yet""")
 
207
  return outputs
208
 
209
 
@@ -318,7 +329,7 @@ with gr.Blocks(css=css) as demo:
318
  with gr.Column(scale=3):
319
  # Display the leaderboard
320
  gr_df = gr.components.Dataframe(
321
- headers=["🏆", "🧑 User", "🤖 Model id", "📊 Mean episodic return"],
322
  datatype=["number", "markdown", "markdown", "number"],
323
  row_count=(20, "fixed"),
324
  )
 
1
+ import glob
2
  import json
3
  import os
4
 
 
7
  import pandas as pd
8
  from apscheduler.schedulers.background import BackgroundScheduler
9
  from huggingface_hub import HfApi
10
+ from rliable import library as rly
11
+ from rliable import metrics
12
 
13
  from src.backend import backend_routine
 
14
  from src.logging import configure_root_logger, setup_logger
15
 
 
16
  configure_root_logger()
17
  logger = setup_logger(__name__)
18
 
 
120
  }
121
 
122
 
123
+ def iqm(x):
124
+ score_dict = {"a": np.expand_dims(np.array(x), 1)}
125
+ aggregate_func = lambda x: np.array([metrics.aggregate_iqm(x)])
126
+ aggregate_scores, aggregate_score_cis = rly.get_interval_estimates(score_dict, aggregate_func, reps=50000)
127
+ iqm, low, high = aggregate_scores["a"][0], aggregate_score_cis["a"][0][0], aggregate_score_cis["a"][1][0]
128
+ return iqm
129
+
130
+
131
  def get_leaderboard_df():
132
  dir_path = API.snapshot_download(repo_id=RESULTS_REPO, repo_type="dataset")
133
  pattern = os.path.join(dir_path, "**", "results_*.json")
 
143
  env_ids = list(report["results"].keys())
144
  assert len(env_ids) == 1, "Only one environment supported for the moment"
145
  row["env_id"] = env_ids[0]
146
+ row["iqm_episodic_return"] = iqm(report["results"][env_ids[0]]["episodic_returns"])
147
  data.append(row)
148
 
149
  df = pd.DataFrame(data) # create DataFrame
 
153
 
154
  def select_env(df: pd.DataFrame, env_id: str):
155
  df = df[df["env_id"] == env_id]
156
+ df = df.sort_values("iqm_episodic_return", ascending=False)
157
  df["ranking"] = np.arange(1, len(df) + 1)
158
  return df
159
 
 
168
  df.loc[index, "model_id"] = f"[{model_id}](https://huggingface.co/{user_id}/{model_id})"
169
 
170
  # Keep only the relevant columns
171
+ df = df[["ranking", "user_id", "model_id", "iqm_episodic_return"]]
172
  return df.values.tolist()
173
 
174
 
 
210
  # # Or in HTML:
211
  # outputs.append(f'<h3>🏆 <a href="https://huggingface.co/{model}">{model}</a> 🏆</h3>')
212
  else:
213
+ outputs.append(
214
+ f"""## {env_id}
215
 
216
+ ### 🤷‍♂️ No winner yet"""
217
+ )
218
  return outputs
219
 
220
 
 
329
  with gr.Column(scale=3):
330
  # Display the leaderboard
331
  gr_df = gr.components.Dataframe(
332
+ headers=["🏆", "🧑 User", "🤖 Model id", "📊 IQM episodic return"],
333
  datatype=["number", "markdown", "markdown", "number"],
334
  row_count=(20, "fixed"),
335
  )
requirements.txt CHANGED
@@ -12,6 +12,7 @@ numpy==1.24.2
12
  pandas==2.0.0
13
  python-dateutil==2.8.2
14
  requests==2.28.2
 
15
  torch==2.2.2
16
  tqdm==4.65.0
17
 
 
12
  pandas==2.0.0
13
  python-dateutil==2.8.2
14
  requests==2.28.2
15
+ rliable==1.0.8
16
  torch==2.2.2
17
  tqdm==4.65.0
18