rusticluftig committed on
Commit 4a5c2b7
1 Parent(s): 8c72a30

Add table for benchmarks

Files changed (1): app.py (+27 -6)
app.py CHANGED
@@ -27,6 +27,7 @@ HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/Ra
 EVALUATION_DETAILS = """<ul><li><b>Name:</b> the 🤗 Hugging Face model name (click to go to the model card)</li><li><b>Rewards / Day:</b> the expected rewards per day based on current ranking.</li><li><b>Last Average Loss:</b> the last loss value on the evaluation data for the model as calculated by a validator (lower is better)</li><li><b>UID:</b> the Bittensor UID of the miner</li><li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-9/" target="_blank">taostats</a>."""
 EVALUATION_HEADER = """<h3 align="center">Shows the latest internal evaluation statistics as calculated by the Opentensor validator</h3>"""
 VALIDATOR_WANDB_PROJECT = "opentensor-dev/pretraining-subnet"
+BENCHMARK_WANDB_PROJECT = "raofoundation/pretraining-leaderboard-data"
 H4_TOKEN = os.environ.get("H4_TOKEN", None)
 API = HfApi(token=H4_TOKEN)
 WANDB_TOKEN = os.environ.get("WANDB_API_KEY", None)
@@ -157,14 +158,14 @@ def is_floatable(x) -> bool:
 ) or isinstance(x, int)


-def get_wandb_runs() -> List:
+def get_wandb_runs(project: str, filters: Dict[str, Any]) -> List:
     """Get the latest runs from Wandb, retrying infinitely until we get them."""
     while True:
         api = wandb.Api(api_key=WANDB_TOKEN)
         runs = list(
             api.runs(
-                VALIDATOR_WANDB_PROJECT,
-                filters={"config.type": "validator", "config.uid": 238},
+                project,
+                filters=filters,
             )
         )
         if len(runs) > 0:
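Note: `api.runs` accepts MongoDB-style filters over run metadata, which the generalized `get_wandb_runs(project, filters)` now passes through unchanged. A minimal standalone sketch of the validator query this commit moves into `main()` (the project name and filter values come from the diff; everything else is illustrative):

```python
import wandb

# Match runs in the validator project whose config records
# type="validator" and uid=238 (the validator this app reads from).
api = wandb.Api()  # uses the locally stored API key
runs = list(
    api.runs(
        "opentensor-dev/pretraining-subnet",
        filters={"config.type": "validator", "config.uid": 238},
    )
)
print(f"{len(runs)} matching runs")
```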
@@ -286,6 +287,18 @@ def leaderboard_data(
         for c in leaderboard
         if (c.uid in scores and scores[c.uid]["fresh"]) or show_stale
     ]
+
+def get_benchmarks() -> Tuple[pd.DataFrame, datetime.datetime]:
+    """Returns the latest benchmarks and the time they were run."""
+    runs = get_wandb_runs(project=BENCHMARK_WANDB_PROJECT, filters=None)
+    for run in runs:
+        artifacts = list(run.logged_artifacts())
+        if artifacts:
+            table = artifacts[-1].get("benchmarks")
+            if table:
+                return table.get_dataframe(), datetime.datetime.strptime(run.metadata["startedAt"], "%Y-%m-%dT%H:%M:%S.%f")
+    bt.logging.error("Failed to get benchmarks from Wandb.")
+    return None, None


 def restart_space():
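`get_benchmarks()` assumes each benchmark run carries a logged artifact containing a table keyed `"benchmarks"`. A hedged sketch of the producer side, assuming the benchmark script logs a `wandb.Table` (the entity/project come from `BENCHMARK_WANDB_PROJECT`; the DataFrame contents are illustrative):

```python
import pandas as pd
import wandb

# Illustrative rows only; the real table is built by the benchmark script.
df = pd.DataFrame({"model": ["example/model"], "ppl": [12.3]})

with wandb.init(
    entity="raofoundation", project="pretraining-leaderboard-data"
) as run:
    # Logging a wandb.Table creates a run artifact that later shows up in
    # run.logged_artifacts(), which is where get_benchmarks() reads it back.
    run.log({"benchmarks": wandb.Table(dataframe=df)})
```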
@@ -298,9 +311,9 @@ def main():
     model_data: List[ModelData] = get_subnet_data(subtensor, metagraph)
     model_data.sort(key=lambda x: x.incentive, reverse=True)

-    wandb_runs = get_wandb_runs()
+    vali_runs = get_wandb_runs(project=VALIDATOR_WANDB_PROJECT, filters={"config.type": "validator", "config.uid": 238})

-    scores = get_scores([x.uid for x in model_data], wandb_runs)
+    scores = get_scores([x.uid for x in model_data], vali_runs)

     current_block = metagraph.block.item()
     next_epoch_block = next_epoch(subtensor, current_block)
@@ -309,6 +322,8 @@ def main():
     weight_keys = set()
     for uid, stats in validator_df.items():
         weight_keys.update(stats[-1].keys())
+
+    benchmarks, benchmark_timestamp = get_benchmarks()

     demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
     with demo:
@@ -326,6 +341,12 @@ def main():
             },
             num_top_classes=10,
         )
+
+        if benchmarks is not None:
+            with gr.Accordion("Top Model Benchmarks"):
+                gr.components.Dataframe(benchmarks)
+                gr.HTML("""<div>PPL computed using a stride of 512. See <a href='https://github.com/RaoFoundation/pretraining/blob/main/scripts/run_benchmarks.py'>here</a> for the full code.</div>""")
+                gr.HTML(f"""<div>Last Updated: {benchmark_timestamp.strftime("%Y-%m-%d %H:%M:%S")} (UTC)</div>""")

         with gr.Accordion("Evaluation Stats"):
             gr.HTML(EVALUATION_HEADER)
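The accordion's footnote states that perplexity is computed with a stride of 512 and points to `scripts/run_benchmarks.py` for the full code. For orientation, a sketch of strided (sliding-window) perplexity in the style of the Hugging Face docs; the window length, model interface, and helper name are assumptions, not this repo's implementation:

```python
import torch

def strided_perplexity(model, input_ids, max_length=1024, stride=512):
    """Each window advances by `stride` tokens, reuses the rest as context,
    and scores only the tokens not covered by the previous window."""
    seq_len = input_ids.size(1)
    nlls, prev_end = [], 0
    for begin in range(0, seq_len, stride):
        end = min(begin + max_length, seq_len)
        trg_len = end - prev_end  # tokens actually scored in this window
        ids = input_ids[:, begin:end]
        targets = ids.clone()
        targets[:, :-trg_len] = -100  # mask the overlap: context only
        with torch.no_grad():
            loss = model(ids, labels=targets).loss  # mean NLL over trg_len
        nlls.append(loss * trg_len)
        prev_end = end
        if end == seq_len:
            break
    return torch.exp(torch.stack(nlls).sum() / prev_end)
```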
@@ -346,7 +367,7 @@ def main():
         )

         gr.LinePlot(
-            get_losses_over_time(wandb_runs),
+            get_losses_over_time(vali_runs),
             x="timestamp",
             x_title="Date",
             y="best_loss",