rusticluftig committed
Commit a2dcddd
1 Parent(s): 17bb6e0

First pass at a working leaderboard

Files changed (5)
  1. .gitignore +3 -0
  2. app.py +7 -9
  3. competitions.py +23 -0
  4. requirements.txt +2 -1
  5. utils.py +47 -18
.gitignore ADDED
@@ -0,0 +1,3 @@
+ .venv
+ __pycache__/
+ .env
app.py CHANGED
@@ -9,6 +9,7 @@ from dotenv import load_dotenv
  from huggingface_hub import HfApi
  from apscheduler.schedulers.background import BackgroundScheduler
 
+ import competitions
  import utils
 
  FONT = (
@@ -20,11 +21,6 @@ HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/ma
  EVALUATION_DETAILS = """<ul><li><b>Name:</b> the 🤗 Hugging Face model name (click to go to the model card)</li><li><b>Rewards / Day:</b> the expected rewards per day based on current ranking.</li><li><b>Last Average Loss:</b> the last loss value on the evaluation data for the model as calculated by a validator (lower is better)</li><li><b>UID:</b> the Bittensor UID of the miner</li><li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-9/" target="_blank">taostats</a>."""
  EVALUATION_HEADER = """<h3 align="center">Shows the latest internal evaluation statistics as calculated by the Opentensor validator</h3>"""
 
- # A map of competition IDs to HTML descriptions.
- COMPETITION_DETAILS: Dict[int, str] = {
-     1: """<b>Competition ID 1:</b> Produce the best fine-tuned model from a Subnet 9 pretrained model. Models are evaluated using synthetic prompt/response data from Subnet 18."""
- }
-
  HF_REPO_ID = "macrocosm-os/finetuning-leaderboard"
  SECONDS_PER_BLOCK = 12
 
@@ -93,8 +89,9 @@ def main():
  show_stale = gr.Checkbox(label="Show Stale", interactive=True)
  competition_leaderboards = []
  # TODO: Dynamically generate per-competition leaderboards based on model_data.
- with gr.Accordion("Finetuned SN9 competition"):
-     gr.HTML(COMPETITION_DETAILS[1])
+ competition_details = competitions.COMPETITION_DETAILS[1]
+ with gr.Accordion(f"{competition_details.name} competition"):
+     gr.HTML(competition_details.html_description)
  competition_leaderboards.append(gr.components.Dataframe(
      value=utils.leaderboard_data(model_data, scores, show_stale.value),
      headers=["Name", "Win Rate", "Average Loss", "Weight", "UID", "Block"],
@@ -110,13 +107,14 @@ def main():
      outputs=competition_leaderboards,
  )
 
+ # TODO: Make this a multi-competition line plot
  gr.LinePlot(
      utils.get_losses_over_time(vali_runs),
      x="timestamp",
      x_title="Date",
-     y="best_loss",
+     y="SN9_MODEL",
      y_title="Average Loss",
-     tooltip="best_loss",
+     tooltip="SN9_MODEL",
      interactive=True,
      visible=True,
      width=1024,
competitions.py ADDED
@@ -0,0 +1,23 @@
+
+
+
+ from dataclasses import dataclass
+ from typing import Dict
+
+
+ @dataclass(frozen=True)
+ class CompetitionDetails:
+     # The display name of the competition.
+     name: str
+
+     # The HTML description of the competition.
+     html_description: str
+
+
+ # A map of competition IDs to the details of each competition.
+ COMPETITION_DETAILS: Dict[int, CompetitionDetails] = {
+     1: CompetitionDetails(
+         name="SN9_MODEL",
+         html_description="""<b>Competition ID 1</b><br/>Produce the best fine-tuned model from a Subnet 9 pretrained model. Models are evaluated using synthetic prompt/response data from Subnet 18."""
+     )
+ }
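
COMPETITION_DETAILS is a plain dict keyed by competition ID, so registering another competition should only need one more entry in this map. A minimal sketch of what that could look like, assuming a hypothetical second competition; the ID 2, the name "EXAMPLE_COMP", and its description are invented for illustration and are not part of this commit:

# Hypothetical sketch only: competition ID 2 and its details are illustrative.
COMPETITION_DETAILS: Dict[int, CompetitionDetails] = {
    1: CompetitionDetails(
        name="SN9_MODEL",
        html_description="""<b>Competition ID 1</b><br/>Produce the best fine-tuned model from a Subnet 9 pretrained model. Models are evaluated using synthetic prompt/response data from Subnet 18.""",
    ),
    2: CompetitionDetails(
        name="EXAMPLE_COMP",
        html_description="""<b>Competition ID 2</b><br/>Placeholder description for a hypothetical second competition.""",
    ),
}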
requirements.txt CHANGED
@@ -1,6 +1,7 @@
  bittensor
  requests
- wandb
+ wandb==0.17.1
+ numpy==1.26.4
  python-dotenv
  APScheduler
  huggingface-hub
utils.py CHANGED
@@ -6,7 +6,9 @@ import math
  import os
  import time
  import traceback
+ from collections import defaultdict
  from dataclasses import dataclass
+ from email.policy import default
  from typing import Any, Dict, List, Optional, Tuple
 
  import bittensor as bt
@@ -15,6 +17,9 @@ import pandas as pd
  import wandb
  from bittensor.extrinsics.serving import get_metadata
  from dotenv import load_dotenv
+ from wandb.apis.public.history import HistoryScan
+
+ import competitions
 
  # TODO: Update once registered
  NETUID = 179
@@ -235,24 +240,48 @@ def get_validator_weights(
  def get_losses_over_time(wandb_runs: List) -> pd.DataFrame:
      """Returns a dataframe of the best average model loss over time."""
      timestamps = []
-     best_losses = []
-
+     datapoints_per_comp_id = {id: [] for id in competitions.COMPETITION_DETAILS}
+
      for run in wandb_runs:
-         if "original_format_json" not in run.summary:
-             continue
-         data = json.loads(run.summary["original_format_json"])
-         all_uid_data = data["uid_data"]
-         timestamp = datetime.datetime.fromtimestamp(data["timestamp"])
-         best_loss = math.inf
-         for _, uid_data in all_uid_data.items():
-             loss = uid_data.get("average_loss", math.inf)
-             if loss < best_loss:
-                 best_loss = uid_data["average_loss"]
-         if best_loss != math.inf:
-             timestamps.append(timestamp)
-             best_losses.append(best_loss)
-
-     return pd.DataFrame({"timestamp": timestamps, "best_loss": best_losses})
+         # For each run, check the 10 most recent steps.
+         best_loss_per_competition_id = defaultdict(lambda: math.inf)
+         should_add_datapoint = False
+         min_step = max(0, run.lastHistoryStep - 10)
+         history_scan = HistoryScan(
+             run.client, run, min_step, run.lastHistoryStep, page_size=10
+         )
+         max_timestamp = None
+         for step in history_scan:
+             if "original_format_json" not in step:
+                 continue
+             data = json.loads(step["original_format_json"])
+             all_uid_data = data["uid_data"]
+             timestamp = datetime.datetime.fromtimestamp(data["timestamp"])
+             if max_timestamp is None:
+                 max_timestamp = timestamp
+             max_timestamp = max(max_timestamp, timestamp)
+
+             for _, uid_data in all_uid_data.items():
+                 loss = uid_data.get("average_loss", math.inf)
+                 competition_id = uid_data.get("competition_id", None)
+                 if not competition_id:
+                     continue
+
+                 if loss < best_loss_per_competition_id[competition_id]:
+                     best_loss_per_competition_id[competition_id] = uid_data["average_loss"]
+                     should_add_datapoint = True
+         # Now that we've processed the run's most recent steps, check if we should add a datapoint.
+         if should_add_datapoint:
+             timestamps.append(max_timestamp)
+             # Iterate through all possible competitions and add the best loss for each.
+             # Set None for any that aren't active during this run.
+             for id, losses in datapoints_per_comp_id.items():
+                 losses.append(best_loss_per_competition_id.get(id, None))
+
+     # Create a dictionary of competitions to lists of losses.
+     output_columns = {competitions.COMPETITION_DETAILS[id].name: losses for id, losses in datapoints_per_comp_id.items()}
+
+     return pd.DataFrame({"timestamp": timestamps, **output_columns})
 
 
  def next_epoch(subtensor: bt.subtensor, block: int) -> int:
@@ -384,7 +413,7 @@ def load_state_vars() -> dict[Any]:
      vali_runs = get_wandb_runs(
          project=VALIDATOR_WANDB_PROJECT,
          # TODO: Update to point to the OTF vali on finetuning
-         filters={"config.type": "validator", "config.uid": 238},
+         filters={"config.type": "validator", "config.uid": 0},
      )
 
      scores = get_scores([x.uid for x in model_data], vali_runs)
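
With this change, get_losses_over_time returns one loss column per competition, named after CompetitionDetails.name, which is why the LinePlot in app.py switched from y="best_loss" to y="SN9_MODEL". A minimal sketch of the expected frame shape, with invented timestamps and loss values:

# Illustrative only: the timestamps and loss values below are made up.
import datetime
import pandas as pd

df = pd.DataFrame({
    "timestamp": [datetime.datetime(2024, 6, 1), datetime.datetime(2024, 6, 2)],
    # One column per competition, keyed by CompetitionDetails.name.
    "SN9_MODEL": [2.31, 2.28],
})
# app.py then plots this frame directly, e.g.
# gr.LinePlot(df, x="timestamp", y="SN9_MODEL", tooltip="SN9_MODEL", ...)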