rusticluftig committed
Commit 8c72a30
1 Parent(s): b33f70b

Add a line plot of loss over time

Files changed (1): app.py (+75 -19)
app.py CHANGED
@@ -5,7 +5,6 @@ import bittensor as bt
 from typing import Dict, List, Any, Optional, Tuple
 from bittensor.extrinsics.serving import get_metadata
 from dataclasses import dataclass
-import requests
 import wandb
 import math
 import os
@@ -16,6 +15,7 @@ import pandas as pd
 from dotenv import load_dotenv
 from huggingface_hub import HfApi
 from apscheduler.schedulers.background import BackgroundScheduler
+import pandas as pd
 
 load_dotenv()
 
@@ -121,10 +121,12 @@ def get_subnet_data(
         hotkey = metagraph.hotkeys[uid]
         metadata = None
         try:
-            metadata = run_with_retries(functools.partial(get_metadata, subtensor, metagraph.netuid, hotkey))
+            metadata = run_with_retries(
+                functools.partial(get_metadata, subtensor, metagraph.netuid, hotkey)
+            )
         except:
             print(f"Failed to get metadata for UID {uid}: {traceback.format_exc()}")
-
+
         if not metadata:
             continue
 
@@ -155,10 +157,8 @@ def is_floatable(x) -> bool:
     ) or isinstance(x, int)
 
 
-def get_scores(
-    uids: List[int],
-) -> Dict[int, Dict[str, Optional[float]]]:
-    runs = []
+def get_wandb_runs() -> List:
+    """Get the latest runs from Wandb, retrying infinitely until we get them."""
     while True:
         api = wandb.Api(api_key=WANDB_TOKEN)
         runs = list(
@@ -168,15 +168,20 @@ def get_scores(
             )
         )
         if len(runs) > 0:
-            break
+            return runs
         # WandDB API is quite unreliable. Wait another minute and try again.
         print("Failed to get runs from Wandb. Trying again in 60 seconds.")
         time.sleep(60)
-
+
+
+def get_scores(
+    uids: List[int],
+    wandb_runs: List,
+) -> Dict[int, Dict[str, Optional[float]]]:
     result = {}
     previous_timestamp = None
     # Iterate through the runs until we've processed all the uids.
-    for i, run in enumerate(runs):
+    for i, run in enumerate(wandb_runs):
         if not "original_format_json" in run.summary:
             continue
         data = json.loads(run.summary["original_format_json"])
@@ -208,6 +213,30 @@ def get_scores(
     return result
 
 
+def get_losses_over_time(wandb_runs: List) -> pd.DataFrame:
+    """Returns a dataframe of the best average model loss over time."""
+    timestamps = []
+    best_losses = []
+
+    for run in wandb_runs:
+        if "original_format_json" not in run.summary:
+            continue
+        data = json.loads(run.summary["original_format_json"])
+        all_uid_data = data["uid_data"]
+        timestamp = datetime.datetime.fromtimestamp(data["timestamp"])
+        best_loss = math.inf
+        for _, uid_data in all_uid_data.items():
+            loss = uid_data.get("average_loss", math.inf)
+            # Filter out the numbers from the exploit.
+            if loss < best_loss and (loss > 2.5 or timestamp > datetime.datetime(2024, 2, 8)):
+                best_loss = uid_data["average_loss"]
+        if best_loss != math.inf:
+            timestamps.append(timestamp)
+            best_losses.append(best_loss)
+
+    return pd.DataFrame({"timestamp": timestamps, "best_loss": best_losses})
+
+
 def format_score(uid: int, scores, key) -> Optional[float]:
     if uid in scores:
         if key in scores[uid]:
@@ -218,9 +247,11 @@ def format_score(uid: int, scores, key) -> Optional[float]:
 
 
 def next_epoch(subtensor: bt.subtensor, block: int) -> int:
-    return block + subtensor.get_subnet_hyperparameters(
-        NETUID
-    ).tempo - subtensor.blocks_since_epoch(NETUID, block)
+    return (
+        block
+        + subtensor.get_subnet_hyperparameters(NETUID).tempo
+        - subtensor.blocks_since_epoch(NETUID, block)
+    )
 
 
 def get_next_update_div(current_block: int, next_update_block: int) -> str:
@@ -232,9 +263,11 @@ def get_next_update_div(current_block: int, next_update_block: int) -> str:
     delta = next_update_time - now
     return f"""<div align="center" style="font-size: larger;">Next reward update: <b>{blocks_to_go}</b> blocks (~{int(delta.total_seconds() // 60)} minutes)</div>"""
 
+
 def get_last_updated_div() -> str:
     return f"""<div>Last Updated: {datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")} (UTC)</div>"""
 
+
 def leaderboard_data(
     leaderboard: List[ModelData],
     scores: Dict[int, Dict[str, Optional[float]]],
@@ -254,6 +287,7 @@ def leaderboard_data(
         if (c.uid in scores and scores[c.uid]["fresh"]) or show_stale
     ]
 
+
 def restart_space():
     API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
 
@@ -264,7 +298,9 @@ def main():
     model_data: List[ModelData] = get_subnet_data(subtensor, metagraph)
     model_data.sort(key=lambda x: x.incentive, reverse=True)
 
-    scores = get_scores([x.uid for x in model_data])
+    wandb_runs = get_wandb_runs()
+
+    scores = get_scores([x.uid for x in model_data], wandb_runs)
 
     current_block = metagraph.block.item()
     next_epoch_block = next_epoch(subtensor, current_block)
@@ -303,13 +339,34 @@ def main():
                 visible=True,
             )
             gr.HTML(EVALUATION_DETAILS)
-            show_stale.change(lambda stale: leaderboard_data(model_data, scores, stale), inputs=[show_stale], outputs=leaderboard_table)
+            show_stale.change(
+                lambda stale: leaderboard_data(model_data, scores, stale),
+                inputs=[show_stale],
+                outputs=leaderboard_table,
+            )
+
+            gr.LinePlot(
+                get_losses_over_time(wandb_runs),
+                x="timestamp",
+                x_title="Date",
+                y="best_loss",
+                y_title="Average Loss",
+                tooltip="best_loss",
+                interactive=True,
+                visible=True,
+                width=1024,
+                title="Best Average Loss Over Time",
+            )
 
         with gr.Accordion("Validator Stats"):
             gr.components.Dataframe(
                 value=[
                     [uid, int(validator_df[uid][1]), round(validator_df[uid][0], 4)]
-                    + [validator_df[uid][-1].get(c.uid) for c in model_data if c.incentive]
+                    + [
+                        validator_df[uid][-1].get(c.uid)
+                        for c in model_data
+                        if c.incentive
+                    ]
                     for uid, _ in sorted(
                         zip(
                             validator_df.keys(),
@@ -332,8 +389,6 @@ def main():
            )
        gr.HTML(value=get_last_updated_div())
 
-
-
     scheduler = BackgroundScheduler()
     scheduler.add_job(
         restart_space, "interval", seconds=60 * 30
@@ -341,5 +396,6 @@ def main():
     scheduler.start()
 
     demo.launch()
-
+
+
 main()
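
To try the new plot in isolation: the change follows the usual Gradio pattern of passing a pandas DataFrame to gr.LinePlot. Below is a minimal standalone sketch of that pattern, using a small hand-built DataFrame with hypothetical loss values in place of the wandb-derived frame that get_losses_over_time builds in app.py; the LinePlot arguments mirror the ones added in this commit.

# Minimal, self-contained sketch (not part of the commit): renders the same
# LinePlot configuration with hypothetical sample data instead of the
# wandb-derived dataframe returned by get_losses_over_time().
import datetime

import gradio as gr
import pandas as pd

# Hypothetical loss points standing in for real validator run data.
sample = pd.DataFrame(
    {
        "timestamp": [
            datetime.datetime(2024, 2, 1),
            datetime.datetime(2024, 2, 8),
            datetime.datetime(2024, 2, 15),
        ],
        "best_loss": [3.1, 2.9, 2.7],
    }
)

with gr.Blocks() as demo:
    gr.LinePlot(
        sample,
        x="timestamp",
        x_title="Date",
        y="best_loss",
        y_title="Average Loss",
        tooltip="best_loss",
        interactive=True,
        visible=True,
        width=1024,
        title="Best Average Loss Over Time",
    )

demo.launch()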