Spaces:
Runtime error
Runtime error
rusticluftig
committed on
Commit
•
4a5c2b7
1
Parent(s):
8c72a30
Add table for benchmarks
Browse files
app.py
CHANGED
@@ -27,6 +27,7 @@ HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/Ra
|
|
27 |
EVALUATION_DETAILS = """<ul><li><b>Name:</b> the 🤗 Hugging Face model name (click to go to the model card)</li><li><b>Rewards / Day:</b> the expected rewards per day based on current ranking.</li><li><b>Last Average Loss:</b> the last loss value on the evaluation data for the model as calculated by a validator (lower is better)</li><li><b>UID:</b> the Bittensor UID of the miner</li><li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-9/" target="_blank">taostats</a>."""
|
28 |
EVALUATION_HEADER = """<h3 align="center">Shows the latest internal evaluation statistics as calculated by the Opentensor validator</h3>"""
|
29 |
VALIDATOR_WANDB_PROJECT = "opentensor-dev/pretraining-subnet"
|
|
|
30 |
H4_TOKEN = os.environ.get("H4_TOKEN", None)
|
31 |
API = HfApi(token=H4_TOKEN)
|
32 |
WANDB_TOKEN = os.environ.get("WANDB_API_KEY", None)
|
@@ -157,14 +158,14 @@ def is_floatable(x) -> bool:
|
|
157 |
) or isinstance(x, int)
|
158 |
|
159 |
|
160 |
-
def get_wandb_runs() -> List:
|
161 |
"""Get the latest runs from Wandb, retrying infinitely until we get them."""
|
162 |
while True:
|
163 |
api = wandb.Api(api_key=WANDB_TOKEN)
|
164 |
runs = list(
|
165 |
api.runs(
|
166 |
-
|
167 |
-
filters=
|
168 |
)
|
169 |
)
|
170 |
if len(runs) > 0:
|
@@ -286,6 +287,18 @@ def leaderboard_data(
|
|
286 |
for c in leaderboard
|
287 |
if (c.uid in scores and scores[c.uid]["fresh"]) or show_stale
|
288 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
289 |
|
290 |
|
291 |
def restart_space():
|
@@ -298,9 +311,9 @@ def main():
|
|
298 |
model_data: List[ModelData] = get_subnet_data(subtensor, metagraph)
|
299 |
model_data.sort(key=lambda x: x.incentive, reverse=True)
|
300 |
|
301 |
-
|
302 |
|
303 |
-
scores = get_scores([x.uid for x in model_data],
|
304 |
|
305 |
current_block = metagraph.block.item()
|
306 |
next_epoch_block = next_epoch(subtensor, current_block)
|
@@ -309,6 +322,8 @@ def main():
|
|
309 |
weight_keys = set()
|
310 |
for uid, stats in validator_df.items():
|
311 |
weight_keys.update(stats[-1].keys())
|
|
|
|
|
312 |
|
313 |
demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
|
314 |
with demo:
|
@@ -326,6 +341,12 @@ def main():
|
|
326 |
},
|
327 |
num_top_classes=10,
|
328 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
329 |
|
330 |
with gr.Accordion("Evaluation Stats"):
|
331 |
gr.HTML(EVALUATION_HEADER)
|
@@ -346,7 +367,7 @@ def main():
|
|
346 |
)
|
347 |
|
348 |
gr.LinePlot(
|
349 |
-
get_losses_over_time(
|
350 |
x="timestamp",
|
351 |
x_title="Date",
|
352 |
y="best_loss",
|
|
|
27 |
# HTML legend describing each column shown in the evaluation leaderboard.
EVALUATION_DETAILS = """<ul><li><b>Name:</b> the 🤗 Hugging Face model name (click to go to the model card)</li><li><b>Rewards / Day:</b> the expected rewards per day based on current ranking.</li><li><b>Last Average Loss:</b> the last loss value on the evaluation data for the model as calculated by a validator (lower is better)</li><li><b>UID:</b> the Bittensor UID of the miner</li><li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-9/" target="_blank">taostats</a>."""

# Heading rendered above the evaluation-stats accordion.
EVALUATION_HEADER = """<h3 align="center">Shows the latest internal evaluation statistics as calculated by the Opentensor validator</h3>"""

# Wandb project that the Opentensor validator logs its runs to.
VALIDATOR_WANDB_PROJECT = "opentensor-dev/pretraining-subnet"
# Wandb project that stores the offline benchmark results for top models.
BENCHMARK_WANDB_PROJECT = "raofoundation/pretraining-leaderboard-data"

# Hugging Face API token (optional; read from the environment).
H4_TOKEN = os.environ.get("H4_TOKEN", None)
API = HfApi(token=H4_TOKEN)

# Wandb API key (optional; read from the environment).
WANDB_TOKEN = os.environ.get("WANDB_API_KEY", None)
|
|
|
158 |
) or isinstance(x, int)
|
159 |
|
160 |
|
161 |
+
def get_wandb_runs(project: str, filters: Dict[str, Any]) -> List:
|
162 |
"""Get the latest runs from Wandb, retrying infinitely until we get them."""
|
163 |
while True:
|
164 |
api = wandb.Api(api_key=WANDB_TOKEN)
|
165 |
runs = list(
|
166 |
api.runs(
|
167 |
+
project,
|
168 |
+
filters=filters,
|
169 |
)
|
170 |
)
|
171 |
if len(runs) > 0:
|
|
|
287 |
for c in leaderboard
|
288 |
if (c.uid in scores and scores[c.uid]["fresh"]) or show_stale
|
289 |
]
|
290 |
+
|
291 |
+
def get_benchmarks() -> Tuple[pd.DataFrame, datetime.datetime]:
    """Returns the latest benchmarks and the time they were run.

    Scans runs in the benchmark Wandb project and, for the first run whose
    most recently logged artifact contains a "benchmarks" table, returns that
    table as a dataframe together with the run's start timestamp.

    Returns:
        A (dataframe, started_at) pair, or (None, None) when no run with a
        usable "benchmarks" artifact is found.
    """
    runs = get_wandb_runs(project=BENCHMARK_WANDB_PROJECT, filters=None)
    for run in runs:
        artifacts = list(run.logged_artifacts())
        if not artifacts:
            continue
        # Only the most recently logged artifact of each run is considered.
        table = artifacts[-1].get("benchmarks")
        if not table:
            continue
        # NOTE(review): assumes Wandb's "startedAt" metadata always carries
        # fractional seconds in this exact format — confirm against the API.
        started_at = datetime.datetime.strptime(
            run.metadata["startedAt"], "%Y-%m-%dT%H:%M:%S.%f"
        )
        return table.get_dataframe(), started_at
    bt.logging.error("Failed to get benchmarks from Wandb.")
    return None, None
|
302 |
|
303 |
|
304 |
def restart_space():
|
|
|
311 |
model_data: List[ModelData] = get_subnet_data(subtensor, metagraph)
|
312 |
model_data.sort(key=lambda x: x.incentive, reverse=True)
|
313 |
|
314 |
+
vali_runs = get_wandb_runs(project=VALIDATOR_WANDB_PROJECT, filters={"config.type": "validator", "config.uid": 238})
|
315 |
|
316 |
+
scores = get_scores([x.uid for x in model_data], vali_runs)
|
317 |
|
318 |
current_block = metagraph.block.item()
|
319 |
next_epoch_block = next_epoch(subtensor, current_block)
|
|
|
322 |
weight_keys = set()
|
323 |
for uid, stats in validator_df.items():
|
324 |
weight_keys.update(stats[-1].keys())
|
325 |
+
|
326 |
+
benchmarks, benchmark_timestamp = get_benchmarks()
|
327 |
|
328 |
demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
|
329 |
with demo:
|
|
|
341 |
},
|
342 |
num_top_classes=10,
|
343 |
)
|
344 |
+
|
345 |
+
if benchmarks is not None:
|
346 |
+
with gr.Accordion("Top Model Benchmarks"):
|
347 |
+
gr.components.Dataframe(benchmarks)
|
348 |
+
gr.HTML("""<div>PPL computed using a stride of 512. See <a href='https://github.com/RaoFoundation/pretraining/blob/main/scripts/run_benchmarks.py'>here</a> for the full code.</div>""")
|
349 |
+
gr.HTML(f"""<div>Last Updated: {benchmark_timestamp.strftime("%Y-%m-%d %H:%M:%S")} (UTC)</div>""")
|
350 |
|
351 |
with gr.Accordion("Evaluation Stats"):
|
352 |
gr.HTML(EVALUATION_HEADER)
|
|
|
367 |
)
|
368 |
|
369 |
gr.LinePlot(
|
370 |
+
get_losses_over_time(vali_runs),
|
371 |
x="timestamp",
|
372 |
x_title="Date",
|
373 |
y="best_loss",
|