rusticluftig committed
Commit 1f67d0f
0 Parent(s):

Copy existing pretraining-api repo

Files changed (5):
  1. README.md +16 -0
  2. api.py +135 -0
  3. app.py +133 -0
  4. requirements.txt +10 -0
  5. utils.py +448 -0
README.md ADDED
@@ -0,0 +1,16 @@
+ ---
+ title: Sn9
+ emoji: 👁
+ colorFrom: purple
+ colorTo: pink
+ sdk: gradio
+ sdk_version: 4.36.1
+ app_file: app.py
+ pinned: false
+ license: apache-2.0
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+
+ To run the API: `python api.py`
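
Once the server is up, a quick smoke test is to hit the `/updated` endpoint. A minimal sketch, assuming the defaults from api.py (host 0.0.0.0, port 5000):

```python
import requests

# /updated returns the timestamp of the last data reload as plain text.
print(requests.get("http://localhost:5000/updated").text)
```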
api.py ADDED
@@ -0,0 +1,135 @@
+
+ import atexit
+ import datetime
+
+ from flask import Flask, request, jsonify
+ from apscheduler.schedulers.background import BackgroundScheduler
+
+ import utils
+
+ app = Flask(__name__)
+
+ # Global state (avoids reloading the data on every request).
+ state_vars = None
+ reload_timestamp = datetime.datetime.now().strftime('%D %T')
+
+
+ def load_data(test=False):
+     """
+     Reload the state variables.
+     """
+     global state_vars, reload_timestamp
+     if test:
+         state_vars = utils.test_load_state_vars()
+     else:
+         state_vars = utils.load_state_vars()
+
+     reload_timestamp = datetime.datetime.now().strftime('%D %T')
+
+     print(f'Reloaded data at {reload_timestamp}')
+
+
+ def start_scheduler():
+     scheduler = BackgroundScheduler()
+     scheduler.add_job(func=load_data, trigger="interval", seconds=60 * 30)
+     scheduler.start()
+
+     # Shut down the scheduler when exiting the app.
+     atexit.register(lambda: scheduler.shutdown())
+
+
+ @app.route('/', methods=['GET'])
+ def home():
+     return "Welcome to the Bittensor Pretraining Leaderboard API!"
+
+
+ @app.route('/updated', methods=['GET'])
+ def updated():
+     return reload_timestamp
+
+
+ @app.route('/benchmark', methods=['GET'])
+ def benchmark():
+     """
+     Get the benchmarks and the timestamp.
+
+     Returns:
+         - benchmarks: List of dicts (from pandas DataFrame)
+         - benchmark_timestamp: String
+     """
+     benchmarks = state_vars.get("benchmarks", None)
+     benchmark_timestamp = state_vars.get("benchmark_timestamp", None)
+
+     # Benchmarks are only loaded when BENCHMARK_FLAG is set; avoid calling
+     # .to_dict() on None in that case.
+     if benchmarks is None:
+         return jsonify({"benchmarks": None, "benchmark_timestamp": None})
+
+     return jsonify(
+         {
+             "benchmarks": benchmarks.to_dict(orient='records'),
+             "benchmark_timestamp": benchmark_timestamp.strftime('%Y-%m-%d %H:%M:%S')
+         }
+     )
+
+
+ @app.route('/metagraph', methods=['GET'])
+ def metagraph():
+     """
+     Get the metagraph data.
+     Returns:
+         - metagraph_data: List of dicts (from pandas DataFrame)
+     """
+     metagraph = state_vars["metagraph"]
+
+     return jsonify(
+         utils.make_metagraph_dataframe(metagraph).to_dict(orient='records')
+     )
+
+
+ @app.route('/leaderboard', methods=['GET'])
+ def leaderboard():
+     """
+     Get the leaderboard data.
+     Returns:
+         - leaderboard_data: List of rows [name, win rate, average loss, weight, UID, block]
+     """
+     model_data = state_vars["model_data"]
+     scores = state_vars["scores"]
+     # Query parameters arrive as strings, so parse the flag explicitly:
+     # "?show_stale=false" would otherwise be truthy.
+     show_stale = request.args.get('show_stale', 'false').lower() in ('true', '1')
+     return jsonify(
+         utils.leaderboard_data(model_data, scores, show_stale=show_stale)
+     )
+
+
+ @app.route('/loss', methods=['GET'])
+ def loss():
+     """
+     Get the losses over time.
+     Returns:
+         - losses_over_time: List of dicts (from pandas DataFrame)
+     """
+     vali_runs = state_vars["vali_runs"]
+
+     return jsonify(
+         utils.get_losses_over_time(vali_runs).to_dict(orient='records')
+     )
+
+
+ @app.route('/validator', methods=['GET'])
+ def validator():
+     """
+     Get the validator data.
+     Returns:
+         - validator_data: List of dicts (from pandas DataFrame)
+     """
+     model_data = state_vars["model_data"]
+     validator_df = state_vars["validator_df"]
+
+     return jsonify(
+         utils.make_validator_dataframe(validator_df, model_data).to_dict(orient='records')
+     )
+
+
+ if __name__ == '__main__':
+     load_data()
+     start_scheduler()
+
+     app.run(host='0.0.0.0', port=5000, debug=True)
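
For reference, a minimal client sketch for the endpoints above, assuming the server is running locally on the default host and port from `app.run`. `/leaderboard` returns rows in the order produced by `utils.leaderboard_data`: name, win rate, average loss, weight, UID, block.

```python
import requests

BASE_URL = "http://localhost:5000"  # assumes the default host/port above

# Plain-text timestamp of the last state reload.
print(requests.get(f"{BASE_URL}/updated").text)

# Leaderboard rows: [name, win_rate, avg_loss, weight, uid, block].
rows = requests.get(f"{BASE_URL}/leaderboard", params={"show_stale": "true"}).json()
for name, win_rate, avg_loss, weight, uid, block in rows[:5]:
    print(uid, name, avg_loss)
```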
app.py ADDED
@@ -0,0 +1,133 @@
+ # Code adapted from: https://huggingface.co/spaces/RaoFoundation/pretraining-leaderboard/blob/main/app.py
+
+ import os
+ import datetime
+ import gradio as gr
+
+ from dotenv import load_dotenv
+ from huggingface_hub import HfApi
+ from apscheduler.schedulers.background import BackgroundScheduler
+
+ import utils
+
+ FONT = (
+     """<link href="https://fonts.cdnfonts.com/css/jmh-typewriter" rel="stylesheet">"""
+ )
+ TITLE = """<h1 align="center" id="space-title" class="typewriter">Subnet 9 Leaderboard</h1>"""
+ HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/macrocosm-os/pretraining" target="_blank">Subnet 9</a> is a <a href="https://bittensor.com/" target="_blank">Bittensor</a> subnet that rewards miners for producing pretrained foundation models on the <a href="https://huggingface.co/datasets/tiiuae/falcon-refinedweb" target="_blank">Falcon Refined Web dataset</a>. It acts as a continuous benchmark in which miners are rewarded for attaining the best losses on randomly sampled pages of Falcon.<br/>The models with the best head-to-head loss on the evaluation data receive a steady emission of TAO.</h2>"""
+
+ EVALUATION_DETAILS = """<ul><li><b>Name:</b> the 🤗 Hugging Face model name (click to go to the model card)</li><li><b>Rewards / Day:</b> the expected rewards per day based on current ranking.</li><li><b>Last Average Loss:</b> the last loss value on the evaluation data for the model as calculated by a validator (lower is better)</li><li><b>UID:</b> the Bittensor UID of the miner</li><li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-9/" target="_blank">taostats</a>."""
+ EVALUATION_HEADER = """<h3 align="center">Shows the latest internal evaluation statistics as calculated by the Opentensor validator</h3>"""
+
+
+ HF_REPO_ID = "macrocosm-os/pretraining-leaderboard"
+ SECONDS_PER_BLOCK = 12
+
+ load_dotenv()
+
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
+ API = HfApi(token=HF_TOKEN)
+
+
+ def get_next_update_div(current_block: int, next_update_block: int) -> str:
+     now = datetime.datetime.now()
+     blocks_to_go = next_update_block - current_block
+     next_update_time = now + datetime.timedelta(
+         seconds=blocks_to_go * SECONDS_PER_BLOCK
+     )
+     delta = next_update_time - now
+     return f"""<div align="center" style="font-size: larger;">Next reward update: <b>{blocks_to_go}</b> blocks (~{int(delta.total_seconds() // 60)} minutes)</div>"""
+
+
+ def get_last_updated_div() -> str:
+     return f"""<div>Last Updated: {datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")} (UTC)</div>"""
+
+
+ def restart_space():
+     API.restart_space(repo_id=HF_REPO_ID, token=HF_TOKEN)
+
+
+ def main():
+     # To avoid leaderboard failures, load_state_vars retries indefinitely
+     # until we have all the data needed to populate the dashboard.
+     state_vars = utils.load_state_vars()
+     model_data = state_vars["model_data"]
+     vali_runs = state_vars["vali_runs"]
+     scores = state_vars["scores"]
+     validator_df = state_vars["validator_df"]
+     benchmarks = state_vars.get("benchmarks", None)
+     benchmark_timestamp = state_vars.get("benchmark_timestamp", None)
+
+     demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
+     with demo:
+         gr.HTML(FONT)
+         gr.HTML(TITLE)
+         gr.HTML(HEADER)
+
+         # TODO: Re-enable once the '"SubtensorModule.BlocksSinceEpoch" not found' issue is resolved.
+         # gr.HTML(value=get_next_update_div(current_block, next_epoch_block))
+
+         gr.Label(
+             value={
+                 f"{c.namespace}/{c.name} ({c.commit[0:8]}) · (τ{round(c.emission, 2):,})": c.incentive
+                 for c in model_data
+                 if c.incentive
+             },
+             num_top_classes=10,
+         )
+         if benchmarks is not None:
+             with gr.Accordion("Top Model Benchmarks"):
+                 gr.components.Dataframe(benchmarks)
+                 gr.HTML("""<div>PPL computed using a stride of 512. See <a href='https://github.com/macrocosm-os/pretraining/blob/dev/scripts/run_benchmarks.py'>here</a> for the full code.</div>""")
+                 gr.HTML(f"""<div>Last Updated: {benchmark_timestamp.strftime("%Y-%m-%d %H:%M:%S")} (UTC)</div>""")
+
+         with gr.Accordion("Evaluation Stats"):
+             gr.HTML(EVALUATION_HEADER)
+             show_stale = gr.Checkbox(label="Show Stale", interactive=True)
+             leaderboard_table = gr.components.Dataframe(
+                 value=utils.leaderboard_data(model_data, scores, show_stale.value),
+                 headers=["Name", "Win Rate", "Average Loss", "Weight", "UID", "Block"],
+                 datatype=["markdown", "number", "number", "number", "number", "number"],
+                 elem_id="leaderboard-table",
+                 interactive=False,
+                 visible=True,
+             )
+             gr.HTML(EVALUATION_DETAILS)
+             show_stale.change(
+                 lambda stale: utils.leaderboard_data(model_data, scores, stale),
+                 inputs=[show_stale],
+                 outputs=leaderboard_table,
+             )
+
+             gr.LinePlot(
+                 utils.get_losses_over_time(vali_runs),
+                 x="timestamp",
+                 x_title="Date",
+                 y="best_loss",
+                 y_title="Average Loss",
+                 tooltip="best_loss",
+                 interactive=True,
+                 visible=True,
+                 width=1024,
+                 title="Best Average Loss Over Time",
+             )
+
+         with gr.Accordion("Validator Stats"):
+             gr.components.Dataframe(
+                 utils.make_validator_dataframe(validator_df, model_data),
+                 interactive=False,
+                 visible=True,
+             )
+         gr.HTML(value=get_last_updated_div())
+
+     scheduler = BackgroundScheduler()
+     scheduler.add_job(
+         restart_space, "interval", seconds=60 * 30
+     )  # restart every 30 minutes
+     scheduler.start()
+
+     demo.launch()
+
+
+ main()
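
For local runs, a sketch of the environment variables the dashboard and its helpers read (all loaded via `load_dotenv`, so a `.env` file works too). The names come from app.py and utils.py; the values below are placeholders:

```python
import os

# app.py: token used by HfApi to restart the Space on a schedule.
os.environ["HF_TOKEN"] = "hf_..."
# utils.py: used to read validator runs from Weights & Biases.
os.environ["WANDB_API_KEY"] = "..."
# utils.py: optional custom chain endpoint; falls back to "finney" if unset.
os.environ["SUBTENSOR_ENDPOINT"] = "wss://..."
# utils.py: any non-empty value enables loading benchmark data.
os.environ["BENCHMARK_FLAG"] = "1"
```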
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ bittensor
+ requests
+ wandb
+ python-dotenv
+ APScheduler
+ huggingface-hub
+ gradio
+ pandas
+ flask
+
utils.py ADDED
@@ -0,0 +1,448 @@
+
+ import os
+ import math
+ import time
+ import json
+ import wandb
+ import pickle
+ import datetime
+ import argparse
+ import functools
+ import traceback
+
+ import pandas as pd
+ import numpy as np
+ import bittensor as bt
+
+ from dotenv import load_dotenv
+ from dataclasses import dataclass
+ from typing import Dict, List, Any, Optional, Tuple
+ from bittensor.extrinsics.serving import get_metadata
+
+
+ NETUID = 9
+ DELAY_SECS = 3
+ RETRIES = 3
+
+ load_dotenv()
+
+ WANDB_TOKEN = os.environ.get("WANDB_API_KEY", None)
+ SUBTENSOR_ENDPOINT = os.environ.get("SUBTENSOR_ENDPOINT", None)
+ VALIDATOR_WANDB_PROJECT = "opentensor-dev/pretraining-subnet"
+ BENCHMARK_WANDB_PROJECT = "pretraining-benchmark-data"
+ BENCHMARK_FLAG = os.environ.get("BENCHMARK_FLAG", None)
+
+
+ @dataclass
+ class ModelData:
+     uid: int
+     hotkey: str
+     namespace: str
+     name: str
+     commit: str
+     hash: str
+     block: int
+     incentive: float
+     emission: float
+
+     @classmethod
+     def from_compressed_str(
+         cls,
+         uid: int,
+         hotkey: str,
+         cs: str,
+         block: int,
+         incentive: float,
+         emission: float,
+     ) -> "ModelData":
+         """Returns an instance of this class from a compressed string representation."""
+         tokens = cs.split(":")
+         return ModelData(
+             uid=uid,
+             hotkey=hotkey,
+             namespace=tokens[0],
+             name=tokens[1],
+             commit=tokens[2] if tokens[2] != "None" else None,
+             hash=tokens[3] if tokens[3] != "None" else None,
+             block=block,
+             incentive=incentive,
+             emission=emission,
+         )
+
+
+ def run_with_retries(func, *args, **kwargs):
+     """Calls func, retrying up to RETRIES times with DELAY_SECS between attempts."""
+     for i in range(RETRIES):
+         try:
+             return func(*args, **kwargs)
+         except Exception:
+             bt.logging.error(f"Failed to run function: {traceback.format_exc()}")
+             if i == RETRIES - 1:
+                 raise
+             time.sleep(DELAY_SECS)
+     raise RuntimeError("Should never happen")
+
+
+ def get_subtensor_and_metagraph() -> Tuple[bt.subtensor, bt.metagraph]:
+     def _internal() -> Tuple[bt.subtensor, bt.metagraph]:
+         if SUBTENSOR_ENDPOINT:
+             parser = argparse.ArgumentParser()
+             bt.subtensor.add_args(parser)
+             subtensor = bt.subtensor(config=bt.config(parser=parser, args=["--subtensor.chain_endpoint", SUBTENSOR_ENDPOINT]))
+         else:
+             subtensor = bt.subtensor("finney")
+
+         metagraph = subtensor.metagraph(NETUID, lite=False)
+
+         return subtensor, metagraph
+
+     return run_with_retries(_internal)
+
+
+ def get_subnet_data(
+     subtensor: bt.subtensor, metagraph: bt.metagraph
+ ) -> List[ModelData]:
+     result = []
+     for uid in metagraph.uids.tolist():
+         hotkey = metagraph.hotkeys[uid]
+         metadata = None
+         try:
+             metadata = run_with_retries(
+                 functools.partial(get_metadata, subtensor, metagraph.netuid, hotkey)
+             )
+         except Exception:
+             print(f"Failed to get metadata for UID {uid}: {traceback.format_exc()}")
+
+         if not metadata:
+             continue
+
+         commitment = metadata["info"]["fields"][0]
+         hex_data = commitment[list(commitment.keys())[0]][2:]
+         chain_str = bytes.fromhex(hex_data).decode()
+         block = metadata["block"]
+
+         incentive = np.nan_to_num(metagraph.incentive[uid]).item()
+         emission = (
+             np.nan_to_num(metagraph.emission[uid]).item() * 20
+         )  # convert to daily TAO
+
+         model_data = None
+         try:
+             model_data = ModelData.from_compressed_str(
+                 uid, hotkey, chain_str, block, incentive, emission
+             )
+         except Exception:
+             # Skip models whose on-chain commitment doesn't parse.
+             continue
+
+         result.append(model_data)
+     return result
+
+
+ def get_wandb_runs(project: str, filters: Dict[str, Any]) -> List:
+     """Get the latest runs from W&B, retrying indefinitely until we get them."""
+     while True:
+         api = wandb.Api(api_key=WANDB_TOKEN)
+         runs = list(
+             api.runs(
+                 project,
+                 filters=filters,
+             )
+         )
+         if len(runs) > 0:
+             return runs
+         # The W&B API is quite unreliable. Wait another minute and try again.
+         bt.logging.error("Failed to get runs from W&B. Trying again in 60 seconds.")
+         time.sleep(60)
+
+
+ def get_scores(
+     uids: List[int],
+     wandb_runs: List,
+ ) -> Dict[int, Dict[str, Optional[float]]]:
+     result = {}
+     previous_timestamp = None
+     # Iterate through the runs until we've processed all the uids.
+     for i, run in enumerate(wandb_runs):
+         if "original_format_json" not in run.summary:
+             continue
+         data = json.loads(run.summary["original_format_json"])
+         all_uid_data = data["uid_data"]
+         timestamp = data["timestamp"]
+         # Make sure runs are indeed in descending time order.
+         # assert (
+         #     previous_timestamp is None or timestamp < previous_timestamp
+         # ), f"Timestamps are not in descending order: {timestamp} >= {previous_timestamp}"
+         previous_timestamp = timestamp
+
+         for uid in uids:
+             if uid in result:
+                 continue
+             if str(uid) in all_uid_data:
+                 uid_data = all_uid_data[str(uid)]
+                 # Only the most recent run is fresh.
+                 is_fresh = i == 0
+                 result[uid] = {
+                     "avg_loss": uid_data.get("average_loss", None),
+                     "win_rate": uid_data.get("win_rate", None),
+                     "win_total": uid_data.get("win_total", None),
+                     "weight": uid_data.get("weight", None),
+                     "fresh": is_fresh,
+                 }
+         if len(result) == len(uids):
+             break
+     return result
+
+
+ def get_validator_weights(
+     metagraph: bt.metagraph,
+ ) -> Dict[int, Tuple[float, int, Dict[int, float]]]:
+     """Returns a dictionary of validator UID to (vtrust, stake, {uid: weight})."""
+     ret = {}
+     for uid in metagraph.uids.tolist():
+         vtrust = metagraph.validator_trust[uid].item()
+         stake = metagraph.stake[uid].item()
+         if vtrust > 0 and stake > 10_000:
+             ret[uid] = (vtrust, stake, {})
+             for ouid in metagraph.uids.tolist():
+                 if ouid == uid:
+                     continue
+                 weight = round(metagraph.weights[uid][ouid].item(), 4)
+                 if weight > 0:
+                     ret[uid][-1][ouid] = weight
+     return ret
+
+
+ def get_losses_over_time(wandb_runs: List) -> pd.DataFrame:
+     """Returns a dataframe of the best average model loss over time."""
+     timestamps = []
+     best_losses = []
+
+     for run in wandb_runs:
+         if "original_format_json" not in run.summary:
+             continue
+         data = json.loads(run.summary["original_format_json"])
+         all_uid_data = data["uid_data"]
+         timestamp = datetime.datetime.fromtimestamp(data["timestamp"])
+         best_loss = math.inf
+         for _, uid_data in all_uid_data.items():
+             loss = uid_data.get("average_loss", math.inf)
+             # Filter out the numbers from the exploit and from when validators
+             # lost the best model.
+             if (
+                 loss < best_loss
+                 and (loss > 2.5 or timestamp > datetime.datetime(2024, 2, 12))
+                 and (loss < 5 or timestamp > datetime.datetime(2024, 3, 27))
+             ):
+                 best_loss = uid_data["average_loss"]
+         if best_loss != math.inf:
+             timestamps.append(timestamp)
+             best_losses.append(best_loss)
+
+     return pd.DataFrame({"timestamp": timestamps, "best_loss": best_losses})
+
+
+ def next_epoch(subtensor: bt.subtensor, block: int) -> int:
+     return (
+         block
+         + subtensor.get_subnet_hyperparameters(NETUID).tempo
+         - subtensor.blocks_since_epoch(NETUID, block)
+     )
+
+
+ def is_floatable(x) -> bool:
+     return (
+         isinstance(x, float) and not math.isnan(x) and not math.isinf(x)
+     ) or isinstance(x, int)
+
+
+ def format_score(uid: int, scores, key) -> Optional[float]:
+     if uid in scores:
+         if key in scores[uid]:
+             point = scores[uid][key]
+             if is_floatable(point):
+                 return round(scores[uid][key], 4)
+     return None
+
+
+ def leaderboard_data(
+     leaderboard: List[ModelData],
+     scores: Dict[int, Dict[str, Optional[float]]],
+     show_stale: bool,
+ ) -> List[List[Any]]:
+     """Returns the leaderboard data, based on models data and UID scores."""
+     return [
+         [
+             f"[{c.namespace}/{c.name} ({c.commit[0:8]})](https://huggingface.co/{c.namespace}/{c.name}/commit/{c.commit})",
+             format_score(c.uid, scores, "win_rate"),
+             format_score(c.uid, scores, "avg_loss"),
+             format_score(c.uid, scores, "weight"),
+             c.uid,
+             c.block,
+         ]
+         for c in leaderboard
+         if (c.uid in scores and scores[c.uid]["fresh"]) or show_stale
+     ]
+
+
+ def get_benchmarks() -> Tuple[pd.DataFrame, datetime.datetime]:
+     """Returns the latest benchmarks and the time they were run."""
+     if not BENCHMARK_WANDB_PROJECT:
+         bt.logging.error("No benchmark project set.")
+         return None, None
+     runs = get_wandb_runs(project=BENCHMARK_WANDB_PROJECT, filters=None)
+     for run in runs[::-1]:
+         artifacts = list(run.logged_artifacts())
+         if artifacts:
+             table = artifacts[-1].get("benchmarks")
+             if table:
+                 return table.get_dataframe(), datetime.datetime.strptime(run.metadata["startedAt"], "%Y-%m-%dT%H:%M:%S.%f")
+     bt.logging.error("Failed to get benchmarks from W&B.")
+     return None, None
+
+
+ def make_validator_dataframe(
+     validator_df: Dict[int, Tuple[float, int, Dict[int, float]]],
+     model_data: List[ModelData],
+ ) -> pd.DataFrame:
+     values = [
+         [uid, int(validator_df[uid][1]), round(validator_df[uid][0], 4)]
+         + [
+             validator_df[uid][-1].get(c.uid)
+             for c in model_data
+             if c.incentive
+         ]
+         for uid, _ in sorted(
+             zip(
+                 validator_df.keys(),
+                 [validator_df[x][1] for x in validator_df.keys()],
+             ),
+             key=lambda x: x[1],
+             reverse=True,
+         )
+     ]
+     dtypes = {"UID": int, "Stake (τ)": float, "V-Trust": float}
+     dtypes.update({
+         f"{c.namespace}/{c.name} ({c.commit[0:8]})": float
+         for c in model_data
+         if c.incentive
+     })
+     return pd.DataFrame(values, columns=dtypes.keys()).astype(dtypes)
+
+
+ def make_metagraph_dataframe(metagraph: bt.metagraph, weights=False) -> pd.DataFrame:
+     cols = ['stake', 'emission', 'trust', 'validator_trust', 'dividends', 'incentive', 'R', 'consensus', 'validator_permit']
+
+     frame = pd.DataFrame({k: getattr(metagraph, k) for k in cols})
+     frame['block'] = metagraph.block.item()
+     frame['netuid'] = NETUID
+     frame['uid'] = range(len(frame))
+     frame['hotkey'] = [axon.hotkey for axon in metagraph.axons]
+     frame['coldkey'] = [axon.coldkey for axon in metagraph.axons]
+     if weights and metagraph.W is not None:
+         # Convert the NxN tensor to a list of lists so it fits into the dataframe.
+         frame['weights'] = [w.tolist() for w in metagraph.W]
+
+     return frame
+
+
+ def load_state_vars() -> Dict[str, Any]:
+     while True:
+         try:
+             subtensor, metagraph = get_subtensor_and_metagraph()
+
+             bt.logging.success("Loaded subtensor and metagraph")
+
+             model_data: List[ModelData] = get_subnet_data(subtensor, metagraph)
+             model_data.sort(key=lambda x: x.incentive, reverse=True)
+
+             bt.logging.success(f'Loaded {len(model_data)} models')
+             vali_runs = get_wandb_runs(project=VALIDATOR_WANDB_PROJECT, filters={"config.type": "validator", "config.uid": 238})
+
+             scores = get_scores([x.uid for x in model_data], vali_runs)
+
+             # TODO: Re-enable once the '"SubtensorModule.BlocksSinceEpoch" not found' issue is resolved.
+             # current_block = metagraph.block.item()
+             # next_epoch_block = next_epoch(subtensor, current_block)
+
+             validator_df = get_validator_weights(metagraph)
+             weight_keys = set()
+             for uid, stats in validator_df.items():
+                 weight_keys.update(stats[-1].keys())
+
+             # Enable benchmarks if the flag is set.
+             if BENCHMARK_FLAG:
+                 benchmarks, benchmark_timestamp = get_benchmarks()
+             else:
+                 benchmarks, benchmark_timestamp = None, None
+             break
+
+         except KeyboardInterrupt:
+             bt.logging.error("Exiting...")
+             # Re-raise so we don't fall through to the return below with
+             # unbound state variables.
+             raise
+
+         except Exception:
+             print(f"Failed to get data: {traceback.format_exc()}")
+             time.sleep(30)
+
+     return {
+         'metagraph': metagraph,
+         "model_data": model_data,
+         "vali_runs": vali_runs,
+         "scores": scores,
+         "validator_df": validator_df,
+         "benchmarks": benchmarks,
+         "benchmark_timestamp": benchmark_timestamp,
+     }
+
+
+ def test_load_state_vars():
+     subtensor = bt.subtensor("finney")
+     metagraph = subtensor.metagraph(NETUID, lite=True)
+     model_data = [
+         ModelData(uid=253, hotkey='5DjoPAgZ54Zf6NsuiVYh8RjonnWWWREE2iXBNzM2VDBMQDPm', namespace='jw-hf-test', name='jw2', commit='aad131f6b02219964e6dcf749c2a23e75a7ceca8', hash='L1ImYzWJwV+9KSnZ2TYW0Iy2KMcVjJVTd30YJoRkpbw=', block=3131103, incentive=1.0, emission=209.06051635742188),
+         ModelData(uid=1, hotkey='5CccVtjk4yamCao6QYgEg7jc8vktdj16RbLKNUftHfEsjuJS', namespace='borggAI', name='bittensor-subnet9-models', commit='d373864bc6c972872edb8db95eed570958054bac', hash='+drdTIKYEGYClW2FFVVID6A2Dh//4rLmExRFCJsH6Y4=', block=2081837, incentive=0.0, emission=0.0),
+         ModelData(uid=2, hotkey='5HYwoXaczs3jAptbb5mk4aUCkgZqeNcNzJKxSec97GwasfLy', namespace='jungiebeen', name='pretrain1', commit='4c0c6bfd0f92e243d6c8a82209142e7204c852c3', hash='ld/agc0XIWICom/Cpj0fkQLcMogMNj/F65MJogK5RLY=', block=2467482, incentive=0.0, emission=0.0),
+         ModelData(uid=3, hotkey='5Dnb6edh9yTeEp5aasRPZVPRAkxvQ6qnERVcXw22awMZ5rxm', namespace='jungiebeen', name='pretrain2', commit='e827b7281c92224adb11124489cc45356553a87a', hash='ld/agc0XIWICom/Cpj0fkQLcMogMNj/F65MJogK5RLY=', block=2467497, incentive=0.0, emission=0.0),
+         ModelData(uid=4, hotkey='5FRfca8NbnH424WaX43PMhKBnbLA1bZpRRoXXiVs6HgsxN4K', namespace='ZainAli60', name='mine_modeles', commit='8a4ed4ad1f1fb58d424fd22e8e9874b87d32917c', hash='tVcbZAFoNIOF+Ntxq31OQ2NrLXf5iFCmmPUJlpkMYYo=', block=2508509, incentive=0.0, emission=0.0),
+     ]
+     vali_runs = get_wandb_runs(project=VALIDATOR_WANDB_PROJECT, filters={"config.type": "validator", "config.uid": 238})
+
+     scores = get_scores([x.uid for x in model_data], vali_runs)
+
+     validator_df = {
+         28: (1.0, 33273.4453125, {253: 1.0}),
+         49: (0.9127794504165649,
+              10401.677734375,
+              {7: 0.0867,
+               217: 0.0001,
+               219: 0.0001,
+               241: 0.0001,
+               248: 0.0001,
+               253: 0.9128}),
+         78: (1.0, 26730.37109375, {253: 1.0}),
+         116: (1.0, 629248.4375, {253: 1.0}),
+         150: (1.0, 272634.53125, {253: 1.0}),
+         161: (1.0, 280212.53125, {253: 1.0}),
+         180: (1.0, 16838.0, {253: 1.0}),
+         184: (1.0, 47969.3984375, {253: 1.0}),
+         210: (1.0, 262846.28125, {253: 1.0}),
+         213: (1.0, 119462.734375, {253: 1.0}),
+         215: (1.0, 274747.46875, {253: 1.0}),
+         234: (1.0, 38831.6953125, {253: 1.0}),
+         236: (1.0, 183966.9375, {253: 1.0}),
+         238: (1.0, 1293707.25, {253: 1.0}),
+         240: (1.0, 106461.6015625, {253: 1.0}),
+         243: (1.0, 320271.5, {253: 1.0}),
+         244: (1.0, 116138.9609375, {253: 1.0}),
+         247: (0.9527428150177002, 119812.390625, {7: 0.0472, 253: 0.9528}),
+         249: (1.0, 478127.3125, {253: 1.0}),
+         252: (1.0, 442395.03125, {253: 1.0}),
+         254: (1.0, 46845.2109375, {253: 1.0}),
+         255: (1.0, 28977.56640625, {253: 1.0}),
+     }
+
+     return {
+         'metagraph': metagraph,
+         "model_data": model_data,
+         "vali_runs": vali_runs,
+         "scores": scores,
+         "validator_df": validator_df,
+     }
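
As a usage illustration, a small sketch of the colon-separated commitment format that `ModelData.from_compressed_str` parses; the UID, hotkey, and model names below are made up:

```python
from utils import ModelData

# Commitment format: "namespace:name:commit:hash" (the literal string "None"
# marks a missing commit or hash).
m = ModelData.from_compressed_str(
    uid=0,
    hotkey="5F...",  # placeholder hotkey
    cs="my-namespace:my-model:aad131f6b02219964e6dcf749c2a23e75a7ceca8:None",
    block=3131103,
    incentive=0.5,
    emission=10.0,
)
print(m.namespace, m.name, m.commit[:8], m.hash)  # my-namespace my-model aad131f6 None
```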