Sarkosos commited on
Commit
f40a2d9
·
1 Parent(s): 5a605f5

Updated the dashboard to run via an api, reworked api to use fastapi

Browse files
Files changed (4) hide show
  1. api.py +27 -84
  2. app.py +55 -60
  3. classes.py +17 -0
  4. utils.py +123 -40
api.py CHANGED
@@ -2,25 +2,36 @@
2
  import atexit
3
  import datetime
4
 
5
- from flask import Flask, request, jsonify
6
  from apscheduler.schedulers.background import BackgroundScheduler
7
-
8
  import utils
 
 
 
 
9
 
10
- app = Flask(__name__)
11
 
12
 
13
  # Global variables (saves time on loading data)
14
  state_vars = None
15
  reload_timestamp = datetime.datetime.now().strftime('%D %T')
16
 
 
 
 
 
17
 
18
  def load_data():
19
  """
20
  Reload the state variables
21
  """
22
- global state_vars, reload_timestamp
23
- state_vars = utils.load_state_vars()
 
 
 
 
 
24
 
25
  reload_timestamp = datetime.datetime.now().strftime('%D %T')
26
 
@@ -36,110 +47,42 @@ def start_scheduler():
36
  atexit.register(lambda: scheduler.shutdown())
37
 
38
 
39
- @app.route('/', methods=['GET'])
40
  def home():
41
  return "Welcome to the Bittensor Protein Folding Leaderboard API!"
42
 
43
 
44
- @app.route('/updated', methods=['GET'])
45
  def updated():
46
  return reload_timestamp
47
 
48
 
49
- @app.route('/data', methods=['GET'])
50
- @app.route('/data/<period>', methods=['GET'])
51
- def data(period=None):
52
- """
53
- Get the productivity metrics
54
- """
55
- assert period in ('24h', None), f"Invalid period: {period}. Must be '24h' or None."
56
- df = state_vars["dataframe_24h"] if period == '24h' else state_vars["dataframe"]
57
- return jsonify(
58
- df.astype(str).to_dict(orient='records')
59
- )
60
-
61
- @app.route('/productivity', methods=['GET'])
62
- @app.route('/productivity/<period>', methods=['GET'])
63
- def productivity_metrics(period=None):
64
  """
65
  Get the productivity metrics
66
  """
67
 
68
- assert period in ('24h', None), f"Invalid period: {period}. Must be '24h' or None."
69
- df = state_vars["dataframe_24h"] if period == '24h' else state_vars["dataframe"]
70
- return jsonify(
71
- utils.get_productivity(df)
72
- )
73
 
74
 
75
- @app.route('/throughput', methods=['GET'])
76
- @app.route('/throughput/<period>', methods=['GET'])
77
- def throughput_metrics(period=None):
78
  """
79
  Get the throughput metrics
80
  """
81
- assert period in ('24h', None), f"Invalid period: {period}. Must be '24h' or None."
82
- df = state_vars["dataframe_24h"] if period == '24h' else state_vars["dataframe"]
83
- return jsonify(utils.get_data_transferred(df))
84
-
85
-
86
- @app.route('/metagraph', methods=['GET'])
87
- def metagraph():
88
- """
89
- Get the metagraph data
90
- Returns:
91
- - metagraph_data: List of dicts (from pandas DataFrame)
92
- """
93
-
94
- df_m = state_vars["metagraph"]
95
-
96
- return jsonify(
97
- df_m.to_dict(orient='records')
98
- )
99
-
100
- @app.route('/leaderboard', methods=['GET'])
101
- @app.route('/leaderboard/<entity>', methods=['GET'])
102
- @app.route('/leaderboard/<entity>/<ntop>', methods=['GET'])
103
- def leaderboard(entity='identity',ntop=10):
104
- """
105
- Get the leaderboard data
106
- Returns:
107
- - leaderboard_data: List of dicts (from pandas DataFrame)
108
- """
109
-
110
- assert entity in utils.ENTITY_CHOICES, f"Invalid entity choice: {entity}"
111
-
112
- df_miners = utils.get_leaderboard(
113
- state_vars["metagraph"],
114
- ntop=int(ntop),
115
- entity_choice=entity
116
- )
117
-
118
- return jsonify(
119
- df_miners.to_dict(orient='records')
120
- )
121
-
122
- @app.route('/validator', methods=['GET'])
123
- def validator():
124
- """
125
- Get the validator data
126
- Returns:
127
- - validator_data: List of dicts (from pandas DataFrame)
128
- """
129
- df_m = state_vars["metagraph"]
130
- df_validators = df_m.loc[df_m.validator_trust > 0]
131
 
132
- return jsonify(
133
- df_validators.to_dict(orient='records')
134
- )
135
 
136
 
137
  if __name__ == '__main__':
138
 
139
  load_data()
 
140
  start_scheduler()
141
 
142
- app.run(host='0.0.0.0', port=5001, debug=True)
143
 
144
 
145
  # to test locally
 
2
  import atexit
3
  import datetime
4
 
 
5
  from apscheduler.schedulers.background import BackgroundScheduler
6
+ from fastapi import FastAPI
7
  import utils
8
+ import pandas as pd
9
+ import uvicorn
10
+
11
+ from classes import Productivity, Throughput
12
 
 
13
 
14
 
15
  # Global variables (saves time on loading data)
16
  state_vars = None
17
  reload_timestamp = datetime.datetime.now().strftime('%D %T')
18
 
19
+ data_all = None
20
+ data_24h = None
21
+
22
+ app = FastAPI()
23
 
24
  def load_data():
25
  """
26
  Reload the state variables
27
  """
28
+ global data_all, data_24h, reload_timestamp
29
+
30
+ utils.fetch_new_runs()
31
+
32
+ data_all = utils.preload_data()
33
+
34
+ data_24h = (pd.Timestamp.now() - data_all['updated_at'].apply(lambda x: pd.Timestamp(x)) < pd.Timedelta('1 days'))
35
 
36
  reload_timestamp = datetime.datetime.now().strftime('%D %T')
37
 
 
47
  atexit.register(lambda: scheduler.shutdown())
48
 
49
 
50
+ @app.get("/")
51
  def home():
52
  return "Welcome to the Bittensor Protein Folding Leaderboard API!"
53
 
54
 
55
+ @app.get("/updated")
56
  def updated():
57
  return reload_timestamp
58
 
59
 
60
+ @app.get("/productivity", response_model=Productivity)
61
+ def productivity_metrics():
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  """
63
  Get the productivity metrics
64
  """
65
 
66
+ return Productivity(all_time=utils.get_productivity(data_all), last_24h=utils.get_productivity(data_24h))
 
 
 
 
67
 
68
 
69
+ @app.get("/throughput", response_model=Throughput)
70
+ def throughput_metrics():
 
71
  """
72
  Get the throughput metrics
73
  """
74
+
75
+ return Throughput(all_time=utils.get_data_transferred(data_all), last_24h=utils.get_data_transferred(data_24h))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
 
 
 
77
 
78
 
79
  if __name__ == '__main__':
80
 
81
  load_data()
82
+
83
  start_scheduler()
84
 
85
+ uvicorn.run(app, host='0.0.0.0', port=5001)
86
 
87
 
88
  # to test locally
app.py CHANGED
@@ -2,6 +2,7 @@ import time
2
  import pandas as pd
3
  import streamlit as st
4
  import plotly.express as px
 
5
 
6
  import utils
7
 
@@ -14,21 +15,13 @@ Simulation duration distribution
14
  """
15
 
16
  UPDATE_INTERVAL = 3600
 
17
 
18
 
19
  st.title('Folding Subnet Dashboard')
20
  st.markdown('<br>', unsafe_allow_html=True)
21
 
22
- # reload data periodically
23
- df = utils.build_data(time.time()//UPDATE_INTERVAL)
24
- st.toast(f'Loaded {len(df)} runs')
25
 
26
- # TODO: fix the factor for 24 hours ago
27
- runs_alive_24h_ago = (df.last_event_at > pd.Timestamp.now() - pd.Timedelta('1d'))
28
- df_24h = df.loc[runs_alive_24h_ago]
29
- # correction factor to account for the fact that the data straddles the 24h boundary
30
- # correction factor is based on the fraction of the run which occurred in the last 24h
31
- # factor = (df_24h.last_event_at - pd.Timestamp.now() + pd.Timedelta('1d')) / pd.Timedelta('1d')
32
 
33
 
34
  #### ------ PRODUCTIVITY ------
@@ -37,34 +30,35 @@ df_24h = df.loc[runs_alive_24h_ago]
37
  st.subheader('Productivity overview')
38
  st.info('Productivity metrics show how many proteins have been folded, which is the primary goal of the subnet. Metrics are estimated using weights and biases data combined with heuristics.')
39
 
40
- productivity = utils.get_productivity(df)
41
- productivity_24h = utils.get_productivity(df_24h)
 
42
 
43
 
44
- m1, m2, m3 = st.columns(3)
45
- m1.metric('Unique proteins folded', f'{productivity.get("unique_folded"):,.0f}', delta=f'{productivity_24h.get("unique_folded"):,.0f} (24h)')
46
- m2.metric('Total proteins folded', f'{productivity.get("total_simulations"):,.0f}', delta=f'{productivity_24h.get("total_simulations"):,.0f} (24h)')
47
- m3.metric('Total simulation steps', f'{productivity.get("total_md_steps"):,.0f}', delta=f'{productivity_24h.get("total_md_steps"):,.0f} (24h)')
48
-
49
- st.markdown('<br>', unsafe_allow_html=True)
50
-
51
- time_binned_data = df.set_index('last_event_at').groupby(pd.Grouper(freq='12h'))
52
-
53
- PROD_CHOICES = {
54
- 'Unique proteins folded': 'unique_pdbs',
55
- 'Total simulations': 'total_pdbs',
56
- 'Total simulation steps': 'total_md_steps',
57
- }
58
- prod_choice_label = st.radio('Select productivity metric', list(PROD_CHOICES.keys()), index=0, horizontal=True)
59
- prod_choice = PROD_CHOICES[prod_choice_label]
60
- steps_running_total = time_binned_data[prod_choice].sum().cumsum()
61
- st.plotly_chart(
62
- # add fillgradient to make it easier to see the trend
63
- px.area(steps_running_total, y=prod_choice,
64
- labels={'last_event_at':'', prod_choice: prod_choice_label},
65
- ).update_traces(fill='tozeroy'),
66
- use_container_width=True,
67
- )
68
 
69
  st.markdown('<br>', unsafe_allow_html=True)
70
 
@@ -75,26 +69,27 @@ st.subheader('Throughput overview')
75
  st.info('Throughput metrics show the total amount of data sent and received by the validators. This is a measure of the network activity and the amount of data that is being processed by the subnet.')
76
 
77
  MEM_UNIT = 'GB' #st.radio('Select memory unit', ['TB','GB', 'MB'], index=0, horizontal=True)
 
78
 
79
- data_transferred = utils.get_data_transferred(df,unit=MEM_UNIT)
80
- data_transferred_24h = utils.get_data_transferred(df_24h, unit=MEM_UNIT)
81
 
82
  m1, m2, m3 = st.columns(3)
83
- m1.metric(f'Total sent data ({MEM_UNIT})', f'{data_transferred.get("sent"):,.0f}', delta=f'{data_transferred_24h.get("sent"):,.0f} (24h)')
84
- m2.metric(f'Total received data ({MEM_UNIT})', f'{data_transferred.get("received"):,.0f}', delta=f'{data_transferred_24h.get("received"):,.0f} (24h)')
85
- m3.metric(f'Total transferred data ({MEM_UNIT})', f'{data_transferred.get("total"):,.0f}', delta=f'{data_transferred_24h.get("total"):,.0f} (24h)')
86
 
87
 
88
- IO_CHOICES = {'total_data_sent':'Sent', 'total_data_received':'Received'}
89
- io_running_total = time_binned_data[list(IO_CHOICES.keys())].sum().rename(columns=IO_CHOICES).cumsum().melt(ignore_index=False)
90
- io_running_total['value'] = io_running_total['value'].apply(utils.convert_unit, args=(utils.BASE_UNITS, MEM_UNIT))
91
 
92
- st.plotly_chart(
93
- px.area(io_running_total, y='value', color='variable',
94
- labels={'last_event_at':'', 'value': f'Data transferred ({MEM_UNIT})', 'variable':'Direction'},
95
- ),
96
- use_container_width=True,
97
- )
98
 
99
  st.markdown('<br>', unsafe_allow_html=True)
100
 
@@ -127,14 +122,14 @@ st.markdown('<br>', unsafe_allow_html=True)
127
 
128
  #### ------ LOGGED RUNS ------
129
 
130
- st.subheader('Logged runs')
131
- st.info('The timeline shows the creation and last event time of each run.')
132
- st.plotly_chart(
133
- px.timeline(df, x_start='created_at', x_end='last_event_at', y='username', color='state',
134
- labels={'created_at':'Created at', 'last_event_at':'Last event at', 'username':''},
135
- ),
136
- use_container_width=True
137
- )
138
-
139
- with st.expander('Show raw run data'):
140
- st.dataframe(df)
 
2
  import pandas as pd
3
  import streamlit as st
4
  import plotly.express as px
5
+ import requests
6
 
7
  import utils
8
 
 
15
  """
16
 
17
  UPDATE_INTERVAL = 3600
18
+ BASE_URL = 'API_URL'
19
 
20
 
21
  st.title('Folding Subnet Dashboard')
22
  st.markdown('<br>', unsafe_allow_html=True)
23
 
 
 
 
24
 
 
 
 
 
 
 
25
 
26
 
27
  #### ------ PRODUCTIVITY ------
 
30
  st.subheader('Productivity overview')
31
  st.info('Productivity metrics show how many proteins have been folded, which is the primary goal of the subnet. Metrics are estimated using weights and biases data combined with heuristics.')
32
 
33
+ productivity_all = requests.get(f'{BASE_URL}/productivity').json()
34
+ productivity = productivity_all['all_time']
35
+ productivity_24h = productivity_all['last_24h']
36
 
37
 
38
+ m1, m2 = st.columns(2)
39
+ m1.metric('Unique proteins folded', f'{productivity["unique_folded"]:,.0f}', delta=f'{productivity_24h["unique_folded"]:,.0f} (24h)')
40
+ m2.metric('Total jobs completed', f'{productivity["total_completed_jobs"]:,.0f}', delta=f'{productivity_24h["total_completed_jobs"]:,.0f} (24h)')
41
+ # m3.metric('Total simulation steps', f'{productivity.get("total_md_steps"):,.0f}', delta=f'{productivity_24h.get("total_md_steps"):,.0f} (24h)')
42
+
43
+ # st.markdown('<br>', unsafe_allow_html=True)
44
+
45
+ # time_binned_data = df.set_index('last_event_at').groupby(pd.Grouper(freq='12h'))
46
+
47
+ # PROD_CHOICES = {
48
+ # 'Unique proteins folded': 'unique_pdbs',
49
+ # 'Total simulations': 'total_pdbs',
50
+ # 'Total simulation steps': 'total_md_steps',
51
+ # }
52
+ # prod_choice_label = st.radio('Select productivity metric', list(PROD_CHOICES.keys()), index=0, horizontal=True)
53
+ # prod_choice = PROD_CHOICES[prod_choice_label]
54
+ # steps_running_total = time_binned_data[prod_choice].sum().cumsum()
55
+ # st.plotly_chart(
56
+ # # add fillgradient to make it easier to see the trend
57
+ # px.area(steps_running_total, y=prod_choice,
58
+ # labels={'last_event_at':'', prod_choice: prod_choice_label},
59
+ # ).update_traces(fill='tozeroy'),
60
+ # use_container_width=True,
61
+ # )
62
 
63
  st.markdown('<br>', unsafe_allow_html=True)
64
 
 
69
  st.info('Throughput metrics show the total amount of data sent and received by the validators. This is a measure of the network activity and the amount of data that is being processed by the subnet.')
70
 
71
  MEM_UNIT = 'GB' #st.radio('Select memory unit', ['TB','GB', 'MB'], index=0, horizontal=True)
72
+ throughput = requests.get(f'{BASE_URL}/throughput').json()
73
 
74
+ data_transferred = throughput['all_time']
75
+ data_transferred_24h = throughput['last_24h']
76
 
77
  m1, m2, m3 = st.columns(3)
78
+ m1.metric(f'Total validator data sent ({MEM_UNIT})', f'{data_transferred["validator_sent"]:,.0f}', delta=f'{data_transferred_24h["validator_sent"]:,.0f} (24h)')
79
+ m2.metric(f'Total received data ({MEM_UNIT})', f'{data_transferred["miner_sent"]:,.0f}', delta=f'{data_transferred_24h["miner_sent"]:,.0f} (24h)')
80
+ m3.metric(f'Total transferred data ({MEM_UNIT})', f'{data_transferred["validator_sent"]+data_transferred["miner_sent"]:,.0f}', delta=f'{data_transferred_24h["validator_sent"]+data_transferred_24h["miner_sent"]:,.0f} (24h)')
81
 
82
 
83
+ # IO_CHOICES = {'total_data_sent':'Sent', 'total_data_received':'Received'}
84
+ # io_running_total = time_binned_data[list(IO_CHOICES.keys())].sum().rename(columns=IO_CHOICES).cumsum().melt(ignore_index=False)
85
+ # io_running_total['value'] = io_running_total['value'].apply(utils.convert_unit, args=(utils.BASE_UNITS, MEM_UNIT))
86
 
87
+ # st.plotly_chart(
88
+ # px.area(io_running_total, y='value', color='variable',
89
+ # labels={'last_event_at':'', 'value': f'Data transferred ({MEM_UNIT})', 'variable':'Direction'},
90
+ # ),
91
+ # use_container_width=True,
92
+ # )
93
 
94
  st.markdown('<br>', unsafe_allow_html=True)
95
 
 
122
 
123
  #### ------ LOGGED RUNS ------
124
 
125
+ # st.subheader('Logged runs')
126
+ # st.info('The timeline shows the creation and last event time of each run.')
127
+ # st.plotly_chart(
128
+ # px.timeline(df, x_start='created_at', x_end='last_event_at', y='username', color='state',
129
+ # labels={'created_at':'Created at', 'last_event_at':'Last event at', 'username':''},
130
+ # ),
131
+ # use_container_width=True
132
+ # )
133
+
134
+ # with st.expander('Show raw run data'):
135
+ # st.dataframe(df)
classes.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+
3
+ class ProductivityData(BaseModel):
4
+ unique_folded: int
5
+ total_completed_jobs: int
6
+
7
+ class Productivity(BaseModel):
8
+ all_time: ProductivityData
9
+ last_24h: ProductivityData
10
+
11
+ class ThroughputData(BaseModel):
12
+ validator_sent: float
13
+ miner_sent: float
14
+
15
+ class Throughput(BaseModel):
16
+ all_time: ThroughputData
17
+ last_24h: ThroughputData
utils.py CHANGED
@@ -5,6 +5,7 @@ import wandb
5
  import streamlit as st
6
  import pandas as pd
7
  import bittensor as bt
 
8
 
9
 
10
  # TODO: Store the runs dataframe (as in sn1 dashboard) and top up with the ones created since the last snapshot
@@ -15,7 +16,7 @@ import bittensor as bt
15
  MIN_STEPS = 12 # minimum number of steps in wandb run in order to be worth analyzing
16
  MAX_RUNS = 100#0000
17
  NETUID = 25
18
- BASE_PATHS = ['macrocosmos/folding-validators--moved', 'macrocosmos/folding-validators'] # added historical data from otf wandb and current data
19
  NETWORK = 'finney'
20
  KEYS = None
21
  ABBREV_CHARS = 8
@@ -23,7 +24,12 @@ ENTITY_CHOICES = ('identity', 'hotkey', 'coldkey')
23
 
24
  PDBS_PER_RUN_STEP = 0.083
25
  AVG_MD_STEPS = 30_000
26
- BASE_UNITS = 'MB'
 
 
 
 
 
27
 
28
  api = wandb.Api(timeout=120, api_key='cdcbe340bb7937d3a289d39632491d12b39231b7')
29
 
@@ -47,24 +53,24 @@ EXTRACTORS = {
47
  'run_id': lambda x: x.id,
48
  'user': lambda x: x.user.name[:16],
49
  'username': lambda x: x.user.username[:16],
50
- 'created_at': lambda x: pd.Timestamp(x.created_at),
51
- 'last_event_at': lambda x: pd.Timestamp(x.summary.get('_timestamp'), unit='s'),
52
 
53
  'netuid': lambda x: x.config.get('netuid'),
54
  'mock': lambda x: x.config.get('neuron').get('mock'),
55
  'sample_size': lambda x: x.config.get('neuron').get('sample_size'),
56
  'queue_size': lambda x: x.config.get('neuron').get('queue_size'),
57
  'timeout': lambda x: x.config.get('neuron').get('timeout'),
58
- 'update_interval': lambda x: x.config.get('neuron').get('update_interval'),
59
  'epoch_length': lambda x: x.config.get('neuron').get('epoch_length'),
60
  'disable_set_weights': lambda x: x.config.get('neuron').get('disable_set_weights'),
61
 
62
  # This stuff is from the last logged event
63
  'num_steps': lambda x: x.summary.get('_step'),
64
- 'runtime': lambda x: x.summary.get('_runtime'),
65
- 'init_energy': lambda x: x.summary.get('init_energy'),
66
- 'best_energy': lambda x: x.summary.get('best_loss'),
67
- 'pdb_id': lambda x: x.summary.get('pdb_id'),
68
  'pdb_updates': lambda x: x.summary.get('updated_count'),
69
  'total_returned_sizes': lambda x: get_total_file_sizes(x),
70
  'total_sent_sizes': lambda x: get_total_md_input_sizes(x),
@@ -74,10 +80,12 @@ EXTRACTORS = {
74
  'version': lambda x: x.tags[0],
75
  'spec_version': lambda x: x.tags[1],
76
  'vali_hotkey': lambda x: x.tags[2],
77
-
78
  # System metrics
79
  'disk_read': lambda x: x.system_metrics.get('system.disk.in'),
80
  'disk_write': lambda x: x.system_metrics.get('system.disk.out'),
 
 
81
  # Really slow stuff below
82
  # 'started_at': lambda x: x.metadata.get('startedAt'),
83
  # 'disk_used': lambda x: x.metadata.get('disk').get('/').get('used'),
@@ -135,32 +143,30 @@ def get_total_md_input_sizes(run):
135
 
136
 
137
  def get_data_transferred(df, unit='GB'):
138
-
139
- factor = convert_unit(1, from_unit=BASE_UNITS, to_unit=unit)
140
- sent = df.total_data_sent.sum()
141
- received = df.total_data_received.sum()
142
  return {
143
- 'sent':sent * factor,
144
- 'received':received * factor,
145
- 'total': (sent + received) * factor,
146
- 'read':df.disk_read.sum() * factor,
147
- 'write':df.disk_write.sum() * factor,
148
- }
149
 
150
 
151
  def get_productivity(df):
152
 
153
  # Estimate the number of unique pdbs folded using our heuristic
154
- unique_folded = df.unique_pdbs.sum().round()
155
- # Estimate the total number of simulations completed using our heuristic
156
- total_simulations = df.total_pdbs.sum().round()
157
- # Estimate the total number of simulation steps completed using our heuristic
158
- total_md_steps = df.total_md_steps.sum().round()
159
 
 
 
 
 
160
  return {
161
  'unique_folded': unique_folded,
162
- 'total_simulations': total_simulations,
163
- 'total_md_steps': total_md_steps,
164
  }
165
 
166
  def get_leaderboard(df, ntop=10, entity_choice='identity'):
@@ -169,6 +175,83 @@ def get_leaderboard(df, ntop=10, entity_choice='identity'):
169
  df.index = range(df.shape[0])
170
  return df.groupby(entity_choice).I.sum().sort_values().reset_index().tail(ntop)
171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  @st.cache_data()
173
  def get_metagraph(time):
174
  print(f'Loading metagraph with time {time}')
@@ -188,20 +271,26 @@ def get_metagraph(time):
188
  return df_m
189
 
190
 
191
- @st.cache_data()
192
- def load_run(run_path, keys=KEYS):
193
-
194
  print('Loading run:', run_path)
195
  run = api.run(run_path)
196
- df = pd.DataFrame(list(run.scan_history(keys=keys)))
 
197
  for col in ['updated_at', 'best_loss_at', 'created_at']:
198
  if col in df.columns:
199
  df[col] = pd.to_datetime(df[col])
200
- print(f'+ Loaded {len(df)} records')
201
- return df
 
 
 
 
 
 
 
202
 
203
  @st.cache_data(show_spinner=False)
204
- def build_data(timestamp=None, paths=BASE_PATHS, min_steps=MIN_STEPS, use_cache=True):
205
 
206
  save_path = '_saved_runs.csv'
207
  filters = {}
@@ -272,10 +361,4 @@ def load_state_vars():
272
  }
273
 
274
 
275
- if __name__ == '__main__':
276
-
277
- print('Loading runs')
278
- df = load_runs()
279
 
280
- df.to_csv('test_wandb_data.csv', index=False)
281
- print(df)
 
5
  import streamlit as st
6
  import pandas as pd
7
  import bittensor as bt
8
+ import ast
9
 
10
 
11
  # TODO: Store the runs dataframe (as in sn1 dashboard) and top up with the ones created since the last snapshot
 
16
  MIN_STEPS = 12 # minimum number of steps in wandb run in order to be worth analyzing
17
  MAX_RUNS = 100#0000
18
  NETUID = 25
19
+ BASE_PATH = 'macrocosmos/folding-validators' # added historical data from otf wandb and current data
20
  NETWORK = 'finney'
21
  KEYS = None
22
  ABBREV_CHARS = 8
 
24
 
25
  PDBS_PER_RUN_STEP = 0.083
26
  AVG_MD_STEPS = 30_000
27
+ BASE_UNITS = 'GB'
28
+ SAVE_PATH = 'current_runs/'
29
+ # Check if the directory exists
30
+ if not os.path.exists(SAVE_PATH):
31
+ # If it doesn't exist, create the directory
32
+ os.makedirs(SAVE_PATH)
33
 
34
  api = wandb.Api(timeout=120, api_key='cdcbe340bb7937d3a289d39632491d12b39231b7')
35
 
 
53
  'run_id': lambda x: x.id,
54
  'user': lambda x: x.user.name[:16],
55
  'username': lambda x: x.user.username[:16],
56
+ # 'created_at': lambda x: pd.Timestamp(x.created_at),
57
+ 'last_event_at': lambda x: pd.to_datetime(x.summary.get('_timestamp'), errors='coerce'),
58
 
59
  'netuid': lambda x: x.config.get('netuid'),
60
  'mock': lambda x: x.config.get('neuron').get('mock'),
61
  'sample_size': lambda x: x.config.get('neuron').get('sample_size'),
62
  'queue_size': lambda x: x.config.get('neuron').get('queue_size'),
63
  'timeout': lambda x: x.config.get('neuron').get('timeout'),
64
+ # 'update_interval': lambda x: x.config.get('neuron').get('update_interval'),
65
  'epoch_length': lambda x: x.config.get('neuron').get('epoch_length'),
66
  'disable_set_weights': lambda x: x.config.get('neuron').get('disable_set_weights'),
67
 
68
  # This stuff is from the last logged event
69
  'num_steps': lambda x: x.summary.get('_step'),
70
+ # 'runtime': lambda x: x.summary.get('_runtime'),
71
+ # 'init_energy': lambda x: x.summary.get('init_energy'),
72
+ # 'best_energy': lambda x: x.summary.get('best_loss'),
73
+ # 'pdb_id': lambda x: x.summary.get('pdb_id'),
74
  'pdb_updates': lambda x: x.summary.get('updated_count'),
75
  'total_returned_sizes': lambda x: get_total_file_sizes(x),
76
  'total_sent_sizes': lambda x: get_total_md_input_sizes(x),
 
80
  'version': lambda x: x.tags[0],
81
  'spec_version': lambda x: x.tags[1],
82
  'vali_hotkey': lambda x: x.tags[2],
83
+
84
  # System metrics
85
  'disk_read': lambda x: x.system_metrics.get('system.disk.in'),
86
  'disk_write': lambda x: x.system_metrics.get('system.disk.out'),
87
+ 'network_sent': lambda x: x.system_metrics.get('system.network.sent'),
88
+ 'network_recv': lambda x: x.system_metrics.get('system.network.recv'),
89
  # Really slow stuff below
90
  # 'started_at': lambda x: x.metadata.get('startedAt'),
91
  # 'disk_used': lambda x: x.metadata.get('disk').get('/').get('used'),
 
143
 
144
 
145
  def get_data_transferred(df, unit='GB'):
146
+
147
+ validator_sent = df.md_inputs_sizes.dropna().apply(lambda x: ast.literal_eval(x)).explode().sum()
148
+ miner_sent = df.response_returned_files_sizes.dropna().apply(lambda x: ast.literal_eval(x)).explode().explode().sum()
149
+
150
  return {
151
+ 'validator_sent': convert_unit(validator_sent, from_unit='B', to_unit=BASE_UNITS),
152
+ 'miner_sent': convert_unit(miner_sent, from_unit='B', to_unit=BASE_UNITS),
153
+ }
 
 
 
154
 
155
 
156
  def get_productivity(df):
157
 
158
  # Estimate the number of unique pdbs folded using our heuristic
159
+ unique_folded = len(df.pdb_id.value_counts())
160
+ # Estimate the total number of jobs completed using our heuristic
161
+ completed_jobs = len(df[df.active == False])
 
 
162
 
163
+ total_historical_run_updates = df.active.isna().sum()
164
+ total_historical_completed_jobs = total_historical_run_updates//10 # this is an estimate based on minimum number of updates per pdb
165
+
166
+
167
  return {
168
  'unique_folded': unique_folded,
169
+ 'total_completed_jobs': (completed_jobs + total_historical_completed_jobs).item(),
 
170
  }
171
 
172
  def get_leaderboard(df, ntop=10, entity_choice='identity'):
 
175
  df.index = range(df.shape[0])
176
  return df.groupby(entity_choice).I.sum().sort_values().reset_index().tail(ntop)
177
 
178
+
179
+
180
+ def fetch_new_runs(base_path: str = BASE_PATH , netuid: int = 25, min_steps: int = 10, save_path: str= SAVE_PATH, extractors: dict = EXTRACTORS):
181
+ runs_checker = pd.read_csv('runs_checker.csv')
182
+ current_time = pd.to_datetime(time.time(), unit='s')
183
+ current_time_str = current_time.strftime('%y-%m-%d') # Format as 'YYYYMMDD'
184
+ new_ticker = runs_checker.check_ticker.max() + 1
185
+
186
+ new_rows_list = []
187
+
188
+ # update runs list based on all current runs running
189
+ for run in api.runs(base_path):
190
+ num_steps = run.summary.get('_step')
191
+
192
+ if run.config.get('netuid') != netuid:
193
+ continue
194
+
195
+ if num_steps is None or num_steps < min_steps:
196
+ continue
197
+
198
+ if run.state =='running':
199
+ new_rows_list.append({
200
+ 'run_id': run.id,
201
+ 'state': run.state,
202
+ 'step': num_steps,
203
+ 'check_time': current_time,
204
+ 'check_ticker': new_ticker,
205
+ 'user': run.user.name[:16],
206
+ 'username': run.user.username[:16]
207
+ })
208
+ if new_rows_list:
209
+ new_rows_df = pd.DataFrame(new_rows_list)
210
+ runs_checker= pd.concat([runs_checker, new_rows_df], ignore_index=True)
211
+ # save
212
+ runs_checker.to_csv('runs_checker.csv', index=False)
213
+
214
+ bt.logging.info(f'Cross checking runs for ticker {new_ticker} against previous ticker')
215
+ previous_check = runs_checker[runs_checker.check_ticker==new_ticker - 1]
216
+ current_check = runs_checker[runs_checker.check_ticker == new_ticker]
217
+
218
+ # save ended runs from last check
219
+ for run_id in previous_check.run_id:
220
+ if run_id not in current_check.run_id:
221
+
222
+ frame = load_run(f'{base_path}/{run_id}', extractors=EXTRACTORS)
223
+
224
+ csv_path = os.path.join(save_path, f"{run_id}.csv")
225
+ frame.to_csv(csv_path)
226
+
227
+ # save new runs
228
+ for run in api.runs(base_path):
229
+ if run.config.get('netuid') != netuid:
230
+ continue
231
+ num_steps = run.summary.get('_step')
232
+ if num_steps is None or num_steps < min_steps:
233
+ continue
234
+ if run.state =='running':
235
+ frame = load_run(run_path='/'.join(run.path), extractors=EXTRACTORS)
236
+ csv_path = os.path.join(save_path, f"{run.id}.csv")
237
+ frame.to_csv(csv_path)
238
+
239
+
240
+ def preload_data():
241
+ # save all the paths of files to a list in a directory
242
+ paths_list = []
243
+ for path in os.listdir(SAVE_PATH):
244
+ paths_list.append(os.path.join(SAVE_PATH, path))
245
+
246
+ df_list = []
247
+
248
+ for path in paths_list:
249
+ df = pd.read_csv(path,low_memory=False)
250
+ df_list.append(df)
251
+
252
+ combined_df = pd.concat(df_list, ignore_index=True)
253
+ return combined_df
254
+
255
  @st.cache_data()
256
  def get_metagraph(time):
257
  print(f'Loading metagraph with time {time}')
 
271
  return df_m
272
 
273
 
274
+ def load_run(run_path: str, extractors: dict):
 
 
275
  print('Loading run:', run_path)
276
  run = api.run(run_path)
277
+ df = pd.DataFrame(list(run.scan_history()))
278
+
279
  for col in ['updated_at', 'best_loss_at', 'created_at']:
280
  if col in df.columns:
281
  df[col] = pd.to_datetime(df[col])
282
+ num_rows=len(df)
283
+
284
+ extractor_df = {key: func(run) for key, func in extractors.items()}
285
+ repeated_data = {key: [value] * num_rows for key, value in extractor_df.items()}
286
+ extractor_df = pd.DataFrame(repeated_data)
287
+
288
+ combined_df = pd.concat([df, extractor_df], axis=1)
289
+
290
+ return combined_df
291
 
292
  @st.cache_data(show_spinner=False)
293
+ def build_data(timestamp=None, paths=BASE_PATH, min_steps=MIN_STEPS, use_cache=True):
294
 
295
  save_path = '_saved_runs.csv'
296
  filters = {}
 
361
  }
362
 
363
 
 
 
 
 
364