Spaces:
Runtime error
Runtime error
fixes for competitions
Browse files- .gitignore +2 -1
- app.py +110 -27
- requirements.txt +3 -2
.gitignore
CHANGED
@@ -4,4 +4,5 @@ cd/
|
|
4 |
lib
|
5 |
lib64
|
6 |
pyvenv.cfg
|
7 |
-
.env
|
|
|
|
4 |
lib
|
5 |
lib64
|
6 |
pyvenv.cfg
|
7 |
+
.env
|
8 |
+
.vscode/
|
app.py
CHANGED
@@ -9,9 +9,12 @@ import math
|
|
9 |
import os
|
10 |
import datetime
|
11 |
import time
|
|
|
|
|
12 |
from dotenv import load_dotenv
|
13 |
from huggingface_hub import HfApi
|
14 |
from apscheduler.schedulers.background import BackgroundScheduler
|
|
|
15 |
|
16 |
load_dotenv()
|
17 |
|
@@ -27,9 +30,11 @@ API = HfApi(token=H4_TOKEN)
|
|
27 |
REPO_ID = "NousResearch/finetuning_subnet_leaderboard"
|
28 |
METAGRAPH_RETRIES = 10
|
29 |
METAGRAPH_DELAY_SECS = 30
|
|
|
30 |
NETUID = 6
|
31 |
SUBNET_START_BLOCK = 2225782
|
32 |
SECONDS_PER_BLOCK = 12
|
|
|
33 |
|
34 |
@dataclass
|
35 |
class Competition:
|
@@ -39,15 +44,64 @@ class Competition:
|
|
39 |
COMPETITIONS = [Competition(id="m1", name="mistral-7b"), Competition(id="g1", name="gemma-2b")]
|
40 |
DEFAULT_COMPETITION_ID = "m1"
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
def get_subtensor_and_metagraph() -> typing.Tuple[bt.subtensor, bt.metagraph]:
|
43 |
for i in range(0, METAGRAPH_RETRIES):
|
44 |
try:
|
45 |
-
|
46 |
-
|
|
|
|
|
47 |
return subtensor, metagraph
|
48 |
except:
|
49 |
if i == METAGRAPH_RETRIES - 1:
|
50 |
raise
|
|
|
51 |
time.sleep(METAGRAPH_DELAY_SECS)
|
52 |
raise RuntimeError()
|
53 |
|
@@ -107,9 +161,16 @@ def get_validator_weights(metagraph: bt.metagraph) -> typing.Dict[int, typing.Tu
|
|
107 |
|
108 |
def get_subnet_data(subtensor: bt.subtensor, metagraph: bt.metagraph) -> typing.List[ModelData]:
|
109 |
result = []
|
110 |
-
for uid in metagraph.uids.tolist():
|
111 |
hotkey = metagraph.hotkeys[uid]
|
112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
if not metadata:
|
114 |
continue
|
115 |
|
@@ -132,31 +193,45 @@ def get_subnet_data(subtensor: bt.subtensor, metagraph: bt.metagraph) -> typing.
|
|
132 |
def floatable(x) -> bool:
|
133 |
return (isinstance(x, float) and not math.isnan(x) and not math.isinf(x)) or isinstance(x, int)
|
134 |
|
135 |
-
def get_float_score(key: str, history) -> typing.Tuple[typing.Optional[float], bool]:
|
136 |
-
if key in history:
|
137 |
data = list(history[key])
|
138 |
if len(data) > 0:
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
return None, False
|
146 |
|
147 |
-
def get_sample(uid, history) -> typing.Optional[typing.Tuple[str, str, str]]:
|
148 |
prompt_key = f"sample_prompt_data.{uid}"
|
149 |
response_key = f"sample_response_data.{uid}"
|
150 |
truth_key = f"sample_truth_data.{uid}"
|
151 |
-
if prompt_key in history and response_key in history and truth_key in history:
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
return None
|
158 |
|
159 |
-
def get_scores(uids: typing.List[int]) -> typing.Dict[int, typing.Dict[str, typing.Optional[float | str]]]:
|
160 |
api = wandb.Api()
|
161 |
runs = list(api.runs(VALIDATOR_WANDB_PROJECT))
|
162 |
|
@@ -166,11 +241,11 @@ def get_scores(uids: typing.List[int]) -> typing.Dict[int, typing.Dict[str, typi
|
|
166 |
for uid in uids:
|
167 |
if uid in result.keys():
|
168 |
continue
|
169 |
-
perplexity, perplexity_fresh = get_float_score(f"perplexity_data.{uid}", history)
|
170 |
-
win_rate, win_rate_fresh = get_float_score(f"win_rate_data.{uid}", history)
|
171 |
-
win_total, win_total_fresh = get_float_score(f"win_total_data.{uid}", history)
|
172 |
-
weight, weight_fresh = get_float_score(f"weight_data.{uid}", history)
|
173 |
-
sample = get_sample(uid, history)
|
174 |
result[uid] = {
|
175 |
"perplexity": perplexity,
|
176 |
"win_rate": win_rate,
|
@@ -205,7 +280,7 @@ leaderboard_df = get_subnet_data(subtensor, metagraph)
|
|
205 |
leaderboard_df.sort(key=lambda x: x.incentive, reverse=True)
|
206 |
|
207 |
competition_scores = {
|
208 |
-
y.id: get_scores([x.uid for x in leaderboard_df if x.competition == y.id])
|
209 |
for y in COMPETITIONS
|
210 |
}
|
211 |
|
@@ -255,9 +330,17 @@ with demo:
|
|
255 |
for competition in COMPETITIONS:
|
256 |
with gr.Tab(competition.name):
|
257 |
scores = competition_scores[competition.id]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
258 |
|
259 |
gr.Label(
|
260 |
-
value=
|
261 |
num_top_classes=10,
|
262 |
)
|
263 |
|
|
|
9 |
import os
|
10 |
import datetime
|
11 |
import time
|
12 |
+
import functools
|
13 |
+
import multiprocessing
|
14 |
from dotenv import load_dotenv
|
15 |
from huggingface_hub import HfApi
|
16 |
from apscheduler.schedulers.background import BackgroundScheduler
|
17 |
+
from tqdm import tqdm
|
18 |
|
19 |
load_dotenv()
|
20 |
|
|
|
30 |
REPO_ID = "NousResearch/finetuning_subnet_leaderboard"
|
31 |
METAGRAPH_RETRIES = 10
|
32 |
METAGRAPH_DELAY_SECS = 30
|
33 |
+
METADATA_TTL = 10
|
34 |
NETUID = 6
|
35 |
SUBNET_START_BLOCK = 2225782
|
36 |
SECONDS_PER_BLOCK = 12
|
37 |
+
SUBTENSOR = os.environ.get("SUBTENSOR", "finney")
|
38 |
|
39 |
@dataclass
|
40 |
class Competition:
|
|
|
44 |
COMPETITIONS = [Competition(id="m1", name="mistral-7b"), Competition(id="g1", name="gemma-2b")]
|
45 |
DEFAULT_COMPETITION_ID = "m1"
|
46 |
|
47 |
+
def run_in_subprocess(func: functools.partial, ttl: int) -> typing.Any:
    """Runs the provided function on a subprocess with 'ttl' seconds to complete.

    Args:
        func (functools.partial): Function to be run.
        ttl (int): How long to try for in seconds.

    Returns:
        Any: The value returned by 'func'.

    Raises:
        TimeoutError: If 'func' did not finish within 'ttl' seconds.
        Exception: Re-raises whatever exception 'func' raised in the subprocess.
    """

    def wrapped_func(func: functools.partial, queue: multiprocessing.Queue):
        try:
            result = func()
            queue.put(result)
        except BaseException as e:
            # Catch BaseException (which already covers Exception) so even
            # KeyboardInterrupt/SystemExit in the child reach the parent via
            # the queue instead of vanishing silently.
            queue.put(e)

    # Use "fork" (the default on all POSIX except macOS), because pickling doesn't seem
    # to work on "spawn".
    ctx = multiprocessing.get_context("fork")
    queue = ctx.Queue()
    process = ctx.Process(target=wrapped_func, args=[func, queue])

    process.start()

    process.join(timeout=ttl)

    if process.is_alive():
        process.terminate()
        process.join()
        raise TimeoutError(f"Failed to {func.func.__name__} after {ttl} seconds")

    # Raises queue.Empty if the subprocess exited without putting a result
    # (e.g. it was killed externally). This is fine: the caller sees a failure.
    result = queue.get(block=False)

    # If we put an exception on the queue then raise instead of returning.
    if isinstance(result, Exception):
        raise result
    if isinstance(result, BaseException):
        raise Exception(f"BaseException raised in subprocess: {str(result)}")

    return result
|
91 |
+
|
92 |
+
|
93 |
def get_subtensor_and_metagraph() -> typing.Tuple[bt.subtensor, bt.metagraph]:
    """Connects to the subtensor chain and pulls the (non-lite) metagraph.

    Retries up to METAGRAPH_RETRIES times, sleeping METAGRAPH_DELAY_SECS
    between attempts.

    Returns:
        Tuple of the connected subtensor and the metagraph for NETUID.

    Raises:
        Exception: The last connection/pull error, if every retry fails.
    """
    for i in range(METAGRAPH_RETRIES):
        try:
            print("Connecting to subtensor...")
            subtensor: bt.subtensor = bt.subtensor(SUBTENSOR)
            print("Pulling metagraph...")
            metagraph: bt.metagraph = subtensor.metagraph(NETUID, lite=False)
            return subtensor, metagraph
        except Exception:
            # Not a bare `except:` — that would also swallow
            # KeyboardInterrupt/SystemExit and make the retry loop
            # impossible to interrupt.
            if i == METAGRAPH_RETRIES - 1:
                raise
            print(f"Error connecting to subtensor or pulling metagraph, retry {i + 1} of {METAGRAPH_RETRIES} in {METAGRAPH_DELAY_SECS} seconds...")
            time.sleep(METAGRAPH_DELAY_SECS)
    # Unreachable (the last retry re-raises), kept as a defensive guard.
    raise RuntimeError("Failed to connect to subtensor and pull metagraph")
|
107 |
|
|
|
161 |
|
162 |
def get_subnet_data(subtensor: bt.subtensor, metagraph: bt.metagraph) -> typing.List[ModelData]:
|
163 |
result = []
|
164 |
+
for uid in tqdm(metagraph.uids.tolist(), desc="Metadata for hotkeys"):
|
165 |
hotkey = metagraph.hotkeys[uid]
|
166 |
+
try:
|
167 |
+
# Wrap calls to the subtensor in a subprocess with a timeout to handle potential hangs.
|
168 |
+
partial = functools.partial(get_metadata, subtensor, metagraph.netuid, hotkey)
|
169 |
+
metadata = run_in_subprocess(partial, METADATA_TTL)
|
170 |
+
except KeyboardInterrupt:
|
171 |
+
raise
|
172 |
+
except:
|
173 |
+
metadata = None
|
174 |
if not metadata:
|
175 |
continue
|
176 |
|
|
|
193 |
def floatable(x) -> bool:
    """Returns True if x is an int or a finite float (not NaN/inf)."""
    return (isinstance(x, float) and not math.isnan(x) and not math.isinf(x)) or isinstance(x, int)


def get_float_score(key: str, history, competition_id: str) -> typing.Tuple[typing.Optional[float], bool]:
    """Extracts the newest float value of `key` restricted to `competition_id`.

    Walks the wandb run history newest-first, skipping rows logged for other
    competitions.

    Args:
        key: History column to read (e.g. "perplexity_data.<uid>").
        history: Run history mapping column name -> sequence of logged values.
        competition_id: Only rows whose "competition_id" entry matches count.

    Returns:
        Tuple (value, fresh): value is the float or None when unavailable;
        fresh is True when the value comes from the newest matching row.
    """
    if key in history and "competition_id" in history:
        data = list(history[key])
        if len(data) > 0:
            competitions = list(history["competition_id"])
            # Guard both lists: the previous unconditional pop() raised
            # IndexError when no row matched competition_id (or when the
            # two columns had unequal lengths).
            while competitions and data:
                if competitions.pop() != competition_id:
                    data.pop()
                    continue
                if floatable(data[-1]):
                    return float(data[-1]), True
                # Newest matching value isn't usable; fall back to the most
                # recent floatable value and mark it stale (fresh=False).
                data = [float(x) for x in data if floatable(x)]
                if len(data) > 0:
                    return float(data[-1]), False
                break
    return None, False
|
213 |
|
214 |
+
def get_sample(uid, history, competition_id: str) -> typing.Optional[typing.Tuple[str, str, str]]:
    """Returns the newest (prompt, response, truth) sample for `uid` in `competition_id`.

    Walks the wandb run history newest-first, skipping rows logged for other
    competitions, and returns the first row where all three fields are strings.

    Args:
        uid: Miner UID whose sample columns are read.
        history: Run history mapping column name -> sequence of logged values.
        competition_id: Only rows whose "competition_id" entry matches count.

    Returns:
        (prompt, response, truth) or None when no usable sample exists.
    """
    prompt_key = f"sample_prompt_data.{uid}"
    response_key = f"sample_response_data.{uid}"
    truth_key = f"sample_truth_data.{uid}"
    if prompt_key in history and response_key in history and truth_key in history and "competition_id" in history:
        competitions = list(history["competition_id"])
        prompts = list(history[prompt_key])
        responses = list(history[response_key])
        truths = list(history[truth_key])
        # Guard all lists: the previous unconditional pop() raised IndexError
        # when no row matched competition_id (or column lengths differed).
        while competitions and prompts and responses and truths:
            prompt = prompts.pop()
            response = responses.pop()
            truth = truths.pop()
            if competitions.pop() != competition_id:
                continue
            if isinstance(prompt, str) and isinstance(response, str) and isinstance(truth, str):
                return prompt, response, truth
            break
    return None
|
233 |
|
234 |
+
def get_scores(uids: typing.List[int], competition_id: str) -> typing.Dict[int, typing.Dict[str, typing.Optional[float | str]]]:
|
235 |
api = wandb.Api()
|
236 |
runs = list(api.runs(VALIDATOR_WANDB_PROJECT))
|
237 |
|
|
|
241 |
for uid in uids:
|
242 |
if uid in result.keys():
|
243 |
continue
|
244 |
+
perplexity, perplexity_fresh = get_float_score(f"perplexity_data.{uid}", history, competition_id)
|
245 |
+
win_rate, win_rate_fresh = get_float_score(f"win_rate_data.{uid}", history, competition_id)
|
246 |
+
win_total, win_total_fresh = get_float_score(f"win_total_data.{uid}", history, competition_id)
|
247 |
+
weight, weight_fresh = get_float_score(f"weight_data.{uid}", history, competition_id)
|
248 |
+
sample = get_sample(uid, history, competition_id)
|
249 |
result[uid] = {
|
250 |
"perplexity": perplexity,
|
251 |
"win_rate": win_rate,
|
|
|
280 |
leaderboard_df.sort(key=lambda x: x.incentive, reverse=True)
|
281 |
|
282 |
competition_scores = {
|
283 |
+
y.id: get_scores([x.uid for x in leaderboard_df if x.competition == y.id], y.id)
|
284 |
for y in COMPETITIONS
|
285 |
}
|
286 |
|
|
|
330 |
for competition in COMPETITIONS:
|
331 |
with gr.Tab(competition.name):
|
332 |
scores = competition_scores[competition.id]
|
333 |
+
print(scores)
|
334 |
+
|
335 |
+
class_denominator = sum(leaderboard_df[i].incentive for i in range(0, 10) if leaderboard_df[i].incentive and leaderboard_df[i].competition == competition.id)
|
336 |
+
|
337 |
+
class_values = {
|
338 |
+
f"{leaderboard_df[i].namespace}/{leaderboard_df[i].name} ({leaderboard_df[i].commit[0:8]}, UID={leaderboard_df[i].uid}) · ${round(leaderboard_df[i].emission * tao_price, 2):,} (τ{round(leaderboard_df[i].emission, 2):,})": \
|
339 |
+
leaderboard_df[i].incentive / class_denominator for i in range(0, 10) if leaderboard_df[i].incentive and leaderboard_df[i].competition == competition.id
|
340 |
+
}
|
341 |
|
342 |
gr.Label(
|
343 |
+
value=class_values,
|
344 |
num_top_classes=10,
|
345 |
)
|
346 |
|
requirements.txt
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
-
bittensor==6.
|
2 |
requests==2.31.0
|
3 |
wandb==0.16.2
|
4 |
python-dotenv==1.0.1
|
5 |
APScheduler==3.10.1
|
6 |
-
huggingface-hub>=0.18.0
|
|
|
|
1 |
+
bittensor==6.8.2
|
2 |
requests==2.31.0
|
3 |
wandb==0.16.2
|
4 |
python-dotenv==1.0.1
|
5 |
APScheduler==3.10.1
|
6 |
+
huggingface-hub>=0.18.0
|
7 |
+
tqdm==4.66.2
|