Spaces:
Runtime error
Runtime error
fixes for competitions
Browse files- .gitignore +2 -1
- app.py +110 -27
- requirements.txt +3 -2
.gitignore
CHANGED
@@ -4,4 +4,5 @@ cd/
|
|
4 |
lib
|
5 |
lib64
|
6 |
pyvenv.cfg
|
7 |
-
.env
|
|
|
|
4 |
lib
|
5 |
lib64
|
6 |
pyvenv.cfg
|
7 |
+
.env
|
8 |
+
.vscode/
|
app.py
CHANGED
@@ -9,9 +9,12 @@ import math
|
|
9 |
import os
|
10 |
import datetime
|
11 |
import time
|
|
|
|
|
12 |
from dotenv import load_dotenv
|
13 |
from huggingface_hub import HfApi
|
14 |
from apscheduler.schedulers.background import BackgroundScheduler
|
|
|
15 |
|
16 |
load_dotenv()
|
17 |
|
@@ -27,9 +30,11 @@ API = HfApi(token=H4_TOKEN)
|
|
27 |
REPO_ID = "NousResearch/finetuning_subnet_leaderboard"
|
28 |
METAGRAPH_RETRIES = 10
|
29 |
METAGRAPH_DELAY_SECS = 30
|
|
|
30 |
NETUID = 6
|
31 |
SUBNET_START_BLOCK = 2225782
|
32 |
SECONDS_PER_BLOCK = 12
|
|
|
33 |
|
34 |
@dataclass
|
35 |
class Competition:
|
@@ -39,15 +44,64 @@ class Competition:
|
|
39 |
COMPETITIONS = [Competition(id="m1", name="mistral-7b"), Competition(id="g1", name="gemma-2b")]
|
40 |
DEFAULT_COMPETITION_ID = "m1"
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
def get_subtensor_and_metagraph() -> typing.Tuple[bt.subtensor, bt.metagraph]:
|
43 |
for i in range(0, METAGRAPH_RETRIES):
|
44 |
try:
|
45 |
-
|
46 |
-
|
|
|
|
|
47 |
return subtensor, metagraph
|
48 |
except:
|
49 |
if i == METAGRAPH_RETRIES - 1:
|
50 |
raise
|
|
|
51 |
time.sleep(METAGRAPH_DELAY_SECS)
|
52 |
raise RuntimeError()
|
53 |
|
@@ -107,9 +161,16 @@ def get_validator_weights(metagraph: bt.metagraph) -> typing.Dict[int, typing.Tu
|
|
107 |
|
108 |
def get_subnet_data(subtensor: bt.subtensor, metagraph: bt.metagraph) -> typing.List[ModelData]:
|
109 |
result = []
|
110 |
-
for uid in metagraph.uids.tolist():
|
111 |
hotkey = metagraph.hotkeys[uid]
|
112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
if not metadata:
|
114 |
continue
|
115 |
|
@@ -132,31 +193,45 @@ def get_subnet_data(subtensor: bt.subtensor, metagraph: bt.metagraph) -> typing.
|
|
132 |
def floatable(x) -> bool:
|
133 |
return (isinstance(x, float) and not math.isnan(x) and not math.isinf(x)) or isinstance(x, int)
|
134 |
|
135 |
-
def get_float_score(key: str, history) -> typing.Tuple[typing.Optional[float], bool]:
|
136 |
-
if key in history:
|
137 |
data = list(history[key])
|
138 |
if len(data) > 0:
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
return None, False
|
146 |
|
147 |
-
def get_sample(uid, history) -> typing.Optional[typing.Tuple[str, str, str]]:
|
148 |
prompt_key = f"sample_prompt_data.{uid}"
|
149 |
response_key = f"sample_response_data.{uid}"
|
150 |
truth_key = f"sample_truth_data.{uid}"
|
151 |
-
if prompt_key in history and response_key in history and truth_key in history:
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
return None
|
158 |
|
159 |
-
def get_scores(uids: typing.List[int]) -> typing.Dict[int, typing.Dict[str, typing.Optional[float | str]]]:
|
160 |
api = wandb.Api()
|
161 |
runs = list(api.runs(VALIDATOR_WANDB_PROJECT))
|
162 |
|
@@ -166,11 +241,11 @@ def get_scores(uids: typing.List[int]) -> typing.Dict[int, typing.Dict[str, typi
|
|
166 |
for uid in uids:
|
167 |
if uid in result.keys():
|
168 |
continue
|
169 |
-
perplexity, perplexity_fresh = get_float_score(f"perplexity_data.{uid}", history)
|
170 |
-
win_rate, win_rate_fresh = get_float_score(f"win_rate_data.{uid}", history)
|
171 |
-
win_total, win_total_fresh = get_float_score(f"win_total_data.{uid}", history)
|
172 |
-
weight, weight_fresh = get_float_score(f"weight_data.{uid}", history)
|
173 |
-
sample = get_sample(uid, history)
|
174 |
result[uid] = {
|
175 |
"perplexity": perplexity,
|
176 |
"win_rate": win_rate,
|
@@ -205,7 +280,7 @@ leaderboard_df = get_subnet_data(subtensor, metagraph)
|
|
205 |
leaderboard_df.sort(key=lambda x: x.incentive, reverse=True)
|
206 |
|
207 |
competition_scores = {
|
208 |
-
y.id: get_scores([x.uid for x in leaderboard_df if x.competition == y.id])
|
209 |
for y in COMPETITIONS
|
210 |
}
|
211 |
|
@@ -255,9 +330,17 @@ with demo:
|
|
255 |
for competition in COMPETITIONS:
|
256 |
with gr.Tab(competition.name):
|
257 |
scores = competition_scores[competition.id]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
258 |
|
259 |
gr.Label(
|
260 |
-
value=
|
261 |
num_top_classes=10,
|
262 |
)
|
263 |
|
|
|
9 |
import os
|
10 |
import datetime
|
11 |
import time
|
12 |
+
import functools
|
13 |
+
import multiprocessing
|
14 |
from dotenv import load_dotenv
|
15 |
from huggingface_hub import HfApi
|
16 |
from apscheduler.schedulers.background import BackgroundScheduler
|
17 |
+
from tqdm import tqdm
|
18 |
|
19 |
load_dotenv()
|
20 |
|
|
|
30 |
REPO_ID = "NousResearch/finetuning_subnet_leaderboard"
|
31 |
METAGRAPH_RETRIES = 10
|
32 |
METAGRAPH_DELAY_SECS = 30
|
33 |
+
METADATA_TTL = 10
|
34 |
NETUID = 6
|
35 |
SUBNET_START_BLOCK = 2225782
|
36 |
SECONDS_PER_BLOCK = 12
|
37 |
+
SUBTENSOR = os.environ.get("SUBTENSOR", "finney")
|
38 |
|
39 |
@dataclass
|
40 |
class Competition:
|
|
|
44 |
COMPETITIONS = [Competition(id="m1", name="mistral-7b"), Competition(id="g1", name="gemma-2b")]
|
45 |
DEFAULT_COMPETITION_ID = "m1"
|
46 |
|
47 |
+
def run_in_subprocess(func: functools.partial, ttl: int) -> typing.Any:
    """Runs the provided function on a subprocess with 'ttl' seconds to complete.

    Args:
        func (functools.partial): Function to be run.
        ttl (int): How long to try for in seconds.

    Returns:
        Any: The value returned by 'func'.

    Raises:
        TimeoutError: If 'func' did not finish within 'ttl' seconds.
        Exception: Re-raises whatever exception 'func' raised in the subprocess.
    """

    def wrapped_func(func: functools.partial, queue: multiprocessing.Queue):
        try:
            result = func()
            queue.put(result)
        except BaseException as e:
            # Catch BaseException (which already covers Exception) so even
            # KeyboardInterrupt/SystemExit in the child reach the parent via
            # the queue instead of vanishing silently.
            queue.put(e)

    # Use "fork" (the default on all POSIX except macOS), because pickling doesn't seem
    # to work on "spawn".
    ctx = multiprocessing.get_context("fork")
    queue = ctx.Queue()
    process = ctx.Process(target=wrapped_func, args=[func, queue])

    process.start()

    process.join(timeout=ttl)

    if process.is_alive():
        process.terminate()
        process.join()
        raise TimeoutError(f"Failed to {func.func.__name__} after {ttl} seconds")

    # Raises queue.Empty if the subprocess exited without putting a result
    # (e.g. it was killed externally). This is fine: the caller sees a failure.
    result = queue.get(block=False)

    # If we put an exception on the queue then raise instead of returning.
    if isinstance(result, Exception):
        raise result
    if isinstance(result, BaseException):
        raise Exception(f"BaseException raised in subprocess: {str(result)}")

    return result
|
91 |
+
|
92 |
+
|
93 |
def get_subtensor_and_metagraph() -> typing.Tuple[bt.subtensor, bt.metagraph]:
    """Connects to the subtensor chain and pulls the (non-lite) metagraph.

    Retries up to METAGRAPH_RETRIES times, sleeping METAGRAPH_DELAY_SECS
    between attempts.

    Returns:
        Tuple of the connected subtensor and the metagraph for NETUID.

    Raises:
        Exception: The last connection/pull error, if every retry fails.
    """
    for i in range(METAGRAPH_RETRIES):
        try:
            print("Connecting to subtensor...")
            subtensor: bt.subtensor = bt.subtensor(SUBTENSOR)
            print("Pulling metagraph...")
            metagraph: bt.metagraph = subtensor.metagraph(NETUID, lite=False)
            return subtensor, metagraph
        except Exception:
            # Not a bare `except:` — that would also swallow
            # KeyboardInterrupt/SystemExit and make the retry loop
            # impossible to interrupt.
            if i == METAGRAPH_RETRIES - 1:
                raise
            print(f"Error connecting to subtensor or pulling metagraph, retry {i + 1} of {METAGRAPH_RETRIES} in {METAGRAPH_DELAY_SECS} seconds...")
            time.sleep(METAGRAPH_DELAY_SECS)
    # Unreachable (the last retry re-raises), kept as a defensive guard.
    raise RuntimeError("Failed to connect to subtensor and pull metagraph")
|
107 |
|
|
|
161 |
|
162 |
def get_subnet_data(subtensor: bt.subtensor, metagraph: bt.metagraph) -> typing.List[ModelData]:
|
163 |
result = []
|
164 |
+
for uid in tqdm(metagraph.uids.tolist(), desc="Metadata for hotkeys"):
|
165 |
hotkey = metagraph.hotkeys[uid]
|
166 |
+
try:
|
167 |
+
# Wrap calls to the subtensor in a subprocess with a timeout to handle potential hangs.
|
168 |
+
partial = functools.partial(get_metadata, subtensor, metagraph.netuid, hotkey)
|
169 |
+
metadata = run_in_subprocess(partial, METADATA_TTL)
|
170 |
+
except KeyboardInterrupt:
|
171 |
+
raise
|
172 |
+
except:
|
173 |
+
metadata = None
|
174 |
if not metadata:
|
175 |
continue
|
176 |
|
|
|
193 |
def floatable(x) -> bool:
    """Returns True if x is an int or a finite float (not NaN/inf)."""
    return (isinstance(x, float) and not math.isnan(x) and not math.isinf(x)) or isinstance(x, int)


def get_float_score(key: str, history, competition_id: str) -> typing.Tuple[typing.Optional[float], bool]:
    """Extracts the newest float value of `key` restricted to `competition_id`.

    Walks the wandb run history newest-first, skipping rows logged for other
    competitions.

    Args:
        key: History column to read (e.g. "perplexity_data.<uid>").
        history: Run history mapping column name -> sequence of logged values.
        competition_id: Only rows whose "competition_id" entry matches count.

    Returns:
        Tuple (value, fresh): value is the float or None when unavailable;
        fresh is True when the value comes from the newest matching row.
    """
    if key in history and "competition_id" in history:
        data = list(history[key])
        if len(data) > 0:
            competitions = list(history["competition_id"])
            # Guard both lists: the previous unconditional pop() raised
            # IndexError when no row matched competition_id (or when the
            # two columns had unequal lengths).
            while competitions and data:
                if competitions.pop() != competition_id:
                    data.pop()
                    continue
                if floatable(data[-1]):
                    return float(data[-1]), True
                # Newest matching value isn't usable; fall back to the most
                # recent floatable value and mark it stale (fresh=False).
                data = [float(x) for x in data if floatable(x)]
                if len(data) > 0:
                    return float(data[-1]), False
                break
    return None, False
|
213 |
|
214 |
+
def get_sample(uid, history, competition_id: str) -> typing.Optional[typing.Tuple[str, str, str]]:
    """Returns the newest (prompt, response, truth) sample for `uid` in `competition_id`.

    Walks the wandb run history newest-first, skipping rows logged for other
    competitions, and returns the first row where all three fields are strings.

    Args:
        uid: Miner UID whose sample columns are read.
        history: Run history mapping column name -> sequence of logged values.
        competition_id: Only rows whose "competition_id" entry matches count.

    Returns:
        (prompt, response, truth) or None when no usable sample exists.
    """
    prompt_key = f"sample_prompt_data.{uid}"
    response_key = f"sample_response_data.{uid}"
    truth_key = f"sample_truth_data.{uid}"
    if prompt_key in history and response_key in history and truth_key in history and "competition_id" in history:
        competitions = list(history["competition_id"])
        prompts = list(history[prompt_key])
        responses = list(history[response_key])
        truths = list(history[truth_key])
        # Guard all lists: the previous unconditional pop() raised IndexError
        # when no row matched competition_id (or column lengths differed).
        while competitions and prompts and responses and truths:
            prompt = prompts.pop()
            response = responses.pop()
            truth = truths.pop()
            if competitions.pop() != competition_id:
                continue
            if isinstance(prompt, str) and isinstance(response, str) and isinstance(truth, str):
                return prompt, response, truth
            break
    return None
|
233 |
|
234 |
+
def get_scores(uids: typing.List[int], competition_id: str) -> typing.Dict[int, typing.Dict[str, typing.Optional[float | str]]]:
|
235 |
api = wandb.Api()
|
236 |
runs = list(api.runs(VALIDATOR_WANDB_PROJECT))
|
237 |
|
|
|
241 |
for uid in uids:
|
242 |
if uid in result.keys():
|
243 |
continue
|
244 |
+
perplexity, perplexity_fresh = get_float_score(f"perplexity_data.{uid}", history, competition_id)
|
245 |
+
win_rate, win_rate_fresh = get_float_score(f"win_rate_data.{uid}", history, competition_id)
|
246 |
+
win_total, win_total_fresh = get_float_score(f"win_total_data.{uid}", history, competition_id)
|
247 |
+
weight, weight_fresh = get_float_score(f"weight_data.{uid}", history, competition_id)
|
248 |
+
sample = get_sample(uid, history, competition_id)
|
249 |
result[uid] = {
|
250 |
"perplexity": perplexity,
|
251 |
"win_rate": win_rate,
|
|
|
280 |
leaderboard_df.sort(key=lambda x: x.incentive, reverse=True)
|
281 |
|
282 |
competition_scores = {
|
283 |
+
y.id: get_scores([x.uid for x in leaderboard_df if x.competition == y.id], y.id)
|
284 |
for y in COMPETITIONS
|
285 |
}
|
286 |
|
|
|
330 |
for competition in COMPETITIONS:
|
331 |
with gr.Tab(competition.name):
|
332 |
scores = competition_scores[competition.id]
|
333 |
+
print(scores)
|
334 |
+
|
335 |
+
class_denominator = sum(leaderboard_df[i].incentive for i in range(0, 10) if leaderboard_df[i].incentive and leaderboard_df[i].competition == competition.id)
|
336 |
+
|
337 |
+
class_values = {
|
338 |
+
f"{leaderboard_df[i].namespace}/{leaderboard_df[i].name} ({leaderboard_df[i].commit[0:8]}, UID={leaderboard_df[i].uid}) · ${round(leaderboard_df[i].emission * tao_price, 2):,} (τ{round(leaderboard_df[i].emission, 2):,})": \
|
339 |
+
leaderboard_df[i].incentive / class_denominator for i in range(0, 10) if leaderboard_df[i].incentive and leaderboard_df[i].competition == competition.id
|
340 |
+
}
|
341 |
|
342 |
gr.Label(
|
343 |
+
value=class_values,
|
344 |
num_top_classes=10,
|
345 |
)
|
346 |
|
requirements.txt
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
-
bittensor==6.
|
2 |
requests==2.31.0
|
3 |
wandb==0.16.2
|
4 |
python-dotenv==1.0.1
|
5 |
APScheduler==3.10.1
|
6 |
-
huggingface-hub>=0.18.0
|
|
|
|
1 |
+
bittensor==6.8.2
|
2 |
requests==2.31.0
|
3 |
wandb==0.16.2
|
4 |
python-dotenv==1.0.1
|
5 |
APScheduler==3.10.1
|
6 |
+
huggingface-hub>=0.18.0
|
7 |
+
tqdm==4.66.2
|