Spaces:

ml-energy
/

leaderboard

Running

App Files Files Community

Zhiyu Wu committed on Sep 4, 2023

Commit

ff851b0

•

1 Parent(s): b19efcb

new count items (#24)

Browse files

Files changed (1) hide show

scripts/count_benchmark_items.py +43 -25

scripts/count_benchmark_items.py CHANGED Viewed

@@ -1,38 +1,56 @@
 import os
 import json
 import tyro
def main(data_dir: str, depth: int = 1) -> None:
    """Summarize the results collected for all models in the given directory.

    For every model directory, the number of entries in its `benchmark.json`
    is printed. A missing, unreadable, or malformed `benchmark.json` is
    reported as `[ERR]` instead of aborting the whole summary.

    Args:
        data_dir: The directory containing the results.
        depth: The depth of the directory tree to search. When it's 1, the
            script expects to find model directories directly under `data_dir`.
            (Default: 1)

    Raises:
        ValueError: If `depth` is smaller than 1.
    """
    if depth < 1:
        raise ValueError("depth must be >= 1")

    # Only directories can be model directories (or contain them); plain files
    # are ignored at every level. Sorting keeps the report order stable.
    subdirs = sorted(
        name
        for name in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, name))
    )

    if depth > 1:
        for name in subdirs:
            main(os.path.join(data_dir, name), depth - 1)
        return

    print(len(subdirs), "models found in", data_dir)
    for i, model_name in enumerate(subdirs):
        benchmark_path = os.path.join(data_dir, model_name, "benchmark.json")
        try:
            with open(benchmark_path, encoding="utf-8") as f:
                num_results = len(json.load(f))
        except (OSError, ValueError, TypeError):
            # OSError: file missing or unreadable; ValueError: invalid JSON or
            # bad encoding; TypeError: the JSON payload has no length.
            print(f"{i:2d} [ERR] results found for {model_name}")
        else:
            print(f"{i:2d} {num_results:5d} results found for", model_name)


if __name__ == "__main__":
    tyro.cli(main)

 import os
+import re
 import json
 import tyro
+from collections import namedtuple
# Identifies one result file: the task it belongs to, the model (name of the
# directory holding the file), and the batch size parsed from the file name.
ModelBatch = namedtuple('ModelBatch', ['task', 'model_name', 'batch'])

# Maps each ModelBatch seen so far to True (enough results) or False (too few
# results, or the file could not be read). Shared across calls on purpose so a
# caller can scan several task directories and report on all of them at once.
model_state = {}


def load_and_print_length(task: str, root_dir: str, expected_items: int = 2000) -> None:
    """Scan `root_dir` for benchmark result files and record their completeness.

    Every file named exactly `benchmark_batch_<N>.json` anywhere under
    `root_dir` is loaded, its length is printed, and an entry is stored in the
    module-level `model_state` dict. A file counts as complete when
    `len(data) * N >= expected_items`.

    NOTE(review): this presumably means each JSON entry is one batch of `N`
    requests, so entries times batch size approximates the number of requests
    served. Confirm against the code that writes these files.

    Args:
        task: Label stored in the `task` field of each recorded `ModelBatch`.
        root_dir: Directory tree to search. A missing directory yields no
            entries (`os.walk` ignores errors by default).
        expected_items: Minimum value of `len(data) * N` for a file to be
            considered complete. (Default: 2000)
    """
    # The dot is escaped and the whole name must match, so look-alikes such as
    # `benchmark_batch_8.json.bak` are not counted as result files.
    pattern = re.compile(r'benchmark_batch_(\d+)\.json')
    for subdir, _, files in os.walk(root_dir):
        model_name = os.path.basename(subdir)
        for file_name in files:
            match = pattern.fullmatch(file_name)
            if match is None:
                continue
            batch = int(match.group(1))
            key = ModelBatch(task=task, model_name=model_name, batch=batch)
            print(f"Model: {model_name}, Batch size: {batch}")
            try:
                with open(os.path.join(subdir, file_name), encoding="utf-8") as f:
                    num_results = len(json.load(f))
            except (OSError, ValueError, TypeError):
                # OSError: unreadable file; ValueError: invalid JSON or bad
                # encoding; TypeError: the JSON payload has no length.
                print("[ERR] results found ")
                model_state[key] = False
            else:
                print(f"Length: {num_results}")
                # Multiplying instead of dividing keeps the comparison in exact
                # integers and cannot fail for a batch size of 0.
                model_state[key] = num_results * batch >= expected_items
            print("------")
def main(data_dir: str) -> None:
    """Summarize the results collected for all models in the given directory.

    Each task subdirectory of `data_dir` is scanned with
    `load_and_print_length`, then every entry recorded in `model_state` is
    listed under either "complete instance:" or "incomplete instance:".

    Args:
        data_dir: The directory containing the results. It is expected to hold
            one subdirectory per task: `chat`, `chat-concise`, `instruct`,
            and `instruct-concise`.
    """
    tasks = ('chat', 'chat-concise', 'instruct', 'instruct-concise')
    for task in tasks:
        print(task)
        load_and_print_length(task, os.path.join(data_dir, task))

    print("complete instance:")
    for info, complete in model_state.items():
        if complete:
            print(info)
    print("------")
    print("incomplete instance:")
    for info, complete in model_state.items():
        if not complete:
            print(info)


if __name__ == "__main__":
    tyro.cli(main)