Zhiyu Wu committed on
Commit
ff851b0
1 Parent(s): b19efcb

new count items (#24)

Browse files
Files changed (1) hide show
  1. scripts/count_benchmark_items.py +43 -25
scripts/count_benchmark_items.py CHANGED
@@ -1,38 +1,56 @@
1
  import os
 
2
  import json
3
-
4
  import tyro
 
5
 
 
 
6
 
7
def main(data_dir: str, depth: int = 1) -> None:
    """Summarize the results collected for all models in the given directory.

    For every model directory, the number of entries in its `benchmark.json`
    is printed. A model whose `benchmark.json` is missing, unreadable, or not
    valid JSON is reported with an `[ERR]` marker instead of aborting the run.

    Args:
        data_dir: The directory containing the results.
        depth: The depth of the directory tree to search. When it's 1, the
            script expects to find model directories directly under `data_dir`.
            (Default: 1)

    Raises:
        ValueError: If `depth` is smaller than 1.
    """
    if depth < 1:
        raise ValueError("depth must be >= 1")

    # Only sub-directories are meaningful at every level; stray files (notes,
    # logs, ...) are ignored rather than counted or recursed into.
    # Sorting keeps the report order stable across runs and file systems.
    subdir_names = sorted(
        name
        for name in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, name))
    )

    if depth > 1:
        for name in subdir_names:
            main(os.path.join(data_dir, name), depth - 1)
        return

    print(len(subdir_names), "models found in", data_dir)

    for i, model_name in enumerate(subdir_names):
        benchmark_path = os.path.join(data_dir, model_name, "benchmark.json")
        try:
            # The context manager closes the handle even when parsing fails.
            with open(benchmark_path) as f:
                benchmark = json.load(f)
        except (OSError, json.JSONDecodeError):
            print(f"{i:2d} [ERR] results found for {model_name}")
        else:
            print(f"{i:2d} {len(benchmark):5d} results found for", model_name)


if __name__ == "__main__":
    tyro.cli(main)
 
1
  import os
2
+ import re
3
  import json
 
4
  import tyro
5
+ from collections import namedtuple
6
 
7
# Identifies one benchmark result file: the task it belongs to, the name of
# the directory it was found in, and the batch size parsed from its file name.
ModelBatch = namedtuple('ModelBatch', ['task', 'model_name', 'batch'])

# Maps every ModelBatch seen so far to True when its result file holds enough
# entries to count as complete, and to False otherwise (including files that
# could not be parsed).
model_state = {}


def load_and_print_length(task: str, root_dir: str, expected_items: int = 2000) -> None:
    """Scan `root_dir` for batch result files and record their completeness.

    Every file named exactly `benchmark_batch_<N>.json` anywhere below
    `root_dir` is loaded as JSON. Its length is printed and the module-level
    `model_state` is updated with whether the file is complete.

    Args:
        task: Label stored in the `task` field of each recorded `ModelBatch`.
        root_dir: Directory that is walked recursively for result files.
        expected_items: A file with batch size N counts as complete when
            `len(data) * N >= expected_items`. The default of 2000 is
            presumably the number of prompts in one benchmark run - confirm
            against the benchmark script.
    """
    # The dot is escaped and the whole name must match, so look-alikes such as
    # `benchmark_batch_8.json.bak` are not mistaken for result files.
    pattern = re.compile(r'benchmark_batch_(\d+)\.json')

    for subdir, _, files in os.walk(root_dir):
        model_name = os.path.basename(subdir)
        for file_name in files:
            match = pattern.fullmatch(file_name)
            if match is None:
                continue

            batch = int(match.group(1))
            print(f"Model: {model_name}, Batch size: {batch}")
            model_batch = ModelBatch(task=task, model_name=model_name, batch=batch)
            try:
                with open(os.path.join(subdir, file_name), 'r') as f:
                    data = json.load(f)
            except json.JSONDecodeError:
                print("[ERR] results found ")
                model_state[model_batch] = False
            else:
                print(f"Length: {len(data)}")
                # Multiplying instead of dividing gives the same answer for
                # positive batch sizes and cannot raise ZeroDivisionError for
                # a file named `benchmark_batch_0.json`.
                model_state[model_batch] = len(data) * batch >= expected_items
            print("------")
33
+
34
def main(data_dir: str) -> None:
    """Summarize the results collected for all models in the given directory.

    Each task sub-directory of `data_dir` is scanned with
    `load_and_print_length`, which fills the module-level `model_state`.
    Afterwards the recorded instances are printed in two groups: those marked
    complete and those that are not.

    Args:
        data_dir: The directory containing the results. It is expected to hold
            one sub-directory per task (`chat`, `chat-concise`, `instruct`,
            `instruct-concise`).
    """
    tasks = ['chat', 'chat-concise', 'instruct', 'instruct-concise']
    for task in tasks:
        print(task)
        task_dir = os.path.join(data_dir, task)
        if not os.path.isdir(task_dir):
            # os.walk silently yields nothing for a missing directory, which
            # would make a mistyped `data_dir` look like "no results at all".
            print(f"[WARN] directory not found: {task_dir}")
            continue
        load_and_print_length(task, task_dir)

    # Partition once; anything not explicitly marked True is incomplete.
    complete = [info for info, done in model_state.items() if done is True]
    incomplete = [info for info, done in model_state.items() if done is not True]

    print("complete instance:")
    for info in complete:
        print(info)
    print("------")
    print("incomplete instance:")
    for info in incomplete:
        print(info)


if __name__ == "__main__":
    tyro.cli(main)