Nathan Habib committed on
Commit
d6b3d82
1 Parent(s): 9d5015b

only display the scores for the latest result file

Browse files
src/auto_leaderboard/load_results.py CHANGED
@@ -4,6 +4,7 @@ import glob
4
  import json
5
  import os
6
  from typing import Dict, List, Tuple
 
7
 
8
  from src.utils_display import AutoEvalColumn, make_clickable_model
9
  import numpy as np
@@ -103,16 +104,30 @@ def parse_eval_result(json_filepath: str) -> Tuple[str, list[dict]]:
103
 
104
 
105
  def get_eval_results(is_public) -> List[EvalResult]:
106
- json_filepaths = glob.glob(
107
- "eval-results/**/results*.json", recursive=True
108
- )
109
- if not is_public:
110
- json_filepaths += glob.glob(
111
- "private-eval-results/**/results*.json", recursive=True
112
- )
113
 
114
- eval_results = {}
 
 
 
 
 
 
 
 
 
115
 
 
 
 
 
 
 
 
 
 
 
 
116
  for json_filepath in json_filepaths:
117
  result_key, results = parse_eval_result(json_filepath)
118
  for eval_result in results:
 
4
  import json
5
  import os
6
  from typing import Dict, List, Tuple
7
+ import dateutil
8
 
9
  from src.utils_display import AutoEvalColumn, make_clickable_model
10
  import numpy as np
 
104
 
105
 
106
  def get_eval_results(is_public) -> List[EvalResult]:
107
+ json_filepaths = []
 
 
 
 
 
 
108
 
109
+ for root, dir, files in os.walk("eval-results"):
110
+ # We should only have json files in model results
111
+ if len(files) == 0 or any([not f.endswith(".json") for f in files]):
112
+ continue
113
+
114
+ # Sort the files by date
115
+ try:
116
+ files.sort(key=lambda x: dateutil.parser.parse(x.split("_", 1)[-1][:-5]))
117
+ except dateutil.parser._parser.ParserError:
118
+ up_to_date = files[-1]
119
 
120
+ up_to_date = files[-1]
121
+
122
+ if len(files) > 1:
123
+ print(root)
124
+ print(files)
125
+ print(up_to_date)
126
+ print("===")
127
+
128
+ json_filepaths.append(os.path.join(root, up_to_date))
129
+
130
+ eval_results = {}
131
  for json_filepath in json_filepaths:
132
  result_key, results = parse_eval_result(json_filepath)
133
  for eval_result in results: