hysts (HF Staff) committed
Commit 257b821 · 1 Parent(s): f4d3c9c

Revert "Add parquet file support"


This reverts commit 67542c97b7478bd462058888384d612a46cd139d.

Files changed (3):
  1. app.py +1 -5
  2. src/leaderboard/read_evals.py +33 -63
  3. src/populate.py +1 -0
app.py CHANGED

```diff
@@ -90,11 +90,7 @@ except Exception:
     FAILED_EVAL_QUEUE_DF,
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
-# ORIGINAL_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
-# Get dataframes
-results_path = "eval-results/leaderboard.parquet"
-
-ORIGINAL_DF = get_leaderboard_df(results_path, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
+ORIGINAL_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 MAX_MODEL_SIZE = ORIGINAL_DF["#Params (B)"].max()
 
 
```
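For context, the only functional change above is the first argument to `get_leaderboard_df`: the parquet branch passed a single `leaderboard.parquet` file, while the reverted code passes the `EVAL_RESULTS_PATH` results-folder root again. Either way, app.py only relies on getting back a DataFrame with a `#Params (B)` column. A minimal, self-contained sketch of that contract; the stub function and its values are invented for illustration, not this repo's implementation:

```python
import pandas as pd


def get_leaderboard_df_stub(results_path: str) -> pd.DataFrame:
    # Hypothetical stand-in for get_leaderboard_df: whether results_path is
    # a parquet file or a folder of JSON results, the caller only sees a frame.
    return pd.DataFrame({"#Params (B)": [7.0, 13.0, 70.0]})


ORIGINAL_DF = get_leaderboard_df_stub("eval-results")
MAX_MODEL_SIZE = ORIGINAL_DF["#Params (B)"].max()
print(MAX_MODEL_SIZE)  # 70.0
```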
src/leaderboard/read_evals.py CHANGED
```diff
@@ -5,7 +5,6 @@ from dataclasses import dataclass
 from decimal import Decimal
 
 import dateutil
-import pandas as pd
 
 from src.display.formatting import make_clickable_model
 from src.display.utils import AutoEvalColumn, Backend, ModelType, Tasks, Version, WeightType
```
```diff
@@ -39,12 +38,9 @@ class EvalResult:
 
     @classmethod
     def init_from_json_file(self, json_filepath):
-        """Inits the result from the specific model result file or dict"""
-        if isinstance(json_filepath, dict):
-            data = json_filepath
-        else:
-            with open(json_filepath) as fp:
-                data = json.load(fp)
+        """Inits the result from the specific model result file"""
+        with open(json_filepath) as fp:
+            data = json.load(fp)
 
         config = data.get("config")
         metainfo = config.get("metainfo", {})
```
```diff
@@ -194,63 +190,35 @@ def get_request_file_for_model(requests_path, model_name, precision):
 
 
 def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
-    """From the path of the results folder root or parquet file, extract all needed info for results"""
-    eval_results = {}
-
-    if results_path.endswith(".parquet"):
-        df = pd.read_parquet(results_path)
-        for _, row in df.iterrows():
-            data = {
-                "scores": {
-                    col.replace("scores.", ""): str(row[col]) for col in df.columns if col.startswith("scores.")
-                },
-                "config": {
-                    "model_name": row.get("config.model.pretrained_model_name_or_path"),
-                    "model": {
-                        "dtype": row.get("config.model.dtype"),
-                        "revision": row.get("config.model.revision"),
-                        "_target_": row.get("config.model._target_"),
-                    },
-                    "metainfo": {
-                        "num_few_shots": row.get("config.metainfo.num_few_shots"),
-                        "version": row.get("config.metainfo.version"),
-                    },
-                    "pipeline_kwargs": {"add_special_tokens": row.get("config.pipeline_kwargs.add_special_tokens")},
-                },
-            }
-            eval_result = EvalResult.init_from_json_file(data)
-            eval_result.update_with_request_file(requests_path)
-
-            eval_name = eval_result.eval_name
-            if eval_name in eval_results:
-                eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
-            else:
-                eval_results[eval_name] = eval_result
-    else:
-        # JSON
-        model_result_filepaths = []
-        for root, _, files in os.walk(results_path):
-            if len(files) == 0 or any([not f.endswith(".json") for f in files]):
-                continue
-
-            try:
-                files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
-            except dateutil.parser._parser.ParserError:
-                files = [files[-1]]
-
-            for file in files:
-                model_result_filepaths.append(os.path.join(root, file))
-
-        eval_results = {}
-        for model_result_filepath in model_result_filepaths:
-            eval_result = EvalResult.init_from_json_file(model_result_filepath)
-            eval_result.update_with_request_file(requests_path)
-
-            eval_name = eval_result.eval_name
-            if eval_name in eval_results.keys():
-                eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
-            else:
-                eval_results[eval_name] = eval_result
+    """From the path of the results folder root, extract all needed info for results"""
+    model_result_filepaths = []
+
+    for root, _, files in os.walk(results_path):
+        # We should only have json files in model results
+        if len(files) == 0 or any([not f.endswith(".json") for f in files]):
+            continue
+
+        # Sort the files by date
+        try:
+            files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
+        except dateutil.parser._parser.ParserError:
+            files = [files[-1]]
+
+        for file in files:
+            model_result_filepaths.append(os.path.join(root, file))
+
+    eval_results = {}
+    for model_result_filepath in model_result_filepaths:
+        # Creation of result
+        eval_result = EvalResult.init_from_json_file(model_result_filepath)
+        eval_result.update_with_request_file(requests_path)
+
+        # Store results of same eval together
+        eval_name = eval_result.eval_name
+        if eval_name in eval_results.keys():
+            eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
+        else:
+            eval_results[eval_name] = eval_result
 
     results = []
     for v in eval_results.values():
@@ -259,5 +227,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
             results.append(v)
         except KeyError: # not all eval values present
             continue
+        # print(f"Processing file: {model_result_filepath}")
+        # print(f"Eval result: {eval_result.to_dict()}")
 
     return results
```
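The removed parquet branch exists because a flattened parquet file stores nested JSON fields as dotted column names (`scores.*`, `config.model.*`), which the code then reassembled into the nested dict that `init_from_json_file` expects. A self-contained sketch of that round trip; the column names mirror the ones read in the removed code, the task names and values are invented:

```python
import pandas as pd

# Flattening: pd.json_normalize turns nested keys into dotted columns,
# the same shape a leaderboard parquet file would hold.
nested = {
    "scores": {"task_a": 0.5, "task_b": 0.7},
    "config": {"model": {"dtype": "bfloat16", "revision": "main"}},
}
df = pd.json_normalize(nested)
print(df.columns.tolist())
# ['scores.task_a', 'scores.task_b', 'config.model.dtype', 'config.model.revision']

# Un-flattening one row, as the removed branch did by hand:
row = df.iloc[0]
scores = {c.replace("scores.", ""): str(row[c]) for c in df.columns if c.startswith("scores.")}
print(scores)  # {'task_a': '0.5', 'task_b': '0.7'}
```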
 
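One subtle piece of the restored JSON path is the sort key, which orders each model's result files by the timestamp embedded in the filename. A self-contained sketch; the filenames are invented but follow the `results_<timestamp>.json` pattern the key assumes:

```python
files = [
    "results_2024-03-01T10-00-00.000000.json",
    "results_2024-01-15T08-30-00.000000.json",
]

# Strip ".json" and "results_", then drop the last 7 characters
# (".000000") so only the sortable date-time portion remains.
files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
print(files[0])  # results_2024-01-15T08-30-00.000000.json (oldest first)
```

The `except dateutil.parser._parser.ParserError` fallback in the diff suggests an earlier version parsed these timestamps with dateutil; the string key shown here sorts lexicographically, which works because the format is zero-padded.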
src/populate.py CHANGED
```diff
@@ -14,6 +14,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     all_data_json = [v.to_dict() for v in raw_data]
 
     df = pd.DataFrame.from_records(all_data_json)
+
     # Add a row ID column
     df[AutoEvalColumn.row_id.name] = range(len(df))
 
```
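A self-contained sketch of the two steps around the re-added blank line: building the leaderboard frame from per-model dicts, then numbering the rows. Here `"row_id"` stands in for `AutoEvalColumn.row_id.name`, which is defined elsewhere in the repo, and the records are invented:

```python
import pandas as pd

all_data_json = [{"model": "model-a", "score": 0.5}, {"model": "model-b", "score": 0.7}]
df = pd.DataFrame.from_records(all_data_json)

# Add a row ID column
df["row_id"] = range(len(df))
print(df)
#      model  score  row_id
# 0  model-a    0.5       0
# 1  model-b    0.7       1
```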