sh1gechan committed on
Commit
a684f6e
·
verified ·
1 Parent(s): 70a5d78

Update src/populate.py

Browse files
Files changed (1) hide show
  1. src/populate.py +50 -4
src/populate.py CHANGED
@@ -8,32 +8,78 @@ from src.display.utils import AutoEvalColumn, EvalQueueColumn
8
  from src.leaderboard.read_evals import get_raw_eval_results
9
 
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
    """Creates a dataframe from all the individual experiment results.

    Args:
        results_path: Directory holding the raw evaluation result files.
        requests_path: Directory holding the evaluation request files.
        cols: Columns to keep in the returned dataframe, in display order.
        benchmark_cols: Benchmark columns that must all be non-NaN for a
            row (model) to be kept.

    Returns:
        One row per evaluated model, sorted by the average score
        (descending), restricted to ``cols``.
    """
    raw_data = get_raw_eval_results(results_path, requests_path)
    all_data_json = [v.to_dict() for v in raw_data]
    df = pd.DataFrame.from_records(all_data_json)

    # Sort and NaN-filter while the columns are still numeric. Doing this
    # BEFORE formatting matters: f'{x:.4f}' turns NaN into the string
    # 'nan', which has_no_nan_values can no longer detect, and the column
    # selection below may drop benchmark columns the filter needs.
    df = df.sort_values(by=[AutoEvalColumn.AVG.name], ascending=False)
    # filter out if any of the benchmarks have not been produced
    df = df[has_no_nan_values(df, benchmark_cols)]

    score_cols = [
        'ALT E to J BLEU', 'ALT J to E BLEU', 'WikiCorpus E to J BLEU', 'WikiCorpus J to E BLEU',
        'XL-Sum JA BLEU', 'XL-Sum ROUGE1', 'XL-Sum ROUGE2', 'XL-Sum ROUGE-Lsum'
    ]
    existing_score_cols = [col for col in score_cols if col in df.columns]
    print(f"Existing score columns: {existing_score_cols}")

    # Divide the score columns by 100 and format them as .4f strings.
    # Series.map (via apply) replaces DataFrame.applymap, which is
    # deprecated since pandas 2.1.
    df[existing_score_cols] = (df[existing_score_cols] / 100).apply(
        lambda col: col.map(lambda x: f'{x:.4f}')
    )

    # round() only affects the columns that are still numeric; the
    # formatted score columns are strings and pass through unchanged.
    df = df[cols].round(decimals=2)
    return df
35
 
36
 
 
37
  def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
38
  """Creates the different dataframes for the evaluation queues requestes"""
39
  entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
 
8
  from src.leaderboard.read_evals import get_raw_eval_results
9
 
10
 
11
+ # def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
12
+ # """Creates a dataframe from all the individual experiment results"""
13
+ # raw_data = get_raw_eval_results(results_path, requests_path)
14
+ # all_data_json = [v.to_dict() for v in raw_data]
15
+
16
+
17
+
18
+ # df = pd.DataFrame.from_records(all_data_json)
19
+ # score_cols = [
20
+ # 'ALT E to J BLEU', 'ALT J to E BLEU', 'WikiCorpus E to J BLEU', 'WikiCorpus J to E BLEU',
21
+ # 'XL-Sum JA BLEU', 'XL-Sum ROUGE1', 'XL-Sum ROUGE2', 'XL-Sum ROUGE-Lsum'
22
+ # ]
23
+
24
+ # existing_score_cols = [col for col in score_cols if col in df.columns]
25
+ # print(f"Existing score columns: {existing_score_cols}")
26
+
27
+ # # スコア列を100で割り、.4f形式でフォーマット
28
+ # df[existing_score_cols] = (df[existing_score_cols] / 100).applymap(lambda x: f'{x:.4f}')
29
+ # df = df.sort_values(by=[AutoEvalColumn.AVG.name], ascending=False)
30
+ # df = df[cols].round(decimals=2)
31
+
32
+ # # filter out if any of the benchmarks have not been produced
33
+ # df = df[has_no_nan_values(df, benchmark_cols)]
34
+ # return df
35
+
36
def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
    """Creates a dataframe from all the individual experiment results.

    Args:
        results_path: Directory holding the raw evaluation result files.
        requests_path: Directory holding the evaluation request files.
        cols: Columns to keep in the returned dataframe, in display order.
        benchmark_cols: Benchmark columns that must all be non-NaN for a
            row (model) to be kept.

    Returns:
        One row per evaluated model, sorted by the average score
        (descending), restricted to ``cols``.
    """
    raw_data = get_raw_eval_results(results_path, requests_path)
    # Debug: inspect the raw data before conversion
    print(f"Raw data before conversion: {raw_data}")

    all_data_json = [v.to_dict() for v in raw_data]
    df = pd.DataFrame.from_records(all_data_json)
    # Debug: inspect the freshly built dataframe
    print(f"Initial DataFrame: {df}")

    # Sort while the average column is still numeric (a lexicographic
    # string sort would mis-order values such as '9.0000' > '10.0000').
    df = df.sort_values(by=[AutoEvalColumn.AVG.name], ascending=False)
    # Debug: inspect the sorted dataframe
    print(f"Sorted DataFrame: {df}")

    # Filter out rows with missing benchmarks BEFORE formatting:
    # f'{x:.4f}' turns NaN into the string 'nan', which
    # has_no_nan_values can no longer detect.
    df = df[has_no_nan_values(df, benchmark_cols)]
    # Debug: inspect the dataframe after NaN filtering
    print(f"Final DataFrame after NaN filtering: {df}")

    score_cols = [
        'ALT E to J BLEU', 'ALT J to E BLEU', 'WikiCorpus E to J BLEU', 'WikiCorpus J to E BLEU',
        'XL-Sum JA BLEU', 'XL-Sum ROUGE1', 'XL-Sum ROUGE2', 'XL-Sum ROUGE-Lsum'
    ]
    # Only format the score columns actually present in the data
    existing_score_cols = [col for col in score_cols if col in df.columns]
    print(f"Existing score columns: {existing_score_cols}")

    # Divide the score columns by 100 and format them as .4f strings.
    # Series.map (via apply) replaces DataFrame.applymap, which is
    # deprecated since pandas 2.1.
    df[existing_score_cols] = (df[existing_score_cols] / 100).apply(
        lambda col: col.map(lambda x: f'{x:.4f}')
    )
    # Debug: inspect the dataframe after score adjustment
    print(f"DataFrame after score adjustment: {df}")

    # Restrict to the requested columns; round() only affects columns
    # that are still numeric — formatted strings pass through unchanged.
    df = df[cols].round(decimals=2)

    return df
80
 
81
 
82
+
83
  def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
84
  """Creates the different dataframes for the evaluation queues requestes"""
85
  entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]