annamonica committed
Commit 492f435 · 1 Parent(s): caa5e2c

update column names
results/BOOM_leaderboard.csv ADDED
@@ -0,0 +1,15 @@
+model,model_type,MASE_6750_scaled,CRPS_6750_scaled,Rank_6750_scaled,MAE_663_unscaled,CRPS_663_unscaled,Rank_663_unscaled
+Toto-Open-Base-1.0,pretrained,0.617,0.375,2.351,0.001,0.025,7.549
+moirai_1.1_base,pretrained,0.710,0.428,4.278,0.000,0.003,5.644
+moirai_1.1_large,pretrained,0.720,0.436,4.499,0.001,0.005,6.707
+moirai_1.1_small,pretrained,0.738,0.447,4.796,0.001,0.009,7.404
+timesfm_2_0_500m,pretrained,0.725,0.447,5.153,0.014,0.091,10.029
+chronos_bolt_base,pretrained,0.726,0.451,5.446,0.003,0.019,7.682
+chronos_bolt_small,pretrained,0.733,0.455,5.793,0.003,0.022,8.140
+autoarima,statistical,0.824,0.736,9.171,0.000,0.001,5.496
+timer,pretrained,0.796,0.639,9.356,0.001,0.005,6.474
+time-moe,pretrained,0.806,0.649,9.369,0.001,0.005,8.505
+visionts,pretrained,0.991,0.675,10.336,0.001,0.009,8.538
+autoets,statistical,0.842,1.975,10.956,0.000,0.030,6.992
+autotheta,statistical,1.123,1.018,11.712,0.001,0.002,6.513
+naive,statistical,1.000,1.000,11.783,0.000,0.006,9.326
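For context on how this new schema is consumed, here is a minimal sketch (not part of this commit) of loading the CSV and mapping its raw column names onto the display headers declared in src/display/utils.py below; the load_leaderboard helper and the explicit rename map are illustrative assumptions drawn from the files in this diff, not the Space's actual loader.

# Hypothetical loader sketch, assuming the column names shown in this commit.
import pandas as pd

RAW_TO_DISPLAY = {
    "MASE_6750_scaled": "MASE_scaled",
    "CRPS_6750_scaled": "CRPS_scaled",
    "Rank_6750_scaled": "Rank_scaled",
    "MAE_663_unscaled": "MAE[0.5]_unscaled",
    "CRPS_663_unscaled": "CRPS_unscaled",
    "Rank_663_unscaled": "Rank_unscaled",
}

def load_leaderboard(path: str = "results/BOOM_leaderboard.csv") -> pd.DataFrame:
    df = pd.read_csv(path)
    missing = set(RAW_TO_DISPLAY) - set(df.columns)
    if missing:  # fail fast if the CSV schema drifts from the declared columns
        raise ValueError(f"leaderboard CSV is missing columns: {sorted(missing)}")
    return df.rename(columns=RAW_TO_DISPLAY)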
results/BOOM_v8_leaderboard_dd_bench_test_scaled_separate_zero_inflated_shifted_gmean_no_grid_search_context_2048_toto_checkpoint_000026_2025-05-04T13_00_15+00_00.csv DELETED
@@ -1,15 +0,0 @@
-model,MASE-6750-scaled,CRPS-6750-scaled,Rank-6750-scaled,eval_metrics/MAE[0.5]-663-unscaled,CRPS-663-unscaled,Rank-663-unscaled
-dd-data-science-us1-prod_ray_foundation-models_TOTO_base-no-dual-softmax-no-tsmixup-1746214361_TorchTrainer_37d72_00000_0_2025-05-02_19-32-43_checkpoint_000026,0.617,0.375,2.351,0.001,0.025,7.549
-moirai_1.1_base,0.710,0.428,4.278,0.000,0.003,5.644
-moirai_1.1_large,0.720,0.436,4.499,0.001,0.005,6.707
-moirai_1.1_small,0.738,0.447,4.796,0.001,0.009,7.404
-timesfm_2_0_500m,0.725,0.447,5.153,0.014,0.091,10.029
-chronos_bolt_base,0.726,0.451,5.446,0.003,0.019,7.682
-chronos_bolt_small,0.733,0.455,5.793,0.003,0.022,8.140
-autoarima,0.824,0.736,9.171,0.000,0.001,5.496
-timer,0.796,0.639,9.356,0.001,0.005,6.474
-time-moe,0.806,0.649,9.369,0.001,0.005,8.505
-visionts,0.991,0.675,10.336,0.001,0.009,8.538
-autoets,0.842,1.975,10.956,0.000,0.030,6.992
-autotheta,1.123,1.018,11.712,0.001,0.002,6.513
-naive,1.000,1.000,11.783,0.000,0.006,9.326
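The two CSVs differ by a mechanical rename, so one plausible way to derive the new file from the deleted one is sketched below. This script is purely illustrative and inferred from the two diffs; it is not the repo's own tooling.

# Hedged sketch: hyphenated metric names become underscore-separated, the
# internal Toto checkpoint id is replaced by its public name, and a
# model_type column is added.
import pandas as pd

OLD_TO_NEW = {
    "MASE-6750-scaled": "MASE_6750_scaled",
    "CRPS-6750-scaled": "CRPS_6750_scaled",
    "Rank-6750-scaled": "Rank_6750_scaled",
    "eval_metrics/MAE[0.5]-663-unscaled": "MAE_663_unscaled",
    "CRPS-663-unscaled": "CRPS_663_unscaled",
    "Rank-663-unscaled": "Rank_663_unscaled",
}
STATISTICAL = {"autoarima", "autoets", "autotheta", "naive"}  # per the new CSV

df = pd.read_csv(
    "results/BOOM_v8_leaderboard_dd_bench_test_scaled_separate_zero_inflated_"
    "shifted_gmean_no_grid_search_context_2048_toto_checkpoint_000026_"
    "2025-05-04T13_00_15+00_00.csv"
).rename(columns=OLD_TO_NEW)

# The internal checkpoint id becomes the model's public name.
df.loc[df["model"].str.startswith("dd-data-science"), "model"] = "Toto-Open-Base-1.0"
df["model_type"] = ["statistical" if m in STATISTICAL else "pretrained" for m in df["model"]]
df[["model", "model_type", *OLD_TO_NEW.values()]].to_csv(
    "results/BOOM_leaderboard.csv", index=False
)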
src/display/utils.py CHANGED
@@ -5,6 +5,7 @@ import pandas as pd
 
 from src.about import Tasks
 
+
 def fields(raw_class):
     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
 
@@ -20,29 +21,34 @@ class ColumnContent:
     hidden: bool = False
     never_hidden: bool = False
 
+
 ## Leaderboard columns
 auto_eval_column_dict = []
 # Init
 auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
-#Scores
-auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
-for task in Tasks:
-    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
+# Scores
+auto_eval_column_dict.append(["MASE_6750_scaled", ColumnContent, ColumnContent("MASE_scaled", "number", True)])
+auto_eval_column_dict.append(["CRPS_6750_scaled", ColumnContent, ColumnContent("CRPS_scaled", "number", True)])
+auto_eval_column_dict.append(["Rank_6750_scaled", ColumnContent, ColumnContent("Rank_scaled", "number", True)])
+auto_eval_column_dict.append(["MAE_663_unscaled", ColumnContent, ColumnContent("MAE[0.5]_unscaled", "number", True)])
+auto_eval_column_dict.append(["CRPS_663_unscaled", ColumnContent, ColumnContent("CRPS_unscaled", "number", True)])
+auto_eval_column_dict.append(["Rank_663_unscaled", ColumnContent, ColumnContent("Rank_unscaled", "number", True)])
 # Model information
-auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
-auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
-auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
-auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
-auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
-auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
-auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
-auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False, hidden=True)])
+# auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
+# auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
+# auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
+# auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
+# auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
+# auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
+# auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
+# auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
 
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
+
 ## For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn: # Queue column
@@ -53,19 +59,21 @@ class EvalQueueColumn: # Queue column
     weight_type = ColumnContent("weight_type", "str", "Original")
     status = ColumnContent("status", "str", True)
 
+
 ## All the model information that we might need
 @dataclass
 class ModelDetails:
     name: str
     display_name: str = ""
-    symbol: str = "" # emoji
+    symbol: str = "" # emoji
 
 
 class ModelType(Enum):
-    PT = ModelDetails(name="pretrained", symbol="🟢")
-    FT = ModelDetails(name="fine-tuned", symbol="🔶")
-    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
-    RL = ModelDetails(name="RL-tuned", symbol="🟦")
+    PT = ModelDetails(name="🟢 pretrained", symbol="🟢")
+    FT = ModelDetails(name="🔶 fine-tuned", symbol="🔶")
+    DL = ModelDetails(name="🔷 deep-learning", symbol="🔷")
+    ST = ModelDetails(name="🟣 statistical", symbol="🟣")
+
     Unknown = ModelDetails(name="", symbol="?")
 
     def to_str(self, separator=" "):
@@ -77,17 +85,19 @@ class ModelType(Enum):
             return ModelType.FT
         if "pretrained" in type or "🟢" in type:
            return ModelType.PT
-        if "RL-tuned" in type or "🟦" in type:
-            return ModelType.RL
-        if "instruction-tuned" in type or "⭕" in type:
-            return ModelType.IFT
+        if "deep-learning" in type or "🟦" in type:
+            return ModelType.DL
+        if "statistical" in type or "🟣" in type:
+            return ModelType.ST
         return ModelType.Unknown
 
+
 class WeightType(Enum):
     Adapter = ModelDetails("Adapter")
     Original = ModelDetails("Original")
     Delta = ModelDetails("Delta")
 
+
 class Precision(Enum):
     float16 = ModelDetails("float16")
     bfloat16 = ModelDetails("bfloat16")
@@ -100,6 +110,7 @@ class Precision(Enum):
             return Precision.bfloat16
         return Precision.Unknown
 
+
 # Column selection
 COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 
@@ -107,4 +118,3 @@ EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
 EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
 
 BENCHMARK_COLS = [t.value.col_name for t in Tasks]
-
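For readers unfamiliar with this template, the mechanism the diff relies on: make_dataclass turns auto_eval_column_dict into the AutoEvalColumn dataclass, and COLS then keeps only non-hidden columns, which is why the new hidden=True flag on model_type removes the Type column from the displayed table. Below is a self-contained sketch of that mechanism, mirroring (not importing) src/display/utils.py; ColumnContent is frozen here, unlike the repo's version, so its instances are hashable dataclass defaults, which recent Python versions require.

# Minimal reproduction of the column-selection mechanism, assuming the
# entries shown in this commit; not the repo's actual module.
from dataclasses import dataclass, make_dataclass

@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

auto_eval_column_dict = [
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    ["MASE_6750_scaled", ColumnContent, ColumnContent("MASE_scaled", "number", True)],
    ["model_type", ColumnContent, ColumnContent("Type", "str", False, hidden=True)],
]
# Each 3-element entry is (field name, type, default), so the ColumnContent
# instances become class-level defaults on the generated dataclass.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

def fields(raw_class):
    # Non-dunder class attributes are exactly the ColumnContent defaults.
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]

COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
print(COLS)  # ['Model', 'MASE_scaled']: 'Type' is dropped because hidden=True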