agh123 committed
Commit 6327e70 · 1 Parent(s): 8d02ac1

update table and plots

src/components/filters.py CHANGED
@@ -12,11 +12,24 @@ def render_grouping_options() -> List[str]:
         "flash_attn",
         "cache_type_k",
         "cache_type_v",
-        "PP Value",
-        "TG Value",
+        "PP Config",
+        "TG Config",
+        "n_context",
+        "n_batch",
+        "n_ubatch",
     ]
 
-    default_groups = ["Model ID", "Device", "Platform"]
+    default_groups = [
+        "Model ID",
+        "Device",
+        "Platform",
+        "n_threads",
+        "flash_attn",
+        "cache_type_k",
+        "cache_type_v",
+        "PP Config",
+        "TG Config",
+    ]
 
     selected_groups = st.multiselect(
         "Group Results By",
@@ -39,10 +52,12 @@ def render_column_visibility() -> Set[str]:
             "Memory Usage (%)",
         ],
         "Benchmark Info": [
-            "PP Value",
-            "TG Value",
-            "Prompt Processing",
-            "Token Generation",
+            "PP Config",
+            "TG Config",
+            "Prompt Processing (mean)",
+            "Prompt Processing (std)",
+            "Token Generation (mean)",
+            "Token Generation (std)",
         ],
         "Model Info": [
             "Model",
@@ -54,6 +69,9 @@ def render_column_visibility() -> Set[str]:
             "flash_attn",
             "cache_type_k",
             "cache_type_v",
+            "n_context",
+            "n_batch",
+            "n_ubatch",
         ],
     }
 
@@ -63,8 +81,16 @@ def render_column_visibility() -> Set[str]:
         "Platform",
         "Model",
         "Model Size",
-        "Prompt Processing",
-        "Token Generation",
+        "Prompt Processing (mean)",
+        "Prompt Processing (std)",
+        "Token Generation (mean)",
+        "Token Generation (std)",
+        "n_threads",
+        "flash_attn",
+        "cache_type_k",
+        "cache_type_v",
+        "PP Config",
+        "TG Config",
     }
 
     with st.expander("Column Visibility", expanded=False):
src/components/visualizations.py CHANGED
@@ -4,11 +4,16 @@ import pandas as pd
 from typing import Optional, Dict, List, Set
 
 
-def create_performance_plot(df: pd.DataFrame, metric: str, title: str):
+def create_performance_plot(
+    df: pd.DataFrame, metric: str, title: str, hover_data: List[str] = None
+):
     """Create a performance comparison plot"""
     if df.empty:
         return None
 
+    if hover_data is None:
+        hover_data = ["CPU Cores", "Memory Usage (GB)"]
+
     fig = px.bar(
         df,
         x="Device",
@@ -17,11 +22,11 @@ def create_performance_plot(df: pd.DataFrame, metric: str, title: str):
         title=title,
         template="plotly_white",
         barmode="group",
-        hover_data=["CPU Cores", "Memory Usage (GB)", "Model Size"],
+        hover_data=hover_data,
     )
     fig.update_layout(
         xaxis_title="Device",
-        yaxis_title="Time (ms)",
+        yaxis_title="Token/sec",
         legend_title="Platform",
         plot_bgcolor="white",
         height=400,
@@ -34,14 +39,16 @@ def filter_dataframe(df: pd.DataFrame, filters: Dict) -> pd.DataFrame:
     if df.empty:
         return df
 
+    filtered_df = df.copy()
+
     # Basic filters
     basic_filters = filters["basic_filters"]
     if basic_filters["model"] != "All":
-        df = df[df["Model ID"] == basic_filters["model"]]
+        filtered_df = filtered_df[filtered_df["Model ID"] == basic_filters["model"]]
     if basic_filters["platform"] != "All":
-        df = df[df["Platform"] == basic_filters["platform"]]
+        filtered_df = filtered_df[filtered_df["Platform"] == basic_filters["platform"]]
     if basic_filters["device"] != "All":
-        df = df[df["Device"] == basic_filters["device"]]
+        filtered_df = filtered_df[filtered_df["Device"] == basic_filters["device"]]
 
     # Benchmark configuration filters
     benchmark_config = filters["benchmark_config"]
@@ -49,45 +56,44 @@ def filter_dataframe(df: pd.DataFrame, filters: Dict) -> pd.DataFrame:
     pp_min, pp_max = benchmark_config["pp_range"]
     tg_min, tg_max = benchmark_config["tg_range"]
 
-    # Extract PP/TG values if not already present
-    if "PP Value" not in df.columns:
-        df["PP Value"] = df["Benchmark"].apply(
-            lambda x: int(x.split("pp: ")[1].split(",")[0])
-        )
-    if "TG Value" not in df.columns:
-        df["TG Value"] = df["Benchmark"].apply(
-            lambda x: int(x.split("tg: ")[1].split(")")[0])
-        )
+    pp_values = filtered_df["PP Config"]
+    tg_values = filtered_df["TG Config"]
 
-    df = df[
-        (df["PP Value"] >= pp_min)
-        & (df["PP Value"] <= pp_max)
-        & (df["TG Value"] >= tg_min)
-        & (df["TG Value"] <= tg_max)
+    filtered_df = filtered_df[
+        (pp_values >= pp_min)
+        & (pp_values <= pp_max)
+        & (tg_values >= tg_min)
+        & (tg_values <= tg_max)
     ]
 
     # Advanced settings filters
     advanced = filters["advanced_settings"]
     if advanced["n_threads"]:
-        df["n_threads"] = df["initSettings"].apply(lambda x: x.get("n_threads"))
-        df = df[df["n_threads"].isin(advanced["n_threads"])]
+        n_threads = filtered_df["initSettings"].apply(lambda x: x.get("n_threads"))
+        filtered_df = filtered_df[n_threads.isin(advanced["n_threads"])]
 
     if advanced["flash_attn"]:
-        df["flash_attn"] = df["initSettings"].apply(lambda x: x.get("flash_attn"))
-        df = df[df["flash_attn"].isin(advanced["flash_attn"])]
+        flash_attn = filtered_df["initSettings"].apply(lambda x: x.get("flash_attn"))
+        filtered_df = filtered_df[flash_attn.isin(advanced["flash_attn"])]
 
     if advanced["cache_type"]:
-        df["cache_type_k"] = df["initSettings"].apply(lambda x: x.get("cache_type_k"))
-        df["cache_type_v"] = df["initSettings"].apply(lambda x: x.get("cache_type_v"))
-        df = df[
-            (df["cache_type_k"].isin(advanced["cache_type"]))
-            & (df["cache_type_v"].isin(advanced["cache_type"]))
+        cache_type_k = filtered_df["initSettings"].apply(
+            lambda x: x.get("cache_type_k")
+        )
+        cache_type_v = filtered_df["initSettings"].apply(
+            lambda x: x.get("cache_type_v")
+        )
+        filtered_df = filtered_df[
+            (cache_type_k.isin(advanced["cache_type"]))
+            & (cache_type_v.isin(advanced["cache_type"]))
         ]
 
     if advanced["max_memory_usage"] < 100:
-        df = df[df["Memory Usage (%)"] <= advanced["max_memory_usage"]]
+        filtered_df = filtered_df[
+            filtered_df["Memory Usage (%)"] <= advanced["max_memory_usage"]
+        ]
 
-    return df
+    return filtered_df
 
 
 def render_performance_plots(df: pd.DataFrame, filters: Dict):
@@ -102,15 +108,15 @@ def render_performance_plots(df: pd.DataFrame, filters: Dict):
         st.warning("No data matches the selected filters for plotting.")
         return
 
-    # Extract PP/TG values if not already present
-    if "PP Value" not in filtered_df.columns:
-        filtered_df["PP Value"] = filtered_df["Benchmark"].apply(
-            lambda x: int(x.split("pp: ")[1].split(",")[0])
-        )
-    if "TG Value" not in filtered_df.columns:
-        filtered_df["TG Value"] = filtered_df["Benchmark"].apply(
-            lambda x: int(x.split("tg: ")[1].split(")")[0])
-        )
+    ## # Extract PP/TG values if not already present
+    ## if "PP Value" not in filtered_df.columns:
+    ##     filtered_df["PP Value"] = filtered_df["Benchmark"].apply(
+    ##         lambda x: int(x.split("pp: ")[1].split(",")[0])
+    ##     )
+    ## if "TG Value" not in filtered_df.columns:
+    ##     filtered_df["TG Value"] = filtered_df["Benchmark"].apply(
+    ##         lambda x: int(x.split("tg: ")[1].split(")")[0])
+    ##     )
 
     # Extract initSettings if not already present
     if "n_threads" not in filtered_df.columns:
@@ -127,30 +133,83 @@ def render_performance_plots(df: pd.DataFrame, filters: Dict):
             lambda x: x.get("cache_type_v")
         )
 
-    # Group by device and platform for plotting
-    plot_group = (
-        filtered_df.groupby(["Device", "Platform"])
-        .agg(
-            {
-                "Prompt Processing": "mean",
-                "Token Generation": "mean",
-                "Memory Usage (%)": "mean",
-                "Memory Usage (GB)": "mean",
-                "CPU Cores": "first",
-                "Model Size": "first",
-                "PP Value": "first",
-                "TG Value": "first",
-            }
-        )
-        .reset_index()
+    # Build aggregation dictionary based on available columns
+    agg_dict = {}
+
+    # Always include performance metrics
+    agg_dict.update(
+        {
+            "Prompt Processing": "mean",
+            "Token Generation": "mean",
+        }
+    )
+
+    # Include memory metrics if available
+    if "Memory Usage (%)" in filtered_df.columns:
+        agg_dict["Memory Usage (%)"] = "mean"
+    if "Memory Usage (GB)" in filtered_df.columns:
+        agg_dict["Memory Usage (GB)"] = "mean"
+
+    # Include device info if available
+    if "CPU Cores" in filtered_df.columns:
+        agg_dict["CPU Cores"] = "first"
+
+    # Include config values
+    agg_dict.update(
+        {
+            "PP Config": "first",
+            "TG Config": "first",
+        }
     )
 
+    # Group by device and platform for plotting
+    plot_group = filtered_df.groupby(["Device", "Platform"]).agg(agg_dict).reset_index()
+
+    # Flatten column names and rename them
+    # plot_group.columns = [
+    #     col[0] if col[1] == "" else f"{col[0]} ({col[1]})" for col in plot_group.columns
+    # ]
+    # print("plot_group2:", plot_group)
+
+    # Rename columns for display
+    column_mapping = {
+        "Prompt Processing": "PP Avg (t/s)",
+        # "Prompt Processing (std)": "PP Std (t/s)",
+        "Prompt Processing (count)": "Runs",
+        "Token Generation": "TG Avg (t/s)",
+        # "Token Generation (std)": "TG Std (t/s)",
+        "Memory Usage (%) (mean)": "Memory Usage (%)",
+        "Memory Usage (GB) (mean)": "Memory Usage (GB)",
+        "PP Config (first)": "PP Config",
+        "TG Config (first)": "TG Config",
+        "Model Size (first)": "Model Size",
+        "CPU Cores (first)": "CPU Cores",
+        "Total Memory (GB) (first)": "Total Memory (GB)",
+        "n_threads (first)": "n_threads",
+        "flash_attn (first)": "flash_attn",
+        "cache_type_k (first)": "cache_type_k",
+        "cache_type_v (first)": "cache_type_v",
+        "n_context (first)": "n_context",
+        "n_batch (first)": "n_batch",
+        "n_ubatch (first)": "n_ubatch",
+    }
+    plot_group = plot_group.rename(columns=column_mapping)
+
+    # Define hover data based on available columns
+    hover_data = []
+    if "CPU Cores" in plot_group.columns:
+        hover_data.append("CPU Cores")
+    if "Memory Usage (GB)" in plot_group.columns:
+        hover_data.append("Memory Usage (GB)")
+
+    # Create plots
     col1, col2 = st.columns(2)
     with col1:
         fig1 = create_performance_plot(
             plot_group,
-            "Prompt Processing",
-            f"Prompt Processing Time (PP: {plot_group['PP Value'].iloc[0]})",
+            "PP Avg (t/s)",
+            f"Prompt Processing (PP: {plot_group['PP Config'].iloc[0]})",
+            hover_data=hover_data,
         )
         if fig1:
             st.plotly_chart(fig1, use_container_width=True)
@@ -158,8 +217,9 @@ def render_performance_plots(df: pd.DataFrame, filters: Dict):
     with col2:
         fig2 = create_performance_plot(
             plot_group,
-            "Token Generation",
-            f"Token Generation Time (TG: {plot_group['TG Value'].iloc[0]})",
+            "TG Avg (t/s)",
+            f"Token Generation (TG: {plot_group['TG Config'].iloc[0]})",
+            hover_data=hover_data,
         )
         if fig2:
             st.plotly_chart(fig2, use_container_width=True)
@@ -177,49 +237,46 @@ def render_leaderboard_table(df: pd.DataFrame, filters: Dict):
         st.warning("No data matches the selected filters.")
         return
 
-    # Extract settings from benchmark results
-    filtered_df["PP Value"] = filtered_df["Benchmark"].apply(
-        lambda x: int(x.split("pp: ")[1].split(",")[0])
-    )
-    filtered_df["TG Value"] = filtered_df["Benchmark"].apply(
-        lambda x: int(x.split("tg: ")[1].split(")")[0])
-    )
-
-    # Extract initSettings
-    filtered_df["n_threads"] = filtered_df["initSettings"].apply(
-        lambda x: x.get("n_threads")
-    )
-    filtered_df["flash_attn"] = filtered_df["initSettings"].apply(
-        lambda x: x.get("flash_attn")
-    )
-    filtered_df["cache_type_k"] = filtered_df["initSettings"].apply(
-        lambda x: x.get("cache_type_k")
-    )
-    filtered_df["cache_type_v"] = filtered_df["initSettings"].apply(
-        lambda x: x.get("cache_type_v")
-    )
-
     # Group by selected columns
     grouping_cols = filters["grouping"]
     if not grouping_cols:
         grouping_cols = ["Model ID", "Device", "Platform"]  # Default grouping
 
+    # Define aggregations (excluding grouping columns)
     agg_dict = {
-        "Prompt Processing": ["mean", "count", "std"],
-        "Token Generation": ["mean", "std"],
-        "Memory Usage (%)": "mean",
-        "Memory Usage (GB)": "mean",
-        "Total Memory (GB)": "first",
-        "CPU Cores": "first",
-        "Model Size": "first",
-        "PP Value": "first",
-        "TG Value": "first",
-        "n_threads": "first",
-        "flash_attn": "first",
-        "cache_type_k": "first",
-        "cache_type_v": "first",
+        col: agg
+        for col, agg in {
+            "Prompt Processing": ["mean", "std"],
+            "Token Generation": ["mean", "std"],
+            # "Memory Usage (%)": "mean",
+            "Memory Usage (GB)": "mean",  # For a given model, device, platform, mem should be the same.
+            "Total Memory (GB)": "first",  # For a given model, device, platform, mem should be the same.
+            "CPU Cores": "first",  # For a given model, device, platform, cpu cores should be the same.
+            "Model Size": "first",  # model size should be the same for all.
+        }.items()
+        if col not in grouping_cols
     }
 
+    # Extract initSettings if needed
+    init_settings_cols = {
+        "n_threads": "n_threads",
+        "flash_attn": "flash_attn",
+        "cache_type_k": "cache_type_k",
+        "cache_type_v": "cache_type_v",
+        "n_context": "n_context",
+        "n_batch": "n_batch",
+        "n_ubatch": "n_ubatch",
+    }
+
+    for col, setting in init_settings_cols.items():
+        if col not in filtered_df.columns:
+            filtered_df[col] = filtered_df["initSettings"].apply(
+                lambda x: x.get(setting)
+            )
+        if col not in grouping_cols:
+            agg_dict[col] = "first"
+
+    # Group and aggregate
     grouped_df = filtered_df.groupby(grouping_cols).agg(agg_dict).reset_index()
 
     # Flatten column names
@@ -227,12 +284,6 @@ def render_leaderboard_table(df: pd.DataFrame, filters: Dict):
         col[0] if col[1] == "" else f"{col[0]} ({col[1]})" for col in grouped_df.columns
     ]
 
-    # Sort by Model Size, PP Value, and TG time
-    grouped_df = grouped_df.sort_values(
-        by=["Model Size (first)", "PP Value (first)", "Token Generation (mean)"],
-        ascending=[False, True, True],
-    )
-
     # Round numeric columns
     numeric_cols = [
         col
@@ -241,17 +292,41 @@ def render_leaderboard_table(df: pd.DataFrame, filters: Dict):
    ]
     grouped_df[numeric_cols] = grouped_df[numeric_cols].round(2)
 
+    # Sort using the actual column names we have
+    sort_cols = []
+    if "Model Size (first)" in grouped_df.columns:
+        sort_cols.append("Model Size (first)")
+    if "PP Config (first)" in grouped_df.columns:
+        sort_cols.append("PP Config (first)")
+    if "Token Generation (mean)" in grouped_df.columns:
+        sort_cols.append("Token Generation (mean)")
+
+    if sort_cols:  # Only sort if we have columns to sort by
+        grouped_df = grouped_df.sort_values(
+            by=sort_cols, ascending=[False] + [True] * (len(sort_cols) - 1)
+        )
+
     # Rename columns for display
     column_mapping = {
-        "Prompt Processing (mean)": "PP Avg (ms)",
-        "Prompt Processing (std)": "PP Std",
+        "Prompt Processing (mean)": "PP Avg (t/s)",
+        "Prompt Processing (std)": "PP Std (t/s)",
         "Prompt Processing (count)": "Runs",
-        "Token Generation (mean)": "TG Avg (ms)",
-        "Token Generation (std)": "TG Std",
+        "Token Generation (mean)": "TG Avg (t/s)",
+        "Token Generation (std)": "TG Std (t/s)",
         "Memory Usage (%) (mean)": "Memory Usage (%)",
         "Memory Usage (GB) (mean)": "Memory Usage (GB)",
-        "PP Value (first)": "PP Value",
-        "TG Value (first)": "TG Value",
+        "PP Config (first)": "PP Config",
+        "TG Config (first)": "TG Config",
+        "Model Size (first)": "Model Size",
+        "CPU Cores (first)": "CPU Cores",
+        "Total Memory (GB) (first)": "Total Memory (GB)",
+        "n_threads (first)": "n_threads",
+        "flash_attn (first)": "flash_attn",
+        "cache_type_k (first)": "cache_type_k",
+        "cache_type_v (first)": "cache_type_v",
+        "n_context (first)": "n_context",
+        "n_batch (first)": "n_batch",
+        "n_ubatch (first)": "n_ubatch",
     }
     grouped_df = grouped_df.rename(columns=column_mapping)
 
@@ -262,38 +337,63 @@ def render_leaderboard_table(df: pd.DataFrame, filters: Dict):
         column_name_mapping = {
             "Device": "Device",
             "Platform": "Platform",
-            "CPU Cores": "CPU Cores (first)",
-            "Total Memory (GB)": "Total Memory (GB) (first)",
+            "CPU Cores": "CPU Cores",
+            "Total Memory (GB)": "Total Memory (GB)",
             "Memory Usage (%)": "Memory Usage (%)",
-            "PP Value": "PP Value",
-            "TG Value": "TG Value",
-            "Prompt Processing": "PP Avg (ms)",
-            "Token Generation": "TG Avg (ms)",
+            "PP Config": "PP Config",
+            "TG Config": "TG Config",
+            "Prompt Processing (mean)": "PP Avg (t/s)",
+            "Token Generation (mean)": "TG Avg (t/s)",
+            "Prompt Processing (std)": "PP Std (t/s)",
+            "Token Generation (std)": "TG Std (t/s)",
            "Model": "Model ID",
-            "Model Size": "Model Size (first)",
+            "Model Size": "Model Size",
            "Model ID": "Model ID",
-            "n_threads": "n_threads (first)",
-            "flash_attn": "flash_attn (first)",
-            "cache_type_k": "cache_type_k (first)",
-            "cache_type_v": "cache_type_v (first)",
+            "n_threads": "n_threads",
+            "flash_attn": "flash_attn",
+            "cache_type_k": "cache_type_k",
+            "cache_type_v": "cache_type_v",
+            "n_context": "n_context",
+            "n_batch": "n_batch",
+            "n_ubatch": "n_ubatch",
         }
-        display_cols = [
-            column_name_mapping[col]
-            for col in visible_cols
-            if col in column_name_mapping
-        ]
+
+        # Convert visible columns and grouping columns to their mapped names
+        mapped_visible = {column_name_mapping.get(col, col) for col in visible_cols}
+        mapped_grouping = {
+            column_name_mapping.get(col, col) for col in filters["grouping"]
+        }
+
+        # Combine both sets to get unique columns
+        all_cols = mapped_visible | mapped_grouping
+
+        # Create final display columns list while preserving grouping columns order
+        display_cols = []
+
+        # First add grouping columns in their original order
+        for col in filters["grouping"]:
+            mapped_col = column_name_mapping.get(col, col)
+            if mapped_col in all_cols:
+                display_cols.append(mapped_col)
+                all_cols.remove(mapped_col)
+
+        # Then add remaining columns
+        display_cols.extend(sorted(all_cols))
     else:
         # Default columns if none selected
         display_cols = [
             "Device",
             "Platform",
            "Model ID",
-            "Model Size (first)",
+            "Model Size",
             "PP Avg (ms)",
             "TG Avg (ms)",
             "Memory Usage (%)",
         ]
 
+    # Ensure all display columns exist in the DataFrame
+    display_cols = [col for col in display_cols if col in grouped_df.columns]
+
     # Display the filtered and grouped table
     st.dataframe(
         grouped_df[display_cols],
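
For reference, a minimal sketch of how the reworked filter_dataframe above is meant to be driven. The dict keys are the ones the function actually reads; the concrete values are illustrative only and not part of this commit:

    filters = {
        "basic_filters": {"model": "All", "platform": "All", "device": "All"},
        "benchmark_config": {"pp_range": (1, 2048), "tg_range": (1, 512)},
        "advanced_settings": {
            "n_threads": [],          # e.g. [4, 8]; an empty list skips this filter
            "flash_attn": [],         # e.g. [True]
            "cache_type": [],         # applied to both cache_type_k and cache_type_v
            "max_memory_usage": 100,  # percent; 100 disables the memory cut-off
        },
        "grouping": ["Model ID", "Device", "Platform"],
    }
    filtered = filter_dataframe(df, filters)  # df must carry the new "PP Config"/"TG Config" columns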
src/services/firebase.py CHANGED
@@ -68,6 +68,8 @@ def format_leaderboard_data(submissions: List[dict]) -> pd.DataFrame:
                     "Device": device_info.get("model", "Unknown"),
                     "Platform": device_info.get("systemName", "Unknown"),
                     "Benchmark": f"{benchmark_result.get('config', {}).get('label', 'Unknown')} (pp: {benchmark_result.get('config', {}).get('pp', 'N/A')}, tg: {benchmark_result.get('config', {}).get('tg', 'N/A')})",
+                    "PP Config": benchmark_result.get("config", {}).get("pp", "N/A"),
+                    "TG Config": benchmark_result.get("config", {}).get("tg", "N/A"),
                     "Model": benchmark_result.get("modelName", "Unknown"),
                     "Model Size": format_params_in_b(
                         benchmark_result.get("modelNParams", 0)
@@ -97,6 +99,7 @@ def format_leaderboard_data(submissions: List[dict]) -> pd.DataFrame:
                     "Model ID": benchmark_result.get("modelId", "Unknown"),
                     "OID": benchmark_result.get("oid"),
                     "initSettings": benchmark_result.get("initSettings"),
+                    "Version": device_info.get("version", "Unknown"),
                 }
             )
         except Exception as e: