rzanoli committed on
Commit
9835969
·
1 Parent(s): 9cdb678

Place charts on the main page immediately before the leaderboard table

Browse files
Files changed (2) hide show
  1. app.py +169 -36
  2. src/display/css_html_js.py +1 -0
app.py CHANGED
@@ -160,11 +160,12 @@ def boxplot_per_task(dataframe=None, baselines=None):
160
  if task in baselines and baselines[task] is not None:
161
  fig.add_shape(
162
  type="line",
163
- x0=i-0.3, x1=i+0.3,
164
  y0=baselines[task], y1=baselines[task],
165
- line=dict(color="black", width=2, dash="dash"),
166
  xref="x", yref="y"
167
  )
 
168
  fig.add_annotation(
169
  x=i, y=baselines[task],
170
  text=f"{baselines[task]}%",
@@ -172,22 +173,23 @@ def boxplot_per_task(dataframe=None, baselines=None):
172
  yshift=10,
173
  font=dict(size=10, color="black")
174
  )
 
175
 
176
  fig.update_layout(
177
  title="Distribution of Model Accuracy by Task",
178
- #xaxis_title="Task",
179
  yaxis_title="Combined Performance",
180
  template="plotly_white",
181
  boxmode="group",
182
  dragmode=False,
183
- font=dict(family="Arial", size=13),
184
  margin=dict(b=140),
185
  )
186
 
187
  fig.add_annotation(
188
  text=(
189
- "In zero/few-shot settings, models are getting closer to the supervised EVALITA baselines <br>"
190
- "(black dashed line), except for NER and REL."
191
  ),
192
  xref="paper", yref="paper",
193
  x=0.5, y=-0.30,
@@ -211,6 +213,12 @@ def boxplot_prompts_per_task(dataframe, tasks=None):
211
  if tasks is None:
212
  tasks = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
213
 
 
 
 
 
 
 
214
  fig = go.Figure()
215
 
216
  # Liste per creare una sola voce in legenda per Average e Best
@@ -264,12 +272,12 @@ def boxplot_prompts_per_task(dataframe, tasks=None):
264
  )
265
 
266
  fig.update_layout(
267
- title= "Average Prompt Accuracy vs Best Prompt Accuracy per Task",
268
- xaxis_title="",
269
  yaxis_title="Combined Performance",
270
  barmode='group',
271
  template="plotly_white",
272
- font=dict(family="Arial", size=13),
273
  yaxis=dict(range=[0, 100], fixedrange=True),
274
  )
275
 
@@ -286,29 +294,28 @@ def boxplot_prompts_per_task(dataframe, tasks=None):
286
 
287
  return fig
288
 
 
289
 
 
 
 
 
 
 
 
290
 
291
- def line_chart(dataframe):
292
  # Separiamo i dati in base a IS_FS
293
  df_true = dataframe[dataframe['IS_FS'] == True]
294
  df_false = dataframe[dataframe['IS_FS'] == False]
295
 
296
- # Estrai valori x, y e labels per True e False
297
  x_true = df_true['#Params (B)'].tolist()
298
  y_true = df_true['Avg. Comb. Perf. ⬆️'].tolist()
299
- labels_true = [
300
- #re.search(r'>([^<>/]+/[^<>]+)<', m).group(1).split('/')[-1]
301
- re.search(r'>([^<]+)<', m).group(1)
302
- for m in df_true['Model'].tolist()
303
- ]
304
 
305
  x_false = df_false['#Params (B)'].tolist()
306
  y_false = df_false['Avg. Comb. Perf. ⬆️'].tolist()
307
- labels_false = [
308
- #re.search(r'>([^<>/]+/[^<>]+)<', m).group(1).split('/')[-1]
309
- re.search(r'>([^<]+)<', m).group(1)
310
- for m in df_false['Model'].tolist()
311
- ]
312
 
313
  fig = go.Figure()
314
 
@@ -316,11 +323,14 @@ def line_chart(dataframe):
316
  fig.add_trace(go.Scatter(
317
  x=x_true,
318
  y=y_true,
319
- mode='markers', # solo marker, niente testo
320
  name='5-Shot',
321
- marker=dict(color='red', size=10),
 
 
 
322
  hovertemplate='<b>%{customdata}</b><br>#Params: %{x}<br>Performance: %{y}<extra></extra>',
323
- customdata=labels_true # tutte le informazioni sul hover
324
  ))
325
 
326
  # Punti IS_FS=False
@@ -329,7 +339,10 @@ def line_chart(dataframe):
329
  y=y_false,
330
  mode='markers',
331
  name='0-Shot',
332
- marker=dict(color='blue', size=10),
 
 
 
333
  hovertemplate='<b>%{customdata}</b><br>#Params: %{x}<br>Performance: %{y}<extra></extra>',
334
  customdata=labels_false
335
  ))
@@ -340,13 +353,18 @@ def line_chart(dataframe):
340
  yaxis_title="Avg. Combined Performance",
341
  template="plotly_white",
342
  hovermode="closest",
343
- dragmode=False
 
 
 
 
 
344
  )
345
 
346
- # Aggiungi la caption come annotazione separata
347
  fig.add_annotation(
348
- text="Models with more parameters generally perform better than smaller ones. However, few-shot learning <br>"
349
- "can sometimes enable smaller models to outperform larger models evaluated in zero-shot settings.",
350
  xref="paper", yref="paper",
351
  x=0, y=-0.3,
352
  showarrow=False,
@@ -354,15 +372,124 @@ def line_chart(dataframe):
354
  align="left"
355
  )
356
 
357
- # Disabilita lo zoom e altri controlli
358
  fig.update_xaxes(fixedrange=True, rangeslider_visible=False)
359
  fig.update_yaxes(fixedrange=True)
360
- #fig.update_yaxes(range=[0, 100], fixedrange=True)
361
 
362
  return fig
363
 
364
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
 
 
366
 
367
 
368
  # Define task metadata (icons, names, descriptions)
@@ -441,13 +568,11 @@ def init_leaderboard(dataframe, default_selection=None, hidden_columns=None):
441
  else:
442
  new_model_column.append(row["Model"])
443
 
444
-
445
  # Lista delle colonne da aggiornare
446
- cols_to_update = ["REL Best Prompt Id", "NER Best Prompt Id", "SU Best Prompt Id", "LS Best Prompt Id"]
447
  # Applichiamo la trasformazione
448
- for col in cols_to_update:
449
- dataframe[col] = dataframe[col].replace({1: 7, 2: 8})
450
-
451
 
452
  # Aggiorna la colonna Model
453
  sorted_dataframe["Model"] = new_model_column
@@ -641,6 +766,12 @@ with demo:
641
  )
642
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
643
 
 
 
 
 
 
 
644
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
645
 
646
  # Main leaderboard tab
@@ -668,6 +799,7 @@ with demo:
668
  """
669
  )
670
 
 
671
  with gr.TabItem("📈 Charts"):
672
  #gr.Plot(value=line_chart(LEADERBOARD_DF), label="Andamento di esempio")
673
  #gr.Plot(value=line_chart_interactive_test(), label="Andamento interattivo")
@@ -675,6 +807,7 @@ with demo:
675
  gr.Plot(value=boxplot_per_task(LEADERBOARD_DF, BASELINES))
676
  gr.Plot(value=boxplot_prompts_per_task(LEADERBOARD_DF))
677
  gr.Plot(value=barplot_mean_few_minus_zero_shot(LEADERBOARD_DF))
 
678
 
679
  # About tab
680
  with gr.TabItem("📝 About"):
 
160
  if task in baselines and baselines[task] is not None:
161
  fig.add_shape(
162
  type="line",
163
+ x0=i - 0.3, x1=i + 0.3,
164
  y0=baselines[task], y1=baselines[task],
165
+ line=dict(color="black", width=2, dash="dot"), # più visibile
166
  xref="x", yref="y"
167
  )
168
+ '''
169
  fig.add_annotation(
170
  x=i, y=baselines[task],
171
  text=f"{baselines[task]}%",
 
173
  yshift=10,
174
  font=dict(size=10, color="black")
175
  )
176
+ '''
177
 
178
  fig.update_layout(
179
  title="Distribution of Model Accuracy by Task",
180
+ xaxis_title="Task",
181
  yaxis_title="Combined Performance",
182
  template="plotly_white",
183
  boxmode="group",
184
  dragmode=False,
185
+ font=dict(family="Arial", size=10),
186
  margin=dict(b=140),
187
  )
188
 
189
  fig.add_annotation(
190
  text=(
191
+ " In tasks like TE and SA, zero/few-shot models reach accuracy close to supervised <br> "
192
+ "methods at EVALITA (dashed line); in NER and REL they remain much lower. "
193
  ),
194
  xref="paper", yref="paper",
195
  x=0.5, y=-0.30,
 
213
  if tasks is None:
214
  tasks = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
215
 
216
+ # Lista delle colonne da aggiornare
217
+ cols_to_update = ["REL Best Prompt Id", "NER Best Prompt Id", "SU Best Prompt Id", "LS Best Prompt Id"]
218
+ # Applichiamo la trasformazione
219
+ for col in cols_to_update:
220
+ dataframe[col] = dataframe[col].replace({1: 7, 2: 8})
221
+
222
  fig = go.Figure()
223
 
224
  # Liste per creare una sola voce in legenda per Average e Best
 
272
  )
273
 
274
  fig.update_layout(
275
+ title= "Prompt Accuracy: Avg vs Best",
276
+ xaxis_title="Task",
277
  yaxis_title="Combined Performance",
278
  barmode='group',
279
  template="plotly_white",
280
+ font=dict(family="Arial", size=10),
281
  yaxis=dict(range=[0, 100], fixedrange=True),
282
  )
283
 
 
294
 
295
  return fig
296
 
297
+ def line_chart2(dataframe):
298
 
299
+ # Normalizziamo le dimensioni per avere marker non troppo piccoli né enormi
300
+ def scale_sizes(values, min_size=8, max_size=30):
301
+ vmin, vmax = min(values), max(values)
302
+ return [
303
+ min_size + (val - vmin) / (vmax - vmin) * (max_size - min_size) if vmax > vmin else (min_size + max_size) / 2
304
+ for val in values
305
+ ]
306
 
 
307
  # Separiamo i dati in base a IS_FS
308
  df_true = dataframe[dataframe['IS_FS'] == True]
309
  df_false = dataframe[dataframe['IS_FS'] == False]
310
 
311
+ # Estrai valori x, y e labels
312
  x_true = df_true['#Params (B)'].tolist()
313
  y_true = df_true['Avg. Comb. Perf. ⬆️'].tolist()
314
+ labels_true = [re.search(r'>([^<]+)<', m).group(1) for m in df_true['Model'].tolist()]
 
 
 
 
315
 
316
  x_false = df_false['#Params (B)'].tolist()
317
  y_false = df_false['Avg. Comb. Perf. ⬆️'].tolist()
318
+ labels_false = [re.search(r'>([^<]+)<', m).group(1) for m in df_false['Model'].tolist()]
 
 
 
 
319
 
320
  fig = go.Figure()
321
 
 
323
  fig.add_trace(go.Scatter(
324
  x=x_true,
325
  y=y_true,
326
+ mode='markers',
327
  name='5-Shot',
328
+ marker=dict(
329
+ color='blue',
330
+ size=scale_sizes(x_true) # marker più grandi se #Params è grande
331
+ ),
332
  hovertemplate='<b>%{customdata}</b><br>#Params: %{x}<br>Performance: %{y}<extra></extra>',
333
+ customdata=labels_true
334
  ))
335
 
336
  # Punti IS_FS=False
 
339
  y=y_false,
340
  mode='markers',
341
  name='0-Shot',
342
+ marker=dict(
343
+ color='red',
344
+ size=scale_sizes(x_false)
345
+ ),
346
  hovertemplate='<b>%{customdata}</b><br>#Params: %{x}<br>Performance: %{y}<extra></extra>',
347
  customdata=labels_false
348
  ))
 
353
  yaxis_title="Avg. Combined Performance",
354
  template="plotly_white",
355
  hovermode="closest",
356
+ font=dict(family="Arial", size=10),
357
+ dragmode=False,
358
+ xaxis=dict(
359
+ tickvals=[0, 25, 50, 75, 100, 125], # valori che vuoi mostrare
360
+ ticktext=["0", "25", "50", "75", "100"]
361
+ )
362
  )
363
 
364
+ # Caption
365
  fig.add_annotation(
366
+ text="Accuracy generally rises with #Params, but smaller models with 5-shot <br> "
367
+ "can outperform larger zero-shot models.",
368
  xref="paper", yref="paper",
369
  x=0, y=-0.3,
370
  showarrow=False,
 
372
  align="left"
373
  )
374
 
 
375
  fig.update_xaxes(fixedrange=True, rangeslider_visible=False)
376
  fig.update_yaxes(fixedrange=True)
 
377
 
378
  return fig
379
 
380
 
381
+ def line_chart(dataframe):
382
+
383
+ import re
384
+ import plotly.graph_objects as go
385
+
386
+ # Normalizziamo le dimensioni per avere marker non troppo piccoli né enormi
387
+ def scale_sizes(values, min_size=8, max_size=30):
388
+ vmin, vmax = min(values), max(values)
389
+ return [
390
+ min_size + (val - vmin) / (vmax - vmin) * (max_size - min_size) if vmax > vmin else (min_size + max_size) / 2
391
+ for val in values
392
+ ]
393
+
394
+ # Separiamo i dati in base a IS_FS
395
+ df_true = dataframe[dataframe['IS_FS'] == True]
396
+ df_false = dataframe[dataframe['IS_FS'] == False]
397
+
398
+ # Estrai valori x, y e labels
399
+ x_true = df_true['#Params (B)'].tolist()
400
+ y_true = df_true['Avg. Comb. Perf. ⬆️'].tolist()
401
+ labels_true = [re.search(r'>([^<]+)<', m).group(1) for m in df_true['Model'].tolist()]
402
+
403
+ x_false = df_false['#Params (B)'].tolist()
404
+ y_false = df_false['Avg. Comb. Perf. ⬆️'].tolist()
405
+ labels_false = [re.search(r'>([^<]+)<', m).group(1) for m in df_false['Model'].tolist()]
406
+
407
+ fig = go.Figure()
408
+
409
+ # Punti IS_FS=True
410
+ fig.add_trace(go.Scatter(
411
+ x=x_true,
412
+ y=y_true,
413
+ mode='markers',
414
+ name='5-Shot',
415
+ marker=dict(
416
+ color='blue',
417
+ size=scale_sizes(x_true)
418
+ ),
419
+ hovertemplate='<b>%{customdata}</b><br>#Params: %{x}<br>Performance: %{y}<extra></extra>',
420
+ customdata=labels_true
421
+ ))
422
+
423
+ # Punti IS_FS=False
424
+ fig.add_trace(go.Scatter(
425
+ x=x_false,
426
+ y=y_false,
427
+ mode='markers',
428
+ name='0-Shot',
429
+ marker=dict(
430
+ color='red',
431
+ size=scale_sizes(x_false)
432
+ ),
433
+ hovertemplate='<b>%{customdata}</b><br>#Params: %{x}<br>Performance: %{y}<extra></extra>',
434
+ customdata=labels_false
435
+ ))
436
+
437
+ # Trova il massimo tra tutti i modelli
438
+ all_y = y_true + y_false
439
+ all_x = x_true + x_false
440
+ all_labels = labels_true + labels_false
441
+ max_idx = all_y.index(max(all_y))
442
+ max_x = all_x[max_idx]
443
+ max_y = all_y[max_idx]
444
+ max_label = all_labels[max_idx]
445
+
446
+ # Aggiungi annotazione visibile per il modello migliore
447
+ fig.add_annotation(
448
+ x=max_x,
449
+ y=max_y,
450
+ #text=f"Top: {max_label} ({max_y:.1f}%)",
451
+ text=f"{max_label}",
452
+ showarrow=True,
453
+ arrowhead=2,
454
+ arrowsize=1,
455
+ arrowwidth=2,
456
+ arrowcolor="black",
457
+ font=dict(size=11, color="black"),
458
+ xshift=10,
459
+ yshift=10,
460
+ ax = -30, ay = -20, # sposta la label a sinistra e sopra il punto
461
+ xanchor = "right" # allinea la label a destra rispetto al punto
462
+ )
463
+
464
+ fig.update_layout(
465
+ title="Avg. Combined Performance vs #Params",
466
+ xaxis_title="#Params (B)",
467
+ yaxis_title="Avg. Combined Performance",
468
+ template="plotly_white",
469
+ hovermode="closest",
470
+ font=dict(family="Arial", size=10),
471
+ dragmode=False,
472
+ xaxis=dict(
473
+ tickvals=[0, 25, 50, 75, 100, 125],
474
+ ticktext=["0", "25", "50", "75", "100"]
475
+ )
476
+ )
477
+
478
+ # Caption
479
+ fig.add_annotation(
480
+ text="Accuracy generally rises with #Params, but smaller models with 5-shot <br>"
481
+ "can outperform larger zero-shot models.",
482
+ xref="paper", yref="paper",
483
+ x=0, y=-0.3,
484
+ showarrow=False,
485
+ font=dict(size=11, color="gray"),
486
+ align="left"
487
+ )
488
+
489
+ fig.update_xaxes(fixedrange=True, rangeslider_visible=False)
490
+ fig.update_yaxes(fixedrange=True)
491
 
492
+ return fig
493
 
494
 
495
  # Define task metadata (icons, names, descriptions)
 
568
  else:
569
  new_model_column.append(row["Model"])
570
 
 
571
  # Lista delle colonne da aggiornare
572
+ #cols_to_update = ["REL Best Prompt Id", "NER Best Prompt Id", "SU Best Prompt Id", "LS Best Prompt Id"]
573
  # Applichiamo la trasformazione
574
+ #for col in cols_to_update:
575
+ # dataframe[col] = dataframe[col].replace({1: 7, 2: 8})
 
576
 
577
  # Aggiorna la colonna Model
578
  sorted_dataframe["Model"] = new_model_column
 
766
  )
767
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
768
 
769
+ # ⬇️ QUI aggiungiamo i grafici subito sotto la barra del titolo e sopra le tabs
770
+ with gr.Row():
771
+ gr.Plot(value=line_chart(LEADERBOARD_DF), elem_id="line-chart")
772
+ gr.Plot(value=boxplot_per_task(LEADERBOARD_DF, BASELINES), elem_id="boxplot-task")
773
+ gr.Plot(value=boxplot_prompts_per_task(LEADERBOARD_DF), elem_id="boxplot-prompt-task")
774
+
775
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
776
 
777
  # Main leaderboard tab
 
799
  """
800
  )
801
 
802
+ '''
803
  with gr.TabItem("📈 Charts"):
804
  #gr.Plot(value=line_chart(LEADERBOARD_DF), label="Andamento di esempio")
805
  #gr.Plot(value=line_chart_interactive_test(), label="Andamento interattivo")
 
807
  gr.Plot(value=boxplot_per_task(LEADERBOARD_DF, BASELINES))
808
  gr.Plot(value=boxplot_prompts_per_task(LEADERBOARD_DF))
809
  gr.Plot(value=barplot_mean_few_minus_zero_shot(LEADERBOARD_DF))
810
+ '''
811
 
812
  # About tab
813
  with gr.TabItem("📝 About"):
src/display/css_html_js.py CHANGED
@@ -104,3 +104,4 @@ get_window_url_params = """
104
  return url_params;
105
  }
106
  """
 
 
104
  return url_params;
105
  }
106
  """
107
+