pminervini committed
Commit 9af5ebf • 1 Parent(s): 241e347
Files changed (1)
  1. app.py +18 -17
app.py CHANGED
@@ -60,7 +60,7 @@ raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH
 # update_collections(original_df.copy())
 leaderboard_df = original_df.copy()
 
-plot_df = create_plot_df(create_scores_df(raw_data))
+# plot_df = create_plot_df(create_scores_df(raw_data))
 
 (
     finished_eval_queue_df,
@@ -251,22 +251,23 @@ with demo:
                     queue=True,
                 )
 
-        with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=4):
-            with gr.Row():
-                with gr.Column():
-                    chart = create_metric_plot_obj(
-                        plot_df,
-                        [AutoEvalColumn.average.name],
-                        title="Average of Top Scores and Human Baseline Over Time (from last update)",
-                    )
-                    gr.Plot(value=chart, min_width=500)
-                with gr.Column():
-                    chart = create_metric_plot_obj(
-                        plot_df,
-                        BENCHMARK_COLS,
-                        title="Top Scores and Human Baseline Over Time (from last update)",
-                    )
-                    gr.Plot(value=chart, min_width=500)
+        # with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=4):
+        #     with gr.Row():
+        #         with gr.Column():
+        #             chart = create_metric_plot_obj(
+        #                 plot_df,
+        #                 [AutoEvalColumn.average.name],
+        #                 title="Average of Top Scores and Human Baseline Over Time (from last update)",
+        #             )
+        #             gr.Plot(value=chart, min_width=500)
+        #         with gr.Column():
+        #             chart = create_metric_plot_obj(
+        #                 plot_df,
+        #                 BENCHMARK_COLS,
+        #                 title="Top Scores and Human Baseline Over Time (from last update)",
+        #             )
+        #             gr.Plot(value=chart, min_width=500)
+
 
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
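For context, the code being commented out follows the standard Gradio pattern of nesting a gr.Plot inside gr.TabItem / gr.Row / gr.Column. Below is a minimal, self-contained sketch of that same layout; the leaderboard's own plot_df, create_metric_plot_obj, AutoEvalColumn, and BENCHMARK_COLS live elsewhere in the repo, so they are replaced here with a toy DataFrame and a plain Plotly figure. This is an illustration of the structure, not the app's actual code.

# Minimal sketch of the Gradio tab/plot pattern disabled by this commit.
# The DataFrame and figure below are placeholders, not the leaderboard's data.
import gradio as gr
import pandas as pd
import plotly.express as px

# Toy stand-in for plot_df: one score series over time.
toy_df = pd.DataFrame(
    {
        "date": pd.date_range("2023-01-01", periods=5, freq="MS"),
        "Average": [52.1, 54.3, 57.0, 58.8, 60.2],
    }
)

def make_chart(df: pd.DataFrame):
    # Stand-in for create_metric_plot_obj: a simple line chart of scores over time.
    return px.line(df, x="date", y="Average", title="Average of Top Scores Over Time (toy data)")

demo = gr.Blocks()
with demo:
    with gr.Tabs():
        with gr.TabItem("📈 Metrics through time", id=0):
            with gr.Row():
                with gr.Column():
                    gr.Plot(value=make_chart(toy_df), min_width=500)
        with gr.TabItem("📝 About", id=1):
            gr.Markdown("About text goes here.")

if __name__ == "__main__":
    demo.launch()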