chriscanal committed
Commit e872e8a • 1 Parent(s): 65fc294

Updated main to include the title in the graph function parameters

Files changed (1): app.py +11 -7
app.py CHANGED
@@ -105,7 +105,6 @@ else:
     original_df = get_leaderboard_df(eval_results, eval_results_private, COLS, BENCHMARK_COLS)
     models = original_df["model_name_for_query"].tolist()  # needed for model backlinks in their to the leaderboard
     plot_df = create_plot_df(create_scores_df(join_model_info_with_results(original_df)))
-
     to_be_dumped = f"models = {repr(models)}\n"
 
     # with open("models_backlinks.py", "w") as f:
@@ -476,16 +475,21 @@ with demo:
         with gr.TabItem("📈 Benchmark Graphs", elem_id="llm-benchmark-tab-table", id=4):
             with gr.Row():
                 with gr.Column():
-                    chart = create_metric_plot_obj(plot_df, ["Average ⬆️"], HUMAN_BASELINES).properties(
-                        title="Average of Top Scores and Human Baseline Over Time"
+                    chart = create_metric_plot_obj(
+                        plot_df,
+                        ["Average ⬆️"],
+                        HUMAN_BASELINES,
+                        title="Average of Top Scores and Human Baseline Over Time",
                     )
                     gr.Plot(value=chart, interactive=False, width=500, height=500)
                 with gr.Column():
                     chart = create_metric_plot_obj(
-                        plot_df, ["ARC", "HellaSwag", "MMLU", "TruthfulQA"], HUMAN_BASELINES
-                    ).properties(title="Top Scores and Human Baseline Over Time")
+                        plot_df,
+                        ["ARC", "HellaSwag", "MMLU", "TruthfulQA"],
+                        HUMAN_BASELINES,
+                        title="Top Scores and Human Baseline Over Time",
+                    )
                     gr.Plot(value=chart, interactive=False, width=500, height=500)
-
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
@@ -608,4 +612,4 @@ with demo:
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
-demo.queue(concurrency_count=40).launch()
+demo.queue(concurrency_count=40).launch()
 
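Only the call sites change in this commit, so the body of create_metric_plot_obj isn't visible here; the new keyword suggests the helper now takes title itself and applies it to the Altair chart, rather than leaving callers to chain .properties(title=...) on the result. A minimal sketch of what the updated helper might look like, assuming a long-form plot_df with hypothetical date, score, and task columns and a HUMAN_BASELINES dict keyed by metric name:

import altair as alt
import pandas as pd

def create_metric_plot_obj(
    df: pd.DataFrame, metrics: list, human_baselines: dict, title: str
) -> alt.LayerChart:
    # Hypothetical sketch; the real helper lives elsewhere in this Space
    # and its exact schema is not shown in the diff.
    data = df[df["task"].isin(metrics)]  # assumed columns: date, score, task

    # One line per metric, tracking the top score over time.
    scores = (
        alt.Chart(data)
        .mark_line(point=True)
        .encode(x="date:T", y="score:Q", color="task:N")
    )

    # Dashed horizontal rules for the human baselines (assumed dict shape).
    rules = (
        alt.Chart(
            pd.DataFrame(
                {"task": metrics, "score": [human_baselines[m] for m in metrics]}
            )
        )
        .mark_rule(strokeDash=[4, 4])
        .encode(y="score:Q", color="task:N")
    )

    # The commit's point: the title is set here, inside the helper, so the
    # caller no longer needs `.properties(title=...)` on the returned chart.
    return (scores + rules).properties(title=title)

Folding the title into the helper keeps the two call sites in the Benchmark Graphs tab symmetric: each column builds its chart in a single call and hands it straight to gr.Plot.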