lixuejing committed on
Commit
e48257b
·
1 Parent(s): 3eaa307

add metrix

Browse files
Files changed (2) hide show
  1. app.py +16 -16
  2. src/populate.py +1 -1
app.py CHANGED
@@ -353,22 +353,22 @@ with demo:
353
  queue=True,
354
  )
355
 
356
- #with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=4):
357
- # with gr.Row():
358
- # with gr.Column():
359
- # chart = create_metric_plot_obj(
360
- # plot_df,
361
- # [AutoEvalColumn.average.name],
362
- # title="Average of Top Scores and Human Baseline Over Time (from last update)",
363
- # )
364
- # gr.Plot(value=chart, min_width=500)
365
- # with gr.Column():
366
- # chart = create_metric_plot_obj(
367
- # plot_df,
368
- # BENCHMARK_COLS,
369
- # title="Top Scores and Human Baseline Over Time (from last update)",
370
- # )
371
- # gr.Plot(value=chart, min_width=500)
372
 
373
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
374
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
353
  queue=True,
354
  )
355
 
356
+ with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=4):
357
+ with gr.Row():
358
+ with gr.Column():
359
+ chart = create_metric_plot_obj(
360
+ plot_df,
361
+ [AutoEvalColumn.average.name],
362
+ title="Average of Top Scores and Human Baseline Over Time (from last update)",
363
+ )
364
+ gr.Plot(value=chart, min_width=500)
365
+ with gr.Column():
366
+ chart = create_metric_plot_obj(
367
+ plot_df,
368
+ BENCHMARK_COLS,
369
+ title="Top Scores and Human Baseline Over Time (from last update)",
370
+ )
371
+ gr.Plot(value=chart, min_width=500)
372
 
373
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
374
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
src/populate.py CHANGED
@@ -14,7 +14,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, dynamic_path: str,
14
  raw_data = get_raw_eval_results(results_path, requests_path, dynamic_path)
15
  all_data_json = [v.to_dict() for v in raw_data]
16
  print("all_data_json", all_data_json)
17
- #all_data_json.append(baseline_row)
18
  filter_models_flags(all_data_json)
19
 
20
  df = pd.DataFrame.from_records(all_data_json)
 
14
  raw_data = get_raw_eval_results(results_path, requests_path, dynamic_path)
15
  all_data_json = [v.to_dict() for v in raw_data]
16
  print("all_data_json", all_data_json)
17
+ all_data_json.append(baseline_row)
18
  filter_models_flags(all_data_json)
19
 
20
  df = pd.DataFrame.from_records(all_data_json)