lixuejing
committed on
Commit
·
e48257b
1
Parent(s):
3eaa307
add metrix
Browse files
- app.py +16 -16
- src/populate.py +1 -1
app.py
CHANGED
@@ -353,22 +353,22 @@ with demo:
|
|
353 |
queue=True,
|
354 |
)
|
355 |
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
|
373 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
374 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
|
|
353 |
queue=True,
|
354 |
)
|
355 |
|
356 |
+
with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=4):
|
357 |
+
with gr.Row():
|
358 |
+
with gr.Column():
|
359 |
+
chart = create_metric_plot_obj(
|
360 |
+
plot_df,
|
361 |
+
[AutoEvalColumn.average.name],
|
362 |
+
title="Average of Top Scores and Human Baseline Over Time (from last update)",
|
363 |
+
)
|
364 |
+
gr.Plot(value=chart, min_width=500)
|
365 |
+
with gr.Column():
|
366 |
+
chart = create_metric_plot_obj(
|
367 |
+
plot_df,
|
368 |
+
BENCHMARK_COLS,
|
369 |
+
title="Top Scores and Human Baseline Over Time (from last update)",
|
370 |
+
)
|
371 |
+
gr.Plot(value=chart, min_width=500)
|
372 |
|
373 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
374 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
src/populate.py
CHANGED
@@ -14,7 +14,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, dynamic_path: str,
|
|
14 |
raw_data = get_raw_eval_results(results_path, requests_path, dynamic_path)
|
15 |
all_data_json = [v.to_dict() for v in raw_data]
|
16 |
print("all_data_json", all_data_json)
|
17 |
-
|
18 |
filter_models_flags(all_data_json)
|
19 |
|
20 |
df = pd.DataFrame.from_records(all_data_json)
|
|
|
14 |
raw_data = get_raw_eval_results(results_path, requests_path, dynamic_path)
|
15 |
all_data_json = [v.to_dict() for v in raw_data]
|
16 |
print("all_data_json", all_data_json)
|
17 |
+
all_data_json.append(baseline_row)
|
18 |
filter_models_flags(all_data_json)
|
19 |
|
20 |
df = pd.DataFrame.from_records(all_data_json)
|