Spaces:

BAAI
/

open_flageval_vlm_leaderboard

Running

lixuejing committed 23 days ago

Commit

e48257b

1 Parent(s): 3eaa307

add metrics

Files changed (2) hide show

app.py CHANGED Viewed

@@ -353,22 +353,22 @@ with demo:
                     queue=True,
                 )
-        #with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=4):
-        #    with gr.Row():
-        #        with gr.Column():
-        #            chart = create_metric_plot_obj(
-        #                plot_df,
-        #                [AutoEvalColumn.average.name],
-        #                title="Average of Top Scores and Human Baseline Over Time (from last update)",
-        #            )
-        #            gr.Plot(value=chart, min_width=500)
-        #        with gr.Column():
-        #            chart = create_metric_plot_obj(
-        #                plot_df,
-        #                BENCHMARK_COLS,
-        #                title="Top Scores and Human Baseline Over Time (from last update)",
-        #            )
-        #            gr.Plot(value=chart, min_width=500)
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

                     queue=True,
                 )
+        with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=4):
+            with gr.Row():
+                with gr.Column():
+                    chart = create_metric_plot_obj(
+                        plot_df,
+                        [AutoEvalColumn.average.name],
+                        title="Average of Top Scores and Human Baseline Over Time (from last update)",
+                    )
+                    gr.Plot(value=chart, min_width=500)
+                with gr.Column():
+                    chart = create_metric_plot_obj(
+                        plot_df,
+                        BENCHMARK_COLS,
+                        title="Top Scores and Human Baseline Over Time (from last update)",
+                    )
+                    gr.Plot(value=chart, min_width=500)
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

src/populate.py CHANGED Viewed

@@ -14,7 +14,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, dynamic_path: str,
     raw_data = get_raw_eval_results(results_path, requests_path, dynamic_path)
     all_data_json = [v.to_dict() for v in raw_data]
     print("all_data_json", all_data_json)
-    #all_data_json.append(baseline_row)
     filter_models_flags(all_data_json)
     df = pd.DataFrame.from_records(all_data_json)

     raw_data = get_raw_eval_results(results_path, requests_path, dynamic_path)
     all_data_json = [v.to_dict() for v in raw_data]
     print("all_data_json", all_data_json)
+    all_data_json.append(baseline_row)
     filter_models_flags(all_data_json)
     df = pd.DataFrame.from_records(all_data_json)