	Update app.py
app.py CHANGED
@@ -14,13 +14,31 @@ from src.bin.PROBE import run_probe
 
     global data_component, filter_component
 
+
+
+def benchmark_plot(benchmark_type, methods_selected, x_metric, y_metric):
+    if benchmark_type == 'Flexible':
+        # Use general visualizer logic
+        return general_visualizer_plot(methods_selected, x_metric=x_metric, y_metric=y_metric)
+    elif benchmark_type == 'Benchmark 1':
+        return benchmark_1_plot(x_metric, y_metric)
+    elif benchmark_type == 'Benchmark 2':
+        return benchmark_2_plot(x_metric, y_metric)
+    elif benchmark_type == 'Benchmark 3':
+        return benchmark_3_plot(x_metric, y_metric)
+    elif benchmark_type == 'Benchmark 4':
+        return benchmark_4_plot(x_metric, y_metric)
+    else:
+        return "Invalid benchmark type selected."
+
+
 def get_baseline_df(selected_methods, selected_metrics):
     df = pd.read_csv(CSV_RESULT_PATH)
     present_columns = ["method_name"] + selected_metrics
     df = df[df['method_name'].isin(selected_methods)][present_columns]
     return df
 
-def …
+def general_visualizer(methods_selected, x_metric, y_metric):
     df = pd.read_csv(CSV_RESULT_PATH)
     filtered_df = df[df['method_name'].isin(methods_selected)]
 
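The added benchmark_plot function dispatches on the benchmark label chosen in the UI and delegates to the matching plotting helper (general_visualizer_plot, benchmark_1_plot, and so on, which are assumed to be defined or imported elsewhere in app.py). For illustration only, the same dispatch can be sketched table-driven; the callables below are hypothetical placeholders, not the app's real plotting functions:

    # Sketch only: table-driven equivalent of the if/elif dispatch above.
    # The lambdas stand in for the real benchmark_*_plot functions.
    PLOTTERS = {
        'Benchmark 1': lambda x, y: f"benchmark 1: {y} vs {x}",
        'Benchmark 2': lambda x, y: f"benchmark 2: {y} vs {x}",
    }

    def benchmark_plot_sketch(benchmark_type, methods_selected, x_metric, y_metric):
        if benchmark_type == 'Flexible':
            # Only the flexible path uses the selected methods.
            return f"flexible: {y_metric} vs {x_metric} for {methods_selected}"
        plotter = PLOTTERS.get(benchmark_type)
        return plotter(x_metric, y_metric) if plotter else "Invalid benchmark type selected."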
@@ -73,7 +91,7 @@ with block:
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         # table jmmmu bench
-        with gr.TabItem("🏅 PROBE …
+        with gr.TabItem("🏅 PROBE Leaderboard", elem_id="probe-benchmark-tab-table", id=1):
 
             method_names = pd.read_csv(CSV_RESULT_PATH)['method_name'].unique().tolist()
             metric_names = pd.read_csv(CSV_RESULT_PATH).columns.tolist()
@@ -116,23 +134,45 @@ with block:
                 outputs=data_component
             )
 
-
-
+        with gr.TabItem("Visualizer"):
+
+            # Dropdown for benchmark type
+            benchmark_types = TASK_INFO + ['flexible']
+            benchmark_type_selector = gr.Dropdown(choices=benchmark_types, label="Select Benchmark Type for Visualization", value="flexible")
+
+            # Dynamic metric selectors (will be updated based on benchmark type)
+            x_metric_selector = gr.Dropdown(choices=[], label="Select X-axis Metric")
+            y_metric_selector = gr.Dropdown(choices=[], label="Select Y-axis Metric")
+            method_selector = gr.CheckboxGroup(choices=method_names, label="Select methods to visualize", interactive=True, value=method_names)
+
+            # Button to draw the plot for the selected benchmark
+            plot_button = gr.Button("Plot Visualization")
+            plot_output = gr.Image(label="Plot")
 
-            [removed lines not captured in this view]
-        plot_button.click(create_plot, inputs=[method_selector, x_metric_selector, y_metric_selector], outputs=output_plot)
+            # Update metric selectors when benchmark type is chosen
+            def update_metric_choices(benchmark_type):
+                if benchmark_type == 'flexible':
+                    # Show all metrics for the flexible visualizer
+                    metric_names = df.columns.tolist()
+                    return gr.update(choices=metric_names, value=metric_names[0]), gr.update(choices=metric_names, value=metric_names[1])
+                elif benchmark_type in benchmark_specific_metrics:
+                    metrics = benchmark_specific_metrics[benchmark_type]
+                    return gr.update(choices=metrics, value=metrics[0]), gr.update(choices=metrics, value=metrics[1])
+                return gr.update(choices=[]), gr.update(choices=[])
 
-
+            benchmark_type_selector.change(
+                update_metric_choices,
+                inputs=[benchmark_type_selector],
+                outputs=[x_metric_selector, y_metric_selector]
+            )
+
+            # Generate the plot based on user input
+            plot_button.click(
+                benchmark_plot,
+                inputs=[benchmark_type_selector, method_selector, x_metric_selector, y_metric_selector],
+                outputs=plot_output
+            )
+
         with gr.TabItem("📝 About", elem_id="probe-benchmark-tab-table", id=2):
            with gr.Row():
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
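The new Visualizer tab follows a common Gradio pattern: a dropdown whose .change event repopulates two other dropdowns via gr.update, and a button whose .click event renders the plot. A self-contained sketch of that wiring, using placeholder names (METRICS_BY_TYPE, make_plot) rather than the app's real data and plotting code, looks roughly like this:

    import gradio as gr

    # Hypothetical metric table standing in for benchmark_specific_metrics.
    METRICS_BY_TYPE = {
        "Benchmark 1": ["precision", "recall", "f1"],
        "Benchmark 2": ["auc", "mcc"],
    }

    def update_metrics(benchmark_type):
        # One gr.update per output dropdown, mirroring update_metric_choices.
        metrics = METRICS_BY_TYPE.get(benchmark_type, [])
        first = metrics[0] if metrics else None
        second = metrics[1] if len(metrics) > 1 else None
        return gr.update(choices=metrics, value=first), gr.update(choices=metrics, value=second)

    def make_plot(benchmark_type, x_metric, y_metric):
        # Placeholder for the real plotting call; returns text instead of an image.
        return f"{benchmark_type}: {y_metric} vs {x_metric}"

    with gr.Blocks() as demo:
        type_dd = gr.Dropdown(choices=list(METRICS_BY_TYPE), label="Benchmark type")
        x_dd = gr.Dropdown(choices=[], label="X-axis metric")
        y_dd = gr.Dropdown(choices=[], label="Y-axis metric")
        out = gr.Textbox(label="Plot (placeholder)")
        type_dd.change(update_metrics, inputs=[type_dd], outputs=[x_dd, y_dd])
        gr.Button("Plot").click(make_plot, inputs=[type_dd, x_dd, y_dd], outputs=out)

    # demo.launch()  # uncomment to run the sketch locally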