# PROBE / app.py
# gyigit's picture
# update app
# a2e6203
# raw
# history blame
# 4.33 kB
# Names exported by `from <this module> import *`.
# NOTE(review): only `block` is defined in the visible part of this file. The
# other three names must be provided from elsewhere (e.g. by the star import
# from `src.about` below); otherwise a star import of this module raises
# AttributeError -- confirm.
__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']
import gradio as gr
import pandas as pd
import re
import pandas as pd
import os
import json
import yaml
from src.about import *
from src.bin.PROBE import run_probe
# NOTE(review): a `global` statement at module scope has no effect, and
# `filter_component` is never assigned in the visible part of this file.
global data_component, filter_component
def get_baseline_df():
    """Load the results CSV and keep only the columns to display.

    The returned frame contains the "Method" column followed by the columns
    listed in ``checkbox_group.value``. ``checkbox_group`` is a module-level
    component created later in this file, so this function can only be
    called once that component exists.

    NOTE(review): ``checkbox_group.value`` is read from the component object
    itself; presumably this is the value it was constructed with rather than
    the visitor's live selection -- confirm against the Gradio docs.

    Returns:
        A pandas DataFrame restricted to the selected columns.
    """
    results = pd.read_csv(CSV_RESULT_PATH)
    selected_columns = ["Method", *checkbox_group.value]
    return results[selected_columns]
def add_new_eval(
    human_file,
    skempi_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    benchmark_type: list,
):
    """Run the PROBE benchmark on an uploaded pair of representation files.

    Args:
        human_file: Value of the "Human dataset" upload component.
        skempi_file: Value of the "SKEMPI dataset" upload component.
        model_name_textbox: Name of the submitted model.
        revision_name_textbox: Optional revision name. When it is non-blank
            it is used as the representation name instead of the model name.
        benchmark_type: Benchmark types selected in the submission form. The
            form wires a ``gr.CheckboxGroup`` to this argument, which delivers
            the selected choices as a list rather than a single string.

    Returns:
        None. Whatever ``run_probe`` returns is discarded.
    """
    # Treat a missing (None) or whitespace-only revision name as "not given"
    # so a stray space in the form does not become the representation name.
    # A non-blank revision name is passed through unchanged.
    has_revision = bool(revision_name_textbox and revision_name_textbox.strip())
    representation_name = revision_name_textbox if has_revision else model_name_textbox

    run_probe(benchmark_type, representation_name, human_file, skempi_file)
    return None
# Build the Gradio UI: a leaderboard tab, an "About" tab and a submission tab,
# followed by a refresh button and a citation box, then launch the app.
# NOTE(review): the nesting of the layout contexts below was inferred from the
# statement order; confirm it matches the intended page layout.
block = gr.Blocks()

with block:
    gr.Markdown(LEADERBOARD_INTRODUCTION)

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Tab 1: leaderboard table.
        with gr.TabItem("🏅 PROBE Benchmark", elem_id="probe-benchmark-tab-table", id=1):
            # Column selector: lets the user pick the evaluation dimensions.
            # NOTE(review): no event listener is attached to this group in
            # the visible code, so changing the selection does not update the
            # table below.
            checkbox_group = gr.CheckboxGroup(
                choices=TASK_INFO,
                label="Benchmark Type",
                interactive=True,
            )

            # get_baseline_df() reads `checkbox_group`, so it must be called
            # only after the component above has been created.
            baseline_value = get_baseline_df()
            baseline_header = ["Method"] + checkbox_group.value
            baseline_datatype = ['markdown'] + ['number'] * len(checkbox_group.value)

            data_component = gr.components.Dataframe(
                value=baseline_value,
                headers=baseline_header,
                type="pandas",
                datatype=baseline_datatype,
                interactive=False,
                visible=True,
            )

        # Tab 2: description of the benchmark.
        with gr.TabItem("📝 About", elem_id="probe-benchmark-tab-table", id=2):
            with gr.Row():
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        # Tab 3: submission form.
        with gr.TabItem("🚀 Submit here! ", elem_id="probe-benchmark-tab-table", id=3):
            with gr.Row():
                gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model's representation files here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Model name",
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name",
                    )
                    # Checkbox selection of the benchmark types (similarity,
                    # family, function, affinity) used to evaluate the
                    # uploaded representations.
                    benchmark_type = gr.CheckboxGroup(
                        choices=TASK_INFO,
                        label="Benchmark Type",
                        interactive=True,
                    )

                with gr.Column():
                    human_file = gr.components.File(label="Click to Upload the representation file (csv) for Human dataset", file_count="single", type='filepath')
                    skempi_file = gr.components.File(label="Click to Upload the representation file (csv) for SKEMPI dataset", file_count="single", type='filepath')

                    submit_button = gr.Button("Submit Eval")
                    # Placeholder component; nothing in the visible code
                    # writes to it.
                    submission_result = gr.Markdown()
                    # The order of `inputs` must match the parameter order of
                    # add_new_eval.
                    submit_button.click(
                        add_new_eval,
                        inputs=[
                            human_file,
                            skempi_file,
                            model_name_textbox,
                            revision_name_textbox,
                            benchmark_type,
                        ],
                    )

    def refresh_data():
        """Reload the leaderboard data for the table component."""
        value = get_baseline_df()
        return value

    with gr.Row():
        data_run = gr.Button("Refresh")
        data_run.click(
            refresh_data, outputs=[data_component]
        )

    with gr.Accordion("Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            elem_id="citation-button",
            show_copy_button=True,
        )

block.launch()