wenhu committed on
Commit
af4a677
1 Parent(s): 09b4a5d

Create app.py

Files changed (1)
  1. app.py +380 -0
app.py ADDED
@@ -0,0 +1,380 @@
+ __all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']
+
+ import os
+ import shutil
+ import json
+ import tempfile
+
+ import gradio as gr
+ import pandas as pd
+
+ from constants import *
+ from huggingface_hub import Repository
+
+ HF_TOKEN = os.environ.get("HF_TOKEN")
+
+ global data_component, filter_component
+
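+ # `from constants import *` is assumed to provide the configuration used
+ # throughout this file (names inferred from usage): SUBMISSION_NAME /
+ # SUBMISSION_URL (submissions dataset repo), CSV_DIR / QUALITY_DIR
+ # (leaderboard CSV paths), TASK_INFO / DEFAULT_INFO / QUALITY_TAB /
+ # QUALITY_LIST / SEMANTIC_LIST (dimension names), DIM_WEIGHT /
+ # QUALITY_WEIGHT / SEMANTIC_WEIGHT / NORMALIZE_DIC (scoring weights and
+ # min-max ranges), the MODEL_INFO / MODEL_INFO_TAB_QUALITY / COLUMN_NAMES /
+ # COLUMN_NAMES_QUALITY / DATA_TITILE_TYPE table metadata, and the Markdown
+ # text constants.
+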
+ def upload_file(files):
+     file_paths = [file.name for file in files]
+     return file_paths
+
+ def add_new_eval(
+     input_file,
+     model_name_textbox: str,
+     revision_name_textbox: str,
+     model_link: str,
+ ):
+     if input_file is None:
+         return "Error! Empty file!"
+
+     upload_data = json.loads(input_file)
+     submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
+                                  use_auth_token=HF_TOKEN, repo_type="dataset")
+     submission_repo.git_pull()
+     # Archive the uploaded submission in the submission repo (the target
+     # filename "<model name>.json" is an assumption).
+     with open(os.path.join(SUBMISSION_NAME, f"{model_name_textbox}.json"), "wb") as f:
+         f.write(input_file)
+
+     csv_data = pd.read_csv(CSV_DIR)
+
+     if revision_name_textbox == '':
+         # New submission: append as a new row.
+         row = csv_data.shape[0]
+         model_name = model_name_textbox
+     else:
+         # Revision: overwrite the existing row if the name is already listed.
+         model_name = revision_name_textbox
+         model_name_list = csv_data['Model Name (clickable)']
+         # Strip the markdown link syntax "[name](url)" down to the bare name.
+         name_list = [name.split(']')[0][1:] for name in model_name_list]
+         if revision_name_textbox not in name_list:
+             row = csv_data.shape[0]
+         else:
+             row = name_list.index(revision_name_textbox)
+
+     if model_link != '':
+         model_name = '[' + model_name + '](' + model_link + ')'
+
+     # Record the scores in TASK_INFO order; missing dimensions default to 0.
+     new_data = [model_name]
+     for key in TASK_INFO:
+         if key in upload_data:
+             new_data.append(upload_data[key][0])
+         else:
+             new_data.append(0)
+     csv_data.loc[row] = new_data
+     csv_data.to_csv(CSV_DIR, index=False)
+     submission_repo.push_to_hub()
+     return "Success! Your submission has been recorded."
+
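+ # A minimal sketch of the upload format implied by the loop above: each
+ # TASK_INFO dimension maps to a list whose first element is the score.
+ # (Key names below are illustrative placeholders, not the real TASK_INFO
+ # entries.)
+ #
+ #   {
+ #       "some_dimension": [0.873],
+ #       "another_dimension": [0.541]
+ #   }
+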
+ def get_normalized_df(df):
+     # final_score = df.drop('name', axis=1).sum(axis=1)
+     # df.insert(1, 'Overall Score', final_score)
+     normalize_df = df.copy().fillna(0.0)
+     for column in normalize_df.columns[1:]:
+         min_val = NORMALIZE_DIC[column]['Min']
+         max_val = NORMALIZE_DIC[column]['Max']
+         normalize_df[column] = (normalize_df[column] - min_val) / (max_val - min_val)
+     return normalize_df
+
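+ # Each dimension is min-max normalized onto [0, 1]:
+ #     normalized = (x - Min) / (Max - Min)
+ # e.g. with Min = 0.2 and Max = 0.8 (illustrative values only), a raw score
+ # of 0.5 maps to (0.5 - 0.2) / (0.8 - 0.2) = 0.5.
+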
+ def calculate_selected_score(df, selected_columns):
+     # Split the selection into quality and semantic dimensions.
+     selected_QUALITY = [i for i in selected_columns if i in QUALITY_LIST]
+     selected_SEMANTIC = [i for i in selected_columns if i in SEMANTIC_LIST]
+     # Weighted average inside each group; an empty group yields 0/0 = NaN,
+     # which is used below to detect that no dimension of that group is selected.
+     selected_quality_score = df[selected_QUALITY].sum(axis=1) / sum([DIM_WEIGHT[i] for i in selected_QUALITY])
+     selected_semantic_score = df[selected_SEMANTIC].sum(axis=1) / sum([DIM_WEIGHT[i] for i in selected_SEMANTIC])
+     if selected_quality_score.isna().any() and selected_semantic_score.isna().any():
+         selected_score = (selected_quality_score * QUALITY_WEIGHT + selected_semantic_score * SEMANTIC_WEIGHT) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
+         return selected_score.fillna(0.0)
+     if selected_quality_score.isna().any():
+         return selected_semantic_score
+     if selected_semantic_score.isna().any():
+         return selected_quality_score
+     selected_score = (selected_quality_score * QUALITY_WEIGHT + selected_semantic_score * SEMANTIC_WEIGHT) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
+     return selected_score.fillna(0.0)
+
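+ # The combined score is a weighted mean of the two group scores:
+ #     selected = (quality * QUALITY_WEIGHT + semantic * SEMANTIC_WEIGHT)
+ #                / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
+ # For illustration only: with QUALITY_WEIGHT = 4 and SEMANTIC_WEIGHT = 1,
+ # the quality group would contribute 80% of the total.
+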
+ def get_final_score(df, selected_columns):
+     normalize_df = get_normalized_df(df)
+     # Weight every dimension column (everything except the model-name column).
+     for name in normalize_df.drop('Model Name (clickable)', axis=1):
+         normalize_df[name] = normalize_df[name] * DIM_WEIGHT[name]
+     quality_score = normalize_df[QUALITY_LIST].sum(axis=1) / sum([DIM_WEIGHT[i] for i in QUALITY_LIST])
+     semantic_score = normalize_df[SEMANTIC_LIST].sum(axis=1) / sum([DIM_WEIGHT[i] for i in SEMANTIC_LIST])
+     final_score = (quality_score * QUALITY_WEIGHT + semantic_score * SEMANTIC_WEIGHT) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
+     if 'Total Score' in df:
+         df['Total Score'] = final_score
+     else:
+         df.insert(1, 'Total Score', final_score)
+     if 'Semantic Score' in df:
+         df['Semantic Score'] = semantic_score
+     else:
+         df.insert(2, 'Semantic Score', semantic_score)
+     if 'Quality Score' in df:
+         df['Quality Score'] = quality_score
+     else:
+         df.insert(3, 'Quality Score', quality_score)
+     selected_score = calculate_selected_score(normalize_df, selected_columns)
+     if 'Selected Score' in df:
+         df['Selected Score'] = selected_score
+     else:
+         df.insert(1, 'Selected Score', selected_score)
+     return df
+
+
+ def get_final_score_quality(df, selected_columns):
+     normalize_df = get_normalized_df(df)
+     for name in normalize_df.drop('Model Name (clickable)', axis=1):
+         normalize_df[name] = normalize_df[name] * DIM_WEIGHT[name]
+     quality_score = normalize_df[QUALITY_TAB].sum(axis=1) / sum([DIM_WEIGHT[i] for i in QUALITY_TAB])
+
+     if 'Quality Score' in df:
+         df['Quality Score'] = quality_score
+     else:
+         df.insert(1, 'Quality Score', quality_score)
+     selected_score = normalize_df[selected_columns].sum(axis=1) / sum([DIM_WEIGHT[i] for i in selected_columns])
+     if 'Selected Score' in df:
+         df['Selected Score'] = selected_score
+     else:
+         df.insert(1, 'Selected Score', selected_score)
+     return df
+
+ def get_baseline_df():
+     submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
+                                  use_auth_token=HF_TOKEN, repo_type="dataset")
+     submission_repo.git_pull()
+     df = pd.read_csv(CSV_DIR)
+     df = get_final_score(df, checkbox_group.value)
+     df = df.sort_values(by="Selected Score", ascending=False)
+     present_columns = MODEL_INFO + checkbox_group.value
+     df = df[present_columns]
+     df = convert_scores_to_percentage(df)
+     return df
+
+ def get_baseline_df_quality():
+     submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
+                                  use_auth_token=HF_TOKEN, repo_type="dataset")
+     submission_repo.git_pull()
+     df = pd.read_csv(QUALITY_DIR)
+     df = get_final_score_quality(df, checkbox_group_quality.value)
+     df = df.sort_values(by="Selected Score", ascending=False)
+     present_columns = MODEL_INFO_TAB_QUALITY + checkbox_group_quality.value
+     df = df[present_columns]
+     df = convert_scores_to_percentage(df)
+     return df
+
+ def get_all_df(selected_columns, csv_dir=CSV_DIR):
+     submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
+                                  use_auth_token=HF_TOKEN, repo_type="dataset")
+     submission_repo.git_pull()
+     df = pd.read_csv(csv_dir)
+     df = get_final_score(df, selected_columns)
+     df = df.sort_values(by="Selected Score", ascending=False)
+     return df
+
+ def get_all_df_quality(selected_columns, quality_dir=QUALITY_DIR):
+     submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
+                                  use_auth_token=HF_TOKEN, repo_type="dataset")
+     submission_repo.git_pull()
+     df = pd.read_csv(quality_dir)
+     df = get_final_score_quality(df, selected_columns)
+     df = df.sort_values(by="Selected Score", ascending=False)
+     return df
+
+
+ def convert_scores_to_percentage(df):
+     # Convert every column except the first ('name') to a percentage string.
+     for column in df.columns[1:]:
+         df[column] = round(df[column] * 100, 2)
+         df[column] = df[column].astype(str) + '%'
+     return df
+
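+ # For example, a normalized score of 0.8123 is displayed as "81.23%". Since
+ # this converts the numeric columns to strings, it is applied only after all
+ # sorting and score computation is done.
+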
+ def choose_all_quality():
+     return gr.update(value=QUALITY_LIST)
+
+ def choose_all_semantic():
+     return gr.update(value=SEMANTIC_LIST)
+
+ def disable_all():
+     return gr.update(value=[])
+
+ def enable_all():
+     return gr.update(value=TASK_INFO)
+
+ def on_filter_model_size_method_change(selected_columns):
+     updated_data = get_all_df(selected_columns, CSV_DIR)
+     # Keep the selected columns in their canonical TASK_INFO order.
+     selected_columns = [item for item in TASK_INFO if item in selected_columns]
+     present_columns = MODEL_INFO + selected_columns
+     updated_data = updated_data[present_columns]
+     updated_data = updated_data.sort_values(by="Selected Score", ascending=False)
+     updated_data = convert_scores_to_percentage(updated_data)
+     updated_headers = present_columns
+     update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]
+     filter_component = gr.components.Dataframe(
+         value=updated_data,
+         headers=updated_headers,
+         type="pandas",
+         datatype=update_datatype,
+         interactive=False,
+         visible=True,
+     )
+     return filter_component
+
+ def on_filter_model_size_method_change_quality(selected_columns):
+     updated_data = get_all_df_quality(selected_columns, QUALITY_DIR)
+     # Keep the selected columns in their canonical QUALITY_TAB order.
+     selected_columns = [item for item in QUALITY_TAB if item in selected_columns]
+     present_columns = MODEL_INFO_TAB_QUALITY + selected_columns
+     updated_data = updated_data[present_columns]
+     updated_data = updated_data.sort_values(by="Selected Score", ascending=False)
+     updated_data = convert_scores_to_percentage(updated_data)
+     updated_headers = present_columns
+     update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]
+     filter_component = gr.components.Dataframe(
+         value=updated_data,
+         headers=updated_headers,
+         type="pandas",
+         datatype=update_datatype,
+         interactive=False,
+         visible=True,
+     )
+     return filter_component
+
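+ # NOTE: both handlers return a freshly constructed gr.components.Dataframe
+ # rather than a plain value; Gradio treats a returned component as a full
+ # update here, replacing the table contents, headers, and column datatypes
+ # in one step.
+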
+ block = gr.Blocks()
+
+
+ with block:
+     gr.Markdown(
+         LEADERBORAD_INTRODUCTION
+     )
+     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+         # Table 0
+         with gr.TabItem("📊 VBench", elem_id="vbench-tab-table", id=1):
+             with gr.Row():
+                 with gr.Accordion("Citation", open=False):
+                     citation_button = gr.Textbox(
+                         value=CITATION_BUTTON_TEXT,
+                         label=CITATION_BUTTON_LABEL,
+                         elem_id="citation-button",
+                         lines=10,
+                     )
+
+             gr.Markdown(
+                 TABLE_INTRODUCTION
+             )
+             with gr.Row():
+                 with gr.Column(scale=0.2):
+                     chosen_q = gr.Button("Select Quality Dimensions")
+                     chosen_s = gr.Button("Select Semantic Dimensions")
+                     # enable_b = gr.Button("Select All")
+                     disable_b = gr.Button("Deselect All")
+
+                 with gr.Column(scale=0.8):
+                     # Column selection for the leaderboard table.
+                     checkbox_group = gr.CheckboxGroup(
+                         choices=TASK_INFO,
+                         value=DEFAULT_INFO,
+                         label="Evaluation Dimension",
+                         interactive=True,
+                     )
+
+             data_component = gr.components.Dataframe(
+                 value=get_baseline_df,
+                 headers=COLUMN_NAMES,
+                 type="pandas",
+                 datatype=DATA_TITILE_TYPE,
+                 interactive=False,
+                 visible=True,
+             )
+
+             chosen_q.click(choose_all_quality, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[checkbox_group], outputs=data_component)
+             chosen_s.click(choose_all_semantic, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[checkbox_group], outputs=data_component)
+             # enable_b.click(enable_all, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[checkbox_group], outputs=data_component)
+             disable_b.click(disable_all, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[checkbox_group], outputs=data_component)
+             checkbox_group.change(fn=on_filter_model_size_method_change, inputs=[checkbox_group], outputs=data_component)
+
+         with gr.TabItem("Video Quality", elem_id="vbench-tab-table", id=2):
+             with gr.Accordion("INSTRUCTION", open=False):
+                 quality_button = gr.Textbox(
+                     value=QUALITY_CLAIM_TEXT,
+                     label="",
+                     elem_id="quality-button",
+                     lines=2,
+                 )
+             with gr.Row():
+                 with gr.Column(scale=1.0):
+                     # Column selection for the quality-only table.
+                     checkbox_group_quality = gr.CheckboxGroup(
+                         choices=QUALITY_TAB,
+                         value=QUALITY_TAB,
+                         label="Evaluation Quality Dimension",
+                         interactive=True,
+                     )
+
+             data_component_quality = gr.components.Dataframe(
+                 value=get_baseline_df_quality,
+                 headers=COLUMN_NAMES_QUALITY,
+                 type="pandas",
+                 datatype=DATA_TITILE_TYPE,
+                 interactive=False,
+                 visible=True,
+             )
+
+             checkbox_group_quality.change(fn=on_filter_model_size_method_change_quality, inputs=[checkbox_group_quality], outputs=data_component_quality)
+
+         # table 2
+         with gr.TabItem("📝 About", elem_id="mvbench-tab-table", id=3):
+             gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text")
+
+         # table 3
+         with gr.TabItem("🚀 Submit here!", elem_id="mvbench-tab-table", id=4):
+             gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")
+
+             with gr.Row():
+                 gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
+
+             with gr.Row():
+                 gr.Markdown("# ✉️✨ Submit your model evaluation JSON file here!", elem_classes="markdown-text")
+
+             with gr.Row():
+                 with gr.Column():
+                     model_name_textbox = gr.Textbox(
+                         label="Model name", placeholder="LaVie"
+                     )
+                     revision_name_textbox = gr.Textbox(
+                         label="Revision Model Name", placeholder="LaVie"
+                     )
+
+                 with gr.Column():
+                     model_link = gr.Textbox(
+                         label="Model Link", placeholder="https://huggingface.co/decapoda-research/llama-7b-hf"
+                     )
+
+             with gr.Column():
+                 input_file = gr.components.File(label="Click to Upload a JSON File", file_count="single", type='binary')
+                 submit_button = gr.Button("Submit Eval")
+
+             submission_result = gr.Markdown()
+             submit_button.click(
+                 add_new_eval,
+                 inputs=[
+                     input_file,
+                     model_name_textbox,
+                     revision_name_textbox,
+                     model_link,
+                 ],
+                 outputs=[submission_result],
+             )
+
+
+     def refresh_data():
+         value1 = get_baseline_df()
+         return value1
+
+     with gr.Row():
+         data_run = gr.Button("Refresh")
+         data_run.click(on_filter_model_size_method_change, inputs=[checkbox_group], outputs=data_component)
+
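+ # Running this app needs an HF_TOKEN environment variable with write access
+ # to the submissions dataset repo; without it, cloning a private repo or
+ # pushing new submissions will fail.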
+ block.launch()