Yyy0530 committed
Commit 43bf3ba
1 parent: e583379
app.py CHANGED
@@ -1,204 +1,170 @@
 import gradio as gr
-from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
-from apscheduler.schedulers.background import BackgroundScheduler
-from huggingface_hub import snapshot_download
-
-from src.about import (
-    CITATION_BUTTON_LABEL,
-    CITATION_BUTTON_TEXT,
-    EVALUATION_QUEUE_TEXT,
-    INTRODUCTION_TEXT,
-    LLM_BENCHMARKS_TEXT,
-    TITLE,
-)
-from src.display.css_html_js import custom_css
-from src.display.utils import (
-    BENCHMARK_COLS,
-    COLS,
-    EVAL_COLS,
-    EVAL_TYPES,
-    AutoEvalColumn,
-    ModelType,
-    fields,
-    WeightType,
-    Precision
-)
-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
-from src.populate import get_evaluation_queue_df, get_leaderboard_df
-from src.submission.submit import add_new_eval
-
-
-def restart_space():
-    API.restart_space(repo_id=REPO_ID)
-
-### Space initialisation
-try:
-    print(EVAL_REQUESTS_PATH)
-    snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-except Exception:
-    restart_space()
-try:
-    print(EVAL_RESULTS_PATH)
-    snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-except Exception:
-    restart_space()
-
-
-LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
-
-(
-    finished_eval_queue_df,
-    running_eval_queue_df,
-    pending_eval_queue_df,
-) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
-
-def init_leaderboard(dataframe):
-    if dataframe is None or dataframe.empty:
-        raise ValueError("Leaderboard DataFrame is empty or None.")
-    return Leaderboard(
-        value=dataframe,
-        datatype=[c.type for c in fields(AutoEvalColumn)],
-        select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
-            label="Select Columns to Display:",
-        ),
-        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
-        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
-        filter_columns=[
-            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
-            ColumnFilter(
-                AutoEvalColumn.params.name,
-                type="slider",
-                min=0.01,
-                max=150,
-                label="Select the number of parameters (B)",
-            ),
-            ColumnFilter(
-                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
-            ),
-        ],
-        bool_checkboxgroup_label="Hide models",
-        interactive=False,
-    )
-
-
-demo = gr.Blocks(css=custom_css)
-with demo:
-    gr.HTML(TITLE)
-    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-
-    with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard = init_leaderboard(LEADERBOARD_DF)
-
-        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-
-        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-            with gr.Column():
-                with gr.Row():
-                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-                with gr.Column():
-                    with gr.Accordion(
-                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            finished_eval_table = gr.components.Dataframe(
-                                value=finished_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-                    with gr.Accordion(
-                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            running_eval_table = gr.components.Dataframe(
-                                value=running_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-
-                    with gr.Accordion(
-                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            pending_eval_table = gr.components.Dataframe(
-                                value=pending_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-            with gr.Row():
-                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-
-            with gr.Row():
-                with gr.Column():
-                    model_name_textbox = gr.Textbox(label="Model name")
-                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                    model_type = gr.Dropdown(
-                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                        label="Model type",
-                        multiselect=False,
-                        value=None,
-                        interactive=True,
-                    )
-
-                with gr.Column():
-                    precision = gr.Dropdown(
-                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                        label="Precision",
-                        multiselect=False,
-                        value="float16",
-                        interactive=True,
-                    )
-                    weight_type = gr.Dropdown(
-                        choices=[i.value.name for i in WeightType],
-                        label="Weights type",
-                        multiselect=False,
-                        value="Original",
-                        interactive=True,
-                    )
-                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-
-            submit_button = gr.Button("Submit Eval")
-            submission_result = gr.Markdown()
-            submit_button.click(
-                add_new_eval,
-                [
-                    model_name_textbox,
-                    base_model_name_textbox,
-                    revision_name_textbox,
-                    precision,
-                    weight_type,
-                    model_type,
-                ],
-                submission_result,
-            )

     with gr.Row():
-        with gr.Accordion("📙 Citation", open=False):
-            citation_button = gr.Textbox(
-                value=CITATION_BUTTON_TEXT,
-                label=CITATION_BUTTON_LABEL,
-                lines=20,
-                elem_id="citation-button",
-                show_copy_button=True,
-            )
-
-scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=1800)
-scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
 import gradio as gr
 import pandas as pd
+from functools import reduce
+from collections import defaultdict
+import os
+from yaml import safe_load
+
+CONFIG = safe_load(open("config.yaml"))
+
+data = defaultdict(dict)
+
+# Read the data
+for settings in CONFIG['settings']:
+    for type in CONFIG['types']:
+        # Read each workbook from the path derived from the config file
+        data[settings][type] = pd.read_excel(os.path.join("data", CONFIG["settings_mapping"][settings] + f"-{type}.xlsx"))
+
+# Add an average-score column
+for settings in CONFIG['settings']:
+    for type in CONFIG['types']:
+        data[settings][type]["Average"] = data[settings][type].iloc[:, 1:].mean(axis=1)
+# Add a Rank column
+for settings in CONFIG['settings']:
+    for type in CONFIG['types']:
+        data[settings][type]["Rank"] = data[settings][type]["Average"].rank(ascending=False, method='min').astype(int)
+# Move the Rank column to the front
+for settings in CONFIG['settings']:
+    for type in CONFIG['types']:
+        cols = data[settings][type].columns.tolist()
+        cols = cols[-1:] + cols[:-1]
+        data[settings][type] = data[settings][type][cols]
+
+css = """
+table > thead {
+    white-space: normal;
+}
+
+table {
+    --cell-width-1: 250px;
+}
+
+table > tbody > tr > td:nth-child(2) > div {
+    overflow-x: auto;
+}
+
+.filter-checkbox-group {
+    max-width: max-content;
+}
+
+/* Make sure the second column (Model) is fully expanded */
+table > tbody > tr > td:nth-child(2) {
+    white-space: nowrap;
+    width: auto;
+}
+
+/* Show the other columns compactly */
+table > tbody > tr > td:not(:nth-child(2)) {
+    white-space: normal;
+    width: auto;
+}
+"""
+
+"""
+Each inner tab can have the following keys:
+- language: The language of the leaderboard
+- language_long: [optional] The long form of the language
+- description: The description of the leaderboard
+- credits: [optional] The credits for the leaderboard
+- desc: [optional] The description of the leaderboard
+- data: The data for the leaderboard
+"""
+# Define model types and sizes (placeholders)
+MODEL_TYPES = [
+    "Open",
+    "Proprietary",
+    "Sentence Transformers",
+    "Cross-Encoders",
+    "Bi-Encoders",
+    "Uses Instructions",
+    "No Instructions",
+]
+
+NUMERIC_INTERVALS = {
+    "<100M": pd.Interval(0, 100, closed="right"),
+    "100M to 250M": pd.Interval(100, 250, closed="right"),
+    "250M to 500M": pd.Interval(250, 500, closed="right"),
+    "500M to 1B": pd.Interval(500, 1000, closed="right"),
+    ">1B": pd.Interval(1000, 1_000_000, closed="right"),
+}
+
+# Define the filtering logic (not wired up yet: the click handler below is still
+# commented out, and `df` is a placeholder for the table being displayed)
+def filter_data(search_query, model_types, model_sizes):
+    output_df = df.copy()
+
+    # Apply the search query
+    if search_query:
+        names = output_df.index.str.lower()
+        masks = []
+        for query in search_query.split(";"):
+            masks.append(names.str.contains(query.lower()))
+        output_df = output_df[reduce(lambda a, b: a | b, masks)]
+
+    # Apply the model type filtering
+    if set(model_types) != set(MODEL_TYPES):
+        # Placeholder logic for model type filtering
+        pass
+
+    # Apply the model size filtering
+    if model_sizes:
+        # Placeholder logic for model size filtering
+        pass
+
+    return output_df
+
+# Create the Gradio interface
+with gr.Blocks(css=css) as demo:
+    gr.Markdown("# Model Leaderboard")
+
     with gr.Row():
+        search_box = gr.Textbox(
+            label="Search Models (separate by ';')",
+            placeholder=" 🔍 Search for a model and press enter..."
+        )
+        model_type_checkbox_group = gr.CheckboxGroup(
+            label="Model types",
+            choices=MODEL_TYPES,
+            value=MODEL_TYPES,
+            interactive=True,
+            elem_classes=["filter-checkbox-group"],
+            scale=3,
+        )
+        model_size_checkbox_group = gr.CheckboxGroup(
+            label="Model sizes (in number of parameters)",
+            choices=list(NUMERIC_INTERVALS.keys()),
+            value=list(NUMERIC_INTERVALS.keys()),
+            interactive=True,
+            elem_classes=["filter-checkbox-group"],
+            scale=2,
+        )
+        submit_button = gr.Button("Filter Data")
+
+    # One tab per setting, with an inner tab per result type
+    with gr.Tabs() as result_table:
+        for settings in CONFIG['settings']:
+            with gr.Tab(label=settings):
+                for type in CONFIG['types']:
+                    with gr.Tab(label=type):
+                        gr.DataFrame(data[settings][type], type="pandas")
+
+    # submit_button.click(fn=filter_data, inputs=[search_box, model_type_checkbox_group, model_size_checkbox_group], outputs=result_table)
+
+demo.launch()
config.yaml ADDED
@@ -0,0 +1,20 @@
+settings:
+  - "w/ meta w/ inst"
+  - "w/ meta w/o inst"
+  - "w/o meta w/ inst"
+  - "w/o meta w/o inst"
+types:
+  - "Code"
+  - "API"
+  - "Customized"
+  - "Avg"
+metrics:
+  - Comp@10
+  - Recall@10
+  - Prec@10
+  - NDCG@10
+settings_mapping:
+  "w/ meta w/ inst": "w-w"
+  "w/ meta w/o inst": "w-wo"
+  "w/o meta w/ inst": "wo-w"
+  "w/o meta w/o inst": "wo-wo"
data/w-w-API.xlsx ADDED
Binary file (26.9 kB)
data/w-w-Avg.xlsx ADDED
Binary file (11.7 kB)
data/w-w-Code.xlsx ADDED
Binary file (28.4 kB)
data/w-w-Customized.xlsx ADDED
Binary file (11.3 kB)
data/w-wo-API.xlsx ADDED
Binary file (10.7 kB)
data/w-wo-Avg.xlsx ADDED
Binary file (28.4 kB)
data/w-wo-Code.xlsx ADDED
Binary file (28.6 kB)
data/w-wo-Customized.xlsx ADDED
Binary file (10.6 kB)
data/wo-w-API.xlsx ADDED
Binary file (10.6 kB)
data/wo-w-Avg.xlsx ADDED
Binary file (11.7 kB)
data/wo-w-Code.xlsx ADDED
Binary file (28.4 kB)
data/wo-w-Customized.xlsx ADDED
Binary file (10.6 kB)
data/wo-wo-API.xlsx ADDED
Binary file (10.6 kB)
data/wo-wo-Avg.xlsx ADDED
Binary file (28.4 kB)
data/wo-wo-Code.xlsx ADDED
Binary file (28.3 kB)
data/wo-wo-Customized.xlsx ADDED
Binary file (10.6 kB)