Spaces:

CPunisher
/

JavaBench

Running

App Files Community

CPunisher committed on Aug 23

Commit

23f22ff

•

1 Parent(s): 948d4dc

Data

Browse files

Files changed (6) hide show

app.py +32 -64
data/data_context.json +492 -0
data/data_incr-order.json +282 -0
data/data_method.json +492 -0
data/models.json +30 -0
src/display/utils.py +21 -15

app.py CHANGED Viewed

@@ -1,102 +1,70 @@
 import gradio as gr
-from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
-from huggingface_hub import snapshot_download
 from src.about import (
     CITATION_BUTTON_LABEL,
     CITATION_BUTTON_TEXT,
     EVALUATION_QUEUE_TEXT,
     INTRODUCTION_TEXT,
-    LLM_BENCHMARKS_TEXT,
     TITLE,
 )
 from src.display.css_html_js import custom_css
 from src.display.utils import (
-    BENCHMARK_COLS,
-    COLS,
-    EVAL_COLS,
-    EVAL_TYPES,
     AutoEvalColumn,
-    ModelType,
-    fields,
-    WeightType,
-    Precision
 )
-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
-from src.populate import get_evaluation_queue_df, get_leaderboard_df
-from src.submission.submit import add_new_eval
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
-### Space initialisation
-try:
-    print(EVAL_REQUESTS_PATH)
-    snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-except Exception:
-    restart_space()
-try:
-    print(EVAL_RESULTS_PATH)
-    snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-except Exception:
-    restart_space()
-LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
-(
-    finished_eval_queue_df,
-    running_eval_queue_df,
-    pending_eval_queue_df,
-) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
-def init_leaderboard(dataframe):
-    if dataframe is None or dataframe.empty:
-        raise ValueError("Leaderboard DataFrame is empty or None.")
-    return Leaderboard(
         value=dataframe,
         datatype=[c.type for c in fields(AutoEvalColumn)],
-        select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
-            label="Select Columns to Display:",
-        ),
-        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
-        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
-        filter_columns=[
-            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
-            ColumnFilter(
-                AutoEvalColumn.params.name,
-                type="slider",
-                min=0.01,
-                max=150,
-                label="Select the number of parameters (B)",
-            ),
-            ColumnFilter(
-                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
-            ),
-        ],
-        bool_checkboxgroup_label="Hide models",
         interactive=False,
     )
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
     gr.HTML(INTRODUCTION_TEXT, elem_classes="markdown-text")
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard = init_leaderboard(LEADERBOARD_DF)
         # with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
         #     gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

 import gradio as gr
 import pandas as pd
+import json
+from gradio_leaderboard import Leaderboard, SelectColumns
 from apscheduler.schedulers.background import BackgroundScheduler
 from src.about import (
     CITATION_BUTTON_LABEL,
     CITATION_BUTTON_TEXT,
     EVALUATION_QUEUE_TEXT,
     INTRODUCTION_TEXT,
     TITLE,
 )
 from src.display.css_html_js import custom_css
 from src.display.utils import (
     AutoEvalColumn,
+    fields
 )
+from src.envs import API, REPO_ID
 def restart_space():
     """Restart the Space named by ``REPO_ID`` via the ``API`` client from ``src.envs``."""
     target_repo = REPO_ID
     API.restart_space(repo_id=target_repo)
def _load_leaderboard_frame(data_file):
    """Read a metrics JSON file and return the leaderboard table as a DataFrame.

    The JSON is expected to map each metric name to a list of records, every
    record carrying ``Context``, ``Method``, ``Model`` and ``Pass_at_1``.
    Each metric becomes one column (named after the metric); rows are matched
    across metrics on the three key columns with an outer join.

    Raises:
        ValueError: if the file yields no rows at all.
    """
    key_columns = ['Context', 'Method', 'Model']

    # Close the file as soon as it is parsed; nothing below needs the handle.
    with open(data_file, "r", encoding="utf-8") as fp:
        data = json.load(fp)

    frame = pd.DataFrame()
    for metric, records in data.items():
        metric_df = pd.DataFrame(records).rename(columns={"Pass_at_1": metric})
        if frame.empty:
            frame = metric_df
        else:
            frame = frame.merge(metric_df, on=key_columns, how='outer')

    if frame.empty:
        # Fail with a clear message instead of a KeyError from drop() below.
        raise ValueError(f"Leaderboard data file {data_file!r} contains no results.")

    # Average over every metric in the file (previously a hard-coded 5).
    # sum() skips NaN, so a metric missing for a row counts as 0 in its score.
    frame['Score'] = frame.drop(columns=key_columns).sum(axis=1) / len(data)

    # Show fractions as percentages with one decimal place.
    numeric_cols = frame.select_dtypes(include='number').columns
    frame[numeric_cols] = (frame[numeric_cols] * 100).round(1)

    # Place Score at position 3, i.e. right after the three key columns.
    ordered = [name for name in frame.columns if name != 'Score']
    ordered.insert(3, 'Score')
    return frame[ordered].sort_values(by='Score', ascending=False)


def init_leaderboard(data_file):
    """Create the read-only Gradio table for one evaluation data file.

    Args:
        data_file: path to a metrics JSON file (see ``_load_leaderboard_frame``).

    Returns:
        A ``gr.components.DataFrame`` showing the rows sorted by descending Score.

    Raises:
        ValueError: if the data file yields no rows.
    """
    dataframe = _load_leaderboard_frame(data_file)
    # NOTE(review): headers skip hidden columns but datatype does not, so the
    # two lists differ in length if any AutoEvalColumn field is hidden — confirm
    # against src/display/utils.py.
    return gr.components.DataFrame(
        value=dataframe,
        headers=[c.name for c in fields(AutoEvalColumn) if not c.hidden],
        datatype=[c.type for c in fields(AutoEvalColumn)],
        interactive=False,
    )
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
     gr.HTML(INTRODUCTION_TEXT, elem_classes="markdown-text")
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        with gr.TabItem("[Method] Evaluation", elem_id="llm-benchmark-tab-table", id=0):
+            leaderboard = init_leaderboard("./data/data_method.json")
+        with gr.TabItem("[Context] Evaluation", elem_id="llm-benchmark-tab-table", id=1):
+            leaderboard = init_leaderboard("./data/data_context.json")
+        with gr.TabItem("[Incremental] Evaluation", elem_id="llm-benchmark-tab-table", id=2):
+            leaderboard = init_leaderboard("./data/data_incr-order.json")
         # with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
         #     gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

data/data_context.json ADDED Viewed

	@@ -0,0 +1,492 @@

+{
+  "completion": [
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-4o-2024-05-13",
+      "Pass_at_1": 1.0
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.9714285714
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.945
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.9378571429
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.9357142857
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.9328571429
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.9214285714
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.9007142857
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.8907142857
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.8828571429
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.8728571429
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.8578571429
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.845
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.7964285714
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.7935714286
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.7192857143
+    }
+  ],
+  "compilation_class_wise": [
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.7942857143
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.7414285714
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.7385714286
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.7314285714
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.7171428571
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.6978571429
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-4o-2024-05-13",
+      "Pass_at_1": 0.6607142857
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.6592857143
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.6414285714
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.58
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.4814285714
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.45
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.39
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.3692857143
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.3457142857
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.215
+    }
+  ],
+  "compilation_test_wise": [
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-4o-2024-05-13",
+      "Pass_at_1": 0.5035714286
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.4202826585
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.3443277311
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.3405987395
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.3387079832
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.3183823529
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.3121848739
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.2858193277
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.2283088235
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.2240546218
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.1466911765
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.1128676471
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.03125
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.0147058824
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.0125
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.0
+    }
+  ],
+  "pass_class_wise": [
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.7832360347
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.723699056
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.715291943
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.7033228696
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.6855203826
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.6808480861
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-4o-2024-05-13",
+      "Pass_at_1": 0.6545897285
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.6417690022
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.6293667264
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.5674101922
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.4741970721
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.4489821662
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.3864123669
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.3599216366
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.3272885064
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.2139054163
+    }
+  ],
+  "pass_test_wise": [
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-4o-2024-05-13",
+      "Pass_at_1": 0.3438179726
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.3047552867
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.3032497787
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.2941156144
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.2544265255
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.2393000344
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.2028454735
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.1966660863
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.1877858469
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.1669267449
+    },
+    {
+      "Context": "maximum",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.1370849195
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.1123372221
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.03125
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.0147058824
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.0125
+    },
+    {
+      "Context": "minimum",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.0
+    }
+  ]
+}

data/data_incr-order.json ADDED Viewed

	@@ -0,0 +1,282 @@

+{
+  "completion": [
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.8642857143
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental_random",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.8514285714
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.8392857143
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental_rev",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.8257142857
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental_random",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.8135714286
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental_rev",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.8121428571
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.7485714286
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.5992857143
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.5057142857
+    }
+  ],
+  "compilation_class_wise": [
+    {
+      "Context": "selective",
+      "Method": "incremental_random",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.7285714286
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.725
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.7
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental_random",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.695
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental_rev",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.6864285714
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental_rev",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.6642857143
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.5778571429
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.5021428571
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.3507142857
+    }
+  ],
+  "compilation_test_wise": [
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.2621848739
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental_rev",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.2589381207
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental_random",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.2505252101
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental_rev",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.2421457219
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental_random",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.1824818564
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.0955357143
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.0701680672
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.0352941176
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.0
+    }
+  ],
+  "pass_class_wise": [
+    {
+      "Context": "selective",
+      "Method": "incremental_random",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.7209670092
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.7036481325
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.6846699639
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental_random",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.6816620162
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental_rev",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.6650646981
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental_rev",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.6537268945
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.5603969625
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.4878321662
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.3468474087
+    }
+  ],
+  "pass_test_wise": [
+    {
+      "Context": "selective",
+      "Method": "incremental_random",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.2107590067
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental_rev",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.2085373337
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.1967821219
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental_rev",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.1865232907
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental_random",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.1179619378
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.061012122
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.0514928193
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.0350620781
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.0
+    }
+  ]
+}

data/data_method.json ADDED Viewed

	@@ -0,0 +1,492 @@

+{
+  "completion": [
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-4o-2024-05-13",
+      "Pass_at_1": 1.0
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.9378571429
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.9357142857
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.9214285714
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.8642857143
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.8535714286
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.8471428571
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.845
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.8392857143
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.8192857143
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.7964285714
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.7485714286
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.6378571429
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.5992857143
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.5971428571
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.5057142857
+    }
+  ],
+  "compilation_class_wise": [
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.7942857143
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.7414285714
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.725
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.7171428571
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.7
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.6978571429
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.6828571429
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.6814285714
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.6764285714
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-4o-2024-05-13",
+      "Pass_at_1": 0.6607142857
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.58
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.5778571429
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.5021428571
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.4985714286
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.4335714286
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.3507142857
+    }
+  ],
+  "compilation_test_wise": [
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-4o-2024-05-13",
+      "Pass_at_1": 0.5035714286
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.4202826585
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.3443277311
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.3378676471
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.3183823529
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.3121848739
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.2836134454
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.2621848739
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.2240546218
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.1930147059
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.0955357143
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.0932773109
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.0701680672
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.0352941176
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.0
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.0
+    }
+  ],
+  "pass_class_wise": [
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.7832360347
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.723699056
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.7036481325
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.6855203826
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.6846699639
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.6808480861
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.6772858762
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.6547244155
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.6547232007
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-4o-2024-05-13",
+      "Pass_at_1": 0.6545897285
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.5674101922
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.5603969625
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.4878321662
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.4863639752
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.4261740357
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.3468474087
+    }
+  ],
+  "pass_test_wise": [
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-4o-2024-05-13",
+      "Pass_at_1": 0.3438179726
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.3047552867
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.2941156144
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.2544265255
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.2224382166
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.2083516025
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.2028454735
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-6.7b-instruct",
+      "Pass_at_1": 0.1967821219
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.1930147059
+    },
+    {
+      "Context": "selective",
+      "Method": "holistic",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.1669267449
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.0714792814
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "Phind-CodeLlama-34B-v2",
+      "Pass_at_1": 0.061012122
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "gpt-3.5-turbo-1106",
+      "Pass_at_1": 0.0514928193
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "deepseek-coder-33b-instruct",
+      "Pass_at_1": 0.0350620781
+    },
+    {
+      "Context": "selective",
+      "Method": "independent",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.0
+    },
+    {
+      "Context": "selective",
+      "Method": "incremental",
+      "Model": "WizardCoder-15B-V1.0",
+      "Pass_at_1": 0.0
+    }
+  ]
+}

data/models.json ADDED Viewed

	@@ -0,0 +1,30 @@

+[
+  {
+    "model": "gpt-3.5-turbo-1106",
+    "link": "https://openai.com/"
+  },
+  {
+    "model": "gpt-4o-2024-05-13",
+    "link": "https://openai.com/"
+  },
+  {
+    "model": "deepseek-coder-33b-instruct",
+    "link": "https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
+    "size": 33
+  },
+  {
+    "model": "deepseek-coder-6.7b-instruct",
+    "link": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct",
+    "size": 6.7
+  },
+  {
+    "model": "Phind-CodeLlama-34B-v2",
+    "link": "https://huggingface.co/Phind/Phind-CodeLlama-34B-v2",
+    "size": 34
+  },
+  {
+    "model": "WizardCoder-15B-V1.0",
+    "link": "https://huggingface.co/WizardLMTeam/WizardCoder-15B-V1.0",
+    "size": 15
+  }
+]

src/display/utils.py CHANGED Viewed

@@ -23,22 +23,28 @@ class ColumnContent:
 ## Leaderboard columns
 auto_eval_column_dict = []
 # Init
-auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
-auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
 #Scores
-auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
-for task in Tasks:
-    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
-# Model information
-auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
-auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
-auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
-auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
-auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
-auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
-auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
-auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

 ## Leaderboard columns
 auto_eval_column_dict = []
 # Init
+# auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
+# auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
 #Scores
+# auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
+# auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
+# auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
+# auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
+# auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
+# auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
+# auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
+# auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
+# auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
+# auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
+auto_eval_column_dict.append(["context", ColumnContent, ColumnContent("Context", "str", True, never_hidden=True)])
+auto_eval_column_dict.append(["method", ColumnContent, ColumnContent("Method", "str", True, never_hidden=True)])
+auto_eval_column_dict.append(["completion", ColumnContent, ColumnContent("Completion", "number", True, never_hidden=True)])
+auto_eval_column_dict.append(["compilation_class_wise", ColumnContent, ColumnContent("Compilation(class)", "number", True, never_hidden=True)])
+auto_eval_column_dict.append(["compilation_test_wise", ColumnContent, ColumnContent("Compilation(test)", "number", True, never_hidden=True)])
+auto_eval_column_dict.append(["pass_class_wise", ColumnContent, ColumnContent("Pass(class)", "number", True, never_hidden=True)])
+auto_eval_column_dict.append(["pass_test_wise", ColumnContent, ColumnContent("Pass(test)", "number", True, never_hidden=True)])
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)