kexinhuang12345 committed · Commit aae1219 · Parent(s): df330ee

minor fix

Files changed:
- app.py +9 -4
- src/about.py +1 -1
- src/populate.py +30 -5
app.py CHANGED

@@ -134,7 +134,7 @@ with demo:
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 …
+        with gr.TabItem("🏅 Entity Classification Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
             COLS = COLS_NC
             AutoEvalColumn = AutoEvalColumn_NodeClassification
             original_df = get_leaderboard_df(EVAL_REQUESTS_PATH, "Node Classification")

@@ -206,8 +206,10 @@ with demo:
                 leaderboard_table,
                 queue=True,
             )
+            gr.Markdown("Evaluation metric: AUROC ⬆️")
+
 
-        with gr.TabItem("🏅 …
+        with gr.TabItem("🏅 Entity Regression Leaderboard", elem_id="llm-benchmark-tab-table", id=1):
             COLS = COLS_NR
             AutoEvalColumn = AutoEvalColumn_NodeRegression
             original_df = get_leaderboard_df(EVAL_REQUESTS_PATH, "Node Regression")

@@ -278,8 +280,9 @@ with demo:
                 leaderboard_table,
                 queue=True,
             )
+            gr.Markdown("Evaluation metric: MAE ⬇️")
 
-        with gr.TabItem("🏅 …
+        with gr.TabItem("🏅 Recommendation Leaderboard", elem_id="llm-benchmark-tab-table", id=2):
             COLS = COLS_LP
             AutoEvalColumn = AutoEvalColumn_LinkPrediction
             original_df = get_leaderboard_df(EVAL_REQUESTS_PATH, "Link Prediction")

@@ -350,6 +353,8 @@ with demo:
                 leaderboard_table,
                 queue=True,
             )
+            gr.Markdown("Evaluation metric: MAE ⬇️")
+
 
         with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
             with gr.Column():

@@ -388,7 +393,7 @@ with demo:
                 github_url_textbox = gr.Textbox(label="GitHub URL Link")
                 #parameters_textbox = gr.Textbox(label="Number of parameters")
                 task_track = gr.Dropdown(
-                    choices=['…
+                    choices=['Entity Classification', 'Entity Regression', 'Recommendation'],
                     label="Choose the task track",
                     multiselect=False,
                     value=None,
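For orientation, the sketch below is a minimal, self-contained Gradio layout mirroring what app.py builds after this commit: three leaderboard tabs, each with an evaluation-metric note under its table. It is illustrative only; fake_leaderboard and its placeholder data stand in for the repo's get_leaderboard_df(EVAL_REQUESTS_PATH, task_type).

# Illustrative sketch; fake_leaderboard is a placeholder, not repository code.
import gradio as gr
import pandas as pd

def fake_leaderboard(task_type: str) -> pd.DataFrame:
    # Placeholder data in lieu of parsing evaluation requests from disk.
    return pd.DataFrame({"Model": ["example-model"], "Average Rank⬆️": [1.0]})

TRACKS = [
    ("🏅 Entity Classification Leaderboard", "Entity Classification", "AUROC ⬆️"),
    ("🏅 Entity Regression Leaderboard", "Entity Regression", "MAE ⬇️"),
    ("🏅 Recommendation Leaderboard", "Recommendation", "MAE ⬇️"),
]

with gr.Blocks() as demo:
    with gr.Tabs(elem_classes="tab-buttons"):
        for i, (title, task_type, metric) in enumerate(TRACKS):
            with gr.TabItem(title, elem_id="llm-benchmark-tab-table", id=i):
                gr.Dataframe(value=fake_leaderboard(task_type))
                gr.Markdown(f"Evaluation metric: {metric}")

if __name__ == "__main__":
    demo.launch()

Note that after the commit the tab labels use the entity-level names while app.py still calls get_leaderboard_df with the original strings ("Node Classification", "Node Regression", "Link Prediction"); src/populate.py is updated below so the function accepts either spelling.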
src/about.py CHANGED

@@ -79,7 +79,7 @@ Once you have developed your model and got results, you can submit your test res
 - **Is it an official submission**: Whether the implementation is official (implementation by the authors who proposed the method) or unofficial (re-implementation of the method by non-authors).
 - **Paper URL Link**: The original paper describing the method (an arXiv link is recommended; the paper need not be peer-reviewed). If your method has any original component (e.g., even just combining existing methods XXX and YYY), you have to write a technical report describing it (e.g., how exactly you combined XXX and YYY).
 - **GitHub URL Link**: The GitHub repository or directory containing all code to reproduce the result. A placeholder repository is not allowed.
-- **Task Track**: Choose the task you submit to, from
+- **Task Track**: Choose the task you submit to, from entity classification, entity regression, and recommendation.
 - **Honor code**: Please acknowledge that your submission adheres to all the ethical policies and that your result is reproducible.
 - **Test performance**: Raw test performance output by the RelBench model evaluators, where the average and unbiased standard deviation must be taken over 5 different random seeds. You can either not fix random seeds at all, or use random seeds 0 to 4. We highly discourage tuning the random seeds.
 - **Validation performance**: Validation performance of the model used to report the test performance above.
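As a concrete illustration of the "Test performance" requirement above, the snippet below is a minimal sketch (not part of the commit) of how the reported mean and unbiased standard deviation over 5 seeds could be computed; the per-seed scores are hypothetical.

# Hypothetical per-seed test scores (e.g., AUROC for one task), seeds 0-4.
import numpy as np

scores = np.array([0.812, 0.805, 0.821, 0.809, 0.816])
mean = scores.mean()
std = scores.std(ddof=1)  # ddof=1 gives the unbiased (Bessel-corrected) standard deviation
print(f"{mean:.3f} ± {std:.3f}")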
src/populate.py CHANGED

@@ -35,18 +35,41 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     #df = df[has_no_nan_values(df, benchmark_cols)]
     return raw_data, df
 '''
+
+# Function to extract the numerical part before '+'
+def extract_x(value):
+    return float(value.split('+')[0])
+
+# Function to highlight the highest (or lowest) value based on X
+def make_bold(df, cols, ascending):
+    df_highlight = df.copy()
+
+    def apply_highlight(s):
+        if ascending:
+            max_idx = s.apply(extract_x).idxmin()
+        else:
+            max_idx = s.apply(extract_x).idxmax()
+
+        return ['font-weight: bold' if i == max_idx else '' for i in range(len(s))]
+
+    styler = df_highlight.style.apply(lambda x: apply_highlight(x) if x.name in cols else ['']*len(x), axis=0)
+    return styler
+
 def format_number(num):
     return f"{num:.3f}"
 def get_leaderboard_df(EVAL_REQUESTS_PATH, task_type) -> pd.DataFrame:
-    if task_type …
+    if task_type in ['Node Classification', 'Entity Classification']:
         ascending = False
         tasks = nc_tasks
-    …
+        task_type = ['Node Classification', 'Entity Classification']
+    elif task_type in ['Node Regression', 'Entity Regression']:
         ascending = True
         tasks = nr_tasks
-    …
+        task_type = ['Node Regression', 'Entity Regression']
+    elif task_type in ['Link Prediction', 'Recommendation']:
         ascending = False
         tasks = lp_tasks
+        task_type = ['Link Prediction', 'Recommendation']
 
     model_result_filepaths = []
     for root,_, files in os.walk(EVAL_REQUESTS_PATH):

@@ -60,7 +83,7 @@ def get_leaderboard_df(EVAL_REQUESTS_PATH, task_type) -> pd.DataFrame:
         import json
         with open(model) as f:
             out = json.load(f)
-        if ('task' in out) and (out['task'] …
+        if ('task' in out) and (out['task'] in task_type):
             model_res.append(out)
 
     for model in model_res:

@@ -87,11 +110,13 @@ def get_leaderboard_df(EVAL_REQUESTS_PATH, task_type) -> pd.DataFrame:
     df_res = pd.DataFrame(columns=columns_to_show)
 
     #df_res = pd.DataFrame([{col: model[col] for col in columns_to_show} for model in model_res])
-    …
+
     ranks = df_res[list(name2short_name.values())].rank(ascending = ascending)
     df_res.rename(columns={'model': 'Model', 'author': 'Author', 'email': 'Email', 'paper_url': 'Paper URL', 'github_url': 'Github URL', 'submitted_time': 'Time'}, inplace=True)
     df_res['Average Rank⬆️'] = ranks.mean(axis=1)
     df_res.sort_values(by='Average Rank⬆️', ascending=True, inplace=True)
+    #df_res = make_bold(df_res, list(name2short_name.values()), ascending = ascending)
+    print(df_res)
     return df_res
 
 def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
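To make the intended score format concrete, here is a small usage sketch (not part of the commit) for the new extract_x / make_bold helpers. It assumes leaderboard cells are stored as "mean+std" strings, as implied by extract_x splitting on '+', and that the DataFrame keeps a default RangeIndex so the positional comparison inside apply_highlight lines up with idxmax/idxmin labels; the import path, column names, and scores are illustrative.

# Illustrative only: the 'src.populate' import path, column names, and scores are assumptions.
import pandas as pd
from src.populate import extract_x, make_bold

df = pd.DataFrame({
    "Model": ["model-a", "model-b"],
    "task-1": ["0.812+0.004", "0.834+0.006"],  # higher mean is better here
    "task-2": ["0.778+0.010", "0.761+0.008"],
})

print(extract_x("0.834+0.006"))  # -> 0.834, the mean part before the '+'

# ascending=False bolds the largest mean in each listed column.
styler = make_bold(df, ["task-1", "task-2"], ascending=False)
print(styler.to_html())  # the winning cell per column carries 'font-weight: bold'

Note that the make_bold call inside get_leaderboard_df is still commented out after this commit, so the bolding is not yet applied to the rendered leaderboard.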