# Spaces: Running
from dataclasses import dataclass
from enum import Enum
# The decorator generates __init__/__repr__/__eq__ from the annotated fields;
# without it, positional construction such as Task("anli_r1", "acc", "ANLI")
# raises TypeError because the class would have no matching __init__.
@dataclass
class Task:
    """Describe one benchmark task shown as a leaderboard column.

    Attributes:
        benchmark: Task key used for this task in the json results file.
        metric: Metric key read for this task in the json results file.
        col_name: Name displayed for this task in the leaderboard.
    """

    benchmark: str
    metric: str
    col_name: str
# Select your tasks here | |
# --------------------------------------------------- | |
class Tasks(Enum):
    """Tasks selected for the leaderboard, one enum member per task."""

    # Each Task carries: task key in the json file, metric key in the json
    # file, and the name to display in the leaderboard.
    task0 = Task(benchmark="anli_r1", metric="acc", col_name="ANLI")
    task1 = Task(benchmark="logiqa", metric="acc_norm", col_name="LogiQA")
# Few-shot setting; change this to match your own evaluation setup.
NUM_FEWSHOT = 0
# --------------------------------------------------- | |
class nc_tasks(Enum):
    """RelBench tasks scored with the ``auroc`` metric.

    NOTE(review): ``nc`` presumably stands for node/entity classification;
    confirm against the code that consumes this enum.
    """

    # Member names are not in numeric order. Enum iterates in definition
    # order, so the declaration order below is kept exactly as-is.
    task0 = Task(
        benchmark="rel-amazon/user-churn",
        metric="auroc",
        col_name="user-churn",
    )
    task1 = Task(
        benchmark="rel-amazon/item-churn",
        metric="auroc",
        col_name="item-churn",
    )
    task3 = Task(
        benchmark="rel-avito/user-visits",
        metric="auroc",
        col_name="user-visits",
    )
    task2 = Task(
        benchmark="rel-avito/user-clicks",
        metric="auroc",
        col_name="user-clicks",
    )
    task7 = Task(
        benchmark="rel-f1/driver-dnf",
        metric="auroc",
        col_name="driver-dnf",
    )
    task8 = Task(
        benchmark="rel-f1/driver-top3",
        metric="auroc",
        col_name="driver-top3",
    )
    task4 = Task(
        benchmark="rel-hm/user-churn",
        metric="auroc",
        col_name="hm-user-churn",
    )
    task6 = Task(
        benchmark="rel-stack/user-engagement",
        metric="auroc",
        col_name="user-engagement",
    )
    task5 = Task(
        benchmark="rel-stack/user-badge",
        metric="auroc",
        col_name="user-badge",
    )
    task9 = Task(
        benchmark="rel-trial/study-outcome",
        metric="auroc",
        col_name="study-outcome",
    )
    task10 = Task(
        benchmark="rel-event/user-repeat",
        metric="auroc",
        col_name="user-repeat",
    )
    task11 = Task(
        benchmark="rel-event/user-ignore",
        metric="auroc",
        col_name="user-ignore",
    )
class nr_tasks(Enum):
    """RelBench tasks scored with the ``mae`` metric.

    NOTE(review): ``nr`` presumably stands for node/entity regression;
    confirm against the code that consumes this enum.
    """

    # There is no ``task2`` member; the gap in numbering is preserved so
    # existing references to member names keep working.
    task0 = Task(
        benchmark="rel-amazon/user-ltv",
        metric="mae",
        col_name="user-ltv",
    )
    task1 = Task(
        benchmark="rel-amazon/item-ltv",
        metric="mae",
        col_name="item-ltv",
    )
    task3 = Task(
        benchmark="rel-avito/ad-ctr",
        metric="mae",
        col_name="ad-ctr",
    )
    task4 = Task(
        benchmark="rel-f1/driver-position",
        metric="mae",
        col_name="driver-position",
    )
    task5 = Task(
        benchmark="rel-hm/item-sales",
        metric="mae",
        col_name="item-sales",
    )
    task6 = Task(
        benchmark="rel-stack/post-votes",
        metric="mae",
        col_name="post-votes",
    )
    task7 = Task(
        benchmark="rel-trial/study-adverse",
        metric="mae",
        col_name="study-adverse",
    )
    task8 = Task(
        benchmark="rel-trial/site-success",
        metric="mae",
        col_name="site-success",
    )
    task9 = Task(
        benchmark="rel-event/user-attendance",
        metric="mae",
        col_name="user-attendance",
    )
class lp_tasks(Enum):
    """RelBench tasks scored with the ``map`` metric.

    NOTE(review): ``lp`` presumably stands for link prediction
    (recommendation); confirm against the code that consumes this enum.
    """

    task0 = Task(
        benchmark="rel-amazon/user-item-purchase",
        metric="map",
        col_name="user-item-purchase",
    )
    task1 = Task(
        benchmark="rel-amazon/user-item-rate",
        metric="map",
        col_name="user-item-rate",
    )
    task2 = Task(
        benchmark="rel-amazon/user-item-review",
        metric="map",
        col_name="user-item-review",
    )
    task3 = Task(
        benchmark="rel-avito/user-ad-visit",
        metric="map",
        col_name="user-ad-visit",
    )
    # Display name is prefixed with "hm-" so it does not collide with the
    # rel-amazon column of the same task name.
    task4 = Task(
        benchmark="rel-hm/user-item-purchase",
        metric="map",
        col_name="hm-user-item-purchase",
    )
    task5 = Task(
        benchmark="rel-stack/user-post-comment",
        metric="map",
        col_name="user-post-comment",
    )
    task6 = Task(
        benchmark="rel-stack/post-post-related",
        metric="map",
        col_name="post-post-related",
    )
    task7 = Task(
        benchmark="rel-trial/condition-sponsor-run",
        metric="map",
        col_name="condition-sponsor-run",
    )
    task8 = Task(
        benchmark="rel-trial/site-sponsor-run",
        metric="map",
        col_name="site-sponsor-run",
    )
# Your leaderboard name (an HTML snippet showing the centered logo image).
TITLE = """<p align="center"><img src="https://relbench.stanford.edu/img/logo.png" alt="logo" width="400px" /></p>"""

# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
"""

# Which evaluations are you running? How can people reproduce what you have?
# Plain string on purpose: the text has no placeholders, so an f-prefix would
# only add a pointless formatting pass (and trips linters, e.g. ruff F541).
LLM_BENCHMARKS_TEXT = """
## Overview of RelBench
"""
# Markdown instructions listing the information required with a submission.
EVALUATION_QUEUE_TEXT = """
## Instruction to submit your model
Once you have developed your model and got results, you can submit your test results to our leaderboards. For each dataset, we require you to submit the following information.
- **Your name**: Primary contact's name
- **Your email**: Primary contact's email
- **RelBench version**: The RelBench version used to conduct the experiments.
- **Model name**: The name of the method. This is a unique identifier of the model. Please make it distinct from any existing model names. It will be overridden if the same model name is submitted.
- **Is it an official submission**: Whether the implementation is official (implementation by authors who proposed the method) or unofficial (re-implementation of the method by non-authors).
- **Paper URL Link**: The original paper describing the method (arXiv link is recommended; the paper need not be peer-reviewed). If your method has any original component (e.g., even just combining existing methods XXX and YYY), you have to write a technical report describing it (e.g., how you exactly combined XXX and YYY).
- **GitHub URL Link**: The GitHub repository or directory containing all code to reproduce the result. A placeholder repository is not allowed.
- **Task Track**: Choose the task you submit to, from entity classification, entity regression, and recommendation.
- **Honor code**: Please acknowledge that your submission adheres to all the ethical policies and your result is reproducible.
- **Test performance**: Raw test performance output by RelBench model evaluators, where average and unbiased standard deviation must be taken over 5 different random seeds. You can either not fix random seeds at all, or use the random seeds from 0 to 4. We highly discourage you from tuning the random seeds.
- **Validation performance**: Validation performance of the model that is used to report the test performance above.
"""
# Label shown next to the citation snippet.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"

# BibTeX entry offered for copying. Raw string so braces and any backslashes
# are kept verbatim. Author names are joined with " and ": BibTeX treats a
# comma-separated list as a single (malformed) name.
CITATION_BUTTON_TEXT = r"""
@article{relbench,
title={Relational Deep Learning: Graph Representation Learning on Relational Tables},
author={Matthias Fey and Weihua Hu and Kexin Huang and Jan Eric Lenssen and Rishabh Ranjan and Joshua Robinson and Rex Ying and Jiaxuan You and Jure Leskovec},
year={2023}
}
"""