# leaderboard / config.py
# benediktstroebl's picture
# init v1
# 7c691e6
import pandas as pd
# Type tags for table columns: one "str" entry followed by two "number" entries.
# NOTE(review): presumably consumed positionally by the leaderboard table widget;
# there are only three entries while most *_ON_LOAD_COLUMNS lists have four --
# confirm against the caller.
TYPES = ["str", "number", "number"]
# SWE-bench leaderboard column configuration.
# NOTE(review): going by the names, these are the columns shown when the table
# first loads, the columns used for searching, and the columns kept hidden;
# the consuming code is not in this file -- confirm.
SWEBENCH_ON_LOAD_COLUMNS = ["Agent Name", "Accuracy", "Total Cost", "Runs"]

SWEBENCH_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]

SWEBENCH_HIDE_COLUMNS = (
    # Classification-style metric columns.
    ["F1 Score", "AUC", "Precision", "Recall"]
    # Bookkeeping column.
    + ["benchmark_name"]
    # "... Score" columns.
    + [
        "Overall Score",
        "Vectorization Score",
        "Fathomnet Score",
        "Feedback Score",
        "House Price Score",
        "Spaceship Titanic Score",
        "AMP Parkinsons Disease Progression Prediction Score",
        "CIFAR10 Score",
        "IMDB Score",
    ]
)
# USACO leaderboard column configuration.
# NOTE(review): going by the names, these are the columns shown when the table
# first loads, the columns used for searching, and the columns kept hidden;
# the consuming code is not in this file -- confirm.
USACO_ON_LOAD_COLUMNS = ["Agent Name", "Accuracy", "Total Cost", "Runs"]

USACO_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]

USACO_HIDE_COLUMNS = (
    # Classification-style metric columns.
    ["F1 Score", "AUC", "Precision", "Recall"]
    # Bookkeeping column.
    + ["benchmark_name"]
    # "... Score" columns.
    + [
        "Overall Score",
        "Vectorization Score",
        "Fathomnet Score",
        "Feedback Score",
        "House Price Score",
        "Spaceship Titanic Score",
        "AMP Parkinsons Disease Progression Prediction Score",
        "CIFAR10 Score",
        "IMDB Score",
    ]
)
# CORE-Bench leaderboard column configuration.
# NOTE(review): going by the names, these are the columns shown when the table
# first loads, the columns used for searching, and the columns kept hidden;
# the consuming code is not in this file -- confirm.
COREBENCH_ON_LOAD_COLUMNS = ["Agent Name", "Accuracy", "Total Cost", "Runs"]

COREBENCH_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]

COREBENCH_HIDE_COLUMNS = (
    # Classification-style metric columns.
    ["F1 Score", "AUC", "Precision", "Recall"]
    # Bookkeeping column.
    + ["benchmark_name"]
    # "... Score" columns.
    + [
        "Overall Score",
        "Vectorization Score",
        "Fathomnet Score",
        "Feedback Score",
        "House Price Score",
        "Spaceship Titanic Score",
        "AMP Parkinsons Disease Progression Prediction Score",
        "CIFAR10 Score",
        "IMDB Score",
    ]
)
# MLAgentBench leaderboard column configuration.
# Unlike the other benchmarks in this file, the on-load list carries
# "Overall Score" (and no "Runs"), and "Accuracy" is in the hide list.
# NOTE(review): going by the names, these are the on-load, searchable and
# hidden columns; the consuming code is not in this file -- confirm.
MLAGENTBENCH_ON_LOAD_COLUMNS = ["Agent Name", "Overall Score", "Total Cost"]

MLAGENTBENCH_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]

MLAGENTBENCH_HIDE_COLUMNS = [
    "F1 Score",
    "AUC",
    "Precision",
    "Recall",
    "benchmark_name",
    "Accuracy",
]
# Maps a display label to a right-closed interval (lower, upper]: the lower
# bound is excluded and the upper bound is included. Consecutive buckets share
# a boundary, so together they cover (-1, 10000] without gaps or overlap.
# NOTE(review): what quantity these buckets measure is not visible in this
# file -- confirm with the code that reads NUMERIC_INTERVALS.
NUMERIC_INTERVALS = {
    bucket_label: pd.Interval(lower_bound, upper_bound, closed="right")
    for bucket_label, lower_bound, upper_bound in (
        ("?", -1, 0),
        ("~1.5", 0, 2),
        ("~3", 2, 4),
        ("~7", 4, 9),
        ("~13", 9, 20),
        ("~35", 20, 45),
        ("~60", 45, 70),
        ("70+", 70, 10000),
    )
}