Spaces:

agent-evals
/

leaderboard

Running

App Files Files Community

leaderboard / config.py

benediktstroebl

init v1

7c691e6 24 days ago

raw

history blame contribute delete

2.07 kB

	import pandas as pd

	# Type tags, in order: one "str" entry followed by two "number" entries.
	# NOTE(review): presumably these are per-column display types for the
	# leaderboard table -- confirm against the code that consumes TYPES.
	TYPES = ["str", "number", "number"]

	# Column-name lists for the SWE-bench table.
	# NOTE(review): judging by the names these are the columns shown on load,
	# the searchable columns, and the hidden columns -- confirm with the caller.
	SWEBENCH_ON_LOAD_COLUMNS = ["Agent Name", "Accuracy", "Total Cost", "Runs"]
	SWEBENCH_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]
	SWEBENCH_HIDE_COLUMNS = [
	    "F1 Score",
	    "AUC",
	    "Precision",
	    "Recall",
	    "benchmark_name",
	    "Overall Score",
	    "Vectorization Score",
	    "Fathomnet Score",
	    "Feedback Score",
	    "House Price Score",
	    "Spaceship Titanic Score",
	    "AMP Parkinsons Disease Progression Prediction Score",
	    "CIFAR10 Score",
	    "IMDB Score",
	]

	# Column-name lists for the USACO table.
	# NOTE(review): judging by the names these are the columns shown on load,
	# the searchable columns, and the hidden columns -- confirm with the caller.
	USACO_ON_LOAD_COLUMNS = ["Agent Name", "Accuracy", "Total Cost", "Runs"]
	USACO_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]
	USACO_HIDE_COLUMNS = [
	    "F1 Score",
	    "AUC",
	    "Precision",
	    "Recall",
	    "benchmark_name",
	    "Overall Score",
	    "Vectorization Score",
	    "Fathomnet Score",
	    "Feedback Score",
	    "House Price Score",
	    "Spaceship Titanic Score",
	    "AMP Parkinsons Disease Progression Prediction Score",
	    "CIFAR10 Score",
	    "IMDB Score",
	]

	# Column-name lists for the CORE-Bench table.
	# NOTE(review): judging by the names these are the columns shown on load,
	# the searchable columns, and the hidden columns -- confirm with the caller.
	COREBENCH_ON_LOAD_COLUMNS = ["Agent Name", "Accuracy", "Total Cost", "Runs"]
	COREBENCH_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]
	COREBENCH_HIDE_COLUMNS = [
	    "F1 Score",
	    "AUC",
	    "Precision",
	    "Recall",
	    "benchmark_name",
	    "Overall Score",
	    "Vectorization Score",
	    "Fathomnet Score",
	    "Feedback Score",
	    "House Price Score",
	    "Spaceship Titanic Score",
	    "AMP Parkinsons Disease Progression Prediction Score",
	    "CIFAR10 Score",
	    "IMDB Score",
	]



	# Column-name lists for the MLAgentBench table. Unlike the other benchmark
	# groups in this file, the on-load list uses "Overall Score" (and has no
	# "Runs" entry), while "Accuracy" appears in the hide list.
	# NOTE(review): role of each list is inferred from its name -- confirm.
	MLAGENTBENCH_ON_LOAD_COLUMNS = ["Agent Name", "Overall Score", "Total Cost"]
	MLAGENTBENCH_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]
	MLAGENTBENCH_HIDE_COLUMNS = [
	    "F1 Score",
	    "AUC",
	    "Precision",
	    "Recall",
	    "benchmark_name",
	    "Accuracy",
	]


	# Maps a display label to a right-closed pandas Interval, i.e. the lower
	# bound is excluded and the upper bound is included. Consecutive buckets
	# share an endpoint, so together they cover (-1, 10000] without gaps.
	# NOTE(review): the quantity being bucketed is not visible in this file.
	NUMERIC_INTERVALS = {
	    label: pd.Interval(lower, upper, closed="right")
	    for label, lower, upper in (
	        ("?", -1, 0),
	        ("~1.5", 0, 2),
	        ("~3", 2, 4),
	        ("~7", 4, 9),
	        ("~13", 9, 20),
	        ("~35", 20, 45),
	        ("~60", 45, 70),
	        ("70+", 70, 10000),
	    )
	}