Spaces:
Running
Running
| """ | |
| One-time script to push eval.yaml files to benchmark dataset repos on HuggingFace. | |
| This registers each EEG benchmark dataset with HF's decentralized eval system | |
| by creating the dataset repo (if needed) and uploading an eval.yaml file. | |
| Usage: | |
| python -m scripts.push_eval_yaml | |
| python -m scripts.push_eval_yaml --dry-run | |
| python -m scripts.push_eval_yaml --benchmark bcic2a | |
| """ | |
import argparse
import logging
import sys
from typing import Optional

import yaml

from app.config.base import HF_TOKEN
from app.config.benchmarks import EEG_BENCHMARKS, EVALUATION_FRAMEWORK
from app.config.hf_config import API as hf_api
from app.core.formatting import LogFormatter
# Configure root logging once at import time; module-level logger per stdlib convention.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def generate_eval_yaml(benchmark) -> str:
    """Render the eval.yaml document for a single benchmark.

    Args:
        benchmark: Benchmark config object exposing ``display_name``,
            ``category``, ``description``, ``task_id``, ``config`` and
            ``split`` attributes.

    Returns:
        A YAML string registering the benchmark's single task under
        the shared evaluation framework.
    """
    task_entry = {
        "id": benchmark.task_id,
        "config": benchmark.config,
        "split": benchmark.split,
    }
    payload = {
        "name": f"{benchmark.display_name} {benchmark.category}",
        "description": benchmark.description,
        "evaluation_framework": EVALUATION_FRAMEWORK,
        "tasks": [task_entry],
    }
    # Block style (not flow) and insertion order preserved for readability.
    return yaml.dump(payload, default_flow_style=False, sort_keys=False)
def push_eval_yaml(
    benchmark_key: Optional[str] = None,
    dry_run: bool = False,
):
    """Push eval.yaml to benchmark dataset repos.

    Creates each dataset repo (idempotently, via ``exist_ok=True``) and
    uploads a generated eval.yaml. A failure on one benchmark is logged
    and does not stop processing of the remaining benchmarks.

    Args:
        benchmark_key: If specified, only push for this benchmark; exits
            the process with status 1 when the key is unknown.
        dry_run: If True, log what would happen without contacting HF.
    """
    benchmarks = EEG_BENCHMARKS
    if benchmark_key:
        if benchmark_key not in benchmarks:
            logger.error(f"Unknown benchmark key: {benchmark_key}")
            logger.info(f"Available keys: {list(benchmarks.keys())}")
            sys.exit(1)
        benchmarks = {benchmark_key: benchmarks[benchmark_key]}
    logger.info(LogFormatter.section("PUSHING EVAL.YAML TO BENCHMARK DATASETS"))
    # The mapping key is unused in the body, so iterate values directly.
    for benchmark in benchmarks.values():
        dataset_id = benchmark.dataset_id
        logger.info(LogFormatter.subsection(f"Processing: {dataset_id}"))
        eval_yaml_content = generate_eval_yaml(benchmark)
        if dry_run:
            logger.info(f"[DRY RUN] Would create repo: {dataset_id} (type=dataset)")
            logger.info(f"[DRY RUN] Would upload eval.yaml:\n{eval_yaml_content}")
            continue
        # Create the dataset repo if it doesn't exist; best-effort per benchmark,
        # so a failure here skips only this dataset's upload.
        try:
            hf_api.create_repo(
                repo_id=dataset_id,
                repo_type="dataset",
                exist_ok=True,
            )
            logger.info(LogFormatter.success(f"Repo ready: {dataset_id}"))
        except Exception as e:
            logger.error(LogFormatter.error(f"Failed to create repo {dataset_id}", e))
            continue
        # Upload eval.yaml straight from memory (bytes), no temp file needed.
        try:
            hf_api.upload_file(
                path_or_fileobj=eval_yaml_content.encode("utf-8"),
                path_in_repo="eval.yaml",
                repo_id=dataset_id,
                repo_type="dataset",
                commit_message="Add eval.yaml for EEG Finetune Arena benchmark registration",
            )
            logger.info(LogFormatter.success(f"Uploaded eval.yaml to {dataset_id}"))
        except Exception as e:
            logger.error(
                LogFormatter.error(f"Failed to upload eval.yaml to {dataset_id}", e)
            )
    logger.info(LogFormatter.section("DONE"))
def main():
    """CLI entry point: parse arguments and push eval.yaml files."""
    parser = argparse.ArgumentParser(
        description="Push eval.yaml files to HF benchmark dataset repos."
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Preview without pushing to HF.",
    )
    parser.add_argument(
        "--benchmark",
        type=str,
        default=None,
        help="Only push for a specific benchmark key (e.g. bcic2a).",
    )
    args = parser.parse_args()

    # A real push needs credentials; a dry run does not.
    needs_token = not args.dry_run
    if needs_token and not HF_TOKEN:
        logger.error("HF_TOKEN environment variable is required (set it or use --dry-run)")
        sys.exit(1)

    push_eval_yaml(benchmark_key=args.benchmark, dry_run=args.dry_run)


if __name__ == "__main__":
    main()