Spaces:

OpenEvals
/

every-leaderboards

Running

App Files Files Community

every-leaderboards / scripts /create_dataset.py

Linker1907

Fetch leaderboard data from private HuggingFace dataset

155702e 5 days ago

raw

history blame contribute delete

2.97 kB

	#!/usr/bin/env python3
	"""
	Create a private HuggingFace dataset and upload leaderboard data.
	"""

	import os
	import sys
	from huggingface_hub import HfApi, create_repo


	def _build_readme(repo_id):
	    """Return the README.md (dataset card) text for the dataset *repo_id*.

	    The card is assembled from explicit lines so its content does not depend
	    on how this source file is indented; the YAML front matter must start at
	    the very first character of the file.
	    """
	    lines = [
	        "---",
	        "license: mit",
	        "private: true",
	        "---",
	        "",
	        "# Leaderboard Data",
	        "",
	        "This is a private dataset containing benchmark leaderboard data.",
	        "",
	        "## Files",
	        "",
	        "- `leaderboard.json` - Main leaderboard data with model scores across 12 benchmarks",
	        "",
	        "## Structure",
	        "",
	        "The JSON file contains:",
	        "- `metadata`: Version, last updated timestamp, counts",
	        "- `benchmarks`: Benchmark definitions and metadata",
	        "- `models`: Array of model entries with scores",
	        "",
	        "## Usage",
	        "",
	        "This dataset is private and requires authentication to access.",
	        "",
	        "```python",
	        "from huggingface_hub import hf_hub_download",
	        "",
	        "file = hf_hub_download(",
	        f'    repo_id="{repo_id}",',
	        '    filename="leaderboard.json",',
	        '    repo_type="dataset",',
	        '    token="your_token"',
	        ")",
	        "```",
	        "",
	        "Or fetch directly via URL (requires auth token):",
	        "```",
	        f"https://huggingface.co/datasets/{repo_id}/resolve/main/leaderboard.json",
	        "```",
	    ]
	    return "\n".join(lines) + "\n"


	def main():
	    """Create the private dataset repo and upload the leaderboard file and README.

	    The access token is read from the ``HF_TOKEN`` environment variable and
	    the data file from ``data/leaderboard.json`` (resolved against the current
	    working directory).

	    Exits with status 1, after printing a message to stderr, when the token
	    is not set, the local data file is missing, or any call made while
	    creating the repository or uploading files raises.
	    """
	    token = os.environ.get("HF_TOKEN")
	    if not token:
	        print("❌ Error: HF_TOKEN environment variable not set", file=sys.stderr)
	        print("Please set it with: export HF_TOKEN=your_token_here", file=sys.stderr)
	        sys.exit(1)

	    # Repository details
	    repo_id = "OpenEvals/leaderboard-data"
	    repo_type = "dataset"
	    data_path = "data/leaderboard.json"

	    # Validate the local input before any remote side effect, so a missing
	    # file does not leave behind a freshly created but empty repository.
	    if not os.path.isfile(data_path):
	        print(f"❌ Error: local file not found: {data_path}", file=sys.stderr)
	        sys.exit(1)

	    api = HfApi(token=token)

	    try:
	        # exist_ok=True makes re-running the script against an existing repo safe.
	        print(f"📦 Creating private dataset: {repo_id}")
	        repo_url = create_repo(
	            repo_id=repo_id,
	            token=token,
	            repo_type=repo_type,
	            private=True,
	            exist_ok=True,
	        )
	        print(f"✅ Repository created/exists: {repo_url}")

	        print("📤 Uploading leaderboard.json...")
	        api.upload_file(
	            path_or_fileobj=data_path,
	            path_in_repo="leaderboard.json",
	            repo_id=repo_id,
	            repo_type=repo_type,
	            token=token,
	            commit_message="Update leaderboard data",
	        )
	        print("✅ File uploaded successfully!")

	        print("📝 Creating README.md...")
	        api.upload_file(
	            # The README is generated in memory, so it is uploaded as bytes.
	            path_or_fileobj=_build_readme(repo_id).encode(),
	            path_in_repo="README.md",
	            repo_id=repo_id,
	            repo_type=repo_type,
	            token=token,
	            commit_message="Add README",
	        )
	        print("✅ README created!")

	        print("\n🎉 Success! Dataset is ready at:")
	        print(f" https://huggingface.co/datasets/{repo_id}")
	        print("\n📋 Data URL (requires auth):")
	        print(
	            f" https://huggingface.co/datasets/{repo_id}/resolve/main/leaderboard.json"
	        )

	    except Exception as e:
	        # Top-level boundary of a CLI script: report the failure and exit non-zero.
	        print(f"❌ Error: {e}", file=sys.stderr)
	        sys.exit(1)


	# Run the upload only when this file is executed as a script, not when imported.
	if __name__ == "__main__":
	    main()