File size: 996 Bytes
5ccdf55 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
from huggingface_hub import snapshot_download
import os
def download_dataset(
    local_dir="./downloaded_data",
    token=None,
    repo_id="Just-Curieous/arxiv-cs-paper-metadata-embedding",
):
    """Download a dataset snapshot from the Hugging Face Hub into a local directory.

    With the default ``repo_id`` this fetches the
    arxiv-cs-paper-metadata-embedding dataset.

    Args:
        local_dir (str): Local directory to save the downloaded dataset.
            It is created if it does not exist yet.
        token (str): Hugging Face token (optional). When ``None``, the
            ``HF_TOKEN`` environment variable is used if it is set to a
            non-empty value.
        repo_id (str): Identifier of the dataset repository to download.
            Defaults to the arxiv-cs-paper-metadata-embedding dataset.

    Returns:
        str: Path to the downloaded dataset, as returned by
        ``snapshot_download``.
    """
    if token is None:
        # An empty HF_TOKEN (e.g. `export HF_TOKEN=`) is treated as "not set"
        # so that a blank string is never forwarded as a credential.
        token = os.getenv("HF_TOKEN") or None

    # Create local directory if it doesn't exist
    os.makedirs(local_dir, exist_ok=True)

    # Download the dataset
    downloaded_path = snapshot_download(
        repo_id=repo_id,
        repo_type="dataset",
        local_dir=local_dir,
        token=token,
    )

    print(f"Dataset downloaded to: {downloaded_path}")
    return downloaded_path
if __name__ == "__main__":
    # When run as a script, fetch the dataset using the default arguments.
    download_dataset()
|