Spaces:
Running
Running
File size: 3,896 Bytes
a33a001 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
"""Hugging Face Hub integration utilities."""
import re
import os
from typing import Optional
from huggingface_hub import HfApi, create_repo
from dotenv import load_dotenv
# Populate the process environment via python-dotenv before the credential
# below is read.
load_dotenv()

# Access token handed to the Hub calls in this module; None when the
# HUGGINGFACE_ACCESS_TOKEN variable is not set.
HF_TOKEN = os.environ.get("HUGGINGFACE_ACCESS_TOKEN")
def is_valid_repo_name(repo_name: str) -> bool:
    """Check if a repository name is valid for Hugging Face Hub.

    The entire string must be made of ASCII letters, digits, and the
    characters ``_``, ``.``, ``/`` and ``-``. Because ``/`` is in the allowed
    set, namespaced identifiers such as ``user/repo`` are accepted. The empty
    string is rejected.

    Args:
        repo_name: The repository name to validate

    Returns:
        True if the name is valid, False otherwise
    """
    # fullmatch rather than match with '^...$': '$' also matches just before
    # a trailing newline, so a name like "repo\n" used to be accepted.
    return re.fullmatch(r'[A-Za-z0-9_./-]+', repo_name) is not None
def create_hf_repo(
    repo_name: str,
    repo_type: str = "dataset",
    private: bool = False
) -> str:
    """Create a new repository on Hugging Face Hub.

    The name is validated with ``is_valid_repo_name`` first. The repository is
    then created with ``exist_ok=True`` and the module-level ``HF_TOKEN``.

    Args:
        repo_name: Name of the repository to create
        repo_type: Type of repository (dataset, model, or space)
        private: Whether the repository should be private

    Returns:
        The repository ID (the ``repo_name`` that was passed in)

    Raises:
        Exception: If the repository name is invalid or creation fails. For a
            creation failure the original error is attached as ``__cause__``.
    """
    if not is_valid_repo_name(repo_name):
        # NOTE(review): this message says slashes are not allowed, but the
        # pattern in is_valid_repo_name accepts '/'. Confirm which rule is
        # intended before changing either one.
        raise Exception(
            "Invalid repo name: must not contain slashes, spaces, or special "
            "characters except '-', '_', '.'"
        )

    try:
        create_repo(
            repo_id=repo_name,
            repo_type=repo_type,
            private=private,
            exist_ok=True,
            token=HF_TOKEN,
        )
    except Exception as e:
        # Chain the cause so the underlying Hub error and traceback survive.
        raise Exception(f"Error creating repository: {str(e)}") from e
    return repo_name
def upload_to_hf(
    file_path: str,
    repo_name: str,
    repo_type: str = "dataset",
    private: bool = False
) -> str:
    """Upload a file to Hugging Face Hub.

    The repository is created (or reused) through ``create_hf_repo`` and the
    file is stored at the repository root under its base name.

    Args:
        file_path: Path to the local file to upload
        repo_name: Name of the repository to upload to
        repo_type: Type of repository
        private: Whether the repository should be private

    Returns:
        The repository ID

    Raises:
        Exception: If the local file does not exist, or if repository
            creation or the upload fails. The original error is attached as
            ``__cause__``.
    """
    try:
        # Check the local file before touching the Hub, so a bad path does
        # not leave a freshly created, empty repository behind.
        if not os.path.isfile(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        # Create or get repository
        repo_id = create_hf_repo(repo_name, repo_type, private)

        # Upload file
        api = HfApi(token=HF_TOKEN)
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=os.path.basename(file_path),
            repo_id=repo_id,
            repo_type=repo_type,
            token=HF_TOKEN,
        )
        return repo_id
    except Exception as e:
        raise Exception(f"Error uploading to Hugging Face Hub: {str(e)}") from e
def download_from_hf(
    repo_name: str,
    file_name: str,
    local_path: Optional[str] = None
) -> str:
    """Download a file from a dataset repository on Hugging Face Hub.

    The file is fetched over HTTPS from the ``main`` revision of the dataset
    repository and written to ``local_path`` in binary mode.

    Args:
        repo_name: Name of the dataset repository to download from
        file_name: Name (path inside the repository) of the file to download
        local_path: Optional local path to save the file to. Defaults to
            ``data/<file_name>``.

    Returns:
        Path to the downloaded file

    Raises:
        Exception: If the request fails, the server answers with a status
            other than 200, or the file cannot be written. The original error
            is attached as ``__cause__``.
    """
    try:
        import requests

        # Use the "resolve" endpoint: it serves the actual file content,
        # whereas "raw" returns only the small Git LFS pointer for
        # LFS-tracked files.
        file_url = f"https://huggingface.co/datasets/{repo_name}/resolve/main/{file_name}"

        # Send the token when one is configured so private repositories
        # (which upload_to_hf can create) are downloadable as well.
        headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else None

        # A timeout keeps a stalled connection from blocking forever.
        response = requests.get(file_url, headers=headers, timeout=30)
        if response.status_code != 200:
            raise Exception(f"Failed to download file: {response.status_code}")

        # Save the file
        if local_path is None:
            local_path = os.path.join("data", file_name)
        # dirname is "" for a bare file name, and os.makedirs("") raises,
        # so only create the directory when there is one.
        parent_dir = os.path.dirname(local_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(local_path, "wb") as f:
            f.write(response.content)
        return local_path
    except Exception as e:
        raise Exception(f"Error downloading from Hugging Face Hub: {str(e)}") from e