File size: 3,896 Bytes
a33a001
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
"""Hugging Face Hub integration utilities."""

import re
import os
from typing import Optional
from huggingface_hub import HfApi, create_repo
from dotenv import load_dotenv

# Load variables from a .env file (if one is found) into the process
# environment so the access token does not have to be exported in the shell.
load_dotenv()
# Access token handed to the huggingface_hub calls in this module; os.getenv
# returns None when HUGGINGFACE_ACCESS_TOKEN is not set.
HF_TOKEN = os.getenv("HUGGINGFACE_ACCESS_TOKEN")

def is_valid_repo_name(repo_name: str) -> bool:
    """Check if a repository name is valid for Hugging Face Hub.

    A name is accepted when it is a non-empty string made up only of ASCII
    letters, digits, and the characters ``_``, ``.``, ``/`` and ``-``.

    Args:
        repo_name: The repository name to validate

    Returns:
        True if the name is valid, False otherwise (including when
        ``repo_name`` is not a string)
    """
    if not isinstance(repo_name, str):
        return False
    # fullmatch rather than match with '^...$': '$' also matches just before
    # a trailing newline, which would let a name such as "repo\n" through.
    return re.fullmatch(r'[A-Za-z0-9_./-]+', repo_name) is not None

def create_hf_repo(
    repo_name: str,
    repo_type: str = "dataset",
    private: bool = False
) -> str:
    """Create a new repository on Hugging Face Hub.

    The call is made with ``exist_ok=True``, so an already existing
    repository is not treated as an error.

    Args:
        repo_name: Name of the repository to create
        repo_type: Type of repository (dataset, model, or space)
        private: Whether the repository should be private

    Returns:
        The repository ID (the ``repo_name`` that was passed in)

    Raises:
        Exception: If the repository name is invalid or creation fails
    """
    if not is_valid_repo_name(repo_name):
        # The message mirrors what is_valid_repo_name actually accepts; the
        # validator allows '/', so the name may be given as namespace/name.
        raise Exception(
            "Invalid repo name: may only contain letters, digits, and the "
            "characters '-', '_', '.', '/'"
        )

    try:
        create_repo(
            repo_id=repo_name,
            repo_type=repo_type,
            private=private,
            exist_ok=True,
            token=HF_TOKEN
        )
    except Exception as e:
        # Chain the original error so the underlying traceback is preserved.
        raise Exception(f"Error creating repository: {str(e)}") from e
    return repo_name

def upload_to_hf(
    file_path: str,
    repo_name: str,
    repo_type: str = "dataset",
    private: bool = False
) -> str:
    """Upload a file to Hugging Face Hub.

    The repository is created first if needed, then the file is uploaded to
    the repository root under its base name.

    Args:
        file_path: Path to the file to upload
        repo_name: Name of the repository to upload to
        repo_type: Type of repository
        private: Whether the repository should be private

    Returns:
        The repository ID

    Raises:
        Exception: If the local file does not exist or the upload fails
    """
    # Check the local file before touching the Hub, so a bad path does not
    # leave a freshly created, empty repository behind.
    if not os.path.isfile(file_path):
        raise Exception(
            f"Error uploading to Hugging Face Hub: file does not exist: {file_path}"
        )

    try:
        # Create or get repository
        repo_id = create_hf_repo(repo_name, repo_type, private)

        # Upload file
        api = HfApi(token=HF_TOKEN)
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=os.path.basename(file_path),
            repo_id=repo_id,
            repo_type=repo_type,
            token=HF_TOKEN
        )
        return repo_id
    except Exception as e:
        # Chain the original error so the underlying traceback is preserved.
        raise Exception(f"Error uploading to Hugging Face Hub: {str(e)}") from e

def download_from_hf(
    repo_name: str,
    file_name: str,
    local_path: Optional[str] = None
) -> str:
    """Download a file from a Hugging Face Hub dataset repository.

    The file is fetched over HTTP from the ``main`` revision of the dataset
    repository and written to disk in binary mode.

    Args:
        repo_name: Name of the repository to download from
        file_name: Name of the file to download
        local_path: Optional local path to save the file to; defaults to
            ``data/<file_name>``

    Returns:
        Path to the downloaded file

    Raises:
        Exception: If the download fails
    """
    try:
        import requests

        # Construct the raw URL for the file
        # NOTE(review): the "/raw/" endpoint is believed to return the LFS
        # pointer rather than the content for LFS-tracked files, and no auth
        # token is sent, so private repos will likely fail -- confirm whether
        # "/resolve/" and an Authorization header are wanted here.
        raw_url = f"https://huggingface.co/datasets/{repo_name}/raw/main/{file_name}"

        # Download the file; the timeout keeps a stalled connection from
        # blocking the caller forever.
        response = requests.get(raw_url, timeout=30)
        if response.status_code != 200:
            raise Exception(f"Failed to download file: {response.status_code}")

        # Save the file
        if local_path is None:
            local_path = os.path.join("data", file_name)

        # dirname is "" for a bare file name, and os.makedirs("") raises, so
        # only create the parent directory when there is one.
        parent_dir = os.path.dirname(local_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(local_path, "wb") as f:
            f.write(response.content)

        return local_path
    except Exception as e:
        # Chain the original error so the underlying traceback is preserved.
        raise Exception(f"Error downloading from Hugging Face Hub: {str(e)}") from e