File size: 13,621 Bytes
254a3c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
# coding=utf-8
# Copyright 2023-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains command to upload a repo or file with the CLI.

Usage:
    # Upload file (implicit)
    huggingface-cli upload my-cool-model ./my-cool-model.safetensors

    # Upload file (explicit)
    huggingface-cli upload my-cool-model ./my-cool-model.safetensors  model.safetensors

    # Upload directory (implicit). If `my-cool-model/` is a directory it will be uploaded, otherwise an exception is raised.
    huggingface-cli upload my-cool-model

    # Upload directory (explicit)
    huggingface-cli upload my-cool-model ./models/my-cool-model .

    # Upload filtered directory (example: tensorboard logs except for the last run)
    huggingface-cli upload my-cool-model ./model/training /logs --include "*.tfevents.*" --exclude "*20230905*"

    # Upload private dataset
    huggingface-cli upload Wauplin/my-cool-dataset ./data . --repo-type=dataset --private

    # Upload with token
    huggingface-cli upload Wauplin/my-cool-model --token=hf_****

    # Sync local Space with Hub (upload new files, delete removed files)
    huggingface-cli upload Wauplin/space-example --repo-type=space --exclude="/logs/*" --delete="*" --commit-message="Sync local Space with Hub"

    # Schedule commits every 30 minutes
    huggingface-cli upload Wauplin/my-cool-model --every=30
"""
import os
import time
import warnings
from argparse import Namespace, _SubParsersAction
from typing import List, Optional

from huggingface_hub import logging
from huggingface_hub._commit_scheduler import CommitScheduler
from huggingface_hub.commands import BaseHuggingfaceCLICommand
from huggingface_hub.constants import HF_HUB_ENABLE_HF_TRANSFER
from huggingface_hub.hf_api import HfApi
from huggingface_hub.utils import RevisionNotFoundError, disable_progress_bars, enable_progress_bars


logger = logging.get_logger(__name__)


class UploadCommand(BaseHuggingfaceCLICommand):
    @staticmethod
    def register_subcommand(parser: _SubParsersAction):
        upload_parser = parser.add_parser("upload", help="Upload a file or a folder to a repo on the Hub")
        upload_parser.add_argument(
            "repo_id", type=str, help="The ID of the repo to upload to (e.g. `username/repo-name`)."
        )
        upload_parser.add_argument(
            "local_path", nargs="?", help="Local path to the file or folder to upload. Defaults to current directory."
        )
        upload_parser.add_argument(
            "path_in_repo",
            nargs="?",
            help="Path of the file or folder in the repo. Defaults to the relative path of the file or folder.",
        )
        upload_parser.add_argument(
            "--repo-type",
            choices=["model", "dataset", "space"],
            default="model",
            help="Type of the repo to upload to (e.g. `dataset`).",
        )
        upload_parser.add_argument(
            "--revision",
            type=str,
            help=(
                "An optional Git revision to push to. It can be a branch name or a PR reference. If revision does not"
                " exist and `--create-pr` is not set, a branch will be automatically created."
            ),
        )
        upload_parser.add_argument(
            "--private",
            action="store_true",
            help=(
                "Whether to create a private repo if repo doesn't exist on the Hub. Ignored if the repo already"
                " exists."
            ),
        )
        upload_parser.add_argument("--include", nargs="*", type=str, help="Glob patterns to match files to upload.")
        upload_parser.add_argument(
            "--exclude", nargs="*", type=str, help="Glob patterns to exclude from files to upload."
        )
        upload_parser.add_argument(
            "--delete",
            nargs="*",
            type=str,
            help="Glob patterns for file to be deleted from the repo while committing.",
        )
        upload_parser.add_argument(
            "--commit-message", type=str, help="The summary / title / first line of the generated commit."
        )
        upload_parser.add_argument("--commit-description", type=str, help="The description of the generated commit.")
        upload_parser.add_argument(
            "--create-pr", action="store_true", help="Whether to upload content as a new Pull Request."
        )
        upload_parser.add_argument(
            "--every",
            type=float,
            help="If set, a background job is scheduled to create commits every `every` minutes.",
        )
        upload_parser.add_argument(
            "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens"
        )
        upload_parser.add_argument(
            "--quiet",
            action="store_true",
            help="If True, progress bars are disabled and only the path to the uploaded files is printed.",
        )
        upload_parser.set_defaults(func=UploadCommand)

    def __init__(self, args: Namespace) -> None:
        self.repo_id: str = args.repo_id
        self.repo_type: Optional[str] = args.repo_type
        self.revision: Optional[str] = args.revision
        self.private: bool = args.private

        self.include: Optional[List[str]] = args.include
        self.exclude: Optional[List[str]] = args.exclude
        self.delete: Optional[List[str]] = args.delete

        self.commit_message: Optional[str] = args.commit_message
        self.commit_description: Optional[str] = args.commit_description
        self.create_pr: bool = args.create_pr
        self.api: HfApi = HfApi(token=args.token, library_name="huggingface-cli")
        self.quiet: bool = args.quiet  # disable warnings and progress bars

        # Check `--every` is valid
        if args.every is not None and args.every <= 0:
            raise ValueError(f"`every` must be a positive value (got '{args.every}')")
        self.every: Optional[float] = args.every

        # Resolve `local_path` and `path_in_repo`
        repo_name: str = args.repo_id.split("/")[-1]  # e.g. "Wauplin/my-cool-model" => "my-cool-model"
        self.local_path: str
        self.path_in_repo: str
        if args.local_path is None and os.path.isfile(repo_name):
            # Implicit case 1: user provided only a repo_id which happen to be a local file as well => upload it with same name
            self.local_path = repo_name
            self.path_in_repo = repo_name
        elif args.local_path is None and os.path.isdir(repo_name):
            # Implicit case 2: user provided only a repo_id which happen to be a local folder as well => upload it at root
            self.local_path = repo_name
            self.path_in_repo = "."
        elif args.local_path is None:
            # Implicit case 3: user provided only a repo_id that does not match a local file or folder
            # => the user must explicitly provide a local_path => raise exception
            raise ValueError(f"'{repo_name}' is not a local file or folder. Please set `local_path` explicitly.")
        elif args.path_in_repo is None and os.path.isfile(args.local_path):
            # Explicit local path to file, no path in repo => upload it at root with same name
            self.local_path = args.local_path
            self.path_in_repo = os.path.basename(args.local_path)
        elif args.path_in_repo is None:
            # Explicit local path to folder, no path in repo => upload at root
            self.local_path = args.local_path
            self.path_in_repo = "."
        else:
            # Finally, if both paths are explicit
            self.local_path = args.local_path
            self.path_in_repo = args.path_in_repo

    def run(self) -> None:
        if self.quiet:
            disable_progress_bars()
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                print(self._upload())
            enable_progress_bars()
        else:
            logging.set_verbosity_info()
            print(self._upload())
            logging.set_verbosity_warning()

    def _upload(self) -> str:
        if os.path.isfile(self.local_path):
            if self.include is not None and len(self.include) > 0:
                warnings.warn("Ignoring `--include` since a single file is uploaded.")
            if self.exclude is not None and len(self.exclude) > 0:
                warnings.warn("Ignoring `--exclude` since a single file is uploaded.")
            if self.delete is not None and len(self.delete) > 0:
                warnings.warn("Ignoring `--delete` since a single file is uploaded.")

        if not HF_HUB_ENABLE_HF_TRANSFER:
            logger.info(
                "Consider using `hf_transfer` for faster uploads. This solution comes with some limitations. See"
                " https://huggingface.co/docs/huggingface_hub/hf_transfer for more details."
            )

        # Schedule commits if `every` is set
        if self.every is not None:
            if os.path.isfile(self.local_path):
                # If file => watch entire folder + use allow_patterns
                folder_path = os.path.dirname(self.local_path)
                path_in_repo = (
                    self.path_in_repo[: -len(self.local_path)]  # remove filename from path_in_repo
                    if self.path_in_repo.endswith(self.local_path)
                    else self.path_in_repo
                )
                allow_patterns = [self.local_path]
                ignore_patterns = []
            else:
                folder_path = self.local_path
                path_in_repo = self.path_in_repo
                allow_patterns = self.include or []
                ignore_patterns = self.exclude or []
                if self.delete is not None and len(self.delete) > 0:
                    warnings.warn("Ignoring `--delete` when uploading with scheduled commits.")

            scheduler = CommitScheduler(
                folder_path=folder_path,
                repo_id=self.repo_id,
                repo_type=self.repo_type,
                revision=self.revision,
                allow_patterns=allow_patterns,
                ignore_patterns=ignore_patterns,
                path_in_repo=path_in_repo,
                private=self.private,
                every=self.every,
                hf_api=self.api,
            )
            print(f"Scheduling commits every {self.every} minutes to {scheduler.repo_id}.")
            try:  # Block main thread until KeyboardInterrupt
                while True:
                    time.sleep(100)
            except KeyboardInterrupt:
                scheduler.stop()
                return "Stopped scheduled commits."

        # Otherwise, create repo and proceed with the upload
        if not os.path.isfile(self.local_path) and not os.path.isdir(self.local_path):
            raise FileNotFoundError(f"No such file or directory: '{self.local_path}'.")
        repo_id = self.api.create_repo(
            repo_id=self.repo_id,
            repo_type=self.repo_type,
            exist_ok=True,
            private=self.private,
            space_sdk="gradio" if self.repo_type == "space" else None,
            # ^ We don't want it to fail when uploading to a Space => let's set Gradio by default.
            # ^ I'd rather not add CLI args to set it explicitly as we already have `huggingface-cli repo create` for that.
        ).repo_id

        # Check if branch already exists and if not, create it
        if self.revision is not None and not self.create_pr:
            try:
                self.api.repo_info(repo_id=repo_id, repo_type=self.repo_type, revision=self.revision)
            except RevisionNotFoundError:
                logger.info(f"Branch '{self.revision}' not found. Creating it...")
                self.api.create_branch(repo_id=repo_id, repo_type=self.repo_type, branch=self.revision, exist_ok=True)
                # ^ `exist_ok=True` to avoid race concurrency issues

        # File-based upload
        if os.path.isfile(self.local_path):
            return self.api.upload_file(
                path_or_fileobj=self.local_path,
                path_in_repo=self.path_in_repo,
                repo_id=repo_id,
                repo_type=self.repo_type,
                revision=self.revision,
                commit_message=self.commit_message,
                commit_description=self.commit_description,
                create_pr=self.create_pr,
            )

        # Folder-based upload
        else:
            return self.api.upload_folder(
                folder_path=self.local_path,
                path_in_repo=self.path_in_repo,
                repo_id=repo_id,
                repo_type=self.repo_type,
                revision=self.revision,
                commit_message=self.commit_message,
                commit_description=self.commit_description,
                create_pr=self.create_pr,
                allow_patterns=self.include,
                ignore_patterns=self.exclude,
                delete_patterns=self.delete,
            )