|
""" |
|
Type definitions and utilities for the `create_commit` API |
|
""" |
|
import base64 |
|
import io |
|
import os |
|
import warnings |
|
from collections import defaultdict |
|
from contextlib import contextmanager |
|
from dataclasses import dataclass, field |
|
from itertools import groupby |
|
from pathlib import Path, PurePosixPath |
|
from typing import TYPE_CHECKING, Any, BinaryIO, Dict, Iterable, Iterator, List, Literal, Optional, Tuple, Union |
|
|
|
from tqdm.contrib.concurrent import thread_map |
|
|
|
from huggingface_hub import get_session |
|
|
|
from .constants import ENDPOINT, HF_HUB_ENABLE_HF_TRANSFER |
|
from .lfs import UploadInfo, lfs_upload, post_lfs_batch_info |
|
from .utils import ( |
|
EntryNotFoundError, |
|
build_hf_headers, |
|
chunk_iterable, |
|
hf_raise_for_status, |
|
logging, |
|
tqdm_stream_file, |
|
validate_hf_hub_args, |
|
) |
|
from .utils import tqdm as hf_tqdm |
|
|
|
|
|
if TYPE_CHECKING:
    # Only needed for type annotations; avoids an import cycle at runtime.
    from .hf_api import RepoFile


logger = logging.get_logger(__name__)


# Upload mode decided by the Hub's "preupload" endpoint for each file:
# "lfs" (large file stored via the LFS protocol) or "regular" (plain git blob).
UploadMode = Literal["lfs", "regular"]


# Max number of file paths fetched per `get_paths_info` call in
# `_fetch_lfs_files_to_copy` (keeps request payloads bounded).
FETCH_LFS_BATCH_SIZE = 500
|
|
|
|
|
@dataclass
class CommitOperationDelete:
    """
    Describes the deletion of a file or a folder from a repository on the Hub.

    Args:
        path_in_repo (`str`):
            Relative filepath in the repo, for example: `"checkpoints/1fec34a/weights.bin"`
            for a file or `"checkpoints/1fec34a/"` for a folder.
        is_folder (`bool` or `Literal["auto"]`, *optional*)
            Whether the Delete Operation applies to a folder or not. If "auto", the path
            type (file or folder) is guessed automatically by looking if path ends with
            a "/" (folder) or not (file). To explicitly set the path type, you can set
            `is_folder=True` or `is_folder=False`.
    """

    path_in_repo: str
    is_folder: Union[bool, Literal["auto"]] = "auto"

    def __post_init__(self):
        """Normalize the path and resolve the `is_folder` flag."""
        self.path_in_repo = _validate_path_in_repo(self.path_in_repo)

        # In "auto" mode, a trailing "/" marks the path as a folder.
        resolved = self.path_in_repo.endswith("/") if self.is_folder == "auto" else self.is_folder
        if not isinstance(resolved, bool):
            raise ValueError(
                f"Wrong value for `is_folder`. Must be one of [`True`, `False`, `'auto'`]. Got '{resolved}'."
            )
        self.is_folder = resolved
|
|
|
|
|
@dataclass
class CommitOperationCopy:
    """
    Describes the copy of an LFS file inside a repository on the Hub.

    Limitations:
      - Only LFS files can be copied. To copy a regular file, you need to download it locally and re-upload it
      - Cross-repository copies are not supported.

    Note: you can combine a [`CommitOperationCopy`] and a [`CommitOperationDelete`] to rename an LFS file on the Hub.

    Args:
        src_path_in_repo (`str`):
            Relative filepath in the repo of the file to be copied, e.g. `"checkpoints/1fec34a/weights.bin"`.
        path_in_repo (`str`):
            Relative filepath in the repo where to copy the file, e.g. `"checkpoints/1fec34a/weights_copy.bin"`.
        src_revision (`str`, *optional*):
            The git revision of the file to be copied. Can be any valid git revision.
            Default to the target commit revision.
    """

    src_path_in_repo: str
    path_in_repo: str
    src_revision: Optional[str] = None

    def __post_init__(self):
        """Normalize both source and destination paths."""
        for attr in ("src_path_in_repo", "path_in_repo"):
            setattr(self, attr, _validate_path_in_repo(getattr(self, attr)))
|
|
|
|
|
@dataclass
class CommitOperationAdd:
    """
    Data structure holding necessary info to upload a file to a repository on the Hub.

    Args:
        path_in_repo (`str`):
            Relative filepath in the repo, for example: `"checkpoints/1fec34a/weights.bin"`
        path_or_fileobj (`str`, `Path`, `bytes`, or `BinaryIO`):
            Either:
            - a path to a local file (as `str` or `pathlib.Path`) to upload
            - a buffer of bytes (`bytes`) holding the content of the file to upload
            - a "file object" (subclass of `io.BufferedIOBase`), typically obtained
                with `open(path, "rb")`. It must support `seek()` and `tell()` methods.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `path_or_fileobj` is not one of `str`, `Path`, `bytes` or `io.BufferedIOBase`.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `path_or_fileobj` is a `str` or `Path` but not a path to an existing file.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `path_or_fileobj` is a `io.BufferedIOBase` but it doesn't support both
            `seek()` and `tell()`.
    """

    path_in_repo: str
    path_or_fileobj: Union[str, Path, bytes, BinaryIO]
    # Size/sha256/sample of the content, computed once in `__post_init__`.
    upload_info: UploadInfo = field(init=False, repr=False)

    # Internal attributes set by the upload machinery:

    # "lfs" or "regular", as decided by the Hub's preupload endpoint
    # (set in `_fetch_upload_modes`).
    _upload_mode: Optional[UploadMode] = field(init=False, repr=False, default=None)

    # Whether the file must be skipped because it matches the gitignore rules
    # (set in `_fetch_upload_modes`).
    _should_ignore: Optional[bool] = field(init=False, repr=False, default=None)

    # Whether the (LFS) content has been uploaded.
    _is_uploaded: bool = field(init=False, repr=False, default=False)

    # Whether a commit referencing this operation has been created.
    _is_committed: bool = field(init=False, repr=False, default=False)

    def __post_init__(self) -> None:
        """Validates `path_or_fileobj` and compute `upload_info`."""
        self.path_in_repo = _validate_path_in_repo(self.path_in_repo)

        # Validate `path_or_fileobj`: must be a path to an existing local file,
        # raw bytes, or a seekable binary file object.
        if isinstance(self.path_or_fileobj, Path):
            self.path_or_fileobj = str(self.path_or_fileobj)
        if isinstance(self.path_or_fileobj, str):
            path_or_fileobj = os.path.normpath(os.path.expanduser(self.path_or_fileobj))
            if not os.path.isfile(path_or_fileobj):
                raise ValueError(f"Provided path: '{path_or_fileobj}' is not a file on the local file system")
        elif not isinstance(self.path_or_fileobj, (io.BufferedIOBase, bytes)):
            raise ValueError(
                "path_or_fileobj must be either an instance of str, bytes or"
                " io.BufferedIOBase. If you passed a file-like object, make sure it is"
                " in binary mode."
            )
        if isinstance(self.path_or_fileobj, io.BufferedIOBase):
            try:
                # `tell()` + a no-op relative `seek()` prove the stream is seekable;
                # both are needed later by `as_file` to replay the content.
                self.path_or_fileobj.tell()
                self.path_or_fileobj.seek(0, os.SEEK_CUR)
            except (OSError, AttributeError) as exc:
                raise ValueError(
                    "path_or_fileobj is a file-like object but does not implement seek() and tell()"
                ) from exc

        # Compute `upload_info` from whichever form the content was given in.
        if isinstance(self.path_or_fileobj, str):
            self.upload_info = UploadInfo.from_path(self.path_or_fileobj)
        elif isinstance(self.path_or_fileobj, bytes):
            self.upload_info = UploadInfo.from_bytes(self.path_or_fileobj)
        else:
            self.upload_info = UploadInfo.from_fileobj(self.path_or_fileobj)

    @contextmanager
    def as_file(self, with_tqdm: bool = False) -> Iterator[BinaryIO]:
        """
        A context manager that yields a file-like object allowing to read the underlying
        data behind `path_or_fileobj`.

        Args:
            with_tqdm (`bool`, *optional*, defaults to `False`):
                If True, iterating over the file object will display a progress bar. Only
                works if the file-like object is a path to a file. Pure bytes and buffers
                are not supported.

        Example:

        ```python
        >>> operation = CommitOperationAdd(
        ...        path_in_repo="remote/dir/weights.h5",
        ...        path_or_fileobj="./local/weights.h5",
        ... )
        CommitOperationAdd(path_in_repo='remote/dir/weights.h5', path_or_fileobj='./local/weights.h5')

        >>> with operation.as_file() as file:
        ...     content = file.read()

        >>> with operation.as_file(with_tqdm=True) as file:
        ...     while True:
        ...         data = file.read(1024)
        ...         if not data:
        ...              break
        config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]

        >>> with operation.as_file(with_tqdm=True) as file:
        ...     requests.put(..., data=file)
        config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]
        ```
        """
        if isinstance(self.path_or_fileobj, (str, Path)):
            if with_tqdm:
                with tqdm_stream_file(self.path_or_fileobj) as file:
                    yield file
            else:
                with open(self.path_or_fileobj, "rb") as file:
                    yield file
        elif isinstance(self.path_or_fileobj, bytes):
            yield io.BytesIO(self.path_or_fileobj)
        elif isinstance(self.path_or_fileobj, io.BufferedIOBase):
            prev_pos = self.path_or_fileobj.tell()
            try:
                yield self.path_or_fileobj
            finally:
                # Restore the stream position even if the consumer raised inside the
                # `with` block, so the same operation can be read again (previously
                # the restore was skipped on exceptions).
                self.path_or_fileobj.seek(prev_pos, io.SEEK_SET)

    def b64content(self) -> bytes:
        """
        The base64-encoded content of `path_or_fileobj`

        Returns: `bytes`
        """
        with self.as_file() as file:
            return base64.b64encode(file.read())
|
|
|
|
|
def _validate_path_in_repo(path_in_repo: str) -> str: |
|
|
|
if path_in_repo.startswith("/"): |
|
path_in_repo = path_in_repo[1:] |
|
if path_in_repo == "." or path_in_repo == ".." or path_in_repo.startswith("../"): |
|
raise ValueError(f"Invalid `path_in_repo` in CommitOperation: '{path_in_repo}'") |
|
if path_in_repo.startswith("./"): |
|
path_in_repo = path_in_repo[2:] |
|
if any(part == ".git" for part in path_in_repo.split("/")): |
|
raise ValueError( |
|
"Invalid `path_in_repo` in CommitOperation: cannot update files under a '.git/' folder (path:" |
|
f" '{path_in_repo}')." |
|
) |
|
return path_in_repo |
|
|
|
|
|
# Type alias covering the three supported commit operations.
CommitOperation = Union[CommitOperationAdd, CommitOperationCopy, CommitOperationDelete]
|
|
|
|
|
def _warn_on_overwriting_operations(operations: List[CommitOperation]) -> None:
    """
    Warn the user when a list of operations would overwrite itself within one commit.

    Rules:
    - A filepath updated by several `CommitOperationAdd` operations triggers a warning.
    - A filepath updated by a `CommitOperationAdd` and then removed by a
      `CommitOperationDelete` triggers a warning.
    - A `CommitOperationDelete` followed by a `CommitOperationAdd` on the same path
      triggers no warning: deleting before uploading is usually pointless but
      legitimate (e.g. wiping a folder before adding new files to it).
    """
    additions_count: Dict[str, int] = defaultdict(int)
    for op in operations:
        path = op.path_in_repo
        if isinstance(op, CommitOperationAdd):
            if additions_count[path] > 0:
                warnings.warn(
                    "About to update multiple times the same file in the same commit:"
                    f" '{path}'. This can cause undesired inconsistencies in"
                    " your repo."
                )
            additions_count[path] += 1
            # Also count parent folders so a later folder deletion can be matched
            # against files just added inside it.
            for parent in PurePosixPath(path).parents:
                additions_count[str(parent)] += 1
        if isinstance(op, CommitOperationDelete):
            # PurePosixPath drops any trailing "/" so folder deletes match the
            # parent entries counted above.
            if additions_count[str(PurePosixPath(path))] > 0:
                message = (
                    (
                        "About to delete a folder containing files that have just been"
                        f" updated within the same commit: '{path}'. This can"
                        " cause undesired inconsistencies in your repo."
                    )
                    if op.is_folder
                    else (
                        "About to delete a file that have just been updated within the"
                        f" same commit: '{path}'. This can cause undesired"
                        " inconsistencies in your repo."
                    )
                )
                warnings.warn(message)
|
|
|
|
|
@validate_hf_hub_args
def _upload_lfs_files(
    *,
    additions: List[CommitOperationAdd],
    repo_type: str,
    repo_id: str,
    token: Optional[str],
    endpoint: Optional[str] = None,
    num_threads: int = 5,
    revision: Optional[str] = None,
):
    """
    Uploads the content of `additions` to the Hub using the large file storage protocol.

    Relevant external documentation:
        - LFS Batch API: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md

    Args:
        additions (`List` of `CommitOperationAdd`):
            The files to be uploaded
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        token (`str`, *optional*):
            An authentication token ( See https://huggingface.co/settings/tokens )
        endpoint (`str`, *optional*):
            The Hub endpoint to use. Forwarded to the LFS batch endpoint call.
        num_threads (`int`, *optional*):
            The number of concurrent threads to use when uploading. Defaults to 5.
        revision (`str`, *optional*):
            The git revision to upload to.

    Raises: `RuntimeError` if an upload failed for any reason

    Raises: `ValueError` if the server returns malformed responses

    Raises: `requests.HTTPError` if the LFS batch endpoint returned an HTTP
        error

    """
    # Step 1: query the LFS batch endpoint, 256 files per request, to learn which
    # files must actually be uploaded (and the per-file upload instructions).
    batch_actions: List[Dict] = []
    for chunk in chunk_iterable(additions, chunk_size=256):
        batch_actions_chunk, batch_errors_chunk = post_lfs_batch_info(
            upload_infos=[op.upload_info for op in chunk],
            token=token,
            repo_id=repo_id,
            repo_type=repo_type,
            revision=revision,
            endpoint=endpoint,
        )

        # Fail fast: abort the whole upload on any per-file error reported by the server.
        if batch_errors_chunk:
            message = "\n".join(
                [
                    f'Encountered error for file with OID {err.get("oid")}: `{err.get("error", {}).get("message")}'
                    for err in batch_errors_chunk
                ]
            )
            raise ValueError(f"LFS batch endpoint returned errors:\n{message}")

        batch_actions += batch_actions_chunk
    # Map sha256 (hex OID) back to its CommitOperationAdd to recover the operation
    # matching each batch action.
    oid2addop = {add_op.upload_info.sha256.hex(): add_op for add_op in additions}

    # Step 2: skip files whose content is already present upstream (the server
    # returns no "actions" entry for those).
    filtered_actions = []
    for action in batch_actions:
        if action.get("actions") is None:
            logger.debug(
                f"Content of file {oid2addop[action['oid']].path_in_repo} is already"
                " present upstream - skipping upload."
            )
        else:
            filtered_actions.append(action)

    if len(filtered_actions) == 0:
        logger.debug("No LFS files to upload.")
        return

    # Step 3: upload the remaining files, sequentially or with a thread pool.
    def _wrapped_lfs_upload(batch_action) -> None:
        try:
            operation = oid2addop[batch_action["oid"]]
            lfs_upload(operation=operation, lfs_batch_action=batch_action, token=token)
        except Exception as exc:
            # NOTE(review): if the `oid2addop` lookup itself fails, `operation` is
            # unbound here and this line raises NameError instead of RuntimeError —
            # confirm the server only returns OIDs we sent.
            raise RuntimeError(f"Error while uploading '{operation.path_in_repo}' to the Hub.") from exc

    if HF_HUB_ENABLE_HF_TRANSFER:
        # With `hf_transfer` enabled, uploads run sequentially from here.
        logger.debug(f"Uploading {len(filtered_actions)} LFS files to the Hub using `hf_transfer`.")
        for action in hf_tqdm(filtered_actions):
            _wrapped_lfs_upload(action)
    elif len(filtered_actions) == 1:
        # Single file: no need to spin up a thread pool.
        logger.debug("Uploading 1 LFS file to the Hub")
        _wrapped_lfs_upload(filtered_actions[0])
    else:
        logger.debug(
            f"Uploading {len(filtered_actions)} LFS files to the Hub using up to {num_threads} threads concurrently"
        )
        thread_map(
            _wrapped_lfs_upload,
            filtered_actions,
            desc=f"Upload {len(filtered_actions)} LFS files",
            max_workers=num_threads,
            tqdm_class=hf_tqdm,
        )
|
|
|
|
|
def _validate_preupload_info(preupload_info: dict): |
|
files = preupload_info.get("files") |
|
if not isinstance(files, list): |
|
raise ValueError("preupload_info is improperly formatted") |
|
for file_info in files: |
|
if not ( |
|
isinstance(file_info, dict) |
|
and isinstance(file_info.get("path"), str) |
|
and isinstance(file_info.get("uploadMode"), str) |
|
and (file_info["uploadMode"] in ("lfs", "regular")) |
|
): |
|
raise ValueError("preupload_info is improperly formatted:") |
|
return preupload_info |
|
|
|
|
|
@validate_hf_hub_args
def _fetch_upload_modes(
    additions: Iterable[CommitOperationAdd],
    repo_type: str,
    repo_id: str,
    token: Optional[str],
    revision: str,
    endpoint: Optional[str] = None,
    create_pr: bool = False,
    gitignore_content: Optional[str] = None,
) -> None:
    """
    Requests the Hub "preupload" endpoint to determine whether each input file should be uploaded as a regular git blob
    or as git LFS blob. Input `additions` are mutated in-place with the upload mode.

    Args:
        additions (`Iterable` of :class:`CommitOperationAdd`):
            Iterable of :class:`CommitOperationAdd` describing the files to
            upload to the Hub.
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        token (`str`, *optional*):
            An authentication token ( See https://huggingface.co/settings/tokens )
        revision (`str`):
            The git revision to upload the files to. Can be any valid git revision.
        gitignore_content (`str`, *optional*):
            The content of the `.gitignore` file to know which files should be ignored. The order of priority
            is to first check if `gitignore_content` is passed, then check if the `.gitignore` file is present
            in the list of files to commit and finally default to the `.gitignore` file already hosted on the Hub
            (if any).
    Raises:
        [`~utils.HfHubHTTPError`]
            If the Hub API returned an error.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the Hub API response is improperly formatted.
    """
    endpoint = endpoint if endpoint is not None else ENDPOINT
    headers = build_hf_headers(token=token)

    # `additions` is iterated several times below (chunked requests + two update
    # loops). Materialize it so that a one-shot generator input is handled
    # correctly instead of silently leaving `_upload_mode` unset.
    additions = list(additions)

    # Fetch upload mode (regular or LFS) chunk by chunk.
    upload_modes: Dict[str, UploadMode] = {}
    should_ignore_info: Dict[str, bool] = {}

    for chunk in chunk_iterable(additions, 256):
        payload: Dict = {
            "files": [
                {
                    "path": op.path_in_repo,
                    "sample": base64.b64encode(op.upload_info.sample).decode("ascii"),
                    "size": op.upload_info.size,
                    "sha": op.upload_info.sha256.hex(),
                }
                for op in chunk
            ]
        }
        if gitignore_content is not None:
            payload["gitIgnore"] = gitignore_content

        resp = get_session().post(
            f"{endpoint}/api/{repo_type}s/{repo_id}/preupload/{revision}",
            json=payload,
            headers=headers,
            params={"create_pr": "1"} if create_pr else None,
        )
        hf_raise_for_status(resp)
        preupload_info = _validate_preupload_info(resp.json())
        # NOTE(review): `_validate_preupload_info` does not check the "shouldIgnore"
        # key — a response missing it raises KeyError below; confirm the endpoint
        # always returns it.
        upload_modes.update(**{file["path"]: file["uploadMode"] for file in preupload_info["files"]})
        should_ignore_info.update(**{file["path"]: file["shouldIgnore"] for file in preupload_info["files"]})

    # Set upload mode (and ignore flag) on each addition operation, in place.
    for addition in additions:
        addition._upload_mode = upload_modes[addition.path_in_repo]
        addition._should_ignore = should_ignore_info[addition.path_in_repo]

    # Force empty files to be uploaded as regular git blobs, overriding the
    # server's answer.
    for addition in additions:
        if addition.upload_info.size == 0:
            addition._upload_mode = "regular"
|
|
|
|
|
@validate_hf_hub_args
def _fetch_lfs_files_to_copy(
    copies: Iterable[CommitOperationCopy],
    repo_type: str,
    repo_id: str,
    token: Optional[str],
    revision: str,
    endpoint: Optional[str] = None,
) -> Dict[Tuple[str, Optional[str]], "RepoFile"]:
    """
    Requests the Hub files information of the LFS files to be copied, including their sha256.

    Args:
        copies (`Iterable` of :class:`CommitOperationCopy`):
            Iterable of :class:`CommitOperationCopy` describing the files to
            copy on the Hub.
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        token (`str`, *optional*):
            An authentication token ( See https://huggingface.co/settings/tokens )
        revision (`str`):
            The git revision to upload the files to. Can be any valid git revision.

    Returns: `Dict[Tuple[str, Optional[str]], RepoFile]]`
        Key is the file path and revision of the file to copy, value is the repo file.

    Raises:
        [`~utils.HfHubHTTPError`]
            If the Hub API returned an error.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the Hub API response is improperly formatted.
    """
    # Imported here to avoid a circular import at module load time.
    from .hf_api import HfApi, RepoFolder

    hf_api = HfApi(endpoint=endpoint, token=token)
    files_to_copy = {}
    # `groupby` only groups *consecutive* items sharing the same `src_revision`;
    # non-contiguous groups just cost extra API calls — the result stays correct
    # since `files_to_copy` accumulates across groups.
    for src_revision, operations in groupby(copies, key=lambda op: op.src_revision):
        operations = list(operations)
        paths = [op.src_path_in_repo for op in operations]
        # Fetch file metadata in batches of FETCH_LFS_BATCH_SIZE paths.
        for offset in range(0, len(paths), FETCH_LFS_BATCH_SIZE):
            src_repo_files = hf_api.get_paths_info(
                repo_id=repo_id,
                paths=paths[offset : offset + FETCH_LFS_BATCH_SIZE],
                # `src_revision` defaults to the target commit revision.
                revision=src_revision or revision,
                repo_type=repo_type,
            )
            for src_repo_file in src_repo_files:
                if isinstance(src_repo_file, RepoFolder):
                    raise NotImplementedError("Copying a folder is not implemented.")
                if not src_repo_file.lfs:
                    raise NotImplementedError("Copying a non-LFS file is not implemented")
                files_to_copy[(src_repo_file.rfilename, src_revision)] = src_repo_file
        # Every requested source path must have been resolved by the API.
        for operation in operations:
            if (operation.src_path_in_repo, src_revision) not in files_to_copy:
                raise EntryNotFoundError(
                    f"Cannot copy {operation.src_path_in_repo} at revision "
                    f"{src_revision or revision}: file is missing on repo."
                )
    return files_to_copy
|
|
|
|
|
def _prepare_commit_payload(
    operations: Iterable[CommitOperation],
    files_to_copy: Dict[Tuple[str, Optional[str]], "RepoFile"],
    commit_message: str,
    commit_description: Optional[str] = None,
    parent_commit: Optional[str] = None,
) -> Iterable[Dict[str, Any]]:
    """
    Generate, lazily, the records to POST to the `/commit` API of the Hub.

    Records are yielded one by one so they can be streamed as ndjson in the POST
    request. The header record always comes first, then one record per operation.

    For more information, see:
        - https://github.com/huggingface/huggingface_hub/issues/1085#issuecomment-1265208073
        - http://ndjson.org/
    """
    if commit_description is None:
        commit_description = ""

    # 1. The commit header (message/description and, optionally, parent commit).
    header_value = {"summary": commit_message, "description": commit_description}
    if parent_commit is not None:
        header_value["parentCommit"] = parent_commit
    yield {"key": "header", "value": header_value}

    nb_ignored_files = 0

    # 2. One record per operation, dispatched on operation type and upload mode.
    for operation in operations:
        is_addition = isinstance(operation, CommitOperationAdd)

        if is_addition and operation._should_ignore:
            # Additions matched by the gitignore rules are dropped from the commit.
            logger.debug(f"Skipping file '{operation.path_in_repo}' in commit (ignored by gitignore file).")
            nb_ignored_files += 1
        elif is_addition and operation._upload_mode == "regular":
            # Small files: content is inlined in the payload, base64-encoded.
            yield {
                "key": "file",
                "value": {
                    "content": operation.b64content().decode(),
                    "path": operation.path_in_repo,
                    "encoding": "base64",
                },
            }
        elif is_addition and operation._upload_mode == "lfs":
            # LFS files: content was uploaded beforehand; only the pointer is sent.
            yield {
                "key": "lfsFile",
                "value": {
                    "path": operation.path_in_repo,
                    "algo": "sha256",
                    "oid": operation.upload_info.sha256.hex(),
                    "size": operation.upload_info.size,
                },
            }
        elif isinstance(operation, CommitOperationDelete):
            yield {
                "key": "deletedFolder" if operation.is_folder else "deletedFile",
                "value": {"path": operation.path_in_repo},
            }
        elif isinstance(operation, CommitOperationCopy):
            file_to_copy = files_to_copy[(operation.src_path_in_repo, operation.src_revision)]
            if not file_to_copy.lfs:
                raise NotImplementedError("Copying a non-LFS file is not implemented")
            yield {
                "key": "lfsFile",
                "value": {
                    "path": operation.path_in_repo,
                    "algo": "sha256",
                    "oid": file_to_copy.lfs["sha256"],
                },
            }
        else:
            # Unknown operation type, or an addition with an unset/unknown upload mode.
            raise ValueError(
                f"Unknown operation to commit. Operation: {operation}. Upload mode:"
                f" {getattr(operation, '_upload_mode', None)}"
            )

    if nb_ignored_files > 0:
        logger.info(f"Skipped {nb_ignored_files} file(s) in commit (ignored by gitignore file).")
|
|