|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Contains command to delete some revisions from the HF cache directory. |
|
|
|
Usage: |
|
huggingface-cli delete-cache |
|
huggingface-cli delete-cache --disable-tui |
|
huggingface-cli delete-cache --dir ~/.cache/huggingface/hub |
|
|
|
NOTE: |
|
This command is based on `InquirerPy` to build the multiselect menu in the terminal. |
|
This dependency has to be installed with `pip install huggingface_hub[cli]`. Since |
|
we want to avoid as much as possible cross-platform issues, I chose a library that |
|
is built on top of `python-prompt-toolkit` which seems to be a reference in terminal |
|
GUI (actively maintained on both Unix and Windows, 7.9k stars). |
|
|
|
For the moment, the TUI feature is in beta. |
|
|
|
See: |
|
- https://github.com/kazhala/InquirerPy |
|
- https://inquirerpy.readthedocs.io/en/latest/ |
|
- https://github.com/prompt-toolkit/python-prompt-toolkit |
|
|
|
Other solutions could have been: |
|
- `simple_term_menu`: would be good as well for our use case but some issues suggest |
|
that Windows is less supported. |
|
See: https://github.com/IngoMeyer441/simple-term-menu |
|
- `PyInquirer`: very similar to `InquirerPy` but older and not maintained anymore. |
|
In particular, no support for Python 3.10. |
|
See: https://github.com/CITGuru/PyInquirer |
|
- `pick` (or `pickpack`): easy to use and flexible but built on top of Python's |
|
standard library `curses` that is specific to Unix (not implemented on Windows). |
|
See https://github.com/wong2/pick and https://github.com/anafvana/pickpack. |
|
- `inquirer`: a lot of traction (700 stars) but explicitly states "experimental |
|
support of Windows". Not built on top of `python-prompt-toolkit`. |
|
See https://github.com/magmax/python-inquirer |
|
|
|
TODO: add support for `huggingface-cli delete-cache aaaaaa bbbbbb cccccc (...)` ? |
|
TODO: add "--keep-last" arg to delete revisions that are not on `main` ref |
|
TODO: add "--filter" arg to filter repositories by name ? |
|
TODO: add "--sort" arg to sort by size ? |
|
TODO: add "--limit" arg to limit to X repos ? |
|
TODO: add "-y" arg for immediate deletion ? |
|
See discussions in https://github.com/huggingface/huggingface_hub/issues/1025. |
|
""" |
|
import os |
|
from argparse import Namespace, _SubParsersAction |
|
from functools import wraps |
|
from tempfile import mkstemp |
|
from typing import Any, Callable, Iterable, List, Optional, Union |
|
|
|
from ..utils import CachedRepoInfo, CachedRevisionInfo, HFCacheInfo, scan_cache_dir |
|
from . import BaseHuggingfaceCLICommand |
|
from ._cli_utils import ANSI |
|
|
|
|
|
try: |
|
from InquirerPy import inquirer |
|
from InquirerPy.base.control import Choice |
|
from InquirerPy.separator import Separator |
|
|
|
_inquirer_py_available = True |
|
except ImportError: |
|
_inquirer_py_available = False |
|
|
|
|
|
def require_inquirer_py(fn: Callable) -> Callable:
    """Wrap `fn` so that calling it fails early when `InquirerPy` is missing.

    The availability flag is checked each time the returned wrapper is called, not
    when the decorator is applied.

    Args:
        fn (`Callable`):
            Function that needs `InquirerPy` to run.

    Returns:
        A wrapper with the same metadata as `fn` (see `functools.wraps`).

    Raises:
        `ImportError`: raised by the wrapper when `InquirerPy` could not be imported.
    """

    @wraps(fn)
    def _guarded(*args, **kwargs):
        if _inquirer_py_available:
            return fn(*args, **kwargs)

        raise ImportError(
            "The `delete-cache` command requires extra dependencies to work with"
            " the TUI.\nPlease run `pip install huggingface_hub[cli]` to install"
            " them.\nOtherwise, disable TUI using the `--disable-tui` flag."
        )

    return _guarded
|
|
|
|
|
|
|
# Sentinel that can appear among the selected revision hashes: when it is part of the
# selection, the command deletes nothing.
_CANCEL_DELETION_STR = "CANCEL_DELETION"
|
|
|
|
|
class DeleteCacheCommand(BaseHuggingfaceCLICommand):
    """Implementation of the `huggingface-cli delete-cache` sub-command."""

    @staticmethod
    def register_subcommand(parser: _SubParsersAction):
        """Register the `delete-cache` sub-command with its `--dir` and `--disable-tui` options."""
        subparser = parser.add_parser("delete-cache", help="Delete revisions from the cache directory.")
        subparser.add_argument(
            "--dir",
            type=str,
            default=None,
            help="cache directory (optional). Default to the default HuggingFace cache.",
        )
        subparser.add_argument(
            "--disable-tui",
            action="store_true",
            help=(
                "Disable Terminal User Interface (TUI) mode. Useful if your"
                " platform/terminal doesn't support the multiselect menu."
            ),
        )
        # The parsed namespace is handed to this class through the `func` default.
        subparser.set_defaults(func=DeleteCacheCommand)

    def __init__(self, args: Namespace) -> None:
        """Store the parsed `--dir` and `--disable-tui` values."""
        self.cache_dir: Optional[str] = args.dir
        self.disable_tui: bool = args.disable_tui

    def run(self):
        """Scan the cache, let the user pick revisions, then delete them once confirmed.

        Both the review step and the confirmation step use the plain-text variants
        when `--disable-tui` was passed and the `InquirerPy` variants otherwise.
        """
        hf_cache_info = scan_cache_dir(self.cache_dir)

        # Step 1: manual review of the revisions.
        review = _manual_review_no_tui if self.disable_tui else _manual_review_tui
        selected_hashes = review(hf_cache_info, preselected=[])

        # Step 2: an empty selection or the kill switch both mean "do nothing".
        if not selected_hashes or _CANCEL_DELETION_STR in selected_hashes:
            print("Deletion is cancelled. Do nothing.")
            return

        # Step 3: explicit confirmation before touching the disk.
        confirm_message = _get_expectations_str(hf_cache_info, selected_hashes) + " Confirm deletion ?"
        confirm = _ask_for_confirmation_no_tui if self.disable_tui else _ask_for_confirmation_tui
        if not confirm(confirm_message):
            print("Deletion is cancelled. Do nothing.")
            return

        # Step 4: actual deletion.
        strategy = hf_cache_info.delete_revisions(*selected_hashes)
        print("Start deletion.")
        strategy.execute()
        print(
            f"Done. Deleted {len(strategy.repos)} repo(s) and"
            f" {len(strategy.snapshots)} revision(s) for a total of"
            f" {strategy.expected_freed_size_str}."
        )
|
|
|
|
|
@require_inquirer_py
def _manual_review_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) -> List[str]:
    """Let the user pick the revisions to delete from a multi-select terminal menu.

    Args:
        hf_cache_info (`HFCacheInfo`):
            Scanned cache; its `repos` are turned into menu entries.
        preselected (`List[str]`):
            Revision hashes that start out ticked in the menu.

    Returns:
        The value returned by the checkbox prompt, or an empty list if the user
        interrupts the prompt with <ctrl+c>.
    """
    menu_entries = _get_tui_choices_from_scan(repos=hf_cache_info.repos, preselected=preselected)

    # Separators are not selectable, so only `Choice` entries count as selected.
    initially_selected = [entry.value for entry in menu_entries if isinstance(entry, Choice) and entry.enabled]

    checkbox = inquirer.checkbox(
        message="Select revisions to delete:",
        choices=menu_entries,
        cycle=False,
        height=100,
        instruction=_get_expectations_str(hf_cache_info, selected_hashes=initially_selected),
        long_instruction="Press <space> to select, <enter> to validate and <ctrl+c> to quit without modification.",
        transformer=lambda result: f"{len(result)} revision(s) selected.",
    )

    def _update_expectations(_) -> None:
        # Recompute the summary from the entries that are currently enabled.
        # NOTE(review): this writes to the private `_instruction` attribute of the
        # prompt; confirm it still exists when upgrading `InquirerPy`.
        currently_selected = [entry["value"] for entry in checkbox.content_control.choices if entry["enabled"]]
        checkbox._instruction = _get_expectations_str(hf_cache_info, selected_hashes=currently_selected)

    # Run the refresh after every "toggle" key binding.
    checkbox.kb_func_lookup["toggle"].append({"func": _update_expectations})

    try:
        selection = checkbox.execute()
    except KeyboardInterrupt:
        # Quitting the menu means "nothing selected".
        return []
    else:
        return selection
|
|
|
|
|
@require_inquirer_py
def _ask_for_confirmation_tui(message: str, default: bool = True) -> bool:
    """Show an `InquirerPy` confirm prompt for `message` and return its result.

    `default` is forwarded to the prompt as its default answer.
    """
    prompt = inquirer.confirm(message, default=default)
    return prompt.execute()
|
|
|
|
|
def _get_tui_choices_from_scan(repos: Iterable[CachedRepoInfo], preselected: List[str]) -> List:
    """Turn the scanned repos into the entries of the multi-select menu.

    The list starts with a "cancel" entry, followed, for each repo, by a separator
    describing the repo and one selectable entry per revision.

    Args:
        repos (*Iterable[`CachedRepoInfo`]*):
            Scanned repos whose revisions can be deleted.
        preselected (*List[`str`]*):
            Revision hashes whose entry starts out enabled.

    Return:
        The list of choices to pass to `inquirer.checkbox`.
    """
    # First entry: the kill switch, never enabled by default.
    cancel_entry = Choice(
        _CANCEL_DELETION_STR,
        name="None of the following (if selected, nothing will be deleted).",
        enabled=False,
    )
    choices: List[Union[Choice, Separator]] = [cancel_entry]

    for repo in sorted(repos, key=_repo_sorting_order):
        # One header per repo...
        header = (
            f"\n{repo.repo_type.capitalize()} {repo.repo_id}"
            f" ({repo.size_on_disk_str}, used {repo.last_accessed_str})"
        )
        choices.append(Separator(header))

        # ...followed by its revisions.
        choices.extend(
            Choice(
                revision.commit_hash,
                name=(
                    f"{revision.commit_hash[:8]}: "
                    + (", ".join(sorted(revision.refs)) or "(detached)")
                    + f" # modified {revision.last_modified_str}"
                ),
                enabled=revision.commit_hash in preselected,
            )
            for revision in sorted(repo.revisions, key=_revision_sorting_order)
        )

    return choices
|
|
|
|
|
def _manual_review_no_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) -> List[str]:
    """Ask the user for a manual review of the revisions to delete.

    Used when TUI is disabled. Manual review happens in a separate tmp file that the
    user can manually edit. The file is re-read and the user asked to confirm until
    the confirmation is positive.

    The temporary file is removed before leaving this function, including when the
    review is aborted by an exception (e.g. `KeyboardInterrupt` at the prompt).

    Args:
        hf_cache_info (`HFCacheInfo`):
            Scanned cache whose revisions are listed in the file.
        preselected (`List[str]`):
            Revision hashes written uncommented (i.e. already selected).

    Returns:
        The revision hashes left uncommented in the file when the user confirmed.
    """
    # Only the path is needed: the file is reopened by name below and by the user.
    fd, tmp_path = mkstemp(suffix=".txt")
    os.close(fd)

    try:
        # One commented header per repo, then one line per revision.
        lines = []
        for repo in sorted(hf_cache_info.repos, key=_repo_sorting_order):
            lines.append(
                f"\n# {repo.repo_type.capitalize()} {repo.repo_id} ({repo.size_on_disk_str},"
                f" used {repo.last_accessed_str})"
            )
            for revision in sorted(repo.revisions, key=_revision_sorting_order):
                lines.append(
                    # Revisions that are not preselected are commented out.
                    f"{'' if revision.commit_hash in preselected else '#'} "
                    f" {revision.commit_hash} # Refs:"
                    # Refs and last-modified info come after a '#', so they are
                    # ignored when the file is parsed back.
                    f" {', '.join(sorted(revision.refs)) or '(detached)'} # modified"
                    f" {revision.last_modified_str}"
                )

        with open(tmp_path, "w") as f:
            f.write(_MANUAL_REVIEW_NO_TUI_INSTRUCTIONS)
            f.write("\n".join(lines))

        # Tell the user where the file is.
        instructions = f"""
        TUI is disabled. In order to select which revisions you want to delete, please edit
        the following file using the text editor of your choice. Instructions for manual
        editing are located at the beginning of the file. Edit the file, save it and confirm
        to continue.
        File to edit: {ANSI.bold(tmp_path)}
        """
        print("\n".join(line.strip() for line in instructions.strip().split("\n")))

        # Re-read the file before each prompt so that edits made in between are seen.
        while True:
            selected_hashes = _read_manual_review_tmp_file(tmp_path)
            if _ask_for_confirmation_no_tui(
                _get_expectations_str(hf_cache_info, selected_hashes) + " Continue ?",
                default=False,
            ):
                return selected_hashes
    finally:
        # Always clean up, even when interrupted. The file may already be gone (e.g.
        # removed by the user); do not let that mask the original error.
        try:
            os.remove(tmp_path)
        except FileNotFoundError:
            pass
|
|
|
|
|
def _ask_for_confirmation_no_tui(message: str, default: bool = True) -> bool: |
|
"""Ask for confirmation using pure-python.""" |
|
YES = ("y", "yes", "1") |
|
NO = ("n", "no", "0") |
|
DEFAULT = "" |
|
ALL = YES + NO + (DEFAULT,) |
|
full_message = message + (" (Y/n) " if default else " (y/N) ") |
|
while True: |
|
answer = input(full_message).lower() |
|
if answer == DEFAULT: |
|
return default |
|
if answer in YES: |
|
return True |
|
if answer in NO: |
|
return False |
|
print(f"Invalid input. Must be one of {ALL}") |
|
|
|
|
|
def _get_expectations_str(hf_cache_info: HFCacheInfo, selected_hashes: List[str]) -> str:
    """Summarize a selection: how many revisions, and how much space they would free.

    If the cancel sentinel is part of `selected_hashes`, the summary only states
    that nothing will be deleted.

    Example:
    ```
    >>> _get_expectations_str(hf_cache_info, selected_hashes)
    '7 revisions selected counting for 4.3G.'
    ```
    """
    if _CANCEL_DELETION_STR not in selected_hashes:
        # The strategy is only built to read the expected freed size; it is not executed.
        freed_size = hf_cache_info.delete_revisions(*selected_hashes).expected_freed_size_str
        return f"{len(selected_hashes)} revisions selected counting for {freed_size}."

    return "Nothing will be deleted."
|
|
|
|
|
def _read_manual_review_tmp_file(tmp_path: str) -> List[str]: |
|
"""Read the manually reviewed instruction file and return a list of revision hash. |
|
|
|
Example: |
|
```txt |
|
# This is the tmp file content |
|
### |
|
|
|
# Commented out line |
|
123456789 # revision hash |
|
|
|
# Something else |
|
# a_newer_hash # 2 days ago |
|
an_older_hash # 3 days ago |
|
``` |
|
|
|
```py |
|
>>> _read_manual_review_tmp_file(tmp_path) |
|
['123456789', 'an_older_hash'] |
|
``` |
|
""" |
|
with open(tmp_path) as f: |
|
content = f.read() |
|
|
|
|
|
lines = [line.strip() for line in content.split("\n")] |
|
|
|
|
|
selected_lines = [line for line in lines if not line.startswith("#")] |
|
|
|
|
|
selected_hashes = [line.split("#")[0].strip() for line in selected_lines] |
|
|
|
|
|
return [hash for hash in selected_hashes if len(hash) > 0] |
|
|
|
|
|
# Header written at the top of the temporary review file used when the TUI is disabled.
# Every line starts with '#', so the whole header is skipped when the file is parsed
# back; the kill-switch line only takes effect once the user removes its leading '#'.
_MANUAL_REVIEW_NO_TUI_INSTRUCTIONS = f"""
# INSTRUCTIONS
# ------------
# This is a temporary file created by running `huggingface-cli delete-cache` with the
# `--disable-tui` option. It contains a set of revisions that can be deleted from your
# local cache directory.
#
# Please manually review the revisions you want to delete:
# - Revision hashes can be commented out with '#'.
# - Only non-commented revisions in this file will be deleted.
# - Revision hashes that are removed from this file are ignored as well.
# - If `{_CANCEL_DELETION_STR}` line is uncommented, the all cache deletion is cancelled and
# no changes will be applied.
#
# Once you've manually reviewed this file, please confirm deletion in the terminal. This
# file will be automatically removed once done.
# ------------

# KILL SWITCH
# ------------
# Un-comment following line to completely cancel the deletion process
# {_CANCEL_DELETION_STR}
# ------------

# REVISIONS
# ------------
""".strip()
|
|
|
|
|
def _repo_sorting_order(repo: CachedRepoInfo) -> Any:
    """Sort key for repos: `repo_type` first, then `last_accessed`."""
    sort_key = (repo.repo_type, repo.last_accessed)
    return sort_key
|
|
|
|
|
def _revision_sorting_order(revision: CachedRevisionInfo) -> Any:
    """Sort key for revisions: their `last_modified` value."""
    sort_key = revision.last_modified
    return sort_key
|
|