|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Contains command to scan the HF cache directory. |
|
|
|
Usage: |
|
huggingface-cli scan-cache |
|
huggingface-cli scan-cache -v |
|
huggingface-cli scan-cache -vvv |
|
huggingface-cli scan-cache --dir ~/.cache/huggingface/hub |
|
""" |
|
import time |
|
from argparse import Namespace, _SubParsersAction |
|
from typing import Optional |
|
|
|
from ..utils import CacheNotFound, HFCacheInfo, scan_cache_dir |
|
from . import BaseHuggingfaceCLICommand |
|
from ._cli_utils import ANSI, tabulate |
|
|
|
|
|
class ScanCacheCommand(BaseHuggingfaceCLICommand):
    """Implementation of the `scan-cache` CLI subcommand.

    Scans a cache directory with `scan_cache_dir` and prints the result as a
    table, followed by a one-line summary and any warnings collected during
    the scan.
    """

    @staticmethod
    def register_subcommand(parser: _SubParsersAction):
        """Register the `scan-cache` subcommand and its options.

        Args:
            parser: Sub-parsers action on which the `scan-cache` parser is
                created.
        """
        scan_cache_parser = parser.add_parser("scan-cache", help="Scan cache directory.")

        scan_cache_parser.add_argument(
            "--dir",
            type=str,
            default=None,
            help="cache directory to scan (optional). Default to the default HuggingFace cache.",
        )
        scan_cache_parser.add_argument(
            "-v",
            "--verbose",
            action="count",
            default=0,
            help="show a more verbose output",
        )
        # Expose this class as `func` on the parsed namespace.
        # NOTE(review): presumably the CLI entry point instantiates it with
        # the parsed args and calls `run()` -- confirm against the caller.
        scan_cache_parser.set_defaults(func=ScanCacheCommand)

    def __init__(self, args: Namespace) -> None:
        """Store the options parsed from the command line.

        Args:
            args: Parsed arguments; `verbose` (count of `-v` flags) and `dir`
                (cache directory or `None`) are read.
        """
        self.verbosity: int = args.verbose
        self.cache_dir: Optional[str] = args.dir

    def run(self) -> None:
        """Scan the cache directory and print a report to stdout.

        If the scan raises `CacheNotFound`, a message naming the missing
        directory is printed and nothing else is reported.
        """
        # Use a monotonic clock: wall-clock time can jump (NTP, manual clock
        # changes) and would then report a wrong or negative duration.
        start = time.perf_counter()
        try:
            # Keep the `try` body limited to the call that can raise.
            hf_cache_info = scan_cache_dir(self.cache_dir)
        except CacheNotFound as exc:
            cache_dir = exc.cache_dir
            print(f"Cache directory not found: {cache_dir}")
            return
        elapsed = time.perf_counter() - start

        self._print_hf_cache_info_as_table(hf_cache_info)

        print(
            f"\nDone in {round(elapsed, 1)}s. Scanned {len(hf_cache_info.repos)} repo(s)"
            f" for a total of {ANSI.red(hf_cache_info.size_on_disk_str)}."
        )

        warnings = hf_cache_info.warnings
        if not warnings:
            return

        message = f"Got {len(warnings)} warning(s) while scanning."
        if self.verbosity >= 3:
            print(ANSI.gray(message))
            for warning in warnings:
                # Convert explicitly so the formatter receives text rather
                # than the warning object itself.
                print(ANSI.gray(str(warning)))
        else:
            print(ANSI.gray(message + " Use -vvv to print details."))

    def _print_hf_cache_info_as_table(self, hf_cache_info: HFCacheInfo) -> None:
        """Print the scanned cache as a table.

        With verbosity 0, one row is printed per repo. With any higher
        verbosity, one row is printed per revision of each repo. Repos are
        ordered by `repo_path` and revisions by `commit_hash`.

        Args:
            hf_cache_info: Result of the cache scan to display.
        """
        repos = sorted(hf_cache_info.repos, key=lambda repo: repo.repo_path)

        if self.verbosity == 0:
            headers = [
                "REPO ID",
                "REPO TYPE",
                "SIZE ON DISK",
                "NB FILES",
                "LAST_ACCESSED",
                "LAST_MODIFIED",
                "REFS",
                "LOCAL PATH",
            ]
            rows = [
                [
                    repo.repo_id,
                    repo.repo_type,
                    f"{repo.size_on_disk_str:>12}",
                    repo.nb_files,
                    repo.last_accessed_str,
                    repo.last_modified_str,
                    ", ".join(sorted(repo.refs)),
                    str(repo.repo_path),
                ]
                for repo in repos
            ]
        else:
            headers = [
                "REPO ID",
                "REPO TYPE",
                "REVISION",
                "SIZE ON DISK",
                "NB FILES",
                "LAST_MODIFIED",
                "REFS",
                "LOCAL PATH",
            ]
            rows = [
                [
                    repo.repo_id,
                    repo.repo_type,
                    revision.commit_hash,
                    f"{revision.size_on_disk_str:>12}",
                    revision.nb_files,
                    revision.last_modified_str,
                    ", ".join(sorted(revision.refs)),
                    str(revision.snapshot_path),
                ]
                for repo in repos
                for revision in sorted(repo.revisions, key=lambda revision: revision.commit_hash)
            ]

        print(tabulate(rows=rows, headers=headers))
|
|