Hanrui / sglang /scripts /ci /utils /cleanup_hf_cache.py
Lekr0's picture
Add files using upload-large-folder tool
61ba51e verified
#!/usr/bin/env python3
"""
Clean up stale HuggingFace cache artifacts from previous failed downloads.
This script removes incomplete marker files, temporary files, and lock files
from the HuggingFace cache directory. These artifacts can accumulate from
interrupted or failed downloads and may interfere with future downloads.
"""
import os
import sys
from pathlib import Path
from typing import List
# huggingface_hub is an optional dependency: when it imports, its constants are
# used to locate the cache; when it does not, HF_HUB_AVAILABLE is set to False
# so that get_hf_cache_dir() takes its environment-variable fallback instead.
try:
    from huggingface_hub import constants

    HF_HUB_AVAILABLE = True
except ImportError:
    # Only warn; the import failure is not treated as an error here.
    print("Warning: huggingface_hub not available")
    HF_HUB_AVAILABLE = False
def get_hf_cache_dir() -> str:
    """
    Get the HuggingFace hub cache directory.

    When ``huggingface_hub`` was imported successfully, its
    ``constants.HF_HUB_CACHE`` value is returned unchanged. Otherwise the
    location is derived from the environment, using the same precedence the
    library documents:

    1. ``HF_HUB_CACHE`` (or the legacy ``HUGGINGFACE_HUB_CACHE``)
    2. ``$HF_HOME/hub``
    3. ``$XDG_CACHE_HOME/huggingface/hub``
    4. ``~/.cache/huggingface/hub``

    Empty environment variables are treated as unset, and ``~`` / ``$VAR``
    references inside the values are expanded.

    Returns:
        Path of the hub cache directory (it is not checked for existence).
    """
    if HF_HUB_AVAILABLE:
        return constants.HF_HUB_CACHE

    env = os.environ

    # An explicit hub-cache override wins over anything derived from HF_HOME.
    explicit_cache = env.get("HF_HUB_CACHE") or env.get("HUGGINGFACE_HUB_CACHE")
    if explicit_cache:
        return os.path.expandvars(os.path.expanduser(explicit_cache))

    hf_home = env.get("HF_HOME")
    if not hf_home:
        cache_root = env.get("XDG_CACHE_HOME") or os.path.join("~", ".cache")
        hf_home = os.path.join(cache_root, "huggingface")

    hf_home = os.path.expandvars(os.path.expanduser(hf_home))
    return os.path.join(hf_home, "hub")
def find_stale_artifacts(cache_dir: str) -> List[Path]:
    """
    Find stale artifact files in the HuggingFace cache.

    The cache tree is walked once and every file whose name ends in
    ``.incomplete`` (incomplete download markers), ``.tmp`` (temporary files)
    or ``.lock`` (lock files from interrupted downloads) is collected.
    Directories are never returned, even if their name carries one of these
    suffixes, because they cannot be removed with ``Path.unlink()``.

    Args:
        cache_dir: HuggingFace cache directory

    Returns:
        Sorted list of paths to stale artifact files; empty when ``cache_dir``
        is missing or is not a directory.
    """
    cache_path = Path(cache_dir)
    if not cache_path.is_dir():
        return []

    # str.endswith accepts a tuple, so one test covers all stale suffixes.
    stale_suffixes = (".incomplete", ".tmp", ".lock")

    stale_files: List[Path] = []
    # A single traversal replaces one recursive glob per pattern. os.walk
    # skips unreadable directories by default, which keeps this best-effort.
    for root, _subdirs, filenames in os.walk(cache_path):
        root_path = Path(root)
        stale_files.extend(
            root_path / name for name in filenames if name.endswith(stale_suffixes)
        )

    # Sorted so the result does not depend on filesystem enumeration order.
    return sorted(stale_files)
def cleanup_artifacts(artifacts: List[Path]) -> tuple[int, int]:
    """
    Remove stale artifact files.

    Removal is best-effort: an error on one path is reported and counted, and
    processing continues with the next path. A path that no longer exists
    (for example because another job removed it after the scan) already
    satisfies the goal of the cleanup, so it is counted as successful rather
    than as a failure.

    Args:
        artifacts: List of file paths to remove

    Returns:
        Tuple of (successful_removals, failed_removals)
    """
    successful = 0
    failed = 0
    for file_path in artifacts:
        try:
            file_path.unlink()
        except FileNotFoundError:
            # Vanished between the scan and now; nothing left to clean.
            print(f" Already removed: {file_path}")
            successful += 1
        except OSError as e:
            # Covers permission errors, directories, busy files, etc.
            print(f" Warning: Could not remove {file_path}: {e}")
            failed += 1
        else:
            print(f" Removed: {file_path}")
            successful += 1
    return successful, failed
def main() -> int:
    """
    Run the cache cleanup and print a report of what was done.

    Resolves the cache directory, scans it for stale artifacts, removes
    whatever was found and prints a summary line.

    Returns:
        Always returns 0 (cleanup is best-effort and should not fail CI)
    """
    heavy_rule = "=" * 70
    light_rule = "-" * 70

    # Banner
    print(heavy_rule)
    print("HuggingFace Cache Cleanup")
    print(heavy_rule)

    # Locate the cache; with no cache directory there is nothing to do
    cache_dir = get_hf_cache_dir()
    print(f"Cache directory: {cache_dir}")
    cache_present = os.path.exists(cache_dir)
    if not cache_present:
        print("Cache directory does not exist - nothing to clean")
        return 0
    print(light_rule)

    # Scan
    print("Scanning for stale artifacts...")
    stale_artifacts = find_stale_artifacts(cache_dir)
    if not stale_artifacts:
        print("✓ No stale cache artifacts found")
        return 0

    # Remove
    print(f"Found {len(stale_artifacts)} stale artifact(s) to remove:")
    removed_count, failed_count = cleanup_artifacts(stale_artifacts)
    print(light_rule)

    # Summary
    summary = (
        f"⚠ Cleaned up {removed_count} file(s), {failed_count} removal(s) failed"
        if failed_count > 0
        else f"✓ Successfully cleaned up {removed_count} stale file(s)"
    )
    print(summary)

    # Removal failures are reported above but never change the exit status
    return 0
if __name__ == "__main__":
    # Script entry point. Whatever happens, the process exits with status 0
    # unless main() itself returns something else: an interrupt or an
    # unexpected error during cleanup must not fail CI.
    try:
        sys.exit(main())
    except KeyboardInterrupt:
        print("\nInterrupted by user")
    except Exception as err:
        print(f"ERROR: Unexpected error during cleanup: {err}")
        import traceback

        traceback.print_exc()
    # Reached only from one of the handlers above; sys.exit(main()) raises
    # SystemExit, which neither handler catches.
    sys.exit(0)