Spaces:
Sleeping
Sleeping
| import hashlib | |
| import re | |
| from urllib.parse import urlparse | |
| from pathlib import Path | |
def get_content_hash(content: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of ``content``.

    Args:
        content: Raw bytes to fingerprint.

    Returns:
        The 64-character lowercase hex digest.
    """
    hasher = hashlib.sha256()
    hasher.update(content)
    return hasher.hexdigest()
def sanitize_filename(url: str) -> str:
    """Create a sanitized, readable filename from a URL.

    The URL's path (without leading/trailing slashes) is used as the base
    name; when the path is empty the network location is used instead.
    Path separators, characters that are invalid in filenames on common
    platforms, and ASCII control characters are replaced with underscores.
    The query string and fragment are ignored.

    e.g., 'https://example.com/path/to/file.html?query=1' -> 'path_to_file.html'

    Args:
        url: The URL to derive a filename from.

    Returns:
        A filename of at most 150 characters. It is never empty and never
        consists solely of dots/spaces (such as '.' or '..'); in those
        cases the placeholder 'unnamed' is returned.
    """
    parsed_url = urlparse(url)
    # Use the path, but remove leading/trailing slashes.
    path_part = parsed_url.path.strip('/')
    if not path_part:
        # If the path is empty (e.g., domain.com/), use the netloc.
        path_part = parsed_url.netloc
    # Replace slashes, other invalid characters, and control characters
    # (NUL in particular makes open() raise ValueError) with underscores.
    sanitized = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', path_part)
    # Limit length to avoid OS errors.
    sanitized = sanitized[:150]
    # An empty name, or one made only of dots/spaces ('.', '..'), is not a
    # usable filename and could resolve to the current or parent directory
    # when joined onto a path, so substitute a safe placeholder.
    if not sanitized.strip('. '):
        return 'unnamed'
    return sanitized