# Download a file (or directory) from a Hugging Face Hub repository
# to a local folder, with resumable, retrying, threaded transfers.
import os
import time
from pathlib import Path

import requests
import tqdm
from requests.adapters import HTTPAdapter
from requests.exceptions import ConnectionError, RequestException, Timeout
from tqdm.contrib.concurrent import thread_map

from huggingface_hub import HfFolder, hf_hub_url
# --- Configuration ---------------------------------------------------------

# Hugging Face access token. Only saved when non-empty: the original called
# HfFolder.save_token('') unconditionally, which clobbered any token already
# stored on this machine with an empty string.
HF_TOKEN = ''  # Replace with your actual token
if HF_TOKEN:
    HfFolder.save_token(HF_TOKEN)

# Repository to download from.
repo_id = "NoQuest/LLmSave"
repo_type = "model"

# Local path where you want to save the downloaded files.
local_folder_path = "./LLmSaveLocal"

# Variable to specify the file or directory to download.
download_target = "fichiertemoin.txt"  # Change this to the desired file or directory name

print(f"Downloading {download_target} from {repo_id} to {local_folder_path}...")

# Create the local directory if it doesn't exist.
os.makedirs(local_folder_path, exist_ok=True)

# Print the URL for debugging.
print(f"URL: {hf_hub_url(repo_id, download_target, repo_type=repo_type)}")
def get_session(max_retries=5):
    """Build a ``requests.Session`` for talking to the Hugging Face hosts.

    When ``max_retries`` is truthy, a retrying ``HTTPAdapter`` is mounted
    for both the CDN and the main hub host; pass ``0`` to disable retries.
    """
    http = requests.Session()
    if max_retries:
        for prefix in ('https://cdn-lfs.huggingface.co', 'https://huggingface.co'):
            http.mount(prefix, HTTPAdapter(max_retries=max_retries))
    # ... (add authentication if needed)
    return http
def get_single_file(url, output_folder, start_from_scratch=False, max_retries=7):
    """Download ``url`` into ``output_folder``, resuming partial files.

    An existing partial file is continued with an HTTP Range request unless
    ``start_from_scratch`` is true. Network failures are retried up to
    ``max_retries`` times with exponential backoff (2**attempt seconds).

    Fixes over the original:
    - the size probe before resuming used a full streaming GET (starting a
      wasted second download and leaking the response); it is now a HEAD.
    - that probe sat outside the try block, so a network error there
      crashed instead of triggering a retry; it is now inside.
    - the "already complete" early return only fires when the server
      actually reported a size (content-length may be absent → 0).
    - the error message now names the file instead of "(unknown)".
    """
    filename = Path(url.rsplit('/', 1)[1])
    output_path = output_folder / filename

    for attempt in range(1, max_retries + 1):
        session = get_session()
        headers = {}
        mode = 'wb'

        try:
            if output_path.exists() and not start_from_scratch:
                # Probe the total size without streaming any body bytes.
                head = session.head(url, allow_redirects=True, timeout=20)
                total_size = int(head.headers.get('content-length', 0))
                existing = output_path.stat().st_size
                if total_size > 0 and existing >= total_size:
                    return  # Already fully downloaded.
                # Resume from the current end of the local file.
                headers = {'Range': f'bytes={existing}-'}
                mode = 'ab'

            with session.get(url, stream=True, headers=headers, timeout=30) as r:
                r.raise_for_status()
                # For a Range request this is the *remaining* byte count,
                # which is exactly what the progress bar should total.
                total_size = int(r.headers.get('content-length', 0))
                block_size = 1024 * 1024  # 1MB
                tqdm_kwargs = {'total': total_size, 'unit': 'iB', 'unit_scale': True, 'bar_format': '{l_bar}{bar}|{n_fmt}/{total_fmt}{rate_fmt}'}
                with open(output_path, mode) as f:
                    with tqdm.tqdm(**tqdm_kwargs) as t:
                        for data in r.iter_content(block_size):
                            f.write(data)
                            t.update(len(data))
            return  # Success.
        except (RequestException, ConnectionError, Timeout) as e:
            print(f"Error downloading {filename}: {e}.")
            print(f"That was attempt {attempt}/{max_retries}.", end='')
            if attempt < max_retries:
                print(f" Retry begins in {2**attempt} seconds.")
                time.sleep(2**attempt)
            else:
                print(" Failed to download after the maximum number of attempts.")
def start_download_threads(file_list, output_folder, start_from_scratch=False, threads=4):
    """Download every URL in ``file_list`` concurrently with ``threads`` workers."""
    def fetch(url):
        return get_single_file(url, output_folder, start_from_scratch=start_from_scratch)

    thread_map(fetch, file_list, max_workers=threads, disable=True)
def download_model_files(model, branch, links, output_folder, start_from_scratch=False, threads=4):
    """Download all ``links`` into ``output_folder``, creating it if needed.

    ``model`` and ``branch`` are currently unused; they are kept so the
    call signature stays stable for existing callers.
    """
    destination = Path(output_folder)
    destination.mkdir(parents=True, exist_ok=True)
    # ... (add metadata writing if needed)
    print(f"Downloading the model to {destination}")
    start_download_threads(links, destination, start_from_scratch=start_from_scratch, threads=threads)
# --- Main: download the specified file or directory ------------------------
# NOTE: the original created `session = get_session()` here but never used
# it — each worker thread builds its own session inside get_single_file —
# so that dead assignment has been removed.
links = [hf_hub_url(repo_id, download_target, repo_type=repo_type)]
branch = "main"
download_model_files(repo_id, branch, links, local_folder_path)
print("Download complete!")