# DownloadUploadble.py: download files from a Hugging Face Hub repo,
# with resume support and retries.
from huggingface_hub import HfFolder, hf_hub_url
import os
import requests
import tqdm
from requests.adapters import HTTPAdapter
from requests.exceptions import ConnectionError, RequestException, Timeout
from tqdm.contrib.concurrent import thread_map
from pathlib import Path
import time
# Save your token (only needed for private repos; alternatively, run
# `huggingface-cli login` once instead of hardcoding it here)
hf_token = ""  # Replace with your actual token
if hf_token:
    HfFolder.save_token(hf_token)
# Define the repository to download from
repo_id = "NoQuest/LLmSave"
repo_type = "model"
# Local path where you want to save the downloaded files
local_folder_path = "./LLmSaveLocal"
# Variable to specify the file or directory to download
download_target = "fichiertemoin.txt" # Change this to the desired file or directory name
print(f"Downloading {download_target} from {repo_id} to {local_folder_path}...")
# Create the local directory if it doesn't exist
os.makedirs(local_folder_path, exist_ok=True)
# Print the URL for debugging
print(f"URL: {hf_hub_url(repo_id, download_target, repo_type=repo_type)}")
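# Note: for a single file, huggingface_hub's own hf_hub_download offers
# comparable functionality (auth, caching, resume) out of the box; it is shown
# commented out here purely for comparison with the manual approach below.
# from huggingface_hub import hf_hub_download
# hf_hub_download(repo_id=repo_id, filename=download_target,
#                 repo_type=repo_type, local_dir=local_folder_path)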
def get_session(max_retries=5):
    # Build a requests session that retries transient failures against both
    # the Hugging Face CDN and the main site
    session = requests.Session()
    if max_retries:
        session.mount('https://cdn-lfs.huggingface.co', HTTPAdapter(max_retries=max_retries))
        session.mount('https://huggingface.co', HTTPAdapter(max_retries=max_retries))
    # ... (add authentication if needed)
    return session
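# A minimal sketch of the authentication hinted at above, assuming the token
# was saved via HfFolder earlier in this script. get_session_with_auth is a
# hypothetical helper, not part of the original script.
def get_session_with_auth(max_retries=5):
    session = get_session(max_retries=max_retries)
    token = HfFolder.get_token()  # Token saved above, or None if not set
    if token:
        session.headers['Authorization'] = f'Bearer {token}'
    return session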
def get_single_file(url, output_folder, start_from_scratch=False, max_retries=7):
    filename = Path(url.rsplit('/', 1)[1])
    output_path = output_folder / filename
    attempt = 0
    while attempt < max_retries:
        attempt += 1
        session = get_session()
        headers = {}
        mode = 'wb'
        if output_path.exists() and not start_from_scratch:
            # Probe the remote size so the download can be skipped or resumed
            with session.get(url, stream=True, timeout=20) as r:
                total_size = int(r.headers.get('content-length', 0))
            if total_size and output_path.stat().st_size >= total_size:
                return  # File is already fully downloaded
            # Request only the bytes that are still missing
            headers = {'Range': f'bytes={output_path.stat().st_size}-'}
            mode = 'ab'
        try:
            with session.get(url, stream=True, headers=headers, timeout=30) as r:
                r.raise_for_status()
                # With a Range header, content-length is the remaining bytes
                total_size = int(r.headers.get('content-length', 0))
                block_size = 1024 * 1024  # 1MB
                tqdm_kwargs = {'total': total_size, 'unit': 'iB', 'unit_scale': True, 'bar_format': '{l_bar}{bar}| {n_fmt}/{total_fmt} {rate_fmt}'}
                with open(output_path, mode) as f:
                    with tqdm.tqdm(**tqdm_kwargs) as t:
                        for data in r.iter_content(block_size):
                            f.write(data)
                            t.update(len(data))
            break  # Exit the retry loop on success
        except (RequestException, ConnectionError, Timeout) as e:
            print(f"Error downloading {filename}: {e}.")
            print(f"That was attempt {attempt}/{max_retries}.", end=' ')
            if attempt < max_retries:
                print(f"Retry begins in {2**attempt} seconds.")
                time.sleep(2**attempt)  # Exponential backoff between retries
            else:
                print("Failed to download after the maximum number of attempts.")
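# Standalone usage of get_single_file, assuming the variables defined at the
# top of this script (equivalent to what the driver code at the bottom of this
# file does for a single URL):
# get_single_file(hf_hub_url(repo_id, download_target, repo_type=repo_type),
#                 Path(local_folder_path))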
def start_download_threads(file_list, output_folder, start_from_scratch=False, threads=4):
    # Download files in parallel; the outer thread_map bar is disabled because
    # each file already gets its own tqdm progress bar
    thread_map(lambda url: get_single_file(url, output_folder, start_from_scratch=start_from_scratch), file_list, max_workers=threads, disable=True)
def download_model_files(model, branch, links, output_folder, start_from_scratch=False, threads=4):
output_folder = Path(output_folder)
output_folder.mkdir(parents=True, exist_ok=True)
# ... (add metadata writing if needed)
print(f"Downloading the model to {output_folder}")
start_download_threads(links, output_folder, start_from_scratch=start_from_scratch, threads=threads)
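# A minimal sketch of the metadata writing hinted at above: record which repo,
# branch, and URLs a download came from (presumably why download_model_files
# accepts model and branch parameters it does not otherwise use).
# write_metadata and the JSON layout are assumptions, not part of the
# original script.
import json

def write_metadata(model, branch, links, output_folder):
    metadata = {'model': model, 'branch': branch, 'files': links}
    with open(Path(output_folder) / 'download_metadata.json', 'w') as f:
        json.dump(metadata, f, indent=2)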
# Download the specified file or directory
links = [hf_hub_url(repo_id, download_target, repo_type=repo_type)]
branch = "main"
download_model_files(repo_id, branch, links, local_folder_path)
print("Download complete!")