|
import requests |
|
import os |
|
from urllib.parse import urlparse |
|
from tqdm import tqdm |
|
import concurrent.futures |
|
from tenacity import retry, stop_after_attempt, wait_exponential |
|
|
|
|
|
@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=10))
def download_file(url, session, download_folder):
    """Download one URL into download_folder and return a status message.

    The target filename is built from the first three URL path segments
    (e.g. /owner/repo/file -> owner_repo_file.py) and de-duplicated with a
    numeric suffix if it already exists on disk.

    Parameters:
        url: URL to fetch.
        session: a requests.Session used for the GET.
        download_folder: existing directory to write the file into.

    Returns:
        "Downloaded <name>" on success, "Failed to download <url>: <err>"
        on a requests-level error.
    """
    try:
        # Timeout so a hung connection cannot stall a worker thread forever.
        response = session.get(url, timeout=30)
        response.raise_for_status()

        path_parts = urlparse(url).path.strip("/").split("/")
        # Use up to the first three segments; fall back to a generic stem so
        # a short path no longer raises IndexError (which bypassed the
        # RequestException handler and burned all 5 retries pointlessly).
        stem = "_".join(path_parts[:3]) or "download"
        file_name = f"{stem}.py"

        # De-duplicate: append _1, _2, ... until the name is unused.
        base, ext = os.path.splitext(file_name)
        counter = 1
        while os.path.exists(os.path.join(download_folder, file_name)):
            file_name = f"{base}_{counter}{ext}"
            counter += 1

        with open(os.path.join(download_folder, file_name), "wb") as file:
            file.write(response.content)
        return f"Downloaded {file_name}"

    except requests.exceptions.RequestException as e:
        # NOTE(review): returning here means @retry never sees the failure,
        # so network errors are NOT actually retried. Kept as-is to preserve
        # the string-returning contract callers rely on; to enable retries,
        # re-raise and handle RetryError in the caller instead.
        return f"Failed to download {url}: {e}"
|
|
|
|
|
def download_files_concurrently(urls, download_folder):
    """Download all URLs into download_folder using a thread pool.

    Shows a tqdm progress bar as downloads complete.

    Parameters:
        urls: iterable of URL strings.
        download_folder: directory to create (if needed) and write into.

    Returns:
        List of per-URL status messages, in completion order.
        (The original discarded these and returned None, so returning
        them is backward-compatible.)
    """
    os.makedirs(download_folder, exist_ok=True)

    results = []
    with requests.Session() as session:
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [
                executor.submit(download_file, url, session, download_folder)
                for url in urls
            ]
            for future in tqdm(
                concurrent.futures.as_completed(futures), total=len(futures)
            ):
                # BUG FIX: the original body was `pass`, so future.result()
                # was never called and any exception raised in a worker
                # (e.g. tenacity RetryError) vanished silently. Surface it.
                try:
                    results.append(future.result())
                except Exception as e:
                    results.append(f"Failed: {e}")
    return results
|
|
|
|
|
def read_urls_from_file(file_name):
    """Read one URL per line from file_name.

    Strips surrounding whitespace and skips blank lines — the original
    returned empty strings for blank/trailing lines, which were then
    submitted as download URLs and failed.

    Parameters:
        file_name: path to a text file with one URL per line.

    Returns:
        List of non-empty, stripped URL strings in file order.
    """
    with open(file_name, "r", encoding="utf-8") as file:
        # Iterate the file directly; readlines() materializes needlessly.
        return [line.strip() for line in file if line.strip()]
|
|
|
|
|
|
|
# Script entry point. Guarded so that importing this module for its
# functions no longer triggers a full download run as a side effect.
if __name__ == "__main__":
    urls_file = "python_files.txt"
    download_folder = "downloaded_files"

    urls = read_urls_from_file(urls_file)
    download_files_concurrently(urls, download_folder)
|
|