File size: 1,625 Bytes
a8639ac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import requests
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import os
import random
# Path to the text file listing the URLs to measure, one URL per line
# (blank lines are skipped when the file is read).
file_path = "python_files.txt"
def get_file_size(url):
    """Return the size in bytes that the server reports for ``url``.

    Sends a HEAD request (following redirects, 10-second timeout) and reads
    the ``Content-Length`` header of the final response.

    Args:
        url: Address of the remote file.

    Returns:
        The advertised size as an int, or 0 when the request fails, the
        server answers with an HTTP error status, or the header is missing
        or is not a valid integer.
    """
    try:
        response = requests.head(url, allow_redirects=True, timeout=10)
        # A 4xx/5xx response may advertise the Content-Length of its error
        # page; raising here makes such URLs count as 0 instead. HTTPError
        # derives from RequestException, so the handler below catches it.
        response.raise_for_status()
        return int(response.headers.get("Content-Length", 0))
    except (requests.RequestException, ValueError):
        # Best effort: timeouts, invalid URLs, HTTP errors and malformed
        # headers all contribute nothing to the total.
        return 0
def calculate_total_size(file_path, max_urls=5000):
    """Return the combined reported size, in bytes, of URLs listed in a file.

    The file is read as text with one URL per line; blank lines are ignored.
    A random selection of at most ``max_urls`` URLs is measured concurrently
    with ``get_file_size`` on a thread pool, with a tqdm progress bar
    tracking completion.

    Args:
        file_path: Path to the text file containing the URLs.
        max_urls: Upper bound on how many randomly chosen URLs are queried.
            Pass ``None`` to query every URL in the file.

    Returns:
        Sum of the sizes returned by ``get_file_size``. Returns 0 when the
        file does not exist (after printing "File not found!") or when there
        are no URLs to query.
    """
    # Open the file directly rather than checking os.path.exists() first, so
    # the file cannot vanish between the check and the read.
    try:
        with open(file_path, "r") as file:
            urls = [line.strip() for line in file if line.strip()]
    except FileNotFoundError:
        print("File not found!")
        return 0

    # Shuffle, then truncate, to obtain a random sample that bounds the run
    # time on very large lists. Slicing with ``None`` keeps the whole list.
    random.shuffle(urls)
    urls = urls[:max_urls]

    if not urls:
        # Nothing to measure: skip the thread pool and the progress bar.
        return 0

    # The requests are I/O-bound, so threads let the network waits overlap.
    with ThreadPoolExecutor() as executor:
        return sum(
            tqdm(
                executor.map(get_file_size, urls),
                total=len(urls),
                desc="Processing URLs.",
            )
        )
# Calculate the total size and print it converted from bytes to units of
# 1024 * 1024 bytes, formatted with two decimal places.
# NOTE(review): these statements run whenever the module is imported, since
# there is no `if __name__ == "__main__":` guard — confirm that is intended.
total_size = calculate_total_size(file_path)
print(f"Total size of all files: {total_size / (1024 * 1024):.2f} MB")
|