import requests
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import os
import random

# Path to the file containing URLs
file_path = "python_files.txt"


# Function to get the size of a file from a URL using a HEAD request
def get_file_size(url):
    try:
        response = requests.head(url, allow_redirects=True, timeout=10)
        # Extract the Content-Length header (0 if the server did not send one)
        size = int(response.headers.get("Content-Length", 0))
        return size
    except (requests.RequestException, ValueError):
        # Return 0 if any error occurs (e.g., timeout, invalid URL, or malformed header)
        return 0


# Main function to calculate the total size of all files listed in the URL file
def calculate_total_size(file_path):
    # Read URLs from the file
    if not os.path.exists(file_path):
        print("File not found!")
        return 0

    with open(file_path, "r") as file:
        urls = [line.strip() for line in file if line.strip()]

    # Randomly sample at most 5,000 URLs
    random.shuffle(urls)
    urls = urls[:5000]

    # Use threading to perform requests concurrently, with a progress bar
    with ThreadPoolExecutor() as executor:
        # Wrap the map in tqdm for a progress bar
        file_sizes = list(
            tqdm(
                executor.map(get_file_size, urls),
                total=len(urls),
                desc="Processing URLs",
            )
        )

    # Calculate the total size
    total_size = sum(file_sizes)
    return total_size


# Calculate and print the total size
total_size = calculate_total_size(file_path)
print(f"Total size of all files: {total_size / (1024 * 1024):.2f} MB")
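

# Optional fallback (a sketch, not part of the original script): some servers
# reject HEAD requests or omit the Content-Length header, in which case
# get_file_size() reports 0. This hypothetical helper streams the body with a
# GET request and counts the bytes instead. It is accurate but downloads the
# whole file, so it is far slower and should only be used for URLs where the
# HEAD-based approach fails.
def get_file_size_by_streaming(url):
    try:
        with requests.get(url, stream=True, timeout=10) as response:
            response.raise_for_status()
            # Sum the length of each streamed chunk without holding the file in memory
            return sum(len(chunk) for chunk in response.iter_content(chunk_size=8192))
    except requests.RequestException:
        return 0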