import http.client
import os
import shutil
import urllib.parse
import urllib.request

from PIL import Image
from tqdm import tqdm

# User-Agent header sent with every image request.
_USER_AGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/50.0.2661.102 Safari/537.36'
)
# Seconds to wait on a blocking network operation before giving up.
_DOWNLOAD_TIMEOUT_SECONDS = 30
# Bounding box passed to Image.thumbnail (aspect ratio is preserved).
_MAX_IMAGE_SIZE = (1024, 1024)
# Pillow image modes that the JPEG writer can store without conversion.
_JPEG_MODES = frozenset({'1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'})


def read_actor_files(folder_path):
    """Read every ``.txt`` file in *folder_path* into a dict of line lists.

    Args:
        folder_path: Directory that is scanned (non-recursively) for files
            whose name ends with ``.txt``.

    Returns:
        A dict mapping each file name without its extension to the list of
        that file's lines with trailing whitespace removed. Blank lines are
        kept (as empty strings) so list positions match line numbers.

    Raises:
        OSError: If the directory or one of its text files cannot be read.
    """
    urls = {}
    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.txt'):
            continue
        key = os.path.splitext(file_name)[0]
        file_path = os.path.join(folder_path, file_name)
        # Explicit encoding so the result does not depend on the OS locale.
        with open(file_path, encoding='utf-8') as text_file:
            urls[key] = [line.rstrip() for line in text_file]
    return urls


def _download_image(url, destination):
    """Stream the resource at *url* into the file *destination*."""
    request = urllib.request.Request(url, headers={'User-Agent': _USER_AGENT})
    with urllib.request.urlopen(
        request, timeout=_DOWNLOAD_TIMEOUT_SECONDS
    ) as response, open(destination, 'wb') as out_file:
        shutil.copyfileobj(response, out_file)


def _shrink_image(img_file_path):
    """Resize the image in place to fit _MAX_IMAGE_SIZE and re-save it."""
    with Image.open(img_file_path) as img:
        img.thumbnail(_MAX_IMAGE_SIZE)
        # The target name always ends in .jpg, so Pillow writes JPEG; modes
        # such as RGBA or P cannot be stored in that format as-is.
        writable = img if img.mode in _JPEG_MODES else img.convert('RGB')
        writable.save(img_file_path)


def _remove_if_exists(path):
    """Delete *path*, ignoring the case where it was never created."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def save_images_to_folder(folder_path, url_dict):
    """Download, shrink and store the images listed in *url_dict*.

    For every ``name -> urls`` entry a sub-folder ``folder_path/name`` is
    created and each URL is saved there as ``{name}_{index}.jpg``, where
    ``index`` is the URL's position in its list. Entries whose sub-folder
    already exists are skipped entirely.

    A URL that is blank, cannot be downloaded, or does not decode as an
    image is reported on stdout and skipped; its partial file is removed
    and processing continues with the next URL.

    Args:
        folder_path: Root directory under which per-name folders are made.
        url_dict: Mapping of name to an iterable of URL strings, e.g. the
            value returned by ``read_actor_files``.

    Raises:
        OSError: If a per-name folder cannot be created.
    """
    for name, url_list in tqdm(url_dict.items()):
        base_folder = os.path.join(folder_path, name)
        if os.path.exists(base_folder):
            print(f'The image folder {base_folder} already exists. Skipping folder.')
            continue
        os.makedirs(base_folder)

        # Wrapping the list itself (not the enumerate object) lets tqdm
        # discover the total when the iterable has a length.
        for i, url in enumerate(tqdm(url_list, desc=name, leave=False)):
            if not url.strip():
                # Blank line in the source list; keep numbering, fetch nothing.
                continue
            img_file_path = os.path.join(base_folder, f'{name}_{i}.jpg')
            quoted_url = urllib.parse.quote(url, safe='://?=&(),%+')
            try:
                _download_image(quoted_url, img_file_path)
                _shrink_image(img_file_path)
            except (OSError, ValueError, http.client.HTTPException) as err:
                # One dead link or broken image must not abort the whole run:
                # the folder already exists, so a rerun would skip this name.
                print(f'Skipping {url}: {err}')
                _remove_if_exists(img_file_path)