Spaces:
Sleeping
Sleeping
File size: 4,829 Bytes
a43e23c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
# Thanks to Claude!
import os
import sys
import argparse
import requests
import time
from PIL import Image
from io import BytesIO
import matplotlib.pyplot as plt
from duckduckgo_search import DDGS
def _filename_stem(query):
    """Return *query* with characters that are unsafe in file names replaced by '_'."""
    # Path separators and the characters Windows rejects; whitespace and control
    # characters are replaced as well so the stem is a single clean token.
    unsafe = set('<>:"/\\|?*')
    cleaned = "".join(
        "_" if (ch in unsafe or ch.isspace() or ord(ch) < 32) else ch
        for ch in query
    )
    return cleaned or "image"


def _save_as_jpeg(img, filepath):
    """Write *img* to *filepath* as JPEG, converting modes JPEG cannot store."""
    # Files are always named *.jpg; palette/alpha images (PNG, GIF, WebP...) would
    # otherwise fail with "cannot write mode RGBA as JPEG".
    if img.mode not in ("L", "RGB", "CMYK"):
        img = img.convert("RGB")
    img.save(filepath, "JPEG")


def _review_image(img, image_title, filename, filepath):
    """
    Show *img* and ask the user whether to keep it.

    Returns:
    bool: True to continue with the next image, False if the user asked to quit
    (either by answering 'q' or by closing stdin).
    """
    plt.figure(figsize=(10, 8))
    try:
        plt.imshow(img)
        plt.title(f"File: {filename}\nSize: {img.width}x{img.height}\nTitle: {image_title}")
        plt.axis('off')
        plt.tight_layout()
        plt.show(block=False)
        while True:
            try:
                choice = input(f"Keep image '{filename}'? (y/n/q to quit): ").strip().lower()
            except EOFError:
                # No more input will ever arrive; stop instead of fetching every
                # remaining image only to fail on the prompt again.
                return False
            if choice == 'y':
                _save_as_jpeg(img, filepath)
                print(f"Saved: {filepath}")
                return True
            if choice == 'n':
                print(f"Skipped: {filename}")
                return True
            if choice == 'q':
                return False
            print("Please enter 'y' to keep, 'n' to skip, or 'q' to quit.")
    finally:
        # Always release the figure, including on quit and on save errors.
        plt.close()


def download_images(query, save_dir, max_images=50):
    """
    Download images based on search query with interactive approval using DuckDuckGo.

    Each result is downloaded, displayed with matplotlib, and saved to
    ``save_dir`` as ``<query>_<index>.jpg`` only if the user answers 'y'.

    Args:
    query (str): Search query for images
    save_dir (str): Directory to save approved images
    max_images (int): Maximum number of images to download

    Raises:
    SystemExit: with code 1 if the DuckDuckGo search itself fails.
    """
    # Create save directory if it doesn't exist
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir, exist_ok=True)
        print(f"Created directory: {save_dir}")

    # Search for images using duckduckgo-search
    print(f"Searching for '{query}' on DuckDuckGo...")
    try:
        ddgs = DDGS()
        image_results = list(ddgs.images(
            query,
            region="wt-wt",  # Worldwide
            safesearch="off",
            size=None,  # Any size
            color=None,  # Any color
            type_image=None,  # Any type
            layout=None,  # Any layout
            license_image=None,  # Any license
            max_results=max_images
        ))
    except Exception as e:
        print(f"Error searching for images: {e}")
        sys.exit(1)

    if not image_results:
        print("No images found for the query.")
        return

    print(f"Found {len(image_results)} images. Starting download process...")

    stem = _filename_stem(query)
    # Some hosts refuse requests that do not look like they come from a browser.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    # Download and display images one by one
    for i, image_data in enumerate(image_results, 1):
        try:
            image_url = image_data.get("image")
            if not image_url:
                print(f"Image {i}: No URL found, skipping")
                continue

            filename = f"{stem}_{i}.jpg"
            filepath = os.path.join(save_dir, filename)

            response = requests.get(image_url, headers=headers, timeout=10)
            response.raise_for_status()  # Raise exception for HTTP errors
            img = Image.open(BytesIO(response.content))

            keep_going = _review_image(
                img, image_data.get('title', 'Unknown title'), filename, filepath
            )
        except requests.exceptions.RequestException as e:
            print(f"Failed to download image {i}: {e}")
            continue
        except Exception as e:
            # One bad image (undecodable data, unwritable file, ...) must not
            # abort the whole session.
            print(f"Failed to process image {i}: {e}")
            continue

        if not keep_going:
            print("Quitting...")
            return

        # Add a small delay to avoid being blocked
        time.sleep(0.5)

    print(f"\nFinished downloading images for '{query}'")
def main():
    """Command-line entry point: read the options and start the interactive download."""
    cli = argparse.ArgumentParser(description='Interactive image downloader using DuckDuckGo')

    # (flags, keyword settings) for every supported command-line option.
    option_specs = (
        (('query',), {'help': 'Search query for images (e.g., "concorde pear photos")'}),
        (('--save-dir',), {'default': 'downloaded_images', 'help': 'Directory to save approved images'}),
        (('--max-images',), {'type': int, 'default': 50, 'help': 'Maximum number of images to download'}),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    options = cli.parse_args()

    # Hand the parsed values straight to the downloader.
    download_images(options.query, options.save_dir, options.max_images)
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|