# Thanks to Claude!
import argparse
import os
import re
import sys
import time
from io import BytesIO

import matplotlib.pyplot as plt
import requests
from duckduckgo_search import DDGS
from PIL import Image
# Browser-like User-Agent sent with every image request; built once instead
# of on every loop iteration.
_REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Pillow image modes that can be written as JPEG without conversion.
_JPEG_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})


def _safe_filename_stem(query):
    """Return *query* with whitespace and path-hostile characters replaced by '_'."""
    return re.sub(r'[\s<>:"/\\|?*\x00-\x1f]', "_", query)


def _fetch_image(url):
    """Download *url* and return it as an opened PIL image."""
    response = requests.get(url, headers=_REQUEST_HEADERS, timeout=10)
    response.raise_for_status()  # Raise exception for HTTP errors
    return Image.open(BytesIO(response.content))


def _ask_keep(filename):
    """Prompt until the user answers y, n or q and return that letter."""
    while True:
        try:
            choice = input(f"Keep image '{filename}'? (y/n/q to quit): ")
        except EOFError:
            # stdin is closed: nobody can approve anything, so stop instead
            # of downloading every remaining image only to fail on it.
            return 'q'
        choice = choice.strip().lower()
        if choice in ('y', 'n', 'q'):
            return choice
        print("Please enter 'y' to keep, 'n' to skip, or 'q' to quit.")


def _save_as_jpeg(img, filepath):
    """Save *img* to *filepath*, converting modes JPEG cannot store to RGB."""
    if img.mode not in _JPEG_MODES:
        # e.g. RGBA / P / LA: the JPEG writer rejects these modes outright.
        # Any alpha channel is dropped by the conversion.
        img = img.convert("RGB")
    img.save(filepath)


def download_images(query, save_dir, max_images=50):
    """
    Download images based on search query with interactive approval using DuckDuckGo.

    Each search result is downloaded, shown in a matplotlib window, and saved
    to ``save_dir`` as ``<query>_<index>.jpg`` only if the user answers 'y'.
    Answering 'q' (or closing stdin) stops the whole run. Results that cannot
    be downloaded or processed are reported and skipped.

    Args:
        query (str): Search query for images
        save_dir (str): Directory to save approved images
        max_images (int): Maximum number of images to download

    Raises:
        SystemExit: With status 1 if the DuckDuckGo search itself fails.
    """
    # Create save directory if it doesn't exist
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir, exist_ok=True)
        print(f"Created directory: {save_dir}")

    # Search for images using duckduckgo-search
    print(f"Searching for '{query}' on DuckDuckGo...")
    try:
        ddgs = DDGS()
        image_results = list(ddgs.images(
            query,
            region="wt-wt",  # Worldwide
            safesearch="off",
            size=None,  # Any size
            color=None,  # Any color
            type_image=None,  # Any type
            layout=None,  # Any layout
            license_image=None,  # Any license
            max_results=max_images,
        ))
    except Exception as e:
        print(f"Error searching for images: {e}")
        sys.exit(1)

    if not image_results:
        print("No images found for the query.")
        return

    print(f"Found {len(image_results)} images. Starting download process...")

    # The query may contain characters that are not valid in a file name
    # (path separators in particular), so sanitise it once up front.
    stem = _safe_filename_stem(query)

    # Download and display images one by one
    for i, image_data in enumerate(image_results, 1):
        try:
            image_url = image_data.get("image")
            if not image_url:
                print(f"Image {i}: No URL found, skipping")
                continue

            filename = f"{stem}_{i}.jpg"
            filepath = os.path.join(save_dir, filename)

            img = _fetch_image(image_url)

            # Display image and metadata. The figure is closed in ``finally``
            # so that quitting or an error never leaves a window behind.
            fig = plt.figure(figsize=(10, 8))
            try:
                plt.imshow(img)
                image_title = image_data.get('title', 'Unknown title')
                plt.title(f"File: {filename}\nSize: {img.width}x{img.height}\nTitle: {image_title}")
                plt.axis('off')
                plt.tight_layout()
                plt.show(block=False)
                # Give the GUI event loop a moment to actually draw the
                # window before input() blocks the main thread.
                plt.pause(0.001)

                choice = _ask_keep(filename)
                if choice == 'y':
                    _save_as_jpeg(img, filepath)
                    print(f"Saved: {filepath}")
                elif choice == 'n':
                    print(f"Skipped: {filename}")
                else:
                    print("Quitting...")
                    return
            finally:
                plt.close(fig)

            # Add a small delay to avoid being blocked
            time.sleep(0.5)
        except requests.exceptions.RequestException as e:
            print(f"Failed to download image {i}: {e}")
            continue
        except Exception as e:
            print(f"Failed to process image {i}: {e}")
            continue

    print(f"\nFinished downloading images for '{query}'")
def main():
    """Parse the command line and start the interactive downloader.

    Takes a positional search query plus the optional ``--save-dir`` and
    ``--max-images`` flags and forwards them to ``download_images``.
    """
    cli = argparse.ArgumentParser(
        description='Interactive image downloader using DuckDuckGo',
    )
    cli.add_argument(
        'query',
        help='Search query for images (e.g., "concorde pear photos")',
    )
    cli.add_argument(
        '--save-dir',
        default='downloaded_images',
        help='Directory to save approved images',
    )
    cli.add_argument(
        '--max-images',
        type=int,
        default=50,
        help='Maximum number of images to download',
    )
    options = cli.parse_args()

    # Hand the parsed options straight to the downloader.
    download_images(options.query, options.save_dir, options.max_images)
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()