# Source: Hugging Face Space "vlttnv/pear-classifier" (Space status: Sleeping)
# File size: 4,829 bytes
# Revision: a43e23c

# Thanks to Claude!

import os
import sys
import argparse
import requests
import time
from PIL import Image
from io import BytesIO
import matplotlib.pyplot as plt
from duckduckgo_search import DDGS

# Path separators and characters that common filesystems reject in file
# names. They are replaced so a query can never produce an invalid path or
# one that points outside ``save_dir``.
_UNSAFE_FILENAME_CHARS = '<>:"/\\|?*'

# Image modes that Pillow documents as writable to JPEG. Anything else
# (RGBA, P, LA, ...) is converted to RGB before saving, because every file
# is written with a ``.jpg`` extension.
_JPEG_SAFE_MODES = ("L", "RGB", "CMYK")

# Browser-like User-Agent; sent with every image request.
_DOWNLOAD_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}


def _query_to_stem(query):
    """Turn a search query into a filename stem that is safe inside one directory."""
    return "".join(
        "_" if (ch == ' ' or ch in _UNSAFE_FILENAME_CHARS or ord(ch) < 32) else ch
        for ch in query
    )


def _save_as_jpeg(img, filepath):
    """Save ``img`` to ``filepath``, converting modes JPEG cannot store to RGB."""
    if img.mode not in _JPEG_SAFE_MODES:
        img = img.convert("RGB")
    img.save(filepath)


def _review_image(img, filename, filepath, image_title):
    """Show one image, ask keep/skip/quit, and save it when approved.

    Returns:
        bool: False when the user asked to quit (or stdin was closed),
        True when the image was kept or skipped.
    """
    fig = plt.figure(figsize=(10, 8))
    try:
        plt.imshow(img)
        plt.title(f"File: {filename}\nSize: {img.width}x{img.height}\nTitle: {image_title}")
        plt.axis('off')
        plt.tight_layout()
        plt.show(block=False)
        # Give the GUI event loop a moment to actually draw the window;
        # the blocking input() below would otherwise starve it.
        plt.pause(0.001)

        while True:
            try:
                choice = input(f"Keep image '{filename}'? (y/n/q to quit): ").strip().lower()
            except EOFError:
                # stdin is closed: no further answers can arrive, so stop
                # instead of downloading and failing on every remaining image.
                return False

            if choice == 'y':
                _save_as_jpeg(img, filepath)
                print(f"Saved: {filepath}")
                return True
            if choice == 'n':
                print(f"Skipped: {filename}")
                return True
            if choice == 'q':
                return False
            print("Please enter 'y' to keep, 'n' to skip, or 'q' to quit.")
    finally:
        # Always release the figure, including on quit or when saving fails.
        plt.close(fig)


def download_images(query, save_dir, max_images=50):
    """
    Download images based on search query with interactive approval using DuckDuckGo.

    Each result is downloaded, displayed with matplotlib, and saved as a JPEG
    in ``save_dir`` only if the user answers 'y'. Answering 'q' stops the run.
    Images that fail to download or decode are reported and skipped.

    Args:
        query (str): Search query for images
        save_dir (str): Directory to save approved images
        max_images (int): Maximum number of images to download

    Raises:
        SystemExit: If the DuckDuckGo search itself fails (exit status 1).
    """
    # Create save directory if it doesn't exist (exist_ok guards against a
    # concurrent creation between the check and the call)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir, exist_ok=True)
        print(f"Created directory: {save_dir}")

    # Search for images using duckduckgo-search
    print(f"Searching for '{query}' on DuckDuckGo...")

    try:
        ddgs = DDGS()
        image_results = list(ddgs.images(
            query,
            region="wt-wt",  # Worldwide
            safesearch="off",
            size=None,       # Any size
            color=None,      # Any color
            type_image=None, # Any type
            layout=None,     # Any layout
            license_image=None, # Any license
            max_results=max_images
        ))
    except Exception as e:
        print(f"Error searching for images: {e}")
        sys.exit(1)

    if not image_results:
        print("No images found for the query.")
        return

    print(f"Found {len(image_results)} images. Starting download process...")

    stem = _query_to_stem(query)

    # Download and display images one by one
    for i, image_data in enumerate(image_results, 1):
        try:
            image_url = image_data.get("image")
            if not image_url:
                print(f"Image {i}: No URL found, skipping")
                continue

            filename = f"{stem}_{i}.jpg"
            filepath = os.path.join(save_dir, filename)

            response = requests.get(image_url, headers=_DOWNLOAD_HEADERS, timeout=10)
            response.raise_for_status()  # Raise exception for HTTP errors

            img = Image.open(BytesIO(response.content))
            image_title = image_data.get('title', 'Unknown title')
            keep_going = _review_image(img, filename, filepath, image_title)
        except requests.exceptions.RequestException as e:
            print(f"Failed to download image {i}: {e}")
            continue
        except Exception as e:
            # Best effort: one broken image must not abort the whole session.
            print(f"Failed to process image {i}: {e}")
            continue

        if not keep_going:
            print("Quitting...")
            return

        # Add a small delay to avoid being blocked
        time.sleep(0.5)

    print(f"\nFinished downloading images for '{query}'")

def main():
    """Parse the command line and start the interactive image download."""
    cli = argparse.ArgumentParser(
        description='Interactive image downloader using DuckDuckGo',
    )

    # (flags, keyword settings) for every command-line argument, in order.
    option_specs = (
        (
            ('query',),
            {'help': 'Search query for images (e.g., "concorde pear photos")'},
        ),
        (
            ('--save-dir',),
            {
                'default': 'downloaded_images',
                'help': 'Directory to save approved images',
            },
        ),
        (
            ('--max-images',),
            {
                'type': int,
                'default': 50,
                'help': 'Maximum number of images to download',
            },
        ),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    options = cli.parse_args()

    # Hand the parsed values to the downloader.
    download_images(options.query, options.save_dir, options.max_images)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()