Spaces:
Building
Building
import gradio as gr | |
from bing_image_downloader import downloader | |
import os | |
import requests | |
from bs4 import BeautifulSoup | |
from urllib.parse import quote | |
import urllib.request | |
import re | |
import time | |
# HTTP headers sent with every outgoing request (browser-like User-Agent).
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/118.0.0.0 Safari/537.36"
)
HEADERS = {
    "User-Agent": _USER_AGENT,
    "Accept-Language": "en-US,en;q=0.5",
}
# Download via Bing
def download_bing_images(search_query, limit, adult_filter_off):
    """Download images for *search_query* from Bing and return their paths.

    Args:
        search_query: Text to search for; also used as the sub-directory
            name under ``dataset``.
        limit: Maximum number of images to request. Coerced to ``int``
            because the downloader expects an integer.
        adult_filter_off: ``True`` when safe search is disabled ("Off").

    Returns:
        A list of image file paths found in ``dataset/<search_query>``, or
        an empty list if anything goes wrong (the error is printed).
    """
    # Passed explicitly so the directory scanned below is guaranteed to be
    # the one the downloader wrote to, instead of relying on its default.
    output_root = 'dataset'
    try:
        downloader.download(
            search_query,
            limit=int(limit),
            output_dir=output_root,
            adult_filter_off=adult_filter_off,
            force_replace=False,
            timeout=60,
            # The keyword is ``filter`` in bing_image_downloader;
            # ``filter_type`` raises TypeError, which the except below
            # would swallow, making every Bing search return nothing.
            filter='photo',
        )
        return get_image_paths(os.path.join(output_root, search_query))
    except Exception as e:
        print(f"Erreur Bing : {str(e)}")
        return []
# Download via Google
def download_google_images(search_query, limit):
    """Scrape Google Images for *search_query* and download the results.

    Image URLs are extracted with a regular expression from the page's
    ``AF_initDataCallback`` script blocks, de-duplicated in page order,
    truncated to *limit*, and handed to ``download_and_save``.

    Args:
        search_query: Text to search for.
        limit: Maximum number of images to download (coerced to ``int``).

    Returns:
        A list of saved file paths, or an empty list on any error (the
        error is printed).
    """
    try:
        # Path separators in the query would create nested directories or
        # let "../" escape the dataset folder, so neutralise them.
        safe_name = re.sub(r'[\\/]', '_', search_query)
        output_dir = os.path.join('dataset', f'google_{safe_name}')
        os.makedirs(output_dir, exist_ok=True)

        url = f"https://www.google.com/search?q={quote(search_query)}&tbm=isch"
        # Without a timeout, requests can block indefinitely on a stalled
        # connection and freeze the UI handler.
        response = requests.get(url, headers=HEADERS, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        pattern = re.compile(r'\[\"(https?://[^\]\"]*\.(?:jpg|jpeg|png))\"')
        image_urls = []
        for script in soup.find_all('script'):
            script_text = script.text
            if 'AF_initDataCallback' in script_text:
                image_urls.extend(pattern.findall(script_text))

        # dict.fromkeys de-duplicates while keeping the page order; a set
        # would shuffle the results between runs.
        max_images = max(0, int(limit))
        image_urls = list(dict.fromkeys(image_urls))[:max_images]
        return download_and_save(image_urls, output_dir)
    except Exception as e:
        print(f"Erreur Google : {str(e)}")
        return []
# Download and save images from a list of URLs
def download_and_save(urls, output_dir):
    """Download every URL in *urls* into *output_dir*.

    Each download is best-effort: a failure is printed and the loop moves
    on to the next URL.

    Args:
        urls: Iterable of image URLs. Only ``http`` and ``https`` URLs are
            fetched; anything else is skipped.
        output_dir: Directory to write into (created if missing).

    Returns:
        A list of paths of the files that were written successfully.
    """
    from urllib.parse import urlsplit

    allowed_extensions = ('.jpg', '.jpeg', '.png')
    os.makedirs(output_dir, exist_ok=True)
    saved_paths = []
    for idx, url in enumerate(urls):
        try:
            parts = urlsplit(url)
            # urlopen also accepts file:// and ftp://; refuse those so a
            # crafted URL cannot read local files.
            if parts.scheme not in ('http', 'https'):
                print(f"Erreur téléchargement {url} : schéma d'URL non pris en charge")
                continue

            # Keep the real extension when it is a known image type so PNG
            # files are not mislabelled as .jpg.
            ext = os.path.splitext(parts.path)[1].lower()
            if ext not in allowed_extensions:
                ext = '.jpg'
            filename = f"image_{idx+1}_{int(time.time())}{ext}"
            full_path = os.path.join(output_dir, filename)

            req = urllib.request.Request(url, headers=HEADERS)
            # Read the whole body before opening the target file so a
            # failed transfer does not leave an empty file on disk.
            with urllib.request.urlopen(req, timeout=10) as response:
                data = response.read()
            with open(full_path, 'wb') as f:
                f.write(data)
            saved_paths.append(full_path)
        except Exception as e:
            print(f"Erreur téléchargement {url} : {str(e)}")
    return saved_paths
# Collect the paths of the images stored in a directory
def get_image_paths(directory):
    """Return the sorted paths of the image files directly in *directory*.

    A file counts as an image when its name ends, case-insensitively, with
    ``.png``, ``.jpg`` or ``.jpeg``. Sub-directories are ignored.

    Args:
        directory: Directory to scan.

    Returns:
        A sorted list of file paths; empty when *directory* does not exist
        or is not a directory.
    """
    # isdir (not exists) so that a regular file does not reach os.listdir.
    if not os.path.isdir(directory):
        return []

    # Leading dots keep names such as "notjpg" from matching.
    extensions = ('.png', '.jpg', '.jpeg')
    image_paths = []
    for name in os.listdir(directory):
        if not name.lower().endswith(extensions):
            continue
        path = os.path.join(directory, name)
        if os.path.isfile(path):
            image_paths.append(path)
    # os.listdir order is arbitrary; sort for a stable gallery order.
    return sorted(image_paths)
# Main entry point called by the Gradio interface
def download_handler(source, query, limit, safe_mode):
    """Run a download for the chosen source and report the outcome.

    Args:
        source: ``"Bing"`` or ``"Google"``; any other value downloads
            nothing.
        query: Search text. Surrounding whitespace is ignored.
        limit: Requested number of images; coerced to ``int`` and clamped
            to the range 1..100.
        safe_mode: ``"On"`` or ``"Off"``. ``"Off"`` disables the Bing
            adult filter; it is not used for Google.

    Returns:
        A ``(image_paths, status_message)`` tuple. On failure the list is
        empty and the message describes the error.
    """
    try:
        # Done inside the try so a malformed limit (None, text, NaN) is
        # reported in the status box instead of raising out of the handler.
        limit = max(1, min(int(limit), 100))

        query = (query or "").strip()
        if not query:
            # Nothing to search for; skip the network round-trip.
            return [], "Veuillez saisir une recherche."

        if source == "Bing":
            # safe_mode "Off" means the filter is disabled
            # (adult_filter_off=True).
            image_paths = download_bing_images(query, limit, safe_mode == "Off")
        elif source == "Google":
            image_paths = download_google_images(query, limit)
        else:
            image_paths = []

        if image_paths:
            status_msg = f"{len(image_paths)} image(s) téléchargée(s)."
        else:
            status_msg = "Aucune image téléchargée."
        return image_paths, status_msg
    except Exception as e:
        print(f"Erreur globale : {str(e)}")
        return [], f"Erreur: {str(e)}"
# Build the Gradio interface with Blocks
# NOTE(review): the original indentation was lost; the four input widgets
# are assumed to sit inside the Row and everything else directly under
# Blocks - confirm against the deployed layout.
with gr.Blocks(theme=gr.themes.Soft(), title="Image Downloader") as app:
    gr.Markdown(value="# 📸 Téléchargeur d'Images Multi-Sources")
    gr.Markdown(value="Téléchargez des images depuis Bing ou Google (max 100)")

    # Search parameters
    with gr.Row():
        source = gr.Radio(
            choices=["Bing", "Google"],
            label="Source",
            value="Bing",
        )
        query = gr.Textbox(
            label="Recherche",
            placeholder="Entrez votre recherche...",
        )
        limit = gr.Slider(
            minimum=1,
            maximum=100,
            value=20,
            step=1,
            label="Nombre d'images",
        )
        safe_mode = gr.Radio(
            choices=["On", "Off"],
            label="Filtre de sécurité (Bing)",
            value="On",
        )

    # Action button and result widgets
    submit_btn = gr.Button(value="🚀 Lancer le téléchargement", variant="primary")
    gallery = gr.Gallery(
        label="Résultats",
        columns=5,
        object_fit="contain",
        height="auto",
    )
    status = gr.Textbox(label="Statut", interactive=False)

    # Clicking the button runs download_handler and fills gallery + status.
    submit_btn.click(
        fn=download_handler,
        inputs=[source, query, limit, safe_mode],
        outputs=[gallery, status],
        api_name="download",
    )

    # Pre-filled example searches
    gr.Examples(
        examples=[
            ["Bing", "chatons mignons", 10, "On"],
            ["Google", "paysages montagneux", 15, "On"],
        ],
        inputs=[source, query, limit, safe_mode],
    )
if __name__ == "__main__":
    # Take the port from the PORT environment variable (needed on Hugging
    # Face Spaces), falling back to 7860 when it is not set.
    port = int(os.getenv("PORT", "7860"))
    app.launch(
        server_name="0.0.0.0",
        server_port=port,
        show_error=True,
    )