# Author: davidegato1
# Last commit: "Update app.py" (5be0243, verified)
import gradio as gr
from bing_image_downloader import downloader
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import quote
import urllib.request
import re
import time
# HTTP headers attached to the outgoing requests made by this module
# (the Google search request and each individual image download).
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.5",
}
def download_bing_images(search_query, limit, adult_filter_off):
    """Download images for a query through bing_image_downloader.

    Args:
        search_query: Text to search for. It is also the name of the
            sub-directory (under ``dataset``) that is listed afterwards.
        limit: Maximum number of images to request; coerced to ``int``.
        adult_filter_off: ``True`` when safe mode is disabled ("Off").

    Returns:
        The image paths found in the query's directory, or an empty list if
        anything in the download or lookup raised.
    """
    # A single variable feeds both the downloader and the lookup below, so the
    # two can never silently point at different folders.
    base_dir = 'dataset'
    try:
        downloader.download(
            search_query,
            # UI widgets may hand over a float; the downloader counts images.
            limit=int(limit),
            output_dir=base_dir,
            adult_filter_off=adult_filter_off,
            # NOTE(review): with force_replace=False the folder presumably
            # keeps files from earlier runs of the same query, so more than
            # `limit` paths may come back — confirm against the library.
            force_replace=False,
            timeout=60,
            # NOTE(review): the upstream README appears to name this keyword
            # `filter`; confirm the installed version accepts `filter_type`,
            # otherwise every call lands in the except branch below.
            filter_type='photo',
        )
        return get_image_paths(os.path.join(base_dir, search_query))
    except Exception as e:
        # Best-effort: report on stdout and let the caller show "no images".
        print(f"Erreur Bing : {str(e)}")
        return []
def download_google_images(search_query, limit):
    """Scrape a Google Images result page and download the images it lists.

    The result page is fetched with ``HEADERS``; every ``<script>`` tag whose
    text contains ``AF_initDataCallback`` is scanned for quoted http(s) URLs
    ending in jpg/jpeg/png. The first ``limit`` distinct URLs are handed to
    ``download_and_save``.

    Args:
        search_query: Text to search for.
        limit: Maximum number of images to download; coerced to ``int``.

    Returns:
        Paths of the files that were saved, or an empty list if the search
        request or the parsing raised.
    """
    try:
        # Path separators in the query would otherwise create nested folders
        # or step outside the dataset directory.
        folder_name = re.sub(r'[\\/]+', '_', search_query)
        output_dir = os.path.join('dataset', f'google_{folder_name}')
        os.makedirs(output_dir, exist_ok=True)

        url = f"https://www.google.com/search?q={quote(search_query)}&tbm=isch"
        # Without a timeout a stalled connection would block the handler
        # indefinitely.
        response = requests.get(url, headers=HEADERS, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        pattern = re.compile(r'\[\"(https?://[^\]\"]*\.(?:jpg|jpeg|png))\"')
        image_urls = []
        for script in soup.find_all('script'):
            script_text = script.text
            if 'AF_initDataCallback' in script_text:
                image_urls.extend(pattern.findall(script_text))

        # dict.fromkeys drops duplicates while keeping first-seen order, so
        # truncating to `limit` keeps the page's leading results instead of an
        # arbitrary subset.
        wanted = max(0, int(limit))
        unique_urls = list(dict.fromkeys(image_urls))[:wanted]
        return download_and_save(unique_urls, output_dir)
    except Exception as e:
        # Best-effort: report on stdout and let the caller show "no images".
        print(f"Erreur Google : {str(e)}")
        return []
def download_and_save(urls, output_dir):
    """Download each URL and store it as a file inside ``output_dir``.

    Files are named ``image_<position>_<unix-timestamp>.jpg``. A URL that
    fails is reported on stdout and skipped; the remaining URLs are still
    processed.

    Args:
        urls: Iterable of image URLs.
        output_dir: Destination directory; created if it does not exist.

    Returns:
        Paths of the files that were written successfully, in input order.
    """
    # Callers are not required to have created the folder beforehand.
    os.makedirs(output_dir, exist_ok=True)

    saved_paths = []
    for idx, url in enumerate(urls, start=1):
        try:
            filename = f"image_{idx}_{int(time.time())}.jpg"
            full_path = os.path.join(output_dir, filename)
            req = urllib.request.Request(url, headers=HEADERS)
            # Read the whole body before touching the disk so a failed or
            # timed-out transfer cannot leave a truncated file behind.
            with urllib.request.urlopen(req, timeout=10) as response:
                data = response.read()
            if not data:
                # A zero-byte file is not a usable image; skip it.
                print(f"Erreur téléchargement {url} : réponse vide")
                continue
            with open(full_path, 'wb') as f:
                f.write(data)
            saved_paths.append(full_path)
        except Exception as e:
            # Best-effort per URL: one bad link must not abort the batch.
            print(f"Erreur téléchargement {url} : {str(e)}")
    return saved_paths
def get_image_paths(directory, extensions=('.png', '.jpg', '.jpeg')):
    """Return the paths of the image files directly inside ``directory``.

    Args:
        directory: Folder to list (not searched recursively).
        extensions: File-name suffixes, including the dot, that count as
            images. Matching is case-insensitive.

    Returns:
        Matching paths sorted by file name, or an empty list when
        ``directory`` is not an existing directory.
    """
    if not os.path.isdir(directory):
        return []

    suffixes = tuple(ext.lower() for ext in extensions)
    # Sorting gives a stable order; os.listdir makes no ordering promise.
    # The isfile check keeps sub-directories with image-like names out.
    return [
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if name.lower().endswith(suffixes)
        and os.path.isfile(os.path.join(directory, name))
    ]
def download_handler(source, query, limit, safe_mode):
    """Entry point called by the Gradio interface.

    Args:
        source: ``"Bing"`` or ``"Google"``; any other value downloads nothing.
        query: Search text typed by the user.
        limit: Requested number of images; coerced to ``int`` and clamped to
            the range 1-100.
        safe_mode: ``"On"`` or ``"Off"``; only used for Bing, where ``"Off"``
            disables the adult-content filter.

    Returns:
        A ``(image_paths, status_message)`` tuple. On any error the list is
        empty and the message starts with ``"Erreur"``.
    """
    try:
        # A blank query would start a download for an empty search term.
        query = (query or "").strip()
        if not query:
            return [], "Veuillez entrer une recherche."

        # Coerce inside the try so a float/None/garbage limit yields a status
        # message instead of an unhandled exception.
        limit = max(1, min(int(limit), 100))

        if source == "Bing":
            # safe_mode "Off" means the filter is disabled
            # (adult_filter_off=True).
            image_paths = download_bing_images(query, limit, safe_mode == "Off")
        elif source == "Google":
            image_paths = download_google_images(query, limit)
        else:
            image_paths = []

        if image_paths:
            status_msg = f"{len(image_paths)} image(s) téléchargée(s)."
        else:
            status_msg = "Aucune image téléchargée."
        return image_paths, status_msg
    except Exception as e:
        print(f"Erreur globale : {str(e)}")
        return [], f"Erreur: {str(e)}"
# Build the Gradio interface with Blocks.
# NOTE(review): the nesting below (in particular which components sit inside
# the Row) was reconstructed from text whose indentation had been lost —
# confirm it against the intended layout.
with gr.Blocks(theme=gr.themes.Soft(), title="Image Downloader") as app:
    gr.Markdown("# 📸 Téléchargeur d'Images Multi-Sources")
    gr.Markdown("Téléchargez des images depuis Bing ou Google (max 100)")
    # Input controls; their order matches download_handler's parameters.
    with gr.Row():
        source = gr.Radio(["Bing", "Google"], label="Source", value="Bing")
        query = gr.Textbox(label="Recherche", placeholder="Entrez votre recherche...")
        # Slider bounds match the 1-100 clamp applied in download_handler.
        limit = gr.Slider(1, 100, value=20, step=1, label="Nombre d'images")
        safe_mode = gr.Radio(["On", "Off"], label="Filtre de sécurité (Bing)", value="On")
    submit_btn = gr.Button("🚀 Lancer le téléchargement", variant="primary")
    # Outputs: the image gallery and a read-only status line.
    gallery = gr.Gallery(label="Résultats", columns=5, object_fit="contain", height="auto")
    status = gr.Textbox(label="Statut", interactive=False)
    # Clicking the button calls download_handler with the four inputs and
    # routes its (paths, message) result to the gallery and status box.
    submit_btn.click(
        fn=download_handler,
        inputs=[source, query, limit, safe_mode],
        outputs=[gallery, status],
        api_name="download"
    )
    # Preset input combinations, one row per example.
    gr.Examples(
        examples=[
            ["Bing", "chatons mignons", 10, "On"],
            ["Google", "paysages montagneux", 15, "On"]
        ],
        inputs=[source, query, limit, safe_mode]
    )
if __name__ == "__main__":
    # Take the port from the PORT environment variable (the original author
    # notes this is needed on Hugging Face Spaces); fall back to 7860.
    port = int(os.getenv("PORT", 7860))
    app.launch(
        server_name="0.0.0.0",
        server_port=port,
        show_error=True,
    )