# app.py
"""Streamlit app: caption an image with BLIP and show the captions translated.

The user supplies an image (file upload or direct URL) and picks an output
language. The app generates three English captions with the
``Salesforce/blip-image-captioning-base`` model and translates each one with
``deep_translator.GoogleTranslator``.
"""

from io import BytesIO

import requests
import streamlit as st
import torch
from deep_translator import GoogleTranslator
from PIL import Image
from transformers import BlipForConditionalGeneration, BlipProcessor

MODEL_NAME = "Salesforce/blip-image-captioning-base"
# Seconds to wait for the remote host before giving up on a pasted URL.
REQUEST_TIMEOUT_SECONDS = 10
# Size the image is resized to before being handed to the processor.
MODEL_INPUT_SIZE = (384, 384)

# Display name -> language code passed to GoogleTranslator as the target.
languages = {
    "English": "en",
    "Urdu": "ur",
    "Hindi": "hi",
    "French": "fr",
    "Spanish": "es",
    "Arabic": "ar",
}

# Streamlit page config (kept ahead of every other Streamlit call).
st.set_page_config(page_title="AI Image Caption Generator", page_icon="🖼️")


@st.cache_resource(show_spinner=False)
def load_captioner():
    """Load the BLIP processor and model once and reuse them across reruns.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the weights would be re-loaded each time.

    Returns:
        A ``(processor, model)`` tuple.
    """
    processor = BlipProcessor.from_pretrained(MODEL_NAME)
    model = BlipForConditionalGeneration.from_pretrained(MODEL_NAME)
    return processor, model


def open_rgb(source):
    """Open *source* (a path or file-like object) as an RGB PIL image."""
    return Image.open(source).convert("RGB")


def generate_captions(image):
    """Return three beam-search captions for *image* as decoded strings."""
    processor, model = load_captioner()
    resized_image = image.resize(MODEL_INPUT_SIZE)
    inputs = processor(resized_image, return_tensors="pt")
    # Inference only: no gradients are needed.
    with torch.no_grad():
        out = model.generate(
            **inputs,
            num_beams=5,
            num_return_sequences=3,
            max_length=50,
            early_stopping=True,
        )
    return [processor.decode(o, skip_special_tokens=True) for o in out]


def translate_caption(caption, target):
    """Translate *caption* into *target*, falling back to the original text.

    Translation is best-effort: any failure yields the untranslated caption
    prefixed with an explanatory note instead of aborting the page.
    """
    if target == "en":
        # The captions are generated in English already; skip the network call.
        return caption
    try:
        return GoogleTranslator(source="auto", target=target).translate(caption)
    except Exception:  # UI boundary: never let a translation error crash the app
        return f"(Translation Error: Showing English) {caption}"


st.title("🖼️ AI Image Caption Generator")
st.write(
    "Upload an image or paste a Google Image URL to get multiple captions "
    "generated in your preferred language!"
)

# Image input options
upload_option = st.radio(
    "Choose Image Input Method:",
    ("Upload from Computer", "Paste Image URL"),
)

uploaded_file = None
image_url = None

if upload_option == "Upload from Computer":
    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
else:
    image_url = st.text_input(
        "Paste Image URL (must be direct link ending with .jpg/.png/.jpeg)"
    ).strip()

# Language selection
selected_language = st.selectbox("🌐 Choose Output Language", list(languages))

# Load image based on input
image = None

if uploaded_file is not None:
    try:
        image = open_rgb(uploaded_file)
    except Exception as exc:  # corrupt or non-image upload: report, don't crash
        st.error(f"Could not read the uploaded image: {exc}")
elif image_url:
    # NOTE(review): the server fetches a user-supplied URL; restrict the
    # allowed hosts if this app is ever deployed on a private network.
    try:
        response = requests.get(image_url, timeout=REQUEST_TIMEOUT_SECONDS)
        if response.status_code == 200:
            image = open_rgb(BytesIO(response.content))
        else:
            st.error("Failed to fetch image. Please check the URL.")
    except Exception as exc:  # network failure or undecodable response body
        st.error(f"Error fetching image: {exc}")

if image is not None:
    st.image(image, caption="Selected Image", width=300)

    with st.spinner("Generating captions... please wait ⏳"):
        captions = generate_captions(image)

    st.success("📝 Captions Generated Successfully!")
    st.subheader(f"Here are the captions in {selected_language}:")

    target_code = languages[selected_language]
    for idx, cap in enumerate(captions, start=1):
        translated_caption = translate_caption(cap, target_code)
        st.text_area(f"✏️ Caption {idx}", value=translated_caption, height=80)

    st.caption("Tip: You can edit, copy, or download captions easily! ✍️")
else:
    st.info("Please upload an image or paste a valid URL to proceed.")