"""Streamlit app that captions an image using a Hugging Face pipeline.

The user picks one of three input modes (file upload, image URL, or plain
text). For the two image modes the picture is shown and, on request, passed
to the ``image-to-text`` pipeline; the generated caption is then displayed.
"""
from io import BytesIO

import requests
import streamlit as st
from PIL import Image
from transformers import pipeline

# Initialize the pipeline
# NOTE(review): Streamlit re-runs this script on each interaction, so the
# model is presumably re-created every time; consider wrapping this in a
# cached factory (e.g. ``st.cache_resource``) if the installed Streamlit
# version supports it -- confirm before changing.
image_to_text = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")

st.title('Image Captioning Application')


# Function to load images from URL
def load_image_from_url(url, timeout=10):
    """Download ``url`` and decode the response body as a PIL image.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot hang the app indefinitely.

    Returns:
        The opened ``PIL.Image.Image``, or ``None`` if the download or the
        decoding failed (the error is reported to the user via ``st.error``).
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Fail early on 4xx/5xx instead of handing an error page to PIL.
        response.raise_for_status()
        return Image.open(BytesIO(response.content))
    except Exception as e:  # UI boundary: report any failure, never crash
        st.error(f"Error loading image from URL: {e}")
        return None


# Defaults so the "Generate Caption" handler can test these names no matter
# which input branch ran on this script execution.
image = None
text_input = ""

# User option to select input type: Upload, URL or Text
input_type = st.radio("Select input type:", ("Upload Image", "Image URL", "Text"))

if input_type == "Upload Image":
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
    if uploaded_file is not None:
        try:
            image = Image.open(uploaded_file)
        except OSError as e:
            # PIL raises an OSError subclass for unreadable/corrupt images.
            st.error(f"Error loading uploaded image: {e}")
        else:
            st.image(image, caption='Uploaded Image', use_column_width=True)
elif input_type == "Image URL":
    image_url = st.text_input("Enter the image URL here:", "")
    if image_url:
        image = load_image_from_url(image_url)
        if image is not None:
            st.image(image, caption='Image from URL', use_column_width=True)
elif input_type == "Text":
    # There is no image to display in this mode; only collect the text.
    text_input = st.text_input("Enter text here:", "")

# Generate caption button
if st.button('Generate Caption'):
    if image is not None:
        with st.spinner("Generating caption..."):
            # Pass the already-decoded PIL image straight to the pipeline:
            # no temp file shared between sessions, no second download.
            result = image_to_text(image)
        if result:
            generated_text = result[0]['generated_text']
            st.success(f'Generated Caption: {generated_text}')
        else:
            st.error("Failed to generate caption.")
    elif text_input:
        # NOTE(review): the original assigned the raw text as the result;
        # presumably the text itself is meant to be shown as the caption.
        st.success(f'Generated Caption: {text_input}')
    else:
        st.warning("Please upload an image, enter an image URL or input text")