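# Image-captioning demo built with Streamlit and BLIP.
# Run locally with: streamlit run app.py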
import streamlit as st
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
# Load the BLIP processor and model once; st.cache_resource keeps them in memory
# across Streamlit reruns instead of reloading them on every widget interaction.
@st.cache_resource
def load_blip():
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
    return processor, model

processor, model = load_blip()
# Streamlit app
st.title("Image Captioning with BLIP")
# Image upload widget
uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
if uploaded_image is not None:
    image = Image.open(uploaded_image).convert('RGB')
    st.image(image, caption='Uploaded Image', use_column_width=True)

    # Choose between conditional (prompt-guided) and unconditional captioning
    captioning_mode = st.selectbox("Captioning Mode", ["Conditional", "Unconditional"])

    if captioning_mode == "Conditional":
        text = st.text_input("Provide a condition for the captioning (e.g., 'a photo of', 'an illustration of'): ", "a photo of")
        if text:  # Only proceed if the user has provided a text prompt
            inputs = processor(image, text, return_tensors="pt")
            out = model.generate(**inputs)
            caption = processor.decode(out[0], skip_special_tokens=True)
            st.write(f"Generated Caption: {caption}")
    else:  # Unconditional captioning
        inputs = processor(image, return_tensors="pt")
        out = model.generate(**inputs)
        caption = processor.decode(out[0], skip_special_tokens=True)
        st.write(f"Generated Caption: {caption}")