# Spaces status: Sleeping (page-header residue from the hosting site, not part of the app)
"""Streamlit app that captions an uploaded image with the BLIP model.

The user uploads a JPG/PNG image and picks a captioning mode:

* "Conditional"   - the caption is generated as a continuation of a
  user-supplied text prompt (e.g. "a photo of").
* "Unconditional" - the caption is generated from the image alone.
"""
import streamlit as st
from PIL import Image
import requests  # not used in the visible script; kept in case other code relies on it
from transformers import BlipProcessor, BlipForConditionalGeneration

# Hugging Face Hub checkpoint used for both the processor and the model.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-large"


@st.cache_resource
def _load_blip():
    """Load the BLIP processor and model once and reuse them across reruns.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the checkpoint would be re-loaded each time.

    Returns:
        A ``(processor, model)`` tuple.
    """
    blip_processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
    blip_model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)
    return blip_processor, blip_model


# Load the BLIP model (module-level names kept as in the original script).
processor, model = _load_blip()


def _generate_caption(image, prompt=None):
    """Return a caption for *image*.

    Args:
        image: The RGB PIL image to caption.
        prompt: Optional text the caption should continue from. When
            ``None``, the caption is generated from the image alone.

    Returns:
        The decoded caption string with special tokens removed.
    """
    if prompt is None:
        inputs = processor(image, return_tensors="pt")
    else:
        inputs = processor(image, prompt, return_tensors="pt")
    out = model.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True)


# Streamlit app
st.title("Image Captioning with BLIP")

# Uploading the image
uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_image is not None:
    # Normalise to RGB so images with other modes are handled uniformly.
    image = Image.open(uploaded_image).convert('RGB')
    st.image(image, caption='Uploaded Image', use_column_width=True)

    # Perform conditional or unconditional image captioning
    captioning_mode = st.selectbox("Captioning Mode", ["Conditional", "Unconditional"])

    if captioning_mode == "Conditional":
        text = st.text_input("Provide a condition for the captioning (e.g., 'a photo of', 'an illustration of'): ", "a photo of")
        if text:  # Only proceed if the user has provided a text
            caption = _generate_caption(image, text)
            st.write(f"Generated Caption: {caption}")
        else:
            # Previously an empty prompt silently produced no output at all.
            st.warning("Enter a condition to generate a conditional caption.")
    else:  # Unconditional captioning
        caption = _generate_caption(image)
        st.write(f"Generated Caption: {caption}")