# Spaces: Sleeping
# Streamlit demo: take a webcam snapshot and display a caption generated by
# the Salesforce BLIP image-captioning model.

import streamlit as st
from PIL import Image
from transformers import AutoProcessor, AutoModelForImageTextToText

MODEL_ID = "Salesforce/blip-image-captioning-large"
# Text prefix handed to the processor together with the image; the generated
# caption continues from it (conditional image captioning).
CAPTION_PROMPT = "A view of a person in"


@st.cache_resource
def load_captioner(model_id=MODEL_ID):
    """Load the processor and model for *model_id* once per server process.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, both ``from_pretrained`` calls would run
    again each time the checkbox is toggled or a picture is taken.

    Args:
        model_id: Hugging Face Hub identifier of the checkpoint to load.

    Returns:
        A ``(processor, model)`` tuple.
    """
    loaded_processor = AutoProcessor.from_pretrained(model_id)
    loaded_model = AutoModelForImageTextToText.from_pretrained(model_id)
    return loaded_processor, loaded_model


processor, model = load_captioner()

# The camera widget stays disabled until the user explicitly opts in.
enable = st.checkbox("Enable camera")
picture = st.camera_input("Take a picture", disabled=not enable)

if picture:
    # Normalise to 3-channel RGB before preprocessing, as the BLIP model card
    # example does.
    raw_image = Image.open(picture).convert("RGB")

    inputs = processor(raw_image, CAPTION_PROMPT, return_tensors="pt")
    out = model.generate(**inputs)
    st.write(processor.decode(out[0], skip_special_tokens=True))