ZoomLocation / app.py
greencatted's picture
Use Transformers
17f7d43
raw
history blame
660 Bytes
import streamlit as st
from PIL import Image
from transformers import AutoProcessor, AutoModelForImageTextToText
# Hugging Face Hub checkpoint used for both the processor and the model.
_CAPTION_MODEL_ID = "Salesforce/blip-image-captioning-large"


@st.cache_resource
def _load_captioner():
    """Load the captioning processor and model a single time.

    Streamlit re-executes this script from top to bottom on every widget
    interaction, so calling ``from_pretrained`` at module level would rebuild
    the processor and model on each rerun. ``st.cache_resource`` keeps the
    returned objects alive and hands back the same instances on later reruns.

    Returns:
        A ``(processor, model)`` tuple built from ``_CAPTION_MODEL_ID``.
    """
    return (
        AutoProcessor.from_pretrained(_CAPTION_MODEL_ID),
        AutoModelForImageTextToText.from_pretrained(_CAPTION_MODEL_ID),
    )


# Keep the original module-level names so the rest of the script is unchanged.
processor, model = _load_captioner()
# Camera capture is opt-in: the camera widget is disabled until the box is ticked.
camera_on = st.checkbox("Enable camera")
snapshot = st.camera_input("Take a picture", disabled=not camera_on)

if snapshot:
    # Conditional image captioning: the prompt text is handed to the processor
    # together with the captured photo.
    prompt = "A view of a person in"
    model_inputs = processor(Image.open(snapshot), prompt, return_tensors="pt")

    # Generate caption token ids, then decode the first sequence to plain text.
    token_ids = model.generate(**model_inputs)
    caption = processor.decode(token_ids[0], skip_special_tokens=True)
    st.write(caption)