|
import streamlit as st |
|
|
|
|
|
from PIL import Image |
|
import requests |
|
|
|
from transformers import CLIPProcessor, CLIPModel |
|
|
|
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32") |
|
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32") |
|
|
|
url = "http://images.cocodataset.org/val2017/000000039769.jpg" |
|
image = Image.open(requests.get(url, stream=True).raw) |
|
|
|
inputs = processor(text=["a photo of a cat", "a photo of a dog"], images=image, return_tensors="pt", padding=True) |
|
|
|
outputs = model(**inputs) |
|
st.write(outputs) |
|
|