# CLIP zero-shot image-label scoring demo (Hugging Face Space)
import gradio as gr
from transformers import CLIPProcessor, CLIPModel

# Load the CLIP checkpoint once at module import so every request reuses
# the same weights instead of re-downloading/re-initializing per call.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
def calculate_CLIP_score(image, text):
    """Score *image* against each ';'-separated label in *text* using CLIP.

    Args:
        image: Input image (as provided by the Gradio "image" component).
        text: Semicolon-separated candidate labels; surrounding whitespace
            per label is ignored, as are empty segments.

    Returns:
        Dict mapping each label to ``logits_per_image / 100.0`` — dividing
        by CLIP's logit scale (~100) recovers the cosine similarity.
        Returns ``{}`` when no non-empty labels remain.
    """
    labels = [segment.strip() for segment in text.split(";") if segment.strip()]
    if not labels:
        return {}
    inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)
    outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image.detach().numpy()
    # One score per label: logits_per_image has shape (1, len(labels)).
    return {
        label: score / 100.0
        for label, score in zip(labels, logits_per_image[0])
    }
# Example (image path, labels) pairs shown in the Gradio UI.
examples = [
    ["images/two_dogs.jpg", "two dogs playing in the beach; a dog and a dog playing in the beach; beach"],
    ["images/horse_field.jpg", "horse standing in a field; a field; a horse standing"],
    ["images/human.jpg", "a man beside a river; a riverbed; a man"],
]

demo = gr.Interface(
    fn=calculate_CLIP_score,
    inputs=["image", "text"],
    outputs="label",  # "label" component renders the score dict as ranked labels
    examples=examples,
)

# Guard the launch so importing this module (e.g. for testing) does not
# start the server; Spaces/`python app.py` run it as __main__.
if __name__ == "__main__":
    demo.launch()