import gradio as gr
import torch
from transformers import CLIPProcessor, CLIPModel

# Load the pretrained CLIP checkpoint and its paired processor once at
# startup, so each request only pays for inference.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")


def calculate_score(image, text):
    """Score an image against a semicolon-separated list of text labels."""
    labels = [label.strip() for label in text.split(";")]
    labels = [label for label in labels if label]
    if not labels:
        return {}

    inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)
    with torch.no_grad():  # inference only, so skip gradient tracking
        outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image.numpy()

    # logits_per_image holds the image/text cosine similarities multiplied by
    # the model's learned logit scale (roughly 100 for the released OpenAI
    # checkpoints), so dividing by 100 approximately recovers the similarity.
    return {
        label: float(score) / 100.0
        for label, score in zip(labels, logits_per_image[0])
    }
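

# A quick sanity check without launching the UI (a sketch only: it assumes a
# local "cat.jpg" exists, the same hypothetical file the example below uses):
#
#   from PIL import Image
#   print(calculate_score(Image.open("cat.jpg"), "a cat sitting; a dog running"))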


if __name__ == "__main__":
    # Example shipped with the demo: an image path plus semicolon-separated
    # candidate captions.
    cat_example = [
        "cat.jpg",
        "a cat stuck in a door; a cat in the air; a cat sitting; a cat standing; a cat is entering the matrix; a cat is entering the void",
    ]

    demo = gr.Interface(
        fn=calculate_score,
        inputs=["image", "text"],
        outputs="label",
        examples=[cat_example],
        allow_flagging="never",
        description="# CLIP Score",
        article="Calculate the [CLIP](https://openai.com/blog/clip/) score of a given image and text.",
        cache_examples=True,
    )

    demo.launch()
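
# Run with `python app.py`; by default Gradio serves the demo locally on
# http://127.0.0.1:7860 (pass server_port to demo.launch() to change it).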