import gradio as gr
from transformers import CLIPProcessor, CLIPModel
# Load the pretrained CLIP model and its processor once at startup.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")


def calculate_CLIP_score(image, text):
    # Split the semicolon-separated prompts and drop empty entries.
    words = [w.strip() for w in text.split(";")]
    words = list(filter(None, words))
    if len(words) == 0:
        return dict()

    # Compute an image-text similarity logit for each prompt.
    inputs = processor(text=words, images=image, return_tensors="pt", padding=True)
    outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image.detach().numpy()

    # Divide by CLIP's logit scale (~100) so the Label component
    # receives scores roughly in the [0, 1] range.
    results_dict = {
        label: score / 100.0
        for label, score in zip(words, logits_per_image[0])
    }
    return results_dict
examples = [
    ["images/two_dogs.jpg", "two dogs playing in the beach; a dog and a dog playing in the beach; beach"],
    ["images/horse_field.jpg", "horse standing in a field; a field; a horse standing"],
    ["images/human.jpg", "a man beside a river; a riverbed; a man"],
]
# Image + semicolon-separated text prompts in, per-prompt scores out as a Label.
demo = gr.Interface(
    fn=calculate_CLIP_score,
    inputs=["image", "text"],
    outputs="label",
    examples=examples,
)

demo.launch()