# CLIP / app.py
import gradio as gr
from transformers import CLIPProcessor, CLIPModel

# Load the pretrained CLIP model and its processor (tokenizer + image transforms).
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
def calculate_CLIP_score(image, text):
    # Split the semicolon-separated prompts and drop empty entries.
    words = text.split(";")
    words = [w.strip() for w in words]
    words = list(filter(None, words))
    if len(words) == 0:
        return dict()

    # Encode the image and all prompts, then score each image-text pair with CLIP.
    inputs = processor(text=words, images=image, return_tensors="pt", padding=True)
    outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image.detach().numpy()

    # CLIP multiplies cosine similarities by its logit scale (~100), so divide
    # by 100 to bring the scores back to a roughly 0-1 range for the Label output.
    results_dict = {
        label: score / 100.0
        for label, score in zip(words, logits_per_image[0])
    }
    return results_dict
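
# A minimal sketch of calling the scorer directly (outside the Gradio UI),
# assuming one of the example images below is available locally; any PIL
# image would work the same way:
#
#   from PIL import Image
#   scores = calculate_CLIP_score(Image.open("images/two_dogs.jpg"), "two dogs; beach")
#   print(scores)  # maps each prompt to its scaled similarity score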
examples = [
    ["images/two_dogs.jpg", "two dogs playing in the beach; a dog and a dog playing in the beach; beach"],
    ["images/horse_field.jpg", "horse standing in a field; a field; a horse standing"],
    ["images/human.jpg", "a man beside a river; a riverbed; a man"],
]
demo = gr.Interface(
    fn=calculate_CLIP_score,
    inputs=["image", "text"],
    outputs="label",
    examples=examples,
)

demo.launch()