# CLIP / app.py
import gradio as gr
from transformers import CLIPProcessor, CLIPModel

# Load the pretrained CLIP model and its matching processor.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
def CLIP_calculate_score(image, text):
    # Prompts are separated by semicolons; strip whitespace and drop empty entries.
    words = [w.strip() for w in text.split(";")]
    words = list(filter(None, words))
    if len(words) == 0:
        return dict()

    # Score every prompt against the image in a single forward pass.
    inputs = processor(text=words, images=image, return_tensors="pt", padding=True)
    outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image.detach().numpy()

    # CLIP multiplies cosine similarities by its logit scale (about 100), so
    # dividing by 100 recovers approximate similarities for the Label output.
    results_dict = {
        label: score / 100.0
        for label, score in zip(words, logits_per_image[0])
    }
    return results_dict
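
# Minimal standalone usage sketch (an assumption, not part of the demo flow:
# it requires Pillow and expects the example image path below to exist locally):
#
#   from PIL import Image
#   scores = CLIP_calculate_score(
#       Image.open("images/horse.jpg"),
#       "group of horses running; a dog playing; a horse standing",
#   )
#   # scores maps each prompt to its approximate cosine similarity with the image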

examples = [
    ["images/girl_and_dog.jpg", "a dog playing in the beach; a dog and a girl playing in the beach; a girl playing in the beach"],
    ["images/horse.jpg", "group of horses running; a dog playing; a horse standing"],
    ["images/man_and_cat.jpg", "a man and a cat listening to music; a cat; a man"],
]

demo = gr.Interface(
    fn=CLIP_calculate_score,
    inputs=["image", "text"],
    outputs="label",
    examples=examples,
    # allow_flagging="never",
    # cache_examples=True,
)
demo.launch()