import gradio as gr
import torch
from transformers import AutoTokenizer, CLIPImageProcessor, CLIPModel

# ``RawImage`` is a transformers.js helper that the Python ``transformers``
# package is not expected to export; keep the name bound without letting a
# failed import abort start-up.
try:
    from transformers import RawImage
except ImportError:
    RawImage = None


# Checkpoint shared by the model, the tokenizer and the image processor.
# NOTE(review): "Xenova/..." repositories are typically ONNX exports aimed at
# transformers.js -- confirm this one also ships weights loadable by CLIPModel.
MODEL_ID = "Xenova/mobileclip_blt"

# Candidate captions the uploaded image is scored against.
LABELS = ["cats", "dogs", "birds"]

model = CLIPModel.from_pretrained(MODEL_ID)
model.eval()  # inference only: disable dropout and similar training behaviour
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
processor = CLIPImageProcessor.from_pretrained(MODEL_ID)


def compute_probability(image):
    """Score an image against ``LABELS`` with CLIP.

    Args:
        image: The picture handed over by the Gradio ``"image"`` input
            (presumably an H x W x C array -- confirm against the component's
            configured ``type``). ``None`` means nothing was uploaded.

    Returns:
        A dict ``{"probability": p}`` where ``p`` is the softmax probability
        the model assigns to the first entry of ``LABELS`` for this image.

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    if image is None:
        raise ValueError("No image was provided.")

    image_inputs = processor(images=image, return_tensors="pt")
    text_inputs = tokenizer(
        LABELS,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )

    # No gradients are needed for inference; skipping them saves memory.
    with torch.no_grad():
        outputs = model(**text_inputs, **image_inputs)

    # logits_per_image already holds the temperature-scaled cosine
    # similarities between the image and every label, so a softmax over the
    # label axis yields the per-label probabilities.
    probabilities = outputs.logits_per_image.softmax(dim=-1).tolist()

    # Single image in the batch; report the score of the first label.
    return {"probability": probabilities[0][0]}


# Collect the UI settings first so the Interface call stays a one-liner:
# one image upload in, the function's result rendered as plain text out.
_interface_options = {
    "fn": compute_probability,
    "inputs": "image",
    "outputs": "text",
    "title": "CLIP Probability",
    "description": "Upload an image and get the probability scores!",
}
iface = gr.Interface(**_interface_options)

# Start serving the app.
iface.launch()