|
import gradio as gr
import torch
from transformers import CLIPProcessor, CLIPModel
|
|
|
# Hugging Face checkpoint identifier shared by the model weights and the
# matching preprocessor, kept in one place so the two cannot drift apart.
_CLIP_CHECKPOINT = "openai/clip-vit-base-patch16"

# Both objects are created once at import time and reused by every call.
processor = CLIPProcessor.from_pretrained(_CLIP_CHECKPOINT)
model = CLIPModel.from_pretrained(_CLIP_CHECKPOINT)
|
|
|
|
|
def calculate_score(image, text):
    """Score an image against semicolon-separated candidate labels.

    Args:
        image: The image to score, passed unchanged to the module-level
            ``processor``. ``None`` (no image supplied) yields an empty
            result instead of an error.
        text: Candidate labels separated by ``;``. Whitespace around each
            label is stripped and empty entries are dropped. ``None`` is
            treated like an empty string.

    Returns:
        A dict mapping each label to its image-text logit divided by 100,
        as a plain Python float. The dict is empty when there is no image
        or no usable label. Repeated labels collapse into a single key.
    """
    labels = [part.strip() for part in (text or "").split(";")]
    labels = [label for label in labels if label]

    # Nothing to compare: bail out before touching the model.
    if image is None or not labels:
        return {}

    inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)

    # Inference only, so skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)

    # Row 0 holds the scores of the single input image against every label;
    # tolist() yields plain floats rather than tensor/NumPy scalars.
    image_logits = outputs.logits_per_image[0].tolist()

    # NOTE(review): dividing by 100 presumably undoes CLIP's learned logit
    # scale (~100) to get back to a cosine-similarity-like range -- confirm
    # against model.logit_scale.
    return {label: score / 100.0 for label, score in zip(labels, image_logits)}
|
|
|
|
|
if __name__ == "__main__":
    # Wire calculate_score into a simple web UI: an image and a text box as
    # inputs, and a label widget fed by the returned label -> score mapping.
    demo = gr.Interface(
        fn=calculate_score,
        inputs=["image", "text"],
        outputs="label",
    )
    # Start the Gradio server with its default settings.
    demo.launch()