import gradio as gr
from transformers import CLIPProcessor, CLIPModel

# Single source of truth for the checkpoint: the model weights and the
# processor (tokenizer + image preprocessing) must come from the same one.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch16"

# Loaded once at import time so every request reuses the same objects.
model = CLIPModel.from_pretrained(CLIP_CHECKPOINT)
processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)


def calculate_score(image, text):
    """Score an image against semicolon-separated candidate captions with CLIP.

    Args:
        image: Image to score, in whatever form the module-level CLIP
            ``processor`` accepts. ``None`` (no image supplied) yields an
            empty result instead of an error.
        text: Candidate captions separated by ``;``. Whitespace around each
            caption is stripped and blank entries are dropped. ``None`` is
            treated like an empty string.

    Returns:
        A dict mapping each caption to its image-text logit divided by 100,
        as a plain Python ``float``. The dict is empty when there is no
        image or no non-blank caption. Repeated captions collapse into a
        single key.
    """
    # Nothing to compare: bail out before touching the model.
    if image is None or not text:
        return {}

    labels = [label.strip() for label in text.split(";")]
    labels = [label for label in labels if label]
    if not labels:
        return {}

    # Imported locally so this function does not depend on a file-level
    # ``import torch``; repeated imports are a cheap ``sys.modules`` lookup.
    import torch

    inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    # Row 0 holds the logits for the single input image, one per caption.
    image_logits = outputs.logits_per_image.numpy()[0]

    # NOTE(review): the division by 100 presumably undoes CLIP's logit scale
    # to recover a cosine-similarity-like value -- confirm for this checkpoint.
    # ``float(...)`` converts numpy scalars to built-in floats so the result
    # serializes cleanly.
    return {
        label: float(logit / 100.0) for label, logit in zip(labels, image_logits)
    }


if __name__ == "__main__":
    # Captions for the bundled example, joined into the semicolon-separated
    # form that calculate_score splits apart.
    cat_captions = "; ".join(
        (
            "a cat stuck in a door",
            "a cat in the air",
            "a cat sitting",
            "a cat standing",
            "a cat is entering the matrix",
            "a cat is entering the void",
        )
    )

    # Each example row supplies one value per input: the image, then the text.
    example_rows = [["cat.jpg", cat_captions]]

    # Options are gathered first so the Interface construction stays short.
    interface_options = {
        "fn": calculate_score,
        "inputs": ["image", "text"],
        "outputs": "label",
        "examples": example_rows,
        "allow_flagging": "never",
        "description": "# CLIP Score",
        "article": "Calculate the [CLIP](https://openai.com/blog/clip/) score of a given image and text",
        "cache_examples": True,
    }

    demo = gr.Interface(**interface_options)
    demo.launch()