import gradio as gr
import numpy as np
from PIL import Image
from transformers import pipeline

# Build the zero-shot image classifier once at import time so every
# request handled by the interface reuses the same loaded model.
pipe = pipeline(
    task="zero-shot-image-classification",
    model="openai/clip-vit-base-patch32",
)

# Define the function that will be used by the interface
def zero_shot_classification(image, labels_text):
    """Score an image against user-supplied candidate labels.

    Args:
        image: Pixel data from the interface's image input; it is cast
            with ``np.uint8`` and handed to ``Image.fromarray``. ``None``
            (no image supplied) is tolerated.
        labels_text: Candidate labels as a single comma-separated string,
            e.g. ``"corn, wheat, rice"``. Whitespace around each label is
            ignored, as are blank entries and repeated labels.

    Returns:
        A dict mapping each candidate label to the score reported by the
        module-level ``pipe``. The dict is empty when no image or no
        usable label was provided.
    """
    # Normalize the label list: trim whitespace around each entry, drop
    # blanks left by stray/trailing commas, and remove duplicates
    # (dict.fromkeys keeps the first-seen order).
    stripped = (label.strip() for label in (labels_text or "").split(","))
    labels = list(dict.fromkeys(label for label in stripped if label))

    # Nothing to classify: return an empty result instead of failing
    # inside numpy or the pipeline.
    if image is None or not labels:
        return {}

    # Convert the incoming pixel data to an RGB PIL image for the pipeline.
    pil_image = Image.fromarray(np.uint8(image)).convert("RGB")

    # Each label is substituted into the hypothesis template before scoring.
    res = pipe(
        images=pil_image,
        candidate_labels=labels,
        hypothesis_template="This is a photo of a {}",
    )

    # Return a dictionary mapping labels to scores
    return {dic["label"]: dic["score"] for dic in res}
    
# Wire the classifier into a Gradio UI: an image and a text box go in,
# and the returned label-to-score mapping is shown by the "label" output.
iface = gr.Interface(
    fn=zero_shot_classification,
    inputs=["image", "text"],
    outputs="label",
    title="Zero-shot Image Classification",
    description="Please add a picture and a list of labels separated by commas to see the zero-shot classification capabilities",
    examples=[["corn.jpg", "corn,wheat,rice"]],
)

# Start serving the UI
iface.launch()