import gradio as gr
from transformers import pipeline
import numpy as np
from PIL import Image

# Hub identifier of the model handed to the zero-shot image-classification
# pipeline. The pipeline is built once at import time and reused by every
# request.
checkpoint = "openai/clip-vit-base-patch32"
classifier = pipeline(task="zero-shot-image-classification", model=checkpoint)

# Candidate labels used when the caller supplies no usable labels.
_DEFAULT_LABELS = (
    'A page of printed text',
    'A page of handwritten text',
    'A blank page with no text',
    'A cover of a book',
    'A page of a book that contains a large illustration',
    'A page that features a table with multiple columns and rows',
)

def _parse_labels(labels_text):
    """Split semicolon-separated text into clean, unique, non-empty labels."""
    if not labels_text:
        return []
    stripped = (part.strip() for part in labels_text.split(";"))
    # dict.fromkeys drops duplicates while keeping first-seen order; duplicate
    # candidates would otherwise collapse into one key in the result dict.
    return list(dict.fromkeys(label for label in stripped if label))

def shot(image, labels_text):
    """Classify an image against a set of candidate text labels.

    Args:
        image: The image to classify, passed through to ``classifier``.
        labels_text: Candidate labels separated by semicolons. Whitespace and
            newlines around each label are ignored, as are empty entries and
            duplicates. If ``None``, empty, or containing no usable label,
            the built-in default labels are used instead.

    Returns:
        A dict mapping each label reported by the classifier to its score.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Surface a readable message in the UI instead of an opaque
        # pipeline failure.
        raise gr.Error("Please upload an image or choose one of the examples.")

    labels = _parse_labels(labels_text) or list(_DEFAULT_LABELS)
    results = classifier(image, candidate_labels=labels)
    return {result["label"]: result["score"] for result in results}
    
# Build the web UI: an image input plus a free-text box of semicolon-separated
# candidate labels, wired to `shot`, with the result shown as a label output.
# Every example row supplies only an image file name and leaves the labels
# input as None.
demo = gr.Interface(
    fn=shot,
    inputs=[
        gr.Image(type="pil"),
        gr.Textbox(
            label="Labels",
            info="Separated by a semicolon (;)",
            lines=6,
            # Adjacent literals concatenate into the same multi-line default
            # text (note the space after the first semicolon).
            value=(
                "A page of printed text; \n"
                "A page of handwritten text;\n"
                "A blank page with no text;\n"
                "A cover of a book;\n"
                "A page of a book that contains a large illustration;\n"
                "A page that features a table with multiple columns and rows"
            ),
        ),
    ],
    outputs="label",
    examples=[
        [file_name, None]
        for file_name in (
            "Journalsdateboo00DeanZ_0177.jpg",
            "newmexicobotani00newmb_0084.jpg",
            "easternareacrui00natic_0004.jpg",
            "1945fieldnotesla00klau_0318.jpg",
            "sturmsfiguresofp01stur_0263.jpg",
        )
    ],
    description="Upload an image of a scanned document page, or choose one of the examples below",
    title="Zero-shot Image Classification of BHL Images",
)

demo.launch()