import functools

import gradio as gr
import joblib
import open_clip
import torch

@functools.lru_cache(maxsize=1)
def _load_clip(device_type):
    """Build the CLIP ViT-L/14 image encoder and its preprocessing transform.

    The result is cached so the encoder is created once per device type
    instead of on every prediction request.

    Args:
        device_type: Device type string (e.g. ``"cpu"``) to place the model on.

    Returns:
        A ``(clip_model, preprocess)`` tuple.
    """
    clip_model, _, preprocess = open_clip.create_model_and_transforms(
        'ViT-L-14', pretrained='openai', device=torch.device(device_type)
    )
    # Inference only: make sure no train-mode layers are active.
    clip_model.eval()
    return clip_model, preprocess


@functools.lru_cache(maxsize=1)
def _load_classifier():
    """Load the classifier stored in ``model.pkl`` once and reuse it.

    NOTE: joblib files are pickle-based; ``model.pkl`` must come from a
    trusted source because loading it can execute arbitrary code.
    """
    return joblib.load('model.pkl')


def predict(input_image):
    """Classify an image as a map or not a map.

    The image is embedded with the CLIP image encoder, the embedding is
    L2-normalised, and the classifier loaded from ``model.pkl`` decides on
    the label.

    Args:
        input_image: The image to classify, in a form accepted by the
            open_clip preprocessing transform, or ``None`` when nothing
            was submitted.

    Returns:
        ``"No image submitted..."`` if ``input_image`` is ``None``;
        otherwise ``"Map"`` when the classifier predicts label 1 and
        ``"No map"`` for any other label.
    """
    if input_image is None:
        return "No image submitted..."

    device = torch.device("cpu")
    # When running on GPUs, use instead:
    # device = torch.device("cuda")

    # Both loaders are cached, so only the first request pays the load cost.
    clip_model, preprocess = _load_clip(device.type)

    # Add a batch dimension of one for the encoder.
    image = preprocess(input_image).unsqueeze(0).to(device)

    with torch.amp.autocast(device_type=device.type), torch.no_grad():
        image_features = clip_model.encode_image(image)
        # Scale the embedding to unit length.
        image_features /= image_features.norm(dim=-1, keepdim=True)

    embedding = image_features[0].cpu().float().numpy()

    # One sample in, one prediction out: inspect that single label explicitly.
    result = _load_classifier().predict([embedding])

    return "Map" if result[0] == 1 else "No map"

# Texts shown on the demo page; both contain HTML that is passed to Gradio as-is.
_DESCRIPTION = "The model predicts whether an image is a map or not. It takes about 30 seconds since it runs on a CPU (it is much faster on a GPU). Although the validation accuracy of the model is 98.5%, some outputs may not be correct. In this case, feel free to contact <a href='https://schnuerer.dev/contact'>me</a>."
_ARTICLE = "More information: <a href='https://huggingface.co/datasets/sraimund/MapPool'>MapPool - Bubbling up an extremely large corpus of maps for AI</a><br>Keywords: map identification, map recognition, map classification"

# Single-image input (delivered to `predict` as a PIL image), plain-text output.
_image_input = gr.Image(label="Input image", type="pil")

demo = gr.Interface(
    fn=predict,
    inputs=_image_input,
    outputs="text",
    title="MapPool model",
    description=_DESCRIPTION,
    article=_ARTICLE,
)

# Start the web app as soon as the script is executed.
demo.launch()