import torch
import gradio as gr
import json
from torchvision import transforms
import torch.nn.functional as F

# Location of the serialized TorchScript model, relative to the working directory.
TORCHSCRIPT_PATH = "res/screensim-resnet-uda+web350k.torchscript"
# Size handed to transforms.Resize; a 2-tuple is interpreted as (height, width).
IMG_SIZE = (256, 128)

# Input tensors in this file are never moved to another device, so pin the
# weights to the CPU regardless of the device the model was exported from.
model = torch.jit.load(TORCHSCRIPT_PATH, map_location="cpu")
# The model is only used for inference here.
model.eval()

# Preprocessing applied to every input image before it is fed to the model.
img_transforms = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
    # (x - 0.5) / 0.5 per channel; expects exactly three channels.
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
    
def predict(img1, img2, thresh=0.35):
    """Compare two screenshots and report whether they show the same screen.

    Each image is preprocessed with ``img_transforms`` and passed through
    ``model``; the norm of the difference between the two model outputs is
    used as the distance between the screenshots.

    Args:
        img1: First screenshot as a PIL image.
        img2: Second screenshot as a PIL image.
        thresh: Distances strictly below this value are labelled
            "same screen".

    Returns:
        A ``(distance, verdict)`` tuple of strings: the distance formatted
        to three decimals, and either "same screen" or "different screens".

    Raises:
        gr.Error: If either image is missing.
    """
    # A cleared image input arrives as None; fail with a readable message
    # instead of an opaque error from inside the transforms.
    if img1 is None or img2 is None:
        raise gr.Error("Please provide both images before comparing.")

    def embed(img):
        # The normalization step is defined for three channels, so coerce
        # grayscale / RGBA / palette images to RGB first.
        batch = img_transforms(img.convert("RGB")).unsqueeze(0)
        return model(batch)

    # Pure inference: skip autograd bookkeeping for the forward passes.
    with torch.no_grad():
        diff = torch.linalg.norm(embed(img1) - embed(img2))

    distance = diff.item()
    verdict = "same screen" if distance < thresh else "different screens"
    return f"{distance:.3f}", verdict
    
# Example rows offered in the UI: first image, second image, threshold.
example_imgs = [
    ["res/example_pair1.jpg", "res/example_pair2.jpg", 0.35],
    ["res/example_pair1.jpg", "res/example.jpg", 0.35],
]

# Input widgets, listed in the positional order of predict's parameters.
_input_widgets = [
    gr.Image(type="pil"),
    gr.Image(type="pil"),
    gr.Slider(0.2, 0.5, step=0.05, value=0.35),
]

# Two text outputs: the formatted distance and the same/different verdict.
interface = gr.Interface(
    fn=predict,
    inputs=_input_widgets,
    outputs=["text", "text"],
    examples=example_imgs,
)

interface.launch()