"""Gradio demo that compares two UI screenshots with a TorchScript model.

Both images are passed through the same model; the norm of the difference
between the two model outputs is compared against a user-selectable
threshold to decide whether the screenshots show the same screen.
"""

import json

import gradio as gr
import torch
import torch.nn.functional as F
from torchvision import transforms

TORCHSCRIPT_PATH = "res/screensim-resnet-uda+web350k.torchscript"
# Passed to transforms.Resize, which interprets a 2-sequence as (height, width).
IMG_SIZE = (256, 128)

# Inputs are always built as CPU tensors in `predict`, so pin the weights to
# the CPU as well; this also lets a GPU-saved checkpoint load on CPU-only hosts.
model = torch.jit.load(TORCHSCRIPT_PATH, map_location="cpu")
# Inference only: make sure layers such as BatchNorm/Dropout (if any) run in
# evaluation mode regardless of the mode the module was saved in.
model.eval()

img_transforms = transforms.Compose(
    [
        transforms.Resize(IMG_SIZE),
        transforms.ToTensor(),
        # Maps each channel from [0, 1] to [-1, 1]; expects exactly 3 channels.
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)


def _to_model_input(img):
    """Turn one PIL image into a normalized batch of size 1."""
    # Normalize above is configured for 3 channels, so force RGB to avoid a
    # channel mismatch on RGBA / grayscale / palette images.
    return img_transforms(img.convert("RGB")).unsqueeze(0)


def predict(img1, img2, thresh: float = 0.35):
    """Compare two screenshots and report whether they look like one screen.

    Args:
        img1: First screenshot as a PIL image.
        img2: Second screenshot as a PIL image.
        thresh: Distances strictly below this value are reported as
            "same screen"; anything else as "different screens".

    Returns:
        A 2-tuple of strings: the distance formatted with three decimals,
        and the verdict ("same screen" or "different screens").

    Raises:
        gr.Error: If either image is missing (e.g. nothing was uploaded).
    """
    if img1 is None or img2 is None:
        # Surface a readable message in the UI instead of an opaque
        # TypeError from the transform pipeline.
        raise gr.Error("Please provide both screenshots before comparing.")

    batch1 = _to_model_input(img1)
    batch2 = _to_model_input(img2)

    # No gradients are needed for inference; skipping autograd bookkeeping
    # saves memory and time without changing the forward results.
    with torch.no_grad():
        distance = float(torch.linalg.norm(model(batch1) - model(batch2)))

    verdict = "same screen" if distance < thresh else "different screens"
    return "{:.3f}".format(distance), verdict


example_imgs = [
    ["res/example_pair1.jpg", "res/example_pair2.jpg", 0.35],
    ["res/example_pair1.jpg", "res/example.jpg", 0.35],
]

interface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Image(type="pil"),
        gr.Image(type="pil"),
        gr.Slider(0.2, 0.5, step=0.05, value=0.35),
    ],
    outputs=["text", "text"],
    examples=example_imgs,
)

# Launched at module level, exactly as before, so running this file as the
# app entry script keeps working unchanged.
interface.launch()