"""Gradio demo: read the time shown on a cropped analog clock image.

A ResNet-50 with a 720-way classification head is restored from a local
checkpoint.  The predicted class index is decoded as
``hour = index // 60`` and ``minute = index % 60``.
"""

import functools

import cv2
import einops
import gradio as gr
import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models

# Checkpoint holding the state dict for the modified ResNet-50.
CHECKPOINT_PATH = 'full+++++.pth'
# One output class per (hour, minute) pair: 12 * 60 = 720.
MINUTES_PER_HOUR = 60
NUM_CLASSES = 12 * MINUTES_PER_HOUR
# Spatial size (width, height) the image is resized to before inference.
INPUT_SIZE = (224, 224)


@functools.lru_cache(maxsize=1)
def _load_model():
    """Build the network, restore its weights, and return ``(model, device)``.

    The result is cached so the checkpoint is read from disk only once
    instead of on every prediction request.  Loading is lazy (first call),
    so a missing checkpoint surfaces at prediction time, not at import time;
    a failed load is not cached and is retried on the next call.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = models.resnet50()
    # Swap the stock classification head for a NUM_CLASSES-way head.
    model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)

    # NOTE(review): torch.load unpickles the file; only load checkpoints from
    # a trusted source (consider weights_only=True if the installed torch
    # version supports it).
    state_dict = torch.load(CHECKPOINT_PATH, map_location=device)
    model.load_state_dict(state_dict)

    model.to(device)
    model.eval()  # inference mode for batch-norm layers; set once here
    return model, device


def predict(img: np.ndarray) -> str:
    """Return the time read from a clock image as an ``'H:MM'`` string.

    Args:
        img: Image array in height x width x channel layout (the layout the
            ``'h w c'`` rearrange pattern below expects).
            Presumably 3-channel RGB as delivered by the Gradio image
            input -- TODO confirm against the training pipeline.

    Returns:
        The predicted time, hour unpadded and minute zero-padded to two
        digits, e.g. ``'3:07'``.

    Raises:
        ValueError: If ``img`` is ``None`` (no image was supplied).
    """
    if img is None:
        # Fail with a readable message instead of an opaque OpenCV error.
        raise ValueError('No image provided; please upload a clock image.')

    model, device = _load_model()

    # Resize and scale pixel values by 1/255.
    scaled = cv2.resize(img, INPUT_SIZE) / 255.
    # HWC -> CHW, then add a leading batch axis of size 1.
    chw = einops.rearrange(scaled, 'h w c -> c h w')
    batch = np.ascontiguousarray(chw[np.newaxis])
    tensor = torch.as_tensor(batch, dtype=torch.float32, device=device)

    with torch.no_grad():
        logits = model(tensor)

    # Only the top-1 class is needed, so argmax replaces a full argsort.
    best_class = int(torch.argmax(logits, dim=1)[0].item())
    hour, minute = divmod(best_class, MINUTES_PER_HOUR)
    return f'{hour}:{minute:02d}'


# `gr.inputs` is a deprecated namespace that newer Gradio releases removed;
# prefer the top-level component when it exists and fall back otherwise.
inputs = gr.Image() if hasattr(gr, 'Image') else gr.inputs.Image()

io = gr.Interface(
    fn=predict,
    description='Note that this model ingests clocks that are already cropped, i.e. we do not run object detection.',
    title="It's About Time: Analog Clock Reading in the Wild",
    inputs=inputs,
    examples=['d1.png', 'd2.png'],
    outputs="text",
)
io.launch(share=True)