"""Gradio demo: ImageNet classification with NVIDIA's EfficientNet-B0 from torch.hub."""

import torch
import gradio as gr
import torchvision.transforms as transforms

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Pretrained model and its companion post-processing helpers, both fetched
# from the NVIDIA DeepLearningExamples torch.hub repository.
efficientnet = torch.hub.load(
    'NVIDIA/DeepLearningExamples:torchhub',
    'nvidia_efficientnet_b0',
    pretrained=True,
)
utils = torch.hub.load(
    'NVIDIA/DeepLearningExamples:torchhub',
    'nvidia_convnets_processing_utils',
)
efficientnet.eval().to(device)

# Built once at import time instead of on every request.
_IMG_TRANSFORMS = transforms.Compose(
    [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor()]
)

# mean and std are not multiplied by 255 as they are in training script
# torch dataloader reads data into bytes whereas loading directly
# through PIL creates a tensor with floats in [0,1] range
_MEAN = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
_STD = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)


def inference(img):
    """Classify an image and return the model's five best predictions.

    Args:
        img: A PIL image, as delivered by the Gradio ``Image`` input
            configured with ``type="pil"``.

    Returns:
        The value produced by ``utils.pick_n_best`` for the softmax output
        with ``n=5``; it is displayed through the interface's "text" output.
    """
    # The normalisation constants have three channels, so force RGB to cope
    # with grayscale or RGBA uploads.
    img = img.convert("RGB")

    # Resize / crop / to-tensor, then add the batch dimension and normalise
    # on the CPU before moving the batch to the model's device.
    tensor = _IMG_TRANSFORMS(img).unsqueeze(0)
    batch = ((tensor - _MEAN) / _STD).to(device)

    with torch.no_grad():
        output = torch.nn.functional.softmax(efficientnet(batch), dim=1)

    results = utils.pick_n_best(predictions=output, n=5)
    return results


# type="pil" hands `inference` a PIL image, which is what the torchvision
# transforms expect; type="file" would pass a temporary file object instead.
gr.Interface(inference, gr.inputs.Image(type="pil"), "text").launch()