import torch
import gradio as gr

# Prefer the GPU whenever PyTorch reports CUDA as available; otherwise use the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pull the pretrained EfficientNet-B0 entry point and NVIDIA's convnet
# processing utilities from the same Torch Hub repository.
efficientnet = torch.hub.load(
    'NVIDIA/DeepLearningExamples:torchhub',
    'nvidia_efficientnet_b0',
    pretrained=True,
)
utils = torch.hub.load(
    'NVIDIA/DeepLearningExamples:torchhub',
    'nvidia_convnets_processing_utils',
)

# The model is only used for prediction: switch it to eval mode, then move
# it onto the selected device.
efficientnet.eval()
efficientnet.to(device)

def _load_rgb_image(img):
    """Return *img* as a 3-channel RGB PIL image.

    Args:
        img: Either an object that already exposes a PIL-style ``convert``
            method, or a path / binary file object referring to an image
            file (for example the temp-file handle an upload widget passes).

    Returns:
        The image converted to mode ``"RGB"``.
    """
    if not hasattr(img, "convert"):
        # Only needed when the caller hands over a file instead of an image.
        from PIL import Image

        # Image.open accepts both file names and binary file objects.
        img = Image.open(img)
    # The normalisation constants used by `inference` are per RGB channel, so
    # grayscale, palette and RGBA inputs must become exactly three channels.
    return img.convert("RGB")


def inference(img):
    """Classify one image with the module-level EfficientNet model.

    The image is resized to 256, centre-cropped to 224, converted to a float
    tensor, normalised with fixed per-channel mean/std values and run through
    ``efficientnet`` on ``device``.

    Args:
        img: A PIL image, or a path / binary file object for an image file.

    Returns:
        The value returned by ``utils.pick_n_best`` for the softmax
        probabilities with ``n=5``.
    """
    # Imported here because this module has no top-level torchvision import;
    # without it the name `transforms` is undefined.
    from torchvision import transforms

    img_transforms = transforms.Compose(
        [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor()]
    )

    # mean and std are not multiplied by 255 as they are in the training
    # script: the torch dataloader reads data as bytes, whereas converting a
    # PIL image with ToTensor yields floats in the [0, 1] range.
    mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)

    # Add the batch dimension so the tensor is (1, 3, H, W) like mean/std.
    pixels = img_transforms(_load_rgb_image(img)).unsqueeze(0)

    with torch.no_grad():
        # `pixels` is a fresh tensor, so normalising it in place is safe.
        batch = pixels.sub_(mean).div_(std).to(device)
        output = torch.nn.functional.softmax(efficientnet(batch), dim=1)

    return utils.pick_n_best(predictions=output, n=5)
  
# `inference` passes the uploaded image straight into image transforms
# (Resize / CenterCrop / ToTensor), which operate on PIL images rather than
# temp-file handles, so ask Gradio to deliver a PIL image.
gr.Interface(
    fn=inference,
    inputs=gr.inputs.Image(type="pil"),
    outputs="text",
).launch()