import gradio as gr
import torch
import requests

from timm import create_model
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform

# User-facing copy for the Gradio page. All three strings are passed to
# gr.Interface at the bottom of this file (title / description / article).

# Heading for the demo.
title = "Image Classifier Four -- Timm Resnet-50"
# Explanatory text for the demo; tells the user how to read the confidence bars.
# The triple-quoted literal ends with a newline.
description = """This machine has vision. It can see objects and concepts in an image. To test the machine, upload or drop an image, submit and read the results. The results comprise a list of words that the machine sees in the image. Beside a word, the length of the bar indicates the confidence with which the machine sees the word. The longer the bar, the more confident the machine is.
"""
# Markdown attribution linking to the Space this app was modelled on.
article = "This app was made by following [this guys' space](https://huggingface.co/spaces/nateraw/gradio-demo)."

# Plain-text file of class names, one per line. predict_fn indexes LABELS by
# the model's output class index, so the line order is assumed to match the
# model's class ordering.
IMAGENET_1K_URL = "https://storage.googleapis.com/bit_models/ilsvrc2012_wordnet_lemmas.txt"

# Download the labels once at start-up.
# - timeout: without it a stalled connection would block start-up forever.
# - raise_for_status(): fail loudly on an HTTP error instead of silently
#   turning an error page into a list of bogus "labels".
_labels_response = requests.get(IMAGENET_1K_URL, timeout=30)
_labels_response.raise_for_status()
# splitlines() also copes with CRLF line endings, which split("\n") would
# leave as trailing "\r" characters on every label.
LABELS = _labels_response.text.strip().splitlines()

# Pretrained ResNet-50 from timm, switched to evaluation mode because this
# script only runs inference (nn.Module.eval() returns the module itself).
model = create_model('resnet50', pretrained=True).eval()

# Build the preprocessing pipeline from the data config timm resolves for
# this model, so inputs are prepared the way the model's config specifies.
transform = create_transform(**resolve_data_config({}, model=model))

def predict_fn(img):
    """Classify an image and return its three most probable labels.

    Args:
        img: Image object supporting ``convert('RGB')``. The Gradio input in
            this file is configured with ``type='pil'``, so this is expected
            to be a PIL image.

    Returns:
        dict mapping a label string from ``LABELS`` to its softmax
        probability (a Python float) for the top three classes.
    """
    # Force RGB mode before handing the image to the preprocessing pipeline.
    rgb_image = img.convert('RGB')
    # unsqueeze(0) adds a leading dimension so the model receives a batch of one.
    batch = transform(rgb_image).unsqueeze(0)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(batch)

    # Take the single item of the batch and normalise its scores to probabilities.
    probs = logits[0].softmax(dim=0)
    top = torch.topk(probs, k=3)

    class_ids = top.indices.tolist()
    scores = top.values.tolist()
    return {LABELS[class_id]: score for class_id, score in zip(class_ids, scores)}
  
# Assemble the web UI: a single image input handed to predict_fn as a PIL
# image, with the returned {label: confidence} dict shown by the built-in
# 'label' output component.
# NOTE(review): `gr.inputs.Image` is the legacy input namespace; newer Gradio
# releases are believed to expose this as `gr.Image` -- confirm against the
# pinned Gradio version before upgrading.
demo = gr.Interface(
    fn=predict_fn,
    inputs=gr.inputs.Image(type='pil'),
    outputs='label',
    title=title,
    description=description,
    article=article,
)

# Start serving the app.
demo.launch()