"""Gradio demo: classify an image with a pretrained ResNet-50 (timm).

The app downloads the ImageNet-1k label list, loads the model once at
start-up and serves a web UI that shows the five most probable labels
for an uploaded image.
"""

import requests
import gradio as gr
import torch
from timm import create_model
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform

# URL of the ImageNet-1k label list (plain text, one label per line).
IMAGENET_1k_URL = "https://storage.googleapis.com/bit_models/ilsvrc2012_wordnet_lemmas.txt"

# Fetch the labels once at start-up. The timeout keeps a stalled connection
# from hanging the app forever, and raise_for_status() stops an HTTP error
# page from silently being used as the label list.
_labels_response = requests.get(IMAGENET_1k_URL, timeout=30)
_labels_response.raise_for_status()
# NOTE(review): assumes line order matches the model's output class indices
# -- confirm against the checkpoint in use.
LABELS = _labels_response.text.strip().split('\n')

# Pretrained ResNet-50 plus the preprocessing pipeline timm resolves for it.
model = create_model('resnet50', pretrained=True)
transform = create_transform(**resolve_data_config({}, model=model))

# The model is only used for inference, so switch it to evaluation mode.
model.eval()


def predict(img):
    """Return the five most probable labels for an image.

    Args:
        img: Input image; must provide ``convert('RGB')`` (the Gradio input
            below is configured with ``type='pil'``).

    Returns:
        A dict mapping each of the top-5 label strings to its softmax
        probability as a Python float.
    """
    img = img.convert('RGB')
    # The transform yields a single image tensor; add a leading batch axis.
    batch = transform(img).unsqueeze(0)
    with torch.no_grad():
        out = model(batch)
        # Softmax over the class axis of the only item in the batch.
        probability = torch.nn.functional.softmax(out[0], dim=0)
    values, indices = torch.topk(probability, k=5)
    # tolist() gives plain ints/floats, so LABELS is indexed with real ints.
    return {LABELS[i]: v for i, v in zip(indices.tolist(), values.tolist())}


# Build the interface first and launch it exactly once. Previously `iface`
# was bound to the return value of launch(), not to the Interface, so the
# second `iface.launch()` call could not work.
# NOTE(review): the `gr.inputs` namespace is deprecated in newer Gradio
# releases -- confirm the installed version, or migrate to `gr.Image`.
iface = gr.Interface(fn=predict,
                     inputs=gr.inputs.Image(type='pil'),
                     outputs="label")
iface.launch()