# general setup
import os
os.system('pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html')
os.system('pip install opencv-python')
# setup detectron2 logger
import torch, detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
# import some common libraries
import numpy as np
import json, cv2
import pandas as pd
from PIL import Image
from torchvision import transforms
from torchvision import models
from torch import nn
# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
# import gradio
import gradio as gr
# set device
DEVICE = 'cpu'
# load a pretrained ResNet-18
model = models.resnet18(pretrained=True)
num_features = model.fc.in_features
# replace the final fully connected layer with a 5-class modernity head
model.fc = nn.Linear(num_features, 5)
# load the trained parameters
model.load_state_dict(torch.load('model_modernity.pth', map_location=torch.device('cpu')))
# switch the model to evaluation mode
model.eval()
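# illustrative sanity check (an addition to the original script, safe to remove):
# the adapted head should emit one logit per modernity class for a 3x224x224 input
with torch.no_grad():
    assert model(torch.zeros(1, 3, 224, 224)).shape == (1, 5)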
# mean and std of the ImageNet data the ResNet was pretrained on
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
# define transforms
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])
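# illustrative: test_transform maps any PIL RGB image to a normalized
# 3x224x224 float tensor, matching the input the model was trained on, e.g.
# test_transform(Image.new('RGB', (640, 480))).shape == torch.Size([3, 224, 224])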
# define input and outputs
i1 = gr.inputs.Image(type="numpy", label="Input image")
o1 = gr.outputs.Image(type="pil", label="Cropped image")
o2 = gr.outputs.Textbox(label="Modernity score")
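# note: gr.inputs/gr.outputs is the pre-3.x Gradio namespace; on newer Gradio
# versions the equivalents would be gr.Image and gr.Textbox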
# define the function to be called by the gradio interface
def modernity(im):
    # create a detectron2 config and a DefaultPredictor to run inference on the image
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set detection threshold for this model
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    cfg.MODEL.DEVICE = 'cpu'
    predictor = DefaultPredictor(cfg)
    # gradio delivers RGB, but DefaultPredictor expects BGR by default, so flip the channels
    outputs = predictor(im[:, :, ::-1])
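    # outputs["instances"] carries one entry per detection, with pred_classes,
    # pred_masks, scores and pred_boxes fields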
    # get all masks of the input image
    masks = outputs['instances'].pred_masks.to('cpu').numpy()
    # create empty lists for object names and object sizes
    obj = []
    obj_size = []
    # iterate over all detected objects to collect their COCO class names and mask areas
    for idx, data in enumerate(outputs['instances'].pred_classes):
        num = data.item()
        obj.append(MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).thing_classes[num])
        obj_size.append(masks[idx].sum())
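    # illustrative example for a street scene (hypothetical values):
    # obj = ['car', 'person', 'car'], obj_size = [52310, 8100, 12445] pixels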
    # define the output if no automobile is detected
    if 'car' not in obj:
        # return the image with all detected objects highlighted (Visualizer expects RGB)
        v = Visualizer(im, MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
        vis = v.draw_instance_predictions(outputs["instances"].to('cpu'))
        # convert to a PIL image, since the image output is declared with type="pil"
        img = Image.fromarray(vis.get_image())
        # return a message instead of a score
        out = 'No automobiles were found in the image.'
    else:
        # create a data frame containing all object names and sizes
        objects = pd.DataFrame({'obj': obj, 'obj_size': obj_size})
        # get the mask of the largest object labeled as car
        item_mask = masks[objects[objects['obj'] == 'car']['obj_size'].idxmax()]
        # get the pixel coordinates covered by the mask
        segmentation = np.where(item_mask)
        # get the x and y boundaries
        x_min = int(np.min(segmentation[1]))
        x_max = int(np.max(segmentation[1]))
        y_min = int(np.min(segmentation[0]))
        y_max = int(np.max(segmentation[0]))
        # crop the image to the bounding box of the car
        cropped = Image.fromarray(im[y_min:y_max, x_min:x_max, :], mode='RGB')
        # turn the boolean mask into a grayscale mask image
        mask = Image.fromarray((item_mask * 255).astype('uint8'))
        # crop the mask to the same bounding box
        cropped_mask = mask.crop((x_min, y_min, x_max, y_max))
        # create a white background of the same size
        background = Image.new(mode='RGB', size=cropped_mask.size, color='white')
        # define the paste position
        paste_position = (0, 0)
        # create the foreground image
        new_fg_image = Image.new('RGB', background.size)
        new_fg_image.paste(cropped, paste_position)
        # composite the final image
        img = Image.composite(new_fg_image, background, cropped_mask)
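        # Image.composite keeps the new_fg_image pixels where cropped_mask is
        # white (the car) and falls back to the white background elsewhere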
        # apply the previously defined transforms
        img_t = test_transform(img).to(DEVICE)
        # feed the transformed image to the model (no gradients needed at inference)
        with torch.no_grad():
            out = model(img_t.unsqueeze(0))
        # turn the logits into class probabilities
        softmax = nn.Softmax(dim=1)
        out = softmax(out)
        # get the label classes
        label_classes = torch.tensor([0, 1, 2, 3, 4]).to(DEVICE)
        # compute the modernity score as the probability-weighted average class
        out = round((label_classes * out).sum(dim=1).item(), 1)
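        # worked example: probabilities [0.1, 0.1, 0.2, 0.3, 0.3] over classes
        # 0-4 give a score of 0*0.1 + 1*0.1 + 2*0.2 + 3*0.3 + 4*0.3 = 2.6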
    return img, out
# set the interface title
title = 'Design Modernity of Automobiles'
# set the interface description
description = "Demo for the design modernity of automobiles. To use it, simply upload an image of a car, or click one of the examples to load it."
# include example images
examples = [['input.jpg'], ['input1.jpg']]
# define the interface
interface = gr.Interface(fn=modernity, inputs=i1, outputs=[o1, o2], title=title, description=description, examples=examples, cache_examples=False)
# launch the interface
interface.launch()
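
# to try the app locally (assuming the pinned dependencies above and a trained
# model_modernity.pth in the working directory), run `python app.py` and open
# the local URL that Gradio prints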