from PIL import Image |
import numpy as np |
import torch |
from torchvision import transforms, models |
from onnx import numpy_helper |
import os |
import onnxruntime as rt |
from matplotlib.colors import hsv_to_rgb |
import cv2 |
import gradio as gr |
# Preprocessing applied to every input image: convert it to a tensor, then
# normalise each of the three channels with the fixed mean/std values below.
preprocess = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Fetch the ONNX weights only when they are not already on disk; an
# unconditional wget would save a numbered duplicate on every start-up while
# the session below keeps loading the original file name anyway.
if not os.path.exists("fcn-resnet101-11.onnx"):
    os.system("wget https://github.com/AK391/models/raw/main/vision/object_detection_segmentation/fcn/model/fcn-resnet101-11.onnx")
sess = rt.InferenceSession("fcn-resnet101-11.onnx")
outputs = sess.get_outputs()

# One class label per line; the context manager closes the file handle once
# the list has been built instead of leaving it to the garbage collector.
with open('voc_classes.txt') as class_file:
    classes = [line.rstrip('\n') for line in class_file]
num_classes = len(classes)
def get_palette():
    """Return the flat colour palette for all classes.

    The result is a list of ``num_classes * 3`` integers laid out as
    ``[r0, g0, b0, r1, g1, b1, ...]``. Class 0 is black; every other class
    gets the hue ``index / num_classes`` at saturation and value 0.75.
    """
    flat_palette = []
    for class_index in range(num_classes):
        if class_index == 0:
            rgb = (0, 0, 0)
        else:
            rgb = hsv_to_rgb((class_index / num_classes, 0.75, 0.75))
        # Scale each of the three components to 0-255 and truncate to int.
        flat_palette.extend(int(rgb[channel] * 255) for channel in range(3))
    return flat_palette
def colorize(labels):
    """Map an array of class indices to an RGB colour image.

    Args:
        labels: array of per-pixel class indices (the caller in this file
            passes a ``uint8`` array).

    Returns:
        A NumPy array holding the RGB rendering of ``labels`` using the
        palette from ``get_palette()``.
    """
    indexed_img = Image.fromarray(labels)
    indexed_img = indexed_img.convert('P', colors=num_classes)
    # Attach the per-class colours, then expand the indices to real RGB pixels.
    indexed_img.putpalette(get_palette())
    rgb_img = indexed_img.convert('RGB')
    return np.array(rgb_img)
def visualize_output(image, output):
    """Turn per-class scores into a label map, a colour map and an overlay.

    Args:
        image: pixel array whose first two axes are height and width. Its
            last axis is reversed before blending with the RGB colour map.
            NOTE(review): this implies the input is expected in BGR channel
            order - confirm against callers.
        output: score array of shape ``(num_classes, height, width)``.

    Returns:
        A tuple ``(confidence, result_img, blended_img, raw_labels)``:
        the mean over all pixels of the per-pixel maximum score, the colour
        map as a PIL image, the 50/50 blend of picture and colour map as a
        PIL image, and the ``uint8`` array of winning class indices.

    Raises:
        AssertionError: if the spatial sizes differ or the first axis of
            ``output`` does not equal ``num_classes``.
    """
    # Shape checks: same height, same width, one score plane per class.
    assert image.shape[0] == output.shape[1]
    assert image.shape[1] == output.shape[2]
    assert output.shape[0] == num_classes

    # Winning class for every pixel.
    class_map = np.argmax(output, axis=0)
    raw_labels = class_map.astype(np.uint8)

    # Average of the best score at each pixel.
    best_scores = np.max(output, axis=0)
    confidence = float(best_scores.mean())

    color_map = colorize(raw_labels)

    # Reverse the channel axis of the picture, then mix it equally with the
    # colour map.
    flipped = image[:, :, ::-1]
    overlay = cv2.addWeighted(flipped, 0.5, color_map, 0.5, 0)

    result_img = Image.fromarray(color_map)
    blended_img = Image.fromarray(overlay)
    return confidence, result_img, blended_img, raw_labels
def inference(img):
    """Segment the image stored at ``img`` and return the blended overlay.

    Args:
        img: path of the image file to segment.

    Returns:
        The PIL image produced by ``visualize_output`` that blends the input
        picture with the colour-coded class map.
    """
    # Force three RGB channels: ``preprocess`` normalises with exactly three
    # per-channel statistics, so grayscale, RGBA or palette files would fail.
    # ``convert`` loads the pixel data, so the file can be closed right away.
    with Image.open(img) as source:
        input_image = source.convert('RGB')

    # visualize_output reverses the channel axis before blending with its RGB
    # colour map, so hand it the pixels in BGR order; passing RGB here would
    # swap red and blue in the returned overlay.
    bgr_pixels = np.ascontiguousarray(np.asarray(input_image)[:, :, ::-1])

    # Add the batch axis and hand ONNX Runtime a plain NumPy array.
    input_tensor = preprocess(input_image).unsqueeze(0).numpy()

    output_names = [output.name for output in outputs]
    input_name = sess.get_inputs()[0].name
    detections = sess.run(output_names, {input_name: input_tensor})

    # The first model output carries the class scores; index 0 drops the
    # batch axis added above.
    _, _, blended_img, _ = visualize_output(bgr_pixels, detections[0][0])
    return blended_img
# Text shown in the web UI.
title = "Fully Convolutional Network"
description = "FCNs are a model for real-time neural network for class-wise image segmentation. As the name implies, every weight layer in the network is convolutional. The final layer has the same height/width as the input image, making FCNs a useful tool for doing dense pixel-wise predictions without a significant amount of postprocessing. Being fully convolutional also provides great flexibility in the resolutions this model can handle. This specific model detects 20 different classes. The models have been pre-trained on the COCO train2017 dataset on this class subset."

# The UI passes the uploaded image to ``inference`` as a file path and
# displays the PIL image it returns.
image_input = gr.inputs.Image(type="filepath")
image_output = gr.outputs.Image(type="pil")
demo = gr.Interface(
    fn=inference,
    inputs=image_input,
    outputs=image_output,
    title=title,
    description=description,
)
demo.launch(enable_queue=True)