File size: 3,456 Bytes
8f5cd7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
568e936
 
8f5cd7f
 
 
 
 
 
 
568e936
8f5cd7f
 
 
 
 
 
568e936
8f5cd7f
568e936
 
 
 
 
 
8f5cd7f
 
 
 
 
 
 
 
 
 
 
 
 
568e936
 
 
8f5cd7f
 
568e936
 
8f5cd7f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
from PIL import Image
import numpy as np
import torch
from torchvision import transforms, models
from onnx import numpy_helper
import os
import onnxruntime as rt
from matplotlib.colors import hsv_to_rgb
import cv2
import gradio as gr

import matplotlib.pyplot as plt
import matplotlib.patches as patches

import pycocotools.mask as mask_util

def preprocess(image):
    # Resize
    ratio = 800.0 / min(image.size[0], image.size[1])
    image = image.resize((int(ratio * image.size[0]), int(ratio * image.size[1])), Image.BILINEAR)

    # Convert to BGR
    image = np.array(image)[:, :, [2, 1, 0]].astype('float32')

    # HWC -> CHW
    image = np.transpose(image, [2, 0, 1])

    # Normalize
    mean_vec = np.array([102.9801, 115.9465, 122.7717])
    for i in range(image.shape[0]):
        image[i, :, :] = image[i, :, :] - mean_vec[i]

    # Pad to be divisible of 32
    import math
    padded_h = int(math.ceil(image.shape[1] / 32) * 32)
    padded_w = int(math.ceil(image.shape[2] / 32) * 32)

    padded_image = np.zeros((3, padded_h, padded_w), dtype=np.float32)
    padded_image[:, :image.shape[1], :image.shape[2]] = image
    image = padded_image

    return image



# Start from ORT 1.10, ORT requires explicitly setting the providers parameter if you want to use execution providers
# other than the default CPU provider (as opposed to the previous behavior of providers getting set/registered by default
# based on the build flags) when instantiating InferenceSession.
# For example, if NVIDIA GPU is available and ORT Python package is built with CUDA, then call API as following:
# onnxruntime.InferenceSession(path/to/model, providers=['CUDAExecutionProvider'])
os.system("wget https://github.com/AK391/models/raw/main/vision/object_detection_segmentation/faster-rcnn/model/FasterRCNN-10.onnx")
sess = rt.InferenceSession("FasterRCNN-10.onnx")

outputs = sess.get_outputs()


classes = [line.rstrip('\n') for line in open('coco_classes.txt')]


def display_objdetect_image(image, boxes, labels, scores, score_threshold=0.7):
    # Resize boxes
    ratio = 800.0 / min(image.size[0], image.size[1])
    boxes /= ratio

    _, ax = plt.subplots(1, figsize=(12,9))
    image = np.array(image)
    ax.imshow(image)

    # Showing boxes with score > 0.7
    for box, label, score in zip(boxes, labels, scores):
        if score > score_threshold:
            rect = patches.Rectangle((box[0], box[1]), box[2] - box[0], box[3] - box[1], linewidth=1, edgecolor='b', facecolor='none')
            ax.annotate(classes[label] + ':' + str(np.round(score, 2)), (box[0], box[1]), color='w', fontsize=12)
            ax.add_patch(rect)

    plt.axis('off')
    plt.savefig('out.png', bbox_inches='tight')


def inference(img):
  input_image = Image.open(img)
  orig_tensor = np.asarray(input_image)
  input_tensor = preprocess(input_image)
  
  output_names = list(map(lambda output: output.name, outputs))
  input_name = sess.get_inputs()[0].name
  
  boxes, labels, scores = sess.run(output_names, {input_name: input_tensor})
  display_objdetect_image(input_image, boxes, labels, scores)

  return 'out.png'
  
title="Faster R-CNN"
description="This model is a real-time neural network for object detection that detects 80 different classes."
examples=[["examplemask-rcnn.jpeg"]]
gr.Interface(inference,gr.inputs.Image(type="filepath"),gr.outputs.Image(type="file"),title=title,description=description,examples=examples).launch(enable_queue=True)