import gradio as gr from huggingface_hub import from_pretrained_keras from PIL import Image import io import matplotlib.pyplot as plt import os import re import zipfile import numpy as np import tensorflow as tf from tensorflow import keras import tensorflow_datasets as tfds coco_image = [] coco_dir = 'coco/images/' for idx, images in enumerate(os.listdir(coco_dir)): image = os.path.join(coco_dir, images) if os.path.isfile(image) and idx < 10: coco_image.append(image) _, dataset_info = tfds.load( "coco/2017", split=["train", "validation", "test"], with_info=True, ) #test_dataset = tfds.load("coco/2017", split="test", data_dir="data") int2str = dataset_info.features["objects"]["label"].int2str class AnchorBox: """Generates anchor boxes. This class has operations to generate anchor boxes for feature maps at strides `[8, 16, 32, 64, 128]`. Where each anchor each box is of the format `[x, y, width, height]`. Attributes: aspect_ratios: A list of float values representing the aspect ratios of the anchor boxes at each location on the feature map scales: A list of float values representing the scale of the anchor boxes at each location on the feature map. num_anchors: The number of anchor boxes at each location on feature map areas: A list of float values representing the areas of the anchor boxes for each feature map in the feature pyramid. strides: A list of float value representing the strides for each feature map in the feature pyramid. """ def __init__(self): self.aspect_ratios = [0.5, 1.0, 2.0] self.scales = [2 ** x for x in [0, 1 / 3, 2 / 3]] self._num_anchors = len(self.aspect_ratios) * len(self.scales) self._strides = [2 ** i for i in range(3, 8)] self._areas = [x ** 2 for x in [32.0, 64.0, 128.0, 256.0, 512.0]] self._anchor_dims = self._compute_dims() def _compute_dims(self): """Computes anchor box dimensions for all ratios and scales at all levels of the feature pyramid. """ anchor_dims_all = [] for area in self._areas: anchor_dims = [] for ratio in self.aspect_ratios: anchor_height = tf.math.sqrt(area / ratio) anchor_width = area / anchor_height dims = tf.reshape( tf.stack([anchor_width, anchor_height], axis=-1), [1, 1, 2] ) for scale in self.scales: anchor_dims.append(scale * dims) anchor_dims_all.append(tf.stack(anchor_dims, axis=-2)) return anchor_dims_all def _get_anchors(self, feature_height, feature_width, level): """Generates anchor boxes for a given feature map size and level Arguments: feature_height: An integer representing the height of the feature map. feature_width: An integer representing the width of the feature map. level: An integer representing the level of the feature map in the feature pyramid. Returns: anchor boxes with the shape `(feature_height * feature_width * num_anchors, 4)` """ rx = tf.range(feature_width, dtype=tf.float32) + 0.5 ry = tf.range(feature_height, dtype=tf.float32) + 0.5 centers = tf.stack(tf.meshgrid(rx, ry), axis=-1) * self._strides[level - 3] centers = tf.expand_dims(centers, axis=-2) centers = tf.tile(centers, [1, 1, self._num_anchors, 1]) dims = tf.tile( self._anchor_dims[level - 3], [feature_height, feature_width, 1, 1] ) anchors = tf.concat([centers, dims], axis=-1) return tf.reshape( anchors, [feature_height * feature_width * self._num_anchors, 4] ) def get_anchors(self, image_height, image_width): """Generates anchor boxes for all the feature maps of the feature pyramid. Arguments: image_height: Height of the input image. image_width: Width of the input image. Returns: anchor boxes for all the feature maps, stacked as a single tensor with shape `(total_anchors, 4)` """ anchors = [ self._get_anchors( tf.math.ceil(image_height / 2 ** i), tf.math.ceil(image_width / 2 ** i), i, ) for i in range(3, 8) ] return tf.concat(anchors, axis=0) class DecodePredictions(tf.keras.layers.Layer): """A Keras layer that decodes predictions of the RetinaNet model. Attributes: num_classes: Number of classes in the dataset confidence_threshold: Minimum class probability, below which detections are pruned. nms_iou_threshold: IOU threshold for the NMS operation max_detections_per_class: Maximum number of detections to retain per class. max_detections: Maximum number of detections to retain across all classes. box_variance: The scaling factors used to scale the bounding box predictions. """ def __init__( self, num_classes=80, confidence_threshold=0.05, nms_iou_threshold=0.5, max_detections_per_class=100, max_detections=100, box_variance=[0.1, 0.1, 0.2, 0.2], **kwargs ): super(DecodePredictions, self).__init__(**kwargs) self.num_classes = num_classes self.confidence_threshold = confidence_threshold self.nms_iou_threshold = nms_iou_threshold self.max_detections_per_class = max_detections_per_class self.max_detections = max_detections self._anchor_box = AnchorBox() self._box_variance = tf.convert_to_tensor( [0.1, 0.1, 0.2, 0.2], dtype=tf.float32 ) def _decode_box_predictions(self, anchor_boxes, box_predictions): boxes = box_predictions * self._box_variance boxes = tf.concat( [ boxes[:, :, :2] * anchor_boxes[:, :, 2:] + anchor_boxes[:, :, :2], tf.math.exp(boxes[:, :, 2:]) * anchor_boxes[:, :, 2:], ], axis=-1, ) boxes_transformed = convert_to_corners(boxes) return boxes_transformed def call(self, images, predictions): image_shape = tf.cast(tf.shape(images), dtype=tf.float32) anchor_boxes = self._anchor_box.get_anchors(image_shape[1], image_shape[2]) box_predictions = predictions[:, :, :4] cls_predictions = tf.nn.sigmoid(predictions[:, :, 4:]) boxes = self._decode_box_predictions(anchor_boxes[None, ...], box_predictions) return tf.image.combined_non_max_suppression( tf.expand_dims(boxes, axis=2), cls_predictions, self.max_detections_per_class, self.max_detections, self.nms_iou_threshold, self.confidence_threshold, clip_boxes=False, ) def convert_to_corners(boxes): """Changes the box format to corner coordinates Arguments: boxes: A tensor of rank 2 or higher with a shape of `(..., num_boxes, 4)` representing bounding boxes where each box is of the format `[x, y, width, height]`. Returns: converted boxes with shape same as that of boxes. """ return tf.concat( [boxes[..., :2] - boxes[..., 2:] / 2.0, boxes[..., :2] + boxes[..., 2:] / 2.0], axis=-1, ) def resize_and_pad_image( image, min_side=800.0, max_side=1333.0, jitter=[640, 1024], stride=128.0 ): """Resizes and pads image while preserving aspect ratio. 1. Resizes images so that the shorter side is equal to `min_side` 2. If the longer side is greater than `max_side`, then resize the image with longer side equal to `max_side` 3. Pad with zeros on right and bottom to make the image shape divisible by `stride` Arguments: image: A 3-D tensor of shape `(height, width, channels)` representing an image. min_side: The shorter side of the image is resized to this value, if `jitter` is set to None. max_side: If the longer side of the image exceeds this value after resizing, the image is resized such that the longer side now equals to this value. jitter: A list of floats containing minimum and maximum size for scale jittering. If available, the shorter side of the image will be resized to a random value in this range. stride: The stride of the smallest feature map in the feature pyramid. Can be calculated using `image_size / feature_map_size`. Returns: image: Resized and padded image. image_shape: Shape of the image before padding. ratio: The scaling factor used to resize the image """ image_shape = tf.cast(tf.shape(image)[:2], dtype=tf.float32) if jitter is not None: min_side = tf.random.uniform((), jitter[0], jitter[1], dtype=tf.float32) ratio = min_side / tf.reduce_min(image_shape) if ratio * tf.reduce_max(image_shape) > max_side: ratio = max_side / tf.reduce_max(image_shape) image_shape = ratio * image_shape image = tf.image.resize(image, tf.cast(image_shape, dtype=tf.int32)) padded_image_shape = tf.cast( tf.math.ceil(image_shape / stride) * stride, dtype=tf.int32 ) image = tf.image.pad_to_bounding_box( image, 0, 0, padded_image_shape[0], padded_image_shape[1] ) return image, image_shape, ratio def visualize_detections( image, boxes, classes, scores, figsize=(7, 7), linewidth=1, color=[0, 0, 1] ): """Visualize Detections""" image = np.array(image, dtype=np.uint8) plt.figure(figsize=figsize) plt.axis("off") plt.imshow(image) ax = plt.gca() for box, _cls, score in zip(boxes, classes, scores): text = "{}: {:.2f}".format(_cls, score) x1, y1, x2, y2 = box w, h = x2 - x1, y2 - y1 patch = plt.Rectangle( [x1, y1], w, h, fill=False, edgecolor=color, linewidth=linewidth ) ax.add_patch(patch) ax.text( x1, y1, text, bbox={"facecolor": color, "alpha": 0.4}, clip_box=ax.clipbox, clip_on=True, ) plt.show() return ax def prepare_image(image): image, _, ratio = resize_and_pad_image(image, jitter=None) image = tf.keras.applications.resnet.preprocess_input(image) return tf.expand_dims(image, axis=0), ratio model = from_pretrained_keras("keras-io/Object-Detection-RetinaNet") img_input = tf.keras.Input(shape=[None, None, 3], name="image") predictions = model(img_input, training=False) detections = DecodePredictions(confidence_threshold=0.5)(img_input, predictions) inference_model = tf.keras.Model(inputs=img_input, outputs=detections) def predict(image): input_image, ratio = prepare_image(image) detections = inference_model.predict(input_image) num_detections = detections.valid_detections[0] class_names = [ int2str(int(x)) for x in detections.nmsed_classes[0][:num_detections] ] img_buf = io.BytesIO() ax = visualize_detections( image, detections.nmsed_boxes[0][:num_detections] / ratio, class_names, detections.nmsed_scores[0][:num_detections], ) ax.figure.savefig(img_buf) img_buf.seek(0) img = Image.open(img_buf) return img # Input input = gr.inputs.Image(image_mode="RGB", type="numpy", label="Enter Object Image") # Output output = gr.outputs.Image(type="pil", label="Detected Objects with Class Category") title = "Object Detection With RetinaNet" description = "Upload an Image or take one from examples to localize objects present in an image, and at the same time, classify them into different categories" gr.Interface(fn=predict, inputs = input, outputs = output, examples=coco_image, allow_flagging=False, analytics_enabled=False, title=title, description=description, article="
Space By: Kavya Bisht \n Based on notebook this notebook
").launch(enable_queue=True, debug=True)