import io
import os

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from huggingface_hub import from_pretrained_keras
from PIL import Image

# Collect up to 10 sample images from the local COCO directory to use as examples.
coco_image = []
coco_dir = "coco/images/"
for idx, images in enumerate(os.listdir(coco_dir)):
    image = os.path.join(coco_dir, images)
    if os.path.isfile(image) and idx < 10:
        coco_image.append(image)

# Load only the COCO metadata; the label map is used to turn class ids into names.
_, dataset_info = tfds.load(
    "coco/2017",
    split=["train", "validation", "test"],
    with_info=True,
)
# test_dataset = tfds.load("coco/2017", split="test", data_dir="data")
int2str = dataset_info.features["objects"]["label"].int2str


class AnchorBox:
    """Generates anchor boxes.

    This class has operations to generate anchor boxes for feature maps at
    strides `[8, 16, 32, 64, 128]`, where each anchor box is of the format
    `[x, y, width, height]`.

    Attributes:
      aspect_ratios: A list of float values representing the aspect ratios of
        the anchor boxes at each location on the feature map.
      scales: A list of float values representing the scale of the anchor
        boxes at each location on the feature map.
      num_anchors: The number of anchor boxes at each location on the feature map.
      areas: A list of float values representing the areas of the anchor boxes
        for each feature map in the feature pyramid.
      strides: A list of values representing the strides for each feature map
        in the feature pyramid.
    """

    def __init__(self):
        self.aspect_ratios = [0.5, 1.0, 2.0]
        self.scales = [2 ** x for x in [0, 1 / 3, 2 / 3]]
        self._num_anchors = len(self.aspect_ratios) * len(self.scales)
        self._strides = [2 ** i for i in range(3, 8)]
        self._areas = [x ** 2 for x in [32.0, 64.0, 128.0, 256.0, 512.0]]
        self._anchor_dims = self._compute_dims()

    def _compute_dims(self):
        """Computes anchor box dimensions for all ratios and scales at all
        levels of the feature pyramid.
        """
        anchor_dims_all = []
        for area in self._areas:
            anchor_dims = []
            for ratio in self.aspect_ratios:
                anchor_height = tf.math.sqrt(area / ratio)
                anchor_width = area / anchor_height
                dims = tf.reshape(
                    tf.stack([anchor_width, anchor_height], axis=-1), [1, 1, 2]
                )
                for scale in self.scales:
                    anchor_dims.append(scale * dims)
            anchor_dims_all.append(tf.stack(anchor_dims, axis=-2))
        return anchor_dims_all

    def _get_anchors(self, feature_height, feature_width, level):
        """Generates anchor boxes for a given feature map size and level.

        Arguments:
          feature_height: An integer representing the height of the feature map.
          feature_width: An integer representing the width of the feature map.
          level: An integer representing the level of the feature map in the
            feature pyramid.

        Returns:
          anchor boxes with the shape
          `(feature_height * feature_width * num_anchors, 4)`
        """
        rx = tf.range(feature_width, dtype=tf.float32) + 0.5
        ry = tf.range(feature_height, dtype=tf.float32) + 0.5
        centers = tf.stack(tf.meshgrid(rx, ry), axis=-1) * self._strides[level - 3]
        centers = tf.expand_dims(centers, axis=-2)
        centers = tf.tile(centers, [1, 1, self._num_anchors, 1])
        dims = tf.tile(
            self._anchor_dims[level - 3], [feature_height, feature_width, 1, 1]
        )
        anchors = tf.concat([centers, dims], axis=-1)
        return tf.reshape(
            anchors, [feature_height * feature_width * self._num_anchors, 4]
        )

    def get_anchors(self, image_height, image_width):
        """Generates anchor boxes for all the feature maps of the feature pyramid.

        Arguments:
          image_height: Height of the input image.
          image_width: Width of the input image.

        Returns:
          anchor boxes for all the feature maps, stacked as a single tensor
          with shape `(total_anchors, 4)`
        """
        anchors = [
            self._get_anchors(
                tf.math.ceil(image_height / 2 ** i),
                tf.math.ceil(image_width / 2 ** i),
                i,
            )
            for i in range(3, 8)
        ]
        return tf.concat(anchors, axis=0)
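
# Illustrative sketch, not part of the original app: with 3 aspect ratios x 3 scales
# there are 9 anchors per feature-map cell, across pyramid levels 3-7 (strides 8-128).
# For a 640x640 input this gives (80*80 + 40*40 + 20*20 + 10*10 + 5*5) * 9 = 76725 anchors.
def _demo_anchor_count(image_size=640.0):
    anchors = AnchorBox().get_anchors(image_size, image_size)
    return anchors.shape  # expected: (76725, 4)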


class DecodePredictions(tf.keras.layers.Layer):
    """A Keras layer that decodes predictions of the RetinaNet model.

    Attributes:
      num_classes: Number of classes in the dataset.
      confidence_threshold: Minimum class probability, below which detections
        are pruned.
      nms_iou_threshold: IOU threshold for the NMS operation.
      max_detections_per_class: Maximum number of detections to retain per class.
      max_detections: Maximum number of detections to retain across all classes.
      box_variance: The scaling factors used to scale the bounding box predictions.
    """

    def __init__(
        self,
        num_classes=80,
        confidence_threshold=0.05,
        nms_iou_threshold=0.5,
        max_detections_per_class=100,
        max_detections=100,
        box_variance=[0.1, 0.1, 0.2, 0.2],
        **kwargs
    ):
        super(DecodePredictions, self).__init__(**kwargs)
        self.num_classes = num_classes
        self.confidence_threshold = confidence_threshold
        self.nms_iou_threshold = nms_iou_threshold
        self.max_detections_per_class = max_detections_per_class
        self.max_detections = max_detections
        self._anchor_box = AnchorBox()
        # Use the passed-in variance instead of silently ignoring the argument.
        self._box_variance = tf.convert_to_tensor(box_variance, dtype=tf.float32)

    def _decode_box_predictions(self, anchor_boxes, box_predictions):
        boxes = box_predictions * self._box_variance
        boxes = tf.concat(
            [
                boxes[:, :, :2] * anchor_boxes[:, :, 2:] + anchor_boxes[:, :, :2],
                tf.math.exp(boxes[:, :, 2:]) * anchor_boxes[:, :, 2:],
            ],
            axis=-1,
        )
        boxes_transformed = convert_to_corners(boxes)
        return boxes_transformed

    def call(self, images, predictions):
        image_shape = tf.cast(tf.shape(images), dtype=tf.float32)
        anchor_boxes = self._anchor_box.get_anchors(image_shape[1], image_shape[2])
        box_predictions = predictions[:, :, :4]
        cls_predictions = tf.nn.sigmoid(predictions[:, :, 4:])
        boxes = self._decode_box_predictions(anchor_boxes[None, ...], box_predictions)
        return tf.image.combined_non_max_suppression(
            tf.expand_dims(boxes, axis=2),
            cls_predictions,
            self.max_detections_per_class,
            self.max_detections,
            self.nms_iou_threshold,
            self.confidence_threshold,
            clip_boxes=False,
        )


def convert_to_corners(boxes):
    """Changes the box format to corner coordinates.

    Arguments:
      boxes: A tensor of rank 2 or higher with a shape of `(..., num_boxes, 4)`
        representing bounding boxes where each box is of the format
        `[x, y, width, height]`.

    Returns:
      converted boxes with the same shape as `boxes`.
    """
    return tf.concat(
        [boxes[..., :2] - boxes[..., 2:] / 2.0, boxes[..., :2] + boxes[..., 2:] / 2.0],
        axis=-1,
    )


def resize_and_pad_image(
    image, min_side=800.0, max_side=1333.0, jitter=[640, 1024], stride=128.0
):
    """Resizes and pads an image while preserving its aspect ratio.

    1. Resizes the image so that the shorter side equals `min_side`.
    2. If the longer side is greater than `max_side`, resizes the image so that
       the longer side equals `max_side`.
    3. Pads with zeros on the right and bottom to make the image shape
       divisible by `stride`.

    Arguments:
      image: A 3-D tensor of shape `(height, width, channels)` representing an
        image.
      min_side: The shorter side of the image is resized to this value, if
        `jitter` is set to None.
      max_side: If the longer side of the image exceeds this value after
        resizing, the image is resized such that the longer side equals this
        value.
      jitter: A list of floats containing the minimum and maximum size for
        scale jittering. If available, the shorter side of the image will be
        resized to a random value in this range.
      stride: The stride of the smallest feature map in the feature pyramid.
        Can be calculated using `image_size / feature_map_size`.

    Returns:
      image: Resized and padded image.
      image_shape: Shape of the image before padding.
      ratio: The scaling factor used to resize the image.
    """
    image_shape = tf.cast(tf.shape(image)[:2], dtype=tf.float32)
    if jitter is not None:
        min_side = tf.random.uniform((), jitter[0], jitter[1], dtype=tf.float32)
    ratio = min_side / tf.reduce_min(image_shape)
    if ratio * tf.reduce_max(image_shape) > max_side:
        ratio = max_side / tf.reduce_max(image_shape)
    image_shape = ratio * image_shape
    image = tf.image.resize(image, tf.cast(image_shape, dtype=tf.int32))
    padded_image_shape = tf.cast(
        tf.math.ceil(image_shape / stride) * stride, dtype=tf.int32
    )
    image = tf.image.pad_to_bounding_box(
        image, 0, 0, padded_image_shape[0], padded_image_shape[1]
    )
    return image, image_shape, ratio
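
# Illustrative sketch, not part of the original app: with `jitter=None` the shorter
# side is resized to `min_side` (800) and the result is zero-padded so both sides
# are multiples of `stride` (128). The returned `ratio` maps original-image
# coordinates to resized-image coordinates, which is why `predict` below divides
# the predicted boxes by `ratio`.
def _demo_resize_and_pad():
    dummy = tf.zeros([480, 640, 3], dtype=tf.float32)  # hypothetical 480x640 input
    padded, resized_shape, ratio = resize_and_pad_image(dummy, jitter=None)
    # ratio = 800 / 480 ~= 1.67, resized to ~800x1066, padded to 896x1152
    return padded.shape, resized_shape, ratio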


def visualize_detections(
    image, boxes, classes, scores, figsize=(7, 7), linewidth=1, color=[0, 0, 1]
):
    """Visualize Detections"""
    image = np.array(image, dtype=np.uint8)
    plt.figure(figsize=figsize)
    plt.axis("off")
    plt.imshow(image)
    ax = plt.gca()
    for box, _cls, score in zip(boxes, classes, scores):
        text = "{}: {:.2f}".format(_cls, score)
        x1, y1, x2, y2 = box
        w, h = x2 - x1, y2 - y1
        patch = plt.Rectangle(
            [x1, y1], w, h, fill=False, edgecolor=color, linewidth=linewidth
        )
        ax.add_patch(patch)
        ax.text(
            x1,
            y1,
            text,
            bbox={"facecolor": color, "alpha": 0.4},
            clip_box=ax.clipbox,
            clip_on=True,
        )
    plt.show()
    return ax


def prepare_image(image):
    """Resizes, pads and preprocesses an image for the ResNet backbone."""
    image, _, ratio = resize_and_pad_image(image, jitter=None)
    image = tf.keras.applications.resnet.preprocess_input(image)
    return tf.expand_dims(image, axis=0), ratio


# Load the pretrained RetinaNet and wrap it with the decoding layer so that raw
# predictions are turned into final boxes, classes and scores.
model = from_pretrained_keras("keras-io/Object-Detection-RetinaNet")
img_input = tf.keras.Input(shape=[None, None, 3], name="image")
predictions = model(img_input, training=False)
detections = DecodePredictions(confidence_threshold=0.5)(img_input, predictions)
inference_model = tf.keras.Model(inputs=img_input, outputs=detections)


def predict(image):
    input_image, ratio = prepare_image(image)
    detections = inference_model.predict(input_image)
    num_detections = detections.valid_detections[0]
    class_names = [
        int2str(int(x)) for x in detections.nmsed_classes[0][:num_detections]
    ]
    img_buf = io.BytesIO()
    ax = visualize_detections(
        image,
        detections.nmsed_boxes[0][:num_detections] / ratio,
        class_names,
        detections.nmsed_scores[0][:num_detections],
    )
    ax.figure.savefig(img_buf)
    img_buf.seek(0)
    img = Image.open(img_buf)
    return img
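
# Illustrative sketch, not part of the original app: `predict` can also be called
# directly on a NumPy image loaded from disk (the path argument is hypothetical).
def _demo_predict(image_path):
    image = np.array(Image.open(image_path).convert("RGB"))
    return predict(image)  # returns a PIL image with the detections drawn on it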


# Gradio input / output components
input = gr.inputs.Image(image_mode="RGB", type="numpy", label="Enter Object Image")
output = gr.outputs.Image(type="pil", label="Detected Objects with Class Category")

title = "Object Detection With RetinaNet"
description = (
    "Upload an image or pick one from the examples to localize the objects "
    "present in it and classify them into different categories."
)
article = "Space By: Kavya Bisht \nBased on this notebook"
").launch(enable_queue=True, debug=True)