RobotJelly committed
Commit ada4657
1 Parent(s): 4b7383b
Files changed (1)
app.py +309 -0
app.py ADDED
import gradio as gr
from huggingface_hub import from_pretrained_keras
from PIL import Image
import io
import matplotlib.pyplot as plt
import os
import numpy as np
import tensorflow as tf

# Collect up to ten sample images from the local COCO test split to use as
# Gradio examples.
coco_image = []
coco_dir = "coco/images/test2017/"
for idx, fname in enumerate(os.listdir(coco_dir)):
    image_path = os.path.join(coco_dir, fname)
    if os.path.isfile(image_path) and idx < 10:
        coco_image.append(image_path)

class AnchorBox:
    """Generates anchor boxes.

    This class has operations to generate anchor boxes for feature maps at
    strides `[8, 16, 32, 64, 128]`, where each anchor box is of the format
    `[x, y, width, height]`.

    Attributes:
      aspect_ratios: A list of float values representing the aspect ratios of
        the anchor boxes at each location on the feature map.
      scales: A list of float values representing the scale of the anchor boxes
        at each location on the feature map.
      num_anchors: The number of anchor boxes at each location on the feature map.
      areas: A list of float values representing the areas of the anchor
        boxes for each feature map in the feature pyramid.
      strides: A list of float values representing the strides for each feature
        map in the feature pyramid.
    """

    def __init__(self):
        self.aspect_ratios = [0.5, 1.0, 2.0]
        self.scales = [2 ** x for x in [0, 1 / 3, 2 / 3]]

        self._num_anchors = len(self.aspect_ratios) * len(self.scales)
        self._strides = [2 ** i for i in range(3, 8)]
        self._areas = [x ** 2 for x in [32.0, 64.0, 128.0, 256.0, 512.0]]
        self._anchor_dims = self._compute_dims()

    def _compute_dims(self):
        """Computes anchor box dimensions for all ratios and scales at all levels
        of the feature pyramid.
        """
        anchor_dims_all = []
        for area in self._areas:
            anchor_dims = []
            for ratio in self.aspect_ratios:
                anchor_height = tf.math.sqrt(area / ratio)
                anchor_width = area / anchor_height
                dims = tf.reshape(
                    tf.stack([anchor_width, anchor_height], axis=-1), [1, 1, 2]
                )
                for scale in self.scales:
                    anchor_dims.append(scale * dims)
            anchor_dims_all.append(tf.stack(anchor_dims, axis=-2))
        return anchor_dims_all

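    # Example of the dims computed above (hedged arithmetic, not the author's
    # comment): at area 32**2 = 1024 with ratio 0.5, anchor_height =
    # sqrt(1024 / 0.5) ~= 45.25 and anchor_width = 1024 / 45.25 ~= 22.63
    # (so width / height = 0.5), and each pair is then scaled by
    # 2**0, 2**(1/3), and 2**(2/3).
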
    def _get_anchors(self, feature_height, feature_width, level):
        """Generates anchor boxes for a given feature map size and level.

        Arguments:
          feature_height: An integer representing the height of the feature map.
          feature_width: An integer representing the width of the feature map.
          level: An integer representing the level of the feature map in the
            feature pyramid.

        Returns:
          anchor boxes with the shape
          `(feature_height * feature_width * num_anchors, 4)`
        """
        rx = tf.range(feature_width, dtype=tf.float32) + 0.5
        ry = tf.range(feature_height, dtype=tf.float32) + 0.5
        centers = tf.stack(tf.meshgrid(rx, ry), axis=-1) * self._strides[level - 3]
        centers = tf.expand_dims(centers, axis=-2)
        centers = tf.tile(centers, [1, 1, self._num_anchors, 1])
        dims = tf.tile(
            self._anchor_dims[level - 3], [feature_height, feature_width, 1, 1]
        )
        anchors = tf.concat([centers, dims], axis=-1)
        return tf.reshape(
            anchors, [feature_height * feature_width * self._num_anchors, 4]
        )

    def get_anchors(self, image_height, image_width):
        """Generates anchor boxes for all the feature maps of the feature pyramid.

        Arguments:
          image_height: Height of the input image.
          image_width: Width of the input image.

        Returns:
          anchor boxes for all the feature maps, stacked as a single tensor
          with shape `(total_anchors, 4)`
        """
        anchors = [
            self._get_anchors(
                tf.math.ceil(image_height / 2 ** i),
                tf.math.ceil(image_width / 2 ** i),
                i,
            )
            for i in range(3, 8)
        ]
        return tf.concat(anchors, axis=0)

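# A quick sanity check of the anchor generator (a hedged sketch, not part of
# the app's control flow): with 3 aspect ratios x 3 scales = 9 anchors per
# location and strides [8, 16, 32, 64, 128], a 640x640 input yields
# sum(ceil(640 / s) ** 2 * 9 for s in (8, 16, 32, 64, 128)) = 76725 anchors.
# _anchors = AnchorBox().get_anchors(640.0, 640.0)  # shape: (76725, 4)
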
class DecodePredictions(tf.keras.layers.Layer):
    """A Keras layer that decodes predictions of the RetinaNet model.

    Attributes:
      num_classes: Number of classes in the dataset.
      confidence_threshold: Minimum class probability, below which detections
        are pruned.
      nms_iou_threshold: IOU threshold for the NMS operation.
      max_detections_per_class: Maximum number of detections to retain per
        class.
      max_detections: Maximum number of detections to retain across all
        classes.
      box_variance: The scaling factors used to scale the bounding box
        predictions.
    """

    def __init__(
        self,
        num_classes=80,
        confidence_threshold=0.05,
        nms_iou_threshold=0.5,
        max_detections_per_class=100,
        max_detections=100,
        box_variance=[0.1, 0.1, 0.2, 0.2],
        **kwargs
    ):
        super(DecodePredictions, self).__init__(**kwargs)
        self.num_classes = num_classes
        self.confidence_threshold = confidence_threshold
        self.nms_iou_threshold = nms_iou_threshold
        self.max_detections_per_class = max_detections_per_class
        self.max_detections = max_detections

        self._anchor_box = AnchorBox()
        # Use the `box_variance` argument instead of a hard-coded copy of its
        # default, so non-default variances actually take effect.
        self._box_variance = tf.convert_to_tensor(box_variance, dtype=tf.float32)

    def _decode_box_predictions(self, anchor_boxes, box_predictions):
        boxes = box_predictions * self._box_variance
        boxes = tf.concat(
            [
                boxes[:, :, :2] * anchor_boxes[:, :, 2:] + anchor_boxes[:, :, :2],
                tf.math.exp(boxes[:, :, 2:]) * anchor_boxes[:, :, 2:],
            ],
            axis=-1,
        )
        return convert_to_corners(boxes)

    def call(self, images, predictions):
        image_shape = tf.cast(tf.shape(images), dtype=tf.float32)
        anchor_boxes = self._anchor_box.get_anchors(image_shape[1], image_shape[2])
        box_predictions = predictions[:, :, :4]
        cls_predictions = tf.nn.sigmoid(predictions[:, :, 4:])
        boxes = self._decode_box_predictions(anchor_boxes[None, ...], box_predictions)

        return tf.image.combined_non_max_suppression(
            tf.expand_dims(boxes, axis=2),
            cls_predictions,
            self.max_detections_per_class,
            self.max_detections,
            self.nms_iou_threshold,
            self.confidence_threshold,
            clip_boxes=False,
        )

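# The decoding above inverts the standard RetinaNet box encoding (a hedged
# sketch of the math, assuming that encoding): given an anchor
# (xa, ya, wa, ha), a regression output (dx, dy, dw, dh), and variances
# (vx, vy, vw, vh),
#   x = dx * vx * wa + xa        y = dy * vy * ha + ya
#   w = wa * exp(dw * vw)        h = ha * exp(dh * vh)
# so an all-zero prediction decodes exactly back to its anchor, e.g. anchor
# (100, 100, 50, 50) -> corners (75, 75, 125, 125) after convert_to_corners.
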
def convert_to_corners(boxes):
    """Changes the box format to corner coordinates.

    Arguments:
      boxes: A tensor of rank 2 or higher with a shape of `(..., num_boxes, 4)`
        representing bounding boxes where each box is of the format
        `[x, y, width, height]`.

    Returns:
      converted boxes with shape same as that of boxes.
    """
    return tf.concat(
        [boxes[..., :2] - boxes[..., 2:] / 2.0, boxes[..., :2] + boxes[..., 2:] / 2.0],
        axis=-1,
    )

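# Quick numeric check: a center-format box [x=50, y=50, w=20, h=10] becomes
# corners [40., 45., 60., 55.]:
# convert_to_corners(tf.constant([[50.0, 50.0, 20.0, 10.0]]))
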
def resize_and_pad_image(
    image, min_side=800.0, max_side=1333.0, jitter=[640, 1024], stride=128.0
):
    """Resizes and pads an image while preserving its aspect ratio.

    1. Resizes the image so that the shorter side is equal to `min_side`.
    2. If the longer side is then greater than `max_side`, resizes the image
       so that the longer side is equal to `max_side`.
    3. Pads with zeros on the right and bottom to make the image shape
       divisible by `stride`.

    Arguments:
      image: A 3-D tensor of shape `(height, width, channels)` representing an
        image.
      min_side: The shorter side of the image is resized to this value, if
        `jitter` is set to None.
      max_side: If the longer side of the image exceeds this value after
        resizing, the image is resized so that the longer side equals this
        value.
      jitter: A list of floats containing the minimum and maximum size for
        scale jittering. If set, the shorter side of the image is resized to
        a random value in this range.
      stride: The stride of the smallest feature map in the feature pyramid.
        Can be calculated as `image_size / feature_map_size`.

    Returns:
      image: Resized and padded image.
      image_shape: Shape of the image before padding.
      ratio: The scaling factor used to resize the image.
    """
    image_shape = tf.cast(tf.shape(image)[:2], dtype=tf.float32)
    if jitter is not None:
        min_side = tf.random.uniform((), jitter[0], jitter[1], dtype=tf.float32)
    ratio = min_side / tf.reduce_min(image_shape)
    if ratio * tf.reduce_max(image_shape) > max_side:
        ratio = max_side / tf.reduce_max(image_shape)
    image_shape = ratio * image_shape
    image = tf.image.resize(image, tf.cast(image_shape, dtype=tf.int32))
    padded_image_shape = tf.cast(
        tf.math.ceil(image_shape / stride) * stride, dtype=tf.int32
    )
    image = tf.image.pad_to_bounding_box(
        image, 0, 0, padded_image_shape[0], padded_image_shape[1]
    )
    return image, image_shape, ratio

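# Worked example (a sketch with assumed dimensions): a 400x600 image with
# jitter=None gets ratio = 800 / 400 = 2.0 and is resized to 800x1200
# (1200 <= max_side, so no second rescale), then zero-padded up to the next
# multiples of 128: ceil(800 / 128) * 128 = 896 by
# ceil(1200 / 128) * 128 = 1280.
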
def visualize_detections(
    image, boxes, classes, scores, figsize=(7, 7), linewidth=1, color=[0, 0, 1]
):
    """Visualize Detections"""
    image = np.array(image, dtype=np.uint8)
    plt.figure(figsize=figsize)
    plt.axis("off")
    plt.imshow(image)
    ax = plt.gca()
    for box, _cls, score in zip(boxes, classes, scores):
        text = "{}: {:.2f}".format(_cls, score)
        x1, y1, x2, y2 = box
        w, h = x2 - x1, y2 - y1
        patch = plt.Rectangle(
            [x1, y1], w, h, fill=False, edgecolor=color, linewidth=linewidth
        )
        ax.add_patch(patch)
        ax.text(
            x1,
            y1,
            text,
            bbox={"facecolor": color, "alpha": 0.4},
            clip_box=ax.clipbox,
            clip_on=True,
        )
    plt.show()
    return ax

def prepare_image(image):
    # Resize without scale jittering at inference time, then apply the same
    # ResNet preprocessing the backbone was trained with.
    image, _, ratio = resize_and_pad_image(image, jitter=None)
    image = tf.keras.applications.resnet.preprocess_input(image)
    return tf.expand_dims(image, axis=0), ratio

# `int2str` maps a predicted COCO label id to its category name. It is
# referenced below but was never defined in this file; the Keras RetinaNet
# tutorial takes it from the tfds COCO dataset info, which is assumed here.
# The builder metadata is assumed to provide the label names without
# downloading the dataset itself.
import tensorflow_datasets as tfds

int2str = tfds.builder("coco/2017").info.features["objects"]["label"].int2str

# Load the trained RetinaNet from the Hub and wrap it with the decoding
# layer, so the inference model maps raw images straight to NMS-ed detections.
model = from_pretrained_keras("keras-io/Object-Detection-RetinaNet")
img_input = tf.keras.Input(shape=[None, None, 3], name="image")
predictions = model(img_input, training=False)
detections = DecodePredictions(confidence_threshold=0.5)(img_input, predictions)
inference_model = tf.keras.Model(inputs=img_input, outputs=detections)

def predict(image):
    input_image, ratio = prepare_image(image)
    # `detections` is the named tuple returned by
    # tf.image.combined_non_max_suppression:
    # (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections).
    detections = inference_model.predict(input_image)
    num_detections = detections.valid_detections[0]
    class_names = [
        int2str(int(x)) for x in detections.nmsed_classes[0][:num_detections]
    ]
    img_buf = io.BytesIO()
    ax = visualize_detections(
        image,
        detections.nmsed_boxes[0][:num_detections] / ratio,
        class_names,
        detections.nmsed_scores[0][:num_detections],
    )
    ax.figure.savefig(img_buf)
    # Close the figure so repeated requests do not accumulate open figures.
    plt.close(ax.figure)
    img_buf.seek(0)
    img = Image.open(img_buf)
    return img

# Input
input = gr.inputs.Image(image_mode="RGB", type="numpy", label="Enter Object Image")

# Output
output = gr.outputs.Image(type="pil", label="Detected Objects with Class Category")

title = "Object Detection With RetinaNet"
description = "Upload an image, or pick one of the examples, to localize the objects it contains and classify them into categories."

gr.Interface(
    fn=predict,
    inputs=input,
    outputs=output,
    examples=coco_image,
    allow_flagging=False,
    analytics_enabled=False,
    title=title,
    description=description,
).launch(enable_queue=True, debug=True)