Kroy committed on
Commit
e80fad1
1 Parent(s): 162c0bc

Upload 10 files

Files changed (10)
  1. Dockerfile +30 -0
  2. app.py +87 -0
  3. classes.py +15 -0
  4. coco.py +519 -0
  5. config.py +172 -0
  6. model.py +0 -0
  7. parallel_model.py +173 -0
  8. requirements.txt +20 -0
  9. shapes.py +184 -0
  10. utils.py +736 -0
Dockerfile ADDED
@@ -0,0 +1,30 @@
+ # Use the official Python 3.7 image
+ FROM python:3.7
+
+ # Set the working directory to /code
+ WORKDIR /code
+
+ # Copy the requirements file into the container at /code
+ COPY ./requirements.txt /code/requirements.txt
+
+ # Install the dependencies listed in requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ # Set up a new user named "user" with user ID 1000
+ RUN useradd -m -u 1000 user
+ # Switch to the "user" user
+ USER user
+ # Set home to the user's home directory
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ # Set the working directory to the user's home directory
+ WORKDIR $HOME/app
+
+ # Copy the current directory contents into the container at $HOME/app, setting the owner to the user
+ COPY --chown=user . $HOME/app
+
+ # Start the FastAPI app on port 7860, the default port expected by Spaces
+ # CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+
+ CMD gunicorn -k uvicorn.workers.UvicornWorker --workers 2 --threads=2 --max-requests 512 --bind 0.0.0.0:7860 app:app
app.py ADDED
@@ -0,0 +1,87 @@
+ import warnings
+ warnings.filterwarnings("ignore")
+ # Import necessary packages
+
+ import os
+ import io
+ import sys
+ import base64
+ import random
+ import argparse
+ import math
+ import numpy as np
+
+ from typing import Any, Union, Dict, List
+ import requests
+ from PIL import Image
+ from imageio import imread
+ from keras import backend as K
+
+ import coco
+ import utils
+ import model as modellib
+ import visualize
+ from classes import class_names
+ from fastapi import FastAPI
+
+ # Create a new FastAPI app instance
+ app = FastAPI()
+
+ # Root directory of the project
+ ROOT_DIR = os.getcwd()
+
+ # Directory to save logs and trained model
+ MODEL_DIR = os.path.join(ROOT_DIR, "logs")
+
+ # Local path to trained weights file
+ COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
+ os.system("pip install pycocotools==2.0.0")
+ K.clear_session()
+
+ # Download COCO trained weights if they aren't present locally
+ if not os.path.exists(COCO_MODEL_PATH):
+     utils.download_trained_weights(COCO_MODEL_PATH)
+
+ class InferenceConfig(coco.CocoConfig):
+     # Run inference on one image at a time (batch size = GPU_COUNT * IMAGES_PER_GPU)
+     GPU_COUNT = 1
+     IMAGES_PER_GPU = 1
+ config = InferenceConfig()
+
+ model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)
+ model.load_weights(COCO_MODEL_PATH, by_name=True)
+
+
+ # Define a function to handle the GET request at `/generate`.
+ # The generate() function takes a string parameter called `path`, the URL
+ # of an image. It runs Mask R-CNN detection on the image and returns a JSON
+ # response containing the annotated image bytes under the key "output".
+ @app.get("/generate")
+ def generate(path: str):
+     """
+     Run Mask R-CNN instance segmentation on the image at the given URL and
+     return the visualized detections as JPEG bytes.
+     """
+     # Fetch the image and convert it to an RGB numpy array
+     r = requests.get(path, stream=True)
+     img = Image.open(io.BytesIO(r.content)).convert('RGB')
+     image = np.array(img)
+
+     results = model.detect([image], verbose=1)
+
+     # Get results and render them on the image
+     r = results[0]
+     output_image = visualize.display_instances_and_save(image,
+         r['rois'], r['masks'], r['class_ids'], class_names, r['scores'])
+
+     image = Image.fromarray(output_image)
+     im_file = io.BytesIO()
+     image.save(im_file, format="JPEG")
+     im_bytes = im_file.getvalue()  # im_bytes: image in binary format
+     # Return the annotated image in a JSON response
+     return {"output": im_bytes}
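Note: a minimal client sketch for the `/generate` endpoint above (not part of the commit; the host, port, and input URL are assumptions). Since the endpoint puts raw JPEG bytes inside a JSON body, a production client may want the service to base64-encode the payload instead.

# Hypothetical client; assumes the container is reachable at localhost:7860.
import requests

IMAGE_URL = "https://example.com/some_image.jpg"  # hypothetical input image
resp = requests.get("http://localhost:7860/generate",
                    params={"path": IMAGE_URL})  # matches generate(path: str)
resp.raise_for_status()
output = resp.json()["output"]  # annotated image under the "output" key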
classes.py ADDED
@@ -0,0 +1,15 @@
+ class_names = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
+                'bus', 'train', 'truck', 'boat', 'traffic light',
+                'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
+                'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
+                'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
+                'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
+                'kite', 'baseball bat', 'baseball glove', 'skateboard',
+                'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
+                'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+                'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
+                'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
+                'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+                'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',
+                'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
+                'teddy bear', 'hair drier', 'toothbrush']
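Note: the list above holds the 81 COCO labels (background plus 80 categories), and the class IDs returned by model.detect() index directly into it. A small sketch:

from classes import class_names

# class_names has 81 entries: 'BG' plus the 80 COCO categories.
assert len(class_names) == 81
# Detected class IDs are list indices, e.g.:
print(class_names[1], class_names[3])  # person car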
coco.py ADDED
@@ -0,0 +1,519 @@
+ """
+ Mask R-CNN
+ Configurations and data loading code for MS COCO.
+
+ Copyright (c) 2017 Matterport, Inc.
+ Licensed under the MIT License (see LICENSE for details)
+ Written by Waleed Abdulla
+
+ ------------------------------------------------------------
+
+ Usage: import the module (see Jupyter notebooks for examples), or run from
+ the command line as such:
+
+     # Train a new model starting from pre-trained COCO weights
+     python3 coco.py train --dataset=/path/to/coco/ --model=coco
+
+     # Train a new model starting from ImageNet weights
+     python3 coco.py train --dataset=/path/to/coco/ --model=imagenet
+
+     # Continue training a model that you had trained earlier
+     python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5
+
+     # Continue training the last model you trained
+     python3 coco.py train --dataset=/path/to/coco/ --model=last
+
+     # Run COCO evaluation on the last model you trained
+     python3 coco.py evaluate --dataset=/path/to/coco/ --model=last
+ """
+
+ import os
+ import time
+ import numpy as np
+
+ # Download and install the Python COCO tools from https://github.com/waleedka/coco
+ # That's a fork from the original https://github.com/pdollar/coco with a bug
+ # fix for Python 3.
+ # I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50
+ # If the PR is merged then use the original repo.
+ # Note: Edit PythonAPI/Makefile and replace "python" with "python3".
+ from pycocotools.coco import COCO
+ from pycocotools.cocoeval import COCOeval
+ from pycocotools import mask as maskUtils
+
+ import zipfile
+ import urllib.request
+ import shutil
+
+ from config import Config
+ import utils
+ import model as modellib
+
+ # Root directory of the project
+ ROOT_DIR = os.getcwd()
+
+ # Path to trained weights file
+ COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
+
+ # Directory to save logs and model checkpoints, if not provided
+ # through the command line argument --logs
+ DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
+ DEFAULT_DATASET_YEAR = "2014"
+
+ ############################################################
+ #  Configurations
+ ############################################################
+
+
+ class CocoConfig(Config):
+     """Configuration for training on MS COCO.
+     Derives from the base Config class and overrides values specific
+     to the COCO dataset.
+     """
+     # Give the configuration a recognizable name
+     NAME = "coco"
+
+     # We use a GPU with 12GB memory, which can fit two images.
+     # Adjust down if you use a smaller GPU.
+     IMAGES_PER_GPU = 2
+
+     # Uncomment to train on 8 GPUs (default is 1)
+     # GPU_COUNT = 8
+
+     # Number of classes (including background)
+     NUM_CLASSES = 1 + 80  # COCO has 80 classes
+
+
+ ############################################################
+ #  Dataset
+ ############################################################
+
+ class CocoDataset(utils.Dataset):
+     def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None,
+                   class_map=None, return_coco=False, auto_download=False):
+         """Load a subset of the COCO dataset.
+         dataset_dir: The root directory of the COCO dataset.
+         subset: What to load (train, val, minival, valminusminival)
+         year: What dataset year to load (2014, 2017) as a string, not an integer
+         class_ids: If provided, only loads images that have the given classes.
+         class_map: TODO: Not implemented yet. Supports mapping classes from
+             different datasets to the same class ID.
+         return_coco: If True, returns the COCO object.
+         auto_download: Automatically download and unzip MS-COCO images and annotations
+         """
+
+         if auto_download is True:
+             self.auto_download(dataset_dir, subset, year)
+
+         coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year))
+         if subset == "minival" or subset == "valminusminival":
+             subset = "val"
+         image_dir = "{}/{}{}".format(dataset_dir, subset, year)
+
+         # Load all classes or a subset?
+         if not class_ids:
+             # All classes
+             class_ids = sorted(coco.getCatIds())
+
+         # All images or a subset?
+         if class_ids:
+             image_ids = []
+             for id in class_ids:
+                 image_ids.extend(list(coco.getImgIds(catIds=[id])))
+             # Remove duplicates
+             image_ids = list(set(image_ids))
+         else:
+             # All images
+             image_ids = list(coco.imgs.keys())
+
+         # Add classes
+         for i in class_ids:
+             self.add_class("coco", i, coco.loadCats(i)[0]["name"])
+
+         # Add images
+         for i in image_ids:
+             self.add_image(
+                 "coco", image_id=i,
+                 path=os.path.join(image_dir, coco.imgs[i]['file_name']),
+                 width=coco.imgs[i]["width"],
+                 height=coco.imgs[i]["height"],
+                 annotations=coco.loadAnns(coco.getAnnIds(
+                     imgIds=[i], catIds=class_ids, iscrowd=None)))
+         if return_coco:
+             return coco
+
+     def auto_download(self, dataDir, dataType, dataYear):
+         """Download the COCO dataset/annotations if requested.
+         dataDir: The root directory of the COCO dataset.
+         dataType: What to load (train, val, minival, valminusminival)
+         dataYear: What dataset year to load (2014, 2017) as a string, not an integer
+         Note:
+             For 2014, use "train", "val", "minival", or "valminusminival"
+             For 2017, only "train" and "val" annotations are available
+         """
+
+         # Setup paths and file names
+         if dataType == "minival" or dataType == "valminusminival":
+             imgDir = "{}/{}{}".format(dataDir, "val", dataYear)
+             imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear)
+             imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear)
+         else:
+             imgDir = "{}/{}{}".format(dataDir, dataType, dataYear)
+             imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear)
+             imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear)
+         # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL)
+
+         # Create main folder if it doesn't exist yet
+         if not os.path.exists(dataDir):
+             os.makedirs(dataDir)
+
+         # Download images if not available locally
+         if not os.path.exists(imgDir):
+             os.makedirs(imgDir)
+             print("Downloading images to " + imgZipFile + " ...")
+             with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out:
+                 shutil.copyfileobj(resp, out)
+             print("... done downloading.")
+             print("Unzipping " + imgZipFile)
+             with zipfile.ZipFile(imgZipFile, "r") as zip_ref:
+                 zip_ref.extractall(dataDir)
+             print("... done unzipping")
+         print("Will use images in " + imgDir)
+
+         # Setup annotations data paths
+         annDir = "{}/annotations".format(dataDir)
+         if dataType == "minival":
+             annZipFile = "{}/instances_minival2014.json.zip".format(dataDir)
+             annFile = "{}/instances_minival2014.json".format(annDir)
+             annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0"
+             unZipDir = annDir
+         elif dataType == "valminusminival":
+             annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir)
+             annFile = "{}/instances_valminusminival2014.json".format(annDir)
+             annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0"
+             unZipDir = annDir
+         else:
+             annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear)
+             annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear)
+             annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear)
+             unZipDir = dataDir
+         # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL)
+
+         # Download annotations if not available locally
+         if not os.path.exists(annDir):
+             os.makedirs(annDir)
+         if not os.path.exists(annFile):
+             if not os.path.exists(annZipFile):
+                 print("Downloading zipped annotations to " + annZipFile + " ...")
+                 with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out:
+                     shutil.copyfileobj(resp, out)
+                 print("... done downloading.")
+             print("Unzipping " + annZipFile)
+             with zipfile.ZipFile(annZipFile, "r") as zip_ref:
+                 zip_ref.extractall(unZipDir)
+             print("... done unzipping")
+         print("Will use annotations in " + annFile)
+
+     def load_mask(self, image_id):
+         """Load instance masks for the given image.
+
+         Different datasets use different ways to store masks. This
+         function converts the different mask formats to one format
+         in the form of a bitmap [height, width, instances].
+
+         Returns:
+         masks: A bool array of shape [height, width, instance count] with
+             one mask per instance.
+         class_ids: a 1D array of class IDs of the instance masks.
+         """
+         # If not a COCO image, delegate to parent class.
+         image_info = self.image_info[image_id]
+         if image_info["source"] != "coco":
+             return super(CocoDataset, self).load_mask(image_id)
+
+         instance_masks = []
+         class_ids = []
+         annotations = self.image_info[image_id]["annotations"]
+         # Build mask of shape [height, width, instance_count] and list
+         # of class IDs that correspond to each channel of the mask.
+         for annotation in annotations:
+             class_id = self.map_source_class_id(
+                 "coco.{}".format(annotation['category_id']))
+             if class_id:
+                 m = self.annToMask(annotation, image_info["height"],
+                                    image_info["width"])
+                 # Some objects are so small that they're less than 1 pixel area
+                 # and end up rounded out. Skip those objects.
+                 if m.max() < 1:
+                     continue
+                 # Is it a crowd? If so, use a negative class ID.
+                 if annotation['iscrowd']:
+                     # Use negative class ID for crowds
+                     class_id *= -1
+                     # For crowd masks, annToMask() sometimes returns a mask
+                     # smaller than the given dimensions. If so, resize it.
+                     if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]:
+                         m = np.ones([image_info["height"], image_info["width"]], dtype=bool)
+                 instance_masks.append(m)
+                 class_ids.append(class_id)
+
+         # Pack instance masks into an array
+         if class_ids:
+             mask = np.stack(instance_masks, axis=2)
+             class_ids = np.array(class_ids, dtype=np.int32)
+             return mask, class_ids
+         else:
+             # Call super class to return an empty mask
+             return super(CocoDataset, self).load_mask(image_id)
+
+     def image_reference(self, image_id):
+         """Return a link to the image in the COCO Website."""
+         info = self.image_info[image_id]
+         if info["source"] == "coco":
+             return "http://cocodataset.org/#explore?id={}".format(info["id"])
+         else:
+             super(CocoDataset, self).image_reference(image_id)
+
+     # The following two functions are from pycocotools with a few changes.
+
+     def annToRLE(self, ann, height, width):
+         """
+         Convert annotation which can be polygons or uncompressed RLE to RLE.
+         :return: RLE (run-length encoding of the mask)
+         """
+         segm = ann['segmentation']
+         if isinstance(segm, list):
+             # polygon -- a single object might consist of multiple parts
+             # we merge all parts into one mask RLE code
+             rles = maskUtils.frPyObjects(segm, height, width)
+             rle = maskUtils.merge(rles)
+         elif isinstance(segm['counts'], list):
+             # uncompressed RLE
+             rle = maskUtils.frPyObjects(segm, height, width)
+         else:
+             # rle
+             rle = ann['segmentation']
+         return rle
+
+     def annToMask(self, ann, height, width):
+         """
+         Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
+         :return: binary mask (numpy 2D array)
+         """
+         rle = self.annToRLE(ann, height, width)
+         m = maskUtils.decode(rle)
+         return m
+
+
+ ############################################################
+ #  COCO Evaluation
+ ############################################################
+
+ def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks):
+     """Arrange results to match COCO specs in http://cocodataset.org/#format
+     """
+     # If there are no results, return an empty list
+     if rois is None:
+         return []
+
+     results = []
+     for image_id in image_ids:
+         # Loop through detections
+         for i in range(rois.shape[0]):
+             class_id = class_ids[i]
+             score = scores[i]
+             bbox = np.around(rois[i], 1)
+             mask = masks[:, :, i]
+
+             result = {
+                 "image_id": image_id,
+                 "category_id": dataset.get_source_class_id(class_id, "coco"),
+                 "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]],
+                 "score": score,
+                 "segmentation": maskUtils.encode(np.asfortranarray(mask))
+             }
+             results.append(result)
+     return results
+
+
+ def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
+     """Runs official COCO evaluation.
+     dataset: A Dataset object with validation data
+     eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
+     limit: if not 0, it's the number of images to use for evaluation
+     """
+     # Pick COCO images from the dataset
+     image_ids = image_ids or dataset.image_ids
+
+     # Limit to a subset
+     if limit:
+         image_ids = image_ids[:limit]
+
+     # Get corresponding COCO image IDs.
+     coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids]
+
+     t_prediction = 0
+     t_start = time.time()
+
+     results = []
+     for i, image_id in enumerate(image_ids):
+         # Load image
+         image = dataset.load_image(image_id)
+
+         # Run detection
+         t = time.time()
+         r = model.detect([image], verbose=0)[0]
+         t_prediction += (time.time() - t)
+
+         # Convert results to COCO format
+         image_results = build_coco_results(dataset, coco_image_ids[i:i + 1],
+                                            r["rois"], r["class_ids"],
+                                            r["scores"], r["masks"])
+         results.extend(image_results)
+
+     # Load results. This modifies results with additional attributes.
+     coco_results = coco.loadRes(results)
+
+     # Evaluate
+     cocoEval = COCOeval(coco, coco_results, eval_type)
+     cocoEval.params.imgIds = coco_image_ids
+     cocoEval.evaluate()
+     cocoEval.accumulate()
+     cocoEval.summarize()
+
+     print("Prediction time: {}. Average {}/image".format(
+         t_prediction, t_prediction / len(image_ids)))
+     print("Total time: ", time.time() - t_start)
+
+
+ ############################################################
+ #  Training
+ ############################################################
+
+
+ if __name__ == '__main__':
+     import argparse
+
+     # Parse command line arguments
+     parser = argparse.ArgumentParser(
+         description='Train Mask R-CNN on MS COCO.')
+     parser.add_argument("command",
+                         metavar="<command>",
+                         help="'train' or 'evaluate' on MS COCO")
+     parser.add_argument('--dataset', required=True,
+                         metavar="/path/to/coco/",
+                         help='Directory of the MS-COCO dataset')
+     parser.add_argument('--year', required=False,
+                         default=DEFAULT_DATASET_YEAR,
+                         metavar="<year>",
+                         help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)')
+     parser.add_argument('--model', required=True,
+                         metavar="/path/to/weights.h5",
+                         help="Path to weights .h5 file or 'coco'")
+     parser.add_argument('--logs', required=False,
+                         default=DEFAULT_LOGS_DIR,
+                         metavar="/path/to/logs/",
+                         help='Logs and checkpoints directory (default=logs/)')
+     parser.add_argument('--limit', required=False,
+                         default=500,
+                         metavar="<image count>",
+                         help='Images to use for evaluation (default=500)')
+     parser.add_argument('--download', required=False,
+                         default=False,
+                         metavar="<True|False>",
+                         help='Automatically download and unzip MS-COCO files (default=False)',
+                         type=bool)
+     args = parser.parse_args()
+     print("Command: ", args.command)
+     print("Model: ", args.model)
+     print("Dataset: ", args.dataset)
+     print("Year: ", args.year)
+     print("Logs: ", args.logs)
+     print("Auto Download: ", args.download)
+
+     # Configurations
+     if args.command == "train":
+         config = CocoConfig()
+     else:
+         class InferenceConfig(CocoConfig):
+             # Set batch size to 1 since we'll be running inference on
+             # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
+             GPU_COUNT = 1
+             IMAGES_PER_GPU = 1
+             DETECTION_MIN_CONFIDENCE = 0
+         config = InferenceConfig()
+     config.display()
+
+     # Create model
+     if args.command == "train":
+         model = modellib.MaskRCNN(mode="training", config=config,
+                                   model_dir=args.logs)
+     else:
+         model = modellib.MaskRCNN(mode="inference", config=config,
+                                   model_dir=args.logs)
+
+     # Select weights file to load
+     if args.model.lower() == "coco":
+         model_path = COCO_MODEL_PATH
+     elif args.model.lower() == "last":
+         # Find last trained weights
+         model_path = model.find_last()[1]
+     elif args.model.lower() == "imagenet":
+         # Start from ImageNet trained weights
+         model_path = model.get_imagenet_weights()
+     else:
+         model_path = args.model
+
+     # Load weights
+     print("Loading weights ", model_path)
+     model.load_weights(model_path, by_name=True)
+
+     # Train or evaluate
+     if args.command == "train":
+         # Training dataset. Use the training set and 35K from the
+         # validation set, as in the Mask RCNN paper.
+         dataset_train = CocoDataset()
+         dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download)
+         dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download)
+         dataset_train.prepare()
+
+         # Validation dataset
+         dataset_val = CocoDataset()
+         dataset_val.load_coco(args.dataset, "minival", year=args.year, auto_download=args.download)
+         dataset_val.prepare()
+
+         # *** This training schedule is an example. Update to your needs ***
+
+         # Training - Stage 1
+         print("Training network heads")
+         model.train(dataset_train, dataset_val,
+                     learning_rate=config.LEARNING_RATE,
+                     epochs=40,
+                     layers='heads')
+
+         # Training - Stage 2
+         # Finetune layers from ResNet stage 4 and up
+         print("Fine tune Resnet stage 4 and up")
+         model.train(dataset_train, dataset_val,
+                     learning_rate=config.LEARNING_RATE,
+                     epochs=120,
+                     layers='4+')
+
+         # Training - Stage 3
+         # Fine tune all layers
+         print("Fine tune all layers")
+         model.train(dataset_train, dataset_val,
+                     learning_rate=config.LEARNING_RATE / 10,
+                     epochs=160,
+                     layers='all')
+
+     elif args.command == "evaluate":
+         # Validation dataset
+         dataset_val = CocoDataset()
+         coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, return_coco=True, auto_download=args.download)
+         dataset_val.prepare()
+         print("Running COCO evaluation on {} images.".format(args.limit))
+         evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit))
+     else:
+         print("'{}' is not recognized. "
+               "Use 'train' or 'evaluate'".format(args.command))
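Note: a usage sketch for driving CocoDataset as a library instead of via the CLI above (the dataset path is a hypothetical placeholder; all calls appear in the file):

from coco import CocoDataset

dataset = CocoDataset()
dataset.load_coco("/path/to/coco", "val", year="2017")  # loads annotations
dataset.prepare()  # builds class/image indices from the info dicts

# Masks come back as [height, width, instance_count] plus class IDs.
mask, class_ids = dataset.load_mask(dataset.image_ids[0])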
config.py ADDED
@@ -0,0 +1,172 @@
+ """
+ Mask R-CNN
+ Base Configurations class.
+
+ Copyright (c) 2017 Matterport, Inc.
+ Licensed under the MIT License (see LICENSE for details)
+ Written by Waleed Abdulla
+ """
+
+ import math
+ import numpy as np
+
+
+ # Base Configuration Class
+ # Don't use this class directly. Instead, sub-class it and override
+ # the configurations you need to change.
+
+ class Config(object):
+     """Base configuration class. For custom configurations, create a
+     sub-class that inherits from this one and override properties
+     that need to be changed.
+     """
+     # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc.
+     # Useful if your code needs to do things differently depending on which
+     # experiment is running.
+     NAME = None  # Override in sub-classes
+
+     # NUMBER OF GPUs to use. For CPU training, use 1
+     GPU_COUNT = 1
+
+     # Number of images to train with on each GPU. A 12GB GPU can typically
+     # handle 2 images of 1024x1024px.
+     # Adjust based on your GPU memory and image sizes. Use the highest
+     # number that your GPU can handle for best performance.
+     IMAGES_PER_GPU = 2
+
+     # Number of training steps per epoch
+     # This doesn't need to match the size of the training set. Tensorboard
+     # updates are saved at the end of each epoch, so setting this to a
+     # smaller number means getting more frequent TensorBoard updates.
+     # Validation stats are also calculated at each epoch end and they
+     # might take a while, so don't set this too small to avoid spending
+     # a lot of time on validation stats.
+     STEPS_PER_EPOCH = 1000
+
+     # Number of validation steps to run at the end of every training epoch.
+     # A bigger number improves accuracy of validation stats, but slows
+     # down the training.
+     VALIDATION_STEPS = 50
+
+     # Backbone network architecture
+     # Supported values are: resnet50, resnet101
+     BACKBONE = "resnet101"
+
+     # The strides of each layer of the FPN Pyramid. These values
+     # are based on a Resnet101 backbone.
+     BACKBONE_STRIDES = [4, 8, 16, 32, 64]
+
+     # Number of classification classes (including background)
+     NUM_CLASSES = 1  # Override in sub-classes
+
+     # Length of square anchor side in pixels
+     RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
+
+     # Ratios of anchors at each cell (width/height)
+     # A value of 1 represents a square anchor, and 0.5 is a wide anchor
+     RPN_ANCHOR_RATIOS = [0.5, 1, 2]
+
+     # Anchor stride
+     # If 1 then anchors are created for each cell in the backbone feature map.
+     # If 2, then anchors are created for every other cell, and so on.
+     RPN_ANCHOR_STRIDE = 1
+
+     # Non-max suppression threshold to filter RPN proposals.
+     # You can reduce this during training to generate more proposals.
+     RPN_NMS_THRESHOLD = 0.7
+
+     # How many anchors per image to use for RPN training
+     RPN_TRAIN_ANCHORS_PER_IMAGE = 256
+
+     # ROIs kept after non-maximum suppression (training and inference)
+     POST_NMS_ROIS_TRAINING = 2000
+     POST_NMS_ROIS_INFERENCE = 1000
+
+     # If enabled, resizes instance masks to a smaller size to reduce
+     # memory load. Recommended when using high-resolution images.
+     USE_MINI_MASK = True
+     MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask
+
+     # Input image resizing
+     # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and
+     # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't
+     # be satisfied together the IMAGE_MAX_DIM is enforced.
+     IMAGE_MIN_DIM = 800
+     IMAGE_MAX_DIM = 1024
+     # If True, pad images with zeros such that they're (max_dim by max_dim)
+     IMAGE_PADDING = True  # currently, the False option is not supported
+
+     # Image mean (RGB)
+     MEAN_PIXEL = np.array([123.7, 116.8, 103.9])
+
+     # Number of ROIs per image to feed to classifier/mask heads
+     # The Mask RCNN paper uses 512 but often the RPN doesn't generate
+     # enough positive proposals to fill this and keep a positive:negative
+     # ratio of 1:3. You can increase the number of proposals by adjusting
+     # the RPN NMS threshold.
+     TRAIN_ROIS_PER_IMAGE = 200
+
+     # Percent of positive ROIs used to train classifier/mask heads
+     ROI_POSITIVE_RATIO = 0.33
+
+     # Pooled ROIs
+     POOL_SIZE = 7
+     MASK_POOL_SIZE = 14
+     MASK_SHAPE = [28, 28]
+
+     # Maximum number of ground truth instances to use in one image
+     MAX_GT_INSTANCES = 100
+
+     # Bounding box refinement standard deviation for RPN and final detections.
+     RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
+     BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
+
+     # Max number of final detections
+     DETECTION_MAX_INSTANCES = 100
+
+     # Minimum probability value to accept a detected instance
+     # ROIs below this threshold are skipped
+     DETECTION_MIN_CONFIDENCE = 0.7
+
+     # Non-maximum suppression threshold for detection
+     DETECTION_NMS_THRESHOLD = 0.3
+
+     # Learning rate and momentum
+     # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
+     # weights to explode. Likely due to differences in optimizer
+     # implementation.
+     LEARNING_RATE = 0.001
+     LEARNING_MOMENTUM = 0.9
+
+     # Weight decay regularization
+     WEIGHT_DECAY = 0.0001
+
+     # Use RPN ROIs or externally generated ROIs for training
+     # Keep this True for most situations. Set to False if you want to train
+     # the head branches on ROIs generated by code rather than the ROIs from
+     # the RPN. For example, to debug the classifier head without having to
+     # train the RPN.
+     USE_RPN_ROIS = True
+
+     def __init__(self):
+         """Set values of computed attributes."""
+         # Effective batch size
+         self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT
+
+         # Input image size
+         self.IMAGE_SHAPE = np.array(
+             [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3])
+
+         # Compute backbone size from input image size
+         self.BACKBONE_SHAPES = np.array(
+             [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)),
+               int(math.ceil(self.IMAGE_SHAPE[1] / stride))]
+              for stride in self.BACKBONE_STRIDES])
+
+     def display(self):
+         """Display Configuration values."""
+         print("\nConfigurations:")
+         for a in dir(self):
+             if not a.startswith("__") and not callable(getattr(self, a)):
+                 print("{:30} {}".format(a, getattr(self, a)))
+         print("\n")
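Note: because BATCH_SIZE and IMAGE_SHAPE are computed in __init__(), overrides must be class attributes on a sub-class rather than assigned after construction. A minimal sketch (the sub-class name and values are illustrative only):

from config import Config

class TwoGPUConfig(Config):  # hypothetical sub-class for illustration
    NAME = "demo"
    GPU_COUNT = 2
    IMAGES_PER_GPU = 3

c = TwoGPUConfig()
print(c.BATCH_SIZE)  # 6 == IMAGES_PER_GPU * GPU_COUNT
c.display()          # prints all configuration values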
model.py ADDED
The diff for this file is too large to render. See raw diff
 
parallel_model.py ADDED
@@ -0,0 +1,173 @@
+ """
+ Mask R-CNN
+ Multi-GPU Support for Keras.
+
+ Copyright (c) 2017 Matterport, Inc.
+ Licensed under the MIT License (see LICENSE for details)
+ Written by Waleed Abdulla
+
+ Ideas and small code snippets from these sources:
+ https://github.com/fchollet/keras/issues/2436
+ https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012
+ https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/
+ https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py
+ """
+
+ import tensorflow as tf
+ import keras.backend as K
+ import keras.layers as KL
+ import keras.models as KM
+
+
+ class ParallelModel(KM.Model):
+     """Subclasses the standard Keras Model and adds multi-GPU support.
+     It works by creating a copy of the model on each GPU. Then it slices
+     the inputs and sends a slice to each copy of the model, and then
+     merges the outputs together and applies the loss on the combined
+     outputs.
+     """
+
+     def __init__(self, keras_model, gpu_count):
+         """Class constructor.
+         keras_model: The Keras model to parallelize
+         gpu_count: Number of GPUs. Must be > 1
+         """
+         self.inner_model = keras_model
+         self.gpu_count = gpu_count
+         merged_outputs = self.make_parallel()
+         super(ParallelModel, self).__init__(inputs=self.inner_model.inputs,
+                                             outputs=merged_outputs)
+
+     def __getattribute__(self, attrname):
+         """Redirect loading and saving methods to the inner model. That's where
+         the weights are stored."""
+         if 'load' in attrname or 'save' in attrname:
+             return getattr(self.inner_model, attrname)
+         return super(ParallelModel, self).__getattribute__(attrname)
+
+     def summary(self, *args, **kwargs):
+         """Override summary() to display summaries of both the wrapper
+         and inner models."""
+         super(ParallelModel, self).summary(*args, **kwargs)
+         self.inner_model.summary(*args, **kwargs)
+
+     def make_parallel(self):
+         """Creates a new wrapper model that consists of multiple replicas of
+         the original model placed on different GPUs.
+         """
+         # Slice inputs. Slice inputs on the CPU to avoid sending a copy
+         # of the full inputs to all GPUs. Saves on bandwidth and memory.
+         input_slices = {name: tf.split(x, self.gpu_count)
+                         for name, x in zip(self.inner_model.input_names,
+                                            self.inner_model.inputs)}
+
+         output_names = self.inner_model.output_names
+         outputs_all = []
+         for i in range(len(self.inner_model.outputs)):
+             outputs_all.append([])
+
+         # Run the model call() on each GPU to place the ops there
+         for i in range(self.gpu_count):
+             with tf.device('/gpu:%d' % i):
+                 with tf.name_scope('tower_%d' % i):
+                     # Run a slice of inputs through this replica
+                     zipped_inputs = zip(self.inner_model.input_names,
+                                         self.inner_model.inputs)
+                     inputs = [
+                         KL.Lambda(lambda s: input_slices[name][i],
+                                   output_shape=lambda s: (None,) + s[1:])(tensor)
+                         for name, tensor in zipped_inputs]
+                     # Create the model replica and get the outputs
+                     outputs = self.inner_model(inputs)
+                     if not isinstance(outputs, list):
+                         outputs = [outputs]
+                     # Save the outputs for merging back together later
+                     for l, o in enumerate(outputs):
+                         outputs_all[l].append(o)
+
+         # Merge outputs on CPU
+         with tf.device('/cpu:0'):
+             merged = []
+             for outputs, name in zip(outputs_all, output_names):
+                 # If outputs are numbers without dimensions, add a batch dim.
+                 def add_dim(tensor):
+                     """Add a dimension to tensors that don't have any."""
+                     if K.int_shape(tensor) == ():
+                         return KL.Lambda(lambda t: K.reshape(t, [1, 1]))(tensor)
+                     return tensor
+                 outputs = list(map(add_dim, outputs))
+
+                 # Concatenate
+                 merged.append(KL.Concatenate(axis=0, name=name)(outputs))
+         return merged
+
+
+ if __name__ == "__main__":
+     # Testing code below. It creates a simple model to train on MNIST and
+     # tries to run it on 2 GPUs. It saves the graph so it can be viewed
+     # in TensorBoard. Run it as:
+     #
+     # python3 parallel_model.py
+
+     import os
+     import numpy as np
+     import keras.optimizers
+     from keras.datasets import mnist
+     from keras.preprocessing.image import ImageDataGenerator
+
+     GPU_COUNT = 2
+
+     # Root directory of the project
+     ROOT_DIR = os.getcwd()
+
+     # Directory to save logs and trained model
+     MODEL_DIR = os.path.join(ROOT_DIR, "logs/parallel")
+
+     def build_model(x_train, num_classes):
+         # Reset default graph. Keras leaves old ops in the graph,
+         # which are ignored for execution but clutter graph
+         # visualization in TensorBoard.
+         tf.reset_default_graph()
+
+         inputs = KL.Input(shape=x_train.shape[1:], name="input_image")
+         x = KL.Conv2D(32, (3, 3), activation='relu', padding="same",
+                       name="conv1")(inputs)
+         x = KL.Conv2D(64, (3, 3), activation='relu', padding="same",
+                       name="conv2")(x)
+         x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x)
+         x = KL.Flatten(name="flat1")(x)
+         x = KL.Dense(128, activation='relu', name="dense1")(x)
+         x = KL.Dense(num_classes, activation='softmax', name="dense2")(x)
+
+         return KM.Model(inputs, x, "digit_classifier_model")
+
+     # Load MNIST Data
+     (x_train, y_train), (x_test, y_test) = mnist.load_data()
+     x_train = np.expand_dims(x_train, -1).astype('float32') / 255
+     x_test = np.expand_dims(x_test, -1).astype('float32') / 255
+
+     print('x_train shape:', x_train.shape)
+     print('x_test shape:', x_test.shape)
+
+     # Build data generator and model
+     datagen = ImageDataGenerator()
+     model = build_model(x_train, 10)
+
+     # Add multi-GPU support.
+     model = ParallelModel(model, GPU_COUNT)
+
+     optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0)
+
+     model.compile(loss='sparse_categorical_crossentropy',
+                   optimizer=optimizer, metrics=['accuracy'])
+
+     model.summary()
+
+     # Train
+     model.fit_generator(
+         datagen.flow(x_train, y_train, batch_size=64),
+         steps_per_epoch=50, epochs=10, verbose=1,
+         validation_data=(x_test, y_test),
+         callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR,
+                                                write_graph=True)]
+     )
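Note: wrapping an existing model is the one-liner shown in the test code above; a condensed sketch (reuses build_model and x_train from the __main__ demo, so those names are assumptions outside that context). Batch sizes fed to fit/fit_generator should be divisible by gpu_count, since tf.split() slices each input batch evenly across replicas.

from parallel_model import ParallelModel

model = build_model(x_train, num_classes=10)  # from the __main__ demo above
model = ParallelModel(model, gpu_count=2)     # replicate across 2 GPUs
model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd')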
requirements.txt ADDED
@@ -0,0 +1,20 @@
+ numpy==1.21.6
+ scipy==1.2.2
+ Pillow==9.5.0
+ Cython==0.29.34
+ matplotlib==3.5.3
+ scikit-image==0.19.3
+ tensorflow==1.13.1
+ keras==2.0.8
+ opencv-python-headless==4.7.0.72
+ h5py==2.10.0
+ imgaug==0.4.0
+ ipython==7.34.0
+
+ imageio==2.9.0
+ requests==2.27.*
+ uvloop==0.15.2
+ uvicorn==0.13.4
+ httptools==0.2.0
+ fastapi==0.74.*
+ gunicorn==20.1.0
shapes.py ADDED
@@ -0,0 +1,184 @@
+ """
+ Mask R-CNN
+ Configurations and data loading code for the synthetic Shapes dataset.
+ This is a duplicate of the code in the notebook train_shapes.ipynb for easy
+ import into other notebooks, such as inspect_model.ipynb.
+
+ Copyright (c) 2017 Matterport, Inc.
+ Licensed under the MIT License (see LICENSE for details)
+ Written by Waleed Abdulla
+ """
+
+ import math
+ import random
+ import numpy as np
+ import cv2
+
+ from config import Config
+ import utils
+
+
+ class ShapesConfig(Config):
+     """Configuration for training on the toy shapes dataset.
+     Derives from the base Config class and overrides values specific
+     to the toy shapes dataset.
+     """
+     # Give the configuration a recognizable name
+     NAME = "shapes"
+
+     # Train on 1 GPU and 8 images per GPU. We can put multiple images on each
+     # GPU because the images are small. Batch size is 8 (GPUs * images/GPU).
+     GPU_COUNT = 1
+     IMAGES_PER_GPU = 8
+
+     # Number of classes (including background)
+     NUM_CLASSES = 1 + 3  # background + 3 shapes
+
+     # Use small images for faster training. Set the limits of the small side
+     # and the large side, and that determines the image shape.
+     IMAGE_MIN_DIM = 128
+     IMAGE_MAX_DIM = 128
+
+     # Use smaller anchors because our image and objects are small
+     RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)  # anchor side in pixels
+
+     # Reduce training ROIs per image because the images are small and have
+     # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
+     TRAIN_ROIS_PER_IMAGE = 32
+
+     # Use a small epoch since the data is simple
+     STEPS_PER_EPOCH = 100
+
+     # Use small validation steps since the epoch is small
+     VALIDATION_STEPS = 5
+
+
+ class ShapesDataset(utils.Dataset):
+     """Generates the shapes synthetic dataset. The dataset consists of simple
+     shapes (triangles, squares, circles) placed randomly on a blank surface.
+     The images are generated on the fly. No file access required.
+     """
+
+     def load_shapes(self, count, height, width):
+         """Generate the requested number of synthetic images.
+         count: number of images to generate.
+         height, width: the size of the generated images.
+         """
+         # Add classes
+         self.add_class("shapes", 1, "square")
+         self.add_class("shapes", 2, "circle")
+         self.add_class("shapes", 3, "triangle")
+
+         # Add images
+         # Generate random specifications of images (i.e. color and
+         # list of shape sizes and locations). This is more compact than
+         # actual images. Images are generated on the fly in load_image().
+         for i in range(count):
+             bg_color, shapes = self.random_image(height, width)
+             self.add_image("shapes", image_id=i, path=None,
+                            width=width, height=height,
+                            bg_color=bg_color, shapes=shapes)
+
+     def load_image(self, image_id):
+         """Generate an image from the specs of the given image ID.
+         Typically this function loads the image from a file, but
+         in this case it generates the image on the fly from the
+         specs in image_info.
+         """
+         info = self.image_info[image_id]
+         bg_color = np.array(info['bg_color']).reshape([1, 1, 3])
+         image = np.ones([info['height'], info['width'], 3], dtype=np.uint8)
+         image = image * bg_color.astype(np.uint8)
+         for shape, color, dims in info['shapes']:
+             image = self.draw_shape(image, shape, dims, color)
+         return image
+
+     def image_reference(self, image_id):
+         """Return the shapes data of the image."""
+         info = self.image_info[image_id]
+         if info["source"] == "shapes":
+             return info["shapes"]
+         else:
+             super(ShapesDataset, self).image_reference(image_id)
+
+     def load_mask(self, image_id):
+         """Generate instance masks for shapes of the given image ID.
+         """
+         info = self.image_info[image_id]
+         shapes = info['shapes']
+         count = len(shapes)
+         mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8)
+         for i, (shape, _, dims) in enumerate(info['shapes']):
+             mask[:, :, i:i + 1] = self.draw_shape(mask[:, :, i:i + 1].copy(),
+                                                   shape, dims, 1)
+         # Handle occlusions
+         occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8)
+         for i in range(count - 2, -1, -1):
+             mask[:, :, i] = mask[:, :, i] * occlusion
+             occlusion = np.logical_and(
+                 occlusion, np.logical_not(mask[:, :, i]))
+         # Map class names to class IDs.
+         class_ids = np.array([self.class_names.index(s[0]) for s in shapes])
+         return mask, class_ids.astype(np.int32)
+
+     def draw_shape(self, image, shape, dims, color):
+         """Draws a shape from the given specs."""
+         # Get the center x, y and the size s
+         x, y, s = dims
+         if shape == 'square':
+             image = cv2.rectangle(image, (x - s, y - s),
+                                   (x + s, y + s), color, -1)
+         elif shape == "circle":
+             image = cv2.circle(image, (x, y), s, color, -1)
+         elif shape == "triangle":
+             points = np.array([[(x, y - s),
+                                 (x - s / math.sin(math.radians(60)), y + s),
+                                 (x + s / math.sin(math.radians(60)), y + s),
+                                 ]], dtype=np.int32)
+             image = cv2.fillPoly(image, points, color)
+         return image
+
+     def random_shape(self, height, width):
+         """Generates specifications of a random shape that lies within
+         the given height and width boundaries.
+         Returns a tuple of three values:
+         * The shape name (square, circle, ...)
+         * Shape color: a tuple of 3 values, RGB.
+         * Shape dimensions: A tuple of values that define the shape size
+           and location. Differs per shape type.
+         """
+         # Shape
+         shape = random.choice(["square", "circle", "triangle"])
+         # Color
+         color = tuple([random.randint(0, 255) for _ in range(3)])
+         # Center x, y
+         buffer = 20
+         y = random.randint(buffer, height - buffer - 1)
+         x = random.randint(buffer, width - buffer - 1)
+         # Size
+         s = random.randint(buffer, height // 4)
+         return shape, color, (x, y, s)
+
+     def random_image(self, height, width):
+         """Creates random specifications of an image with multiple shapes.
+         Returns the background color of the image and a list of shape
+         specifications that can be used to draw the image.
+         """
+         # Pick a random background color
+         bg_color = np.array([random.randint(0, 255) for _ in range(3)])
+         # Generate a few random shapes and record their
+         # bounding boxes
+         shapes = []
+         boxes = []
+         N = random.randint(1, 4)
+         for _ in range(N):
+             shape, color, dims = self.random_shape(height, width)
+             shapes.append((shape, color, dims))
+             x, y, s = dims
+             boxes.append([y - s, x - s, y + s, x + s])
+         # Apply non-max suppression with a 0.3 threshold to avoid
+         # shapes covering each other
+         keep_ixs = utils.non_max_suppression(
+             np.array(boxes), np.arange(N), 0.3)
+         shapes = [s for i, s in enumerate(shapes) if i in keep_ixs]
+         return bg_color, shapes
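Note: a minimal sketch of generating and preparing the synthetic dataset with the classes above (the image count is arbitrary; this mirrors the train_shapes notebook usage the docstring refers to):

from shapes import ShapesConfig, ShapesDataset

config = ShapesConfig()

# Generate 500 synthetic 128x128 image specs; pixels are rendered
# lazily by load_image().
dataset = ShapesDataset()
dataset.load_shapes(500, config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1])
dataset.prepare()

image = dataset.load_image(0)           # [128, 128, 3] uint8
mask, class_ids = dataset.load_mask(0)  # [128, 128, N], [N]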
utils.py ADDED
@@ -0,0 +1,736 @@
1
+ """
2
+ Mask R-CNN
3
+ Common utility functions and classes.
4
+
5
+ Copyright (c) 2017 Matterport, Inc.
6
+ Licensed under the MIT License (see LICENSE for details)
7
+ Written by Waleed Abdulla
8
+ """
9
+
10
+ import sys
11
+ import os
12
+ import math
13
+ import random
14
+ import numpy as np
15
+ import tensorflow as tf
16
+ import scipy.misc
17
+ import skimage.color
18
+ import skimage.io
19
+ import urllib.request
20
+ import shutil
21
+
22
+ # URL from which to download the latest COCO trained weights
23
+ COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5"
24
+
25
+
26
+ ############################################################
27
+ # Bounding Boxes
28
+ ############################################################
29
+
30
+ def extract_bboxes(mask):
31
+ """Compute bounding boxes from masks.
32
+ mask: [height, width, num_instances]. Mask pixels are either 1 or 0.
33
+
34
+ Returns: bbox array [num_instances, (y1, x1, y2, x2)].
35
+ """
36
+ boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32)
37
+ for i in range(mask.shape[-1]):
38
+ m = mask[:, :, i]
39
+ # Bounding box.
40
+ horizontal_indicies = np.where(np.any(m, axis=0))[0]
41
+ vertical_indicies = np.where(np.any(m, axis=1))[0]
42
+ if horizontal_indicies.shape[0]:
43
+ x1, x2 = horizontal_indicies[[0, -1]]
44
+ y1, y2 = vertical_indicies[[0, -1]]
45
+ # x2 and y2 should not be part of the box. Increment by 1.
46
+ x2 += 1
47
+ y2 += 1
48
+ else:
49
+ # No mask for this instance. Might happen due to
50
+ # resizing or cropping. Set bbox to zeros
51
+ x1, x2, y1, y2 = 0, 0, 0, 0
52
+ boxes[i] = np.array([y1, x1, y2, x2])
53
+ return boxes.astype(np.int32)
54
+
55
+
56
+ def compute_iou(box, boxes, box_area, boxes_area):
57
+ """Calculates IoU of the given box with the array of the given boxes.
58
+ box: 1D vector [y1, x1, y2, x2]
59
+ boxes: [boxes_count, (y1, x1, y2, x2)]
60
+ box_area: float. the area of 'box'
61
+ boxes_area: array of length boxes_count.
62
+
63
+ Note: the areas are passed in rather than calculated here for
64
+ efficency. Calculate once in the caller to avoid duplicate work.
65
+ """
66
+ # Calculate intersection areas
67
+ y1 = np.maximum(box[0], boxes[:, 0])
68
+ y2 = np.minimum(box[2], boxes[:, 2])
69
+ x1 = np.maximum(box[1], boxes[:, 1])
70
+ x2 = np.minimum(box[3], boxes[:, 3])
71
+ intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
72
+ union = box_area + boxes_area[:] - intersection[:]
73
+ iou = intersection / union
74
+ return iou
75
+
76
+
77
+ def compute_overlaps(boxes1, boxes2):
78
+ """Computes IoU overlaps between two sets of boxes.
79
+ boxes1, boxes2: [N, (y1, x1, y2, x2)].
80
+
81
+ For better performance, pass the largest set first and the smaller second.
82
+ """
83
+ # Areas of anchors and GT boxes
84
+ area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
85
+ area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
86
+
87
+ # Compute overlaps to generate matrix [boxes1 count, boxes2 count]
88
+ # Each cell contains the IoU value.
89
+ overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0]))
90
+ for i in range(overlaps.shape[1]):
91
+ box2 = boxes2[i]
92
+ overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1)
93
+ return overlaps
94
+
95
+
96
+ def compute_overlaps_masks(masks1, masks2):
97
+ '''Computes IoU overlaps between two sets of masks.
98
+ masks1, masks2: [Height, Width, instances]
99
+ '''
100
+ # flatten masks
101
+ masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32)
102
+ masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32)
103
+ area1 = np.sum(masks1, axis=0)
104
+ area2 = np.sum(masks2, axis=0)
105
+
106
+ # intersections and union
107
+ intersections = np.dot(masks1.T, masks2)
108
+ union = area1[:, None] + area2[None, :] - intersections
109
+ overlaps = intersections / union
110
+
111
+ return overlaps
112
+
113
+
114
+ def non_max_suppression(boxes, scores, threshold):
115
+ """Performs non-maximum supression and returns indicies of kept boxes.
116
+ boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box.
117
+ scores: 1-D array of box scores.
118
+ threshold: Float. IoU threshold to use for filtering.
119
+ """
120
+ assert boxes.shape[0] > 0
121
+ if boxes.dtype.kind != "f":
122
+ boxes = boxes.astype(np.float32)
123
+
124
+ # Compute box areas
125
+ y1 = boxes[:, 0]
126
+ x1 = boxes[:, 1]
127
+ y2 = boxes[:, 2]
128
+ x2 = boxes[:, 3]
129
+ area = (y2 - y1) * (x2 - x1)
130
+
131
+ # Get indicies of boxes sorted by scores (highest first)
132
+ ixs = scores.argsort()[::-1]
133
+
134
+ pick = []
135
+ while len(ixs) > 0:
136
+ # Pick top box and add its index to the list
137
+ i = ixs[0]
138
+ pick.append(i)
139
+ # Compute IoU of the picked box with the rest
140
+ iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]])
141
+ # Identify boxes with IoU over the threshold. This
142
+ # returns indicies into ixs[1:], so add 1 to get
143
+ # indicies into ixs.
144
+ remove_ixs = np.where(iou > threshold)[0] + 1
145
+ # Remove indicies of the picked and overlapped boxes.
146
+ ixs = np.delete(ixs, remove_ixs)
147
+ ixs = np.delete(ixs, 0)
148
+ return np.array(pick, dtype=np.int32)
149
+
150
+
151
+ def apply_box_deltas(boxes, deltas):
152
+ """Applies the given deltas to the given boxes.
153
+ boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box.
154
+ deltas: [N, (dy, dx, log(dh), log(dw))]
155
+ """
156
+ boxes = boxes.astype(np.float32)
157
+ # Convert to y, x, h, w
158
+ height = boxes[:, 2] - boxes[:, 0]
159
+ width = boxes[:, 3] - boxes[:, 1]
160
+ center_y = boxes[:, 0] + 0.5 * height
161
+ center_x = boxes[:, 1] + 0.5 * width
162
+ # Apply deltas
163
+ center_y += deltas[:, 0] * height
164
+ center_x += deltas[:, 1] * width
165
+ height *= np.exp(deltas[:, 2])
166
+ width *= np.exp(deltas[:, 3])
167
+ # Convert back to y1, x1, y2, x2
168
+ y1 = center_y - 0.5 * height
169
+ x1 = center_x - 0.5 * width
170
+ y2 = y1 + height
171
+ x2 = x1 + width
172
+ return np.stack([y1, x1, y2, x2], axis=1)
173
+
174
+
175
+ def box_refinement_graph(box, gt_box):
176
+ """Compute refinement needed to transform box to gt_box.
177
+ box and gt_box are [N, (y1, x1, y2, x2)]
178
+ """
179
+ box = tf.cast(box, tf.float32)
180
+ gt_box = tf.cast(gt_box, tf.float32)
181
+
182
+ height = box[:, 2] - box[:, 0]
183
+ width = box[:, 3] - box[:, 1]
184
+ center_y = box[:, 0] + 0.5 * height
185
+ center_x = box[:, 1] + 0.5 * width
186
+
187
+ gt_height = gt_box[:, 2] - gt_box[:, 0]
188
+ gt_width = gt_box[:, 3] - gt_box[:, 1]
189
+ gt_center_y = gt_box[:, 0] + 0.5 * gt_height
190
+ gt_center_x = gt_box[:, 1] + 0.5 * gt_width
191
+
192
+ dy = (gt_center_y - center_y) / height
193
+ dx = (gt_center_x - center_x) / width
194
+ dh = tf.log(gt_height / height)
195
+ dw = tf.log(gt_width / width)
196
+
197
+ result = tf.stack([dy, dx, dh, dw], axis=1)
198
+ return result
199
+
200
+
201
+ def box_refinement(box, gt_box):
+     """Compute refinement needed to transform box to gt_box.
+     box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is
+     assumed to be outside the box.
+     """
+     box = box.astype(np.float32)
+     gt_box = gt_box.astype(np.float32)
+
+     height = box[:, 2] - box[:, 0]
+     width = box[:, 3] - box[:, 1]
+     center_y = box[:, 0] + 0.5 * height
+     center_x = box[:, 1] + 0.5 * width
+
+     gt_height = gt_box[:, 2] - gt_box[:, 0]
+     gt_width = gt_box[:, 3] - gt_box[:, 1]
+     gt_center_y = gt_box[:, 0] + 0.5 * gt_height
+     gt_center_x = gt_box[:, 1] + 0.5 * gt_width
+
+     dy = (gt_center_y - center_y) / height
+     dx = (gt_center_x - center_x) / width
+     dh = np.log(gt_height / height)
+     dw = np.log(gt_width / width)
+
+     return np.stack([dy, dx, dh, dw], axis=1)
+
+
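+ # Illustrative round trip (not part of the upstream file): box_refinement()
+ # produces the deltas that apply_box_deltas() consumes, so applying the
+ # computed deltas recovers the ground-truth box (up to float rounding).
+ def _example_box_refinement_round_trip():
+     box = np.array([[10., 10., 50., 50.]])
+     gt_box = np.array([[12., 8., 48., 54.]])
+     deltas = box_refinement(box, gt_box)
+     return apply_box_deltas(box, deltas)  # ~= gt_box
+
+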
+ ############################################################
+ # Dataset
+ ############################################################
+
+ class Dataset(object):
+     """The base class for dataset classes.
+     To use it, create a new class that adds functions specific to the dataset
+     you want to use. For example:
+
+     class CatsAndDogsDataset(Dataset):
+         def load_cats_and_dogs(self):
+             ...
+         def load_mask(self, image_id):
+             ...
+         def image_reference(self, image_id):
+             ...
+
+     See COCODataset and ShapesDataset as examples.
+     """
+
+     def __init__(self, class_map=None):
+         self._image_ids = []
+         self.image_info = []
+         # Background is always the first class
+         self.class_info = [{"source": "", "id": 0, "name": "BG"}]
+         self.source_class_ids = {}
+
+     def add_class(self, source, class_id, class_name):
+         assert "." not in source, "Source name cannot contain a dot"
+         # Does the class exist already?
+         for info in self.class_info:
+             if info['source'] == source and info["id"] == class_id:
+                 # source.class_id combination already available, skip
+                 return
+         # Add the class
+         self.class_info.append({
+             "source": source,
+             "id": class_id,
+             "name": class_name,
+         })
+
+     def add_image(self, source, image_id, path, **kwargs):
+         image_info = {
+             "id": image_id,
+             "source": source,
+             "path": path,
+         }
+         image_info.update(kwargs)
+         self.image_info.append(image_info)
+
+     def image_reference(self, image_id):
+         """Return a link to the image on its source website, or details about
+         the image that help with looking it up or debugging it.
+
+         Override for your dataset, but pass to this function
+         if you encounter images not in your dataset.
+         """
+         return ""
+
+     def prepare(self, class_map=None):
+         """Prepares the Dataset class for use.
+
+         TODO: class map is not supported yet. When done, it should handle mapping
+         classes from different datasets to the same class ID.
+         """
+
+         def clean_name(name):
+             """Returns a shorter version of object names for cleaner display."""
+             return ",".join(name.split(",")[:1])
+
+         # Build (or rebuild) everything else from the info dicts.
+         self.num_classes = len(self.class_info)
+         self.class_ids = np.arange(self.num_classes)
+         self.class_names = [clean_name(c["name"]) for c in self.class_info]
+         self.num_images = len(self.image_info)
+         self._image_ids = np.arange(self.num_images)
+
+         self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id
+                                       for info, id in zip(self.class_info, self.class_ids)}
+
+         # Map sources to the class_ids they support
+         self.sources = list(set([i['source'] for i in self.class_info]))
+         self.source_class_ids = {}
+         # Loop over datasets
+         for source in self.sources:
+             self.source_class_ids[source] = []
+             # Find classes that belong to this dataset
+             for i, info in enumerate(self.class_info):
+                 # Include the BG class in all datasets
+                 if i == 0 or source == info['source']:
+                     self.source_class_ids[source].append(i)
+
+     def map_source_class_id(self, source_class_id):
+         """Takes a source class ID and returns the int class ID assigned to it.
+
+         For example:
+         dataset.map_source_class_id("coco.12") -> 23
+         """
+         return self.class_from_source_map[source_class_id]
+
+     def get_source_class_id(self, class_id, source):
+         """Map an internal class ID to the corresponding class ID in the source dataset."""
+         info = self.class_info[class_id]
+         assert info['source'] == source
+         return info['id']
+
+     def append_data(self, class_info, image_info):
+         self.external_to_class_id = {}
+         for i, c in enumerate(self.class_info):
+             for ds, id in c["map"]:
+                 self.external_to_class_id[ds + str(id)] = i
+
+         # Map external image IDs to internal ones.
+         self.external_to_image_id = {}
+         for i, info in enumerate(self.image_info):
+             self.external_to_image_id[info["ds"] + str(info["id"])] = i
+
+     @property
+     def image_ids(self):
+         return self._image_ids
+
+     def source_image_link(self, image_id):
+         """Returns the path or URL to the image.
+         Override this to return a URL to the image if it's available online,
+         for easy debugging.
+         """
+         return self.image_info[image_id]["path"]
+
+     def load_image(self, image_id):
+         """Load the specified image and return a [H,W,3] Numpy array."""
+         # Load image
+         image = skimage.io.imread(self.image_info[image_id]['path'])
+         # If grayscale, convert to RGB for consistency.
+         if image.ndim != 3:
+             image = skimage.color.gray2rgb(image)
+         return image
+
+     def load_mask(self, image_id):
+         """Load instance masks for the given image.
+
+         Different datasets use different ways to store masks. Override this
+         method to load instance masks and return them in the form of an
+         array of binary masks of shape [height, width, instances].
+
+         Returns:
+             masks: A bool array of shape [height, width, instance count] with
+                 a binary mask per instance.
+             class_ids: a 1D array of class IDs of the instance masks.
+         """
+         # Override this function to load a mask from your dataset.
+         # Otherwise, it returns an empty mask.
+         mask = np.empty([0, 0, 0])
+         class_ids = np.empty([0], np.int32)
+         return mask, class_ids
+
+
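+ # Minimal subclass sketch (illustrative only; the class, source name, and
+ # paths below are hypothetical, not from this repo). A real dataset would
+ # also override load_mask() and image_reference(); see COCODataset in coco.py.
+ class _ExampleSingleClassDataset(Dataset):
+     def load_things(self, image_paths):
+         self.add_class("things", 1, "thing")
+         for i, path in enumerate(image_paths):
+             self.add_image("things", image_id=i, path=path)
+
+ # Typical flow:
+ # ds = _ExampleSingleClassDataset()
+ # ds.load_things(["/tmp/img0.png"])
+ # ds.prepare()  # builds class_ids, class_names, image_ids, ...
+
+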
+ def resize_image(image, min_dim=None, max_dim=None, padding=False):
+     """
+     Resizes an image keeping the aspect ratio.
+
+     min_dim: if provided, resizes the image such that its smaller
+         dimension == min_dim
+     max_dim: if provided, ensures that the image's longest side doesn't
+         exceed this value.
+     padding: If true, pads the image with zeros so its size is max_dim x max_dim
+
+     Returns:
+     image: the resized image
+     window: (y1, x1, y2, x2). If max_dim is provided, padding might
+         be inserted in the returned image. If so, this window is the
+         coordinates of the image part of the full image (excluding
+         the padding). The x2, y2 pixels are not included.
+     scale: The scale factor used to resize the image
+     padding: Padding added to the image [(top, bottom), (left, right), (0, 0)]
+     """
+     # Default window (y1, x1, y2, x2) and default scale == 1.
+     h, w = image.shape[:2]
+     window = (0, 0, h, w)
+     scale = 1
+
+     # Scale?
+     if min_dim:
+         # Scale up but not down
+         scale = max(1, min_dim / min(h, w))
+     # Does it exceed max dim?
+     if max_dim:
+         image_max = max(h, w)
+         if round(image_max * scale) > max_dim:
+             scale = max_dim / image_max
+     # Resize image
+     if scale != 1:
+         image = scipy.misc.imresize(
+             image, (round(h * scale), round(w * scale)))
+     # Need padding?
+     if padding:
+         # Get new height and width
+         h, w = image.shape[:2]
+         top_pad = (max_dim - h) // 2
+         bottom_pad = max_dim - h - top_pad
+         left_pad = (max_dim - w) // 2
+         right_pad = max_dim - w - left_pad
+         padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
+         image = np.pad(image, padding, mode='constant', constant_values=0)
+         window = (top_pad, left_pad, h + top_pad, w + left_pad)
+     return image, window, scale, padding
+
+
+ def resize_mask(mask, scale, padding):
+     """Resizes a mask using the given scale and padding.
+     Typically, you get the scale and padding from resize_image() to
+     ensure both the image and the mask are resized consistently.
+
+     scale: mask scaling factor
+     padding: Padding to add to the mask in the form
+         [(top, bottom), (left, right), (0, 0)]
+     """
+     h, w = mask.shape[:2]
+     mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0)
+     mask = np.pad(mask, padding, mode='constant', constant_values=0)
+     return mask
+
+
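+ # Illustrative usage (not part of the upstream file): resize an image and its
+ # masks consistently, as done during data loading. The min_dim/max_dim values
+ # are hypothetical; the real ones come from the model config.
+ def _example_resize_image_and_mask(image, mask):
+     image, window, scale, padding = resize_image(
+         image, min_dim=800, max_dim=1024, padding=True)
+     mask = resize_mask(mask, scale, padding)
+     return image, mask, window
+
+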
+ def minimize_mask(bbox, mask, mini_shape):
+     """Resize masks to a smaller version to reduce memory load.
+     Mini-masks can then be resized back to image scale using expand_mask()
+
+     See inspect_data.ipynb notebook for more details.
+     """
+     mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
+     for i in range(mask.shape[-1]):
+         m = mask[:, :, i]
+         y1, x1, y2, x2 = bbox[i][:4]
+         m = m[y1:y2, x1:x2]
+         if m.size == 0:
+             raise Exception("Invalid bounding box with area of zero")
+         m = scipy.misc.imresize(m.astype(float), mini_shape, interp='bilinear')
+         mini_mask[:, :, i] = np.where(m >= 128, 1, 0)
+     return mini_mask
+
+
+ def expand_mask(bbox, mini_mask, image_shape):
+     """Resizes mini masks back to image size. Reverses the change
+     of minimize_mask().
+
+     See inspect_data.ipynb notebook for more details.
+     """
+     mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool)
+     for i in range(mask.shape[-1]):
+         m = mini_mask[:, :, i]
+         y1, x1, y2, x2 = bbox[i][:4]
+         h = y2 - y1
+         w = x2 - x1
+         m = scipy.misc.imresize(m.astype(float), (h, w), interp='bilinear')
+         mask[y1:y2, x1:x2, i] = np.where(m >= 128, 1, 0)
+     return mask
+
+
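+ # Illustrative round trip (not part of the upstream file): shrink full-size
+ # masks to mini-masks for storage, then expand them back to image size. The
+ # 56x56 mini shape is a hypothetical value; expansion is approximate, not
+ # lossless.
+ def _example_mini_mask_round_trip(bbox, mask, image_shape):
+     mini = minimize_mask(bbox, mask, mini_shape=(56, 56))
+     return expand_mask(bbox, mini, image_shape)
+
+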
+ # TODO: Build and use this function to reduce code duplication
+ def mold_mask(mask, config):
+     pass
+
+
+ def unmold_mask(mask, bbox, image_shape):
+     """Converts a mask generated by the neural network into a format similar
+     to its original shape.
+     mask: [height, width] of type float. A small, typically 28x28 mask.
+     bbox: [y1, x1, y2, x2]. The box to fit the mask in.
+
+     Returns a binary mask with the same size as the original image.
+     """
+     threshold = 0.5
+     y1, x1, y2, x2 = bbox
+     mask = scipy.misc.imresize(
+         mask, (y2 - y1, x2 - x1), interp='bilinear').astype(np.float32) / 255.0
+     mask = np.where(mask >= threshold, 1, 0).astype(np.uint8)
+
+     # Put the mask in the right location.
+     full_mask = np.zeros(image_shape[:2], dtype=np.uint8)
+     full_mask[y1:y2, x1:x2] = mask
+     return full_mask
+
+
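+ # Illustrative usage (not part of the upstream file): paste a small soft mask
+ # from the mask head into a full-resolution canvas. The box is hypothetical.
+ def _example_unmold_mask(soft_mask, image_shape):
+     bbox = np.array([100, 100, 156, 156])  # [y1, x1, y2, x2]
+     return unmold_mask(soft_mask, bbox, image_shape)
+
+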
+ ############################################################
+ # Anchors
+ ############################################################
+
+ def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
+     """
+     scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
+     ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
+     shape: [height, width] spatial shape of the feature map over which
+         to generate anchors.
+     feature_stride: Stride of the feature map relative to the image in pixels.
+     anchor_stride: Stride of anchors on the feature map. For example, if the
+         value is 2 then generate anchors for every other feature map pixel.
+     """
+     # Get all combinations of scales and ratios
+     scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
+     scales = scales.flatten()
+     ratios = ratios.flatten()
+
+     # Enumerate heights and widths from scales and ratios
+     heights = scales / np.sqrt(ratios)
+     widths = scales * np.sqrt(ratios)
+
+     # Enumerate shifts in feature space
+     shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride
+     shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride
+     shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)
+
+     # Enumerate combinations of shifts, widths, and heights
+     box_widths, box_centers_x = np.meshgrid(widths, shifts_x)
+     box_heights, box_centers_y = np.meshgrid(heights, shifts_y)
+
+     # Reshape to get a list of (y, x) and a list of (h, w)
+     box_centers = np.stack(
+         [box_centers_y, box_centers_x], axis=2).reshape([-1, 2])
+     box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2])
+
+     # Convert to corner coordinates (y1, x1, y2, x2)
+     boxes = np.concatenate([box_centers - 0.5 * box_sizes,
+                             box_centers + 0.5 * box_sizes], axis=1)
+     return boxes
+
+
+ def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides,
+                              anchor_stride):
+     """Generate anchors at different levels of a feature pyramid. Each scale
+     is associated with a level of the pyramid, but each ratio is used in
+     all levels of the pyramid.
+
+     Returns:
+     anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array, sorted
+         in the same order as the given scales. So, anchors of scale[0] come
+         first, then anchors of scale[1], and so on.
+     """
+     # Anchors
+     # [anchor_count, (y1, x1, y2, x2)]
+     anchors = []
+     for i in range(len(scales)):
+         anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
+                                         feature_strides[i], anchor_stride))
+     return np.concatenate(anchors, axis=0)
+
+
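+ # Illustrative usage (not part of the upstream file). The values mirror a
+ # typical Mask R-CNN config but are hard-coded here as assumptions.
+ def _example_generate_pyramid_anchors():
+     image_shape = np.array([1024, 1024])
+     strides = [4, 8, 16, 32, 64]            # backbone strides
+     feature_shapes = [image_shape // s for s in strides]
+     return generate_pyramid_anchors(
+         scales=(32, 64, 128, 256, 512),     # one scale per pyramid level
+         ratios=[0.5, 1, 2],                 # shared across levels
+         feature_shapes=feature_shapes,
+         feature_strides=strides,
+         anchor_stride=1)                    # -> [261888, 4] for 1024x1024
+
+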
+ ############################################################
+ # Miscellaneous
+ ############################################################
+
+ def trim_zeros(x):
+     """It's common to have tensors larger than the available data and
+     pad with zeros. This function removes rows that are all zeros.
+
+     x: [rows, columns].
+     """
+     assert len(x.shape) == 2
+     return x[~np.all(x == 0, axis=1)]
+
+
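+ # Tiny example (illustrative): all-zero rows are treated as padding and dropped.
+ # trim_zeros(np.array([[1, 2], [0, 0], [3, 4]])) -> [[1, 2], [3, 4]]
+
+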
+ def compute_ap(gt_boxes, gt_class_ids, gt_masks,
+                pred_boxes, pred_class_ids, pred_scores, pred_masks,
+                iou_threshold=0.5):
+     """Compute Average Precision at a set IoU threshold (default 0.5).
+
+     Returns:
+     mAP: Mean Average Precision
+     precisions: List of precisions at different class score thresholds.
+     recalls: List of recall values at different class score thresholds.
+     overlaps: [pred_boxes, gt_boxes] IoU overlaps.
+     """
+     # Trim zero padding and sort predictions by score from high to low
+     # TODO: cleaner to do zero unpadding upstream
+     gt_boxes = trim_zeros(gt_boxes)
+     gt_masks = gt_masks[..., :gt_boxes.shape[0]]
+     pred_boxes = trim_zeros(pred_boxes)
+     pred_scores = pred_scores[:pred_boxes.shape[0]]
+     indices = np.argsort(pred_scores)[::-1]
+     pred_boxes = pred_boxes[indices]
+     pred_class_ids = pred_class_ids[indices]
+     pred_scores = pred_scores[indices]
+     pred_masks = pred_masks[..., indices]
+
+     # Compute IoU overlaps [pred_masks, gt_masks]
+     overlaps = compute_overlaps_masks(pred_masks, gt_masks)
+
+     # Loop through ground truth boxes and find matching predictions
+     match_count = 0
+     pred_match = np.zeros([pred_boxes.shape[0]])
+     gt_match = np.zeros([gt_boxes.shape[0]])
+     for i in range(len(pred_boxes)):
+         # Find the best matching ground truth box
+         sorted_ixs = np.argsort(overlaps[i])[::-1]
+         for j in sorted_ixs:
+             # If the ground truth box is already matched, go to the next one
+             if gt_match[j] == 1:
+                 continue
+             # If we reach an IoU smaller than the threshold, end the loop
+             iou = overlaps[i, j]
+             if iou < iou_threshold:
+                 break
+             # Do we have a match?
+             if pred_class_ids[i] == gt_class_ids[j]:
+                 match_count += 1
+                 gt_match[j] = 1
+                 pred_match[i] = 1
+                 break
+
+     # Compute precision and recall at each prediction box step
+     precisions = np.cumsum(pred_match) / (np.arange(len(pred_match)) + 1)
+     recalls = np.cumsum(pred_match).astype(np.float32) / len(gt_match)
+
+     # Pad with start and end values to simplify the math
+     precisions = np.concatenate([[0], precisions, [0]])
+     recalls = np.concatenate([[0], recalls, [1]])
+
+     # Make the precision envelope monotonically decreasing. This way, the
+     # precision value at each recall threshold is the maximum it can be
+     # for all following recall thresholds, as specified by the VOC paper.
+     for i in range(len(precisions) - 2, -1, -1):
+         precisions[i] = np.maximum(precisions[i], precisions[i + 1])
+
+     # Compute mean AP over the recall range
+     indices = np.where(recalls[:-1] != recalls[1:])[0] + 1
+     mAP = np.sum((recalls[indices] - recalls[indices - 1]) *
+                  precisions[indices])
+
+     return mAP, precisions, recalls, overlaps
+
+
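+ # Illustrative usage (not part of the upstream file): score one image's
+ # detections against its ground truth. All inputs are arrays in the shapes
+ # documented above.
+ # mAP, precisions, recalls, overlaps = compute_ap(
+ #     gt_boxes, gt_class_ids, gt_masks,
+ #     pred_boxes, pred_class_ids, pred_scores, pred_masks,
+ #     iou_threshold=0.5)
+
+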
+ def compute_recall(pred_boxes, gt_boxes, iou):
+     """Compute the recall at the given IoU threshold. It's an indication
+     of how many GT boxes were found by the given prediction boxes.
+
+     pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates
+     gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates
+     """
+     # Measure overlaps
+     overlaps = compute_overlaps(pred_boxes, gt_boxes)
+     iou_max = np.max(overlaps, axis=1)
+     iou_argmax = np.argmax(overlaps, axis=1)
+     positive_ids = np.where(iou_max >= iou)[0]
+     matched_gt_boxes = iou_argmax[positive_ids]
+
+     recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0]
+     return recall, positive_ids
+
+
+ # ## Batch Slicing
+ # Some custom layers support a batch size of 1 only, and require a lot of work
+ # to support batches greater than 1. This function slices an input tensor
+ # across the batch dimension and feeds batches of size 1. Effectively, it's an
+ # easy way to support batches > 1 quickly with little code modification.
+ # In the long run, it's more efficient to modify the code to support large
+ # batches and get rid of this function. Consider this a temporary solution.
+ def batch_slice(inputs, graph_fn, batch_size, names=None):
+     """Splits inputs into slices and feeds each slice to a copy of the given
+     computation graph, then combines the results. It allows you to run a
+     graph on a batch of inputs even if the graph is written to support one
+     instance only.
+
+     inputs: list of tensors. All must have the same first dimension length.
+     graph_fn: A function that returns a TF tensor that's part of a graph.
+     batch_size: number of slices to divide the data into.
+     names: If provided, assigns names to the resulting tensors.
+     """
+     if not isinstance(inputs, list):
+         inputs = [inputs]
+
+     outputs = []
+     for i in range(batch_size):
+         inputs_slice = [x[i] for x in inputs]
+         output_slice = graph_fn(*inputs_slice)
+         if not isinstance(output_slice, (tuple, list)):
+             output_slice = [output_slice]
+         outputs.append(output_slice)
+     # Change outputs from a list of slices where each is
+     # a list of outputs to a list of outputs and each has
+     # a list of slices
+     outputs = list(zip(*outputs))
+
+     if names is None:
+         names = [None] * len(outputs)
+
+     result = [tf.stack(o, axis=0, name=n)
+               for o, n in zip(outputs, names)]
+     if len(result) == 1:
+         result = result[0]
+
+     return result
+
+
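+ # Minimal usage sketch (illustrative, not part of the upstream file). Assumes
+ # a TF 1.x graph context, matching the rest of this module: run a per-image
+ # graph function (here just a scale) across a fixed batch dimension.
+ def _example_batch_slice():
+     boxes = tf.placeholder(tf.float32, shape=[2, 5, 4])
+     return batch_slice(boxes, lambda b: b * 0.5, batch_size=2)
+
+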
+ def download_trained_weights(coco_model_path, verbose=1):
+     """Download COCO trained weights from Releases.
+
+     coco_model_path: local path of COCO trained weights
+     """
+     if verbose > 0:
+         print("Downloading pretrained model to " + coco_model_path + " ...")
+     with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out:
+         shutil.copyfileobj(resp, out)
+     if verbose > 0:
+         print("... done downloading pretrained model!")
+
+
+ def resize_image_with_scale(h1, w1, h2_max, w2_max):
+     """Return the (height, width) that scales an image of size (h1, w1) to
+     fit inside a rectangle of (h2_max, w2_max) while keeping the aspect
+     ratio. Note: the landscape/portrait branches implicitly assume a square
+     target (h2_max == w2_max); with a non-square target the result can
+     exceed one of the bounds.
+     """
+     if h1 == w1:
+         return h2_max, h2_max  # square image
+     elif h1 < w1:
+         return int(h1 / (w1 / w2_max)), int(w2_max)  # horizontal (landscape) image
+     else:
+         return int(h2_max), int(w1 / (h1 / h2_max))  # vertical (portrait) image
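+
+ # Example (illustrative): a 480x640 landscape image fit into a 1024x1024 box.
+ # resize_image_with_scale(480, 640, 1024, 1024) -> (768, 1024)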