Spaces:

Genius-Society
/

insecta

Running

App Files Files Community

admin committed on Aug 7, 2024

Commit

67a9b5d

1 Parent(s): 89fbbde

sync

Browse files

Files changed (49) hide show

.gitattributes +2 -0
.gitignore +7 -0
README.md +8 -7
app.py +86 -0
insectid/__init__.py +2 -0
insectid/base.py +51 -0
insectid/detector.py +58 -0
insectid/identifier.py +76 -0
khandy/__init__.py +18 -0
khandy/boxes/__init__.py +13 -0
khandy/boxes/boxes_and_indices.py +68 -0
khandy/boxes/boxes_clip.py +34 -0
khandy/boxes/boxes_coder.py +69 -0
khandy/boxes/boxes_convert.py +101 -0
khandy/boxes/boxes_filter.py +113 -0
khandy/boxes/boxes_overlap.py +166 -0
khandy/boxes/boxes_transform_flip.py +135 -0
khandy/boxes/boxes_transform_rotate.py +140 -0
khandy/boxes/boxes_transform_scale.py +86 -0
khandy/boxes/boxes_transform_translate.py +136 -0
khandy/boxes/boxes_utils.py +28 -0
khandy/dict_utils.py +168 -0
khandy/draw_utils.py +148 -0
khandy/feature_utils.py +62 -0
khandy/file_io_utils.py +87 -0
khandy/fs_utils.py +375 -0
khandy/hash_utils.py +25 -0
khandy/image/__init__.py +10 -0
khandy/image/align_and_crop.py +60 -0
khandy/image/crop_or_pad.py +138 -0
khandy/image/flip.py +72 -0
khandy/image/image_hash.py +69 -0
khandy/image/misc.py +329 -0
khandy/image/resize.py +177 -0
khandy/image/rotate.py +72 -0
khandy/image/translate.py +57 -0
khandy/label/__init__.py +2 -0
khandy/label/detect.py +594 -0
khandy/list_utils.py +68 -0
khandy/misc.py +245 -0
khandy/numpy_utils.py +173 -0
khandy/points/__init__.py +2 -0
khandy/points/pts_letterbox.py +19 -0
khandy/points/pts_transform_scale.py +33 -0
khandy/split_utils.py +71 -0
khandy/text_utils.py +33 -0
khandy/time_utils.py +101 -0
khandy/version.py +3 -0
requirements.txt +7 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+images/Coccinella_septempunctata.jpg filter=lfs diff=lfs merge=lfs -text
+simsun.ttc filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,7 @@

+__pycache__/
+_local/
+*.pyc
+local_models_*/
+rename.sh
+*.onnx
+simsun.ttc

README.md CHANGED Viewed

@@ -1,13 +1,14 @@
 ---
-title: Insecta
-emoji: 📉
-colorFrom: purple
-colorTo: yellow
 sdk: gradio
-sdk_version: 3.42.0
 app_file: app.py
 pinned: false
-license: mit
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: insecta
+emoji: 🐞
+colorFrom: indigo
+colorTo: pink
 sdk: gradio
+sdk_version: 4.39.0
 app_file: app.py
 pinned: false
 ---
+# 特性
+- 支持 2037 类 (可能是目, 科, 属或种等) 昆虫或其他节肢动物
+- 模型开源, 持续更新.

app.py ADDED Viewed

	@@ -0,0 +1,86 @@

+import cv2
+import khandy
+import numpy as np
+import gradio as gr
+from PIL import Image
+from modelscope import snapshot_download
+from insectid import InsectDetector, InsectIdentifier
+# Fetch the "MuGeminorum/insecta" snapshot at import time; the returned
+# directory is where the font file and the example pictures are looked up.
+MODEL_DIR = snapshot_download("MuGeminorum/insecta", cache_dir="./insectid/__pycache__")
+def infer(filename: str):
+    """Detect insects in a picture, label every detection and report a name.
+    Args:
+        filename: path of the uploaded image; falsy when nothing was uploaded.
+    Returns:
+        A ``(picture, text)`` pair, one value per Gradio output. ``picture``
+        is the annotated image as an RGB ``PIL.Image``, or ``None`` when there
+        is no readable input. ``text`` is the label drawn last with its
+        probability part removed, "未知" when no identification reached the
+        10% threshold, or a message explaining why no picture was produced.
+    """
+    if not filename:
+        # This used to be a bare expression without `return`, so an empty
+        # upload fell through to `khandy.imread`.
+        return None, "请上传图片 Please upload a picture"
+    detector = InsectDetector()
+    identifier = InsectIdentifier()
+    image = khandy.imread(filename)
+    if image is None:
+        # Two outputs are declared, so a pair is returned here as well.
+        return None, "无法读取图片 Failed to read the picture"
+    if max(image.shape[:2]) > 1280:
+        image = khandy.resize_image_long(image, 1280)
+    image_for_draw = image.copy()
+    image_height, image_width = image.shape[:2]
+    boxes, _confs, _classes = detector.detect(image)
+    text = "未知"
+    for box in boxes:
+        box = box.astype(np.int32)
+        box_width = box[2] - box[0] + 1
+        box_height = box[3] - box[1] + 1
+        # Detections narrower or shorter than 30 pixels are skipped entirely.
+        if box_width < 30 or box_height < 30:
+            continue
+        cropped = khandy.crop_or_pad(image, box[0], box[1], box[2], box[3])
+        best = identifier.identify(cropped)[0]
+        print(best)
+        # NOTE: `text` is not reset per box, so a detection below the 10%
+        # threshold is drawn with the previously assigned label.
+        if best["probability"] >= 0.10:
+            text = "{} {}: {:.2f}%".format(
+                best["chinese_name"],
+                best["latin_name"],
+                100.0 * best["probability"],
+            )
+        # Put the caption just above the box, clamped to the image bounds.
+        position = [box[0] + 2, box[1] - 20]
+        position[0] = min(max(position[0], 0), image_width)
+        position[1] = min(max(position[1], 0), image_height)
+        cv2.rectangle(
+            image_for_draw,
+            (box[0], box[1]),
+            (box[2], box[3]),
+            (0, 255, 0),
+            2,
+        )
+        image_for_draw = khandy.draw_text(
+            image_for_draw,
+            text,
+            position,
+            font=f"{MODEL_DIR}/simsun.ttc",
+            font_size=15,
+        )
+    # Drop the ": <probability>%" suffix for the text output.
+    outxt = text.split(":")[0] if ":" in text else text
+    # Reverse the channel axis before handing the array to PIL as RGB.
+    return Image.fromarray(image_for_draw[:, :, ::-1], mode="RGB"), outxt
+# Build and launch the Gradio demo only when this file is run as a script.
+if __name__ == "__main__":
+    iface = gr.Interface(
+        fn=infer,
+        # type="filepath": `infer` receives the upload as a path string.
+        inputs=gr.Image(label="上传昆虫照片 Upload insect picture", type="filepath"),
+        # Two outputs, matching the (picture, text) pair returned by `infer`.
+        outputs=[
+            gr.Image(label="识别结果 Recognition result"),
+            gr.Textbox(label="最可能的物种 Best match", show_copy_button=True),
+        ],
+        # The title slot carries the supported-format / size notice.
+        title="图像文件格式支持 PNG, JPG, JPEG 和 BMP, 且文件大小不超过 10M<br>Image file format support PNG, JPG, JPEG and BMP, and the file size does not exceed 10M.",
+        # Example pictures are read from the downloaded model directory.
+        examples=[
+            f"{MODEL_DIR}/examples/butterfly.jpg",
+            f"{MODEL_DIR}/examples/beetle.jpg",
+        ],
+        allow_flagging="never",
+        cache_examples=False,
+    )
+    iface.launch()

insectid/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


+from .detector import *
+from .identifier import *

insectid/base.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import onnxruntime
+import numpy as np
+class OnnxModel(object):
+    """Small wrapper around an ``onnxruntime.InferenceSession``.
+    The session is created with default session options. CUDA is requested
+    (with CPU as fallback) when ``onnxruntime.get_device()`` reports 'GPU';
+    otherwise only the CPU provider is used.
+    """
+    def __init__(self, model_path):
+        sess_options = onnxruntime.SessionOptions()
+        if onnxruntime.get_device() == 'GPU':
+            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
+        else:
+            providers = ['CPUExecutionProvider']
+        self.sess = onnxruntime.InferenceSession(model_path, sess_options, providers=providers)
+        self._input_names = [node.name for node in self.sess.get_inputs()]
+        self._output_names = [node.name for node in self.sess.get_outputs()]
+    @property
+    def input_names(self):
+        """Names of the model inputs, in session order."""
+        return self._input_names
+    @property
+    def output_names(self):
+        """Names of the model outputs, in session order."""
+        return self._output_names
+    def forward(self, inputs):
+        """Run the session on ``inputs``.
+        Args:
+            inputs: one input value, or a tuple/list of values paired
+                positionally with ``input_names``.
+        Returns:
+            The single output itself when a bare (non tuple/list) input was
+            given and the model has exactly one output; otherwise the list
+            of outputs returned by the session.
+        """
+        bare_input = not isinstance(inputs, (tuple, list))
+        if bare_input:
+            inputs = [inputs]
+        input_feed = dict(zip(self.input_names, inputs))
+        outputs = self.sess.run(self.output_names, input_feed)
+        if bare_input and len(self.output_names) == 1:
+            return outputs[0]
+        return outputs
+def check_image_dtype_and_shape(image):
+    """Validate that ``image`` is an array this package can process.
+    Accepted: ``np.ndarray`` of dtype uint8 or uint16 with 2 dimensions, or
+    3 dimensions whose last axis has 1, 3 or 4 entries.
+    Raises:
+        Exception: if any of the above conditions is violated.
+    """
+    if not isinstance(image, np.ndarray):
+        raise Exception('image is not np.ndarray!')
+    # The previous check used isinstance() on the dtype object, which is never
+    # true, and had the condition inverted, so no dtype was ever rejected.
+    if image.dtype not in (np.uint8, np.uint16):
+        raise Exception(f'Unsupported image dtype, only support uint8 and uint16, got {image.dtype}!')
+    if image.ndim not in {2, 3}:
+        raise Exception(f'Unsupported image dimension number, only support 2 and 3, got {image.ndim}!')
+    if image.ndim == 3:
+        num_channels = image.shape[-1]
+        if num_channels not in {1, 3, 4}:
+            raise Exception(f'Unsupported image channel number, only support 1, 3 and 4, got {num_channels}!')

insectid/detector.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import os
+import khandy
+import numpy as np
+from .base import OnnxModel
+from .base import check_image_dtype_and_shape
+class InsectDetector(OnnxModel):
+    """Insect detector running ``quarrying_insect_detector.onnx``.
+    The model file is looked up under this package's
+    ``__pycache__/MuGeminorum/insecta`` directory; inputs are letterboxed to
+    640x640.
+    """
+    def __init__(self):
+        package_dir = os.path.dirname(os.path.abspath(__file__))
+        model_path = os.path.join(
+            package_dir,
+            "__pycache__/MuGeminorum/insecta/quarrying_insect_detector.onnx",
+        )
+        self.input_width = 640
+        self.input_height = 640
+        super().__init__(model_path)
+    def _preprocess(self, image):
+        """Turn an image into a (1, C, H, W) float32 tensor.
+        Returns:
+            The tensor plus the letterbox scale, left padding and top padding
+            needed to map detections back to the source image.
+        """
+        check_image_dtype_and_shape(image)
+        # Resize with padding to the network input size.
+        letterboxed, scale, pad_left, pad_top = khandy.letterbox_image(
+            image, self.input_width, self.input_height, 0, return_scale=True
+        )
+        # Normalize the channel layout (red/blue swapped).
+        letterboxed = khandy.normalize_image_channel(letterboxed, swap_rb=True)
+        # Convert to float32.
+        letterboxed = khandy.rescale_image(letterboxed, "auto", np.float32)
+        # HWC -> CHW, then add the batch axis.
+        tensor = np.expand_dims(np.transpose(letterboxed, (2, 0, 1)), axis=0)
+        return tensor, scale, pad_left, pad_top
+    def _post_process(
+        self, outputs_list, scale, pad_left, pad_top, conf_thresh, iou_thresh
+    ):
+        """Decode raw predictions into boxes, confidences and class ids.
+        Rows of ``outputs_list[0][0]`` are read as cx, cy, w, h in columns
+        0-3, a score in column 4 that is compared with ``conf_thresh``, and
+        per-class scores from column 5 on (presumably the usual YOLO layout;
+        verify against the exported model).
+        """
+        pred = outputs_list[0][0]
+        pred = pred[pred[:, 4] > conf_thresh]
+        boxes = khandy.convert_boxes_format(pred[:, :4], "cxcywh", "xyxy")
+        boxes = khandy.unletterbox_2d_points(boxes, scale, pad_left, pad_top, False)
+        # Per-class score weighted by the column-4 score.
+        class_scores = pred[:, 5:] * pred[:, 4:5]
+        confs = np.max(class_scores, axis=-1)
+        classes = np.argmax(class_scores, axis=-1)
+        keep = khandy.non_max_suppression(boxes, confs, iou_thresh)
+        return boxes[keep], confs[keep], classes[keep]
+    def detect(self, image, conf_thresh=0.5, iou_thresh=0.5):
+        """Detect insects in ``image``.
+        Returns:
+            ``(boxes, confs, classes)`` after thresholding and non-maximum
+            suppression, with boxes in xyxy format mapped back through the
+            letterbox transform.
+        """
+        tensor, scale, pad_left, pad_top = self._preprocess(image)
+        outputs_list = self.forward(tensor)
+        return self._post_process(
+            outputs_list,
+            scale=scale,
+            pad_left=pad_left,
+            pad_top=pad_top,
+            conf_thresh=conf_thresh,
+            iou_thresh=iou_thresh,
+        )

insectid/identifier.py ADDED Viewed

	@@ -0,0 +1,76 @@

+import os
+import copy
+import khandy
+import numpy as np
+from .base import OnnxModel
+from collections import OrderedDict
+from .base import check_image_dtype_and_shape
+class InsectIdentifier(OnnxModel):
+    """Insect classifier running ``quarrying_insect_identifier.onnx``.
+    The model and its label map are looked up under this package's
+    ``__pycache__/MuGeminorum/insecta`` directory.
+    """
+    def __init__(self):
+        package_dir = os.path.dirname(os.path.abspath(__file__))
+        model_path = os.path.join(
+            package_dir,
+            "__pycache__/MuGeminorum/insecta/quarrying_insect_identifier.onnx",
+        )
+        label_map_path = os.path.join(
+            package_dir,
+            "__pycache__/MuGeminorum/insecta/quarrying_insectid_label_map.txt",
+        )
+        super().__init__(model_path)
+        self.label_name_dict = self._get_label_name_dict(label_map_path)
+        # Chinese names ordered by label id 0..N-1.
+        self.names = [
+            self.label_name_dict[label]["chinese_name"]
+            for label in range(len(self.label_name_dict))
+        ]
+        self.num_classes = len(self.label_name_dict)
+    @staticmethod
+    def _get_label_name_dict(filename):
+        """Load the label map: one ``label,chinese_name,latin_name`` per record.
+        Returns:
+            dict mapping the integer label to an ``OrderedDict`` with the
+            keys "chinese_name" and "latin_name".
+        """
+        label_name_dict = {}
+        for record in khandy.load_list(filename):
+            label, chinese_name, latin_name = record.split(",")
+            names = OrderedDict(
+                [("chinese_name", chinese_name), ("latin_name", latin_name)]
+            )
+            label_name_dict[int(label)] = names
+        return label_name_dict
+    @staticmethod
+    def _preprocess(image):
+        """Turn an image into a normalized (1, C, 224, 224) tensor."""
+        check_image_dtype_and_shape(image)
+        # Resize with padding to 224x224.
+        letterboxed = khandy.letterbox_image(image, 224, 224)
+        # Normalize the channel layout (red/blue swapped).
+        letterboxed = khandy.normalize_image_channel(letterboxed, swap_rb=True)
+        # Value normalization with per-channel mean and standard deviation.
+        mean, stddev = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
+        normalized = khandy.normalize_image_value(letterboxed, mean, stddev, "auto")
+        # HWC -> CHW, then add the batch axis.
+        return np.expand_dims(np.transpose(normalized, (2, 0, 1)), axis=0)
+    def predict(self, image):
+        """Return the softmax of the model output for ``image``."""
+        logits = self.forward(self._preprocess(image))
+        return khandy.softmax(logits)
+    def identify(self, image, topk=5):
+        """Return the ``topk`` most probable labels for ``image``.
+        Args:
+            image: image array accepted by ``_preprocess``.
+            topk (int): number of results; values outside
+                ``1..num_classes`` select every class.
+        Returns:
+            list of dict-like records with "chinese_name", "latin_name" and
+            "probability", in the order given by ``khandy.top_k``.
+        """
+        assert isinstance(topk, int)
+        if not 0 < topk <= self.num_classes:
+            topk = self.num_classes
+        probs = self.predict(image)
+        topk_probs, topk_indices = khandy.top_k(probs, topk)
+        results = []
+        for index, prob in zip(topk_indices[0], topk_probs[0]):
+            # Copy so the shared label table is never mutated.
+            entry = copy.deepcopy(self.label_name_dict[index])
+            entry["probability"] = prob
+            results.append(entry)
+        return results

khandy/__init__.py ADDED Viewed

	@@ -0,0 +1,18 @@

+from .dict_utils import *
+from .draw_utils import *
+from .feature_utils import *
+from .file_io_utils import *
+from .fs_utils import *
+from .hash_utils import *
+from .list_utils import *
+from .misc import *
+from .numpy_utils import *
+from .split_utils import *
+from .text_utils import *
+from .time_utils import *
+from .version import *
+from .boxes import *
+from .image import *
+from .points import *
+from . import label

khandy/boxes/__init__.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from .boxes_clip import *
+from .boxes_overlap import *
+from .boxes_filter import *
+from .boxes_convert import *
+from .boxes_coder import *
+from .boxes_transform_flip import *
+from .boxes_transform_rotate import *
+from .boxes_transform_scale import *
+from .boxes_transform_translate import *
+from .boxes_utils import *
+from .boxes_and_indices import *

khandy/boxes/boxes_and_indices.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import numpy as np
+def _concat(arr_list, axis=0):
+    """Concatenate arrays along ``axis``; a lone array is returned uncopied."""
+    return arr_list[0] if len(arr_list) == 1 else np.concatenate(arr_list, axis)
+def convert_boxes_list_to_boxes_and_indices(boxes_list):
+    """Stack per-item box arrays and record which item each row came from.
+    Args:
+        boxes_list (list or tuple): ndarrays with shape (N_i, 4+K)
+    Returns:
+        boxes (ndarray): shape (M, 4+K) where M is sum of N_i.
+        indices (ndarray): shape (M, 1), position in ``boxes_list`` of the
+            array each row belongs to, stored with the dtype of ``boxes``.
+    References:
+        `mmdet.core.bbox.bbox2roi` in mmdetection
+        `convert_boxes_to_roi_format` in TorchVision
+        `modeling.poolers.convert_boxes_to_pooler_format` in detectron2
+    """
+    assert isinstance(boxes_list, (list, tuple))
+    boxes = _concat(boxes_list, axis=0)
+    per_item_indices = []
+    for position, item in enumerate(boxes_list):
+        per_item_indices.append(np.full((len(item), 1), position, boxes.dtype))
+    return boxes, _concat(per_item_indices, axis=0)
+def convert_boxes_and_indices_to_boxes_list(boxes, indices, num_indices):
+    """Split a stacked box array into one array per index value.
+    Args:
+        boxes (np.ndarray): shape (N, 4+K)
+        indices (np.ndarray): shape (N,) or (N, 1), e.g. batch index in a
+            mini-batch or class label index.
+        num_indices (int): number of index values; the result has this length.
+    Returns:
+        list of ndarray: entry ``i`` holds the rows whose index equals ``i``.
+        When ``boxes`` has no rows, every entry is an empty float32 array.
+    References:
+        `mmdet.core.bbox2result` in mmdetection
+        `mmdet.core.bbox.roi2bbox` in mmdetection
+        `convert_boxes_to_roi_format` in TorchVision
+        `modeling.poolers.convert_boxes_to_pooler_format` in detectron2
+    """
+    boxes = np.asarray(boxes)
+    indices = np.asarray(indices)
+    assert boxes.ndim == 2, "boxes ndim must be 2, got {}".format(boxes.ndim)
+    indices_is_column = indices.ndim == 2 and indices.shape[-1] == 1
+    assert (indices.ndim == 1) or indices_is_column, \
+        "indices ndim must be 1 or 2 if last dimension size is 1, got shape {}".format(indices.shape)
+    assert boxes.shape[0] == indices.shape[0], "the 1st dimension size of boxes and indices "\
+        "must be the same, got {} != {}".format(boxes.shape[0], indices.shape[0])
+    if boxes.shape[0] == 0:
+        return [np.zeros((0, boxes.shape[1]), dtype=np.float32)
+                for _ in range(num_indices)]
+    if indices.ndim == 2:
+        indices = np.squeeze(indices, axis=-1)
+    return [boxes[indices == value, :] for value in range(num_indices)]

khandy/boxes/boxes_clip.py ADDED Viewed

	@@ -0,0 +1,34 @@

+import numpy as np
+def clip_boxes(boxes, reference_box, copy=True):
+    """Clip the first four coordinates of ``boxes`` to ``reference_box``.
+    Args:
+        boxes: array whose last axis starts with x_min, y_min, x_max, y_max.
+        reference_box: sequence starting with x_min, y_min, x_max, y_max.
+        copy: clip a copy when True, otherwise modify ``boxes`` in place.
+    Returns:
+        the clipped array (``boxes`` itself when ``copy`` is False).
+    References:
+        `clip_to_window` in TensorFlow object detection API.
+    """
+    if copy:
+        boxes = boxes.copy()
+    x_lo, y_lo, x_hi, y_hi = reference_box[:4]
+    lower = np.array([x_lo, y_lo, x_lo, y_lo])
+    upper = np.array([x_hi, y_hi, x_hi, y_hi])
+    # `coords` is a view, so clipping into it updates `boxes`.
+    coords = boxes[..., :4]
+    np.clip(coords, lower, upper, out=coords)
+    return boxes
+def clip_boxes_to_image(boxes, image_width, image_height, subpixel=True, copy=True):
+    """Clip boxes to image boundaries.
+    Args:
+        boxes: array whose last axis starts with x_min, y_min, x_max, y_max.
+        image_width: image width.
+        image_height: image height.
+        subpixel: when True the upper bounds are (image_width, image_height);
+            when False they are (image_width - 1, image_height - 1).
+        copy: clip a copy when True, otherwise modify ``boxes`` in place.
+    References:
+        `clip_boxes` in py-faster-rcnn
+        `core.boxes_op_list.clip_to_window` in TensorFlow object detection API.
+        `structures.Boxes.clip` in detectron2
+    Notes:
+        With ``subpixel=False`` this is equivalent to
+        `clip_boxes(boxes, [0,0,image_width-1,image_height-1], copy)`.
+    """
+    max_x, max_y = image_width, image_height
+    if not subpixel:
+        max_x, max_y = image_width - 1, image_height - 1
+    return clip_boxes(boxes, [0, 0, max_x, max_y], copy)

khandy/boxes/boxes_coder.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import numpy as np
+class FasterRcnnBoxCoder:
+    """Faster RCNN box coder.
+    Notes:
+        boxes should be in cxcywh format.
+    """
+    def __init__(self, stddevs=None):
+        """Constructor for FasterRcnnBoxCoder.
+        Args:
+          stddevs: List of 4 positive scalars dividing the four code
+            components on encode (and multiplying them on decode), in the
+            same order as the box columns. If set to None, does not perform
+            scaling. For Faster RCNN, the open-source implementation
+            recommends using [0.1, 0.1, 0.2, 0.2].
+        """
+        if stddevs:
+            assert len(stddevs) == 4
+            for scalar in stddevs:
+                assert scalar > 0
+        self.stddevs = stddevs
+    def encode(self, boxes, reference_boxes, copy=True):
+        """Encode boxes with respect to reference boxes.
+        Args:
+            boxes: float array, last axis starting with cx, cy, w, h.
+            reference_boxes: array broadcastable against ``boxes`` in the
+                same layout. It is never modified.
+            copy: encode a copy when True, otherwise overwrite ``boxes``.
+        Returns:
+            array of codes: center offsets divided by the reference size and
+            log size ratios, each divided by ``stddevs`` when set.
+        """
+        if copy:
+            boxes = boxes.copy()
+        # The epsilon keeps the division and the log finite for zero sizes.
+        # It is added to a temporary: this used to be an in-place update of
+        # the caller's reference boxes, which drifted on every call.
+        ref_sizes = reference_boxes[..., 2:4] + 1e-8
+        boxes[..., 2:4] += 1e-8
+        boxes[..., 0:2] -= reference_boxes[..., 0:2]
+        boxes[..., 0:2] /= ref_sizes
+        boxes[..., 2:4] /= ref_sizes
+        np.log(boxes[..., 2:4], out=boxes[..., 2:4])
+        if self.stddevs:
+            boxes[..., 0:4] /= self.stddevs
+        return boxes
+    def decode(self, rel_boxes, reference_boxes, copy=True):
+        """Decode relative codes to boxes.
+        Args:
+            rel_boxes: float array of codes as produced by ``encode``.
+            reference_boxes: reference boxes in cxcywh layout.
+            copy: decode a copy when True, otherwise overwrite ``rel_boxes``.
+        Returns:
+            array of boxes in cxcywh layout.
+        """
+        if copy:
+            rel_boxes = rel_boxes.copy()
+        if self.stddevs:
+            rel_boxes[..., 0:4] *= self.stddevs
+        rel_boxes[..., 0:2] *= reference_boxes[..., 2:4]
+        rel_boxes[..., 0:2] += reference_boxes[..., 0:2]
+        np.exp(rel_boxes[..., 2:4], out=rel_boxes[..., 2:4])
+        rel_boxes[..., 2:4] *= reference_boxes[..., 2:4]
+        return rel_boxes
+    def decode_points(self, rel_points, reference_boxes, copy=True):
+        """Decode relative codes to points.
+        Args:
+            rel_points: float array whose last axis alternates x and y codes.
+            reference_boxes: reference boxes in cxcywh layout.
+            copy: decode a copy when True, otherwise overwrite ``rel_points``.
+        Returns:
+            array of points: x codes are scaled by the reference width and
+            shifted by its center x, y codes likewise with height and
+            center y. Only the first two ``stddevs`` are applied.
+        """
+        if copy:
+            rel_points = rel_points.copy()
+        if self.stddevs:
+            rel_points[..., 0::2] *= self.stddevs[0]
+            rel_points[..., 1::2] *= self.stddevs[1]
+        rel_points[..., 0::2] *= reference_boxes[..., 2:3]
+        rel_points[..., 1::2] *= reference_boxes[..., 3:4]
+        rel_points[..., 0::2] += reference_boxes[..., 0:1]
+        rel_points[..., 1::2] += reference_boxes[..., 1:2]
+        return rel_points

khandy/boxes/boxes_convert.py ADDED Viewed

	@@ -0,0 +1,101 @@

+import numpy as np
+def convert_xyxy_to_xywh(boxes, copy=True):
+    """Turn [x_min, y_min, x_max, y_max] boxes into [x_min, y_min, width, height].
+    With ``copy=False`` the input array is overwritten and returned.
+    """
+    out = boxes.copy() if copy else boxes
+    out[..., 2:4] -= out[..., 0:2]
+    return out
+def convert_xywh_to_xyxy(boxes, copy=True):
+    """Turn [x_min, y_min, width, height] boxes into [x_min, y_min, x_max, y_max].
+    With ``copy=False`` the input array is overwritten and returned.
+    """
+    out = boxes.copy() if copy else boxes
+    out[..., 2:4] += out[..., 0:2]
+    return out
+def convert_xywh_to_cxcywh(boxes, copy=True):
+    """Turn [x_min, y_min, width, height] boxes into [cx, cy, width, height].
+    With ``copy=False`` the input array is overwritten and returned.
+    """
+    out = boxes.copy() if copy else boxes
+    half_sizes = out[..., 2:4] * 0.5
+    out[..., 0:2] += half_sizes
+    return out
+def convert_cxcywh_to_xywh(boxes, copy=True):
+    """Turn [cx, cy, width, height] boxes into [x_min, y_min, width, height].
+    With ``copy=False`` the input array is overwritten and returned.
+    """
+    out = boxes.copy() if copy else boxes
+    half_sizes = out[..., 2:4] * 0.5
+    out[..., 0:2] -= half_sizes
+    return out
+def convert_xyxy_to_cxcywh(boxes, copy=True):
+    """Turn [x_min, y_min, x_max, y_max] boxes into [cx, cy, width, height].
+    With ``copy=False`` the input array is overwritten and returned.
+    """
+    out = boxes.copy() if copy else boxes
+    # Corners -> sizes first, then shift the min corner to the center.
+    out[..., 2:4] -= out[..., 0:2]
+    half_sizes = out[..., 2:4] * 0.5
+    out[..., 0:2] += half_sizes
+    return out
+def convert_cxcywh_to_xyxy(boxes, copy=True):
+    """Turn [cx, cy, width, height] boxes into [x_min, y_min, x_max, y_max].
+    With ``copy=False`` the input array is overwritten and returned.
+    """
+    out = boxes.copy() if copy else boxes
+    # Center -> min corner first, then sizes -> max corner.
+    half_sizes = out[..., 2:4] * 0.5
+    out[..., 0:2] -= half_sizes
+    out[..., 2:4] += out[..., 0:2]
+    return out
+def convert_boxes_format(boxes, in_fmt, out_fmt, copy=True):
+    """Converts boxes from given in_fmt to out_fmt.
+    Supported in_fmt and out_fmt are:
+        'xyxy': boxes are represented via corners, x1, y1 being top left and x2, y2 being bottom right.
+        'xywh' : boxes are represented via corner, width and height, x1, y1 being top left, w, h being width and height.
+        'cxcywh' : boxes are represented via centre, width and height, cx, cy being center of box, w, h
+            being width and height.
+    Args:
+        boxes: boxes which will be converted.
+        in_fmt (str): Input format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh'].
+        out_fmt (str): Output format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh']
+        copy (bool): convert a copy when True, otherwise overwrite ``boxes``.
+    Returns:
+        boxes: Boxes into converted format.
+    Raises:
+        ValueError: if either format is not one of the supported ones.
+    References:
+        torchvision.ops.box_convert
+    """
+    allowed_fmts = ("xyxy", "xywh", "cxcywh")
+    if not (in_fmt in allowed_fmts and out_fmt in allowed_fmts):
+        raise ValueError("Unsupported Bounding Box Conversions for given in_fmt and out_fmt")
+    if copy:
+        boxes = boxes.copy()
+    if in_fmt == out_fmt:
+        return boxes
+    # The copy (if any) was made above, so the converters work in place.
+    converters = {
+        ("xyxy", "xywh"): convert_xyxy_to_xywh,
+        ("xywh", "xyxy"): convert_xywh_to_xyxy,
+        ("xywh", "cxcywh"): convert_xywh_to_cxcywh,
+        ("cxcywh", "xywh"): convert_cxcywh_to_xywh,
+        ("xyxy", "cxcywh"): convert_xyxy_to_cxcywh,
+        ("cxcywh", "xyxy"): convert_cxcywh_to_xyxy,
+    }
+    return converters[(in_fmt, out_fmt)](boxes, copy=False)

khandy/boxes/boxes_filter.py ADDED Viewed

	@@ -0,0 +1,113 @@

+import numpy as np
+def filter_small_boxes(boxes, min_width, min_height):
+    """Filters all boxes with side smaller than min size.
+    Args:
+        boxes: a numpy array with shape [N, 4] holding N boxes.
+        min_width (float): minimum width
+        min_height (float): minimum height
+    Returns:
+        keep: indices of the boxes whose width is at least min_width
+            and whose height is at least min_height.
+    References:
+        `_filter_boxes` in py-faster-rcnn
+        `prune_small_boxes` in TensorFlow object detection API.
+        `structures.Boxes.nonempty` in detectron2
+        `ops.boxes.remove_small_boxes` in torchvision
+    """
+    wide_enough = (boxes[:, 2] - boxes[:, 0]) >= min_width
+    tall_enough = (boxes[:, 3] - boxes[:, 1]) >= min_height
+    # Turn the boolean mask into the indices of the boxes to keep.
+    return np.nonzero(wide_enough & tall_enough)[0]
+def filter_boxes_outside(boxes, reference_box):
+    """Filters bounding boxes that fall outside reference box.
+    Returns:
+        indices of the boxes lying entirely within ``reference_box``
+        (touching its border counts as inside).
+    References:
+        `prune_outside_window` in TensorFlow object detection API.
+    """
+    x_min, y_min, x_max, y_max = reference_box[:4]
+    inside = boxes[:, 0] >= x_min
+    inside &= boxes[:, 1] >= y_min
+    inside &= boxes[:, 2] <= x_max
+    inside &= boxes[:, 3] <= y_max
+    return np.nonzero(inside)[0]
+def filter_boxes_completely_outside(boxes, reference_box):
+    """Filters bounding boxes that fall completely outside of reference box.
+    Returns:
+        indices of the boxes that overlap ``reference_box``; boxes that only
+        touch its border are treated as outside.
+    References:
+        `prune_completely_outside_window` in TensorFlow object detection API.
+    """
+    x_min, y_min, x_max, y_max = reference_box[:4]
+    overlapping = boxes[:, 0] < x_max
+    overlapping &= boxes[:, 1] < y_max
+    overlapping &= boxes[:, 2] > x_min
+    overlapping &= boxes[:, 3] > y_min
+    return np.nonzero(overlapping)[0]
+def non_max_suppression(boxes, scores, thresh, classes=None, ratio_type="iou"):
+    """Greedily select boxes with high confidence
+    Args:
+        boxes: [[x_min, y_min, x_max, y_max], ...]
+        scores: object confidence
+        thresh: retain overlap_ratio <= thresh
+        classes: class labels; when given, suppression is done per class.
+        ratio_type: "iou" (or "union") divides the intersection by the
+            union, "min" divides it by the smaller of the two areas.
+    Returns:
+        indices to keep, ordered by decreasing score
+    Raises:
+        ValueError: if ``ratio_type`` is not supported.
+    References:
+        `py_cpu_nms` in py-faster-rcnn
+        torchvision.ops.nms
+        torchvision.ops.batched_nms
+    """
+    if boxes.size == 0:
+        return np.empty((0,), dtype=np.int64)
+    if classes is not None:
+        # Shift every class into its own coordinate range so that boxes of
+        # different classes can never overlap.
+        shifts = classes * (np.max(boxes) + 1)
+        boxes = boxes + shifts[:, None]
+    x_mins, y_mins, x_maxs, y_maxs = (boxes[:, col] for col in range(4))
+    areas = (x_maxs - x_mins) * (y_maxs - y_mins)
+    order = scores.flatten().argsort()[::-1]
+    keep = []
+    while order.size > 0:
+        # Keep the best remaining box, then compare it with all the others.
+        best, rest = order[0], order[1:]
+        keep.append(best)
+        left = np.maximum(x_mins[best], x_mins[rest])
+        top = np.maximum(y_mins[best], y_mins[rest])
+        right = np.minimum(x_maxs[best], x_maxs[rest])
+        bottom = np.minimum(y_maxs[best], y_maxs[rest])
+        intersect_areas = np.maximum(0, right - left) * np.maximum(0, bottom - top)
+        if ratio_type in ("union", "iou"):
+            ratio = intersect_areas / (areas[best] + areas[rest] - intersect_areas)
+        elif ratio_type == "min":
+            ratio = intersect_areas / np.minimum(areas[best], areas[rest])
+        else:
+            raise ValueError('Unsupported ratio_type. Got {}'.format(ratio_type))
+        # Only boxes that do not overlap too much survive to the next round.
+        order = rest[ratio <= thresh]
+    return np.asarray(keep)

khandy/boxes/boxes_overlap.py ADDED Viewed

	@@ -0,0 +1,166 @@

+import numpy as np
+def paired_intersection(boxes1, boxes2):
+    """Compute paired intersection areas between boxes.
+    Args:
+        boxes1: a numpy array with shape [N, 4] holding N boxes
+        boxes2: a numpy array with shape [N, 4] holding N boxes
+    Returns:
+        a numpy array with shape [N,] representing itemwise intersection area
+    References:
+        `core.box_list_ops.matched_intersection` in Tensorflow object detection API
+    Notes:
+        also known as itemwise_intersection, matched_intersection, aligned_intersection
+    """
+    # Corners of the overlap rectangle of each pair.
+    top_left = np.maximum(boxes1[:, 0:2], boxes2[:, 0:2])
+    bottom_right = np.minimum(boxes1[:, 2:4], boxes2[:, 2:4])
+    # Negative extents mean the pair does not overlap.
+    extents = np.maximum(0, bottom_right - top_left)
+    return extents[:, 0] * extents[:, 1]
+def pairwise_intersection(boxes1, boxes2):
+    """Compute pairwise intersection areas between boxes.
+    Args:
+        boxes1: a numpy array with shape [N, 4] holding N boxes.
+        boxes2: a numpy array with shape [M, 4] holding M boxes.
+    Returns:
+        a numpy array with shape [N, M] representing pairwise intersection area.
+    References:
+        `core.box_list_ops.intersection` in Tensorflow object detection API
+        `utils.box_list_ops.intersection` in Tensorflow object detection API
+    """
+    if boxes1.shape[0] * boxes2.shape[0] == 0:
+        return np.zeros((boxes1.shape[0], boxes2.shape[0]), dtype=boxes1.dtype)
+    # Loop over whichever set is smaller; the result is transposed back below.
+    swapped = boxes1.shape[0] > boxes2.shape[0]
+    if swapped:
+        boxes1, boxes2 = boxes2, boxes1
+    areas = np.empty((boxes1.shape[0], boxes2.shape[0]), dtype=boxes1.dtype)
+    for row, (x_min, y_min, x_max, y_max) in enumerate(boxes1[:, :4]):
+        widths = np.maximum(0, np.minimum(x_max, boxes2[:, 2]) - np.maximum(x_min, boxes2[:, 0]))
+        heights = np.maximum(0, np.minimum(y_max, boxes2[:, 3]) - np.maximum(y_min, boxes2[:, 1]))
+        areas[row, :] = widths * heights
+    return areas.T if swapped else areas
+def paired_overlap_ratio(boxes1, boxes2, ratio_type='iou'):
+    """Compute paired overlap ratio between boxes.
+    Args:
+        boxes1: a numpy array with shape [N, 4] holding N boxes
+        boxes2: a numpy array with shape [N, 4] holding N boxes
+        ratio_type:
+            iou (or union): Intersection-over-union (iou).
+            giou: iou minus the fraction of the minimum enclosing box that
+                is not covered by the union.
+            ioa: Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
+                their intersection area over box2's area. Note that ioa is not symmetric,
+                that is, IOA(box1, box2) != IOA(box2, box1).
+            min: Compute the ratio as the area of intersection between box1 and box2,
+                divided by the minimum area of the two bounding boxes.
+    Returns:
+        a numpy array with shape [N,] representing itemwise overlap ratio.
+        Integer boxes yield a floating point result.
+    Raises:
+        ValueError: if ``ratio_type`` is not supported.
+    References:
+        `core.box_list_ops.matched_iou` in Tensorflow object detection API
+        `structures.boxes.matched_boxlist_iou` in detectron2
+        `mmdet.core.bbox.bbox_overlaps`, see https://mmdetection.readthedocs.io/en/v2.17.0/api.html#mmdet.core.bbox.bbox_overlaps
+    """
+    intersect_areas = paired_intersection(boxes1, boxes2)
+    areas1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
+    areas2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
+    # Divisions are done out of place: an in-place `/=` on integer areas
+    # raises a casting error, which made integer boxes unusable.
+    if ratio_type in ['union', 'iou', 'giou']:
+        union_areas = (areas1 - intersect_areas) + areas2
+        ratios = intersect_areas / union_areas
+    elif ratio_type == 'min':
+        ratios = intersect_areas / np.minimum(areas1, areas2)
+    elif ratio_type == 'ioa':
+        ratios = intersect_areas / areas2
+    else:
+        raise ValueError('Unsupported ratio_type. Got {}'.format(ratio_type))
+    if ratio_type == 'giou':
+        min_xy_mins = np.minimum(boxes1[:, 0:2], boxes2[:, 0:2])
+        max_xy_maxs = np.maximum(boxes1[:, 2:4], boxes2[:, 2:4])
+        # mebb = minimum enclosing bounding boxes
+        mebb_whs = np.maximum(0, max_xy_maxs - min_xy_mins)
+        mebb_areas = mebb_whs[:, 0] * mebb_whs[:, 1]
+        # giou = iou - (mebb - union) / mebb
+        ratios = ratios + (union_areas - mebb_areas) / mebb_areas
+    return ratios
+def pairwise_overlap_ratio(boxes1, boxes2, ratio_type='iou'):
+    """Compute pairwise overlap ratio between boxes.
+    Args:
+        boxes1: a numpy array with shape [N, 4] holding N boxes
+        boxes2: a numpy array with shape [M, 4] holding M boxes
+        ratio_type:
+            iou: Intersection-over-union (iou).
+            ioa: Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
+                their intersection area over box2's area. Note that ioa is not symmetric,
+                that is, IOA(box1, box2) != IOA(box2, box1).
+            min: Compute the ratio as the area of intersection between box1 and box2,
+                divided by the minimum area of the two bounding boxes.
+    Returns:
+        a numpy array with shape [N, M] representing pairwise overlap ratio.
+    References:
+        `utils.np_box_ops.iou` in Tensorflow object detection API
+        `utils.np_box_ops.ioa` in Tensorflow object detection API
+        `utils.np_box_ops.giou` in Tensorflow object detection API
+        `mmdet.core.bbox.bbox_overlaps`, see https://mmdetection.readthedocs.io/en/v2.17.0/api.html#mmdet.core.bbox.bbox_overlaps
+        `torchvision.ops.box_iou`, see https://pytorch.org/vision/stable/ops.html#torchvision.ops.box_iou
+        `torchvision.ops.generalized_box_iou`, see https://pytorch.org/vision/stable/ops.html#torchvision.ops.generalized_box_iou
+        http://ww2.mathworks.cn/help/vision/ref/bboxoverlapratio.html
+    """
+    intersect_areas = pairwise_intersection(boxes1, boxes2)
+    areas1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
+    areas2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
+    if ratio_type in ['union', 'iou', 'giou']:
+        union_areas = np.expand_dims(areas1, axis=1) - intersect_areas
+        union_areas += np.expand_dims(areas2, axis=0)
+        intersect_areas /= union_areas
+    elif ratio_type == 'min':
+        min_areas = np.minimum(np.expand_dims(areas1, axis=1), np.expand_dims(areas2, axis=0))
+        intersect_areas /= min_areas
+    elif ratio_type == 'ioa':
+        intersect_areas /= np.expand_dims(areas2, axis=0)
+    else:
+        raise ValueError('Unsupported ratio_type. Got {}'.format(ratio_type))
+    if ratio_type == 'giou':
+        min_xy_mins = np.minimum(boxes1[:, None, 0:2], boxes2[:, 0:2])
+        max_xy_mins = np.maximum(boxes1[:, None, 2:4], boxes2[:, 2:4])
+        # mebb = minimum enclosing bounding boxes
+        mebb_whs = np.maximum(0, max_xy_mins - min_xy_mins)
+        mebb_areas = mebb_whs[:, :, 0] * mebb_whs[:, :, 1]
+        union_areas -= mebb_areas
+        union_areas /= mebb_areas
+        intersect_areas += union_areas
+    return intersect_areas

khandy/boxes/boxes_transform_flip.py ADDED Viewed

	@@ -0,0 +1,135 @@

+import numpy as np
+from .boxes_utils import assert_and_normalize_shape
+def flip_boxes(boxes, x_center=0, y_center=0, direction='h'):
+    """
+    Args:
+        boxes: (N, 4+K)
+        x_center: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+        y_center: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+        direction: str
+    """
+    assert direction in ['x', 'h', 'horizontal',
+                         'y', 'v', 'vertical',
+                         'o', 'b', 'both']
+    boxes = np.asarray(boxes, np.float32)
+    ret_boxes = boxes.copy()
+    x_center = np.asarray(x_center, np.float32)
+    y_center = np.asarray(y_center, np.float32)
+    x_center = assert_and_normalize_shape(x_center, boxes.shape[0])
+    y_center = assert_and_normalize_shape(y_center, boxes.shape[0])
+    if direction in ['o', 'b', 'both', 'x', 'h', 'horizontal']:
+        ret_boxes[:, 0] = 2 * x_center - boxes[:, 2]
+        ret_boxes[:, 2] = 2 * x_center - boxes[:, 0]
+    if direction in ['o', 'b', 'both', 'y', 'v', 'vertical']:
+        ret_boxes[:, 1] = 2 * y_center - boxes[:, 3]
+        ret_boxes[:, 3] = 2 * y_center - boxes[:, 1]
+    return ret_boxes
+def fliplr_boxes(boxes, x_center=0, y_center=0):
+    """
+    Args:
+        boxes: (N, 4+K)
+        x_center: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+        y_center: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+    """
+    boxes = np.asarray(boxes, np.float32)
+    ret_boxes = boxes.copy()
+    x_center = np.asarray(x_center, np.float32)
+    y_center = np.asarray(y_center, np.float32)
+    x_center = assert_and_normalize_shape(x_center, boxes.shape[0])
+    y_center = assert_and_normalize_shape(y_center, boxes.shape[0])
+    ret_boxes[:, 0] = 2 * x_center - boxes[:, 2]
+    ret_boxes[:, 2] = 2 * x_center - boxes[:, 0]
+    return ret_boxes
+def flipud_boxes(boxes, x_center=0, y_center=0):
+    """
+    Args:
+        boxes: (N, 4+K)
+        x_center: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+        y_center: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+    """
+    boxes = np.asarray(boxes, np.float32)
+    ret_boxes = boxes.copy()
+    x_center = np.asarray(x_center, np.float32)
+    y_center = np.asarray(y_center, np.float32)
+    x_center = assert_and_normalize_shape(x_center, boxes.shape[0])
+    y_center = assert_and_normalize_shape(y_center, boxes.shape[0])
+    ret_boxes[:, 1] = 2 * y_center - boxes[:, 3]
+    ret_boxes[:, 3] = 2 * y_center - boxes[:, 1]
+    return ret_boxes
+def transpose_boxes(boxes, x_center=0, y_center=0):
+    """
+    Args:
+        boxes: (N, 4+K)
+        x_center: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+        y_center: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+    """
+    boxes = np.asarray(boxes, np.float32)
+    ret_boxes = boxes.copy()
+    x_center = np.asarray(x_center, np.float32)
+    y_center = np.asarray(y_center, np.float32)
+    x_center = assert_and_normalize_shape(x_center, boxes.shape[0])
+    y_center = assert_and_normalize_shape(y_center, boxes.shape[0])
+    shift = x_center - y_center
+    ret_boxes[:, 0] = boxes[:, 1] + shift
+    ret_boxes[:, 1] = boxes[:, 0] - shift
+    ret_boxes[:, 2] = boxes[:, 3] + shift
+    ret_boxes[:, 3] = boxes[:, 2] - shift
+    return ret_boxes
+def flip_boxes_in_image(boxes, image_width, image_height, direction='h'):
+    """
+    Args:
+        boxes: (N, 4+K)
+        image_width: int
+        image_width: int
+        direction: str
+    References:
+        `core.bbox.bbox_flip` in mmdetection
+        `datasets.pipelines.RandomFlip.bbox_flip` in mmdetection
+    """
+    x_center = (image_width - 1) * 0.5
+    y_center = (image_height - 1) * 0.5
+    ret_boxes = flip_boxes(boxes, x_center, y_center, direction=direction)
+    return ret_boxes
+def rot90_boxes_in_image(boxes, image_width, image_height, n=1):
+    """Rotate boxes counter-clockwise by 90 degrees.
+    References:
+        np.rot90
+        cv2.rotate
+        tf.image.rot90
+    """
+    n = n % 4
+    if n == 0:
+        ret_boxes = boxes.copy()
+    elif n == 1:
+        ret_boxes = transpose_boxes(boxes)
+        ret_boxes = flip_boxes_in_image(ret_boxes, image_width, image_height, 'v')
+    elif n == 2:
+        ret_boxes = flip_boxes_in_image(boxes, image_width, image_height, 'o')
+    else:
+        ret_boxes = transpose_boxes(boxes)
+        ret_boxes = flip_boxes_in_image(ret_boxes, image_width, image_height, 'h');
+    return ret_boxes

khandy/boxes/boxes_transform_rotate.py ADDED Viewed

	@@ -0,0 +1,140 @@

+import numpy as np
+from .boxes_utils import assert_and_normalize_shape
+def rotate_boxes(boxes, angle, x_center=0, y_center=0, scale=1,
+                 degrees=True, return_rotated_boxes=False):
+    """
+    Args:
+        boxes: (N, 4+K)
+        angle: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+        x_center: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+        y_center: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+        scale: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+            scale factor in x and y dimension
+        degrees: bool
+        return_rotated_boxes: bool
+    """
+    boxes = np.asarray(boxes, np.float32)
+    angle = np.asarray(angle, np.float32)
+    x_center = np.asarray(x_center, np.float32)
+    y_center = np.asarray(y_center, np.float32)
+    scale = np.asarray(scale, np.float32)
+    angle = assert_and_normalize_shape(angle, boxes.shape[0])
+    x_center = assert_and_normalize_shape(x_center, boxes.shape[0])
+    y_center = assert_and_normalize_shape(y_center, boxes.shape[0])
+    scale = assert_and_normalize_shape(scale, boxes.shape[0])
+    if degrees:
+        angle = np.deg2rad(angle)
+    cos_val = scale * np.cos(angle)
+    sin_val = scale * np.sin(angle)
+    x_shift = x_center - x_center * cos_val + y_center * sin_val
+    y_shift = y_center - x_center * sin_val - y_center * cos_val
+    x_mins, y_mins = boxes[:,0], boxes[:,1]
+    x_maxs, y_maxs = boxes[:,2], boxes[:,3]
+    x00 = x_mins * cos_val - y_mins * sin_val + x_shift
+    x10 = x_maxs * cos_val - y_mins * sin_val + x_shift
+    x11 = x_maxs * cos_val - y_maxs * sin_val + x_shift
+    x01 = x_mins * cos_val - y_maxs * sin_val + x_shift
+    y00 = x_mins * sin_val + y_mins * cos_val + y_shift
+    y10 = x_maxs * sin_val + y_mins * cos_val + y_shift
+    y11 = x_maxs * sin_val + y_maxs * cos_val + y_shift
+    y01 = x_mins * sin_val + y_maxs * cos_val + y_shift
+    rotated_boxes = np.stack([x00, y00, x10, y10, x11, y11, x01, y01], axis=-1)
+    ret_x_mins = np.min(rotated_boxes[:,0::2], axis=1)
+    ret_y_mins = np.min(rotated_boxes[:,1::2], axis=1)
+    ret_x_maxs = np.max(rotated_boxes[:,0::2], axis=1)
+    ret_y_maxs = np.max(rotated_boxes[:,1::2], axis=1)
+    if boxes.ndim == 4:
+        ret_boxes = np.stack([ret_x_mins, ret_y_mins, ret_x_maxs, ret_y_maxs], axis=-1)
+    else:
+        ret_boxes = boxes.copy()
+        ret_boxes[:, :4] = np.stack([ret_x_mins, ret_y_mins, ret_x_maxs, ret_y_maxs], axis=-1)
+    if not return_rotated_boxes:
+        return ret_boxes
+    else:
+        return ret_boxes, rotated_boxes
+def rotate_boxes_wrt_centers(boxes, angle, scale=1, degrees=True,
+                             return_rotated_boxes=False):
+    """
+    Args:
+        boxes: (N, 4+K)
+        angle: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+        scale: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+            scale factor in x and y dimension
+        degrees: bool
+        return_rotated_boxes: bool
+    """
+    boxes = np.asarray(boxes, np.float32)
+    angle = np.asarray(angle, np.float32)
+    scale = np.asarray(scale, np.float32)
+    angle = assert_and_normalize_shape(angle, boxes.shape[0])
+    scale = assert_and_normalize_shape(scale, boxes.shape[0])
+    if degrees:
+        angle = np.deg2rad(angle)
+    cos_val = scale * np.cos(angle)
+    sin_val = scale * np.sin(angle)
+    x_centers = boxes[:, 2] + boxes[:, 0]
+    y_centers = boxes[:, 3] + boxes[:, 1]
+    x_centers *= 0.5
+    y_centers *= 0.5
+    half_widths = boxes[:, 2] - boxes[:, 0]
+    half_heights = boxes[:, 3] - boxes[:, 1]
+    half_widths *= 0.5
+    half_heights *= 0.5
+    half_widths_cos = half_widths * cos_val
+    half_widths_sin = half_widths * sin_val
+    half_heights_cos = half_heights * cos_val
+    half_heights_sin = half_heights * sin_val
+    x00 = -half_widths_cos + half_heights_sin
+    x10 = half_widths_cos + half_heights_sin
+    x11 = half_widths_cos - half_heights_sin
+    x01 = -half_widths_cos - half_heights_sin
+    x00 += x_centers
+    x10 += x_centers
+    x11 += x_centers
+    x01 += x_centers
+    y00 = -half_widths_sin - half_heights_cos
+    y10 = half_widths_sin - half_heights_cos
+    y11 = half_widths_sin + half_heights_cos
+    y01 = -half_widths_sin + half_heights_cos
+    y00 += y_centers
+    y10 += y_centers
+    y11 += y_centers
+    y01 += y_centers
+    rotated_boxes = np.stack([x00, y00, x10, y10, x11, y11, x01, y01], axis=-1)
+    ret_x_mins = np.min(rotated_boxes[:,0::2], axis=1)
+    ret_y_mins = np.min(rotated_boxes[:,1::2], axis=1)
+    ret_x_maxs = np.max(rotated_boxes[:,0::2], axis=1)
+    ret_y_maxs = np.max(rotated_boxes[:,1::2], axis=1)
+    if boxes.ndim == 4:
+        ret_boxes = np.stack([ret_x_mins, ret_y_mins, ret_x_maxs, ret_y_maxs], axis=-1)
+    else:
+        ret_boxes = boxes.copy()
+        ret_boxes[:, :4] = np.stack([ret_x_mins, ret_y_mins, ret_x_maxs, ret_y_maxs], axis=-1)
+    if not return_rotated_boxes:
+        return ret_boxes
+    else:
+        return ret_boxes, rotated_boxes

khandy/boxes/boxes_transform_scale.py ADDED Viewed

	@@ -0,0 +1,86 @@

+import numpy as np
+from .boxes_utils import assert_and_normalize_shape
+def scale_boxes(boxes, x_scale=1, y_scale=1, x_center=0, y_center=0, copy=True):
+    """Scale boxes coordinates in x and y dimensions.
+    Args:
+        boxes: (N, 4+K)
+        x_scale: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+            scale factor in x dimension
+        y_scale: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+            scale factor in y dimension
+        x_center: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+        y_center: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+    References:
+        `core.box_list_ops.scale` in TensorFlow object detection API
+        `utils.box_list_ops.scale` in TensorFlow object detection API
+        `datasets.pipelines.Resize._resize_bboxes` in mmdetection
+        `core.anchor.guided_anchor_target.calc_region` in mmdetection where comments may be misleading!
+        `layers.mask_ops.scale_boxes` in detectron2
+        `mmcv.bbox_scaling`
+    """
+    boxes = np.array(boxes, dtype=np.float32, copy=copy)
+    x_scale = np.asarray(x_scale, np.float32)
+    y_scale = np.asarray(y_scale, np.float32)
+    x_scale = assert_and_normalize_shape(x_scale, boxes.shape[0])
+    y_scale = assert_and_normalize_shape(y_scale, boxes.shape[0])
+    x_center = np.asarray(x_center, np.float32)
+    y_center = np.asarray(y_center, np.float32)
+    x_center = assert_and_normalize_shape(x_center, boxes.shape[0])
+    y_center = assert_and_normalize_shape(y_center, boxes.shape[0])
+    x_shift = 1 - x_scale
+    y_shift = 1 - y_scale
+    x_shift *= x_center
+    y_shift *= y_center
+    boxes[:, 0] *= x_scale
+    boxes[:, 1] *= y_scale
+    boxes[:, 2] *= x_scale
+    boxes[:, 3] *= y_scale
+    boxes[:, 0] += x_shift
+    boxes[:, 1] += y_shift
+    boxes[:, 2] += x_shift
+    boxes[:, 3] += y_shift
+    return boxes
+def scale_boxes_wrt_centers(boxes, x_scale=1, y_scale=1, copy=True):
+    """
+    Args:
+        boxes: (N, 4+K)
+        x_scale: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+            scale factor in x dimension
+        y_scale: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+            scale factor in y dimension
+    References:
+        `core.anchor.guided_anchor_target.calc_region` in mmdetection where comments may be misleading!
+        `layers.mask_ops.scale_boxes` in detectron2
+        `mmcv.bbox_scaling`
+    """
+    boxes = np.array(boxes, dtype=np.float32, copy=copy)
+    x_scale = np.asarray(x_scale, np.float32)
+    y_scale = np.asarray(y_scale, np.float32)
+    x_scale = assert_and_normalize_shape(x_scale, boxes.shape[0])
+    y_scale = assert_and_normalize_shape(y_scale, boxes.shape[0])
+    x_factor = (x_scale - 1) * 0.5
+    y_factor = (y_scale - 1) * 0.5
+    x_deltas = boxes[:, 2] - boxes[:, 0]
+    y_deltas = boxes[:, 3] - boxes[:, 1]
+    x_deltas *= x_factor
+    y_deltas *= y_factor
+    boxes[:, 0] -= x_deltas
+    boxes[:, 1] -= y_deltas
+    boxes[:, 2] += x_deltas
+    boxes[:, 3] += y_deltas
+    return boxes

khandy/boxes/boxes_transform_translate.py ADDED Viewed

	@@ -0,0 +1,136 @@

+import numpy as np
+from .boxes_utils import assert_and_normalize_shape
+def translate_boxes(boxes, x_shift=0, y_shift=0, copy=True):
+    """translate boxes coordinates in x and y dimensions.
+    Args:
+        boxes: (N, 4+K)
+        x_shift: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+            shift in x dimension
+        y_shift: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+            shift in y dimension
+        copy: bool
+    References:
+        `datasets.pipelines.RandomCrop` in mmdetection
+    """
+    boxes = np.array(boxes, dtype=np.float32, copy=copy)
+    x_shift = np.asarray(x_shift, np.float32)
+    y_shift = np.asarray(y_shift, np.float32)
+    x_shift = assert_and_normalize_shape(x_shift, boxes.shape[0])
+    y_shift = assert_and_normalize_shape(y_shift, boxes.shape[0])
+    boxes[:, 0] += x_shift
+    boxes[:, 1] += y_shift
+    boxes[:, 2] += x_shift
+    boxes[:, 3] += y_shift
+    return boxes
+def adjust_boxes(boxes, x_min_shift, y_min_shift, x_max_shift, y_max_shift, copy=True):
+    """
+    Args:
+        boxes: (N, 4+K)
+        x_min_shift: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+            shift (x_min, y_min) in x dimension
+        y_min_shift: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+            shift (x_min, y_min) in y dimension
+        x_max_shift: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+            shift (x_max, y_max) in x dimension
+        y_max_shift: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+            shift (x_max, y_max) in y dimension
+        copy: bool
+    """
+    boxes = np.array(boxes, dtype=np.float32, copy=copy)
+    x_min_shift = np.asarray(x_min_shift, np.float32)
+    y_min_shift = np.asarray(y_min_shift, np.float32)
+    x_max_shift = np.asarray(x_max_shift, np.float32)
+    y_max_shift = np.asarray(y_max_shift, np.float32)
+    x_min_shift = assert_and_normalize_shape(x_min_shift, boxes.shape[0])
+    y_min_shift = assert_and_normalize_shape(y_min_shift, boxes.shape[0])
+    x_max_shift = assert_and_normalize_shape(x_max_shift, boxes.shape[0])
+    y_max_shift = assert_and_normalize_shape(y_max_shift, boxes.shape[0])
+    boxes[:, 0] += x_min_shift
+    boxes[:, 1] += y_min_shift
+    boxes[:, 2] += x_max_shift
+    boxes[:, 3] += y_max_shift
+    return boxes
+def inflate_or_deflate_boxes(boxes, width_delta=0, height_delta=0, copy=True):
+    """
+    Args:
+        boxes: (N, 4+K)
+        width_delta: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+        height_delta: array-like whose shape is (), (1,), (N,), (1, 1) or (N, 1)
+        copy: bool
+    """
+    boxes = np.array(boxes, dtype=np.float32, copy=copy)
+    width_delta = np.asarray(width_delta, np.float32)
+    height_delta = np.asarray(height_delta, np.float32)
+    width_delta = assert_and_normalize_shape(width_delta, boxes.shape[0])
+    height_delta = assert_and_normalize_shape(height_delta, boxes.shape[0])
+    half_width_delta = width_delta * 0.5
+    half_height_delta = height_delta * 0.5
+    boxes[:, 0] -= half_width_delta
+    boxes[:, 1] -= half_height_delta
+    boxes[:, 2] += half_width_delta
+    boxes[:, 3] += half_height_delta
+    return boxes
+def inflate_boxes_to_square(boxes, copy=True):
+    """Inflate boxes to square
+    Args:
+        boxes: (N, 4+K)
+        copy: bool
+    """
+    boxes = np.array(boxes, dtype=np.float32, copy=copy)
+    widths = boxes[:, 2] - boxes[:, 0]
+    heights = boxes[:, 3] - boxes[:, 1]
+    max_side_lengths = np.maximum(widths, heights)
+    width_deltas = np.subtract(max_side_lengths, widths, widths)
+    height_deltas = np.subtract(max_side_lengths, heights, heights)
+    width_deltas *= 0.5
+    height_deltas *= 0.5
+    boxes[:, 0] -= width_deltas
+    boxes[:, 1] -= height_deltas
+    boxes[:, 2] += width_deltas
+    boxes[:, 3] += height_deltas
+    return boxes
+def deflate_boxes_to_square(boxes, copy=True):
+    """Deflate boxes to square
+    Args:
+        boxes: (N, 4+K)
+        copy: bool
+    """
+    boxes = np.array(boxes, dtype=np.float32, copy=copy)
+    widths = boxes[:, 2] - boxes[:, 0]
+    heights = boxes[:, 3] - boxes[:, 1]
+    min_side_lengths = np.minimum(widths, heights)
+    width_deltas = np.subtract(min_side_lengths, widths, widths)
+    height_deltas = np.subtract(min_side_lengths, heights, heights)
+    width_deltas *= 0.5
+    height_deltas *= 0.5
+    boxes[:, 0] -= width_deltas
+    boxes[:, 1] -= height_deltas
+    boxes[:, 2] += width_deltas
+    boxes[:, 3] += height_deltas
+    return boxes

khandy/boxes/boxes_utils.py ADDED Viewed

	@@ -0,0 +1,28 @@

+import numpy as np
+def assert_and_normalize_shape(x, length):
+    """
+    Args:
+        x: ndarray
+        length: int
+    """
+    if x.ndim == 0:
+        return x
+    elif x.ndim == 1:
+        if len(x) == 1:
+            return x
+        elif len(x) == length:
+            return x
+        else:
+            raise ValueError('Incompatible shape!')
+    elif x.ndim == 2:
+        if x.shape == (1, 1):
+            return np.squeeze(x, axis=-1)
+        elif x.shape == (length, 1):
+            return np.squeeze(x, axis=-1)
+        else:
+            raise ValueError('Incompatible shape!')
+    else:
+        raise ValueError('Incompatible ndim!')

khandy/dict_utils.py ADDED Viewed

	@@ -0,0 +1,168 @@

+import random
+from collections import OrderedDict
+def get_dict_first_item(dict_obj):
+    for key in dict_obj:
+        return key, dict_obj[key]
+def sort_dict(dict_obj, key=None, reverse=False):
+    return OrderedDict(sorted(dict_obj.items(), key=key, reverse=reverse))
+def create_multidict(key_list, value_list):
+    assert len(key_list) == len(value_list)
+    multidict_obj = {}
+    for key, value in zip(key_list, value_list):
+        multidict_obj.setdefault(key, []).append(value)
+    return multidict_obj
+def convert_multidict_to_list(multidict_obj):
+    key_list, value_list = [], []
+    for key, value in multidict_obj.items():
+        key_list += [key] * len(value)
+        value_list += value
+    return key_list, value_list
+def convert_multidict_to_records(multidict_obj, key_map=None, raise_if_key_error=True):
+    records = []
+    if key_map is None:
+        for key in multidict_obj:
+            for value in multidict_obj[key]:
+                records.append('{},{}'.format(value, key))
+    else:
+        for key in multidict_obj:
+            if raise_if_key_error:
+                mapped_key = key_map[key]
+            else:
+                mapped_key = key_map.get(key, key)
+            for value in multidict_obj[key]:
+                records.append('{},{}'.format(value, mapped_key))
+    return records
+def sample_multidict(multidict_obj, num_keys, num_per_key=None):
+    num_keys = min(num_keys, len(multidict_obj))
+    sub_keys = random.sample(list(multidict_obj), num_keys)
+    if num_per_key is None:
+        sub_mdict = {key: multidict_obj[key] for key in sub_keys}
+    else:
+        sub_mdict = {}
+        for key in sub_keys:
+            num_examples_inner = min(num_per_key, len(multidict_obj[key]))
+            sub_mdict[key] = random.sample(multidict_obj[key], num_examples_inner)
+    return sub_mdict
+def split_multidict_on_key(multidict_obj, split_ratio, use_shuffle=False):
+    """Split multidict_obj on its key.
+    """
+    assert isinstance(multidict_obj, dict)
+    assert isinstance(split_ratio, (list, tuple))
+    pdf = [k / float(sum(split_ratio)) for k in split_ratio]
+    cdf = [sum(pdf[:k]) for k in range(len(pdf) + 1)]
+    indices = [int(round(len(multidict_obj) * k)) for k in cdf]
+    dict_keys = list(multidict_obj)
+    if use_shuffle:
+        random.shuffle(dict_keys)
+    be_split_list = []
+    for i in range(len(split_ratio)):
+        part_keys = dict_keys[indices[i]: indices[i + 1]]
+        part_dict = dict([(key, multidict_obj[key]) for key in part_keys])
+        be_split_list.append(part_dict)
+    return be_split_list
+def split_multidict_on_value(multidict_obj, split_ratio, use_shuffle=False):
+    """Split multidict_obj on its value.
+    """
+    assert isinstance(multidict_obj, dict)
+    assert isinstance(split_ratio, (list, tuple))
+    pdf = [k / float(sum(split_ratio)) for k in split_ratio]
+    cdf = [sum(pdf[:k]) for k in range(len(pdf) + 1)]
+    be_split_list = [dict() for k in range(len(split_ratio))]
+    for key, value in multidict_obj.items():
+        indices = [int(round(len(value) * k)) for k in cdf]
+        cloned = value[:]
+        if use_shuffle:
+            random.shuffle(cloned)
+        for i in range(len(split_ratio)):
+            be_split_list[i][key] = cloned[indices[i]: indices[i + 1]]
+    return be_split_list
+def get_multidict_info(multidict_obj, with_print=False, desc=None):
+    num_list = [len(val) for val in multidict_obj.values()]
+    num_keys = len(num_list)
+    num_values = sum(num_list)
+    max_values_per_key = max(num_list)
+    min_values_per_key = min(num_list)
+    if num_keys == 0:
+        avg_values_per_key = 0
+    else:
+        avg_values_per_key = num_values / num_keys
+    info = {
+        'num_keys': num_keys,
+        'num_values': num_values,
+        'max_values_per_key': max_values_per_key,
+        'min_values_per_key': min_values_per_key,
+        'avg_values_per_key': avg_values_per_key,
+    }
+    if with_print:
+        desc = desc or '<unknown>'
+        print('{} key number:    {}'.format(desc, info['num_keys']))
+        print('{} value number:    {}'.format(desc, info['num_values']))
+        print('{} max number per-key: {}'.format(desc, info['max_values_per_key']))
+        print('{} min number per-key: {}'.format(desc, info['min_values_per_key']))
+        print('{} avg number per-key: {:.2f}'.format(desc, info['avg_values_per_key']))
+    return info
+def filter_multidict_by_number(multidict_obj, lower, upper=None):
+    if upper is None:
+        return {key: value for key, value in multidict_obj.items()
+                if lower <= len(value) }
+    else:
+        assert lower <= upper, 'lower must not be greater than upper'
+        return {key: value for key, value in multidict_obj.items()
+                if lower <= len(value) <= upper }
+def sort_multidict_by_number(multidict_obj, num_keys_to_keep=None, reverse=True):
+    """
+    Args:
+        reverse: sort in ascending order when is True.
+    """
+    if num_keys_to_keep is None:
+        num_keys_to_keep = len(multidict_obj)
+    else:
+        num_keys_to_keep = min(num_keys_to_keep, len(multidict_obj))
+    sorted_items = sorted(multidict_obj.items(), key=lambda x: len(x[1]), reverse=reverse)
+    filtered_dict = OrderedDict()
+    for i in range(num_keys_to_keep):
+        filtered_dict[sorted_items[i][0]] = sorted_items[i][1]
+    return filtered_dict
+def merge_multidict(*mdicts):
+    merged_multidict = {}
+    for item in mdicts:
+        for key, value in item.items():
+            merged_multidict.setdefault(key, []).extend(value)
+    return merged_multidict
+def invert_multidict(multidict_obj):
+    inverted_dict = {}
+    for key, value in multidict_obj.items():
+        for item in value:
+            inverted_dict.setdefault(item, []).append(key)
+    return inverted_dict

khandy/draw_utils.py ADDED Viewed

	@@ -0,0 +1,148 @@

+import numpy as np
+import PIL
+from PIL import Image
+from PIL import ImageDraw
+from PIL import ImageFont
+from PIL import ImageColor
+def _is_legal_color(color):
+    if color is None:
+        return True
+    if isinstance(color, str):
+        return True
+    return isinstance(color, (tuple, list)) and len(color) == 3
+def _normalize_color(color, pil_mode, swap_rgb=False):
+    if color is None:
+        return color
+    if isinstance(color, str):
+        color = ImageColor.getrgb(color)
+    gray = color[0]
+    if swap_rgb:
+        color = (color[2], color[1], color[0])
+    if pil_mode == 'L':
+        color = gray
+    return color
+def draw_text(image, text, position, color=(255,0,0), font=None, font_size=15):
+    """Draws text on given image.
+    Args:
+        image (ndarray).
+        text (str): text to be drawn.
+        position (Tuple[int, int]): position where to be drawn.
+        color (List[Union[str, Tuple[int, int, int]]]): text color.
+        font (str):  A filename or file-like object containing a TrueType font. If the file is not found in this
+            filename, the loader may also search in other directories, such as the `fonts/` directory on Windows
+            or `/Library/Fonts/`, `/System/Library/Fonts/` and `~/Library/Fonts/` on macOS.
+        font_size (int): The requested font size in points.
+    References:
+        torchvision.utils.draw_bounding_boxes
+    """
+    if isinstance(image, np.ndarray):
+        # PIL.Image.fromarray fails with uint16 arrays
+        # https://github.com/python-pillow/Pillow/issues/1514
+        if (image.dtype == np.uint16) and (image.ndim != 2):
+            image = (image / 256).astype(np.uint8)
+        pil_image = Image.fromarray(image)
+    elif isinstance(image, PIL.Image.Image):
+        pil_image = image
+    else:
+        raise TypeError('Unsupported image type!')
+    assert pil_image.mode in ['L', 'RGB', 'RGBA']
+    assert _is_legal_color(color)
+    color = _normalize_color(color, pil_image.mode, isinstance(image, np.ndarray))
+    if font is None:
+        font_object = ImageFont.load_default()
+    else:
+        font_object = ImageFont.truetype(font, size=font_size)
+    draw = ImageDraw.Draw(pil_image)
+    draw.text((position[0], position[1]), text,
+              fill=color, font=font_object)
+    if isinstance(image, np.ndarray):
+        return np.asarray(pil_image)
+    return pil_image
+def draw_bounding_boxes(image, boxes, labels=None, colors=None,
+                        fill=False, width=1, font=None, font_size=15):
+    """Draws bounding boxes on given image.
+    Args:
+        image (ndarray).
+        boxes (ndarray): ndarray of size (N, 4) containing bounding boxes in (xmin, ymin, xmax, ymax) format.
+        labels (List[str]): List containing the labels of bounding boxes.
+        colors (List[Union[str, Tuple[int, int, int]]]): List containing the colors of bounding boxes or labels.
+        fill (bool): If `True` fills the bounding box with specified color.
+        width (int): Width of bounding box.
+        font (str):  A filename or file-like object containing a TrueType font. If the file is not found in this
+            filename, the loader may also search in other directories, such as the `fonts/` directory on Windows
+            or `/Library/Fonts/`, `/System/Library/Fonts/` and `~/Library/Fonts/` on macOS.
+        font_size (int): The requested font size in points.
+    References:
+        torchvision.utils.draw_bounding_boxes
+    """
+    if isinstance(image, np.ndarray):
+        # PIL.Image.fromarray fails with uint16 arrays
+        # https://github.com/python-pillow/Pillow/issues/1514
+        if (image.dtype == np.uint16) and (image.ndim != 2):
+            image = (image / 256).astype(np.uint8)
+        pil_image = Image.fromarray(image)
+    elif isinstance(image, PIL.Image.Image):
+        pil_image = image
+    else:
+        raise TypeError('Unsupported image type!')
+    pil_image = pil_image.convert('RGB')
+    if font is None:
+        font_object = ImageFont.load_default()
+    else:
+        font_object = ImageFont.truetype(font, size=font_size)
+    if fill:
+        draw = ImageDraw.Draw(pil_image, "RGBA")
+    else:
+        draw = ImageDraw.Draw(pil_image)
+    for i, bbox in enumerate(boxes):
+        if colors is None:
+            color = None
+        else:
+            color = colors[i]
+        assert _is_legal_color(color)
+        color = _normalize_color(color, pil_image.mode, isinstance(image, np.ndarray))
+        if fill:
+            if color is None:
+                fill_color = (255, 255, 255, 100)
+            elif isinstance(color, str):
+                # This will automatically raise Error if rgb cannot be parsed.
+                fill_color = ImageColor.getrgb(color) + (100,)
+            elif isinstance(color, tuple):
+                fill_color = color + (100,)
+            # the first argument of ImageDraw.rectangle:
+            # in old version only supports [(x0, y0), (x1, y1)]
+            # in new version supports either [(x0, y0), (x1, y1)] or [x0, y0, x1, y1]
+            draw.rectangle([(bbox[0], bbox[1]), (bbox[2], bbox[3])], width=width, outline=color, fill=fill_color)
+        else:
+            draw.rectangle([(bbox[0], bbox[1]), (bbox[2], bbox[3])], width=width, outline=color)
+        if labels is not None:
+            margin = width + 1
+            draw.text((bbox[0] + margin, bbox[1] + margin), labels[i], fill=color, font=font_object)
+    if isinstance(image, np.ndarray):
+        return np.asarray(pil_image)
+    return pil_image

khandy/feature_utils.py ADDED Viewed

	@@ -0,0 +1,62 @@

+from collections import OrderedDict
+import khandy
+import numpy as np
+def convert_feature_dict_to_array(feature_dict):
+    one_feature = khandy.get_dict_first_item(feature_dict)[1]
+    num_features = sum([len(item) for item in feature_dict.values()])
+    key_list = []
+    start_index = 0
+    feature_array = np.empty((num_features, one_feature.shape[-1]), one_feature.dtype)
+    for key, value in feature_dict.items():
+        feature_array[start_index: start_index + len(value)]= value
+        key_list += [key] * len(value)
+        start_index += len(value)
+    return key_list, feature_array
+def convert_feature_array_to_dict(key_list, feature_array):
+    assert len(key_list) == len(feature_array)
+    feature_dict = OrderedDict()
+    for key, feat in zip(key_list, feature_array):
+        feature_dict.setdefault(key, []).append(feat)
+    for label in feature_dict.keys():
+        feature_dict[label] = np.vstack(feature_dict[label])
+    return feature_dict
+def pairwise_distances(x, y, squared=True):
+    """Compute pairwise (squared) Euclidean distances.
+    References:
+        [2016 CVPR] Deep Metric Learning via Lifted Structured Feature Embedding
+        `euclidean_distances` from sklearn
+    """
+    assert isinstance(x, np.ndarray) and x.ndim == 2
+    assert isinstance(y, np.ndarray) and y.ndim == 2
+    assert x.shape[1] == y.shape[1]
+    x_square = np.expand_dims(np.einsum('ij,ij->i', x, x), axis=1)
+    if x is y:
+        y_square = x_square.T
+    else:
+        y_square = np.expand_dims(np.einsum('ij,ij->i', y, y), axis=0)
+    distances = np.dot(x, y.T)
+    # use inplace operation to accelerate
+    distances *= -2
+    distances += x_square
+    distances += y_square
+    # result maybe less than 0 due to floating point rounding errors.
+    np.maximum(distances, 0, distances)
+    if x is y:
+        # Ensure that distances between vectors and themselves are set to 0.0.
+        # This may not be the case due to floating point rounding errors.
+        distances.flat[::distances.shape[0] + 1] = 0.0
+    if not squared:
+        np.sqrt(distances, distances)
+    return distances

khandy/file_io_utils.py ADDED Viewed

	@@ -0,0 +1,87 @@

+import base64
+import json
+import numbers
+import pickle
+import warnings
+from collections import OrderedDict
+def load_list(filename, encoding='utf-8', start=0, stop=None):
+    assert isinstance(start, numbers.Integral) and start >= 0
+    assert (stop is None) or (isinstance(stop, numbers.Integral) and stop > start)
+    lines = []
+    with open(filename, 'r', encoding=encoding) as f:
+        for _ in range(start):
+            f.readline()
+        for k, line in enumerate(f):
+            if (stop is not None) and (k + start > stop):
+                break
+            lines.append(line.rstrip('\n'))
+    return lines
+def save_list(filename, list_obj, encoding='utf-8', append_break=True):
+    with open(filename, 'w', encoding=encoding) as f:
+        if append_break:
+            for item in list_obj:
+                f.write(str(item) + '\n')
+        else:
+            for item in list_obj:
+                f.write(str(item))
+def load_json(filename, encoding='utf-8'):
+    with open(filename, 'r', encoding=encoding) as f:
+        data = json.load(f, object_pairs_hook=OrderedDict)
+    return data
+def save_json(filename, data, encoding='utf-8', indent=4, cls=None, sort_keys=False):
+    if not filename.endswith('.json'):
+        filename = filename + '.json'
+    with open(filename, 'w', encoding=encoding) as f:
+        json.dump(data, f, indent=indent, separators=(',',': '),
+                  ensure_ascii=False, cls=cls, sort_keys=sort_keys)
+def load_bytes(filename, use_base64: bool = False) -> bytes:
+    """Open the file in bytes mode, read it, and close the file.
+    References:
+        pathlib.Path.read_bytes
+    """
+    with open(filename, 'rb') as f:
+        data = f.read()
+    if use_base64:
+        data = base64.b64encode(data)
+    return data
+def save_bytes(filename, data: bytes, use_base64: bool = False) -> int:
+    """Open the file in bytes mode, write to it, and close the file.
+    References:
+        pathlib.Path.write_bytes
+    """
+    if use_base64:
+        data = base64.b64decode(data)
+    with open(filename, 'wb') as f:
+        ret = f.write(data)
+    return ret
+def load_as_base64(filename) -> bytes:
+    warnings.warn('khandy.load_as_base64 will be deprecated, use khandy.load_bytes instead!')
+    return load_bytes(filename, True)
+def load_object(filename):
+    with open(filename, 'rb') as f:
+        return pickle.load(f)
+def save_object(filename, obj):
+    with open(filename, 'wb') as f:
+        pickle.dump(obj, f)

khandy/fs_utils.py ADDED Viewed

	@@ -0,0 +1,375 @@

+import os
+import re
+import shutil
+import warnings
+def get_path_stem(path):
+    """
+    References:
+        `std::filesystem::path::stem` since C++17
+    """
+    return os.path.splitext(os.path.basename(path))[0]
+def replace_path_stem(path, new_stem):
+    dirname, basename = os.path.split(path)
+    stem, extension = os.path.splitext(basename)
+    if isinstance(new_stem, str):
+        return os.path.join(dirname, new_stem + extension)
+    elif hasattr(new_stem, '__call__'):
+        return os.path.join(dirname, new_stem(stem) + extension)
+    else:
+        raise TypeError('Unsupported Type!')
+def get_path_extension(path):
+    """
+    References:
+        `std::filesystem::path::extension` since C++17
+    Notes:
+        Not fully consistent with `std::filesystem::path::extension`
+    """
+    return os.path.splitext(os.path.basename(path))[1]
+def replace_path_extension(path, new_extension=None):
+    """Replaces the extension with new_extension or removes it when the default value is used.
+    Firstly, if this path has an extension, it is removed. Then, a dot character is appended
+    to the pathname, if new_extension is not empty or does not begin with a dot character.
+    References:
+        `std::filesystem::path::replace_extension` since C++17
+    """
+    filename_wo_ext = os.path.splitext(path)[0]
+    if new_extension == '' or new_extension is None:
+        return filename_wo_ext
+    elif new_extension.startswith('.'):
+        return ''.join([filename_wo_ext, new_extension])
+    else:
+        return '.'.join([filename_wo_ext, new_extension])
+def normalize_extension(extension):
+    if extension.startswith('.'):
+        new_extension = extension.lower()
+    else:
+        new_extension =  '.' + extension.lower()
+    return new_extension
+def is_path_in_extensions(path, extensions):
+    if isinstance(extensions, str):
+        extensions = [extensions]
+    extensions = [normalize_extension(item) for item in extensions]
+    extension = get_path_extension(path)
+    return extension.lower() in extensions
+def normalize_path(path, norm_case=True):
+    """
+    References:
+        https://en.cppreference.com/w/cpp/filesystem/canonical
+    """
+    # On Unix and Windows, return the argument with an initial
+    # component of ~ or ~user replaced by that user's home directory.
+    path = os.path.expanduser(path)
+    # Return a normalized absolutized version of the pathname path.
+    # On most platforms, this is equivalent to calling the function
+    # normpath() as follows: normpath(join(os.getcwd(), path)).
+    path = os.path.abspath(path)
+    if norm_case:
+        # Normalize the case of a pathname. On Windows,
+        # convert all characters in the pathname to lowercase,
+        # and also convert forward slashes to backward slashes.
+        # On other operating systems, return the path unchanged.
+        path = os.path.normcase(path)
+    return path
+def makedirs(name, mode=0o755):
+    """
+    References:
+        mmcv.mkdir_or_exist
+    """
+    warnings.warn('`makedirs` will be deprecated!')
+    if name == '':
+        return
+    name = os.path.expanduser(name)
+    os.makedirs(name, mode=mode, exist_ok=True)
+def listdirs(paths, path_sep=None, full_path=True):
+    """Enhancement on `os.listdir`
+    """
+    warnings.warn('`listdirs` will be deprecated!')
+    assert isinstance(paths, (str, tuple, list))
+    if isinstance(paths, str):
+        path_sep = path_sep or os.path.pathsep
+        paths = paths.split(path_sep)
+    all_filenames = []
+    for path in paths:
+        path_ex = os.path.expanduser(path)
+        filenames = os.listdir(path_ex)
+        if full_path:
+            filenames = [os.path.join(path_ex, filename) for filename in filenames]
+        all_filenames.extend(filenames)
+    return all_filenames
+def get_all_filenames(path, extensions=None, is_valid_file=None):
+    warnings.warn('`get_all_filenames` will be deprecated, use `list_files_in_dir` with `recursive=True` instead!')
+    if (extensions is not None) and (is_valid_file is not None):
+        raise ValueError("Both extensions and is_valid_file cannot "
+                         "be not None at the same time")
+    if is_valid_file is None:
+        if extensions is not None:
+            def is_valid_file(filename):
+                return is_path_in_extensions(filename, extensions)
+        else:
+            def is_valid_file(filename):
+                return True
+    all_filenames = []
+    path_ex = os.path.expanduser(path)
+    for root, _, filenames in sorted(os.walk(path_ex, followlinks=True)):
+        for filename in sorted(filenames):
+            fullname = os.path.join(root, filename)
+            if is_valid_file(fullname):
+                all_filenames.append(fullname)
+    return all_filenames
+def get_top_level_dirs(path, full_path=True):
+    warnings.warn('`get_top_level_dirs` will be deprecated, use `list_dirs_in_dir` instead!')
+    if path is None:
+        path = os.getcwd()
+    path_ex = os.path.expanduser(path)
+    filenames = os.listdir(path_ex)
+    if full_path:
+        return [os.path.join(path_ex, item) for item in filenames
+                if os.path.isdir(os.path.join(path_ex, item))]
+    else:
+        return [item for item in filenames
+                if os.path.isdir(os.path.join(path_ex, item))]
+def get_top_level_files(path, full_path=True):
+    warnings.warn('`get_top_level_files` will be deprecated, use `list_files_in_dir` instead!')
+    if path is None:
+        path = os.getcwd()
+    path_ex = os.path.expanduser(path)
+    filenames = os.listdir(path_ex)
+    if full_path:
+        return [os.path.join(path_ex, item) for item in filenames
+                if os.path.isfile(os.path.join(path_ex, item))]
+    else:
+        return [item for item in filenames
+                if os.path.isfile(os.path.join(path_ex, item))]
+def list_items_in_dir(path=None, recursive=False, full_path=True):
+    """List all entries in directory
+    """
+    if path is None:
+        path = os.getcwd()
+    path_ex = os.path.expanduser(path)
+    if not recursive:
+        names = os.listdir(path_ex)
+        if full_path:
+            return [os.path.join(path_ex, name) for name in sorted(names)]
+        else:
+            return sorted(names)
+    else:
+        all_names = []
+        for root, dirnames, filenames in sorted(os.walk(path_ex, followlinks=True)):
+            all_names += [os.path.join(root, name) for name in sorted(dirnames)]
+            all_names += [os.path.join(root, name) for name in sorted(filenames)]
+        return all_names
+def list_dirs_in_dir(path=None, recursive=False, full_path=True):
+    """List all dirs in directory
+    """
+    if path is None:
+        path = os.getcwd()
+    path_ex = os.path.expanduser(path)
+    if not recursive:
+        names = os.listdir(path_ex)
+        if full_path:
+            return [os.path.join(path_ex, name) for name in sorted(names)
+                    if os.path.isdir(os.path.join(path_ex, name))]
+        else:
+            return [name for name in sorted(names)
+                    if os.path.isdir(os.path.join(path_ex, name))]
+    else:
+        all_names = []
+        for root, dirnames, _ in sorted(os.walk(path_ex, followlinks=True)):
+            all_names += [os.path.join(root, name) for name in sorted(dirnames)]
+        return all_names
+def list_files_in_dir(path=None, recursive=False, full_path=True):
+    """List all files in directory
+    """
+    if path is None:
+        path = os.getcwd()
+    path_ex = os.path.expanduser(path)
+    if not recursive:
+        names = os.listdir(path_ex)
+        if full_path:
+            return [os.path.join(path_ex, name) for name in sorted(names)
+                    if os.path.isfile(os.path.join(path_ex, name))]
+        else:
+            return [name for name in sorted(names)
+                    if os.path.isfile(os.path.join(path_ex, name))]
+    else:
+        all_names = []
+        for root, _, filenames in sorted(os.walk(path_ex, followlinks=True)):
+            all_names += [os.path.join(root, name) for name in sorted(filenames)]
+        return all_names
+def get_folder_size(dirname):
+    if not os.path.exists(dirname):
+        raise ValueError("Incorrect path: {}".format(dirname))
+    total_size = 0
+    for root, _, filenames in os.walk(dirname):
+        for name in filenames:
+            total_size += os.path.getsize(os.path.join(root, name))
+    return total_size
+def escape_filename(filename, new_char='_'):
+    assert isinstance(new_char, str)
+    control_chars = ''.join((map(chr, range(0x00, 0x20))))
+    pattern = r'[\\/*?:"<>|{}]'.format(control_chars)
+    return re.sub(pattern, new_char, filename)
+def replace_invalid_filename_char(filename, new_char='_'):
+    warnings.warn('`replace_invalid_filename_char` will be deprecated, use `escape_filename` instead!')
+    return escape_filename(filename, new_char)
+def copy_file(src, dst_dir, action_if_exist='rename'):
+    """
+    Args:
+        src: source file path
+        dst_dir: dest dir
+        action_if_exist:
+            None: same as shutil.copy
+            ignore: when dest file exists, don't copy and return None
+            rename: when dest file exists, copy after rename
+    Returns:
+        dest filename
+    """
+    dst = os.path.join(dst_dir, os.path.basename(src))
+    if action_if_exist is None:
+        os.makedirs(dst_dir, exist_ok=True)
+        shutil.copy(src, dst)
+    elif action_if_exist.lower() == 'ignore':
+        if os.path.exists(dst):
+            warnings.warn(f'{dst} already exists, do not copy!')
+            return dst
+        os.makedirs(dst_dir, exist_ok=True)
+        shutil.copy(src, dst)
+    elif action_if_exist.lower() == 'rename':
+        suffix = 2
+        stem, extension = os.path.splitext(os.path.basename(src))
+        while os.path.exists(dst):
+            dst = os.path.join(dst_dir, f'{stem} ({suffix}){extension}')
+            suffix += 1
+        os.makedirs(dst_dir, exist_ok=True)
+        shutil.copy(src, dst)
+    else:
+        raise ValueError('Invalid action_if_exist, got {}.'.format(action_if_exist))
+    return dst
+def move_file(src, dst_dir, action_if_exist='rename'):
+    """
+    Args:
+        src: source file path
+        dst_dir: dest dir
+        action_if_exist:
+            None: same as shutil.move
+            ignore: when dest file exists, don't move and return None
+            rename: when dest file exists, move after rename
+    Returns:
+        dest filename
+    """
+    dst = os.path.join(dst_dir, os.path.basename(src))
+    if action_if_exist is None:
+        os.makedirs(dst_dir, exist_ok=True)
+        shutil.move(src, dst)
+    elif action_if_exist.lower() == 'ignore':
+        if os.path.exists(dst):
+            warnings.warn(f'{dst} already exists, do not move!')
+            return dst
+        os.makedirs(dst_dir, exist_ok=True)
+        shutil.move(src, dst)
+    elif action_if_exist.lower() == 'rename':
+        suffix = 2
+        stem, extension = os.path.splitext(os.path.basename(src))
+        while os.path.exists(dst):
+            dst = os.path.join(dst_dir, f'{stem} ({suffix}){extension}')
+            suffix += 1
+        os.makedirs(dst_dir, exist_ok=True)
+        shutil.move(src, dst)
+    else:
+        raise ValueError('Invalid action_if_exist, got {}.'.format(action_if_exist))
+    return dst
+def rename_file(src, dst, action_if_exist='rename'):
+    """
+    Args:
+        src: source file path
+        dst: dest file path
+        action_if_exist:
+            None: same as os.rename
+            ignore: when dest file exists, don't rename and return None
+            rename: when dest file exists, rename it
+    Returns:
+        dest filename
+    """
+    if dst == src:
+        return dst
+    dst_dir = os.path.dirname(os.path.abspath(dst))
+    if action_if_exist is None:
+        os.makedirs(dst_dir, exist_ok=True)
+        os.rename(src, dst)
+    elif action_if_exist.lower() == 'ignore':
+        if os.path.exists(dst):
+            warnings.warn(f'{dst} already exists, do not rename!')
+            return dst
+        os.makedirs(dst_dir, exist_ok=True)
+        os.rename(src, dst)
+    elif action_if_exist.lower() == 'rename':
+        suffix = 2
+        stem, extension = os.path.splitext(os.path.basename(dst))
+        while os.path.exists(dst):
+            dst = os.path.join(dst_dir, f'{stem} ({suffix}){extension}')
+            suffix += 1
+        os.makedirs(dst_dir, exist_ok=True)
+        os.rename(src, dst)
+    else:
+        raise ValueError('Invalid action_if_exist, got {}.'.format(action_if_exist))
+    return dst

khandy/hash_utils.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import hashlib
+def calc_hash(content, hash_object=None):
+    hash_object = hash_object or hashlib.md5()
+    if isinstance(hash_object, str):
+        hash_object = hashlib.new(hash_object)
+    hash_object.update(content)
+    return hash_object.hexdigest()
+def calc_file_hash(filename, hash_object=None, chunk_size=1024 * 1024):
+    hash_object = hash_object or hashlib.md5()
+    if isinstance(hash_object, str):
+        hash_object = hashlib.new(hash_object)
+    with open(filename, "rb") as f:
+        while True:
+            chunk = f.read(chunk_size)
+            if not chunk:
+                break
+            hash_object.update(chunk)
+    return hash_object.hexdigest()

khandy/image/__init__.py ADDED Viewed

	@@ -0,0 +1,10 @@

+from .align_and_crop import *
+from .crop_or_pad import *
+from .flip import *
+from .image_hash import *
+from .resize import *
+from .rotate import *
+from .translate import *
+from .misc import *

khandy/image/align_and_crop.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import cv2
+import numpy as np
+def get_similarity_transform(src_pts, dst_pts):
+    """Get similarity transform matrix from src_pts to dst_pts
+    Args:
+        src_pts: Kx2 np.array
+            source points matrix, each row is a pair of coordinates (x, y)
+        dst_pts: Kx2 np.array
+            destination points matrix, each row is a pair of coordinates (x, y)
+    Returns:
+        xform_matrix: 3x3 np.array
+            transform matrix from src_pts to dst_pts
+    """
+    src_pts = np.asarray(src_pts)
+    dst_pts = np.asarray(dst_pts)
+    assert src_pts.shape == dst_pts.shape
+    assert (src_pts.ndim == 2) and (src_pts.shape[-1] == 2)
+    npts = src_pts.shape[0]
+    src_x = src_pts[:, 0].reshape((-1, 1))
+    src_y = src_pts[:, 1].reshape((-1, 1))
+    tmp1 = np.hstack((src_x, -src_y, np.ones((npts, 1)), np.zeros((npts, 1))))
+    tmp2 = np.hstack((src_y, src_x, np.zeros((npts, 1)), np.ones((npts, 1))))
+    A = np.vstack((tmp1, tmp2))
+    dst_x = dst_pts[:, 0].reshape((-1, 1))
+    dst_y = dst_pts[:, 1].reshape((-1, 1))
+    b = np.vstack((dst_x, dst_y))
+    x = np.linalg.lstsq(A, b, rcond=-1)[0]
+    x = np.squeeze(x)
+    sc, ss, tx, ty = x[0], x[1], x[2], x[3]
+    xform_matrix = np.array([
+        [sc, -ss, tx],
+        [ss,  sc, ty],
+        [ 0,   0,  1]
+    ])
+    return xform_matrix
+def align_and_crop(image, landmarks, std_landmarks, align_size,
+                   border_value=0, return_transform_matrix=False):
+    landmarks = np.asarray(landmarks)
+    std_landmarks = np.asarray(std_landmarks)
+    xform_matrix = get_similarity_transform(landmarks, std_landmarks)
+    landmarks_ex = np.pad(landmarks, ((0,0),(0,1)), mode='constant', constant_values=1)
+    dst_landmarks = np.dot(landmarks_ex, xform_matrix[:2,:].T)
+    dst_image = cv2.warpAffine(image, xform_matrix[:2,:], dsize=align_size,
+                               borderValue=border_value)
+    if return_transform_matrix:
+        return dst_image, dst_landmarks, xform_matrix
+    else:
+        return dst_image, dst_landmarks

khandy/image/crop_or_pad.py ADDED Viewed

	@@ -0,0 +1,138 @@

+import numbers
+import warnings
+import khandy
+import numpy as np
+def crop(image, x_min, y_min, x_max, y_max, border_value=0):
+    """Crop the given image at specified rectangular area.
+    See Also:
+        translate_image
+    References:
+        PIL.Image.crop
+        tf.image.resize_image_with_crop_or_pad
+    """
+    assert khandy.is_numpy_image(image)
+    assert isinstance(x_min, numbers.Integral) and isinstance(y_min, numbers.Integral)
+    assert isinstance(x_max, numbers.Integral) and isinstance(y_max, numbers.Integral)
+    assert (x_min <= x_max) and (y_min <= y_max)
+    src_height, src_width = image.shape[:2]
+    dst_height, dst_width = y_max - y_min + 1, x_max - x_min + 1
+    channels = 1 if image.ndim == 2 else image.shape[2]
+    if isinstance(border_value, (tuple, list)):
+        assert len(border_value) == channels, \
+            'Expected the num of elements in tuple equals the channels ' \
+            'of input image. Found {} vs {}'.format(
+                len(border_value), channels)
+    else:
+        border_value = (border_value,) * channels
+    dst_image = khandy.create_solid_color_image(
+        dst_width, dst_height, border_value, dtype=image.dtype)
+    src_x_begin = max(x_min, 0)
+    src_x_end   = min(x_max + 1, src_width)
+    dst_x_begin = src_x_begin - x_min
+    dst_x_end   = src_x_end - x_min
+    src_y_begin = max(y_min, 0)
+    src_y_end   = min(y_max + 1, src_height)
+    dst_y_begin = src_y_begin - y_min
+    dst_y_end   = src_y_end - y_min
+    if (src_x_begin >= src_x_end) or (src_y_begin >= src_y_end):
+        return dst_image
+    dst_image[dst_y_begin: dst_y_end, dst_x_begin: dst_x_end, ...] = \
+        image[src_y_begin: src_y_end, src_x_begin: src_x_end, ...]
+    return dst_image
+def crop_or_pad(image, x_min, y_min, x_max, y_max, border_value=0):
+    warnings.warn('crop_or_pad will be deprecated, use crop instead!')
+    return crop(image, x_min, y_min, x_max, y_max, border_value)
+def crop_coords(boxes, image_width, image_height):
+    """
+    References:
+        `mmcv.impad`
+        `pad` in https://github.com/kpzhang93/MTCNN_face_detection_alignment
+        `MtcnnDetector.pad` in https://github.com/AITTSMD/MTCNN-Tensorflow
+    """
+    x_mins = boxes[:, 0]
+    y_mins = boxes[:, 1]
+    x_maxs = boxes[:, 2]
+    y_maxs = boxes[:, 3]
+    dst_widths = x_maxs - x_mins + 1
+    dst_heights = y_maxs - y_mins + 1
+    src_x_begin = np.maximum(x_mins, 0)
+    src_x_end   = np.minimum(x_maxs + 1, image_width)
+    dst_x_begin = src_x_begin - x_mins
+    dst_x_end   = src_x_end - x_mins
+    src_y_begin = np.maximum(y_mins, 0)
+    src_y_end   = np.minimum(y_maxs + 1, image_height)
+    dst_y_begin = src_y_begin - y_mins
+    dst_y_end   = src_y_end - y_mins
+    coords = np.stack([dst_y_begin, dst_y_end, dst_x_begin, dst_x_end,
+                       src_y_begin, src_y_end, src_x_begin, src_x_end,
+                       dst_heights, dst_widths], axis=0)
+    return coords
+def crop_or_pad_coords(boxes, image_width, image_height):
+    warnings.warn('crop_or_pad_coords will be deprecated, use crop_coords instead!')
+    return crop_coords(boxes, image_width, image_height)
+def center_crop(image, dst_width, dst_height, strict=True):
+    """
+    strict:
+        when True, raise error if src size is less than dst size.
+        when False, remain unchanged if src size is less than dst size, otherwise center crop.
+    """
+    assert khandy.is_numpy_image(image)
+    assert isinstance(dst_width, numbers.Integral) and isinstance(dst_height, numbers.Integral)
+    src_height, src_width = image.shape[:2]
+    if strict:
+        assert (src_height >= dst_height) and (src_width >= dst_width)
+    crop_top = max((src_height - dst_height) // 2, 0)
+    crop_left = max((src_width - dst_width) // 2, 0)
+    cropped = image[crop_top: dst_height + crop_top,
+                    crop_left: dst_width + crop_left, ...]
+    return cropped
+def center_pad(image, dst_width, dst_height, strict=True):
+    """
+    strict:
+        when True, raise error if src size is greater than dst size.
+        when False, remain unchanged if src size is greater than dst size, otherwise center pad.
+    """
+    assert khandy.is_numpy_image(image)
+    assert isinstance(dst_width, numbers.Integral) and isinstance(dst_height, numbers.Integral)
+    src_height, src_width = image.shape[:2]
+    if strict:
+        assert (src_height <= dst_height) and (src_width <= dst_width)
+    padding_x = max(dst_width - src_width, 0)
+    padding_y = max(dst_height - src_height, 0)
+    padding_top = padding_y // 2
+    padding_left = padding_x // 2
+    if image.ndim == 2:
+        padding = ((padding_top, padding_y - padding_top),
+                   (padding_left, padding_x - padding_left))
+    else:
+        padding = ((padding_top, padding_y - padding_top),
+                   (padding_left, padding_x - padding_left), (0, 0))
+    return np.pad(image, padding, 'constant')

khandy/image/flip.py ADDED Viewed

	@@ -0,0 +1,72 @@

+import khandy
+import numpy as np
+def flip_image(image, direction='h', copy=True):
+    """
+    References:
+        np.flipud, np.fliplr, np.flip
+        cv2.flip
+        tf.image.flip_up_down
+        tf.image.flip_left_right
+    """
+    assert khandy.is_numpy_image(image)
+    assert direction in ['x', 'h', 'horizontal',
+                         'y', 'v', 'vertical',
+                         'o', 'b', 'both']
+    if copy:
+        image = image.copy()
+    if direction in ['o', 'b', 'both', 'x', 'h', 'horizontal']:
+        image = np.fliplr(image)
+    if direction in ['o', 'b', 'both', 'y', 'v', 'vertical']:
+        image = np.flipud(image)
+    return image
+def transpose_image(image, copy=True):
+    """Transpose image.
+    References:
+        np.transpose
+        cv2.transpose
+        tf.image.transpose
+    """
+    assert khandy.is_numpy_image(image)
+    if copy:
+        image = image.copy()
+    if image.ndim == 2:
+        transpose_axes = (1, 0)
+    else:
+        transpose_axes = (1, 0, 2)
+    image = np.transpose(image, transpose_axes)
+    return image
+def rot90_image(image, n=1, copy=True):
+    """Rotate image counter-clockwise by 90 degrees.
+    References:
+        np.rot90
+        cv2.rotate
+        tf.image.rot90
+    """
+    assert khandy.is_numpy_image(image)
+    if copy:
+        image = image.copy()
+    if image.ndim == 2:
+        transpose_axes = (1, 0)
+    else:
+        transpose_axes = (1, 0, 2)
+    n = n % 4
+    if n == 0:
+        return image[:]
+    elif n == 1:
+        image = np.transpose(image, transpose_axes)
+        image = np.flipud(image)
+    elif n == 2:
+        image = np.fliplr(np.flipud(image))
+    else:
+        image = np.transpose(image, transpose_axes)
+        image = np.fliplr(image)
+    return image

khandy/image/image_hash.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import cv2
+import khandy
+import numpy as np
+def _convert_bool_matrix_to_int(bool_mat):
+    hash_val = int(0)
+    for item in bool_mat.flatten():
+        hash_val <<= 1
+        hash_val |= int(item)
+    return hash_val
+def calc_image_ahash(image):
+    """Average Hashing
+    References:
+        http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html
+    """
+    assert khandy.is_numpy_image(image)
+    if image.ndim == 3:
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    resized = cv2.resize(image, (8, 8))
+    mean_val = np.mean(resized)
+    hash_mat = resized >= mean_val
+    hash_val = _convert_bool_matrix_to_int(hash_mat)
+    return f'{hash_val:016x}'
+def calc_image_dhash(image):
+    """Difference Hashing
+    References:
+        http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html
+    """
+    assert khandy.is_numpy_image(image)
+    if image.ndim == 3:
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    resized = cv2.resize(image, (9, 8))
+    hash_mat = resized[:,:-1] >= resized[:,1:]
+    hash_val = _convert_bool_matrix_to_int(hash_mat)
+    return f'{hash_val:016x}'
+def calc_image_phash(image):
+    """Perceptual Hashing
+    References:
+        http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html
+    """
+    assert khandy.is_numpy_image(image)
+    if image.ndim == 3:
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    resized = cv2.resize(image, (32, 32))
+    dct_coeff = cv2.dct(resized.astype(np.float32))
+    reduced_dct_coeff = dct_coeff[:8, :8]
+    # # mean of coefficients excluding the DC term (0th term)
+    # mean_val = np.mean(reduced_dct_coeff.flatten()[1:])
+    # median of coefficients
+    median_val = np.median(reduced_dct_coeff)
+    hash_mat = reduced_dct_coeff >= median_val
+    hash_val = _convert_bool_matrix_to_int(hash_mat)
+    return f'{hash_val:016x}'

khandy/image/misc.py ADDED Viewed

	@@ -0,0 +1,329 @@

+import os
+import imghdr
+import numbers
+import warnings
+from io import BytesIO
+import cv2
+import khandy
+import numpy as np
+from PIL import Image
+def imread(file_or_buffer, flags=-1):
+    """Improvement on cv2.imread, make it support filename including chinese character.
+    """
+    try:
+        if isinstance(file_or_buffer, bytes):
+            return cv2.imdecode(np.frombuffer(file_or_buffer, dtype=np.uint8), flags)
+        else:
+            # support type: file or str or Path
+            return cv2.imdecode(np.fromfile(file_or_buffer, dtype=np.uint8), flags)
+    except Exception as e:
+        print(e)
+        return None
+def imread_cv(file_or_buffer, flags=-1):
+    warnings.warn('khandy.imread_cv will be deprecated, use khandy.imread instead!')
+    return imread(file_or_buffer, flags)
+def imwrite(filename, image, params=None):
+    """Improvement on cv2.imwrite, make it support filename including chinese character.
+    """
+    cv2.imencode(os.path.splitext(filename)[-1], image, params)[1].tofile(filename)
def imwrite_cv(filename, image, params=None):
    """Deprecated alias of `imwrite`; emits a warning and forwards the call."""
    message = 'khandy.imwrite_cv will be deprecated, use khandy.imwrite instead!'
    warnings.warn(message)
    return imwrite(filename, image, params)
def imread_pil(file_or_buffer, to_mode=None):
    """Improvement on Image.open to avoid ResourceWarning.

    Args:
        file_or_buffer: Encoded image bytes, an object with a `read`
            method, or a path that `open` accepts.
        to_mode (str, *optional*): When not None, the image is converted
            to this mode with `Image.convert` before being returned.
    Returns:
        The opened image, or None when any exception is raised (the
        exception is printed).
    """
    try:
        if isinstance(file_or_buffer, bytes):
            file_or_buffer = BytesIO(file_or_buffer)

        if hasattr(file_or_buffer, 'read'):
            image = Image.open(file_or_buffer)
            if to_mode is not None:
                image = image.convert(to_mode)
        else:
            # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
            with open(file_or_buffer, 'rb') as f:
                image = Image.open(f)
                # If convert outside with statement, will raise "seek of closed file" as
                # https://github.com/microsoft/Swin-Transformer/issues/66
                if to_mode is not None:
                    image = image.convert(to_mode)
                else:
                    # Image.open is lazy: read the pixel data while the file
                    # is still open, otherwise the returned image would fail
                    # on first access after the file has been closed.
                    image.load()
        return image
    except Exception as e:
        print(e)
        return None
def imwrite_bytes(filename, image_bytes: bytes, update_extension: bool = True):
    """Write image bytes to file.

    Args:
        filename: str
            filename which image_bytes is written into.
        image_bytes: bytes
            image content to be written.
        update_extension: bool
            whether update extension according to image_bytes or not.
            the cost of update extension is smaller than update image format.
    Returns:
        str: the filename actually written, which differs from the input
        when the extension was updated.
    """
    detected = imghdr.what('', image_bytes)
    file_extension = khandy.get_path_extension(filename)

    # imghdr.what fails to determine image format sometimes!
    # so when its return value is None, never update extension.
    if detected is None:
        reencode = True
    elif detected.lower() == file_extension.lower()[1:]:
        reencode = False
    elif update_extension:
        filename = khandy.replace_path_extension(filename, detected)
        reencode = False
    else:
        reencode = True

    if reencode:
        # Transcode the content so that it matches the extension of filename.
        decoded = cv2.imdecode(np.frombuffer(image_bytes, np.uint8), -1)
        image_bytes = cv2.imencode(file_extension, decoded)[1]

    with open(filename, "wb") as f:
        f.write(image_bytes)
    return filename
def rescale_image(image: np.ndarray, rescale_factor='auto', dst_dtype=np.float32):
    """Rescale image by rescale_factor.

    Args:
        image (ndarray): Image to be rescaled.
        rescale_factor (str, int or float, *optional*, defaults to `'auto'`):
            rescale the image by the specified scale factor. When is `'auto'`,
            the image must have an unsigned integer dtype and is divided by
            the maximum value of that dtype, i.e. rescaled to [0, 1].
        dst_dtype (np.dtype, *optional*, defaults to `np.float32`):
            The dtype of the output image.
    Returns:
        ndarray: The rescaled image (always a new array).
    Raises:
        TypeError: if `rescale_factor` is `'auto'` and the image dtype is not
            an unsigned integer, or if `rescale_factor` is neither `'auto'`
            nor a number.
    """
    if isinstance(rescale_factor, str) and rescale_factor == 'auto':
        if np.issubdtype(image.dtype, np.unsignedinteger):
            rescale_factor = 1. / np.iinfo(image.dtype).max
        else:
            raise TypeError(f'Only support uint dtype ndarray when `rescale_factor` is `auto`, got {image.dtype}')
    elif not isinstance(rescale_factor, (int, float, np.integer, np.floating)):
        # isinstance, not issubclass: rescale_factor is a value, not a class.
        raise TypeError('rescale_factor must be "auto", int or float')

    image = image.astype(dst_dtype, copy=True)
    image *= rescale_factor
    return image
def normalize_image_value(image: np.ndarray, mean, std, rescale_factor=None):
    """Normalize an image with mean and std, rescale optionally.

    Computes `(image * rescale_factor - mean) / std`. The rescaling is folded
    into `mean` and `std` (both are divided by the factor) so that the image
    itself is only touched by one subtraction and one division.

    Args:
        image (ndarray): Image to be normalized.
        mean (int, float, Sequence[int], Sequence[float], ndarray): The mean to be used for normalize.
        std (int, float, Sequence[int], Sequence[float], ndarray): The std to be used for normalize.
        rescale_factor (None, 'auto', int or float, *optional*, defaults to `None`):
            rescale the image by the specified scale factor. When is `'auto'`,
            the image must have an unsigned integer dtype and the factor is
            one over the maximum value of that dtype; When is `None`, do not
            rescale.
    Returns:
        ndarray: The normalized image which dtype is np.float32.
    Raises:
        TypeError: if `rescale_factor` is `'auto'` and the image dtype is not
            an unsigned integer.
    """
    dst_dtype = np.float32
    mean = np.array(mean, dtype=dst_dtype).flatten()
    std = np.array(std, dtype=dst_dtype).flatten()
    if isinstance(rescale_factor, str) and rescale_factor == 'auto':
        if np.issubdtype(image.dtype, np.unsignedinteger):
            # factor == 1 / max, so dividing by it is multiplying by max.
            mean *= np.iinfo(image.dtype).max
            std *= np.iinfo(image.dtype).max
        else:
            raise TypeError(f'Only support uint dtype ndarray when `rescale_factor` is `auto`, got {image.dtype}')
    elif isinstance(rescale_factor, (int, float, np.integer, np.floating)):
        # (image * f - mean) / std == (image - mean / f) / (std / f)
        mean /= rescale_factor
        std /= rescale_factor
    image = image.astype(dst_dtype, copy=True)
    image -= mean
    image /= std
    return image
def normalize_image_dtype(image, keep_num_channels=False):
    """Normalize image dtype to uint8 (usually for visualization).

    The image is cast to float32, passed through `khandy.minmax_normalize`
    over the whole array, multiplied by 255 and cast to uint8.

    Args:
        image : ndarray
            Input image, either 2-D or 3-D with 1 or 3 channels.
        keep_num_channels : bool, optional
            If this is set to True, the result is an array which has
            the same shape as input image, otherwise the result is
            an array whose channels number is 3.
    Returns:
        out: ndarray
            Image whose dtype is np.uint8.
    """
    assert (image.ndim == 3 and image.shape[-1] in [1, 3]) or (image.ndim == 2)
    scaled = khandy.minmax_normalize(image.astype(np.float32), axis=None, copy=False)
    result = np.array(scaled * 255, dtype=np.uint8)
    if keep_num_channels:
        return result

    if result.ndim == 2:
        result = np.expand_dims(result, -1)
    if result.shape[-1] == 1:
        # Repeat the single channel three times along the last axis.
        result = np.tile(result, (1, 1, 3))
    return result
def normalize_image_channel(image, swap_rb=False):
    """Normalize image channel number and order to RGB or BGR.

    Args:
        image : ndarray
            Input image, 2-D or 3-D with 1, 3 or 4 channels.
        swap_rb : bool, optional
            whether swap red and blue channel or not. It has no effect on
            inputs that are 2-D or have a single channel.
    Returns:
        out: ndarray
            Image whose shape is (..., 3). A 3-channel input is returned
            as is when `swap_rb` is False.
    Raises:
        ValueError: if the number of dimensions or channels is unsupported.
    """
    if image.ndim not in (2, 3):
        raise ValueError(f'Unsupported image ndarray ndim, only support 2 and 3, got {image.ndim}!')
    if image.ndim == 2:
        return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)

    num_channels = image.shape[-1]
    if num_channels == 1:
        return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    if num_channels == 3:
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB) if swap_rb else image
    if num_channels == 4:
        # The alpha channel is dropped in both cases.
        code = cv2.COLOR_BGRA2RGB if swap_rb else cv2.COLOR_BGRA2BGR
        return cv2.cvtColor(image, code)
    raise ValueError(f'Unsupported image channel number, only support 1, 3 and 4, got {num_channels}!')
def normalize_image_shape(image, swap_rb=False):
    """Deprecated alias of `normalize_image_channel`; emits a warning and forwards the call."""
    message = 'khandy.normalize_image_shape will be deprecated, use khandy.normalize_image_channel instead!'
    warnings.warn(message)
    return normalize_image_channel(image, swap_rb)
def stack_image_list(image_list, dtype=np.float32):
    """Join a sequence of image along a new axis before first axis.

    Every image is copied into the top-left corner of its slot; the slot has
    the largest height and width found in the list and the remaining area is
    filled with zeros. Single-channel images are broadcast over the channels
    when they are stacked with multi-channel ones.

    Args:
        image_list (tuple or list): Images of 2 or 3 dimensions. All images
            with more than one channel must share the same channel number.
        dtype: dtype of the returned array.
    Returns:
        ndarray: Array of shape (n, max_h, max_w, c), or (n, max_h, max_w)
        when every image is 2-D.
    References:
        `im_list_to_blob` in `py-faster-rcnn-master/lib/utils/blob.py`
    """
    assert isinstance(image_list, (tuple, list))
    max_dimension = max(image.ndim for image in image_list)
    assert max_dimension in [2, 3]
    max_height = max(image.shape[0] for image in image_list)
    max_width = max(image.shape[1] for image in image_list)

    num_channels = [1 if image.ndim == 2 else image.shape[-1] for image in image_list]
    # Apart from 1 (which broadcasts), at most one channel number is allowed.
    assert len(set(num_channels) - {1}) in [0, 1]
    max_num_channels = max(num_channels)

    # zeros, not empty: the padding around smaller images must be defined.
    blob = np.zeros((len(image_list), max_height, max_width, max_num_channels), dtype=dtype)
    for k, image in enumerate(image_list):
        blob[k, :image.shape[0], :image.shape[1], :] = np.atleast_3d(image).astype(dtype, copy=False)
    if max_dimension == 2:
        blob = np.squeeze(blob, axis=-1)
    return blob
def is_numpy_image(image):
    """Return True if `image` is a numpy array with 2 or 3 dimensions."""
    if not isinstance(image, np.ndarray):
        return False
    return image.ndim in (2, 3)
def is_gray_image(image, tol=3):
    """Check whether an image is (nearly) gray.

    2-D images and single-channel images are always gray. Images with 3 or 4
    channels (the 4th channel is dropped) are converted to gray scale and
    back; they count as gray when the mean absolute difference to that round
    trip does not exceed `tol`.

    Args:
        image (ndarray): Input image.
        tol: Largest accepted mean absolute difference.
    Returns:
        bool: whether the image is gray; False for any other channel number.
    """
    assert is_numpy_image(image)
    if image.ndim == 2:
        return True
    if image.ndim != 3:
        return False

    num_channels = image.shape[-1]
    if num_channels == 1:
        return True
    if num_channels == 3:
        color = image
    elif num_channels == 4:
        color = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)
    else:
        return False

    gray = cv2.cvtColor(color, cv2.COLOR_BGR2GRAY)
    gray3 = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
    return np.mean(cv2.absdiff(color, gray3)) <= tol
def is_solid_color_image(image, tol=4):
    """Check whether an image is (nearly) filled with a single color.

    The image counts as solid when the mean absolute difference between its
    pixels and the per-channel mean from `cv2.mean` does not exceed `tol`.
    For 4-channel images the last channel is ignored.

    Args:
        image (ndarray): Input image.
        tol: Largest accepted mean absolute difference.
    Returns:
        bool: whether the image is solid; False for unsupported channel numbers.
    """
    assert is_numpy_image(image)
    # cv2.mean returns four values; only the first three are kept.
    mean = np.array(cv2.mean(image)[:-1], dtype=np.float32)
    if image.ndim == 2:
        deviation = np.abs(image - mean[0])
    elif image.ndim == 3:
        num_channels = image.shape[-1]
        if num_channels == 1:
            deviation = np.abs(image - mean[0])
        elif num_channels == 3:
            deviation = np.abs(image - mean)
        elif num_channels == 4:
            deviation = np.abs(image[:, :, :-1] - mean)
        else:
            return False
    else:
        return False
    return np.mean(deviation) <= tol
def create_solid_color_image(image_width, image_height, color, dtype=None):
    """Create an image filled with a single color.

    Args:
        image_width (int): Width of the image.
        image_height (int): Height of the image.
        color: A real number or a one-element tuple/list gives a 2-D image;
            a tuple/list of any other length gives a 3-D image with one
            channel per element.
        dtype: dtype of the created image.
    Returns:
        ndarray: Image of shape (image_height, image_width) or
        (image_height, image_width, len(color)).
    Raises:
        TypeError: if `color` is neither a real number nor a tuple/list.
    """
    if isinstance(color, numbers.Real):
        return np.full((image_height, image_width), color, dtype=dtype)
    if not isinstance(color, (tuple, list)):
        raise TypeError(f'Invalid type {type(color)} for `color`.')

    num_channels = len(color)
    if num_channels == 1:
        return np.full((image_height, image_width), color[0], dtype=dtype)
    if num_channels in (3, 4):
        # Grow a single pixel to the requested size with a constant border.
        seed = np.full((1, 1, num_channels), color, dtype=dtype)
        return cv2.copyMakeBorder(seed, 0, image_height - 1, 0, image_width - 1,
                                  cv2.BORDER_CONSTANT, value=color)

    canvas = np.empty((image_height, image_width, num_channels), dtype=dtype)
    canvas[:] = np.asarray(color, dtype=dtype)
    return canvas

khandy/image/resize.py ADDED Viewed

	@@ -0,0 +1,177 @@

+import warnings
+import cv2
+import khandy
+import numpy as np
# Maps the interpolation names accepted by the resize helpers of this module
# to the matching OpenCV interpolation flags.
interp_codes = dict(
    nearest=cv2.INTER_NEAREST,
    bilinear=cv2.INTER_LINEAR,
    bicubic=cv2.INTER_CUBIC,
    area=cv2.INTER_AREA,
    lanczos=cv2.INTER_LANCZOS4,
)
def scale_image(image, x_scale, y_scale, interpolation='bilinear'):
    """Scale image by independent horizontal and vertical factors.

    Args:
        image (ndarray): The input image.
        x_scale (float): Factor applied to the width.
        y_scale (float): Factor applied to the height.
        interpolation (str): A key of `interp_codes`.
    Returns:
        ndarray: The scaled image; the target size is rounded to integers.
    Reference:
        mmcv.imrescale
    """
    assert khandy.is_numpy_image(image)
    src_height, src_width = image.shape[:2]
    dst_size = (int(round(x_scale * src_width)), int(round(y_scale * src_height)))
    return cv2.resize(image, dst_size, interpolation=interp_codes[interpolation])
def resize_image(image, dst_width, dst_height, return_scale=False, interpolation='bilinear'):
    """Resize image to a given size.

    Args:
        image (ndarray): The input image.
        dst_width (int): Target width.
        dst_height (int): Target height.
        return_scale (bool): Whether to return `x_scale` and `y_scale`.
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos".
    Returns:
        tuple or ndarray: (`resized_image`, `x_scale`, `y_scale`) or `resized_image`.
    Reference:
        mmcv.imresize
    """
    assert khandy.is_numpy_image(image)
    resized = cv2.resize(image, (dst_width, dst_height),
                         interpolation=interp_codes[interpolation])
    if return_scale:
        src_height, src_width = image.shape[:2]
        return resized, dst_width / src_width, dst_height / src_height
    return resized
def resize_image_short(image, dst_size, return_scale=False, interpolation='bilinear'):
    """Resize an image so that the length of shorter side is dst_size while
    preserving the original aspect ratio.

    Args:
        image (ndarray): The input image.
        dst_size (int): Target length of the shorter side.
        return_scale (bool): Whether to return the scale factor as well.
        interpolation (str): A key of `interp_codes`.
    Returns:
        tuple or ndarray: (`resized_image`, `scale`) or `resized_image`.
    References:
        `resize_min` in `https://github.com/pjreddie/darknet/blob/master/src/image.c`
    """
    assert khandy.is_numpy_image(image)
    src_height, src_width = image.shape[:2]
    # The larger of the two ratios belongs to the shorter side.
    scale = max(dst_size / src_width, dst_size / src_height)
    dst_shape = (int(round(scale * src_width)), int(round(scale * src_height)))
    resized = cv2.resize(image, dst_shape, interpolation=interp_codes[interpolation])
    if return_scale:
        return resized, scale
    return resized
def resize_image_long(image, dst_size, return_scale=False, interpolation='bilinear'):
    """Resize an image so that the length of longer side is dst_size while
    preserving the original aspect ratio.

    Args:
        image (ndarray): The input image.
        dst_size (int): Target length of the longer side.
        return_scale (bool): Whether to return the scale factor as well.
        interpolation (str): A key of `interp_codes`.
    Returns:
        tuple or ndarray: (`resized_image`, `scale`) or `resized_image`.
    References:
        `resize_max` in `https://github.com/pjreddie/darknet/blob/master/src/image.c`
    """
    assert khandy.is_numpy_image(image)
    src_height, src_width = image.shape[:2]
    # The smaller of the two ratios belongs to the longer side.
    scale = min(dst_size / src_width, dst_size / src_height)
    dst_shape = (int(round(scale * src_width)), int(round(scale * src_height)))
    resized = cv2.resize(image, dst_shape, interpolation=interp_codes[interpolation])
    if return_scale:
        return resized, scale
    return resized
def resize_image_to_range(image, min_length, max_length, return_scale=False, interpolation='bilinear'):
    """Resizes an image so its dimensions are within the provided value.

    Rescale the shortest side of the image up to `min_length` pixels
    while keeping the largest side below `max_length` pixels without
    changing the aspect ratio. Often used in object detection (e.g. RCNN and SSH.)

    The output size can be described by two cases:
    1. If the image can be rescaled so its shortest side is equal to the
        `min_length` without the other side exceeding `max_length`, then do so.
    2. Otherwise, resize so the longest side is equal to `max_length`.

    Args:
        image (ndarray): The input image.
        min_length (int): Target length of the shortest side; must be
            smaller than `max_length`.
        max_length (int): Upper bound for the longest side.
        return_scale (bool): Whether to return the scale factor as well.
        interpolation (str): A key of `interp_codes`.
    Returns:
        resized_image: resized image so that
            min(dst_height, dst_width) == min_length or
            max(dst_height, dst_width) == max_length.
        When `return_scale` is True, (`resized_image`, `scale`) is returned.
    References:
        `resize_to_range` in `models-master/research/object_detection/core/preprocessor.py`
        `prep_im_for_blob` in `py-faster-rcnn-master/lib/utils/blob.py`
        mmcv.imrescale
    """
    assert khandy.is_numpy_image(image)
    assert min_length < max_length
    src_height, src_width = image.shape[:2]

    short_side, long_side = sorted((src_width, src_height))
    scale = min_length / short_side
    if round(scale * long_side) > max_length:
        # Case 2: the long side would overshoot, so let it drive the scale.
        scale = max_length / long_side

    dst_shape = (int(round(scale * src_width)), int(round(scale * src_height)))
    resized = cv2.resize(image, dst_shape, interpolation=interp_codes[interpolation])
    if return_scale:
        return resized, scale
    return resized
def letterbox_image(image, dst_width, dst_height, border_value=0,
                    return_scale=False, interpolation='bilinear'):
    """Resize an image preserving the original aspect ratio using padding.

    The image is scaled to fit inside (dst_width, dst_height) and centered;
    the rest of the canvas is filled with `border_value`. When the padding
    is odd, the extra pixel goes to the bottom / right.

    Args:
        image (ndarray): The input image.
        dst_width (int): Width of the output canvas.
        dst_height (int): Height of the output canvas.
        border_value: Value used for the padded area.
        return_scale (bool): Whether to return scale and offsets as well.
        interpolation (str): A key of `interp_codes`.
    Returns:
        tuple or ndarray: (`padded_image`, `scale`, `pad_left`, `pad_top`)
        or `padded_image`.
    References:
        `letterbox_image` in `https://github.com/pjreddie/darknet/blob/master/src/image.c`
    """
    assert khandy.is_numpy_image(image)
    src_height, src_width = image.shape[:2]
    scale = min(dst_width / src_width, dst_height / src_height)
    resize_w = int(round(scale * src_width))
    resize_h = int(round(scale * src_height))
    resized = cv2.resize(image, (resize_w, resize_h),
                         interpolation=interp_codes[interpolation])

    extra_h = dst_height - resize_h
    extra_w = dst_width - resize_w
    pad_top = extra_h // 2
    pad_left = extra_w // 2
    padded = cv2.copyMakeBorder(resized, pad_top, extra_h - pad_top,
                                pad_left, extra_w - pad_left,
                                cv2.BORDER_CONSTANT, value=border_value)
    if return_scale:
        return padded, scale, pad_left, pad_top
    return padded
def letterbox_resize_image(image, dst_width, dst_height, border_value=0,
                           return_scale=False, interpolation='bilinear'):
    """Deprecated alias of `letterbox_image`; emits a warning and forwards the call."""
    message = 'letterbox_resize_image will be deprecated, use letterbox_image instead!'
    warnings.warn(message)
    return letterbox_image(image, dst_width, dst_height, border_value,
                           return_scale, interpolation)

khandy/image/rotate.py ADDED Viewed

	@@ -0,0 +1,72 @@

+import cv2
+import khandy
+import numpy as np
def get_2d_rotation_matrix(angle, cx=0, cy=0, scale=1,
                           degrees=True, dtype=np.float32):
    """Build the 3x3 homogeneous matrix of a scaled rotation about (cx, cy).

    Args:
        angle: Rotation angle, in degrees when `degrees` is True and in
            radians otherwise.
        cx: x coordinate of the rotation center.
        cy: y coordinate of the rotation center.
        scale: Isotropic scale factor.
        degrees (bool): Unit of `angle`.
        dtype: dtype of the returned matrix.
    Returns:
        ndarray: Matrix [[c, -s, tx], [s, c, ty], [0, 0, 1]] where
        c = scale * cos(angle) and s = scale * sin(angle).
    References:
        `cv2.getRotationMatrix2D` in OpenCV
    """
    theta = np.deg2rad(angle) if degrees else angle
    cos_term = scale * np.cos(theta)
    sin_term = scale * np.sin(theta)
    # The translation column keeps the point (cx, cy) fixed.
    matrix = [
        [cos_term, -sin_term, cx - cx * cos_term + cy * sin_term],
        [sin_term, cos_term, cy - cx * sin_term - cy * cos_term],
        [0, 0, 1],
    ]
    return np.array(matrix, dtype=dtype)
def rotate_image(image, angle, scale=1.0, center=None,
                 degrees=True, border_value=0, auto_bound=False):
    """Rotate an image.

    Args:
        image : ndarray
            Image to be rotated.
        angle : float
            Rotation angle, positive values mean clockwise rotation.
        scale : float
            Isotropic scale factor.
        center : tuple
            Center of the rotation in the source image, by default
            it is the center of the image. Ignored when `auto_bound` is True.
        degrees : bool
            Whether `angle` is given in degrees (otherwise radians).
        border_value : int
            Border value.
        auto_bound : bool
            Whether to adjust the image size to cover the whole rotated image.
    Returns:
        ndarray: The rotated image.
    References:
        mmcv.imrotate
    """
    assert khandy.is_numpy_image(image)
    image_height, image_width = image.shape[:2]
    if auto_bound or center is None:
        center = ((image_width - 1) * 0.5, (image_height - 1) * 0.5)
    assert isinstance(center, tuple)

    matrix = get_2d_rotation_matrix(angle, center[0], center[1], scale, degrees)
    out_width, out_height = image_width, image_height
    if auto_bound:
        abs_cos = np.abs(matrix[0, 0])
        abs_sin = np.abs(matrix[0, 1])
        bound_width = image_width * abs_cos + image_height * abs_sin
        bound_height = image_width * abs_sin + image_height * abs_cos
        # Shift the result so that it is centered in the enlarged canvas.
        matrix[0, 2] += (bound_width - image_width) * 0.5
        matrix[1, 2] += (bound_height - image_height) * 0.5
        out_width = int(np.round(bound_width))
        out_height = int(np.round(bound_height))

    return cv2.warpAffine(image, matrix[:2, :], (out_width, out_height),
                          borderValue=border_value)

khandy/image/translate.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import numbers
+import khandy
def translate_image(image, x_shift, y_shift, border_value=0):
    """Translate an image.

    Args:
        image (ndarray): Image to be translated with format (h, w) or (h, w, c).
        x_shift (int): The offset used for translate in horizontal
            direction. right is the positive direction.
        y_shift (int): The offset used for translate in vertical
            direction. down is the positive direction.
        border_value (int | tuple[int]): Value used in case of a
            constant border.
    Returns:
        ndarray: The translated image, with the shape and dtype of `image`.
    See Also:
        crop_or_pad
    """
    assert khandy.is_numpy_image(image)
    assert isinstance(x_shift, numbers.Integral)
    assert isinstance(y_shift, numbers.Integral)
    image_height, image_width = image.shape[:2]
    channels = 1 if image.ndim == 2 else image.shape[2]

    if isinstance(border_value, (tuple, list)):
        assert len(border_value) == channels, \
            'Expected the num of elements in tuple equals the channels ' \
            'of input image. Found {} vs {}'.format(
                len(border_value), channels)
    else:
        border_value = (border_value,) * channels

    # Keyword arguments: create_solid_color_image takes the width first, and
    # passing height/width positionally in the wrong order yields a
    # transposed canvas for non-square images.
    dst_image = khandy.create_solid_color_image(
        image_width=image_width, image_height=image_height,
        color=border_value, dtype=image.dtype)
    # A one-element color gives a 2-D canvas; restore the trailing axis of
    # (h, w, 1) inputs so that the copy below and the result shape match.
    dst_image = dst_image.reshape(image.shape)

    if (abs(x_shift) >= image_width) or (abs(y_shift) >= image_height):
        # The image is shifted completely out of view.
        return dst_image

    src_x_begin = max(-x_shift, 0)
    src_x_end = min(image_width - x_shift, image_width)
    dst_x_begin = max(x_shift, 0)
    dst_x_end = min(image_width + x_shift, image_width)

    src_y_begin = max(-y_shift, 0)
    src_y_end = min(image_height - y_shift, image_height)
    dst_y_begin = max(y_shift, 0)
    dst_y_end = min(image_height + y_shift, image_height)

    dst_image[dst_y_begin:dst_y_end, dst_x_begin:dst_x_end] = \
        image[src_y_begin:src_y_end, src_x_begin:src_x_end]
    return dst_image

khandy/label/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ from .detect import *
2	+

khandy/label/detect.py ADDED Viewed

	@@ -0,0 +1,594 @@

+import os
+import copy
+import json
+import dataclasses
+from dataclasses import dataclass, field
+from collections import OrderedDict
+from typing import Optional, List
+import xml.etree.ElementTree as ET
+import khandy
+import lxml
+import lxml.builder
+import numpy as np
+__all__ = ['DetectIrObject', 'DetectIrRecord', 'load_detect',
+           'save_detect', 'convert_detect', 'replace_detect_label',
+           'load_coco_class_names']
@dataclass
class DetectIrObject:
    """Intermediate Representation Format of Object

    One labelled box; the format handlers convert their own object types
    to and from this class.
    """
    label: str      # label of the box
    x_min: float    # smaller x coordinate of the box
    y_min: float    # smaller y coordinate of the box
    x_max: float    # larger x coordinate of the box
    y_max: float    # larger y coordinate of the box
@dataclass
class DetectIrRecord:
    """Intermediate Representation Format of Record

    The annotation of one image; the format handlers convert their own
    record types to and from this class.
    """
    filename: str   # filename of the annotated image
    width: int      # image width
    height: int     # image height
    objects: List[DetectIrObject] = field(default_factory=list)  # boxes of the image
@dataclass
class PascalVocSource:
    """Content of the `source` tag of a Pascal VOC annotation."""
    database: str = ''      # text of the `database` child tag
    annotation: str = ''    # text of the `annotation` child tag
    image: str = ''         # text of the `image` child tag
@dataclass
class PascalVocSize:
    """Content of the `size` tag of a Pascal VOC annotation."""
    height: int     # value of the `height` child tag
    width: int      # value of the `width` child tag
    depth: int      # value of the `depth` child tag
@dataclass
class PascalVocBndbox:
    """Content of the `bndbox` tag of a Pascal VOC object."""
    xmin: float     # value of the `xmin` child tag
    ymin: float     # value of the `ymin` child tag
    xmax: float     # value of the `xmax` child tag
    ymax: float     # value of the `ymax` child tag
@dataclass
class PascalVocObject:
    """Content of one `object` tag of a Pascal VOC annotation."""
    name: str                                   # text of the `name` child tag
    pose: str = 'Unspecified'                   # text of the `pose` child tag
    truncated: int = 0                          # value of the `truncated` child tag
    difficult: int = 0                          # value of the `difficult` child tag
    bndbox: Optional[PascalVocBndbox] = None    # content of the `bndbox` child tag
@dataclass
class PascalVocRecord:
    """Content of a Pascal VOC annotation file (the `annotation` tag)."""
    folder: str = ''        # text of the `folder` tag
    filename: str = ''      # text of the `filename` tag
    path: str = ''          # text of the `path` tag
    # default_factory gives every record its own PascalVocSource. A shared
    # instance as default is rejected by dataclasses on Python 3.11+
    # ("mutable default ... is not allowed") and would be shared between
    # records on older versions.
    source: PascalVocSource = field(default_factory=PascalVocSource)
    size: Optional[PascalVocSize] = None    # content of the `size` tag
    segmented: int = 0                      # value of the `segmented` tag
    objects: List[PascalVocObject] = field(default_factory=list)  # the `object` tags
class PascalVocHandler:
    """Load, save and convert Pascal VOC XML annotation files.

    Box coordinates are treated as 1-based in the XML file and 0-based in
    `PascalVocRecord`: `load` subtracts 1 and `save` adds it back, so that a
    load/save round trip leaves the boxes unchanged.
    """

    @staticmethod
    def _child_text(parent, tag, default=None):
        """Return the text of child `tag` of `parent`, or `default` when the tag is missing or empty."""
        node = None if parent is None else parent.find(tag)
        if node is None or node.text is None:
            return default
        return node.text

    @staticmethod
    def load(filename, **kwargs) -> PascalVocRecord:
        """Parse a Pascal VOC XML file.

        The tags `filename`, `size` (with `width`, `height`, `depth`) and,
        for every object, `name` and `bndbox` are required. The tags
        `folder`, `path`, `source`, `segmented`, `pose`, `truncated` and
        `difficult` fall back to the dataclass defaults when missing.

        Args:
            filename: Path or file object accepted by `ET.parse`.
            **kwargs: Unused.
        Returns:
            PascalVocRecord: The parsed record with 0-based box coordinates.
        """
        child_text = PascalVocHandler._child_text
        xml_tree = ET.parse(filename)

        size_tag = xml_tree.find('size')
        pascal_voc_record = PascalVocRecord(
            folder=child_text(xml_tree, 'folder', ''),
            filename=xml_tree.find('filename').text,
            path=child_text(xml_tree, 'path', ''),
            source=PascalVocSource(
                database=child_text(xml_tree.find('source'), 'database', ''),
            ),
            size=PascalVocSize(
                width=int(size_tag.find('width').text),
                height=int(size_tag.find('height').text),
                depth=int(size_tag.find('depth').text)
            ),
            segmented=int(child_text(xml_tree, 'segmented', 0)),
        )

        for object_tag in xml_tree.findall('object'):
            bndbox_tag = object_tag.find('bndbox')
            # Convert the 1-based coordinates of the file to 0-based ones.
            bndbox = PascalVocBndbox(
                xmin=float(bndbox_tag.find('xmin').text) - 1,
                ymin=float(bndbox_tag.find('ymin').text) - 1,
                xmax=float(bndbox_tag.find('xmax').text) - 1,
                ymax=float(bndbox_tag.find('ymax').text) - 1
            )
            pascal_voc_record.objects.append(PascalVocObject(
                name=object_tag.find('name').text,
                pose=child_text(object_tag, 'pose', 'Unspecified'),
                truncated=int(child_text(object_tag, 'truncated', 0)),
                difficult=int(child_text(object_tag, 'difficult', 0)),
                bndbox=bndbox
            ))
        return pascal_voc_record

    @staticmethod
    def save(filename, pascal_voc_record: PascalVocRecord):
        """Write a record as Pascal VOC XML.

        Args:
            filename: Destination path; '.xml' is appended when missing.
            pascal_voc_record: Record to be written. Its 0-based box
                coordinates are written 1-based, mirroring `load`.
        """
        maker = lxml.builder.ElementMaker()
        xml = maker.annotation(
            maker.folder(pascal_voc_record.folder),
            maker.filename(pascal_voc_record.filename),
            maker.path(pascal_voc_record.path),
            maker.source(
                maker.database(pascal_voc_record.source.database),
            ),
            maker.size(
                maker.width(str(pascal_voc_record.size.width)),
                maker.height(str(pascal_voc_record.size.height)),
                maker.depth(str(pascal_voc_record.size.depth)),
            ),
            maker.segmented(str(pascal_voc_record.segmented)),
        )

        for pascal_voc_object in pascal_voc_record.objects:
            bndbox = pascal_voc_object.bndbox
            object_tag = maker.object(
                maker.name(pascal_voc_object.name),
                maker.pose(pascal_voc_object.pose),
                maker.truncated(str(pascal_voc_object.truncated)),
                maker.difficult(str(pascal_voc_object.difficult)),
                maker.bndbox(
                    maker.xmin(str(float(bndbox.xmin) + 1)),
                    maker.ymin(str(float(bndbox.ymin) + 1)),
                    maker.xmax(str(float(bndbox.xmax) + 1)),
                    maker.ymax(str(float(bndbox.ymax) + 1)),
                ),
            )
            xml.append(object_tag)

        # Accept path-like objects as well as plain strings.
        filename = os.fspath(filename)
        if not filename.endswith('.xml'):
            filename = filename + '.xml'
        with open(filename, 'wb') as f:
            f.write(lxml.etree.tostring(
                xml, pretty_print=True, encoding='utf-8'))

    @staticmethod
    def to_ir(pascal_voc_record: PascalVocRecord) -> DetectIrRecord:
        """Convert a Pascal VOC record to the intermediate representation.

        Only filename, image size and, per object, name and box are kept.
        """
        ir_record = DetectIrRecord(
            filename=pascal_voc_record.filename,
            width=pascal_voc_record.size.width,
            height=pascal_voc_record.size.height
        )
        for pascal_voc_object in pascal_voc_record.objects:
            ir_object = DetectIrObject(
                label=pascal_voc_object.name,
                x_min=pascal_voc_object.bndbox.xmin,
                y_min=pascal_voc_object.bndbox.ymin,
                x_max=pascal_voc_object.bndbox.xmax,
                y_max=pascal_voc_object.bndbox.ymax
            )
            ir_record.objects.append(ir_object)
        return ir_record

    @staticmethod
    def from_ir(ir_record: DetectIrRecord) -> PascalVocRecord:
        """Convert an intermediate record to a Pascal VOC record.

        The image depth is set to 3; all fields without a counterpart in the
        intermediate representation keep their defaults.
        """
        pascal_voc_record = PascalVocRecord(
            filename=ir_record.filename,
            size=PascalVocSize(
                width=ir_record.width,
                height=ir_record.height,
                depth=3
            )
        )
        for ir_object in ir_record.objects:
            pascal_voc_object = PascalVocObject(
                name=ir_object.label,
                bndbox=PascalVocBndbox(
                    xmin=ir_object.x_min,
                    ymin=ir_object.y_min,
                    xmax=ir_object.x_max,
                    ymax=ir_object.y_max,
                )
            )
            pascal_voc_record.objects.append(pascal_voc_object)
        return pascal_voc_record
+class _NumpyEncoder(json.JSONEncoder):
+    """ Special json encoder for numpy types """
+    def default(self, obj):
+        if isinstance(obj, (np.bool_,)):
+            return bool(obj)
+        elif isinstance(obj, (np.int_, np.intc, np.intp, np.int8,
+                              np.int16, np.int32, np.int64, np.uint8,
+                              np.uint16, np.uint32, np.uint64)):
+            return int(obj)
+        elif isinstance(obj, (np.float_, np.float16, np.float32,
+                              np.float64)):
+            return float(obj)
+        elif isinstance(obj, (np.ndarray,)):
+            return obj.tolist()
+        return json.JSONEncoder.default(self, obj)
@dataclass
class LabelmeShape:
    """One entry of the `shapes` list of a labelme annotation.

    `points` is converted to a numpy array when the object is created.
    """
    label: str                                  # label of the shape
    points: np.ndarray                          # points of the shape
    shape_type: str                             # e.g. 'rectangle'
    flags: dict = field(default_factory=dict)   # `flags` entry of the shape
    group_id: Optional[int] = None              # `group_id` entry of the shape

    def __post_init__(self):
        # Lists (e.g. parsed from JSON) are accepted and stored as ndarray.
        self.points = np.asarray(self.points)
@dataclass
class LabelmeRecord:
    """Content of a labelme annotation file.

    The field names follow the keys of the labelme JSON format. Entries of
    `shapes` may be given as `LabelmeShape` objects or as dicts of
    `LabelmeShape` fields; dicts are converted when the record is created.
    """
    version: str = '4.5.6'
    flags: dict = field(default_factory=dict)
    shapes: List[LabelmeShape] = field(default_factory=list)
    imagePath: Optional[str] = None
    imageData: Optional[str] = None
    imageHeight: Optional[int] = None
    imageWidth: Optional[int] = None

    def __post_init__(self):
        # Build a new list instead of editing the caller's list in place, and
        # keep shapes that are already LabelmeShape objects as they are.
        self.shapes = [
            shape if isinstance(shape, LabelmeShape) else LabelmeShape(**shape)
            for shape in self.shapes
        ]
class LabelmeHandler:
    """Load, save and convert labelme JSON annotation files."""

    @staticmethod
    def load(filename, **kwargs) -> LabelmeRecord:
        """Read a labelme JSON file into a `LabelmeRecord`.

        The top-level keys of the file are passed to `LabelmeRecord` as
        keyword arguments. `**kwargs` is unused.
        """
        json_content = khandy.load_json(filename)
        return LabelmeRecord(**json_content)

    @staticmethod
    def save(filename, labelme_record: LabelmeRecord):
        """Write a `LabelmeRecord` as JSON; numpy values are handled by `_NumpyEncoder`."""
        json_content = dataclasses.asdict(labelme_record)
        khandy.save_json(filename, json_content, cls=_NumpyEncoder)

    @staticmethod
    def to_ir(labelme_record: LabelmeRecord) -> DetectIrRecord:
        """Convert a labelme record to the intermediate representation.

        Only shapes whose `shape_type` is 'rectangle' are converted. The two
        points of a rectangle are opposite corners in arbitrary order, so
        the minimum and maximum are taken per coordinate.
        """
        ir_record = DetectIrRecord(
            filename=labelme_record.imagePath,
            width=labelme_record.imageWidth,
            height=labelme_record.imageHeight
        )
        for labelme_shape in labelme_record.shapes:
            if labelme_shape.shape_type != 'rectangle':
                continue
            first, second = labelme_shape.points[0], labelme_shape.points[1]
            ir_object = DetectIrObject(
                label=labelme_shape.label,
                x_min=min(first[0], second[0]),
                y_min=min(first[1], second[1]),
                x_max=max(first[0], second[0]),
                y_max=max(first[1], second[1]),
            )
            ir_record.objects.append(ir_object)
        return ir_record

    @staticmethod
    def from_ir(ir_record: DetectIrRecord) -> LabelmeRecord:
        """Convert an intermediate record to a labelme record of rectangle shapes."""
        labelme_record = LabelmeRecord(
            imagePath=ir_record.filename,
            imageWidth=ir_record.width,
            imageHeight=ir_record.height
        )
        for ir_object in ir_record.objects:
            labelme_shape = LabelmeShape(
                label=ir_object.label,
                shape_type='rectangle',
                points=[[ir_object.x_min, ir_object.y_min],
                        [ir_object.x_max, ir_object.y_max]]
            )
            labelme_record.shapes.append(labelme_shape)
        return labelme_record
@dataclass
class YoloObject:
    """One annotated box of a YOLO label file (a label plus centre/size)."""
    label: str
    # Box centre; YoloHandler.to_ir multiplies these by the image size.
    x_center: float
    y_center: float
    # Box extent; YoloHandler.to_ir multiplies these by the image size.
    width: float
    height: float
@dataclass
class YoloRecord:
    """All YOLO objects of one image together with the image metadata."""
    # Image file the labels belong to (not the label file itself).
    filename: Optional[str] = None
    # Image size in pixels; used to convert to and from absolute boxes.
    width: Optional[int] = None
    height: Optional[int] = None
    objects: List[YoloObject] = field(default_factory=list)
class YoloHandler:
    """Load, save and convert YOLO text label records."""

    @staticmethod
    def load(filename, **kwargs) -> YoloRecord:
        """Read a YOLO label file.

        Required keyword arguments:
            image_filename: name of the image the labels belong to.
            width, height: image size, stored on the returned record.

        Each non-blank line is split on whitespace into
        ``label x_center y_center width height``; extra columns are ignored.
        """
        assert 'image_filename' in kwargs, '`image_filename` is required'
        assert 'width' in kwargs and 'height' in kwargs, \
            '`width` and `height` are required'

        records = khandy.load_list(filename)
        yolo_record = YoloRecord(
            filename=kwargs.get('image_filename'),
            width=kwargs.get('width'),
            height=kwargs.get('height'))
        for record in records:
            record_parts = record.split()
            if not record_parts:
                # A blank (or whitespace-only) line carries no object.
                continue
            yolo_record.objects.append(YoloObject(
                label=record_parts[0],
                x_center=float(record_parts[1]),
                y_center=float(record_parts[2]),
                width=float(record_parts[3]),
                height=float(record_parts[4]),
            ))
        return yolo_record

    @staticmethod
    def save(filename, yolo_record: YoloRecord):
        """Write one line per object; ``.txt`` is appended when missing."""
        records = [
            f'{yolo_object.label} {yolo_object.x_center} {yolo_object.y_center} '
            f'{yolo_object.width} {yolo_object.height}'
            for yolo_object in yolo_record.objects
        ]
        if not filename.endswith('.txt'):
            filename = filename + '.txt'
        khandy.save_list(filename, records)

    @staticmethod
    def to_ir(yolo_record: YoloRecord) -> DetectIrRecord:
        """Convert centre/size boxes to corner boxes scaled by image size."""
        ir_record = DetectIrRecord(
            filename=yolo_record.filename,
            width=yolo_record.width,
            height=yolo_record.height
        )
        for yolo_object in yolo_record.objects:
            x_min = (yolo_object.x_center - 0.5 *
                     yolo_object.width) * yolo_record.width
            y_min = (yolo_object.y_center - 0.5 *
                     yolo_object.height) * yolo_record.height
            x_max = (yolo_object.x_center + 0.5 *
                     yolo_object.width) * yolo_record.width
            y_max = (yolo_object.y_center + 0.5 *
                     yolo_object.height) * yolo_record.height
            ir_object = DetectIrObject(
                label=yolo_object.label,
                x_min=x_min,
                y_min=y_min,
                x_max=x_max,
                y_max=y_max
            )
            ir_record.objects.append(ir_object)
        return ir_record

    @staticmethod
    def from_ir(ir_record: DetectIrRecord) -> YoloRecord:
        """Convert corner boxes to centre/size boxes divided by image size."""
        yolo_record = YoloRecord(
            filename=ir_record.filename,
            width=ir_record.width,
            height=ir_record.height
        )
        for ir_object in ir_record.objects:
            x_center = (ir_object.x_max + ir_object.x_min) / \
                (2 * ir_record.width)
            y_center = (ir_object.y_max + ir_object.y_min) / \
                (2 * ir_record.height)
            # abs() keeps the size positive even if the corners are swapped.
            width = abs(ir_object.x_max - ir_object.x_min) / ir_record.width
            height = abs(ir_object.y_max - ir_object.y_min) / ir_record.height
            yolo_object = YoloObject(
                label=ir_object.label,
                x_center=x_center,
                y_center=y_center,
                width=width,
                height=height,
            )
            yolo_record.objects.append(yolo_object)
        return yolo_record
@dataclass
class CocoObject:
    """One COCO annotation box: a label plus a corner/size box."""
    label: str
    # Box corner, filled from the first two entries of the annotation bbox.
    x_min: float
    y_min: float
    # Box extent, filled from the last two entries of the annotation bbox.
    width: float
    height: float
@dataclass
class CocoRecord:
    """All COCO objects that belong to a single image."""
    # Taken from the image entry's ``file_name``, ``width`` and ``height``.
    filename: str
    width: int
    height: int
    objects: List[CocoObject] = field(default_factory=list)
class CocoHandler:
    """Read COCO annotation files and convert records to and from the IR."""

    @staticmethod
    def load(filename, **kwargs) -> List[CocoRecord]:
        """Parse a COCO JSON file into one CocoRecord per image entry.

        Records are returned in the order of the ``images`` list; extra
        keyword arguments are not used.
        """
        json_data = khandy.load_json(filename)
        images = json_data['images']
        annotations = json_data['annotations']
        categories = json_data['categories']

        # category id -> human readable name
        id_to_name = {category['id']: category['name']
                      for category in categories}

        # image id -> record, kept in file order
        records_by_image = OrderedDict(
            (image['id'], CocoRecord(filename=image['file_name'],
                                     width=image['width'],
                                     height=image['height'],
                                     objects=[]))
            for image in images
        )

        for annotation in annotations:
            name = id_to_name[annotation['category_id']]
            bbox = annotation['bbox']
            new_object = CocoObject(label=name,
                                    x_min=bbox[0],
                                    y_min=bbox[1],
                                    width=bbox[2],
                                    height=bbox[3])
            owner = records_by_image[annotation['image_id']]
            owner.objects.append(new_object)
        return list(records_by_image.values())

    @staticmethod
    def to_ir(coco_record: CocoRecord) -> DetectIrRecord:
        """Convert corner/size boxes into corner/corner boxes."""
        ir_record = DetectIrRecord(
            filename=coco_record.filename,
            width=coco_record.width,
            height=coco_record.height,
        )
        for item in coco_record.objects:
            right = item.x_min + item.width
            bottom = item.y_min + item.height
            ir_record.objects.append(DetectIrObject(
                label=item.label,
                x_min=item.x_min,
                y_min=item.y_min,
                x_max=right,
                y_max=bottom
            ))
        return ir_record

    @staticmethod
    def from_ir(ir_record: DetectIrRecord) -> CocoRecord:
        """Convert corner/corner boxes into corner/size boxes."""
        coco_record = CocoRecord(
            filename=ir_record.filename,
            width=ir_record.width,
            height=ir_record.height
        )
        for item in ir_record.objects:
            box_width = item.x_max - item.x_min
            box_height = item.y_max - item.y_min
            coco_record.objects.append(CocoObject(
                label=item.label,
                x_min=item.x_min,
                y_min=item.y_min,
                width=box_width,
                height=box_height
            ))
        return coco_record
def load_detect(filename, fmt, **kwargs) -> DetectIrRecord:
    """Load a detection label file and return it in IR form.

    Args:
        filename: path of the label file.
        fmt: 'labelme', 'yolo', 'coco' or one of 'voc'/'pascal'/'pascal_voc'.
        **kwargs: forwarded to the format handler's ``load``.

    Returns:
        A DetectIrRecord, except for 'coco' where a list of DetectIrRecord
        (one per image) is returned.

    Raises:
        ValueError: if ``fmt`` is not one of the supported names.
    """
    if fmt == 'labelme':
        return LabelmeHandler.to_ir(LabelmeHandler.load(filename, **kwargs))
    if fmt == 'yolo':
        return YoloHandler.to_ir(YoloHandler.load(filename, **kwargs))
    if fmt in ('voc', 'pascal', 'pascal_voc'):
        return PascalVocHandler.to_ir(PascalVocHandler.load(filename, **kwargs))
    if fmt == 'coco':
        loaded = CocoHandler.load(filename, **kwargs)
        return [CocoHandler.to_ir(item) for item in loaded]
    raise ValueError(f"Unsupported detect label fmt. Got {fmt}")
def save_detect(filename, ir_record: DetectIrRecord, out_fmt):
    """Convert ``ir_record`` to ``out_fmt`` and write it to ``filename``.

    The parent directory of ``filename`` is created first, before
    ``out_fmt`` is checked.

    Raises:
        ValueError: for 'coco' (saving is not implemented) and for any
            unknown ``out_fmt``.
    """
    parent_dir = os.path.dirname(os.path.abspath(filename))
    os.makedirs(parent_dir, exist_ok=True)

    if out_fmt == 'labelme':
        LabelmeHandler.save(filename, LabelmeHandler.from_ir(ir_record))
        return
    if out_fmt == 'yolo':
        YoloHandler.save(filename, YoloHandler.from_ir(ir_record))
        return
    if out_fmt in ('voc', 'pascal', 'pascal_voc'):
        PascalVocHandler.save(filename, PascalVocHandler.from_ir(ir_record))
        return
    if out_fmt == 'coco':
        raise ValueError("Unsupported for `coco` now!")
    raise ValueError(f"Unsupported detect label fmt. Got {out_fmt}")
def _get_format(record):
    """Return the tuple of format names that describe ``record``'s type.

    An empty tuple is returned for objects of an unknown type.
    """
    # Checked in order; the first matching type wins.
    names_by_type = (
        (LabelmeRecord, ('labelme',)),
        (YoloRecord, ('yolo',)),
        (PascalVocRecord, ('voc', 'pascal', 'pascal_voc')),
        (CocoRecord, ('coco',)),
        (DetectIrRecord, ('ir', 'detect_ir')),
    )
    for record_type, names in names_by_type:
        if isinstance(record, record_type):
            return names
    return ()
def convert_detect(record, out_fmt):
    """Convert a label record to another format via the IR.

    Args:
        record: a LabelmeRecord, YoloRecord, PascalVocRecord, CocoRecord
            or DetectIrRecord.
        out_fmt: target format name ('labelme', 'yolo', 'coco',
            'voc'/'pascal'/'pascal_voc' or 'ir'/'detect_ir').

    Returns:
        ``record`` itself when it is already in ``out_fmt``, otherwise a
        newly built record of the requested format.

    Raises:
        ValueError: if ``out_fmt`` is not a supported name.
        TypeError: if ``record`` is of an unsupported type.
    """
    voc_names = ('voc', 'pascal', 'pascal_voc')
    ir_names = ('ir', 'detect_ir')
    if out_fmt not in ('labelme', 'yolo', 'coco') + voc_names + ir_names:
        raise ValueError(
            "Unsupported label format conversions for given out_fmt")

    # Nothing to do when the record already has the requested format.
    if out_fmt in _get_format(record):
        return record

    # Step 1: bring the record into the intermediate representation.
    to_ir_handlers = (
        (LabelmeRecord, LabelmeHandler),
        (YoloRecord, YoloHandler),
        (PascalVocRecord, PascalVocHandler),
        (CocoRecord, CocoHandler),
    )
    for record_type, handler in to_ir_handlers:
        if isinstance(record, record_type):
            ir_record = handler.to_ir(record)
            break
    else:
        if not isinstance(record, DetectIrRecord):
            raise TypeError('Unsupported type for record')
        ir_record = record

    # Step 2: turn the intermediate representation into the target format.
    if out_fmt in ir_names:
        return ir_record
    if out_fmt == 'labelme':
        return LabelmeHandler.from_ir(ir_record)
    if out_fmt == 'yolo':
        return YoloHandler.from_ir(ir_record)
    if out_fmt in voc_names:
        return PascalVocHandler.from_ir(ir_record)
    # Only 'coco' is left after the validation at the top.
    return CocoHandler.from_ir(ir_record)
def replace_detect_label(record: DetectIrRecord, label_map, ignore=True):
    """Return a deep copy of ``record`` with labels renamed via ``label_map``.

    Args:
        record: record to copy; it is not modified.
        label_map: mapping from old label to new label.
        ignore: when True, objects whose label is not in ``label_map``
            are dropped; when False they are kept unchanged.
    """
    result = copy.deepcopy(record)
    kept = []
    for item in result.objects:
        if item.label in label_map:
            item.label = label_map[item.label]
            kept.append(item)
        elif not ignore:
            kept.append(item)
    result.objects = kept
    return result
def load_coco_class_names(filename):
    """Return the ``name`` of every entry under ``categories``, in file order."""
    class_names = []
    for category in khandy.load_json(filename)['categories']:
        class_names.append(category['name'])
    return class_names

khandy/list_utils.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import random
+import itertools
def to_list(obj):
    """Turn ``obj`` into a list.

    Returns:
        None if ``obj`` is None; ``list(obj)`` for iterables other than
        ``str``; otherwise the single-element list ``[obj]``. An iterable
        whose iteration fails with an ordinary exception is also wrapped
        as ``[obj]``.
    """
    if obj is None:
        return None
    if isinstance(obj, str) or not hasattr(obj, '__iter__'):
        return [obj]
    try:
        return list(obj)
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit must keep propagating.
        return [obj]
def convert_lists_to_record(*list_objs, delimiter=None):
    """Join equally long lists element-wise into delimited strings.

    Args:
        *list_objs: one or more lists/tuples of the same length.
        delimiter: separator between columns; ',' when None or empty.

    Returns:
        A list with one string per row.
    """
    assert len(list_objs) >= 1, 'list_objs length must >= 1.'
    delimiter = delimiter or ','

    first, *others = list_objs
    assert isinstance(first, (tuple, list))
    number = len(first)
    for item in others:
        assert isinstance(item, (tuple, list))
        assert len(item) == number, '{} != {}'.format(len(item), number)

    return [delimiter.join(map(str, row)) for row in zip(*list_objs)]
def shuffle_table(*table):
    """Shuffle several equally long columns with one shared permutation.

    Notes:
        ``table`` is a sequence of columns; items at the same position of
        different columns stay together after shuffling.

    Returns:
        A list with one shuffled list per input column.
    """
    rows = list(zip(*table))
    random.shuffle(rows)
    return [list(column) for column in zip(*rows)]
def transpose_table(table):
    """Swap the rows and columns of a table.

    Notes:
        ``table`` is a list of rows; the column count is taken from the
        first row. An empty table transposes to an empty list.

    Returns:
        A list of lists where ``result[j][i] == table[i][j]``.
    """
    if len(table) == 0:
        # No first row to take the column count from.
        return []
    m, n = len(table), len(table[0])
    return [[table[i][j] for i in range(m)] for j in range(n)]
def concat_list(in_list):
    """Flatten a list of lists by one level.

    Args:
        in_list (list): The list of lists to be merged.

    Returns:
        list: The items of every sub-list, in order, in one flat list.

    References:
        mmcv.concat_list
    """
    return [item for sub_list in in_list for item in sub_list]

khandy/misc.py ADDED Viewed

	@@ -0,0 +1,245 @@

+import json
+import socket
+import logging
+import argparse
+import warnings
+from enum import Enum
+import requests
def all_of(iterable, pred):
    """Check that every element of ``iterable`` satisfies ``pred``.

    Args:
        iterable (Iterable): Elements to test.
        pred (callable): Predicate applied to each element.

    Returns:
        bool: True if the predicate holds for all elements (also when the
        iterable is empty), False otherwise.

    References:
        https://en.cppreference.com/w/cpp/algorithm/all_any_none_of
    """
    return all(map(pred, iterable))
def any_of(iterable, pred):
    """Check that at least one element of ``iterable`` satisfies ``pred``.

    Args:
        iterable (Iterable): Elements to test.
        pred (callable): Predicate applied to each element.

    Returns:
        bool: True if the predicate holds for some element, False
        otherwise (also when the iterable is empty).

    References:
        https://en.cppreference.com/w/cpp/algorithm/all_any_none_of
    """
    return any(map(pred, iterable))
def none_of(iterable, pred):
    """Check that no element of ``iterable`` satisfies ``pred``.

    Args:
        iterable (Iterable): Elements to test.
        pred (callable): Predicate applied to each element.

    Returns:
        bool: True if the predicate holds for no element (also when the
        iterable is empty), False otherwise.

    References:
        https://en.cppreference.com/w/cpp/algorithm/all_any_none_of
    """
    return not any(map(pred, iterable))
def print_with_no(obj):
    """Print the items of ``obj`` one per line, each prefixed by its number.

    Sized objects are printed as ``[k/total] item``, other iterables as
    ``[k] item`` and anything else as the single line ``[1] obj``.
    Numbering starts at 1.
    """
    if hasattr(obj, '__len__'):
        for number, item in enumerate(obj, start=1):
            print(f'[{number}/{len(obj)}] {item}')
        return
    if hasattr(obj, '__iter__'):
        for number, item in enumerate(obj, start=1):
            print(f'[{number}] {item}')
        return
    print(f'[1] {obj}')
def get_file_line_count(filename, encoding='utf-8'):
    """Count the newline characters in a text file.

    The file is read in 8 MiB chunks (of decoded text), so it is never
    held in memory as a whole. A last line without a trailing newline is
    not counted.

    Args:
        filename: path of the file to read.
        encoding: text encoding used to open the file.
    """
    buffer_size = 1024 * 1024 * 8
    with open(filename, 'r', encoding=encoding) as f:
        # iter() with a sentinel stops on the empty string read() returns at EOF.
        chunks = iter(lambda: f.read(buffer_size), '')
        return sum(chunk.count('\n') for chunk in chunks)
def get_host_ip():
    """Return the local address of a UDP socket connected to 8.8.8.8:80.

    Returns:
        str: the first element of the socket's ``getsockname()``.

    Raises:
        OSError: if the socket cannot be created or connected.
    """
    # The context manager closes the socket on every path. If socket()
    # itself fails there is nothing to close and the original error
    # propagates instead of an UnboundLocalError from a ``finally`` block.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        s.connect(('8.8.8.8', 80))
        return s.getsockname()[0]
def set_logger(filename, level=logging.INFO, logger_name=None, formatter=None, with_print=True):
    """Configure a logger that writes to ``filename`` and optionally the console.

    File handlers and plain stream handlers already attached to the logger
    are removed first, so calling this repeatedly replaces the outputs
    instead of stacking them.

    Args:
        filename: path of the log file (opened with utf-8 encoding).
        level: level set on the logger.
        logger_name: name passed to ``logging.getLogger``; None selects
            the root logger.
        formatter: ``logging.Formatter`` for both handlers; defaults to a
            message-only format.
        with_print: when True a ``logging.StreamHandler`` is added too.

    Returns:
        The configured ``logging.Logger``.
    """
    logger = logging.getLogger(logger_name)
    logger.setLevel(level)
    if formatter is None:
        formatter = logging.Formatter('%(message)s')

    # Iterate over a copy: handlers are removed from the list while looping.
    for handler in logger.handlers[:]:
        if isinstance(handler, logging.FileHandler):
            logger.removeHandler(handler)
            # Release the log file; a removed handler would otherwise keep
            # its file descriptor open.
            handler.close()
        # FileHandler is a subclass of StreamHandler, so an isinstance check
        # would also match file handlers; compare the exact type instead.
        elif type(handler) is logging.StreamHandler:
            logger.removeHandler(handler)

    file_handler = logging.FileHandler(filename, encoding='utf-8')
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    if with_print:
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)
    return logger
def print_arguments(args):
    """Print every attribute of an ``argparse.Namespace`` as ``key: value``.

    Lines are ordered by sorting the (key, value) pairs.
    """
    assert isinstance(args, argparse.Namespace)
    for key, value in sorted(vars(args).items()):
        print(f'{key}: {value}')
def save_arguments(filename, args, sort=True):
    """Dump an ``argparse.Namespace`` to ``filename`` as indented JSON.

    Args:
        filename: path of the JSON file to write.
        args: the parsed arguments.
        sort: whether the keys are written in sorted order.
    """
    assert isinstance(args, argparse.Namespace)
    with open(filename, 'w') as f:
        json.dump(vars(args), f, indent=4, sort_keys=sort)
class DownloadStatusCode(Enum):
    """Failure reasons reported by the download helpers.

    Every member's value is a ``(code, message)`` pair.
    """
    FILE_SIZE_TOO_LARGE = (-100, 'the size of file from url is too large')
    FILE_SIZE_TOO_SMALL = (-101, 'the size of file from url is too small')
    FILE_SIZE_IS_ZERO = (-102, 'the size of file from url is zero')
    URL_IS_NOT_IMAGE = (-103, 'URL is not an image')

    @property
    def code(self):
        """Numeric code, the first element of the member value."""
        number, _ = self.value
        return number

    @property
    def message(self):
        """Description, the second element of the member value."""
        _, text = self.value
        return text
class DownloadError(Exception):
    """Raised when a download violates one of the checked constraints.

    Attributes:
        name: name of the status code member.
        code: numeric code of the status.
        message: status message, followed by ``': <extra_str>'`` when an
            extra string was given.
    """

    def __init__(self, status_code: 'DownloadStatusCode', extra_str: str = None):
        # Hand the constructor arguments to Exception so that ``args`` is
        # populated; pickle and copy rebuild an exception from ``args``.
        super().__init__(status_code, extra_str)
        self.name = status_code.name
        self.code = status_code.code
        if extra_str is None:
            self.message = status_code.message
        else:
            self.message = f'{status_code.message}: {extra_str}'

    def __repr__(self):
        return f'[{self.__class__.__name__} {self.code}] {self.message}'

    __str__ = __repr__
def _download_bytes(url, min_filesize, max_filesize, params, require_image, kwargs):
    """Stream ``url`` into memory while enforcing the size limits.

    Shared implementation of ``download_image`` and ``download_file``;
    ``require_image`` additionally turns on the Content-Type check.
    """
    stream = kwargs.pop('stream', True)
    with requests.get(url, stream=stream, params=params, **kwargs) as response:
        response.raise_for_status()

        if require_image:
            content_type = response.headers.get('content-type')
            if content_type is None:
                warnings.warn('No Content-Type!')
            # Media types are case-insensitive, so compare in lower case.
            elif not content_type.strip().lower().startswith(
                    ('image/', 'application/octet-stream')):
                raise DownloadError(DownloadStatusCode.URL_IS_NOT_IMAGE)

        # when Transfer-Encoding == chunked, Content-Length does not exist.
        content_length = response.headers.get('content-length')
        if content_length is None:
            warnings.warn('No Content-Length!')
        else:
            content_length = int(content_length)
            if content_length > max_filesize:
                raise DownloadError(DownloadStatusCode.FILE_SIZE_TOO_LARGE)
            if content_length < min_filesize:
                raise DownloadError(DownloadStatusCode.FILE_SIZE_TOO_SMALL)

        # The header may be missing or wrong, so the size is checked again
        # while the body is read, stopping as soon as the limit is passed.
        filesize = 0
        chunks = []
        for chunk in response.iter_content(chunk_size=10*1024):
            chunks.append(chunk)
            filesize += len(chunk)
            if filesize > max_filesize:
                raise DownloadError(DownloadStatusCode.FILE_SIZE_TOO_LARGE)
        if filesize < min_filesize:
            raise DownloadError(DownloadStatusCode.FILE_SIZE_TOO_SMALL)
        return b''.join(chunks)


def download_image(image_url, min_filesize=0, max_filesize=100*1024*1024,
                   params=None, **kwargs) -> bytes:
    """Download an image and return its raw bytes.

    Args:
        image_url: URL to fetch with ``requests.get``.
        min_filesize: smallest accepted size in bytes.
        max_filesize: largest accepted size in bytes.
        params: query parameters passed to ``requests.get``.
        **kwargs: further ``requests.get`` arguments; ``stream`` defaults
            to True.

    Raises:
        DownloadError: if the Content-Type is neither ``image/*`` nor
            ``application/octet-stream``, or the size is out of range.
        requests.HTTPError: from ``raise_for_status`` on an error status.

    References:
        https://httpwg.org/specs/rfc9110.html#field.content-length
        https://requests.readthedocs.io/en/latest/user/advanced/#body-content-workflow
    """
    return _download_bytes(image_url, min_filesize, max_filesize, params,
                           True, kwargs)


def download_file(url, min_filesize=0, max_filesize=100*1024*1024,
                  params=None, **kwargs) -> bytes:
    """Download a file of any type and return its raw bytes.

    Args:
        url: URL to fetch with ``requests.get``.
        min_filesize: smallest accepted size in bytes.
        max_filesize: largest accepted size in bytes.
        params: query parameters passed to ``requests.get``.
        **kwargs: further ``requests.get`` arguments; ``stream`` defaults
            to True.

    Raises:
        DownloadError: if the size is out of range.
        requests.HTTPError: from ``raise_for_status`` on an error status.

    References:
        https://httpwg.org/specs/rfc9110.html#field.content-length
        https://requests.readthedocs.io/en/latest/user/advanced/#body-content-workflow
    """
    return _download_bytes(url, min_filesize, max_filesize, params,
                           False, kwargs)

khandy/numpy_utils.py ADDED Viewed

	@@ -0,0 +1,173 @@

+import numpy as np
def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``."""
    denominator = 1 + np.exp(-x)
    return 1. / denominator
def softmax(x, axis=-1, copy=True):
    """Softmax along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiation.

    Args:
        x: input array.
        axis: axis along which the values are normalized.
        copy: Copy x or not. When False the computation overwrites ``x``
            and ``x`` itself is returned.

    References:
        `from sklearn.utils.extmath import softmax`
    """
    out = np.copy(x) if copy else x
    out -= np.max(out, axis=axis, keepdims=True)
    np.exp(out, out=out)
    out /= np.sum(out, axis=axis, keepdims=True)
    return out
def log_sum_exp(x, axis=-1, keepdims=False):
    """Compute ``log(sum(exp(x)))`` along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiation and
    added back afterwards. The input is left unmodified.

    Args:
        x: array_like input.
        axis: axis (or axes) to reduce over.
        keepdims: whether the reduced axes are kept with length one.

    References:
        numpy.logaddexp
        numpy.logaddexp2
        scipy.misc.logsumexp
    """
    x = np.asarray(x)
    max_val = np.max(x, axis=axis, keepdims=True)
    # ``x - max_val`` allocates a new array, so the caller's data survives.
    sum_exp = np.sum(np.exp(x - max_val), axis=axis, keepdims=keepdims)
    # No ``out=`` here: for 1-D input the sum is a NumPy scalar, which
    # cannot be used as an output array.
    lse = np.log(sum_exp)
    if not keepdims:
        max_val = np.squeeze(max_val, axis=axis)
    return max_val + lse
def l2_normalize(x, axis=None, epsilon=1e-12, copy=True):
    """Divide an array by its L2 norm along an axis.

    Args:
        x : array_like of floats
            Input data.
        axis : None or int or tuple of ints, optional
            Axis or axes along which the norm is taken.
        epsilon: float, optional
            Lower bound for the norm, to avoid division by zero.
        copy : bool, optional
            Copy x or not. When False ``x`` is modified in place.
    """
    out = np.copy(x) if copy else x
    norm = np.linalg.norm(out, axis=axis, keepdims=True)
    out /= np.maximum(norm, epsilon)
    return out
def minmax_normalize(x, axis=None, epsilon=1e-12, copy=True):
    """Rescale an array along an axis as ``(x - min) / (max - min)``.

    Args:
        x : array_like of floats
            Input data.
        axis : None or int or tuple of ints, optional
            Axis or axes along which min and max are taken.
        epsilon: float, optional
            Lower bound for ``max - min``, to avoid division by zero.
        copy : bool, optional
            Copy x or not. When False ``x`` is modified in place.
    """
    out = np.copy(x) if copy else x
    lowest = np.min(out, axis=axis, keepdims=True)
    highest = np.max(out, axis=axis, keepdims=True)
    value_range = np.maximum(highest - lowest, epsilon)
    out -= lowest
    out /= value_range
    return out
def zscore_normalize(x, mean=None, std=None, axis=None, epsilon=1e-12, copy=True):
    """Standardize an array along an axis as ``(x - mean) / std``.

    Args:
        x : array_like of floats
            Input data.
        mean:  array_like of floats, optional
            Mean to subtract; computed from ``x`` along ``axis`` if None.
        std: array_like of floats, optional
            Standard deviation to divide by; computed from ``x`` along
            ``axis`` if None.
        axis : None or int or tuple of ints, optional
            Axis or axes used when mean/std are computed.
        epsilon: float, optional
            Lower bound for the std, to avoid division by zero.
        copy : bool, optional
            Copy x or not. When False ``x`` is modified in place.
    """
    out = np.copy(x) if copy else x
    # Both statistics are taken before the data is modified.
    center = np.mean(out, axis=axis, keepdims=True) if mean is None else mean
    spread = np.std(out, axis=axis, keepdims=True) if std is None else std

    center = np.asarray(center, dtype=out.dtype)
    spread = np.maximum(np.asarray(spread, dtype=out.dtype), epsilon)

    out -= center
    out /= spread
    return out
def get_order_of_magnitude(number):
    """Return ``floor(log10(|number|))`` as int32, element-wise.

    Zeros are replaced by 1 before the logarithm, so their result is 0.
    """
    nonzero = np.where(number == 0, 1, number)
    exponent = np.floor(np.log10(np.abs(nonzero)))
    return exponent.astype(np.int32)
def top_k(x, k, axis=-1, largest=True, sorted=True):
    """Finds values and indices of the k largest/smallest
    elements along a given axis.

    Args:
        x: array_like
            1-D or higher with given axis at least k.
        k: int
            Number of top elements to look for along the given axis.
        axis: int
            The axis to sort along.
        largest: bool
            Controls whether to return largest or smallest elements
        sorted: bool
            If true the resulting k elements will be sorted by the values
            (descending when ``largest`` is True, ascending otherwise).

    Returns:
        topk_values:
            The k largest/smallest elements along the given axis.
        topk_indices:
            The indices of the k largest/smallest elements along the given axis.
    """
    # Convert first so that ``size``/``shape`` also exist for lists/tuples.
    x = np.asanyarray(x)
    if axis is None:
        axis_size = x.size
    else:
        axis_size = x.shape[axis]
    assert 1 <= k <= axis_size

    if largest:
        # After partitioning, the k largest sit in the last k positions.
        index_array = np.argpartition(x, axis_size-k, axis=axis)
        topk_indices = np.take(index_array, -np.arange(k)-1, axis=axis)
    else:
        # After partitioning, the k smallest sit in the first k positions.
        index_array = np.argpartition(x, k-1, axis=axis)
        topk_indices = np.take(index_array, np.arange(k), axis=axis)
    topk_values = np.take_along_axis(x, topk_indices, axis=axis)

    if sorted:
        # argpartition leaves the selected k unordered; order them here.
        sorted_indices_in_topk = np.argsort(topk_values, axis=axis)
        if largest:
            sorted_indices_in_topk = np.flip(sorted_indices_in_topk, axis=axis)
        sorted_topk_values = np.take_along_axis(
            topk_values, sorted_indices_in_topk, axis=axis)
        sorted_topk_indices = np.take_along_axis(
            topk_indices, sorted_indices_in_topk, axis=axis)
        return sorted_topk_values, sorted_topk_indices
    return topk_values, topk_indices

khandy/points/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ from .pts_letterbox import *
2	+ from .pts_transform_scale import *

khandy/points/pts_letterbox.py ADDED Viewed

	@@ -0,0 +1,19 @@

+__all__ = ['letterbox_2d_points', 'unletterbox_2d_points']
def letterbox_2d_points(points, scale=1.0, pad_left=0, pad_top=0, copy=True):
    """Scale points and shift them by the padding.

    Entries at even positions of the last axis become
    ``v * scale + pad_left`` and entries at odd positions become
    ``v * scale + pad_top``.

    Args:
        points: array supporting ``.copy()`` and slice assignment.
        scale: factor applied to every entry.
        pad_left: offset added to the even-position entries.
        pad_top: offset added to the odd-position entries.
        copy: when False ``points`` is modified in place.
    """
    out = points.copy() if copy else points
    for first, pad in ((0, pad_left), (1, pad_top)):
        out[..., first::2] = out[..., first::2] * scale + pad
    return out
def unletterbox_2d_points(points, scale=1.0, pad_left=0, pad_top=0, copy=True):
    """Remove the padding shift from points and divide by the scale.

    Entries at even positions of the last axis become
    ``(v - pad_left) / scale`` and entries at odd positions become
    ``(v - pad_top) / scale``.

    Args:
        points: array supporting ``.copy()`` and slice assignment.
        scale: divisor applied to every entry.
        pad_left: offset subtracted from the even-position entries.
        pad_top: offset subtracted from the odd-position entries.
        copy: when False ``points`` is modified in place.
    """
    out = points.copy() if copy else points
    for first, pad in ((0, pad_left), (1, pad_top)):
        out[..., first::2] = (out[..., first::2] - pad) / scale
    return out

khandy/points/pts_transform_scale.py ADDED Viewed

	@@ -0,0 +1,33 @@

+import numpy as np
+__all__ = ['scale_2d_points']
def scale_2d_points(points, x_scale=1, y_scale=1, x_center=0, y_center=0, copy=True):
    """Scale 2d points about a centre.

    Each x becomes ``x * x_scale + (1 - x_scale) * x_center`` and each y
    becomes ``y * y_scale + (1 - y_scale) * y_center``.

    Args:
        points: (..., 2N), x at even and y at odd positions of the last axis
        x_scale: scale factor in x dimension
        y_scale: scale factor in y dimension
        x_center: scale center in x dimension
        y_center: scale center in y dimension
        copy: when False the input is reused if it already is a float32
            array (and is then modified in place); otherwise a converted
            copy is made.

    Returns:
        float32 array of the scaled points.
    """
    if copy:
        points = np.array(points, dtype=np.float32, copy=True)
    else:
        # np.array(..., copy=False) raises under NumPy 2 when a copy cannot
        # be avoided; asarray copies only if it has to, on every version.
        points = np.asarray(points, dtype=np.float32)
    x_scale = np.asarray(x_scale, np.float32)
    y_scale = np.asarray(y_scale, np.float32)
    x_center = np.asarray(x_center, np.float32)
    y_center = np.asarray(y_center, np.float32)

    # Offset that keeps the centre fixed under the scaling.
    x_shift = (1 - x_scale) * x_center
    y_shift = (1 - y_scale) * y_center

    points[..., 0::2] *= x_scale
    points[..., 1::2] *= y_scale
    points[..., 0::2] += x_shift
    points[..., 1::2] += y_shift
    return points

khandy/split_utils.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import numbers
+from collections.abc import Sequence
+import numpy as np
def split_by_num(x, num_splits, strict=True):
    """Split a sequence into consecutive chunks of equal size.

    The chunk size is ``ceil(len(x) / num_splits)``; without ``strict``
    the last chunk may be shorter and fewer than ``num_splits`` chunks may
    be produced.

    Args:
        x: a sequence or numpy array
        num_splits: an integer indicating the number of splits
        strict: when True, ``len(x)`` must be divisible by ``num_splits``

    Returns:
        A list of slices of ``x``. For empty ``x``, ``num_splits`` empty
        slices are returned.

    References:
        numpy.split and numpy.array_split
    """
    # NB: np.ndarray is not Sequence
    assert isinstance(x, (Sequence, np.ndarray))
    assert isinstance(num_splits, numbers.Integral)

    if strict:
        assert len(x) % num_splits == 0
    if len(x) == 0:
        # The chunk size would be 0 here, which range() rejects as a step.
        return [x[:0] for _ in range(num_splits)]
    split_size = (len(x) + num_splits - 1) // num_splits
    return [x[i: i + split_size] for i in range(0, len(x), split_size)]
def split_by_size(x, sizes):
    """Split a sequence into consecutive chunks with the given lengths.

    Args:
        x: a sequence or numpy array
        sizes: list or tuple of chunk lengths; must sum to ``len(x)``

    References:
        tf.split
        https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/misc.py
    """
    # NB: np.ndarray is not Sequence
    assert isinstance(x, (Sequence, np.ndarray))
    assert isinstance(sizes, (list, tuple))
    assert sum(sizes) == len(x)

    chunks = []
    begin = 0
    for size in sizes:
        end = begin + size
        chunks.append(x[begin:end])
        begin = end
    return chunks
def split_by_slice(x, slices):
    """Split a sequence at the given cut positions.

    Args:
        x: a sequence or numpy array
        slices: list or tuple of indices at which to cut; ``n`` cut
            positions give ``n + 1`` chunks

    References:
        SliceLayer in Caffe, and numpy.split
    """
    # NB: np.ndarray is not Sequence
    assert isinstance(x, (Sequence, np.ndarray))
    assert isinstance(slices, (list, tuple))

    bounds = [0, *slices, len(x)]
    return [x[begin:end] for begin, end in zip(bounds[:-1], bounds[1:])]
def split_by_ratio(x, ratios):
    """Split a sequence into consecutive chunks sized by relative weights.

    The weights are normalized by their sum; the cut positions are the
    rounded cumulative fractions of ``len(x)``.

    Args:
        x: a sequence or numpy array
        ratios: list or tuple of weights, one per chunk
    """
    # NB: np.ndarray is not Sequence
    assert isinstance(x, (Sequence, np.ndarray))
    assert isinstance(ratios, (list, tuple))

    total = sum(ratios)
    cumulative = 0
    bounds = [int(round(len(x) * cumulative))]
    for ratio in ratios:
        cumulative = cumulative + ratio / total
        bounds.append(int(round(len(x) * cumulative)))
    return [x[begin:end] for begin, end in zip(bounds[:-1], bounds[1:])]

khandy/text_utils.py ADDED Viewed

	@@ -0,0 +1,33 @@

+import re
def strip_content_in_paren(string):
    """Remove parenthesized text, including the parentheses themselves.

    Both ASCII ``()`` and full-width ``（）`` pairs are handled.

    Notes:
        Nested parentheses are not processed correctly.
    """
    paren_pattern = re.compile(r"\([^)]*\)|（[^）]*）")
    return paren_pattern.sub("", string)
def is_chinese_char(uchar: str) -> bool:
    """Tell whether a single character lies in one of the listed CJK blocks.

    Args:
        uchar: input char in unicode (exactly one character)

    References:
        `is_chinese_char` in https://github.com/thunlp/OpenNRE/
    """
    cjk_ranges = (
        (0x4E00, 0x9FFF),    # CJK Unified Ideographs
        (0x3400, 0x4DBF),    # CJK Unified Ideographs Extension A
        (0xF900, 0xFAFF),    # CJK Compatibility Ideographs
        (0x20000, 0x2A6DF),  # CJK Unified Ideographs Extension B
        (0x2A700, 0x2B73F),
        (0x2B740, 0x2B81F),
        (0x2B820, 0x2CEAF),
        (0x2F800, 0x2FA1F),  # CJK Compatibility Supplement
    )
    codepoint = ord(uchar)
    return any(low <= codepoint <= high for low, high in cjk_ranges)

khandy/time_utils.py ADDED Viewed

	@@ -0,0 +1,101 @@

+import time
+import logging
+import numbers
+import datetime
+def _to_timestamp(val, multiplier=1, rounded=False):
+    if val is None:
+        timestamp = time.time()
+    elif isinstance(val, numbers.Real):
+        timestamp = float(val)
+    elif isinstance(val, time.struct_time):
+        timestamp = time.mktime(val)
+    elif isinstance(val, datetime.datetime):
+        timestamp = val.timestamp()
+    elif isinstance(val, datetime.date):
+        dt = datetime.datetime.combine(val, datetime.time())
+        timestamp = dt.timestamp()
+    elif isinstance(val, str):
+        try:
+            # The full format looks like 'YYYY-MM-DD HH:MM:SS.mmmmmm'.
+            dt = datetime.datetime.fromisoformat(val)
+            timestamp = dt.timestamp()
+        except:
+            raise TypeError('when argument is str, it should conform to isoformat')
+    else:
+        raise TypeError('unsupported type!')
+    timestamp = timestamp * multiplier
+    if rounded:
+        # The return value is an integer if ndigits is omitted or None.
+        timestamp = round(timestamp)
+    return timestamp
def get_timestamp(time_val=None, rounded=True):
    """Return ``time_val`` as a timestamp expressed in seconds.

    Args:
        time_val: value forwarded to ``_to_timestamp``; None means the
            current time.
        rounded: when true (the default) the result is rounded to an integer.
    """
    scale = 1  # seconds need no scaling
    return _to_timestamp(time_val, scale, rounded)
def get_timestamp_ms(time_val=None, rounded=True):
    """Return ``time_val`` as a timestamp expressed in milliseconds.

    Args:
        time_val: value forwarded to ``_to_timestamp``; None means the
            current time.
        rounded: when true (the default) the result is rounded to an integer.
    """
    scale = 1000  # milliseconds per second
    return _to_timestamp(time_val, scale, rounded)
def get_timestamp_us(time_val=None, rounded=True):
    """Return ``time_val`` as a timestamp expressed in microseconds.

    Args:
        time_val: value forwarded to ``_to_timestamp``; None means the
            current time.
        rounded: when true (the default) the result is rounded to an integer.
    """
    scale = 1000000  # microseconds per second
    return _to_timestamp(time_val, scale, rounded)
def get_utc8now() -> datetime.datetime:
    """Return the current time as an aware datetime in UTC+8 (Beijing time).

    The returned object carries a fixed ``+08:00`` offset as its tzinfo.
    """
    eight_hours = datetime.timedelta(hours=8)
    return datetime.datetime.now(datetime.timezone(eight_hours))
class ContextTimer(object):
    """Context manager that reports the wall-clock time spent in a block.

    On entry the current ``time.time()`` is stored in ``start_time`` and,
    unless ``quiet`` is set, a "starts" message is emitted. On exit the
    elapsed time and an "ends" message are emitted. Messages go to ``print``
    by default, or to ``logging.info`` when ``use_log`` is true.

    Args:
        name: optional label prepended to every message. Trailing whitespace
            is stripped; non-string labels are converted with ``str``.
        use_log: send messages to ``logging.info`` instead of ``print``.
        quiet: suppress all messages; timing is still recorded.

    References:
        WithTimer in https://github.com/uber/ludwig/blob/master/ludwig/utils/time_utils.py
    """

    def __init__(self, name=None, use_log=False, quiet=False):
        self.use_log = use_log
        self.quiet = quiet
        if name is None:
            self.name = ''
        else:
            # str() lets non-string labels (e.g. an integer id) be used.
            self.name = '{}, '.format(str(name).rstrip())

    def __enter__(self):
        self.start_time = time.time()
        if not self.quiet:
            self._print_or_log('{}{} starts'.format(self.name, self._now_time_str))
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None, so an exception raised inside the block propagates.
        if not self.quiet:
            self._print_or_log('{}elapsed_time = {:.5}s'.format(self.name, self.get_elapsed_time()))
            self._print_or_log('{}{} ends'.format(self.name, self._now_time_str))

    @property
    def _now_time_str(self):
        """Current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
        return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())

    def _print_or_log(self, output_str):
        """Emit ``output_str`` through logging or print, per ``use_log``."""
        if self.use_log:
            logging.info(output_str)
        else:
            print(output_str)

    def get_elapsed_time(self):
        """Return the seconds elapsed since the timer was entered."""
        return time.time() - self.start_time

    def get_eplased_time(self):
        """Misspelled legacy name kept for backward compatibility.

        Prefer ``get_elapsed_time``.
        """
        return self.get_elapsed_time()

    def enter(self):
        """Manually trigger enter; returns the timer itself."""
        return self.__enter__()

    def exit(self):
        """Manually trigger exit, the counterpart of ``enter``."""
        self.__exit__(None, None, None)

khandy/version.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ __version__ = '0.1.8'
2	+
3	+ __all__ = ['__version__']

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+opencv-python>=4.5
+numpy>=1.11.1
+lxml
+requests
+onnxruntime
+Pillow
+modelscope==1.15