Spaces:

deepghs
/

anime_object_detection

Running

App Files Files Community

narugo1992 committed on May 1, 2023

Commit

3903f4f

1 Parent(s): 7e9b170

dev(narugo): add manbits detect

Browse files

Files changed (6) hide show

app.py +25 -0
face.py +12 -8
manbits.py +45 -0
person.py +11 -9
plot.py +55 -15
yolo_.py +12 -11

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import os
 import gradio as gr
 from face import _FACE_MODELS, _DEFAULT_FACE_MODEL, _gr_detect_faces
 from person import _PERSON_MODELS, _DEFAULT_PERSON_MODEL, _gr_detect_person
 if __name__ == '__main__':
@@ -56,4 +57,28 @@ if __name__ == '__main__':
                         outputs=[gr_person_output_image],
                     )
     demo.queue(os.cpu_count()).launch()

 import gradio as gr
 from face import _FACE_MODELS, _DEFAULT_FACE_MODEL, _gr_detect_faces
+from manbits import _MANBIT_MODELS, _DEFAULT_MANBIT_MODEL, _gr_detect_manbits
 from person import _PERSON_MODELS, _DEFAULT_PERSON_MODEL, _gr_detect_person
 if __name__ == '__main__':
                         outputs=[gr_person_output_image],
                     )
+            with gr.Tab('Manbits Detection'):
+                with gr.Row():
+                    with gr.Column():
+                        gr_manbit_input_image = gr.Image(type='pil', label='Original Image')
+                        gr_manbit_model = gr.Dropdown(_MANBIT_MODELS, value=_DEFAULT_MANBIT_MODEL, label='Model')
+                        gr_manbit_infer_size = gr.Slider(480, 1600, value=1216, step=32, label='Max Infer Size')
+                        with gr.Row():
+                            gr_manbit_iou_threshold = gr.Slider(0.0, 1.0, 0.7, label='IOU Threshold')
+                            gr_manbit_score_threshold = gr.Slider(0.0, 1.0, 0.25, label='Score Threshold')
+                        gr_manbit_submit = gr.Button(value='Submit', variant='primary')
+                    with gr.Column():
+                        gr_manbit_output_image = gr.Image(type='pil', label="Labeled")
+                    gr_manbit_submit.click(
+                        _gr_detect_manbits,
+                        inputs=[
+                            gr_manbit_input_image, gr_manbit_model,
+                            gr_manbit_infer_size, gr_manbit_score_threshold, gr_manbit_iou_threshold,
+                        ],
+                        outputs=[gr_manbit_output_image],
+                    )
     demo.queue(os.cpu_count()).launch()

face.py CHANGED Viewed

@@ -1,11 +1,12 @@
 from functools import lru_cache
 from huggingface_hub import hf_hub_download
 from imgutils.data import ImageTyping, load_image, rgb_encode
 from onnx_ import _open_onnx_model
-from plot import plot_detection
-from yolo_ import _image_preprocess, _data_simple_postprocess
 _FACE_MODELS = [
     'face_detect_best_n.onnx',
@@ -22,18 +23,21 @@ def _open_face_detect_model(model_name):
     ))
-def detect_faces(image: ImageTyping, model_name: str, max_infer_size=1216,
-                 conf_threshold: float = 0.25, iou_threshold: float = 0.7):
     image = load_image(image, mode='RGB')
     new_image, old_size, new_size = _image_preprocess(image, max_infer_size)
     data = rgb_encode(new_image)[None, ...]
-    output, = _open_face_detect_model(model_name).run(['output0'], {'images': data})
-    return _data_simple_postprocess(output[0], conf_threshold, iou_threshold, old_size, new_size)
 def _gr_detect_faces(image: ImageTyping, model_name: str, max_infer_size=1216,
                      conf_threshold: float = 0.25, iou_threshold: float = 0.7):
     ret = detect_faces(image, model_name, max_infer_size, conf_threshold, iou_threshold)
-    detections = [(box, 0, score) for box, score in ret]
-    return plot_detection(image, detections, ['head'])

 from functools import lru_cache
+from typing import List, Tuple
 from huggingface_hub import hf_hub_download
 from imgutils.data import ImageTyping, load_image, rgb_encode
 from onnx_ import _open_onnx_model
+from plot import detection_visualize
+from yolo_ import _image_preprocess, _data_postprocess
 _FACE_MODELS = [
     'face_detect_best_n.onnx',
     ))
+_LABELS = ['head']
+def detect_faces(image: ImageTyping, level: str = 's', max_infer_size=1216,
+                 conf_threshold: float = 0.25, iou_threshold: float = 0.7) \
+        -> List[Tuple[Tuple[int, int, int, int], str, float]]:  # face detection entry point; labels come from _LABELS
     image = load_image(image, mode='RGB')
     new_image, old_size, new_size = _image_preprocess(image, max_infer_size)
     data = rgb_encode(new_image)[None, ...]  # [None, ...] prepends an axis of size 1
+    output, = _open_face_detect_model(level).run(['output0'], {'images': data})  # NOTE(review): `level` is forwarded as the model name, and _gr_detect_faces passes a full model_name here; the default 's' presumably does not name a model file - confirm
+    return _data_postprocess(output[0], conf_threshold, iou_threshold, old_size, new_size, _LABELS)  # list of ((x0, y0, x1, y1), label, score)
 def _gr_detect_faces(image: ImageTyping, model_name: str, max_infer_size=1216,
                      conf_threshold: float = 0.25, iou_threshold: float = 0.7):  # detect faces, then draw them on the image
     ret = detect_faces(image, model_name, max_infer_size, conf_threshold, iou_threshold)  # model_name fills the `level` parameter positionally
+    return detection_visualize(image, ret, _LABELS)  # image with the detections visualized

manbits.py ADDED Viewed

	@@ -0,0 +1,45 @@

+from functools import lru_cache
+from typing import List, Tuple
+from huggingface_hub import hf_hub_download
+from imgutils.data import ImageTyping, load_image, rgb_encode
+from onnx_ import _open_onnx_model
+from plot import detection_visualize
+from yolo_ import _image_preprocess, _data_postprocess
+_MANBIT_MODELS = [  # model file names; app.py offers these in the 'Model' dropdown
+    'manbits_detect_best_m.onnx',
+]
+_DEFAULT_MANBIT_MODEL = _MANBIT_MODELS[0]  # first entry is the dropdown's initial value
+@lru_cache()
+def _open_manbits_detect_model(model_name):
+    return _open_onnx_model(hf_hub_download(
+        'deepghs/imgutils-models',
+        f'manbits_detect/{model_name}'
+    ))
+_LABELS = [  # class names, passed to _data_postprocess (label lookup) and detection_visualize
+    'EXPOSED_BELLY', 'EXPOSED_BREAST_F', 'EXPOSED_BREAST_M',
+    'EXPOSED_BUTTOCKS', 'EXPOSED_GENITALIA_F', 'EXPOSED_GENITALIA_M'
+]
+def detect_manbits(image: ImageTyping, level: str = 'm', max_infer_size=1216,
+                   conf_threshold: float = 0.25, iou_threshold: float = 0.7) \
+        -> List[Tuple[Tuple[int, int, int, int], str, float]]:
+    image = load_image(image, mode='RGB')
+    new_image, old_size, new_size = _image_preprocess(image, max_infer_size)
+    data = rgb_encode(new_image)[None, ...]
+    output, = _open_manbits_detect_model(level).run(['output0'], {'images': data})
+    return _data_postprocess(output[0], conf_threshold, iou_threshold, old_size, new_size, _LABELS)
+def _gr_detect_manbits(image: ImageTyping, model_name: str, max_infer_size=1216,
+                       conf_threshold: float = 0.25, iou_threshold: float = 0.7):
+    ret = detect_manbits(image, model_name, max_infer_size, conf_threshold, iou_threshold)
+    return detection_visualize(image, ret, _LABELS)

person.py CHANGED Viewed

@@ -4,8 +4,8 @@ from huggingface_hub import hf_hub_download
 from imgutils.data import ImageTyping, load_image, rgb_encode
 from onnx_ import _open_onnx_model
-from plot import plot_detection
-from yolo_ import _image_preprocess, _data_simple_postprocess
 _PERSON_MODELS = [
     'person_detect_best_m.onnx',
@@ -22,18 +22,20 @@ def _open_person_detect_model(model_name):
     ))
-def detect_person(image: ImageTyping, model_name: str, max_infer_size=1216,
-                  conf_threshold: float = 0.25, iou_threshold: float = 0.7):
     image = load_image(image, mode='RGB')
     new_image, old_size, new_size = _image_preprocess(image, max_infer_size)
     data = rgb_encode(new_image)[None, ...]
-    output, = _open_person_detect_model(model_name).run(['output0'], {'images': data})
-    return _data_simple_postprocess(output[0], conf_threshold, iou_threshold, old_size, new_size)
 def _gr_detect_person(image: ImageTyping, model_name: str, max_infer_size=1216,
-                      conf_threshold: float = 0.25, iou_threshold: float = 0.7):
     ret = detect_person(image, model_name, max_infer_size, conf_threshold, iou_threshold)
-    detections = [(box, 0, score) for box, score in ret]
-    return plot_detection(image, detections, ['person'])

 from imgutils.data import ImageTyping, load_image, rgb_encode
 from onnx_ import _open_onnx_model
+from plot import detection_visualize
+from yolo_ import _image_preprocess, _data_postprocess
 _PERSON_MODELS = [
     'person_detect_best_m.onnx',
     ))
+_LABELS = ['person']
+def detect_person(image: ImageTyping, level: str = 's', max_infer_size=1216,
+                  conf_threshold: float = 0.3, iou_threshold: float = 0.5):  # person detection entry point; labels come from _LABELS
     image = load_image(image, mode='RGB')
     new_image, old_size, new_size = _image_preprocess(image, max_infer_size)
     data = rgb_encode(new_image)[None, ...]  # [None, ...] prepends an axis of size 1
+    output, = _open_person_detect_model(level).run(['output0'], {'images': data})  # NOTE(review): `level` is forwarded as the model name, and _gr_detect_person passes a full model_name here; the default 's' presumably does not name a model file - confirm
+    return _data_postprocess(output[0], conf_threshold, iou_threshold, old_size, new_size, _LABELS)  # list of ((x0, y0, x1, y1), label, score)
 def _gr_detect_person(image: ImageTyping, model_name: str, max_infer_size=1216,
+                      conf_threshold: float = 0.3, iou_threshold: float = 0.5):  # detect persons, then draw them on the image
     ret = detect_person(image, model_name, max_infer_size, conf_threshold, iou_threshold)  # model_name fills the `level` parameter positionally
+    return detection_visualize(image, ret, _LABELS)  # image with the detections visualized

plot.py CHANGED Viewed

@@ -1,26 +1,66 @@
-from typing import List, Tuple
-from PIL import Image, ImageFont, ImageDraw
 from hbutils.color import rnd_colors, Color
-def plot_detection(pil_img: Image.Image, detection: List[Tuple[Tuple[float, float, float, float], int, float]],
-                   captions: List[str], text_padding: int = 5, font: ImageFont.ImageFont = None,
-                   no_label: bool = False):
-    new_img = pil_img.copy()
-    draw = ImageDraw.Draw(new_img, mode='RGBA')
-    _colors = list(map(str, rnd_colors(len(captions))))
-    _caption_map = {i: name for i, name in enumerate(captions)}
     for (xmin, ymin, xmax, ymax), label, score in detection:
-        box_color = _colors[label]
         draw.rectangle((xmin, ymin, xmax, ymax), outline=box_color, width=2)
         if not no_label:
-            label_text = _caption_map.get(label, str(label))
-            label_text = f'{label_text}: {score * 100:.2f}%'
-            font = font or ImageFont.load_default()
             _t_x0, _t_y0, _t_x1, _t_y1 = draw.textbbox((xmin, ymin), label_text, font=font)
             _t_width, _t_height = _t_x1 - _t_x0, _t_y1 - _t_y0
             if ymin - _t_height - text_padding < 0:
@@ -33,4 +73,4 @@ def plot_detection(pil_img: Image.Image, detection: List[Tuple[Tuple[float, floa
             draw.rectangle(_t_text_rect, fill=str(Color(box_color, alpha=0.5)))
             draw.text(_t_text_co, label_text, fill="black", font=font)
-    return new_img

+"""
+Overview:
+    Visualize the detection results.
+    See :func:`imgutils.detect.face.detect_faces` and :func:`imgutils.detect.person.detect_person` for examples.
+"""
+from typing import List, Tuple, Optional
+from PIL import ImageFont, ImageDraw
 from hbutils.color import rnd_colors, Color
+from imgutils.data import ImageTyping, load_image
+def _try_get_font_from_matplotlib(fontsize: int = 12):
+    try:
+        # noinspection PyPackageRequirements
+        import matplotlib
+    except (ModuleNotFoundError, ImportError):
+        return None
+    else:
+        # noinspection PyPackageRequirements
+        from matplotlib.font_manager import findfont, FontProperties
+        font = findfont(FontProperties(family=['sans-serif']))
+        return ImageFont.truetype(font, fontsize)
+def detection_visualize(image: ImageTyping, detection: List[Tuple[Tuple[float, float, float, float], str, float]],
+                        labels: Optional[List[str]] = None, text_padding: int = 6, fontsize: int = 12,
+                        no_label: bool = False):
+    """
+    Overview:
+        Visualize the results of the object detection.
+    :param image: Image to be detected.
+    :param detection: The detection results list, each item includes the detected area `(x0, y0, x1, y1)`,
+        the target type (a label string such as `head` or `person`) and the target confidence score.
+    :param labels: An array of known labels. If not provided, the labels will be automatically detected
+        from the given ``detection``.
+    :param text_padding: Text padding of the labels. Default is ``6``.
+    :param fontsize: Font size of the labels. At runtime, an attempt will be made to retrieve the font used
+        for rendering from `matplotlib`. Therefore, if `matplotlib` is not installed, only the default pixel font
+        provided with `Pillow` can be used, and the font size cannot be changed.
+    :param no_label: Do not show labels. Default is ``False``.
+    :return: A `PIL` image with the same size as the provided image `image`, which contains the original image
+        content as well as the visualized bounding boxes.
+    Examples::
+        See :func:`imgutils.detect.face.detect_faces` and :func:`imgutils.detect.person.detect_person` for examples.
+    """
+    image = load_image(image, force_background=None, mode='RGBA')
+    visual_image = image.copy()
+    draw = ImageDraw.Draw(visual_image, mode='RGBA')
+    font = _try_get_font_from_matplotlib(fontsize) or ImageFont.load_default()
+    labels = sorted(labels or {label for _, label, _ in detection})
+    _colors = list(map(str, rnd_colors(len(labels))))
+    _color_map = dict(zip(labels, _colors))
     for (xmin, ymin, xmax, ymax), label, score in detection:
+        box_color = _color_map[label]
         draw.rectangle((xmin, ymin, xmax, ymax), outline=box_color, width=2)
         if not no_label:
+            label_text = f'{label}: {score * 100:.2f}%'
             _t_x0, _t_y0, _t_x1, _t_y1 = draw.textbbox((xmin, ymin), label_text, font=font)
             _t_width, _t_height = _t_x1 - _t_x0, _t_y1 - _t_y0
             if ymin - _t_height - text_padding < 0:
             draw.rectangle(_t_text_rect, fill=str(Color(box_color, alpha=0.5)))
             draw.text(_t_text_co, label_text, fill="black", font=font)
+    return visual_image

yolo_.py CHANGED Viewed

@@ -86,24 +86,25 @@ def _xy_postprocess(x, y, old_size, new_size):
     return x, y
-def _data_simple_postprocess(output, conf_threshold, iou_threshold, old_size, new_size):
-    output = output[:, output[-1, :] > conf_threshold]
-    boxes = output[:4, :].transpose(1, 0)
-    scores = output[4, :]
-    records = sorted(zip(boxes, scores), key=lambda x: -x[1])
-    if not records:
         return []
-    boxes = _yolo_xywh2xyxy(np.stack([bx for bx, _ in records]))
-    scores = np.stack([score for _, score in records])
-    idx = _yolo_nms(boxes, scores, thresh=iou_threshold)
     boxes, scores = boxes[idx], scores[idx]
     detections = []
     for box, score in zip(boxes, scores):
         x0, y0 = _xy_postprocess(box[0], box[1], old_size, new_size)
         x1, y1 = _xy_postprocess(box[2], box[3], old_size, new_size)
-        detections.append(((x0, y0, x1, y1), float(score)))
     return detections

     return x, y
+def _data_postprocess(output, conf_threshold, iou_threshold, old_size, new_size, labels: List[str]):  # raw model output -> list of ((x0, y0, x1, y1), label, score)
+    max_scores = output[4:, :].max(axis=0)  # rows 4+ are read as per-class scores; take the best one per column
+    output = output[:, max_scores > conf_threshold].transpose(1, 0)  # keep columns above the threshold, then put candidates on axis 0
+    boxes = output[:, :4]  # first four values of each candidate
+    scores = output[:, 4:]  # remaining values: one score per class
+    filtered_max_scores = scores.max(axis=1)  # single score per kept candidate, used for NMS below
+    if not boxes.size:  # nothing passed the confidence threshold
         return []
+    boxes = _yolo_xywh2xyxy(boxes)  # presumably converts (x, y, w, h) boxes to corner form, judging by the helper name - confirm
+    idx = _yolo_nms(boxes, filtered_max_scores, thresh=iou_threshold)  # indices selected by the NMS helper
     boxes, scores = boxes[idx], scores[idx]
     detections = []
     for box, score in zip(boxes, scores):
         x0, y0 = _xy_postprocess(box[0], box[1], old_size, new_size)  # presumably maps back to original-image coordinates - confirm
         x1, y1 = _xy_postprocess(box[2], box[3], old_size, new_size)
+        max_score_id = score.argmax()  # index of the highest-scoring class for this box
+        detections.append(((x0, y0, x1, y1), labels[max_score_id], float(score[max_score_id])))
     return detections