narugo1992 committed on
Commit
3903f4f
1 Parent(s): 7e9b170

dev(narugo): add manbits detect

Browse files
Files changed (6) hide show
  1. app.py +25 -0
  2. face.py +12 -8
  3. manbits.py +45 -0
  4. person.py +11 -9
  5. plot.py +55 -15
  6. yolo_.py +12 -11
app.py CHANGED
@@ -3,6 +3,7 @@ import os
3
  import gradio as gr
4
 
5
  from face import _FACE_MODELS, _DEFAULT_FACE_MODEL, _gr_detect_faces
 
6
  from person import _PERSON_MODELS, _DEFAULT_PERSON_MODEL, _gr_detect_person
7
 
8
  if __name__ == '__main__':
@@ -56,4 +57,28 @@ if __name__ == '__main__':
56
  outputs=[gr_person_output_image],
57
  )
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  demo.queue(os.cpu_count()).launch()
 
3
  import gradio as gr
4
 
5
  from face import _FACE_MODELS, _DEFAULT_FACE_MODEL, _gr_detect_faces
6
+ from manbits import _MANBIT_MODELS, _DEFAULT_MANBIT_MODEL, _gr_detect_manbits
7
  from person import _PERSON_MODELS, _DEFAULT_PERSON_MODEL, _gr_detect_person
8
 
9
  if __name__ == '__main__':
 
57
  outputs=[gr_person_output_image],
58
  )
59
 
60
+ with gr.Tab('Manbits Detection'):
61
+ with gr.Row():
62
+ with gr.Column():
63
+ gr_manbit_input_image = gr.Image(type='pil', label='Original Image')
64
+ gr_manbit_model = gr.Dropdown(_MANBIT_MODELS, value=_DEFAULT_MANBIT_MODEL, label='Model')
65
+ gr_manbit_infer_size = gr.Slider(480, 1600, value=1216, step=32, label='Max Infer Size')
66
+ with gr.Row():
67
+ gr_manbit_iou_threshold = gr.Slider(0.0, 1.0, 0.7, label='IOU Threshold')
68
+ gr_manbit_score_threshold = gr.Slider(0.0, 1.0, 0.25, label='Score Threshold')
69
+
70
+ gr_manbit_submit = gr.Button(value='Submit', variant='primary')
71
+
72
+ with gr.Column():
73
+ gr_manbit_output_image = gr.Image(type='pil', label="Labeled")
74
+
75
+ gr_manbit_submit.click(
76
+ _gr_detect_manbits,
77
+ inputs=[
78
+ gr_manbit_input_image, gr_manbit_model,
79
+ gr_manbit_infer_size, gr_manbit_score_threshold, gr_manbit_iou_threshold,
80
+ ],
81
+ outputs=[gr_manbit_output_image],
82
+ )
83
+
84
  demo.queue(os.cpu_count()).launch()
face.py CHANGED
@@ -1,11 +1,12 @@
1
  from functools import lru_cache
 
2
 
3
  from huggingface_hub import hf_hub_download
4
  from imgutils.data import ImageTyping, load_image, rgb_encode
5
 
6
  from onnx_ import _open_onnx_model
7
- from plot import plot_detection
8
- from yolo_ import _image_preprocess, _data_simple_postprocess
9
 
10
  _FACE_MODELS = [
11
  'face_detect_best_n.onnx',
@@ -22,18 +23,21 @@ def _open_face_detect_model(model_name):
22
  ))
23
 
24
 
25
- def detect_faces(image: ImageTyping, model_name: str, max_infer_size=1216,
26
- conf_threshold: float = 0.25, iou_threshold: float = 0.7):
 
 
 
 
27
  image = load_image(image, mode='RGB')
28
  new_image, old_size, new_size = _image_preprocess(image, max_infer_size)
29
 
30
  data = rgb_encode(new_image)[None, ...]
31
- output, = _open_face_detect_model(model_name).run(['output0'], {'images': data})
32
- return _data_simple_postprocess(output[0], conf_threshold, iou_threshold, old_size, new_size)
33
 
34
 
35
  def _gr_detect_faces(image: ImageTyping, model_name: str, max_infer_size=1216,
36
  conf_threshold: float = 0.25, iou_threshold: float = 0.7):
37
  ret = detect_faces(image, model_name, max_infer_size, conf_threshold, iou_threshold)
38
- detections = [(box, 0, score) for box, score in ret]
39
- return plot_detection(image, detections, ['head'])
 
1
  from functools import lru_cache
2
+ from typing import List, Tuple
3
 
4
  from huggingface_hub import hf_hub_download
5
  from imgutils.data import ImageTyping, load_image, rgb_encode
6
 
7
  from onnx_ import _open_onnx_model
8
+ from plot import detection_visualize
9
+ from yolo_ import _image_preprocess, _data_postprocess
10
 
11
  _FACE_MODELS = [
12
  'face_detect_best_n.onnx',
 
23
  ))
24
 
25
 
26
+ _LABELS = ['head']
27
+
28
+
29
def detect_faces(image: ImageTyping, level: str = 's', max_infer_size=1216,
                 conf_threshold: float = 0.25, iou_threshold: float = 0.7) \
        -> List[Tuple[Tuple[int, int, int, int], str, float]]:
    """
    Overview:
        Run the face detection model on an image and return the labeled detections.

    :param image: Image to run detection on; it is loaded in ``RGB`` mode first.
    :param level: Value forwarded unchanged to ``_open_face_detect_model`` to select the model.
    :param max_infer_size: Size limit handed to ``_image_preprocess``. Default is ``1216``.
    :param conf_threshold: Score threshold handed to ``_data_postprocess``. Default is ``0.25``.
    :param iou_threshold: IOU threshold handed to ``_data_postprocess``. Default is ``0.7``.
    :return: The detection list produced by ``_data_postprocess``, labeled with ``_LABELS``.
    """
    # NOTE(review): the Gradio wrapper in this module passes a full model file name as ``level``,
    # while the default here is the bare string ``'s'`` -- confirm the opener accepts both.
    rgb_image = load_image(image, mode='RGB')
    resized_image, old_size, new_size = _image_preprocess(rgb_image, max_infer_size)

    # Add a leading batch axis before feeding the model.
    batch = rgb_encode(resized_image)[None, ...]
    session = _open_face_detect_model(level)
    (raw_output,) = session.run(['output0'], {'images': batch})
    return _data_postprocess(raw_output[0], conf_threshold, iou_threshold, old_size, new_size, _LABELS)
38
 
39
 
40
def _gr_detect_faces(image: ImageTyping, model_name: str, max_infer_size=1216,
                     conf_threshold: float = 0.25, iou_threshold: float = 0.7):
    """
    Overview:
        Gradio callback: detect faces on ``image`` and return the image with the detections drawn.

    :param image: Image to run detection on.
    :param model_name: Model selected in the UI, passed to ``detect_faces`` as its second argument.
    :param max_infer_size: Forwarded to ``detect_faces``. Default is ``1216``.
    :param conf_threshold: Forwarded to ``detect_faces``. Default is ``0.25``.
    :param iou_threshold: Forwarded to ``detect_faces``. Default is ``0.7``.
    :return: The result of ``detection_visualize`` for the detections found.
    """
    detections = detect_faces(
        image, model_name,
        max_infer_size, conf_threshold, iou_threshold,
    )
    return detection_visualize(image, detections, _LABELS)
 
manbits.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import lru_cache
2
+ from typing import List, Tuple
3
+
4
+ from huggingface_hub import hf_hub_download
5
+ from imgutils.data import ImageTyping, load_image, rgb_encode
6
+
7
+ from onnx_ import _open_onnx_model
8
+ from plot import detection_visualize
9
+ from yolo_ import _image_preprocess, _data_postprocess
10
+
11
+ _MANBIT_MODELS = [
12
+ 'manbits_detect_best_m.onnx',
13
+ ]
14
+ _DEFAULT_MANBIT_MODEL = _MANBIT_MODELS[0]
15
+
16
+
17
@lru_cache()
def _open_manbits_detect_model(model_name):
    """
    Overview:
        Fetch a manbits detection model from the ``deepghs/imgutils-models`` repository and open it.

        The result is memoized with ``lru_cache``, so each distinct ``model_name`` is opened once.

    :param model_name: File name of the model inside the repository's ``manbits_detect/`` directory.
    :return: Whatever ``_open_onnx_model`` returns for the downloaded file.
    """
    model_file = hf_hub_download('deepghs/imgutils-models', f'manbits_detect/{model_name}')
    return _open_onnx_model(model_file)
23
+
24
+
25
+ _LABELS = [
26
+ 'EXPOSED_BELLY', 'EXPOSED_BREAST_F', 'EXPOSED_BREAST_M',
27
+ 'EXPOSED_BUTTOCKS', 'EXPOSED_GENITALIA_F', 'EXPOSED_GENITALIA_M'
28
+ ]
29
+
30
+
31
def detect_manbits(image: ImageTyping, level: str = 'm', max_infer_size=1216,
                   conf_threshold: float = 0.25, iou_threshold: float = 0.7) \
        -> List[Tuple[Tuple[int, int, int, int], str, float]]:
    """
    Overview:
        Run the manbits detection model on an image and return the labeled detections.

    :param image: Image to run detection on; it is loaded in ``RGB`` mode first.
    :param level: Either a bare model level such as ``'m'`` or a full model file name such as
        ``'manbits_detect_best_m.onnx'``. Bare levels are expanded to
        ``manbits_detect_best_<level>.onnx``. Default is ``'m'``.
    :param max_infer_size: Size limit handed to ``_image_preprocess``. Default is ``1216``.
    :param conf_threshold: Score threshold handed to ``_data_postprocess``. Default is ``0.25``.
    :param iou_threshold: IOU threshold handed to ``_data_postprocess``. Default is ``0.7``.
    :return: The detection list produced by ``_data_postprocess``, labeled with ``_LABELS``.
    """
    # The opener looks the value up as a file under ``manbits_detect/``. A bare level (including
    # the default ``'m'``) is not a file name there, so expand it; full ``.onnx`` names, as passed
    # by the Gradio wrapper, go through untouched.
    model_name = level if level.endswith('.onnx') else f'manbits_detect_best_{level}.onnx'

    image = load_image(image, mode='RGB')
    new_image, old_size, new_size = _image_preprocess(image, max_infer_size)

    # Add a leading batch axis before feeding the model.
    data = rgb_encode(new_image)[None, ...]
    output, = _open_manbits_detect_model(model_name).run(['output0'], {'images': data})
    return _data_postprocess(output[0], conf_threshold, iou_threshold, old_size, new_size, _LABELS)
40
+
41
+
42
def _gr_detect_manbits(image: ImageTyping, model_name: str, max_infer_size=1216,
                       conf_threshold: float = 0.25, iou_threshold: float = 0.7):
    """
    Overview:
        Gradio callback: run manbits detection on ``image`` and return the image with the
        detections drawn.

    :param image: Image to run detection on.
    :param model_name: Model selected in the UI, passed to ``detect_manbits`` as its second argument.
    :param max_infer_size: Forwarded to ``detect_manbits``. Default is ``1216``.
    :param conf_threshold: Forwarded to ``detect_manbits``. Default is ``0.25``.
    :param iou_threshold: Forwarded to ``detect_manbits``. Default is ``0.7``.
    :return: The result of ``detection_visualize`` for the detections found.
    """
    detections = detect_manbits(
        image, model_name,
        max_infer_size, conf_threshold, iou_threshold,
    )
    return detection_visualize(image, detections, _LABELS)
person.py CHANGED
@@ -4,8 +4,8 @@ from huggingface_hub import hf_hub_download
4
  from imgutils.data import ImageTyping, load_image, rgb_encode
5
 
6
  from onnx_ import _open_onnx_model
7
- from plot import plot_detection
8
- from yolo_ import _image_preprocess, _data_simple_postprocess
9
 
10
  _PERSON_MODELS = [
11
  'person_detect_best_m.onnx',
@@ -22,18 +22,20 @@ def _open_person_detect_model(model_name):
22
  ))
23
 
24
 
25
- def detect_person(image: ImageTyping, model_name: str, max_infer_size=1216,
26
- conf_threshold: float = 0.25, iou_threshold: float = 0.7):
 
 
 
27
  image = load_image(image, mode='RGB')
28
  new_image, old_size, new_size = _image_preprocess(image, max_infer_size)
29
 
30
  data = rgb_encode(new_image)[None, ...]
31
- output, = _open_person_detect_model(model_name).run(['output0'], {'images': data})
32
- return _data_simple_postprocess(output[0], conf_threshold, iou_threshold, old_size, new_size)
33
 
34
 
35
  def _gr_detect_person(image: ImageTyping, model_name: str, max_infer_size=1216,
36
- conf_threshold: float = 0.25, iou_threshold: float = 0.7):
37
  ret = detect_person(image, model_name, max_infer_size, conf_threshold, iou_threshold)
38
- detections = [(box, 0, score) for box, score in ret]
39
- return plot_detection(image, detections, ['person'])
 
4
  from imgutils.data import ImageTyping, load_image, rgb_encode
5
 
6
  from onnx_ import _open_onnx_model
7
+ from plot import detection_visualize
8
+ from yolo_ import _image_preprocess, _data_postprocess
9
 
10
  _PERSON_MODELS = [
11
  'person_detect_best_m.onnx',
 
22
  ))
23
 
24
 
25
+ _LABELS = ['person']
26
+
27
+
28
def detect_person(image: ImageTyping, level: str = 's', max_infer_size=1216,
                  conf_threshold: float = 0.3, iou_threshold: float = 0.5):
    """
    Overview:
        Run the person detection model on an image and return the labeled detections.

    :param image: Image to run detection on; it is loaded in ``RGB`` mode first.
    :param level: Value forwarded unchanged to ``_open_person_detect_model`` to select the model.
    :param max_infer_size: Size limit handed to ``_image_preprocess``. Default is ``1216``.
    :param conf_threshold: Score threshold handed to ``_data_postprocess``. Default is ``0.3``.
    :param iou_threshold: IOU threshold handed to ``_data_postprocess``. Default is ``0.5``.
    :return: The detection list produced by ``_data_postprocess``, labeled with ``_LABELS``.
    """
    # NOTE(review): the Gradio wrapper in this module passes a full model file name as ``level``,
    # while the default here is the bare string ``'s'`` -- confirm the opener accepts both.
    rgb_image = load_image(image, mode='RGB')
    resized_image, old_size, new_size = _image_preprocess(rgb_image, max_infer_size)

    # Add a leading batch axis before feeding the model.
    batch = rgb_encode(resized_image)[None, ...]
    session = _open_person_detect_model(level)
    (raw_output,) = session.run(['output0'], {'images': batch})
    return _data_postprocess(raw_output[0], conf_threshold, iou_threshold, old_size, new_size, _LABELS)
36
 
37
 
38
def _gr_detect_person(image: ImageTyping, model_name: str, max_infer_size=1216,
                      conf_threshold: float = 0.3, iou_threshold: float = 0.5):
    """
    Overview:
        Gradio callback: detect persons on ``image`` and return the image with the detections drawn.

    :param image: Image to run detection on.
    :param model_name: Model selected in the UI, passed to ``detect_person`` as its second argument.
    :param max_infer_size: Forwarded to ``detect_person``. Default is ``1216``.
    :param conf_threshold: Forwarded to ``detect_person``. Default is ``0.3``.
    :param iou_threshold: Forwarded to ``detect_person``. Default is ``0.5``.
    :return: The result of ``detection_visualize`` for the detections found.
    """
    detections = detect_person(
        image, model_name,
        max_infer_size, conf_threshold, iou_threshold,
    )
    return detection_visualize(image, detections, _LABELS)
 
plot.py CHANGED
@@ -1,26 +1,66 @@
1
- from typing import List, Tuple
 
 
2
 
3
- from PIL import Image, ImageFont, ImageDraw
 
 
 
 
4
  from hbutils.color import rnd_colors, Color
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
 
7
- def plot_detection(pil_img: Image.Image, detection: List[Tuple[Tuple[float, float, float, float], int, float]],
8
- captions: List[str], text_padding: int = 5, font: ImageFont.ImageFont = None,
9
- no_label: bool = False):
10
- new_img = pil_img.copy()
11
- draw = ImageDraw.Draw(new_img, mode='RGBA')
 
12
 
13
- _colors = list(map(str, rnd_colors(len(captions))))
14
- _caption_map = {i: name for i, name in enumerate(captions)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  for (xmin, ymin, xmax, ymax), label, score in detection:
16
- box_color = _colors[label]
17
  draw.rectangle((xmin, ymin, xmax, ymax), outline=box_color, width=2)
18
 
19
  if not no_label:
20
- label_text = _caption_map.get(label, str(label))
21
- label_text = f'{label_text}: {score * 100:.2f}%'
22
-
23
- font = font or ImageFont.load_default()
24
  _t_x0, _t_y0, _t_x1, _t_y1 = draw.textbbox((xmin, ymin), label_text, font=font)
25
  _t_width, _t_height = _t_x1 - _t_x0, _t_y1 - _t_y0
26
  if ymin - _t_height - text_padding < 0:
@@ -33,4 +73,4 @@ def plot_detection(pil_img: Image.Image, detection: List[Tuple[Tuple[float, floa
33
  draw.rectangle(_t_text_rect, fill=str(Color(box_color, alpha=0.5)))
34
  draw.text(_t_text_co, label_text, fill="black", font=font)
35
 
36
- return new_img
 
1
+ """
2
+ Overview:
3
+ Visualize the detection results.
4
 
5
+ See :func:`imgutils.detect.face.detect_faces` and :func:`imgutils.detect.person.detect_person` for examples.
6
+ """
7
+ from typing import List, Tuple, Optional
8
+
9
+ from PIL import ImageFont, ImageDraw
10
  from hbutils.color import rnd_colors, Color
11
+ from imgutils.data import ImageTyping, load_image
12
+
13
+
14
def _try_get_font_from_matplotlib(fontsize: int = 12):
    """
    Overview:
        Try to build a scalable sans-serif font using the font lookup of ``matplotlib``.

    :param fontsize: Size passed to ``ImageFont.truetype``. Default is ``12``.
    :return: The loaded font, or ``None`` when ``matplotlib`` cannot be imported or the font file
        it points to cannot be loaded. Callers are expected to fall back to another font on ``None``.
    """
    try:
        # ``ModuleNotFoundError`` is a subclass of ``ImportError``, so one clause covers both.
        # noinspection PyPackageRequirements
        from matplotlib.font_manager import findfont, FontProperties
    except ImportError:
        return None

    font_path = findfont(FontProperties(family=['sans-serif']))
    try:
        return ImageFont.truetype(font_path, fontsize)
    except OSError:
        # The font file could not be read; report "no font" instead of crashing so the
        # caller's default-font fallback still applies.
        return None
25
 
26
 
27
+ def detection_visualize(image: ImageTyping, detection: List[Tuple[Tuple[float, float, float, float], str, float]],
28
+ labels: Optional[List[str]] = None, text_padding: int = 6, fontsize: int = 12,
29
+ no_label: bool = False):
30
+ """
31
+ Overview:
32
+ Visualize the results of the object detection.
33
 
34
+ :param image: Image be detected.
35
+ :param detection: The detection results list, each item includes the detected area `(x0, y0, x1, y1)`,
36
+ the target type (always `head`) and the target confidence score.
37
+ :param labels: An array of known labels. If not provided, the labels will be automatically detected
38
+ from the given ``detection``.
39
+ :param text_padding: Text padding of the labels. Default is ``6``.
40
+ :param fontsize: Font size of the labels. At runtime, an attempt will be made to retrieve the font used
41
+ for rendering from `matplotlib`. Therefore, if `matplotlib` is not installed, only the default pixel font
42
+ provided with `Pillow` can be used, and the font size cannot be changed.
43
+ :param no_label: Do not show labels. Default is ``False``.
44
+ :return: A `PIL` image with the same size as the provided image `image`, which contains the original image
45
+ content as well as the visualized bounding boxes.
46
+
47
+ Examples::
48
+ See :func:`imgutils.detect.face.detect_faces` and :func:`imgutils.detect.person.detect_person` for examples.
49
+ """
50
+ image = load_image(image, force_background=None, mode='RGBA')
51
+ visual_image = image.copy()
52
+ draw = ImageDraw.Draw(visual_image, mode='RGBA')
53
+ font = _try_get_font_from_matplotlib(fontsize) or ImageFont.load_default()
54
+
55
+ labels = sorted(labels or {label for _, label, _ in detection})
56
+ _colors = list(map(str, rnd_colors(len(labels))))
57
+ _color_map = dict(zip(labels, _colors))
58
  for (xmin, ymin, xmax, ymax), label, score in detection:
59
+ box_color = _color_map[label]
60
  draw.rectangle((xmin, ymin, xmax, ymax), outline=box_color, width=2)
61
 
62
  if not no_label:
63
+ label_text = f'{label}: {score * 100:.2f}%'
 
 
 
64
  _t_x0, _t_y0, _t_x1, _t_y1 = draw.textbbox((xmin, ymin), label_text, font=font)
65
  _t_width, _t_height = _t_x1 - _t_x0, _t_y1 - _t_y0
66
  if ymin - _t_height - text_padding < 0:
 
73
  draw.rectangle(_t_text_rect, fill=str(Color(box_color, alpha=0.5)))
74
  draw.text(_t_text_co, label_text, fill="black", font=font)
75
 
76
+ return visual_image
yolo_.py CHANGED
@@ -86,24 +86,25 @@ def _xy_postprocess(x, y, old_size, new_size):
86
  return x, y
87
 
88
 
89
- def _data_simple_postprocess(output, conf_threshold, iou_threshold, old_size, new_size):
90
- output = output[:, output[-1, :] > conf_threshold]
91
- boxes = output[:4, :].transpose(1, 0)
92
- scores = output[4, :]
93
- records = sorted(zip(boxes, scores), key=lambda x: -x[1])
94
-
95
- if not records:
 
96
  return []
97
 
98
- boxes = _yolo_xywh2xyxy(np.stack([bx for bx, _ in records]))
99
- scores = np.stack([score for _, score in records])
100
- idx = _yolo_nms(boxes, scores, thresh=iou_threshold)
101
  boxes, scores = boxes[idx], scores[idx]
102
 
103
  detections = []
104
  for box, score in zip(boxes, scores):
105
  x0, y0 = _xy_postprocess(box[0], box[1], old_size, new_size)
106
  x1, y1 = _xy_postprocess(box[2], box[3], old_size, new_size)
107
- detections.append(((x0, y0, x1, y1), float(score)))
 
108
 
109
  return detections
 
86
  return x, y
87
 
88
 
89
def _data_postprocess(output, conf_threshold, iou_threshold, old_size, new_size, labels: List[str]):
    """
    Turn one raw model output into a list of labeled detections.

    ``output`` is indexed as ``[row, candidate]``: rows ``0..3`` hold the box values and every row
    from ``4`` on holds the score of one label (in the order of ``labels``).

    :param output: Raw 2-D output array for a single image.
    :param conf_threshold: Candidates whose best label score is not above this value are dropped.
    :param iou_threshold: Threshold passed to ``_yolo_nms`` as ``thresh``.
    :param old_size: Passed through to ``_xy_postprocess`` for each box corner.
    :param new_size: Passed through to ``_xy_postprocess`` for each box corner.
    :param labels: Label names, indexed by the position of the best score of each candidate.
    :return: A list of ``((x0, y0, x1, y1), label, score)`` tuples; empty when no candidate
        passes ``conf_threshold``.
    """
    keep_mask = output[4:, :].max(axis=0) > conf_threshold
    # Switch to one row per candidate for the remaining steps.
    candidates = output[:, keep_mask].transpose(1, 0)
    boxes, scores = candidates[:, :4], candidates[:, 4:]
    best_scores = scores.max(axis=1)

    if boxes.size == 0:
        return []

    boxes = _yolo_xywh2xyxy(boxes)
    kept = _yolo_nms(boxes, best_scores, thresh=iou_threshold)

    detections = []
    for box, label_scores in zip(boxes[kept], scores[kept]):
        x0, y0 = _xy_postprocess(box[0], box[1], old_size, new_size)
        x1, y1 = _xy_postprocess(box[2], box[3], old_size, new_size)
        best = label_scores.argmax()
        detections.append(((x0, y0, x1, y1), labels[best], float(label_scores[best])))

    return detections