File size: 2,963 Bytes
139dd3e
 
24c941c
139dd3e
5b889a9
139dd3e
 
 
 
5b889a9
 
 
 
 
139dd3e
 
 
 
 
 
049f239
139dd3e
 
5b889a9
139dd3e
 
 
c23be95
 
139dd3e
 
 
08aaa60
139dd3e
 
24c941c
139dd3e
 
 
 
 
 
 
 
 
 
 
24c941c
 
 
 
 
 
139dd3e
 
049f239
139dd3e
 
 
 
 
 
 
049f239
139dd3e
 
08ca080
e04c82d
 
6cd04e1
24c941c
e04c82d
24c941c
e04c82d
ec95343
24c941c
 
5b889a9
139dd3e
 
 
 
 
 
 
5b889a9
139dd3e
5b889a9
139dd3e
 
5b889a9
139dd3e
160ded7
139dd3e
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import tensorflow as tf
import numpy as np
from imgviz import instances2rgb, label2rgb

from configuration import Config

# detections: (classes: list of class_name, boxes: list of [x1, y1, x2, y2])
# actions: list of f'{action_name}: {confidence}'

def format_frame(frame, config: Config):
  """Prepare a raw frame for the action classifier.

  Converts the image to float32 (scaling integer pixel values to [0, 1])
  and letterbox-resizes it to `config.frame_size` with padding so the
  aspect ratio is preserved.
  """
  as_float = tf.image.convert_image_dtype(frame, tf.float32)
  resized = tf.image.resize_with_pad(as_float, *config.frame_size)
  return resized

def detect_object(detector, frame, classes=4):
  """Run the object detector on a single frame.

  Args:
    detector: detector callable (e.g. a YOLO model); invoked as
      ``detector(frame, classes=..., verbose=False)`` and expected to return
      a sequence whose first element exposes ``.boxes.cls`` / ``.boxes.xyxy``
      tensors (with a ``.numpy()`` method) and a ``.names`` id->name mapping.
    frame: image passed straight through to the detector.
    classes: class id(s) the detector should be restricted to. Defaults to 4,
      the value previously hard-coded here, so existing callers are unchanged.

  Returns:
    Tuple ``(class_names, boxes)`` where ``class_names`` is a list of
    capitalized class names and ``boxes`` is an (N, 4) array of
    ``[x1, y1, x2, y2]`` coordinates.
  """
  result = detector(frame, classes=classes, verbose=False)[0]
  class_ids = result.boxes.cls.numpy()
  boxes = result.boxes.xyxy.numpy()
  names = [result.names[i].capitalize() for i in class_ids]
  return (names, boxes)

def classify_action(classifier, frames, id_to_name=None, threshold=0.3,
                    other_class_id=2):
  """Classify the buffered clip of frames into action labels.

  Args:
    classifier: model callable taking a (1, T, H, W, C) batch and returning
      per-class confidences.
    frames: sequence of preprocessed frames forming one clip.
    id_to_name: optional mapping from action class id to display name.
      Defaults to None, in which case a generic 'Action <id>' label is used —
      this also makes callers that omit the argument work instead of raising
      a TypeError.
    threshold: minimum confidence for an action to be reported
      (previously hard-coded to 0.3).
    other_class_id: class id of the background/"other" class that is never
      reported (previously hard-coded to 2).

  Returns:
    List of strings formatted as '<name>: <confidence>' for every class
    above the threshold, excluding the "other" class.
  """
  actions = []
  batch = tf.expand_dims(np.array(frames), 0)  # add a batch dimension
  confidences = tf.squeeze(classifier(batch)).numpy()
  for class_id, confidence in enumerate(confidences):
    if class_id == other_class_id or confidence <= threshold:
      continue
    if id_to_name is not None:
      name = id_to_name[class_id]
    else:
      name = f'Action {class_id}'
    actions.append(f'{name}: {confidence:.2f}')
  return actions

def draw_boxes(frame, detections, actions, do_classify=True):
  """Overlay detection boxes (and optionally action captions) on a frame.

  Args:
    frame: HxWx3 image to draw on.
    detections: tuple ``(class_names, boxes)`` with boxes as
      ``[x1, y1, x2, y2]``.
    actions: list of 'name: confidence' strings; they are attached to the
      caption of the largest box only.
    do_classify: when True, append the action lines to the caption of the
      largest detection; when False, captions are the class names alone.
      Defaults to True so callers that always classify may omit it
      (the call in FrameProcessor passes only three arguments).

  Returns:
    The frame with boxes and captions rendered by imgviz.
  """
  (classes, boxes) = detections
  # Find the largest detection: action captions are shown on it only.
  max_area = 0
  max_area_id = 0
  for i, box in enumerate(boxes):
    area = (box[3] - box[1]) * (box[2] - box[0])
    if area > max_area:
      max_area = area
      max_area_id = i
  labels = [0 for _ in classes]      # one shared label -> one shared color
  colormap = [(0x39, 0xc5, 0xbb)]
  line_width = 2
  if do_classify:
    captions = [
      f'{class_name}\n' + '\n'.join(actions if i == max_area_id else [])
      for (i, class_name) in enumerate(classes)]
  else:
    captions = classes
  # imgviz expects boxes in [y1, x1, y2, x2] order.
  bboxes = [
    [box[1], box[0], box[3], box[2]]
    for box in boxes]
  frame = instances2rgb(
    frame,
    labels=labels,
    captions=captions,
    bboxes=bboxes,
    colormap=colormap,
    font_size=20,
    line_width=line_width)
  return frame

def draw_classes(frame, actions):
  """Render the action strings as a single text caption over the frame.

  A zero-valued label map covers the whole image and alpha=0 leaves the
  frame pixels untouched, so only the legend text drawn by imgviz appears.
  """
  height, width, _channels = frame.shape
  label_map = np.zeros((height, width), dtype=int)
  caption = '\n'.join(actions)
  annotated = label2rgb(
    label=label_map,
    image=frame,
    label_names=[caption],
    alpha=0)
  return annotated

def FrameProcessor(detector, classifier, config: Config, id_to_name=None):
  """Create a stateful per-frame processing callback for a video stream.

  Every ``detect_object_frame_steps``-th frame is run through the detector;
  every ``classify_action_frame_steps``-th frame is buffered, and once
  ``classify_action_num_frames`` frames are collected the clip is
  classified into actions. Results are drawn onto each returned frame.

  Args:
    detector: object detector, forwarded to detect_object.
    classifier: action classifier, forwarded to classify_action.
    config: Config providing the frame-step and clip-length settings.
    id_to_name: optional mapping of action class id -> display name. When
      omitted, generic 'Action <id>' labels are used. New optional
      parameter; existing callers are unaffected.

  Returns:
    A function ``process_frame(frame) -> annotated frame``.
  """
  class _FallbackNames(dict):
    # Maps any missing action id to a generic placeholder label.
    def __missing__(self, key):
      return f'Action {key}'

  names = id_to_name if id_to_name is not None else _FallbackNames()
  current_frame = 0
  frames = []
  actions = []
  detections = ([], [])

  def process_frame(frame):
    nonlocal current_frame, frames, actions, detections
    current_frame += 1
    if current_frame % config.classify_action_frame_steps == 0:
      # BUG FIX: format_frame requires the config argument;
      # the original call passed only the frame.
      frames.append(format_frame(frame, config))
    if current_frame % config.detect_object_frame_steps == 0:
      print(f'Detect object: Frame {current_frame}')
      detections = detect_object(detector, frame)
    if len(frames) == config.classify_action_num_frames:
      print(f'Classify action: Until frame {current_frame}')
      # BUG FIX: classify_action requires an id->name mapping;
      # the original call omitted it.
      actions = classify_action(classifier, frames, names)
      frames = []
    # BUG FIX: draw_boxes requires do_classify; classification is always
    # enabled in this pipeline, so pass True.
    frame = draw_boxes(frame, detections, actions, True)
    return frame

  return process_frame