chiyoi committed
Commit 5b889a9
1 Parent(s): 9f004b3
app.py CHANGED
@@ -1,24 +1,20 @@
 import tempfile
-
 import gradio as gr
 import tensorflow as tf
 from moviepy.editor import VideoFileClip
 from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
-from ultralytics import YOLO
 
-from configurations import *
-from core.data import format_frame
-from core.model import load_classifier
-from core.inference import detect_object, classify_action, draw_boxes
-print("Tensorflow version " + tf.__version__)
+from configuration import Config
+from model import load_classifier, load_detector
+from inference import format_frame, detect_object, classify_action, draw_boxes
+config = Config()
+print(f'TensorFlow {tf.__version__}')
 
-print('Load classifier.')
-classifier_path = 'weights/classifier-8-epoch10.keras'
-classifier = load_classifier(classifier_path)
+print(f'Load classifier from {config.classifier_path}')
+classifier = load_classifier(config)
 
 print('Load detector.')
-detector_path = 'weights/yolov8n.pt'
-detector = YOLO(detector_path)
+detector = load_detector(config)
 
 def fn(video: gr.Video):
     print('Process video.')
@@ -30,18 +26,18 @@ def fn(video: gr.Video):
     actions = []
     detections = ([], [])
     for i, frame in enumerate(clip.iter_frames()):
-        if i % classify_action_frame_steps == 0:
+        if i % config.classify_action_frame_steps == 0:
             frames.append(format_frame(frame))
-        if i % detect_object_frame_steps == 0:
+        if i % config.detect_object_frame_steps == 0:
             print(f'Detect object: Frame {i}')
             detections = detect_object(detector, frame)
-        if len(frames) == classify_action_num_frames:
+        if len(frames) == config.classify_action_num_frames:
             print(f'Classify action: Until frame {i}')
-            actions = classify_action(classifier, frames)
+            actions = classify_action(classifier, frames, config.id_to_name)
             frames = []
         frame = draw_boxes(frame, detections, actions)
         processed_frames.append(frame)
-        if i % yield_frame_steps == 0:
+        if i % config.yield_frame_steps == 0:
             yield frame, None
     processed_clip = ImageSequenceClip(processed_frames, clip.fps)
     processed_clip.audio = clip.audio
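
For orientation: fn is a generator, so Gradio streams its intermediate yields. A minimal sketch of how it is typically wired up (the Interface call below is an assumption; the wiring is not part of this diff):

# Hypothetical wiring, not shown in this commit: fn yields (frame, None)
# per preview step, so the outputs pair a live image with the final video.
demo = gr.Interface(
    fn=fn,
    inputs=gr.Video(),
    outputs=[gr.Image(label='Preview'), gr.Video(label='Processed')],
)
demo.launch()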
configuration.py ADDED
@@ -0,0 +1,32 @@
+class Config:
+    num_frames = 8
+    frame_step = 15
+    resolution = 224
+    frame_size = (resolution, resolution)
+    id_to_name = {
+        0: 'Flying',
+        1: 'Landing',
+        2: 'Other',
+        3: 'Straight Taxiing',
+        4: 'Takeoff',
+        5: 'Turning Maneuver',
+    }
+    name_to_id = {
+        'Flying': 0,
+        'Landing': 1,
+        'Other': 2,
+        'Straight Taxiing': 3,
+        'Takeoff': 4,
+        'Turning Maneuver': 5,
+    }
+
+    model_id = 'a0'
+    detector_path = 'weights/yolov8n.pt'
+    classifier_path = 'weights/classifier-8-epoch10.keras'
+    num_classes = len(id_to_name)
+    input_shape = (1, num_frames, resolution, resolution, 3)
+
+    detect_object_frame_steps = 5
+    classify_action_frame_steps = 15
+    classify_action_num_frames = 8
+    yield_frame_steps = 10
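
Since every field is a class attribute, run-specific variants can be expressed by subclassing; a small sketch (the FastConfig name is hypothetical):

# Hypothetical variant for lower-latency inference: detect less often.
from configuration import Config

class FastConfig(Config):
    detect_object_frame_steps = 15
    yield_frame_steps = 30

config = FastConfig()
assert config.frame_size == (224, 224)    # inherited from Config
assert config.id_to_name[4] == 'Takeoff'  # label maps are shared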
configurations.py DELETED
@@ -1,48 +0,0 @@
-# Data
-data_dir = 'storage/dataset'
-training_ratio = 0.7
-validation_ratio = 0.02
-num_frames = 8
-frame_step = 1
-resolution = 224
-frame_size = (resolution, resolution)
-
-id_to_name = {
-    0: 'Flying',
-    1: 'Landing',
-    2: 'Other',
-    3: 'Straight Taxiing',
-    4: 'Takeoff',
-    5: 'Turning Maneuver',
-}
-
-name_to_id = {
-    'Flying': 0,
-    'Landing': 1,
-    'Other': 2,
-    'Straight Taxiing': 3,
-    'Takeoff': 4,
-    'Turning Maneuver': 5,
-}
-
-# Model
-model_id = 'a0'
-checkpoint_dir = f'storage/pretrained_weights/movinet_{model_id}_base'
-num_classes = 6
-
-# Inference
-detect_object_frame_steps = 10
-classify_action_frame_steps = 15
-classify_action_num_frames = 8
-yield_frame_steps = 10
-
-# Train
-train_id = 8
-batch_size = 16
-learning_rate = 0.001
-epochs = 15
-model_save_path = f'storage/output/classifier-{train_id}.keras'
-log_dir = f'storage/logs/classifier-{train_id}.log'
-
-# Train more
-initial_epoch = 0
core/data.py DELETED
@@ -1,71 +0,0 @@
-from pathlib import Path
-import random
-from typing import Literal
-import cv2
-import numpy as np
-import tensorflow as tf
-
-from configurations import *
-
-def format_frame(frame):
-    frame = tf.image.convert_image_dtype(frame, tf.float32)
-    frame = tf.image.resize_with_pad(frame, *frame_size)
-    return frame
-
-def pick_frames(video: str):
-    capture = cv2.VideoCapture(video)
-    if not capture.isOpened(): raise ValueError('Video file could not be opened.')
-    total_frames = capture.get(cv2.CAP_PROP_FRAME_COUNT)
-    need_frames = 1 + (num_frames - 1) * frame_step
-    if need_frames <= total_frames:
-        start = random.randint(0, total_frames - need_frames + 1)
-        capture.set(cv2.CAP_PROP_POS_FRAMES, start)
-    frames = []
-    for _ in range(num_frames):
-        for _ in range(frame_step):
-            ok, frame = capture.read()
-        if ok: frames.append(format_frame(frame))
-        else: frames.append(np.zeros(frame_size + (3,)))
-    capture.release()
-    frames = np.array(frames)
-    frames = frames[..., [2, 1, 0]]
-    return frames
-
-def Data():
-    data_dir_path = Path(data_dir)
-    return {
-        'training': {
-            a.name: (
-                lambda ps: ps[
-                    :int(len(ps) * training_ratio)])(
-                [x for x in a.iterdir()])
-            for a in data_dir_path.iterdir()},
-        'validation': {
-            a.name: (
-                lambda ps: ps[
-                    int(len(ps) * training_ratio):
-                    int(len(ps) * (training_ratio + validation_ratio))])(
-                [x for x in a.iterdir()])
-            for a in data_dir_path.iterdir()},
-        'testing': {
-            a.name: (
-                lambda ps: ps[
-                    int(len(ps) * (training_ratio + validation_ratio)):])(
-                [x for x in a.iterdir()])
-            for a in data_dir_path.iterdir()},
-    }
-
-def FrameGenerator(split: Literal['training', 'validation']):
-    data = Data()
-    def generator():
-        pairs = [
-            (str(video), class_name)
-            for class_name, videos in data[split].items()
-            for video in videos
-        ]
-        random.shuffle(pairs)
-        for video, class_name in pairs:
-            frames = pick_frames(video)
-            label = name_to_id[class_name]
-            yield frames, label
-    return generator
core/model.py DELETED
@@ -1,38 +0,0 @@
-import tensorflow as tf
-from tensorflow import keras
-from official.projects.movinet.modeling import movinet
-from official.projects.movinet.modeling import movinet_model
-
-from configurations import *
-
-def load_backbone():
-    return movinet.Movinet()
-
-def build_classifier():
-    backbone = load_backbone()
-    model = movinet_model.MovinetClassifier(
-        backbone=backbone,
-        num_classes=600)
-    checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
-    checkpoint = tf.train.Checkpoint(model=model)
-    status = checkpoint.restore(checkpoint_path)
-    status.assert_existing_objects_matched()
-    model.build([batch_size, num_frames, resolution, resolution, 3])
-    output = keras.layers.Dense(num_classes)
-    return keras.Sequential(layers=[model, output])
-
-def load_classifier(classifier_path):
-    backbone = load_backbone()
-    model = movinet_model.MovinetClassifier(
-        backbone=backbone,
-        num_classes=600)
-    model.build([batch_size, num_frames, resolution, resolution, 3])
-    output = keras.layers.Dense(num_classes)
-    model = keras.Sequential(layers=[model, output])
-    model.load_weights(classifier_path)
-    return model
-
-def compile_classifier(model):
-    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
-    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
-    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
core/inference.py → inference.py RENAMED
@@ -2,12 +2,16 @@ from imgviz import instances2rgb
 import tensorflow as tf
 import numpy as np
 
-from configurations import *
-from core.data import format_frame
+from configuration import Config
 
 # detections: (classes: list of class_name, boxes: list of [x1, y1, x2, y2])
 # actions: list of f'{action_name}: {confidence}'
 
+def format_frame(frame, config: Config):
+    frame = tf.image.convert_image_dtype(frame, tf.float32)
+    frame = tf.image.resize_with_pad(frame, *config.frame_size)
+    return frame
+
 def detect_object(detector, frame):
     result = detector(frame, classes=4, verbose=False)[0]
     classes = result.boxes.cls.numpy()
@@ -18,10 +22,9 @@ def detect_object(detector, frame):
     )
     return detections
 
-def classify_action(classifier, frames):
+def classify_action(classifier, frames, id_to_name):
     actions = []
     frames = np.array(frames)
-    # frames = frames[..., [2, 1, 0]]
     frames = tf.expand_dims(frames, 0)
     output = classifier(frames)
     confidences = tf.nn.softmax(output).numpy()[0]
@@ -62,7 +65,7 @@ def draw_boxes(frame, detections, actions):
     )
     return frame
 
-def FrameProcessor(detector, classifier):
+def FrameProcessor(detector, classifier, config: Config):
     current_frame = 0
     frames = []
     actions = []
@@ -70,12 +73,12 @@ def FrameProcessor(detector, classifier):
     def process_frame(frame):
         nonlocal current_frame, frames, actions, detections
         current_frame += 1
-        if current_frame % classify_action_frame_steps == 0:
+        if current_frame % config.classify_action_frame_steps == 0:
             frames.append(format_frame(frame))
-        if current_frame % detect_object_frame_steps == 0:
+        if current_frame % config.detect_object_frame_steps == 0:
             print(f'Detect object: Frame {current_frame}')
             detections = detect_object(detector, frame)
-        if len(frames) == classify_action_num_frames:
+        if len(frames) == config.classify_action_num_frames:
             print(f'Classify action: Until frame {current_frame}')
             actions = classify_action(classifier, frames)
             frames = []
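
Worth flagging: format_frame now requires a config argument and classify_action an id_to_name argument, but the unchanged call sites inside fn and process_frame still use the old arity. A hypothetical follow-up fix, plus an illustrative driver loop for the FrameProcessor closure (neither is part of this commit):

# Hypothetical follow-up: thread the new arguments through the call sites.
#     frames.append(format_frame(frame, config))
#     actions = classify_action(classifier, frames, config.id_to_name)

# Illustrative driver; assumes process_frame returns the annotated frame
# (its tail lies outside the hunks shown above).
from moviepy.editor import VideoFileClip
from configuration import Config

process_frame = FrameProcessor(detector, classifier, Config())
clip = VideoFileClip('input.mp4')  # hypothetical input path
for frame in clip.iter_frames():
    annotated = process_frame(frame)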
model.py ADDED
@@ -0,0 +1,54 @@
+import tensorflow as tf
+from tensorflow import keras
+from ultralytics import YOLO
+from official.projects.movinet.modeling.movinet import Movinet
+from official.projects.movinet.modeling.movinet_model import MovinetClassifier
+
+from configuration import Config
+
+class AttentionDenseClassifierHead(keras.layers.Layer):
+    def __init__(self, attention_heads, dense_units, dropout_rate=0.2, **kwargs):
+        super().__init__(**kwargs)
+        self.attention = keras.layers.MultiHeadAttention(num_heads=attention_heads, key_dim=1)
+        self.normalization = keras.layers.LayerNormalization(epsilon=1e-6)
+        self.dropout = keras.layers.Dropout(dropout_rate)
+        self.dense = keras.layers.Dense(dense_units, activation='softmax')
+
+    def call(self, x, training):
+        y = tf.expand_dims(x, -1)
+        y = self.attention(query=y, key=y, value=y)
+        y = tf.squeeze(y, axis=-1)
+        y = self.dropout(y, training=training)
+        y = self.normalization(x + y*0.01)
+        y = self.dense(y)
+        return y
+
+def build_movinet(output_size, config: Config):
+    model = MovinetClassifier(
+        backbone=Movinet(model_id=config.model_id),
+        num_classes=output_size)
+    model.build(config.input_shape)
+    return model
+
+def build_classifier_head(input_size, config: Config):
+    inputs = keras.Input(shape=(input_size,))
+    classifier = AttentionDenseClassifierHead(2, config.num_classes)(inputs)
+    model = keras.Model(inputs=inputs, outputs=classifier)
+    return model
+
+def build_model(movinet, classifier_head):
+    return keras.models.Sequential([movinet, classifier_head])
+
+def load_classifier(config: Config):
+    movinet = build_movinet(600, config)
+    classifier_head = build_classifier_head(600, config)
+    model = build_model(movinet, classifier_head)
+    model.load_weights(config.classifier_path)
+    return model
+
+def load_detector(config: Config):
+    return YOLO(config.detector_path)
+
+def compile_classifier(model, config: Config):
+    optimizer = keras.optimizers.Adam(learning_rate=config.learning_rate)
+    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
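
For orientation: MoViNet-A0 is built as a 600-way Kinetics classifier whose logits feed the attention-dense head, which maps them to the six aircraft-action classes. A minimal smoke test of that shape contract (assumes the weight file and tf-models-official are installed):

import numpy as np
from configuration import Config
from model import load_classifier

# Illustrative check: one zero clip of shape (1, 8, 224, 224, 3)
# should come back as (1, 6) softmax scores.
config = Config()
model = load_classifier(config)
clip = np.zeros(config.input_shape, dtype=np.float32)
scores = model(clip)
assert scores.shape == (1, config.num_classes)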
requirements.txt CHANGED
@@ -1,7 +1,7 @@
-tensorflow
 numpy
 opencv-python
-tf-models-official
+tensorflow==2.15.0
+tf-models-official==2.15.0
 ultralytics
 imgviz
 moviepy
weights/classifier-7.keras DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:13a9436ec0971fe72b53f03d9dd57b89a7c48a4cb82380e14b298c3e2d712f50
-size 25261904