update
- app.py +13 -17
- configuration.py +32 -0
- configurations.py +0 -48
- core/data.py +0 -71
- core/model.py +0 -38
- core/inference.py → inference.py +11 -8
- model.py +54 -0
- requirements.txt +2 -2
- weights/classifier-7.keras +0 -3
app.py
CHANGED
@@ -1,24 +1,20 @@
 import tempfile
-
 import gradio as gr
 import tensorflow as tf
 from moviepy.editor import VideoFileClip
 from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
-from ultralytics import YOLO
 
-from configurations import *
-from core.model import load_classifier
-from core.inference import format_frame, detect_object, classify_action, draw_boxes
-
-print(
+from configuration import Config
+from model import load_classifier, load_detector
+from inference import format_frame, detect_object, classify_action, draw_boxes
+config = Config()
+print(f'TensorFlow {tf.__version__}')
 
-print('Load classifier.')
-
-classifier = load_classifier(classifier_path)
+print(f'Load classifier from {config.classifier_path}')
+classifier = load_classifier(config)
 
 print('Load detector.')
-
-detector = YOLO(detector_path)
+detector = load_detector(config)
 
 def fn(video: gr.Video):
     print('Process video.')
@@ -30,18 +26,18 @@ def fn(video: gr.Video):
     actions = []
     detections = ([], [])
     for i, frame in enumerate(clip.iter_frames()):
-        if i % classify_action_frame_steps == 0:
-            frames.append(format_frame(frame))
-        if i % detect_object_frame_steps == 0:
+        if i % config.classify_action_frame_steps == 0:
+            frames.append(format_frame(frame, config))
+        if i % config.detect_object_frame_steps == 0:
             print(f'Detect object: Frame {i}')
             detections = detect_object(detector, frame)
-        if len(frames) == classify_action_num_frames:
+        if len(frames) == config.classify_action_num_frames:
             print(f'Classify action: Until frame {i}')
-            actions = classify_action(classifier, frames)
+            actions = classify_action(classifier, frames, config.id_to_name)
             frames = []
         frame = draw_boxes(frame, detections, actions)
         processed_frames.append(frame)
-        if i % yield_frame_steps == 0:
+        if i % config.yield_frame_steps == 0:
            yield frame, None
     processed_clip = ImageSequenceClip(processed_frames, clip.fps)
     processed_clip.audio = clip.audio
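The hunks end before app.py's Gradio wiring, but `fn` is a generator that yields `(frame, None)` pairs, which Gradio streams to its outputs as they arrive. A plausible hookup, purely illustrative: none of these component names or labels appear in the commit.

    # Hypothetical wiring (not shown in this diff): the generator streams
    # annotated frames to an Image output while the finished clip, presumably
    # yielded after the loop, fills the Video output.
    demo = gr.Interface(
        fn=fn,
        inputs=gr.Video(),
        outputs=[gr.Image(label='Live preview'), gr.Video(label='Processed video')],
    )
    demo.launch()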
configuration.py
ADDED
@@ -0,0 +1,32 @@
+class Config:
+    num_frames = 8
+    frame_step = 15
+    resolution = 224
+    frame_size = (resolution, resolution)
+    id_to_name = {
+        0: 'Flying',
+        1: 'Landing',
+        2: 'Other',
+        3: 'Straight Taxiing',
+        4: 'Takeoff',
+        5: 'Turning Maneuver',
+    }
+    name_to_id = {
+        'Flying': 0,
+        'Landing': 1,
+        'Other': 2,
+        'Straight Taxiing': 3,
+        'Takeoff': 4,
+        'Turning Maneuver': 5,
+    }
+
+    model_id = 'a0'
+    detector_path = 'weights/yolov8n.pt'
+    classifier_path = 'weights/classifier-8-epoch10.keras'
+    num_classes = len(id_to_name)
+    input_shape = (1, num_frames, resolution, resolution, 3)
+
+    detect_object_frame_steps = 5
+    classify_action_frame_steps = 15
+    classify_action_num_frames = 8
+    yield_frame_steps = 10
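Everything on `Config` lives in the class body, so derived values such as `num_classes` and `input_shape` are computed once at import time and shared by every instance. A quick check of what callers see, using only values from the file above:

    from configuration import Config

    config = Config()
    print(config.frame_size)    # (224, 224)
    print(config.num_classes)   # 6, derived from id_to_name
    print(config.input_shape)   # (1, 8, 224, 224, 3)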
configurations.py
DELETED
@@ -1,48 +0,0 @@
-# Data
-data_dir = 'storage/dataset'
-training_ratio = 0.7
-validation_ratio = 0.02
-num_frames = 8
-frame_step = 1
-resolution = 224
-frame_size = (resolution, resolution)
-
-id_to_name = {
-    0: 'Flying',
-    1: 'Landing',
-    2: 'Other',
-    3: 'Straight Taxiing',
-    4: 'Takeoff',
-    5: 'Turning Maneuver',
-}
-
-name_to_id = {
-    'Flying': 0,
-    'Landing': 1,
-    'Other': 2,
-    'Straight Taxiing': 3,
-    'Takeoff': 4,
-    'Turning Maneuver': 5,
-}
-
-# Model
-model_id = 'a0'
-checkpoint_dir = f'storage/pretrained_weights/movinet_{model_id}_base'
-num_classes = 6
-
-# Inference
-detect_object_frame_steps = 10
-classify_action_frame_steps = 15
-classify_action_num_frames = 8
-yield_frame_steps = 10
-
-# Train
-train_id = 8
-batch_size = 16
-learning_rate = 0.001
-epochs = 15
-model_save_path = f'storage/output/classifier-{train_id}.keras'
-log_dir = f'storage/logs/classifier-{train_id}.log'
-
-# Train more
-initial_epoch = 0
core/data.py
DELETED
@@ -1,71 +0,0 @@
-from pathlib import Path
-import random
-from typing import Literal
-import cv2
-import numpy as np
-import tensorflow as tf
-
-from configurations import *
-
-def format_frame(frame):
-    frame = tf.image.convert_image_dtype(frame, tf.float32)
-    frame = tf.image.resize_with_pad(frame, *frame_size)
-    return frame
-
-def pick_frames(video: str):
-    capture = cv2.VideoCapture(video)
-    if not capture.isOpened(): raise ValueError('Video file could not be opened.')
-    total_frames = capture.get(cv2.CAP_PROP_FRAME_COUNT)
-    need_frames = 1 + (num_frames - 1) * frame_step
-    if need_frames <= total_frames:
-        start = random.randint(0, total_frames - need_frames + 1)
-        capture.set(cv2.CAP_PROP_POS_FRAMES, start)
-    frames = []
-    for _ in range(num_frames):
-        for _ in range(frame_step):
-            ok, frame = capture.read()
-        if ok: frames.append(format_frame(frame))
-        else: frames.append(np.zeros(frame_size + (3,)))
-    capture.release()
-    frames = np.array(frames)
-    frames = frames[..., [2, 1, 0]]
-    return frames
-
-def Data():
-    data_dir_path = Path(data_dir)
-    return {
-        'training': {
-            a.name: (
-                lambda ps: ps[
-                    :int(len(ps) * training_ratio)])(
-                [x for x in a.iterdir()])
-            for a in data_dir_path.iterdir()},
-        'validation': {
-            a.name: (
-                lambda ps: ps[
-                    int(len(ps) * training_ratio):
-                    int(len(ps) * (training_ratio + validation_ratio))])(
-                [x for x in a.iterdir()])
-            for a in data_dir_path.iterdir()},
-        'testing': {
-            a.name: (
-                lambda ps: ps[
-                    int(len(ps) * (training_ratio + validation_ratio)):])(
-                [x for x in a.iterdir()])
-            for a in data_dir_path.iterdir()},
-    }
-
-def FrameGenerator(split: Literal['training', 'validation']):
-    data = Data()
-    def generator():
-        pairs = [
-            (str(video), class_name)
-            for class_name, videos in data[split].items()
-            for video in videos
-        ]
-        random.shuffle(pairs)
-        for video, class_name in pairs:
-            frames = pick_frames(video)
-            label = name_to_id[class_name]
-            yield frames, label
-    return generator
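For reference, the deleted `FrameGenerator` returns a zero-argument generator yielding `(frames, label)` pairs, which is exactly the shape `tf.data.Dataset.from_generator` expects. The removed training pipeline presumably consumed it along these lines; this is a sketch of the standard tf.data pattern, not code from the repo:

    import tensorflow as tf

    # Assumed consumer of the deleted FrameGenerator; batch size 16 taken
    # from the deleted configurations.py.
    output_signature = (
        tf.TensorSpec(shape=(None, None, None, 3), dtype=tf.float32),  # frames
        tf.TensorSpec(shape=(), dtype=tf.int16),                       # label
    )
    train_ds = tf.data.Dataset.from_generator(
        FrameGenerator('training'), output_signature=output_signature
    ).batch(16)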
core/model.py
DELETED
@@ -1,38 +0,0 @@
-import tensorflow as tf
-from tensorflow import keras
-from official.projects.movinet.modeling import movinet
-from official.projects.movinet.modeling import movinet_model
-
-from configurations import *
-
-def load_backbone():
-    return movinet.Movinet()
-
-def build_classifier():
-    backbone = load_backbone()
-    model = movinet_model.MovinetClassifier(
-        backbone=backbone,
-        num_classes=600)
-    checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
-    checkpoint = tf.train.Checkpoint(model=model)
-    status = checkpoint.restore(checkpoint_path)
-    status.assert_existing_objects_matched()
-    model.build([batch_size, num_frames, resolution, resolution, 3])
-    output = keras.layers.Dense(num_classes)
-    return keras.Sequential(layers=[model, output])
-
-def load_classifier(classifier_path):
-    backbone = load_backbone()
-    model = movinet_model.MovinetClassifier(
-        backbone=backbone,
-        num_classes=600)
-    model.build([batch_size, num_frames, resolution, resolution, 3])
-    output = keras.layers.Dense(num_classes)
-    model = keras.Sequential(layers=[model, output])
-    model.load_weights(classifier_path)
-    return model
-
-def compile_classifier(model):
-    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
-    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
-    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
core/inference.py → inference.py
RENAMED
@@ -2,12 +2,16 @@ from imgviz import instances2rgb
 import tensorflow as tf
 import numpy as np
 
-from configurations import *
-from core.data import format_frame
+from configuration import Config
 
 # detections: (classes: list of class_name, boxes: list of [x1, y1, x2, y2])
 # actions: list of f'{action_name}: {confidence}'
 
+def format_frame(frame, config: Config):
+    frame = tf.image.convert_image_dtype(frame, tf.float32)
+    frame = tf.image.resize_with_pad(frame, *config.frame_size)
+    return frame
+
 def detect_object(detector, frame):
     result = detector(frame, classes=4, verbose=False)[0]
     classes = result.boxes.cls.numpy()
@@ -18,10 +22,9 @@ def detect_object(detector, frame):
     )
     return detections
 
-def classify_action(classifier, frames):
+def classify_action(classifier, frames, id_to_name):
     actions = []
     frames = np.array(frames)
-    # frames = frames[..., [2, 1, 0]]
     frames = tf.expand_dims(frames, 0)
     output = classifier(frames)
     confidences = tf.nn.softmax(output).numpy()[0]
@@ -62,7 +65,7 @@ def draw_boxes(frame, detections, actions):
     )
     return frame
 
-def FrameProcessor(detector, classifier):
+def FrameProcessor(detector, classifier, config: Config):
     current_frame = 0
     frames = []
     actions = []
@@ -70,12 +73,12 @@ def FrameProcessor(detector, classifier):
     def process_frame(frame):
         nonlocal current_frame, frames, actions, detections
         current_frame += 1
-        if current_frame % classify_action_frame_steps == 0:
-            frames.append(format_frame(frame))
-        if current_frame % detect_object_frame_steps == 0:
+        if current_frame % config.classify_action_frame_steps == 0:
+            frames.append(format_frame(frame, config))
+        if current_frame % config.detect_object_frame_steps == 0:
             print(f'Detect object: Frame {current_frame}')
             detections = detect_object(detector, frame)
-        if len(frames) == classify_action_num_frames:
+        if len(frames) == config.classify_action_num_frames:
             print(f'Classify action: Until frame {current_frame}')
-            actions = classify_action(classifier, frames)
+            actions = classify_action(classifier, frames, config.id_to_name)
             frames = []
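`FrameProcessor` is a closure factory: it captures the frame counter, the buffer of formatted frames, and the latest detections and actions, so a caller holds one `process_frame` per video and feeds it frames in order. A minimal sketch of the intended call pattern; the tail of `process_frame` falls outside the hunks shown, so the assumption that it returns the annotated frame is exactly that, an assumption:

    # detector, classifier, and config as loaded in app.py.
    process_frame = FrameProcessor(detector, classifier, config)
    for frame in clip.iter_frames():
        # Runs YOLO every detect_object_frame_steps frames and MoViNet once
        # classify_action_num_frames formatted frames have been buffered.
        annotated = process_frame(frame)  # assumed return value, not shown in the diff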
model.py
ADDED
@@ -0,0 +1,54 @@
+import tensorflow as tf
+from tensorflow import keras
+from ultralytics import YOLO
+from official.projects.movinet.modeling.movinet import Movinet
+from official.projects.movinet.modeling.movinet_model import MovinetClassifier
+
+from configuration import Config
+
+class AttentionDenseClassifierHead(keras.layers.Layer):
+    def __init__(self, attention_heads, dense_units, dropout_rate=0.2, **kwargs):
+        super().__init__(**kwargs)
+        self.attention = keras.layers.MultiHeadAttention(num_heads=attention_heads, key_dim=1)
+        self.normalization = keras.layers.LayerNormalization(epsilon=1e-6)
+        self.dropout = keras.layers.Dropout(dropout_rate)
+        self.dense = keras.layers.Dense(dense_units, activation='softmax')
+
+    def call(self, x, training):
+        y = tf.expand_dims(x, -1)
+        y = self.attention(query=y, key=y, value=y)
+        y = tf.squeeze(y, axis=-1)
+        y = self.dropout(y, training=training)
+        y = self.normalization(x + y*0.01)
+        y = self.dense(y)
+        return y
+
+def build_movinet(output_size, config: Config):
+    model = MovinetClassifier(
+        backbone=Movinet(model_id=config.model_id),
+        num_classes=output_size)
+    model.build(config.input_shape)
+    return model
+
+def build_classifier_head(input_size, config: Config):
+    inputs = keras.Input(shape=(input_size,))
+    classifier = AttentionDenseClassifierHead(2, config.num_classes)(inputs)
+    model = keras.Model(inputs=inputs, outputs=classifier)
+    return model
+
+def build_model(movinet, classifier_head):
+    return keras.models.Sequential([movinet, classifier_head])
+
+def load_classifier(config: Config):
+    movinet = build_movinet(600, config)
+    classifier_head = build_classifier_head(600, config)
+    model = build_model(movinet, classifier_head)
+    model.load_weights(config.classifier_path)
+    return model
+
+def load_detector(config: Config):
+    return YOLO(config.detector_path)
+
+def compile_classifier(model, config: Config):
+    optimizer = keras.optimizers.Adam(learning_rate=config.learning_rate)
+    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
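The new classifier is the 600-way MoViNet-A0 trunk feeding the attention head, which applies a heavily damped residual (`x + y*0.01`) before normalizing and softmaxing over the six action classes. A quick shape check under the committed config; a sketch only, and `load_classifier` additionally needs the weights file at `config.classifier_path` to exist:

    import numpy as np
    from configuration import Config
    from model import build_movinet, build_classifier_head, build_model

    config = Config()
    movinet = build_movinet(600, config)        # MoViNet-A0 trunk, 600 logits
    head = build_classifier_head(600, config)   # attention head, softmax over 6 classes
    model = build_model(movinet, head)

    clip = np.zeros(config.input_shape, dtype=np.float32)  # (1, 8, 224, 224, 3)
    probs = model(clip)                         # shape (1, 6), already softmaxed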
requirements.txt
CHANGED
@@ -1,7 +1,7 @@
-tensorflow
 numpy
 opencv-python
-
+tensorflow==2.15.0
+tf-models-official==2.15.0
 ultralytics
 imgviz
 moviepy
weights/classifier-7.keras
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:13a9436ec0971fe72b53f03d9dd57b89a7c48a4cb82380e14b298c3e2d712f50
-size 25261904