Spaces:
Sleeping
Sleeping
Refactor code structure and import configurations
Browse files
- app.py +20 -17
- configurations.py +47 -0
- core/data.py +25 -31
- core/inference.py +6 -9
- core/model.py +17 -22
- tmpyt25_04fTEMP_MPY_wvf_snd.mp4 +3 -0
- weights/classifier-8-epoch10.keras +3 -0
app.py
CHANGED
@@ -1,24 +1,18 @@
|
|
1 |
import tempfile
|
|
|
|
|
2 |
import gradio as gr
|
3 |
import tensorflow as tf
|
4 |
from moviepy.editor import VideoFileClip
|
|
|
5 |
from ultralytics import YOLO
|
6 |
|
7 |
from core.model import load_classifier
|
8 |
from core.inference import FrameProcessor
|
9 |
print("Tensorflow version " + tf.__version__)
|
10 |
|
11 |
-
id_to_name = {
|
12 |
-
0: 'Flying',
|
13 |
-
1: 'Landing',
|
14 |
-
2: 'Other',
|
15 |
-
3: 'Straight Taxiing',
|
16 |
-
4: 'Takeoff',
|
17 |
-
5: 'Turning Maneuver',
|
18 |
-
}
|
19 |
-
|
20 |
print('Load classifier.')
|
21 |
-
classifier_path = 'weights/classifier-
|
22 |
classifier = load_classifier(classifier_path)
|
23 |
|
24 |
print('Load detector.')
|
@@ -30,23 +24,32 @@ def fn(video: gr.Video):
|
|
30 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as f:
|
31 |
output = f.name
|
32 |
clip = VideoFileClip(video)
|
33 |
-
process_frame = FrameProcessor(detector, classifier
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
35 |
clip.write_videofile(output, fps=clip.fps, audio_codec='aac', logger=None)
|
36 |
-
|
37 |
|
38 |
inputs = gr.Video(sources=['upload'], label='Input Video')
|
39 |
-
outputs =
|
|
|
|
|
40 |
|
41 |
examples = [
|
42 |
['examples/ZFLFDfovqls_001310_001320.mp4'], # cspell: disable-line
|
43 |
-
['examples/Zv7GyH-fpEY_2023.0_2033.0.mp4']
|
44 |
-
]
|
45 |
|
46 |
iface = gr.Interface(
|
|
|
|
|
|
|
47 |
fn=fn,
|
48 |
inputs=inputs,
|
49 |
outputs=outputs,
|
50 |
examples=examples,
|
51 |
-
)
|
52 |
iface.launch()
|
|
|
1 |
import tempfile
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
import gradio as gr
|
5 |
import tensorflow as tf
|
6 |
from moviepy.editor import VideoFileClip
|
7 |
+
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
|
8 |
from ultralytics import YOLO
|
9 |
|
10 |
from core.model import load_classifier
|
11 |
from core.inference import FrameProcessor
|
12 |
print("Tensorflow version " + tf.__version__)
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
print('Load classifier.')
|
15 |
+
classifier_path = 'weights/classifier-8-epoch10.keras'
|
16 |
classifier = load_classifier(classifier_path)
|
17 |
|
18 |
print('Load detector.')
|
|
|
24 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as f:
|
25 |
output = f.name
|
26 |
clip = VideoFileClip(video)
|
27 |
+
process_frame = FrameProcessor(detector, classifier)
|
28 |
+
processed_frames = []
|
29 |
+
for frame in clip.iter_frames():
|
30 |
+
processed_frames.append(process_frame(frame))
|
31 |
+
yield processed_frames[-1], None
|
32 |
+
processed_clip = ImageSequenceClip(processed_frames, clip.fps)
|
33 |
+
processed_clip.audio = clip.audio
|
34 |
clip.write_videofile(output, fps=clip.fps, audio_codec='aac', logger=None)
|
35 |
+
yield processed_frames[-1], output
|
36 |
|
37 |
inputs = gr.Video(sources=['upload'], label='Input Video')
|
38 |
+
outputs = [
|
39 |
+
gr.Image(interactive=False, label='Last Frame Processed'),
|
40 |
+
gr.Video(interactive=False, label='Aeroplane Position and Action Marked')]
|
41 |
|
42 |
examples = [
|
43 |
['examples/ZFLFDfovqls_001310_001320.mp4'], # cspell: disable-line
|
44 |
+
['examples/Zv7GyH-fpEY_2023.0_2033.0.mp4']]
|
|
|
45 |
|
46 |
iface = gr.Interface(
|
47 |
+
title='Aeroplane Position and Action Detection',
|
48 |
+
description='Detect aeroplane position and action in a video.',
|
49 |
+
theme='soft',
|
50 |
fn=fn,
|
51 |
inputs=inputs,
|
52 |
outputs=outputs,
|
53 |
examples=examples,
|
54 |
+
cache_examples=False)
|
55 |
iface.launch()
|
configurations.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Data
|
2 |
+
data_dir = 'storage/dataset'
|
3 |
+
training_ratio = 0.7
|
4 |
+
validation_ratio = 0.02
|
5 |
+
num_frames = 8
|
6 |
+
frame_step = 1
|
7 |
+
resolution = 224
|
8 |
+
frame_size = (resolution, resolution)
|
9 |
+
|
10 |
+
id_to_name = {
|
11 |
+
0: 'Flying',
|
12 |
+
1: 'Landing',
|
13 |
+
2: 'Other',
|
14 |
+
3: 'Straight Taxiing',
|
15 |
+
4: 'Takeoff',
|
16 |
+
5: 'Turning Maneuver',
|
17 |
+
}
|
18 |
+
|
19 |
+
name_to_id = {
|
20 |
+
'Flying': 0,
|
21 |
+
'Landing': 1,
|
22 |
+
'Other': 2,
|
23 |
+
'Straight Taxiing': 3,
|
24 |
+
'Takeoff': 4,
|
25 |
+
'Turning Maneuver': 5,
|
26 |
+
}
|
27 |
+
|
28 |
+
# Model
|
29 |
+
model_id = 'a0'
|
30 |
+
checkpoint_dir = f'storage/pretrained_weights/movinet_{model_id}_base'
|
31 |
+
num_classes = 6
|
32 |
+
|
33 |
+
# Inference
|
34 |
+
detect_object_frame_steps = 5
|
35 |
+
classify_action_frame_steps = 15
|
36 |
+
classify_action_num_frames = 8
|
37 |
+
|
38 |
+
# Train
|
39 |
+
train_id = 8
|
40 |
+
batch_size = 16
|
41 |
+
learning_rate = 0.001
|
42 |
+
epochs = 15
|
43 |
+
model_save_path = f'storage/output/classifier-{train_id}.keras'
|
44 |
+
log_dir = f'storage/logs/classifier-{train_id}.log'
|
45 |
+
|
46 |
+
# Train more
|
47 |
+
initial_epoch = 0
|
core/data.py
CHANGED
@@ -5,11 +5,7 @@ import cv2
|
|
5 |
import numpy as np
|
6 |
import tensorflow as tf
|
7 |
|
8 |
-
|
9 |
-
validation_ratio = 0.02
|
10 |
-
num_frames = 8
|
11 |
-
frame_step = 15
|
12 |
-
frame_size = (224, 224)
|
13 |
|
14 |
def format_frame(frame):
|
15 |
frame = tf.image.convert_image_dtype(frame, tf.float32)
|
@@ -35,37 +31,35 @@ def pick_frames(video: str):
|
|
35 |
frames = frames[..., [2, 1, 0]]
|
36 |
return frames
|
37 |
|
38 |
-
def Data(
|
39 |
-
|
40 |
return {
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
}
|
53 |
|
54 |
-
def
|
55 |
-
|
56 |
-
id_to_name = sorted([x.name for x in data_dir.iterdir()])
|
57 |
-
name_to_id = {
|
58 |
-
name: i
|
59 |
-
for i, name in enumerate(id_to_name)
|
60 |
-
}
|
61 |
-
return (id_to_name, name_to_id)
|
62 |
-
|
63 |
-
def FrameGenerator(data_dir: str, split: Literal['training', 'validation']):
|
64 |
-
_, name_to_id = ClassMapping(data_dir)
|
65 |
-
data = Data(data_dir)
|
66 |
def generator():
|
67 |
pairs = [
|
68 |
-
(video, class_name)
|
69 |
for class_name, videos in data[split].items()
|
70 |
for video in videos
|
71 |
]
|
|
|
5 |
import numpy as np
|
6 |
import tensorflow as tf
|
7 |
|
8 |
+
from configurations import *
|
|
|
|
|
|
|
|
|
9 |
|
10 |
def format_frame(frame):
|
11 |
frame = tf.image.convert_image_dtype(frame, tf.float32)
|
|
|
31 |
frames = frames[..., [2, 1, 0]]
|
32 |
return frames
|
33 |
|
34 |
+
def Data():
|
35 |
+
data_dir_path = Path(data_dir)
|
36 |
return {
|
37 |
+
'training': {
|
38 |
+
a.name: (
|
39 |
+
lambda ps: ps[
|
40 |
+
:int(len(ps) * training_ratio)])(
|
41 |
+
[x for x in a.iterdir()])
|
42 |
+
for a in data_dir_path.iterdir()},
|
43 |
+
'validation': {
|
44 |
+
a.name: (
|
45 |
+
lambda ps: ps[
|
46 |
+
int(len(ps) * training_ratio):
|
47 |
+
int(len(ps) * (training_ratio + validation_ratio))])(
|
48 |
+
[x for x in a.iterdir()])
|
49 |
+
for a in data_dir_path.iterdir()},
|
50 |
+
'testing': {
|
51 |
+
a.name: (
|
52 |
+
lambda ps: ps[
|
53 |
+
int(len(ps) * (training_ratio + validation_ratio)):])(
|
54 |
+
[x for x in a.iterdir()])
|
55 |
+
for a in data_dir_path.iterdir()},
|
56 |
}
|
57 |
|
58 |
+
def FrameGenerator(split: Literal['training', 'validation']):
|
59 |
+
data = Data()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
def generator():
|
61 |
pairs = [
|
62 |
+
(str(video), class_name)
|
63 |
for class_name, videos in data[split].items()
|
64 |
for video in videos
|
65 |
]
|
core/inference.py
CHANGED
@@ -2,15 +2,12 @@ from imgviz import instances2rgb
|
|
2 |
import tensorflow as tf
|
3 |
import numpy as np
|
4 |
|
|
|
5 |
from core.data import format_frame
|
6 |
|
7 |
# detections: (classes: list of class_name, boxes: list of [x1, y1, x2, y2])
|
8 |
# actions: list of f'{action_name}: {confidence}'
|
9 |
|
10 |
-
detect_object_frame_steps = 5
|
11 |
-
classify_action_frame_steps = 15
|
12 |
-
classify_action_num_frames = 8
|
13 |
-
|
14 |
def detect_object(detector, frame):
|
15 |
result = detector(frame, classes=4, verbose=False)[0]
|
16 |
classes = result.boxes.cls.numpy()
|
@@ -21,12 +18,12 @@ def detect_object(detector, frame):
|
|
21 |
)
|
22 |
return detections
|
23 |
|
24 |
-
def classify_action(classifier, frames
|
25 |
actions = []
|
26 |
frames = np.array(frames)
|
27 |
-
frames = frames[..., [2, 1, 0]]
|
28 |
frames = tf.expand_dims(frames, 0)
|
29 |
-
output = classifier(frames
|
30 |
confidences = tf.nn.softmax(output).numpy()[0]
|
31 |
for (class_id, confidence) in enumerate(confidences):
|
32 |
other_class_id = 2
|
@@ -65,7 +62,7 @@ def draw_boxes(frame, detections, actions):
|
|
65 |
)
|
66 |
return frame
|
67 |
|
68 |
-
def FrameProcessor(detector, classifier
|
69 |
current_frame = 0
|
70 |
frames = []
|
71 |
actions = []
|
@@ -80,7 +77,7 @@ def FrameProcessor(detector, classifier, id_to_name):
|
|
80 |
detections = detect_object(detector, frame)
|
81 |
if len(frames) == classify_action_num_frames:
|
82 |
print(f'Classify action: Until frame {current_frame}')
|
83 |
-
actions = classify_action(classifier, frames
|
84 |
frames = []
|
85 |
frame = draw_boxes(frame, detections, actions)
|
86 |
return frame
|
|
|
2 |
import tensorflow as tf
|
3 |
import numpy as np
|
4 |
|
5 |
+
from configurations import *
|
6 |
from core.data import format_frame
|
7 |
|
8 |
# detections: (classes: list of class_name, boxes: list of [x1, y1, x2, y2])
|
9 |
# actions: list of f'{action_name}: {confidence}'
|
10 |
|
|
|
|
|
|
|
|
|
11 |
def detect_object(detector, frame):
|
12 |
result = detector(frame, classes=4, verbose=False)[0]
|
13 |
classes = result.boxes.cls.numpy()
|
|
|
18 |
)
|
19 |
return detections
|
20 |
|
21 |
+
def classify_action(classifier, frames):
|
22 |
actions = []
|
23 |
frames = np.array(frames)
|
24 |
+
# frames = frames[..., [2, 1, 0]]
|
25 |
frames = tf.expand_dims(frames, 0)
|
26 |
+
output = classifier(frames)
|
27 |
confidences = tf.nn.softmax(output).numpy()[0]
|
28 |
for (class_id, confidence) in enumerate(confidences):
|
29 |
other_class_id = 2
|
|
|
62 |
)
|
63 |
return frame
|
64 |
|
65 |
+
def FrameProcessor(detector, classifier):
|
66 |
current_frame = 0
|
67 |
frames = []
|
68 |
actions = []
|
|
|
77 |
detections = detect_object(detector, frame)
|
78 |
if len(frames) == classify_action_num_frames:
|
79 |
print(f'Classify action: Until frame {current_frame}')
|
80 |
+
actions = classify_action(classifier, frames)
|
81 |
frames = []
|
82 |
frame = draw_boxes(frame, detections, actions)
|
83 |
return frame
|
core/model.py
CHANGED
@@ -3,42 +3,37 @@ from tensorflow import keras
|
|
3 |
from official.projects.movinet.modeling import movinet
|
4 |
from official.projects.movinet.modeling import movinet_model
|
5 |
|
6 |
-
|
7 |
-
num_classes = 6
|
8 |
-
num_frames = 8
|
9 |
-
resolution = 224
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
backbone_trainable = True
|
14 |
|
15 |
-
def
|
16 |
-
backbone =
|
17 |
-
|
18 |
-
|
|
|
19 |
checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
|
20 |
checkpoint = tf.train.Checkpoint(model=model)
|
21 |
status = checkpoint.restore(checkpoint_path)
|
22 |
status.assert_existing_objects_matched()
|
23 |
-
model = movinet_model.MovinetClassifier(
|
24 |
-
backbone=backbone,
|
25 |
-
num_classes=num_classes,
|
26 |
-
)
|
27 |
model.build([batch_size, num_frames, resolution, resolution, 3])
|
28 |
-
|
|
|
29 |
|
30 |
-
def load_classifier(
|
31 |
-
backbone =
|
32 |
model = movinet_model.MovinetClassifier(
|
33 |
backbone=backbone,
|
34 |
num_classes=num_classes,
|
35 |
-
|
36 |
-
model.build([
|
37 |
-
|
|
|
|
|
38 |
return model
|
39 |
|
40 |
def compile_classifier(model):
|
41 |
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
|
42 |
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
|
43 |
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
|
44 |
-
return model
|
|
|
3 |
from official.projects.movinet.modeling import movinet
|
4 |
from official.projects.movinet.modeling import movinet_model
|
5 |
|
6 |
+
from configurations import *
|
|
|
|
|
|
|
7 |
|
8 |
+
def load_backbone():
|
9 |
+
return movinet.Movinet()
|
|
|
10 |
|
11 |
+
def build_classifier():
|
12 |
+
backbone = load_backbone()
|
13 |
+
model = movinet_model.MovinetClassifier(
|
14 |
+
backbone=backbone,
|
15 |
+
num_classes=600)
|
16 |
checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
|
17 |
checkpoint = tf.train.Checkpoint(model=model)
|
18 |
status = checkpoint.restore(checkpoint_path)
|
19 |
status.assert_existing_objects_matched()
|
|
|
|
|
|
|
|
|
20 |
model.build([batch_size, num_frames, resolution, resolution, 3])
|
21 |
+
output = keras.layers.Dense(num_classes)
|
22 |
+
return keras.Sequential(layers=[model, output])
|
23 |
|
24 |
+
def load_classifier():
|
25 |
+
backbone = load_backbone()
|
26 |
model = movinet_model.MovinetClassifier(
|
27 |
backbone=backbone,
|
28 |
num_classes=num_classes,
|
29 |
+
output_states=True)
|
30 |
+
model.build([batch_size, num_frames, resolution, resolution, 3])
|
31 |
+
output = keras.layers.Dense(num_classes)
|
32 |
+
model = keras.Sequential(layers=[model, output])
|
33 |
+
model.load_weights(model_save_path)
|
34 |
return model
|
35 |
|
36 |
def compile_classifier(model):
|
37 |
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
|
38 |
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
|
39 |
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
|
|
tmpyt25_04fTEMP_MPY_wvf_snd.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe6df5c78cebb5a214ed7c83d8826c441d088ba75198a65429dbcb3619959f53
|
3 |
+
size 162883
|
weights/classifier-8-epoch10.keras
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c11a51f095a4902755f375740980537257764536bd089d2ae7cbe1cbb8343184
|
3 |
+
size 38477915
|