update
- .gitattributes +1 -0
- .gitignore +4 -1
- app.py +35 -3
- core/data.py +77 -0
- core/inference.py +91 -0
- core/model.py +44 -0
- movinet/data.py +0 -79
- playgrounds/load_video.py +0 -112
- playgrounds/movinet.py +0 -80
- playgrounds/verify_metal.py +0 -14
- playgrounds/yolo.py +0 -40
- requirements.txt +5 -1
- weights/classifier-7.keras +3 -0
- weights/yolov8n.pt +3 -0
.gitattributes
CHANGED
@@ -7,6 +7,7 @@
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
+*.keras filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
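The added *.keras rule is the line Git LFS writes when a new pattern is tracked. Assuming Git LFS is installed, the same entry can be reproduced locally with:

git lfs track "*.keras"
git add .gitattributes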
.gitignore
CHANGED
@@ -4,7 +4,10 @@
 
 # data
 assets
-
+output
 
 # python
 __pycache__
+
+# gradio
+flagged
app.py
CHANGED
@@ -1,7 +1,39 @@
 import gradio as gr
+import tensorflow as tf
+from moviepy.editor import VideoFileClip
+from ultralytics import YOLO
 
+from core.data import ClassMapping
+from core.model import load_classifier
+from core.inference import FrameProcessor
 
+print("Tensorflow version " + tf.__version__)
+
+print('Load classifier.')
+classifier_path = 'weights/classifier-7.keras'
+classifier = load_classifier(classifier_path)
+
+print('Load detector.')
+detector_path = 'weights/yolov8n.pt'
+detector = YOLO(detector_path)
+
+def fn(video: gr.Video):
+    print('Process video.')
+    output = f'Marked-{str(video)}'
+    clip = VideoFileClip(video)
+    data_dir = 'storage/dataset'
+    id_to_name, _ = ClassMapping(data_dir)
+    process_frame = FrameProcessor(detector, classifier, id_to_name)
+    clip = clip.fl_image(process_frame)
+    clip.write_videofile(output, fps=clip.fps, audio_codec='aac', logger=None)
+    return output
+
+inputs = gr.Video(sources=['upload'], label='Input Video')
+outputs = gr.Video(interactive=False, label='Aeroplane Position and Action Marked')
+
+iface = gr.Interface(
+    fn=fn,
+    inputs=inputs,
+    outputs=outputs,
+)
 iface.launch()
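app.py leans on one moviepy contract: fl_image maps a function over every frame of the clip, handing it an RGB numpy array and expecting an array of the same shape back. A stripped-down sketch of that contract with a pass-through function, assuming hypothetical in.mp4/out.mp4 paths:

from moviepy.editor import VideoFileClip

def passthrough(frame):
    # frame: RGB numpy array of shape (height, width, 3)
    return frame

clip = VideoFileClip('in.mp4')
clip = clip.fl_image(passthrough)
clip.write_videofile('out.mp4', fps=clip.fps, audio_codec='aac', logger=None)

FrameProcessor plugs into this slot, drawing detections and action captions onto each frame before it is re-encoded.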
core/data.py
ADDED
@@ -0,0 +1,77 @@
+from pathlib import Path
+import random
+from typing import Literal
+import cv2
+import numpy as np
+import tensorflow as tf
+
+training_ratio = 0.7
+validation_ratio = 0.02
+num_frames = 8
+frame_step = 15
+frame_size = (224, 224)
+
+def format_frame(frame):
+    frame = tf.image.convert_image_dtype(frame, tf.float32)
+    frame = tf.image.resize_with_pad(frame, *frame_size)
+    return frame
+
+def pick_frames(video: str):
+    capture = cv2.VideoCapture(video)
+    if not capture.isOpened(): raise ValueError('Video file could not be opened.')
+    total_frames = capture.get(cv2.CAP_PROP_FRAME_COUNT)
+    need_frames = 1 + (num_frames - 1) * frame_step
+    if need_frames <= total_frames:
+        start = random.randint(0, total_frames - need_frames + 1)
+        capture.set(cv2.CAP_PROP_POS_FRAMES, start)
+    frames = []
+    for _ in range(num_frames):
+        for _ in range(frame_step):
+            ok, frame = capture.read()
+        if ok: frames.append(format_frame(frame))
+        else: frames.append(np.zeros(frame_size + (3,)))
+    capture.release()
+    frames = np.array(frames)
+    frames = frames[..., [2, 1, 0]]
+    return frames
+
+def Data(data_dir: str):
+    data_dir = Path(data_dir)
+    return {
+        'training': {
+            a.name: (lambda ps: ps[:int(len(ps) * training_ratio)])([x for x in a.iterdir()])
+            for a in data_dir.iterdir()
+        },
+        'validation': {
+            a.name: (lambda ps: ps[
+                int(len(ps) * training_ratio) :
+                int(len(ps) * (training_ratio + validation_ratio))
+            ])([x for x in a.iterdir()])
+            for a in data_dir.iterdir()
+        },
+    }
+
+def ClassMapping(data_dir: str):
+    data_dir = Path(data_dir)
+    id_to_name = sorted([x.name for x in data_dir.iterdir()])
+    name_to_id = {
+        name: i
+        for i, name in enumerate(id_to_name)
+    }
+    return (id_to_name, name_to_id)
+
+def FrameGenerator(data_dir: str, split: Literal['training', 'validation']):
+    _, name_to_id = ClassMapping(data_dir)
+    data = Data(data_dir)
+    def generator():
+        pairs = [
+            (video, class_name)
+            for class_name, videos in data[split].items()
+            for video in videos
+        ]
+        random.shuffle(pairs)
+        for video, class_name in pairs:
+            frames = pick_frames(video)
+            label = name_to_id[class_name]
+            yield frames, label
+    return generator
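How FrameGenerator is meant to be consumed is only implied by this commit; the deleted playgrounds/load_video.py wired the same generator shape into tf.data. A minimal sketch along those lines, assuming the storage/dataset layout referenced in app.py (one subdirectory per class, each holding video files); the batch size is illustrative:

import tensorflow as tf

from core.data import FrameGenerator

# Shapes mirror pick_frames: (num_frames, height, width, 3) float frames
# and a scalar integer label per clip.
output_signature = (
    tf.TensorSpec(shape=(None, None, None, 3), dtype=tf.float32),
    tf.TensorSpec(shape=(), dtype=tf.int16),
)
train_ds = tf.data.Dataset.from_generator(
    FrameGenerator('storage/dataset', 'training'),
    output_signature=output_signature,
)
train_ds = train_ds.batch(8)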
core/inference.py
ADDED
@@ -0,0 +1,91 @@
+from imgviz import instances2rgb
+import tensorflow as tf
+import numpy as np
+
+from core.data import format_frame
+
+# detections: (classes: list of class_name, boxes: list of [x1, y1, x2, y2])
+# actions: list of f'{action_name}: {confidence}'
+
+detect_object_frame_steps = 5
+classify_action_frame_steps = 15
+classify_action_num_frames = 8
+
+def detect_object(detector, frame):
+    result = detector(frame, classes=4, verbose=False)[0]
+    classes = result.boxes.cls.numpy()
+    boxes = result.boxes.xyxy.numpy()
+    predictions = [
+        (result.names[classes[i]].capitalize(), boxes[i])
+        for i in range(len(classes))
+    ]
+    detections = (
+        [result.names[i].capitalize() for i in classes],
+        boxes,
+    )
+    return detections
+
+def classify_action(classifier, frames, id_to_name):
+    actions = []
+    frames = np.array(frames)
+    frames = frames[..., [2, 1, 0]]
+    frames = tf.expand_dims(frames, 0)
+    output = classifier(frames, training=False)
+    confidences = tf.nn.softmax(output).numpy()[0]
+    for (class_id, confidence) in enumerate(confidences):
+        other_class_id = 2
+        if confidence > 0.3 and class_id != other_class_id:
+            actions.append(f'{id_to_name[class_id]}: {np.round(confidence, 2)}')
+    return actions
+
+def draw_boxes(frame, detections, actions):
+    (classes, boxes) = detections
+    max_area = 0
+    max_area_id = 0
+    for i, box in enumerate(boxes):
+        area = (box[3] - box[1]) * (box[2] - box[0])
+        if area > max_area:
+            max_area = area
+            max_area_id = i
+    labels = [0 for _ in classes]
+    colormap = [(0x39, 0xc5, 0xbb)]
+    line_width = 2
+    captions = [
+        f'{class_name}\n' + '\n'.join(actions if i == max_area_id else [])
+        for (i, class_name) in enumerate(classes)
+    ]
+    bboxes = [
+        [box[1], box[0], box[3], box[2]]
+        for box in boxes
+    ]
+    frame = instances2rgb(
+        frame,
+        labels=labels,
+        captions=captions,
+        bboxes=bboxes,
+        colormap=colormap,
+        font_size=20,
+        line_width=line_width,
+    )
+    return frame
+
+def FrameProcessor(detector, classifier, id_to_name):
+    current_frame = 0
+    frames = []
+    actions = []
+    detections = ([], [])
+    def process_frame(frame):
+        nonlocal current_frame, frames, actions, detections
+        current_frame += 1
+        if current_frame % classify_action_frame_steps == 0:
+            frames.append(format_frame(frame))
+        if current_frame % detect_object_frame_steps == 0:
+            print(f'Detect object: Frame {current_frame}')
+            detections = detect_object(detector, frame)
+        if len(frames) == classify_action_num_frames:
+            print(f'Classify action: Until frame {current_frame}')
+            actions = classify_action(classifier, frames, id_to_name)
+            frames = []
+        frame = draw_boxes(frame, detections, actions)
+        return frame
+    return process_frame
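A quick way to exercise detect_object and draw_boxes outside the video loop is a single-image smoke test. This is a sketch, not part of the commit: sample.jpg is a hypothetical input, and actions is left empty because the action classifier needs a window of 8 buffered frames.

import cv2
from ultralytics import YOLO

from core.inference import detect_object, draw_boxes

detector = YOLO('weights/yolov8n.pt')  # weights added in this commit
image = cv2.cvtColor(cv2.imread('sample.jpg'), cv2.COLOR_BGR2RGB)
detections = detect_object(detector, image)  # restricted to COCO class 4, aeroplane
marked = draw_boxes(image, detections, actions=[])
cv2.imwrite('marked.jpg', cv2.cvtColor(marked, cv2.COLOR_RGB2BGR))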
core/model.py
ADDED
@@ -0,0 +1,44 @@
+import tensorflow as tf
+from tensorflow import keras
+from official.projects.movinet.modeling import movinet
+from official.projects.movinet.modeling import movinet_model
+
+model_id = 'a1'
+num_classes = 6
+num_frames = 8
+resolution = 224
+
+batch_size = 32
+learning_rate = 0.001
+backbone_trainable = True
+
+def build_classifier_with_pretrained_weights(checkpoint_dir: str):
+    backbone = movinet.Movinet(model_id=model_id)
+    backbone.trainable = backbone_trainable
+    model = movinet_model.MovinetClassifier(backbone=backbone, num_classes=600)
+    checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
+    checkpoint = tf.train.Checkpoint(model=model)
+    status = checkpoint.restore(checkpoint_path)
+    status.assert_existing_objects_matched()
+    model = movinet_model.MovinetClassifier(
+        backbone=backbone,
+        num_classes=num_classes,
+    )
+    model.build([batch_size, num_frames, resolution, resolution, 3])
+    return model
+
+def load_classifier(weights_path: str):
+    backbone = movinet.Movinet(model_id=model_id)
+    model = movinet_model.MovinetClassifier(
+        backbone=backbone,
+        num_classes=num_classes,
+    )
+    model.build([1, num_frames, resolution, resolution, 3])
+    model.load_weights(weights_path)
+    return model
+
+def compile_classifier(model):
+    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
+    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
+    return model
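The commit ships only inference weights, but core/model.py also carries the training-side pieces. A sketch of how they would fit together, assuming a Kinetics-600 MoViNet-A1 checkpoint unpacked into a movinet_a1_base directory; the checkpoint path and epoch count are illustrative assumptions, not values recorded in this commit:

import tensorflow as tf

from core.data import FrameGenerator
from core.model import build_classifier_with_pretrained_weights, compile_classifier

output_signature = (
    tf.TensorSpec(shape=(None, None, None, 3), dtype=tf.float32),
    tf.TensorSpec(shape=(), dtype=tf.int16),
)
train_ds = tf.data.Dataset.from_generator(
    FrameGenerator('storage/dataset', 'training'),
    output_signature=output_signature,
).batch(32)  # matches batch_size in core/model.py

# Swap the 600-class Kinetics head for the 6-class head, then fine-tune.
model = build_classifier_with_pretrained_weights('movinet_a1_base')
model = compile_classifier(model)
model.fit(train_ds, epochs=2)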
movinet/data.py
DELETED
@@ -1,79 +0,0 @@
-from pathlib import Path
-import random
-from typing import Literal
-import cv2
-import numpy as np
-import tensorflow as tf
-
-TRAINING_RATIO = 0.1
-VALIDATION_RATIO = 0.01
-
-def format_frames(frame, output_size):
-    frame = tf.image.convert_image_dtype(frame, tf.float32)
-    frame = tf.image.resize_with_pad(frame, *output_size)
-    return frame
-
-def frames_from_video_file(video_path: str, n_frames: int, output_size=(256, 256), frame_step=15):
-    capture = cv2.VideoCapture(video_path)
-    if not capture.isOpened(): raise ValueError('Video file could not be opened.')
-    total_frames = capture.get(cv2.CAP_PROP_FRAME_COUNT)
-    need_frames = 1 + (n_frames - 1) * frame_step
-
-    if need_frames <= total_frames:
-        start = random.randint(0, total_frames - need_frames + 1)
-        capture.set(cv2.CAP_PROP_POS_FRAMES, start)
-
-    frames = []
-    for _ in range(n_frames - 1):
-        for _ in range(frame_step):
-            ok, frame = capture.read()
-        if ok:
-            frames.append(format_frames(frame, output_size))
-        else:
-            frames.append(np.zeros((output_size[0], output_size[1], 3)))
-    capture.release()
-
-    frames = np.array(frames)
-    frames = frames[..., [2, 1, 0]]
-    return frames
-
-def Data(data_dir: Path):
-    return {
-        'training': {
-            a.name: (lambda ps: ps[:int(len(ps) * TRAINING_RATIO)])([x for x in a.iterdir()])
-            for a in data_dir.iterdir()
-        },
-        'validation': {
-            a.name: (lambda ps: ps[
-                int(len(ps) * TRAINING_RATIO) :
-                int(len(ps) * (TRAINING_RATIO + VALIDATION_RATIO))
-            ])([x for x in a.iterdir()])
-            for a in data_dir.iterdir()
-        },
-    }
-
-def frame_generator(data_dir: Path, n_frames: int, split: Literal['training', 'validation']):
-    class_names = sorted([x.name for x in data_dir.iterdir()])
-    class_ids_for_name = {
-        name: i
-        for i, name in enumerate(class_names)
-    }
-    data = Data(data_dir)
-
-    def generator():
-        pairs = [
-            (path, name)
-            for name, paths in data[split].items()
-            for path in paths
-        ]
-        random.shuffle(pairs)
-        for path, name in pairs:
-            video_frames = frames_from_video_file(str(path), n_frames)
-            label = class_ids_for_name[name]
-            yield video_frames, label
-    return generator
-
-def total_steps(data_dir: Path):
-    data = Data(data_dir)
-    size = lambda d: sum([len(x) for x in d.values()])
-    return size(data['training']), size(data['validation'])
playgrounds/load_video.py
DELETED
@@ -1,112 +0,0 @@
-import random
-from typing import *
-import numpy as np
-import tensorflow as tf
-import cv2
-from pathlib import Path
-
-SPLIT_RATIO = 0.7
-BATCH_SIZE = 8
-NUM_FRAMES = 8
-
-def main():
-    data_dir = Path('assets/dataset')
-    output_signature = (
-        tf.TensorSpec(shape = (None, None, None, 3), dtype = tf.float32),
-        tf.TensorSpec(shape = (), dtype = tf.int16),
-    )
-    train_ds = tf.data.Dataset.from_generator(frame_generator(data_dir, NUM_FRAMES, 'training'), output_signature=output_signature)
-    train_ds = train_ds.batch(BATCH_SIZE)
-
-def format_frames(frame, output_size):
-    """
-    Pad and resize an image from a video.
-
-    Args:
-        frame: Image that needs to resized and padded.
-        output_size: Pixel size of the output frame image.
-
-    Return:
-        Formatted frame with padding of specified output size.
-    """
-    frame = tf.image.convert_image_dtype(frame, tf.float32)
-    frame = tf.image.resize_with_pad(frame, *output_size)
-    return frame
-
-def frames_from_video_file(video_path, n_frames, output_size=(224, 224), frame_step=15):
-    """
-    Creates frames from each video file present for each category.
-
-    Args:
-        video_path: File path to the video.
-        n_frames: Number of frames to be created per video file.
-        output_size: Pixel size of the output frame image.
-
-    Return:
-        An NumPy array of frames in the shape of (n_frames, height, width, channels).
-    """
-    # Read each video frame by frame
-    result = []
-    src = cv2.VideoCapture(str(video_path))
-
-    video_length = src.get(cv2.CAP_PROP_FRAME_COUNT)
-
-    need_length = 1 + (n_frames - 1) * frame_step
-
-    if need_length > video_length:
-        start = 0
-    else:
-        max_start = video_length - need_length
-        start = random.randint(0, max_start + 1)
-
-    src.set(cv2.CAP_PROP_POS_FRAMES, start)
-    # ret is a boolean indicating whether read was successful, frame is the image itself
-    ok, frame = src.read()
-    if not ok:
-        raise ValueError('read video not success')
-    result.append(format_frames(frame, output_size))
-
-    for _ in range(n_frames - 1):
-        for _ in range(frame_step):
-            ok, frame = src.read()
-        if ok:
-            frame = format_frames(frame, output_size)
-            result.append(frame)
-        else:
-            result.append(np.zeros_like(result[0]))
-    src.release()
-    result = np.array(result)[..., [2, 1, 0]]
-
-    return result
-
-def frame_generator(data_dir: Path, n_frames: int, split: Literal['training', 'validation']):
-    class_names = sorted([x.name for x in data_dir.iterdir()])
-    class_ids_for_name = {
-        name: i
-        for i, name in enumerate(class_names)
-    }
-    data = {
-        'training': {
-            a.name: (lambda ps: ps[:int(len(ps) * SPLIT_RATIO)])([x for x in a.iterdir()])
-            for a in data_dir.iterdir()
-        },
-        'validation': {
-            a.name: (lambda ps: ps[int(len(ps) * SPLIT_RATIO):])([x for x in a.iterdir()])
-            for a in data_dir.iterdir()
-        },
-    }
-
-    def generator():
-        pairs = [
-            (path, name)
-            for name, paths in data[split].items()
-            for path in paths
-        ]
-        random.shuffle(pairs)
-        for path, name in pairs:
-            video_frames = frames_from_video_file(path, n_frames)
-            label = class_ids_for_name[name]  # Encode labels
-            yield video_frames, label
-    return generator
playgrounds/movinet.py
DELETED
@@ -1,80 +0,0 @@
-import tensorflow as tf
-import numpy as np
-import tensorflow_hub as hub
-import keras
-
-labels_path = keras.utils.get_file(
-    fname='labels.txt',
-    origin='https://raw.githubusercontent.com/tensorflow/models/f8af2291cced43fc9f1d9b41ddbf772ae7b0d7d2/official/projects/movinet/files/kinetics_600_labels.txt'
-)
-
-with open(labels_path, 'r', encoding='utf-8') as file:
-    lines = file.read().splitlines()
-
-KINETICS_600_LABELS = np.array([line.strip() for line in lines])
-KINETICS_600_LABELS[:20]
-
-def main():
-    jumping_jack_path = 'assets/jumping_pack.gif'
-    jumping_jack = load_gif(jumping_jack_path)
-
-    id = 'a2'
-    mode = 'base'
-    version = '3'
-    hub_url = f'https://tfhub.dev/tensorflow/movinet/{id}/{mode}/kinetics-600/classification/{version}'
-    model = hub.load(hub_url)
-    sig = model.signatures['serving_default']
-    print('Model loaded.')
-
-    sig(image=jumping_jack[tf.newaxis, :1])
-    logits = sig(image=jumping_jack[tf.newaxis, ...])
-    logits = logits['classifier_head'][0]
-    probs = tf.nn.softmax(logits, axis=-1)
-    for label, p in get_top_k(probs):
-        print(f'{label:20s}: {p:.3f}')
-
-def get_top_k(probs, k=5, label_map=KINETICS_600_LABELS):
-    """Outputs the top k model labels and probabilities on the given video.
-
-    Args:
-        probs: probability tensor of shape (num_frames, num_classes) that represents
-            the probability of each class on each frame.
-        k: the number of top predictions to select.
-        label_map: a list of labels to map logit indices to label strings.
-
-    Returns:
-        a tuple of the top-k labels and probabilities.
-    """
-    # Sort predictions to find top_k
-    top_predictions = tf.argsort(probs, axis=-1, direction='DESCENDING')[:k]
-    # collect the labels of top_k predictions
-    top_labels = tf.gather(label_map, top_predictions, axis=-1)
-    # decode labels
-    top_labels = [label.decode('utf8') for label in top_labels.numpy()]
-    # top_k probabilities of the predictions
-    top_probs = tf.gather(probs, top_predictions, axis=-1).numpy()
-    return tuple(zip(top_labels, top_probs))
-
-def load_gif(file_path, image_size=(224, 224)):
-    """Loads a gif file into a TF tensor.
-
-    Use images resized to match what's expected by your model.
-    The model pages say the "A2" models expect 224 x 224 images at 5 fps
-
-    Args:
-        file_path: path to the location of a gif file.
-        image_size: a tuple of target size.
-
-    Returns:
-        a video of the gif file
-    """
-    # Load a gif file, convert it to a TF tensor
-    raw = tf.io.read_file(file_path)
-    video = tf.io.decode_gif(raw)
-    # Resize the video
-    video = tf.image.resize(video, image_size)
-    # change dtype to a float32
-    # Hub models always want images normalized to [0,1]
-    # ref: https://www.tensorflow.org/hub/common_signatures/images#input
-    video = tf.cast(video, tf.float32) / 255.
-    return video
playgrounds/verify_metal.py
DELETED
@@ -1,14 +0,0 @@
-import tensorflow as tf
-
-cifar = tf.keras.datasets.cifar100
-(x_train, y_train), (x_test, y_test) = cifar.load_data()
-model = tf.keras.applications.ResNet50(
-    include_top=True,
-    weights=None,
-    input_shape=(32, 32, 3),
-    classes=100,
-)
-
-loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
-model.compile(optimizer="adam", loss=loss_fn, metrics=["accuracy"])
-model.fit(x_train, y_train, epochs=5, batch_size=64)
playgrounds/yolo.py
DELETED
@@ -1,40 +0,0 @@
-import keras
-import keras_cv
-import numpy as np
-import tensorflow as tf
-
-from playgrounds.load_video import frames_from_video_file
-
-def main():
-    pretrained_model = keras_cv.models.YOLOV8Detector.from_preset(
-        "yolo_v8_m_pascalvoc", bounding_box_format="xywh"
-    )
-    print('Model loaded.')
-
-    inference_resizing = keras_cv.layers.Resizing(
-        640, 640, pad_to_aspect_ratio=True, bounding_box_format="xywh"
-    )
-
-    class_ids = [
-        "Aeroplane", "Bicycle", "Bird", "Boat", "Bottle", "Bus", "Car", "Cat", "Chair", "Cow", "Dining Table",
-        "Dog", "Horse", "Motorbike", "Person", "Potted Plant", "Sheep", "Sofa", "Train", "Tvmonitor", "Total",
-    ]
-    class_mapping = {i: c for (i, c) in enumerate(class_ids)}
-
-    # raw = tf.io.read_file('assets/IMG_9528.gif')
-    # video = tf.io.decode_gif(raw)
-    video = frames_from_video_file('assets/dataset/Flying/2kNjmM8BnD0_230.0_238.0.mp4', 3, (640,640))
-    image = video[0]
-    image = (image*255).astype(np.uint8)
-    file = tf.io.encode_png(image)
-    tf.io.write_file('out/t.png', file)
-    # image = keras.utils.load_img('assets/nick-morales-BwYcH78rcpI-unsplash.jpg')
-    # image = np.array(image)
-
-    image_batch = inference_resizing([image])
-
-    y_pred = pretrained_model.predict(image_batch)
-    classes = y_pred['classes']
-    boxes = y_pred["boxes"]
-    print(f'Classes: {classes}')
-    print(f'Boxes: {boxes}')
requirements.txt
CHANGED
@@ -1,3 +1,7 @@
-gradio
 tensorflow
+numpy
 opencv-python
+tf-models-official
+ultralytics
+imgviz
+moviepy
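Note that gradio is dropped from requirements.txt even though app.py imports it; on Hugging Face Spaces the Gradio SDK supplies the package itself. To mirror the environment locally (a local-setup assumption, not part of the commit), install it alongside the listed dependencies:

pip install -r requirements.txt
pip install gradio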
weights/classifier-7.keras
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13a9436ec0971fe72b53f03d9dd57b89a7c48a4cb82380e14b298c3e2d712f50
+size 25261904
weights/yolov8n.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31e20dde3def09e2cf938c7be6fe23d9150bbbe503982af13345706515f2ef95
+size 6534387