Spaces:
Sleeping
Sleeping
tidy
Browse files- playgrounds/load_video.py +113 -0
- movinet.py → playgrounds/movinet.py +19 -19
- yolo.py → playgrounds/yolo.py +0 -0
playgrounds/load_video.py
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import random
|
2 |
+
from typing import *
|
3 |
+
import numpy as np
|
4 |
+
import tensorflow as tf
|
5 |
+
import cv2
|
6 |
+
from pathlib import Path
|
7 |
+
print('Modules loaded.')
|
8 |
+
|
9 |
+
SPLIT_RATIO = 0.7
|
10 |
+
BATCH_SIZE = 8
|
11 |
+
NUM_FRAMES = 8
|
12 |
+
|
13 |
+
def main():
    """
    Build the batched training dataset of video clips.

    Wraps the 'training' generator produced by frame_generator in a
    tf.data.Dataset and groups its elements into batches of BATCH_SIZE.
    The resulting dataset is not returned or consumed here.
    """
    dataset_root = Path('assets/dataset')

    # One element = (frames, label): a rank-4 float32 tensor whose last axis
    # has 3 channels, and a scalar int16 class id.
    clip_spec = tf.TensorSpec(shape=(None, None, None, 3), dtype=tf.float32)
    label_spec = tf.TensorSpec(shape=(), dtype=tf.int16)

    # from_generator expects a callable, which is what frame_generator returns.
    make_training_clips = frame_generator(dataset_root, NUM_FRAMES, 'training')
    train_ds = tf.data.Dataset.from_generator(
        make_training_clips,
        output_signature=(clip_spec, label_spec),
    )
    train_ds = train_ds.batch(BATCH_SIZE)
|
21 |
+
|
22 |
+
|
23 |
+
def format_frames(frame, output_size):
    """
    Convert a single video frame to float32 and letterbox it to a fixed size.

    Args:
      frame: Image to convert, resize and pad.
      output_size: Sequence unpacked into the positional size arguments of
        tf.image.resize_with_pad (target height, then target width).

    Return:
      The float32 frame, resized and padded to the requested output size.
    """
    as_float = tf.image.convert_image_dtype(frame, tf.float32)
    padded = tf.image.resize_with_pad(as_float, *output_size)
    return padded
|
37 |
+
|
38 |
+
|
39 |
+
def frames_from_video_file(video_path, n_frames, output_size=(224, 224), frame_step=15):
    """
    Sample a clip of evenly spaced frames from one video file.

    A random start position is chosen so that the whole clip fits inside the
    video when the video is long enough; otherwise sampling starts at frame 0
    and frames that cannot be read are replaced by all-zero frames.

    Args:
      video_path: File path to the video.
      n_frames: Number of frames to be created per video file.
      output_size: Pixel size of the output frame image, forwarded to
        format_frames.
      frame_step: Number of source frames advanced between two sampled frames.

    Return:
      A NumPy array of frames in the shape of (n_frames, height, width, channels),
      with the last axis reversed (OpenCV decodes as BGR, the result is RGB).

    Raises:
      ValueError: If not even the first frame can be read from video_path.
    """
    src = cv2.VideoCapture(str(video_path))
    try:
        # OpenCV reports properties as floats; random.randint needs real ints
        # (non-integer arguments are rejected on recent Python versions).
        video_length = int(src.get(cv2.CAP_PROP_FRAME_COUNT))

        # Span of source frames covered by the clip, first to last sample.
        need_length = 1 + (n_frames - 1) * frame_step

        if need_length > video_length:
            start = 0
        else:
            max_start = video_length - need_length
            # randint includes both endpoints, so max_start itself is the last
            # position from which the full clip still fits in the video.
            start = random.randint(0, max_start)

        src.set(cv2.CAP_PROP_POS_FRAMES, start)
        # ret is a boolean indicating whether read was successful, frame is the image itself
        ret, frame = src.read()
        if not ret:
            # Without a first frame there is nothing to shape the padding after.
            raise ValueError(f'Could not read a frame from video: {video_path}')
        result = [format_frames(frame, output_size)]

        for _ in range(n_frames - 1):
            # Skip ahead; only the last frame read in this step is kept.
            for _ in range(frame_step):
                ret, frame = src.read()
            if ret:
                result.append(format_frames(frame, output_size))
            else:
                # Ran past the end of the video: pad with a black frame.
                result.append(np.zeros_like(result[0]))
    finally:
        # Always free the capture handle, even if decoding or formatting fails.
        src.release()

    # Reverse the channel axis: BGR -> RGB.
    return np.array(result)[..., [2, 1, 0]]
|
82 |
+
|
83 |
+
def frame_generator(data_dir: Path, n_frames: int, split: Literal['training', 'validation']):
    """
    Create a generator function yielding (frames, label) pairs for one split.

    Every sub-directory of data_dir is treated as one class; its name maps to
    an integer id by alphabetical position. The files of each class are split
    by SPLIT_RATIO: the first part goes to 'training', the rest to
    'validation'.

    Args:
      data_dir: Directory containing one sub-directory per class.
      n_frames: Number of frames to sample from each video.
      split: Which part of the data the generator iterates over.

    Return:
      A zero-argument callable. Each call returns a fresh generator that
      visits the videos of the chosen split in random order and yields
      (video_frames, label) for each.
    """
    # Only directories are classes; a stray file would break iterdir() below.
    class_dirs = sorted(
        (entry for entry in data_dir.iterdir() if entry.is_dir()),
        key=lambda entry: entry.name,
    )
    class_ids_for_name = {
        class_dir.name: i
        for i, class_dir in enumerate(class_dirs)
    }

    data = {'training': {}, 'validation': {}}
    for class_dir in class_dirs:
        # List each class once and sort it: iterdir() order is arbitrary, so
        # two separate unsorted listings are not guaranteed to produce
        # disjoint, reproducible splits.
        paths = sorted(class_dir.iterdir())
        cut = int(len(paths) * SPLIT_RATIO)
        data['training'][class_dir.name] = paths[:cut]
        data['validation'][class_dir.name] = paths[cut:]

    def generator():
        pairs = [
            (path, name)
            for name, paths in data[split].items()
            for path in paths
        ]
        # Reshuffled on every call, i.e. every time the dataset is re-iterated.
        random.shuffle(pairs)
        for path, name in pairs:
            video_frames = frames_from_video_file(path, n_frames)
            label = class_ids_for_name[name]  # Encode labels
            yield video_frames, label

    return generator
|
112 |
+
|
113 |
+
main()
|
movinet.py → playgrounds/movinet.py
RENAMED
@@ -16,6 +16,24 @@ KINETICS_600_LABELS = np.array([line.strip() for line in lines])
|
|
16 |
KINETICS_600_LABELS[:20]
|
17 |
print('Labels loaded.')
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
def get_top_k(probs, k=5, label_map=KINETICS_600_LABELS):
|
21 |
"""Outputs the top k model labels and probabilities on the given video.
|
@@ -39,7 +57,6 @@ def get_top_k(probs, k=5, label_map=KINETICS_600_LABELS):
|
|
39 |
top_probs = tf.gather(probs, top_predictions, axis=-1).numpy()
|
40 |
return tuple(zip(top_labels, top_probs))
|
41 |
|
42 |
-
|
43 |
def load_gif(file_path, image_size=(224, 224)):
|
44 |
"""Loads a gif file into a TF tensor.
|
45 |
|
@@ -64,21 +81,4 @@ def load_gif(file_path, image_size=(224, 224)):
|
|
64 |
video = tf.cast(video, tf.float32) / 255.
|
65 |
return video
|
66 |
|
67 |
-
|
68 |
-
jumping_jack_path = 'assets/jumping_pack.gif'
|
69 |
-
jumping_jack = load_gif(jumping_jack_path)
|
70 |
-
|
71 |
-
id = 'a2'
|
72 |
-
mode = 'base'
|
73 |
-
version = '3'
|
74 |
-
hub_url = f'https://tfhub.dev/tensorflow/movinet/{id}/{mode}/kinetics-600/classification/{version}'
|
75 |
-
model = hub.load(hub_url)
|
76 |
-
sig = model.signatures['serving_default']
|
77 |
-
print('Model loaded.')
|
78 |
-
|
79 |
-
sig(image=jumping_jack[tf.newaxis, :1])
|
80 |
-
logits = sig(image=jumping_jack[tf.newaxis, ...])
|
81 |
-
logits = logits['classifier_head'][0]
|
82 |
-
probs = tf.nn.softmax(logits, axis=-1)
|
83 |
-
for label, p in get_top_k(probs):
|
84 |
-
print(f'{label:20s}: {p:.3f}')
|
|
|
16 |
KINETICS_600_LABELS[:20]
|
17 |
print('Labels loaded.')
|
18 |
|
19 |
+
def main():
    """
    Classify the sample GIF with a MoViNet model loaded from TF Hub.

    Loads the GIF, fetches the model's 'serving_default' signature, runs it on
    the clip and prints the labels and probabilities returned by get_top_k.
    """
    gif_path = 'assets/jumping_pack.gif'
    clip = load_gif(gif_path)

    model_id, mode, version = 'a2', 'base', '3'
    hub_url = f'https://tfhub.dev/tensorflow/movinet/{model_id}/{mode}/kinetics-600/classification/{version}'
    model = hub.load(hub_url)
    sig = model.signatures['serving_default']
    print('Model loaded.')

    # First call uses only the first frame and its result is discarded
    # (presumably a warm-up run -- confirm before removing).
    sig(image=clip[tf.newaxis, :1])

    # Full clip with a leading batch axis; keep the first batch entry.
    outputs = sig(image=clip[tf.newaxis, ...])
    logits = outputs['classifier_head'][0]
    probs = tf.nn.softmax(logits, axis=-1)
    for label, p in get_top_k(probs):
        print(f'{label:20s}: {p:.3f}')
|
37 |
|
38 |
def get_top_k(probs, k=5, label_map=KINETICS_600_LABELS):
|
39 |
"""Outputs the top k model labels and probabilities on the given video.
|
|
|
57 |
top_probs = tf.gather(probs, top_predictions, axis=-1).numpy()
|
58 |
return tuple(zip(top_labels, top_probs))
|
59 |
|
|
|
60 |
def load_gif(file_path, image_size=(224, 224)):
|
61 |
"""Loads a gif file into a TF tensor.
|
62 |
|
|
|
81 |
video = tf.cast(video, tf.float32) / 255.
|
82 |
return video
|
83 |
|
84 |
+
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
yolo.py → playgrounds/yolo.py
RENAMED
File without changes
|