chiyoi committed on
Commit
5f3320a
•
1 Parent(s): 4094dc9
playgrounds/load_video.py ADDED
@@ -0,0 +1,113 @@
+import random
+from typing import Literal
+import numpy as np
+import tensorflow as tf
+import cv2
+from pathlib import Path
+print('Modules loaded.')
+
+SPLIT_RATIO = 0.7
+BATCH_SIZE = 8
+NUM_FRAMES = 8
+
+def main():
+    data_dir = Path('assets/dataset')
+    output_signature = (
+        tf.TensorSpec(shape=(None, None, None, 3), dtype=tf.float32),
+        tf.TensorSpec(shape=(), dtype=tf.int16),
+    )
+    train_ds = tf.data.Dataset.from_generator(frame_generator(data_dir, NUM_FRAMES, 'training'), output_signature=output_signature)
+    train_ds = train_ds.batch(BATCH_SIZE)
+
+
+def format_frames(frame, output_size):
+    """
+    Pad and resize an image from a video.
+
+    Args:
+        frame: Image that needs to be resized and padded.
+        output_size: Pixel size of the output frame image.
+
+    Return:
+        Formatted frame with padding of the specified output size.
+    """
+    frame = tf.image.convert_image_dtype(frame, tf.float32)
+    frame = tf.image.resize_with_pad(frame, *output_size)
+    return frame
+
+
+def frames_from_video_file(video_path, n_frames, output_size=(224, 224), frame_step=15):
+    """
+    Creates frames from the given video file.
+
+    Args:
+        video_path: File path to the video.
+        n_frames: Number of frames to be created per video file.
+        output_size: Pixel size of the output frame image.
+
+    Return:
+        A NumPy array of frames in the shape of (n_frames, height, width, channels).
+    """
+    # Read each video frame by frame
+    result = []
+    src = cv2.VideoCapture(str(video_path))
+
+    video_length = int(src.get(cv2.CAP_PROP_FRAME_COUNT))
+
+    need_length = 1 + (n_frames - 1) * frame_step
+
+    if need_length > video_length:
+        start = 0
+    else:
+        max_start = video_length - need_length
+        start = random.randint(0, max_start)  # randint is inclusive on both ends
+
+    src.set(cv2.CAP_PROP_POS_FRAMES, start)
+    # ret is a boolean indicating whether read was successful, frame is the image itself
+    ret, frame = src.read()
+    result.append(format_frames(frame, output_size))
+
+    for _ in range(n_frames - 1):
+        for _ in range(frame_step):
+            ret, frame = src.read()
+        if ret:
+            frame = format_frames(frame, output_size)
+            result.append(frame)
+        else:
+            result.append(np.zeros_like(result[0]))
+    src.release()
+    result = np.array(result)[..., [2, 1, 0]]  # reorder channels from BGR (OpenCV) to RGB
+
+    return result
+
+def frame_generator(data_dir: Path, n_frames: int, split: Literal['training', 'validation']):
+    class_names = sorted([x.name for x in data_dir.iterdir()])
+    class_ids_for_name = {
+        name: i
+        for i, name in enumerate(class_names)
+    }
+    data = {
+        'training': {
+            a.name: (lambda ps: ps[:int(len(ps) * SPLIT_RATIO)])([x for x in a.iterdir()])
+            for a in data_dir.iterdir()
+        },
+        'validation': {
+            a.name: (lambda ps: ps[int(len(ps) * SPLIT_RATIO):])([x for x in a.iterdir()])
+            for a in data_dir.iterdir()
+        },
+    }
+
+    def generator():
+        pairs = [
+            (path, name)
+            for name, paths in data[split].items()
+            for path in paths
+        ]
+        random.shuffle(pairs)
+        for path, name in pairs:
+            video_frames = frames_from_video_file(path, n_frames)
+            label = class_ids_for_name[name]  # Encode labels
+            yield video_frames, label
+    return generator
+
+main()
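A minimal sketch of exercising the new loader, assuming assets/dataset/ holds one sub-directory of video files per class (the layout frame_generator expects) and that the module is importable as playgrounds.load_video; the import path is an assumption, not part of the commit:

from pathlib import Path
import tensorflow as tf

from playgrounds.load_video import frame_generator, BATCH_SIZE, NUM_FRAMES  # hypothetical import path

data_dir = Path('assets/dataset')
output_signature = (
    tf.TensorSpec(shape=(None, None, None, 3), dtype=tf.float32),
    tf.TensorSpec(shape=(), dtype=tf.int16),
)
# With NUM_FRAMES=8 and frame_step=15, each clip spans 1 + 7 * 15 = 106 source frames;
# shorter videos are zero-padded by frames_from_video_file.
val_ds = tf.data.Dataset.from_generator(
    frame_generator(data_dir, NUM_FRAMES, 'validation'),
    output_signature=output_signature,
).batch(BATCH_SIZE)

frames, labels = next(iter(val_ds))
print(frames.shape)  # e.g. (8, 8, 224, 224, 3): batch, frames, height, width, RGB
print(labels)        # int16 class ids from class_ids_for_name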
movinet.py → playgrounds/movinet.py RENAMED
@@ -16,6 +16,24 @@ KINETICS_600_LABELS = np.array([line.strip() for line in lines])
 KINETICS_600_LABELS[:20]
 print('Labels loaded.')
 
+def main():
+    jumping_jack_path = 'assets/jumping_pack.gif'
+    jumping_jack = load_gif(jumping_jack_path)
+
+    id = 'a2'
+    mode = 'base'
+    version = '3'
+    hub_url = f'https://tfhub.dev/tensorflow/movinet/{id}/{mode}/kinetics-600/classification/{version}'
+    model = hub.load(hub_url)
+    sig = model.signatures['serving_default']
+    print('Model loaded.')
+
+    sig(image=jumping_jack[tf.newaxis, :1])
+    logits = sig(image=jumping_jack[tf.newaxis, ...])
+    logits = logits['classifier_head'][0]
+    probs = tf.nn.softmax(logits, axis=-1)
+    for label, p in get_top_k(probs):
+        print(f'{label:20s}: {p:.3f}')
 
 def get_top_k(probs, k=5, label_map=KINETICS_600_LABELS):
     """Outputs the top k model labels and probabilities on the given video.
@@ -39,7 +57,6 @@ def get_top_k(probs, k=5, label_map=KINETICS_600_LABELS):
     top_probs = tf.gather(probs, top_predictions, axis=-1).numpy()
     return tuple(zip(top_labels, top_probs))
 
-
 def load_gif(file_path, image_size=(224, 224)):
     """Loads a gif file into a TF tensor.
 
@@ -64,21 +81,4 @@ def load_gif(file_path, image_size=(224, 224)):
     video = tf.cast(video, tf.float32) / 255.
     return video
 
-
-jumping_jack_path = 'assets/jumping_pack.gif'
-jumping_jack = load_gif(jumping_jack_path)
-
-id = 'a2'
-mode = 'base'
-version = '3'
-hub_url = f'https://tfhub.dev/tensorflow/movinet/{id}/{mode}/kinetics-600/classification/{version}'
-model = hub.load(hub_url)
-sig = model.signatures['serving_default']
-print('Model loaded.')
-
-sig(image=jumping_jack[tf.newaxis, :1])
-logits = sig(image=jumping_jack[tf.newaxis, ...])
-logits = logits['classifier_head'][0]
-probs = tf.nn.softmax(logits, axis=-1)
-for label, p in get_top_k(probs):
-    print(f'{label:20s}: {p:.3f}')
+main()
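With the demo wrapped in main(), get_top_k can also be exercised on its own with a dummy probability vector; a minimal sketch, assuming the module is importable as playgrounds.movinet (importing it still runs main() once, since the call sits at module level and loads the Hub model):

import tensorflow as tf

from playgrounds.movinet import get_top_k  # hypothetical import path; importing also runs the module's main()

# Uniform dummy distribution over the 600 Kinetics classes, just to exercise the helper.
dummy_probs = tf.fill([600], 1.0 / 600.0)
for label, p in get_top_k(dummy_probs, k=3):
    print(f'{label:20s}: {p:.3f}')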
yolo.py → playgrounds/yolo.py RENAMED
File without changes