chiyoi committed on
Commit
4094dc9
1 Parent(s): 0faef99
Files changed (3) hide show
  1. .gitignore +4 -0
  2. movinet.py +84 -0
  3. yolo.py +36 -0
.gitignore CHANGED
@@ -1,3 +1,7 @@
1
  *.swp
2
  .vscode
3
  .DS_Store
 
 
 
 
 
1
  *.swp
2
  .vscode
3
  .DS_Store
4
+
5
+ # data
6
+ assets
7
+ out
movinet.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ import numpy as np
3
+ import tensorflow_hub as hub
4
+ import keras
5
print('Modules loaded.')

# Fetch the Kinetics-600 label list; the URL is pinned to one commit of
# tensorflow/models so the label order cannot change underneath us.
labels_path = keras.utils.get_file(
    fname='labels.txt',
    origin='https://raw.githubusercontent.com/tensorflow/models/f8af2291cced43fc9f1d9b41ddbf772ae7b0d7d2/official/projects/movinet/files/kinetics_600_labels.txt'
)

# The file holds one label per line.
with open(labels_path, 'r', encoding='utf-8') as file:
    lines = file.read().splitlines()

# Array of label strings; get_top_k() indexes it by class id.
# (The bare `KINETICS_600_LABELS[:20]` preview expression that used to follow
# had no effect outside a notebook and was dropped.)
KINETICS_600_LABELS = np.array([line.strip() for line in lines])
print('Labels loaded.')
18
+
19
+
20
def get_top_k(probs, k=5, label_map=KINETICS_600_LABELS):
    """Return the k most probable labels together with their probabilities.

    Args:
      probs: 1-D probability tensor of shape (num_classes,), e.g. the softmax
        of the classifier logits for a single clip.
      k: the number of top predictions to select.
      label_map: array of label strings indexed by class id.

    Returns:
      a tuple of (label, probability) pairs, most probable first.

    Raises:
      ValueError: if `probs` is not 1-D.
    """
    probs = tf.convert_to_tensor(probs)
    # The `[:k]` slice below cuts the leading axis, so anything other than a
    # flat class vector would silently rank the wrong axis and then fail while
    # decoding labels. Reject it up front with a clear message instead.
    if probs.shape.rank != 1:
        raise ValueError(
            f'probs must be 1-D (num_classes,), got shape {probs.shape}'
        )
    # Class ids ordered from most to least probable, truncated to k.
    top_predictions = tf.argsort(probs, axis=-1, direction='DESCENDING')[:k]
    # Look up the label of each selected class id.
    top_labels = tf.gather(label_map, top_predictions, axis=-1)
    # The gathered labels come back as bytes; turn them into str.
    top_labels = [label.decode('utf8') for label in top_labels.numpy()]
    # Probabilities of the same class ids, in the same order.
    top_probs = tf.gather(probs, top_predictions, axis=-1).numpy()
    return tuple(zip(top_labels, top_probs))
41
+
42
+
43
def load_gif(file_path, image_size=(224, 224)):
    """Read a gif from disk and return it as a resized, rescaled TF tensor.

    Pick `image_size` to match what the target model expects; the model pages
    say the "A2" models expect 224 x 224 images at 5 fps. This function only
    resizes frames; it does not resample the frame rate.

    Args:
      file_path: path to the location of a gif file.
      image_size: a tuple of target size.

    Returns:
      the gif's frames as a float32 tensor, resized to `image_size` and
      divided by 255.
    """
    # Decode the raw file bytes into frames.
    frames = tf.io.decode_gif(tf.io.read_file(file_path))
    resized = tf.image.resize(frames, image_size)
    # Hub models want images normalized to [0, 1]
    # ref: https://www.tensorflow.org/hub/common_signatures/images#input
    return tf.cast(resized, tf.float32) / 255.
66
+
67
+
68
# NOTE(review): the file name reads "jumping_pack" while the variable is
# "jumping_jack" -- confirm the asset name on disk before changing either.
jumping_jack_path = 'assets/jumping_pack.gif'
jumping_jack = load_gif(jumping_jack_path)

# Pieces of the TF Hub handle for the MoViNet classifier.
# `model_id` replaces the former `id`, which shadowed the builtin id().
model_id = 'a2'
mode = 'base'
version = '3'
hub_url = f'https://tfhub.dev/tensorflow/movinet/{model_id}/{mode}/kinetics-600/classification/{version}'
model = hub.load(hub_url)
sig = model.signatures['serving_default']
print('Model loaded.')

# First call runs on a single frame and its result is discarded --
# presumably a warm-up before the real inference; TODO confirm.
sig(image=jumping_jack[tf.newaxis, :1])
# Add a leading batch axis and classify the whole clip.
logits = sig(image=jumping_jack[tf.newaxis, ...])
# Keep the classifier output for the only item in the batch.
logits = logits['classifier_head'][0]
probs = tf.nn.softmax(logits, axis=-1)
for label, p in get_top_k(probs):
    print(f'{label:20s}: {p:.3f}')
yolo.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import keras
2
+ import keras_cv
3
+ import numpy as np
4
+ import tensorflow as tf
5
print('Modules loaded.')

# Pretrained YOLOv8 detector loaded from the "yolo_v8_m_pascalvoc" preset,
# configured for boxes in "xywh" format.
pretrained_model = keras_cv.models.YOLOV8Detector.from_preset(
    "yolo_v8_m_pascalvoc",
    bounding_box_format="xywh",
)
print('Model loaded.')

# Resize inputs to 640x640, padding to keep the aspect ratio.
inference_resizing = keras_cv.layers.Resizing(
    640,
    640,
    pad_to_aspect_ratio=True,
    bounding_box_format="xywh",
)

# Class names in index order; class_mapping maps index -> name.
class_ids = [
    "Aeroplane",
    "Bicycle",
    "Bird",
    "Boat",
    "Bottle",
    "Bus",
    "Car",
    "Cat",
    "Chair",
    "Cow",
    "Dining Table",
    "Dog",
    "Horse",
    "Motorbike",
    "Person",
    "Potted Plant",
    "Sheep",
    "Sofa",
    "Train",
    "Tvmonitor",
    "Total",
]
class_mapping = dict(enumerate(class_ids))

# Take the first frame of the gif as the test image and also dump it to
# out/t.png for inspection.
raw = tf.io.read_file('assets/IMG_9528.gif')
video = tf.io.decode_gif(raw)
image = video[0]
file = tf.io.encode_png(image)
tf.io.write_file('out/t.png', file)
# Alternative input kept for reference (a still photo instead of a gif frame):
# image = keras.utils.load_img('assets/nick-morales-BwYcH78rcpI-unsplash.jpg')
# image = np.array(image)

# Wrap the single image in a list so it goes through the layer as a batch.
image_batch = inference_resizing([image])

y_pred = pretrained_model.predict(image_batch)
classes, boxes = y_pred['classes'], y_pred["boxes"]
print(f'Classes: {classes}')
print(f'Boxes: {boxes}')