Spaces:
Sleeping
Sleeping
update
Browse files- .gitignore +4 -0
- movinet.py +84 -0
- yolo.py +36 -0
.gitignore
CHANGED
@@ -1,3 +1,7 @@
|
|
1 |
*.swp
|
2 |
.vscode
|
3 |
.DS_Store
|
|
|
|
|
|
|
|
|
|
1 |
*.swp
|
2 |
.vscode
|
3 |
.DS_Store
|
4 |
+
|
5 |
+
# data
|
6 |
+
assets
|
7 |
+
out
|
movinet.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import tensorflow as tf
|
2 |
+
import numpy as np
|
3 |
+
import tensorflow_hub as hub
|
4 |
+
import keras
|
5 |
+
print('Modules loaded.')

# Fetch the Kinetics-600 label list from a URL pinned to a fixed commit of the
# tensorflow/models repository, so the label order cannot drift.
labels_path = keras.utils.get_file(
    fname='labels.txt',
    origin='https://raw.githubusercontent.com/tensorflow/models/f8af2291cced43fc9f1d9b41ddbf772ae7b0d7d2/official/projects/movinet/files/kinetics_600_labels.txt'
)

# The file holds one label per line.
with open(labels_path, 'r', encoding='utf-8') as file:
    lines = file.read().splitlines()

# Array of label strings; get_top_k indexes into it with class indices.
KINETICS_600_LABELS = np.array([line.strip() for line in lines])
print('Labels loaded.')
|
18 |
+
|
19 |
+
|
20 |
+
def get_top_k(probs, k=5, label_map=KINETICS_600_LABELS):
    """Outputs the top k labels and probabilities for a single prediction.

    Args:
      probs: 1-D probability tensor of shape (num_classes,). A per-frame
        tensor of shape (num_frames, num_classes) is NOT supported: the
        `[:k]` slice below is taken along the first axis, so such an input
        must be reduced or indexed down to one row by the caller first.
      k: the number of top predictions to select.
      label_map: a sequence of labels mapping class indices to label strings.

    Returns:
      a tuple of (label, probability) pairs, ordered from most to least
      probable. At most k pairs are returned.
    """
    # Class indices ordered by descending probability, truncated to the k best.
    top_predictions = tf.argsort(probs, axis=-1, direction='DESCENDING')[:k]
    # Look up the label for each selected class index.
    top_labels = tf.gather(label_map, top_predictions, axis=-1)
    # The gathered labels are decoded from bytes to str.
    top_labels = [label.decode('utf8') for label in top_labels.numpy()]
    # Probabilities of the selected classes, in the same order as the labels.
    top_probs = tf.gather(probs, top_predictions, axis=-1).numpy()
    return tuple(zip(top_labels, top_probs))
|
41 |
+
|
42 |
+
|
43 |
+
def load_gif(file_path, image_size=(224, 224)):
    """Reads a gif from disk and returns its frames as a scaled float tensor.

    Pick an image size that matches what the target model expects.
    The model pages say the "A2" models expect 224 x 224 images at 5 fps

    Args:
      file_path: path to the location of a gif file.
      image_size: a tuple of target size passed to `tf.image.resize`.

    Returns:
      the decoded frames, resized to `image_size`, cast to float32 and
      divided by 255.
    """
    # Read the raw bytes and decode them into a frame tensor.
    gif_bytes = tf.io.read_file(file_path)
    frames = tf.io.decode_gif(gif_bytes)
    # Bring every frame to the requested spatial size.
    resized = tf.image.resize(frames, image_size)
    # Hub models always want images normalized to [0,1]
    # ref: https://www.tensorflow.org/hub/common_signatures/images#input
    return tf.cast(resized, tf.float32) / 255.
|
66 |
+
|
67 |
+
|
68 |
+
# NOTE(review): the file name reads "jumping_pack", not "jumping_jack" —
# confirm it matches the actual asset on disk before changing it.
jumping_jack_path = 'assets/jumping_pack.gif'
jumping_jack = load_gif(jumping_jack_path)

# Named `model_id` rather than `id` so the builtin `id()` is not shadowed.
model_id = 'a2'
mode = 'base'
version = '3'
hub_url = f'https://tfhub.dev/tensorflow/movinet/{model_id}/{mode}/kinetics-600/classification/{version}'
model = hub.load(hub_url)
sig = model.signatures['serving_default']
print('Model loaded.')

# First call uses only the first frame and its result is discarded
# (presumably a warm-up run — confirm).
sig(image=jumping_jack[tf.newaxis, :1])
# Real run: add a leading batch axis and feed the whole clip.
logits = sig(image=jumping_jack[tf.newaxis, ...])
# Take the classifier output for the single batch entry.
logits = logits['classifier_head'][0]
probs = tf.nn.softmax(logits, axis=-1)
for label, p in get_top_k(probs):
    print(f'{label:20s}: {p:.3f}')
|
yolo.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import keras
|
2 |
+
import keras_cv
|
3 |
+
import numpy as np
|
4 |
+
import tensorflow as tf
|
5 |
+
print('Modules loaded.')

# Pretrained YOLOv8 detector loaded from the "yolo_v8_m_pascalvoc" preset;
# boxes are reported in "xywh" format.
pretrained_model = keras_cv.models.YOLOV8Detector.from_preset(
    "yolo_v8_m_pascalvoc",
    bounding_box_format="xywh",
)
print('Model loaded.')

# Resizes inputs to 640 x 640, padding to keep the aspect ratio.
inference_resizing = keras_cv.layers.Resizing(
    640,
    640,
    pad_to_aspect_ratio=True,
    bounding_box_format="xywh",
)

# Class names, listed in class-index order.
class_ids = [
    "Aeroplane",
    "Bicycle",
    "Bird",
    "Boat",
    "Bottle",
    "Bus",
    "Car",
    "Cat",
    "Chair",
    "Cow",
    "Dining Table",
    "Dog",
    "Horse",
    "Motorbike",
    "Person",
    "Potted Plant",
    "Sheep",
    "Sofa",
    "Train",
    "Tvmonitor",
    "Total",
]
# Index -> class name lookup.
class_mapping = dict(enumerate(class_ids))
|
21 |
+
|
22 |
+
# Decode the GIF and keep only its first frame as the detector input.
gif_bytes = tf.io.read_file('assets/IMG_9528.gif')
frames = tf.io.decode_gif(gif_bytes)
image = frames[0]

# Save that frame as a PNG so the input can be inspected.
png_bytes = tf.io.encode_png(image)
tf.io.write_file('out/t.png', png_bytes)

# Alternative input: load a still image instead of a GIF frame.
# image = keras.utils.load_img('assets/nick-morales-BwYcH78rcpI-unsplash.jpg')
# image = np.array(image)

# Resize the single image as a batch of one, then run detection.
image_batch = inference_resizing([image])
y_pred = pretrained_model.predict(image_batch)

classes, boxes = y_pred['classes'], y_pred["boxes"]
print(f'Classes: {classes}')
print(f'Boxes: {boxes}')
|