Spaces:

chiyoi
/

aero-recognize

Sleeping

App Files Files Community

chiyoi committed on Jan 4

Commit

4094dc9

•

1 Parent(s): 0faef99

update

Browse files

Files changed (3) hide show

.gitignore +4 -0
movinet.py +84 -0
yolo.py +36 -0

.gitignore CHANGED Viewed

@@ -1,3 +1,7 @@
 *.swp
 .vscode
 .DS_Store

 *.swp
 .vscode
 .DS_Store
+# data
+assets
+out

movinet.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import tensorflow as tf
+import numpy as np
+import tensorflow_hub as hub
+import keras
+print('Modules loaded.')
+# Fetch the Kinetics-600 label file; the URL is pinned to a specific commit
+# of tensorflow/models.
+labels_path = keras.utils.get_file(
+    fname='labels.txt',
+    origin='https://raw.githubusercontent.com/tensorflow/models/f8af2291cced43fc9f1d9b41ddbf772ae7b0d7d2/official/projects/movinet/files/kinetics_600_labels.txt'
+)
+# One label per line. get_top_k indexes this array with class indices, so
+# the line order of the file is significant.
+with open(labels_path, 'r', encoding='utf-8') as file:
+    lines = file.read().splitlines()
+KINETICS_600_LABELS = np.array([line.strip() for line in lines])
+print('Labels loaded.')
+def get_top_k(probs, k=5, label_map=KINETICS_600_LABELS):
+    """Return the k most probable labels paired with their probabilities.
+    Args:
+      probs: 1-D probability tensor of shape (num_classes,). The top-k slice
+        is taken along the first axis, so a per-frame tensor of shape
+        (num_frames, num_classes) is not handled by this function.
+      k: the number of top predictions to select.
+      label_map: array of label strings, indexed by class index.
+    Returns:
+      a tuple of (label, probability) pairs, most probable first.
+    """
+    # Class indices ordered from most to least probable, truncated to k.
+    top_indices = tf.argsort(probs, axis=-1, direction='DESCENDING')[:k]
+    # Gathering from the label array yields byte strings once converted to
+    # numpy, hence the explicit decode to str.
+    label_bytes = tf.gather(label_map, top_indices, axis=-1).numpy()
+    top_labels = [raw_label.decode('utf8') for raw_label in label_bytes]
+    # Probabilities at the same indices, so labels and values stay aligned.
+    top_probs = tf.gather(probs, top_indices, axis=-1).numpy()
+    return tuple(zip(top_labels, top_probs))
+def load_gif(file_path, image_size=(224, 224)):
+    """Read a GIF file and return its frames as a resized float32 tensor.
+    Resize to whatever the target model expects; the model pages say the
+    "A2" models expect 224 x 224 images at 5 fps.
+    Args:
+      file_path: path to the GIF file to read.
+      image_size: target size handed to tf.image.resize.
+    Returns:
+      the decoded frames, resized to image_size, cast to float32 and
+      divided by 255.
+    """
+    # Read the raw bytes and decode them into a stack of frames.
+    frames = tf.io.decode_gif(tf.io.read_file(file_path))
+    resized = tf.image.resize(frames, image_size)
+    # Hub models always want images normalized to [0,1]
+    # ref: https://www.tensorflow.org/hub/common_signatures/images#input
+    return tf.cast(resized, tf.float32) / 255.
+# Sample clip to classify.
+jumping_jack_path = 'assets/jumping_pack.gif'
+jumping_jack = load_gif(jumping_jack_path)
+# Components of the TF Hub model URL. `model_id` avoids shadowing the
+# builtin `id`.
+model_id = 'a2'
+mode = 'base'
+version = '3'
+hub_url = f'https://tfhub.dev/tensorflow/movinet/{model_id}/{mode}/kinetics-600/classification/{version}'
+model = hub.load(hub_url)
+sig = model.signatures['serving_default']
+print('Model loaded.')
+# Run once on only the first frame (with a leading batch axis); the result
+# is discarded. NOTE(review): presumably a warm-up call - confirm.
+sig(image=jumping_jack[tf.newaxis, :1])
+# Run on the whole clip as a batch of one and take that batch element's
+# classifier output.
+logits = sig(image=jumping_jack[tf.newaxis, ...])
+logits = logits['classifier_head'][0]
+probs = tf.nn.softmax(logits, axis=-1)
+# Print the top predictions, one "label: probability" line each.
+for label, p in get_top_k(probs):
+    print(f'{label:20s}: {p:.3f}')

yolo.py ADDED Viewed

	@@ -0,0 +1,36 @@

+import keras
+import keras_cv
+import numpy as np
+import tensorflow as tf
+print('Modules loaded.')
+# Pretrained YOLOv8 detector loaded from the "yolo_v8_m_pascalvoc" preset,
+# with boxes in "xywh" format.
+pretrained_model = keras_cv.models.YOLOV8Detector.from_preset(
+    "yolo_v8_m_pascalvoc",
+    bounding_box_format="xywh",
+)
+print('Model loaded.')
+# Resizes inputs to 640 x 640, padding to keep the aspect ratio.
+inference_resizing = keras_cv.layers.Resizing(
+    640,
+    640,
+    pad_to_aspect_ratio=True,
+    bounding_box_format="xywh",
+)
+# Class names in index order.
+# NOTE(review): the final entry "Total" does not look like an object class -
+# confirm whether it belongs here.
+class_ids = [
+    "Aeroplane", "Bicycle", "Bird", "Boat", "Bottle", "Bus", "Car",
+    "Cat", "Chair", "Cow", "Dining Table", "Dog", "Horse", "Motorbike",
+    "Person", "Potted Plant", "Sheep", "Sofa", "Train", "Tvmonitor",
+    "Total",
+]
+# Index -> class name lookup.
+class_mapping = dict(enumerate(class_ids))
+# Decode the GIF and use its first frame as the image to run detection on.
+gif_bytes = tf.io.read_file('assets/IMG_9528.gif')
+video = tf.io.decode_gif(gif_bytes)
+image = video[0]
+# Save that frame as a PNG so the exact model input can be inspected.
+png_bytes = tf.io.encode_png(image)
+tf.io.write_file('out/t.png', png_bytes)
+# Alternative input, kept for reference (load a still image instead):
+# image = keras.utils.load_img('assets/nick-morales-BwYcH78rcpI-unsplash.jpg')
+# image = np.array(image)
+# Resize a one-image batch, then run the detector on it.
+image_batch = inference_resizing([image])
+y_pred = pretrained_model.predict(image_batch)
+boxes = y_pred["boxes"]
+classes = y_pred['classes']
+print(f'Classes: {classes}')
+print(f'Boxes: {boxes}')