Spaces:

chiyoi
/

aero-recognize

Sleeping

File size: 2,876 Bytes

import tensorflow as tf
import numpy as np
import tensorflow_hub as hub
import keras

labels_path = keras.utils.get_file(
    fname='labels.txt',
    origin='https://raw.githubusercontent.com/tensorflow/models/f8af2291cced43fc9f1d9b41ddbf772ae7b0d7d2/official/projects/movinet/files/kinetics_600_labels.txt'
)

with open(labels_path, 'r', encoding='utf-8') as file:
    lines = file.read().splitlines()

KINETICS_600_LABELS = np.array([line.strip() for line in lines])
KINETICS_600_LABELS[:20]

def main():
    jumping_jack_path = 'assets/jumping_pack.gif'
    jumping_jack = load_gif(jumping_jack_path)

    id = 'a2'
    mode = 'base'
    version = '3'
    hub_url = f'https://tfhub.dev/tensorflow/movinet/{id}/{mode}/kinetics-600/classification/{version}'
    model = hub.load(hub_url)
    sig = model.signatures['serving_default']
    print('Model loaded.')

    sig(image=jumping_jack[tf.newaxis, :1])
    logits = sig(image=jumping_jack[tf.newaxis, ...])
    logits = logits['classifier_head'][0]
    probs = tf.nn.softmax(logits, axis=-1)
    for label, p in get_top_k(probs):
        print(f'{label:20s}: {p:.3f}')

def get_top_k(probs, k=5, label_map=KINETICS_600_LABELS):
    """Outputs the top k model labels and probabilities on the given video.

    Args:
      probs: probability tensor of shape (num_frames, num_classes) that represents
        the probability of each class on each frame.
      k: the number of top predictions to select.
      label_map: a list of labels to map logit indices to label strings.

    Returns:
      a tuple of the top-k labels and probabilities.
    """
    # Sort predictions to find top_k
    top_predictions = tf.argsort(probs, axis=-1, direction='DESCENDING')[:k]
    # collect the labels of top_k predictions
    top_labels = tf.gather(label_map, top_predictions, axis=-1)
    # decode labels
    top_labels = [label.decode('utf8') for label in top_labels.numpy()]
    # top_k probabilities of the predictions
    top_probs = tf.gather(probs, top_predictions, axis=-1).numpy()
    return tuple(zip(top_labels, top_probs))

def load_gif(file_path, image_size=(224, 224)):
    """Loads a gif file into a TF tensor.

    Use images resized to match what's expected by your model.
    The model pages say the "A2" models expect 224 x 224 images at 5 fps

    Args:
      file_path: path to the location of a gif file.
      image_size: a tuple of target size.

    Returns:
      a video of the gif file
    """
    # Load a gif file, convert it to a TF tensor
    raw = tf.io.read_file(file_path)
    video = tf.io.decode_gif(raw)
    # Resize the video
    video = tf.image.resize(video, image_size)
    # change dtype to a float32
    # Hub models always want images normalized to [0,1]
    # ref: https://www.tensorflow.org/hub/common_signatures/images#input
    video = tf.cast(video, tf.float32) / 255.
    return video