# app.py: sign language recognition with MediaPipe landmark extraction and a TFLite classifier
import os
import time

import cv2
import gradio as gr
import mediapipe as mp
import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt

# Initialize MediaPipe solutions
mp_holistic = mp.solutions.holistic
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose
mp_face_mesh = mp.solutions.face_mesh
hands = mp_hands.Hands()
pose = mp_pose.Pose()
face_mesh = mp_face_mesh.FaceMesh()
# Get the absolute path to the directory containing app.py
current_dir = os.path.dirname(os.path.abspath(__file__))
# Define the filename of the TFLite model
model_filename = "model.tflite"
# Construct the full path to the TFLite model file
model_path = os.path.join(current_dir, model_filename)
# Load the TFLite model using the interpreter
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()
# Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
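# Optional sanity check: print the tensor shapes the exported model expects, to confirm whether it
# takes raw (n_frames, 543, 3) landmark arrays or already-preprocessed windows.
print("TFLite input shape:", input_details[0]['shape'], "| output shape:", output_details[0]['shape'])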
N_ROWS = 543
N_DIMS = 3
DIM_NAMES = ['x', 'y', 'z']
SEED = 42
NUM_CLASSES = 250
INPUT_SIZE = 64
BATCH_ALL_SIGNS_N = 4
BATCH_SIZE = 256
N_EPOCHS = 100
LR_MAX = 1e-3
N_WARMUP_EPOCHS = 0
WD_RATIO = 0.05
MASK_VAL = 4237
USE_TYPES = ['left_hand', 'pose', 'right_hand']
START_IDX = 468
LIPS_IDXS0 = np.array([
61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
])
index_to_class = {
"TV": 0, "after": 1, "airplane": 2, "all": 3, "alligator": 4, "animal": 5, "another": 6, "any": 7, "apple": 8,
"arm": 9, "aunt": 10, "awake": 11, "backyard": 12, "bad": 13, "balloon": 14, "bath": 15, "because": 16, "bed": 17,
"bedroom": 18, "bee": 19, "before": 20, "beside": 21, "better": 22, "bird": 23, "black": 24, "blow": 25, "blue": 26,
"boat": 27, "book": 28, "boy": 29, "brother": 30, "brown": 31, "bug": 32, "bye": 33, "callonphone": 34, "can": 35,
"car": 36, "carrot": 37, "cat": 38, "cereal": 39, "chair": 40, "cheek": 41, "child": 42, "chin": 43,
"chocolate": 44, "clean": 45, "close": 46, "closet": 47, "cloud": 48, "clown": 49, "cow": 50, "cowboy": 51,
"cry": 52, "cut": 53, "cute": 54, "dad": 55, "dance": 56, "dirty": 57, "dog": 58, "doll": 59, "donkey": 60,
"down": 61, "drawer": 62, "drink": 63, "drop": 64, "dry": 65, "dryer": 66, "duck": 67, "ear": 68, "elephant": 69,
"empty": 70, "every": 71, "eye": 72, "face": 73, "fall": 74, "farm": 75, "fast": 76, "feet": 77, "find": 78,
"fine": 79, "finger": 80, "finish": 81, "fireman": 82, "first": 83, "fish": 84, "flag": 85, "flower": 86,
"food": 87, "for": 88, "frenchfries": 89, "frog": 90, "garbage": 91, "gift": 92, "giraffe": 93, "girl": 94,
"give": 95, "glasswindow": 96, "go": 97, "goose": 98, "grandma": 99, "grandpa": 100, "grass": 101, "green": 102,
"gum": 103, "hair": 104, "happy": 105, "hat": 106, "hate": 107, "have": 108, "haveto": 109, "head": 110,
"hear": 111, "helicopter": 112, "hello": 113, "hen": 114, "hesheit": 115, "hide": 116, "high": 117, "home": 118,
"horse": 119, "hot": 120, "hungry": 121, "icecream": 122, "if": 123, "into": 124, "jacket": 125, "jeans": 126,
"jump": 127, "kiss": 128, "kitty": 129, "lamp": 130, "later": 131, "like": 132, "lion": 133, "lips": 134,
"listen": 135, "look": 136, "loud": 137, "mad": 138, "make": 139, "man": 140, "many": 141, "milk": 142,
"minemy": 143, "mitten": 144, "mom": 145, "moon": 146, "morning": 147, "mouse": 148, "mouth": 149, "nap": 150,
"napkin": 151, "night": 152, "no": 153, "noisy": 154, "nose": 155, "not": 156, "now": 157, "nuts": 158, "old": 159,
"on": 160, "open": 161, "orange": 162, "outside": 163, "owie": 164, "owl": 165, "pajamas": 166, "pen": 167,
"pencil": 168, "penny": 169, "person": 170, "pig": 171, "pizza": 172, "please": 173, "police": 174, "pool": 175,
"potty": 176, "pretend": 177, "pretty": 178, "puppy": 179, "puzzle": 180, "quiet": 181, "radio": 182, "rain": 183,
"read": 184, "red": 185, "refrigerator": 186, "ride": 187, "room": 188, "sad": 189, "same": 190, "say": 191,
"scissors": 192, "see": 193, "shhh": 194, "shirt": 195, "shoe": 196, "shower": 197, "sick": 198, "sleep": 199,
"sleepy": 200, "smile": 201, "snack": 202, "snow": 203, "stairs": 204, "stay": 205, "sticky": 206, "store": 207,
"story": 208, "stuck": 209, "sun": 210, "table": 211, "talk": 212, "taste": 213, "thankyou": 214, "that": 215,
"there": 216, "think": 217, "thirsty": 218, "tiger": 219, "time": 220, "tomorrow": 221, "tongue": 222, "tooth": 223,
"toothbrush": 224, "touch": 225, "toy": 226, "tree": 227, "uncle": 228, "underwear": 229, "up": 230, "vacuum": 231,
"wait": 232, "wake": 233, "water": 234, "wet": 235, "weus": 236, "where": 237, "white": 238, "who": 239, "why": 240,
"will": 241, "wolf": 242, "yellow": 243, "yes": 244, "yesterday": 245, "yourself": 246, "yucky": 247, "zebra": 248,
"zipper": 249
}
inv_index_to_class = {v: k for k, v in index_to_class.items()}
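# inv_index_to_class maps a predicted class index (0-249) back to its sign label, e.g. 113 -> "hello".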
# Landmark indices in original data
LEFT_HAND_IDXS0 = np.arange(468, 489)
RIGHT_HAND_IDXS0 = np.arange(522, 543)
LEFT_POSE_IDXS0 = np.array([502, 504, 506, 508, 510])
RIGHT_POSE_IDXS0 = np.array([503, 505, 507, 509, 511])
LANDMARK_IDXS_LEFT_DOMINANT0 = np.concatenate((LIPS_IDXS0, LEFT_HAND_IDXS0, LEFT_POSE_IDXS0))
LANDMARK_IDXS_RIGHT_DOMINANT0 = np.concatenate((LIPS_IDXS0, RIGHT_HAND_IDXS0, RIGHT_POSE_IDXS0))
HAND_IDXS0 = np.concatenate((LEFT_HAND_IDXS0, RIGHT_HAND_IDXS0), axis=0)
N_COLS = LANDMARK_IDXS_LEFT_DOMINANT0.size
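# 40 lip + 21 hand + 5 pose landmarks are kept per frame, so N_COLS == 66.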
# Landmark indices in processed data
LIPS_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, LIPS_IDXS0)).squeeze()
LEFT_HAND_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, LEFT_HAND_IDXS0)).squeeze()
RIGHT_HAND_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, RIGHT_HAND_IDXS0)).squeeze()
HAND_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, HAND_IDXS0)).squeeze()
POSE_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, LEFT_POSE_IDXS0)).squeeze()
print(f'# HAND_IDXS: {len(HAND_IDXS)}, N_COLS: {N_COLS}')
LIPS_START = 0
LEFT_HAND_START = LIPS_IDXS.size
RIGHT_HAND_START = LEFT_HAND_START + LEFT_HAND_IDXS.size
POSE_START = RIGHT_HAND_START + RIGHT_HAND_IDXS.size
print(
f'LIPS_START: {LIPS_START}, LEFT_HAND_START: {LEFT_HAND_START}, RIGHT_HAND_START: {RIGHT_HAND_START}, POSE_START: {POSE_START}')
def mediapipe_detection(image, model):
# COLOR CONVERSION BGR 2 RGB
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image.flags.writeable = False # Image is no longer writeable
results = model.process(image) # Make prediction
image.flags.writeable = True # Image is now writeable
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # COLOR CONVERSION RGB 2 BGR
return image, results
def extract_keypoints(results):
lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten(
) if results.left_hand_landmarks else np.zeros(21 * 3)
rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten(
) if results.right_hand_landmarks else np.zeros(21 * 3)
pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten(
) if results.pose_landmarks else np.zeros(33 * 4)
face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten(
) if results.face_landmarks else np.zeros(468 * 3)
return np.concatenate([lh, rh, pose, face])
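# extract_keypoints flattens the Holistic results into a single vector of
# 21*3 + 21*3 + 33*4 + 468*3 = 1662 values (left hand, right hand, pose incl. visibility, face),
# with zeros substituted for any part that was not detected.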
# Optional local sanity check: stream from the webcam and print the Holistic results.
# Note: cv2.CAP_DSHOW is Windows-only; this block is only useful when running locally.
cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
# Set mediapipe model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        # Read feed
        ret, frame = cap.read()
        if not ret:
            break
        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        # Press 'q' in the preview window to stop
        cv2.imshow('Webcam check', image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
# Function to make predictions using the TensorFlow Lite model
def make_prediction(processed_landmarks):
inputs = np.array(processed_landmarks, dtype=np.float32)
# Set the input tensor for the TFLite model
interpreter.set_tensor(input_details[0]['index'], inputs)
# Invoke the TFLite interpreter to perform inference
interpreter.invoke()
# Get the output tensor of the TFLite model
output_data = interpreter.get_tensor(output_details[0]['index'])
# Find the index of the predicted class
index = np.argmax(output_data)
# Map the index to the corresponding class label using the index_to_class dictionary
prediction = inv_index_to_class[index]
return prediction
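# Illustrative usage (dummy input, assuming the shape reported by input_details is what the model expects):
#   dummy = np.zeros(input_details[0]['shape'], dtype=np.float32)
#   print(make_prediction(dummy))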
class PreprocessLayer(tf.keras.layers.Layer):
def __init__(self):
super(PreprocessLayer, self).__init__()
normalisation_correction = tf.constant([
            # Add 0.50 to the left hand (original right hand) and subtract 0.50 from the right hand (original left hand)
[0] * len(LIPS_IDXS) + [0.50] * len(LEFT_HAND_IDXS) + [0.50] * len(POSE_IDXS),
# Y coordinates stay intact
[0] * len(LANDMARK_IDXS_LEFT_DOMINANT0),
# Z coordinates stay intact
[0] * len(LANDMARK_IDXS_LEFT_DOMINANT0),
],
dtype=tf.float32,
)
self.normalisation_correction = tf.transpose(normalisation_correction, [1, 0])
def pad_edge(self, t, repeats, side):
if side == 'LEFT':
return tf.concat((tf.repeat(t[:1], repeats=repeats, axis=0), t), axis=0)
elif side == 'RIGHT':
return tf.concat((t, tf.repeat(t[-1:], repeats=repeats, axis=0)), axis=0)
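    # Example: pad_edge(t, 2, 'LEFT') on frames [f0, f1, f2] yields [f0, f0, f0, f1, f2];
    # 'RIGHT' padding repeats the last frame instead.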
@tf.function(
input_signature=(tf.TensorSpec(shape=[None, N_ROWS, N_DIMS], dtype=tf.float32),),
)
def call(self, data0):
# Number of Frames in Video
N_FRAMES0 = tf.shape(data0)[0]
# Find dominant hand by comparing summed absolute coordinates
left_hand_sum = tf.math.reduce_sum(
tf.where(tf.math.is_nan(tf.gather(data0, LEFT_HAND_IDXS0, axis=1)), 0, 1))
right_hand_sum = tf.math.reduce_sum(
tf.where(tf.math.is_nan(tf.gather(data0, RIGHT_HAND_IDXS0, axis=1)), 0, 1))
left_dominant = left_hand_sum >= right_hand_sum
# Count non NaN Hand values in each frame for the dominant hand
if left_dominant:
frames_hands_non_nan_sum = tf.math.reduce_sum(
tf.where(tf.math.is_nan(tf.gather(data0, LEFT_HAND_IDXS0, axis=1)), 0, 1),
axis=[1, 2],
)
else:
frames_hands_non_nan_sum = tf.math.reduce_sum(
tf.where(tf.math.is_nan(tf.gather(data0, RIGHT_HAND_IDXS0, axis=1)), 0, 1),
axis=[1, 2],
)
# Find frames indices with coordinates of dominant hand
non_empty_frames_idxs = tf.where(frames_hands_non_nan_sum > 0)
non_empty_frames_idxs = tf.squeeze(non_empty_frames_idxs, axis=1)
# Filter frames
data = tf.gather(data0, non_empty_frames_idxs, axis=0)
        # Cast indices to float32 to be compatible with TensorFlow Lite
non_empty_frames_idxs = tf.cast(non_empty_frames_idxs, tf.float32)
# Normalize to start with 0
non_empty_frames_idxs -= tf.reduce_min(non_empty_frames_idxs)
# Number of Frames in Filtered Video
N_FRAMES = tf.shape(data)[0]
# Gather Relevant Landmark Columns
if left_dominant:
data = tf.gather(data, LANDMARK_IDXS_LEFT_DOMINANT0, axis=1)
else:
data = tf.gather(data, LANDMARK_IDXS_RIGHT_DOMINANT0, axis=1)
data = (
self.normalisation_correction + (
(data - self.normalisation_correction) * tf.where(self.normalisation_correction != 0, -1.0,
1.0))
)
# Video fits in INPUT_SIZE
if N_FRAMES < INPUT_SIZE:
# Pad With -1 to indicate padding
non_empty_frames_idxs = tf.pad(non_empty_frames_idxs, [[0, INPUT_SIZE - N_FRAMES]],
constant_values=-1)
# Pad Data With Zeros
data = tf.pad(data, [[0, INPUT_SIZE - N_FRAMES], [0, 0], [0, 0]], constant_values=0)
# Fill NaN Values With 0
data = tf.where(tf.math.is_nan(data), 0.0, data)
return data, non_empty_frames_idxs
# Video needs to be downsampled to INPUT_SIZE
else:
# Repeat
if N_FRAMES < INPUT_SIZE ** 2:
repeats = tf.math.floordiv(INPUT_SIZE * INPUT_SIZE, N_FRAMES0)
data = tf.repeat(data, repeats=repeats, axis=0)
non_empty_frames_idxs = tf.repeat(non_empty_frames_idxs, repeats=repeats, axis=0)
# Pad To Multiple Of Input Size
pool_size = tf.math.floordiv(len(data), INPUT_SIZE)
if tf.math.mod(len(data), INPUT_SIZE) > 0:
pool_size += 1
if pool_size == 1:
pad_size = (pool_size * INPUT_SIZE) - len(data)
else:
pad_size = (pool_size * INPUT_SIZE) % len(data)
# Pad Start/End with Start/End value
pad_left = tf.math.floordiv(pad_size, 2) + tf.math.floordiv(INPUT_SIZE, 2)
pad_right = tf.math.floordiv(pad_size, 2) + tf.math.floordiv(INPUT_SIZE, 2)
if tf.math.mod(pad_size, 2) > 0:
pad_right += 1
# Pad By Concatenating Left/Right Edge Values
data = self.pad_edge(data, pad_left, 'LEFT')
data = self.pad_edge(data, pad_right, 'RIGHT')
# Pad Non Empty Frame Indices
non_empty_frames_idxs = self.pad_edge(non_empty_frames_idxs, pad_left, 'LEFT')
non_empty_frames_idxs = self.pad_edge(non_empty_frames_idxs, pad_right, 'RIGHT')
# Reshape to Mean Pool
data = tf.reshape(data, [INPUT_SIZE, -1, N_COLS, N_DIMS])
non_empty_frames_idxs = tf.reshape(non_empty_frames_idxs, [INPUT_SIZE, -1])
# Mean Pool
data = tf.experimental.numpy.nanmean(data, axis=1)
non_empty_frames_idxs = tf.experimental.numpy.nanmean(non_empty_frames_idxs, axis=1)
# Fill NaN Values With 0
data = tf.where(tf.math.is_nan(data), 0.0, data)
return data, non_empty_frames_idxs
preprocess_layer = PreprocessLayer()
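# Quick shape check (illustrative; random data stands in for real landmarks): a clip of, say,
# 10 frames x 543 landmarks is reduced to INPUT_SIZE frames of the N_COLS selected landmarks.
#   dummy_clip = np.random.rand(10, N_ROWS, N_DIMS).astype(np.float32)
#   frames, frame_idxs = preprocess_layer(dummy_clip)   # frames.shape == (INPUT_SIZE, N_COLS, N_DIMS)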
def translate_sign_language(image):
    # Convert the frame to RGB (MediaPipe expects RGB images)
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Run MediaPipe Holistic so face, hands and pose come from a single, consistent detection
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        results = holistic.process(rgb_frame)

    def landmarks_to_array(landmarks, n_points):
        # Missing parts are filled with NaN so the model's preprocessing can drop/mask them
        if landmarks is None:
            return np.full((n_points, 3), np.nan, dtype=np.float32)
        return np.array([[lm.x, lm.y, lm.z] for lm in landmarks.landmark], dtype=np.float32)

    # Assemble one frame of 543 landmarks in the order assumed by the landmark indices above:
    # face (468), left hand (21), pose (33), right hand (21)
    frame_landmarks = np.concatenate([
        landmarks_to_array(results.face_landmarks, 468),
        landmarks_to_array(results.left_hand_landmarks, 21),
        landmarks_to_array(results.pose_landmarks, 33),
        landmarks_to_array(results.right_hand_landmarks, 21),
    ], axis=0)

    # Run the TFLite model through its serving signature, which takes raw landmarks of shape
    # (n_frames, 543, 3) -- the same path used for the parquet-based prediction at the bottom
    # of this file (this assumes the exported model embeds its own preprocessing).
    prediction_fn = interpreter.get_signature_runner("serving_default")
    output = prediction_fn(inputs=frame_landmarks[np.newaxis, ...])
    sign_index = int(output['outputs'].argmax())

    # Return the translated text
    return inv_index_to_class[sign_index]
gr_interface = gr.Interface(fn=translate_sign_language,
inputs="webcam", # Input from webcam
outputs="text", # Output as text
#capture_session=True, # To properly release the webcam after running the interface
live=True, # Show live webcam feed
title="Sign Language Translation",
description="Translate sign language to text using TensorFlow Lite and Mediapipe.")
gr_interface.launch(share=True)
cap.release()
cv2.destroyAllWindows()
video_path = './Test/HAPPY.mp4'
cap = cv2.VideoCapture(video_path)
mp_drawing = mp.solutions.drawing_utils
mp_face_mesh = mp.solutions.face_mesh
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose
data_list = []
ROWS_PER_FRAME = 543 # Constant number of landmarks per frame
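# 543 = 468 face + 21 left-hand + 33 pose + 21 right-hand landmarks per frame. Note that this loop
# appends rows in detection order (face, hands, pose), while the selection indices defined near the
# top of the file assume the order face, left hand (468-488), pose, right hand (522-542).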
with mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1) as face_mesh, \
mp_hands.Hands(static_image_mode=False, max_num_hands=2) as hands, \
mp_pose.Pose(static_image_mode=False) as pose:
frame_number = 0
while cap.isOpened():
ret, image = cap.read()
if not ret:
break
# Convert the BGR image to RGB for Mediapipe
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Process face landmarks
results_face = face_mesh.process(image_rgb)
if results_face.multi_face_landmarks:
face_landmarks = results_face.multi_face_landmarks[0]
for idx, landmark in enumerate(face_landmarks.landmark):
data_list.append([frame_number, f"{frame_number}-face-{idx}", "face", idx, landmark.x, landmark.y, landmark.z])
# Process hand landmarks
results_hands = hands.process(image_rgb)
        if results_hands.multi_hand_landmarks:
            # MediaPipe reports handedness per detected hand (labels assume a mirrored, selfie-view image)
            for hand_landmarks, handedness in zip(results_hands.multi_hand_landmarks, results_hands.multi_handedness):
                hand_label = "left_hand" if handedness.classification[0].label == "Left" else "right_hand"
                for idx, landmark in enumerate(hand_landmarks.landmark):
                    data_list.append([frame_number, f"{frame_number}-{hand_label}-{idx}", hand_label, idx, landmark.x, landmark.y, landmark.z])
                mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
# Process pose landmarks
results_pose = pose.process(image_rgb)
if results_pose.pose_landmarks:
pose_landmarks = results_pose.pose_landmarks.landmark
for idx, landmark in enumerate(pose_landmarks):
data_list.append([frame_number, f"{frame_number}-pose-{idx}", "pose", idx, landmark.x, landmark.y, landmark.z])
# Pad the landmarks with NaN values if the number of landmarks is less than ROWS_PER_FRAME
while len(data_list) < (frame_number + 1) * ROWS_PER_FRAME:
data_list.append([frame_number, f"{frame_number}-right_hand-{len(data_list) % ROWS_PER_FRAME}", "right-hand", len(data_list) % ROWS_PER_FRAME, np.nan, np.nan, np.nan])
        # Draw the landmarks on the frame (optional)
        if results_face.multi_face_landmarks:
            mp_drawing.draw_landmarks(image, face_landmarks, mp_face_mesh.FACEMESH_CONTOURS)
        mp_drawing.draw_landmarks(image, results_pose.pose_landmarks, mp_pose.POSE_CONNECTIONS)
# Display the frame (optional)
cv2.imshow('MediaPipe', image)
frame_number += 1
# Press 'q' to quit
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
df = pd.DataFrame(data_list, columns=["frame", "row_id", "type", "landmark_index", "x", "y", "z"])
df.to_parquet("extracted_features.parquet", index=False)
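# The parquet written here is what load_relevant_data_subset below consumes:
# ROWS_PER_FRAME rows per frame with columns frame, row_id, type, landmark_index, x, y, z.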
# test_data = pd.read_parquet('./1006440534.parquet')
# test_data_kaggle = pd.read_parquet('1001373962.parquet')
# test_data_kaggle2 = pd.read_parquet('./100015657.parquet')
# test_data_kaggle3 = pd.read_parquet('./1003700302.parquet')
# test_data_kaggle4 = pd.read_parquet('./1007127288.parquet')
test_data_my_own = pd.read_parquet('extracted_features.parquet')
test_data_my_own['frame'] = test_data_my_own['frame'].astype('int16')
test_data_my_own['landmark_index'] = test_data_my_own['landmark_index'].astype('int16')
def load_relevant_data_subset(pq_path, ROWS_PER_FRAME=543):
    data_columns = ['x', 'y', 'z']
    data = pd.read_parquet(pq_path, columns=data_columns)
    n_frames = int(len(data) / ROWS_PER_FRAME)
    print(f"Data: {len(data)} rows, number of frames: {n_frames}")
    data = data.values.reshape(n_frames, ROWS_PER_FRAME, len(data_columns))
    return data.astype(np.float32)
# demo_raw_data = load_relevant_data_subset('./1006440534.parquet')
demo_raw_data = load_relevant_data_subset('./extracted_features.parquet')
# demo_raw_data = load_relevant_data_subset('./1003700302.parquet', test_data_kaggle3['frame'].nunique())
# demo_raw_data = load_relevant_data_subset('./extracted_features.parquet')
# ORD2SIGN is the same index -> sign mapping already built above from index_to_class
ORD2SIGN = inv_index_to_class
# Reload the model for the signature-based prediction path. tf.lite provides the same
# Interpreter/signature-runner API as tflite_runtime, so no extra dependency is needed.
interpreter = tf.lite.Interpreter(model_path="./model.tflite")
found_signatures = list(interpreter.get_signature_list().keys())
prediction_fn = interpreter.get_signature_runner("serving_default")
output = prediction_fn(inputs=demo_raw_data)
sign = output['outputs'].argmax()
print("PRED : ", ORD2SIGN.get(sign), f'[{sign}]')