# NOTE: the original paste began with page residue from a Hugging Face Space
# error screen ("Spaces:" / "Runtime error" / "Runtime error") — not code.
import os
import time

import cv2
import gradio as gr
import mediapipe as mp
import numpy as np
import pandas as pd  # required: pd.DataFrame / pd.read_parquet are used later in this file
import tensorflow as tf
from matplotlib import pyplot as plt

mp_holistic = mp.solutions.holistic
# Initialize MediaPipe solution modules and default trackers.
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose
mp_face_mesh = mp.solutions.face_mesh
hands = mp_hands.Hands()
pose = mp_pose.Pose()
face_mesh = mp_face_mesh.FaceMesh()
# Get the absolute path to the directory containing app.py
current_dir = os.path.dirname(os.path.abspath(__file__))
# Define the filename of the TFLite model
model_filename = "model.tflite"
# Construct the full path to the TFLite model file
model_path = os.path.join(current_dir, model_filename)
# Load the TFLite model using the interpreter
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()
# Get input and output details (tensor indices used for set_tensor/get_tensor below)
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# --- Constants describing the raw data layout and the preprocessing below ---
N_ROWS = 543          # landmarks per frame in the raw data
N_DIMS = 3            # coordinate dimensions per landmark
DIM_NAMES = ['x', 'y', 'z']
SEED = 42
NUM_CLASSES = 250     # number of sign classes (matches index_to_class below)
INPUT_SIZE = 64       # frames per video after padding/pooling in PreprocessLayer
BATCH_ALL_SIGNS_N = 4
BATCH_SIZE = 256
N_EPOCHS = 100
LR_MAX = 1e-3
N_WARMUP_EPOCHS = 0
WD_RATIO = 0.05
MASK_VAL = 4237
USE_TYPES = ['left_hand', 'pose', 'right_hand']
START_IDX = 468
# 40 lip landmark indices within the 468-point face mesh.
LIPS_IDXS0 = np.array([
    61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
    291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
    78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
    95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
])
# Mapping from sign label (str) to the model's class index (0..249).
index_to_class = {
    "TV": 0, "after": 1, "airplane": 2, "all": 3, "alligator": 4, "animal": 5, "another": 6, "any": 7, "apple": 8,
    "arm": 9, "aunt": 10, "awake": 11, "backyard": 12, "bad": 13, "balloon": 14, "bath": 15, "because": 16, "bed": 17,
    "bedroom": 18, "bee": 19, "before": 20, "beside": 21, "better": 22, "bird": 23, "black": 24, "blow": 25, "blue": 26,
    "boat": 27, "book": 28, "boy": 29, "brother": 30, "brown": 31, "bug": 32, "bye": 33, "callonphone": 34, "can": 35,
    "car": 36, "carrot": 37, "cat": 38, "cereal": 39, "chair": 40, "cheek": 41, "child": 42, "chin": 43,
    "chocolate": 44, "clean": 45, "close": 46, "closet": 47, "cloud": 48, "clown": 49, "cow": 50, "cowboy": 51,
    "cry": 52, "cut": 53, "cute": 54, "dad": 55, "dance": 56, "dirty": 57, "dog": 58, "doll": 59, "donkey": 60,
    "down": 61, "drawer": 62, "drink": 63, "drop": 64, "dry": 65, "dryer": 66, "duck": 67, "ear": 68, "elephant": 69,
    "empty": 70, "every": 71, "eye": 72, "face": 73, "fall": 74, "farm": 75, "fast": 76, "feet": 77, "find": 78,
    "fine": 79, "finger": 80, "finish": 81, "fireman": 82, "first": 83, "fish": 84, "flag": 85, "flower": 86,
    "food": 87, "for": 88, "frenchfries": 89, "frog": 90, "garbage": 91, "gift": 92, "giraffe": 93, "girl": 94,
    "give": 95, "glasswindow": 96, "go": 97, "goose": 98, "grandma": 99, "grandpa": 100, "grass": 101, "green": 102,
    "gum": 103, "hair": 104, "happy": 105, "hat": 106, "hate": 107, "have": 108, "haveto": 109, "head": 110,
    "hear": 111, "helicopter": 112, "hello": 113, "hen": 114, "hesheit": 115, "hide": 116, "high": 117, "home": 118,
    "horse": 119, "hot": 120, "hungry": 121, "icecream": 122, "if": 123, "into": 124, "jacket": 125, "jeans": 126,
    "jump": 127, "kiss": 128, "kitty": 129, "lamp": 130, "later": 131, "like": 132, "lion": 133, "lips": 134,
    "listen": 135, "look": 136, "loud": 137, "mad": 138, "make": 139, "man": 140, "many": 141, "milk": 142,
    "minemy": 143, "mitten": 144, "mom": 145, "moon": 146, "morning": 147, "mouse": 148, "mouth": 149, "nap": 150,
    "napkin": 151, "night": 152, "no": 153, "noisy": 154, "nose": 155, "not": 156, "now": 157, "nuts": 158, "old": 159,
    "on": 160, "open": 161, "orange": 162, "outside": 163, "owie": 164, "owl": 165, "pajamas": 166, "pen": 167,
    "pencil": 168, "penny": 169, "person": 170, "pig": 171, "pizza": 172, "please": 173, "police": 174, "pool": 175,
    "potty": 176, "pretend": 177, "pretty": 178, "puppy": 179, "puzzle": 180, "quiet": 181, "radio": 182, "rain": 183,
    "read": 184, "red": 185, "refrigerator": 186, "ride": 187, "room": 188, "sad": 189, "same": 190, "say": 191,
    "scissors": 192, "see": 193, "shhh": 194, "shirt": 195, "shoe": 196, "shower": 197, "sick": 198, "sleep": 199,
    "sleepy": 200, "smile": 201, "snack": 202, "snow": 203, "stairs": 204, "stay": 205, "sticky": 206, "store": 207,
    "story": 208, "stuck": 209, "sun": 210, "table": 211, "talk": 212, "taste": 213, "thankyou": 214, "that": 215,
    "there": 216, "think": 217, "thirsty": 218, "tiger": 219, "time": 220, "tomorrow": 221, "tongue": 222, "tooth": 223,
    "toothbrush": 224, "touch": 225, "toy": 226, "tree": 227, "uncle": 228, "underwear": 229, "up": 230, "vacuum": 231,
    "wait": 232, "wake": 233, "water": 234, "wet": 235, "weus": 236, "where": 237, "white": 238, "who": 239, "why": 240,
    "will": 241, "wolf": 242, "yellow": 243, "yes": 244, "yesterday": 245, "yourself": 246, "yucky": 247, "zebra": 248,
    "zipper": 249
}
# Reverse mapping: class index (int) -> sign label (str).
inv_index_to_class = {v: k for k, v in index_to_class.items()}
# Landmark indices in original data (543 rows: 468 face, then hands and pose)
LEFT_HAND_IDXS0 = np.arange(468, 489)    # 21 left-hand landmarks
RIGHT_HAND_IDXS0 = np.arange(522, 543)   # 21 right-hand landmarks
LEFT_POSE_IDXS0 = np.array([502, 504, 506, 508, 510])   # 5 left-side pose landmarks
RIGHT_POSE_IDXS0 = np.array([503, 505, 507, 509, 511])  # 5 right-side pose landmarks
# Column selections depending on which hand is dominant (lips + one hand + pose side).
LANDMARK_IDXS_LEFT_DOMINANT0 = np.concatenate((LIPS_IDXS0, LEFT_HAND_IDXS0, LEFT_POSE_IDXS0))
LANDMARK_IDXS_RIGHT_DOMINANT0 = np.concatenate((LIPS_IDXS0, RIGHT_HAND_IDXS0, RIGHT_POSE_IDXS0))
HAND_IDXS0 = np.concatenate((LEFT_HAND_IDXS0, RIGHT_HAND_IDXS0), axis=0)
N_COLS = LANDMARK_IDXS_LEFT_DOMINANT0.size  # 40 lips + 21 hand + 5 pose = 66 columns kept
# Landmark indices in processed data (positions within the 66 selected columns)
LIPS_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, LIPS_IDXS0)).squeeze()
LEFT_HAND_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, LEFT_HAND_IDXS0)).squeeze()
RIGHT_HAND_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, RIGHT_HAND_IDXS0)).squeeze()
HAND_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, HAND_IDXS0)).squeeze()
POSE_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, LEFT_POSE_IDXS0)).squeeze()
print(f'# HAND_IDXS: {len(HAND_IDXS)}, N_COLS: {N_COLS}')
# Start offsets of each landmark group within the processed column axis.
LIPS_START = 0
LEFT_HAND_START = LIPS_IDXS.size
RIGHT_HAND_START = LEFT_HAND_START + LEFT_HAND_IDXS.size
POSE_START = RIGHT_HAND_START + RIGHT_HAND_IDXS.size
print(
    f'LIPS_START: {LIPS_START}, LEFT_HAND_START: {LEFT_HAND_START}, RIGHT_HAND_START: {RIGHT_HAND_START}, POSE_START: {POSE_START}')
def mediapipe_detection(image, model):
    """Run a MediaPipe model on a BGR frame.

    Returns a (BGR image, results) pair; the image is converted to RGB for
    inference and back to BGR for OpenCV afterwards.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # MediaPipe expects RGB input
    rgb.flags.writeable = False                   # read-only hint for the prediction pass
    results = model.process(rgb)
    rgb.flags.writeable = True
    bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)    # restore OpenCV's BGR channel order
    return bgr, results
def extract_keypoints(results):
    """Flatten Holistic landmarks into one 1-D vector.

    Order: left hand (63), right hand (63), pose (132), face (1404) —
    1662 values total. Missing components are zero-filled.
    """
    def _flatten(component, to_row, empty_len):
        # One flattened row per landmark, or zeros when the component is absent.
        if component:
            return np.asarray([to_row(p) for p in component.landmark]).flatten()
        return np.zeros(empty_len)

    lh = _flatten(results.left_hand_landmarks, lambda p: [p.x, p.y, p.z], 21 * 3)
    rh = _flatten(results.right_hand_landmarks, lambda p: [p.x, p.y, p.z], 21 * 3)
    body = _flatten(results.pose_landmarks, lambda p: [p.x, p.y, p.z, p.visibility], 33 * 4)
    face = _flatten(results.face_landmarks, lambda p: [p.x, p.y, p.z], 468 * 3)
    return np.concatenate([lh, rh, body, face])
# Live webcam demo: run Holistic on each captured frame and print the results.
cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
# Set mediapipe model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        # Read feed
        ret, frame = cap.read()
        # BUG FIX: the original never checked `ret`, so a failed read passed
        # frame=None into cv2.cvtColor (crash) and the loop could never end.
        if not ret:
            break
        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
def make_prediction(processed_landmarks):
    """Classify preprocessed landmarks with the module-level TFLite interpreter.

    Returns the predicted sign label (str) looked up in inv_index_to_class.
    """
    tensor = np.array(processed_landmarks, dtype=np.float32)
    # Feed the single input tensor and run inference.
    interpreter.set_tensor(input_details[0]['index'], tensor)
    interpreter.invoke()
    # Read back the class scores and pick the argmax.
    scores = interpreter.get_tensor(output_details[0]['index'])
    best_idx = int(np.argmax(scores))
    return inv_index_to_class[best_idx]
class PreprocessLayer(tf.keras.layers.Layer):
    """Turns a raw (frames, 543, 3) landmark tensor into a fixed-size model input.

    Pipeline: pick the dominant hand (more non-NaN values), drop frames where
    that hand is absent, keep only the relevant 66 landmark columns, then pad
    short videos / mean-pool long videos down to INPUT_SIZE frames.
    """

    def __init__(self):
        super(PreprocessLayer, self).__init__()
        normalisation_correction = tf.constant([
            # X row: 0 for lips, 0.50 for hand and pose columns — used in call()
            # to mirror x-coordinates around 0.50.
            [0] * len(LIPS_IDXS) + [0.50] * len(LEFT_HAND_IDXS) + [0.50] * len(POSE_IDXS),
            # Y coordinates stay intact
            [0] * len(LANDMARK_IDXS_LEFT_DOMINANT0),
            # Z coordinates stay intact
            [0] * len(LANDMARK_IDXS_LEFT_DOMINANT0),
        ],
            dtype=tf.float32,
        )
        # Transpose (3, N_COLS) -> (N_COLS, 3) so it broadcasts over the frame axis.
        self.normalisation_correction = tf.transpose(normalisation_correction, [1, 0])

    def pad_edge(self, t, repeats, side):
        """Pad t along axis 0 by repeating its first ('LEFT') or last ('RIGHT') row."""
        if side == 'LEFT':
            return tf.concat((tf.repeat(t[:1], repeats=repeats, axis=0), t), axis=0)
        elif side == 'RIGHT':
            return tf.concat((t, tf.repeat(t[-1:], repeats=repeats, axis=0)), axis=0)

    def call(self, data0):
        """Preprocess one video.

        Args:
            data0: float tensor of shape (frames, 543, 3), NaN where a landmark
                was not detected.  (Shape assumed from the gather indices used
                below — TODO confirm against the caller.)

        Returns:
            (data, non_empty_frames_idxs): an (INPUT_SIZE, N_COLS, N_DIMS)
            tensor and a length-INPUT_SIZE vector of source-frame indices
            (-1 marks padding in the short-video branch).
        """
        # Number of Frames in Video
        N_FRAMES0 = tf.shape(data0)[0]
        # Find dominant hand: the one with the larger count of non-NaN values.
        left_hand_sum = tf.math.reduce_sum(
            tf.where(tf.math.is_nan(tf.gather(data0, LEFT_HAND_IDXS0, axis=1)), 0, 1))
        right_hand_sum = tf.math.reduce_sum(
            tf.where(tf.math.is_nan(tf.gather(data0, RIGHT_HAND_IDXS0, axis=1)), 0, 1))
        left_dominant = left_hand_sum >= right_hand_sum
        # Count non NaN Hand values in each frame for the dominant hand
        if left_dominant:
            frames_hands_non_nan_sum = tf.math.reduce_sum(
                tf.where(tf.math.is_nan(tf.gather(data0, LEFT_HAND_IDXS0, axis=1)), 0, 1),
                axis=[1, 2],
            )
        else:
            frames_hands_non_nan_sum = tf.math.reduce_sum(
                tf.where(tf.math.is_nan(tf.gather(data0, RIGHT_HAND_IDXS0, axis=1)), 0, 1),
                axis=[1, 2],
            )
        # Find frames indices with coordinates of dominant hand
        non_empty_frames_idxs = tf.where(frames_hands_non_nan_sum > 0)
        non_empty_frames_idxs = tf.squeeze(non_empty_frames_idxs, axis=1)
        # Filter frames
        data = tf.gather(data0, non_empty_frames_idxs, axis=0)
        # Cast Indices in float32 to be compatible with Tensorflow Lite
        non_empty_frames_idxs = tf.cast(non_empty_frames_idxs, tf.float32)
        # Normalize to start with 0
        non_empty_frames_idxs -= tf.reduce_min(non_empty_frames_idxs)
        # Number of Frames in Filtered Video
        N_FRAMES = tf.shape(data)[0]
        # Gather Relevant Landmark Columns
        if left_dominant:
            data = tf.gather(data, LANDMARK_IDXS_LEFT_DOMINANT0, axis=1)
        else:
            data = tf.gather(data, LANDMARK_IDXS_RIGHT_DOMINANT0, axis=1)
        # Mirror x-coordinates around 0.50 for hand/pose columns.
        # NOTE(review): this correction is applied for BOTH left- and
        # right-dominant videos; confirm it should not be limited to the
        # right-dominant branch only.
        data = (
            self.normalisation_correction + (
                (data - self.normalisation_correction) * tf.where(self.normalisation_correction != 0, -1.0,
                                                                  1.0))
        )
        # Video fits in INPUT_SIZE
        if N_FRAMES < INPUT_SIZE:
            # Pad With -1 to indicate padding
            non_empty_frames_idxs = tf.pad(non_empty_frames_idxs, [[0, INPUT_SIZE - N_FRAMES]],
                                           constant_values=-1)
            # Pad Data With Zeros
            data = tf.pad(data, [[0, INPUT_SIZE - N_FRAMES], [0, 0], [0, 0]], constant_values=0)
            # Fill NaN Values With 0
            data = tf.where(tf.math.is_nan(data), 0.0, data)
            return data, non_empty_frames_idxs
        # Video needs to be downsampled to INPUT_SIZE
        else:
            # Repeat frames so that mean-pooling to INPUT_SIZE loses less detail.
            if N_FRAMES < INPUT_SIZE ** 2:
                # NOTE(review): repeats is computed from N_FRAMES0 (pre-filter
                # frame count) while the condition checks N_FRAMES — confirm
                # this asymmetry is intentional.
                repeats = tf.math.floordiv(INPUT_SIZE * INPUT_SIZE, N_FRAMES0)
                data = tf.repeat(data, repeats=repeats, axis=0)
                non_empty_frames_idxs = tf.repeat(non_empty_frames_idxs, repeats=repeats, axis=0)
            # Pad To Multiple Of Input Size
            pool_size = tf.math.floordiv(len(data), INPUT_SIZE)
            if tf.math.mod(len(data), INPUT_SIZE) > 0:
                pool_size += 1
            if pool_size == 1:
                pad_size = (pool_size * INPUT_SIZE) - len(data)
            else:
                pad_size = (pool_size * INPUT_SIZE) % len(data)
            # Pad Start/End with Start/End value
            pad_left = tf.math.floordiv(pad_size, 2) + tf.math.floordiv(INPUT_SIZE, 2)
            pad_right = tf.math.floordiv(pad_size, 2) + tf.math.floordiv(INPUT_SIZE, 2)
            if tf.math.mod(pad_size, 2) > 0:
                pad_right += 1
            # Pad By Concatenating Left/Right Edge Values
            data = self.pad_edge(data, pad_left, 'LEFT')
            data = self.pad_edge(data, pad_right, 'RIGHT')
            # Pad Non Empty Frame Indices
            non_empty_frames_idxs = self.pad_edge(non_empty_frames_idxs, pad_left, 'LEFT')
            non_empty_frames_idxs = self.pad_edge(non_empty_frames_idxs, pad_right, 'RIGHT')
            # Reshape to Mean Pool
            data = tf.reshape(data, [INPUT_SIZE, -1, N_COLS, N_DIMS])
            non_empty_frames_idxs = tf.reshape(non_empty_frames_idxs, [INPUT_SIZE, -1])
            # Mean Pool (nanmean ignores still-missing landmarks)
            data = tf.experimental.numpy.nanmean(data, axis=1)
            non_empty_frames_idxs = tf.experimental.numpy.nanmean(non_empty_frames_idxs, axis=1)
            # Fill NaN Values With 0
            data = tf.where(tf.math.is_nan(data), 0.0, data)
            return data, non_empty_frames_idxs
preprocess_layer = PreprocessLayer()


def translate_sign_language(image):
    """Translate one BGR webcam frame into a predicted sign label.

    Runs MediaPipe Hands and Pose on the frame, flattens the landmarks,
    preprocesses them and feeds them to the TFLite model.

    Args:
        image: BGR frame as delivered by OpenCV / Gradio.

    Returns:
        The predicted sign label (str).
    """
    # Convert the frame to RGB (Mediapipe expects RGB images)
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    with mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands_tracker:
        hands_results = hands_tracker.process(rgb_frame)
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose_tracker:
        # BUG FIX: the original called mp_pose.process(rgb_frame) — mp_pose is
        # the *module*, which has no process(); the tracker instance must be used.
        pose_results = pose_tracker.process(rgb_frame)
    # NOTE(review): extract_keypoints reads Holistic-style attributes
    # (left_hand_landmarks, pose_landmarks, ...); Hands/Pose results expose
    # multi_hand_landmarks / pose_landmarks instead — confirm compatibility
    # with the installed mediapipe version.
    hand_pose_keypoints = extract_keypoints(hands_results)
    pose_keypoints = extract_keypoints(pose_results)
    # Slice the flattened vector: [0:63] left hand, [63:126] right hand,
    # [126:258] pose (33 landmarks x 4 values).
    left_hand_landmarks = hand_pose_keypoints[:63].reshape(1, -1, 3)
    right_hand_landmarks = hand_pose_keypoints[63:126].reshape(1, -1, 3)
    # BUG FIX: the original sliced [126:], which also swallowed the 1404 face
    # values and silently reshaped them into the pose tensor.
    pose_landmarks = pose_keypoints[126:258].reshape(1, -1, 4)
    # Preprocess each landmark group for the model.
    preprocessed_left_hand, _ = preprocess_layer(left_hand_landmarks)
    preprocessed_right_hand, _ = preprocess_layer(right_hand_landmarks)
    preprocessed_pose, _ = preprocess_layer(pose_landmarks)
    input_data = [preprocessed_left_hand, preprocessed_right_hand, preprocessed_pose]
    # NOTE(review): this assumes model.tflite declares three inputs, while
    # make_prediction() above assumes a single input — confirm which signature
    # the model actually has.
    interpreter.set_tensor(interpreter.get_input_details()[0]['index'], input_data[0])
    interpreter.set_tensor(interpreter.get_input_details()[1]['index'], input_data[1])
    interpreter.set_tensor(interpreter.get_input_details()[2]['index'], input_data[2])
    interpreter.invoke()
    output = interpreter.get_tensor(interpreter.get_output_details()[0]['index'])
    # Map the model output to a text label.
    translated_text = make_prediction(output)
    return translated_text
# Gradio UI: live webcam frames in, predicted sign text out.
gr_interface = gr.Interface(fn=translate_sign_language,
                            inputs="webcam",  # Input from webcam
                            outputs="text",  # Output as text
                            #capture_session=True, # To properly release the webcam after running the interface
                            live=True,  # Show live webcam feed
                            title="Sign Language Translation",
                            description="Translate sign language to text using TensorFlow Lite and Mediapipe.")
gr_interface.launch(share=True)
# Release the webcam opened by the capture loop above once the UI exits.
cap.release()
cv2.destroyAllWindows()
# --- Offline feature extraction: run MediaPipe over a test video and dump the
# landmarks to a parquet file with 543 rows per frame.
video_path = './Test/HAPPY.mp4'
cap = cv2.VideoCapture(video_path)
mp_drawing = mp.solutions.drawing_utils
mp_face_mesh = mp.solutions.face_mesh
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose
data_list = []
ROWS_PER_FRAME = 543  # Constant number of landmarks per frame
with mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1) as face_mesh, \
        mp_hands.Hands(static_image_mode=False, max_num_hands=2) as hands, \
        mp_pose.Pose(static_image_mode=False) as pose:
    frame_number = 0
    while cap.isOpened():
        ret, image = cap.read()
        if not ret:
            break
        # Convert the BGR image to RGB for Mediapipe
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Process face landmarks
        results_face = face_mesh.process(image_rgb)
        if results_face.multi_face_landmarks:
            face_landmarks = results_face.multi_face_landmarks[0]
            for idx, landmark in enumerate(face_landmarks.landmark):
                data_list.append([frame_number, f"{frame_number}-face-{idx}", "face", idx, landmark.x, landmark.y, landmark.z])
            # BUG FIX: draw only when a face was detected; the original drew
            # unconditionally near the bottom of the loop and raised NameError
            # on the first frame without a face.
            mp_drawing.draw_landmarks(image, face_landmarks, mp_face_mesh.FACEMESH_CONTOURS)
        # Process hand landmarks
        # NOTE(review): every detected hand is recorded as "right-hand";
        # results_hands.multi_handedness could distinguish left/right —
        # confirm whether the downstream model needs that distinction.
        results_hands = hands.process(image_rgb)
        if results_hands.multi_hand_landmarks:
            for hand_landmarks in results_hands.multi_hand_landmarks:
                for idx, landmark in enumerate(hand_landmarks.landmark):
                    data_list.append([frame_number, f"{frame_number}-right_hand-{idx}", "right-hand", idx, landmark.x, landmark.y, landmark.z])
                mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
        # Process pose landmarks
        results_pose = pose.process(image_rgb)
        if results_pose.pose_landmarks:
            pose_landmarks = results_pose.pose_landmarks.landmark
            for idx, landmark in enumerate(pose_landmarks):
                data_list.append([frame_number, f"{frame_number}-pose-{idx}", "pose", idx, landmark.x, landmark.y, landmark.z])
            mp_drawing.draw_landmarks(image, results_pose.pose_landmarks, mp_pose.POSE_CONNECTIONS)
        # Pad with NaN placeholder rows so every frame contributes exactly
        # ROWS_PER_FRAME rows to the output table.
        while len(data_list) < (frame_number + 1) * ROWS_PER_FRAME:
            data_list.append([frame_number, f"{frame_number}-right_hand-{len(data_list) % ROWS_PER_FRAME}", "right-hand", len(data_list) % ROWS_PER_FRAME, np.nan, np.nan, np.nan])
        # Display the frame (optional)
        cv2.imshow('MediaPipe', image)
        frame_number += 1
        # Press 'q' to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
cap.release()
cv2.destroyAllWindows()
# Persist the extracted landmarks for the inference step below.
df = pd.DataFrame(data_list, columns=["frame", "row_id", "type", "landmark_index", "x", "y", "z"])
df.to_parquet("extracted_features.parquet", index=False)
# test_data = pd.read_parquet('./1006440534.parquet')
# test_data_kaggle = pd.read_parquet('1001373962.parquet')
# test_data_kaggle2 = pd.read_parquet('./100015657.parquet')
# test_data_kaggle3 = pd.read_parquet('./1003700302.parquet')
# test_data_kaggle4 = pd.read_parquet('./1007127288.parquet')
# Reload the features written above; downcast the integer columns to int16.
test_data_my_own = pd.read_parquet('extracted_features.parquet')
test_data_my_own['frame'] = test_data_my_own['frame'].astype('int16')
test_data_my_own['landmark_index'] = test_data_my_own['landmark_index'].astype('int16')
def load_relevant_data_subset(pq_path, ROWS_PER_FRAME=543):
    """Load the (x, y, z) landmark columns from a parquet file.

    Args:
        pq_path: path to a parquet file with 'x', 'y', 'z' columns and
            ROWS_PER_FRAME rows per video frame.
        ROWS_PER_FRAME: number of landmark rows per frame (543 by default).

    Returns:
        float32 array of shape (n_frames, ROWS_PER_FRAME, 3).
    """
    data_columns = ['x', 'y', 'z']
    data = pd.read_parquet(pq_path, columns=data_columns)
    n_frames = len(data) // ROWS_PER_FRAME
    print(f"Data: {len(data)} Number of Frames: {n_frames}")
    # BUG FIX: the original reshaped the full array, which raises ValueError
    # whenever len(data) is not an exact multiple of ROWS_PER_FRAME; trim any
    # trailing partial frame first.
    values = data.values[: n_frames * ROWS_PER_FRAME]
    return values.reshape(n_frames, ROWS_PER_FRAME, len(data_columns)).astype(np.float32)
# demo_raw_data = load_relevant_data_subset('./1006440534.parquet')
# Load the extracted features as the demo input for the inference block below.
demo_raw_data = load_relevant_data_subset('./extracted_features.parquet')
# demo_raw_data = load_relevant_data_subset('./1003700302.parquet', test_data_kaggle3['frame'].nunique())
# demo_raw_data = load_relevant_data_subset('./extracted_features.parquet')
# Reverse lookup (class index -> sign label). The original was a second,
# hand-maintained 250-entry literal duplicating inv_index_to_class; derive it
# from index_to_class so the two mappings can never drift out of sync.
ORD2SIGN = {label_idx: sign for sign, label_idx in index_to_class.items()}
# --- Standalone inference with the tflite_runtime interpreter.
# NOTE(review): this rebinds the module-level `interpreter` created earlier
# with tf.lite.Interpreter — confirm that is intentional.
import tflite_runtime.interpreter as tflite

interpreter = tflite.Interpreter("./model.tflite")
found_signatures = list(interpreter.get_signature_list().keys())
prediction_fn = interpreter.get_signature_runner("serving_default")
# BUG FIX: the original called prediction_fn twice and discarded the first
# result — a full redundant inference pass; run it once.
output = prediction_fn(inputs=demo_raw_data)
sign = output['outputs'].argmax()
print("PRED : ", ORD2SIGN.get(sign), f'[{sign}]')