import os
import time
import cv2
import gradio as gr
import mediapipe as mp
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Import TensorFlow
import tensorflow as tf

# Initialize MediaPipe solutions
mp_holistic = mp.solutions.holistic
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose
mp_face_mesh = mp.solutions.face_mesh

hands = mp_hands.Hands()
pose = mp_pose.Pose()
face_mesh = mp_face_mesh.FaceMesh()
# Get the absolute path to the directory containing app.py
current_dir = os.path.dirname(os.path.abspath(__file__))

# Define the filename of the TFLite model
model_filename = "model.tflite"

# Construct the full path to the TFLite model file
model_path = os.path.join(current_dir, model_filename)

# Load the TFLite model using the interpreter
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

# Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
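# Quick diagnostic of the tensors the model exposes. The exact shapes depend on
# how model.tflite was exported, so this is purely informational.
for detail in input_details:
    print(f"Input  '{detail['name']}': shape={detail['shape']}, dtype={detail['dtype']}")
for detail in output_details:
    print(f"Output '{detail['name']}': shape={detail['shape']}, dtype={detail['dtype']}")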
N_ROWS = 543
N_DIMS = 3
DIM_NAMES = ['x', 'y', 'z']
SEED = 42
NUM_CLASSES = 250
INPUT_SIZE = 64
BATCH_ALL_SIGNS_N = 4
BATCH_SIZE = 256
N_EPOCHS = 100
LR_MAX = 1e-3
N_WARMUP_EPOCHS = 0
WD_RATIO = 0.05
MASK_VAL = 4237
USE_TYPES = ['left_hand', 'pose', 'right_hand']
START_IDX = 468
LIPS_IDXS0 = np.array([
    61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
    291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
    78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
    95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
])
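# The 40 indices above are MediaPipe FaceMesh landmark ids tracing the outer
# and inner lip contours.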
# Note: despite its name, index_to_class maps sign name -> class index;
# inv_index_to_class below maps class index -> sign name.
index_to_class = {
    "TV": 0, "after": 1, "airplane": 2, "all": 3, "alligator": 4, "animal": 5, "another": 6, "any": 7, "apple": 8,
    "arm": 9, "aunt": 10, "awake": 11, "backyard": 12, "bad": 13, "balloon": 14, "bath": 15, "because": 16, "bed": 17,
    "bedroom": 18, "bee": 19, "before": 20, "beside": 21, "better": 22, "bird": 23, "black": 24, "blow": 25, "blue": 26,
    "boat": 27, "book": 28, "boy": 29, "brother": 30, "brown": 31, "bug": 32, "bye": 33, "callonphone": 34, "can": 35,
    "car": 36, "carrot": 37, "cat": 38, "cereal": 39, "chair": 40, "cheek": 41, "child": 42, "chin": 43,
    "chocolate": 44, "clean": 45, "close": 46, "closet": 47, "cloud": 48, "clown": 49, "cow": 50, "cowboy": 51,
    "cry": 52, "cut": 53, "cute": 54, "dad": 55, "dance": 56, "dirty": 57, "dog": 58, "doll": 59, "donkey": 60,
    "down": 61, "drawer": 62, "drink": 63, "drop": 64, "dry": 65, "dryer": 66, "duck": 67, "ear": 68, "elephant": 69,
    "empty": 70, "every": 71, "eye": 72, "face": 73, "fall": 74, "farm": 75, "fast": 76, "feet": 77, "find": 78,
    "fine": 79, "finger": 80, "finish": 81, "fireman": 82, "first": 83, "fish": 84, "flag": 85, "flower": 86,
    "food": 87, "for": 88, "frenchfries": 89, "frog": 90, "garbage": 91, "gift": 92, "giraffe": 93, "girl": 94,
    "give": 95, "glasswindow": 96, "go": 97, "goose": 98, "grandma": 99, "grandpa": 100, "grass": 101, "green": 102,
    "gum": 103, "hair": 104, "happy": 105, "hat": 106, "hate": 107, "have": 108, "haveto": 109, "head": 110,
    "hear": 111, "helicopter": 112, "hello": 113, "hen": 114, "hesheit": 115, "hide": 116, "high": 117, "home": 118,
    "horse": 119, "hot": 120, "hungry": 121, "icecream": 122, "if": 123, "into": 124, "jacket": 125, "jeans": 126,
    "jump": 127, "kiss": 128, "kitty": 129, "lamp": 130, "later": 131, "like": 132, "lion": 133, "lips": 134,
    "listen": 135, "look": 136, "loud": 137, "mad": 138, "make": 139, "man": 140, "many": 141, "milk": 142,
    "minemy": 143, "mitten": 144, "mom": 145, "moon": 146, "morning": 147, "mouse": 148, "mouth": 149, "nap": 150,
    "napkin": 151, "night": 152, "no": 153, "noisy": 154, "nose": 155, "not": 156, "now": 157, "nuts": 158, "old": 159,
    "on": 160, "open": 161, "orange": 162, "outside": 163, "owie": 164, "owl": 165, "pajamas": 166, "pen": 167,
    "pencil": 168, "penny": 169, "person": 170, "pig": 171, "pizza": 172, "please": 173, "police": 174, "pool": 175,
    "potty": 176, "pretend": 177, "pretty": 178, "puppy": 179, "puzzle": 180, "quiet": 181, "radio": 182, "rain": 183,
    "read": 184, "red": 185, "refrigerator": 186, "ride": 187, "room": 188, "sad": 189, "same": 190, "say": 191,
    "scissors": 192, "see": 193, "shhh": 194, "shirt": 195, "shoe": 196, "shower": 197, "sick": 198, "sleep": 199,
    "sleepy": 200, "smile": 201, "snack": 202, "snow": 203, "stairs": 204, "stay": 205, "sticky": 206, "store": 207,
    "story": 208, "stuck": 209, "sun": 210, "table": 211, "talk": 212, "taste": 213, "thankyou": 214, "that": 215,
    "there": 216, "think": 217, "thirsty": 218, "tiger": 219, "time": 220, "tomorrow": 221, "tongue": 222, "tooth": 223,
    "toothbrush": 224, "touch": 225, "toy": 226, "tree": 227, "uncle": 228, "underwear": 229, "up": 230, "vacuum": 231,
    "wait": 232, "wake": 233, "water": 234, "wet": 235, "weus": 236, "where": 237, "white": 238, "who": 239, "why": 240,
    "will": 241, "wolf": 242, "yellow": 243, "yes": 244, "yesterday": 245, "yourself": 246, "yucky": 247, "zebra": 248,
    "zipper": 249
}
inv_index_to_class = {v: k for k, v in index_to_class.items()}
# Landmark indices in original data
LEFT_HAND_IDXS0 = np.arange(468, 489)
RIGHT_HAND_IDXS0 = np.arange(522, 543)
LEFT_POSE_IDXS0 = np.array([502, 504, 506, 508, 510])
RIGHT_POSE_IDXS0 = np.array([503, 505, 507, 509, 511])
LANDMARK_IDXS_LEFT_DOMINANT0 = np.concatenate((LIPS_IDXS0, LEFT_HAND_IDXS0, LEFT_POSE_IDXS0))
LANDMARK_IDXS_RIGHT_DOMINANT0 = np.concatenate((LIPS_IDXS0, RIGHT_HAND_IDXS0, RIGHT_POSE_IDXS0))
HAND_IDXS0 = np.concatenate((LEFT_HAND_IDXS0, RIGHT_HAND_IDXS0), axis=0)
N_COLS = LANDMARK_IDXS_LEFT_DOMINANT0.size
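# N_COLS = 40 lips + 21 hand + 5 pose landmarks = 66 columns per processed frame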
# Landmark indices in processed data
LIPS_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, LIPS_IDXS0)).squeeze()
LEFT_HAND_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, LEFT_HAND_IDXS0)).squeeze()
RIGHT_HAND_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, RIGHT_HAND_IDXS0)).squeeze()
HAND_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, HAND_IDXS0)).squeeze()
POSE_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, LEFT_POSE_IDXS0)).squeeze()
print(f'# HAND_IDXS: {len(HAND_IDXS)}, N_COLS: {N_COLS}')

LIPS_START = 0
LEFT_HAND_START = LIPS_IDXS.size
RIGHT_HAND_START = LEFT_HAND_START + LEFT_HAND_IDXS.size
POSE_START = RIGHT_HAND_START + RIGHT_HAND_IDXS.size
print(f'LIPS_START: {LIPS_START}, LEFT_HAND_START: {LEFT_HAND_START}, '
      f'RIGHT_HAND_START: {RIGHT_HAND_START}, POSE_START: {POSE_START}')
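# Example (illustrative): for a processed frame of shape (N_COLS, 3),
# frame[LIPS_START:LEFT_HAND_START] is the lips block (40 rows),
# frame[LEFT_HAND_START:RIGHT_HAND_START] is the dominant-hand block (21 rows),
# and frame[POSE_START:] is the pose block (5 rows). Only the dominant hand is
# kept, so RIGHT_HAND_IDXS is empty and POSE_START == RIGHT_HAND_START.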
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # color conversion BGR -> RGB
    image.flags.writeable = False                   # image is no longer writeable
    results = model.process(image)                  # make prediction
    image.flags.writeable = True                    # image is now writeable
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # color conversion RGB -> BGR
    return image, results
def extract_keypoints(results):
    lh = (np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten()
          if results.left_hand_landmarks else np.zeros(21 * 3))
    rh = (np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten()
          if results.right_hand_landmarks else np.zeros(21 * 3))
    pose = (np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten()
            if results.pose_landmarks else np.zeros(33 * 4))
    face = (np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten()
            if results.face_landmarks else np.zeros(468 * 3))
    return np.concatenate([lh, rh, pose, face])
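# The concatenated vector always has 21*3 + 21*3 + 33*4 + 468*3 = 1662 values.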
# Quick local sanity check of the Holistic pipeline on the default webcam.
# cap.isOpened() is False when no camera is available, so the loop is skipped.
cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)

# Set mediapipe model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        # Read feed; stop once no frame can be read
        ret, frame = cap.read()
        if not ret:
            break
        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
# Function to make predictions using the TensorFlow Lite model
def make_prediction(processed_landmarks):
    inputs = np.array(processed_landmarks, dtype=np.float32)
    # Set the input tensor for the TFLite model
    interpreter.set_tensor(input_details[0]['index'], inputs)
    # Invoke the TFLite interpreter to perform inference
    interpreter.invoke()
    # Get the output tensor of the TFLite model
    output_data = interpreter.get_tensor(output_details[0]['index'])
    # Find the index of the predicted class
    index = int(np.argmax(output_data))
    # Map the index to the corresponding sign name via inv_index_to_class
    prediction = inv_index_to_class[index]
    return prediction
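# Illustrative usage (assumption: the model's single input takes the
# preprocessed [INPUT_SIZE, N_COLS, N_DIMS] tensor produced by the
# PreprocessLayer defined below):
#   processed, _ = preprocess_layer(raw_clip)  # raw_clip: [n_frames, 543, 3]
#   print(make_prediction(processed))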
class PreprocessLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(PreprocessLayer, self).__init__()
        normalisation_correction = tf.constant(
            [
                # Add 0.50 to the left hand (original right hand) and subtract
                # 0.50 from the right hand (original left hand)
                [0] * len(LIPS_IDXS) + [0.50] * len(LEFT_HAND_IDXS) + [0.50] * len(POSE_IDXS),
                # Y coordinates stay intact
                [0] * len(LANDMARK_IDXS_LEFT_DOMINANT0),
                # Z coordinates stay intact
                [0] * len(LANDMARK_IDXS_LEFT_DOMINANT0),
            ],
            dtype=tf.float32,
        )
        self.normalisation_correction = tf.transpose(normalisation_correction, [1, 0])

    def pad_edge(self, t, repeats, side):
        if side == 'LEFT':
            return tf.concat((tf.repeat(t[:1], repeats=repeats, axis=0), t), axis=0)
        elif side == 'RIGHT':
            return tf.concat((t, tf.repeat(t[-1:], repeats=repeats, axis=0)), axis=0)
    def call(self, data0):
        # Number of frames in the video
        N_FRAMES0 = tf.shape(data0)[0]

        # Find the dominant hand by comparing the number of non-NaN coordinates
        left_hand_sum = tf.math.reduce_sum(
            tf.where(tf.math.is_nan(tf.gather(data0, LEFT_HAND_IDXS0, axis=1)), 0, 1))
        right_hand_sum = tf.math.reduce_sum(
            tf.where(tf.math.is_nan(tf.gather(data0, RIGHT_HAND_IDXS0, axis=1)), 0, 1))
        left_dominant = left_hand_sum >= right_hand_sum

        # Count non-NaN hand values in each frame for the dominant hand
        if left_dominant:
            frames_hands_non_nan_sum = tf.math.reduce_sum(
                tf.where(tf.math.is_nan(tf.gather(data0, LEFT_HAND_IDXS0, axis=1)), 0, 1),
                axis=[1, 2],
            )
        else:
            frames_hands_non_nan_sum = tf.math.reduce_sum(
                tf.where(tf.math.is_nan(tf.gather(data0, RIGHT_HAND_IDXS0, axis=1)), 0, 1),
                axis=[1, 2],
            )

        # Find frame indices with coordinates of the dominant hand
        non_empty_frames_idxs = tf.where(frames_hands_non_nan_sum > 0)
        non_empty_frames_idxs = tf.squeeze(non_empty_frames_idxs, axis=1)
        # Filter frames
        data = tf.gather(data0, non_empty_frames_idxs, axis=0)

        # Cast indices to float32 to be compatible with TensorFlow Lite
        non_empty_frames_idxs = tf.cast(non_empty_frames_idxs, tf.float32)
        # Normalize to start at 0
        non_empty_frames_idxs -= tf.reduce_min(non_empty_frames_idxs)

        # Number of frames in the filtered video
        N_FRAMES = tf.shape(data)[0]

        # Gather relevant landmark columns; mirror right-dominant videos so
        # they match the left-dominant layout
        if left_dominant:
            data = tf.gather(data, LANDMARK_IDXS_LEFT_DOMINANT0, axis=1)
        else:
            data = tf.gather(data, LANDMARK_IDXS_RIGHT_DOMINANT0, axis=1)
            data = (
                self.normalisation_correction
                + ((data - self.normalisation_correction)
                   * tf.where(self.normalisation_correction != 0, -1.0, 1.0))
            )

        # Video fits in INPUT_SIZE
        if N_FRAMES < INPUT_SIZE:
            # Pad frame indices with -1 to indicate padding
            non_empty_frames_idxs = tf.pad(
                non_empty_frames_idxs, [[0, INPUT_SIZE - N_FRAMES]], constant_values=-1)
            # Pad data with zeros
            data = tf.pad(data, [[0, INPUT_SIZE - N_FRAMES], [0, 0], [0, 0]], constant_values=0)
            # Fill NaN values with 0
            data = tf.where(tf.math.is_nan(data), 0.0, data)
            return data, non_empty_frames_idxs
        # Video needs to be downsampled to INPUT_SIZE
        else:
            # Repeat frames so there are enough to pool from
            if N_FRAMES < INPUT_SIZE ** 2:
                repeats = tf.math.floordiv(INPUT_SIZE * INPUT_SIZE, N_FRAMES0)
                data = tf.repeat(data, repeats=repeats, axis=0)
                non_empty_frames_idxs = tf.repeat(non_empty_frames_idxs, repeats=repeats, axis=0)

            # Pad to a multiple of INPUT_SIZE
            pool_size = tf.math.floordiv(len(data), INPUT_SIZE)
            if tf.math.mod(len(data), INPUT_SIZE) > 0:
                pool_size += 1

            if pool_size == 1:
                pad_size = (pool_size * INPUT_SIZE) - len(data)
            else:
                pad_size = (pool_size * INPUT_SIZE) % len(data)

            # Pad start/end with the start/end value
            pad_left = tf.math.floordiv(pad_size, 2) + tf.math.floordiv(INPUT_SIZE, 2)
            pad_right = tf.math.floordiv(pad_size, 2) + tf.math.floordiv(INPUT_SIZE, 2)
            if tf.math.mod(pad_size, 2) > 0:
                pad_right += 1

            # Pad by concatenating left/right edge values
            data = self.pad_edge(data, pad_left, 'LEFT')
            data = self.pad_edge(data, pad_right, 'RIGHT')
            # Pad non-empty frame indices
            non_empty_frames_idxs = self.pad_edge(non_empty_frames_idxs, pad_left, 'LEFT')
            non_empty_frames_idxs = self.pad_edge(non_empty_frames_idxs, pad_right, 'RIGHT')

            # Reshape for mean pooling
            data = tf.reshape(data, [INPUT_SIZE, -1, N_COLS, N_DIMS])
            non_empty_frames_idxs = tf.reshape(non_empty_frames_idxs, [INPUT_SIZE, -1])
            # Mean pool
            data = tf.experimental.numpy.nanmean(data, axis=1)
            non_empty_frames_idxs = tf.experimental.numpy.nanmean(non_empty_frames_idxs, axis=1)
            # Fill NaN values with 0
            data = tf.where(tf.math.is_nan(data), 0.0, data)
            return data, non_empty_frames_idxs
preprocess_layer = PreprocessLayer()
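# Minimal smoke test for the preprocessing (assumption: raw clips are float32
# arrays of shape [n_frames, 543, 3], with NaN marking missing landmarks):
#   dummy_clip = np.zeros((10, 543, 3), dtype=np.float32)
#   frames, frame_idxs = preprocess_layer(dummy_clip)
#   print(frames.shape)  # -> (INPUT_SIZE, N_COLS, N_DIMS) == (64, 66, 3)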
def translate_sign_language(image):
    # Convert the frame to RGB (MediaPipe expects RGB images)
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # extract_keypoints() expects Holistic results (left/right hands, pose and
    # face in one object), so run the Holistic model on the frame
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        results = holistic.process(rgb_frame)

    # Extract keypoints from the results: [lh(63), rh(63), pose(132), face(1404)]
    keypoints = extract_keypoints(results)

    # Rebuild the frame in the training layout of 543 landmarks x (x, y, z):
    # face (rows 0-467), left hand (468-488), pose (489-521), right hand (522-542).
    # (Note: extract_keypoints fills missing landmarks with zeros, whereas the
    # training data used NaN.)
    lh = keypoints[:63].reshape(21, 3)
    rh = keypoints[63:126].reshape(21, 3)
    pose_xyz = keypoints[126:258].reshape(33, 4)[:, :3]  # drop the visibility value
    face = keypoints[258:].reshape(468, 3)
    frame_landmarks = np.concatenate([face, lh, pose_xyz, rh], axis=0)

    # A single-frame clip of shape [1, 543, 3]
    clip = frame_landmarks[np.newaxis, :, :].astype(np.float32)

    # Preprocess the clip and run inference. This assumes model.tflite takes the
    # preprocessed [INPUT_SIZE, N_COLS, N_DIMS] tensor as its single input; if
    # the exported model embeds the preprocessing (as the demo at the bottom of
    # this file suggests), pass `clip` to the signature runner directly instead.
    processed, _ = preprocess_layer(clip)
    translated_text = make_prediction(processed)

    # Return the translated text
    return translated_text
gr_interface = gr.Interface(
    fn=translate_sign_language,
    inputs="webcam",  # input from webcam
    outputs="text",   # output as text
    # capture_session=True,  # to properly release the webcam after running the interface
    live=True,        # show live webcam feed
    title="Sign Language Translation",
    description="Translate sign language to text using TensorFlow Lite and Mediapipe.",
)

# Note: launch() blocks in a script until the server is stopped, so the
# offline demo below only runs after the Gradio app shuts down.
gr_interface.launch(share=True)
cap.release()
cv2.destroyAllWindows()
# Offline demo: extract landmarks from a test video, save them in the Kaggle
# parquet layout, then run the model on the result
video_path = './Test/HAPPY.mp4'
cap = cv2.VideoCapture(video_path)

mp_drawing = mp.solutions.drawing_utils
mp_face_mesh = mp.solutions.face_mesh
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose

data_list = []
ROWS_PER_FRAME = 543  # constant number of landmarks per frame
with mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1) as face_mesh, \
        mp_hands.Hands(static_image_mode=False, max_num_hands=2) as hands, \
        mp_pose.Pose(static_image_mode=False) as pose:
    frame_number = 0
    while cap.isOpened():
        ret, image = cap.read()
        if not ret:
            break

        # Convert the BGR image to RGB for MediaPipe
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Process face landmarks
        results_face = face_mesh.process(image_rgb)
        face_landmarks = None
        if results_face.multi_face_landmarks:
            face_landmarks = results_face.multi_face_landmarks[0]
            for idx, landmark in enumerate(face_landmarks.landmark):
                data_list.append([frame_number, f"{frame_number}-face-{idx}", "face", idx,
                                  landmark.x, landmark.y, landmark.z])

        # Process hand landmarks (note: handedness is not distinguished here;
        # every detected hand is recorded as "right_hand")
        results_hands = hands.process(image_rgb)
        if results_hands.multi_hand_landmarks:
            for hand_landmarks in results_hands.multi_hand_landmarks:
                for idx, landmark in enumerate(hand_landmarks.landmark):
                    data_list.append([frame_number, f"{frame_number}-right_hand-{idx}", "right_hand", idx,
                                      landmark.x, landmark.y, landmark.z])
                mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        # Process pose landmarks
        results_pose = pose.process(image_rgb)
        if results_pose.pose_landmarks:
            pose_landmarks = results_pose.pose_landmarks.landmark
            for idx, landmark in enumerate(pose_landmarks):
                data_list.append([frame_number, f"{frame_number}-pose-{idx}", "pose", idx,
                                  landmark.x, landmark.y, landmark.z])

        # Pad the frame with NaN rows until it has exactly ROWS_PER_FRAME rows.
        # (Caveat: the resulting row order of face, detected hands, pose, padding
        # does not exactly match the Kaggle training layout of
        # face/left_hand/pose/right_hand.)
        while len(data_list) < (frame_number + 1) * ROWS_PER_FRAME:
            data_list.append([frame_number,
                              f"{frame_number}-right_hand-{len(data_list) % ROWS_PER_FRAME}",
                              "right_hand", len(data_list) % ROWS_PER_FRAME,
                              np.nan, np.nan, np.nan])

        # Draw the landmarks on the frame (optional); only draw what was detected
        if face_landmarks is not None:
            mp_drawing.draw_landmarks(image, face_landmarks, mp_face_mesh.FACEMESH_CONTOURS)
        if results_pose.pose_landmarks:
            mp_drawing.draw_landmarks(image, results_pose.pose_landmarks, mp_pose.POSE_CONNECTIONS)

        # Display the frame (optional)
        cv2.imshow('MediaPipe', image)
        frame_number += 1

        # Press 'q' to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()
df = pd.DataFrame(data_list, columns=["frame", "row_id", "type", "landmark_index", "x", "y", "z"])
df.to_parquet("extracted_features.parquet", index=False)
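# The parquet mirrors the Kaggle GISLR schema (frame, row_id, type,
# landmark_index, x, y, z) with ROWS_PER_FRAME rows per frame, so it can be
# read back with load_relevant_data_subset() below.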
# test_data = pd.read_parquet('./1006440534.parquet')
# test_data_kaggle = pd.read_parquet('1001373962.parquet')
# test_data_kaggle2 = pd.read_parquet('./100015657.parquet')
# test_data_kaggle3 = pd.read_parquet('./1003700302.parquet')
# test_data_kaggle4 = pd.read_parquet('./1007127288.parquet')
test_data_my_own = pd.read_parquet('extracted_features.parquet')
test_data_my_own['frame'] = test_data_my_own['frame'].astype('int16')
test_data_my_own['landmark_index'] = test_data_my_own['landmark_index'].astype('int16')
def load_relevant_data_subset(pq_path, ROWS_PER_FRAME=543):
    data_columns = ['x', 'y', 'z']
    data = pd.read_parquet(pq_path, columns=data_columns)
    n_frames = int(len(data) / ROWS_PER_FRAME)
    print(f"Data: {len(data)} Number of Frames: {n_frames}")
    data = data.values.reshape(n_frames, ROWS_PER_FRAME, len(data_columns))
    return data.astype(np.float32)
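# Example: a recording with 20 * 543 = 10860 rows comes back as a
# (20, 543, 3) float32 array.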
# demo_raw_data = load_relevant_data_subset('./1006440534.parquet')
# demo_raw_data = load_relevant_data_subset('./1003700302.parquet', test_data_kaggle3['frame'].nunique())
demo_raw_data = load_relevant_data_subset('./extracted_features.parquet')
# ORD2SIGN maps class index -> sign name. It is the same mapping as
# inv_index_to_class defined above, so reuse it instead of duplicating the
# 250-entry literal.
ORD2SIGN = inv_index_to_class
# Run the exported model via its serving signature. tf.lite.Interpreter exposes
# the same get_signature_list/get_signature_runner API as tflite_runtime, so no
# extra dependency is needed here.
interpreter = tf.lite.Interpreter("./model.tflite")
found_signatures = list(interpreter.get_signature_list().keys())
print("Found signatures:", found_signatures)
prediction_fn = interpreter.get_signature_runner("serving_default")

output = prediction_fn(inputs=demo_raw_data)
sign = output['outputs'].argmax()
print("PRED : ", ORD2SIGN.get(sign), f'[{sign}]')