Spaces:
Runtime error
Runtime error
File size: 7,775 Bytes
e156e53 ac125c0 2aae5c8 ac125c0 9f9bcb1 2aae5c8 4ca0f67 27ae852 2aae5c8 2aad215 4ca0f67 4d98c67 4ca0f67 ac125c0 2aad215 5eb4620 2aad215 914e870 0dded4a 2aad215 4ca0f67 ac125c0 2aad215 27ae852 2aad215 27ae852 2aad215 27ae852 ac125c0 2aae5c8 2aad215 2aae5c8 2aad215 2aae5c8 6b1e533 2aae5c8 03fbdcd 2aae5c8 bd4424b 2aae5c8 60eabb7 2aae5c8 bd4424b 03fbdcd 2abb6d3 60eabb7 2aae5c8 8582f12 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
import os
import cv2
import gradio as gr
import mediapipe as mp
import numpy as np
import tensorflow as tf
import tensorflow.lite as tflite
# Class-label vocabulary for the model: 250 labels with indices 0-249.
# NOTE: despite its name, ``index_to_class`` maps label -> index (the keys are
# the label strings, the values are the integer class indices).
index_to_class = {
"TV": 0, "after": 1, "airplane": 2, "all": 3, "alligator": 4, "animal": 5, "another": 6, "any": 7, "apple": 8,
"arm": 9, "aunt": 10, "awake": 11, "backyard": 12, "bad": 13, "balloon": 14, "bath": 15, "because": 16, "bed": 17,
"bedroom": 18, "bee": 19, "before": 20, "beside": 21, "better": 22, "bird": 23, "black": 24, "blow": 25, "blue": 26,
"boat": 27, "book": 28, "boy": 29, "brother": 30, "brown": 31, "bug": 32, "bye": 33, "callonphone": 34, "can": 35,
"car": 36, "carrot": 37, "cat": 38, "cereal": 39, "chair": 40, "cheek": 41, "child": 42, "chin": 43,
"chocolate": 44, "clean": 45, "close": 46, "closet": 47, "cloud": 48, "clown": 49, "cow": 50, "cowboy": 51,
"cry": 52, "cut": 53, "cute": 54, "dad": 55, "dance": 56, "dirty": 57, "dog": 58, "doll": 59, "donkey": 60,
"down": 61, "drawer": 62, "drink": 63, "drop": 64, "dry": 65, "dryer": 66, "duck": 67, "ear": 68, "elephant": 69,
"empty": 70, "every": 71, "eye": 72, "face": 73, "fall": 74, "farm": 75, "fast": 76, "feet": 77, "find": 78,
"fine": 79, "finger": 80, "finish": 81, "fireman": 82, "first": 83, "fish": 84, "flag": 85, "flower": 86,
"food": 87, "for": 88, "frenchfries": 89, "frog": 90, "garbage": 91, "gift": 92, "giraffe": 93, "girl": 94,
"give": 95, "glasswindow": 96, "go": 97, "goose": 98, "grandma": 99, "grandpa": 100, "grass": 101, "green": 102,
"gum": 103, "hair": 104, "happy": 105, "hat": 106, "hate": 107, "have": 108, "haveto": 109, "head": 110,
"hear": 111, "helicopter": 112, "hello": 113, "hen": 114, "hesheit": 115, "hide": 116, "high": 117, "home": 118,
"horse": 119, "hot": 120, "hungry": 121, "icecream": 122, "if": 123, "into": 124, "jacket": 125, "jeans": 126,
"jump": 127, "kiss": 128, "kitty": 129, "lamp": 130, "later": 131, "like": 132, "lion": 133, "lips": 134,
"listen": 135, "look": 136, "loud": 137, "mad": 138, "make": 139, "man": 140, "many": 141, "milk": 142,
"minemy": 143, "mitten": 144, "mom": 145, "moon": 146, "morning": 147, "mouse": 148, "mouth": 149, "nap": 150,
"napkin": 151, "night": 152, "no": 153, "noisy": 154, "nose": 155, "not": 156, "now": 157, "nuts": 158, "old": 159,
"on": 160, "open": 161, "orange": 162, "outside": 163, "owie": 164, "owl": 165, "pajamas": 166, "pen": 167,
"pencil": 168, "penny": 169, "person": 170, "pig": 171, "pizza": 172, "please": 173, "police": 174, "pool": 175,
"potty": 176, "pretend": 177, "pretty": 178, "puppy": 179, "puzzle": 180, "quiet": 181, "radio": 182, "rain": 183,
"read": 184, "red": 185, "refrigerator": 186, "ride": 187, "room": 188, "sad": 189, "same": 190, "say": 191,
"scissors": 192, "see": 193, "shhh": 194, "shirt": 195, "shoe": 196, "shower": 197, "sick": 198, "sleep": 199,
"sleepy": 200, "smile": 201, "snack": 202, "snow": 203, "stairs": 204, "stay": 205, "sticky": 206, "store": 207,
"story": 208, "stuck": 209, "sun": 210, "table": 211, "talk": 212, "taste": 213, "thankyou": 214, "that": 215,
"there": 216, "think": 217, "thirsty": 218, "tiger": 219, "time": 220, "tomorrow": 221, "tongue": 222, "tooth": 223,
"toothbrush": 224, "touch": 225, "toy": 226, "tree": 227, "uncle": 228, "underwear": 229, "up": 230, "vacuum": 231,
"wait": 232, "wake": 233, "water": 234, "wet": 235, "weus": 236, "where": 237, "white": 238, "who": 239, "why": 240,
"will": 241, "wolf": 242, "yellow": 243, "yes": 244, "yesterday": 245, "yourself": 246, "yucky": 247, "zebra": 248,
"zipper": 249
}
# Reverse lookup (index -> label); this is the mapping used to turn the
# model's arg-max output index back into a label string.
inv_index_to_class = {v: k for k, v in index_to_class.items()}
# MediaPipe solution modules and one detector instance of each, created once
# at import time and reused for every processed frame.
mp_hands, mp_pose, mp_face_mesh = (
    mp.solutions.hands,
    mp.solutions.pose,
    mp.solutions.face_mesh,
)
hands, pose, face_mesh = mp_hands.Hands(), mp_pose.Pose(), mp_face_mesh.FaceMesh()

# The TFLite model file is looked up in the same directory as this script.
current_dir = os.path.dirname(os.path.abspath(__file__))
model_filename = "model.tflite"
model_path = os.path.join(current_dir, model_filename)

# Build the TFLite interpreter for that file and allocate its tensors so it
# is ready for inference.
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()
# Preprocess landmarks
def _landmark_xyz(landmarks):
    """Return ``[x, y, z]`` triples for a landmark container or a plain sequence."""
    # Containers that expose their points through a ``.landmark`` attribute are
    # unwrapped; anything else is iterated directly as a sequence of points.
    points = getattr(landmarks, "landmark", landmarks)
    return [[point.x, point.y, point.z] for point in points]


def preprocess_landmarks(hand1_landmarks, hand2_landmarks, pose_landmarks, lip_landmarks):
    """Flatten the detected landmarks into one ``(N, 3)`` float32 array.

    Each argument may be either an object exposing its points through a
    ``.landmark`` attribute or a plain iterable of points; every point must
    provide ``x``, ``y`` and ``z`` attributes.

    Args:
        hand1_landmarks: Landmarks of the first hand.
        hand2_landmarks: Landmarks of the second hand.
        pose_landmarks: Body-pose landmarks.
        lip_landmarks: Lip landmarks.

    Returns:
        ``numpy.ndarray`` of dtype ``float32`` and shape ``(N, 3)``. Rows are
        ordered lips, hand 1, hand 2, pose. The shape is ``(0, 3)`` when no
        points are supplied at all.
    """
    combined_landmarks = (
        _landmark_xyz(lip_landmarks)
        + _landmark_xyz(hand1_landmarks)
        + _landmark_xyz(hand2_landmarks)
        + _landmark_xyz(pose_landmarks)
    )
    # reshape keeps the column dimension even when the list is empty.
    return np.array(combined_landmarks, dtype=np.float32).reshape(-1, 3)
# Function to extract landmarks from the webcam frame
# Face-mesh vertex indices of the lips (MediaPipe FACEMESH_LIPS vertices).
# The code previously indexed with ``LIPS_IDXS0 - START_IDX``, two names that
# are not defined anywhere in this file, which raised NameError.
# NOTE(review): the selection and order must match what the model was trained
# on - confirm against the training pipeline.
_LIP_LANDMARK_INDICES = (
    61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
    291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
    78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
    95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
)


def extract_landmarks(frame):
    """Run hand, pose and face-mesh detection on a single frame.

    Args:
        frame: Image array to analyse, or ``None`` when no frame is available.

    Returns:
        ``None`` if ``frame`` is ``None`` or if any of the three detectors
        (hands, pose, face mesh) found nothing. Otherwise a tuple
        ``(hand1_landmarks, hand2_landmarks, pose_landmarks, lip_landmarks)``
        where the first three are the detector result objects and
        ``lip_landmarks`` is a list of face-mesh landmark points. When only one
        hand is detected it is returned for both hand slots.
    """
    if frame is None:
        # A live stream can deliver an empty frame; there is nothing to detect.
        return None

    # NOTE(review): this assumes a BGR (OpenCV-style) frame. Gradio image
    # inputs are normally RGB already, in which case this swaps the channels -
    # confirm against the caller.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(frame_rgb)
    pose_results = pose.process(frame_rgb)
    face_results = face_mesh.process(frame_rgb)

    if not (
        results.multi_hand_landmarks
        and pose_results.pose_landmarks
        and face_results.multi_face_landmarks
    ):
        return None

    detected_hands = results.multi_hand_landmarks
    hand1_landmarks = detected_hands[0]
    # With a single detected hand, reuse it for the second hand slot.
    hand2_landmarks = detected_hands[1] if len(detected_hands) > 1 else hand1_landmarks
    pose_landmarks = pose_results.pose_landmarks
    face_landmarks = face_results.multi_face_landmarks[0]
    lip_landmarks = [face_landmarks.landmark[i] for i in _LIP_LANDMARK_INDICES]

    print(f"Number of landmarks for hand1: {len(hand1_landmarks.landmark)}")
    print(f"Number of landmarks for hand2: {len(hand2_landmarks.landmark)}")
    print(f"Number of landmarks for pose: {len(pose_landmarks.landmark)}")
    print(f"Number of landmarks for lip: {len(lip_landmarks)}")
    return hand1_landmarks, hand2_landmarks, pose_landmarks, lip_landmarks
# Make prediction
def make_prediction(processed_landmarks):
    """Run the TFLite model on the landmarks and return the predicted label.

    Args:
        processed_landmarks: Array-like of landmark coordinates; it is
            converted to a ``float32`` array before being fed to the model.
            NOTE(review): its shape must match the model's input tensor -
            confirm against the model, since ``set_tensor`` rejects mismatches.

    Returns:
        The label string whose class index has the highest model output.
    """
    inputs = np.array(processed_landmarks, dtype=np.float32)

    # Tensor indices are read from the interpreter here: the module-level
    # ``input_details`` / ``output_details`` names this function used to rely
    # on are never defined in this file.
    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']

    interpreter.set_tensor(input_index, inputs)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_index)

    # Arg-max over the flattened output, converted to a plain int for the
    # dictionary lookup.
    index = int(np.argmax(output_data))
    return inv_index_to_class[index]
# Gradio Interface Function
def predict_with_webcam(frame):
    """Predict a label for one webcam frame.

    Args:
        frame: The image handed over by the Gradio interface.

    Returns:
        The predicted label as a string, or ``None`` when no complete set of
        landmarks could be extracted from the frame.
    """
    landmarks = extract_landmarks(frame)
    if landmarks is None:
        # No landmarks means there is nothing to classify; returning here
        # avoids touching an unassigned ``prediction``.
        return None

    processed_landmarks = preprocess_landmarks(*landmarks)
    return str(make_prediction(processed_landmarks))
# Gradio UI: a live webcam image goes in, the predicted label comes out
# through a "Label" output component.
# NOTE(review): ``gr.inputs`` / ``gr.outputs`` and the ``interpretation``
# argument look like legacy Gradio APIs - confirm the pinned Gradio version
# still provides them.
_interface_kwargs = {
    "fn": predict_with_webcam,
    "inputs": gr.inputs.Image(shape=(480, 640), source="webcam"),
    "outputs": gr.outputs.Label(),
    "live": True,
    "interpretation": "default",
    "title": "Webcam Landmark Prediction",
    "description": "Make predictions using landmarks extracted from your webcam stream.",
}
webcam_interface = gr.Interface(**_interface_kwargs)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    webcam_interface.launch()
|