# Hugging Face Spaces status: Runtime error
import cv2 | |
import gradio as gr | |
import mediapipe as mp | |
import numpy as np | |
import tensorflow.lite as tflite | |
# MediaPipe solution modules used for landmark detection.
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose
mp_face_mesh = mp.solutions.face_mesh

# One detector per solution, each constructed with its default options.
# They are created once at import time and reused by extract_landmarks().
hands, pose, face_mesh = mp_hands.Hands(), mp_pose.Pose(), mp_face_mesh.FaceMesh()

# Load the TFLite model from the working directory, allocate its tensors once,
# and cache the input/output tensor metadata that make_prediction() reads.
interpreter = tflite.Interpreter(model_path="model.tflite")
interpreter.allocate_tensors()
input_details, output_details = (
    interpreter.get_input_details(),
    interpreter.get_output_details(),
)
# Preprocess landmarks
def preprocess_landmarks(hand1_landmarks, hand2_landmarks, pose_landmarks, lip_landmarks):
    """Flatten the detected landmarks into one float32 coordinate array.

    Args:
        hand1_landmarks: Object whose ``.landmark`` iterable yields points
            exposing ``x``, ``y`` and ``z`` attributes.
        hand2_landmarks: Same structure as ``hand1_landmarks``.
        pose_landmarks: Same structure as ``hand1_landmarks``.
        lip_landmarks: Plain iterable of points (no ``.landmark`` wrapper).

    Returns:
        A ``np.float32`` array with one ``[x, y, z]`` row per point, ordered
        lips, first hand, second hand, pose.
    """

    def xyz_rows(points):
        # One [x, y, z] triple per landmark point.
        return [[point.x, point.y, point.z] for point in points]

    rows = []
    for points in (
        lip_landmarks,
        hand1_landmarks.landmark,
        hand2_landmarks.landmark,
        pose_landmarks.landmark,
    ):
        rows.extend(xyz_rows(points))
    return np.array(rows, dtype=np.float32)
# Face-mesh indices of the lip landmarks, collected from the endpoints of
# MediaPipe's lip connection set.  This replaces the previous expression
# `LIPS_IDXS0 - START_IDX`, whose names were never defined in this file.
# NOTE(review): the sorted order used here is an assumption -- confirm it
# matches the landmark order the TFLite model was trained on.
_LIP_LANDMARK_INDICES = sorted(
    {index for connection in mp_face_mesh.FACEMESH_LIPS for index in connection}
)


# Function to extract landmarks from the webcam frame
def extract_landmarks(frame):
    """Run the hand, pose and face-mesh detectors on one frame.

    Args:
        frame: Image array in BGR channel order; it is converted to RGB
            before being handed to the MediaPipe detectors.

    Returns:
        ``None`` when no hand, no pose or no face is detected.  Otherwise a
        tuple ``(hand1_landmarks, hand2_landmarks, pose_landmarks,
        lip_landmarks)`` where the first three are the MediaPipe landmark
        containers and ``lip_landmarks`` is a list of the lip points picked
        from the first detected face.  When only one hand is detected it is
        returned in both hand slots.
    """
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # All three detectors run on every frame before any result is inspected,
    # exactly as before, so each detector sees the full frame sequence.
    hand_results = hands.process(frame_rgb)
    pose_results = pose.process(frame_rgb)
    face_results = face_mesh.process(frame_rgb)

    detected_hands = hand_results.multi_hand_landmarks
    detected_faces = face_results.multi_face_landmarks
    if not detected_hands or not pose_results.pose_landmarks or not detected_faces:
        return None

    hand1_landmarks = detected_hands[0]
    # Reuse the first hand when a second one was not detected.
    hand2_landmarks = detected_hands[1] if len(detected_hands) > 1 else hand1_landmarks

    face_points = detected_faces[0].landmark
    lip_landmarks = [face_points[index] for index in _LIP_LANDMARK_INDICES]
    return hand1_landmarks, hand2_landmarks, pose_results.pose_landmarks, lip_landmarks
# Make prediction
def make_prediction(processed_landmarks):
    """Run the TFLite model on one landmark array and return the top index.

    Args:
        processed_landmarks: Landmark array for a single sample.  It is
            wrapped in a leading axis of size one before being written to
            the interpreter's first input tensor.

    Returns:
        The index of the largest value in the first row of the model's
        first output tensor.
    """
    batch = np.array([processed_landmarks])
    input_index = input_details[0]['index']
    interpreter.set_tensor(input_index, batch)
    interpreter.invoke()

    output_index = output_details[0]['index']
    first_row = interpreter.get_tensor(output_index)[0]
    return first_row.argmax()
# Gradio Interface Function
def predict_with_webcam(frame):
    """Predict a class label from a single Gradio webcam frame.

    Args:
        frame: RGB image array supplied by the Gradio image input, or
            ``None`` when the input holds no image (e.g. before the first
            capture or after the image is cleared).

    Returns:
        The predicted class index as a string, or ``None`` when there is no
        frame or when the hand, pose or face detectors find nothing.
    """
    # A live image input can invoke the callback without an image; without
    # this guard the colour conversion below would raise on ``None``.
    if frame is None:
        return None

    # Gradio delivers RGB arrays, whereas extract_landmarks() expects BGR and
    # swaps the channels itself.  Convert here so the detectors end up
    # seeing true RGB instead of channel-swapped colours.
    bgr_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

    landmarks = extract_landmarks(bgr_frame)
    if landmarks is None:
        return None

    processed_landmarks = preprocess_landmarks(*landmarks)
    return str(make_prediction(processed_landmarks))
# Define the Gradio interface with the Webcam input and Text output.
# NOTE(review): `gr.inputs.Image` and the `interpretation` argument belong to
# the legacy Gradio API -- confirm the Gradio version pinned for this app
# still provides them.
_webcam_input = gr.inputs.Image(shape=(480, 640), source="webcam")

webcam_interface = gr.Interface(
    title="Webcam Landmark Prediction",
    description="Make predictions using landmarks extracted from your webcam stream.",
    fn=predict_with_webcam,
    inputs=_webcam_input,
    outputs="text",
    live=True,  # re-run the prediction whenever the input changes
    interpretation="default",
)

# Launch the Gradio app with the webcam interface (only when run as a script).
if __name__ == "__main__":
    webcam_interface.launch(share=True)