topStep / app.py
JefferyJapheth's picture
new
2aad215
raw
history blame
3.41 kB
import os
import cv2
import gradio as gr
import mediapipe as mp
import numpy as np
import tensorflow as tf
import tensorflow.lite as tflite
# MediaPipe solution modules used for hand, body-pose and face-mesh detection.
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose
mp_face_mesh = mp.solutions.face_mesh
# One detector per solution, each constructed with no arguments (library
# defaults). These module-level instances are reused by extract_landmarks().
hands = mp_hands.Hands()
pose = mp_pose.Pose()
face_mesh = mp_face_mesh.FaceMesh()
# Absolute path of the directory containing this script, so the model is
# located relative to the file rather than the current working directory.
current_dir = os.path.dirname(os.path.abspath(__file__))
# File name of the TFLite model, expected to sit next to this script.
model_filename = "model.tflite"
# Full path to the TFLite model file.
model_path = os.path.join(current_dir, model_filename)
# Load the model once at import time; the interpreter is shared by
# make_prediction().
interpreter = tf.lite.Interpreter(model_path=model_path)
# Allocate tensor buffers up front, before any set_tensor()/invoke() call.
interpreter.allocate_tensors()
# Preprocess landmarks
def preprocess_landmarks(hand1_landmarks, hand2_landmarks, pose_landmarks, lip_landmarks):
    """Flatten four landmark groups into a single float32 coordinate array.

    Args:
        hand1_landmarks: Object whose ``.landmark`` attribute is iterated to
            obtain the first hand's points.
        hand2_landmarks: Same, for the second hand.
        pose_landmarks: Same, for the body pose.
        lip_landmarks: Plain iterable of points (no ``.landmark`` wrapper).

    Every point must expose ``x``, ``y`` and ``z`` attributes.

    Returns:
        ``np.ndarray`` of dtype ``float32`` holding one ``[x, y, z]`` row per
        point, ordered lips, hand 1, hand 2, pose.
    """

    def _xyz_rows(points):
        # One [x, y, z] row per point, in iteration order.
        return [[point.x, point.y, point.z] for point in points]

    first_hand_rows = _xyz_rows(hand1_landmarks.landmark)
    second_hand_rows = _xyz_rows(hand2_landmarks.landmark)
    pose_rows = _xyz_rows(pose_landmarks.landmark)
    lip_rows = _xyz_rows(lip_landmarks)

    # Row order is part of the contract: lips first, then both hands, then pose.
    all_rows = lip_rows + first_hand_rows + second_hand_rows + pose_rows
    return np.array(all_rows, dtype=np.float32)
# Function to extract landmarks from the webcam frame
def extract_landmarks(frame):
    """Run the hand, pose and face-mesh detectors on a single frame.

    Args:
        frame: Image array; it is passed through ``cv2.cvtColor`` with
            ``COLOR_BGR2RGB`` before detection.

    Returns:
        ``None`` if no hand, no pose or no face was detected. Otherwise the
        tuple ``(hand1_landmarks, hand2_landmarks, pose_landmarks,
        lip_landmarks)``. When only one hand is detected, that hand is
        returned in both hand slots.
    """
    # NOTE(review): this conversion treats `frame` as BGR (OpenCV channel
    # order). If the caller supplies RGB frames the channels end up swapped
    # instead - confirm what the caller actually passes in.
    rgb_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # All three detectors always run, in this order, before any result is
    # inspected.
    hand_output = hands.process(rgb_image)
    pose_output = pose.process(rgb_image)
    face_output = face_mesh.process(rgb_image)

    detected_hands = hand_output.multi_hand_landmarks
    detected_pose = pose_output.pose_landmarks
    detected_faces = face_output.multi_face_landmarks
    if not (detected_hands and detected_pose and detected_faces):
        return None

    primary_hand = detected_hands[0]
    # With a single detection, the same hand fills the second slot as well.
    secondary_hand = detected_hands[1] if len(detected_hands) > 1 else primary_hand

    first_face = detected_faces[0]
    # NOTE(review): LIPS_IDXS0 and START_IDX are not defined anywhere in the
    # visible part of this module; unless they are provided elsewhere this
    # line raises NameError. Their intended values could not be determined
    # from this file, so the expression is left exactly as written.
    lip_points = [first_face.landmark[i] for i in LIPS_IDXS0 - START_IDX]

    return primary_hand, secondary_hand, detected_pose, lip_points
# Make prediction
def make_prediction(processed_landmarks):
    """Run the TFLite model on one landmark array and return the top class.

    Args:
        processed_landmarks: Array-like model input for a single sample. It
            is wrapped in an outer list here, which adds a leading axis of
            size 1 before it is handed to the interpreter.

    Returns:
        Index of the largest value in the first row of the model's first
        output tensor (the result of ``argmax()``).
    """
    # Look the tensor indices up on the interpreter itself rather than
    # relying on module-level `input_details` / `output_details` names, which
    # are not assigned anywhere in the visible module and would raise
    # NameError.
    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']

    batch = np.array([processed_landmarks])
    interpreter.set_tensor(input_index, batch)
    interpreter.invoke()
    scores = interpreter.get_tensor(output_index)
    return scores[0].argmax()
# Gradio Interface Function
def predict_with_webcam(frame):
    """Gradio callback: classify one webcam frame from its landmarks.

    Args:
        frame: Image supplied by the interface's webcam input, or ``None``
            when no image is available.

    Returns:
        The predicted class index as a string, or ``None`` when there is no
        frame or when ``extract_landmarks`` reports that the required
        landmarks were not all detected.
    """
    # The interface may invoke the callback without an image; bail out before
    # the frame reaches cv2, which cannot convert None.
    if frame is None:
        return None

    landmarks = extract_landmarks(frame)
    if landmarks is None:
        return None

    processed_landmarks = preprocess_landmarks(*landmarks)
    return str(make_prediction(processed_landmarks))
# Webcam image component that feeds frames to predict_with_webcam().
_webcam_input = gr.inputs.Image(shape=(480, 640), source="webcam")

# Live interface: webcam image in, predicted label text out.
webcam_interface = gr.Interface(
    title="Webcam Landmark Prediction",
    description="Make predictions using landmarks extracted from your webcam stream.",
    fn=predict_with_webcam,
    inputs=_webcam_input,
    outputs="text",
    live=True,
    interpretation="default",
)

# Start the app (with a public share link) only when run as a script.
if __name__ == "__main__":
    webcam_interface.launch(share=True)