# Import the required libraries
import cv2
import numpy as np
import os
import gradio as gr
import tensorflow as tf
import tensorflow.lite as tflite
import mediapipe as mp

# Initialize MediaPipe solutions
mp_holistic = mp.solutions.holistic

# Shared Holistic model used for every webcam frame. predict_with_webcam
# refers to this name, so it has to exist at module level.
holistic = mp_holistic.Holistic(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

# Get the absolute path to the directory containing app.py
current_dir = os.path.dirname(os.path.abspath(__file__))

# Define the filename of the TFLite model
model_filename = "model.tflite"

# Construct the full path to the TFLite model file
model_path = os.path.join(current_dir, model_filename)

# Load the TFLite model using the interpreter
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

# Tensor metadata needed to feed the interpreter and read its result;
# predict_with_webcam indexes these by position.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# ... (other functions from previous code)


def mediapipe_detection(image, model):
    """Run a MediaPipe model on a BGR image.

    Args:
        image: Image in BGR channel order (the OpenCV convention).
        model: Object exposing ``process(rgb_image)``, e.g. a Holistic
            instance.

    Returns:
        A ``(image, results)`` tuple: the image converted back to BGR and
        whatever ``model.process`` returned.
    """
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # color conversion BGR -> RGB
    image.flags.writeable = False  # image is no longer writeable
    results = model.process(image)  # make prediction
    image.flags.writeable = True  # image is writeable again
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # color conversion RGB -> BGR
    return image, results


def _flatten_landmarks(landmark_list, count, with_visibility=False):
    """Flatten one landmark list to 1-D, or return zeros when it is missing."""
    width = 4 if with_visibility else 3
    if not landmark_list:
        return np.zeros(count * width)
    if with_visibility:
        rows = [[lm.x, lm.y, lm.z, lm.visibility] for lm in landmark_list.landmark]
    else:
        rows = [[lm.x, lm.y, lm.z] for lm in landmark_list.landmark]
    return np.array(rows).flatten()


def extract_keypoints(results):
    """Build one flat feature vector from MediaPipe Holistic results.

    The vector is the concatenation, in this order, of left hand (21 * 3),
    right hand (21 * 3), pose (33 * 4, including visibility) and face
    (468 * 3). A body part that was not detected contributes zeros.

    Args:
        results: Object with ``left_hand_landmarks``, ``right_hand_landmarks``,
            ``pose_landmarks`` and ``face_landmarks`` attributes.

    Returns:
        1-D ``numpy.ndarray`` with the concatenated coordinates.
    """
    lh = _flatten_landmarks(results.left_hand_landmarks, 21)
    rh = _flatten_landmarks(results.right_hand_landmarks, 21)
    pose = _flatten_landmarks(results.pose_landmarks, 33, with_visibility=True)
    face = _flatten_landmarks(results.face_landmarks, 468)
    return np.concatenate([lh, rh, pose, face])


def predict_with_webcam(frame):
    """Predict a class index from a single webcam frame.

    Args:
        frame: Image array supplied by the Gradio webcam input, or ``None``
            when no frame is available.

    Returns:
        The arg-max class index of the model output as a string, or ``None``
        when there is no frame or no landmarks were detected.
    """
    # Gradio can invoke the callback without an image (e.g. cleared input);
    # cv2.cvtColor would raise on None.
    if frame is None:
        return None

    # Gradio's Image component hands over RGB arrays, whereas
    # mediapipe_detection expects BGR and swaps the channels itself.
    # Convert first so MediaPipe ends up processing true RGB.
    bgr_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

    # Perform holistic detection
    _, results = mediapipe_detection(bgr_frame, holistic)

    # Extract keypoints
    keypoints = extract_keypoints(results)
    if np.count_nonzero(keypoints) == 0:
        # Nothing detected in this frame: no prediction to report.
        return None

    # Preprocess keypoints (add a batch axis, cast to float32) and predict
    processed_landmarks = np.array([keypoints], dtype=np.float32)
    interpreter.set_tensor(input_details[0]['index'], processed_landmarks)
    interpreter.invoke()
    outputs = interpreter.get_tensor(output_details[0]['index'])
    prediction = outputs[0].argmax()
    return str(prediction)


# Define the Gradio interface with the Webcam input and Text output
# NOTE(review): gr.inputs.Image and interpretation= belong to the older Gradio
# API — confirm they match the Gradio version pinned for this app.
webcam_interface = gr.Interface(
    fn=predict_with_webcam,
    inputs=gr.inputs.Image(shape=(480, 640), source="webcam"),
    outputs="text",
    live=True,
    interpretation="default",
    title="Webcam Landmark Prediction",
    description="Make predictions using landmarks extracted from your webcam stream.",
)

# Launch the Gradio app with the webcam interface
if __name__ == "__main__":
    webcam_interface.launch()