topStep / app.py
JefferyJapheth's picture
using old code
2aae5c8
raw
history blame
No virus
3.15 kB
# Import the required libraries
import cv2
import numpy as np
import os
import gradio as gr
import tensorflow as tf
import tensorflow.lite as tflite
import mediapipe as mp
# Initialize MediaPipe solutions
mp_holistic = mp.solutions.holistic

# Shared Holistic model that predict_with_webcam runs on every webcam frame.
# It was referenced by the prediction function but never created, so the
# first callback raised NameError. The thresholds below are the library
# defaults, spelled out so they are easy to tune.
holistic = mp_holistic.Holistic(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

# Resolve model.tflite relative to this file so the app does not depend on
# the working directory it is launched from.
current_dir = os.path.dirname(os.path.abspath(__file__))
model_filename = "model.tflite"
model_path = os.path.join(current_dir, model_filename)

# Load the TFLite model using the interpreter
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

# Tensor metadata that predict_with_webcam indexes with [0]['index'] to feed
# the input tensor and read the output tensor. Like `holistic`, these names
# were used but never defined.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# ... (other functions from previous code)
# Run a MediaPipe model on a single BGR frame
def mediapipe_detection(image, model):
    """Process one frame with ``model`` and return the frame and the results.

    Args:
        image: Frame in BGR channel order.
        model: Object exposing ``process(rgb_image)``.

    Returns:
        A ``(image, results)`` tuple: the frame converted back to BGR and
        whatever ``model.process`` returned.
    """
    # The frame is handed to the model in RGB order.
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Mark the buffer read-only for the duration of the model call only.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    # Hand the frame back in the BGR order it arrived in.
    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results
# Turn MediaPipe Holistic results into a flat feature vector
def extract_keypoints(results):
    """Flatten the landmark groups of ``results`` into one 1-D array.

    The groups are concatenated in this order: left hand (x, y, z),
    right hand (x, y, z), pose (x, y, z, visibility), face (x, y, z).
    A group that is missing (falsy) is replaced by zeros sized for
    21, 21, 33 and 468 landmarks respectively.

    Args:
        results: Object with ``left_hand_landmarks``,
            ``right_hand_landmarks``, ``pose_landmarks`` and
            ``face_landmarks`` attributes; each present group exposes a
            ``landmark`` iterable.

    Returns:
        A 1-D numpy array holding all groups back to back.
    """

    def flatten_group(group, fields, point_count):
        # A falsy group means nothing was reported for it: pad with zeros.
        if not group:
            return np.zeros(point_count * len(fields))
        rows = [
            [getattr(point, field) for field in fields]
            for point in group.landmark
        ]
        return np.array(rows).flatten()

    xyz = ("x", "y", "z")
    left_hand = flatten_group(results.left_hand_landmarks, xyz, 21)
    right_hand = flatten_group(results.right_hand_landmarks, xyz, 21)
    body_pose = flatten_group(
        results.pose_landmarks, ("x", "y", "z", "visibility"), 33
    )
    face_mesh = flatten_group(results.face_landmarks, xyz, 468)
    return np.concatenate([left_hand, right_hand, body_pose, face_mesh])
# Main prediction function that combines everything
def predict_with_webcam(frame):
    """Predict a class index from the landmarks found in one webcam frame.

    Uses the module-level ``holistic`` model for landmark detection and the
    module-level ``interpreter`` with ``input_details`` / ``output_details``
    for classification.

    Args:
        frame: Image array delivered by the Gradio webcam input, or ``None``
            when no frame is available.

    Returns:
        The index of the highest-scoring model output as a string, or an
        empty string when there is no frame or no landmarks were detected.
    """
    # A live interface may fire the callback before the webcam has produced
    # a frame; cv2.cvtColor would fail on None.
    if frame is None:
        return ""

    # Gradio's Image component delivers RGB arrays, while
    # mediapipe_detection expects OpenCV's BGR order and swaps the channels
    # itself. Convert first so MediaPipe ends up seeing true RGB.
    bgr_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    _, results = mediapipe_detection(bgr_frame, holistic)

    keypoints = extract_keypoints(results)
    # An all-zero vector means nothing was detected, so skip inference and
    # return an explicit empty result.
    if not np.any(keypoints):
        return ""

    # Add a leading batch axis and cast to float32 before feeding the model.
    processed_landmarks = np.array([keypoints], dtype=np.float32)
    interpreter.set_tensor(input_details[0]['index'], processed_landmarks)
    interpreter.invoke()
    outputs = interpreter.get_tensor(output_details[0]['index'])
    return str(outputs[0].argmax())
# Gradio UI: webcam image in, predicted label text out.
# NOTE(review): `gr.inputs.Image` and `interpretation=` look like the pre-4.x
# Gradio API - confirm the pinned Gradio version before upgrading.
webcam_interface = gr.Interface(
    title="Webcam Landmark Prediction",
    description="Make predictions using landmarks extracted from your webcam stream.",
    fn=predict_with_webcam,
    inputs=gr.inputs.Image(shape=(480, 640), source="webcam"),
    outputs="text",
    # Re-run the prediction as the input changes instead of waiting for a
    # submit click.
    live=True,
    interpretation="default",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    webcam_interface.launch()