# Import the required libraries
import cv2
import numpy as np
import os
import gradio as gr
import tensorflow as tf
import mediapipe as mp

# Initialize MediaPipe solutions and create a Holistic model instance
# (holistic is used by predict_with_webcam below; the confidence thresholds
# are MediaPipe's defaults)
mp_holistic = mp.solutions.holistic
holistic = mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Get the absolute path to the directory containing app.py
current_dir = os.path.dirname(os.path.abspath(__file__))
# Define the filename of the TFLite model
model_filename = "model.tflite"
# Construct the full path to the TFLite model file
model_path = os.path.join(current_dir, model_filename)

# Load the TFLite model using the interpreter
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()
# Look up the input and output tensor details needed to run inference
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
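
# Optional sanity check (a sketch, not part of the original app): the keypoint
# vector built by extract_keypoints() below has length
# 21*3 + 21*3 + 33*4 + 468*3 = 1662, which should match the model's input
# shape, e.g. [1, 1662]. Uncomment to verify against your model file:
# print("Expected input shape:", input_details[0]['shape'])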

# ... (other functions from previous code)

# Function to perform holistic detection using Mediapipe
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                   # Image is no longer writeable
    results = model.process(image)                  # Make prediction
    image.flags.writeable = True                    # Image is now writeable
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # COLOR CONVERSION RGB 2 BGR
    return image, results

# Function to extract keypoints from Mediapipe results
def extract_keypoints(results):
    lh = (np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten()
          if results.left_hand_landmarks else np.zeros(21 * 3))
    rh = (np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten()
          if results.right_hand_landmarks else np.zeros(21 * 3))
    pose = (np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten()
            if results.pose_landmarks else np.zeros(33 * 4))
    face = (np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten()
            if results.face_landmarks else np.zeros(468 * 3))
    return np.concatenate([lh, rh, pose, face])

# Main prediction function that combines everything
def predict_with_webcam(frame):
    # Gradio delivers webcam frames as RGB arrays, while mediapipe_detection
    # expects OpenCV-style BGR input, so convert first
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    # Perform holistic detection
    image, results = mediapipe_detection(frame, holistic)
    # Extract keypoints
    keypoints = extract_keypoints(results)
    # Only run the model when at least one landmark was detected
    if np.count_nonzero(keypoints) > 0:
        # Preprocess keypoints and run the TFLite interpreter
        processed_landmarks = np.array([keypoints], dtype=np.float32)
        interpreter.set_tensor(input_details[0]['index'], processed_landmarks)
        interpreter.invoke()
        outputs = interpreter.get_tensor(output_details[0]['index'])
        prediction = outputs[0].argmax()
        return str(prediction)
    # Return an empty string rather than None when nothing was detected
    return ""
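
# A minimal, hypothetical sketch of turning the raw class index into a
# human-readable label. The LABELS list below is an assumption and must
# match the classes the model was actually trained on.
LABELS = ["hello", "thanks", "iloveyou"]  # hypothetical label set

def index_to_label(index):
    # Guard against out-of-range indices from an unexpected model output
    return LABELS[index] if 0 <= index < len(LABELS) else "unknown"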

# Define the Gradio interface with the Webcam input and Text output
# (this uses the legacy gr.inputs API from older Gradio releases; Gradio 4.x
# would use gr.Image(sources=["webcam"]) instead)
webcam_interface = gr.Interface(
    fn=predict_with_webcam,
    inputs=gr.inputs.Image(shape=(480, 640), source="webcam"),
    outputs="text",
    live=True,
    interpretation="default",
    title="Webcam Landmark Prediction",
    description="Make predictions using landmarks extracted from your webcam stream.",
)

# Launch the Gradio app with the webcam interface
if __name__ == "__main__":
    webcam_interface.launch()