Spaces:
Runtime error
Runtime error
Commit
•
4ca0f67
1
Parent(s):
e6b8cdf
complete outline for prediction and inference
Browse files
app.py
CHANGED
@@ -1,33 +1,78 @@
|
|
1 |
-
import gradio as gr
|
2 |
import cv2
|
|
|
|
|
3 |
import numpy as np
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
-
|
|
|
|
|
6 |
|
7 |
# Function to extract landmarks from the webcam frame
|
8 |
-
def
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
# Gradio Interface Function
|
15 |
def predict_with_webcam(frame):
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
# Make predictions using the TFLite model
|
22 |
-
predictions = make_prediction(processed_landmarks)
|
23 |
-
# You can process the predictions as needed before returning them
|
24 |
-
return predictions
|
25 |
|
26 |
# Define the Gradio interface with the Webcam input and Text output
|
27 |
webcam_interface = gr.Interface(
|
28 |
fn=predict_with_webcam,
|
29 |
-
inputs=gr.inputs.
|
30 |
-
outputs="text",
|
31 |
live=True,
|
32 |
interpretation="default",
|
33 |
title="Webcam Landmark Prediction",
|
|
|
|
|
1 |
import cv2
|
2 |
+
import gradio as gr
|
3 |
+
import mediapipe as mp
|
4 |
import numpy as np
|
5 |
+
import tensorflow.lite as tflite
|
6 |
+
|
7 |
+
# MediaPipe solution modules used by the landmark extraction below.
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose
mp_face_mesh = mp.solutions.face_mesh

# One detector per solution, built once at import time with library defaults
# and reused for every frame.
hands = mp_hands.Hands()
pose = mp_pose.Pose()
face_mesh = mp_face_mesh.FaceMesh()

# TFLite model: load it, allocate its tensors, and cache the input/output
# tensor descriptions that inference needs.
interpreter = tflite.Interpreter(model_path="model.tflite")
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
|
21 |
+
|
22 |
+
# Preprocess landmarks
|
23 |
+
def _landmarks_to_xyz(landmarks):
    """Return one ``[x, y, z]`` list per landmark in *landmarks*."""
    return [[point.x, point.y, point.z] for point in landmarks]


def preprocess_landmarks(hand1_landmarks, hand2_landmarks, pose_landmarks, lip_landmarks):
    """Combine all detected landmarks into one float32 coordinate array.

    Args:
        hand1_landmarks: Object whose ``landmark`` attribute is an iterable
            of points exposing ``x``, ``y`` and ``z``.
        hand2_landmarks: Same structure as ``hand1_landmarks``.
        pose_landmarks: Same structure as ``hand1_landmarks``.
        lip_landmarks: Iterable of points exposing ``x``, ``y`` and ``z``
            (already unwrapped, so it has no ``landmark`` attribute).

    Returns:
        A ``numpy.float32`` array with one ``[x, y, z]`` row per landmark,
        ordered lips, hand 1, hand 2, pose.
    """
    hand1_xyz = _landmarks_to_xyz(hand1_landmarks.landmark)
    hand2_xyz = _landmarks_to_xyz(hand2_landmarks.landmark)
    pose_xyz = _landmarks_to_xyz(pose_landmarks.landmark)
    lip_xyz = _landmarks_to_xyz(lip_landmarks)

    # Row order is part of the contract with the model input: lips first.
    combined_landmarks = lip_xyz + hand1_xyz + hand2_xyz + pose_xyz

    return np.array(combined_landmarks, dtype=np.float32)
|
32 |
|
33 |
# Function to extract landmarks from the webcam frame
|
34 |
+
def extract_landmarks(frame):
    """Run the hand, pose and face-mesh detectors on a single frame.

    Args:
        frame: Image array accepted by ``cv2.cvtColor``, or ``None`` when no
            frame is available.

    Returns:
        ``None`` when ``frame`` is ``None`` or when any of the three
        detectors finds nothing. Otherwise a tuple
        ``(hand1_landmarks, hand2_landmarks, pose_landmarks, lip_landmarks)``;
        when only one hand is detected, ``hand2_landmarks`` is the same
        object as ``hand1_landmarks``.
    """
    # A live stream can deliver an empty frame; report it the same way as
    # "nothing detected" instead of letting cv2 raise.
    if frame is None:
        return None

    # NOTE(review): this conversion assumes a BGR frame. Gradio image inputs
    # are documented as RGB arrays -- confirm the channel order of the caller.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    hand_results = hands.process(frame_rgb)
    pose_results = pose.process(frame_rgb)
    face_results = face_mesh.process(frame_rgb)

    detected_hands = hand_results.multi_hand_landmarks
    if not (
        detected_hands
        and pose_results.pose_landmarks
        and face_results.multi_face_landmarks
    ):
        return None

    hand1_landmarks = detected_hands[0]
    # With a single detected hand, reuse it for the second hand slot.
    hand2_landmarks = detected_hands[1] if len(detected_hands) > 1 else hand1_landmarks

    pose_landmarks = pose_results.pose_landmarks
    face_landmarks = face_results.multi_face_landmarks[0]
    # NOTE(review): LIPS_IDXS0 and START_IDX are not defined in the visible
    # part of this file; they must exist at module level or this line raises
    # NameError -- confirm where they come from.
    lip_landmarks = [face_landmarks.landmark[i] for i in LIPS_IDXS0 - START_IDX]

    return hand1_landmarks, hand2_landmarks, pose_landmarks, lip_landmarks
|
54 |
+
|
55 |
+
# Make prediction
|
56 |
+
def make_prediction(processed_landmarks):
    """Run the TFLite model on one set of landmarks and return the top class.

    Args:
        processed_landmarks: Array-like of landmark coordinates for a single
            sample; a leading batch axis of size 1 is added here.

    Returns:
        Index of the largest value in the first row of the model's first
        output tensor.
    """
    input_spec = input_details[0]
    # set_tensor rejects a dtype that differs from the tensor's declared
    # dtype, so cast explicitly rather than relying on the caller.
    # NOTE(review): this is a plain cast; no quantization scale/zero-point is
    # applied -- confirm the model takes unquantized input.
    inputs = np.expand_dims(
        np.asarray(processed_landmarks, dtype=input_spec["dtype"]), axis=0
    )
    interpreter.set_tensor(input_spec["index"], inputs)
    interpreter.invoke()
    outputs = interpreter.get_tensor(output_details[0]["index"])
    return outputs[0].argmax()
|
62 |
|
63 |
# Gradio Interface Function
|
64 |
def predict_with_webcam(frame):
    """Predict a class label for one webcam frame.

    Args:
        frame: The frame passed through to ``extract_landmarks``.

    Returns:
        The predicted class index as a string, or an empty string when
        ``extract_landmarks`` returns ``None`` (so the function always
        returns ``str`` instead of implicitly returning ``None``).
    """
    landmarks = extract_landmarks(frame)
    if landmarks is None:
        # Nothing to classify for this frame; keep the text output blank.
        return ""

    processed_landmarks = preprocess_landmarks(*landmarks)
    prediction = make_prediction(processed_landmarks)
    return str(prediction)
|
|
|
|
|
|
|
|
|
70 |
|
71 |
# Define the Gradio interface with the Webcam input and Text output
|
72 |
webcam_interface = gr.Interface(
|
73 |
fn=predict_with_webcam,
|
74 |
+
inputs=gr.inputs.Image(shape=(480, 640), source="webcam"),
|
75 |
+
outputs="text",
|
76 |
live=True,
|
77 |
interpretation="default",
|
78 |
title="Webcam Landmark Prediction",
|