JefferyJapheth committed on
Commit
2aad215
1 Parent(s): c1192a3
Files changed (1) hide show
  1. app.py +51 -41
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # Import the required libraries
2
  import os
3
 
4
  import cv2
@@ -9,7 +8,13 @@ import tensorflow as tf
9
  import tensorflow.lite as tflite
10
 
11
  # Initialize MediaPipe solutions
12
- mp_holistic = mp.solutions.holistic
 
 
 
 
 
 
13
 
14
  # Get the absolute path to the directory containing app.py
15
  current_dir = os.path.dirname(os.path.abspath(__file__))
@@ -17,56 +22,61 @@ current_dir = os.path.dirname(os.path.abspath(__file__))
17
  model_filename = "model.tflite"
18
  # Construct the full path to the TFLite model file
19
  model_path = os.path.join(current_dir, model_filename)
20
-
21
  # Load the TFLite model using the interpreter
22
  interpreter = tf.lite.Interpreter(model_path=model_path)
23
  interpreter.allocate_tensors()
24
 
25
 
26
- # ... (other functions from previous code)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
- # Function to perform holistic detection using Mediapipe
29
- def mediapipe_detection(image, model):
30
- # COLOR CONVERSION BGR 2 RGB
31
- image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
32
- image_rgb.flags.writeable = False # Image is no longer writeable
33
- results = model.process(image_rgb) # Make prediction
34
- image_rgb.flags.writeable = True # Image is now writeable
35
- image_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR) # COLOR CONVERSION RGB 2 BGR
36
- return image_bgr, results
37
 
 
38
 
39
- # Function to extract keypoints from Mediapipe results
40
- # Function to extract keypoints from Mediapipe results
41
- def extract_keypoints(results):
42
- lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten(
43
- ) if results.left_hand_landmarks else np.zeros(21 * 3)
44
- rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten(
45
- ) if results.right_hand_landmarks else np.zeros(21 * 3)
46
- pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten(
47
- ) if results.pose_landmarks else np.zeros(33 * 4)
48
- face = np.array([[res.x, res.y, res.z] for res in
49
- results.face_landmarks.landmark]).flatten(
50
- ) if results.face_landmarks else np.zeros(468 * 3)
51
 
52
- # Concatenate the arrays and return the result
53
- keypoints = np.concatenate([lh, rh, pose, face])
54
- return keypoints
 
 
 
 
55
 
56
 
57
- # Main prediction function that combines everything
58
  def predict_with_webcam(frame):
59
- # Perform holistic detection
60
- image, results = mediapipe_detection(frame, mp_holistic) # Use mp_holistic here
61
- # Extract keypoints
62
- keypoints = extract_keypoints(results)
63
- if np.count_nonzero(keypoints) > 0:
64
- # Preprocess keypoints and make prediction
65
- processed_landmarks = np.array([keypoints], dtype=np.float32)
66
- interpreter.set_tensor(input_details[0]['index'], processed_landmarks)
67
- interpreter.invoke()
68
- outputs = interpreter.get_tensor(output_details[0]['index'])
69
- prediction = outputs[0].argmax()
70
  return str(prediction)
71
 
72
 
@@ -83,4 +93,4 @@ webcam_interface = gr.Interface(
83
 
84
  # Launch the Gradio app with the webcam interface
85
  if __name__ == "__main__":
86
- webcam_interface.launch()
 
 
1
  import os
2
 
3
  import cv2
 
8
  import tensorflow.lite as tflite
9
 
10
  # Initialize MediaPipe solutions
11
+ mp_hands = mp.solutions.hands
12
+ mp_pose = mp.solutions.pose
13
+ mp_face_mesh = mp.solutions.face_mesh
14
+
15
+ hands = mp_hands.Hands()
16
+ pose = mp_pose.Pose()
17
+ face_mesh = mp_face_mesh.FaceMesh()
18
 
19
  # Get the absolute path to the directory containing app.py
20
  current_dir = os.path.dirname(os.path.abspath(__file__))
 
22
  model_filename = "model.tflite"
23
  # Construct the full path to the TFLite model file
24
  model_path = os.path.join(current_dir, model_filename)
 
25
  # Load the TFLite model using the interpreter
26
  interpreter = tf.lite.Interpreter(model_path=model_path)
27
  interpreter.allocate_tensors()
28
 
29
 
30
+ # Preprocess landmarks
31
def preprocess_landmarks(hand1_landmarks, hand2_landmarks, pose_landmarks, lip_landmarks):
    """Flatten the four landmark groups into one float32 coordinate array.

    ``hand1_landmarks``, ``hand2_landmarks`` and ``pose_landmarks`` are
    objects exposing a ``.landmark`` iterable, while ``lip_landmarks`` is
    already a plain iterable of landmarks. Each landmark contributes one
    ``[x, y, z]`` row.

    Returns:
        A ``np.float32`` array whose rows are ordered lips, hand 1,
        hand 2, pose.
    """
    groups = (
        lip_landmarks,
        hand1_landmarks.landmark,
        hand2_landmarks.landmark,
        pose_landmarks.landmark,
    )

    rows = []
    # Group order fixes the row order of the result: lips, hands, pose.
    for group in groups:
        for point in group:
            rows.append([point.x, point.y, point.z])

    return np.array(rows, dtype=np.float32)
40
+
41
+
42
+ # Function to extract landmarks from the webcam frame
43
def extract_landmarks(frame):
    """Run the hand, pose and face-mesh detectors on a single frame.

    The frame is converted with ``cv2.COLOR_BGR2RGB`` before being passed
    to the module-level ``hands``, ``pose`` and ``face_mesh`` detectors.

    Returns:
        ``None`` if any of the three detectors found nothing; otherwise a
        ``(hand1, hand2, pose, lips)`` tuple, where ``lips`` is a list of
        landmarks picked from the first detected face.
    """
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # All three detectors run on every frame before any result is inspected.
    hand_out = hands.process(rgb)
    pose_out = pose.process(rgb)
    face_out = face_mesh.process(rgb)

    everything_found = (
        hand_out.multi_hand_landmarks
        and pose_out.pose_landmarks
        and face_out.multi_face_landmarks
    )
    if not everything_found:
        return None

    detected_hands = hand_out.multi_hand_landmarks
    first_hand = detected_hands[0]
    # With only one detected hand, the same landmarks fill both hand slots.
    second_hand = detected_hands[1] if len(detected_hands) > 1 else first_hand

    body = pose_out.pose_landmarks
    face = face_out.multi_face_landmarks[0]
    # NOTE(review): LIPS_IDXS0 and START_IDX are not defined in the visible
    # part of this file — presumably module-level constants; confirm they
    # exist and that the subtraction yields valid face-mesh indices.
    lips = [face.landmark[idx] for idx in LIPS_IDXS0 - START_IDX]

    return first_hand, second_hand, body, lips
63
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
+ # Make prediction
66
def make_prediction(processed_landmarks):
    """Run the TFLite interpreter on one sample and return the top index.

    ``processed_landmarks`` is wrapped in a leading batch axis of size one,
    written to the interpreter's first input tensor, and the argmax of the
    first row of the first output tensor is returned.
    """
    batch = np.array([processed_landmarks])

    # NOTE(review): input_details / output_details are expected to be
    # module-level values defined elsewhere in the file — confirm.
    input_index = input_details[0]['index']
    interpreter.set_tensor(input_index, batch)
    interpreter.invoke()

    output_index = output_details[0]['index']
    scores = interpreter.get_tensor(output_index)
    return scores[0].argmax()
72
 
73
 
74
+ # Gradio Interface Function
75
def predict_with_webcam(frame):
    """Predict a class label for one webcam frame.

    Extracts landmarks from ``frame``, preprocesses them and runs the
    model.

    Returns:
        The predicted class index as a string, or an empty string when
        landmark extraction found nothing to classify.
    """
    landmarks = extract_landmarks(frame)
    if landmarks is None:
        # Without this guard `prediction` is never bound on frames with no
        # detection, so the function would fail instead of returning text.
        return ""

    processed_landmarks = preprocess_landmarks(*landmarks)
    prediction = make_prediction(processed_landmarks)
    return str(prediction)
81
 
82
 
 
93
 
94
  # Launch the Gradio app with the webcam interface
95
  if __name__ == "__main__":
96
+ webcam_interface.launch(share=True)