JefferyJapheth committed on
Commit
4ca0f67
1 Parent(s): e6b8cdf

complete outline for prediction and inference

Browse files
Files changed (1) hide show
  1. app.py +63 -18
app.py CHANGED
@@ -1,33 +1,78 @@
1
- import gradio as gr
2
  import cv2
 
 
3
  import numpy as np
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
- # ... (TFLiteModel, preprocess_landmarks, make_prediction definitions as before) ...
 
 
6
 
7
  # Function to extract landmarks from the webcam frame
8
- def extract_landmarks_from_frame(frame):
9
- # Your code to extract landmarks from the webcam frame using MediaPipe or any other method
10
- # The landmark_data should be a list or array containing the spatial coordinates of landmarks
11
- landmark_data = [] # Replace this with your actual landmark data
12
- return landmark_data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  # Gradio Interface Function
15
  def predict_with_webcam(frame):
16
- # Extract landmarks from the webcam frame
17
- landmark_data = extract_landmarks_from_frame(frame)
18
- if landmark_data is not None:
19
- # Preprocess the landmarks
20
- processed_landmarks = preprocess_landmarks(landmark_data)
21
- # Make predictions using the TFLite model
22
- predictions = make_prediction(processed_landmarks)
23
- # You can process the predictions as needed before returning them
24
- return predictions
25
 
26
  # Define the Gradio interface with the Webcam input and Text output
27
  webcam_interface = gr.Interface(
28
  fn=predict_with_webcam,
29
- inputs=gr.inputs.Webcam(), # Use the Webcam input type
30
- outputs="text", # You can customize this based on your model's output
31
  live=True,
32
  interpretation="default",
33
  title="Webcam Landmark Prediction",
 
 
1
  import cv2
2
+ import gradio as gr
3
+ import mediapipe as mp
4
  import numpy as np
5
+ import tensorflow.lite as tflite
6
+
7
# MediaPipe solution modules for the three landmark sources used below
mp_face_mesh = mp.solutions.face_mesh
mp_pose = mp.solutions.pose
mp_hands = mp.solutions.hands

# One detector per solution, each constructed with its default options
hands = mp_hands.Hands()
pose = mp_pose.Pose()
face_mesh = mp_face_mesh.FaceMesh()

# TFLite interpreter for "model.tflite"; tensors must be allocated before
# the input/output tensor descriptions are queried and before any inference
interpreter = tflite.Interpreter(model_path="model.tflite")
interpreter.allocate_tensors()
output_details = interpreter.get_output_details()
input_details = interpreter.get_input_details()
21
+
22
+ # Preprocess landmarks
23
def _landmarks_to_xyz(landmarks):
    """Return ``[x, y, z]`` triples for a landmark container or a plain iterable."""
    # Detection results expose their points through a ``.landmark`` attribute,
    # while a pre-selected subset (such as the lip points) arrives as a plain
    # list of landmark objects; accept both forms.
    points = getattr(landmarks, "landmark", landmarks)
    return [[point.x, point.y, point.z] for point in points]


def preprocess_landmarks(hand1_landmarks, hand2_landmarks, pose_landmarks, lip_landmarks):
    """Combine the four landmark groups into a single float32 coordinate array.

    Args:
        hand1_landmarks: Landmarks of the first hand (object with a
            ``.landmark`` sequence, or a plain iterable of landmarks).
        hand2_landmarks: Landmarks of the second hand, same form as above.
        pose_landmarks: Pose landmarks, same form as above.
        lip_landmarks: Lip landmarks, same form as above (the caller in this
            file passes a plain list).

    Returns:
        A ``numpy`` array of dtype ``float32`` and shape ``(N, 3)`` whose rows
        are ``x, y, z`` coordinates ordered lips, hand 1, hand 2, pose.
    """
    combined = []
    # The concatenation order (lips first, pose last) is part of the contract.
    for group in (lip_landmarks, hand1_landmarks, hand2_landmarks, pose_landmarks):
        combined.extend(_landmarks_to_xyz(group))

    # reshape keeps the (N, 3) layout even when no landmarks are supplied.
    return np.array(combined, dtype=np.float32).reshape(-1, 3)
32
 
33
  # Function to extract landmarks from the webcam frame
34
def extract_landmarks(frame):
    """Detect hand, pose and lip landmarks in a single webcam frame.

    Args:
        frame: Image array delivered by the webcam input, or ``None`` when
            no frame is available.

    Returns:
        A tuple ``(hand1_landmarks, hand2_landmarks, pose_landmarks,
        lip_landmarks)``, or ``None`` when the frame is missing or when no
        hand, no pose or no face was detected.
    """
    # A live webcam input can invoke the callback without a frame;
    # cv2.cvtColor would raise on None, so bail out early.
    if frame is None:
        return None

    # NOTE(review): this conversion assumes a BGR frame. Gradio image inputs
    # are normally RGB already -- confirm the channel swap is really wanted.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    hand_results = hands.process(frame_rgb)
    pose_results = pose.process(frame_rgb)
    face_results = face_mesh.process(frame_rgb)

    detected_hands = hand_results.multi_hand_landmarks
    if (
        not detected_hands
        or not pose_results.pose_landmarks
        or not face_results.multi_face_landmarks
    ):
        return None

    hand1_landmarks = detected_hands[0]
    # With only one detected hand, it is reused as the second hand so the
    # returned tuple always carries two hand entries.
    hand2_landmarks = detected_hands[1] if len(detected_hands) > 1 else hand1_landmarks

    pose_landmarks = pose_results.pose_landmarks
    face_landmarks = face_results.multi_face_landmarks[0]
    # NOTE(review): LIPS_IDXS0 and START_IDX are not defined in the visible
    # part of this file -- confirm they are defined or imported, otherwise
    # this line raises NameError on the first successful detection.
    lip_landmarks = [face_landmarks.landmark[i] for i in LIPS_IDXS0 - START_IDX]

    return hand1_landmarks, hand2_landmarks, pose_landmarks, lip_landmarks
54
+
55
+ # Make prediction
56
def make_prediction(processed_landmarks):
    """Run the TFLite model on one landmark array and return the top class index.

    Args:
        processed_landmarks: Array of landmark coordinates for one sample.

    Returns:
        The ``argmax`` of the first row of the model's first output tensor.
    """
    # Wrap the sample in a leading batch dimension of size one.
    batch = np.array([processed_landmarks])
    interpreter.set_tensor(input_details[0]['index'], batch)
    interpreter.invoke()
    scores = interpreter.get_tensor(output_details[0]['index'])[0]
    return scores.argmax()
62
 
63
  # Gradio Interface Function
64
def predict_with_webcam(frame):
    """Gradio callback: turn one webcam frame into a predicted class label.

    Args:
        frame: Webcam image passed in by the Gradio interface.

    Returns:
        The predicted class index as a string, or ``None`` when no landmarks
        could be extracted from the frame.
    """
    detected = extract_landmarks(frame)
    if detected is None:
        # Nothing usable in this frame, so there is no prediction to show.
        return None

    features = preprocess_landmarks(*detected)
    return str(make_prediction(features))
 
 
 
 
70
 
71
  # Define the Gradio interface with the Webcam input and Text output
72
  webcam_interface = gr.Interface(
73
  fn=predict_with_webcam,
74
+ inputs=gr.inputs.Image(shape=(480, 640), source="webcam"),
75
+ outputs="text",
76
  live=True,
77
  interpretation="default",
78
  title="Webcam Landmark Prediction",