JefferyJapheth committed on
Commit 2aae5c8
1 Parent(s): bd4424b

using old code

Files changed (1)
  1. app.py +51 -226
app.py CHANGED
@@ -1,104 +1,14 @@
 
 
 
 import os
-
- import mediapipe as mp
 import tensorflow as tf

- N_ROWS = 543
- N_DIMS = 3
- DIM_NAMES = ['x', 'y', 'z']
- SEED = 42
- NUM_CLASSES = 250
- INPUT_SIZE = 32
-
-
- # Tensorflow layer to process data in TFLite
- # Data needs to be processed in the model itself, so we cannot use Python
- class PreprocessLayer(tf.keras.layers.Layer):
-     def __init__(self):
-         super(PreprocessLayer, self).__init__()
-
-     def pad_edge(self, t, repeats, side):
-         if side == 'LEFT':
-             return tf.concat((tf.repeat(t[:1], repeats=repeats, axis=0), t), axis=0)
-         elif side == 'RIGHT':
-             return tf.concat((t, tf.repeat(t[-1:], repeats=repeats, axis=0)), axis=0)
-
-     @tf.function(
-         input_signature=(tf.TensorSpec(shape=[None, N_ROWS, N_DIMS], dtype=tf.float32),),
-     )
-     def call(self, data0):
-         # Number of Frames in Video
-         N_FRAMES0 = tf.shape(data0)[0]
-
-         # Filter Out Frames With Empty Hand Data
-         frames_hands_nansum = tf.experimental.numpy.nanmean(tf.gather(data0, HAND_IDXS0, axis=1), axis=[1, 2])
-         non_empty_frames_idxs = tf.where(frames_hands_nansum > 0)
-         non_empty_frames_idxs = tf.squeeze(non_empty_frames_idxs, axis=1)
-         data = tf.gather(data0, non_empty_frames_idxs, axis=0)
-
-         # Cast Indices in float32 to be compatible with Tensorflow Lite
-         non_empty_frames_idxs = tf.cast(non_empty_frames_idxs, tf.float32)
-
-         # Number of Frames in Filtered Video
-         N_FRAMES = tf.shape(data)[0]
-
-         # Gather Relevant Landmark Columns
-         data = tf.gather(data, LANDMARK_IDXS0, axis=1)
-
-         # Video fits in INPUT_SIZE
-         if N_FRAMES < INPUT_SIZE:
-             # Pad With -1 to indicate padding
-             non_empty_frames_idxs = tf.pad(non_empty_frames_idxs, [[0, INPUT_SIZE - N_FRAMES]], constant_values=-1)
-             # Pad Data With Zeros
-             data = tf.pad(data, [[0, INPUT_SIZE - N_FRAMES], [0, 0], [0, 0]], constant_values=0)
-             # Fill NaN Values With 0
-             data = tf.where(tf.math.is_nan(data), 0.0, data)
-             return data, non_empty_frames_idxs
-         # Video needs to be downsampled to INPUT_SIZE
-         else:
-             # Repeat
-             if N_FRAMES < INPUT_SIZE ** 2:
-                 repeats = tf.math.floordiv(INPUT_SIZE * INPUT_SIZE, N_FRAMES0)
-                 data = tf.repeat(data, repeats=repeats, axis=0)
-                 non_empty_frames_idxs = tf.repeat(non_empty_frames_idxs, repeats=repeats, axis=0)
-
-             # Pad To Multiple Of Input Size
-             pool_size = tf.math.floordiv(len(data), INPUT_SIZE)
-             if tf.math.mod(len(data), INPUT_SIZE) > 0:
-                 pool_size += 1
-
-             if pool_size == 1:
-                 pad_size = (pool_size * INPUT_SIZE) - len(data)
-             else:
-                 pad_size = (pool_size * INPUT_SIZE) % len(data)
-
-             # Pad Start/End with Start/End value
-             pad_left = tf.math.floordiv(pad_size, 2) + tf.math.floordiv(INPUT_SIZE, 2)
-             pad_right = tf.math.floordiv(pad_size, 2) + tf.math.floordiv(INPUT_SIZE, 2)
-             if tf.math.mod(pad_size, 2) > 0:
-                 pad_right += 1
-
-             # Pad By Concatenating Left/Right Edge Values
-             data = self.pad_edge(data, pad_left, 'LEFT')
-             data = self.pad_edge(data, pad_right, 'RIGHT')
-
-             # Pad Non Empty Frame Indices
-             non_empty_frames_idxs = self.pad_edge(non_empty_frames_idxs, pad_left, 'LEFT')
-             non_empty_frames_idxs = self.pad_edge(non_empty_frames_idxs, pad_right, 'RIGHT')
-
-             # Reshape to Mean Pool
-             data = tf.reshape(data, [INPUT_SIZE, -1, N_COLS, N_DIMS])
-             non_empty_frames_idxs = tf.reshape(non_empty_frames_idxs, [INPUT_SIZE, -1])
-
-             # Mean Pool
-             data = tf.experimental.numpy.nanmean(data, axis=1)
-             non_empty_frames_idxs = tf.experimental.numpy.nanmean(non_empty_frames_idxs, axis=1)
-
-             # Fill NaN Values With 0
-             data = tf.where(tf.math.is_nan(data), 0.0, data)
-
-             return data, non_empty_frames_idxs
-

 # Get the absolute path to the directory containing app.py
 current_dir = os.path.dirname(os.path.abspath(__file__))
@@ -106,146 +16,61 @@ current_dir = os.path.dirname(os.path.abspath(__file__))
 model_filename = "model.tflite"
 # Construct the full path to the TFLite model file
 model_path = os.path.join(current_dir, model_filename)
 # Load the TFLite model using the interpreter
 interpreter = tf.lite.Interpreter(model_path=model_path)
 interpreter.allocate_tensors()

- # Get input and output details of the TFLite model
- input_details = interpreter.get_input_details()
- output_details = interpreter.get_output_details()
-
- index_to_class = {
-     "TV": 0, "after": 1, "airplane": 2, "all": 3, "alligator": 4, "animal": 5, "another": 6, "any": 7, "apple": 8,
-     "arm": 9, "aunt": 10, "awake": 11, "backyard": 12, "bad": 13, "balloon": 14, "bath": 15, "because": 16, "bed": 17,
-     "bedroom": 18, "bee": 19, "before": 20, "beside": 21, "better": 22, "bird": 23, "black": 24, "blow": 25, "blue": 26,
-     "boat": 27, "book": 28, "boy": 29, "brother": 30, "brown": 31, "bug": 32, "bye": 33, "callonphone": 34, "can": 35,
-     "car": 36, "carrot": 37, "cat": 38, "cereal": 39, "chair": 40, "cheek": 41, "child": 42, "chin": 43,
-     "chocolate": 44, "clean": 45, "close": 46, "closet": 47, "cloud": 48, "clown": 49, "cow": 50, "cowboy": 51,
-     "cry": 52, "cut": 53, "cute": 54, "dad": 55, "dance": 56, "dirty": 57, "dog": 58, "doll": 59, "donkey": 60,
-     "down": 61, "drawer": 62, "drink": 63, "drop": 64, "dry": 65, "dryer": 66, "duck": 67, "ear": 68, "elephant": 69,
-     "empty": 70, "every": 71, "eye": 72, "face": 73, "fall": 74, "farm": 75, "fast": 76, "feet": 77, "find": 78,
-     "fine": 79, "finger": 80, "finish": 81, "fireman": 82, "first": 83, "fish": 84, "flag": 85, "flower": 86,
-     "food": 87, "for": 88, "frenchfries": 89, "frog": 90, "garbage": 91, "gift": 92, "giraffe": 93, "girl": 94,
-     "give": 95, "glasswindow": 96, "go": 97, "goose": 98, "grandma": 99, "grandpa": 100, "grass": 101, "green": 102,
-     "gum": 103, "hair": 104, "happy": 105, "hat": 106, "hate": 107, "have": 108, "haveto": 109, "head": 110,
-     "hear": 111, "helicopter": 112, "hello": 113, "hen": 114, "hesheit": 115, "hide": 116, "high": 117, "home": 118,
-     "horse": 119, "hot": 120, "hungry": 121, "icecream": 122, "if": 123, "into": 124, "jacket": 125, "jeans": 126,
-     "jump": 127, "kiss": 128, "kitty": 129, "lamp": 130, "later": 131, "like": 132, "lion": 133, "lips": 134,
-     "listen": 135, "look": 136, "loud": 137, "mad": 138, "make": 139, "man": 140, "many": 141, "milk": 142,
-     "minemy": 143, "mitten": 144, "mom": 145, "moon": 146, "morning": 147, "mouse": 148, "mouth": 149, "nap": 150,
-     "napkin": 151, "night": 152, "no": 153, "noisy": 154, "nose": 155, "not": 156, "now": 157, "nuts": 158, "old": 159,
-     "on": 160, "open": 161, "orange": 162, "outside": 163, "owie": 164, "owl": 165, "pajamas": 166, "pen": 167,
-     "pencil": 168, "penny": 169, "person": 170, "pig": 171, "pizza": 172, "please": 173, "police": 174, "pool": 175,
-     "potty": 176, "pretend": 177, "pretty": 178, "puppy": 179, "puzzle": 180, "quiet": 181, "radio": 182, "rain": 183,
-     "read": 184, "red": 185, "refrigerator": 186, "ride": 187, "room": 188, "sad": 189, "same": 190, "say": 191,
-     "scissors": 192, "see": 193, "shhh": 194, "shirt": 195, "shoe": 196, "shower": 197, "sick": 198, "sleep": 199,
-     "sleepy": 200, "smile": 201, "snack": 202, "snow": 203, "stairs": 204, "stay": 205, "sticky": 206, "store": 207,
-     "story": 208, "stuck": 209, "sun": 210, "table": 211, "talk": 212, "taste": 213, "thankyou": 214, "that": 215,
-     "there": 216, "think": 217, "thirsty": 218, "tiger": 219, "time": 220, "tomorrow": 221, "tongue": 222, "tooth": 223,
-     "toothbrush": 224, "touch": 225, "toy": 226, "tree": 227, "uncle": 228, "underwear": 229, "up": 230, "vacuum": 231,
-     "wait": 232, "wake": 233, "water": 234, "wet": 235, "weus": 236, "where": 237, "white": 238, "who": 239, "why": 240,
-     "will": 241, "wolf": 242, "yellow": 243, "yes": 244, "yesterday": 245, "yourself": 246, "yucky": 247, "zebra": 248,
-     "zipper": 249
- }
-
- inv_index_to_class = {v: k for k, v in index_to_class.items()}
-
- mp_holistic = mp.solutions.holistic
-

 def mediapipe_detection(image, model):
     # COLOR CONVERSION BGR 2 RGB
     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-     image.flags.writeable = False # Image is no longer writeable
-     results = model.process(image) # Make prediction
-     image.flags.writeable = True # Image is now writeable
     image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
     return image, results

-
 def extract_keypoints(results):
-     face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten()
-     lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten()
-     rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten()
-     pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten()
-
-     # Pad or truncate the arrays to the expected length (543)
-     face = np.pad(face, (0, max(0, 543 - len(face))), mode='constant')
-     lh = np.pad(lh, (0, max(0, 543 - len(lh))), mode='constant')
-     rh = np.pad(rh, (0, max(0, 543 - len(rh))), mode='constant')
-     pose = np.pad(pose, (0, max(0, 543 - len(pose))), mode='constant')
-
-     # Concatenate the arrays in the correct order and return the result
-     return np.concatenate([face, lh, rh, pose])
-
-
- # Make prediction
- def make_prediction(processed_landmarks):
-     inputs = np.array(processed_landmarks, dtype=np.float32)
-
-     # Set the input tensor for the TFLite model
-     interpreter.set_tensor(input_details[0]['index'], inputs)
-
-     # Invoke the TFLite interpreter to perform inference
-     interpreter.invoke()
-
-     # Get the output tensor of the TFLite model
-     output_data = interpreter.get_tensor(output_details[0]['index'])
-
-     # Find the index of the predicted class
-     index = np.argmax(output_data)
-
-     # Map the index to the corresponding class label using the index_to_class dictionary
-     prediction = inv_index_to_class[index]
-
-     return prediction
-
-
- # ...
-
- with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
-     import cv2
-     import numpy as np
-     import gradio as gr
-     import tensorflow as tf
-
-
-     # Modify the predict_with_webcam function to take an image as input and return the prediction string
-     def predict_with_webcam(frame):
-         if frame is None:
-             raise ValueError("Frame is None. Make sure your webcam is working properly.")
-
-         # Make detections using mediapipe
-         image, results = mediapipe_detection(frame, holistic)
-         print(results)
-
-         if results is not None and results.face_landmarks is not None:
-             landmarks = extract_keypoints(results)
-             if landmarks is not None:
-                 # Calculate the number of landmarks per frame
-                 landmarks_per_frame = len(landmarks) // (N_ROWS * N_DIMS)
-                 # Reshape the landmarks to have shape (None, N_ROWS, N_DIMS)
-                 landmarks = landmarks.reshape(-1, landmarks_per_frame, N_DIMS)
-                 # Initialize PreprocessLayer
-                 preprocess_layer = PreprocessLayer()
-                 # Call the PreprocessLayer to preprocess the landmarks
-                 processed_landmarks, _ = preprocess_layer.call(landmarks)
-                 prediction = make_prediction(processed_landmarks) # Pass the preprocessed landmarks to make_prediction
-                 print("Prediction:", prediction)
-                 return prediction
-             else:
-                 return "Could not detect landmarks or extract keypoints. Make sure your webcam is working properly."
-         else:
-             return "Could not detect face landmarks. Make sure your webcam is working properly."
-
-
- # Define the Gradio interface
- iface = gr.Interface(
     fn=predict_with_webcam,
-     inputs=gr.inputs.Image(shape=(None, None, 3), source="webcam", tool="opencv"),
-     outputs=gr.outputs.Textbox()
 )

-
- # Launch the interface
- iface.launch()
 
+ # Import the required libraries
+ import cv2
+ import numpy as np
 import os
+ import gradio as gr
 import tensorflow as tf
+ import tensorflow.lite as tflite
+ import mediapipe as mp

+ # Initialize MediaPipe solutions
+ mp_holistic = mp.solutions.holistic

 # Get the absolute path to the directory containing app.py
 current_dir = os.path.dirname(os.path.abspath(__file__))
 model_filename = "model.tflite"
 # Construct the full path to the TFLite model file
 model_path = os.path.join(current_dir, model_filename)
+
 # Load the TFLite model using the interpreter
 interpreter = tf.lite.Interpreter(model_path=model_path)
 interpreter.allocate_tensors()
+ # ... (other functions from previous code)

+ # Function to perform holistic detection using Mediapipe
 def mediapipe_detection(image, model):
     # COLOR CONVERSION BGR 2 RGB
     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+     image.flags.writeable = False # Image is no longer writeable
+     results = model.process(image) # Make prediction
+     image.flags.writeable = True # Image is now writeable
     image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
     return image, results

+ # Function to extract keypoints from Mediapipe results
 def extract_keypoints(results):
+     lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten(
+     ) if results.left_hand_landmarks else np.zeros(21*3)
+     rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten(
+     ) if results.right_hand_landmarks else np.zeros(21*3)
+     pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten(
+     ) if results.pose_landmarks else np.zeros(33*4)
+     face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten(
+     ) if results.face_landmarks else np.zeros(468*3)
+     return np.concatenate([lh, rh, pose, face])
+
+ # Main prediction function that combines everything
+ def predict_with_webcam(frame):
+     # Perform holistic detection
+     image, results = mediapipe_detection(frame, holistic)
+     # Extract keypoints
+     keypoints = extract_keypoints(results)
+     if np.count_nonzero(keypoints) > 0:
+         # Preprocess keypoints and make prediction
+         processed_landmarks = np.array([keypoints], dtype=np.float32)
+         interpreter.set_tensor(input_details[0]['index'], processed_landmarks)
+         interpreter.invoke()
+         outputs = interpreter.get_tensor(output_details[0]['index'])
+         prediction = outputs[0].argmax()
+         return str(prediction)
+
+ # Define the Gradio interface with the Webcam input and Text output
+ webcam_interface = gr.Interface(
     fn=predict_with_webcam,
+     inputs=gr.inputs.Image(shape=(480, 640), source="webcam"),
+     outputs="text",
+     live=True,
+     interpretation="default",
+     title="Webcam Landmark Prediction",
+     description="Make predictions using landmarks extracted from your webcam stream.",
 )

+ # Launch the Gradio app with the webcam interface
+ if __name__ == "__main__":
+     webcam_interface.launch()
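
Note on the added code: `predict_with_webcam` references `holistic`, `input_details`, and `output_details`, none of which are defined in the hunks shown here; they are presumably among the elided "other functions from previous code". A minimal sketch of the usual definitions, assuming the standard TFLite interpreter accessors and the same MediaPipe Holistic settings used in the removed code, would be:

```python
# Hypothetical sketch: the names are chosen to match the references in
# predict_with_webcam; the actual app.py may define them differently.
import mediapipe as mp
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="model.tflite")
interpreter.allocate_tensors()

# Standard TFLite accessors for input/output tensor metadata
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# A long-lived Holistic instance so predict_with_webcam can reuse it per frame
holistic = mp.solutions.holistic.Holistic(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)
```

The keypoint vector produced by the new `extract_keypoints` has 21·3 + 21·3 + 33·4 + 468·3 = 1662 values, so whether `np.array([keypoints], dtype=np.float32)` matches the model's expected input shape can be checked at runtime against `input_details[0]['shape']` before calling `set_tensor`.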