Spaces:

JefferyJapheth
/

topStep

Runtime error

App Files Community

JefferyJapheth committed on Aug 1, 2023

Commit

4ca0f67

•

1 Parent(s): e6b8cdf

complete outline for prediction and inference

Browse files

Files changed (1) hide show

app.py +63 -18

app.py CHANGED Viewed

@@ -1,33 +1,78 @@
-import gradio as gr
 import cv2
 import numpy as np
-# ... (TFLiteModel, preprocess_landmarks, make_prediction definitions as before) ...
 # Function to extract landmarks from the webcam frame
-def extract_landmarks_from_frame(frame):
-    # Your code to extract landmarks from the webcam frame using MediaPipe or any other method
-    # The landmark_data should be a list or array containing the spatial coordinates of landmarks
-    landmark_data = []  # Replace this with your actual landmark data
-    return landmark_data
 # Gradio Interface Function
 def predict_with_webcam(frame):
-    # Extract landmarks from the webcam frame
-    landmark_data = extract_landmarks_from_frame(frame)
-    if landmark_data is not None:
-        # Preprocess the landmarks
-        processed_landmarks = preprocess_landmarks(landmark_data)
-        # Make predictions using the TFLite model
-        predictions = make_prediction(processed_landmarks)
-        # You can process the predictions as needed before returning them
-        return predictions
 # Define the Gradio interface with the Webcam input and Text output
 webcam_interface = gr.Interface(
     fn=predict_with_webcam,
-    inputs=gr.inputs.Webcam(),  # Use the Webcam input type
-    outputs="text",  # You can customize this based on your model's output
     live=True,
     interpretation="default",
     title="Webcam Landmark Prediction",

 import cv2
+import gradio as gr
+import mediapipe as mp
 import numpy as np
+import tensorflow.lite as tflite
+# Initialize MediaPipe solutions
+mp_hands = mp.solutions.hands
+mp_pose = mp.solutions.pose
+mp_face_mesh = mp.solutions.face_mesh
+hands = mp_hands.Hands()
+pose = mp_pose.Pose()
+face_mesh = mp_face_mesh.FaceMesh()
+# Load the TFLite model
+interpreter = tflite.Interpreter(model_path="model.tflite")
+interpreter.allocate_tensors()
+input_details = interpreter.get_input_details()
+output_details = interpreter.get_output_details()
+# Preprocess landmarks
+def preprocess_landmarks(hand1_landmarks, hand2_landmarks, pose_landmarks, lip_landmarks):
+    hand1_landmarks = [[landmark.x, landmark.y, landmark.z] for landmark in hand1_landmarks.landmark]
+    hand2_landmarks = [[landmark.x, landmark.y, landmark.z] for landmark in hand2_landmarks.landmark]
+    pose_landmarks = [[landmark.x, landmark.y, landmark.z] for landmark in pose_landmarks.landmark]
+    lip_landmarks = [[landmark.x, landmark.y, landmark.z] for landmark in lip_landmarks]
+    combined_landmarks = lip_landmarks + hand1_landmarks + hand2_landmarks + pose_landmarks
+    return np.array(combined_landmarks, dtype=np.float32)
 # Function to extract landmarks from the webcam frame
+def extract_landmarks(frame):
+    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+    results = hands.process(frame_rgb)
+    pose_results = pose.process(frame_rgb)
+    face_results = face_mesh.process(frame_rgb)
+    if not results.multi_hand_landmarks or not pose_results.pose_landmarks or not face_results.multi_face_landmarks:
+        return None
+    hand1_landmarks = results.multi_hand_landmarks[0]
+    if len(results.multi_hand_landmarks) > 1:
+        hand2_landmarks = results.multi_hand_landmarks[1]
+    else:
+        hand2_landmarks = hand1_landmarks
+    pose_landmarks = pose_results.pose_landmarks
+    face_landmarks = face_results.multi_face_landmarks[0]
+    lip_landmarks = [face_landmarks.landmark[i] for i in LIPS_IDXS0 - START_IDX]
+    return hand1_landmarks, hand2_landmarks, pose_landmarks, lip_landmarks
+# Make prediction
+def make_prediction(processed_landmarks):
+    inputs = np.array([processed_landmarks])
+    interpreter.set_tensor(input_details[0]['index'], inputs)
+    interpreter.invoke()
+    outputs = interpreter.get_tensor(output_details[0]['index'])
+    return outputs[0].argmax()
 # Gradio Interface Function
 def predict_with_webcam(frame):
+    landmarks = extract_landmarks(frame)
+    if landmarks is not None:
+        processed_landmarks = preprocess_landmarks(*landmarks)
+        prediction = make_prediction(processed_landmarks)
+        return str(prediction)
 # Define the Gradio interface with the Webcam input and Text output
 webcam_interface = gr.Interface(
     fn=predict_with_webcam,
+    inputs=gr.inputs.Image(shape=(480, 640), source="webcam"),
+    outputs="text",
     live=True,
     interpretation="default",
     title="Webcam Landmark Prediction",