Spaces:

sandeepa-TN
/

sign_Speak_MCE

Runtime error

App Files Files Community

Sandeepa committed on Apr 2

Commit

1d6b8f2

•

1 Parent(s): 9d6280b

Upload app.py

Browse files

Files changed (1) hide show

app.py +61 -0

app.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import cv2
+import numpy as np
+import mediapipe as mp
+from tensorflow.keras.models import load_model
+import gradio as gr
# Keras model used for sign language recognition, loaded once at import time
# from the file "isl.h5" (path is relative to the working directory).
model = load_model("isl.h5")

# Shortcuts to the MediaPipe solution modules used by this app.
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Sign labels. The predicted label is looked up here by the argmax index of
# the model output, so the order must not change.
actions = [
    "hello",
    "me",
    "no",
    "please",
    "sorry",
    "thank you",
    "welcome",
    "what",
    "yes",
    "you",
]
def mediapipe_detection(image, model):
    """Run a MediaPipe model on one frame.

    Args:
        image: Frame in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before processing).
        model: Object exposing ``process(rgb_image)``; the caller passes a
            ``Holistic`` instance.

    Returns:
        A ``(bgr_image, results)`` tuple: the frame converted back to BGR and
        whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The buffer is marked read-only only for the duration of the call;
    # presumably this lets MediaPipe avoid copying it -- TODO confirm.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results
def extract_keypoints(results):
    """Flatten pose and hand landmarks into a single 1-D feature vector.

    Args:
        results: Object with ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks`` attributes. Each is either falsy (nothing
            detected) or has a ``landmark`` iterable of points.

    Returns:
        ``np.ndarray`` holding, in order: pose (x, y, z, visibility per
        point), left hand (x, y, z per point), right hand (x, y, z per
        point). A missing group is replaced by zeros of length 33*4, 21*3
        and 21*3 respectively.
    """

    def _flatten(group, fields, zero_len):
        # No detection for this group: substitute an all-zero placeholder.
        if not group:
            return np.zeros(zero_len)
        rows = [[getattr(point, name) for name in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ("x", "y", "z")
    parts = [
        _flatten(results.pose_landmarks, xyz + ("visibility",), 33 * 4),
        _flatten(results.left_hand_landmarks, xyz, 21 * 3),
        _flatten(results.right_hand_landmarks, xyz, 21 * 3),
    ]
    return np.concatenate(parts)
def predict_sign_from_video(video_path):
    """Predict the sign shown in the first 30 frames of a video.

    Frames are read from ``video_path``, converted to keypoint vectors with
    ``mediapipe_detection`` + ``extract_keypoints``, and the stacked sequence
    is classified by the module-level ``model``.

    Args:
        video_path: Path of the video file to analyse. May be ``None`` or
            empty when no video was supplied.

    Returns:
        The predicted label from ``actions`` on success. If no video was
        supplied, or fewer than 30 frames could be read (including the case
        where the file cannot be opened), a human-readable message
        describing the problem is returned instead of ``None``.
    """
    # Number of frames fed to the model in one prediction.
    sequence_length = 30

    if not video_path:
        return "No video was provided."

    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        with mp_holistic.Holistic(min_detection_confidence=0.5,
                                  min_tracking_confidence=0.5) as holistic:
            # Stop as soon as enough frames are collected; only the first
            # complete sequence is ever classified.
            while cap.isOpened() and len(frames) < sequence_length:
                ret, frame = cap.read()
                if not ret:
                    break
                _, results = mediapipe_detection(frame, holistic)
                frames.append(extract_keypoints(results))
    finally:
        # Always free the capture handle, including on the success path and
        # when frame processing raises.
        cap.release()

    if len(frames) < sequence_length:
        return (
            f"Could not read enough frames from the video: "
            f"need {sequence_length}, got {len(frames)}."
        )

    # Add a leading batch axis: (sequence_length, features) -> (1, ...).
    sequence = np.expand_dims(np.array(frames), axis=0)
    scores = model.predict(sequence)[0]
    return actions[int(np.argmax(scores))]
# Gradio UI: one video input wired to predict_sign_from_video, with the
# function's return value shown as text.
iface = gr.Interface(
    fn=predict_sign_from_video,
    inputs="video",
    outputs="text",
    title="Sign Speak",
    description="Upload a video and get the predicted sign.",
)

# Start the web app as soon as this module is executed.
iface.launch()