import cv2
import numpy as np
import mediapipe as mp
from tensorflow.keras.models import load_model
import gradio as gr
# Load the trained sign language recognition model
model = load_model('isl.h5')
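# Assumption (not stated in the source): isl.h5 is a sequence classifier trained
# on 30-frame windows of 258 keypoint features per frame, matching the shapes
# produced by extract_keypoints() and predict_sign_from_video() below.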

# Initialize MediaPipe holistic model and drawing utilities
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Sign labels, in the order of the model's output classes
actions = ['hello', 'me', 'no', 'please', 'sorry', 'thank you', 'welcome', 'what', 'yes', 'you']

# Run MediaPipe detection on a single BGR frame and return the frame plus results
def mediapipe_detection(image, holistic_model):  # renamed from `model` to avoid shadowing the global Keras model
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # MediaPipe expects RGB; OpenCV delivers BGR
    image.flags.writeable = False                   # mark read-only so MediaPipe can process by reference
    results = holistic_model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # convert back for any OpenCV drawing/display
    return image, results

# Flatten pose and hand landmarks into a single feature vector per frame
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33 * 4)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([pose, lh, rh])
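# Per-frame feature size: 33 pose landmarks x 4 values (132) plus 21 landmarks x 3
# values per hand (63 + 63) = 258. Missing landmarks are zero-filled so the
# vector length stays constant across frames.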

# Predict a sign from a video file: collect 30 frames of keypoints, then classify once
def predict_sign_from_video(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            image, results = mediapipe_detection(frame, holistic)
            keypoints = extract_keypoints(results)
            frames.append(keypoints)
            if len(frames) == 30:  # the model expects a fixed sequence length of 30
                break
    cap.release()  # release the capture before returning
    if len(frames) < 30:
        return "Video too short: at least 30 frames are needed for a prediction."
    sequence = np.array(frames)  # shape (30, 258)
    res = model.predict(np.expand_dims(sequence, axis=0))[0]
    return actions[np.argmax(res)]

# Example videos bundled with the Space
examples = [
    ['videos/abvv.webm'],
    ['videos/gdgdh.webm'],
]

# Create the Gradio interface
iface = gr.Interface(
    fn=predict_sign_from_video,
    inputs="video",
    outputs="text",
    title="Sign Speak",
    description="Upload a video and get the predicted sign.",
    examples=examples,
    cache_examples=False,
)
iface.launch(share=True)
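
# Optional local smoke test (a sketch, assuming the example videos sit next to
# this script): comment out iface.launch() above and call the predictor directly.
#
#     print(predict_sign_from_video('videos/abvv.webm'))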