# NOTE(review): the lines below were web-page residue (a Hugging Face Spaces
# file listing: status, file size, commit hashes, line-number gutter) that was
# pasted into the source and would be a syntax error. Preserved as comments:
#   Spaces: Runtime error / File size: 2,596 Bytes
#   1d6b8f2 1b395e1 02ad2a8 1b395e1 1d6b8f2 245aec5 62c85c9 01cbfd5
import cv2
import numpy as np
import mediapipe as mp
from tensorflow.keras.models import load_model
import gradio as gr
# Load the sign language recognition model.
# NOTE(review): the model appears to expect (1, 30, 258) input — 30 frames of
# 258 keypoints each (33*4 pose + 21*3 per hand) — based on how
# predict_sign_from_video feeds it; confirm against the training pipeline.
model = load_model('isl.h5')
# Initialize Mediapipe: holistic solution for pose+hand landmarks,
# drawing utils for optional visualization.
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
# Define actions — label names indexed by the model's output neuron.
# The order must match the label order used at training time.
actions = ['hello', 'me', 'no', 'please', 'sorry', 'thank you', 'welcome', 'what', 'yes', 'you']
# Function to perform Mediapipe detection
def mediapipe_detection(image, model):
    """Run a Mediapipe model on one BGR frame.

    Converts the frame to RGB (Mediapipe's expected color order), runs the
    detector, and converts back to BGR for OpenCV-side use.

    Args:
        image: a BGR frame as produced by cv2.VideoCapture.read().
        model: a Mediapipe solution instance exposing .process().

    Returns:
        (frame, results): the BGR frame and the Mediapipe results object.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Marking the buffer read-only lets Mediapipe process it without copying.
    rgb.flags.writeable = False
    detection = model.process(rgb)
    rgb.flags.writeable = True
    bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    return bgr, detection
# Function to extract keypoints
def extract_keypoints(results):
    """Flatten Mediapipe holistic landmarks into a single feature vector.

    Layout: 33 pose points * (x, y, z, visibility) = 132 values, then
    21 left-hand points * (x, y, z) = 63, then 21 right-hand points * 63.
    Missing landmark groups contribute zeros, so the output is always (258,).

    Args:
        results: a Mediapipe holistic results object.

    Returns:
        np.ndarray of shape (258,).
    """
    if results.pose_landmarks:
        pose = np.array(
            [(p.x, p.y, p.z, p.visibility) for p in results.pose_landmarks.landmark]
        ).flatten()
    else:
        pose = np.zeros(33 * 4)

    if results.left_hand_landmarks:
        left = np.array(
            [(p.x, p.y, p.z) for p in results.left_hand_landmarks.landmark]
        ).flatten()
    else:
        left = np.zeros(21 * 3)

    if results.right_hand_landmarks:
        right = np.array(
            [(p.x, p.y, p.z) for p in results.right_hand_landmarks.landmark]
        ).flatten()
    else:
        right = np.zeros(21 * 3)

    return np.concatenate([pose, left, right])
# Function to predict sign from video
def predict_sign_from_video(video_path):
    """Predict a sign label from the first 30 frames of a video.

    Reads frames with OpenCV, extracts Mediapipe holistic keypoints per
    frame, and runs the loaded Keras model on the first full 30-frame
    keypoint sequence.

    Args:
        video_path: path to a video file readable by cv2.VideoCapture.

    Returns:
        The predicted action label (str), or None when the video yields
        fewer than 30 frames (preserving the original fall-through result).
    """
    sequence_length = 30  # the model consumes fixed 30-frame sequences
    cap = cv2.VideoCapture(video_path)
    keypoint_seq = []
    try:
        with mp_holistic.Holistic(min_detection_confidence=0.5,
                                  min_tracking_confidence=0.5) as holistic:
            while cap.isOpened() and len(keypoint_seq) < sequence_length:
                ret, frame = cap.read()
                if not ret:
                    break  # end of stream or decode failure
                _, results = mediapipe_detection(frame, holistic)
                keypoint_seq.append(extract_keypoints(results))
    finally:
        # BUG FIX: the original returned from inside the read loop before
        # reaching cap.release(), leaking the capture handle on every
        # successful prediction. try/finally guarantees release on all paths.
        cap.release()
    if len(keypoint_seq) < sequence_length:
        return None  # not enough frames for one full sequence
    sequence = np.array(keypoint_seq)
    probabilities = model.predict(np.expand_dims(sequence, axis=0))[0]
    return actions[int(np.argmax(probabilities))]
# Example clips shown in the Gradio UI; paths are relative to the app's
# working directory and must exist at launch time.
examples = [
    ['videos/abvv.webm'],
    ['videos/gdgdh.mp4']
]
# Create Gradio Interface: a video upload in, predicted label text out.
iface = gr.Interface(predict_sign_from_video,
                     inputs="video",
                     outputs="text",
                     title="Sign Speak",
                     description="Upload a video and get the predicted sign.",
                     examples=examples,
                     cache_examples=False)  # avoid running inference on examples at startup
# share=True requests a temporary public gradio.live URL in addition to the
# local server.
iface.launch(share=True)