# sign_Speak_MCE / app.py
import cv2
import numpy as np
import mediapipe as mp
from tensorflow.keras.models import load_model
import gradio as gr
# Load the sign language recognition model
model = load_model('isl.h5')
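# Assumption (inferred from the code below, not stated in the file): 'isl.h5' takes
# sequences of 30 frames, each a 258-value keypoint vector from extract_keypoints().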
# Initialize Mediapipe
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
# Define actions
actions = ['hello', 'me', 'no', 'please', 'sorry', 'thank you', 'welcome', 'what', 'yes', 'you']
# Function to perform Mediapipe detection
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV gives BGR; Mediapipe expects RGB
    image.flags.writeable = False                   # mark read-only so Mediapipe can process by reference
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # convert back to BGR for OpenCV
    return image, results
# Function to extract keypoints
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, lh, rh])
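# Each frame yields 33*4 + 21*3 + 21*3 = 258 values. The Holistic results also expose
# face landmarks (results.face_landmarks), which are not used in this feature vector.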
# Function to predict sign from video
def predict_sign_from_video(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []
    sign = "No sign detected (fewer than 30 frames in the video)"
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            image, results = mediapipe_detection(frame, holistic)
            keypoints = extract_keypoints(results)
            frames.append(keypoints)
            if len(frames) == 30:
                # Predict on the first full 30-frame sequence, then stop reading
                sequence = np.array(frames)
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                sign = actions[np.argmax(res)]
                break
    cap.release()
    return sign
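# Quick local check (assuming one of the bundled example clips below is present):
#   print(predict_sign_from_video('videos/abvv.webm'))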
examples = [
['videos/abvv.webm'],
['videos/gdgdh.webm']
]
# Create Gradio Interface
iface = gr.Interface(predict_sign_from_video,
                     inputs="video",
                     outputs="text",
                     title="Sign Speak",
                     description="Upload a video and get the predicted sign.",
                     examples=examples,
                     cache_examples=False)
iface.launch(share=True)
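# Note: share=True creates a temporary public link for local runs; when the app is
# deployed as a Hugging Face Space it is already served publicly, so the flag is not needed there.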