# sign_Speak_MCE/app.py
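"""Sign Speak: a Gradio app that predicts a sign from an uploaded video.

Each frame is run through MediaPipe Holistic, the pose and hand keypoints are
stacked into a 30-frame sequence, and a pre-trained Keras model (isl.h5)
classifies the sequence into one of ten signs.
"""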
import cv2
import numpy as np
import mediapipe as mp
from tensorflow.keras.models import load_model
import gradio as gr
# Load the sign language recognition model
model = load_model('isl.h5')
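# Assumption: isl.h5 takes a (30, 258) keypoint sequence per sample, matching the
# 30-frame windows and 258-value feature vectors built below, and outputs one
# probability per action in the list that follows.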
# Initialize Mediapipe
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
# Define actions
actions = ['hello', 'me', 'no', 'please', 'sorry', 'thank you', 'welcome', 'what', 'yes', 'you']

# Run Mediapipe Holistic on one BGR frame and return the frame plus the landmark results
def mediapipe_detection(image, holistic_model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Mediapipe expects RGB input
    image.flags.writeable = False                   # Read-only view gives a small speed-up
    results = holistic_model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # Convert back to BGR for OpenCV
    return image, results

# Flatten pose and hand landmarks into one feature vector; missing parts become zeros
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33 * 4)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    # 132 pose + 63 left-hand + 63 right-hand values = 258 features per frame
    return np.concatenate([pose, lh, rh])

# Read a video, extract keypoints frame by frame, and classify the first 30-frame sequence
def predict_sign_from_video(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []
    sign = "No sign detected"  # Fallback for videos shorter than 30 frames
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            image, results = mediapipe_detection(frame, holistic)
            keypoints = extract_keypoints(results)
            frames.append(keypoints)
            if len(frames) == 30:
                # A full sequence is collected: run the classifier and stop reading
                sequence = np.array(frames)
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                sign = actions[np.argmax(res)]
                break
    cap.release()
    return sign
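
# Example videos for the interface (paths assume a videos/ folder in this repo)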
examples = [
    ['videos/abvv.webm'],
    ['videos/gdgdh.mp4'],
]

# Create the Gradio interface
iface = gr.Interface(
    fn=predict_sign_from_video,
    inputs="video",
    outputs="text",
    title="Sign Speak",
    description="Upload a video and get the predicted sign.",
    examples=examples,
    cache_examples=False,
)
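# share=True creates a temporary public link when running locally; on Hugging
# Face Spaces the app is already publicly served and this flag has no effect.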
iface.launch(share=True)