import streamlit as st
from streamlit_webrtc import VideoTransformerBase, webrtc_streamer
import cv2
import numpy as np
from tensorflow.keras.models import load_model
import mediapipe as mp
# Load the trained action-recognition model
model = load_model("D2M1.h5")
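# NOTE (assumption): D2M1.h5 is expected to accept input of shape
# (1, 40, 258) — a 40-frame window of the 258-value keypoint vectors
# produced by extract_keypoints() below.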
# Utility functions (mediapipe_detection, draw_styled_landmarks, extract_keypoints, etc.)
# Kannada sign labels the model predicts (English glosses in comments).
# NOTE: 'ದೊಡ್ಡದು' (big) appears twice, so label_map below ends up with
# 19 distinct keys for 20 classes — presumably unintentional.
actions = np.array(['ಶುಭೋದಯ',      # good morning
                    'ದೊಡ್ಡದು',      # big
                    'ನೀವು',         # you
                    'ದೊಡ್ಡದು',      # big (duplicate)
                    'ಧನ್ಯವಾದ',      # thank you
                    'ಸಮಯ',          # time
                    'ನಮಸ್ಕಾರ',      # hello (namaskara)
                    'ವಿಮಾನ',        # airplane
                    'ಸಂತೋಷ',        # happy
                    'ಒಳ್ಳೆಯದು',     # good
                    'ಶುಭ ರಾತ್ರಿ',   # good night
                    'ಚಿಕ್ಕದು',      # small
                    'ನಾನು',         # I
                    'ಬೈಸಿಕಲ್',      # bicycle
                    'ಎತ್ತರದ',       # tall
                    'ಯುವ',          # young
                    'ನಿಧಾನ',        # slow
                    'ಹೊಸ',          # new
                    'ಅನಾರೋಗ್ಯ',     # sick
                    'ತಂಪಾದ'])       # cold
# Training-time dataset parameters (unused at inference; kept for reference)
no_sequences = 20
sequence_length = 40
start_folder = 0
label_map = {label: num for num, label in enumerate(actions)}

mp_holistic = mp.solutions.holistic      # Holistic model
mp_drawing = mp.solutions.drawing_utils  # Drawing utilities
def mediapipe_detection(image, model):
    """Run a MediaPipe model on a BGR frame; return the frame and the results."""
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # MediaPipe expects RGB
    image.flags.writeable = False                   # mark read-only for a small speedup
    results = model.process(image)                  # run detection
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # back to BGR for OpenCV drawing
    return image, results
def draw_landmarks(image, results):
    """Draw face, pose, and hand landmarks with default styling."""
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION)
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
def draw_styled_landmarks(image, results):
    """Draw face, pose, and hand landmarks with per-part colors."""
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
                              mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),
                              mp_drawing.DrawingSpec(color=(80, 256, 121), thickness=1, circle_radius=1))
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(80, 22, 10), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(80, 44, 121), thickness=2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))
def extract_keypoints(results):
    """Flatten pose and hand landmarks into a single feature vector; zeros when a part is undetected."""
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33 * 4)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([pose, lh, rh])
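# Feature layout per frame: 33 pose landmarks x (x, y, z, visibility) = 132,
# plus 21 landmarks x (x, y, z) per hand = 63 each,
# for 132 + 63 + 63 = 258 values.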
class VideoTransformer(VideoTransformerBase):
    def __init__(self):
        self.sequence = []     # rolling buffer of keypoint vectors
        self.sentence = []     # recognized words shown on screen
        self.predictions = []  # per-frame argmax history
        self.threshold = 0.5   # minimum confidence to accept a prediction
        self.mp_holistic = mp.solutions.holistic
        self.holistic = self.mp_holistic.Holistic(min_detection_confidence=0.9,
                                                  min_tracking_confidence=0.9)

    def transform(self, frame):
        img = frame.to_ndarray(format="bgr24")
        img = cv2.flip(img, 1)  # mirror for a selfie view
        image, results = mediapipe_detection(img, self.holistic)

        # Collect keypoints and keep only the most recent 40 frames
        keypoints = extract_keypoints(results)
        self.sequence.append(keypoints)
        self.sequence = self.sequence[-40:]

        if len(self.sequence) == 40:
            res = model.predict(np.expand_dims(self.sequence, axis=0))[0]
            self.predictions.append(np.argmax(res))

            # Accept a word only when the last 10 frames agree on the same
            # class and its confidence clears the threshold
            if len(self.predictions) >= 10 and np.unique(self.predictions[-10:]).size == 1:
                if res[np.argmax(res)] > self.threshold:
                    # Avoid repeating the word that was just appended
                    if len(self.sentence) == 0 or actions[np.argmax(res)] != self.sentence[-1]:
                        self.sentence.append(actions[np.argmax(res)])

            # Keep only the five most recent words
            if len(self.sentence) > 5:
                self.sentence = self.sentence[-5:]

        # Overlay the running sentence on the frame
        cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(self.sentence), (3, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        return image
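# NOTE (assumption): VideoTransformerBase/transform() and the
# video_transformer_factory argument below are the older streamlit-webrtc
# API; recent releases deprecate them in favor of VideoProcessorBase.recv()
# and video_processor_factory. This sketch keeps the original API.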
st.title("Sign Language Live Detector")
webrtc_streamer(key="sign-language-detector", video_transformer_factory=VideoTransformer)
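# To run locally (assuming this file is saved as app.py):
#   streamlit run app.py
#
# A "Build error" on Spaces is often a missing dependency; a plausible
# requirements.txt for this app (package names only — pins are assumptions
# to verify, not taken from the Space):
#   streamlit
#   streamlit-webrtc
#   opencv-python-headless
#   tensorflow
#   mediapipe
#   numpy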