import streamlit as st
from streamlit_webrtc import VideoTransformerBase, webrtc_streamer
import cv2
import numpy as np
from tensorflow.keras.models import load_model
import mediapipe as mp
@st.cache_resource  # cache the model so Streamlit reruns don't reload it (needs Streamlit >= 1.18)
def get_model():
    return load_model("D2M1.h5")
model = get_model()
# Kannada sign labels the model predicts (English glosses in the comments).
actions = np.array(['ಶುಭೋದಯ',      # good morning
                    'ದೊಡ್ಡದು',      # big
                    'ನೀವು',         # you
                    'ದೊಡ್ಡದು',      # big (listed twice in the trained label set)
                    'ಧನ್ಯವಾದ',      # thank you
                    'ಸಮಯ',         # time
                    'ನಮಸ್ಕಾರ',      # hello
                    'ವಿಮಾನ',       # airplane
                    'ಸಂತೋಷ',       # happy
                    'ಒಳ್ಳೆಯದು',     # good
                    'ಶುಭ ರಾತ್ರಿ',    # good night
                    'ಚಿಕ್ಕದು',       # small
                    'ನಾನು',         # I
                    'ಬೈಸಿಕಲ್',      # bicycle
                    'ಎತ್ತರದ',       # tall
                    'ಯುವ',         # young
                    'ನಿಧಾನ',        # slow
                    'ಹೊಸ',         # new
                    'ಅನಾರೋಗ್ಯ',     # sick
                    'ತಂಪಾದ'])      # cold
no_sequences = 20     # sequences collected per action during training (unused at inference)
sequence_length = 40  # frames per sequence fed to the model
start_folder = 0      # first folder index used during data collection (unused at inference)
label_map = {label: num for num, label in enumerate(actions)}
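# Note: because 'ದೊಡ್ಡದು' appears twice in `actions`, the comprehension keeps only
# its later index, so label_map ends up with 19 entries for 20 actions.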
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities
def mediapipe_detection(image, model):
    """Run a MediaPipe model on a BGR frame; return the frame and the results."""
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # MediaPipe expects RGB
    image.flags.writeable = False                   # mark read-only for a small speed-up
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # back to BGR for OpenCV drawing
    return image, results
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION)
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
def draw_styled_landmarks(image, results):
    """Like draw_landmarks, but with per-part colours for landmarks and connections."""
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
                              mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),
                              mp_drawing.DrawingSpec(color=(80, 255, 121), thickness=1, circle_radius=1))
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(80, 22, 10), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(80, 44, 121), thickness=2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))
def extract_keypoints(results):
    """Flatten pose and hand landmarks into one feature vector, zero-filled when absent."""
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33 * 4)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([pose, lh, rh])
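# Per frame this yields 33*4 + 21*3 + 21*3 = 258 values, so with
# sequence_length = 40 the model sees input of shape (1, 40, 258).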
class VideoTransformer(VideoTransformerBase):
    def __init__(self):
        self.sequence = []     # rolling buffer of per-frame keypoint vectors
        self.sentence = []     # recognised signs shown in the banner
        self.predictions = []  # history of predicted class indices
        self.threshold = 0.5   # minimum confidence to accept a prediction
        self.mp_holistic = mp.solutions.holistic
        self.holistic = self.mp_holistic.Holistic(min_detection_confidence=0.9,
                                                  min_tracking_confidence=0.9)
    def transform(self, frame):
        img = frame.to_ndarray(format="bgr24")
        img = cv2.flip(img, 1)  # mirror the webcam image
        image, results = mediapipe_detection(img, self.holistic)
        # draw_styled_landmarks(image, results)  # uncomment to overlay landmarks
        keypoints = extract_keypoints(results)
        self.sequence.append(keypoints)
        self.sequence = self.sequence[-sequence_length:]  # keep only the newest 40 frames
        if len(self.sequence) == sequence_length:
            res = model.predict(np.expand_dims(self.sequence, axis=0))[0]
            self.predictions.append(np.argmax(res))
            # Accept a sign only if the last 10 predictions agree and the
            # confidence clears the threshold; skip immediate repeats.
            if np.unique(self.predictions[-10:])[0] == np.argmax(res):
                if res[np.argmax(res)] > self.threshold:
                    if len(self.sentence) > 0:
                        if actions[np.argmax(res)] != self.sentence[-1]:
                            self.sentence.append(actions[np.argmax(res)])
                    else:
                        self.sentence.append(actions[np.argmax(res)])
            if len(self.sentence) > 5:
                self.sentence = self.sentence[-5:]  # show at most the 5 most recent signs
        # Banner with the running sentence. Note: Hershey fonts cover only ASCII,
        # so the Kannada labels will not render correctly here (see the helper below).
        cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(self.sentence), (3, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        return image
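# A minimal sketch (not part of the original app) for drawing Unicode text:
# cv2.putText cannot render Kannada glyphs, so the banner could instead be drawn
# with Pillow. The font path below is a hypothetical example; any Kannada
# TrueType font (e.g. Noto Sans Kannada) would work.
from PIL import Image, ImageDraw, ImageFont
def put_kannada_text(frame, text, org=(3, 5), font_path="NotoSansKannada-Regular.ttf"):
    """Draw Unicode text on a BGR frame via Pillow and return the frame."""
    pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))  # BGR -> RGB
    draw = ImageDraw.Draw(pil_img)
    draw.text(org, text, font=ImageFont.truetype(font_path, 28), fill=(255, 255, 255))
    return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)          # RGB -> BGR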
st.title("Sign Language Live Detector")
webrtc_streamer(key="sign-language-detector", video_transformer_factory=VideoTransformer)
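# Assuming this file is saved as app.py, run it locally with:
#   streamlit run app.py
# Note: VideoTransformerBase / video_transformer_factory are the older
# streamlit_webrtc API; recent releases favour VideoProcessorBase with recv().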