import pickle
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
from collections import deque
import gradio as gr
# Constants (fallback defaults)
SEQUENCE_LENGTH = 30
FEATURE_LENGTH = 168
# Load model & preprocessing
def load_model_and_preprocessing():
    global scaler, label_encoder, model, labels_dict
    global SEQUENCE_LENGTH, FEATURE_LENGTH

    # Scaler, label encoder, and sequence dimensions saved at training time
    with open('lstm_preprocessing.pickle', 'rb') as f:
        preproc = pickle.load(f)
    scaler = preproc['scaler']
    label_encoder = preproc['label_encoder']
    SEQUENCE_LENGTH = preproc['timesteps']
    FEATURE_LENGTH = preproc['n_features']

    model = tf.keras.models.load_model('lstm_model.h5')

    # Index -> gloss mapping for the 50 supported signs
    labels_dict = {0: 'salam', 1: 'good morning', 2: 'thanks', 3: 'ana', 4: 'anta', 5: 'ante',
                   6: 'hua', 7: 'hea', 8: 'antm', 9: 'hm', 10: 'name', 11: 'how r u',
                   12: 'thanks god', 13: 'happy', 14: 'sad', 15: 'angry', 16: 'good',
                   17: 'bad', 18: 'tired', 19: 'sick', 20: 'see', 21: 'say', 22: 'talk',
                   23: 'walk', 24: 'went', 25: 'came', 26: 'home', 27: 'eat', 28: 'slept',
                   29: 'university', 30: 'today', 31: 'tmrw', 32: 'sunday', 33: 'tuesday',
                   34: 'thursday', 35: 'friday', 36: 'week', 37: 'month', 38: 'year',
                   39: 'when', 40: 'I know', 41: 'thinking', 42: 'forgotten', 43: 'love',
                   44: 'I want', 45: 'helps', 46: 'not allowed', 47: 'agree',
                   48: 'together', 49: 'different'}

load_model_and_preprocessing()
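# The pickle loaded above is assumed (its creation is not shown here) to have been written
# during training along these lines, with a scikit-learn scaler fit on flattened frames and
# a LabelEncoder fit on the sign labels; treat this as a sketch, not the actual training code:
#
#   from sklearn.preprocessing import StandardScaler, LabelEncoder
#   scaler = StandardScaler().fit(train_frames)            # train_frames: (n_frames, 168)
#   label_encoder = LabelEncoder().fit(train_labels)       # train_labels: list of sign names
#   with open('lstm_preprocessing.pickle', 'wb') as f:
#       pickle.dump({'scaler': scaler, 'label_encoder': label_encoder,
#                    'timesteps': 30, 'n_features': 168}, f)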
# MediaPipe setup (static_image_mode=True re-runs hand detection on every captured frame)
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=2)
def extract_hand_features(image):
    """Return a fixed-length feature vector for up to two hands, or None if no hand is found."""
    data_aux = np.zeros(FEATURE_LENGTH, dtype=np.float32)
    # MediaPipe expects RGB; the input frame is assumed to be BGR (OpenCV convention)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(image_rgb)
    if not results.multi_hand_landmarks:
        return None
    for hand_idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
        if hand_idx >= 2:
            break
        # Normalize landmark coordinates relative to the hand's top-left corner
        min_x = min(lm.x for lm in hand_landmarks.landmark)
        min_y = min(lm.y for lm in hand_landmarks.landmark)
        # Each hand occupies an 84-float block; the first 42 slots hold (x, y) offsets
        base_idx = hand_idx * 84
        for i, lm in enumerate(hand_landmarks.landmark):
            data_aux[base_idx + i * 2] = lm.x - min_x
            data_aux[base_idx + i * 2 + 1] = lm.y - min_y
    return data_aux
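# Quick sanity check for the feature extractor (hypothetical image path, not part of the
# app's runtime flow):
#
#   frame = cv2.imread('sample_frame.jpg')                 # BGR image, OpenCV convention
#   vec = extract_hand_features(frame)
#   print(None if vec is None else vec.shape)              # expected: (168,)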
# Rolling buffer of per-frame feature vectors; a full buffer forms one LSTM input sequence
sequence_buffer = deque(maxlen=SEQUENCE_LENGTH)

def predict_sign(image):
    if image is None:
        return "No image received"
    # Gradio delivers an RGB array; convert to BGR because extract_hand_features expects OpenCV order
    image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    features = extract_hand_features(image)
    if features is None:
        return "No hand detected"
    sequence_buffer.append(features)
    if len(sequence_buffer) < SEQUENCE_LENGTH:
        return f"Waiting for sequence: {len(sequence_buffer)}/{SEQUENCE_LENGTH}"
    # Scale each frame with the training-time scaler, then restore the (1, timesteps, features) shape
    sequence_data = np.array(list(sequence_buffer)).reshape(1, SEQUENCE_LENGTH, FEATURE_LENGTH)
    scaled = scaler.transform(sequence_data.reshape(-1, FEATURE_LENGTH)).reshape(1, SEQUENCE_LENGTH, FEATURE_LENGTH)
    prediction_scores = model.predict(scaled, verbose=0)[0]
    predicted_idx = int(np.argmax(prediction_scores))
    confidence = prediction_scores[predicted_idx]
    label = labels_dict.get(predicted_idx, "Unknown")
    return f"{label} ({confidence:.2f})"
# Gradio interface (Gradio 3.x Image signature; Gradio 4+ replaces source/tool with sources=["webcam"])
interface = gr.Interface(fn=predict_sign,
                         inputs=gr.Image(source="webcam", tool="editor", type="numpy"),
                         outputs="text",
                         title="Sign Language Recognition",
                         description="Perform a sign and capture it. Wait for the prediction...")

# ✅ LAUNCH THE APP
interface.launch()