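"""Streamlit app for sign language recognition.

Loads the CNN in 'sign_asl_cnn_30_epochs.h5' (200x200 RGB inputs, 36 classes:
digits 0-9 and letters A-Z) and predicts a sign either from an uploaded image
or from a live webcam feed, using MediaPipe Hands to locate the hand region.

Run with: streamlit run <this_script>.py
"""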
import cv2
import mediapipe as mp
import numpy as np
import streamlit as st
from keras.models import load_model

# Load the model
model = load_model('sign_asl_cnn_30_epochs.h5')
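
# Class indices 0-9 map to digits '0'-'9'; 10-35 map to letters 'A'-'Z'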
class_labels = {i: str(i) if i < 10 else chr(65 + i - 10) for i in range(36)}

# Resize, normalize, and reshape an image to the model's input format
def preprocess_image(image):
    image = cv2.resize(image, (200, 200))
    image = image / 255.0  # scale pixel values to [0, 1]
    image = image.reshape(1, 200, 200, 3)  # add batch dimension
    return image
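
# Example: preprocess_image(img).shape == (1, 200, 200, 3) for any BGR image img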

# Predict the sign language letter for a single image
def predict_letter(image):
    processed_image = preprocess_image(image)
    predictions = model.predict(processed_image)
    predicted_class = int(np.argmax(predictions, axis=1)[0])
    sign_letter = class_labels[predicted_class]
    return sign_letter
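
# Note: predict_letter runs on the image exactly as given; it is used by the
# upload path, where the photo is assumed to frame the hand. The webcam path
# instead goes through detect_hands, which crops the hand region first.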

# Detect hands in the image, classify each detected hand, and annotate the
# frame with a label and bounding box. Returns the annotated image and the
# last predicted letter (None if no hand was found).
def detect_hands(image):
    mp_hands = mp.solutions.hands
    # Creating Hands() on every call is simple but slow for video; a single
    # module-level instance would avoid reloading the detector each frame.
    hands = mp_hands.Hands()
    margin = 15  # extra pixels around the hand when cropping
    result = None

    # Convert the image to RGB
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Process the image and get the hand landmarks
    results = hands.process(image_rgb)

    if results.multi_hand_landmarks:
        for landmarks in results.multi_hand_landmarks:
            # Convert normalized landmark coordinates to pixel coordinates
            landmarks_xy = [(int(landmark.x * image.shape[1]), int(landmark.y * image.shape[0]))
                            for landmark in landmarks.landmark]

            # Define the bounding box for the hand
            x_min = max(0, min(landmarks_xy, key=lambda x: x[0])[0] - margin)
            y_min = max(0, min(landmarks_xy, key=lambda x: x[1])[1] - margin)
            x_max = min(image.shape[1], max(landmarks_xy, key=lambda x: x[0])[0] + margin)
            y_max = min(image.shape[0], max(landmarks_xy, key=lambda x: x[1])[1] + margin)

            # Extract the hand region
            roi = image[y_min:y_max, x_min:x_max]

            # Check if the ROI is empty
            if roi.size == 0:
                continue

            # Resize the ROI to the model's input size
            roi = cv2.resize(roi, (200, 200), interpolation=cv2.INTER_AREA)

            # Mask out the background with a fixed RGB skin-tone range
            rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
            lower_skin = np.array([93, 72, 51])
            upper_skin = np.array([224, 194, 183])
            mask = cv2.inRange(rgb, lower_skin, upper_skin)
            roi = cv2.bitwise_and(roi, roi, mask=mask)

            # Normalize and add a batch dimension, matching preprocess_image
            roi = roi / 255.0
            roi = roi.reshape(1, 200, 200, 3)

            # Classify the masked hand region
            predictions = model.predict(roi)
            predicted_class = int(np.argmax(predictions, axis=1)[0])
            result = class_labels[predicted_class]

            # Draw result on the image
            cv2.putText(image, str(result), (x_min, y_min - 10),
                        cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)

            # Draw bounding box on the image
            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)

    return image, result

# Streamlit app
st.title('Sign Language Recognition')

# Sidebar with radio button for Upload/Webcam
selected_option = st.sidebar.radio("Select Option", ["Upload", "Webcam"], index=0)

if selected_option == "Upload":
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

    if uploaded_file is not None:
        if st.button('Predict'):
            # Decode the uploaded bytes into a BGR OpenCV image
            contents = uploaded_file.read()
            nparr = np.frombuffer(contents, np.uint8)
            image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)

            if image is None:
                st.error('Could not decode the uploaded image.')
            else:
                # Make the prediction and display it
                predicted_letter = predict_letter(image)
                st.write('Predicted Letter:', predicted_letter)

elif selected_option == "Webcam":
    # Placeholders for the video frame, the prediction, and the status line
    webcam_frame = st.empty()
    predicted_letter_webcam = st.empty()
    webcam_status = st.empty()

    webcam_button = st.button("Start Webcam")

    if webcam_button:
        webcam_status.text("Webcam is on.")
        webcam_stop_button = st.button("Stop Webcam")

        # OpenCV video capture
        cap = cv2.VideoCapture(0)

        while True:
            # Read a frame from the webcam
            ret, frame = cap.read()
            if not ret:
                webcam_status.text("Could not read from the webcam.")
                break

            # Detect hands, classify, and annotate the frame
            frame, predicted_letter = detect_hands(frame)

            # Display the annotated frame in Streamlit
            webcam_frame.image(frame, channels="BGR")

            # Display the predicted letter, if a hand was found
            if predicted_letter is not None:
                predicted_letter_webcam.text(f"Predicted Letter: {predicted_letter}")

            # Clicking "Stop Webcam" makes Streamlit rerun the script, which
            # ends this loop; the check below only sees the value the button
            # had when this run started.
            if webcam_stop_button:
                webcam_status.text("Webcam is off.")
                break

        # Release the webcam when done
        cap.release()