HenRick69 committed on
Commit 1e0a8f3
1 Parent(s): c73265e

Upload main files

Files changed (4)
  1. AudioClassifier.py +54 -0
  2. FacePosition.py +82 -0
  3. app.py +39 -0
  4. cursor_movement_model.pkl +3 -0
AudioClassifier.py ADDED
@@ -0,0 +1,54 @@
+ """
+ AudioClassifier class
+
+ Author: HenryAreiza
+ Date: 08/09/2023
+ """
+
+ from scipy.io import wavfile
+ from scipy.signal import decimate
+ from transformers import pipeline
+
+ class AudioClassifier:
+     """
+     A class for classifying audio commands using a pre-trained model.
+
+     This class provides functionality for classifying audio commands based on
+     a pre-trained audio classification model.
+
+     Attributes:
+         vocab (list): Vocabulary of valid commands.
+         pipe: The Hugging Face Transformers pipeline for audio classification.
+     """
+
+     def __init__(self):
+         """
+         Initializes the AudioClassifier class.
+         """
+         self.vocab = ["left", "right", "up", "down", "go", "follow",
+                       "on", "off", "one", "two", "three", "stop"]
+
+         # Load the audio classification pipeline
+         self.pipe = pipeline("audio-classification", model="0xb1/wav2vec2-base-finetuned-speech_commands-v0.02")
+
+     def predict(self, audio_path):
+         """
+         Classify an audio file into a command label.
+
+         Args:
+             audio_path (str): Path to the input audio file.
+
+         Returns:
+             result (str): The classified command label, or 'unknown' if the
+                 prediction is not in the vocabulary.
+         """
+         # Read the recording and downsample it by a factor of 3
+         _, audio = wavfile.read(audio_path)
+         audio = decimate(audio, 3)
+
+         result = self.pipe(audio)[0]["label"]
+
+         if result not in self.vocab:
+             result = 'unknown'
+
+         return result
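A minimal usage sketch for the class above (an illustration under assumptions: `command.wav` is a hypothetical recording, and the decimate-by-3 step suggests input captured at three times the model's rate, e.g. 48 kHz down to the 16 kHz used by the Speech Commands dataset):

```python
# Hypothetical usage of AudioClassifier; "command.wav" is a placeholder path.
from AudioClassifier import AudioClassifier

classifier = AudioClassifier()

# predict() reads the file, downsamples it by a factor of 3,
# and returns a vocabulary label or 'unknown'.
print(classifier.predict("command.wav"))  # e.g. "left", "stop", or "unknown"
```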
FacePosition.py ADDED
@@ -0,0 +1,82 @@
+ """
+ FacePosition class
+
+ Author: HenryAreiza
+ Date: 08/09/2023
+ """
+
+ import os
+ import cv2
+ import pickle
+ import numpy as np
+ import mediapipe as mp
+
+ class FacePosition:
+     """
+     A class for controlling the cursor based on head movements.
+
+     This class provides functionality for detecting a face using
+     the MediaPipe library and controlling the cursor's movement accordingly.
+
+     Attributes:
+         movement (list): List of classes corresponding to the predicted movement.
+         images (list): List of images associated with each class.
+         cursor_model: The machine learning model for gesture prediction.
+         face_detection: The MediaPipe Face Detection component.
+     """
+
+     def __init__(self):
+         """
+         Initializes the FacePosition class.
+         """
+         self.movement = ['Center', 'Up', 'Right/Up', 'Right', 'Right/Down',
+                          'Down', 'Left/Down', 'Left', 'Left/Up']
+         self.images = [cv2.imread(os.path.join('media', str(i) + '.png')) for i in range(9)]
+
+         # Load the cursor movement model
+         with open('cursor_movement_model.pkl', 'rb') as f:
+             self.cursor_model = pickle.load(f)
+
+         # Initialize the MediaPipe Face Detection component
+         self.face_detection = mp.solutions.face_detection.FaceDetection(min_detection_confidence=0.5)
+
+     def predict(self, frame):
+         """
+         Predict the cursor movement based on head position.
+
+         Args:
+             frame (numpy.ndarray): Input video frame (RGB).
+
+         Returns:
+             result (list): The predicted class image and label.
+         """
+         # Perform face detection
+         results = self.face_detection.process(frame)
+
+         # Read the reference and landmarks from the detected face
+         if results.detections:
+             for detection in results.detections:
+                 reference = [[detection.location_data.relative_bounding_box.xmin,
+                               detection.location_data.relative_bounding_box.ymin],
+                              [detection.location_data.relative_bounding_box.width,
+                               detection.location_data.relative_bounding_box.height]]
+                 keypoints = []
+                 for key_point in detection.location_data.relative_keypoints:
+                     keypoints.append([key_point.x, key_point.y])
+                 break
+
+             # Transform the lists into numpy arrays
+             reference = np.array(reference)
+             keypoints = np.array(keypoints)
+
+             # Remove the offset from the keypoints
+             keypoints = (keypoints - reference[0]) / reference[1]
+
+             # Recognize the head position
+             prediction = self.cursor_model.predict(keypoints.reshape((1, -1)))[0]
+
+             return [self.images[prediction], self.movement[prediction]]
+
+         else:
+             return [self.images[0], self.movement[0]]
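The normalization step in `predict` maps each keypoint into bounding-box coordinates: subtracting the box origin and dividing by its size yields values that do not depend on where the face sits in the frame. A small numeric sketch with made-up detection values:

```python
import numpy as np

# Made-up detection values, for illustration only.
reference = np.array([[0.40, 0.30],   # (xmin, ymin) of the face box
                      [0.20, 0.25]])  # (width, height) of the face box
keypoints = np.array([[0.46, 0.40],   # e.g. right eye
                      [0.54, 0.41]])  # e.g. left eye

# Same operation as in predict(): offset relative to the box, scaled by its size.
print((keypoints - reference[0]) / reference[1])
# [[0.3  0.4 ]
#  [0.7  0.44]]
```

MediaPipe Face Detection returns six relative keypoints (eyes, nose tip, mouth center, ear tragions), so the flattened vector passed to `cursor_model.predict` has 12 features.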
app.py ADDED
@@ -0,0 +1,39 @@
+ import gradio as gr
+ from FacePosition import FacePosition
+ from AudioClassifier import AudioClassifier
+
+
+ # Create an instance of the FacePosition class
+ movement_controller = FacePosition()
+
+ cursor_movement = gr.Interface(
+     fn=movement_controller.predict,
+     inputs=gr.Image(source='webcam', streaming=True),
+     outputs=['image', 'text'],
+     live=True,
+     title='Cursor movement controller',
+     description="This space provides functionality for detecting a face using the MediaPipe library and controlling the cursor's movement accordingly."
+ )
+
+
+ # Create an instance of the AudioClassifier class
+ audio_classifier = AudioClassifier()
+
+ audio_commands = gr.Interface(
+     fn=audio_classifier.predict,
+     inputs=gr.Audio(source="microphone", type="filepath", streaming=True),
+     outputs="text",
+     live=True,
+     title='Speech commands recognition (mouse actions)',
+     description='This space provides functionality for classifying audio commands associated with mouse actions, based on a pre-trained audio classification model.'
+ )
+
+
+ demo = gr.TabbedInterface([cursor_movement, audio_commands],
+                           title='Hands-free Cursor Application',
+                           tab_names=['Cursor movement controller', 'Speech commands recognition'],
+                           theme=gr.themes.Soft())
+
+
+ if __name__ == "__main__":
+     demo.launch()
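Note that `gr.Image(source='webcam')` and `gr.Audio(source='microphone')` follow the Gradio 3.x API. A sketch of the equivalent inputs under the assumption of running on Gradio 4+, where the keyword was renamed to the plural `sources`:

```python
# Assumed Gradio 4.x equivalents (the `source=` keyword was replaced
# by `sources=[...]` in Gradio 4; this commit targets the 3.x API).
webcam_input = gr.Image(sources=["webcam"], streaming=True)
mic_input = gr.Audio(sources=["microphone"], type="filepath", streaming=True)
```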
cursor_movement_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3751a31fbe1163000ffc0ae0e230430475ad150412947a16aef3ebdfb6792d4d
+ size 1696