File size: 2,978 Bytes
1e0a8f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
"""
FacePosition class

Author: HenryAreiza
Date: 08/09/2023
"""

import os
import cv2
import pickle
import numpy as np
import mediapipe as mp

class FacePosition:
    """
    Classify the head position seen in an image frame.

    A MediaPipe face detector locates the face, the detected keypoints are
    normalized relative to the face bounding box, and a pre-trained model
    maps them to one of nine head-position classes. The class itself does
    not move the cursor; it only returns the predicted class.

    Attributes:
        movement (list): Class labels, indexed by the model's predicted class.
        images (list): One image per class, loaded from ``<media_dir>/<index>.png``.
        cursor_model: The unpickled model used to predict the class index.
        face_detection: The MediaPipe Face Detection component.
    """

    def __init__(self, model_path='cursor_movement_model.pkl', media_dir='media',
                 min_detection_confidence=0.5):
        """
        Load the class images, the prediction model and the face detector.

        Args:
            model_path (str): Path of the pickled prediction model.
            media_dir (str): Directory holding the class images ``0.png``,
                ``1.png``, ... (one per entry of ``movement``).
            min_detection_confidence (float): Confidence threshold passed to
                the MediaPipe face detector.
        """
        self.movement = ['Center', 'Up', 'Right/Up', 'Right', 'Right/Down',
                         'Down', 'Left/Down', 'Left', 'Left/Up']

        # NOTE: cv2.imread does not raise for a missing/unreadable file; it
        # returns None, so a missing image only shows up when it is returned.
        self.images = [cv2.imread(os.path.join(media_dir, str(i) + '.png'))
                       for i in range(len(self.movement))]

        # Load the cursor movement model.
        # NOTE(security): unpickling can execute arbitrary code; only load
        # model files from a trusted source.
        with open(model_path, 'rb') as f:
            self.cursor_model = pickle.load(f)

        # Initialize the MediaPipe Face Detection component
        self.face_detection = mp.solutions.face_detection.FaceDetection(
            min_detection_confidence=min_detection_confidence)

    def predict(self, frame):
        """
        Predict the head position shown in a frame.

        Args:
            frame: Image handed unchanged to the MediaPipe face detector.
                NOTE(review): MediaPipe presumably expects RGB input; confirm
                that the caller converts frames coming from OpenCV (BGR).

        Returns:
            result (list): ``[image, label]`` of the predicted class. The
            'Center' class (index 0) is returned when no face is detected or
            when the detected bounding box has zero width or height.
        """
        center = [self.images[0], self.movement[0]]

        # Perform face detection
        results = self.face_detection.process(frame)
        if not results.detections:
            return center

        # Only the first detected face is used
        location = results.detections[0].location_data
        box = location.relative_bounding_box
        origin = np.array([box.xmin, box.ymin])
        size = np.array([box.width, box.height])

        # A degenerate box would turn the normalization below into a
        # division by zero and feed inf/NaN features to the model.
        if np.any(size == 0):
            return center

        keypoints = np.array([[key_point.x, key_point.y]
                              for key_point in location.relative_keypoints])

        # Express the keypoints relative to the bounding box (remove the
        # offset and scale), so the features do not depend on where the
        # face sits in the frame or how large it appears.
        keypoints = (keypoints - origin) / size

        # Recognize the head position from the flattened keypoints
        prediction = self.cursor_model.predict(keypoints.reshape((1, -1)))[0]

        return [self.images[prediction], self.movement[prediction]]