File size: 5,158 Bytes
34f7d44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import os
import numpy as np
import cv2
import landmark_detection
import gradio as gr
from mtcnn_facedetection import detect_faces


def _blend_rgba_patch(canvas, patch, x, y):
    """Alpha-blend the RGBA ``patch`` onto ``canvas`` in place.

    Args:
        canvas: Image array that is modified in place; its first three
            channels are blended.
        patch: Array with four channels; channel 3 is used as the alpha
            mask (0-255) and channels 0-2 as the colour.
        x: Column in ``canvas`` where the patch's left edge goes (may be
            negative or beyond the canvas).
        y: Row in ``canvas`` where the patch's top edge goes (may be
            negative or beyond the canvas).
    """
    patch_h, patch_w = patch.shape[:2]

    # Destination rectangle, clipped to the canvas bounds.
    y1, y2 = max(0, y), min(canvas.shape[0], y + patch_h)
    x1, x2 = max(0, x), min(canvas.shape[1], x + patch_w)
    if y2 <= y1 or x2 <= x1:
        # The patch lies completely outside the canvas.
        return

    # Matching rectangle inside the patch (non-zero offset when the patch
    # sticks out past the top/left edge of the canvas).
    p_y1, p_x1 = y1 - y, x1 - x
    patch_roi = patch[p_y1 : p_y1 + (y2 - y1), p_x1 : p_x1 + (x2 - x1)]
    canvas_roi = canvas[y1:y2, x1:x2]  # view: writes land in ``canvas``

    # Keep the alpha plane 3-D so it broadcasts over the colour channels.
    alpha = patch_roi[:, :, 3:4] / 255.0
    blended = patch_roi[:, :, :3] * alpha + canvas_roi[:, :, :3] * (1 - alpha)
    canvas_roi[:, :, :3] = blended.astype(np.uint8)


def apply_sunglasses(image, landmarks, sunglasses_img, *, width_scale=2.5):
    """Overlay a sunglasses image on every face described by ``landmarks``.

    For each face the eye centres are taken as the mean of landmark points
    36-41 and 42-47 (presumably the 68-point dlib/iBUG layout -- confirm
    against ``landmark_detection``). The sunglasses are scaled to
    ``width_scale`` times the eye distance, rotated to follow the line
    between the eyes, centred on the midpoint between them and
    alpha-blended onto a copy of ``image``.

    Args:
        image: Image array with at least three channels.
        landmarks: Sequence (list or array) of per-face landmark
            collections, each indexable as ``face[i] -> (x, y)``.
        sunglasses_img: Overlay image with four channels, the fourth being
            alpha, or ``None``.
        width_scale: Sunglasses width as a multiple of the distance between
            the eye centres.

    Returns:
        ``image`` itself when ``sunglasses_img`` is ``None`` or there are no
        landmarks; otherwise a copy of ``image`` with the overlay applied.
        The copy is unchanged when the overlay has no alpha channel or no
        face could be processed.
    """
    # ``len`` instead of truthiness: a NumPy array of landmarks would raise
    # "truth value of an array is ambiguous" under ``not landmarks``.
    if sunglasses_img is None or landmarks is None or len(landmarks) == 0:
        return image

    result = image.copy()

    # Only overlays with an alpha channel can be blended; bail out before
    # doing any resize/rotate work.
    if sunglasses_img.ndim != 3 or sunglasses_img.shape[2] != 4:
        return result

    for face_landmarks in landmarks:
        # The eye slices below need indices up to 47.
        if len(face_landmarks) < 48:
            continue

        left_eye_center = np.mean(face_landmarks[36:42], axis=0).astype(int)
        right_eye_center = np.mean(face_landmarks[42:48], axis=0).astype(int)

        eye_delta = right_eye_center - left_eye_center
        eye_distance = np.linalg.norm(eye_delta)
        # Negate the angle to correct rotation direction
        angle = -np.degrees(np.arctan2(eye_delta[1], eye_delta[0]))

        # Size for sunglasses based on eye distance, keeping aspect ratio.
        width = int(eye_distance * width_scale)
        height = int(width * sunglasses_img.shape[0] / sunglasses_img.shape[1])
        if width <= 0 or height <= 0:
            # Degenerate face (e.g. coincident eyes): cv2.resize would fail.
            continue

        sunglasses_resized = cv2.resize(sunglasses_img, (width, height))

        # Rotate about the centre of the resized overlay.
        center = (width // 2, height // 2)
        rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)

        # Bounding box of the rotated overlay, so no corner is cut off.
        cos = np.abs(rotation_matrix[0, 0])
        sin = np.abs(rotation_matrix[0, 1])
        new_width = int((height * sin) + (width * cos))
        new_height = int((height * cos) + (width * sin))

        # Shift the transform so the result is centred in the larger canvas.
        rotation_matrix[0, 2] += (new_width / 2) - center[0]
        rotation_matrix[1, 2] += (new_height / 2) - center[1]

        rotated_glasses = cv2.warpAffine(
            sunglasses_resized,
            rotation_matrix,
            (new_width, new_height),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0, 0),  # transparent padding
        )

        # Centre the rotated overlay on the midpoint between the eyes.
        eye_center = ((left_eye_center + right_eye_center) // 2).astype(int)
        x = eye_center[0] - new_width // 2
        y = eye_center[1] - new_height // 2

        _blend_rgba_patch(result, rotated_glasses, x, y)

    return result


def do_facial_landmark_recognition(
    image: np.ndarray, face_boxes: list[landmark_detection.BoundingBox]
):
    """Run landmark detection on the faces located by ``face_boxes``.

    ``image`` and ``face_boxes`` are handed to
    ``landmark_detection.get_faces``; whatever that returns is passed to
    ``landmark_detection.get_landmarks`` and its result is returned as is.
    """
    return landmark_detection.get_landmarks(
        landmark_detection.get_faces(image, face_boxes)
    )


def do_facial_landmark_recognition_with_mtcnn(image: np.ndarray, sunglasses_img):
    """Detect faces in ``image``, find their landmarks and add sunglasses.

    Face boxes come from ``detect_faces``, landmarks from
    ``do_facial_landmark_recognition``; the return value is whatever
    ``apply_sunglasses`` produces for ``image`` and ``sunglasses_img``.
    """
    detected_boxes = detect_faces(image)
    return apply_sunglasses(
        image,
        do_facial_landmark_recognition(image, detected_boxes),
        sunglasses_img,
    )


def process_video(input_path, sunglasses_img):
    """Apply the sunglasses overlay to every frame of a video file.

    The result is written next to the input as ``output_<input file name>``
    using the ``mp4v`` codec and the input's frame size and frame rate.

    Args:
        input_path: Path of the video to read.
        sunglasses_img: Overlay image forwarded to
            ``do_facial_landmark_recognition_with_mtcnn`` for each frame.

    Returns:
        The path of the written output video.

    Raises:
        gr.Error: If the input video cannot be opened or the output writer
            cannot be created.
    """
    output_path = os.path.join(
        os.path.dirname(input_path), "output_" + os.path.basename(input_path)
    )

    # Open the input video
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        cap.release()
        # gr.Error is an exception: it only reaches the user when raised.
        raise gr.Error(f"Error opening input video file: {input_path}")

    out = None
    try:
        # Get video properties
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        # Create VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            # Without this check every write would be dropped silently and
            # success would still be reported.
            raise gr.Error(f"Error opening output video file: {output_path}")

        # Process each frame
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            out.write(
                do_facial_landmark_recognition_with_mtcnn(frame, sunglasses_img)
            )
    finally:
        # Release resources even when a frame fails to process.
        cap.release()
        if out is not None:
            out.release()

    gr.Info(f"Video processing complete. Output saved to: {output_path}")
    return output_path