File size: 3,138 Bytes
761f4da
 
96705f2
 
2587bd0
96705f2
a52b576
0c2700d
 
d5de1e1
 
 
0c2700d
761f4da
4d29448
761f4da
0c2700d
 
 
 
 
 
 
 
 
 
 
96705f2
0c2700d
 
96705f2
 
 
0c2700d
96705f2
 
9e3243b
96705f2
 
 
761f4da
96705f2
 
 
 
 
 
 
761f4da
96705f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8aa6df
 
 
 
 
35d42b9
f97c524
96705f2
 
 
 
 
 
0c2700d
 
78eae32
0b0c34c
96705f2
 
0c2700d
 
 
761f4da
 
0c2700d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import base64
import os
import tempfile
import time

import chardet
import cv2
import face_recognition
import gradio as gr
from fastai.vision.all import load_learner

# Disabled workaround, kept for reference.
# NOTE(review): presumably intended for loading, on Windows, a learner that was
# exported on a POSIX system (the pickle would then reference
# pathlib.PosixPath) — confirm before re-enabling.
# import pathlib
# temp = pathlib.PosixPath
# pathlib.PosixPath = pathlib.WindowsPath


# Load the exported fastai learner once, at import time; video_processing()
# calls model.predict() on every detected face crop.
# NOTE(review): load_learner unpickles the file, so "gaze-recognizer-v3.pkl"
# must come from a trusted source. The relative path is resolved against the
# current working directory.
model = load_learner("gaze-recognizer-v3.pkl")

# Only the last frame of every group of this many frames is analysed.
# NOTE(review): 24 * 3 presumably means "one sample every 3 seconds at 24 fps";
# the actual frame rate of the input is never queried.
_FRAME_STRIDE = 24 * 3


def _sample_frames(capture, stride=_FRAME_STRIDE):
    """Yield the last frame of each complete group of *stride* frames.

    Iteration stops as soon as the capture fails to deliver a frame, so a
    trailing, incomplete group is discarded.
    """
    while True:
        # Advance past the frames that are not analysed. grab() moves to the
        # next frame without handing the image back to Python.
        for _ in range(stride - 1):
            if not capture.grab():
                return
        ok, frame = capture.read()
        if not ok:
            return
        yield frame


def video_processing(video_file, encoded_video):
    """Estimate how often the faces in a video are classified as 'on_camera'.

    Every ``_FRAME_STRIDE``-th frame is converted to grayscale, faces are
    located with ``face_recognition``, and each face crop (resized to
    128x128) is classified by the module-level ``model``.

    Args:
        video_file: Path of the video to analyse. Ignored when
            ``encoded_video`` is non-empty.
        encoded_video: Base64-encoded video data. When non-empty it is decoded
            into a temporary ``.mp4`` file that is analysed instead of
            ``video_file``; ``""`` or ``None`` means "use ``video_file``".

    Returns:
        str: ``on_camera / total * 100`` as a string, where ``total`` counts
        every classified face (including faces whose label is neither
        'on_camera' nor 'off_camera'). If no face was classified, a
        ``'no face detected ...'`` message containing the counters instead.
    """
    on_camera = 0
    off_camera = 0
    total = 0
    temp_path = None
    video_capture = None

    try:
        if encoded_video:
            decoded_file_data = base64.b64decode(encoded_video)
            # A unique temporary file keeps concurrent requests from
            # overwriting or deleting each other's upload.
            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
                temp_path = tmp.name
                tmp.write(decoded_file_data)
            video_file = temp_path

        start_time = time.time()
        video_capture = cv2.VideoCapture(video_file)

        for frame in _sample_frames(video_capture):
            # Detection and classification both run on the grayscale image.
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Default face_recognition detector (no upsampling/model override).
            for top, right, bottom, left in face_recognition.face_locations(gray):
                face_image = gray[top:bottom, left:right]
                resized_face_image = cv2.resize(face_image, (128, 128))

                result = model.predict(resized_face_image)
                print(result[0])
                if result[0] == 'on_camera':
                    on_camera += 1
                elif result[0] == 'off_camera':
                    off_camera += 1
                total += 1
    finally:
        # Release the capture and remove the temporary upload even when
        # decoding, detection or prediction raised.
        if video_capture is not None:
            video_capture.release()
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)

    try:
        gaze_percentage = on_camera / total * 100
    except ZeroDivisionError as e:
        # total == 0: no face was classified in any sampled frame.
        print(f"An error occurred while processing the video: {e}")
        gaze_percentage = f'no face detected Total = {total},on_camera = {on_camera},off_camera = {off_camera}'

    print(f'Total = {total},on_camera = {on_camera},off_camera = {off_camera}')
    end_time = time.time()
    print(f'Time taken: {end_time-start_time}')
    print(gaze_percentage)
    return str(gaze_percentage)


# Gradio front end: a video upload and a text box feed video_processing();
# the string it returns is shown as text.
demo = gr.Interface(
    fn=video_processing,
    inputs=["video", "text"],
    outputs="text",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()