import io
import tempfile

import cv2
import numpy as np
import openvino as ov
import PIL.Image
import streamlit as st
import moviepy.editor as mpy


class EmotionModel:
    """Detects faces with OpenVINO and annotates each one with a predicted emotion."""

    def __init__(self):
        self.face_compiled_model, self.face_input_layer, self.face_output_layer = self.load_model("face-detection-adas-0001")
        self.emotion_compiled_model, self.emotion_input_layer, self.emotion_output_layer = self.load_model("emotions-recognition-retail-0003")
        self.emotions = {0: "neutral", 1: "happy", 2: "sad", 3: "surprise", 4: "anger"}

    def load_model(self, model_name):
        """Read an OpenVINO IR model from models/<name>.xml and compile it for CPU."""
        model_path = "models/" + model_name + ".xml"
        core = ov.Core()
        model = core.read_model(model=model_path)
        compiled_model = core.compile_model(model=model, device_name="CPU")
        input_layer = compiled_model.input(0)
        output_layer = compiled_model.output(0)
        return compiled_model, input_layer, output_layer

    def preprocess(self, img, input_layer):
        """Resize the image to the model's input size and reorder it to NCHW."""
        input_h, input_w = input_layer.shape[2], input_layer.shape[3]
        input_img = cv2.resize(img, (input_w, input_h))
        input_img = input_img.transpose(2, 0, 1)
        input_img = np.expand_dims(input_img, 0)
        return input_img

    def detect_faces(self, uploaded_img_cv, conf=0.5):
        input_img = self.preprocess(uploaded_img_cv, self.face_input_layer)
        result_face = self.face_compiled_model([input_img])[self.face_output_layer]
        boxes = self.post_process_face(result_face, uploaded_img_cv, conf)
        return boxes

    def post_process_face(self, result_face, img, conf=0.5):
        """Convert raw detections to pixel boxes and draw them on the image."""
        boxes = []
        h, w, _ = img.shape
        predictions = result_face[0][0]  # detections: [image_id, label, conf, x_min, y_min, x_max, y_max]
        confidence = predictions[:, 2]  # confidence values
        top_predictions = predictions[confidence > conf]  # keep only predictions above the threshold
        for detection in top_predictions:
            box = (detection[3:7] * np.array([w, h, w, h])).astype("int")  # scale normalized coordinates to pixels
            box = [0 if i < 0 else i for i in box]  # clamp negative coordinates to the image border
            (xmin, ymin, xmax, ymax) = box
            boxes.append(box)
            cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)  # draw the face box
        return boxes

    def detect_emotions(self, uploaded_img_cv, boxes):
        """Run the emotion model on each detected face crop and label it in place."""
        for box in boxes:
            xmin, ymin, xmax, ymax = box
            emotion_input = uploaded_img_cv[ymin:ymax, xmin:xmax]
            input_img = self.preprocess(emotion_input, self.emotion_input_layer)
            result_emotion = self.emotion_compiled_model([input_img])[self.emotion_output_layer]
            self.post_process_emotion(result_emotion, uploaded_img_cv, box)

    def post_process_emotion(self, result_emotion, img, face_position):
        """Pick the highest-scoring emotion and write its label near the face box."""
        predictions = result_emotion[0, :, 0, 0]
        topresult_index = np.argmax(predictions)
        emotion = self.emotions[topresult_index]
        font_size = img.shape[0] / 1000
        font_thickness = int(img.shape[0] / 500)
        text_offset = int(img.shape[0] / 30)
        cv2.putText(img, emotion, (face_position[0], face_position[1] + text_offset),
                    cv2.FONT_HERSHEY_SIMPLEX, font_size, (255, 255, 255), font_thickness)
        return emotion

    def process(self, img):
        """Annotate a single image (NumPy array or file-like object) with faces and emotions."""
        if isinstance(img, np.ndarray):
            uploaded_img_cv = img
        else:
            uploaded_img = PIL.Image.open(img)
            uploaded_img_cv = np.array(uploaded_img)
        boxes = self.detect_faces(uploaded_img_cv)
        self.detect_emotions(uploaded_img_cv, boxes)
        return uploaded_img_cv

    def play_video(self, input_video):
        """Process an uploaded video frame by frame with a Streamlit preview,
        then re-encode the annotated frames and return the output file path."""
        uploaded_video = io.BytesIO(input_video.read())
        temporary_location = "upload.mp4"
        with open(temporary_location, "wb") as out:
            out.write(uploaded_video.read())

        camera = cv2.VideoCapture(temporary_location)
        fps = camera.get(cv2.CAP_PROP_FPS)
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
        processed_frames = []
        total_frames = int(camera.get(cv2.CAP_PROP_FRAME_COUNT))
        progress_bar = st.progress(0)
        frame_count = 0
        st_frame = st.empty()

        while camera.isOpened():
            ret, frame = camera.read()
            if ret:
                emotion_img = self.process(frame)
                st_frame.image(emotion_img, channels="BGR")
                processed_frames.append(cv2.cvtColor(emotion_img, cv2.COLOR_BGR2RGB))
                frame_count += 1
                # CAP_PROP_FRAME_COUNT is only an estimate, so clamp to keep st.progress in [0, 1]
                progress_bar.progress(min(frame_count / total_frames, 1.0))
            else:
                camera.release()
                st_frame.empty()
                progress_bar.empty()
                break

        clip = mpy.ImageSequenceClip(processed_frames, fps=fps)
        clip.write_videofile(temp_file.name)
        return temp_file.name
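

# ---------------------------------------------------------------------------
# Example usage: a minimal Streamlit front end for EmotionModel. This is an
# illustrative sketch only; the widget label, accepted file types, and the
# image/video dispatch below are assumptions rather than part of the module.
# Run with:  streamlit run <this_file>.py
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    model = EmotionModel()
    uploaded = st.file_uploader("Upload an image or video", type=["jpg", "jpeg", "png", "mp4"])
    if uploaded is not None:
        if uploaded.type.startswith("video"):
            # play_video previews frames as they are processed and returns the
            # path of the re-encoded, annotated clip.
            result_path = model.play_video(uploaded)
            st.video(result_path)
        else:
            # process accepts a file-like object and returns the annotated image
            # as a NumPy array, which st.image can display directly.
            st.image(model.process(uploaded))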