import io
import tempfile

import cv2
import numpy as np
import openvino as ov
import PIL.Image
import streamlit as st
import moviepy.editor as mpy


class EmotionModel:
    """Detects faces with OpenVINO and annotates each one with a predicted emotion."""

    def __init__(self):
        self.face_compiled_model, self.face_input_layer, self.face_output_layer = self.load_model("face-detection-adas-0001")
        self.emotion_compiled_model, self.emotion_input_layer, self.emotion_output_layer = self.load_model("emotions-recognition-retail-0003")
        self.emotions = {0: "neutral", 1: "happy", 2: "sad", 3: "surprise", 4: "anger"}

    def load_model(self, model_name):
        """Read an OpenVINO IR model from models/<name>.xml and compile it for CPU."""
        model_path = "models/" + model_name + ".xml"
        core = ov.Core()
        model = core.read_model(model=model_path)
        compiled_model = core.compile_model(model=model, device_name="CPU")
        input_layer = compiled_model.input(0)
        output_layer = compiled_model.output(0)
        return compiled_model, input_layer, output_layer

    def preprocess(self, img, input_layer):
        """Resize the image to the model's input size and reorder it to NCHW."""
        input_h, input_w = input_layer.shape[2], input_layer.shape[3]
        input_img = cv2.resize(img, (input_w, input_h))
        input_img = input_img.transpose(2, 0, 1)
        input_img = np.expand_dims(input_img, 0)
        return input_img

    def detect_faces(self, uploaded_img_cv, conf=0.5):
        input_img = self.preprocess(uploaded_img_cv, self.face_input_layer)
        result_face = self.face_compiled_model([input_img])[self.face_output_layer]
        boxes = self.post_process_face(result_face, uploaded_img_cv, conf)
        return boxes

    def post_process_face(self, result_face, img, conf=0.5):
        """Convert raw detections to pixel boxes and draw them on the image."""
        boxes = []
        h, w, _ = img.shape
        predictions = result_face[0][0]  # detections: [image_id, label, conf, x_min, y_min, x_max, y_max]
        confidence = predictions[:, 2]  # confidence values
        top_predictions = predictions[confidence > conf]  # keep only predictions above the threshold
        for detection in top_predictions:
            box = (detection[3:7] * np.array([w, h, w, h])).astype("int")  # scale normalized coordinates to pixels
            box = [0 if i < 0 else i for i in box]  # clamp negative coordinates to the image border
            (xmin, ymin, xmax, ymax) = box
            boxes.append(box)
            cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)  # draw the face box
        return boxes

    def detect_emotions(self, uploaded_img_cv, boxes):
        """Run the emotion model on each detected face crop and label it in place."""
        for box in boxes:
            xmin, ymin, xmax, ymax = box
            emotion_input = uploaded_img_cv[ymin:ymax, xmin:xmax]
            input_img = self.preprocess(emotion_input, self.emotion_input_layer)
            result_emotion = self.emotion_compiled_model([input_img])[self.emotion_output_layer]
            self.post_process_emotion(result_emotion, uploaded_img_cv, box)

    def post_process_emotion(self, result_emotion, img, face_position):
        """Pick the highest-scoring emotion and write its label near the face box."""
        predictions = result_emotion[0, :, 0, 0]
        topresult_index = np.argmax(predictions)
        emotion = self.emotions[topresult_index]
        font_size = img.shape[0] / 1000
        font_thickness = int(img.shape[0] / 500)
        text_offset = int(img.shape[0] / 30)
        cv2.putText(img, emotion, (face_position[0], face_position[1] + text_offset),
                    cv2.FONT_HERSHEY_SIMPLEX, font_size, (255, 255, 255), font_thickness)
        return emotion

    def process(self, img):
        """Annotate a single image (NumPy array or file-like object) with faces and emotions."""
        if isinstance(img, np.ndarray):
            uploaded_img_cv = img
        else:
            uploaded_img = PIL.Image.open(img)
            uploaded_img_cv = np.array(uploaded_img)
        boxes = self.detect_faces(uploaded_img_cv)
        self.detect_emotions(uploaded_img_cv, boxes)
        return uploaded_img_cv

    def play_video(self, input_video):
        """Process an uploaded video frame by frame with a Streamlit preview,
        then re-encode the annotated frames and return the output file path."""
        uploaded_video = io.BytesIO(input_video.read())
        temporary_location = "upload.mp4"
        with open(temporary_location, "wb") as out:
            out.write(uploaded_video.read())

        camera = cv2.VideoCapture(temporary_location)
        fps = camera.get(cv2.CAP_PROP_FPS)
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
        processed_frames = []
        total_frames = int(camera.get(cv2.CAP_PROP_FRAME_COUNT))
        progress_bar = st.progress(0)
        frame_count = 0
        st_frame = st.empty()

        while camera.isOpened():
            ret, frame = camera.read()
            if ret:
                emotion_img = self.process(frame)
                st_frame.image(emotion_img, channels="BGR")
                processed_frames.append(cv2.cvtColor(emotion_img, cv2.COLOR_BGR2RGB))
                frame_count += 1
                # CAP_PROP_FRAME_COUNT is only an estimate, so clamp to keep st.progress in [0, 1]
                progress_bar.progress(min(frame_count / total_frames, 1.0))
            else:
                camera.release()
                st_frame.empty()
                progress_bar.empty()
                break

        clip = mpy.ImageSequenceClip(processed_frames, fps=fps)
        clip.write_videofile(temp_file.name)
        return temp_file.name
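

# ---------------------------------------------------------------------------
# Example usage: a minimal Streamlit front end for EmotionModel. This is an
# illustrative sketch only; the widget label, accepted file types, and the
# image/video dispatch below are assumptions rather than part of the module.
# Run with:  streamlit run <this_file>.py
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    model = EmotionModel()
    uploaded = st.file_uploader("Upload an image or video", type=["jpg", "jpeg", "png", "mp4"])
    if uploaded is not None:
        if uploaded.type.startswith("video"):
            # play_video previews frames as they are processed and returns the
            # path of the re-encoded, annotated clip.
            result_path = model.play_video(uploaded)
            st.video(result_path)
        else:
            # process accepts a file-like object and returns the annotated image
            # as a NumPy array, which st.image can display directly.
            st.image(model.process(uploaded))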