import cv2
from moviepy.editor import VideoFileClip
from retinaface import RetinaFace
from hsemotion.facial_emotions import HSEmotionRecognizer

recognizer = HSEmotionRecognizer(model_name='enet_b0_8_best_vgaf', device='cpu')


def detect_faces(frame):
    """Detect faces with RetinaFace and return a list of {'box': (x, y, w, h)} dicts."""
    faces = RetinaFace.detect_faces(frame)
    if isinstance(faces, dict):
        face_list = []
        for face in faces.values():
            # RetinaFace reports facial_area as [x1, y1, x2, y2]; convert to (x, y, w, h).
            x1, y1, x2, y2 = face['facial_area']
            face_list.append({'box': (x1, y1, x2 - x1, y2 - y1)})
        return face_list
    return []


def annotate_frame(frame, faces):
    """Draw a bounding box and predicted emotion label on each detected face (in place)."""
    for face in faces:
        x, y, w, h = face['box']
        face_image = frame[y:y + h, x:x + w]
        if face_image.size == 0:
            # Skip degenerate crops, e.g. boxes clipped entirely outside the frame.
            continue
        emotion = classify_emotions(face_image)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        cv2.putText(frame, emotion, (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)


def classify_emotions(face_image):
    """Return the top emotion label for an RGB face crop, or 'Unknown' if prediction fails."""
    # predict_emotions returns (label, scores); the first element is the label string.
    results = recognizer.predict_emotions(face_image)
    return results[0] if results else 'Unknown'


def process_video_frames(video_path, temp_output_path, frame_skip=5):
    """Annotate every frame_skip-th frame and write all frames to a temporary, silent video."""
    video_clip = VideoFileClip(video_path)
    fps = video_clip.fps
    out = cv2.VideoWriter(temp_output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                          (int(video_clip.size[0]), int(video_clip.size[1])))
    frame_count = 0
    for frame in video_clip.iter_frames():
        if frame_count % frame_skip == 0:
            faces = detect_faces(frame)
            annotate_frame(frame, faces)
        # MoviePy yields RGB frames; OpenCV's VideoWriter expects BGR.
        out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
        frame_count += 1
    out.release()
    video_clip.close()


def add_audio_to_video(original_video_path, processed_video_path, output_path):
    """Copy the original audio track onto the processed (silent) video."""
    original_clip = None
    processed_clip = None
    try:
        original_clip = VideoFileClip(original_video_path)
        processed_clip = VideoFileClip(processed_video_path)
        final_clip = processed_clip.set_audio(original_clip.audio)
        final_clip.write_videofile(output_path, codec='libx264', audio_codec='aac')
    except Exception as e:
        print(f"Error while combining with audio: {e}")
    finally:
        # Guard against NameError when the first VideoFileClip call raises.
        if original_clip is not None:
            original_clip.close()
        if processed_clip is not None:
            processed_clip.close()


def process_video(video_path, output_path):
    temp_output_path = 'temp_output_video.mp4'
    process_video_frames(video_path, temp_output_path, frame_skip=5)
    add_audio_to_video(video_path, temp_output_path, output_path)


def process_image(input_path, output_path):
    image = cv2.imread(input_path)
    if image is None:
        print(f"Error: Unable to read image at '{input_path}'")
        return
    # cv2.imread returns BGR; convert to RGB so the emotion model sees the same
    # channel order as the video path, then convert back for saving.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    faces = detect_faces(rgb_image)
    annotate_frame(rgb_image, faces)
    annotated = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
    # Write the original and annotated images side by side for easy comparison.
    combined_image = cv2.hconcat([image, annotated])
    cv2.imwrite(output_path, combined_image)
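

# Example driver: a minimal usage sketch, not part of the pipeline above.
# The file names below ('input.mp4', 'people.jpg', and the output paths) are
# hypothetical placeholders; substitute your own media files.
if __name__ == '__main__':
    # Annotate a video and re-attach its original audio track.
    process_video('input.mp4', 'output_annotated.mp4')

    # Annotate a still image; the result is the original and annotated
    # versions concatenated side by side.
    process_image('people.jpg', 'output_comparison.jpg')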