import sys, os

# Append this file's directory and its parent directory to sys.path, presumably
# so the `face_detection.*` imports below resolve when the file is run directly.
# NOTE(review): at module level `__dir__` is also the PEP 562 hook that
# `dir(module)` calls; binding it to a string makes `dir()` on this module raise
# TypeError. A neutral name (e.g. `_HERE`) would avoid that — confirm nothing
# imports `__dir__` from this module before renaming.
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))

import torch
import cv2
import numpy as np

from face_detection.data import cfg_mnet, cfg_re50
from face_detection.layers.functions.prior_box import PriorBox
from face_detection.utils.nms.py_cpu_nms import py_cpu_nms
from face_detection.models.retinaface import RetinaFace
from face_detection.utils.box_utils import decode

from face_detection.helper import remove_prefix, check_keys, get_boundingbox

class FaceDetection:
    """Face detector built on RetinaFace that returns cropped face images.

    The network is always constructed with the ``cfg_re50`` configuration,
    whatever checkpoint path is supplied, so the weights must match it.
    Instances are callable: ``detector(images)`` forwards to :meth:`detect`
    using the model and device selected at construction time.
    """

    # Per-channel offset subtracted from every image before inference and
    # added back when faces are cropped (channel order presumably BGR, as
    # produced by cv2.imread — confirm against the training pipeline).
    _CHANNEL_MEAN = (104, 117, 123)
    # NMS threshold passed to py_cpu_nms.
    _NMS_THRESHOLD = 0.4

    def __init__(self, batch_size=16, device=None, gpu_id=0, weights='./weights/Resnet50_Final.pth'):
        """Select a device, load the checkpoint and put the model in eval mode.

        Args:
            batch_size: Maximum number of images per forward pass (>= 1).
            device: Explicit device (string or ``torch.device``). When falsy,
                ``cuda:<gpu_id>`` is used if CUDA is available and
                ``gpu_id != -1``, otherwise the CPU.
            gpu_id: CUDA device index used when ``device`` is not given;
                ``-1`` forces the CPU.
            weights: Path of the checkpoint file to load.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError('batch_size must be >= 1, got {}'.format(batch_size))

        # Process-wide switch: it affects every cuDNN user, not only this object.
        torch.backends.cudnn.benchmark = True

        if device:
            selected = device
        elif torch.cuda.is_available() and gpu_id != -1:
            selected = 'cuda:{}'.format(gpu_id)
        else:
            selected = 'cpu'
        self.device = torch.device(selected)

        self.batch_size = batch_size

        # Compare the device *type* so that spellings such as 'cpu:0' are
        # still recognised as CPU.
        self.load_model(weights, self.device, load_to_cpu=self.device.type == 'cpu')
        self.model.eval()

    def load_model(self, pretrained_path, device, load_to_cpu=False):
        """Build the RetinaFace network and load weights into ``self.model``.

        Args:
            pretrained_path: Path of the checkpoint file.
            device: Device the finished model is moved to.
            load_to_cpu: When true the checkpoint tensors are first mapped to
                the CPU; otherwise they are mapped straight to ``device``.
        """
        print('Loading pretrained model from {}'.format(pretrained_path))
        device = torch.device(device)
        # NOTE: torch.load unpickles the file — only load checkpoints that
        # come from a trusted source.
        map_location = torch.device('cpu') if load_to_cpu else device
        checkpoint = torch.load(pretrained_path, map_location=map_location)

        # A checkpoint may wrap the weights under a "state_dict" key.
        if "state_dict" in checkpoint:
            checkpoint = checkpoint['state_dict']
        # Strip the 'module.' key prefix (presumably left by DataParallel).
        state_dict = remove_prefix(checkpoint, 'module.')

        self.model = RetinaFace(cfg=cfg_re50, phase='test')
        check_keys(self.model, state_dict)
        # strict=False: keys that do not match the model are not treated as errors.
        self.model.load_state_dict(state_dict, strict=False)
        self.model.to(device)

    def detect(self, images, model, device, resize=1, confidence_threshold=0.997,
               keep_empty=False):
        """Detect faces in a batch of equally sized images and crop them.

        Args:
            images: Iterable of H x W x 3 image arrays. All images must share
                the same shape because they are stacked into one tensor.
            model: Callable that takes an N x 3 x H x W float tensor and
                returns a 3-tuple whose first two items are the box
                regressions and the class confidences.
            device: Device on which inference and box decoding run.
            resize: Divisor applied to the decoded box coordinates. The images
                themselves are not resized by this method.
            confidence_threshold: Only detections whose score (column 1 of the
                confidences) is strictly greater than this value are kept.
            keep_empty: When false (default), images without any detection are
                omitted, so the result may be shorter than ``images`` and
                cannot be matched to the inputs by position. When true, such
                images contribute an entry with empty lists, so
                ``result[k]`` always belongs to the k-th image.

        Returns:
            A list of dicts, one per reported image, each with:
                ``'face_images'``: list of uint8 crops, one per face.
                ``'face_bbox'``: list of dicts with ``'top'``, ``'left'``,
                ``'width'`` and ``'height'`` as returned by ``get_boundingbox``.
            An empty list is returned when ``images`` is empty.
        """
        images = list(images)
        if not images:
            return []

        mean = torch.tensor(self._CHANNEL_MEAN)
        img_list = [torch.tensor(img) - mean for img in images]

        im_height, im_width, _ = img_list[0].shape
        scale = torch.Tensor([im_width, im_height, im_width, im_height]).to(device)

        stacked = torch.stack(img_list, dim=0)  # N x H x W x C
        locs, confs = self._forward_batches(stacked.permute([0, 3, 1, 2]), model, device)

        priors = PriorBox(cfg_re50, image_size=(im_height, im_width)).forward()
        prior_data = priors.to(device).data

        result = []
        for img, loc, conf in zip(stacked.cpu().numpy(), locs, confs):
            dets = self._select_detections(
                loc, conf, prior_data, scale, resize, confidence_threshold)
            if len(dets) == 0 and not keep_empty:
                continue
            result.append(self._crop_faces(img, dets))
        return result

    def _forward_batches(self, img_x, model, device):
        """Run ``model`` over ``img_x`` in chunks of ``self.batch_size``."""
        locs_list = []
        confs_list = []
        with torch.no_grad():
            for start in range(0, img_x.shape[0], self.batch_size):
                batch = img_x[start:start + self.batch_size].to(device).float()
                # The third model output is not needed here.
                loc, conf, _ = model(batch)
                locs_list.append(loc)
                confs_list.append(conf)
        return torch.cat(locs_list, dim=0), torch.cat(confs_list, dim=0)

    def _select_detections(self, loc, conf, prior_data, scale, resize, confidence_threshold):
        """Decode one image's boxes, threshold by score and apply NMS."""
        boxes = decode(loc.data, prior_data, cfg_re50['variance'])
        boxes = (boxes * scale / resize).cpu().numpy()
        scores = conf.data.cpu().numpy()[:, 1]

        # Drop low scores.
        inds = np.where(scores > confidence_threshold)[0]
        boxes = boxes[inds]
        scores = scores[inds]

        # Highest score first, as input order for NMS (no top-k cut is applied).
        order = scores.argsort()[::-1]
        boxes = boxes[order]
        scores = scores[order]

        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(
            np.float32, copy=False)
        keep = py_cpu_nms(dets, self._NMS_THRESHOLD)
        return dets[keep, :]

    def _crop_faces(self, img, dets):
        """Cut every detection out of the mean-subtracted ``img``."""
        face_images = []
        face_bbox = []
        height, width = img.shape[0], img.shape[1]
        for det in dets:
            det = list(map(int, det))
            x, y, size_bb_x, size_bb_y = get_boundingbox(det, width, height)
            # Undo the mean subtraction before converting back to uint8.
            cropped_img = img[y:y + size_bb_y, x:x + size_bb_x, :] + self._CHANNEL_MEAN
            face_images.append(cropped_img.astype(np.uint8))
            face_bbox.append({
                'top': y,
                'left': x,
                'width': size_bb_x,
                'height': size_bb_y,
            })
        return {'face_images': face_images,
                'face_bbox': face_bbox}

    def __call__(self, images, confidence_threshold=0.997, return_heatmap=False,
                 keep_empty=False):
        """Run :meth:`detect` with this instance's model and device.

        ``return_heatmap`` is accepted for interface compatibility but is
        currently ignored. See :meth:`detect` for the other arguments and the
        return value.
        """
        return self.detect(images, model=self.model, device=self.device,
                           confidence_threshold=confidence_threshold,
                           keep_empty=keep_empty)

if __name__ == '__main__':
    # Smoke test: run the detector on one sample image and print the result.
    sample_path = 'sample_files/wefie.jpg'
    img = cv2.imread(sample_path)
    if img is None:
        # cv2.imread signals a missing or unreadable file by returning None
        # instead of raising, which would otherwise fail obscurely later on.
        raise OSError('Could not read image: {}'.format(sample_path))
    face_detector = FaceDetection()
    result = face_detector([img])
    print(result)