#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Powered by Zongsheng Yue 2022-07-13 16:59:27

import random
import numpy as np

from omegaconf import OmegaConf
from ResizeRight.resize_right import resize

from utils import util_net
from utils import util_image
from utils import util_common

import torch

from basicsr.utils import img2tensor
from basicsr.archs.rrdbnet_arch import RRDBNet
from basicsr.utils.realesrgan_utils import RealESRGANer
from facelib.utils.face_restoration_helper import FaceRestoreHelper

class BaseSampler:
    def __init__(self, configs):
        '''
        Input:
            configs: config, see the yaml file in folder ./configs/sample/
        '''
        self.configs = configs
        self.display = configs.display
        self.diffusion_cfg = configs.diffusion

        self.setup_dist()   # set up self.device and self.rank
        self.setup_seed()   # set up the random seed
        self.build_model()

    def setup_seed(self, seed=None):
        seed = self.configs.seed if seed is None else seed
        seed += (self.rank + 1) * 10000
        if self.rank == 0 and self.display:
            print(f'Setting random seed {seed}')
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    def setup_dist(self):
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
            print('Running on GPU...')
        else:
            self.device = torch.device('cpu')
            print('Running on CPU...')
        self.rank = 0

    def build_model(self):
        obj = util_common.get_obj_from_str(self.configs.diffusion.target)
        self.diffusion = obj(**self.configs.diffusion.params)

        obj = util_common.get_obj_from_str(self.configs.model.target)
        model = obj(**self.configs.model.params).to(self.device)
        if self.configs.model.ckpt_path is not None:
            self.load_model(model, self.configs.model.ckpt_path)
        self.model = model
        self.model.eval()

    def load_model(self, model, ckpt_path=None):
        if ckpt_path is not None:
            if self.rank == 0 and self.display:
                print(f'Loading from {ckpt_path}...')
            ckpt = torch.load(ckpt_path, map_location=self.device)
            util_net.reload_model(model, ckpt)
            if self.rank == 0 and self.display:
                print('Loading done')

    def reset_diffusion(self, diffusion_cfg):
        # rebuild the diffusion object from a new config (mirrors build_model)
        obj = util_common.get_obj_from_str(diffusion_cfg.target)
        self.diffusion = obj(**diffusion_cfg.params)

class DifIRSampler(BaseSampler):
    def build_model(self):
        super().build_model()

        # restoration model for the initial estimate
        if self.configs.model_ir is not None:
            obj = util_common.get_obj_from_str(self.configs.model_ir.target)
            model_ir = obj(**self.configs.model_ir.params).to(self.device)
            if self.configs.model_ir.ckpt_path is not None:
                self.load_model(model_ir, self.configs.model_ir.ckpt_path)
            self.model_ir = model_ir
            self.model_ir.eval()

        if not self.configs.aligned:
            # face detection model
            self.face_helper = FaceRestoreHelper(
                self.configs.detection.upscale,
                face_size=self.configs.im_size,
                crop_ratio=(1, 1),
                det_model=self.configs.detection.det_model,
                save_ext='png',
                use_parse=True,
                device=self.device,
            )

            # background super-resolution
            if self.configs.background_enhance or self.configs.face_upsample:
                bg_model = RRDBNet(
                    num_in_ch=3,
                    num_out_ch=3,
                    num_feat=64,
                    num_block=23,
                    num_grow_ch=32,
                    scale=2,
                )
                self.bg_model = RealESRGANer(
                    scale=2,
                    model_path='https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth',
                    model=bg_model,
                    tile=400,
                    tile_pad=10,
                    pre_pad=0,
                    half=True,  # need to set False in CPU mode
                    device=torch.device(f'cuda:{self.rank}'),
                )
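    # ------------------------------------------------------------------
    # Note on `util_common.get_obj_from_str` used in build_model(): the
    # helper is defined outside this file, but it is presumably the
    # standard "import an object from a dotted path" utility, roughly
    # (a minimal sketch, assuming that common pattern):
    #
    #     import importlib
    #
    #     def get_obj_from_str(string):
    #         module_name, obj_name = string.rsplit('.', 1)
    #         return getattr(importlib.import_module(module_name), obj_name)
    #
    # so that `target` entries in the YAML configs (dotted class paths)
    # can be instantiated directly with their `params`.
    # ------------------------------------------------------------------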
    def sample_func_ir_aligned(
            self,
            y0,
            start_timesteps=None,
            post_fun=None,
            model_kwargs_ir=None,
            need_restoration=True,
            ):
        '''
        Input:
            y0: n x c x h x w torch tensor, low-quality image, [0, 1], RGB,
                or h x w x c numpy array, [0, 255], uint8, BGR
            start_timesteps: integer, range [0, num_timesteps-1];
                for accelerated sampling (e.g., 'ddim250'), range [0, 249]
            post_fun: post-processing for the enhanced image
            model_kwargs_ir: additional parameters for the restoration model
            need_restoration: whether to run the restoration model before diffusion
        Output:
            sample: n x c x h x w torch tensor, [0, 1], RGB
        '''
        if not isinstance(y0, torch.Tensor):
            y0 = img2tensor(y0, bgr2rgb=True, float32=True).unsqueeze(0) / 255.  # 1 x c x h x w, [0, 1]

        if start_timesteps is None:
            start_timesteps = self.diffusion.num_timesteps

        if post_fun is None:
            post_fun = lambda x: util_image.normalize_th(
                im=x,
                mean=0.5,
                std=0.5,
                reverse=False,
            )

        # basic image restoration
        device = next(self.model.parameters()).device
        y0 = y0.to(device=device, dtype=torch.float32)

        h_old, w_old = y0.shape[2:4]
        if not (h_old == self.configs.im_size and w_old == self.configs.im_size):
            y0 = resize(y0, out_shape=(self.configs.im_size,) * 2).to(torch.float32)

        if need_restoration:
            with torch.no_grad():
                if model_kwargs_ir is None:
                    im_hq = self.model_ir(y0)
                else:
                    im_hq = self.model_ir(y0, **model_kwargs_ir)
        else:
            im_hq = y0
        im_hq.clamp_(0.0, 1.0)

        # diffuse im_hq to the intermediate timestep `start_timesteps`
        yt = self.diffusion.q_sample(
            x_start=post_fun(im_hq),
            t=torch.tensor([start_timesteps, ] * im_hq.shape[0], device=device),
        )
        assert yt.shape[-1] == self.configs.im_size and yt.shape[-2] == self.configs.im_size

        if 'ddim' in self.configs.diffusion.params.timestep_respacing:
            sample = self.diffusion.ddim_sample_loop(
                self.model,
                shape=yt.shape,
                noise=yt,
                start_timesteps=start_timesteps,
                clip_denoised=True,
                denoised_fn=None,
                model_kwargs=None,
                device=None,
                progress=False,
                eta=0.0,
            )
        else:
            sample = self.diffusion.p_sample_loop(
                self.model,
                shape=yt.shape,
                noise=yt,
                start_timesteps=start_timesteps,
                clip_denoised=True,
                denoised_fn=None,
                model_kwargs=None,
                device=None,
                progress=False,
            )

        sample = util_image.normalize_th(sample, reverse=True).clamp(0.0, 1.0)

        if not (h_old == self.configs.im_size and w_old == self.configs.im_size):
            sample = resize(sample, out_shape=(h_old, w_old)).clamp(0.0, 1.0)

        return sample, im_hq
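    # ------------------------------------------------------------------
    # Background on the q_sample step above: for Gaussian diffusion,
    # q(x_t | x_0) = N(sqrt(alpha_bar_t) * x_0, (1 - alpha_bar_t) * I),
    # so the noisy start yt can be drawn in closed form, roughly
    # (a sketch of the standard implementation; the actual code lives
    # in the configured diffusion class):
    #
    #     noise = torch.randn_like(x_start)
    #     yt = sqrt_alphas_cumprod[t] * x_start \
    #             + sqrt_one_minus_alphas_cumprod[t] * noise
    #
    # i.e., the restored estimate im_hq is partially re-noised to level
    # `start_timesteps`, and the reverse (DDIM or DDPM) loop then
    # refines it from there instead of from pure noise.
    # ------------------------------------------------------------------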
    def sample_func_bfr_unaligned(
            self,
            y0,
            bs=16,
            start_timesteps=None,
            post_fun=None,
            model_kwargs_ir=None,
            need_restoration=True,
            only_center_face=False,
            draw_box=False,
            ):
        '''
        Input:
            y0: h x w x c numpy array, uint8, BGR
            bs: batch size for face restoration
            start_timesteps: integer, range [0, num_timesteps-1];
                for accelerated sampling (e.g., 'ddim250'), range [0, 249]
            post_fun: post-processing for the enhanced image
            model_kwargs_ir: additional parameters for the restoration model
            need_restoration: whether to run the restoration model before diffusion
            only_center_face: restore only the center face
            draw_box: draw a box for each face
        Output:
            restored_img: h x w x c numpy array, uint8, BGR
            restored_faces: list of h x w x c numpy arrays, uint8, BGR
            cropped_faces: list of h x w x c numpy arrays, uint8, BGR
        '''
        def _process_batch(cropped_faces_list):
            cropped_face_t = np.stack(
                img2tensor(cropped_faces_list, bgr2rgb=True, float32=True),
                axis=0) / 255.
            cropped_face_t = torch.from_numpy(cropped_face_t).to(self.device)
            restored_faces = self.sample_func_ir_aligned(
                cropped_face_t,
                start_timesteps=start_timesteps,
                post_fun=post_fun,
                model_kwargs_ir=model_kwargs_ir,
                need_restoration=need_restoration,
            )[0]    # [0, 1], b x c x h x w
            return restored_faces

        assert not self.configs.aligned

        self.face_helper.clean_all()
        self.face_helper.read_image(y0)
        num_det_faces = self.face_helper.get_face_landmarks_5(
            only_center_face=only_center_face,
            resize=640,
            eye_dist_threshold=5,
        )
        # align and warp each face
        self.face_helper.align_warp_face()

        num_cropped_face = len(self.face_helper.cropped_faces)
        if num_cropped_face > bs:
            restored_faces = []
            for idx_start in range(0, num_cropped_face, bs):
                idx_end = min(idx_start + bs, num_cropped_face)
                current_cropped_faces = self.face_helper.cropped_faces[idx_start:idx_end]
                current_restored_faces = _process_batch(current_cropped_faces)
                current_restored_faces = util_image.tensor2img(
                    list(current_restored_faces.split(1, dim=0)),
                    rgb2bgr=True,
                    min_max=(0, 1),
                    out_type=np.uint8,
                )
                restored_faces.extend(current_restored_faces)
        else:
            restored_faces = _process_batch(self.face_helper.cropped_faces)
            restored_faces = util_image.tensor2img(
                list(restored_faces.split(1, dim=0)),
                rgb2bgr=True,
                min_max=(0, 1),
                out_type=np.uint8,
            )

        for xx in restored_faces:
            self.face_helper.add_restored_face(xx)

        # background super-resolution for pasting back
        if self.configs.background_enhance:
            bg_img = self.bg_model.enhance(y0, outscale=self.configs.detection.upscale)[0]
        else:
            bg_img = None
        self.face_helper.get_inverse_affine(None)

        # paste each restored face back into the input image
        if self.configs.face_upsample:
            restored_img = self.face_helper.paste_faces_to_input_image(
                upsample_img=bg_img,
                draw_box=draw_box,
                face_upsampler=self.bg_model,
            )
        else:
            restored_img = self.face_helper.paste_faces_to_input_image(
                upsample_img=bg_img,
                draw_box=draw_box,
            )

        cropped_faces = self.face_helper.cropped_faces
        return restored_img, restored_faces, cropped_faces

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--save_dir",
        type=str,
        default="./save_dir",
        help="Folder to save the sampled images",
    )
    parser.add_argument(
        "--gpu_id",
        type=str,
        default='',
        help="GPU indices, e.g., 025 for GPUs 0, 2, and 5",
    )
    parser.add_argument(
        "--cfg_path",
        type=str,
        default='./configs/sample/iddpm_ffhq256.yaml',
        help="Path of the config file",
    )
    parser.add_argument(
        "--bs",
        type=int,
        default=32,
        help="Batch size",
    )
    parser.add_argument(
        "--num_images",
        type=int,
        default=3000,
        help="Number of sampled images",
    )
    parser.add_argument(
        "--timestep_respacing",
        type=str,
        default='1000',
        help="Sampling steps for acceleration, e.g., ddim250",
    )
    args = parser.parse_args()

    configs = OmegaConf.load(args.cfg_path)
    configs.gpu_id = args.gpu_id
    configs.diffusion.params.timestep_respacing = args.timestep_respacing

    # NOTE: `DiffusionSampler` (an unconditional sampler exposing
    # `sample_func(bs, num_images, save_dir)`) is not defined in this
    # file; it is assumed to be provided elsewhere in the project.
    sampler_dist = DiffusionSampler(configs)
    sampler_dist.sample_func(
        bs=args.bs,
        num_images=args.num_images,
        save_dir=args.save_dir,
    )
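# ----------------------------------------------------------------------
# Usage sketch for blind face restoration on an unaligned photo
# (illustrative only; the config and image paths are placeholders):
#
#     import cv2
#     from omegaconf import OmegaConf
#
#     configs = OmegaConf.load('./configs/sample/<your_bfr_config>.yaml')
#     sampler = DifIRSampler(configs)
#     im_lq = cv2.imread('whole_photo.png')      # h x w x c, uint8, BGR
#     restored_img, restored_faces, cropped_faces = \
#             sampler.sample_func_bfr_unaligned(im_lq, bs=8, draw_box=False)
#     cv2.imwrite('restored.png', restored_img)
# ----------------------------------------------------------------------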