# -*- coding: utf-8 -*-

# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
# holder of all proprietary rights on this computer program.
# You can only use this computer program if you have closed
# a license agreement with MPG or you get the right to use the computer
# program from someone who is authorized to grant you that right.
# Any use of the computer program without a valid license is prohibited and
# liable to prosecution.
#
# Copyright©2019 Max-Planck-Gesellschaft zur Förderung
# der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
# for Intelligent Systems. All rights reserved.
#
# Contact: ps-license@tuebingen.mpg.de


from .seg3d_utils import (
    create_grid3D,
    plot_mask3D,
    SmoothConv3D,
)

import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
import mcubes
from kaolin.ops.conversions import voxelgrids_to_trianglemeshes
import logging

logging.getLogger("lightning").setLevel(logging.ERROR)


class Seg3dLossless(nn.Module):
    def __init__(self,
                 query_func,
                 b_min,
                 b_max,
                 resolutions,
                 channels=1,
                 balance_value=0.5,
                 align_corners=False,
                 visualize=False,
                 debug=False,
                 use_cuda_impl=False,
                 faster=False,
                 use_shadow=False,
                 **kwargs):
        """
        align_corners: same with how you process gt. (grid_sample / interpolate) 
        """
        super().__init__()
        self.query_func = query_func
        self.register_buffer(
            'b_min',
            torch.tensor(b_min).float().unsqueeze(1))  # [bz, 1, 3]
        self.register_buffer(
            'b_max',
            torch.tensor(b_max).float().unsqueeze(1))  # [bz, 1, 3]

        # ti.init(arch=ti.cuda)
        # self.mciso_taichi = MCISO(dim=3, N=resolutions[-1]-1)

        if type(resolutions[0]) is int:
            resolutions = torch.tensor([(res, res, res)
                                        for res in resolutions])
        else:
            resolutions = torch.tensor(resolutions)
        self.register_buffer('resolutions', resolutions)
        self.batchsize = self.b_min.size(0)
        assert self.batchsize == 1
        self.balance_value = balance_value
        self.channels = channels
        assert self.channels == 1
        self.align_corners = align_corners
        self.visualize = visualize
        self.debug = debug
        self.use_cuda_impl = use_cuda_impl
        self.faster = faster
        self.use_shadow = use_shadow

        for resolution in resolutions:
            assert resolution[0] % 2 == 1 and resolution[1] % 2 == 1, \
                f"resolution {resolution} need to be odd becuase of align_corner."

        # init first resolution
        init_coords = create_grid3D(0,
                                    resolutions[-1] - 1,
                                    steps=resolutions[0])  # [N, 3]
        init_coords = init_coords.unsqueeze(0).repeat(self.batchsize, 1,
                                                      1)  # [bz, N, 3]
        self.register_buffer('init_coords', init_coords)

        # some useful tensors
        calculated = torch.zeros(
            (self.resolutions[-1][2], self.resolutions[-1][1],
             self.resolutions[-1][0]),
            dtype=torch.bool)
        self.register_buffer('calculated', calculated)

        gird8_offsets = torch.stack(
            torch.meshgrid([
                torch.tensor([-1, 0, 1]),
                torch.tensor([-1, 0, 1]),
                torch.tensor([-1, 0, 1])
            ])).int().view(3, -1).t()  # [27, 3]
        self.register_buffer('gird8_offsets', gird8_offsets)

        # smooth convs
        self.smooth_conv3x3 = SmoothConv3D(in_channels=1,
                                           out_channels=1,
                                           kernel_size=3)
        self.smooth_conv5x5 = SmoothConv3D(in_channels=1,
                                           out_channels=1,
                                           kernel_size=5)
        self.smooth_conv7x7 = SmoothConv3D(in_channels=1,
                                           out_channels=1,
                                           kernel_size=7)
        self.smooth_conv9x9 = SmoothConv3D(in_channels=1,
                                           out_channels=1,
                                           kernel_size=9)

    def batch_eval(self, coords, **kwargs):
        """
        coords: in the coordinates of last resolution
        **kwargs: for query_func
        """
        coords = coords.detach()
        # normalize coords to fit in [b_min, b_max]
        if self.align_corners:
            coords2D = coords.float() / (self.resolutions[-1] - 1)
        else:
            step = 1.0 / self.resolutions[-1].float()
            coords2D = coords.float() / self.resolutions[-1] + step / 2
        coords2D = coords2D * (self.b_max - self.b_min) + self.b_min
        # query function
        occupancys = self.query_func(**kwargs, points=coords2D)
        if type(occupancys) is list:
            occupancys = torch.stack(occupancys)  # [bz, C, N]
        assert len(occupancys.size()) == 3, \
            "query_func should return a occupancy with shape of [bz, C, N]"
        return occupancys

    def forward(self, **kwargs):
        if self.faster:
            return self._forward_faster(**kwargs)
        else:
            return self._forward(**kwargs)

    def _forward_faster(self, **kwargs):
        """
        In faster mode, we make following changes to exchange accuracy for speed:
        1. no conflict checking: 4.88 fps -> 6.56 fps
        2. smooth_conv9x9 ~ smooth_conv3x3 for different resolution
        3. last step no examine
        """
        final_W = self.resolutions[-1][0]
        final_H = self.resolutions[-1][1]
        final_D = self.resolutions[-1][2]

        for resolution in self.resolutions:
            W, H, D = resolution
            stride = (self.resolutions[-1] - 1) / (resolution - 1)

            # first step
            if torch.equal(resolution, self.resolutions[0]):
                coords = self.init_coords.clone()  # torch.long
                occupancys = self.batch_eval(coords, **kwargs)
                occupancys = occupancys.view(self.batchsize, self.channels, D,
                                             H, W)
                if (occupancys > 0.5).sum() == 0:
                    # return F.interpolate(
                    #     occupancys, size=(final_D, final_H, final_W),
                    #     mode="linear", align_corners=True)
                    return None

                if self.visualize:
                    self.plot(occupancys, coords, final_D, final_H, final_W)

                with torch.no_grad():
                    coords_accum = coords / stride

            # last step
            elif torch.equal(resolution, self.resolutions[-1]):

                with torch.no_grad():
                    # here true is correct!
                    valid = F.interpolate(
                        (occupancys > self.balance_value).float(),
                        size=(D, H, W),
                        mode="trilinear",
                        align_corners=True)

                # here true is correct!
                occupancys = F.interpolate(occupancys.float(),
                                           size=(D, H, W),
                                           mode="trilinear",
                                           align_corners=True)

                # is_boundary = (valid > 0.0) & (valid < 1.0)
                is_boundary = valid == 0.5

            # next steps
            else:
                coords_accum *= 2

                with torch.no_grad():
                    # here true is correct!
                    valid = F.interpolate(
                        (occupancys > self.balance_value).float(),
                        size=(D, H, W),
                        mode="trilinear",
                        align_corners=True)

                # here true is correct!
                occupancys = F.interpolate(occupancys.float(),
                                           size=(D, H, W),
                                           mode="trilinear",
                                           align_corners=True)

                is_boundary = (valid > 0.0) & (valid < 1.0)

                with torch.no_grad():
                    if torch.equal(resolution, self.resolutions[1]):
                        is_boundary = (self.smooth_conv9x9(is_boundary.float())
                                       > 0)[0, 0]
                    elif torch.equal(resolution, self.resolutions[2]):
                        is_boundary = (self.smooth_conv7x7(is_boundary.float())
                                       > 0)[0, 0]
                    else:
                        is_boundary = (self.smooth_conv3x3(is_boundary.float())
                                       > 0)[0, 0]

                    coords_accum = coords_accum.long()
                    is_boundary[coords_accum[0, :, 2], coords_accum[0, :, 1],
                                coords_accum[0, :, 0]] = False
                    point_coords = is_boundary.permute(
                        2, 1, 0).nonzero(as_tuple=False).unsqueeze(0)
                    point_indices = (point_coords[:, :, 2] * H * W +
                                     point_coords[:, :, 1] * W +
                                     point_coords[:, :, 0])

                    R, C, D, H, W = occupancys.shape

                    # inferred value
                    coords = point_coords * stride

                if coords.size(1) == 0:
                    continue
                occupancys_topk = self.batch_eval(coords, **kwargs)

                # put mask point predictions to the right places on the upsampled grid.
                R, C, D, H, W = occupancys.shape
                point_indices = point_indices.unsqueeze(1).expand(-1, C, -1)
                occupancys = (occupancys.reshape(R, C, D * H * W).scatter_(
                    2, point_indices, occupancys_topk).view(R, C, D, H, W))

                with torch.no_grad():
                    voxels = coords / stride
                    coords_accum = torch.cat([voxels, coords_accum],
                                             dim=1).unique(dim=1)

        return occupancys[0, 0]

    def _forward(self, **kwargs):
        """
        output occupancy field would be:
        (bz, C, res, res)
        """
        final_W = self.resolutions[-1][0]
        final_H = self.resolutions[-1][1]
        final_D = self.resolutions[-1][2]

        calculated = self.calculated.clone()

        for resolution in self.resolutions:
            W, H, D = resolution
            stride = (self.resolutions[-1] - 1) / (resolution - 1)

            if self.visualize:
                this_stage_coords = []

            # first step
            if torch.equal(resolution, self.resolutions[0]):
                coords = self.init_coords.clone()  # torch.long
                occupancys = self.batch_eval(coords, **kwargs)
                occupancys = occupancys.view(self.batchsize, self.channels, D,
                                             H, W)

                if self.visualize:
                    self.plot(occupancys, coords, final_D, final_H, final_W)

                with torch.no_grad():
                    coords_accum = coords / stride
                    calculated[coords[0, :, 2], coords[0, :, 1],
                               coords[0, :, 0]] = True

            # next steps
            else:
                coords_accum *= 2

                with torch.no_grad():
                    # here true is correct!
                    valid = F.interpolate(
                        (occupancys > self.balance_value).float(),
                        size=(D, H, W),
                        mode="trilinear",
                        align_corners=True)

                # here true is correct!
                occupancys = F.interpolate(occupancys.float(),
                                           size=(D, H, W),
                                           mode="trilinear",
                                           align_corners=True)

                is_boundary = (valid > 0.0) & (valid < 1.0)

                with torch.no_grad():
                    # TODO
                    if self.use_shadow and torch.equal(resolution,
                                                       self.resolutions[-1]):
                        # larger z means smaller depth here
                        depth_res = resolution[2].item()
                        depth_index = torch.linspace(0,
                                                     depth_res - 1,
                                                     steps=depth_res).type_as(
                                                         occupancys.device)
                        depth_index_max = torch.max(
                            (occupancys > self.balance_value) *
                            (depth_index + 1),
                            dim=-1,
                            keepdim=True)[0] - 1
                        shadow = depth_index < depth_index_max
                        is_boundary[shadow] = False
                        is_boundary = is_boundary[0, 0]
                    else:
                        is_boundary = (self.smooth_conv3x3(is_boundary.float())
                                       > 0)[0, 0]
                        # is_boundary = is_boundary[0, 0]

                    is_boundary[coords_accum[0, :, 2], coords_accum[0, :, 1],
                                coords_accum[0, :, 0]] = False
                    point_coords = is_boundary.permute(
                        2, 1, 0).nonzero(as_tuple=False).unsqueeze(0)
                    point_indices = (point_coords[:, :, 2] * H * W +
                                     point_coords[:, :, 1] * W +
                                     point_coords[:, :, 0])

                    R, C, D, H, W = occupancys.shape
                    # interpolated value
                    occupancys_interp = torch.gather(
                        occupancys.reshape(R, C, D * H * W), 2,
                        point_indices.unsqueeze(1))

                    # inferred value
                    coords = point_coords * stride

                if coords.size(1) == 0:
                    continue
                occupancys_topk = self.batch_eval(coords, **kwargs)
                if self.visualize:
                    this_stage_coords.append(coords)

                # put mask point predictions to the right places on the upsampled grid.
                R, C, D, H, W = occupancys.shape
                point_indices = point_indices.unsqueeze(1).expand(-1, C, -1)
                occupancys = (occupancys.reshape(R, C, D * H * W).scatter_(
                    2, point_indices, occupancys_topk).view(R, C, D, H, W))

                with torch.no_grad():
                    # conflicts
                    conflicts = ((occupancys_interp - self.balance_value) *
                                 (occupancys_topk - self.balance_value) < 0)[0,
                                                                             0]

                    if self.visualize:
                        self.plot(occupancys, coords, final_D, final_H,
                                  final_W)

                    voxels = coords / stride
                    coords_accum = torch.cat([voxels, coords_accum],
                                             dim=1).unique(dim=1)
                    calculated[coords[0, :, 2], coords[0, :, 1],
                               coords[0, :, 0]] = True

                while conflicts.sum() > 0:
                    if self.use_shadow and torch.equal(resolution,
                                                       self.resolutions[-1]):
                        break

                    with torch.no_grad():
                        conflicts_coords = coords[0, conflicts, :]

                        if self.debug:
                            self.plot(occupancys,
                                      conflicts_coords.unsqueeze(0),
                                      final_D,
                                      final_H,
                                      final_W,
                                      title='conflicts')

                        conflicts_boundary = (conflicts_coords.int() +
                                              self.gird8_offsets.unsqueeze(1) *
                                              stride.int()).reshape(
                                                  -1, 3).long().unique(dim=0)
                        conflicts_boundary[:, 0] = (
                            conflicts_boundary[:, 0].clamp(
                                0,
                                calculated.size(2) - 1))
                        conflicts_boundary[:, 1] = (
                            conflicts_boundary[:, 1].clamp(
                                0,
                                calculated.size(1) - 1))
                        conflicts_boundary[:, 2] = (
                            conflicts_boundary[:, 2].clamp(
                                0,
                                calculated.size(0) - 1))

                        coords = conflicts_boundary[calculated[
                            conflicts_boundary[:, 2], conflicts_boundary[:, 1],
                            conflicts_boundary[:, 0]] == False]

                        if self.debug:
                            self.plot(occupancys,
                                      coords.unsqueeze(0),
                                      final_D,
                                      final_H,
                                      final_W,
                                      title='coords')

                        coords = coords.unsqueeze(0)
                        point_coords = coords / stride
                        point_indices = (point_coords[:, :, 2] * H * W +
                                         point_coords[:, :, 1] * W +
                                         point_coords[:, :, 0])

                        R, C, D, H, W = occupancys.shape
                        # interpolated value
                        occupancys_interp = torch.gather(
                            occupancys.reshape(R, C, D * H * W), 2,
                            point_indices.unsqueeze(1))

                        # inferred value
                        coords = point_coords * stride

                    if coords.size(1) == 0:
                        break
                    occupancys_topk = self.batch_eval(coords, **kwargs)
                    if self.visualize:
                        this_stage_coords.append(coords)

                    with torch.no_grad():
                        # conflicts
                        conflicts = ((occupancys_interp - self.balance_value) *
                                     (occupancys_topk - self.balance_value) <
                                     0)[0, 0]

                    # put mask point predictions to the right places on the upsampled grid.
                    point_indices = point_indices.unsqueeze(1).expand(
                        -1, C, -1)
                    occupancys = (occupancys.reshape(R, C, D * H * W).scatter_(
                        2, point_indices, occupancys_topk).view(R, C, D, H, W))

                    with torch.no_grad():
                        voxels = coords / stride
                        coords_accum = torch.cat([voxels, coords_accum],
                                                 dim=1).unique(dim=1)
                        calculated[coords[0, :, 2], coords[0, :, 1],
                                   coords[0, :, 0]] = True

                if self.visualize:
                    this_stage_coords = torch.cat(this_stage_coords, dim=1)
                    self.plot(occupancys, this_stage_coords, final_D, final_H,
                              final_W)

        return occupancys[0, 0]

    def plot(self,
             occupancys,
             coords,
             final_D,
             final_H,
             final_W,
             title='',
             **kwargs):
        final = F.interpolate(occupancys.float(),
                              size=(final_D, final_H, final_W),
                              mode="trilinear",
                              align_corners=True)  # here true is correct!
        x = coords[0, :, 0].to("cpu")
        y = coords[0, :, 1].to("cpu")
        z = coords[0, :, 2].to("cpu")

        plot_mask3D(final[0, 0].to("cpu"), title, (x, y, z), **kwargs)

    def find_vertices(self, sdf, direction="front"):
        '''
            - direction: "front" | "back" | "left" | "right"
        '''
        resolution = sdf.size(2)
        if direction == "front":
            pass
        elif direction == "left":
            sdf = sdf.permute(2, 1, 0)
        elif direction == "back":
            inv_idx = torch.arange(sdf.size(2) - 1, -1, -1).long()
            sdf = sdf[inv_idx, :, :]
        elif direction == "right":
            inv_idx = torch.arange(sdf.size(2) - 1, -1, -1).long()
            sdf = sdf[:, :, inv_idx]
            sdf = sdf.permute(2, 1, 0)

        inv_idx = torch.arange(sdf.size(2) - 1, -1, -1).long()
        sdf = sdf[inv_idx, :, :]
        sdf_all = sdf.permute(2, 1, 0)

        # shadow
        grad_v = (sdf_all > 0.5) * torch.linspace(
            resolution, 1, steps=resolution).to(sdf.device)
        grad_c = torch.ones_like(sdf_all) * torch.linspace(
            0, resolution - 1, steps=resolution).to(sdf.device)
        max_v, max_c = grad_v.max(dim=2)
        shadow = grad_c > max_c.view(resolution, resolution, 1)
        keep = (sdf_all > 0.5) & (~shadow)

        p1 = keep.nonzero(as_tuple=False).t()  # [3, N]
        p2 = p1.clone()  # z
        p2[2, :] = (p2[2, :] - 2).clamp(0, resolution)
        p3 = p1.clone()  # y
        p3[1, :] = (p3[1, :] - 2).clamp(0, resolution)
        p4 = p1.clone()  # x
        p4[0, :] = (p4[0, :] - 2).clamp(0, resolution)

        v1 = sdf_all[p1[0, :], p1[1, :], p1[2, :]]
        v2 = sdf_all[p2[0, :], p2[1, :], p2[2, :]]
        v3 = sdf_all[p3[0, :], p3[1, :], p3[2, :]]
        v4 = sdf_all[p4[0, :], p4[1, :], p4[2, :]]

        X = p1[0, :].long()  # [N,]
        Y = p1[1, :].long()  # [N,]
        Z = p2[2, :].float() * (0.5 - v1) / (v2 - v1) + \
            p1[2, :].float() * (v2 - 0.5) / (v2 - v1)  # [N,]
        Z = Z.clamp(0, resolution)

        # normal
        norm_z = v2 - v1
        norm_y = v3 - v1
        norm_x = v4 - v1
        # print (v2.min(dim=0)[0], v2.max(dim=0)[0], v3.min(dim=0)[0], v3.max(dim=0)[0])

        norm = torch.stack([norm_x, norm_y, norm_z], dim=1)
        norm = norm / torch.norm(norm, p=2, dim=1, keepdim=True)

        return X, Y, Z, norm

    def render_normal(self, resolution, X, Y, Z, norm):
        image = torch.ones((1, 3, resolution, resolution),
                           dtype=torch.float32).to(norm.device)
        color = (norm + 1) / 2.0
        color = color.clamp(0, 1)
        image[0, :, Y, X] = color.t()
        return image

    def display(self, sdf):

        # render
        X, Y, Z, norm = self.find_vertices(sdf, direction="front")
        image1 = self.render_normal(self.resolutions[-1, -1], X, Y, Z, norm)
        X, Y, Z, norm = self.find_vertices(sdf, direction="left")
        image2 = self.render_normal(self.resolutions[-1, -1], X, Y, Z, norm)
        X, Y, Z, norm = self.find_vertices(sdf, direction="right")
        image3 = self.render_normal(self.resolutions[-1, -1], X, Y, Z, norm)
        X, Y, Z, norm = self.find_vertices(sdf, direction="back")
        image4 = self.render_normal(self.resolutions[-1, -1], X, Y, Z, norm)

        image = torch.cat([image1, image2, image3, image4], axis=3)
        image = image.detach().cpu().numpy()[0].transpose(1, 2, 0) * 255.0

        return np.uint8(image)

    def export_mesh(self, occupancys):

        final = occupancys[1:, 1:, 1:].contiguous()

        if final.shape[0] > 256:
            # for voxelgrid larger than 256^3, the required GPU memory will be > 9GB
            # thus we use CPU marching_cube to avoid "CUDA out of memory"
            occu_arr = final.detach().cpu().numpy()                 # non-smooth surface
            # occu_arr = mcubes.smooth(final.detach().cpu().numpy())  # smooth surface
            vertices, triangles = mcubes.marching_cubes(
                occu_arr, self.balance_value)
            verts = torch.as_tensor(vertices[:, [2, 1, 0]])
            faces = torch.as_tensor(triangles.astype(
                np.long), dtype=torch.long)[:, [0, 2, 1]]
        else:
            torch.cuda.empty_cache()
            vertices, triangles = voxelgrids_to_trianglemeshes(
                final.unsqueeze(0))
            verts = vertices[0][:, [2, 1, 0]].cpu()
            faces = triangles[0][:, [0, 2, 1]].cpu()

        return verts, faces