import math
import warnings

import torch

from dist import cached_grid
from utils import log as log_utils

LOGGER = log_utils.getLogger(__name__)


def mask_selector(masks_softmaxed, top=2, size_norm=False):
    """Collapse ``k`` soft masks into two: "everything else" and "centre-most".

    Every mask gets a centrality score: the mask-weighted average of ``1 - v``
    where ``v = 2 * (x ** 2 + y ** 2)`` and ``x`` / ``y`` are the grid values
    rescaled as ``g / (w - 1) - 0.5`` and ``g / (h - 1) - 0.5``. The ``top``
    highest-scoring masks are summed into output channel 1; the remaining
    ``k - top`` masks are summed into output channel 0.

    Args:
        masks_softmaxed: tensor of shape ``(b, k, *other_dims, h, w)``. It is
            viewed as ``(b, k, 1, h, w)``, so ``other_dims`` must multiply to 1.
            Presumably already softmax-normalised over ``k`` (inferred from
            the name only -- nothing here enforces it).
        top: number of centre-most masks merged into channel 1. ``topk``
            raises if it is outside ``[0, k]``.
        size_norm: accepted for interface compatibility; not used here.

    Returns:
        Tensor of shape ``(b, 2, *other_dims, h, w)``: index 0 along dim 1 is
        the sum of the non-selected masks, index 1 the sum of the selected
        (centre-most) masks.
    """
    b, k, *other_dims, h, w = masks_softmaxed.shape
    masks_softmaxed = masks_softmaxed.view(b, k, 1, h, w)
    g = cached_grid(h, w, device=masks_softmaxed.device, dtype=masks_softmaxed.dtype)
    # NOTE(review): presumably g[0, 0] / g[0, 1] hold x / y pixel indices in
    # [0, w - 1] / [0, h - 1], so x and y end up in [-0.5, 0.5] -- confirm
    # against cached_grid. h == 1 or w == 1 would divide by zero here.
    x = g[0, 0] / (w - 1) - .5
    y = g[0, 1] / (h - 1) - .5

    # Distance-from-centre penalty; (1 - v) below is the centrality weight.
    v = (x ** 2 + y ** 2) * 2
    assert len(v.shape) == 2
    # Prepend one singleton dim per leading mask dim so v broadcasts against
    # (b, k, 1, h, w). This must be the number of dimensions; len() of a
    # tensor is its batch size and made the view rank depend on b.
    v = v.view(*[1] * (masks_softmaxed.dim() - 2), h, w)
    scores = (masks_softmaxed * (1 - v)).sum([-1, -2]).view(b, k)
    # Normalise by mask area so large masks are not favoured; the epsilon
    # protects empty masks.
    scores = scores / (masks_softmaxed.flatten(-3).sum(-1) + 1e-6)

    LOGGER.debug_once(f"Selector -- masks in {masks_softmaxed.shape}; scores {scores.shape}")

    best = scores.topk(top, dim=-1).indices
    # Take the *exact complement* of the selected indices. Running a second,
    # independent topk on -scores can overlap with `best` when scores tie,
    # which would count one mask twice and drop another.
    is_best = torch.zeros_like(scores).scatter_(1, best, 1.0)
    rest = is_best.topk(k - top, dim=-1, largest=False).indices

    best_idxs = best[..., None, None, None].expand(-1, -1, -1, h, w)
    wrst_idxs = rest[..., None, None, None].expand(-1, -1, -1, h, w)

    LOGGER.debug_once(f"Selector -- inds {best_idxs.shape} {wrst_idxs.shape}")

    centre_most_masks = torch.gather(masks_softmaxed, 1, best_idxs).sum(1, keepdim=True)
    other_masks = torch.gather(masks_softmaxed, 1, wrst_idxs).sum(1, keepdim=True)

    LOGGER.debug_once(f"Selector -- best {centre_most_masks.shape} others {other_masks.shape}")

    # Channel 0: everything else, channel 1: centre-most.
    masks_out = torch.cat([other_masks, centre_most_masks], dim=1)

    return masks_out.view(b, 2, *other_dims, h, w)


def _no_grad_trunc_normal_(tensor, mean, std, a, b):
    # Cut & paste from PyTorch official master until it's in a few official releases - RW
    # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
    def norm_cdf(x):
        # Computes standard normal cumulative distribution function
        return (1. + math.erf(x / math.sqrt(2.))) / 2.

    if (mean < a - 2 * std) or (mean > b + 2 * std):
        warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
                      "The distribution of values may be incorrect.",
                      stacklevel=2)

    with torch.no_grad():
        # Values are generated by using a truncated uniform distribution and
        # then using the inverse CDF for the normal distribution.
        # Get upper and lower cdf values
        l = norm_cdf((a - mean) / std)
        u = norm_cdf((b - mean) / std)

        # Uniformly fill tensor with values from [l, u], then translate to
        # [2l-1, 2u-1].
        tensor.uniform_(2 * l - 1, 2 * u - 1)

        # Use inverse cdf transform for normal distribution to get truncated
        # standard normal
        tensor.erfinv_()

        # Transform to proper mean, std
        tensor.mul_(std * math.sqrt(2.))
        tensor.add_(mean)

        # Clamp to ensure it's in the proper range
        tensor.clamp_(min=a, max=b)
        return tensor


def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
    # type: (Tensor, float, float, float, float) -> Tensor
    """Fill ``tensor`` in place from a normal distribution truncated to ``[a, b]``.

    Public entry point; the work is delegated to ``_no_grad_trunc_normal_``.

    Args:
        tensor: tensor to overwrite.
        mean: mean of the underlying normal distribution (default ``0.``).
        std: standard deviation of the underlying normal (default ``1.``).
        a: lower truncation bound (default ``-2.``).
        b: upper truncation bound (default ``2.``).

    Returns:
        Whatever ``_no_grad_trunc_normal_`` returns, i.e. the input ``tensor``
        after in-place initialisation.
    """
    initialised = _no_grad_trunc_normal_(tensor, mean=mean, std=std, a=a, b=b)
    return initialised