import random
import math
import torch
import torch.nn.functional as F
import torchaudio

from pathlib import Path

import librosa as li
from src.simulation.effect import Effect

# Select torchaudio's "sox_io" I/O backend as an import-time side effect.
# NOTE(review): `set_audio_backend` is deprecated in recent torchaudio
# releases -- confirm it is still available in the pinned version.
torchaudio.set_audio_backend("sox_io")

################################################################################
# Simulate environmental noise
################################################################################


class Noise(Effect):
    """
    Simple additive noise effect.

    A noise signal and a target SNR are drawn in `sample_params()` and stored
    on the instance; `forward()` rescales the stored noise so that it sits at
    the stored SNR relative to each input signal and adds it to the input.
    """
    def __init__(self,
                 compute_grad: bool = True,
                 type: str = 'gaussian',
                 snr: any = None,
                 noise_dir: str = None,
                 ext: str = "wav"):
        """
        Apply additive noise to audio signal. SNR calculations adapted from
        VoxCeleb-Trainer (https://github.com/clovaai/voxceleb_trainer/)

        :param compute_grad: if False, perform straight-through gradient
                             estimation
        :param type: type of noise to add; must be one of `gaussian`,
                     `uniform`, or `environmental`. Note that `uniform` is
                     implemented as the sign of standard-normal samples,
                     i.e. values of +/-1
        :param snr: decibel Signal-to-Noise ratio (dB SNR) of added noise
        :param noise_dir: directory from which to draw noise samples, if `type`
                          is `environmental`; searched recursively
        :param ext: extension for audio files in `noise_dir`
        :raises ValueError: if `type` is `environmental` and `noise_dir` is
                            missing or contains no files with extension `ext`
        """
        super().__init__(compute_grad)

        self.type = type
        self.noise_list = None
        self.ext = ext

        if type == 'environmental':
            if not noise_dir:
                raise ValueError(
                    'Environmental noise requires sample directory'
                )

            self.noise_list = list(Path(noise_dir).rglob(f'*.{self.ext}'))

            # fail early with a clear message instead of an IndexError from
            # `random.choice` when parameters are sampled below
            if not self.noise_list:
                raise ValueError(
                    f'No .{self.ext} files found in noise directory '
                    f'{noise_dir}'
                )

        # parse valid range of SNR parameter
        # (`parse_range` is not defined in this class; presumably inherited
        # from `Effect`)
        self.min_snr, self.max_snr = self.parse_range(
            snr,
            float,
            f'Invalid noise SNR {snr}'
        )

        # store noise as buffer to allow device movement
        self.register_buffer("noise", torch.zeros(1, dtype=torch.float32))
        self.register_buffer("noise_db", torch.zeros(1, dtype=torch.float32))

        # initialize parameters
        self.snr = None
        self.sample_params()

    def forward(self, x: torch.Tensor):
        """
        Add the stored noise to `x` at the stored SNR.

        :param x: input audio with at least two dimensions; the last dimension
                  is treated as time. A 2-dimensional input gets a singleton
                  dimension inserted at index 1 for the computation
        :return: noisy audio with the same shape as `x`
        """

        # require batch, channel dimensions
        assert x.ndim >= 2
        orig_shape = x.shape

        if x.ndim == 2:
            x = x.unsqueeze(1)

        n_samples = x.shape[-1]

        # scale noise level to stored SNR; signal power is measured over the
        # last dimension, so each signal receives its own scale factor
        signal_db = 10 * torch.log10(
            torch.mean(torch.square(x), dim=-1, keepdim=True) + 1e-8
        )
        scale = torch.sqrt(
            torch.pow(10, (signal_db - self.noise_db - self.snr) / 10)
        )

        # trim noise to input length and scale (broadcasts against `scale`)
        noise = scale * self.noise.clone().to(x)[..., :n_samples]

        # repeat noise to match input length if necessary. Tiling and trimming
        # gives the same result as right-sided circular padding, but also
        # works when the input is more than twice as long as the noise and
        # for inputs of any rank
        if noise.shape[-1] < n_samples:
            n_tile = math.ceil(n_samples / noise.shape[-1])
            noise = torch.cat([noise] * n_tile, dim=-1)[..., :n_samples]

        # reshape to original dimensions
        return (noise + x).reshape(orig_shape)

    @staticmethod
    def _crossfade(sig, fade_len):
        """
        Apply a linear fade-in and fade-out to a copy of a signal.

        :param sig: signal tensor; fades are applied along the last dimension
        :param fade_len: fade length as a fraction of the signal length
        :return: faded copy of `sig`; the input is left unmodified
        """
        sig = sig.clone()
        n_fade = int(fade_len * sig.shape[-1])

        # with a zero-length fade, `sig[..., -0:]` would select the entire
        # signal and the multiplication by an empty ramp would fail
        if n_fade <= 0:
            return sig

        fade_in = torch.linspace(0, 1, n_fade).to(sig)
        fade_out = torch.linspace(1, 0, n_fade).to(sig)
        sig[..., :n_fade] *= fade_in
        sig[..., -n_fade:] *= fade_out
        return sig

    def sample_params(self):
        """
        Sample SNR uniformly from stored range, draw a new noise signal of the
        configured type, and store the noise together with its power in dB.

        Relies on `self.signal_length` and `self.sample_rate`, which are not
        set in this class (presumably provided by `Effect`).

        :raises ValueError: if the noise type is not recognized, or if a
                            selected environmental noise file is empty
        """
        self.snr = random.uniform(self.min_snr, self.max_snr)

        if self.type == "gaussian":
            self.noise = torch.randn(self.signal_length).to(self.noise)
        elif self.type == "uniform":
            # sign of standard-normal samples, i.e. values of +/-1
            self.noise = torch.sign(
                torch.randn(self.signal_length)
            ).to(self.noise)
        elif self.type == "environmental":

            # load from randomly-selected file
            noise_path = random.choice(self.noise_list)
            noise_np, _ = li.load(
                noise_path,
                sr=self.sample_rate, mono=True
            )
            noise = torch.as_tensor(noise_np)
            n_noise = noise.shape[-1]

            if n_noise == 0:
                raise ValueError(f'Empty noise file {noise_path}')

            # trim or loop (with cross-fade) to match expected signal length
            if n_noise >= self.signal_length:
                looped = noise[..., :self.signal_length]
            else:

                # consecutive copies start `step` samples apart, so each copy
                # overlaps the next by roughly `overlap` of its length
                overlap = 0.05
                step = math.ceil(n_noise * (1 - overlap))
                n_repeat = math.ceil(self.signal_length / step)

                # the faded clip is identical for every repetition
                faded = self._crossfade(noise, overlap)

                # overlap-add the faded copies; the faded ends of adjacent
                # copies are summed in the overlap region
                looped = torch.zeros(
                    step * (n_repeat - 1) + n_noise, dtype=torch.float32
                )
                for j in range(n_repeat):
                    start = j * step
                    looped[start:start + n_noise] += faded

                looped = looped[:self.signal_length]

            self.noise = looped.reshape(1, 1, -1).to(self.noise)

        else:
            raise ValueError(f'Invalid noise type {self.type}')

        # power of the stored noise in dB, used in `forward` to reach the SNR
        self.noise_db = 10 * torch.log10(
            torch.mean(torch.square(self.noise), dim=-1, keepdim=True) + 1e-8
        ).to(self.noise_db)