"""
Training the network
"""
import datetime
import logging
import time
from typing import Sequence, Tuple

import torch

import dataloader

# Configure the root logger at INFO, then globally suppress every record of
# severity DEBUG or lower so the logging.debug(...) calls in this module stay
# silent. Comment out the logging.disable line to see them again.
logging.basicConfig(level=logging.INFO)
logging.disable(level=logging.DEBUG)

# Module-level objects built once at import time (importing this module
# therefore touches "data/train_extract.jsonl").
# NOTE(review): presumably `words` is the training vocabulary and `vectoriser`
# maps between words and indices -- confirm against dataloader.py.
data1 = dataloader.Data("data/train_extract.jsonl")
words = data1.get_words()
vectoriser = dataloader.Vectoriser(words)


def _train_one_epoch(
    model: torch.nn.Module,
    train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    optim: torch.optim.Optimizer,
    device: torch.device,
    clip: float,
) -> Tuple[float, int]:
    """Run one optimisation pass over train_set; return (summed loss, summed source length)."""
    # Put layers that behave differently in train/eval (dropout, batch-norm...)
    # into training mode.
    model.train()

    loss_sum = 0.0
    length = 0
    for source, target in train_set:
        source = source.to(device)
        target = target.to(device)

        # Reset the gradients left over from the previous step; without this
        # backward() keeps adding to them and every update uses a sum of all
        # gradients seen so far.
        optim.zero_grad()

        out = model(source)
        logging.debug("outputs = %s", out.shape)

        # Bring the target to the output length. -100 is the default
        # ignore_index of nll_loss, so padded positions do not count.
        target = torch.nn.functional.pad(
            target, (0, len(out) - len(target)), value=-100
        )
        loss = torch.nn.functional.nll_loss(out, target)
        loss.backward()
        # Rescale the gradients so their global norm does not exceed `clip`.
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optim.step()

        loss_sum += loss.item()
        length += source.shape[0]

    return loss_sum, length


def _evaluate_dev(
    model: torch.nn.Module,
    dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    device: torch.device,
) -> Tuple[float, int]:
    """Score the model on dev_set without updating it; return (summed loss, summed source length)."""
    # Evaluation mode is set once for the whole pass, and autograd is turned
    # off because no backward pass is run here.
    model.eval()

    loss_sum = 0.0
    length = 0
    with torch.no_grad():
        for source, target in dev_set:
            source = source.to(device)
            target = target.to(device)

            output = model(source)
            output_dim = output.shape[-1]

            # Drop the first position of both output and target, then flatten.
            # NOTE(review): the training pass does not drop the first
            # position -- confirm which of the two is intended.
            output = output[1:].view(-1, output_dim)
            logging.debug("dev output : %s", output.shape)
            target = target[1:].view(-1)

            # To compare the output with the target they must have the same
            # length, so the target is padded with -100, which nll_loss
            # ignores.
            target = torch.nn.functional.pad(
                target, (0, len(output) - len(target)), value=-100
            )
            dev_loss = torch.nn.functional.nll_loss(output, target)

            loss_sum += dev_loss.item()
            length += source.shape[0]

    return loss_sum, length


def train_network(
    model: torch.nn.Module,
    train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    epochs: int,
    clip: float = 1,
    lr: float = 0.01,
) -> None:
    """
    Train the network for a given number of epochs and print one report
    line per epoch (train loss, dev score, compute time).
    -----------
    Parameters
        model: torch.nn.Module
            the network to train; it is moved to CUDA when available,
            otherwise to CPU
        train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]]
            vectorized (text, summary) pairs from the training set
        dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]]
            vectorized (text, summary) pairs from the dev set
        epochs: int
            the number of epochs to train on
        clip: float
            maximum gradient norm; gradients are rescaled with
            torch.nn.utils.clip_grad_norm_ before every optimizer step
        lr: float
            learning rate given to the Adam optimizer
    Return
        None
    -----------
    Notes
        The column printed as "dev accuracy" is the summed dev nll_loss
        divided by the summed source length, formatted as a percentage; it
        is not a classification accuracy.
        When train_set or dev_set is empty the matching column shows nan.
        The model is left in evaluation mode when the function returns.
    """

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    # Report the device that was actually selected; a plain torch.nn.Module
    # has no `.device` attribute to read.
    print("Device check. You are using:", device)

    optim = torch.optim.Adam(model.parameters(), lr=lr)

    print("Epoch\ttrain loss\tdev accuracy\tcompute time")

    nan = float("nan")
    for epoch_n in range(epochs):
        # The reported compute time covers both the train and the dev pass.
        epoch_start_time = time.perf_counter()

        epoch_loss, epoch_length = _train_one_epoch(
            model, train_set, optim, device, clip
        )
        dev_correct, dev_total = _evaluate_dev(model, dev_set, device)

        epoch_compute_time = time.perf_counter() - epoch_start_time

        # Guard the averages so an empty data set yields nan instead of a
        # ZeroDivisionError.
        train_score = epoch_loss / epoch_length if epoch_length else nan
        dev_score = abs(dev_correct / dev_total) if dev_total else nan

        print(
            f"{epoch_n}\t{train_score:.5}\t{dev_score:.2%}\t\t{datetime.timedelta(seconds=epoch_compute_time)}"
        )