"""Training the network."""
import datetime
import logging
import time
from typing import Sequence, Tuple

import torch

import dataloader

# Available logging levels: DEBUG, INFO, WARNING, ERROR, CRITICAL
logging.basicConfig(level=logging.INFO)
# Silence every record at DEBUG level (numeric value 10) and below.
logging.disable(level=logging.DEBUG)

data1 = dataloader.Data("data/train_extract.jsonl")
words = data1.get_words()
vectoriser = dataloader.Vectoriser(words)

# Label used to pad targets; it is the default ``ignore_index`` of
# ``torch.nn.functional.nll_loss`` so padded positions do not contribute
# to the loss.
_IGNORE_INDEX = -100


def _pad_target(target: torch.Tensor, length: int) -> torch.Tensor:
    """Right-pad *target* with the ignored label so that it has *length* items.

    A negative difference is passed through to ``torch.nn.functional.pad``
    unchanged, exactly as the inline padding did before.
    """
    return torch.nn.functional.pad(
        target, (0, length - len(target)), value=_IGNORE_INDEX
    )


def _mean(total: float, count: int) -> float:
    """Return ``total / count``, or NaN when there was nothing to average."""
    return total / count if count else float("nan")


def _train_one_epoch(
    model: torch.nn.Module,
    train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    optim: torch.optim.Optimizer,
    device: torch.device,
    clip: float,
) -> Tuple[float, int]:
    """Run one optimisation pass over *train_set*.

    Returns the summed loss and the summed ``source.shape[0]`` of the pass.
    """
    # Tell the model it is in train mode, for layers designed to behave
    # differently in training and evaluation.
    # https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch
    model.train()

    loss_sum = 0.0
    seen = 0
    for source, target in train_set:
        source = source.to(device)
        target = target.to(device)

        # Gradients accumulate in ``.grad`` by default: reset them so each
        # step only uses the gradient of the current example.
        optim.zero_grad()

        out = model(source).to(device)
        logging.debug("outputs = %s", out.shape)

        target = _pad_target(target, len(out))
        loss = torch.nn.functional.nll_loss(out, target)
        loss.backward()
        # Rescale the gradients so that their global norm is at most `clip`.
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optim.step()

        loss_sum += loss.item()
        seen += source.shape[0]
    return loss_sum, seen


def _evaluate(
    model: torch.nn.Module,
    dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    device: torch.device,
) -> Tuple[float, int]:
    """Compute the loss of *model* over *dev_set* without updating it.

    Returns the summed loss and the summed ``source.shape[0]`` of the pass.
    """
    # We want to evaluate the model here, so switch to evaluation mode once.
    model.eval()

    loss_sum = 0.0
    seen = 0
    # No backward pass happens here, so skip autograd bookkeeping.
    with torch.no_grad():
        for source, target in dev_set:
            source = source.to(device)
            target = target.to(device)

            output = model(source).to(device)
            output_dim = output.shape[-1]
            # Drop the first position and flatten to (N, output_dim).
            output = output[1:].view(-1, output_dim)
            logging.debug("dev output : %s", output.shape)
            target = target[1:].view(-1)

            # To compare the output with the target they must have the same
            # length, so the target is padded with the ignored label.
            target = _pad_target(target, len(output))

            loss_sum += torch.nn.functional.nll_loss(output, target).item()
            seen += source.shape[0]
    return loss_sum, seen


def train_network(
    model: torch.nn.Module,
    train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    epochs: int,
    clip: float = 1,
    *,
    learning_rate: float = 0.01,
):
    """Train the EncoderDecoderModel network for a given number of epochs.

    One line of statistics is printed per epoch.

    Parameters
    ----------
    model: torch.nn.Module
        EncoderDecoderModel defined in model.py
    train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]]
        tuples of vectorized (text, summary) from the training set
    dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]]
        tuples of vectorized (text, summary) from the dev set
    epochs: int
        the number of epochs to train on
    clip: float
        maximum norm of the gradients, passed to
        ``torch.nn.utils.clip_grad_norm_`` at every training step
    learning_rate: float
        learning rate of the Adam optimizer (keyword-only)

    Returns
    -------
    None
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    # Print the device we selected: ``torch.nn.Module`` itself does not
    # expose a ``device`` attribute.
    print("Device check. You are using:", device)

    optim = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # NOTE(review): the "dev accuracy" column is really the mean dev NLL loss
    # rendered as a percentage. The printed format is kept as-is so that
    # anything reading this output keeps working -- confirm and rename.
    print("Epoch\ttrain loss\tdev accuracy\tcompute time")
    for epoch_n in range(epochs):
        # To get the computing time per epoch (training + dev evaluation)
        epoch_start_time = time.time()

        train_loss_sum, train_seen = _train_one_epoch(
            model, train_set, optim, device, clip
        )
        dev_loss_sum, dev_seen = _evaluate(model, dev_set, device)

        epoch_compute_time = time.time() - epoch_start_time

        # An empty set yields NaN instead of a ZeroDivisionError.
        train_loss = _mean(train_loss_sum, train_seen)
        dev_score = _mean(dev_loss_sum, dev_seen)
        print(
            f"{epoch_n}\t{train_loss:.5}\t{abs(dev_score):.2%}\t\t"
            f"{datetime.timedelta(seconds=epoch_compute_time)}"
        )