SummaryProject / src /train.py
EveSa's picture
Initial Commit
ad78747
raw
history blame
No virus
4.41 kB
"""
Training the network
"""
import datetime
import logging
import time
from typing import Sequence, Tuple
import torch
import dataloader
# Root logger is configured at INFO; DEBUG records (numeric level 10) and
# anything below are then switched off globally via logging.disable.
logging.basicConfig(level=logging.INFO)
logging.disable(logging.DEBUG)

# Module-level data objects, built at import time from the training extract.
# NOTE(review): presumably `words` is the vocabulary consumed by the
# vectoriser -- confirm against dataloader.py.
data1 = dataloader.Data("data/train_extract.jsonl")
words = data1.get_words()
vectoriser = dataloader.Vectoriser(words)
def train_network(
    model: torch.nn.Module,
    train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    epochs: int,
    clip: float = 1,
) -> None:
    """
    Train the network for a given number of epochs and print per-epoch stats.

    The model is moved to CUDA when available (CPU otherwise) and optimised
    with Adam (lr=0.01) on the negative log-likelihood loss. After every
    epoch the dev set is scored and one tab-separated line is printed.

    Parameters
    ----------
    model: torch.nn.Module
        Network to train. Its output is fed directly to ``nll_loss``, so it
        is expected to return log-probabilities whose first dimension can be
        aligned with the target.
    train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]]
        Vectorized (text, summary) pairs used for optimisation.
    dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]]
        Vectorized (text, summary) pairs used for evaluation only.
    epochs: int
        Number of passes over ``train_set``.
    clip: float
        Maximum gradient norm passed to ``clip_grad_norm_``; gradients are
        rescaled so their total norm never exceeds this value.

    Returns
    -------
    None

    Notes
    -----
    The column printed as "dev accuracy" is not an accuracy: it is the summed
    dev NLL loss divided by the summed first dimension of the dev sources,
    formatted as a percentage. The label is kept for output compatibility.
    An empty train or dev set yields ``nan`` in the matching column instead
    of raising ``ZeroDivisionError``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    # A plain torch.nn.Module has no ``device`` attribute; fall back to the
    # device chosen above rather than raising AttributeError.
    print("Device check. You are using:", getattr(model, "device", device))

    model.train()
    optim = torch.optim.Adam(model.parameters(), lr=0.01)

    print("Epoch\ttrain loss\tdev accuracy\tcompute time")
    for epoch_n in range(epochs):
        # The reported compute time covers both the training and the dev pass.
        epoch_start_time = time.perf_counter()

        epoch_loss, epoch_length = _train_one_epoch(
            model, train_set, optim, device, clip
        )
        dev_loss, dev_total = _evaluate(model, dev_set, device)

        epoch_compute_time = time.perf_counter() - epoch_start_time

        # Guard the averages so an empty dataset reports nan instead of
        # crashing the whole run.
        train_avg = epoch_loss / epoch_length if epoch_length else float("nan")
        dev_avg = abs(dev_loss / dev_total) if dev_total else float("nan")
        print(
            f"{epoch_n}\t{train_avg:.5}\t{dev_avg:.2%}\t\t"
            f"{datetime.timedelta(seconds=epoch_compute_time)}"
        )


def _train_one_epoch(
    model: torch.nn.Module,
    train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    optim: torch.optim.Optimizer,
    device: torch.device,
    clip: float,
) -> Tuple[float, int]:
    """Run one optimisation pass; return (summed loss, summed source.shape[0])."""
    # Train mode matters for layers that behave differently in train and
    # evaluation (the dev pass switches the model to eval mode).
    model.train()
    loss_sum = 0.0
    length_sum = 0
    for source, target in train_set:
        source = source.to(device)
        target = target.to(device)

        # Gradients accumulate in ``.grad`` by default: reset them so each
        # step only uses the gradient of the current example.
        optim.zero_grad()

        out = model(source).to(device)
        logging.debug("outputs = %s", out.shape)

        # Align the target with the output along the first dimension; the
        # padding index -100 is the default ``ignore_index`` of nll_loss.
        target = torch.nn.functional.pad(
            target, (0, len(out) - len(target)), value=-100
        )
        loss = torch.nn.functional.nll_loss(out, target)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optim.step()

        loss_sum += loss.item()
        length_sum += source.shape[0]
    return loss_sum, length_sum


def _evaluate(
    model: torch.nn.Module,
    dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    device: torch.device,
) -> Tuple[float, int]:
    """Score the dev set without gradients; return (summed loss, summed source.shape[0])."""
    model.eval()
    loss_sum = 0.0
    length_sum = 0
    # No backward pass happens here, so skip building the autograd graph.
    with torch.no_grad():
        for source, target in dev_set:
            source = source.to(device)
            target = target.to(device)

            output = model(source).to(device)
            output_dim = output.shape[-1]
            # The first position of both output and target is dropped before
            # scoring. NOTE(review): presumably a start-of-sequence token --
            # confirm against the model and vectoriser.
            output = output[1:].view(-1, output_dim)
            logging.debug("dev output : %s", output.shape)
            target = target[1:].view(-1)

            # Output and target must have the same length, so the target is
            # padded with -100, which nll_loss ignores by default.
            target = torch.nn.functional.pad(
                target, (0, len(output) - len(target)), value=-100
            )
            loss_sum += torch.nn.functional.nll_loss(output, target).item()
            length_sum += source.shape[0]
    return loss_sum, length_sum