# Hosting-page status banner captured with this file: "Spaces: Runtime error".
"""
Train the encoder-decoder network, then save the model weights and vocabulary.
"""
import datetime
import logging
import os
import random
import time
from typing import Sequence, Tuple

import torch

import dataloader
from model import Decoder, Encoder, EncoderDecoderModel
# Available log levels: DEBUG, INFO, WARNING, ERROR, CRITICAL.
logging.basicConfig(level=logging.INFO)
# Silence every logging call of severity DEBUG and below; remove this line
# to see the logging.debug traces emitted during training.
logging.disable(logging.DEBUG)
def _pad_target(target: torch.Tensor, length: int) -> torch.Tensor:
    """Pad ``target`` on the right of its last dimension with -100.

    -100 is the default ``ignore_index`` of ``nll_loss``, so the padded
    positions do not contribute to the loss. The pad amount is
    ``length - len(target)`` and is handed to ``torch.nn.functional.pad``
    unchanged, including when it is negative.
    """
    return torch.nn.functional.pad(target, (0, length - len(target)), value=-100)


def train_network(
    model: torch.nn.Module,
    train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    epochs: int,
    clip: float = 1,
):
    """
    Train a network for a given number of epochs and print per-epoch stats
    -----------
    Parameters
        model: torch.nn.Module
            the network to train; it is called as ``model(source)`` and its
            output is fed to ``nll_loss`` (so it is presumably a tensor of
            log-probabilities -- TODO confirm against model.py)
        train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]]
            iterable of (source, target) tensor pairs used for optimisation
        dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]]
            iterable of (source, target) tensor pairs used for evaluation only
        epochs: int
            the number of epochs to train on
        clip: float
            maximum gradient norm, passed to ``torch.nn.utils.clip_grad_norm_``
            before every optimizer step
    Return
        None
    Side effects
        Moves ``model`` to CUDA when available (CPU otherwise), updates its
        parameters in place, leaves it in evaluation mode and prints one
        line of statistics per epoch.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    # Report the local device: a generic torch.nn.Module has no ``device``
    # attribute, so ``model.device`` is not safe to rely on.
    print("Device check. You are using:", device)

    optim = torch.optim.Adam(model.parameters(), lr=0.01)

    # NOTE(review): the third column is labelled "dev accuracy" but the value
    # printed below is an averaged dev *loss*; label kept to preserve the
    # output format.
    print("Epoch\ttrain loss\tdev accuracy\tcompute time")
    for epoch_n in range(epochs):
        epoch_start_time = time.time()

        # Training mode matters for layers that behave differently in
        # train and evaluation (e.g. dropout).
        model.train()
        epoch_loss = 0.0
        epoch_length = 0
        for source, target in train_set:
            source = source.to(device)
            target = target.to(device)

            # Reset gradients left over from the previous step; without this
            # every backward() adds onto the gradients of all earlier steps.
            optim.zero_grad()

            out = model(source).to(device)
            logging.debug("outputs = %s", out.shape)
            target = _pad_target(target, len(out))

            loss = torch.nn.functional.nll_loss(out, target)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            optim.step()

            epoch_loss += loss.item()
            epoch_length += source.shape[0]

        # Evaluation: switch mode once and skip autograd bookkeeping, since
        # no backward pass is run on the dev set.
        model.eval()
        dev_loss_sum = 0.0
        dev_total = 0
        with torch.no_grad():
            for source, target in dev_set:
                source = source.to(device)
                target = target.to(device)

                output = model(source).to(device)
                output_dim = output.shape[-1]
                # The first position of both output and target is dropped
                # before flattening (presumably a start-of-sequence slot --
                # TODO confirm; the training loop above does not do this).
                output = output[1:].view(-1, output_dim)
                logging.debug("dev output : %s", output.shape)
                target = _pad_target(target[1:].view(-1), len(output))

                dev_loss = torch.nn.functional.nll_loss(output, target)
                dev_loss_sum += dev_loss.item()
                dev_total += source.shape[0]

        epoch_compute_time = time.time() - epoch_start_time
        # Guard against empty datasets instead of raising ZeroDivisionError.
        train_avg = epoch_loss / epoch_length if epoch_length else float("nan")
        dev_avg = abs(dev_loss_sum / dev_total) if dev_total else float("nan")
        elapsed = datetime.timedelta(seconds=epoch_compute_time)
        print(f"{epoch_n}\t{train_avg:.5}\t{dev_avg:.2%}\t\t{elapsed}")
def predict(model, tokens: Sequence[str], vocab=None) -> Sequence[str]:
    """Run ``model`` on a tokenized sequence and decode its arg-max output.

    Parameters
        model: the network to query; it is switched to evaluation mode and
            called as ``model(encoded_tokens)``
        tokens: the tokenized input sequence, encoded with ``vocab.encode``
        vocab: object providing ``encode`` and ``decode``; when omitted, the
            module-level ``vectoriser`` created by the script entry point is
            used (NameError if it has not been defined)
    Return
        whatever ``vocab.decode`` returns for the arg-max (over the last
        dimension) of the model output
    Side effects
        Leaves ``model`` in evaluation mode.
    """
    if vocab is None:
        # Backward-compatible default: the global built under __main__.
        vocab = vectoriser

    # Use the device the model lives on rather than a module-level global,
    # so the function also works when this file is imported.
    first_param = next(model.parameters(), None)
    target_device = (
        first_param.device if first_param is not None else torch.device("cpu")
    )
    words_idx = vocab.encode(tokens).to(target_device)

    # Evaluation mode disables train-only behaviour such as dropout.
    model.eval()
    # No gradient computation here: this is prediction only.
    with torch.no_grad():
        out = model(words_idx)
    logging.debug("raw predictions = %s", out)

    return vocab.decode(out.argmax(dim=-1))
if __name__ == "__main__":
    # Load the raw training file once to collect the words, then build the
    # vectoriser from them.
    train_dataset = dataloader.Data("data/train_extract.jsonl")
    words = train_dataset.get_words()
    vectoriser = dataloader.Vectoriser(words)

    # Reload train and dev with the vectoriser applied as a transform.
    train_dataset = dataloader.Data("data/train_extract.jsonl", transform=vectoriser)
    dev_dataset = dataloader.Data("data/dev_extract.jsonl", transform=vectoriser)

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset, batch_size=2, shuffle=True, collate_fn=dataloader.pad_collate
    )
    dev_dataloader = torch.utils.data.DataLoader(
        dev_dataset, batch_size=4, shuffle=True, collate_fn=dataloader.pad_collate
    )

    # Visual sanity check of the padded batches.
    for i_batch, batch in enumerate(train_dataloader):
        print(i_batch, batch[0], batch[1])

    ### NEURAL NETWORK ###
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device check. You are using:", device)

    ### TRAINED NETWORK ###
    # Make results reproducible from one run to the next.
    torch.use_deterministic_algorithms(True)
    torch.manual_seed(0)
    random.seed(0)

    # Encoder and decoder share the same vocabulary size (one extra index on
    # top of the vectoriser's token table).
    vocab_size = len(vectoriser.idx_to_token) + 1
    # The encoder could also be trained separately.
    encoder = Encoder(vocab_size, 256, 512, 0.5, device)
    decoder = Decoder(vocab_size, 256, 512, 0.5, device)

    trained_classifier = EncoderDecoderModel(encoder, decoder, vectoriser, device).to(
        device
    )

    print(next(trained_classifier.parameters()).device)

    # NOTE(review): the datasets, not the batched dataloaders built above,
    # are what gets passed to training -- confirm this is intended.
    train_network(
        trained_classifier,
        train_dataset,
        dev_dataset,
        2,
    )

    # Create the output directory first so a missing folder does not throw
    # away a finished training run.
    os.makedirs("model", exist_ok=True)
    torch.save(trained_classifier.state_dict(), "model/model.pt")
    vectoriser.save("model/vocab.pkl")

    # Qualitative check on one dev example (index 6 is hard-coded and
    # assumes the dev set has at least seven items).
    print(f"test summary : {vectoriser.decode(dev_dataset[6][1])}")
    print(
        f"test prediction : {predict(trained_classifier, vectoriser.decode(dev_dataset[6][0]))}"
    )