| """ | |
| Training the network | |
| """ | |
| import datetime | |
| import logging | |
| import random | |
| import time | |
| from typing import Sequence, Tuple | |
| import torch | |
| import dataloader | |
| from model import Decoder, Encoder, EncoderDecoderModel | |
| # logging INFO, WARNING, ERROR, CRITICAL, DEBUG | |
| logging.basicConfig(level=logging.INFO) | |
| logging.disable(level=10) | |


def train_network(
    model: torch.nn.Module,
    train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    epochs: int,
    clip: int = 1,
):
    """
    Train the EncoderDecoderModel network for a given number of epochs

    Parameters
    ----------
    model: torch.nn.Module
        EncoderDecoderModel defined in model.py
    train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]]
        tuples of vectorized (text, summary) pairs from the training set
    dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]]
        tuples of vectorized (text, summary) pairs from the dev set
    epochs: int
        the number of epochs to train for
    clip: int
        maximum gradient norm used for gradient clipping

    Returns
    -------
    None
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    print("Device check. You are using:", next(model.parameters()).device)

    optim = torch.optim.Adam(model.parameters(), lr=0.01)

    print("Epoch\ttrain loss\tdev loss\tcompute time")
    for epoch_n in range(epochs):
        # Tell the model it's in train mode for layers designed to
        # behave differently in train or evaluation
        # https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch
        model.train()

        # To get the computing time per epoch
        epoch_start_time = time.time()
        # To track the model loss per epoch
        epoch_loss = 0.0
        epoch_length = 0

        # Iterates over all the (text, summary) tuples
        for source, target in train_set:
            source = source.to(device)
            target = target.to(device)

            # DEBUG block
            # logging.debug("TRAIN")
            # logging.debug(f"cuda available? {torch.cuda.is_available()}")
            # logging.debug(f"Source on cuda? {source.is_cuda}")
            # logging.debug(f"Target on cuda? {target.is_cuda}")

            # Reset the gradients before each backward pass, otherwise
            # they accumulate across batches
            optim.zero_grad()
            out = model(source)
            logging.debug(f"outputs = {out.shape}")

            # Pad the target to the output length with -100, the index
            # that nll_loss ignores by default (ignore_index=-100)
            target = torch.nn.functional.pad(
                target, (0, len(out) - len(target)), value=-100
            )
            # logging.debug(f"prediction: {vectoriser.decode(output_predictions)}")
            loss = torch.nn.functional.nll_loss(out, target)
            loss.backward()
            # Rescale gradients so their global norm does not exceed `clip`
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            optim.step()

            epoch_loss += loss.item()
            epoch_length += source.shape[0]
        # To check the model loss on unseen data
        dev_loss_sum = 0.0
        dev_total = 0
        # We are evaluating the model here,
        # so we switch to evaluation mode once,
        # and skip gradient computation entirely
        model.eval()
        with torch.no_grad():
            # Iterates over the (text, summary) tuples from dev
            for source, target in dev_set:
                source = source.to(device)
                target = target.to(device)

                # We compute the result
                output = model(source)
                output_dim = output.shape[-1]
                output = output[1:].view(-1, output_dim)
                logging.debug(f"dev output: {output.shape}")
                target = target[1:].view(-1)

                # To compare the output with the target,
                # they have to be of the same length, so we
                # pad the target with the -100 index, which
                # is ignored by the nll_loss function
                target = torch.nn.functional.pad(
                    target, (0, len(output) - len(target)), value=-100
                )
                dev_loss = torch.nn.functional.nll_loss(output, target)
                dev_loss_sum += dev_loss.item()
                dev_total += source.shape[0]
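
        # Illustration of the -100 padding trick above (a sketch, not part
        # of the training flow): F.pad with value=-100 right-pads a 1-D
        # target, and nll_loss skips those positions because its default
        # ignore_index is -100. For example:
        #   t = torch.tensor([3, 1])
        #   torch.nn.functional.pad(t, (0, 2), value=-100)
        #   # -> tensor([   3,    1, -100, -100])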

        # Compute the epoch training time
        epoch_compute_time = time.time() - epoch_start_time
        print(
            f"{epoch_n}\t{epoch_loss/epoch_length:.5}\t{dev_loss_sum/dev_total:.5}\t\t{datetime.timedelta(seconds=epoch_compute_time)}"
        )


def predict(model, tokens: Sequence[str]) -> Sequence[str]:
    """Predict the summary for a tokenized sequence.

    Relies on the module-level `vectoriser` and `device` created in the
    __main__ block below.
    """
    words_idx = vectoriser.encode(tokens).to(device)
    # No gradient computation here: this is only for predictions
    with torch.no_grad():
        # equivalent to model.forward(input) when called from outside the class
        out = model(words_idx)
        logging.debug(f"raw predictions: {out}")
        out_predictions = out.argmax(dim=-1)
    return vectoriser.decode(out_predictions)
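

# Example use (hypothetical token sequence; assumes the trained model and
# the module-level vectoriser/device from the __main__ block below):
#   predict(trained_classifier, ["the", "cat", "sat", "on", "the", "mat"])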


if __name__ == "__main__":
    train_dataset = dataloader.Data("data/train_extract.jsonl")
    words = train_dataset.get_words()
    vectoriser = dataloader.Vectoriser(words)

    train_dataset = dataloader.Data(
        "data/train_extract.jsonl", transform=vectoriser)
    dev_dataset = dataloader.Data(
        "data/dev_extract.jsonl", transform=vectoriser)

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=2,
        shuffle=True,
        collate_fn=dataloader.pad_collate)
    dev_dataloader = torch.utils.data.DataLoader(
        dev_dataset,
        batch_size=4,
        shuffle=True,
        collate_fn=dataloader.pad_collate)
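
    # Note: train_network below iterates over train_dataset and dev_dataset
    # one example at a time; the batched dataloaders above are only used for
    # the sanity-check print that follows.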
    # Sanity check: print the batches produced by the dataloader
    for i_batch, batch in enumerate(train_dataloader):
        print(i_batch, batch[0], batch[1])

    ### NEURAL NETWORK ###
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device check. You are using:", device)

    ### TRAINED NETWORK ###
    # To make sure the results are the same on every run
    torch.use_deterministic_algorithms(True)
    torch.manual_seed(0)
    random.seed(0)
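
    # Note (assumption about the runtime environment): on CUDA,
    # torch.use_deterministic_algorithms(True) makes some ops (e.g. cuBLAS
    # matmuls) raise unless CUBLAS_WORKSPACE_CONFIG is set beforehand, e.g.
    #   import os
    #   os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"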

    # The encoder can also be trained separately
    encoder = Encoder(len(vectoriser.idx_to_token) + 1, 256, 512, 0.5, device)
    decoder = Decoder(len(vectoriser.idx_to_token) + 1, 256, 512, 0.5, device)
    trained_classifier = EncoderDecoderModel(
        encoder, decoder, vectoriser, device).to(device)
    print(next(trained_classifier.parameters()).device)

    train_network(
        trained_classifier,
        train_dataset,
        dev_dataset,
        2,
    )

    torch.save(trained_classifier.state_dict(), "model/model.pt")
    vectoriser.save("model/vocab.pkl")
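
    # To reload later (a sketch; assumes the same Encoder/Decoder
    # hyperparameters as above and the saved vocabulary):
    #   model = EncoderDecoderModel(encoder, decoder, vectoriser, device)
    #   model.load_state_dict(torch.load("model/model.pt", map_location=device))
    #   model.eval()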

    print(f"test summary : {vectoriser.decode(dev_dataset[6][1])}")
    print(
        f"test prediction : {predict(trained_classifier, vectoriser.decode(dev_dataset[6][0]))}"
    )