"""
Training the network
"""
import datetime
import logging
import random
import time
from typing import Sequence, Tuple
import torch
import dataloader
from model import Decoder, Encoder, EncoderDecoderModel
# logging INFO, WARNING, ERROR, CRITICAL, DEBUG
logging.basicConfig(level=logging.INFO)
logging.disable(level=10)
def train_network(
    model: torch.nn.Module,
    train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    epochs: int,
    clip: int = 1,
):
    """
    Train the EncoderDecoderModel network for a given number of epochs.
    -----------
    Parameters
    model: torch.nn.Module
        EncoderDecoderModel defined in model.py
    train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]]
        tuples of vectorized (text, summary) from the training set
    dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]]
        tuples of vectorized (text, summary) for the dev set
    epochs: int
        the number of epochs to train on
    clip: int
        max norm used by clip_grad_norm_ to limit exploding gradients
    Return
    None
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    # BUG FIX: a generic nn.Module has no `.device` attribute; report the
    # device we actually selected instead of `model.device`.
    print("Device check. You are using:", device)
    optim = torch.optim.Adam(model.parameters(), lr=0.01)
    print("Epoch\ttrain loss\tdev accuracy\tcompute time")
    for epoch_n in range(epochs):
        # Switch layers such as dropout to training behaviour.
        # https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch
        model.train()
        # To get the computing time per epoch
        epoch_start_time = time.time()
        # To get the model accuracy per epoch
        epoch_loss = 0.0
        epoch_length = 0
        # Iterates over all the (text, summary) tuples
        for source, target in train_set:
            source = source.to(device)
            target = target.to(device)
            # BUG FIX: reset gradients before each step; without this, Adam
            # accumulates gradients across every batch and epoch.
            optim.zero_grad()
            out = model(source).to(device)
            logging.debug(f"outputs = {out.shape}")
            # Pad the target with -100 (nll_loss's default ignore_index) so
            # both tensors have the same length.
            target = torch.nn.functional.pad(
                target, (0, len(out) - len(target)), value=-100
            )
            loss = torch.nn.functional.nll_loss(out, target)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            optim.step()
            epoch_loss += loss.item()
            epoch_length += source.shape[0]
        # Evaluate on unseen data: eval mode once (not per batch) and no
        # gradient tracking, since we only need the forward pass.
        model.eval()
        dev_correct = 0
        dev_total = 0
        with torch.no_grad():
            # Iterates over (text, summary) tuples from dev
            for source, target in dev_set:
                source = source.to(device)
                target = target.to(device)
                # We compute the result
                output = model(source)
                output_dim = output.shape[-1]
                output = output[1:].view(-1, output_dim)
                logging.debug(f"dev output : {output.shape}")
                target = target[1:].view(-1)
                # To compare the output with the target, they have to be of
                # the same length, so we pad the target with the -100 index
                # that is ignored by nll_loss.
                target = torch.nn.functional.pad(
                    target, (0, len(output) - len(target)), value=-100
                )
                dev_loss = torch.nn.functional.nll_loss(output, target)
                dev_correct += dev_loss.item()
                dev_total += source.shape[0]
        # Compute the epoch training time
        epoch_compute_time = time.time() - epoch_start_time
        print(
            f"{epoch_n}\t{epoch_loss/epoch_length:.5}\t{abs(dev_correct/dev_total):.2%}\t\t{datetime.timedelta(seconds=epoch_compute_time)}"
        )
def predict(model, tokens: Sequence[str]) -> Sequence[str]:
    """Predict the output token sequence for a tokenized input sequence.

    NOTE: relies on the module-level ``vectoriser`` and ``device`` created in
    the ``__main__`` block, so it must only be called after those exist.
    """
    words_idx = vectoriser.encode(tokens).to(device)
    # No gradient computation here: this is inference only.
    with torch.no_grad():
        # Equivalent to model.forward(input) when called outside the class.
        out = model(words_idx)
    # Greedy decoding: keep the highest-scoring index at each position.
    # (BUG FIX: removed leftover debug print and a redundant .to(device).)
    out_predictions = out.argmax(dim=-1)
    return vectoriser.decode(out_predictions)
if __name__ == "__main__":
    # First pass over the training file without a transform, only to collect
    # the vocabulary used to build the vectoriser...
    train_dataset = dataloader.Data("data/train_extract.jsonl")
    words = train_dataset.get_words()
    vectoriser = dataloader.Vectoriser(words)
    # ...then reload both splits with the vectoriser applied as a transform.
    train_dataset = dataloader.Data("data/train_extract.jsonl", transform=vectoriser)
    dev_dataset = dataloader.Data("data/dev_extract.jsonl", transform=vectoriser)
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset, batch_size=2, shuffle=True, collate_fn=dataloader.pad_collate
    )
    dev_dataloader = torch.utils.data.DataLoader(
        dev_dataset, batch_size=4, shuffle=True, collate_fn=dataloader.pad_collate
    )
    # NOTE(review): debug leftover — this prints every batch of the training
    # loader before training starts; consider removing for real runs.
    for i_batch, batch in enumerate(train_dataloader):
        print(i_batch, batch[0], batch[1])
    ### NEURAL NETWORK ###
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device check. You are using:", device)
    ### TRAINED NETWORK ###
    # To make sure results are the same on every run of the notebook
    torch.use_deterministic_algorithms(True)
    torch.manual_seed(0)
    random.seed(0)
    # The encoder can also be trained separately
    # (vocab size is len(idx_to_token) + 1, embedding 256, hidden 512, dropout 0.5)
    encoder = Encoder(len(vectoriser.idx_to_token) + 1, 256, 512, 0.5, device)
    decoder = Decoder(len(vectoriser.idx_to_token) + 1, 256, 512, 0.5, device)
    trained_classifier = EncoderDecoderModel(encoder, decoder, vectoriser, device).to(
        device
    )
    print(next(trained_classifier.parameters()).device)
    # print(train_dataset.is_cuda)
    # NOTE(review): the DataLoaders built above are never used for training —
    # train_network receives the raw datasets. Confirm whether the loaders
    # (with pad_collate batching) were meant to be passed here instead.
    train_network(
        trained_classifier,
        train_dataset,
        dev_dataset,
        2,
    )
    # Persist the trained weights and the vocabulary for later inference.
    # NOTE(review): assumes the "model/" directory already exists — confirm.
    torch.save(trained_classifier.state_dict(), "model/model.pt")
    vectoriser.save("model/vocab.pkl")
    trained_classifier.config.to_json_file("config.json")
    # Quick sanity check on one dev example: reference summary vs prediction.
    print(f"test summary : {vectoriser.decode(dev_dataset[6][1])}")
    print(
        f"test prediction : {predict(trained_classifier, vectoriser.decode(dev_dataset[6][0]))}"
    )