# SummaryProject — src/train.py (commit 3c03f61, "refactoring de requirements.txt")
"""
Training the network
"""
import datetime
import logging
import random
import time
from typing import Sequence, Tuple
import torch
import dataloader
from model import Decoder, Encoder, EncoderDecoderModel
# Logging levels: DEBUG=10, INFO=20, WARNING=30, ERROR=40, CRITICAL=50
logging.basicConfig(level=logging.INFO)
logging.disable(level=10)
def train_network(
    model: torch.nn.Module,
    train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    epochs: int,
    clip: int = 1,
):
    """
    Train the EncoderDecoderModel network for a given number of epochs.
    -----------
    Parameters
    model: torch.nn.Module
        EncoderDecoderModel defined in model.py (any module mapping a
        source tensor to per-position log-probabilities works)
    train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]]
        tuples of vectorized (text, summary) from the training set
    dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]]
        tuples of vectorized (text, summary) for the dev set
    epochs: int
        the number of epochs to train on
    clip: int
        maximum gradient norm passed to clip_grad_norm_
    Return
    None
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    # BUG FIX: a generic nn.Module has no `.device` attribute — report the
    # device actually selected above instead of `model.device`.
    print("Device check. You are using:", device)
    optim = torch.optim.Adam(model.parameters(), lr=0.01)
    # NOTE: the third column is the mean dev *loss*, not an accuracy.
    print("Epoch\ttrain loss\tdev accuracy\tcompute time")
    for epoch_n in range(epochs):
        # Train mode: enables dropout/batch-norm training behaviour.
        model.train()
        epoch_start_time = time.time()  # per-epoch wall-clock timing
        epoch_loss = 0.0
        epoch_length = 0
        # Iterate over all (text, summary) pairs
        for source, target in train_set:
            source = source.to(device)
            target = target.to(device)
            # BUG FIX: gradients must be reset every step, otherwise they
            # accumulate across batches and epochs and corrupt the updates.
            optim.zero_grad()
            out = model(source)
            logging.debug(f"outputs = {out.shape}")
            # Pad the target with -100 (nll_loss's default ignore_index)
            # so it matches the output length.
            target = torch.nn.functional.pad(
                target, (0, len(out) - len(target)), value=-100
            )
            loss = torch.nn.functional.nll_loss(out, target)
            loss.backward()
            # Clip gradients to stabilise RNN training.
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            optim.step()
            epoch_loss += loss.item()
            epoch_length += source.shape[0]
        # --- Dev evaluation -------------------------------------------
        # Eval mode once for the whole pass, and no autograd graph needed.
        model.eval()
        dev_correct = 0
        dev_total = 0
        with torch.no_grad():
            for source, target in dev_set:
                source = source.to(device)
                target = target.to(device)
                output = model(source)
                output_dim = output.shape[-1]
                # Drop the first position (e.g. start token) and flatten.
                output = output[1:].view(-1, output_dim)
                logging.debug(f"dev output : {output.shape}")
                target = target[1:].view(-1)
                # Same -100 padding trick so lengths match for nll_loss.
                target = torch.nn.functional.pad(
                    target, (0, len(output) - len(target)), value=-100
                )
                dev_loss = torch.nn.functional.nll_loss(output, target)
                dev_correct += dev_loss.item()
                dev_total += source.shape[0]
        # Per-epoch wall-clock time
        epoch_compute_time = time.time() - epoch_start_time
        print(
            f"{epoch_n}\t{epoch_loss/epoch_length:.5}\t{abs(dev_correct/dev_total):.2%}\t\t{datetime.timedelta(seconds=epoch_compute_time)}"
        )
def predict(model, tokens: Sequence[str]) -> Sequence[str]:
    """Return the decoded output sequence predicted by *model* for *tokens*.

    Encodes *tokens* with the module-level ``vectoriser``, runs the model,
    and greedily decodes the highest-scoring index at each position.
    Relies on the ``vectoriser`` and ``device`` globals set in ``__main__``.
    """
    words_idx = vectoriser.encode(tokens).to(device)
    # Inference only — no gradient tracking needed.
    with torch.no_grad():
        out = model(words_idx)
    # Greedy decoding: argmax over the vocabulary dimension.
    # (Removed a leftover debug print of the raw scores and redundant
    # .to(device) calls on a tensor already produced on `device`.)
    out_predictions = out.argmax(dim=-1)
    return vectoriser.decode(out_predictions)
if __name__ == "__main__":
    # --- Data preparation -------------------------------------------------
    # First pass over the training file just to collect the vocabulary.
    train_dataset = dataloader.Data("data/train_extract.jsonl")
    words = train_dataset.get_words()
    vectoriser = dataloader.Vectoriser(words)
    # Reload both splits with the vectoriser so samples come out encoded.
    train_dataset = dataloader.Data(
        "data/train_extract.jsonl",
        transform=vectoriser)
    dev_dataset = dataloader.Data(
        "data/dev_extract.jsonl",
        transform=vectoriser)
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=2,
        shuffle=True,
        collate_fn=dataloader.pad_collate)
    dev_dataloader = torch.utils.data.DataLoader(
        dev_dataset,
        batch_size=4,
        shuffle=True,
        collate_fn=dataloader.pad_collate)
    # NOTE(review): debug leftover — this prints every training batch to
    # stdout before training even starts; consider removing.
    for i_batch, batch in enumerate(train_dataloader):
        print(i_batch, batch[0], batch[1])
    ### NEURAL NETWORK ###
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device check. You are using:", device)
    ### TRAINED NETWORK ###
    # Ensure results are identical on every run.
    torch.use_deterministic_algorithms(True)
    torch.manual_seed(0)
    random.seed(0)
    # The encoder could also be trained separately.
    encoder = Encoder(len(vectoriser.idx_to_token) + 1, 256, 512, 0.5, device)
    decoder = Decoder(len(vectoriser.idx_to_token) + 1, 256, 512, 0.5, device)
    trained_classifier = EncoderDecoderModel(
        encoder, decoder, vectoriser, device).to(device)
    print(next(trained_classifier.parameters()).device)
    # NOTE(review): train_network is given the raw datasets, while the
    # DataLoaders built above are unused except for the debug loop —
    # confirm whether batched training via the loaders was intended.
    train_network(
        trained_classifier,
        train_dataset,
        dev_dataset,
        2,
    )
    # Persist weights and vocabulary for later inference.
    torch.save(trained_classifier.state_dict(), "model/model.pt")
    vectoriser.save("model/vocab.pkl")
    # Quick qualitative check on one dev example.
    print(f"test summary : {vectoriser.decode(dev_dataset[6][1])}")
    print(
        f"test prediction : {predict(trained_classifier, vectoriser.decode(dev_dataset[6][0]))}"
    )