Spaces:

EveSa
/

SummaryProject

Runtime error

App Files Files Community

SummaryProject / src /train.py

EveSa

Initial Commit

ad78747 over 1 year ago

raw

history blame

No virus

4.41 kB

	"""
	Training the network
	"""
	import datetime
	import logging
	import time
	from typing import Sequence, Tuple

	import torch

	import dataloader

	# Logging setup. Available severities: DEBUG, INFO, WARNING, ERROR, CRITICAL.
	# The root logger is configured at INFO ...
	logging.basicConfig(level=logging.INFO)
	# ... and every record at DEBUG severity (numeric level 10) or lower is
	# suppressed process-wide, which silences the logging.debug calls below.
	logging.disable(logging.DEBUG)

	# Module-level data objects, created once at import time from the training
	# extract. NOTE(review): presumably `words` is the vocabulary used to build
	# the vectoriser -- confirm against dataloader.py.
	data1 = dataloader.Data("data/train_extract.jsonl")
	words = data1.get_words()
	vectoriser = dataloader.Vectoriser(words)


	def train_network(
	    model: torch.nn.Module,
	    train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
	    dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
	    epochs: int,
	    clip: float = 1,
	) -> None:
	    """
	    Train the EncoderDecoderModel network for a given number of epochs.

	    For every epoch the model is optimised with Adam (lr=0.01) on each
	    (source, target) pair of ``train_set``, then evaluated on ``dev_set``.
	    One summary line per epoch is printed to stdout.
	    -----------
	    Parameters
	    model: torch.nn.Module
	        Network to train; called as ``model(source)`` and expected to
	        return log-probabilities usable by ``nll_loss``
	        (EncoderDecoderModel defined in model.py).
	    train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]]
	        Vectorized (text, summary) pairs from the training set.
	    dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]]
	        Vectorized (text, summary) pairs from the dev set.
	    epochs: int
	        Number of epochs to train for.
	    clip: float
	        Maximum gradient norm; gradients are rescaled with
	        ``torch.nn.utils.clip_grad_norm_`` before each optimiser step.
	    Return
	    None

	    Notes
	    The column printed as "dev accuracy" is the summed dev NLL loss
	    divided by the summed source lengths, formatted as a percentage;
	    it is not a classification accuracy. The label is kept unchanged so
	    that the printed output format stays the same.
	    If ``train_set`` or ``dev_set`` is empty the corresponding column is
	    printed as ``nan`` instead of raising ``ZeroDivisionError``.
	    """
	    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	    model = model.to(device)
	    # Print the resolved device: a plain torch.nn.Module has no `.device`
	    # attribute, so reading `model.device` can raise AttributeError.
	    print("Device check. You are using:", device)
	    model.train()

	    optim = torch.optim.Adam(model.parameters(), lr=0.01)

	    print("Epoch\ttrain loss\tdev accuracy\tcompute time")

	    for epoch_n in range(epochs):
	        # Switch layers that behave differently in train / evaluation
	        # (dropout, batch-norm, ...) back to training mode.
	        model.train()

	        # To get the computing time per epoch
	        epoch_start_time = time.time()

	        # Running sums used for the per-epoch training loss
	        epoch_loss = 0.0
	        epoch_length = 0

	        # Iterates over all the (text, summary) tuples
	        for source, target in train_set:
	            source = source.to(device)
	            target = target.to(device)

	            # Reset gradients left over from the previous step; without
	            # this they accumulate across every sample and every epoch.
	            optim.zero_grad()

	            out = model(source)
	            logging.debug("outputs = %s", out.shape)
	            # Output and target must have the same length, so the target
	            # is padded with -100, the index nll_loss ignores by default.
	            # NOTE(review): unlike the dev loop, the first position is not
	            # dropped here -- confirm which alignment is intended.
	            target = torch.nn.functional.pad(
	                target, (0, len(out) - len(target)), value=-100
	            )
	            loss = torch.nn.functional.nll_loss(out, target)
	            loss.backward()
	            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
	            optim.step()

	            epoch_loss += loss.item()
	            epoch_length += source.shape[0]

	        # Running sums used for the dev column of the summary line
	        dev_loss_sum = 0.0
	        dev_total = 0

	        # Evaluation: switch to eval mode once and disable autograd so no
	        # graph is built (saves memory and time, results are unchanged).
	        model.eval()
	        with torch.no_grad():
	            for source, target in dev_set:
	                source = source.to(device)
	                target = target.to(device)

	                output = model(source)
	                output_dim = output.shape[-1]

	                # Drop the first position of both output and target
	                output = output[1:].view(-1, output_dim)
	                logging.debug("dev output : %s", output.shape)
	                target = target[1:].view(-1)
	                # Same -100 padding as in training so both have equal length
	                target = torch.nn.functional.pad(
	                    target, (0, len(output) - len(target)), value=-100
	                )
	                dev_loss = torch.nn.functional.nll_loss(output, target)
	                dev_loss_sum += dev_loss.item()
	                dev_total += source.shape[0]

	        # Compute of the epoch training time
	        epoch_compute_time = time.time() - epoch_start_time

	        # Guard against empty data sets (or zero-length sources)
	        train_avg = epoch_loss / epoch_length if epoch_length else float("nan")
	        dev_avg = abs(dev_loss_sum / dev_total) if dev_total else float("nan")

	        print(
	            f"{epoch_n}\t{train_avg:.5}\t{dev_avg:.2%}\t\t{datetime.timedelta(seconds=epoch_compute_time)}"
	        )