File size: 4,412 Bytes
ad78747
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
"""
Training the network
"""
import datetime
import logging
import time
from typing import Sequence, Tuple

import torch

import dataloader

# Root logger is configured at INFO; every record at DEBUG severity or
# lower is then globally suppressed (logging.DEBUG == 10).
logging.basicConfig(level=logging.INFO)
logging.disable(logging.DEBUG)

# Module-level training data: the words returned by the dataset are handed
# to the Vectoriser constructor.
data1 = dataloader.Data("data/train_extract.jsonl")
words = data1.get_words()
vectoriser = dataloader.Vectoriser(words)


def train_network(
    model: torch.nn.Module,
    train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    epochs: int,
    clip: float = 1,
) -> None:
    """
    Train the EncoderDecoderModel network for a given number of epochs

    For every epoch the model is optimised with Adam (lr=0.01) on each
    (text, summary) pair of the training set, then scored on the dev set.
    One tab-separated report line is printed per epoch.
    -----------
    Parameters
        model: torch.nn.Module
            EncoderDecoderModel defined in model.py
        train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]]
            tuple of vectorized (text, summary) from the training set
        dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]]
            tuple of vectorized (text, summary) for the dev set
        epochs: int
            the number of epochs to train on
        clip: float
            maximum norm of the gradients, passed to
            torch.nn.utils.clip_grad_norm_ before each optimizer step
    Return
        None
    """

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    # Report the device selected above: a generic torch.nn.Module does not
    # expose a `.device` attribute, so reading it from the model may fail.
    print("Device check. You are using:", device)

    optim = torch.optim.Adam(model.parameters(), lr=0.01)

    print("Epoch\ttrain loss\tdev accuracy\tcompute time")

    for epoch_n in range(epochs):
        # Tell the model it's in train mode for layers designed to
        # behave differently in train or evaluation
        # https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch
        model.train()

        # To get the computing time per epoch (training + dev evaluation)
        epoch_start_time = time.time()

        # Running sum of the training losses and of the source lengths
        epoch_loss = 0.0
        epoch_length = 0

        # Iterates over all the text, summary tuples
        for source, target in train_set:
            source = source.to(device)
            target = target.to(device)

            # Gradients accumulate across backward() calls, so they must be
            # reset before each step or every update mixes in stale gradients.
            optim.zero_grad()

            out = model(source).to(device)
            logging.debug("outputs = %s", out.shape)
            # Align the target length with the output length; positions
            # filled with -100 are skipped by nll_loss (its default
            # ignore_index).
            target = torch.nn.functional.pad(
                target, (0, len(out) - len(target)), value=-100
            )
            loss = torch.nn.functional.nll_loss(out, target)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            optim.step()

            epoch_loss += loss.item()
            epoch_length += source.shape[0]

        # To check the model on new data.
        # NOTE: the report column is labelled "dev accuracy", but the value
        # accumulated here is the NLL loss on the dev set.
        dev_loss_sum = 0.0
        dev_total = 0

        # We here want to evaluate the model so we're switching to
        # evaluation mode once, and disabling gradient tracking since no
        # backward pass is performed on the dev set.
        model.eval()
        with torch.no_grad():
            # Iterates over text, summary tuple from dev
            for source, target in dev_set:
                source = source.to(device)
                target = target.to(device)

                # We compute the result
                output = model(source).to(device)

                output_dim = output.shape[-1]

                # The first position of both output and target is dropped
                # before the comparison.
                output = output[1:].view(-1, output_dim)
                logging.debug("dev output : %s", output.shape)
                target = target[1:].view(-1)
                # To compare the output with the target,
                # they have to be of same length so we're
                # padding the target with -100 idx that will
                # be ignored by the nll_loss function
                target = torch.nn.functional.pad(
                    target, (0, len(output) - len(target)), value=-100
                )
                dev_loss = torch.nn.functional.nll_loss(output, target)
                dev_loss_sum += dev_loss.item()
                dev_total += source.shape[0]

        # Compute of the epoch training time
        epoch_compute_time = time.time() - epoch_start_time

        # An empty train or dev set would otherwise divide by zero;
        # report NaN for the corresponding column instead of crashing.
        train_metric = epoch_loss / epoch_length if epoch_length else float("nan")
        dev_metric = dev_loss_sum / dev_total if dev_total else float("nan")

        print(
            f"{epoch_n}\t{train_metric:.5}\t{abs(dev_metric):.2%}\t\t{datetime.timedelta(seconds=epoch_compute_time)}"
        )