Update PrateritumGPT.py
PrateritumGPT.py (changed, +21 -25)
@@ -49,26 +49,26 @@ MyDataset = CSVDataset(features=features, labels=labels)
 class TransformerModel(nn.Module):
     def __init__(self, vocab_size, emb_dim, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward, dropout=0.1):
         super().__init__()
-        self.custom_embedding = nn.Embedding(vocab_size, emb_dim)
-        self.pos_encoder = PositionalEncoding(emb_dim, dropout)
-        encoder_layer = nn.TransformerEncoderLayer(emb_dim, nhead, dim_feedforward, dropout, batch_first=True)
+        self.custom_embedding = nn.Embedding(vocab_size, emb_dim).to("cuda")
+        self.pos_encoder = PositionalEncoding(emb_dim, dropout).to("cuda")
+        encoder_layer = nn.TransformerEncoderLayer(emb_dim, nhead, dim_feedforward, dropout, batch_first=True).to("cuda")
         self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_encoder_layers)
-        decoder_layer = nn.TransformerDecoderLayer(emb_dim, nhead, dim_feedforward, dropout, batch_first=True)
+        decoder_layer = nn.TransformerDecoderLayer(emb_dim, nhead, dim_feedforward, dropout, batch_first=True).to("cuda")
         self.transformer_decoder = nn.TransformerDecoder(decoder_layer, num_decoder_layers)
-        self.output_layer = nn.Linear(emb_dim, vocab_size)
+        self.output_layer = nn.Linear(emb_dim, vocab_size).to("cuda")

     def forward(self, src, tgt, src_mask=None, tgt_mask=None, memory_mask=None, src_key_padding_mask=None, tgt_key_padding_mask=None, memory_key_padding_mask=None):
         src_emb = self.custom_embedding(src.long())
-        print("Source Embedding:", src_emb.shape)
+        #print("Source Embedding:", src_emb.shape)
         src_emb = self.pos_encoder(src_emb)
-        print("Source Embedding:", src_emb.shape)
+        #print("Source Embedding:", src_emb.shape)
         tgt_emb = self.custom_embedding(tgt.long())
-        print("Target Embedding:", tgt_emb.shape)
+        #print("Target Embedding:", tgt_emb.shape)
         tgt_emb = self.pos_encoder(tgt_emb)
-        print("Target Embedding:", tgt_emb.shape)
+        #print("Target Embedding:", tgt_emb.shape)
         encoder_output = self.transformer_encoder(src_emb, src_mask, src_key_padding_mask)
         decoder_output = self.transformer_decoder(tgt_emb, encoder_output, tgt_mask, memory_mask, tgt_key_padding_mask, memory_key_padding_mask)
-        output = self.output_layer(decoder_output)
+        output = self.output_layer(decoder_output[:, -1, :])
         return output

 class PositionalEncoding(nn.Module):
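Note on the output-head change above: slicing decoder_output[:, -1, :] before the linear layer means forward now returns logits for the last target position only, a [batch_size, vocab_size] tensor instead of [batch_size, tgt_len, vocab_size]. A minimal shape sketch (not part of the commit; batch size, target length and vocabulary size are illustrative values):

    import torch
    import torch.nn as nn

    batch_size, tgt_len, emb_dim, vocab_size = 32, 7, 32, 100  # illustrative sizes
    decoder_output = torch.randn(batch_size, tgt_len, emb_dim)
    output_layer = nn.Linear(emb_dim, vocab_size)

    logits_all = output_layer(decoder_output)               # old head: shape [32, 7, 100]
    logits_last = output_layer(decoder_output[:, -1, :])    # new head: shape [32, 100]

This single next-token distribution is what the updated loss call further down compares against targets[:, i].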
@@ -89,10 +89,10 @@ class PositionalEncoding(nn.Module):
         return self.dropout(x)

 def collate_fn(batch):
-    inputs = [item[0] for item in batch]
-    targets = [item[1] for item in batch]
-    inputs = pad_sequence(inputs, batch_first=True, padding_value=
-    targets = pad_sequence(targets, batch_first=True, padding_value=
+    inputs = [item[0].to("cuda") for item in batch]
+    targets = [item[1].to("cuda") for item in batch]
+    inputs = pad_sequence(inputs, batch_first=True, padding_value=0)
+    targets = pad_sequence(targets, batch_first=True, padding_value=0)
     return inputs, targets

 train_loader = DataLoader(MyDataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
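Note on the updated collate_fn above: each batch is moved to the GPU and padded to the longest sequence in the batch with padding index 0. A small sketch of what pad_sequence produces; the .to("cuda") calls are dropped here so it runs without a GPU, and treating index 0 as a free pad token is an assumption (the +1 in vocab_size=len(tokens)+1 suggests it, but the tokenizer code is not part of this diff):

    import torch
    from torch.nn.utils.rnn import pad_sequence

    batch = [
        (torch.tensor([5, 2, 9]), torch.tensor([7, 1])),
        (torch.tensor([4, 8]), torch.tensor([3, 6, 2])),
    ]
    inputs = pad_sequence([item[0] for item in batch], batch_first=True, padding_value=0)
    targets = pad_sequence([item[1] for item in batch], batch_first=True, padding_value=0)
    # inputs:  tensor([[5, 2, 9], [4, 8, 0]])
    # targets: tensor([[7, 1, 0], [3, 6, 2]])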
@@ -101,28 +101,24 @@ model = TransformerModel(vocab_size=len(tokens)+1, emb_dim=32, nhead=8, num_enco
 loss_fn = nn.CrossEntropyLoss()
 optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

-epochs =
+epochs = 1

 for epoch in range(epochs):
     total_loss = 0.0

     for batch_idx, (inputs, targets) in enumerate(train_loader):
-
-        for i in range(1,targets.shape[1]):
-
+        for i in range(1, targets.shape[1]):
             optimizer.zero_grad()
             output = model(inputs, targets[:, :i]) # Shifted targets
-            output
-
-            print(output.shape)
-            print(targets[:, i].unsqueeze(1).long().shape)
-            loss = loss_fn(output, targets[:, i].unsqueeze(1).long())
+            #print(output.shape)
+            loss = loss_fn(output, targets[:, i].long()) # Reshape targets
             loss.backward()
             optimizer.step()

             total_loss += loss.item()

-
-        print(f"Epoch {epoch + 1}/{epochs}, Batch {batch_idx}/{len(train_loader)}, Loss: {total_loss / (batch_idx + 1)}")
+        print(f"Epoch {epoch + 1}/{epochs}, Batch {batch_idx}/{len(train_loader)}, Loss: {total_loss / (batch_idx + 1)}")

     print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(train_loader)}")
+
+torch.save(model, "data/PrateritumGPT.pth")
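Note on the updated loss call: with the new output head, output has shape [batch_size, vocab_size] and targets[:, i].long() has shape [batch_size], which is exactly the logits/class-index pairing nn.CrossEntropyLoss expects, so the earlier unsqueeze(1) is no longer needed. A minimal sketch with stand-in tensors (sizes are illustrative):

    import torch
    import torch.nn as nn

    loss_fn = nn.CrossEntropyLoss()
    logits = torch.randn(32, 100)          # stands in for model(inputs, targets[:, :i])
    labels = torch.randint(0, 100, (32,))  # stands in for targets[:, i].long()
    loss = loss_fn(logits, labels)         # scalar tensor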
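Note on the new torch.save call: saving the model object (rather than model.state_dict()) pickles the whole module, so the TransformerModel and PositionalEncoding class definitions must be importable when the checkpoint is loaded again. A hedged loading sketch, assuming the same "data/PrateritumGPT.pth" path:

    import torch

    # Full-module pickle; recent PyTorch versions default to weights-only loading,
    # in which case weights_only=False must be passed explicitly.
    model = torch.load("data/PrateritumGPT.pth")
    model.eval()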