GPT007 committed
Commit 140a2ce
1 Parent(s): c0d7085

Update PrateritumGPT.py

Files changed (1):
  PrateritumGPT.py  +21 -25
PrateritumGPT.py CHANGED
@@ -49,26 +49,26 @@ MyDataset = CSVDataset(features=features, labels=labels)
 class TransformerModel(nn.Module):
     def __init__(self, vocab_size, emb_dim, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward, dropout=0.1):
         super().__init__()
-        self.custom_embedding = nn.Embedding(vocab_size, emb_dim)
-        self.pos_encoder = PositionalEncoding(emb_dim, dropout)
-        encoder_layer = nn.TransformerEncoderLayer(emb_dim, nhead, dim_feedforward, dropout, batch_first=True)
+        self.custom_embedding = nn.Embedding(vocab_size, emb_dim).to("cuda")
+        self.pos_encoder = PositionalEncoding(emb_dim, dropout).to("cuda")
+        encoder_layer = nn.TransformerEncoderLayer(emb_dim, nhead, dim_feedforward, dropout, batch_first=True).to("cuda")
         self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_encoder_layers)
-        decoder_layer = nn.TransformerDecoderLayer(emb_dim, nhead, dim_feedforward, dropout, batch_first=True)
+        decoder_layer = nn.TransformerDecoderLayer(emb_dim, nhead, dim_feedforward, dropout, batch_first=True).to("cuda")
         self.transformer_decoder = nn.TransformerDecoder(decoder_layer, num_decoder_layers)
-        self.output_layer = nn.Linear(emb_dim, vocab_size)
+        self.output_layer = nn.Linear(emb_dim, vocab_size).to("cuda")

     def forward(self, src, tgt, src_mask=None, tgt_mask=None, memory_mask=None, src_key_padding_mask=None, tgt_key_padding_mask=None, memory_key_padding_mask=None):
         src_emb = self.custom_embedding(src.long())
-        print("Source Embedding:", src_emb.shape)
+        #print("Source Embedding:", src_emb.shape)
         src_emb = self.pos_encoder(src_emb)
-        print("Source Embedding:", src_emb.shape)
+        #print("Source Embedding:", src_emb.shape)
         tgt_emb = self.custom_embedding(tgt.long())
-        print("Target Embedding:", tgt_emb.shape)
+        #print("Target Embedding:", tgt_emb.shape)
         tgt_emb = self.pos_encoder(tgt_emb)
-        print("Target Embedding:", tgt_emb.shape)
+        #print("Target Embedding:", tgt_emb.shape)
         encoder_output = self.transformer_encoder(src_emb, src_mask, src_key_padding_mask)
         decoder_output = self.transformer_decoder(tgt_emb, encoder_output, tgt_mask, memory_mask, tgt_key_padding_mask, memory_key_padding_mask)
-        output = self.output_layer(decoder_output)
+        output = self.output_layer(decoder_output[:, -1, :])
         return output

 class PositionalEncoding(nn.Module):
@@ -89,10 +89,10 @@ class PositionalEncoding(nn.Module):
         return self.dropout(x)

 def collate_fn(batch):
-    inputs = [item[0] for item in batch]
-    targets = [item[1] for item in batch]
-    inputs = pad_sequence(inputs, batch_first=True, padding_value=len(tokens) + 1)
-    targets = pad_sequence(targets, batch_first=True, padding_value=len(tokens) + 1)
+    inputs = [item[0].to("cuda") for item in batch]
+    targets = [item[1].to("cuda") for item in batch]
+    inputs = pad_sequence(inputs, batch_first=True, padding_value=0)
+    targets = pad_sequence(targets, batch_first=True, padding_value=0)
     return inputs, targets

 train_loader = DataLoader(MyDataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
@@ -101,28 +101,24 @@ model = TransformerModel(vocab_size=len(tokens)+1, emb_dim=32, nhead=8, num_enco
 loss_fn = nn.CrossEntropyLoss()
 optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

-epochs = 10
+epochs = 1

 for epoch in range(epochs):
     total_loss = 0.0

     for batch_idx, (inputs, targets) in enumerate(train_loader):
-
-        for i in range(1,targets.shape[1]):
-
+        for i in range(1, targets.shape[1]):
             optimizer.zero_grad()
             output = model(inputs, targets[:, :i]) # Shifted targets
-            output = output.transpose(1, 2) # Adjust shape for loss function
-            #loss = loss_fn(output, targets[:, i].unsqueeze(1).long()) # Shifted targets
-            print(output.shape)
-            print(targets[:, i].unsqueeze(1).long().shape)
-            loss = loss_fn(output, targets[:, i].unsqueeze(1).long())
+            #print(output.shape)
+            loss = loss_fn(output, targets[:, i].long()) # Reshape targets
             loss.backward()
             optimizer.step()

             total_loss += loss.item()

-        if batch_idx % 100 == 0:
-            print(f"Epoch {epoch + 1}/{epochs}, Batch {batch_idx}/{len(train_loader)}, Loss: {total_loss / (batch_idx + 1)}")
+        print(f"Epoch {epoch + 1}/{epochs}, Batch {batch_idx}/{len(train_loader)}, Loss: {total_loss / (batch_idx + 1)}")

     print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(train_loader)}")
+
+torch.save(model, "data/PrateritumGPT.pth")
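
The substantive change in this diff is the loss-shape fix: output_layer is now applied only to the last decoder position, so each teacher-forcing step compares [batch, vocab_size] logits against a [batch] vector of next-token indices, which is the pairing nn.CrossEntropyLoss expects. A minimal, self-contained sketch of that shape contract (seq_len and vocab_size below are illustrative placeholders, not values from PrateritumGPT.py):

# Sketch of the shape contract behind the new loss call; values marked "assumed"
# are illustrative only and not taken from PrateritumGPT.py.
import torch
import torch.nn as nn

batch_size, seq_len, emb_dim, vocab_size = 32, 10, 32, 100   # seq_len, vocab_size assumed

decoder_output = torch.randn(batch_size, seq_len, emb_dim)   # stand-in for the decoder output
output_layer = nn.Linear(emb_dim, vocab_size)

logits = output_layer(decoder_output[:, -1, :])              # [batch_size, vocab_size]
targets_step = torch.randint(0, vocab_size, (batch_size,))   # [batch_size] next-token ids

loss = nn.CrossEntropyLoss()(logits, targets_step)           # same pairing as loss_fn(output, targets[:, i].long())
print(logits.shape, targets_step.shape, loss.item())

Scoring only the final position is what lets the training loop drop the earlier transpose(1, 2) and unsqueeze(1) adjustments.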