pro-grammer committed on
Commit cb9ee13 · verified · 1 Parent(s): 66e9630

Update model.py

Files changed (1)
  1. model.py +14 -48
model.py CHANGED
@@ -113,42 +113,18 @@ class GPTLanguageModel(nn.Module):
         return logits, loss
 
     @torch.no_grad()
-    def generate(self, idx, max_new_tokens, temperature=0.8, top_k=50, eos_token=None, max_consecutive_exclamations=2):
-        consecutive_exclamations = 0
+    def generate(self, idx, max_new_tokens):
         for _ in range(max_new_tokens):
-            # Crop idx to the last block_size tokens if it exceeds block_size
             idx_cond = idx[:, -self.block_size:]
-
-            # Get the predictions
             logits, _ = self(idx_cond)
-
-            # Focus only on the last time step
-            logits = logits[:, -1, :] / temperature
-
-            # Apply top-k sampling
-            top_k_logits, top_k_indices = torch.topk(logits, min(top_k, logits.size(-1)))
-            probs = F.softmax(top_k_logits, dim=-1)
-            idx_next = top_k_indices[0, torch.multinomial(probs[0], num_samples=1)]
-
-            # Check for exclamation mark
-            if idx_next.item() == enc.encode("!")[0]:
-                consecutive_exclamations += 1
-                if consecutive_exclamations > max_consecutive_exclamations:
-                    continue # Skip this token
-            else:
-                consecutive_exclamations = 0
-
-            # Append sampled index to the running sequence
-            idx = torch.cat((idx, idx_next.unsqueeze(0).unsqueeze(1)), dim=1)
-
-            # Stop if EOS token is generated
-            if eos_token is not None and idx_next.item() == eos_token:
-                break
-
+            logits = logits[:, -1, :]
+            probs = F.softmax(logits, dim=-1)
+            idx_next = torch.multinomial(probs, num_samples=1)
+            idx = torch.cat((idx, idx_next), dim=1)
         return idx
 
-# Set up the device
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# Now that we have the model definition, let's load the weights and perform inference
+device = torch.device('cpu') # Use 'cuda' if you have a GPU
 
 # Hyperparameters (match these with the ones you used for training)
 vocab_size = 50257
@@ -164,29 +140,19 @@ weight_decay = 0.1
 # Create an instance of the model
 model = GPTLanguageModel(vocab_size, n_embd, block_size, n_layers, n_heads).to(device)
 
-# Load the model (with weights_only=True for security)
-model.load_state_dict(torch.load("model_weights.pth", map_location=device, weights_only=True))
+# Load the trained weights
+model.load_state_dict(torch.load("model_weights.pth", map_location=device))
 
 # Set the model to evaluation mode
 model.eval()
 
-# Set up the tokenizer and get the EOS token
-enc = tiktoken.get_encoding("gpt2")
-eos_token = enc.eot_token # Use the end-of-text token directly from the tokenizer
-
+# Prompt
 context = torch.tensor([enc.encode("Once upon a time there was a knight called Bob and he rode into his greatest battle yet")], dtype=torch.long, device=device)
 
-# Generate text with the updated parameters
-max_new_tokens = 300
-temperature = 0.6 # Slightly lower temperature
-top_k = 40 # Adjust as needed
-# Generate text
-# Load the model (with weights_only=True for security)
-model.load_state_dict(torch.load("model_weights.pth", map_location=device, weights_only=True))
-
-# Generate text
-context = torch.tensor([enc.encode("Once upon a time there was a knight called Bob and he rode into his greatest battle yet")], dtype=torch.long, device=device)
-generated_text_idx = model.generate(context, max_new_tokens, temperature=temperature, top_k=top_k, eos_token=eos_token, max_consecutive_exclamations=2)
+# Test generation with a higher number of tokens and adjusted temperature
+max_new_tokens = 300 # Increase the token limit for a longer generation
+temperature = 0.8 # More focused, less random
+generated_text_idx = model.generate(context, max_new_tokens)
 generated_text = enc.decode(generated_text_idx[0].tolist())
 
 print(f"Generated text: {generated_text}")