Update model.py
model.py
CHANGED
@@ -113,42 +113,18 @@ class GPTLanguageModel(nn.Module):
         return logits, loss
 
     @torch.no_grad()
-    def generate(self, idx, max_new_tokens, temperature=1.0, top_k=None, eos_token=None, max_consecutive_exclamations=2):
-        consecutive_exclamations = 0
+    def generate(self, idx, max_new_tokens):
         for _ in range(max_new_tokens):
-            # Crop idx to the last block_size tokens if it exceeds block_size
             idx_cond = idx[:, -self.block_size:]
-
-            # Get the predictions
             logits, _ = self(idx_cond)
-
-
-
-
-            # Apply top-k sampling
-            top_k_logits, top_k_indices = torch.topk(logits, min(top_k, logits.size(-1)))
-            probs = F.softmax(top_k_logits, dim=-1)
-            idx_next = top_k_indices[0, torch.multinomial(probs[0], num_samples=1)]
-
-            # Check for exclamation mark
-            if idx_next.item() == enc.encode("!")[0]:
-                consecutive_exclamations += 1
-                if consecutive_exclamations > max_consecutive_exclamations:
-                    continue  # Skip this token
-            else:
-                consecutive_exclamations = 0
-
-            # Append sampled index to the running sequence
-            idx = torch.cat((idx, idx_next.unsqueeze(0).unsqueeze(1)), dim=1)
-
-            # Stop if EOS token is generated
-            if eos_token is not None and idx_next.item() == eos_token:
-                break
-
+            logits = logits[:, -1, :]
+            probs = F.softmax(logits, dim=-1)
+            idx_next = torch.multinomial(probs, num_samples=1)
+            idx = torch.cat((idx, idx_next), dim=1)
         return idx
 
-#
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# Now that we have the model definition, let's load the weights and perform inference
+device = torch.device('cpu')  # Use 'cuda' if you have a GPU
 
 # Hyperparameters (match these with the ones you used for training)
 vocab_size = 50257
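The rewritten `generate` drops the temperature, top-k, EOS, and consecutive-exclamation handling and samples from the full softmax instead. It also fixes a real bug: the new `logits = logits[:, -1, :]` keeps only the last position's scores, whereas the removed code ran `torch.topk` over the logits for every position and then indexed into the result. If temperature and top-k sampling are wanted back on top of the simplified loop, a minimal sketch could look like the following (`sample_next_token` is a hypothetical helper, not part of this commit):

```python
import torch
import torch.nn.functional as F

def sample_next_token(logits, temperature=1.0, top_k=None):
    """Sample one token id per batch row from (batch, vocab_size) logits."""
    if temperature != 1.0:
        logits = logits / temperature  # <1.0 sharpens, >1.0 flattens the distribution
    if top_k is not None:
        k = min(top_k, logits.size(-1))
        topk_logits, topk_idx = torch.topk(logits, k, dim=-1)
        probs = F.softmax(topk_logits, dim=-1)   # renormalize over the k candidates
        choice = torch.multinomial(probs, num_samples=1)
        return topk_idx.gather(-1, choice)       # map back to vocabulary ids
    probs = F.softmax(logits, dim=-1)
    return torch.multinomial(probs, num_samples=1)

# Example: one sampling step over random logits for a GPT-2-sized vocabulary
next_id = sample_next_token(torch.randn(1, 50257), temperature=0.8, top_k=50)
```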
@@ -164,29 +140,19 @@ weight_decay = 0.1
 # Create an instance of the model
 model = GPTLanguageModel(vocab_size, n_embd, block_size, n_layers, n_heads).to(device)
 
-# Load the trained weights
-model.load_state_dict(torch.load("model_weights.pth", map_location=device))
+# Load the trained weights
+model.load_state_dict(torch.load("model_weights.pth", map_location=device))
 
 # Set the model to evaluation mode
 model.eval()
 
-#
-enc = tiktoken.get_encoding("gpt2")
-eos_token = enc.eot_token  # Use the end-of-text token directly from the tokenizer
-
+# Prompt
 context = torch.tensor([enc.encode("Once upon a time there was a knight called Bob and he rode into his greatest battle yet")], dtype=torch.long, device=device)
 
-#
-max_new_tokens = 300
-temperature = 0.
-
-# Generate text
-# Load the model (with weights_only=True for security)
-model.load_state_dict(torch.load("model_weights.pth", map_location=device, weights_only=True))
-
-# Generate text
-context = torch.tensor([enc.encode("Once upon a time there was a knight called Bob and he rode into his greatest battle yet")], dtype=torch.long, device=device)
-generated_text_idx = model.generate(context, max_new_tokens, temperature=temperature, top_k=top_k, eos_token=eos_token, max_consecutive_exclamations=2)
+# Test generation with a higher number of tokens and adjusted temperature
+max_new_tokens = 300  # Increase the token limit for a longer generation
+temperature = 0.8  # More focused, less random
+generated_text_idx = model.generate(context, max_new_tokens)
 generated_text = enc.decode(generated_text_idx[0].tolist())
 
 print(f"Generated text: {generated_text}")
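Two loose ends in the inference script after this change: it still calls `enc.encode(...)` and `enc.decode(...)`, but the lines defining `enc` and `eos_token` are removed in this diff, so the tokenizer must still be set up somewhere for the script to run; and `temperature = 0.8` is assigned but never used, since the new `generate` signature accepts only `idx` and `max_new_tokens`. A minimal sketch of the tokenizer setup the script still depends on, mirroring the removed lines:

```python
import tiktoken

# GPT-2 BPE tokenizer; the inference code refers to it as `enc`
enc = tiktoken.get_encoding("gpt2")

ids = enc.encode("Once upon a time")  # text -> token ids
text = enc.decode(ids)                # token ids -> text
print(ids, text)
```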
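The removed code also loaded the checkpoint a second time with `weights_only=True`, commented there as a security measure; the surviving load call omits the flag. Since `weights_only=True` restricts `torch.load` to tensors and other safe types instead of executing arbitrary pickled objects, it is worth keeping for checkpoints from untrusted sources. A self-contained sketch, with `nn.Linear` standing in for `GPTLanguageModel`:

```python
import torch
import torch.nn as nn

model = nn.Linear(4, 4)  # stand-in for GPTLanguageModel
torch.save(model.state_dict(), "model_weights.pth")

# weights_only=True makes torch.load reject arbitrary pickled objects
# and accept only tensors and other safe types from the checkpoint file.
state_dict = torch.load("model_weights.pth", map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)
```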