Update model.py
model.py
CHANGED
@@ -113,42 +113,18 @@ class GPTLanguageModel(nn.Module):
         return logits, loss
 
     @torch.no_grad()
-    def generate(self, idx, max_new_tokens, temperature=1.0, top_k=None, eos_token=None, max_consecutive_exclamations=2):
-        consecutive_exclamations = 0
+    def generate(self, idx, max_new_tokens):
         for _ in range(max_new_tokens):
-            # Crop idx to the last block_size tokens if it exceeds block_size
             idx_cond = idx[:, -self.block_size:]
-
-            # Get the predictions
             logits, _ = self(idx_cond)
-
-
-
-
-            # Apply top-k sampling
-            top_k_logits, top_k_indices = torch.topk(logits, min(top_k, logits.size(-1)))
-            probs = F.softmax(top_k_logits, dim=-1)
-            idx_next = top_k_indices[0, torch.multinomial(probs[0], num_samples=1)]
-
-            # Check for exclamation mark
-            if idx_next.item() == enc.encode("!")[0]:
-                consecutive_exclamations += 1
-                if consecutive_exclamations > max_consecutive_exclamations:
-                    continue  # Skip this token
-            else:
-                consecutive_exclamations = 0
-
-            # Append sampled index to the running sequence
-            idx = torch.cat((idx, idx_next.unsqueeze(0).unsqueeze(1)), dim=1)
-
-            # Stop if EOS token is generated
-            if eos_token is not None and idx_next.item() == eos_token:
-                break
-
+            logits = logits[:, -1, :]
+            probs = F.softmax(logits, dim=-1)
+            idx_next = torch.multinomial(probs, num_samples=1)
+            idx = torch.cat((idx, idx_next), dim=1)
         return idx
 
-#
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# Now that we have the model definition, let's load the weights and perform inference
+device = torch.device('cpu')  # Use 'cuda' if you have a GPU
 
 # Hyperparameters (match these with the ones you used for training)
 vocab_size = 50257
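The rewritten `generate` drops the temperature, top-k, EOS, and consecutive-exclamation handling and samples from the full softmax instead. It also fixes a real bug: the new `logits = logits[:, -1, :]` keeps only the last position's scores, whereas the removed code ran `torch.topk` over the logits for every position and then indexed into the result. If temperature and top-k sampling are wanted back on top of the simplified loop, a minimal sketch could look like the following (`sample_next_token` is a hypothetical helper, not part of this commit):

```python
import torch
import torch.nn.functional as F

def sample_next_token(logits, temperature=1.0, top_k=None):
    """Sample one token id per batch row from (batch, vocab_size) logits."""
    if temperature != 1.0:
        logits = logits / temperature  # <1.0 sharpens, >1.0 flattens the distribution
    if top_k is not None:
        k = min(top_k, logits.size(-1))
        topk_logits, topk_idx = torch.topk(logits, k, dim=-1)
        probs = F.softmax(topk_logits, dim=-1)   # renormalize over the k candidates
        choice = torch.multinomial(probs, num_samples=1)
        return topk_idx.gather(-1, choice)       # map back to vocabulary ids
    probs = F.softmax(logits, dim=-1)
    return torch.multinomial(probs, num_samples=1)

# Example: one sampling step over random logits for a GPT-2-sized vocabulary
next_id = sample_next_token(torch.randn(1, 50257), temperature=0.8, top_k=50)
```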
@@ -164,29 +140,19 @@ weight_decay = 0.1
 # Create an instance of the model
 model = GPTLanguageModel(vocab_size, n_embd, block_size, n_layers, n_heads).to(device)
 
-# Load the trained weights
-model.load_state_dict(torch.load("model_weights.pth", map_location=device))
+# Load the trained weights
+model.load_state_dict(torch.load("model_weights.pth", map_location=device))
 
 # Set the model to evaluation mode
 model.eval()
 
-#
-enc = tiktoken.get_encoding("gpt2")
-eos_token = enc.eot_token  # Use the end-of-text token directly from the tokenizer
-
+# Prompt
 context = torch.tensor([enc.encode("Once upon a time there was a knight called Bob and he rode into his greatest battle yet")], dtype=torch.long, device=device)
 
-#
-max_new_tokens = 300
-temperature = 0.
-
-# Generate text
-# Load the model (with weights_only=True for security)
-model.load_state_dict(torch.load("model_weights.pth", map_location=device, weights_only=True))
-
-# Generate text
-context = torch.tensor([enc.encode("Once upon a time there was a knight called Bob and he rode into his greatest battle yet")], dtype=torch.long, device=device)
-generated_text_idx = model.generate(context, max_new_tokens, temperature=temperature, top_k=top_k, eos_token=eos_token, max_consecutive_exclamations=2)
+# Test generation with a higher number of tokens and adjusted temperature
+max_new_tokens = 300  # Increase the token limit for a longer generation
+temperature = 0.8  # More focused, less random
+generated_text_idx = model.generate(context, max_new_tokens)
 generated_text = enc.decode(generated_text_idx[0].tolist())
 
 print(f"Generated text: {generated_text}")
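Two loose ends in the inference script after this change: it still calls `enc.encode(...)` and `enc.decode(...)`, but the lines defining `enc` and `eos_token` are removed in this diff, so the tokenizer must still be set up somewhere for the script to run; and `temperature = 0.8` is assigned but never used, since the new `generate` signature accepts only `idx` and `max_new_tokens`. A minimal sketch of the tokenizer setup the script still depends on, mirroring the removed lines:

```python
import tiktoken

# GPT-2 BPE tokenizer; the inference code refers to it as `enc`
enc = tiktoken.get_encoding("gpt2")

ids = enc.encode("Once upon a time")  # text -> token ids
text = enc.decode(ids)                # token ids -> text
print(ids, text)
```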
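The removed code also loaded the checkpoint a second time with `weights_only=True`, commented there as a security measure; the surviving load call omits the flag. Since `weights_only=True` restricts `torch.load` to tensors and other safe types instead of executing arbitrary pickled objects, it is worth keeping for checkpoints from untrusted sources. A self-contained sketch, with `nn.Linear` standing in for `GPTLanguageModel`:

```python
import torch
import torch.nn as nn

model = nn.Linear(4, 4)  # stand-in for GPTLanguageModel
torch.save(model.state_dict(), "model_weights.pth")

# weights_only=True makes torch.load reject arbitrary pickled objects
# and accept only tensors and other safe types from the checkpoint file.
state_dict = torch.load("model_weights.pth", map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)
```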