Update model.py
model.py
CHANGED
@@ -372,7 +372,8 @@ class GPT(nn.Module):
             idx = torch.cat((idx, idx_next), dim=1)
 
         return idx
-
+
+    @torch.no_grad()
     def generate_streaming(self, idx, max_new_tokens, temperature=1.0, top_k=None):
         """
         Take a conditioning sequence of indices idx (LongTensor of shape (b,t)) and complete
@@ -399,3 +400,48 @@ class GPT(nn.Module):
             # append sampled index to the running sequence and continue
             idx = torch.cat((idx, idx_next), dim=1)
             yield idx_next.item()
+
+    @torch.no_grad()
+    def generate_instructed_streaming(self, idx, idi, max_new_tokens, temperature=1.0, top_k=None):
+        """
+        Take a conditioning sequence of indices idx (LongTensor of shape (b,t)) and complete
+        the sequence max_new_tokens times, feeding the predictions back into the model each time.
+        Yield the generated indices one at a time rather than concatenating them into a single tensor.
+        Most likely you'll want to make sure to be in model.eval() mode of operation for this.
+        """
+        idi_length = idi.size(1)
+        max_idx_length = self.config.block_size - idi_length
+
+        # Precompute the minimum top_k value for logits.size(-1)
+        min_top_k = None
+        if top_k is not None:
+            min_top_k = min(top_k, self.config.vocab_size)
+
+        for _ in range(max_new_tokens):
+            # if the sequence context is growing too long we must crop it at block_size
+            idx_cond = idx if idx.size(1) <= max_idx_length else idx[:, -max_idx_length:]
+
+            # concatenate idi with the cropped idx
+            idx_cond = torch.cat((idi, idx_cond), dim=1)
+
+            # forward the model to get the logits for the index in the sequence
+            logits, _ = self(idx_cond)
+
+            # pluck the logits at the final step and scale by desired temperature
+            logits = logits[:, -1, :] / temperature
+
+            # optionally crop the logits to only the top k options
+            if min_top_k is not None:
+                v, _ = torch.topk(logits, min_top_k)
+                logits[logits < v[:, [-1]]] = -float('Inf')
+
+            # apply softmax to convert logits to (normalized) probabilities
+            probs = F.softmax(logits, dim=-1)
+
+            # sample from the distribution
+            idx_next = torch.multinomial(probs, num_samples=1)
+
+            # yield the next index
+            # append sampled index to the running sequence and continue
+            idx = torch.cat((idx, idx_next), dim=1)
+            yield idx_next.item()