[update] delete flash-attn
- inference.py +0 -60
- model.py +16 -7
inference.py DELETED
@@ -1,60 +0,0 @@
-"""Simple inference script to test the HuggingFace LangFlow model."""
-
-import argparse
-import torch
-from transformers import AutoModelForMaskedLM, AutoTokenizer
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Generate samples with LangFlow")
-    parser.add_argument(
-        "--model_path", type=str, default="hf_release/model_weights",
-        help="Path to the HuggingFace model directory")
-    parser.add_argument(
-        "--num_samples", type=int, default=5,
-        help="Number of samples to generate")
-    parser.add_argument(
-        "--num_steps", type=int, default=128,
-        help="Number of denoising steps")
-    parser.add_argument(
-        "--seq_length", type=int, default=1024,
-        help="Sequence length")
-    parser.add_argument(
-        "--seed", type=int, default=42,
-        help="Random seed")
-    args = parser.parse_args()
-
-    # Set seed for reproducibility
-    torch.manual_seed(args.seed)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed_all(args.seed)
-
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    print(f"Using device: {device}")
-
-    tokenizer = AutoTokenizer.from_pretrained("gpt2")
-    model = AutoModelForMaskedLM.from_pretrained(
-        args.model_path,
-        trust_remote_code=True
-    )
-    model = model.to(device)
-    model.eval()
-
-    print(f"\nGenerating {args.num_samples} samples with {args.num_steps} steps...")
-    with torch.no_grad():
-        samples = model.generate_samples(
-            num_samples=args.num_samples,
-            seq_length=args.seq_length,
-            num_steps=args.num_steps,
-            device=device
-        )
-
-    texts = tokenizer.batch_decode(samples, skip_special_tokens=True)
-    for i, text in enumerate(texts):
-        print(f"\n--- Sample {i+1} ---")
-        # Print first 500 characters to keep output manageable
-        print(text[:500] + ("..." if len(text) > 500 else ""))
-
-
-if __name__ == "__main__":
-    main()
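The standalone smoke test is gone, but the same check can still be run inline. A minimal sketch using only the calls and defaults from the deleted file above (model path, tokenizer, and generate_samples signature exactly as the script had them):

import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForMaskedLM.from_pretrained(
    "hf_release/model_weights",  # the deleted script's default --model_path
    trust_remote_code=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device).eval()

with torch.no_grad():
    samples = model.generate_samples(
        num_samples=1, seq_length=1024, num_steps=128, device=device)
print(tokenizer.batch_decode(samples, skip_special_tokens=True)[0][:500])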
model.py CHANGED
@@ -7,8 +7,6 @@ import math
 import typing
 
 import einops
-import flash_attn
-import flash_attn.layers.rotary
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -91,6 +89,19 @@ class Rotary(nn.Module):
         return self.cos_cached, self.sin_cached
 
 
+def _apply_rotary_emb(x, cos, sin):
+    # x: [batch, seqlen, nheads, headdim]
+    # cos, sin: [seqlen, headdim//2]
+    ro_dim = cos.shape[-1] * 2
+    # Expand to [1, seqlen, 1, ro_dim] for broadcasting
+    cos = torch.cat([cos, cos], dim=-1)[None, :, None, :]
+    sin = torch.cat([sin, sin], dim=-1)[None, :, None, :]
+    x_rot = x[..., :ro_dim]
+    x1, x2 = x_rot.chunk(2, dim=-1)
+    x_rotated = torch.cat([-x2, x1], dim=-1)
+    return torch.cat([x_rot * cos + x_rotated * sin, x[..., ro_dim:]], dim=-1)
+
+
 def split_and_apply_rotary_pos_emb(qkv, rotary_cos_sin):
     with torch.autocast(device_type='cuda', enabled=False):
         cos, sin = rotary_cos_sin
@@ -99,10 +110,8 @@ def split_and_apply_rotary_pos_emb(qkv, rotary_cos_sin):
         cos = cos[0, :, 0, 0, :cos.shape[-1]//2]
         sin = sin[0, :, 0, 0, :sin.shape[-1]//2]
         q, k, v = qkv.chunk(3, dim=2)
-        q = flash_attn.layers.rotary.apply_rotary_emb_torch(
-            q.squeeze(dim=2), cos, sin)
-        k = flash_attn.layers.rotary.apply_rotary_emb_torch(
-            k.squeeze(dim=2), cos, sin)
+        q = _apply_rotary_emb(q.squeeze(dim=2), cos, sin)
+        k = _apply_rotary_emb(k.squeeze(dim=2), cos, sin)
         v = v.squeeze(dim=2)
         return q, k, v
 
@@ -548,4 +557,4 @@ class LangFlow(transformers.PreTrainedModel):
             return_dict=False)
         samples = logits.argmax(dim=-1)
 
-        return samples
+        return samples
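The added _apply_rotary_emb is a pure-PyTorch replacement for the rotate-half rotation that flash_attn.layers.rotary.apply_rotary_emb_torch previously supplied: splitting the rotary slice into [x1, x2], it computes [x1*cos - x2*sin, x2*cos + x1*sin] via the cat([-x2, x1]) trick. A standalone sanity check, a sketch that assumes standard RoPE frequencies with base 10000 (the model's actual Rotary module is not reproduced in this diff):

import torch

# Copied from the hunk above so the check is self-contained.
def _apply_rotary_emb(x, cos, sin):
    # x: [batch, seqlen, nheads, headdim]; cos, sin: [seqlen, headdim//2]
    ro_dim = cos.shape[-1] * 2
    cos = torch.cat([cos, cos], dim=-1)[None, :, None, :]
    sin = torch.cat([sin, sin], dim=-1)[None, :, None, :]
    x_rot = x[..., :ro_dim]
    x1, x2 = x_rot.chunk(2, dim=-1)
    x_rotated = torch.cat([-x2, x1], dim=-1)
    return torch.cat([x_rot * cos + x_rotated * sin, x[..., ro_dim:]], dim=-1)

batch, seqlen, nheads, headdim = 2, 16, 4, 64
x = torch.randn(batch, seqlen, nheads, headdim)
# Assumed standard RoPE inverse frequencies.
inv_freq = 1.0 / (10000 ** (torch.arange(0, headdim, 2).float() / headdim))
angles = torch.outer(torch.arange(seqlen).float(), inv_freq)  # [seqlen, headdim//2]
out = _apply_rotary_emb(x, angles.cos(), angles.sin())

assert out.shape == x.shape
# Each (x1, x2) pair is rotated by an orthogonal 2x2 matrix, so norms survive.
assert torch.allclose(out.norm(dim=-1), x.norm(dim=-1), atol=1e-5)
# Position 0 has angle 0 and must pass through unchanged.
assert torch.allclose(out[:, 0], x[:, 0])

Swapping in this implementation keeps model.py importable on machines without a flash-attn build, which is the point of the commit.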