Spaces:
Sleeping
Sleeping
Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -54,19 +54,21 @@ weights_path = os.path.join(local_dir, "pytorch_model.bin")
|
|
| 54 |
state = torch.load(weights_path, map_location=device)
|
| 55 |
state = {k.lstrip("_orig_mod."): v for k, v in state.items()}
|
| 56 |
model.load_state_dict(state, strict=True, assign=True)
|
| 57 |
-
|
|
|
|
| 58 |
model.eval()
|
| 59 |
|
| 60 |
def complete(prompt, max_new_tokens=64):
|
| 61 |
input_ids = tokenizer.encode(prompt, prepend=tokenizer.get_bos_token_id())
|
| 62 |
ids = torch.tensor([input_ids], dtype=torch.long, device=device)
|
| 63 |
-
generated = []
|
| 64 |
with torch.inference_mode():
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
| 70 |
return tokenizer.decode(ids[0].tolist())
|
| 71 |
|
| 72 |
with gr.Blocks() as demo:
|
|
|
|
| 54 |
# --- Load fine-tuned weights and prepare the model for inference ---
state = torch.load(weights_path, map_location=device)

# Checkpoints saved from a torch.compile()-d model prefix every key with
# "_orig_mod."; strip that exact prefix.  NOTE: the previous
# k.lstrip("_orig_mod.") removed any of the *characters* {_ o r i g m d .}
# from the left (e.g. "model.layers" -> "el.layers"), not the prefix —
# str.removeprefix is the correct call.
state = {k.removeprefix("_orig_mod."): v for k, v in state.items()}
model.load_state_dict(state, strict=True, assign=True)

# Ensure rotary buffers and weights are bf16 as expected by the model.
model = model.to(device).to(dtype=torch.bfloat16)
model.eval()
|
| 60 |
|
| 61 |
def complete(prompt, max_new_tokens=64):
    """Greedy-decode a continuation of *prompt* with the module-level model.

    Args:
        prompt: Text to continue.
        max_new_tokens: Number of tokens to append (default 64).

    Returns:
        The decoded string: the prompt followed by the generated tokens.
    """
    input_ids = tokenizer.encode(prompt, prepend=tokenizer.get_bos_token_id())
    ids = torch.tensor([input_ids], dtype=torch.long, device=device)
    with torch.inference_mode():
        # autocast so activations match the model's bf16 parameters
        with torch.autocast(device_type=device.type, dtype=torch.bfloat16):
            for _ in range(max_new_tokens):
                # Re-run the full sequence each step (no KV cache) — O(n^2)
                # in generated length, acceptable for a short demo.
                # Use model(ids), not model.forward(ids): __call__ runs
                # registered hooks that a direct .forward() call would skip.
                logits = model(ids)
                next_token = torch.argmax(logits[:, -1, :], dim=-1, keepdim=True)
                ids = torch.cat([ids, next_token], dim=1)
    return tokenizer.decode(ids[0].tolist())
|
| 73 |
|
| 74 |
with gr.Blocks() as demo:
|