Fixed the Flash Attention call in the README usage examples

#9
Files changed (1)
  1. README.md +2 -2
README.md CHANGED
@@ -137,7 +137,7 @@ model = AutoModelForCausalLM.from_pretrained(
   "stabilityai/stable-code-3b",
   trust_remote_code=True,
   torch_dtype="auto",
-+ attn_implementation="flash_attention_2",
+  attn_implementation="flash_attention_2",
 )
 model.cuda()
 inputs = tokenizer("<fim_prefix>def fib(n):<fim_suffix> else:\n return fib(n - 2) + fib(n - 1)<fim_middle>", return_tensors="pt").to(model.device)
@@ -164,7 +164,7 @@ model = AutoModelForCausalLM.from_pretrained(
   "stabilityai/stable-code-3b",
   trust_remote_code=True,
   torch_dtype="auto",
-+ attn_implementation="flash_attention_2",
+  attn_implementation="flash_attention_2",
 )
 model.cuda()
 inputs = tokenizer("import torch\nimport torch.nn as nn", return_tensors="pt").to(model.device)