eswardivi committed on
Commit
51153f0
1 Parent(s): 4828909

updated with Flashattention

Browse files
Files changed (1) hide show
  1. app.py +1 -0
app.py CHANGED
@@ -16,6 +16,7 @@ token = os.environ["HF_TOKEN"]
16
 
17
 
18
  model = AutoModelForCausalLM.from_pretrained(
 
19
  "microsoft/Phi-3-mini-128k-instruct", token=token,trust_remote_code=True
20
  )
21
  tok = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct", token=token)
 
16
 
17
 
18
  model = AutoModelForCausalLM.from_pretrained(
19
+ use_cache=False,attn_implementation="flash_attention_2",
20
  "microsoft/Phi-3-mini-128k-instruct", token=token,trust_remote_code=True
21
  )
22
  tok = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct", token=token)