minhdang committed on
Commit e4738ca
Parent: a57ae7f

Update app.py

Files changed (1)
  1. app.py +9 -0
app.py CHANGED
@@ -7,6 +7,15 @@ import spaces
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
  # test
+ import os
+ import subprocess
+
+ # Install flash attention
+ subprocess.run(
+     "pip install flash-attn --no-build-isolation",
+     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+     shell=True,
+ )
  MAX_MAX_NEW_TOKENS = 2048
  DEFAULT_MAX_NEW_TOKENS = 1024
  total_count=0
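
A caveat with the committed snippet: passing `env={...}` to `subprocess.run` replaces the child's entire environment rather than extending it, so the shell locates `pip` only through its built-in default `PATH`, and any CUDA-related variables are dropped. Below is a minimal sketch of a variant that merges the flag into the inherited environment and fails loudly if the install does not succeed; the `sys.executable -m pip` invocation is my substitution, not part of this commit.

import os
import subprocess
import sys

# Install flash-attn at runtime. FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE tells
# flash-attn's setup to skip compiling the CUDA extensions at install time.
# Merging os.environ (instead of replacing it) keeps PATH and CUDA variables
# visible to the child process.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    check=True,  # raise CalledProcessError if pip fails instead of continuing silently
)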