iofu728 commited on
Commit
4a12906
1 Parent(s): 43a7079

Feature(MInference): changing flash_attn

Browse files
Files changed (2) hide show
  1. app.py +8 -0
  2. requirements.txt +0 -1
app.py CHANGED
@@ -1,3 +1,11 @@
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import os
3
  import spaces
 
1
+ import os, subprocess
2
+ # Install flash-attn at startup, skipping its CUDA build; merge into os.environ so the child shell keeps PATH/HOME and can find pip
3
+ subprocess.run(
4
+ "pip install flash-attn --no-build-isolation",
5
+ env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
6
+ shell=True,
7
+ )
8
+
9
  import gradio as gr
10
  import os
11
  import spaces
requirements.txt CHANGED
@@ -1,4 +1,3 @@
1
- flash_attn
2
  triton==2.1.0
3
  pycuda==2023.1
4
  accelerate
 
 
1
  triton==2.1.0
2
  pycuda==2023.1
3
  accelerate