Zeph27 committed
Commit 213c599 • Parent: 96e8cef

update flash
Files changed (2):
  1. app.py +3 -0
  2. requirements.txt +1 -2
app.py CHANGED
@@ -5,6 +5,9 @@ from decord import VideoReader, cpu
 import os
 import spaces
 
+import subprocess
+subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+
 # Load the model and tokenizer
 model_name = "openbmb/MiniCPM-V-2_6-int4"
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
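One detail worth noting about the added line: passing env={...} to subprocess.run replaces the child's environment wholesale, so the pip process sees only FLASH_ATTENTION_SKIP_CUDA_BUILD and the shell has to resolve pip against its built-in default PATH, which may or may not point at the right interpreter's pip. A minimal sketch of the same runtime install that preserves the current environment and fails loudly, assuming that is the intended behavior, could look like:

    import os
    import subprocess

    # Start from the current environment so pip still sees PATH, CUDA vars, etc.
    env = dict(os.environ)
    # Skip compiling the CUDA kernels and pull a prebuilt flash-attn wheel instead.
    env["FLASH_ATTENTION_SKIP_CUDA_BUILD"] = "TRUE"

    subprocess.run(
        "pip install flash-attn --no-build-isolation",
        env=env,
        shell=True,
        check=True,  # raise CalledProcessError if the install fails
    )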
requirements.txt CHANGED
@@ -1,9 +1,8 @@
 Pillow==10.1.0
-torch==2.1.2+cu118
+torch==2.1.2
 torchvision==0.16.2
 transformers==4.40.0
 sentencepiece==0.1.99
 accelerate==0.30.1
 bitsandbytes==0.43.1
 decord
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu118torch1.12cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
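The dropped wheel URL hard-codes CUDA 11.8, CPython 3.10, and a specific torch build in its filename, so it breaks whenever the host's torch or Python changes; installing flash-attn at startup from app.py sidesteps that pin. Since the runtime install can still fail on some hosts, a hedged sketch of loading the model with a fallback, assuming the model's remote code honors transformers' attn_implementation argument, could be:

    import importlib.util

    from transformers import AutoModel

    model_name = "openbmb/MiniCPM-V-2_6-int4"

    # Request FlashAttention-2 only when the package is actually importable.
    has_flash_attn = importlib.util.find_spec("flash_attn") is not None

    model = AutoModel.from_pretrained(
        model_name,
        trust_remote_code=True,
        attn_implementation="flash_attention_2" if has_flash_attn else "sdpa",
    )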