leonardlin committed on
Commit
82d8190
1 Parent(s): 99d00ea

flash attention not happy camper

Files changed (2):
  1. app.py +1 -1
  2. requirements.txt +1 -1
app.py CHANGED
@@ -34,13 +34,13 @@ model = AutoModelForCausalLM.from_pretrained(
     torch_dtype=torch.bfloat16,
     device_map="auto",
     # load_in_8bit=True,
+    load_in_4bit=True,
     quantization_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type='nf4',
         bnb_4bit_use_double_quant=True,
         bnb_4bit_compute_dtype=torch.bfloat16
     ),
-    use_flash_attention_2=True
 )
 streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
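For context, the load path after this commit looks roughly like the sketch below. The model id and tokenizer setup are placeholder assumptions (they are not shown in this diff), and dropping use_flash_attention_2 means the model falls back to the default attention implementation. Note also that recent transformers releases reject passing load_in_4bit as a kwarg alongside quantization_config, so the sketch keeps only the config object rather than both as the diff does.

# Minimal sketch, assuming a placeholder model id; not the Space's exact code.
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TextIteratorStreamer,
)

MODEL_ID = "mistralai/Mistral-7B-v0.1"  # placeholder; the actual model id is not in this diff

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    # use_flash_attention_2=True was removed in this commit; the default
    # attention implementation is used instead.
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,               # 4-bit NF4 quantization via bitsandbytes
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    ),
)

streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)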
requirements.txt CHANGED
@@ -1,6 +1,6 @@
 accelerate
 bitsandbytes
-flash_attn
 gradio
 scipy
+torch
 transformers
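As a quick, illustrative sanity check (not part of the repo), the updated dependency set can be verified with importlib.metadata; the package names mirror requirements.txt:

# Illustrative check: confirm the required packages are installed and that
# flash_attn is no longer needed after this commit.
from importlib import metadata

for pkg in ("accelerate", "bitsandbytes", "gradio", "scipy", "torch", "transformers"):
    print(pkg, metadata.version(pkg))

try:
    metadata.version("flash_attn")
    print("flash_attn is still installed (harmless, but no longer required)")
except metadata.PackageNotFoundError:
    print("flash_attn not installed, as expected after this commit")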