leonardlin committed on
Commit
82d8190
1 Parent(s): 99d00ea

flash attention not happy camper

Files changed (2):
  1. app.py +1 -1
  2. requirements.txt +1 -1
app.py CHANGED
@@ -34,13 +34,13 @@ model = AutoModelForCausalLM.from_pretrained(
     torch_dtype=torch.bfloat16,
     device_map="auto",
     # load_in_8bit=True,
+    load_in_4bit=True,
     quantization_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type='nf4',
         bnb_4bit_use_double_quant=True,
         bnb_4bit_compute_dtype=torch.bfloat16
     ),
-    use_flash_attention_2=True
 )
 streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
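For context, the load path after this commit looks roughly like the sketch below. The model id and tokenizer setup are placeholder assumptions (they are not shown in this diff), and dropping use_flash_attention_2 means the model falls back to the default attention implementation. Note also that recent transformers releases reject passing load_in_4bit as a kwarg alongside quantization_config, so the sketch keeps only the config object rather than both as the diff does.

# Minimal sketch, assuming a placeholder model id; not the Space's exact code.
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TextIteratorStreamer,
)

MODEL_ID = "mistralai/Mistral-7B-v0.1"  # placeholder; the actual model id is not in this diff

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    # use_flash_attention_2=True was removed in this commit; the default
    # attention implementation is used instead.
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,               # 4-bit NF4 quantization via bitsandbytes
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    ),
)

streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)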
requirements.txt CHANGED
@@ -1,6 +1,6 @@
 accelerate
 bitsandbytes
-flash_attn
 gradio
 scipy
+torch
 transformers
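As a quick, illustrative sanity check (not part of the repo), the updated dependency set can be verified with importlib.metadata; the package names mirror requirements.txt:

# Illustrative check: confirm the required packages are installed and that
# flash_attn is no longer needed after this commit.
from importlib import metadata

for pkg in ("accelerate", "bitsandbytes", "gradio", "scipy", "torch", "transformers"):
    print(pkg, metadata.version(pkg))

try:
    metadata.version("flash_attn")
    print("flash_attn is still installed (harmless, but no longer required)")
except metadata.PackageNotFoundError:
    print("flash_attn not installed, as expected after this commit")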