Didier committed on
Commit
d4618a4
1 Parent(s): 5df3581

Loading in 8bits

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -17,16 +17,16 @@ import gradio as gr
17
  #
18
  model_name = "google/madlad400-10b-mt"
19
 
20
- quantization_config = BitsAndBytesConfig(
21
- load_in_4bit=True,
22
- bnb_4bit_compute_dtype=torch.float16,
23
- bnb_4bit_use_double_quant=True,
24
- bnb_4bit_quant_type="nf4"
25
- )
26
  #quantization_config = BitsAndBytesConfig(
27
- # load_in_8bit=True,
28
- # llm_int8_threshold=200.0 # https://discuss.huggingface.co/t/correct-usage-of-bitsandbytesconfig/33809/5
 
 
29
  #)
 
 
 
 
30
 
31
  tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
32
  model = AutoModelForSeq2SeqLM.from_pretrained(
 
17
  #
18
  model_name = "google/madlad400-10b-mt"
19
 
 
 
 
 
 
 
20
  #quantization_config = BitsAndBytesConfig(
21
+ # load_in_4bit=True,
22
+ # bnb_4bit_compute_dtype=torch.float16,
23
+ # bnb_4bit_use_double_quant=True,
24
+ # bnb_4bit_quant_type="nf4"
25
  #)
26
+ quantization_config = BitsAndBytesConfig(
27
+ load_in_8bit=True,
28
+ llm_int8_threshold=200.0 # https://discuss.huggingface.co/t/correct-usage-of-bitsandbytesconfig/33809/5
29
+ )
30
 
31
  tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
32
  model = AutoModelForSeq2SeqLM.from_pretrained(