Update 4bit notes
README.md CHANGED
@@ -136,7 +136,7 @@ See sample configs in [configs](configs) folder or [examples](examples) for quic
 
 - loading
 ```yaml
-
+load_in_4bit: true
 load_in_8bit: true
 bf16: true # require >=ampere
 fp16: true
@@ -175,13 +175,15 @@ tokenizer_type: AutoTokenizer
 # Trust remote code for untrusted source
 trust_remote_code:
 
-# whether you are training a 4-bit quantized model
+# whether you are training a 4-bit GPTQ quantized model
 load_4bit: true
 gptq_groupsize: 128 # group size
 gptq_model_v1: false # v1 or v2
 
 # this will attempt to quantize the model down to 8 bits and use adam 8 bit optimizer
 load_in_8bit: true
+# use bitsandbytes 4 bit
+load_in_4bit:
 
 # Use CUDA bf16
 bf16: true # bool or 'full' for `bf16_full_eval`. require >=ampere