Commit: Update model_config.yaml

Changed file: model_config.yaml (+2 −7)
Diff (unified):

@@ -40,11 +40,6 @@ num_query_groups: 8
 tokenizer:
   library: huggingface
   type: meta-llama/Meta-Llama-3-70B
-  model: /dataset/models/llama2-13b/llama-tokenizer.model
-  vocab_file: null
-  merge_file: null
-  tokenizer_model: /dataset/models/llama2-13b/llama-tokenizer.model
-  sentencepiece_legacy: false
   use_fast: true
 native_amp_init_scale: 4294967296
 native_amp_growth_interval: 1000

@@ -194,7 +189,7 @@ nsys_profile:
   gen_shape: false
 optim:
   name: distributed_fused_adam
-  lr:
+  lr: 1e-07
   weight_decay: 0.01
   betas:
   - 0.9

@@ -203,7 +198,7 @@ optim:
     name: CosineAnnealing
     warmup_steps: 1
     constant_steps: 300
-    min_lr:
+    min_lr: 0.9999e-07
   bucket_cap_mb: 200
   overlap_grad_sync: false
   contiguous_grad_buffer: true
Resulting file (excerpts after the change, with new line numbers):

 40  tokenizer:
 41    library: huggingface
 42    type: meta-llama/Meta-Llama-3-70B
 43    use_fast: true
 44  native_amp_init_scale: 4294967296
 45  native_amp_growth_interval: 1000

189    gen_shape: false
190  optim:
191    name: distributed_fused_adam
192    lr: 1e-07
193    weight_decay: 0.01
194    betas:
195    - 0.9

198      name: CosineAnnealing
199      warmup_steps: 1
200      constant_steps: 300
201      min_lr: 0.9999e-07
202    bucket_cap_mb: 200
203    overlap_grad_sync: false
204    contiguous_grad_buffer: true