Commit: Update model_config.yaml

Changed file: model_config.yaml (+2 −7)
Diff (unified):

@@ -40,11 +40,6 @@ num_query_groups: 8
 tokenizer:
   library: huggingface
   type: meta-llama/Meta-Llama-3-70B
-  model: /dataset/models/llama2-13b/llama-tokenizer.model
-  vocab_file: null
-  merge_file: null
-  tokenizer_model: /dataset/models/llama2-13b/llama-tokenizer.model
-  sentencepiece_legacy: false
   use_fast: true
 native_amp_init_scale: 4294967296
 native_amp_growth_interval: 1000

@@ -194,7 +189,7 @@ nsys_profile:
   gen_shape: false
 optim:
   name: distributed_fused_adam
-  lr:
+  lr: 1e-07
   weight_decay: 0.01
   betas:
   - 0.9

@@ -203,7 +198,7 @@ optim:
     name: CosineAnnealing
     warmup_steps: 1
     constant_steps: 300
-    min_lr:
+    min_lr: 0.9999e-07
   bucket_cap_mb: 200
   overlap_grad_sync: false
   contiguous_grad_buffer: true
Resulting file (excerpts after the change, with new line numbers):

 40  tokenizer:
 41    library: huggingface
 42    type: meta-llama/Meta-Llama-3-70B
 43    use_fast: true
 44  native_amp_init_scale: 4294967296
 45  native_amp_growth_interval: 1000

189    gen_shape: false
190  optim:
191    name: distributed_fused_adam
192    lr: 1e-07
193    weight_decay: 0.01
194    betas:
195    - 0.9

198      name: CosineAnnealing
199      warmup_steps: 1
200      constant_steps: 300
201      min_lr: 0.9999e-07
202    bucket_cap_mb: 200
203    overlap_grad_sync: false
204    contiguous_grad_buffer: true