NeMo
English
nvidia
steerlm
llama3
reward model
File size: 3,357 Bytes
f736c15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c50c23
f736c15
 
 
 
 
 
 
 
7c50c23
f736c15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# NeMo-Aligner configuration for a Megatron-core GPT *regression* reward model
# (see `target` at the bottom). The dimensions below (80 layers, hidden 8192,
# FFN 28672, 64 heads with 8 KV groups) together with the Meta-Llama-3-70B
# tokenizer suggest a Llama-3-70B-scale base model — confirm against the
# actual checkpoint.

# --- Parallelism and batching ---
mcore_gpt: true
micro_batch_size: 1
global_batch_size: 128
tensor_model_parallel_size: 8
pipeline_model_parallel_size: 2
virtual_pipeline_model_parallel_size: null
# --- Model architecture ---
encoder_seq_length: 4096
max_position_embeddings: 4096
num_layers: 80
hidden_size: 8192
ffn_hidden_size: 28672
num_attention_heads: 64
init_method_std: 0.02
use_scaled_init_method: true
hidden_dropout: 0.0
attention_dropout: 0.0
ffn_dropout: 0.0
kv_channels: null
apply_query_key_layer_scaling: true
normalization: rmsnorm
layernorm_epsilon: 1.0e-05
do_layer_norm_weight_decay: false
make_vocab_size_divisible_by: 128
pre_process: true
post_process: true
persist_layer_norm: true
bias: false
activation: fast-swiglu
headscale: false
transformer_block_type: pre_ln
openai_gelu: false
normalize_attention_scores: true
position_embedding_type: rope
rotary_percentage: 1.0
attention_type: multihead
share_embeddings_and_output_weights: false
overlap_p2p_comm: false
batch_p2p_comm: true
# 64 attention heads sharing 8 KV groups => grouped-query attention.
num_query_groups: 8
tokenizer:
  library: huggingface
  type: meta-llama/Meta-Llama-3-70B
  use_fast: true
# --- Mixed precision / AMP ---
native_amp_init_scale: 4294967296
native_amp_growth_interval: 1000
hysteresis: 2
fp32_residual_connection: false
fp16_lm_cross_entropy: false
megatron_amp_O2: true
# --- Fusion and kernel options ---
grad_allreduce_chunk_size_mb: 125
grad_div_ar_fusion: true
gradient_accumulation_fusion: false
bias_activation_fusion: false
bias_dropout_add_fusion: false
masked_softmax_fusion: true
get_attention_mask_from_fusion: true
apply_rope_fusion: false
# --- Misc runtime ---
seed: 1234
resume_from_checkpoint: null
use_cpu_initialization: false
onnx_safe: false
apex_transformer_log_level: 30
gradient_as_bucket_view: true
sync_batch_comm: false
# --- Activation checkpointing (full recomputation, uniform over layers) ---
activations_checkpoint_granularity: full
activations_checkpoint_method: uniform
activations_checkpoint_num_layers: 1
num_micro_batches_with_partial_activation_checkpoints: null
activations_checkpoint_layers_per_pipeline: null
sequence_parallel: false
# --- Transformer Engine / FP8 ---
# NOTE(review): fp8 is false (master switch off); the fp8_* settings below are
# presumably inert until it is enabled — verify before relying on them.
transformer_engine: true
fp8: false
fp8_e4m3: false
fp8_hybrid: true
fp8_margin: 0
fp8_interval: 1
fp8_amax_history_len: 1024
fp8_amax_compute_algo: max
reduce_amax: true
use_emha: false
# --- Data ---
data:
  index_mapping_dir: null
  data_impl: jsonl
  splits_string: null
  seq_length: 4096
  skip_warmup: true
  num_workers: 2
  dataloader_type: single
  reset_position_ids: false
  reset_attention_mask: false
  eod_mask_loss: false
  validation_drop_last: true
  no_seqlen_plus_one_input_tokens: false
  pad_samples_to_global_batch_size: false
  shuffle_documents: true
  data_prefix:
    train:
    - /dataset/train_2_epochs_reg.jsonl
    validation:
    - /dataset/val_2_epochs_reg.jsonl
    # NOTE(review): test reuses the validation file — confirm this is intended.
    test:
    - /dataset/val_2_epochs_reg.jsonl
# --- Nsight Systems profiling (disabled) ---
nsys_profile:
  enabled: false
  start_step: 10
  end_step: 10
  ranks:
  - 0
  gen_shape: false
# --- Optimizer and LR schedule ---
optim:
  name: distributed_fused_adam
  lr: 2.0e-06
  weight_decay: 0.1
  betas:
  - 0.9
  - 0.98
  sched:
    name: CosineAnnealing
    warmup_steps: 10
    constant_steps: 0
    # NOTE(review): min_lr equals lr, so after warmup the cosine schedule is
    # effectively a constant LR — confirm this is deliberate.
    min_lr: 2.0e-06
  bucket_cap_mb: 200
  overlap_grad_sync: false
  contiguous_grad_buffer: true
# RoPE theta for the rope position embeddings configured above.
rotary_base: 500000.0
precision: bf16
# --- Reward-model head ---
# Regression head over 9 attributes (SteerLM-style multi-attribute scoring,
# per the model card tags — verify against the training recipe).
reward_model_type: regression
regression:
  num_attributes: 9
  merge_attributes: false
  attribute_weights: null
  loss_mask_val: -100
output_sequence: false
use_avg_pool: false
force_head_dtype: float32
target: nemo_aligner.models.nlp.gpt.megatron_gpt_regression_reward_model.MegatronGPTRegressionRewardModel
nemo_version: 1.22.0