---
# NeMo Megatron mT5 model configuration
# (target: nemo...megatron_t5_model.MegatronT5Model, nemo_version 1.9.0rc0)

# Batching / parallelism
micro_batch_size: 24
global_batch_size: 1920
tensor_model_parallel_size: 2
pipeline_model_parallel_size: 1
resume_from_checkpoint: null
pipeline_model_parallel_split_rank: 0
make_vocab_size_divisible_by: 128
pre_process: true
post_process: true
megatron_amp_O2: true

# Transformer architecture
seq_length: 512
max_position_embeddings: 512
num_layers: 24
hidden_size: 2048
ffn_hidden_size: 5120
num_attention_heads: 32
init_method_std: 0.015
hidden_dropout: 0.1
attention_dropout: 0.1
kv_channels: 64
apply_query_key_layer_scaling: true
layernorm_epsilon: 1.0e-05
persist_layer_norm: true
gradient_as_bucket_view: true
bias_gelu_fusion: false
masked_softmax_fusion: true
encoder_arch: transformer
decoder_arch: transformer
activation: geglu

# Tokenizer (sentencepiece model shipped inside the .nemo archive)
tokenizer:
  library: sentencepiece
  type: null
  # quoted: value contains a colon ("nemo:<hash>_...")
  model: 'nemo:d55283aced7944109f3cf68d9452e73b_mt5_tokenizer.model'
  vocab_file: null
  merge_file: null
  num_sentinel_tokens: 100

# Mixed precision / initialization
native_amp_init_scale: 4294967296
native_amp_growth_interval: 1000
fp32_residual_connection: false
fp16_lm_cross_entropy: false
seed: 1234
use_cpu_initialization: false
onnx_safe: false
apex_transformer_log_level: 30
activations_checkpoint_method: null
activations_checkpoint_num_layers: 1

# Dataset: alternating (sampling weight, indexed-dataset prefix) pairs
data:
  data_prefix:
    - 0.056224
    - /preproc_data/mc4_ja_mt5_tokenizer_text_document
    - 0.064717
    - /preproc_data/mc4_en_mt5_tokenizer_text_document
    - 0.055394
    - /preproc_data/mc4_it_mt5_tokenizer_text_document
    - 0.006129
    - /preproc_data/mc4_lv_mt5_tokenizer_text_document
    - 0.156199
    - /preproc_data/mc4_ru_mt5_tokenizer_text_document
    - 0.02047
    - /preproc_data/mc4_hu_mt5_tokenizer_text_document
    - 0.020264
    - /preproc_data/mc4_zh_mt5_tokenizer_text_document
    - 0.047618
    - /preproc_data/mc4_pl_mt5_tokenizer_text_document
    - 0.021716
    - /preproc_data/mc4_el_mt5_tokenizer_text_document
    - 0.094469
    - /preproc_data/mc4_de_mt5_tokenizer_text_document
    - 0.028565
    - /preproc_data/mc4_cs_mt5_tokenizer_text_document
    - 0.015286
    - /preproc_data/mc4_ko_mt5_tokenizer_text_document
    - 0.014667
    - /preproc_data/mc4_hi_mt5_tokenizer_text_document
    - 0.015717
    - /preproc_data/mc4_no_mt5_tokenizer_text_document
    - 0.016761
    - /preproc_data/mc4_da_mt5_tokenizer_text_document
    - 0.011884
    - /preproc_data/mc4_sk_mt5_tokenizer_text_document
    - 0.088899
    - /preproc_data/mc4_fr_mt5_tokenizer_text_document
    - 0.051519
    - /preproc_data/mc4_pt_mt5_tokenizer_text_document
    - 0.008662
    - /preproc_data/mc4_lt_mt5_tokenizer_text_document
    - 0.110217
    - /preproc_data/mc4_es_mt5_tokenizer_text_document
    - 0.031769
    - /preproc_data/mc4_nl_mt5_tokenizer_text_document
    - 0.022698
    - /preproc_data/mc4_sv_mt5_tokenizer_text_document
    - 0.025119
    - /preproc_data/mc4_ro_mt5_tokenizer_text_document
    - 0.015036
    - /preproc_data/mc4_fi_mt5_tokenizer_text_document
  index_mapping_dir: null
  data_impl: mmap
  # quoted: digits-and-commas value must stay a string
  splits_string: '99892,99,9'
  seq_length: 512
  seq_length_dec: 128
  skip_warmup: true
  num_workers: 8
  dataloader_type: single
  masked_lm_prob: 0.15
  dataset_type: t5
  short_seq_prob: 0.0
  max_ngram_size: 10
  mean_ngram_size: null
  geometric_dist: true
  permutation: false
  whole_word_masking: false
  favor_longer_ngrams: false

# Optimizer and LR schedule
optim:
  name: fused_adam
  lr: 0.0001
  betas:
    - 0.9
    - 0.999
  eps: 1.0e-08
  weight_decay: 0.01
  sched:
    name: WarmupAnnealing
    min_lr: 1.0e-05
    last_epoch: -1
    warmup_ratio: 0.01

precision: bf16
target: nemo.collections.nlp.models.language_modeling.megatron_t5_model.MegatronT5Model
# quoted: version string, keep as a string scalar
nemo_version: '1.9.0rc0'