!ModelConfig config_data: !DataConfig data_statistics: !DataStatistics average_len_target_per_bucket: - 6.011277314861252 - 10.39671010650862 - 18.504618481112797 - 26.041977712863865 - 32.48293857888395 - 41.60258525852575 - 51.3669121514993 - 59.76513060097686 - 68.25156398104271 - 75.67976141505562 - 83.09476309226905 - 21.1875 - 7.749999999999999 - 5.833333333333333 - 6.0 - 7.0 - 7.25 - 6.333333333333333 - 7.0 - 5.0 - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null - null buckets: - !!python/tuple - 8 - 8 - !!python/tuple - 16 - 16 - !!python/tuple - 24 - 24 - !!python/tuple - 32 - 32 - !!python/tuple - 40 - 40 - !!python/tuple - 48 - 48 - !!python/tuple - 56 - 56 - !!python/tuple - 64 - 64 - !!python/tuple - 72 - 72 - !!python/tuple - 80 - 80 - !!python/tuple - 88 - 88 - !!python/tuple - 96 - 96 - !!python/tuple - 104 - 104 - !!python/tuple - 112 - 112 - !!python/tuple - 120 - 120 - !!python/tuple - 128 - 128 - !!python/tuple - 136 - 136 - !!python/tuple - 144 - 144 - !!python/tuple - 152 - 152 - !!python/tuple - 160 - 160 - !!python/tuple - 168 - 168 - !!python/tuple - 176 - 176 - !!python/tuple - 184 - 184 - !!python/tuple - 192 - 192 - !!python/tuple - 200 - 200 - !!python/tuple - 208 - 208 - !!python/tuple - 216 - 216 - !!python/tuple - 224 - 224 - !!python/tuple - 232 - 232 - !!python/tuple - 240 - 240 - !!python/tuple - 248 - 248 - !!python/tuple - 256 - 256 - !!python/tuple - 264 - 264 - !!python/tuple - 272 - 272 - !!python/tuple - 280 - 280 - !!python/tuple - 288 - 288 - !!python/tuple - 296 - 296 - !!python/tuple - 304 - 304 - !!python/tuple - 312 - 312 - !!python/tuple - 320 - 320 - !!python/tuple - 328 - 328 - !!python/tuple - 336 - 336 - !!python/tuple - 344 - 344 - !!python/tuple - 352 - 352 - !!python/tuple - 360 - 360 - !!python/tuple - 368 - 368 - !!python/tuple - 376 - 376 - !!python/tuple - 384 - 384 - !!python/tuple - 392 - 392 - !!python/tuple - 400 - 400 - !!python/tuple - 408 - 408 - !!python/tuple - 416 - 416 - !!python/tuple - 424 - 424 - !!python/tuple - 432 - 432 - !!python/tuple - 440 - 440 - !!python/tuple - 448 - 448 - !!python/tuple - 456 - 456 - !!python/tuple - 464 - 464 - !!python/tuple - 472 - 472 - !!python/tuple - 480 - 480 - !!python/tuple - 488 - 488 - !!python/tuple - 496 - 496 - !!python/tuple - 504 - 504 - !!python/tuple - 512 - 512 - !!python/tuple - 513 - 513 length_ratio_mean: 1.4462363190719616 length_ratio_stats_per_bucket: - !!python/tuple - 1.13785261776882 - 0.30617192612461125 - !!python/tuple - 1.6532001104346803 - 0.6379238313847178 - !!python/tuple - 2.471066932954014 - 0.9542933578872858 - !!python/tuple - 2.9657919123449297 - 1.5793440289847793 - !!python/tuple - 3.096168086847911 - 2.441325976657657 - !!python/tuple - 3.268248874962792 - 2.2403195790511785 - !!python/tuple - 3.453956732311208 - 2.518686961607376 - !!python/tuple - 3.2456624469723585 - 2.2619507905455962 - !!python/tuple - 3.322785538319874 - 2.1624153548695157 - !!python/tuple - 3.489342008082259 - 2.469578333618206 - !!python/tuple - 3.278259980416589 - 1.6906831932492183 - !!python/tuple - 0.23045117210041233 - 0.26501940303452104 - !!python/tuple - 0.07818400556098729 - 0.027479091254378488 - !!python/tuple - 0.05445908159715249 - 0.013332869808672716 - !!python/tuple - 0.05042016806722689 - 0.0 - !!python/tuple - 0.05658536585365853 - 0.016585365853658534 - !!python/tuple - 0.05537665840439907 - 0.021182928316626336 - !!python/tuple - 0.04568795432892477 - 0.009015856562239487 - !!python/tuple - 0.047619047619047616 - 0.0 - !!python/tuple - 0.032679738562091505 - 0.0 - &id001 !!python/tuple - null - null - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 length_ratio_std: 0.8476478699398606 max_observed_len_source: 153 max_observed_len_target: 87 num_discarded: 0 num_sents: 779247 num_sents_per_bucket: - 478128 - 233503 - 27823 - 9243 - 4982 - 3636 - 3802 - 4709 - 5275 - 4862 - 3208 - 32 - 8 - 12 - 2 - 4 - 8 - 6 - 2 - 2 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 num_tokens_source: 5305413 num_tokens_target: 7842731 num_unks_source: 0 num_unks_target: 0 size_vocab_source: 45864 size_vocab_target: 656 eop_id: -1 max_seq_len_source: 513 max_seq_len_target: 513 num_source_factors: 1 num_target_factors: 5 config_decoder: !TransformerConfig act_type: relu attention_heads: 8 block_prepended_cross_attention: false decoder_type: transformer depth_key_value: 512 dropout_act: 0.2 dropout_attention: 0.2 dropout_prepost: 0.2 feed_forward_num_hidden: 2048 max_seq_len_source: 513 max_seq_len_target: 513 model_size: 512 num_layers: 6 positional_embedding_type: fixed postprocess_sequence: dr preprocess_sequence: n use_glu: false use_lhuc: false config_embed_source: !EmbeddingConfig allow_sparse_grad: false dropout: 0.5 factor_configs: null num_embed: 512 num_factors: 1 vocab_size: 45864 config_embed_target: !EmbeddingConfig allow_sparse_grad: false dropout: 0.5 factor_configs: - !FactorConfig combine: sum num_embed: 512 share_embedding: false vocab_size: 16 - !FactorConfig combine: sum num_embed: 512 share_embedding: false vocab_size: 24 - !FactorConfig combine: sum num_embed: 512 share_embedding: false vocab_size: 416 - !FactorConfig combine: sum num_embed: 512 share_embedding: false vocab_size: 744 num_embed: 512 num_factors: 5 vocab_size: 656 config_encoder: !TransformerConfig act_type: relu attention_heads: 8 block_prepended_cross_attention: false decoder_type: transformer depth_key_value: 512 dropout_act: 0.2 dropout_attention: 0.2 dropout_prepost: 0.2 feed_forward_num_hidden: 2048 max_seq_len_source: 513 max_seq_len_target: 513 model_size: 512 num_layers: 6 positional_embedding_type: fixed postprocess_sequence: dr preprocess_sequence: n use_glu: false use_lhuc: false config_length_task: null dtype: float32 lhuc: false neural_vocab_selection: null neural_vocab_selection_block_loss: false vocab_source_size: 45864 vocab_target_size: 656 weight_tying_type: trg_softmax