|
!ModelConfig |
|
config_data: !DataConfig |
|
data_statistics: !DataStatistics |
|
average_len_target_per_bucket: |
|
- 2.0 |
|
- 4.08629215026444 |
|
- 4.545226437122044 |
|
- 4.607174400985307 |
|
- 4.87920905184137 |
|
- 4.982440502718691 |
|
- 5.27049292873817 |
|
- 5.803196211897 |
|
- 6.028941176470578 |
|
- 6.834719710669081 |
|
- 7.802691790040373 |
|
- 15.045793000744581 |
|
- 8.642659279778403 |
|
- 8.640957446808516 |
|
- 10.678657074340526 |
|
- 9.13441955193482 |
|
- 9.305970149253735 |
|
- 8.974063400576362 |
|
- 10.971887550200805 |
|
- 13.464285714285717 |
|
- 13.232323232323239 |
|
- 11.324468085106396 |
|
- 13.984732824427489 |
|
- 16.142276422764233 |
|
- 16.61032863849766 |
|
- 16.427509293680295 |
|
- 16.655589123867063 |
|
- 19.258675078864364 |
|
- 21.614285714285707 |
|
- 21.643258426966298 |
|
- 20.918478260869566 |
|
- 22.957871396895783 |
|
- 23.638766519823815 |
|
- 24.167002012072434 |
|
- 25.362318840579718 |
|
- 25.95375722543352 |
|
- 26.010489510489492 |
|
- 27.22000000000001 |
|
- 26.959999999999987 |
|
- 27.297697368421066 |
|
- 28.040998217468793 |
|
- 30.042830540037233 |
|
- 29.966735966735982 |
|
- 30.934859154929573 |
|
- 30.47868217054262 |
|
- 30.527777777777782 |
|
- 31.492779783393505 |
|
- 32.51171874999999 |
|
- 32.6358024691358 |
|
- 34.461538461538474 |
|
- 32.60769230769231 |
|
- 5.5 |
|
- 43.0 |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
buckets: |
|
- !!python/tuple |
|
- 8 |
|
- 8 |
|
- !!python/tuple |
|
- 16 |
|
- 16 |
|
- !!python/tuple |
|
- 24 |
|
- 24 |
|
- !!python/tuple |
|
- 32 |
|
- 32 |
|
- !!python/tuple |
|
- 40 |
|
- 40 |
|
- !!python/tuple |
|
- 48 |
|
- 48 |
|
- !!python/tuple |
|
- 56 |
|
- 56 |
|
- !!python/tuple |
|
- 64 |
|
- 64 |
|
- !!python/tuple |
|
- 72 |
|
- 72 |
|
- !!python/tuple |
|
- 80 |
|
- 80 |
|
- !!python/tuple |
|
- 88 |
|
- 88 |
|
- !!python/tuple |
|
- 96 |
|
- 96 |
|
- !!python/tuple |
|
- 104 |
|
- 104 |
|
- !!python/tuple |
|
- 112 |
|
- 112 |
|
- !!python/tuple |
|
- 120 |
|
- 120 |
|
- !!python/tuple |
|
- 128 |
|
- 128 |
|
- !!python/tuple |
|
- 136 |
|
- 129 |
|
- !!python/tuple |
|
- 144 |
|
- 129 |
|
- !!python/tuple |
|
- 152 |
|
- 129 |
|
- !!python/tuple |
|
- 160 |
|
- 129 |
|
- !!python/tuple |
|
- 168 |
|
- 129 |
|
- !!python/tuple |
|
- 176 |
|
- 129 |
|
- !!python/tuple |
|
- 184 |
|
- 129 |
|
- !!python/tuple |
|
- 192 |
|
- 129 |
|
- !!python/tuple |
|
- 200 |
|
- 129 |
|
- !!python/tuple |
|
- 208 |
|
- 129 |
|
- !!python/tuple |
|
- 216 |
|
- 129 |
|
- !!python/tuple |
|
- 224 |
|
- 129 |
|
- !!python/tuple |
|
- 232 |
|
- 129 |
|
- !!python/tuple |
|
- 240 |
|
- 129 |
|
- !!python/tuple |
|
- 248 |
|
- 129 |
|
- !!python/tuple |
|
- 256 |
|
- 129 |
|
- !!python/tuple |
|
- 264 |
|
- 129 |
|
- !!python/tuple |
|
- 272 |
|
- 129 |
|
- !!python/tuple |
|
- 280 |
|
- 129 |
|
- !!python/tuple |
|
- 288 |
|
- 129 |
|
- !!python/tuple |
|
- 296 |
|
- 129 |
|
- !!python/tuple |
|
- 304 |
|
- 129 |
|
- !!python/tuple |
|
- 312 |
|
- 129 |
|
- !!python/tuple |
|
- 320 |
|
- 129 |
|
- !!python/tuple |
|
- 328 |
|
- 129 |
|
- !!python/tuple |
|
- 336 |
|
- 129 |
|
- !!python/tuple |
|
- 344 |
|
- 129 |
|
- !!python/tuple |
|
- 352 |
|
- 129 |
|
- !!python/tuple |
|
- 360 |
|
- 129 |
|
- !!python/tuple |
|
- 368 |
|
- 129 |
|
- !!python/tuple |
|
- 376 |
|
- 129 |
|
- !!python/tuple |
|
- 384 |
|
- 129 |
|
- !!python/tuple |
|
- 392 |
|
- 129 |
|
- !!python/tuple |
|
- 400 |
|
- 129 |
|
- !!python/tuple |
|
- 408 |
|
- 129 |
|
- !!python/tuple |
|
- 416 |
|
- 129 |
|
- !!python/tuple |
|
- 424 |
|
- 129 |
|
- !!python/tuple |
|
- 432 |
|
- 129 |
|
- !!python/tuple |
|
- 440 |
|
- 129 |
|
- !!python/tuple |
|
- 448 |
|
- 129 |
|
- !!python/tuple |
|
- 456 |
|
- 129 |
|
- !!python/tuple |
|
- 464 |
|
- 129 |
|
- !!python/tuple |
|
- 472 |
|
- 129 |
|
- !!python/tuple |
|
- 480 |
|
- 129 |
|
- !!python/tuple |
|
- 488 |
|
- 129 |
|
- !!python/tuple |
|
- 496 |
|
- 129 |
|
- !!python/tuple |
|
- 504 |
|
- 129 |
|
- !!python/tuple |
|
- 512 |
|
- 129 |
|
- !!python/tuple |
|
- 513 |
|
- 129 |
|
length_ratio_mean: 0.16320710693441579 |
|
length_ratio_stats_per_bucket: |
|
- !!python/tuple |
|
- 0.3333333333333333 |
|
- 0.0 |
|
- !!python/tuple |
|
- 0.28246393697985434 |
|
- 0.17868752447804973 |
|
- !!python/tuple |
|
- 0.21840710265332788 |
|
- 0.1330505772378312 |
|
- !!python/tuple |
|
- 0.16560142798704922 |
|
- 0.09581195473826641 |
|
- !!python/tuple |
|
- 0.13801367492489092 |
|
- 0.12429965021659338 |
|
- !!python/tuple |
|
- 0.11863212215522084 |
|
- 0.1208393385452983 |
|
- !!python/tuple |
|
- 0.10151133866588294 |
|
- 0.11072333780515448 |
|
- !!python/tuple |
|
- 0.09838819717267734 |
|
- 0.12557601720946082 |
|
- !!python/tuple |
|
- 0.09394884997066442 |
|
- 0.16159177653077658 |
|
- !!python/tuple |
|
- 0.10402554625981722 |
|
- 0.26083679437294416 |
|
- !!python/tuple |
|
- 0.1012342945734544 |
|
- 0.20099512839826167 |
|
- !!python/tuple |
|
- 0.17281772320739658 |
|
- 0.257545103018524 |
|
- !!python/tuple |
|
- 0.10845391475564008 |
|
- 0.3239959561352876 |
|
- !!python/tuple |
|
- 0.09935073708696769 |
|
- 0.3059573403277105 |
|
- !!python/tuple |
|
- 0.12635011083619693 |
|
- 0.3907243857496131 |
|
- !!python/tuple |
|
- 0.08949294838769961 |
|
- 0.3144956594612652 |
|
- !!python/tuple |
|
- 0.07007331783529426 |
|
- 0.05774973922713284 |
|
- !!python/tuple |
|
- 0.06411727035132861 |
|
- 0.04726139664600602 |
|
- !!python/tuple |
|
- 0.07382609782015778 |
|
- 0.050521761108029695 |
|
- !!python/tuple |
|
- 0.08590157138956556 |
|
- 0.05569960282284095 |
|
- !!python/tuple |
|
- 0.08062786352616935 |
|
- 0.058790770283216505 |
|
- !!python/tuple |
|
- 0.06563222282256796 |
|
- 0.05450274064413921 |
|
- !!python/tuple |
|
- 0.07765999502184046 |
|
- 0.05057167990395854 |
|
- !!python/tuple |
|
- 0.08555873649959676 |
|
- 0.05960592960682603 |
|
- !!python/tuple |
|
- 0.08460026909745419 |
|
- 0.05238594583690578 |
|
- !!python/tuple |
|
- 0.08041439956489124 |
|
- 0.04792844419538253 |
|
- !!python/tuple |
|
- 0.07859928415542815 |
|
- 0.05072523580179588 |
|
- !!python/tuple |
|
- 0.08720935617277471 |
|
- 0.04382289790728185 |
|
- !!python/tuple |
|
- 0.0945785744419209 |
|
- 0.05412799726318098 |
|
- !!python/tuple |
|
- 0.09147639273741005 |
|
- 0.04623369327444139 |
|
- !!python/tuple |
|
- 0.08566937441195915 |
|
- 0.03729904156233976 |
|
- !!python/tuple |
|
- 0.09089250053653752 |
|
- 0.03997864088737986 |
|
- !!python/tuple |
|
- 0.09074467762227163 |
|
- 0.03321456251239067 |
|
- !!python/tuple |
|
- 0.09005889312946631 |
|
- 0.03317108879820214 |
|
- !!python/tuple |
|
- 0.09171046510420815 |
|
- 0.035431771966381115 |
|
- !!python/tuple |
|
- 0.09113688227781093 |
|
- 0.03302006652634936 |
|
- !!python/tuple |
|
- 0.08893255045731784 |
|
- 0.03625826248767216 |
|
- !!python/tuple |
|
- 0.09050986731593187 |
|
- 0.03584124694886162 |
|
- !!python/tuple |
|
- 0.08733945286928807 |
|
- 0.02963045027026122 |
|
- !!python/tuple |
|
- 0.08629641342788141 |
|
- 0.030504004395265606 |
|
- !!python/tuple |
|
- 0.08640318897032012 |
|
- 0.030675309542199148 |
|
- !!python/tuple |
|
- 0.09036990980396087 |
|
- 0.03191849333402471 |
|
- !!python/tuple |
|
- 0.08807433400728253 |
|
- 0.03290646726223996 |
|
- !!python/tuple |
|
- 0.08866806731559626 |
|
- 0.03728726453805084 |
|
- !!python/tuple |
|
- 0.08543343575292071 |
|
- 0.02718762541630789 |
|
- !!python/tuple |
|
- 0.08370713516111211 |
|
- 0.030215511135668078 |
|
- !!python/tuple |
|
- 0.08456814843330018 |
|
- 0.02546164231510412 |
|
- !!python/tuple |
|
- 0.08555768005742968 |
|
- 0.026243070552042298 |
|
- !!python/tuple |
|
- 0.08406679695149158 |
|
- 0.02438826052491033 |
|
- !!python/tuple |
|
- 0.08695308402142944 |
|
- 0.02926245130243095 |
|
- !!python/tuple |
|
- 0.08093989759976632 |
|
- 0.032639773078865474 |
|
- !!python/tuple |
|
- 0.013364278458885503 |
|
- 0.0011988283372310012 |
|
- !!python/tuple |
|
- 0.10311750599520383 |
|
- 0.007770672300249229 |
|
- &id001 !!python/tuple |
|
- null |
|
- null |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
length_ratio_std: 0.1371393774100467 |
|
max_observed_len_source: 417 |
|
max_observed_len_target: 128 |
|
num_discarded: 20 |
|
num_sents: 354707 |
|
num_sents_per_bucket: |
|
- 1 |
|
- 29122 |
|
- 57389 |
|
- 123411 |
|
- 36766 |
|
- 44876 |
|
- 21849 |
|
- 6758 |
|
- 8500 |
|
- 2765 |
|
- 3715 |
|
- 2686 |
|
- 722 |
|
- 752 |
|
- 417 |
|
- 491 |
|
- 402 |
|
- 347 |
|
- 249 |
|
- 196 |
|
- 297 |
|
- 376 |
|
- 262 |
|
- 246 |
|
- 213 |
|
- 269 |
|
- 331 |
|
- 317 |
|
- 350 |
|
- 356 |
|
- 368 |
|
- 451 |
|
- 454 |
|
- 497 |
|
- 483 |
|
- 519 |
|
- 572 |
|
- 500 |
|
- 550 |
|
- 608 |
|
- 561 |
|
- 537 |
|
- 481 |
|
- 568 |
|
- 516 |
|
- 468 |
|
- 554 |
|
- 512 |
|
- 486 |
|
- 455 |
|
- 130 |
|
- 2 |
|
- 4 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
num_tokens_source: 15626084 |
|
num_tokens_target: 2020240 |
|
num_unks_source: 1 |
|
num_unks_target: 116 |
|
size_vocab_source: 1232 |
|
size_vocab_target: 5976 |
|
eop_id: -1 |
|
max_seq_len_source: 513 |
|
max_seq_len_target: 129 |
|
num_source_factors: 1 |
|
num_target_factors: 1 |
|
config_decoder: !TransformerConfig |
|
act_type: relu |
|
attention_heads: 8 |
|
block_prepended_cross_attention: false |
|
decoder_type: transformer |
|
depth_key_value: 512 |
|
dropout_act: 0.1 |
|
dropout_attention: 0.1 |
|
dropout_prepost: 0.1 |
|
feed_forward_num_hidden: 2048 |
|
max_seq_len_source: 513 |
|
max_seq_len_target: 129 |
|
model_size: 512 |
|
num_layers: 6 |
|
positional_embedding_type: fixed |
|
postprocess_sequence: dr |
|
preprocess_sequence: n |
|
use_glu: false |
|
use_lhuc: false |
|
config_embed_source: !EmbeddingConfig |
|
allow_sparse_grad: false |
|
dropout: 0.0 |
|
factor_configs: null |
|
num_embed: 512 |
|
num_factors: 1 |
|
vocab_size: 1232 |
|
config_embed_target: !EmbeddingConfig |
|
allow_sparse_grad: false |
|
dropout: 0.0 |
|
factor_configs: null |
|
num_embed: 512 |
|
num_factors: 1 |
|
vocab_size: 5976 |
|
config_encoder: !TransformerConfig |
|
act_type: relu |
|
attention_heads: 8 |
|
block_prepended_cross_attention: false |
|
decoder_type: transformer |
|
depth_key_value: 512 |
|
dropout_act: 0.1 |
|
dropout_attention: 0.1 |
|
dropout_prepost: 0.1 |
|
feed_forward_num_hidden: 2048 |
|
max_seq_len_source: 513 |
|
max_seq_len_target: 129 |
|
model_size: 512 |
|
num_layers: 6 |
|
positional_embedding_type: fixed |
|
postprocess_sequence: dr |
|
preprocess_sequence: n |
|
use_glu: false |
|
use_lhuc: false |
|
config_length_task: null |
|
dtype: float32 |
|
lhuc: false |
|
neural_vocab_selection: null |
|
neural_vocab_selection_block_loss: false |
|
vocab_source_size: 1232 |
|
vocab_target_size: 5976 |
|
weight_tying_type: none |
|
|