!ModelConfig
config_data: !DataConfig
  data_statistics: !DataStatistics
    average_len_target_per_bucket:
    - 2.0
    - 4.08629215026444
    - 4.545226437122044
    - 4.607174400985307
    - 4.87920905184137
    - 4.982440502718691
    - 5.27049292873817
    - 5.803196211897
    - 6.028941176470578
    - 6.834719710669081
    - 7.802691790040373
    - 15.045793000744581
    - 8.642659279778403
    - 8.640957446808516
    - 10.678657074340526
    - 9.13441955193482
    - 9.305970149253735
    - 8.974063400576362
    - 10.971887550200805
    - 13.464285714285717
    - 13.232323232323239
    - 11.324468085106396
    - 13.984732824427489
    - 16.142276422764233
    - 16.61032863849766
    - 16.427509293680295
    - 16.655589123867063
    - 19.258675078864364
    - 21.614285714285707
    - 21.643258426966298
    - 20.918478260869566
    - 22.957871396895783
    - 23.638766519823815
    - 24.167002012072434
    - 25.362318840579718
    - 25.95375722543352
    - 26.010489510489492
    - 27.22000000000001
    - 26.959999999999987
    - 27.297697368421066
    - 28.040998217468793
    - 30.042830540037233
    - 29.966735966735982
    - 30.934859154929573
    - 30.47868217054262
    - 30.527777777777782
    - 31.492779783393505
    - 32.51171874999999
    - 32.6358024691358
    - 34.461538461538474
    - 32.60769230769231
    - 5.5
    - 43.0
    - null
    - null
    - null
    - null
    - null
    - null
    - null
    - null
    - null
    - null
    - null
    - null
    buckets:
    - !!python/tuple
      - 8
      - 8
    - !!python/tuple
      - 16
      - 16
    - !!python/tuple
      - 24
      - 24
    - !!python/tuple
      - 32
      - 32
    - !!python/tuple
      - 40
      - 40
    - !!python/tuple
      - 48
      - 48
    - !!python/tuple
      - 56
      - 56
    - !!python/tuple
      - 64
      - 64
    - !!python/tuple
      - 72
      - 72
    - !!python/tuple
      - 80
      - 80
    - !!python/tuple
      - 88
      - 88
    - !!python/tuple
      - 96
      - 96
    - !!python/tuple
      - 104
      - 104
    - !!python/tuple
      - 112
      - 112
    - !!python/tuple
      - 120
      - 120
    - !!python/tuple
      - 128
      - 128
    - !!python/tuple
      - 136
      - 129
    - !!python/tuple
      - 144
      - 129
    - !!python/tuple
      - 152
      - 129
    - !!python/tuple
      - 160
      - 129
    - !!python/tuple
      - 168
      - 129
    - !!python/tuple
      - 176
      - 129
    - !!python/tuple
      - 184
      - 129
    - !!python/tuple
      - 192
      - 129
    - !!python/tuple
      - 200
      - 129
    - !!python/tuple
      - 208
      - 129
    - !!python/tuple
      - 216
      - 129
    - !!python/tuple
      - 224
      - 129
    - !!python/tuple
      - 232
      - 129
    - !!python/tuple
      - 240
      - 129
    - !!python/tuple
      - 248
      - 129
    - !!python/tuple
      - 256
      - 129
    - !!python/tuple
      - 264
      - 129
    - !!python/tuple
      - 272
      - 129
    - !!python/tuple
      - 280
      - 129
    - !!python/tuple
      - 288
      - 129
    - !!python/tuple
      - 296
      - 129
    - !!python/tuple
      - 304
      - 129
    - !!python/tuple
      - 312
      - 129
    - !!python/tuple
      - 320
      - 129
    - !!python/tuple
      - 328
      - 129
    - !!python/tuple
      - 336
      - 129
    - !!python/tuple
      - 344
      - 129
    - !!python/tuple
      - 352
      - 129
    - !!python/tuple
      - 360
      - 129
    - !!python/tuple
      - 368
      - 129
    - !!python/tuple
      - 376
      - 129
    - !!python/tuple
      - 384
      - 129
    - !!python/tuple
      - 392
      - 129
    - !!python/tuple
      - 400
      - 129
    - !!python/tuple
      - 408
      - 129
    - !!python/tuple
      - 416
      - 129
    - !!python/tuple
      - 424
      - 129
    - !!python/tuple
      - 432
      - 129
    - !!python/tuple
      - 440
      - 129
    - !!python/tuple
      - 448
      - 129
    - !!python/tuple
      - 456
      - 129
    - !!python/tuple
      - 464
      - 129
    - !!python/tuple
      - 472
      - 129
    - !!python/tuple
      - 480
      - 129
    - !!python/tuple
      - 488
      - 129
    - !!python/tuple
      - 496
      - 129
    - !!python/tuple
      - 504
      - 129
    - !!python/tuple
      - 512
      - 129
    - !!python/tuple
      - 513
      - 129
    length_ratio_mean: 0.16320710693441579
    length_ratio_stats_per_bucket:
    - !!python/tuple
      - 0.3333333333333333
      - 0.0
    - !!python/tuple
      - 0.28246393697985434
      - 0.17868752447804973
    - !!python/tuple
      - 0.21840710265332788
      - 0.1330505772378312
    - !!python/tuple
      - 0.16560142798704922
      - 0.09581195473826641
    - !!python/tuple
      - 0.13801367492489092
      - 0.12429965021659338
    - !!python/tuple
      - 0.11863212215522084
      - 0.1208393385452983
    - !!python/tuple
      - 0.10151133866588294
      - 0.11072333780515448
    - !!python/tuple
      - 0.09838819717267734
      - 0.12557601720946082
    - !!python/tuple
      - 0.09394884997066442
      - 0.16159177653077658
    - !!python/tuple
      - 0.10402554625981722
      - 0.26083679437294416
    - !!python/tuple
      - 0.1012342945734544
      - 0.20099512839826167
    - !!python/tuple
      - 0.17281772320739658
      - 0.257545103018524
    - !!python/tuple
      - 0.10845391475564008
      - 0.3239959561352876
    - !!python/tuple
      - 0.09935073708696769
      - 0.3059573403277105
    - !!python/tuple
      - 0.12635011083619693
      - 0.3907243857496131
    - !!python/tuple
      - 0.08949294838769961
      - 0.3144956594612652
    - !!python/tuple
      - 0.07007331783529426
      - 0.05774973922713284
    - !!python/tuple
      - 0.06411727035132861
      - 0.04726139664600602
    - !!python/tuple
      - 0.07382609782015778
      - 0.050521761108029695
    - !!python/tuple
      - 0.08590157138956556
      - 0.05569960282284095
    - !!python/tuple
      - 0.08062786352616935
      - 0.058790770283216505
    - !!python/tuple
      - 0.06563222282256796
      - 0.05450274064413921
    - !!python/tuple
      - 0.07765999502184046
      - 0.05057167990395854
    - !!python/tuple
      - 0.08555873649959676
      - 0.05960592960682603
    - !!python/tuple
      - 0.08460026909745419
      - 0.05238594583690578
    - !!python/tuple
      - 0.08041439956489124
      - 0.04792844419538253
    - !!python/tuple
      - 0.07859928415542815
      - 0.05072523580179588
    - !!python/tuple
      - 0.08720935617277471
      - 0.04382289790728185
    - !!python/tuple
      - 0.0945785744419209
      - 0.05412799726318098
    - !!python/tuple
      - 0.09147639273741005
      - 0.04623369327444139
    - !!python/tuple
      - 0.08566937441195915
      - 0.03729904156233976
    - !!python/tuple
      - 0.09089250053653752
      - 0.03997864088737986
    - !!python/tuple
      - 0.09074467762227163
      - 0.03321456251239067
    - !!python/tuple
      - 0.09005889312946631
      - 0.03317108879820214
    - !!python/tuple
      - 0.09171046510420815
      - 0.035431771966381115
    - !!python/tuple
      - 0.09113688227781093
      - 0.03302006652634936
    - !!python/tuple
      - 0.08893255045731784
      - 0.03625826248767216
    - !!python/tuple
      - 0.09050986731593187
      - 0.03584124694886162
    - !!python/tuple
      - 0.08733945286928807
      - 0.02963045027026122
    - !!python/tuple
      - 0.08629641342788141
      - 0.030504004395265606
    - !!python/tuple
      - 0.08640318897032012
      - 0.030675309542199148
    - !!python/tuple
      - 0.09036990980396087
      - 0.03191849333402471
    - !!python/tuple
      - 0.08807433400728253
      - 0.03290646726223996
    - !!python/tuple
      - 0.08866806731559626
      - 0.03728726453805084
    - !!python/tuple
      - 0.08543343575292071
      - 0.02718762541630789
    - !!python/tuple
      - 0.08370713516111211
      - 0.030215511135668078
    - !!python/tuple
      - 0.08456814843330018
      - 0.02546164231510412
    - !!python/tuple
      - 0.08555768005742968
      - 0.026243070552042298
    - !!python/tuple
      - 0.08406679695149158
      - 0.02438826052491033
    - !!python/tuple
      - 0.08695308402142944
      - 0.02926245130243095
    - !!python/tuple
      - 0.08093989759976632
      - 0.032639773078865474
    - !!python/tuple
      - 0.013364278458885503
      - 0.0011988283372310012
    - !!python/tuple
      - 0.10311750599520383
      - 0.007770672300249229
    - &id001 !!python/tuple
      - null
      - null
    - *id001
    - *id001
    - *id001
    - *id001
    - *id001
    - *id001
    - *id001
    - *id001
    - *id001
    - *id001
    - *id001
    length_ratio_std: 0.1371393774100467
    max_observed_len_source: 417
    max_observed_len_target: 128
    num_discarded: 20
    num_sents: 354707
    num_sents_per_bucket:
    - 1
    - 29122
    - 57389
    - 123411
    - 36766
    - 44876
    - 21849
    - 6758
    - 8500
    - 2765
    - 3715
    - 2686
    - 722
    - 752
    - 417
    - 491
    - 402
    - 347
    - 249
    - 196
    - 297
    - 376
    - 262
    - 246
    - 213
    - 269
    - 331
    - 317
    - 350
    - 356
    - 368
    - 451
    - 454
    - 497
    - 483
    - 519
    - 572
    - 500
    - 550
    - 608
    - 561
    - 537
    - 481
    - 568
    - 516
    - 468
    - 554
    - 512
    - 486
    - 455
    - 130
    - 2
    - 4
    - 0
    - 0
    - 0
    - 0
    - 0
    - 0
    - 0
    - 0
    - 0
    - 0
    - 0
    - 0
    num_tokens_source: 15626084
    num_tokens_target: 2020240
    num_unks_source: 1
    num_unks_target: 116
    size_vocab_source: 1232
    size_vocab_target: 5976
  eop_id: -1
  max_seq_len_source: 513
  max_seq_len_target: 129
  num_source_factors: 1
  num_target_factors: 1
config_decoder: !TransformerConfig
  act_type: relu
  attention_heads: 8
  block_prepended_cross_attention: false
  decoder_type: transformer
  depth_key_value: 512
  dropout_act: 0.1
  dropout_attention: 0.1
  dropout_prepost: 0.1
  feed_forward_num_hidden: 2048
  max_seq_len_source: 513
  max_seq_len_target: 129
  model_size: 512
  num_layers: 6
  positional_embedding_type: fixed
  postprocess_sequence: dr
  preprocess_sequence: n
  use_glu: false
  use_lhuc: false
config_embed_source: !EmbeddingConfig
  allow_sparse_grad: false
  dropout: 0.0
  factor_configs: null
  num_embed: 512
  num_factors: 1
  vocab_size: 1232
config_embed_target: !EmbeddingConfig
  allow_sparse_grad: false
  dropout: 0.0
  factor_configs: null
  num_embed: 512
  num_factors: 1
  vocab_size: 5976
config_encoder: !TransformerConfig
  act_type: relu
  attention_heads: 8
  block_prepended_cross_attention: false
  decoder_type: transformer
  depth_key_value: 512
  dropout_act: 0.1
  dropout_attention: 0.1
  dropout_prepost: 0.1
  feed_forward_num_hidden: 2048
  max_seq_len_source: 513
  max_seq_len_target: 129
  model_size: 512
  num_layers: 6
  positional_embedding_type: fixed
  postprocess_sequence: dr
  preprocess_sequence: n
  use_glu: false
  use_lhuc: false
config_length_task: null
dtype: float32
lhuc: false
neural_vocab_selection: null
neural_vocab_selection_block_loss: false
vocab_source_size: 1232
vocab_target_size: 5976
weight_tying_type: none
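# The block below is a minimal, hypothetical sketch (kept as comments so the file stays
# valid YAML, and not Sockeye's own loading code) of how this serialized !ModelConfig
# could be inspected with plain PyYAML by mapping the object tags (!ModelConfig,
# !DataConfig, !!python/tuple, ...) onto ordinary dicts and tuples. The filename
# 'config.yaml' and the names InspectLoader, _tagged_mapping and _python_tuple are
# assumptions for illustration only; Sockeye itself deserializes this file when the
# model is loaded.
#
#   import yaml
#
#   class InspectLoader(yaml.SafeLoader):
#       """SafeLoader variant that tolerates the object tags for read-only inspection."""
#
#   def _tagged_mapping(loader, tag_suffix, node):
#       # Treat any application tag such as !ModelConfig as a plain mapping.
#       return loader.construct_mapping(node, deep=True)
#
#   def _python_tuple(loader, node):
#       # !!python/tuple -> built-in tuple
#       return tuple(loader.construct_sequence(node, deep=True))
#
#   InspectLoader.add_multi_constructor('!', _tagged_mapping)
#   InspectLoader.add_constructor('tag:yaml.org,2002:python/tuple', _python_tuple)
#
#   with open('config.yaml') as f:
#       config = yaml.load(f, Loader=InspectLoader)
#
#   stats = config['config_data']['data_statistics']
#   print(config['vocab_source_size'], config['vocab_target_size'])  # 1232 5976
#   print(stats['num_sents'], stats['max_observed_len_target'])      # 354707 128
#   print(stats['buckets'][0])                                       # (8, 8)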