sign
/

AmitMY commited on
Commit
7254477
·
1 Parent(s): 4d1b0cf

Upload config with huggingface_hub

Browse files
Files changed (1) hide show
  1. config +582 -0
config ADDED
@@ -0,0 +1,582 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ !ModelConfig
2
+ config_data: !DataConfig
3
+ data_statistics: !DataStatistics
4
+ average_len_target_per_bucket:
5
+ - 2.0
6
+ - 4.08629215026444
7
+ - 4.545226437122044
8
+ - 4.607174400985307
9
+ - 4.87920905184137
10
+ - 4.982440502718691
11
+ - 5.27049292873817
12
+ - 5.803196211897
13
+ - 6.028941176470578
14
+ - 6.834719710669081
15
+ - 7.802691790040373
16
+ - 15.045793000744581
17
+ - 8.642659279778403
18
+ - 8.640957446808516
19
+ - 10.678657074340526
20
+ - 9.13441955193482
21
+ - 9.305970149253735
22
+ - 8.974063400576362
23
+ - 10.971887550200805
24
+ - 13.464285714285717
25
+ - 13.232323232323239
26
+ - 11.324468085106396
27
+ - 13.984732824427489
28
+ - 16.142276422764233
29
+ - 16.61032863849766
30
+ - 16.427509293680295
31
+ - 16.655589123867063
32
+ - 19.258675078864364
33
+ - 21.614285714285707
34
+ - 21.643258426966298
35
+ - 20.918478260869566
36
+ - 22.957871396895783
37
+ - 23.638766519823815
38
+ - 24.167002012072434
39
+ - 25.362318840579718
40
+ - 25.95375722543352
41
+ - 26.010489510489492
42
+ - 27.22000000000001
43
+ - 26.959999999999987
44
+ - 27.297697368421066
45
+ - 28.040998217468793
46
+ - 30.042830540037233
47
+ - 29.966735966735982
48
+ - 30.934859154929573
49
+ - 30.47868217054262
50
+ - 30.527777777777782
51
+ - 31.492779783393505
52
+ - 32.51171874999999
53
+ - 32.6358024691358
54
+ - 34.461538461538474
55
+ - 32.60769230769231
56
+ - 5.5
57
+ - 43.0
58
+ - null
59
+ - null
60
+ - null
61
+ - null
62
+ - null
63
+ - null
64
+ - null
65
+ - null
66
+ - null
67
+ - null
68
+ - null
69
+ - null
70
+ buckets:
71
+ - !!python/tuple
72
+ - 8
73
+ - 8
74
+ - !!python/tuple
75
+ - 16
76
+ - 16
77
+ - !!python/tuple
78
+ - 24
79
+ - 24
80
+ - !!python/tuple
81
+ - 32
82
+ - 32
83
+ - !!python/tuple
84
+ - 40
85
+ - 40
86
+ - !!python/tuple
87
+ - 48
88
+ - 48
89
+ - !!python/tuple
90
+ - 56
91
+ - 56
92
+ - !!python/tuple
93
+ - 64
94
+ - 64
95
+ - !!python/tuple
96
+ - 72
97
+ - 72
98
+ - !!python/tuple
99
+ - 80
100
+ - 80
101
+ - !!python/tuple
102
+ - 88
103
+ - 88
104
+ - !!python/tuple
105
+ - 96
106
+ - 96
107
+ - !!python/tuple
108
+ - 104
109
+ - 104
110
+ - !!python/tuple
111
+ - 112
112
+ - 112
113
+ - !!python/tuple
114
+ - 120
115
+ - 120
116
+ - !!python/tuple
117
+ - 128
118
+ - 128
119
+ - !!python/tuple
120
+ - 136
121
+ - 129
122
+ - !!python/tuple
123
+ - 144
124
+ - 129
125
+ - !!python/tuple
126
+ - 152
127
+ - 129
128
+ - !!python/tuple
129
+ - 160
130
+ - 129
131
+ - !!python/tuple
132
+ - 168
133
+ - 129
134
+ - !!python/tuple
135
+ - 176
136
+ - 129
137
+ - !!python/tuple
138
+ - 184
139
+ - 129
140
+ - !!python/tuple
141
+ - 192
142
+ - 129
143
+ - !!python/tuple
144
+ - 200
145
+ - 129
146
+ - !!python/tuple
147
+ - 208
148
+ - 129
149
+ - !!python/tuple
150
+ - 216
151
+ - 129
152
+ - !!python/tuple
153
+ - 224
154
+ - 129
155
+ - !!python/tuple
156
+ - 232
157
+ - 129
158
+ - !!python/tuple
159
+ - 240
160
+ - 129
161
+ - !!python/tuple
162
+ - 248
163
+ - 129
164
+ - !!python/tuple
165
+ - 256
166
+ - 129
167
+ - !!python/tuple
168
+ - 264
169
+ - 129
170
+ - !!python/tuple
171
+ - 272
172
+ - 129
173
+ - !!python/tuple
174
+ - 280
175
+ - 129
176
+ - !!python/tuple
177
+ - 288
178
+ - 129
179
+ - !!python/tuple
180
+ - 296
181
+ - 129
182
+ - !!python/tuple
183
+ - 304
184
+ - 129
185
+ - !!python/tuple
186
+ - 312
187
+ - 129
188
+ - !!python/tuple
189
+ - 320
190
+ - 129
191
+ - !!python/tuple
192
+ - 328
193
+ - 129
194
+ - !!python/tuple
195
+ - 336
196
+ - 129
197
+ - !!python/tuple
198
+ - 344
199
+ - 129
200
+ - !!python/tuple
201
+ - 352
202
+ - 129
203
+ - !!python/tuple
204
+ - 360
205
+ - 129
206
+ - !!python/tuple
207
+ - 368
208
+ - 129
209
+ - !!python/tuple
210
+ - 376
211
+ - 129
212
+ - !!python/tuple
213
+ - 384
214
+ - 129
215
+ - !!python/tuple
216
+ - 392
217
+ - 129
218
+ - !!python/tuple
219
+ - 400
220
+ - 129
221
+ - !!python/tuple
222
+ - 408
223
+ - 129
224
+ - !!python/tuple
225
+ - 416
226
+ - 129
227
+ - !!python/tuple
228
+ - 424
229
+ - 129
230
+ - !!python/tuple
231
+ - 432
232
+ - 129
233
+ - !!python/tuple
234
+ - 440
235
+ - 129
236
+ - !!python/tuple
237
+ - 448
238
+ - 129
239
+ - !!python/tuple
240
+ - 456
241
+ - 129
242
+ - !!python/tuple
243
+ - 464
244
+ - 129
245
+ - !!python/tuple
246
+ - 472
247
+ - 129
248
+ - !!python/tuple
249
+ - 480
250
+ - 129
251
+ - !!python/tuple
252
+ - 488
253
+ - 129
254
+ - !!python/tuple
255
+ - 496
256
+ - 129
257
+ - !!python/tuple
258
+ - 504
259
+ - 129
260
+ - !!python/tuple
261
+ - 512
262
+ - 129
263
+ - !!python/tuple
264
+ - 513
265
+ - 129
266
+ length_ratio_mean: 0.16320710693441579
267
+ length_ratio_stats_per_bucket:
268
+ - !!python/tuple
269
+ - 0.3333333333333333
270
+ - 0.0
271
+ - !!python/tuple
272
+ - 0.28246393697985434
273
+ - 0.17868752447804973
274
+ - !!python/tuple
275
+ - 0.21840710265332788
276
+ - 0.1330505772378312
277
+ - !!python/tuple
278
+ - 0.16560142798704922
279
+ - 0.09581195473826641
280
+ - !!python/tuple
281
+ - 0.13801367492489092
282
+ - 0.12429965021659338
283
+ - !!python/tuple
284
+ - 0.11863212215522084
285
+ - 0.1208393385452983
286
+ - !!python/tuple
287
+ - 0.10151133866588294
288
+ - 0.11072333780515448
289
+ - !!python/tuple
290
+ - 0.09838819717267734
291
+ - 0.12557601720946082
292
+ - !!python/tuple
293
+ - 0.09394884997066442
294
+ - 0.16159177653077658
295
+ - !!python/tuple
296
+ - 0.10402554625981722
297
+ - 0.26083679437294416
298
+ - !!python/tuple
299
+ - 0.1012342945734544
300
+ - 0.20099512839826167
301
+ - !!python/tuple
302
+ - 0.17281772320739658
303
+ - 0.257545103018524
304
+ - !!python/tuple
305
+ - 0.10845391475564008
306
+ - 0.3239959561352876
307
+ - !!python/tuple
308
+ - 0.09935073708696769
309
+ - 0.3059573403277105
310
+ - !!python/tuple
311
+ - 0.12635011083619693
312
+ - 0.3907243857496131
313
+ - !!python/tuple
314
+ - 0.08949294838769961
315
+ - 0.3144956594612652
316
+ - !!python/tuple
317
+ - 0.07007331783529426
318
+ - 0.05774973922713284
319
+ - !!python/tuple
320
+ - 0.06411727035132861
321
+ - 0.04726139664600602
322
+ - !!python/tuple
323
+ - 0.07382609782015778
324
+ - 0.050521761108029695
325
+ - !!python/tuple
326
+ - 0.08590157138956556
327
+ - 0.05569960282284095
328
+ - !!python/tuple
329
+ - 0.08062786352616935
330
+ - 0.058790770283216505
331
+ - !!python/tuple
332
+ - 0.06563222282256796
333
+ - 0.05450274064413921
334
+ - !!python/tuple
335
+ - 0.07765999502184046
336
+ - 0.05057167990395854
337
+ - !!python/tuple
338
+ - 0.08555873649959676
339
+ - 0.05960592960682603
340
+ - !!python/tuple
341
+ - 0.08460026909745419
342
+ - 0.05238594583690578
343
+ - !!python/tuple
344
+ - 0.08041439956489124
345
+ - 0.04792844419538253
346
+ - !!python/tuple
347
+ - 0.07859928415542815
348
+ - 0.05072523580179588
349
+ - !!python/tuple
350
+ - 0.08720935617277471
351
+ - 0.04382289790728185
352
+ - !!python/tuple
353
+ - 0.0945785744419209
354
+ - 0.05412799726318098
355
+ - !!python/tuple
356
+ - 0.09147639273741005
357
+ - 0.04623369327444139
358
+ - !!python/tuple
359
+ - 0.08566937441195915
360
+ - 0.03729904156233976
361
+ - !!python/tuple
362
+ - 0.09089250053653752
363
+ - 0.03997864088737986
364
+ - !!python/tuple
365
+ - 0.09074467762227163
366
+ - 0.03321456251239067
367
+ - !!python/tuple
368
+ - 0.09005889312946631
369
+ - 0.03317108879820214
370
+ - !!python/tuple
371
+ - 0.09171046510420815
372
+ - 0.035431771966381115
373
+ - !!python/tuple
374
+ - 0.09113688227781093
375
+ - 0.03302006652634936
376
+ - !!python/tuple
377
+ - 0.08893255045731784
378
+ - 0.03625826248767216
379
+ - !!python/tuple
380
+ - 0.09050986731593187
381
+ - 0.03584124694886162
382
+ - !!python/tuple
383
+ - 0.08733945286928807
384
+ - 0.02963045027026122
385
+ - !!python/tuple
386
+ - 0.08629641342788141
387
+ - 0.030504004395265606
388
+ - !!python/tuple
389
+ - 0.08640318897032012
390
+ - 0.030675309542199148
391
+ - !!python/tuple
392
+ - 0.09036990980396087
393
+ - 0.03191849333402471
394
+ - !!python/tuple
395
+ - 0.08807433400728253
396
+ - 0.03290646726223996
397
+ - !!python/tuple
398
+ - 0.08866806731559626
399
+ - 0.03728726453805084
400
+ - !!python/tuple
401
+ - 0.08543343575292071
402
+ - 0.02718762541630789
403
+ - !!python/tuple
404
+ - 0.08370713516111211
405
+ - 0.030215511135668078
406
+ - !!python/tuple
407
+ - 0.08456814843330018
408
+ - 0.02546164231510412
409
+ - !!python/tuple
410
+ - 0.08555768005742968
411
+ - 0.026243070552042298
412
+ - !!python/tuple
413
+ - 0.08406679695149158
414
+ - 0.02438826052491033
415
+ - !!python/tuple
416
+ - 0.08695308402142944
417
+ - 0.02926245130243095
418
+ - !!python/tuple
419
+ - 0.08093989759976632
420
+ - 0.032639773078865474
421
+ - !!python/tuple
422
+ - 0.013364278458885503
423
+ - 0.0011988283372310012
424
+ - !!python/tuple
425
+ - 0.10311750599520383
426
+ - 0.007770672300249229
427
+ - &id001 !!python/tuple
428
+ - null
429
+ - null
430
+ - *id001
431
+ - *id001
432
+ - *id001
433
+ - *id001
434
+ - *id001
435
+ - *id001
436
+ - *id001
437
+ - *id001
438
+ - *id001
439
+ - *id001
440
+ - *id001
441
+ length_ratio_std: 0.1371393774100467
442
+ max_observed_len_source: 417
443
+ max_observed_len_target: 128
444
+ num_discarded: 20
445
+ num_sents: 354707
446
+ num_sents_per_bucket:
447
+ - 1
448
+ - 29122
449
+ - 57389
450
+ - 123411
451
+ - 36766
452
+ - 44876
453
+ - 21849
454
+ - 6758
455
+ - 8500
456
+ - 2765
457
+ - 3715
458
+ - 2686
459
+ - 722
460
+ - 752
461
+ - 417
462
+ - 491
463
+ - 402
464
+ - 347
465
+ - 249
466
+ - 196
467
+ - 297
468
+ - 376
469
+ - 262
470
+ - 246
471
+ - 213
472
+ - 269
473
+ - 331
474
+ - 317
475
+ - 350
476
+ - 356
477
+ - 368
478
+ - 451
479
+ - 454
480
+ - 497
481
+ - 483
482
+ - 519
483
+ - 572
484
+ - 500
485
+ - 550
486
+ - 608
487
+ - 561
488
+ - 537
489
+ - 481
490
+ - 568
491
+ - 516
492
+ - 468
493
+ - 554
494
+ - 512
495
+ - 486
496
+ - 455
497
+ - 130
498
+ - 2
499
+ - 4
500
+ - 0
501
+ - 0
502
+ - 0
503
+ - 0
504
+ - 0
505
+ - 0
506
+ - 0
507
+ - 0
508
+ - 0
509
+ - 0
510
+ - 0
511
+ - 0
512
+ num_tokens_source: 15626084
513
+ num_tokens_target: 2020240
514
+ num_unks_source: 1
515
+ num_unks_target: 116
516
+ size_vocab_source: 1232
517
+ size_vocab_target: 5976
518
+ eop_id: -1
519
+ max_seq_len_source: 513
520
+ max_seq_len_target: 129
521
+ num_source_factors: 1
522
+ num_target_factors: 1
523
+ config_decoder: !TransformerConfig
524
+ act_type: relu
525
+ attention_heads: 8
526
+ block_prepended_cross_attention: false
527
+ decoder_type: transformer
528
+ depth_key_value: 512
529
+ dropout_act: 0.1
530
+ dropout_attention: 0.1
531
+ dropout_prepost: 0.1
532
+ feed_forward_num_hidden: 2048
533
+ max_seq_len_source: 513
534
+ max_seq_len_target: 129
535
+ model_size: 512
536
+ num_layers: 6
537
+ positional_embedding_type: fixed
538
+ postprocess_sequence: dr
539
+ preprocess_sequence: n
540
+ use_glu: false
541
+ use_lhuc: false
542
+ config_embed_source: !EmbeddingConfig
543
+ allow_sparse_grad: false
544
+ dropout: 0.0
545
+ factor_configs: null
546
+ num_embed: 512
547
+ num_factors: 1
548
+ vocab_size: 1232
549
+ config_embed_target: !EmbeddingConfig
550
+ allow_sparse_grad: false
551
+ dropout: 0.0
552
+ factor_configs: null
553
+ num_embed: 512
554
+ num_factors: 1
555
+ vocab_size: 5976
556
+ config_encoder: !TransformerConfig
557
+ act_type: relu
558
+ attention_heads: 8
559
+ block_prepended_cross_attention: false
560
+ decoder_type: transformer
561
+ depth_key_value: 512
562
+ dropout_act: 0.1
563
+ dropout_attention: 0.1
564
+ dropout_prepost: 0.1
565
+ feed_forward_num_hidden: 2048
566
+ max_seq_len_source: 513
567
+ max_seq_len_target: 129
568
+ model_size: 512
569
+ num_layers: 6
570
+ positional_embedding_type: fixed
571
+ postprocess_sequence: dr
572
+ preprocess_sequence: n
573
+ use_glu: false
574
+ use_lhuc: false
575
+ config_length_task: null
576
+ dtype: float32
577
+ lhuc: false
578
+ neural_vocab_selection: null
579
+ neural_vocab_selection_block_loss: false
580
+ vocab_source_size: 1232
581
+ vocab_target_size: 5976
582
+ weight_tying_type: none