Amir Kargaran committed on
Commit
5ef3c94
1 Parent(s): 37bc035
README.md CHANGED
@@ -5,33 +5,32 @@ datasets:
5
  tags:
6
  - generated_from_trainer
7
  model-index:
8
- - name: flan-t5-small-prompt
9
  results: []
10
  pipeline_tag: text-generation
11
  language:
12
  - en
13
  widget:
14
  - text: >-
15
- output: Hitler was born in Braunau am Inn in Austria-Hungary and was
16
  raised near Linz. He lived in Vienna later in the first decade of the
17
  1900s and moved to Germany in 1913. He was decorated during his service in
18
  the German Army in World War I. In 1919, he joined the German Workers'
19
  Party (DAP), the precursor of the Nazi Party, and was appointed leader of
20
  the Nazi Party in 1921. In 1923, he attempted to seize governmental power
21
  in a failed coup in Munich and was imprisoned with a sentence of five
22
- years.
23
- input:
24
  example_title: Example 1
25
  - text: >-
26
- output: 1. Base your meals on higher fibre starchy carbohydrates 2. Eat
27
- lots of fruit and veg 3. Eat more fish, including a portion of oily fish.
28
- input:
29
  example_title: Example 2
30
  ---
31
 
32
 
33
- # Model flan-t5-small-prompt
34
 
35
  This model can generate prompts (instruction) for any text!
36
 
37
- This model is a fine-tuned version of [google/flan-t5-small](https://huggingface.co/google/flan-t5-small) on [alpaca dataset](https://huggingface.co/datasets/tatsu-lab/alpaca) but in a **reverse format**!
 
5
  tags:
6
  - generated_from_trainer
7
  model-index:
8
+ - name: T5R
9
  results: []
10
  pipeline_tag: text-generation
11
  language:
12
  - en
13
  widget:
14
  - text: >-
15
+ Instruction: X\nOutput: Hitler was born in Braunau am Inn in Austria-Hungary and was
16
  raised near Linz. He lived in Vienna later in the first decade of the
17
  1900s and moved to Germany in 1913. He was decorated during his service in
18
  the German Army in World War I. In 1919, he joined the German Workers'
19
  Party (DAP), the precursor of the Nazi Party, and was appointed leader of
20
  the Nazi Party in 1921. In 1923, he attempted to seize governmental power
21
  in a failed coup in Munich and was imprisoned with a sentence of five
22
+ years.\nWhat kind of instruction could this be the answer to?\nX:
 
23
  example_title: Example 1
24
  - text: >-
25
+ Instruction: X\nOutput: 1- Base your meals on higher fibre starchy carbohydrates. 2- Eat
26
+ lots of fruit and veg. 3- Eat more fish, including a portion of oily fish.\nWhat kind of instruction could
27
+ this be the answer to?\nX:
28
  example_title: Example 2
29
  ---
30
 
31
 
32
+ # T5-Reverse (T5R)
33
 
34
  This model can generate prompts (instruction) for any text!
35
 
36
+ This model is an instruction-tuned version of [google/flan-t5-base](https://huggingface.co/google/flan-t5-base) on the [alpaca dataset](https://huggingface.co/datasets/tatsu-lab/alpaca), but in **reverse format**!
config.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "_name_or_path": "google/flan-t5-small",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
6
- "d_ff": 1024,
7
  "d_kv": 64,
8
- "d_model": 512,
9
  "decoder_start_token_id": 0,
10
  "dense_act_fn": "gelu_new",
11
  "dropout_rate": 0.1,
@@ -17,45 +17,24 @@
17
  "layer_norm_epsilon": 1e-06,
18
  "model_type": "t5",
19
  "n_positions": 512,
20
- "num_decoder_layers": 8,
21
- "num_heads": 6,
22
- "num_layers": 8,
23
  "output_past": true,
24
  "pad_token_id": 0,
25
  "relative_attention_max_distance": 128,
26
  "relative_attention_num_buckets": 32,
27
  "task_specific_params": {
28
- "summarization": {
29
- "early_stopping": true,
30
- "length_penalty": 2.0,
31
- "max_length": 200,
32
- "min_length": 30,
33
- "no_repeat_ngram_size": 3,
34
- "num_beams": 4,
35
- "prefix": "summarize: "
36
- },
37
- "translation_en_to_de": {
38
- "early_stopping": true,
39
- "max_length": 300,
40
- "num_beams": 4,
41
- "prefix": "translate English to German: "
42
- },
43
- "translation_en_to_fr": {
44
- "early_stopping": true,
45
- "max_length": 300,
46
- "num_beams": 4,
47
- "prefix": "translate English to French: "
48
- },
49
- "translation_en_to_ro": {
50
  "early_stopping": true,
51
  "max_length": 300,
52
  "num_beams": 4,
53
- "prefix": "translate English to Romanian: "
54
  }
55
  },
56
  "tie_word_embeddings": false,
57
  "torch_dtype": "float32",
58
- "transformers_version": "4.29.1",
59
  "use_cache": true,
60
  "vocab_size": 32128
61
  }
 
1
  {
2
+ "_name_or_path": "kargaranamir/T5R",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
6
+ "d_ff": 2048,
7
  "d_kv": 64,
8
+ "d_model": 768,
9
  "decoder_start_token_id": 0,
10
  "dense_act_fn": "gelu_new",
11
  "dropout_rate": 0.1,
 
17
  "layer_norm_epsilon": 1e-06,
18
  "model_type": "t5",
19
  "n_positions": 512,
20
+ "num_decoder_layers": 12,
21
+ "num_heads": 12,
22
+ "num_layers": 12,
23
  "output_past": true,
24
  "pad_token_id": 0,
25
  "relative_attention_max_distance": 128,
26
  "relative_attention_num_buckets": 32,
27
  "task_specific_params": {
28
+ "reverse-gen": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  "early_stopping": true,
30
  "max_length": 300,
31
  "num_beams": 4,
32
+ "prefix": "Instruction: X"
33
  }
34
  },
35
  "tie_word_embeddings": false,
36
  "torch_dtype": "float32",
37
+ "transformers_version": "4.24.0",
38
  "use_cache": true,
39
  "vocab_size": 32128
40
  }
generation_config.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "decoder_start_token_id": 0,
3
- "eos_token_id": 1,
4
- "pad_token_id": 0,
5
- "transformers_version": "4.29.1"
6
- }
 
 
 
 
 
 
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f3ab72f6ea636e6e3de721bec36b2d30a6e749a518e2673f24deefb64f89557
3
- size 307910149
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f08bda4a51ec276efc98bb0cd0c994f5b493465386b0aef79409b69a851774c
3
+ size 990406605
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
tokenizer.json CHANGED
@@ -19,930 +19,930 @@
19
  "added_tokens": [
20
  {
21
  "id": 0,
 
22
  "content": "<pad>",
23
  "single_word": false,
24
  "lstrip": false,
25
  "rstrip": false,
26
- "normalized": false,
27
- "special": true
28
  },
29
  {
30
  "id": 1,
 
31
  "content": "</s>",
32
  "single_word": false,
33
  "lstrip": false,
34
  "rstrip": false,
35
- "normalized": false,
36
- "special": true
37
  },
38
  {
39
  "id": 2,
 
40
  "content": "<unk>",
41
  "single_word": false,
42
  "lstrip": false,
43
  "rstrip": false,
44
- "normalized": false,
45
- "special": true
46
  },
47
  {
48
  "id": 32000,
 
49
  "content": "<extra_id_99>",
50
  "single_word": false,
51
  "lstrip": false,
52
  "rstrip": false,
53
- "normalized": false,
54
- "special": true
55
  },
56
  {
57
  "id": 32001,
 
58
  "content": "<extra_id_98>",
59
  "single_word": false,
60
  "lstrip": false,
61
  "rstrip": false,
62
- "normalized": false,
63
- "special": true
64
  },
65
  {
66
  "id": 32002,
 
67
  "content": "<extra_id_97>",
68
  "single_word": false,
69
  "lstrip": false,
70
  "rstrip": false,
71
- "normalized": false,
72
- "special": true
73
  },
74
  {
75
  "id": 32003,
 
76
  "content": "<extra_id_96>",
77
  "single_word": false,
78
  "lstrip": false,
79
  "rstrip": false,
80
- "normalized": false,
81
- "special": true
82
  },
83
  {
84
  "id": 32004,
 
85
  "content": "<extra_id_95>",
86
  "single_word": false,
87
  "lstrip": false,
88
  "rstrip": false,
89
- "normalized": false,
90
- "special": true
91
  },
92
  {
93
  "id": 32005,
 
94
  "content": "<extra_id_94>",
95
  "single_word": false,
96
  "lstrip": false,
97
  "rstrip": false,
98
- "normalized": false,
99
- "special": true
100
  },
101
  {
102
  "id": 32006,
 
103
  "content": "<extra_id_93>",
104
  "single_word": false,
105
  "lstrip": false,
106
  "rstrip": false,
107
- "normalized": false,
108
- "special": true
109
  },
110
  {
111
  "id": 32007,
 
112
  "content": "<extra_id_92>",
113
  "single_word": false,
114
  "lstrip": false,
115
  "rstrip": false,
116
- "normalized": false,
117
- "special": true
118
  },
119
  {
120
  "id": 32008,
 
121
  "content": "<extra_id_91>",
122
  "single_word": false,
123
  "lstrip": false,
124
  "rstrip": false,
125
- "normalized": false,
126
- "special": true
127
  },
128
  {
129
  "id": 32009,
 
130
  "content": "<extra_id_90>",
131
  "single_word": false,
132
  "lstrip": false,
133
  "rstrip": false,
134
- "normalized": false,
135
- "special": true
136
  },
137
  {
138
  "id": 32010,
 
139
  "content": "<extra_id_89>",
140
  "single_word": false,
141
  "lstrip": false,
142
  "rstrip": false,
143
- "normalized": false,
144
- "special": true
145
  },
146
  {
147
  "id": 32011,
 
148
  "content": "<extra_id_88>",
149
  "single_word": false,
150
  "lstrip": false,
151
  "rstrip": false,
152
- "normalized": false,
153
- "special": true
154
  },
155
  {
156
  "id": 32012,
 
157
  "content": "<extra_id_87>",
158
  "single_word": false,
159
  "lstrip": false,
160
  "rstrip": false,
161
- "normalized": false,
162
- "special": true
163
  },
164
  {
165
  "id": 32013,
 
166
  "content": "<extra_id_86>",
167
  "single_word": false,
168
  "lstrip": false,
169
  "rstrip": false,
170
- "normalized": false,
171
- "special": true
172
  },
173
  {
174
  "id": 32014,
 
175
  "content": "<extra_id_85>",
176
  "single_word": false,
177
  "lstrip": false,
178
  "rstrip": false,
179
- "normalized": false,
180
- "special": true
181
  },
182
  {
183
  "id": 32015,
 
184
  "content": "<extra_id_84>",
185
  "single_word": false,
186
  "lstrip": false,
187
  "rstrip": false,
188
- "normalized": false,
189
- "special": true
190
  },
191
  {
192
  "id": 32016,
 
193
  "content": "<extra_id_83>",
194
  "single_word": false,
195
  "lstrip": false,
196
  "rstrip": false,
197
- "normalized": false,
198
- "special": true
199
  },
200
  {
201
  "id": 32017,
 
202
  "content": "<extra_id_82>",
203
  "single_word": false,
204
  "lstrip": false,
205
  "rstrip": false,
206
- "normalized": false,
207
- "special": true
208
  },
209
  {
210
  "id": 32018,
 
211
  "content": "<extra_id_81>",
212
  "single_word": false,
213
  "lstrip": false,
214
  "rstrip": false,
215
- "normalized": false,
216
- "special": true
217
  },
218
  {
219
  "id": 32019,
 
220
  "content": "<extra_id_80>",
221
  "single_word": false,
222
  "lstrip": false,
223
  "rstrip": false,
224
- "normalized": false,
225
- "special": true
226
  },
227
  {
228
  "id": 32020,
 
229
  "content": "<extra_id_79>",
230
  "single_word": false,
231
  "lstrip": false,
232
  "rstrip": false,
233
- "normalized": false,
234
- "special": true
235
  },
236
  {
237
  "id": 32021,
 
238
  "content": "<extra_id_78>",
239
  "single_word": false,
240
  "lstrip": false,
241
  "rstrip": false,
242
- "normalized": false,
243
- "special": true
244
  },
245
  {
246
  "id": 32022,
 
247
  "content": "<extra_id_77>",
248
  "single_word": false,
249
  "lstrip": false,
250
  "rstrip": false,
251
- "normalized": false,
252
- "special": true
253
  },
254
  {
255
  "id": 32023,
 
256
  "content": "<extra_id_76>",
257
  "single_word": false,
258
  "lstrip": false,
259
  "rstrip": false,
260
- "normalized": false,
261
- "special": true
262
  },
263
  {
264
  "id": 32024,
 
265
  "content": "<extra_id_75>",
266
  "single_word": false,
267
  "lstrip": false,
268
  "rstrip": false,
269
- "normalized": false,
270
- "special": true
271
  },
272
  {
273
  "id": 32025,
 
274
  "content": "<extra_id_74>",
275
  "single_word": false,
276
  "lstrip": false,
277
  "rstrip": false,
278
- "normalized": false,
279
- "special": true
280
  },
281
  {
282
  "id": 32026,
 
283
  "content": "<extra_id_73>",
284
  "single_word": false,
285
  "lstrip": false,
286
  "rstrip": false,
287
- "normalized": false,
288
- "special": true
289
  },
290
  {
291
  "id": 32027,
 
292
  "content": "<extra_id_72>",
293
  "single_word": false,
294
  "lstrip": false,
295
  "rstrip": false,
296
- "normalized": false,
297
- "special": true
298
  },
299
  {
300
  "id": 32028,
 
301
  "content": "<extra_id_71>",
302
  "single_word": false,
303
  "lstrip": false,
304
  "rstrip": false,
305
- "normalized": false,
306
- "special": true
307
  },
308
  {
309
  "id": 32029,
 
310
  "content": "<extra_id_70>",
311
  "single_word": false,
312
  "lstrip": false,
313
  "rstrip": false,
314
- "normalized": false,
315
- "special": true
316
  },
317
  {
318
  "id": 32030,
 
319
  "content": "<extra_id_69>",
320
  "single_word": false,
321
  "lstrip": false,
322
  "rstrip": false,
323
- "normalized": false,
324
- "special": true
325
  },
326
  {
327
  "id": 32031,
 
328
  "content": "<extra_id_68>",
329
  "single_word": false,
330
  "lstrip": false,
331
  "rstrip": false,
332
- "normalized": false,
333
- "special": true
334
  },
335
  {
336
  "id": 32032,
 
337
  "content": "<extra_id_67>",
338
  "single_word": false,
339
  "lstrip": false,
340
  "rstrip": false,
341
- "normalized": false,
342
- "special": true
343
  },
344
  {
345
  "id": 32033,
 
346
  "content": "<extra_id_66>",
347
  "single_word": false,
348
  "lstrip": false,
349
  "rstrip": false,
350
- "normalized": false,
351
- "special": true
352
  },
353
  {
354
  "id": 32034,
 
355
  "content": "<extra_id_65>",
356
  "single_word": false,
357
  "lstrip": false,
358
  "rstrip": false,
359
- "normalized": false,
360
- "special": true
361
  },
362
  {
363
  "id": 32035,
 
364
  "content": "<extra_id_64>",
365
  "single_word": false,
366
  "lstrip": false,
367
  "rstrip": false,
368
- "normalized": false,
369
- "special": true
370
  },
371
  {
372
  "id": 32036,
 
373
  "content": "<extra_id_63>",
374
  "single_word": false,
375
  "lstrip": false,
376
  "rstrip": false,
377
- "normalized": false,
378
- "special": true
379
  },
380
  {
381
  "id": 32037,
 
382
  "content": "<extra_id_62>",
383
  "single_word": false,
384
  "lstrip": false,
385
  "rstrip": false,
386
- "normalized": false,
387
- "special": true
388
  },
389
  {
390
  "id": 32038,
 
391
  "content": "<extra_id_61>",
392
  "single_word": false,
393
  "lstrip": false,
394
  "rstrip": false,
395
- "normalized": false,
396
- "special": true
397
  },
398
  {
399
  "id": 32039,
 
400
  "content": "<extra_id_60>",
401
  "single_word": false,
402
  "lstrip": false,
403
  "rstrip": false,
404
- "normalized": false,
405
- "special": true
406
  },
407
  {
408
  "id": 32040,
 
409
  "content": "<extra_id_59>",
410
  "single_word": false,
411
  "lstrip": false,
412
  "rstrip": false,
413
- "normalized": false,
414
- "special": true
415
  },
416
  {
417
  "id": 32041,
 
418
  "content": "<extra_id_58>",
419
  "single_word": false,
420
  "lstrip": false,
421
  "rstrip": false,
422
- "normalized": false,
423
- "special": true
424
  },
425
  {
426
  "id": 32042,
 
427
  "content": "<extra_id_57>",
428
  "single_word": false,
429
  "lstrip": false,
430
  "rstrip": false,
431
- "normalized": false,
432
- "special": true
433
  },
434
  {
435
  "id": 32043,
 
436
  "content": "<extra_id_56>",
437
  "single_word": false,
438
  "lstrip": false,
439
  "rstrip": false,
440
- "normalized": false,
441
- "special": true
442
  },
443
  {
444
  "id": 32044,
 
445
  "content": "<extra_id_55>",
446
  "single_word": false,
447
  "lstrip": false,
448
  "rstrip": false,
449
- "normalized": false,
450
- "special": true
451
  },
452
  {
453
  "id": 32045,
 
454
  "content": "<extra_id_54>",
455
  "single_word": false,
456
  "lstrip": false,
457
  "rstrip": false,
458
- "normalized": false,
459
- "special": true
460
  },
461
  {
462
  "id": 32046,
 
463
  "content": "<extra_id_53>",
464
  "single_word": false,
465
  "lstrip": false,
466
  "rstrip": false,
467
- "normalized": false,
468
- "special": true
469
  },
470
  {
471
  "id": 32047,
 
472
  "content": "<extra_id_52>",
473
  "single_word": false,
474
  "lstrip": false,
475
  "rstrip": false,
476
- "normalized": false,
477
- "special": true
478
  },
479
  {
480
  "id": 32048,
 
481
  "content": "<extra_id_51>",
482
  "single_word": false,
483
  "lstrip": false,
484
  "rstrip": false,
485
- "normalized": false,
486
- "special": true
487
  },
488
  {
489
  "id": 32049,
 
490
  "content": "<extra_id_50>",
491
  "single_word": false,
492
  "lstrip": false,
493
  "rstrip": false,
494
- "normalized": false,
495
- "special": true
496
  },
497
  {
498
  "id": 32050,
 
499
  "content": "<extra_id_49>",
500
  "single_word": false,
501
  "lstrip": false,
502
  "rstrip": false,
503
- "normalized": false,
504
- "special": true
505
  },
506
  {
507
  "id": 32051,
 
508
  "content": "<extra_id_48>",
509
  "single_word": false,
510
  "lstrip": false,
511
  "rstrip": false,
512
- "normalized": false,
513
- "special": true
514
  },
515
  {
516
  "id": 32052,
 
517
  "content": "<extra_id_47>",
518
  "single_word": false,
519
  "lstrip": false,
520
  "rstrip": false,
521
- "normalized": false,
522
- "special": true
523
  },
524
  {
525
  "id": 32053,
 
526
  "content": "<extra_id_46>",
527
  "single_word": false,
528
  "lstrip": false,
529
  "rstrip": false,
530
- "normalized": false,
531
- "special": true
532
  },
533
  {
534
  "id": 32054,
 
535
  "content": "<extra_id_45>",
536
  "single_word": false,
537
  "lstrip": false,
538
  "rstrip": false,
539
- "normalized": false,
540
- "special": true
541
  },
542
  {
543
  "id": 32055,
 
544
  "content": "<extra_id_44>",
545
  "single_word": false,
546
  "lstrip": false,
547
  "rstrip": false,
548
- "normalized": false,
549
- "special": true
550
  },
551
  {
552
  "id": 32056,
 
553
  "content": "<extra_id_43>",
554
  "single_word": false,
555
  "lstrip": false,
556
  "rstrip": false,
557
- "normalized": false,
558
- "special": true
559
  },
560
  {
561
  "id": 32057,
 
562
  "content": "<extra_id_42>",
563
  "single_word": false,
564
  "lstrip": false,
565
  "rstrip": false,
566
- "normalized": false,
567
- "special": true
568
  },
569
  {
570
  "id": 32058,
 
571
  "content": "<extra_id_41>",
572
  "single_word": false,
573
  "lstrip": false,
574
  "rstrip": false,
575
- "normalized": false,
576
- "special": true
577
  },
578
  {
579
  "id": 32059,
 
580
  "content": "<extra_id_40>",
581
  "single_word": false,
582
  "lstrip": false,
583
  "rstrip": false,
584
- "normalized": false,
585
- "special": true
586
  },
587
  {
588
  "id": 32060,
 
589
  "content": "<extra_id_39>",
590
  "single_word": false,
591
  "lstrip": false,
592
  "rstrip": false,
593
- "normalized": false,
594
- "special": true
595
  },
596
  {
597
  "id": 32061,
 
598
  "content": "<extra_id_38>",
599
  "single_word": false,
600
  "lstrip": false,
601
  "rstrip": false,
602
- "normalized": false,
603
- "special": true
604
  },
605
  {
606
  "id": 32062,
 
607
  "content": "<extra_id_37>",
608
  "single_word": false,
609
  "lstrip": false,
610
  "rstrip": false,
611
- "normalized": false,
612
- "special": true
613
  },
614
  {
615
  "id": 32063,
 
616
  "content": "<extra_id_36>",
617
  "single_word": false,
618
  "lstrip": false,
619
  "rstrip": false,
620
- "normalized": false,
621
- "special": true
622
  },
623
  {
624
  "id": 32064,
 
625
  "content": "<extra_id_35>",
626
  "single_word": false,
627
  "lstrip": false,
628
  "rstrip": false,
629
- "normalized": false,
630
- "special": true
631
  },
632
  {
633
  "id": 32065,
 
634
  "content": "<extra_id_34>",
635
  "single_word": false,
636
  "lstrip": false,
637
  "rstrip": false,
638
- "normalized": false,
639
- "special": true
640
  },
641
  {
642
  "id": 32066,
 
643
  "content": "<extra_id_33>",
644
  "single_word": false,
645
  "lstrip": false,
646
  "rstrip": false,
647
- "normalized": false,
648
- "special": true
649
  },
650
  {
651
  "id": 32067,
 
652
  "content": "<extra_id_32>",
653
  "single_word": false,
654
  "lstrip": false,
655
  "rstrip": false,
656
- "normalized": false,
657
- "special": true
658
  },
659
  {
660
  "id": 32068,
 
661
  "content": "<extra_id_31>",
662
  "single_word": false,
663
  "lstrip": false,
664
  "rstrip": false,
665
- "normalized": false,
666
- "special": true
667
  },
668
  {
669
  "id": 32069,
 
670
  "content": "<extra_id_30>",
671
  "single_word": false,
672
  "lstrip": false,
673
  "rstrip": false,
674
- "normalized": false,
675
- "special": true
676
  },
677
  {
678
  "id": 32070,
 
679
  "content": "<extra_id_29>",
680
  "single_word": false,
681
  "lstrip": false,
682
  "rstrip": false,
683
- "normalized": false,
684
- "special": true
685
  },
686
  {
687
  "id": 32071,
 
688
  "content": "<extra_id_28>",
689
  "single_word": false,
690
  "lstrip": false,
691
  "rstrip": false,
692
- "normalized": false,
693
- "special": true
694
  },
695
  {
696
  "id": 32072,
 
697
  "content": "<extra_id_27>",
698
  "single_word": false,
699
  "lstrip": false,
700
  "rstrip": false,
701
- "normalized": false,
702
- "special": true
703
  },
704
  {
705
  "id": 32073,
 
706
  "content": "<extra_id_26>",
707
  "single_word": false,
708
  "lstrip": false,
709
  "rstrip": false,
710
- "normalized": false,
711
- "special": true
712
  },
713
  {
714
  "id": 32074,
 
715
  "content": "<extra_id_25>",
716
  "single_word": false,
717
  "lstrip": false,
718
  "rstrip": false,
719
- "normalized": false,
720
- "special": true
721
  },
722
  {
723
  "id": 32075,
 
724
  "content": "<extra_id_24>",
725
  "single_word": false,
726
  "lstrip": false,
727
  "rstrip": false,
728
- "normalized": false,
729
- "special": true
730
  },
731
  {
732
  "id": 32076,
 
733
  "content": "<extra_id_23>",
734
  "single_word": false,
735
  "lstrip": false,
736
  "rstrip": false,
737
- "normalized": false,
738
- "special": true
739
  },
740
  {
741
  "id": 32077,
 
742
  "content": "<extra_id_22>",
743
  "single_word": false,
744
  "lstrip": false,
745
  "rstrip": false,
746
- "normalized": false,
747
- "special": true
748
  },
749
  {
750
  "id": 32078,
 
751
  "content": "<extra_id_21>",
752
  "single_word": false,
753
  "lstrip": false,
754
  "rstrip": false,
755
- "normalized": false,
756
- "special": true
757
  },
758
  {
759
  "id": 32079,
 
760
  "content": "<extra_id_20>",
761
  "single_word": false,
762
  "lstrip": false,
763
  "rstrip": false,
764
- "normalized": false,
765
- "special": true
766
  },
767
  {
768
  "id": 32080,
 
769
  "content": "<extra_id_19>",
770
  "single_word": false,
771
  "lstrip": false,
772
  "rstrip": false,
773
- "normalized": false,
774
- "special": true
775
  },
776
  {
777
  "id": 32081,
 
778
  "content": "<extra_id_18>",
779
  "single_word": false,
780
  "lstrip": false,
781
  "rstrip": false,
782
- "normalized": false,
783
- "special": true
784
  },
785
  {
786
  "id": 32082,
 
787
  "content": "<extra_id_17>",
788
  "single_word": false,
789
  "lstrip": false,
790
  "rstrip": false,
791
- "normalized": false,
792
- "special": true
793
  },
794
  {
795
  "id": 32083,
 
796
  "content": "<extra_id_16>",
797
  "single_word": false,
798
  "lstrip": false,
799
  "rstrip": false,
800
- "normalized": false,
801
- "special": true
802
  },
803
  {
804
  "id": 32084,
 
805
  "content": "<extra_id_15>",
806
  "single_word": false,
807
  "lstrip": false,
808
  "rstrip": false,
809
- "normalized": false,
810
- "special": true
811
  },
812
  {
813
  "id": 32085,
 
814
  "content": "<extra_id_14>",
815
  "single_word": false,
816
  "lstrip": false,
817
  "rstrip": false,
818
- "normalized": false,
819
- "special": true
820
  },
821
  {
822
  "id": 32086,
 
823
  "content": "<extra_id_13>",
824
  "single_word": false,
825
  "lstrip": false,
826
  "rstrip": false,
827
- "normalized": false,
828
- "special": true
829
  },
830
  {
831
  "id": 32087,
 
832
  "content": "<extra_id_12>",
833
  "single_word": false,
834
  "lstrip": false,
835
  "rstrip": false,
836
- "normalized": false,
837
- "special": true
838
  },
839
  {
840
  "id": 32088,
 
841
  "content": "<extra_id_11>",
842
  "single_word": false,
843
  "lstrip": false,
844
  "rstrip": false,
845
- "normalized": false,
846
- "special": true
847
  },
848
  {
849
  "id": 32089,
 
850
  "content": "<extra_id_10>",
851
  "single_word": false,
852
  "lstrip": false,
853
  "rstrip": false,
854
- "normalized": false,
855
- "special": true
856
  },
857
  {
858
  "id": 32090,
 
859
  "content": "<extra_id_9>",
860
  "single_word": false,
861
  "lstrip": false,
862
  "rstrip": false,
863
- "normalized": false,
864
- "special": true
865
  },
866
  {
867
  "id": 32091,
 
868
  "content": "<extra_id_8>",
869
  "single_word": false,
870
  "lstrip": false,
871
  "rstrip": false,
872
- "normalized": false,
873
- "special": true
874
  },
875
  {
876
  "id": 32092,
 
877
  "content": "<extra_id_7>",
878
  "single_word": false,
879
  "lstrip": false,
880
  "rstrip": false,
881
- "normalized": false,
882
- "special": true
883
  },
884
  {
885
  "id": 32093,
 
886
  "content": "<extra_id_6>",
887
  "single_word": false,
888
  "lstrip": false,
889
  "rstrip": false,
890
- "normalized": false,
891
- "special": true
892
  },
893
  {
894
  "id": 32094,
 
895
  "content": "<extra_id_5>",
896
  "single_word": false,
897
  "lstrip": false,
898
  "rstrip": false,
899
- "normalized": false,
900
- "special": true
901
  },
902
  {
903
  "id": 32095,
 
904
  "content": "<extra_id_4>",
905
  "single_word": false,
906
  "lstrip": false,
907
  "rstrip": false,
908
- "normalized": false,
909
- "special": true
910
  },
911
  {
912
  "id": 32096,
 
913
  "content": "<extra_id_3>",
914
  "single_word": false,
915
  "lstrip": false,
916
  "rstrip": false,
917
- "normalized": false,
918
- "special": true
919
  },
920
  {
921
  "id": 32097,
 
922
  "content": "<extra_id_2>",
923
  "single_word": false,
924
  "lstrip": false,
925
  "rstrip": false,
926
- "normalized": false,
927
- "special": true
928
  },
929
  {
930
  "id": 32098,
 
931
  "content": "<extra_id_1>",
932
  "single_word": false,
933
  "lstrip": false,
934
  "rstrip": false,
935
- "normalized": false,
936
- "special": true
937
  },
938
  {
939
  "id": 32099,
 
940
  "content": "<extra_id_0>",
941
  "single_word": false,
942
  "lstrip": false,
943
  "rstrip": false,
944
- "normalized": false,
945
- "special": true
946
  }
947
  ],
948
  "normalizer": {
 
19
  "added_tokens": [
20
  {
21
  "id": 0,
22
+ "special": true,
23
  "content": "<pad>",
24
  "single_word": false,
25
  "lstrip": false,
26
  "rstrip": false,
27
+ "normalized": false
 
28
  },
29
  {
30
  "id": 1,
31
+ "special": true,
32
  "content": "</s>",
33
  "single_word": false,
34
  "lstrip": false,
35
  "rstrip": false,
36
+ "normalized": false
 
37
  },
38
  {
39
  "id": 2,
40
+ "special": true,
41
  "content": "<unk>",
42
  "single_word": false,
43
  "lstrip": false,
44
  "rstrip": false,
45
+ "normalized": false
 
46
  },
47
  {
48
  "id": 32000,
49
+ "special": true,
50
  "content": "<extra_id_99>",
51
  "single_word": false,
52
  "lstrip": false,
53
  "rstrip": false,
54
+ "normalized": false
 
55
  },
56
  {
57
  "id": 32001,
58
+ "special": true,
59
  "content": "<extra_id_98>",
60
  "single_word": false,
61
  "lstrip": false,
62
  "rstrip": false,
63
+ "normalized": false
 
64
  },
65
  {
66
  "id": 32002,
67
+ "special": true,
68
  "content": "<extra_id_97>",
69
  "single_word": false,
70
  "lstrip": false,
71
  "rstrip": false,
72
+ "normalized": false
 
73
  },
74
  {
75
  "id": 32003,
76
+ "special": true,
77
  "content": "<extra_id_96>",
78
  "single_word": false,
79
  "lstrip": false,
80
  "rstrip": false,
81
+ "normalized": false
 
82
  },
83
  {
84
  "id": 32004,
85
+ "special": true,
86
  "content": "<extra_id_95>",
87
  "single_word": false,
88
  "lstrip": false,
89
  "rstrip": false,
90
+ "normalized": false
 
91
  },
92
  {
93
  "id": 32005,
94
+ "special": true,
95
  "content": "<extra_id_94>",
96
  "single_word": false,
97
  "lstrip": false,
98
  "rstrip": false,
99
+ "normalized": false
 
100
  },
101
  {
102
  "id": 32006,
103
+ "special": true,
104
  "content": "<extra_id_93>",
105
  "single_word": false,
106
  "lstrip": false,
107
  "rstrip": false,
108
+ "normalized": false
 
109
  },
110
  {
111
  "id": 32007,
112
+ "special": true,
113
  "content": "<extra_id_92>",
114
  "single_word": false,
115
  "lstrip": false,
116
  "rstrip": false,
117
+ "normalized": false
 
118
  },
119
  {
120
  "id": 32008,
121
+ "special": true,
122
  "content": "<extra_id_91>",
123
  "single_word": false,
124
  "lstrip": false,
125
  "rstrip": false,
126
+ "normalized": false
 
127
  },
128
  {
129
  "id": 32009,
130
+ "special": true,
131
  "content": "<extra_id_90>",
132
  "single_word": false,
133
  "lstrip": false,
134
  "rstrip": false,
135
+ "normalized": false
 
136
  },
137
  {
138
  "id": 32010,
139
+ "special": true,
140
  "content": "<extra_id_89>",
141
  "single_word": false,
142
  "lstrip": false,
143
  "rstrip": false,
144
+ "normalized": false
 
145
  },
146
  {
147
  "id": 32011,
148
+ "special": true,
149
  "content": "<extra_id_88>",
150
  "single_word": false,
151
  "lstrip": false,
152
  "rstrip": false,
153
+ "normalized": false
 
154
  },
155
  {
156
  "id": 32012,
157
+ "special": true,
158
  "content": "<extra_id_87>",
159
  "single_word": false,
160
  "lstrip": false,
161
  "rstrip": false,
162
+ "normalized": false
 
163
  },
164
  {
165
  "id": 32013,
166
+ "special": true,
167
  "content": "<extra_id_86>",
168
  "single_word": false,
169
  "lstrip": false,
170
  "rstrip": false,
171
+ "normalized": false
 
172
  },
173
  {
174
  "id": 32014,
175
+ "special": true,
176
  "content": "<extra_id_85>",
177
  "single_word": false,
178
  "lstrip": false,
179
  "rstrip": false,
180
+ "normalized": false
 
181
  },
182
  {
183
  "id": 32015,
184
+ "special": true,
185
  "content": "<extra_id_84>",
186
  "single_word": false,
187
  "lstrip": false,
188
  "rstrip": false,
189
+ "normalized": false
 
190
  },
191
  {
192
  "id": 32016,
193
+ "special": true,
194
  "content": "<extra_id_83>",
195
  "single_word": false,
196
  "lstrip": false,
197
  "rstrip": false,
198
+ "normalized": false
 
199
  },
200
  {
201
  "id": 32017,
202
+ "special": true,
203
  "content": "<extra_id_82>",
204
  "single_word": false,
205
  "lstrip": false,
206
  "rstrip": false,
207
+ "normalized": false
 
208
  },
209
  {
210
  "id": 32018,
211
+ "special": true,
212
  "content": "<extra_id_81>",
213
  "single_word": false,
214
  "lstrip": false,
215
  "rstrip": false,
216
+ "normalized": false
 
217
  },
218
  {
219
  "id": 32019,
220
+ "special": true,
221
  "content": "<extra_id_80>",
222
  "single_word": false,
223
  "lstrip": false,
224
  "rstrip": false,
225
+ "normalized": false
 
226
  },
227
  {
228
  "id": 32020,
229
+ "special": true,
230
  "content": "<extra_id_79>",
231
  "single_word": false,
232
  "lstrip": false,
233
  "rstrip": false,
234
+ "normalized": false
 
235
  },
236
  {
237
  "id": 32021,
238
+ "special": true,
239
  "content": "<extra_id_78>",
240
  "single_word": false,
241
  "lstrip": false,
242
  "rstrip": false,
243
+ "normalized": false
 
244
  },
245
  {
246
  "id": 32022,
247
+ "special": true,
248
  "content": "<extra_id_77>",
249
  "single_word": false,
250
  "lstrip": false,
251
  "rstrip": false,
252
+ "normalized": false
 
253
  },
254
  {
255
  "id": 32023,
256
+ "special": true,
257
  "content": "<extra_id_76>",
258
  "single_word": false,
259
  "lstrip": false,
260
  "rstrip": false,
261
+ "normalized": false
 
262
  },
263
  {
264
  "id": 32024,
265
+ "special": true,
266
  "content": "<extra_id_75>",
267
  "single_word": false,
268
  "lstrip": false,
269
  "rstrip": false,
270
+ "normalized": false
 
271
  },
272
  {
273
  "id": 32025,
274
+ "special": true,
275
  "content": "<extra_id_74>",
276
  "single_word": false,
277
  "lstrip": false,
278
  "rstrip": false,
279
+ "normalized": false
 
280
  },
281
  {
282
  "id": 32026,
283
+ "special": true,
284
  "content": "<extra_id_73>",
285
  "single_word": false,
286
  "lstrip": false,
287
  "rstrip": false,
288
+ "normalized": false
 
289
  },
290
  {
291
  "id": 32027,
292
+ "special": true,
293
  "content": "<extra_id_72>",
294
  "single_word": false,
295
  "lstrip": false,
296
  "rstrip": false,
297
+ "normalized": false
 
298
  },
299
  {
300
  "id": 32028,
301
+ "special": true,
302
  "content": "<extra_id_71>",
303
  "single_word": false,
304
  "lstrip": false,
305
  "rstrip": false,
306
+ "normalized": false
 
307
  },
308
  {
309
  "id": 32029,
310
+ "special": true,
311
  "content": "<extra_id_70>",
312
  "single_word": false,
313
  "lstrip": false,
314
  "rstrip": false,
315
+ "normalized": false
 
316
  },
317
  {
318
  "id": 32030,
319
+ "special": true,
320
  "content": "<extra_id_69>",
321
  "single_word": false,
322
  "lstrip": false,
323
  "rstrip": false,
324
+ "normalized": false
 
325
  },
326
  {
327
  "id": 32031,
328
+ "special": true,
329
  "content": "<extra_id_68>",
330
  "single_word": false,
331
  "lstrip": false,
332
  "rstrip": false,
333
+ "normalized": false
 
334
  },
335
  {
336
  "id": 32032,
337
+ "special": true,
338
  "content": "<extra_id_67>",
339
  "single_word": false,
340
  "lstrip": false,
341
  "rstrip": false,
342
+ "normalized": false
 
343
  },
344
  {
345
  "id": 32033,
346
+ "special": true,
347
  "content": "<extra_id_66>",
348
  "single_word": false,
349
  "lstrip": false,
350
  "rstrip": false,
351
+ "normalized": false
 
352
  },
353
  {
354
  "id": 32034,
355
+ "special": true,
356
  "content": "<extra_id_65>",
357
  "single_word": false,
358
  "lstrip": false,
359
  "rstrip": false,
360
+ "normalized": false
 
361
  },
362
  {
363
  "id": 32035,
364
+ "special": true,
365
  "content": "<extra_id_64>",
366
  "single_word": false,
367
  "lstrip": false,
368
  "rstrip": false,
369
+ "normalized": false
 
370
  },
371
  {
372
  "id": 32036,
373
+ "special": true,
374
  "content": "<extra_id_63>",
375
  "single_word": false,
376
  "lstrip": false,
377
  "rstrip": false,
378
+ "normalized": false
 
379
  },
380
  {
381
  "id": 32037,
382
+ "special": true,
383
  "content": "<extra_id_62>",
384
  "single_word": false,
385
  "lstrip": false,
386
  "rstrip": false,
387
+ "normalized": false
 
388
  },
389
  {
390
  "id": 32038,
391
+ "special": true,
392
  "content": "<extra_id_61>",
393
  "single_word": false,
394
  "lstrip": false,
395
  "rstrip": false,
396
+ "normalized": false
 
397
  },
398
  {
399
  "id": 32039,
400
+ "special": true,
401
  "content": "<extra_id_60>",
402
  "single_word": false,
403
  "lstrip": false,
404
  "rstrip": false,
405
+ "normalized": false
 
406
  },
407
  {
408
  "id": 32040,
409
+ "special": true,
410
  "content": "<extra_id_59>",
411
  "single_word": false,
412
  "lstrip": false,
413
  "rstrip": false,
414
+ "normalized": false
 
415
  },
416
  {
417
  "id": 32041,
418
+ "special": true,
419
  "content": "<extra_id_58>",
420
  "single_word": false,
421
  "lstrip": false,
422
  "rstrip": false,
423
+ "normalized": false
 
424
  },
425
  {
426
  "id": 32042,
427
+ "special": true,
428
  "content": "<extra_id_57>",
429
  "single_word": false,
430
  "lstrip": false,
431
  "rstrip": false,
432
+ "normalized": false
 
433
  },
434
  {
435
  "id": 32043,
436
+ "special": true,
437
  "content": "<extra_id_56>",
438
  "single_word": false,
439
  "lstrip": false,
440
  "rstrip": false,
441
+ "normalized": false
 
442
  },
443
  {
444
  "id": 32044,
445
+ "special": true,
446
  "content": "<extra_id_55>",
447
  "single_word": false,
448
  "lstrip": false,
449
  "rstrip": false,
450
+ "normalized": false
 
451
  },
452
  {
453
  "id": 32045,
454
+ "special": true,
455
  "content": "<extra_id_54>",
456
  "single_word": false,
457
  "lstrip": false,
458
  "rstrip": false,
459
+ "normalized": false
 
460
  },
461
  {
462
  "id": 32046,
463
+ "special": true,
464
  "content": "<extra_id_53>",
465
  "single_word": false,
466
  "lstrip": false,
467
  "rstrip": false,
468
+ "normalized": false
 
469
  },
470
  {
471
  "id": 32047,
472
+ "special": true,
473
  "content": "<extra_id_52>",
474
  "single_word": false,
475
  "lstrip": false,
476
  "rstrip": false,
477
+ "normalized": false
 
478
  },
479
  {
480
  "id": 32048,
481
+ "special": true,
482
  "content": "<extra_id_51>",
483
  "single_word": false,
484
  "lstrip": false,
485
  "rstrip": false,
486
+ "normalized": false
 
487
  },
488
  {
489
  "id": 32049,
490
+ "special": true,
491
  "content": "<extra_id_50>",
492
  "single_word": false,
493
  "lstrip": false,
494
  "rstrip": false,
495
+ "normalized": false
 
496
  },
497
  {
498
  "id": 32050,
499
+ "special": true,
500
  "content": "<extra_id_49>",
501
  "single_word": false,
502
  "lstrip": false,
503
  "rstrip": false,
504
+ "normalized": false
 
505
  },
506
  {
507
  "id": 32051,
508
+ "special": true,
509
  "content": "<extra_id_48>",
510
  "single_word": false,
511
  "lstrip": false,
512
  "rstrip": false,
513
+ "normalized": false
 
514
  },
515
  {
516
  "id": 32052,
517
+ "special": true,
518
  "content": "<extra_id_47>",
519
  "single_word": false,
520
  "lstrip": false,
521
  "rstrip": false,
522
+ "normalized": false
 
523
  },
524
  {
525
  "id": 32053,
526
+ "special": true,
527
  "content": "<extra_id_46>",
528
  "single_word": false,
529
  "lstrip": false,
530
  "rstrip": false,
531
+ "normalized": false
 
532
  },
533
  {
534
  "id": 32054,
535
+ "special": true,
536
  "content": "<extra_id_45>",
537
  "single_word": false,
538
  "lstrip": false,
539
  "rstrip": false,
540
+ "normalized": false
 
541
  },
542
  {
543
  "id": 32055,
544
+ "special": true,
545
  "content": "<extra_id_44>",
546
  "single_word": false,
547
  "lstrip": false,
548
  "rstrip": false,
549
+ "normalized": false
 
550
  },
551
  {
552
  "id": 32056,
553
+ "special": true,
554
  "content": "<extra_id_43>",
555
  "single_word": false,
556
  "lstrip": false,
557
  "rstrip": false,
558
+ "normalized": false
 
559
  },
560
  {
561
  "id": 32057,
562
+ "special": true,
563
  "content": "<extra_id_42>",
564
  "single_word": false,
565
  "lstrip": false,
566
  "rstrip": false,
567
+ "normalized": false
 
568
  },
569
  {
570
  "id": 32058,
571
+ "special": true,
572
  "content": "<extra_id_41>",
573
  "single_word": false,
574
  "lstrip": false,
575
  "rstrip": false,
576
+ "normalized": false
 
577
  },
578
  {
579
  "id": 32059,
580
+ "special": true,
581
  "content": "<extra_id_40>",
582
  "single_word": false,
583
  "lstrip": false,
584
  "rstrip": false,
585
+ "normalized": false
 
586
  },
587
  {
588
  "id": 32060,
589
+ "special": true,
590
  "content": "<extra_id_39>",
591
  "single_word": false,
592
  "lstrip": false,
593
  "rstrip": false,
594
+ "normalized": false
 
595
  },
596
  {
597
  "id": 32061,
598
+ "special": true,
599
  "content": "<extra_id_38>",
600
  "single_word": false,
601
  "lstrip": false,
602
  "rstrip": false,
603
+ "normalized": false
 
604
  },
605
  {
606
  "id": 32062,
607
+ "special": true,
608
  "content": "<extra_id_37>",
609
  "single_word": false,
610
  "lstrip": false,
611
  "rstrip": false,
612
+ "normalized": false
 
613
  },
614
  {
615
  "id": 32063,
616
+ "special": true,
617
  "content": "<extra_id_36>",
618
  "single_word": false,
619
  "lstrip": false,
620
  "rstrip": false,
621
+ "normalized": false
 
622
  },
623
  {
624
  "id": 32064,
625
+ "special": true,
626
  "content": "<extra_id_35>",
627
  "single_word": false,
628
  "lstrip": false,
629
  "rstrip": false,
630
+ "normalized": false
 
631
  },
632
  {
633
  "id": 32065,
634
+ "special": true,
635
  "content": "<extra_id_34>",
636
  "single_word": false,
637
  "lstrip": false,
638
  "rstrip": false,
639
+ "normalized": false
 
640
  },
641
  {
642
  "id": 32066,
643
+ "special": true,
644
  "content": "<extra_id_33>",
645
  "single_word": false,
646
  "lstrip": false,
647
  "rstrip": false,
648
+ "normalized": false
 
649
  },
650
  {
651
  "id": 32067,
652
+ "special": true,
653
  "content": "<extra_id_32>",
654
  "single_word": false,
655
  "lstrip": false,
656
  "rstrip": false,
657
+ "normalized": false
 
658
  },
659
  {
660
  "id": 32068,
661
+ "special": true,
662
  "content": "<extra_id_31>",
663
  "single_word": false,
664
  "lstrip": false,
665
  "rstrip": false,
666
+ "normalized": false
 
667
  },
668
  {
669
  "id": 32069,
670
+ "special": true,
671
  "content": "<extra_id_30>",
672
  "single_word": false,
673
  "lstrip": false,
674
  "rstrip": false,
675
+ "normalized": false
 
676
  },
677
  {
678
  "id": 32070,
679
+ "special": true,
680
  "content": "<extra_id_29>",
681
  "single_word": false,
682
  "lstrip": false,
683
  "rstrip": false,
684
+ "normalized": false
 
685
  },
686
  {
687
  "id": 32071,
688
+ "special": true,
689
  "content": "<extra_id_28>",
690
  "single_word": false,
691
  "lstrip": false,
692
  "rstrip": false,
693
+ "normalized": false
 
694
  },
695
  {
696
  "id": 32072,
697
+ "special": true,
698
  "content": "<extra_id_27>",
699
  "single_word": false,
700
  "lstrip": false,
701
  "rstrip": false,
702
+ "normalized": false
 
703
  },
704
  {
705
  "id": 32073,
706
+ "special": true,
707
  "content": "<extra_id_26>",
708
  "single_word": false,
709
  "lstrip": false,
710
  "rstrip": false,
711
+ "normalized": false
 
712
  },
713
  {
714
  "id": 32074,
715
+ "special": true,
716
  "content": "<extra_id_25>",
717
  "single_word": false,
718
  "lstrip": false,
719
  "rstrip": false,
720
+ "normalized": false
 
721
  },
722
  {
723
  "id": 32075,
724
+ "special": true,
725
  "content": "<extra_id_24>",
726
  "single_word": false,
727
  "lstrip": false,
728
  "rstrip": false,
729
+ "normalized": false
 
730
  },
731
  {
732
  "id": 32076,
733
+ "special": true,
734
  "content": "<extra_id_23>",
735
  "single_word": false,
736
  "lstrip": false,
737
  "rstrip": false,
738
+ "normalized": false
 
739
  },
740
  {
741
  "id": 32077,
742
+ "special": true,
743
  "content": "<extra_id_22>",
744
  "single_word": false,
745
  "lstrip": false,
746
  "rstrip": false,
747
+ "normalized": false
 
748
  },
749
  {
750
  "id": 32078,
751
+ "special": true,
752
  "content": "<extra_id_21>",
753
  "single_word": false,
754
  "lstrip": false,
755
  "rstrip": false,
756
+ "normalized": false
 
757
  },
758
  {
759
  "id": 32079,
760
+ "special": true,
761
  "content": "<extra_id_20>",
762
  "single_word": false,
763
  "lstrip": false,
764
  "rstrip": false,
765
+ "normalized": false
 
766
  },
767
  {
768
  "id": 32080,
769
+ "special": true,
770
  "content": "<extra_id_19>",
771
  "single_word": false,
772
  "lstrip": false,
773
  "rstrip": false,
774
+ "normalized": false
 
775
  },
776
  {
777
  "id": 32081,
778
+ "special": true,
779
  "content": "<extra_id_18>",
780
  "single_word": false,
781
  "lstrip": false,
782
  "rstrip": false,
783
+ "normalized": false
 
784
  },
785
  {
786
  "id": 32082,
787
+ "special": true,
788
  "content": "<extra_id_17>",
789
  "single_word": false,
790
  "lstrip": false,
791
  "rstrip": false,
792
+ "normalized": false
 
793
  },
794
  {
795
  "id": 32083,
796
+ "special": true,
797
  "content": "<extra_id_16>",
798
  "single_word": false,
799
  "lstrip": false,
800
  "rstrip": false,
801
+ "normalized": false
 
802
  },
803
  {
804
  "id": 32084,
805
+ "special": true,
806
  "content": "<extra_id_15>",
807
  "single_word": false,
808
  "lstrip": false,
809
  "rstrip": false,
810
+ "normalized": false
 
811
  },
812
  {
813
  "id": 32085,
814
+ "special": true,
815
  "content": "<extra_id_14>",
816
  "single_word": false,
817
  "lstrip": false,
818
  "rstrip": false,
819
+ "normalized": false
 
820
  },
821
  {
822
  "id": 32086,
823
+ "special": true,
824
  "content": "<extra_id_13>",
825
  "single_word": false,
826
  "lstrip": false,
827
  "rstrip": false,
828
+ "normalized": false
 
829
  },
830
  {
831
  "id": 32087,
832
+ "special": true,
833
  "content": "<extra_id_12>",
834
  "single_word": false,
835
  "lstrip": false,
836
  "rstrip": false,
837
+ "normalized": false
 
838
  },
839
  {
840
  "id": 32088,
841
+ "special": true,
842
  "content": "<extra_id_11>",
843
  "single_word": false,
844
  "lstrip": false,
845
  "rstrip": false,
846
+ "normalized": false
 
847
  },
848
  {
849
  "id": 32089,
850
+ "special": true,
851
  "content": "<extra_id_10>",
852
  "single_word": false,
853
  "lstrip": false,
854
  "rstrip": false,
855
+ "normalized": false
 
856
  },
857
  {
858
  "id": 32090,
859
+ "special": true,
860
  "content": "<extra_id_9>",
861
  "single_word": false,
862
  "lstrip": false,
863
  "rstrip": false,
864
+ "normalized": false
 
865
  },
866
  {
867
  "id": 32091,
868
+ "special": true,
869
  "content": "<extra_id_8>",
870
  "single_word": false,
871
  "lstrip": false,
872
  "rstrip": false,
873
+ "normalized": false
 
874
  },
875
  {
876
  "id": 32092,
877
+ "special": true,
878
  "content": "<extra_id_7>",
879
  "single_word": false,
880
  "lstrip": false,
881
  "rstrip": false,
882
+ "normalized": false
 
883
  },
884
  {
885
  "id": 32093,
886
+ "special": true,
887
  "content": "<extra_id_6>",
888
  "single_word": false,
889
  "lstrip": false,
890
  "rstrip": false,
891
+ "normalized": false
 
892
  },
893
  {
894
  "id": 32094,
895
+ "special": true,
896
  "content": "<extra_id_5>",
897
  "single_word": false,
898
  "lstrip": false,
899
  "rstrip": false,
900
+ "normalized": false
 
901
  },
902
  {
903
  "id": 32095,
904
+ "special": true,
905
  "content": "<extra_id_4>",
906
  "single_word": false,
907
  "lstrip": false,
908
  "rstrip": false,
909
+ "normalized": false
 
910
  },
911
  {
912
  "id": 32096,
913
+ "special": true,
914
  "content": "<extra_id_3>",
915
  "single_word": false,
916
  "lstrip": false,
917
  "rstrip": false,
918
+ "normalized": false
 
919
  },
920
  {
921
  "id": 32097,
922
+ "special": true,
923
  "content": "<extra_id_2>",
924
  "single_word": false,
925
  "lstrip": false,
926
  "rstrip": false,
927
+ "normalized": false
 
928
  },
929
  {
930
  "id": 32098,
931
+ "special": true,
932
  "content": "<extra_id_1>",
933
  "single_word": false,
934
  "lstrip": false,
935
  "rstrip": false,
936
+ "normalized": false
 
937
  },
938
  {
939
  "id": 32099,
940
+ "special": true,
941
  "content": "<extra_id_0>",
942
  "single_word": false,
943
  "lstrip": false,
944
  "rstrip": false,
945
+ "normalized": false
 
946
  }
947
  ],
948
  "normalizer": {
tokenizer_config.json CHANGED
@@ -101,12 +101,13 @@
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
104
- "clean_up_tokenization_spaces": true,
105
  "eos_token": "</s>",
106
  "extra_ids": 100,
107
  "model_max_length": 512,
 
108
  "pad_token": "<pad>",
109
  "sp_model_kwargs": {},
 
110
  "tokenizer_class": "T5Tokenizer",
111
  "unk_token": "<unk>"
112
  }
 
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
 
104
  "eos_token": "</s>",
105
  "extra_ids": 100,
106
  "model_max_length": 512,
107
+ "name_or_path": "google/flan-t5-base",
108
  "pad_token": "<pad>",
109
  "sp_model_kwargs": {},
110
+ "special_tokens_map_file": "/home/younes_huggingface_co/.cache/huggingface/hub/models--google--t5-v1_1-base/snapshots/650d7745bf1e502d6949b22cc19155cd656d3d4e/special_tokens_map.json",
111
  "tokenizer_class": "T5Tokenizer",
112
  "unk_token": "<unk>"
113
  }
trainer_state.json CHANGED
@@ -1,180 +1,770 @@
1
  {
2
- "best_metric": 1.3581866025924683,
3
- "best_model_checkpoint": "flan-t5-small-prompt/checkpoint-11702",
4
- "epoch": 2.0,
5
- "global_step": 11702,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.09,
12
- "learning_rate": 4.91454452230388e-05,
13
- "loss": 2.2473,
14
  "step": 500
15
  },
16
  {
17
- "epoch": 0.17,
18
- "learning_rate": 4.82908904460776e-05,
19
- "loss": 1.9761,
20
  "step": 1000
21
  },
22
  {
23
- "epoch": 0.26,
24
- "learning_rate": 4.7436335669116395e-05,
25
- "loss": 1.8802,
26
  "step": 1500
27
  },
28
  {
29
- "epoch": 0.34,
30
- "learning_rate": 4.658178089215519e-05,
31
- "loss": 1.8477,
32
  "step": 2000
33
  },
34
  {
35
- "epoch": 0.43,
36
- "learning_rate": 4.572722611519399e-05,
37
- "loss": 1.8029,
38
  "step": 2500
39
  },
40
  {
41
- "epoch": 0.51,
42
- "learning_rate": 4.487267133823278e-05,
43
- "loss": 1.7633,
44
  "step": 3000
45
  },
46
  {
47
- "epoch": 0.6,
48
- "learning_rate": 4.401811656127158e-05,
49
- "loss": 1.7704,
50
  "step": 3500
51
  },
52
  {
53
- "epoch": 0.68,
54
- "learning_rate": 4.3163561784310376e-05,
55
- "loss": 1.7068,
56
  "step": 4000
57
  },
58
  {
59
- "epoch": 0.77,
60
- "learning_rate": 4.230900700734917e-05,
61
- "loss": 1.732,
62
  "step": 4500
63
  },
64
  {
65
- "epoch": 0.85,
66
- "learning_rate": 4.145445223038797e-05,
67
- "loss": 1.7168,
68
  "step": 5000
69
  },
70
  {
71
- "epoch": 0.94,
72
- "learning_rate": 4.059989745342677e-05,
73
- "loss": 1.6812,
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  "step": 5500
75
  },
76
  {
77
- "epoch": 1.0,
78
- "eval_gen_len": 14.935384615384615,
79
- "eval_loss": 1.4240827560424805,
80
- "eval_rouge1": 52.0316,
81
- "eval_rouge2": 33.7896,
82
- "eval_rougeL": 49.2826,
83
- "eval_rougeLsum": 49.8238,
84
- "eval_runtime": 375.6869,
85
- "eval_samples_per_second": 13.841,
86
- "eval_steps_per_second": 1.73,
87
- "step": 5851
88
- },
89
- {
90
- "epoch": 1.03,
91
- "learning_rate": 3.9745342676465566e-05,
92
- "loss": 1.6711,
93
  "step": 6000
94
  },
95
  {
96
- "epoch": 1.11,
97
- "learning_rate": 3.889078789950436e-05,
98
- "loss": 1.651,
99
  "step": 6500
100
  },
101
  {
102
- "epoch": 1.2,
103
- "learning_rate": 3.803623312254316e-05,
104
- "loss": 1.6327,
105
  "step": 7000
106
  },
107
  {
108
- "epoch": 1.28,
109
- "learning_rate": 3.718167834558196e-05,
110
- "loss": 1.6171,
111
  "step": 7500
112
  },
113
  {
114
- "epoch": 1.37,
115
- "learning_rate": 3.6327123568620756e-05,
116
- "loss": 1.6142,
117
  "step": 8000
118
  },
119
  {
120
- "epoch": 1.45,
121
- "learning_rate": 3.5472568791659546e-05,
122
- "loss": 1.5843,
123
  "step": 8500
124
  },
125
  {
126
- "epoch": 1.54,
127
- "learning_rate": 3.4618014014698344e-05,
128
- "loss": 1.6075,
129
  "step": 9000
130
  },
131
  {
132
- "epoch": 1.62,
133
- "learning_rate": 3.376345923773714e-05,
134
- "loss": 1.5917,
135
  "step": 9500
136
  },
137
  {
138
- "epoch": 1.71,
139
- "learning_rate": 3.290890446077594e-05,
140
- "loss": 1.5797,
141
  "step": 10000
142
  },
143
  {
144
- "epoch": 1.79,
145
- "learning_rate": 3.205434968381473e-05,
146
- "loss": 1.6037,
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  "step": 10500
148
  },
149
  {
150
- "epoch": 1.88,
151
- "learning_rate": 3.119979490685353e-05,
152
- "loss": 1.5926,
153
  "step": 11000
154
  },
155
  {
156
- "epoch": 1.97,
157
- "learning_rate": 3.0345240129892328e-05,
158
- "loss": 1.5684,
159
  "step": 11500
160
  },
161
  {
162
- "epoch": 2.0,
163
- "eval_gen_len": 14.967115384615385,
164
- "eval_loss": 1.3581866025924683,
165
- "eval_rouge1": 53.5315,
166
- "eval_rouge2": 35.1883,
167
- "eval_rougeL": 50.7765,
168
- "eval_rougeLsum": 51.3334,
169
- "eval_runtime": 392.0061,
170
- "eval_samples_per_second": 13.265,
171
- "eval_steps_per_second": 1.658,
172
- "step": 11702
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  }
174
  ],
175
- "max_steps": 29255,
176
- "num_train_epochs": 5,
177
- "total_flos": 1.7400094802313216e+16,
178
  "trial_name": null,
179
  "trial_params": null
180
  }
 
1
  {
2
+ "best_metric": 1.341736078262329,
3
+ "best_model_checkpoint": "./dual/flan-t5-base-dual/checkpoint-52010",
4
+ "epoch": 10.0,
5
+ "global_step": 52010,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.1,
12
+ "learning_rate": 4.951932320707556e-05,
13
+ "loss": 2.0525,
14
  "step": 500
15
  },
16
  {
17
+ "epoch": 0.19,
18
+ "learning_rate": 4.903864641415113e-05,
19
+ "loss": 1.9359,
20
  "step": 1000
21
  },
22
  {
23
+ "epoch": 0.29,
24
+ "learning_rate": 4.855796962122669e-05,
25
+ "loss": 1.892,
26
  "step": 1500
27
  },
28
  {
29
+ "epoch": 0.38,
30
+ "learning_rate": 4.807729282830225e-05,
31
+ "loss": 1.8455,
32
  "step": 2000
33
  },
34
  {
35
+ "epoch": 0.48,
36
+ "learning_rate": 4.7596616035377816e-05,
37
+ "loss": 1.8273,
38
  "step": 2500
39
  },
40
  {
41
+ "epoch": 0.58,
42
+ "learning_rate": 4.7115939242453377e-05,
43
+ "loss": 1.8317,
44
  "step": 3000
45
  },
46
  {
47
+ "epoch": 0.67,
48
+ "learning_rate": 4.663526244952894e-05,
49
+ "loss": 1.8103,
50
  "step": 3500
51
  },
52
  {
53
+ "epoch": 0.77,
54
+ "learning_rate": 4.61545856566045e-05,
55
+ "loss": 1.8032,
56
  "step": 4000
57
  },
58
  {
59
+ "epoch": 0.87,
60
+ "learning_rate": 4.5673908863680064e-05,
61
+ "loss": 1.7842,
62
  "step": 4500
63
  },
64
  {
65
+ "epoch": 0.96,
66
+ "learning_rate": 4.5193232070755624e-05,
67
+ "loss": 1.7674,
68
  "step": 5000
69
  },
70
  {
71
+ "epoch": 1.0,
72
+ "eval_gen_len": 16.8,
73
+ "eval_loss": 1.4587862491607666,
74
+ "eval_rouge1": 43.5356,
75
+ "eval_rouge2": 25.8338,
76
+ "eval_rougeL": 41.1764,
77
+ "eval_rougeLsum": 41.816,
78
+ "eval_runtime": 629.4032,
79
+ "eval_samples_per_second": 16.524,
80
+ "eval_steps_per_second": 1.033,
81
+ "step": 5201
82
+ },
83
+ {
84
+ "epoch": 1.06,
85
+ "learning_rate": 4.4712555277831184e-05,
86
+ "loss": 1.7405,
87
  "step": 5500
88
  },
89
  {
90
+ "epoch": 1.15,
91
+ "learning_rate": 4.423187848490675e-05,
92
+ "loss": 1.7135,
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  "step": 6000
94
  },
95
  {
96
+ "epoch": 1.25,
97
+ "learning_rate": 4.375120169198231e-05,
98
+ "loss": 1.7161,
99
  "step": 6500
100
  },
101
  {
102
+ "epoch": 1.35,
103
+ "learning_rate": 4.327052489905787e-05,
104
+ "loss": 1.7208,
105
  "step": 7000
106
  },
107
  {
108
+ "epoch": 1.44,
109
+ "learning_rate": 4.278984810613344e-05,
110
+ "loss": 1.6999,
111
  "step": 7500
112
  },
113
  {
114
+ "epoch": 1.54,
115
+ "learning_rate": 4.2309171313209e-05,
116
+ "loss": 1.694,
117
  "step": 8000
118
  },
119
  {
120
+ "epoch": 1.63,
121
+ "learning_rate": 4.182849452028456e-05,
122
+ "loss": 1.6837,
123
  "step": 8500
124
  },
125
  {
126
+ "epoch": 1.73,
127
+ "learning_rate": 4.1347817727360125e-05,
128
+ "loss": 1.6892,
129
  "step": 9000
130
  },
131
  {
132
+ "epoch": 1.83,
133
+ "learning_rate": 4.0867140934435685e-05,
134
+ "loss": 1.6811,
135
  "step": 9500
136
  },
137
  {
138
+ "epoch": 1.92,
139
+ "learning_rate": 4.038646414151125e-05,
140
+ "loss": 1.7004,
141
  "step": 10000
142
  },
143
  {
144
+ "epoch": 2.0,
145
+ "eval_gen_len": 16.87192307692308,
146
+ "eval_loss": 1.4108598232269287,
147
+ "eval_rouge1": 44.1953,
148
+ "eval_rouge2": 26.6443,
149
+ "eval_rougeL": 41.7387,
150
+ "eval_rougeLsum": 42.3745,
151
+ "eval_runtime": 674.3077,
152
+ "eval_samples_per_second": 15.423,
153
+ "eval_steps_per_second": 0.964,
154
+ "step": 10402
155
+ },
156
+ {
157
+ "epoch": 2.02,
158
+ "learning_rate": 3.990578734858681e-05,
159
+ "loss": 1.6572,
160
  "step": 10500
161
  },
162
  {
163
+ "epoch": 2.11,
164
+ "learning_rate": 3.942511055566238e-05,
165
+ "loss": 1.6449,
166
  "step": 11000
167
  },
168
  {
169
+ "epoch": 2.21,
170
+ "learning_rate": 3.894443376273794e-05,
171
+ "loss": 1.6235,
172
  "step": 11500
173
  },
174
  {
175
+ "epoch": 2.31,
176
+ "learning_rate": 3.84637569698135e-05,
177
+ "loss": 1.6573,
178
+ "step": 12000
179
+ },
180
+ {
181
+ "epoch": 2.4,
182
+ "learning_rate": 3.7983080176889066e-05,
183
+ "loss": 1.6262,
184
+ "step": 12500
185
+ },
186
+ {
187
+ "epoch": 2.5,
188
+ "learning_rate": 3.7502403383964626e-05,
189
+ "loss": 1.6377,
190
+ "step": 13000
191
+ },
192
+ {
193
+ "epoch": 2.6,
194
+ "learning_rate": 3.7021726591040186e-05,
195
+ "loss": 1.6407,
196
+ "step": 13500
197
+ },
198
+ {
199
+ "epoch": 2.69,
200
+ "learning_rate": 3.654104979811575e-05,
201
+ "loss": 1.6343,
202
+ "step": 14000
203
+ },
204
+ {
205
+ "epoch": 2.79,
206
+ "learning_rate": 3.606037300519131e-05,
207
+ "loss": 1.6221,
208
+ "step": 14500
209
+ },
210
+ {
211
+ "epoch": 2.88,
212
+ "learning_rate": 3.557969621226687e-05,
213
+ "loss": 1.6127,
214
+ "step": 15000
215
+ },
216
+ {
217
+ "epoch": 2.98,
218
+ "learning_rate": 3.509901941934244e-05,
219
+ "loss": 1.622,
220
+ "step": 15500
221
+ },
222
+ {
223
+ "epoch": 3.0,
224
+ "eval_gen_len": 16.911923076923078,
225
+ "eval_loss": 1.387160301208496,
226
+ "eval_rouge1": 44.6617,
227
+ "eval_rouge2": 27.2456,
228
+ "eval_rougeL": 42.2185,
229
+ "eval_rougeLsum": 42.8309,
230
+ "eval_runtime": 678.3822,
231
+ "eval_samples_per_second": 15.331,
232
+ "eval_steps_per_second": 0.958,
233
+ "step": 15603
234
+ },
235
+ {
236
+ "epoch": 3.08,
237
+ "learning_rate": 3.4618342626418e-05,
238
+ "loss": 1.5886,
239
+ "step": 16000
240
+ },
241
+ {
242
+ "epoch": 3.17,
243
+ "learning_rate": 3.413766583349356e-05,
244
+ "loss": 1.5913,
245
+ "step": 16500
246
+ },
247
+ {
248
+ "epoch": 3.27,
249
+ "learning_rate": 3.365698904056912e-05,
250
+ "loss": 1.5693,
251
+ "step": 17000
252
+ },
253
+ {
254
+ "epoch": 3.36,
255
+ "learning_rate": 3.317631224764469e-05,
256
+ "loss": 1.5781,
257
+ "step": 17500
258
+ },
259
+ {
260
+ "epoch": 3.46,
261
+ "learning_rate": 3.269563545472025e-05,
262
+ "loss": 1.5944,
263
+ "step": 18000
264
+ },
265
+ {
266
+ "epoch": 3.56,
267
+ "learning_rate": 3.221495866179581e-05,
268
+ "loss": 1.5671,
269
+ "step": 18500
270
+ },
271
+ {
272
+ "epoch": 3.65,
273
+ "learning_rate": 3.1734281868871374e-05,
274
+ "loss": 1.5979,
275
+ "step": 19000
276
+ },
277
+ {
278
+ "epoch": 3.75,
279
+ "learning_rate": 3.1253605075946935e-05,
280
+ "loss": 1.6014,
281
+ "step": 19500
282
+ },
283
+ {
284
+ "epoch": 3.85,
285
+ "learning_rate": 3.0772928283022495e-05,
286
+ "loss": 1.5929,
287
+ "step": 20000
288
+ },
289
+ {
290
+ "epoch": 3.94,
291
+ "learning_rate": 3.029225149009806e-05,
292
+ "loss": 1.5822,
293
+ "step": 20500
294
+ },
295
+ {
296
+ "epoch": 4.0,
297
+ "eval_gen_len": 16.976153846153846,
298
+ "eval_loss": 1.3675929307937622,
299
+ "eval_rouge1": 44.7885,
300
+ "eval_rouge2": 27.4914,
301
+ "eval_rougeL": 42.3527,
302
+ "eval_rougeLsum": 42.9959,
303
+ "eval_runtime": 672.7849,
304
+ "eval_samples_per_second": 15.458,
305
+ "eval_steps_per_second": 0.966,
306
+ "step": 20804
307
+ },
308
+ {
309
+ "epoch": 4.04,
310
+ "learning_rate": 2.981157469717362e-05,
311
+ "loss": 1.5658,
312
+ "step": 21000
313
+ },
314
+ {
315
+ "epoch": 4.13,
316
+ "learning_rate": 2.9330897904249182e-05,
317
+ "loss": 1.5656,
318
+ "step": 21500
319
+ },
320
+ {
321
+ "epoch": 4.23,
322
+ "learning_rate": 2.885022111132475e-05,
323
+ "loss": 1.5643,
324
+ "step": 22000
325
+ },
326
+ {
327
+ "epoch": 4.33,
328
+ "learning_rate": 2.836954431840031e-05,
329
+ "loss": 1.5484,
330
+ "step": 22500
331
+ },
332
+ {
333
+ "epoch": 4.42,
334
+ "learning_rate": 2.788886752547587e-05,
335
+ "loss": 1.5504,
336
+ "step": 23000
337
+ },
338
+ {
339
+ "epoch": 4.52,
340
+ "learning_rate": 2.7408190732551436e-05,
341
+ "loss": 1.5546,
342
+ "step": 23500
343
+ },
344
+ {
345
+ "epoch": 4.61,
346
+ "learning_rate": 2.6927513939626996e-05,
347
+ "loss": 1.556,
348
+ "step": 24000
349
+ },
350
+ {
351
+ "epoch": 4.71,
352
+ "learning_rate": 2.644683714670256e-05,
353
+ "loss": 1.5448,
354
+ "step": 24500
355
+ },
356
+ {
357
+ "epoch": 4.81,
358
+ "learning_rate": 2.596616035377812e-05,
359
+ "loss": 1.5519,
360
+ "step": 25000
361
+ },
362
+ {
363
+ "epoch": 4.9,
364
+ "learning_rate": 2.5485483560853686e-05,
365
+ "loss": 1.5606,
366
+ "step": 25500
367
+ },
368
+ {
369
+ "epoch": 5.0,
370
+ "learning_rate": 2.5004806767929246e-05,
371
+ "loss": 1.5541,
372
+ "step": 26000
373
+ },
374
+ {
375
+ "epoch": 5.0,
376
+ "eval_gen_len": 17.028846153846153,
377
+ "eval_loss": 1.3574897050857544,
378
+ "eval_rouge1": 44.7589,
379
+ "eval_rouge2": 27.4697,
380
+ "eval_rougeL": 42.3549,
381
+ "eval_rougeLsum": 42.9704,
382
+ "eval_runtime": 665.8005,
383
+ "eval_samples_per_second": 15.62,
384
+ "eval_steps_per_second": 0.976,
385
+ "step": 26005
386
+ },
387
+ {
388
+ "epoch": 5.1,
389
+ "learning_rate": 2.452412997500481e-05,
390
+ "loss": 1.5154,
391
+ "step": 26500
392
+ },
393
+ {
394
+ "epoch": 5.19,
395
+ "learning_rate": 2.404345318208037e-05,
396
+ "loss": 1.5163,
397
+ "step": 27000
398
+ },
399
+ {
400
+ "epoch": 5.29,
401
+ "learning_rate": 2.3562776389155933e-05,
402
+ "loss": 1.5176,
403
+ "step": 27500
404
+ },
405
+ {
406
+ "epoch": 5.38,
407
+ "learning_rate": 2.3082099596231497e-05,
408
+ "loss": 1.5293,
409
+ "step": 28000
410
+ },
411
+ {
412
+ "epoch": 5.48,
413
+ "learning_rate": 2.2601422803307057e-05,
414
+ "loss": 1.5237,
415
+ "step": 28500
416
+ },
417
+ {
418
+ "epoch": 5.58,
419
+ "learning_rate": 2.212074601038262e-05,
420
+ "loss": 1.5422,
421
+ "step": 29000
422
+ },
423
+ {
424
+ "epoch": 5.67,
425
+ "learning_rate": 2.164006921745818e-05,
426
+ "loss": 1.5309,
427
+ "step": 29500
428
+ },
429
+ {
430
+ "epoch": 5.77,
431
+ "learning_rate": 2.1159392424533744e-05,
432
+ "loss": 1.5296,
433
+ "step": 30000
434
+ },
435
+ {
436
+ "epoch": 5.86,
437
+ "learning_rate": 2.0678715631609308e-05,
438
+ "loss": 1.5137,
439
+ "step": 30500
440
+ },
441
+ {
442
+ "epoch": 5.96,
443
+ "learning_rate": 2.0198038838684868e-05,
444
+ "loss": 1.5116,
445
+ "step": 31000
446
+ },
447
+ {
448
+ "epoch": 6.0,
449
+ "eval_gen_len": 16.991923076923076,
450
+ "eval_loss": 1.3511042594909668,
451
+ "eval_rouge1": 45.0017,
452
+ "eval_rouge2": 27.6906,
453
+ "eval_rougeL": 42.5834,
454
+ "eval_rougeLsum": 43.2073,
455
+ "eval_runtime": 658.0426,
456
+ "eval_samples_per_second": 15.804,
457
+ "eval_steps_per_second": 0.988,
458
+ "step": 31206
459
+ },
460
+ {
461
+ "epoch": 6.06,
462
+ "learning_rate": 1.971736204576043e-05,
463
+ "loss": 1.5018,
464
+ "step": 31500
465
+ },
466
+ {
467
+ "epoch": 6.15,
468
+ "learning_rate": 1.9236685252835995e-05,
469
+ "loss": 1.5037,
470
+ "step": 32000
471
+ },
472
+ {
473
+ "epoch": 6.25,
474
+ "learning_rate": 1.8756008459911555e-05,
475
+ "loss": 1.4991,
476
+ "step": 32500
477
+ },
478
+ {
479
+ "epoch": 6.34,
480
+ "learning_rate": 1.8275331666987118e-05,
481
+ "loss": 1.4977,
482
+ "step": 33000
483
+ },
484
+ {
485
+ "epoch": 6.44,
486
+ "learning_rate": 1.779465487406268e-05,
487
+ "loss": 1.5024,
488
+ "step": 33500
489
+ },
490
+ {
491
+ "epoch": 6.54,
492
+ "learning_rate": 1.7313978081138242e-05,
493
+ "loss": 1.5043,
494
+ "step": 34000
495
+ },
496
+ {
497
+ "epoch": 6.63,
498
+ "learning_rate": 1.6833301288213805e-05,
499
+ "loss": 1.506,
500
+ "step": 34500
501
+ },
502
+ {
503
+ "epoch": 6.73,
504
+ "learning_rate": 1.635262449528937e-05,
505
+ "loss": 1.497,
506
+ "step": 35000
507
+ },
508
+ {
509
+ "epoch": 6.83,
510
+ "learning_rate": 1.5871947702364932e-05,
511
+ "loss": 1.5132,
512
+ "step": 35500
513
+ },
514
+ {
515
+ "epoch": 6.92,
516
+ "learning_rate": 1.5391270909440492e-05,
517
+ "loss": 1.5079,
518
+ "step": 36000
519
+ },
520
+ {
521
+ "epoch": 7.0,
522
+ "eval_gen_len": 16.982019230769232,
523
+ "eval_loss": 1.347075343132019,
524
+ "eval_rouge1": 44.9759,
525
+ "eval_rouge2": 27.7179,
526
+ "eval_rougeL": 42.5719,
527
+ "eval_rougeLsum": 43.1803,
528
+ "eval_runtime": 667.8543,
529
+ "eval_samples_per_second": 15.572,
530
+ "eval_steps_per_second": 0.973,
531
+ "step": 36407
532
+ },
533
+ {
534
+ "epoch": 7.02,
535
+ "learning_rate": 1.4910594116516054e-05,
536
+ "loss": 1.5017,
537
+ "step": 36500
538
+ },
539
+ {
540
+ "epoch": 7.11,
541
+ "learning_rate": 1.4429917323591618e-05,
542
+ "loss": 1.4946,
543
+ "step": 37000
544
+ },
545
+ {
546
+ "epoch": 7.21,
547
+ "learning_rate": 1.394924053066718e-05,
548
+ "loss": 1.4941,
549
+ "step": 37500
550
+ },
551
+ {
552
+ "epoch": 7.31,
553
+ "learning_rate": 1.3468563737742743e-05,
554
+ "loss": 1.5029,
555
+ "step": 38000
556
+ },
557
+ {
558
+ "epoch": 7.4,
559
+ "learning_rate": 1.2987886944818307e-05,
560
+ "loss": 1.4855,
561
+ "step": 38500
562
+ },
563
+ {
564
+ "epoch": 7.5,
565
+ "learning_rate": 1.2507210151893867e-05,
566
+ "loss": 1.4726,
567
+ "step": 39000
568
+ },
569
+ {
570
+ "epoch": 7.59,
571
+ "learning_rate": 1.202653335896943e-05,
572
+ "loss": 1.4687,
573
+ "step": 39500
574
+ },
575
+ {
576
+ "epoch": 7.69,
577
+ "learning_rate": 1.1545856566044992e-05,
578
+ "loss": 1.4915,
579
+ "step": 40000
580
+ },
581
+ {
582
+ "epoch": 7.79,
583
+ "learning_rate": 1.1065179773120554e-05,
584
+ "loss": 1.4793,
585
+ "step": 40500
586
+ },
587
+ {
588
+ "epoch": 7.88,
589
+ "learning_rate": 1.0584502980196116e-05,
590
+ "loss": 1.4818,
591
+ "step": 41000
592
+ },
593
+ {
594
+ "epoch": 7.98,
595
+ "learning_rate": 1.0103826187271679e-05,
596
+ "loss": 1.4771,
597
+ "step": 41500
598
+ },
599
+ {
600
+ "epoch": 8.0,
601
+ "eval_gen_len": 16.986923076923077,
602
+ "eval_loss": 1.3443900346755981,
603
+ "eval_rouge1": 45.2057,
604
+ "eval_rouge2": 27.9779,
605
+ "eval_rougeL": 42.7648,
606
+ "eval_rougeLsum": 43.3885,
607
+ "eval_runtime": 559.194,
608
+ "eval_samples_per_second": 18.598,
609
+ "eval_steps_per_second": 1.162,
610
+ "step": 41608
611
+ },
612
+ {
613
+ "epoch": 8.08,
614
+ "learning_rate": 9.623149394347242e-06,
615
+ "loss": 1.4658,
616
+ "step": 42000
617
+ },
618
+ {
619
+ "epoch": 8.17,
620
+ "learning_rate": 9.142472601422804e-06,
621
+ "loss": 1.469,
622
+ "step": 42500
623
+ },
624
+ {
625
+ "epoch": 8.27,
626
+ "learning_rate": 8.661795808498366e-06,
627
+ "loss": 1.4966,
628
+ "step": 43000
629
+ },
630
+ {
631
+ "epoch": 8.36,
632
+ "learning_rate": 8.181119015573928e-06,
633
+ "loss": 1.4691,
634
+ "step": 43500
635
+ },
636
+ {
637
+ "epoch": 8.46,
638
+ "learning_rate": 7.700442222649491e-06,
639
+ "loss": 1.4767,
640
+ "step": 44000
641
+ },
642
+ {
643
+ "epoch": 8.56,
644
+ "learning_rate": 7.219765429725053e-06,
645
+ "loss": 1.4875,
646
+ "step": 44500
647
+ },
648
+ {
649
+ "epoch": 8.65,
650
+ "learning_rate": 6.739088636800615e-06,
651
+ "loss": 1.473,
652
+ "step": 45000
653
+ },
654
+ {
655
+ "epoch": 8.75,
656
+ "learning_rate": 6.258411843876178e-06,
657
+ "loss": 1.4826,
658
+ "step": 45500
659
+ },
660
+ {
661
+ "epoch": 8.84,
662
+ "learning_rate": 5.77773505095174e-06,
663
+ "loss": 1.4683,
664
+ "step": 46000
665
+ },
666
+ {
667
+ "epoch": 8.94,
668
+ "learning_rate": 5.297058258027303e-06,
669
+ "loss": 1.4691,
670
+ "step": 46500
671
+ },
672
+ {
673
+ "epoch": 9.0,
674
+ "eval_gen_len": 16.951634615384616,
675
+ "eval_loss": 1.3431836366653442,
676
+ "eval_rouge1": 45.197,
677
+ "eval_rouge2": 27.8923,
678
+ "eval_rougeL": 42.7387,
679
+ "eval_rougeLsum": 43.3577,
680
+ "eval_runtime": 562.3582,
681
+ "eval_samples_per_second": 18.494,
682
+ "eval_steps_per_second": 1.156,
683
+ "step": 46809
684
+ },
685
+ {
686
+ "epoch": 9.04,
687
+ "learning_rate": 4.816381465102865e-06,
688
+ "loss": 1.4663,
689
+ "step": 47000
690
+ },
691
+ {
692
+ "epoch": 9.13,
693
+ "learning_rate": 4.335704672178427e-06,
694
+ "loss": 1.4658,
695
+ "step": 47500
696
+ },
697
+ {
698
+ "epoch": 9.23,
699
+ "learning_rate": 3.85502787925399e-06,
700
+ "loss": 1.4679,
701
+ "step": 48000
702
+ },
703
+ {
704
+ "epoch": 9.33,
705
+ "learning_rate": 3.3743510863295526e-06,
706
+ "loss": 1.4573,
707
+ "step": 48500
708
+ },
709
+ {
710
+ "epoch": 9.42,
711
+ "learning_rate": 2.8936742934051144e-06,
712
+ "loss": 1.465,
713
+ "step": 49000
714
+ },
715
+ {
716
+ "epoch": 9.52,
717
+ "learning_rate": 2.412997500480677e-06,
718
+ "loss": 1.4582,
719
+ "step": 49500
720
+ },
721
+ {
722
+ "epoch": 9.61,
723
+ "learning_rate": 1.9323207075562393e-06,
724
+ "loss": 1.4736,
725
+ "step": 50000
726
+ },
727
+ {
728
+ "epoch": 9.71,
729
+ "learning_rate": 1.4516439146318017e-06,
730
+ "loss": 1.4768,
731
+ "step": 50500
732
+ },
733
+ {
734
+ "epoch": 9.81,
735
+ "learning_rate": 9.70967121707364e-07,
736
+ "loss": 1.4634,
737
+ "step": 51000
738
+ },
739
+ {
740
+ "epoch": 9.9,
741
+ "learning_rate": 4.902903287829264e-07,
742
+ "loss": 1.4832,
743
+ "step": 51500
744
+ },
745
+ {
746
+ "epoch": 10.0,
747
+ "learning_rate": 9.613535858488752e-09,
748
+ "loss": 1.4719,
749
+ "step": 52000
750
+ },
751
+ {
752
+ "epoch": 10.0,
753
+ "eval_gen_len": 16.988557692307694,
754
+ "eval_loss": 1.341736078262329,
755
+ "eval_rouge1": 45.2143,
756
+ "eval_rouge2": 27.9673,
757
+ "eval_rougeL": 42.7712,
758
+ "eval_rougeLsum": 43.3892,
759
+ "eval_runtime": 603.039,
760
+ "eval_samples_per_second": 17.246,
761
+ "eval_steps_per_second": 1.078,
762
+ "step": 52010
763
  }
764
  ],
765
+ "max_steps": 52010,
766
+ "num_train_epochs": 10,
767
+ "total_flos": 5.697455075308339e+17,
768
  "trial_name": null,
769
  "trial_params": null
770
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7257d6caf5b46b4cad8fdf3be2665f3c34e9f31a968dcd6c0daa21d82fff990
3
- size 4091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0329604d8f92dab1fae265c51697fbf1309edc89ed828f2774dd8a76c9fa7dad
3
+ size 3567