Pratik Hublikar committed on
Commit 3e3e3ef
1 Parent(s): c424624

adding model files

Files changed (40)
  1. checkpoint-2000/config.json +60 -0
  2. checkpoint-2000/generation_config.json +6 -0
  3. checkpoint-2000/optimizer.pt +3 -0
  4. checkpoint-2000/pytorch_model.bin +3 -0
  5. checkpoint-2000/rng_state.pth +3 -0
  6. checkpoint-2000/scheduler.pt +3 -0
  7. checkpoint-2000/special_tokens_map.json +107 -0
  8. checkpoint-2000/tokenizer.json +0 -0
  9. checkpoint-2000/tokenizer_config.json +111 -0
  10. checkpoint-2000/trainer_state.json +456 -0
  11. checkpoint-2000/training_args.bin +3 -0
  12. checkpoint-2500/config.json +60 -0
  13. checkpoint-2500/generation_config.json +6 -0
  14. checkpoint-2500/optimizer.pt +3 -0
  15. checkpoint-2500/pytorch_model.bin +3 -0
  16. checkpoint-2500/rng_state.pth +3 -0
  17. checkpoint-2500/scheduler.pt +3 -0
  18. checkpoint-2500/special_tokens_map.json +107 -0
  19. checkpoint-2500/tokenizer.json +0 -0
  20. checkpoint-2500/tokenizer_config.json +111 -0
  21. checkpoint-2500/trainer_state.json +566 -0
  22. checkpoint-2500/training_args.bin +3 -0
  23. checkpoint-3000/config.json +60 -0
  24. checkpoint-3000/generation_config.json +6 -0
  25. checkpoint-3000/optimizer.pt +3 -0
  26. checkpoint-3000/pytorch_model.bin +3 -0
  27. checkpoint-3000/rng_state.pth +3 -0
  28. checkpoint-3000/scheduler.pt +3 -0
  29. checkpoint-3000/special_tokens_map.json +107 -0
  30. checkpoint-3000/tokenizer.json +0 -0
  31. checkpoint-3000/tokenizer_config.json +111 -0
  32. checkpoint-3000/trainer_state.json +676 -0
  33. checkpoint-3000/training_args.bin +3 -0
  34. config.json +60 -0
  35. pytorch_model.bin +3 -0
  36. runs/Jul21_05-05-37_1daa7b5ea0df/events.out.tfevents.1689915947.1daa7b5ea0df.1289.0 +3 -0
  37. special_tokens_map.json +107 -0
  38. tokenizer.json +0 -0
  39. tokenizer_config.json +111 -0
  40. training_args.bin +3 -0
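
The checkpoint being uploaded is a t5-small model fine-tuned for summarization (see the config.json diffs below). A minimal sketch of loading it with transformers, assuming "<user>/<repo>" stands in for this repository's actual Hub id (a local path to the repo root or to one of the checkpoint-* folders works the same way); the prefix and generation settings simply mirror the summarization entry in task_specific_params:

    # Hedged sketch: load the fine-tuned T5-small and run summarization.
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

    model_id = "<user>/<repo>"  # placeholder, not the real repo id
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

    # "summarize: " prefix and beam settings come from task_specific_params in config.json.
    text = "summarize: " + "Long input document goes here ..."
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    summary_ids = model.generate(
        **inputs,
        num_beams=4,
        max_length=200,
        min_length=30,
        length_penalty=2.0,
        no_repeat_ngram_size=3,
        early_stopping=True,
    )
    print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
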
checkpoint-2000/config.json ADDED
@@ -0,0 +1,60 @@
+ {
+ "_name_or_path": "t5-small",
+ "architectures": [
+ "T5ForConditionalGeneration"
+ ],
+ "d_ff": 2048,
+ "d_kv": 64,
+ "d_model": 512,
+ "decoder_start_token_id": 0,
+ "dense_act_fn": "relu",
+ "dropout_rate": 0.1,
+ "eos_token_id": 1,
+ "feed_forward_proj": "relu",
+ "initializer_factor": 1.0,
+ "is_encoder_decoder": true,
+ "is_gated_act": false,
+ "layer_norm_epsilon": 1e-06,
+ "model_type": "t5",
+ "n_positions": 512,
+ "num_decoder_layers": 6,
+ "num_heads": 8,
+ "num_layers": 6,
+ "output_past": true,
+ "pad_token_id": 0,
+ "relative_attention_max_distance": 128,
+ "relative_attention_num_buckets": 32,
+ "task_specific_params": {
+ "summarization": {
+ "early_stopping": true,
+ "length_penalty": 2.0,
+ "max_length": 200,
+ "min_length": 30,
+ "no_repeat_ngram_size": 3,
+ "num_beams": 4,
+ "prefix": "summarize: "
+ },
+ "translation_en_to_de": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to German: "
+ },
+ "translation_en_to_fr": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to French: "
+ },
+ "translation_en_to_ro": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to Romanian: "
+ }
+ },
+ "torch_dtype": "float32",
+ "transformers_version": "4.31.0",
+ "use_cache": true,
+ "vocab_size": 32128
+ }
checkpoint-2000/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+ "decoder_start_token_id": 0,
+ "eos_token_id": 1,
+ "pad_token_id": 0,
+ "transformers_version": "4.31.0"
+ }
checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:17b6659ae6785d45ea62cc2157f11aa7e9b47d5895ca95974734f05db11738f9
+ size 484130629
checkpoint-2000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fa181bce5df913c93d5de63a3336845056ccbf84f43eaa0a73152744f304c0f8
+ size 242071641
checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2a76d466d1d6e8f84ffe1877990cf0c660ff14c19af676337c446d343f677946
+ size 14575
checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:60f37edebfb5d5909131f4207209d60414a1e880e0ac752e5f8801b3a36181ba
+ size 627
checkpoint-2000/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
checkpoint-2000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1,111 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "clean_up_tokenization_spaces": true,
105
+ "eos_token": "</s>",
106
+ "extra_ids": 100,
107
+ "model_max_length": 512,
108
+ "pad_token": "<pad>",
109
+ "tokenizer_class": "T5Tokenizer",
110
+ "unk_token": "<unk>"
111
+ }
checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,456 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 32.25806451612903,
5
+ "global_step": 2000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_gen_len": 19.0,
13
+ "eval_loss": 2.797335147857666,
14
+ "eval_rouge1": 0.1301,
15
+ "eval_rouge2": 0.0352,
16
+ "eval_rougeL": 0.1074,
17
+ "eval_rougeLsum": 0.1075,
18
+ "eval_runtime": 18.6868,
19
+ "eval_samples_per_second": 13.271,
20
+ "eval_steps_per_second": 0.856,
21
+ "step": 62
22
+ },
23
+ {
24
+ "epoch": 2.0,
25
+ "eval_gen_len": 19.0,
26
+ "eval_loss": 2.568485975265503,
27
+ "eval_rouge1": 0.1455,
28
+ "eval_rouge2": 0.051,
29
+ "eval_rougeL": 0.1189,
30
+ "eval_rougeLsum": 0.1187,
31
+ "eval_runtime": 16.801,
32
+ "eval_samples_per_second": 14.761,
33
+ "eval_steps_per_second": 0.952,
34
+ "step": 124
35
+ },
36
+ {
37
+ "epoch": 3.0,
38
+ "eval_gen_len": 19.0,
39
+ "eval_loss": 2.475400924682617,
40
+ "eval_rouge1": 0.1674,
41
+ "eval_rouge2": 0.0692,
42
+ "eval_rougeL": 0.1397,
43
+ "eval_rougeLsum": 0.1397,
44
+ "eval_runtime": 17.3933,
45
+ "eval_samples_per_second": 14.258,
46
+ "eval_steps_per_second": 0.92,
47
+ "step": 186
48
+ },
49
+ {
50
+ "epoch": 4.0,
51
+ "eval_gen_len": 19.0,
52
+ "eval_loss": 2.419504404067993,
53
+ "eval_rouge1": 0.1901,
54
+ "eval_rouge2": 0.0867,
55
+ "eval_rougeL": 0.1586,
56
+ "eval_rougeLsum": 0.1587,
57
+ "eval_runtime": 16.7871,
58
+ "eval_samples_per_second": 14.773,
59
+ "eval_steps_per_second": 0.953,
60
+ "step": 248
61
+ },
62
+ {
63
+ "epoch": 5.0,
64
+ "eval_gen_len": 19.0,
65
+ "eval_loss": 2.3755078315734863,
66
+ "eval_rouge1": 0.1933,
67
+ "eval_rouge2": 0.0907,
68
+ "eval_rougeL": 0.1617,
69
+ "eval_rougeLsum": 0.1619,
70
+ "eval_runtime": 16.6712,
71
+ "eval_samples_per_second": 14.876,
72
+ "eval_steps_per_second": 0.96,
73
+ "step": 310
74
+ },
75
+ {
76
+ "epoch": 6.0,
77
+ "eval_gen_len": 19.0,
78
+ "eval_loss": 2.3425652980804443,
79
+ "eval_rouge1": 0.1946,
80
+ "eval_rouge2": 0.0916,
81
+ "eval_rougeL": 0.1634,
82
+ "eval_rougeLsum": 0.1636,
83
+ "eval_runtime": 17.1282,
84
+ "eval_samples_per_second": 14.479,
85
+ "eval_steps_per_second": 0.934,
86
+ "step": 372
87
+ },
88
+ {
89
+ "epoch": 7.0,
90
+ "eval_gen_len": 19.0,
91
+ "eval_loss": 2.3197405338287354,
92
+ "eval_rouge1": 0.1964,
93
+ "eval_rouge2": 0.0929,
94
+ "eval_rougeL": 0.1646,
95
+ "eval_rougeLsum": 0.1648,
96
+ "eval_runtime": 16.7039,
97
+ "eval_samples_per_second": 14.847,
98
+ "eval_steps_per_second": 0.958,
99
+ "step": 434
100
+ },
101
+ {
102
+ "epoch": 8.0,
103
+ "eval_gen_len": 19.0,
104
+ "eval_loss": 2.2987782955169678,
105
+ "eval_rouge1": 0.1968,
106
+ "eval_rouge2": 0.0933,
107
+ "eval_rougeL": 0.165,
108
+ "eval_rougeLsum": 0.1653,
109
+ "eval_runtime": 16.6651,
110
+ "eval_samples_per_second": 14.881,
111
+ "eval_steps_per_second": 0.96,
112
+ "step": 496
113
+ },
114
+ {
115
+ "epoch": 8.06,
116
+ "learning_rate": 1.6780645161290323e-05,
117
+ "loss": 2.7011,
118
+ "step": 500
119
+ },
120
+ {
121
+ "epoch": 9.0,
122
+ "eval_gen_len": 19.0,
123
+ "eval_loss": 2.279834270477295,
124
+ "eval_rouge1": 0.1969,
125
+ "eval_rouge2": 0.0946,
126
+ "eval_rougeL": 0.1662,
127
+ "eval_rougeLsum": 0.1665,
128
+ "eval_runtime": 17.134,
129
+ "eval_samples_per_second": 14.474,
130
+ "eval_steps_per_second": 0.934,
131
+ "step": 558
132
+ },
133
+ {
134
+ "epoch": 10.0,
135
+ "eval_gen_len": 19.0,
136
+ "eval_loss": 2.265596389770508,
137
+ "eval_rouge1": 0.1987,
138
+ "eval_rouge2": 0.0962,
139
+ "eval_rougeL": 0.1672,
140
+ "eval_rougeLsum": 0.1673,
141
+ "eval_runtime": 17.1955,
142
+ "eval_samples_per_second": 14.422,
143
+ "eval_steps_per_second": 0.93,
144
+ "step": 620
145
+ },
146
+ {
147
+ "epoch": 11.0,
148
+ "eval_gen_len": 19.0,
149
+ "eval_loss": 2.2547566890716553,
150
+ "eval_rouge1": 0.1958,
151
+ "eval_rouge2": 0.0965,
152
+ "eval_rougeL": 0.1655,
153
+ "eval_rougeLsum": 0.1657,
154
+ "eval_runtime": 16.9264,
155
+ "eval_samples_per_second": 14.652,
156
+ "eval_steps_per_second": 0.945,
157
+ "step": 682
158
+ },
159
+ {
160
+ "epoch": 12.0,
161
+ "eval_gen_len": 19.0,
162
+ "eval_loss": 2.243624210357666,
163
+ "eval_rouge1": 0.1965,
164
+ "eval_rouge2": 0.096,
165
+ "eval_rougeL": 0.1659,
166
+ "eval_rougeLsum": 0.166,
167
+ "eval_runtime": 16.9191,
168
+ "eval_samples_per_second": 14.658,
169
+ "eval_steps_per_second": 0.946,
170
+ "step": 744
171
+ },
172
+ {
173
+ "epoch": 13.0,
174
+ "eval_gen_len": 19.0,
175
+ "eval_loss": 2.2352294921875,
176
+ "eval_rouge1": 0.1964,
177
+ "eval_rouge2": 0.0971,
178
+ "eval_rougeL": 0.1663,
179
+ "eval_rougeLsum": 0.1664,
180
+ "eval_runtime": 16.9525,
181
+ "eval_samples_per_second": 14.629,
182
+ "eval_steps_per_second": 0.944,
183
+ "step": 806
184
+ },
185
+ {
186
+ "epoch": 14.0,
187
+ "eval_gen_len": 19.0,
188
+ "eval_loss": 2.2252049446105957,
189
+ "eval_rouge1": 0.197,
190
+ "eval_rouge2": 0.097,
191
+ "eval_rougeL": 0.1664,
192
+ "eval_rougeLsum": 0.1664,
193
+ "eval_runtime": 16.904,
194
+ "eval_samples_per_second": 14.671,
195
+ "eval_steps_per_second": 0.947,
196
+ "step": 868
197
+ },
198
+ {
199
+ "epoch": 15.0,
200
+ "eval_gen_len": 19.0,
201
+ "eval_loss": 2.2152445316314697,
202
+ "eval_rouge1": 0.1954,
203
+ "eval_rouge2": 0.0979,
204
+ "eval_rougeL": 0.1664,
205
+ "eval_rougeLsum": 0.1665,
206
+ "eval_runtime": 17.117,
207
+ "eval_samples_per_second": 14.489,
208
+ "eval_steps_per_second": 0.935,
209
+ "step": 930
210
+ },
211
+ {
212
+ "epoch": 16.0,
213
+ "eval_gen_len": 19.0,
214
+ "eval_loss": 2.207918643951416,
215
+ "eval_rouge1": 0.1954,
216
+ "eval_rouge2": 0.0988,
217
+ "eval_rougeL": 0.1674,
218
+ "eval_rougeLsum": 0.1677,
219
+ "eval_runtime": 17.2029,
220
+ "eval_samples_per_second": 14.416,
221
+ "eval_steps_per_second": 0.93,
222
+ "step": 992
223
+ },
224
+ {
225
+ "epoch": 16.13,
226
+ "learning_rate": 1.355483870967742e-05,
227
+ "loss": 2.3282,
228
+ "step": 1000
229
+ },
230
+ {
231
+ "epoch": 17.0,
232
+ "eval_gen_len": 19.0,
233
+ "eval_loss": 2.1986870765686035,
234
+ "eval_rouge1": 0.1951,
235
+ "eval_rouge2": 0.0995,
236
+ "eval_rougeL": 0.1672,
237
+ "eval_rougeLsum": 0.1673,
238
+ "eval_runtime": 17.1147,
239
+ "eval_samples_per_second": 14.49,
240
+ "eval_steps_per_second": 0.935,
241
+ "step": 1054
242
+ },
243
+ {
244
+ "epoch": 18.0,
245
+ "eval_gen_len": 19.0,
246
+ "eval_loss": 2.1938998699188232,
247
+ "eval_rouge1": 0.1974,
248
+ "eval_rouge2": 0.1015,
249
+ "eval_rougeL": 0.1695,
250
+ "eval_rougeLsum": 0.1697,
251
+ "eval_runtime": 16.7909,
252
+ "eval_samples_per_second": 14.77,
253
+ "eval_steps_per_second": 0.953,
254
+ "step": 1116
255
+ },
256
+ {
257
+ "epoch": 19.0,
258
+ "eval_gen_len": 19.0,
259
+ "eval_loss": 2.18984055519104,
260
+ "eval_rouge1": 0.1965,
261
+ "eval_rouge2": 0.1014,
262
+ "eval_rougeL": 0.1691,
263
+ "eval_rougeLsum": 0.1693,
264
+ "eval_runtime": 16.6689,
265
+ "eval_samples_per_second": 14.878,
266
+ "eval_steps_per_second": 0.96,
267
+ "step": 1178
268
+ },
269
+ {
270
+ "epoch": 20.0,
271
+ "eval_gen_len": 19.0,
272
+ "eval_loss": 2.183218240737915,
273
+ "eval_rouge1": 0.1963,
274
+ "eval_rouge2": 0.0997,
275
+ "eval_rougeL": 0.1683,
276
+ "eval_rougeLsum": 0.1685,
277
+ "eval_runtime": 17.3129,
278
+ "eval_samples_per_second": 14.325,
279
+ "eval_steps_per_second": 0.924,
280
+ "step": 1240
281
+ },
282
+ {
283
+ "epoch": 21.0,
284
+ "eval_gen_len": 19.0,
285
+ "eval_loss": 2.1765005588531494,
286
+ "eval_rouge1": 0.1966,
287
+ "eval_rouge2": 0.0991,
288
+ "eval_rougeL": 0.1676,
289
+ "eval_rougeLsum": 0.1678,
290
+ "eval_runtime": 16.8703,
291
+ "eval_samples_per_second": 14.7,
292
+ "eval_steps_per_second": 0.948,
293
+ "step": 1302
294
+ },
295
+ {
296
+ "epoch": 22.0,
297
+ "eval_gen_len": 19.0,
298
+ "eval_loss": 2.1725897789001465,
299
+ "eval_rouge1": 0.1963,
300
+ "eval_rouge2": 0.0989,
301
+ "eval_rougeL": 0.1677,
302
+ "eval_rougeLsum": 0.1676,
303
+ "eval_runtime": 16.7813,
304
+ "eval_samples_per_second": 14.778,
305
+ "eval_steps_per_second": 0.953,
306
+ "step": 1364
307
+ },
308
+ {
309
+ "epoch": 23.0,
310
+ "eval_gen_len": 19.0,
311
+ "eval_loss": 2.1676828861236572,
312
+ "eval_rouge1": 0.1959,
313
+ "eval_rouge2": 0.0988,
314
+ "eval_rougeL": 0.168,
315
+ "eval_rougeLsum": 0.168,
316
+ "eval_runtime": 17.3121,
317
+ "eval_samples_per_second": 14.325,
318
+ "eval_steps_per_second": 0.924,
319
+ "step": 1426
320
+ },
321
+ {
322
+ "epoch": 24.0,
323
+ "eval_gen_len": 19.0,
324
+ "eval_loss": 2.1647536754608154,
325
+ "eval_rouge1": 0.1967,
326
+ "eval_rouge2": 0.0994,
327
+ "eval_rougeL": 0.169,
328
+ "eval_rougeLsum": 0.1692,
329
+ "eval_runtime": 16.9203,
330
+ "eval_samples_per_second": 14.657,
331
+ "eval_steps_per_second": 0.946,
332
+ "step": 1488
333
+ },
334
+ {
335
+ "epoch": 24.19,
336
+ "learning_rate": 1.0329032258064518e-05,
337
+ "loss": 2.2281,
338
+ "step": 1500
339
+ },
340
+ {
341
+ "epoch": 25.0,
342
+ "eval_gen_len": 19.0,
343
+ "eval_loss": 2.15854549407959,
344
+ "eval_rouge1": 0.1958,
345
+ "eval_rouge2": 0.0988,
346
+ "eval_rougeL": 0.1685,
347
+ "eval_rougeLsum": 0.1687,
348
+ "eval_runtime": 17.1171,
349
+ "eval_samples_per_second": 14.488,
350
+ "eval_steps_per_second": 0.935,
351
+ "step": 1550
352
+ },
353
+ {
354
+ "epoch": 26.0,
355
+ "eval_gen_len": 19.0,
356
+ "eval_loss": 2.1557765007019043,
357
+ "eval_rouge1": 0.197,
358
+ "eval_rouge2": 0.1,
359
+ "eval_rougeL": 0.1698,
360
+ "eval_rougeLsum": 0.1699,
361
+ "eval_runtime": 16.8981,
362
+ "eval_samples_per_second": 14.676,
363
+ "eval_steps_per_second": 0.947,
364
+ "step": 1612
365
+ },
366
+ {
367
+ "epoch": 27.0,
368
+ "eval_gen_len": 19.0,
369
+ "eval_loss": 2.1530044078826904,
370
+ "eval_rouge1": 0.196,
371
+ "eval_rouge2": 0.0994,
372
+ "eval_rougeL": 0.1685,
373
+ "eval_rougeLsum": 0.1687,
374
+ "eval_runtime": 16.9499,
375
+ "eval_samples_per_second": 14.631,
376
+ "eval_steps_per_second": 0.944,
377
+ "step": 1674
378
+ },
379
+ {
380
+ "epoch": 28.0,
381
+ "eval_gen_len": 19.0,
382
+ "eval_loss": 2.1497113704681396,
383
+ "eval_rouge1": 0.1971,
384
+ "eval_rouge2": 0.101,
385
+ "eval_rougeL": 0.1697,
386
+ "eval_rougeLsum": 0.1699,
387
+ "eval_runtime": 16.8885,
388
+ "eval_samples_per_second": 14.685,
389
+ "eval_steps_per_second": 0.947,
390
+ "step": 1736
391
+ },
392
+ {
393
+ "epoch": 29.0,
394
+ "eval_gen_len": 19.0,
395
+ "eval_loss": 2.1459004878997803,
396
+ "eval_rouge1": 0.1972,
397
+ "eval_rouge2": 0.1008,
398
+ "eval_rougeL": 0.17,
399
+ "eval_rougeLsum": 0.1701,
400
+ "eval_runtime": 17.5571,
401
+ "eval_samples_per_second": 14.125,
402
+ "eval_steps_per_second": 0.911,
403
+ "step": 1798
404
+ },
405
+ {
406
+ "epoch": 30.0,
407
+ "eval_gen_len": 19.0,
408
+ "eval_loss": 2.142939805984497,
409
+ "eval_rouge1": 0.1946,
410
+ "eval_rouge2": 0.0989,
411
+ "eval_rougeL": 0.1677,
412
+ "eval_rougeLsum": 0.1678,
413
+ "eval_runtime": 17.4108,
414
+ "eval_samples_per_second": 14.244,
415
+ "eval_steps_per_second": 0.919,
416
+ "step": 1860
417
+ },
418
+ {
419
+ "epoch": 31.0,
420
+ "eval_gen_len": 19.0,
421
+ "eval_loss": 2.14223051071167,
422
+ "eval_rouge1": 0.1958,
423
+ "eval_rouge2": 0.1,
424
+ "eval_rougeL": 0.1691,
425
+ "eval_rougeLsum": 0.1692,
426
+ "eval_runtime": 17.3596,
427
+ "eval_samples_per_second": 14.286,
428
+ "eval_steps_per_second": 0.922,
429
+ "step": 1922
430
+ },
431
+ {
432
+ "epoch": 32.0,
433
+ "eval_gen_len": 19.0,
434
+ "eval_loss": 2.139946699142456,
435
+ "eval_rouge1": 0.1952,
436
+ "eval_rouge2": 0.0992,
437
+ "eval_rougeL": 0.1687,
438
+ "eval_rougeLsum": 0.1687,
439
+ "eval_runtime": 17.1971,
440
+ "eval_samples_per_second": 14.421,
441
+ "eval_steps_per_second": 0.93,
442
+ "step": 1984
443
+ },
444
+ {
445
+ "epoch": 32.26,
446
+ "learning_rate": 7.103225806451613e-06,
447
+ "loss": 2.1696,
448
+ "step": 2000
449
+ }
450
+ ],
451
+ "max_steps": 3100,
452
+ "num_train_epochs": 50,
453
+ "total_flos": 8635889668325376.0,
454
+ "trial_name": null,
455
+ "trial_params": null
456
+ }
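
The trainer_state.json above records one evaluation entry per epoch plus periodic training-loss entries. A small sketch, assuming a local copy of the file, for pulling the evaluation curve (epoch, eval_loss, ROUGE-1) out of log_history:

    import json

    # Hedged sketch: "checkpoint-2000/trainer_state.json" is assumed to be a local copy.
    with open("checkpoint-2000/trainer_state.json") as f:
        state = json.load(f)

    # Keep only the evaluation entries (training-loss entries have no eval_loss key).
    for entry in state["log_history"]:
        if "eval_loss" in entry:
            print(entry["epoch"], entry["eval_loss"], entry["eval_rouge1"])
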
checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e9b3aa9ab791ba4b7e80300d59c10840c6bcc3f12ac2ddb83e7eb1190863235d
+ size 4155
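
training_args.bin is a pickled TrainingArguments object rather than readable text, so the diff only shows its git-lfs pointer. A quick sketch, assuming a local copy of the file, for inspecting the hyperparameters it stores:

    import torch

    # Hedged sketch: unpickles the saved TrainingArguments; needs a compatible
    # transformers version installed (the checkpoint was written with 4.31.0).
    args = torch.load("checkpoint-2000/training_args.bin")
    print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)
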
checkpoint-2500/config.json ADDED
@@ -0,0 +1,60 @@
+ {
+ "_name_or_path": "t5-small",
+ "architectures": [
+ "T5ForConditionalGeneration"
+ ],
+ "d_ff": 2048,
+ "d_kv": 64,
+ "d_model": 512,
+ "decoder_start_token_id": 0,
+ "dense_act_fn": "relu",
+ "dropout_rate": 0.1,
+ "eos_token_id": 1,
+ "feed_forward_proj": "relu",
+ "initializer_factor": 1.0,
+ "is_encoder_decoder": true,
+ "is_gated_act": false,
+ "layer_norm_epsilon": 1e-06,
+ "model_type": "t5",
+ "n_positions": 512,
+ "num_decoder_layers": 6,
+ "num_heads": 8,
+ "num_layers": 6,
+ "output_past": true,
+ "pad_token_id": 0,
+ "relative_attention_max_distance": 128,
+ "relative_attention_num_buckets": 32,
+ "task_specific_params": {
+ "summarization": {
+ "early_stopping": true,
+ "length_penalty": 2.0,
+ "max_length": 200,
+ "min_length": 30,
+ "no_repeat_ngram_size": 3,
+ "num_beams": 4,
+ "prefix": "summarize: "
+ },
+ "translation_en_to_de": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to German: "
+ },
+ "translation_en_to_fr": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to French: "
+ },
+ "translation_en_to_ro": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to Romanian: "
+ }
+ },
+ "torch_dtype": "float32",
+ "transformers_version": "4.31.0",
+ "use_cache": true,
+ "vocab_size": 32128
+ }
checkpoint-2500/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+ "decoder_start_token_id": 0,
+ "eos_token_id": 1,
+ "pad_token_id": 0,
+ "transformers_version": "4.31.0"
+ }
checkpoint-2500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ffec5bb48be40248b6724cec7900d8527627d6eeb65cc2a2cfed2bc4520916e1
+ size 484130629
checkpoint-2500/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cd3eb928d5c009450ade9bbbb011a31cf34b811d004a62d30336cc5f2826269f
+ size 242071641
checkpoint-2500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:94f679c48bf0cbcf62b98e862e57307b1d7116e678a7fb0a27a4237a88b8f7e9
+ size 14575
checkpoint-2500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f02f8a90c19951072e8606311863973b1f3e78903397575a9fa5a43902a733cb
+ size 627
checkpoint-2500/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
checkpoint-2500/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2500/tokenizer_config.json ADDED
@@ -0,0 +1,111 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "clean_up_tokenization_spaces": true,
105
+ "eos_token": "</s>",
106
+ "extra_ids": 100,
107
+ "model_max_length": 512,
108
+ "pad_token": "<pad>",
109
+ "tokenizer_class": "T5Tokenizer",
110
+ "unk_token": "<unk>"
111
+ }
checkpoint-2500/trainer_state.json ADDED
@@ -0,0 +1,566 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 40.32258064516129,
5
+ "global_step": 2500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_gen_len": 19.0,
13
+ "eval_loss": 2.797335147857666,
14
+ "eval_rouge1": 0.1301,
15
+ "eval_rouge2": 0.0352,
16
+ "eval_rougeL": 0.1074,
17
+ "eval_rougeLsum": 0.1075,
18
+ "eval_runtime": 18.6868,
19
+ "eval_samples_per_second": 13.271,
20
+ "eval_steps_per_second": 0.856,
21
+ "step": 62
22
+ },
23
+ {
24
+ "epoch": 2.0,
25
+ "eval_gen_len": 19.0,
26
+ "eval_loss": 2.568485975265503,
27
+ "eval_rouge1": 0.1455,
28
+ "eval_rouge2": 0.051,
29
+ "eval_rougeL": 0.1189,
30
+ "eval_rougeLsum": 0.1187,
31
+ "eval_runtime": 16.801,
32
+ "eval_samples_per_second": 14.761,
33
+ "eval_steps_per_second": 0.952,
34
+ "step": 124
35
+ },
36
+ {
37
+ "epoch": 3.0,
38
+ "eval_gen_len": 19.0,
39
+ "eval_loss": 2.475400924682617,
40
+ "eval_rouge1": 0.1674,
41
+ "eval_rouge2": 0.0692,
42
+ "eval_rougeL": 0.1397,
43
+ "eval_rougeLsum": 0.1397,
44
+ "eval_runtime": 17.3933,
45
+ "eval_samples_per_second": 14.258,
46
+ "eval_steps_per_second": 0.92,
47
+ "step": 186
48
+ },
49
+ {
50
+ "epoch": 4.0,
51
+ "eval_gen_len": 19.0,
52
+ "eval_loss": 2.419504404067993,
53
+ "eval_rouge1": 0.1901,
54
+ "eval_rouge2": 0.0867,
55
+ "eval_rougeL": 0.1586,
56
+ "eval_rougeLsum": 0.1587,
57
+ "eval_runtime": 16.7871,
58
+ "eval_samples_per_second": 14.773,
59
+ "eval_steps_per_second": 0.953,
60
+ "step": 248
61
+ },
62
+ {
63
+ "epoch": 5.0,
64
+ "eval_gen_len": 19.0,
65
+ "eval_loss": 2.3755078315734863,
66
+ "eval_rouge1": 0.1933,
67
+ "eval_rouge2": 0.0907,
68
+ "eval_rougeL": 0.1617,
69
+ "eval_rougeLsum": 0.1619,
70
+ "eval_runtime": 16.6712,
71
+ "eval_samples_per_second": 14.876,
72
+ "eval_steps_per_second": 0.96,
73
+ "step": 310
74
+ },
75
+ {
76
+ "epoch": 6.0,
77
+ "eval_gen_len": 19.0,
78
+ "eval_loss": 2.3425652980804443,
79
+ "eval_rouge1": 0.1946,
80
+ "eval_rouge2": 0.0916,
81
+ "eval_rougeL": 0.1634,
82
+ "eval_rougeLsum": 0.1636,
83
+ "eval_runtime": 17.1282,
84
+ "eval_samples_per_second": 14.479,
85
+ "eval_steps_per_second": 0.934,
86
+ "step": 372
87
+ },
88
+ {
89
+ "epoch": 7.0,
90
+ "eval_gen_len": 19.0,
91
+ "eval_loss": 2.3197405338287354,
92
+ "eval_rouge1": 0.1964,
93
+ "eval_rouge2": 0.0929,
94
+ "eval_rougeL": 0.1646,
95
+ "eval_rougeLsum": 0.1648,
96
+ "eval_runtime": 16.7039,
97
+ "eval_samples_per_second": 14.847,
98
+ "eval_steps_per_second": 0.958,
99
+ "step": 434
100
+ },
101
+ {
102
+ "epoch": 8.0,
103
+ "eval_gen_len": 19.0,
104
+ "eval_loss": 2.2987782955169678,
105
+ "eval_rouge1": 0.1968,
106
+ "eval_rouge2": 0.0933,
107
+ "eval_rougeL": 0.165,
108
+ "eval_rougeLsum": 0.1653,
109
+ "eval_runtime": 16.6651,
110
+ "eval_samples_per_second": 14.881,
111
+ "eval_steps_per_second": 0.96,
112
+ "step": 496
113
+ },
114
+ {
115
+ "epoch": 8.06,
116
+ "learning_rate": 1.6780645161290323e-05,
117
+ "loss": 2.7011,
118
+ "step": 500
119
+ },
120
+ {
121
+ "epoch": 9.0,
122
+ "eval_gen_len": 19.0,
123
+ "eval_loss": 2.279834270477295,
124
+ "eval_rouge1": 0.1969,
125
+ "eval_rouge2": 0.0946,
126
+ "eval_rougeL": 0.1662,
127
+ "eval_rougeLsum": 0.1665,
128
+ "eval_runtime": 17.134,
129
+ "eval_samples_per_second": 14.474,
130
+ "eval_steps_per_second": 0.934,
131
+ "step": 558
132
+ },
133
+ {
134
+ "epoch": 10.0,
135
+ "eval_gen_len": 19.0,
136
+ "eval_loss": 2.265596389770508,
137
+ "eval_rouge1": 0.1987,
138
+ "eval_rouge2": 0.0962,
139
+ "eval_rougeL": 0.1672,
140
+ "eval_rougeLsum": 0.1673,
141
+ "eval_runtime": 17.1955,
142
+ "eval_samples_per_second": 14.422,
143
+ "eval_steps_per_second": 0.93,
144
+ "step": 620
145
+ },
146
+ {
147
+ "epoch": 11.0,
148
+ "eval_gen_len": 19.0,
149
+ "eval_loss": 2.2547566890716553,
150
+ "eval_rouge1": 0.1958,
151
+ "eval_rouge2": 0.0965,
152
+ "eval_rougeL": 0.1655,
153
+ "eval_rougeLsum": 0.1657,
154
+ "eval_runtime": 16.9264,
155
+ "eval_samples_per_second": 14.652,
156
+ "eval_steps_per_second": 0.945,
157
+ "step": 682
158
+ },
159
+ {
160
+ "epoch": 12.0,
161
+ "eval_gen_len": 19.0,
162
+ "eval_loss": 2.243624210357666,
163
+ "eval_rouge1": 0.1965,
164
+ "eval_rouge2": 0.096,
165
+ "eval_rougeL": 0.1659,
166
+ "eval_rougeLsum": 0.166,
167
+ "eval_runtime": 16.9191,
168
+ "eval_samples_per_second": 14.658,
169
+ "eval_steps_per_second": 0.946,
170
+ "step": 744
171
+ },
172
+ {
173
+ "epoch": 13.0,
174
+ "eval_gen_len": 19.0,
175
+ "eval_loss": 2.2352294921875,
176
+ "eval_rouge1": 0.1964,
177
+ "eval_rouge2": 0.0971,
178
+ "eval_rougeL": 0.1663,
179
+ "eval_rougeLsum": 0.1664,
180
+ "eval_runtime": 16.9525,
181
+ "eval_samples_per_second": 14.629,
182
+ "eval_steps_per_second": 0.944,
183
+ "step": 806
184
+ },
185
+ {
186
+ "epoch": 14.0,
187
+ "eval_gen_len": 19.0,
188
+ "eval_loss": 2.2252049446105957,
189
+ "eval_rouge1": 0.197,
190
+ "eval_rouge2": 0.097,
191
+ "eval_rougeL": 0.1664,
192
+ "eval_rougeLsum": 0.1664,
193
+ "eval_runtime": 16.904,
194
+ "eval_samples_per_second": 14.671,
195
+ "eval_steps_per_second": 0.947,
196
+ "step": 868
197
+ },
198
+ {
199
+ "epoch": 15.0,
200
+ "eval_gen_len": 19.0,
201
+ "eval_loss": 2.2152445316314697,
202
+ "eval_rouge1": 0.1954,
203
+ "eval_rouge2": 0.0979,
204
+ "eval_rougeL": 0.1664,
205
+ "eval_rougeLsum": 0.1665,
206
+ "eval_runtime": 17.117,
207
+ "eval_samples_per_second": 14.489,
208
+ "eval_steps_per_second": 0.935,
209
+ "step": 930
210
+ },
211
+ {
212
+ "epoch": 16.0,
213
+ "eval_gen_len": 19.0,
214
+ "eval_loss": 2.207918643951416,
215
+ "eval_rouge1": 0.1954,
216
+ "eval_rouge2": 0.0988,
217
+ "eval_rougeL": 0.1674,
218
+ "eval_rougeLsum": 0.1677,
219
+ "eval_runtime": 17.2029,
220
+ "eval_samples_per_second": 14.416,
221
+ "eval_steps_per_second": 0.93,
222
+ "step": 992
223
+ },
224
+ {
225
+ "epoch": 16.13,
226
+ "learning_rate": 1.355483870967742e-05,
227
+ "loss": 2.3282,
228
+ "step": 1000
229
+ },
230
+ {
231
+ "epoch": 17.0,
232
+ "eval_gen_len": 19.0,
233
+ "eval_loss": 2.1986870765686035,
234
+ "eval_rouge1": 0.1951,
235
+ "eval_rouge2": 0.0995,
236
+ "eval_rougeL": 0.1672,
237
+ "eval_rougeLsum": 0.1673,
238
+ "eval_runtime": 17.1147,
239
+ "eval_samples_per_second": 14.49,
240
+ "eval_steps_per_second": 0.935,
241
+ "step": 1054
242
+ },
243
+ {
244
+ "epoch": 18.0,
245
+ "eval_gen_len": 19.0,
246
+ "eval_loss": 2.1938998699188232,
247
+ "eval_rouge1": 0.1974,
248
+ "eval_rouge2": 0.1015,
249
+ "eval_rougeL": 0.1695,
250
+ "eval_rougeLsum": 0.1697,
251
+ "eval_runtime": 16.7909,
252
+ "eval_samples_per_second": 14.77,
253
+ "eval_steps_per_second": 0.953,
254
+ "step": 1116
255
+ },
256
+ {
257
+ "epoch": 19.0,
258
+ "eval_gen_len": 19.0,
259
+ "eval_loss": 2.18984055519104,
260
+ "eval_rouge1": 0.1965,
261
+ "eval_rouge2": 0.1014,
262
+ "eval_rougeL": 0.1691,
263
+ "eval_rougeLsum": 0.1693,
264
+ "eval_runtime": 16.6689,
265
+ "eval_samples_per_second": 14.878,
266
+ "eval_steps_per_second": 0.96,
267
+ "step": 1178
268
+ },
269
+ {
270
+ "epoch": 20.0,
271
+ "eval_gen_len": 19.0,
272
+ "eval_loss": 2.183218240737915,
273
+ "eval_rouge1": 0.1963,
274
+ "eval_rouge2": 0.0997,
275
+ "eval_rougeL": 0.1683,
276
+ "eval_rougeLsum": 0.1685,
277
+ "eval_runtime": 17.3129,
278
+ "eval_samples_per_second": 14.325,
279
+ "eval_steps_per_second": 0.924,
280
+ "step": 1240
281
+ },
282
+ {
283
+ "epoch": 21.0,
284
+ "eval_gen_len": 19.0,
285
+ "eval_loss": 2.1765005588531494,
286
+ "eval_rouge1": 0.1966,
287
+ "eval_rouge2": 0.0991,
288
+ "eval_rougeL": 0.1676,
289
+ "eval_rougeLsum": 0.1678,
290
+ "eval_runtime": 16.8703,
291
+ "eval_samples_per_second": 14.7,
292
+ "eval_steps_per_second": 0.948,
293
+ "step": 1302
294
+ },
295
+ {
296
+ "epoch": 22.0,
297
+ "eval_gen_len": 19.0,
298
+ "eval_loss": 2.1725897789001465,
299
+ "eval_rouge1": 0.1963,
300
+ "eval_rouge2": 0.0989,
301
+ "eval_rougeL": 0.1677,
302
+ "eval_rougeLsum": 0.1676,
303
+ "eval_runtime": 16.7813,
304
+ "eval_samples_per_second": 14.778,
305
+ "eval_steps_per_second": 0.953,
306
+ "step": 1364
307
+ },
308
+ {
309
+ "epoch": 23.0,
310
+ "eval_gen_len": 19.0,
311
+ "eval_loss": 2.1676828861236572,
312
+ "eval_rouge1": 0.1959,
313
+ "eval_rouge2": 0.0988,
314
+ "eval_rougeL": 0.168,
315
+ "eval_rougeLsum": 0.168,
316
+ "eval_runtime": 17.3121,
317
+ "eval_samples_per_second": 14.325,
318
+ "eval_steps_per_second": 0.924,
319
+ "step": 1426
320
+ },
321
+ {
322
+ "epoch": 24.0,
323
+ "eval_gen_len": 19.0,
324
+ "eval_loss": 2.1647536754608154,
325
+ "eval_rouge1": 0.1967,
326
+ "eval_rouge2": 0.0994,
327
+ "eval_rougeL": 0.169,
328
+ "eval_rougeLsum": 0.1692,
329
+ "eval_runtime": 16.9203,
330
+ "eval_samples_per_second": 14.657,
331
+ "eval_steps_per_second": 0.946,
332
+ "step": 1488
333
+ },
334
+ {
335
+ "epoch": 24.19,
336
+ "learning_rate": 1.0329032258064518e-05,
337
+ "loss": 2.2281,
338
+ "step": 1500
339
+ },
340
+ {
341
+ "epoch": 25.0,
342
+ "eval_gen_len": 19.0,
343
+ "eval_loss": 2.15854549407959,
344
+ "eval_rouge1": 0.1958,
345
+ "eval_rouge2": 0.0988,
346
+ "eval_rougeL": 0.1685,
347
+ "eval_rougeLsum": 0.1687,
348
+ "eval_runtime": 17.1171,
349
+ "eval_samples_per_second": 14.488,
350
+ "eval_steps_per_second": 0.935,
351
+ "step": 1550
352
+ },
353
+ {
354
+ "epoch": 26.0,
355
+ "eval_gen_len": 19.0,
356
+ "eval_loss": 2.1557765007019043,
357
+ "eval_rouge1": 0.197,
358
+ "eval_rouge2": 0.1,
359
+ "eval_rougeL": 0.1698,
360
+ "eval_rougeLsum": 0.1699,
361
+ "eval_runtime": 16.8981,
362
+ "eval_samples_per_second": 14.676,
363
+ "eval_steps_per_second": 0.947,
364
+ "step": 1612
365
+ },
366
+ {
367
+ "epoch": 27.0,
368
+ "eval_gen_len": 19.0,
369
+ "eval_loss": 2.1530044078826904,
370
+ "eval_rouge1": 0.196,
371
+ "eval_rouge2": 0.0994,
372
+ "eval_rougeL": 0.1685,
373
+ "eval_rougeLsum": 0.1687,
374
+ "eval_runtime": 16.9499,
375
+ "eval_samples_per_second": 14.631,
376
+ "eval_steps_per_second": 0.944,
377
+ "step": 1674
378
+ },
379
+ {
380
+ "epoch": 28.0,
381
+ "eval_gen_len": 19.0,
382
+ "eval_loss": 2.1497113704681396,
383
+ "eval_rouge1": 0.1971,
384
+ "eval_rouge2": 0.101,
385
+ "eval_rougeL": 0.1697,
386
+ "eval_rougeLsum": 0.1699,
387
+ "eval_runtime": 16.8885,
388
+ "eval_samples_per_second": 14.685,
389
+ "eval_steps_per_second": 0.947,
390
+ "step": 1736
391
+ },
392
+ {
393
+ "epoch": 29.0,
394
+ "eval_gen_len": 19.0,
395
+ "eval_loss": 2.1459004878997803,
396
+ "eval_rouge1": 0.1972,
397
+ "eval_rouge2": 0.1008,
398
+ "eval_rougeL": 0.17,
399
+ "eval_rougeLsum": 0.1701,
400
+ "eval_runtime": 17.5571,
401
+ "eval_samples_per_second": 14.125,
402
+ "eval_steps_per_second": 0.911,
403
+ "step": 1798
404
+ },
405
+ {
406
+ "epoch": 30.0,
407
+ "eval_gen_len": 19.0,
408
+ "eval_loss": 2.142939805984497,
409
+ "eval_rouge1": 0.1946,
410
+ "eval_rouge2": 0.0989,
411
+ "eval_rougeL": 0.1677,
412
+ "eval_rougeLsum": 0.1678,
413
+ "eval_runtime": 17.4108,
414
+ "eval_samples_per_second": 14.244,
415
+ "eval_steps_per_second": 0.919,
416
+ "step": 1860
417
+ },
418
+ {
419
+ "epoch": 31.0,
420
+ "eval_gen_len": 19.0,
421
+ "eval_loss": 2.14223051071167,
422
+ "eval_rouge1": 0.1958,
423
+ "eval_rouge2": 0.1,
424
+ "eval_rougeL": 0.1691,
425
+ "eval_rougeLsum": 0.1692,
426
+ "eval_runtime": 17.3596,
427
+ "eval_samples_per_second": 14.286,
428
+ "eval_steps_per_second": 0.922,
429
+ "step": 1922
430
+ },
431
+ {
432
+ "epoch": 32.0,
433
+ "eval_gen_len": 19.0,
434
+ "eval_loss": 2.139946699142456,
435
+ "eval_rouge1": 0.1952,
436
+ "eval_rouge2": 0.0992,
437
+ "eval_rougeL": 0.1687,
438
+ "eval_rougeLsum": 0.1687,
439
+ "eval_runtime": 17.1971,
440
+ "eval_samples_per_second": 14.421,
441
+ "eval_steps_per_second": 0.93,
442
+ "step": 1984
443
+ },
444
+ {
445
+ "epoch": 32.26,
446
+ "learning_rate": 7.103225806451613e-06,
447
+ "loss": 2.1696,
448
+ "step": 2000
449
+ },
450
+ {
451
+ "epoch": 33.0,
452
+ "eval_gen_len": 19.0,
453
+ "eval_loss": 2.135340690612793,
454
+ "eval_rouge1": 0.1945,
455
+ "eval_rouge2": 0.0983,
456
+ "eval_rougeL": 0.1676,
457
+ "eval_rougeLsum": 0.1676,
458
+ "eval_runtime": 16.8259,
459
+ "eval_samples_per_second": 14.739,
460
+ "eval_steps_per_second": 0.951,
461
+ "step": 2046
462
+ },
463
+ {
464
+ "epoch": 34.0,
465
+ "eval_gen_len": 19.0,
466
+ "eval_loss": 2.1345207691192627,
467
+ "eval_rouge1": 0.1934,
468
+ "eval_rouge2": 0.097,
469
+ "eval_rougeL": 0.1664,
470
+ "eval_rougeLsum": 0.1665,
471
+ "eval_runtime": 17.2959,
472
+ "eval_samples_per_second": 14.339,
473
+ "eval_steps_per_second": 0.925,
474
+ "step": 2108
475
+ },
476
+ {
477
+ "epoch": 35.0,
478
+ "eval_gen_len": 19.0,
479
+ "eval_loss": 2.132594585418701,
480
+ "eval_rouge1": 0.1934,
481
+ "eval_rouge2": 0.0969,
482
+ "eval_rougeL": 0.1666,
483
+ "eval_rougeLsum": 0.1669,
484
+ "eval_runtime": 16.8869,
485
+ "eval_samples_per_second": 14.686,
486
+ "eval_steps_per_second": 0.947,
487
+ "step": 2170
488
+ },
489
+ {
490
+ "epoch": 36.0,
491
+ "eval_gen_len": 19.0,
492
+ "eval_loss": 2.1315131187438965,
493
+ "eval_rouge1": 0.1942,
494
+ "eval_rouge2": 0.0982,
495
+ "eval_rougeL": 0.1674,
496
+ "eval_rougeLsum": 0.1676,
497
+ "eval_runtime": 16.9224,
498
+ "eval_samples_per_second": 14.655,
499
+ "eval_steps_per_second": 0.945,
500
+ "step": 2232
501
+ },
502
+ {
503
+ "epoch": 37.0,
504
+ "eval_gen_len": 19.0,
505
+ "eval_loss": 2.1289427280426025,
506
+ "eval_rouge1": 0.1941,
507
+ "eval_rouge2": 0.0989,
508
+ "eval_rougeL": 0.1679,
509
+ "eval_rougeLsum": 0.1681,
510
+ "eval_runtime": 17.1711,
511
+ "eval_samples_per_second": 14.443,
512
+ "eval_steps_per_second": 0.932,
513
+ "step": 2294
514
+ },
515
+ {
516
+ "epoch": 38.0,
517
+ "eval_gen_len": 19.0,
518
+ "eval_loss": 2.1285345554351807,
519
+ "eval_rouge1": 0.1924,
520
+ "eval_rouge2": 0.0971,
521
+ "eval_rougeL": 0.1664,
522
+ "eval_rougeLsum": 0.1665,
523
+ "eval_runtime": 16.844,
524
+ "eval_samples_per_second": 14.723,
525
+ "eval_steps_per_second": 0.95,
526
+ "step": 2356
527
+ },
528
+ {
529
+ "epoch": 39.0,
530
+ "eval_gen_len": 19.0,
531
+ "eval_loss": 2.1261112689971924,
532
+ "eval_rouge1": 0.1932,
533
+ "eval_rouge2": 0.0983,
534
+ "eval_rougeL": 0.1671,
535
+ "eval_rougeLsum": 0.1672,
536
+ "eval_runtime": 16.7999,
537
+ "eval_samples_per_second": 14.762,
538
+ "eval_steps_per_second": 0.952,
539
+ "step": 2418
540
+ },
541
+ {
542
+ "epoch": 40.0,
543
+ "eval_gen_len": 19.0,
544
+ "eval_loss": 2.1248812675476074,
545
+ "eval_rouge1": 0.1931,
546
+ "eval_rouge2": 0.0985,
547
+ "eval_rougeL": 0.1672,
548
+ "eval_rougeLsum": 0.1674,
549
+ "eval_runtime": 17.7634,
550
+ "eval_samples_per_second": 13.961,
551
+ "eval_steps_per_second": 0.901,
552
+ "step": 2480
553
+ },
554
+ {
555
+ "epoch": 40.32,
556
+ "learning_rate": 3.87741935483871e-06,
557
+ "loss": 2.1317,
558
+ "step": 2500
559
+ }
560
+ ],
561
+ "max_steps": 3100,
562
+ "num_train_epochs": 50,
563
+ "total_flos": 1.079486208540672e+16,
564
+ "trial_name": null,
565
+ "trial_params": null
566
+ }
checkpoint-2500/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e9b3aa9ab791ba4b7e80300d59c10840c6bcc3f12ac2ddb83e7eb1190863235d
+ size 4155
checkpoint-3000/config.json ADDED
@@ -0,0 +1,60 @@
+ {
+ "_name_or_path": "t5-small",
+ "architectures": [
+ "T5ForConditionalGeneration"
+ ],
+ "d_ff": 2048,
+ "d_kv": 64,
+ "d_model": 512,
+ "decoder_start_token_id": 0,
+ "dense_act_fn": "relu",
+ "dropout_rate": 0.1,
+ "eos_token_id": 1,
+ "feed_forward_proj": "relu",
+ "initializer_factor": 1.0,
+ "is_encoder_decoder": true,
+ "is_gated_act": false,
+ "layer_norm_epsilon": 1e-06,
+ "model_type": "t5",
+ "n_positions": 512,
+ "num_decoder_layers": 6,
+ "num_heads": 8,
+ "num_layers": 6,
+ "output_past": true,
+ "pad_token_id": 0,
+ "relative_attention_max_distance": 128,
+ "relative_attention_num_buckets": 32,
+ "task_specific_params": {
+ "summarization": {
+ "early_stopping": true,
+ "length_penalty": 2.0,
+ "max_length": 200,
+ "min_length": 30,
+ "no_repeat_ngram_size": 3,
+ "num_beams": 4,
+ "prefix": "summarize: "
+ },
+ "translation_en_to_de": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to German: "
+ },
+ "translation_en_to_fr": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to French: "
+ },
+ "translation_en_to_ro": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to Romanian: "
+ }
+ },
+ "torch_dtype": "float32",
+ "transformers_version": "4.31.0",
+ "use_cache": true,
+ "vocab_size": 32128
+ }
checkpoint-3000/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+ "decoder_start_token_id": 0,
+ "eos_token_id": 1,
+ "pad_token_id": 0,
+ "transformers_version": "4.31.0"
+ }
checkpoint-3000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a97e3f49017ef0e08c7ca518edbdd14f6975469bf5815fbc82d99f7f262b33ea
+ size 484130629
checkpoint-3000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cfe5d1432ef330d15da08bce2a11ccfe2156b8dbaccd22ddfbfe004772de0c8d
+ size 242071641
checkpoint-3000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b7227c7869d5fccd0a267b43fd38e6be1808091f26b649c4dbb6a3d625426bd7
+ size 14575
checkpoint-3000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5f7d1f7fec25d18f2d7af695342410fc67ec0320643b979daa1a43939606c35f
+ size 627
checkpoint-3000/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
checkpoint-3000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-3000/tokenizer_config.json ADDED
@@ -0,0 +1,111 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "clean_up_tokenization_spaces": true,
105
+ "eos_token": "</s>",
106
+ "extra_ids": 100,
107
+ "model_max_length": 512,
108
+ "pad_token": "<pad>",
109
+ "tokenizer_class": "T5Tokenizer",
110
+ "unk_token": "<unk>"
111
+ }
checkpoint-3000/trainer_state.json ADDED
@@ -0,0 +1,676 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 48.38709677419355,
5
+ "global_step": 3000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_gen_len": 19.0,
13
+ "eval_loss": 2.797335147857666,
14
+ "eval_rouge1": 0.1301,
15
+ "eval_rouge2": 0.0352,
16
+ "eval_rougeL": 0.1074,
17
+ "eval_rougeLsum": 0.1075,
18
+ "eval_runtime": 18.6868,
19
+ "eval_samples_per_second": 13.271,
20
+ "eval_steps_per_second": 0.856,
21
+ "step": 62
22
+ },
23
+ {
24
+ "epoch": 2.0,
25
+ "eval_gen_len": 19.0,
26
+ "eval_loss": 2.568485975265503,
27
+ "eval_rouge1": 0.1455,
28
+ "eval_rouge2": 0.051,
29
+ "eval_rougeL": 0.1189,
30
+ "eval_rougeLsum": 0.1187,
31
+ "eval_runtime": 16.801,
32
+ "eval_samples_per_second": 14.761,
33
+ "eval_steps_per_second": 0.952,
34
+ "step": 124
35
+ },
36
+ {
37
+ "epoch": 3.0,
38
+ "eval_gen_len": 19.0,
39
+ "eval_loss": 2.475400924682617,
40
+ "eval_rouge1": 0.1674,
41
+ "eval_rouge2": 0.0692,
42
+ "eval_rougeL": 0.1397,
43
+ "eval_rougeLsum": 0.1397,
44
+ "eval_runtime": 17.3933,
45
+ "eval_samples_per_second": 14.258,
46
+ "eval_steps_per_second": 0.92,
47
+ "step": 186
48
+ },
49
+ {
50
+ "epoch": 4.0,
51
+ "eval_gen_len": 19.0,
52
+ "eval_loss": 2.419504404067993,
53
+ "eval_rouge1": 0.1901,
54
+ "eval_rouge2": 0.0867,
55
+ "eval_rougeL": 0.1586,
56
+ "eval_rougeLsum": 0.1587,
57
+ "eval_runtime": 16.7871,
58
+ "eval_samples_per_second": 14.773,
59
+ "eval_steps_per_second": 0.953,
60
+ "step": 248
61
+ },
62
+ {
63
+ "epoch": 5.0,
64
+ "eval_gen_len": 19.0,
65
+ "eval_loss": 2.3755078315734863,
66
+ "eval_rouge1": 0.1933,
67
+ "eval_rouge2": 0.0907,
68
+ "eval_rougeL": 0.1617,
69
+ "eval_rougeLsum": 0.1619,
70
+ "eval_runtime": 16.6712,
71
+ "eval_samples_per_second": 14.876,
72
+ "eval_steps_per_second": 0.96,
73
+ "step": 310
74
+ },
75
+ {
76
+ "epoch": 6.0,
77
+ "eval_gen_len": 19.0,
78
+ "eval_loss": 2.3425652980804443,
79
+ "eval_rouge1": 0.1946,
80
+ "eval_rouge2": 0.0916,
81
+ "eval_rougeL": 0.1634,
82
+ "eval_rougeLsum": 0.1636,
83
+ "eval_runtime": 17.1282,
84
+ "eval_samples_per_second": 14.479,
85
+ "eval_steps_per_second": 0.934,
86
+ "step": 372
87
+ },
88
+ {
89
+ "epoch": 7.0,
90
+ "eval_gen_len": 19.0,
91
+ "eval_loss": 2.3197405338287354,
92
+ "eval_rouge1": 0.1964,
93
+ "eval_rouge2": 0.0929,
94
+ "eval_rougeL": 0.1646,
95
+ "eval_rougeLsum": 0.1648,
96
+ "eval_runtime": 16.7039,
97
+ "eval_samples_per_second": 14.847,
98
+ "eval_steps_per_second": 0.958,
99
+ "step": 434
100
+ },
101
+ {
102
+ "epoch": 8.0,
103
+ "eval_gen_len": 19.0,
104
+ "eval_loss": 2.2987782955169678,
105
+ "eval_rouge1": 0.1968,
106
+ "eval_rouge2": 0.0933,
107
+ "eval_rougeL": 0.165,
108
+ "eval_rougeLsum": 0.1653,
109
+ "eval_runtime": 16.6651,
110
+ "eval_samples_per_second": 14.881,
111
+ "eval_steps_per_second": 0.96,
112
+ "step": 496
113
+ },
114
+ {
115
+ "epoch": 8.06,
116
+ "learning_rate": 1.6780645161290323e-05,
117
+ "loss": 2.7011,
118
+ "step": 500
119
+ },
120
+ {
121
+ "epoch": 9.0,
122
+ "eval_gen_len": 19.0,
123
+ "eval_loss": 2.279834270477295,
124
+ "eval_rouge1": 0.1969,
125
+ "eval_rouge2": 0.0946,
126
+ "eval_rougeL": 0.1662,
127
+ "eval_rougeLsum": 0.1665,
128
+ "eval_runtime": 17.134,
129
+ "eval_samples_per_second": 14.474,
130
+ "eval_steps_per_second": 0.934,
131
+ "step": 558
132
+ },
133
+ {
134
+ "epoch": 10.0,
135
+ "eval_gen_len": 19.0,
136
+ "eval_loss": 2.265596389770508,
137
+ "eval_rouge1": 0.1987,
138
+ "eval_rouge2": 0.0962,
139
+ "eval_rougeL": 0.1672,
140
+ "eval_rougeLsum": 0.1673,
141
+ "eval_runtime": 17.1955,
142
+ "eval_samples_per_second": 14.422,
143
+ "eval_steps_per_second": 0.93,
144
+ "step": 620
145
+ },
146
+ {
147
+ "epoch": 11.0,
148
+ "eval_gen_len": 19.0,
149
+ "eval_loss": 2.2547566890716553,
150
+ "eval_rouge1": 0.1958,
151
+ "eval_rouge2": 0.0965,
152
+ "eval_rougeL": 0.1655,
153
+ "eval_rougeLsum": 0.1657,
154
+ "eval_runtime": 16.9264,
155
+ "eval_samples_per_second": 14.652,
156
+ "eval_steps_per_second": 0.945,
157
+ "step": 682
158
+ },
159
+ {
160
+ "epoch": 12.0,
161
+ "eval_gen_len": 19.0,
162
+ "eval_loss": 2.243624210357666,
163
+ "eval_rouge1": 0.1965,
164
+ "eval_rouge2": 0.096,
165
+ "eval_rougeL": 0.1659,
166
+ "eval_rougeLsum": 0.166,
167
+ "eval_runtime": 16.9191,
168
+ "eval_samples_per_second": 14.658,
169
+ "eval_steps_per_second": 0.946,
170
+ "step": 744
171
+ },
172
+ {
173
+ "epoch": 13.0,
174
+ "eval_gen_len": 19.0,
175
+ "eval_loss": 2.2352294921875,
176
+ "eval_rouge1": 0.1964,
177
+ "eval_rouge2": 0.0971,
178
+ "eval_rougeL": 0.1663,
179
+ "eval_rougeLsum": 0.1664,
180
+ "eval_runtime": 16.9525,
181
+ "eval_samples_per_second": 14.629,
182
+ "eval_steps_per_second": 0.944,
183
+ "step": 806
184
+ },
185
+ {
186
+ "epoch": 14.0,
187
+ "eval_gen_len": 19.0,
188
+ "eval_loss": 2.2252049446105957,
189
+ "eval_rouge1": 0.197,
190
+ "eval_rouge2": 0.097,
191
+ "eval_rougeL": 0.1664,
192
+ "eval_rougeLsum": 0.1664,
193
+ "eval_runtime": 16.904,
194
+ "eval_samples_per_second": 14.671,
195
+ "eval_steps_per_second": 0.947,
196
+ "step": 868
197
+ },
198
+ {
199
+ "epoch": 15.0,
200
+ "eval_gen_len": 19.0,
201
+ "eval_loss": 2.2152445316314697,
202
+ "eval_rouge1": 0.1954,
203
+ "eval_rouge2": 0.0979,
204
+ "eval_rougeL": 0.1664,
205
+ "eval_rougeLsum": 0.1665,
206
+ "eval_runtime": 17.117,
207
+ "eval_samples_per_second": 14.489,
208
+ "eval_steps_per_second": 0.935,
209
+ "step": 930
210
+ },
211
+ {
212
+ "epoch": 16.0,
213
+ "eval_gen_len": 19.0,
214
+ "eval_loss": 2.207918643951416,
215
+ "eval_rouge1": 0.1954,
216
+ "eval_rouge2": 0.0988,
217
+ "eval_rougeL": 0.1674,
218
+ "eval_rougeLsum": 0.1677,
219
+ "eval_runtime": 17.2029,
220
+ "eval_samples_per_second": 14.416,
221
+ "eval_steps_per_second": 0.93,
222
+ "step": 992
223
+ },
224
+ {
225
+ "epoch": 16.13,
226
+ "learning_rate": 1.355483870967742e-05,
227
+ "loss": 2.3282,
228
+ "step": 1000
229
+ },
230
+ {
231
+ "epoch": 17.0,
232
+ "eval_gen_len": 19.0,
233
+ "eval_loss": 2.1986870765686035,
234
+ "eval_rouge1": 0.1951,
235
+ "eval_rouge2": 0.0995,
236
+ "eval_rougeL": 0.1672,
237
+ "eval_rougeLsum": 0.1673,
238
+ "eval_runtime": 17.1147,
239
+ "eval_samples_per_second": 14.49,
240
+ "eval_steps_per_second": 0.935,
241
+ "step": 1054
242
+ },
243
+ {
244
+ "epoch": 18.0,
245
+ "eval_gen_len": 19.0,
246
+ "eval_loss": 2.1938998699188232,
247
+ "eval_rouge1": 0.1974,
248
+ "eval_rouge2": 0.1015,
249
+ "eval_rougeL": 0.1695,
250
+ "eval_rougeLsum": 0.1697,
251
+ "eval_runtime": 16.7909,
252
+ "eval_samples_per_second": 14.77,
253
+ "eval_steps_per_second": 0.953,
254
+ "step": 1116
255
+ },
256
+ {
257
+ "epoch": 19.0,
258
+ "eval_gen_len": 19.0,
259
+ "eval_loss": 2.18984055519104,
260
+ "eval_rouge1": 0.1965,
261
+ "eval_rouge2": 0.1014,
262
+ "eval_rougeL": 0.1691,
263
+ "eval_rougeLsum": 0.1693,
264
+ "eval_runtime": 16.6689,
265
+ "eval_samples_per_second": 14.878,
266
+ "eval_steps_per_second": 0.96,
267
+ "step": 1178
268
+ },
269
+ {
270
+ "epoch": 20.0,
271
+ "eval_gen_len": 19.0,
272
+ "eval_loss": 2.183218240737915,
273
+ "eval_rouge1": 0.1963,
274
+ "eval_rouge2": 0.0997,
275
+ "eval_rougeL": 0.1683,
276
+ "eval_rougeLsum": 0.1685,
277
+ "eval_runtime": 17.3129,
278
+ "eval_samples_per_second": 14.325,
279
+ "eval_steps_per_second": 0.924,
280
+ "step": 1240
281
+ },
282
+ {
283
+ "epoch": 21.0,
284
+ "eval_gen_len": 19.0,
285
+ "eval_loss": 2.1765005588531494,
286
+ "eval_rouge1": 0.1966,
287
+ "eval_rouge2": 0.0991,
288
+ "eval_rougeL": 0.1676,
289
+ "eval_rougeLsum": 0.1678,
290
+ "eval_runtime": 16.8703,
291
+ "eval_samples_per_second": 14.7,
292
+ "eval_steps_per_second": 0.948,
293
+ "step": 1302
294
+ },
295
+ {
296
+ "epoch": 22.0,
297
+ "eval_gen_len": 19.0,
298
+ "eval_loss": 2.1725897789001465,
299
+ "eval_rouge1": 0.1963,
300
+ "eval_rouge2": 0.0989,
301
+ "eval_rougeL": 0.1677,
302
+ "eval_rougeLsum": 0.1676,
303
+ "eval_runtime": 16.7813,
304
+ "eval_samples_per_second": 14.778,
305
+ "eval_steps_per_second": 0.953,
306
+ "step": 1364
307
+ },
308
+ {
309
+ "epoch": 23.0,
310
+ "eval_gen_len": 19.0,
311
+ "eval_loss": 2.1676828861236572,
312
+ "eval_rouge1": 0.1959,
313
+ "eval_rouge2": 0.0988,
314
+ "eval_rougeL": 0.168,
315
+ "eval_rougeLsum": 0.168,
316
+ "eval_runtime": 17.3121,
317
+ "eval_samples_per_second": 14.325,
318
+ "eval_steps_per_second": 0.924,
319
+ "step": 1426
320
+ },
321
+ {
322
+ "epoch": 24.0,
323
+ "eval_gen_len": 19.0,
324
+ "eval_loss": 2.1647536754608154,
325
+ "eval_rouge1": 0.1967,
326
+ "eval_rouge2": 0.0994,
327
+ "eval_rougeL": 0.169,
328
+ "eval_rougeLsum": 0.1692,
329
+ "eval_runtime": 16.9203,
330
+ "eval_samples_per_second": 14.657,
331
+ "eval_steps_per_second": 0.946,
332
+ "step": 1488
333
+ },
334
+ {
335
+ "epoch": 24.19,
336
+ "learning_rate": 1.0329032258064518e-05,
337
+ "loss": 2.2281,
338
+ "step": 1500
339
+ },
340
+ {
341
+ "epoch": 25.0,
342
+ "eval_gen_len": 19.0,
343
+ "eval_loss": 2.15854549407959,
344
+ "eval_rouge1": 0.1958,
345
+ "eval_rouge2": 0.0988,
346
+ "eval_rougeL": 0.1685,
347
+ "eval_rougeLsum": 0.1687,
348
+ "eval_runtime": 17.1171,
349
+ "eval_samples_per_second": 14.488,
350
+ "eval_steps_per_second": 0.935,
351
+ "step": 1550
352
+ },
353
+ {
354
+ "epoch": 26.0,
355
+ "eval_gen_len": 19.0,
356
+ "eval_loss": 2.1557765007019043,
357
+ "eval_rouge1": 0.197,
358
+ "eval_rouge2": 0.1,
359
+ "eval_rougeL": 0.1698,
360
+ "eval_rougeLsum": 0.1699,
361
+ "eval_runtime": 16.8981,
362
+ "eval_samples_per_second": 14.676,
363
+ "eval_steps_per_second": 0.947,
364
+ "step": 1612
365
+ },
366
+ {
367
+ "epoch": 27.0,
368
+ "eval_gen_len": 19.0,
369
+ "eval_loss": 2.1530044078826904,
370
+ "eval_rouge1": 0.196,
371
+ "eval_rouge2": 0.0994,
372
+ "eval_rougeL": 0.1685,
373
+ "eval_rougeLsum": 0.1687,
374
+ "eval_runtime": 16.9499,
375
+ "eval_samples_per_second": 14.631,
376
+ "eval_steps_per_second": 0.944,
377
+ "step": 1674
378
+ },
379
+ {
380
+ "epoch": 28.0,
381
+ "eval_gen_len": 19.0,
382
+ "eval_loss": 2.1497113704681396,
383
+ "eval_rouge1": 0.1971,
384
+ "eval_rouge2": 0.101,
385
+ "eval_rougeL": 0.1697,
386
+ "eval_rougeLsum": 0.1699,
387
+ "eval_runtime": 16.8885,
388
+ "eval_samples_per_second": 14.685,
389
+ "eval_steps_per_second": 0.947,
390
+ "step": 1736
391
+ },
392
+ {
393
+ "epoch": 29.0,
394
+ "eval_gen_len": 19.0,
395
+ "eval_loss": 2.1459004878997803,
396
+ "eval_rouge1": 0.1972,
397
+ "eval_rouge2": 0.1008,
398
+ "eval_rougeL": 0.17,
399
+ "eval_rougeLsum": 0.1701,
400
+ "eval_runtime": 17.5571,
401
+ "eval_samples_per_second": 14.125,
402
+ "eval_steps_per_second": 0.911,
403
+ "step": 1798
404
+ },
405
+ {
406
+ "epoch": 30.0,
407
+ "eval_gen_len": 19.0,
408
+ "eval_loss": 2.142939805984497,
409
+ "eval_rouge1": 0.1946,
410
+ "eval_rouge2": 0.0989,
411
+ "eval_rougeL": 0.1677,
412
+ "eval_rougeLsum": 0.1678,
413
+ "eval_runtime": 17.4108,
414
+ "eval_samples_per_second": 14.244,
415
+ "eval_steps_per_second": 0.919,
416
+ "step": 1860
417
+ },
418
+ {
419
+ "epoch": 31.0,
420
+ "eval_gen_len": 19.0,
421
+ "eval_loss": 2.14223051071167,
422
+ "eval_rouge1": 0.1958,
423
+ "eval_rouge2": 0.1,
424
+ "eval_rougeL": 0.1691,
425
+ "eval_rougeLsum": 0.1692,
426
+ "eval_runtime": 17.3596,
427
+ "eval_samples_per_second": 14.286,
428
+ "eval_steps_per_second": 0.922,
429
+ "step": 1922
430
+ },
431
+ {
432
+ "epoch": 32.0,
433
+ "eval_gen_len": 19.0,
434
+ "eval_loss": 2.139946699142456,
435
+ "eval_rouge1": 0.1952,
436
+ "eval_rouge2": 0.0992,
437
+ "eval_rougeL": 0.1687,
438
+ "eval_rougeLsum": 0.1687,
439
+ "eval_runtime": 17.1971,
440
+ "eval_samples_per_second": 14.421,
441
+ "eval_steps_per_second": 0.93,
442
+ "step": 1984
443
+ },
444
+ {
445
+ "epoch": 32.26,
446
+ "learning_rate": 7.103225806451613e-06,
447
+ "loss": 2.1696,
448
+ "step": 2000
449
+ },
450
+ {
451
+ "epoch": 33.0,
452
+ "eval_gen_len": 19.0,
453
+ "eval_loss": 2.135340690612793,
454
+ "eval_rouge1": 0.1945,
455
+ "eval_rouge2": 0.0983,
456
+ "eval_rougeL": 0.1676,
457
+ "eval_rougeLsum": 0.1676,
458
+ "eval_runtime": 16.8259,
459
+ "eval_samples_per_second": 14.739,
460
+ "eval_steps_per_second": 0.951,
461
+ "step": 2046
462
+ },
463
+ {
464
+ "epoch": 34.0,
465
+ "eval_gen_len": 19.0,
466
+ "eval_loss": 2.1345207691192627,
467
+ "eval_rouge1": 0.1934,
468
+ "eval_rouge2": 0.097,
469
+ "eval_rougeL": 0.1664,
470
+ "eval_rougeLsum": 0.1665,
471
+ "eval_runtime": 17.2959,
472
+ "eval_samples_per_second": 14.339,
473
+ "eval_steps_per_second": 0.925,
474
+ "step": 2108
475
+ },
476
+ {
477
+ "epoch": 35.0,
478
+ "eval_gen_len": 19.0,
479
+ "eval_loss": 2.132594585418701,
480
+ "eval_rouge1": 0.1934,
481
+ "eval_rouge2": 0.0969,
482
+ "eval_rougeL": 0.1666,
483
+ "eval_rougeLsum": 0.1669,
484
+ "eval_runtime": 16.8869,
485
+ "eval_samples_per_second": 14.686,
486
+ "eval_steps_per_second": 0.947,
487
+ "step": 2170
488
+ },
489
+ {
490
+ "epoch": 36.0,
491
+ "eval_gen_len": 19.0,
492
+ "eval_loss": 2.1315131187438965,
493
+ "eval_rouge1": 0.1942,
494
+ "eval_rouge2": 0.0982,
495
+ "eval_rougeL": 0.1674,
496
+ "eval_rougeLsum": 0.1676,
497
+ "eval_runtime": 16.9224,
498
+ "eval_samples_per_second": 14.655,
499
+ "eval_steps_per_second": 0.945,
500
+ "step": 2232
501
+ },
502
+ {
503
+ "epoch": 37.0,
504
+ "eval_gen_len": 19.0,
505
+ "eval_loss": 2.1289427280426025,
506
+ "eval_rouge1": 0.1941,
507
+ "eval_rouge2": 0.0989,
508
+ "eval_rougeL": 0.1679,
509
+ "eval_rougeLsum": 0.1681,
510
+ "eval_runtime": 17.1711,
511
+ "eval_samples_per_second": 14.443,
512
+ "eval_steps_per_second": 0.932,
513
+ "step": 2294
514
+ },
515
+ {
516
+ "epoch": 38.0,
517
+ "eval_gen_len": 19.0,
518
+ "eval_loss": 2.1285345554351807,
519
+ "eval_rouge1": 0.1924,
520
+ "eval_rouge2": 0.0971,
521
+ "eval_rougeL": 0.1664,
522
+ "eval_rougeLsum": 0.1665,
523
+ "eval_runtime": 16.844,
524
+ "eval_samples_per_second": 14.723,
525
+ "eval_steps_per_second": 0.95,
526
+ "step": 2356
527
+ },
528
+ {
529
+ "epoch": 39.0,
530
+ "eval_gen_len": 19.0,
531
+ "eval_loss": 2.1261112689971924,
532
+ "eval_rouge1": 0.1932,
533
+ "eval_rouge2": 0.0983,
534
+ "eval_rougeL": 0.1671,
535
+ "eval_rougeLsum": 0.1672,
536
+ "eval_runtime": 16.7999,
537
+ "eval_samples_per_second": 14.762,
538
+ "eval_steps_per_second": 0.952,
539
+ "step": 2418
540
+ },
541
+ {
542
+ "epoch": 40.0,
543
+ "eval_gen_len": 19.0,
544
+ "eval_loss": 2.1248812675476074,
545
+ "eval_rouge1": 0.1931,
546
+ "eval_rouge2": 0.0985,
547
+ "eval_rougeL": 0.1672,
548
+ "eval_rougeLsum": 0.1674,
549
+ "eval_runtime": 17.7634,
550
+ "eval_samples_per_second": 13.961,
551
+ "eval_steps_per_second": 0.901,
552
+ "step": 2480
553
+ },
554
+ {
555
+ "epoch": 40.32,
556
+ "learning_rate": 3.87741935483871e-06,
557
+ "loss": 2.1317,
558
+ "step": 2500
559
+ },
560
+ {
561
+ "epoch": 41.0,
562
+ "eval_gen_len": 19.0,
563
+ "eval_loss": 2.1237168312072754,
564
+ "eval_rouge1": 0.1924,
565
+ "eval_rouge2": 0.0984,
566
+ "eval_rougeL": 0.1675,
567
+ "eval_rougeLsum": 0.1676,
568
+ "eval_runtime": 16.9062,
569
+ "eval_samples_per_second": 14.669,
570
+ "eval_steps_per_second": 0.946,
571
+ "step": 2542
572
+ },
573
+ {
574
+ "epoch": 42.0,
575
+ "eval_gen_len": 19.0,
576
+ "eval_loss": 2.122954845428467,
577
+ "eval_rouge1": 0.1932,
578
+ "eval_rouge2": 0.0987,
579
+ "eval_rougeL": 0.1684,
580
+ "eval_rougeLsum": 0.1686,
581
+ "eval_runtime": 16.7724,
582
+ "eval_samples_per_second": 14.786,
583
+ "eval_steps_per_second": 0.954,
584
+ "step": 2604
585
+ },
586
+ {
587
+ "epoch": 43.0,
588
+ "eval_gen_len": 19.0,
589
+ "eval_loss": 2.1227517127990723,
590
+ "eval_rouge1": 0.1927,
591
+ "eval_rouge2": 0.0975,
592
+ "eval_rougeL": 0.1671,
593
+ "eval_rougeLsum": 0.1672,
594
+ "eval_runtime": 16.7221,
595
+ "eval_samples_per_second": 14.831,
596
+ "eval_steps_per_second": 0.957,
597
+ "step": 2666
598
+ },
599
+ {
600
+ "epoch": 44.0,
601
+ "eval_gen_len": 19.0,
602
+ "eval_loss": 2.1212539672851562,
603
+ "eval_rouge1": 0.1924,
604
+ "eval_rouge2": 0.0973,
605
+ "eval_rougeL": 0.1666,
606
+ "eval_rougeLsum": 0.1668,
607
+ "eval_runtime": 16.7559,
608
+ "eval_samples_per_second": 14.801,
609
+ "eval_steps_per_second": 0.955,
610
+ "step": 2728
611
+ },
612
+ {
613
+ "epoch": 45.0,
614
+ "eval_gen_len": 19.0,
615
+ "eval_loss": 2.121572732925415,
616
+ "eval_rouge1": 0.1924,
617
+ "eval_rouge2": 0.0979,
618
+ "eval_rougeL": 0.1665,
619
+ "eval_rougeLsum": 0.1667,
620
+ "eval_runtime": 16.8173,
621
+ "eval_samples_per_second": 14.747,
622
+ "eval_steps_per_second": 0.951,
623
+ "step": 2790
624
+ },
625
+ {
626
+ "epoch": 46.0,
627
+ "eval_gen_len": 19.0,
628
+ "eval_loss": 2.120406150817871,
629
+ "eval_rouge1": 0.1914,
630
+ "eval_rouge2": 0.097,
631
+ "eval_rougeL": 0.1657,
632
+ "eval_rougeLsum": 0.1659,
633
+ "eval_runtime": 17.3154,
634
+ "eval_samples_per_second": 14.323,
635
+ "eval_steps_per_second": 0.924,
636
+ "step": 2852
637
+ },
638
+ {
639
+ "epoch": 47.0,
640
+ "eval_gen_len": 19.0,
641
+ "eval_loss": 2.1205737590789795,
642
+ "eval_rouge1": 0.1928,
643
+ "eval_rouge2": 0.0982,
644
+ "eval_rougeL": 0.167,
645
+ "eval_rougeLsum": 0.1672,
646
+ "eval_runtime": 16.7782,
647
+ "eval_samples_per_second": 14.781,
648
+ "eval_steps_per_second": 0.954,
649
+ "step": 2914
650
+ },
651
+ {
652
+ "epoch": 48.0,
653
+ "eval_gen_len": 19.0,
654
+ "eval_loss": 2.1202914714813232,
655
+ "eval_rouge1": 0.1926,
656
+ "eval_rouge2": 0.0973,
657
+ "eval_rougeL": 0.1667,
658
+ "eval_rougeLsum": 0.1669,
659
+ "eval_runtime": 16.8047,
660
+ "eval_samples_per_second": 14.758,
661
+ "eval_steps_per_second": 0.952,
662
+ "step": 2976
663
+ },
664
+ {
665
+ "epoch": 48.39,
666
+ "learning_rate": 6.516129032258064e-07,
667
+ "loss": 2.1144,
668
+ "step": 3000
669
+ }
670
+ ],
671
+ "max_steps": 3100,
672
+ "num_train_epochs": 50,
673
+ "total_flos": 1.2953834502488064e+16,
674
+ "trial_name": null,
675
+ "trial_params": null
676
+ }
checkpoint-3000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9b3aa9ab791ba4b7e80300d59c10840c6bcc3f12ac2ddb83e7eb1190863235d
3
+ size 4155
config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "t5-small",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "d_ff": 2048,
7
+ "d_kv": 64,
8
+ "d_model": 512,
9
+ "decoder_start_token_id": 0,
10
+ "dense_act_fn": "relu",
11
+ "dropout_rate": 0.1,
12
+ "eos_token_id": 1,
13
+ "feed_forward_proj": "relu",
14
+ "initializer_factor": 1.0,
15
+ "is_encoder_decoder": true,
16
+ "is_gated_act": false,
17
+ "layer_norm_epsilon": 1e-06,
18
+ "model_type": "t5",
19
+ "n_positions": 512,
20
+ "num_decoder_layers": 6,
21
+ "num_heads": 8,
22
+ "num_layers": 6,
23
+ "output_past": true,
24
+ "pad_token_id": 0,
25
+ "relative_attention_max_distance": 128,
26
+ "relative_attention_num_buckets": 32,
27
+ "task_specific_params": {
28
+ "summarization": {
29
+ "early_stopping": true,
30
+ "length_penalty": 2.0,
31
+ "max_length": 200,
32
+ "min_length": 30,
33
+ "no_repeat_ngram_size": 3,
34
+ "num_beams": 4,
35
+ "prefix": "summarize: "
36
+ },
37
+ "translation_en_to_de": {
38
+ "early_stopping": true,
39
+ "max_length": 300,
40
+ "num_beams": 4,
41
+ "prefix": "translate English to German: "
42
+ },
43
+ "translation_en_to_fr": {
44
+ "early_stopping": true,
45
+ "max_length": 300,
46
+ "num_beams": 4,
47
+ "prefix": "translate English to French: "
48
+ },
49
+ "translation_en_to_ro": {
50
+ "early_stopping": true,
51
+ "max_length": 300,
52
+ "num_beams": 4,
53
+ "prefix": "translate English to Romanian: "
54
+ }
55
+ },
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.31.0",
58
+ "use_cache": true,
59
+ "vocab_size": 32128
60
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfe5d1432ef330d15da08bce2a11ccfe2156b8dbaccd22ddfbfe004772de0c8d
3
+ size 242071641
runs/Jul21_05-05-37_1daa7b5ea0df/events.out.tfevents.1689915947.1daa7b5ea0df.1289.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3a3331fbf9a794a0f6581babe2d539c47d03dc3973a79d86d147829611daa83
3
+ size 32558
special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "clean_up_tokenization_spaces": true,
105
+ "eos_token": "</s>",
106
+ "extra_ids": 100,
107
+ "model_max_length": 512,
108
+ "pad_token": "<pad>",
109
+ "tokenizer_class": "T5Tokenizer",
110
+ "unk_token": "<unk>"
111
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9b3aa9ab791ba4b7e80300d59c10840c6bcc3f12ac2ddb83e7eb1190863235d
3
+ size 4155