root commited on
Commit
9d18855
1 Parent(s): da30210

updated by abhijithneilabraham

Browse files
config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/pegasus-pubmed",
3
+ "activation_dropout": 0.1,
4
+ "activation_function": "relu",
5
+ "add_bias_logits": false,
6
+ "add_final_layer_norm": true,
7
+ "architectures": [
8
+ "PegasusForConditionalGeneration"
9
+ ],
10
+ "attention_dropout": 0.1,
11
+ "bos_token_id": 0,
12
+ "classif_dropout": 0.0,
13
+ "classifier_dropout": 0.0,
14
+ "d_model": 1024,
15
+ "decoder_attention_heads": 16,
16
+ "decoder_ffn_dim": 4096,
17
+ "decoder_layerdrop": 0.0,
18
+ "decoder_layers": 16,
19
+ "decoder_start_token_id": 0,
20
+ "dropout": 0.1,
21
+ "encoder_attention_heads": 16,
22
+ "encoder_ffn_dim": 4096,
23
+ "encoder_layerdrop": 0.0,
24
+ "encoder_layers": 16,
25
+ "eos_token_id": 1,
26
+ "extra_pos_embeddings": 1,
27
+ "forced_eos_token_id": 1,
28
+ "id2label": {
29
+ "0": "LABEL_0",
30
+ "1": "LABEL_1",
31
+ "2": "LABEL_2"
32
+ },
33
+ "init_std": 0.02,
34
+ "is_encoder_decoder": true,
35
+ "label2id": {
36
+ "LABEL_0": 0,
37
+ "LABEL_1": 1,
38
+ "LABEL_2": 2
39
+ },
40
+ "length_penalty": 0.8,
41
+ "max_length": 256,
42
+ "max_position_embeddings": 1024,
43
+ "min_length": 32,
44
+ "model_type": "pegasus",
45
+ "normalize_before": true,
46
+ "normalize_embedding": false,
47
+ "num_beams": 8,
48
+ "num_hidden_layers": 16,
49
+ "pad_token_id": 0,
50
+ "scale_embedding": true,
51
+ "static_position_embeddings": true,
52
+ "torch_dtype": "float32",
53
+ "transformers_version": "4.13.0",
54
+ "use_cache": true,
55
+ "vocab_size": 96103
56
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f1319e5dd880c5a18b9d501f2fa5479532848a6371931b492d44c9462f612ee
3
+ size 4550014615
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:244454df3d37b755ec0ce4d818b77e00986be8b506f9afffacdc6fa4f73d6135
3
+ size 2283834829
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acfc7c5d62e915d3c3b520f2600ffa7c59ec9807db73534c68cce067d1520ea9
3
+ size 17672
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ae62c7a2018fef5496da63a2bce6219d8ee040ceb325bad8ac1e7a13b7234df
3
+ size 623
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask_2>", "additional_special_tokens": ["<mask_1>", "<unk_2>", "<unk_3>", "<unk_4>", "<unk_5>", "<unk_6>", "<unk_7>", "<unk_8>", "<unk_9>", "<unk_10>", "<unk_11>", "<unk_12>", "<unk_13>", "<unk_14>", "<unk_15>", "<unk_16>", "<unk_17>", "<unk_18>", "<unk_19>", "<unk_20>", "<unk_21>", "<unk_22>", "<unk_23>", "<unk_24>", "<unk_25>", "<unk_26>", "<unk_27>", "<unk_28>", "<unk_29>", "<unk_30>", "<unk_31>", "<unk_32>", "<unk_33>", "<unk_34>", "<unk_35>", "<unk_36>", "<unk_37>", "<unk_38>", "<unk_39>", "<unk_40>", "<unk_41>", "<unk_42>", "<unk_43>", "<unk_44>", "<unk_45>", "<unk_46>", "<unk_47>", "<unk_48>", "<unk_49>", "<unk_50>", "<unk_51>", "<unk_52>", "<unk_53>", "<unk_54>", "<unk_55>", "<unk_56>", "<unk_57>", "<unk_58>", "<unk_59>", "<unk_60>", "<unk_61>", "<unk_62>", "<unk_63>", "<unk_64>", "<unk_65>", "<unk_66>", "<unk_67>", "<unk_68>", "<unk_69>", "<unk_70>", "<unk_71>", "<unk_72>", "<unk_73>", "<unk_74>", "<unk_75>", "<unk_76>", "<unk_77>", "<unk_78>", "<unk_79>", "<unk_80>", "<unk_81>", "<unk_82>", "<unk_83>", "<unk_84>", "<unk_85>", "<unk_86>", "<unk_87>", "<unk_88>", "<unk_89>", "<unk_90>", "<unk_91>", "<unk_92>", "<unk_93>", "<unk_94>", "<unk_95>", "<unk_96>", "<unk_97>", "<unk_98>", "<unk_99>", "<unk_100>", "<unk_101>", "<unk_102>"]}
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0015189ef36359283fec8b93cf6d9ce51bca37eb1101defc68a53b394913b96c
3
+ size 1912529
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"pad_token": "<pad>", "eos_token": "</s>", "unk_token": "<unk>", "mask_token": "<mask_2>", "mask_token_sent": "<mask_1>", "offset": 103, "additional_special_tokens": ["<mask_1>", "<unk_2>", "<unk_3>", "<unk_4>", "<unk_5>", "<unk_6>", "<unk_7>", "<unk_8>", "<unk_9>", "<unk_10>", "<unk_11>", "<unk_12>", "<unk_13>", "<unk_14>", "<unk_15>", "<unk_16>", "<unk_17>", "<unk_18>", "<unk_19>", "<unk_20>", "<unk_21>", "<unk_22>", "<unk_23>", "<unk_24>", "<unk_25>", "<unk_26>", "<unk_27>", "<unk_28>", "<unk_29>", "<unk_30>", "<unk_31>", "<unk_32>", "<unk_33>", "<unk_34>", "<unk_35>", "<unk_36>", "<unk_37>", "<unk_38>", "<unk_39>", "<unk_40>", "<unk_41>", "<unk_42>", "<unk_43>", "<unk_44>", "<unk_45>", "<unk_46>", "<unk_47>", "<unk_48>", "<unk_49>", "<unk_50>", "<unk_51>", "<unk_52>", "<unk_53>", "<unk_54>", "<unk_55>", "<unk_56>", "<unk_57>", "<unk_58>", "<unk_59>", "<unk_60>", "<unk_61>", "<unk_62>", "<unk_63>", "<unk_64>", "<unk_65>", "<unk_66>", "<unk_67>", "<unk_68>", "<unk_69>", "<unk_70>", "<unk_71>", "<unk_72>", "<unk_73>", "<unk_74>", "<unk_75>", "<unk_76>", "<unk_77>", "<unk_78>", "<unk_79>", "<unk_80>", "<unk_81>", "<unk_82>", "<unk_83>", "<unk_84>", "<unk_85>", "<unk_86>", "<unk_87>", "<unk_88>", "<unk_89>", "<unk_90>", "<unk_91>", "<unk_92>", "<unk_93>", "<unk_94>", "<unk_95>", "<unk_96>", "<unk_97>", "<unk_98>", "<unk_99>", "<unk_100>", "<unk_101>", "<unk_102>"], "model_max_length": 1024, "special_tokens_map_file": null, "full_tokenizer_file": null, "name_or_path": "google/pegasus-pubmed", "sp_model_kwargs": {}, "tokenizer_class": "PegasusTokenizer"}
trainer_state.json ADDED
@@ -0,0 +1,1156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.4883581399917603,
3
+ "best_model_checkpoint": "ccdv_pegasus_xsum_summarization/checkpoint-13500",
4
+ "epoch": 2.0012007204322595,
5
+ "global_step": 30000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.03,
12
+ "learning_rate": 4.944411091099104e-05,
13
+ "loss": 1.3482,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.03,
18
+ "eval_gen_len": 190.7041,
19
+ "eval_loss": 1.5671061277389526,
20
+ "eval_rouge1": 43.9725,
21
+ "eval_rouge2": 20.8852,
22
+ "eval_rougeL": 29.6036,
23
+ "eval_rougeLsum": 39.2595,
24
+ "eval_runtime": 10745.9844,
25
+ "eval_samples_per_second": 0.617,
26
+ "eval_steps_per_second": 0.077,
27
+ "step": 500
28
+ },
29
+ {
30
+ "epoch": 0.07,
31
+ "learning_rate": 4.8888221821982085e-05,
32
+ "loss": 1.0335,
33
+ "step": 1000
34
+ },
35
+ {
36
+ "epoch": 0.07,
37
+ "eval_gen_len": 159.8545,
38
+ "eval_loss": 1.5465657711029053,
39
+ "eval_rouge1": 44.9236,
40
+ "eval_rouge2": 21.1853,
41
+ "eval_rougeL": 30.4447,
42
+ "eval_rougeLsum": 39.9918,
43
+ "eval_runtime": 10231.1208,
44
+ "eval_samples_per_second": 0.648,
45
+ "eval_steps_per_second": 0.081,
46
+ "step": 1000
47
+ },
48
+ {
49
+ "epoch": 0.1,
50
+ "learning_rate": 4.8332332732973116e-05,
51
+ "loss": 1.0184,
52
+ "step": 1500
53
+ },
54
+ {
55
+ "epoch": 0.1,
56
+ "eval_gen_len": 143.5453,
57
+ "eval_loss": 1.5334348678588867,
58
+ "eval_rouge1": 44.9483,
59
+ "eval_rouge2": 20.9962,
60
+ "eval_rougeL": 30.5328,
61
+ "eval_rougeLsum": 40.0531,
62
+ "eval_runtime": 8769.3615,
63
+ "eval_samples_per_second": 0.756,
64
+ "eval_steps_per_second": 0.095,
65
+ "step": 1500
66
+ },
67
+ {
68
+ "epoch": 0.13,
69
+ "learning_rate": 4.777644364396416e-05,
70
+ "loss": 1.0015,
71
+ "step": 2000
72
+ },
73
+ {
74
+ "epoch": 0.13,
75
+ "eval_gen_len": 137.856,
76
+ "eval_loss": 1.529853343963623,
77
+ "eval_rouge1": 45.9034,
78
+ "eval_rouge2": 21.784,
79
+ "eval_rougeL": 31.4025,
80
+ "eval_rougeLsum": 40.8983,
81
+ "eval_runtime": 7582.4229,
82
+ "eval_samples_per_second": 0.875,
83
+ "eval_steps_per_second": 0.109,
84
+ "step": 2000
85
+ },
86
+ {
87
+ "epoch": 0.17,
88
+ "learning_rate": 4.72205545549552e-05,
89
+ "loss": 1.0101,
90
+ "step": 2500
91
+ },
92
+ {
93
+ "epoch": 0.17,
94
+ "eval_gen_len": 134.5485,
95
+ "eval_loss": 1.5291049480438232,
96
+ "eval_rouge1": 45.6738,
97
+ "eval_rouge2": 21.5853,
98
+ "eval_rougeL": 31.1439,
99
+ "eval_rougeLsum": 40.7442,
100
+ "eval_runtime": 6945.0608,
101
+ "eval_samples_per_second": 0.955,
102
+ "eval_steps_per_second": 0.12,
103
+ "step": 2500
104
+ },
105
+ {
106
+ "epoch": 0.2,
107
+ "learning_rate": 4.6664665465946236e-05,
108
+ "loss": 0.9973,
109
+ "step": 3000
110
+ },
111
+ {
112
+ "epoch": 0.2,
113
+ "eval_gen_len": 131.2587,
114
+ "eval_loss": 1.523977518081665,
115
+ "eval_rouge1": 45.5052,
116
+ "eval_rouge2": 21.4202,
117
+ "eval_rougeL": 31.1499,
118
+ "eval_rougeLsum": 40.5736,
119
+ "eval_runtime": 6458.4712,
120
+ "eval_samples_per_second": 1.027,
121
+ "eval_steps_per_second": 0.129,
122
+ "step": 3000
123
+ },
124
+ {
125
+ "epoch": 0.23,
126
+ "learning_rate": 4.6108776376937274e-05,
127
+ "loss": 0.9855,
128
+ "step": 3500
129
+ },
130
+ {
131
+ "epoch": 0.23,
132
+ "eval_gen_len": 131.0582,
133
+ "eval_loss": 1.5234577655792236,
134
+ "eval_rouge1": 45.8336,
135
+ "eval_rouge2": 21.7072,
136
+ "eval_rougeL": 31.439,
137
+ "eval_rougeLsum": 40.9387,
138
+ "eval_runtime": 6345.3859,
139
+ "eval_samples_per_second": 1.045,
140
+ "eval_steps_per_second": 0.131,
141
+ "step": 3500
142
+ },
143
+ {
144
+ "epoch": 0.27,
145
+ "learning_rate": 4.555288728792831e-05,
146
+ "loss": 0.9868,
147
+ "step": 4000
148
+ },
149
+ {
150
+ "epoch": 0.27,
151
+ "eval_gen_len": 127.9753,
152
+ "eval_loss": 1.5183237791061401,
153
+ "eval_rouge1": 45.6348,
154
+ "eval_rouge2": 21.5462,
155
+ "eval_rougeL": 31.3009,
156
+ "eval_rougeLsum": 40.6469,
157
+ "eval_runtime": 6091.2782,
158
+ "eval_samples_per_second": 1.089,
159
+ "eval_steps_per_second": 0.136,
160
+ "step": 4000
161
+ },
162
+ {
163
+ "epoch": 0.3,
164
+ "learning_rate": 4.4996998198919356e-05,
165
+ "loss": 0.9802,
166
+ "step": 4500
167
+ },
168
+ {
169
+ "epoch": 0.3,
170
+ "eval_gen_len": 127.78,
171
+ "eval_loss": 1.5132805109024048,
172
+ "eval_rouge1": 45.4357,
173
+ "eval_rouge2": 21.3339,
174
+ "eval_rougeL": 31.1304,
175
+ "eval_rougeLsum": 40.531,
176
+ "eval_runtime": 5970.8563,
177
+ "eval_samples_per_second": 1.111,
178
+ "eval_steps_per_second": 0.139,
179
+ "step": 4500
180
+ },
181
+ {
182
+ "epoch": 0.33,
183
+ "learning_rate": 4.4441109109910394e-05,
184
+ "loss": 0.9743,
185
+ "step": 5000
186
+ },
187
+ {
188
+ "epoch": 0.33,
189
+ "eval_gen_len": 126.9619,
190
+ "eval_loss": 1.5101301670074463,
191
+ "eval_rouge1": 45.4845,
192
+ "eval_rouge2": 21.4302,
193
+ "eval_rougeL": 31.2033,
194
+ "eval_rougeLsum": 40.5934,
195
+ "eval_runtime": 5820.9525,
196
+ "eval_samples_per_second": 1.14,
197
+ "eval_steps_per_second": 0.143,
198
+ "step": 5000
199
+ },
200
+ {
201
+ "epoch": 0.37,
202
+ "learning_rate": 4.388522002090143e-05,
203
+ "loss": 0.972,
204
+ "step": 5500
205
+ },
206
+ {
207
+ "epoch": 0.37,
208
+ "eval_gen_len": 127.1796,
209
+ "eval_loss": 1.5053614377975464,
210
+ "eval_rouge1": 45.196,
211
+ "eval_rouge2": 21.1882,
212
+ "eval_rougeL": 30.9407,
213
+ "eval_rougeLsum": 40.2648,
214
+ "eval_runtime": 5768.324,
215
+ "eval_samples_per_second": 1.15,
216
+ "eval_steps_per_second": 0.144,
217
+ "step": 5500
218
+ },
219
+ {
220
+ "epoch": 0.4,
221
+ "learning_rate": 4.332933093189247e-05,
222
+ "loss": 0.9651,
223
+ "step": 6000
224
+ },
225
+ {
226
+ "epoch": 0.4,
227
+ "eval_gen_len": 126.9254,
228
+ "eval_loss": 1.5030862092971802,
229
+ "eval_rouge1": 45.4822,
230
+ "eval_rouge2": 21.4363,
231
+ "eval_rougeL": 31.1422,
232
+ "eval_rougeLsum": 40.5397,
233
+ "eval_runtime": 5665.8916,
234
+ "eval_samples_per_second": 1.171,
235
+ "eval_steps_per_second": 0.146,
236
+ "step": 6000
237
+ },
238
+ {
239
+ "epoch": 0.43,
240
+ "learning_rate": 4.277344184288351e-05,
241
+ "loss": 0.9758,
242
+ "step": 6500
243
+ },
244
+ {
245
+ "epoch": 0.43,
246
+ "eval_gen_len": 126.4933,
247
+ "eval_loss": 1.495548963546753,
248
+ "eval_rouge1": 45.299,
249
+ "eval_rouge2": 21.346,
250
+ "eval_rougeL": 31.0361,
251
+ "eval_rougeLsum": 40.3325,
252
+ "eval_runtime": 5589.2093,
253
+ "eval_samples_per_second": 1.187,
254
+ "eval_steps_per_second": 0.149,
255
+ "step": 6500
256
+ },
257
+ {
258
+ "epoch": 0.47,
259
+ "learning_rate": 4.221755275387455e-05,
260
+ "loss": 0.9652,
261
+ "step": 7000
262
+ },
263
+ {
264
+ "epoch": 0.47,
265
+ "eval_gen_len": 126.0859,
266
+ "eval_loss": 1.4975615739822388,
267
+ "eval_rouge1": 45.4694,
268
+ "eval_rouge2": 21.5044,
269
+ "eval_rougeL": 31.1786,
270
+ "eval_rougeLsum": 40.5032,
271
+ "eval_runtime": 5569.8623,
272
+ "eval_samples_per_second": 1.191,
273
+ "eval_steps_per_second": 0.149,
274
+ "step": 7000
275
+ },
276
+ {
277
+ "epoch": 0.5,
278
+ "learning_rate": 4.166166366486558e-05,
279
+ "loss": 0.9601,
280
+ "step": 7500
281
+ },
282
+ {
283
+ "epoch": 0.5,
284
+ "eval_gen_len": 126.8815,
285
+ "eval_loss": 1.4945002794265747,
286
+ "eval_rouge1": 45.1971,
287
+ "eval_rouge2": 21.2682,
288
+ "eval_rougeL": 30.9321,
289
+ "eval_rougeLsum": 40.2959,
290
+ "eval_runtime": 5557.7856,
291
+ "eval_samples_per_second": 1.193,
292
+ "eval_steps_per_second": 0.149,
293
+ "step": 7500
294
+ },
295
+ {
296
+ "epoch": 0.53,
297
+ "learning_rate": 4.110577457585663e-05,
298
+ "loss": 0.9502,
299
+ "step": 8000
300
+ },
301
+ {
302
+ "epoch": 0.53,
303
+ "eval_gen_len": 126.4628,
304
+ "eval_loss": 1.49406898021698,
305
+ "eval_rouge1": 45.5653,
306
+ "eval_rouge2": 21.5655,
307
+ "eval_rougeL": 31.2703,
308
+ "eval_rougeLsum": 40.5622,
309
+ "eval_runtime": 5535.3927,
310
+ "eval_samples_per_second": 1.198,
311
+ "eval_steps_per_second": 0.15,
312
+ "step": 8000
313
+ },
314
+ {
315
+ "epoch": 0.57,
316
+ "learning_rate": 4.0549885486847665e-05,
317
+ "loss": 0.9537,
318
+ "step": 8500
319
+ },
320
+ {
321
+ "epoch": 0.57,
322
+ "eval_gen_len": 126.5709,
323
+ "eval_loss": 1.4941043853759766,
324
+ "eval_rouge1": 45.2806,
325
+ "eval_rouge2": 21.2587,
326
+ "eval_rougeL": 30.93,
327
+ "eval_rougeLsum": 40.354,
328
+ "eval_runtime": 5533.5879,
329
+ "eval_samples_per_second": 1.199,
330
+ "eval_steps_per_second": 0.15,
331
+ "step": 8500
332
+ },
333
+ {
334
+ "epoch": 0.6,
335
+ "learning_rate": 3.999399639783871e-05,
336
+ "loss": 0.9629,
337
+ "step": 9000
338
+ },
339
+ {
340
+ "epoch": 0.6,
341
+ "eval_gen_len": 126.547,
342
+ "eval_loss": 1.4939745664596558,
343
+ "eval_rouge1": 45.2474,
344
+ "eval_rouge2": 21.275,
345
+ "eval_rougeL": 30.9302,
346
+ "eval_rougeLsum": 40.3377,
347
+ "eval_runtime": 5530.7272,
348
+ "eval_samples_per_second": 1.199,
349
+ "eval_steps_per_second": 0.15,
350
+ "step": 9000
351
+ },
352
+ {
353
+ "epoch": 0.63,
354
+ "learning_rate": 3.943810730882974e-05,
355
+ "loss": 0.9528,
356
+ "step": 9500
357
+ },
358
+ {
359
+ "epoch": 0.63,
360
+ "eval_gen_len": 126.768,
361
+ "eval_loss": 1.4947481155395508,
362
+ "eval_rouge1": 45.3619,
363
+ "eval_rouge2": 21.3754,
364
+ "eval_rougeL": 31.0723,
365
+ "eval_rougeLsum": 40.4162,
366
+ "eval_runtime": 5524.5717,
367
+ "eval_samples_per_second": 1.201,
368
+ "eval_steps_per_second": 0.15,
369
+ "step": 9500
370
+ },
371
+ {
372
+ "epoch": 0.67,
373
+ "learning_rate": 3.888221821982078e-05,
374
+ "loss": 0.9532,
375
+ "step": 10000
376
+ },
377
+ {
378
+ "epoch": 0.67,
379
+ "eval_gen_len": 126.5323,
380
+ "eval_loss": 1.4923893213272095,
381
+ "eval_rouge1": 45.5763,
382
+ "eval_rouge2": 21.6469,
383
+ "eval_rougeL": 31.2585,
384
+ "eval_rougeLsum": 40.5722,
385
+ "eval_runtime": 5518.2912,
386
+ "eval_samples_per_second": 1.202,
387
+ "eval_steps_per_second": 0.15,
388
+ "step": 10000
389
+ },
390
+ {
391
+ "epoch": 0.7,
392
+ "learning_rate": 3.832632913081182e-05,
393
+ "loss": 0.945,
394
+ "step": 10500
395
+ },
396
+ {
397
+ "epoch": 0.7,
398
+ "eval_gen_len": 126.69,
399
+ "eval_loss": 1.4898710250854492,
400
+ "eval_rouge1": 45.2629,
401
+ "eval_rouge2": 21.3471,
402
+ "eval_rougeL": 31.0405,
403
+ "eval_rougeLsum": 40.3211,
404
+ "eval_runtime": 6184.8714,
405
+ "eval_samples_per_second": 1.072,
406
+ "eval_steps_per_second": 0.134,
407
+ "step": 10500
408
+ },
409
+ {
410
+ "epoch": 0.73,
411
+ "learning_rate": 3.777044004180286e-05,
412
+ "loss": 0.9464,
413
+ "step": 11000
414
+ },
415
+ {
416
+ "epoch": 0.73,
417
+ "eval_gen_len": 126.9052,
418
+ "eval_loss": 1.489205002784729,
419
+ "eval_rouge1": 45.3769,
420
+ "eval_rouge2": 21.3457,
421
+ "eval_rougeL": 30.9968,
422
+ "eval_rougeLsum": 40.388,
423
+ "eval_runtime": 5520.5499,
424
+ "eval_samples_per_second": 1.202,
425
+ "eval_steps_per_second": 0.15,
426
+ "step": 11000
427
+ },
428
+ {
429
+ "epoch": 0.77,
430
+ "learning_rate": 3.7214550952793906e-05,
431
+ "loss": 0.9544,
432
+ "step": 11500
433
+ },
434
+ {
435
+ "epoch": 0.77,
436
+ "eval_gen_len": 126.5739,
437
+ "eval_loss": 1.4892535209655762,
438
+ "eval_rouge1": 45.411,
439
+ "eval_rouge2": 21.3852,
440
+ "eval_rougeL": 31.0295,
441
+ "eval_rougeLsum": 40.4881,
442
+ "eval_runtime": 5521.4271,
443
+ "eval_samples_per_second": 1.201,
444
+ "eval_steps_per_second": 0.15,
445
+ "step": 11500
446
+ },
447
+ {
448
+ "epoch": 0.8,
449
+ "learning_rate": 3.6658661863784937e-05,
450
+ "loss": 0.9467,
451
+ "step": 12000
452
+ },
453
+ {
454
+ "epoch": 0.8,
455
+ "eval_gen_len": 126.7315,
456
+ "eval_loss": 1.4929231405258179,
457
+ "eval_rouge1": 45.4345,
458
+ "eval_rouge2": 21.4378,
459
+ "eval_rougeL": 31.1163,
460
+ "eval_rougeLsum": 40.4393,
461
+ "eval_runtime": 5524.2145,
462
+ "eval_samples_per_second": 1.201,
463
+ "eval_steps_per_second": 0.15,
464
+ "step": 12000
465
+ },
466
+ {
467
+ "epoch": 0.83,
468
+ "learning_rate": 3.610277277477598e-05,
469
+ "loss": 0.9517,
470
+ "step": 12500
471
+ },
472
+ {
473
+ "epoch": 0.83,
474
+ "eval_gen_len": 126.58,
475
+ "eval_loss": 1.4917516708374023,
476
+ "eval_rouge1": 45.3614,
477
+ "eval_rouge2": 21.3396,
478
+ "eval_rougeL": 30.9925,
479
+ "eval_rougeLsum": 40.3636,
480
+ "eval_runtime": 5514.201,
481
+ "eval_samples_per_second": 1.203,
482
+ "eval_steps_per_second": 0.151,
483
+ "step": 12500
484
+ },
485
+ {
486
+ "epoch": 0.87,
487
+ "learning_rate": 3.554688368576702e-05,
488
+ "loss": 0.9497,
489
+ "step": 13000
490
+ },
491
+ {
492
+ "epoch": 0.87,
493
+ "eval_gen_len": 126.7977,
494
+ "eval_loss": 1.4918133020401,
495
+ "eval_rouge1": 45.2485,
496
+ "eval_rouge2": 21.2367,
497
+ "eval_rougeL": 30.9282,
498
+ "eval_rougeLsum": 40.3438,
499
+ "eval_runtime": 6509.3818,
500
+ "eval_samples_per_second": 1.019,
501
+ "eval_steps_per_second": 0.128,
502
+ "step": 13000
503
+ },
504
+ {
505
+ "epoch": 0.9,
506
+ "learning_rate": 3.499099459675806e-05,
507
+ "loss": 0.9386,
508
+ "step": 13500
509
+ },
510
+ {
511
+ "epoch": 0.9,
512
+ "eval_gen_len": 126.7524,
513
+ "eval_loss": 1.4883581399917603,
514
+ "eval_rouge1": 45.5038,
515
+ "eval_rouge2": 21.5064,
516
+ "eval_rougeL": 31.2132,
517
+ "eval_rougeLsum": 40.5696,
518
+ "eval_runtime": 5529.57,
519
+ "eval_samples_per_second": 1.2,
520
+ "eval_steps_per_second": 0.15,
521
+ "step": 13500
522
+ },
523
+ {
524
+ "epoch": 0.93,
525
+ "learning_rate": 3.4435105507749095e-05,
526
+ "loss": 0.9473,
527
+ "step": 14000
528
+ },
529
+ {
530
+ "epoch": 0.93,
531
+ "eval_gen_len": 126.6534,
532
+ "eval_loss": 1.4918317794799805,
533
+ "eval_rouge1": 45.2367,
534
+ "eval_rouge2": 21.2615,
535
+ "eval_rougeL": 30.9179,
536
+ "eval_rougeLsum": 40.2548,
537
+ "eval_runtime": 5515.735,
538
+ "eval_samples_per_second": 1.203,
539
+ "eval_steps_per_second": 0.15,
540
+ "step": 14000
541
+ },
542
+ {
543
+ "epoch": 0.97,
544
+ "learning_rate": 3.387921641874013e-05,
545
+ "loss": 0.9235,
546
+ "step": 14500
547
+ },
548
+ {
549
+ "epoch": 0.97,
550
+ "eval_gen_len": 126.5972,
551
+ "eval_loss": 1.4897193908691406,
552
+ "eval_rouge1": 45.8027,
553
+ "eval_rouge2": 21.7228,
554
+ "eval_rougeL": 31.3946,
555
+ "eval_rougeLsum": 40.764,
556
+ "eval_runtime": 5518.2889,
557
+ "eval_samples_per_second": 1.202,
558
+ "eval_steps_per_second": 0.15,
559
+ "step": 14500
560
+ },
561
+ {
562
+ "epoch": 1.0,
563
+ "learning_rate": 3.332332732973118e-05,
564
+ "loss": 0.9344,
565
+ "step": 15000
566
+ },
567
+ {
568
+ "epoch": 1.0,
569
+ "eval_gen_len": 126.9212,
570
+ "eval_loss": 1.4973394870758057,
571
+ "eval_rouge1": 44.8773,
572
+ "eval_rouge2": 20.9475,
573
+ "eval_rougeL": 30.5827,
574
+ "eval_rougeLsum": 39.9489,
575
+ "eval_runtime": 5510.2549,
576
+ "eval_samples_per_second": 1.204,
577
+ "eval_steps_per_second": 0.151,
578
+ "step": 15000
579
+ },
580
+ {
581
+ "epoch": 1.03,
582
+ "learning_rate": 3.2767438240722215e-05,
583
+ "loss": 0.9139,
584
+ "step": 15500
585
+ },
586
+ {
587
+ "epoch": 1.03,
588
+ "eval_gen_len": 126.692,
589
+ "eval_loss": 1.5064738988876343,
590
+ "eval_rouge1": 45.4207,
591
+ "eval_rouge2": 21.3856,
592
+ "eval_rougeL": 31.0837,
593
+ "eval_rougeLsum": 40.4414,
594
+ "eval_runtime": 5526.017,
595
+ "eval_samples_per_second": 1.2,
596
+ "eval_steps_per_second": 0.15,
597
+ "step": 15500
598
+ },
599
+ {
600
+ "epoch": 1.07,
601
+ "learning_rate": 3.221154915171325e-05,
602
+ "loss": 0.8939,
603
+ "step": 16000
604
+ },
605
+ {
606
+ "epoch": 1.07,
607
+ "eval_gen_len": 126.5179,
608
+ "eval_loss": 1.508902668952942,
609
+ "eval_rouge1": 45.5575,
610
+ "eval_rouge2": 21.5153,
611
+ "eval_rougeL": 31.2115,
612
+ "eval_rougeLsum": 40.5517,
613
+ "eval_runtime": 5698.8075,
614
+ "eval_samples_per_second": 1.164,
615
+ "eval_steps_per_second": 0.146,
616
+ "step": 16000
617
+ },
618
+ {
619
+ "epoch": 1.1,
620
+ "learning_rate": 3.165566006270429e-05,
621
+ "loss": 0.8968,
622
+ "step": 16500
623
+ },
624
+ {
625
+ "epoch": 1.1,
626
+ "eval_gen_len": 126.5447,
627
+ "eval_loss": 1.5106098651885986,
628
+ "eval_rouge1": 45.4574,
629
+ "eval_rouge2": 21.4786,
630
+ "eval_rougeL": 31.1065,
631
+ "eval_rougeLsum": 40.495,
632
+ "eval_runtime": 5606.9538,
633
+ "eval_samples_per_second": 1.183,
634
+ "eval_steps_per_second": 0.148,
635
+ "step": 16500
636
+ },
637
+ {
638
+ "epoch": 1.13,
639
+ "learning_rate": 3.109977097369533e-05,
640
+ "loss": 0.8999,
641
+ "step": 17000
642
+ },
643
+ {
644
+ "epoch": 1.13,
645
+ "eval_gen_len": 126.6894,
646
+ "eval_loss": 1.5100876092910767,
647
+ "eval_rouge1": 45.4805,
648
+ "eval_rouge2": 21.4579,
649
+ "eval_rougeL": 31.1062,
650
+ "eval_rougeLsum": 40.5138,
651
+ "eval_runtime": 5594.355,
652
+ "eval_samples_per_second": 1.186,
653
+ "eval_steps_per_second": 0.148,
654
+ "step": 17000
655
+ },
656
+ {
657
+ "epoch": 1.17,
658
+ "learning_rate": 3.054388188468637e-05,
659
+ "loss": 0.903,
660
+ "step": 17500
661
+ },
662
+ {
663
+ "epoch": 1.17,
664
+ "eval_gen_len": 126.5988,
665
+ "eval_loss": 1.5103389024734497,
666
+ "eval_rouge1": 45.495,
667
+ "eval_rouge2": 21.4395,
668
+ "eval_rougeL": 31.1445,
669
+ "eval_rougeLsum": 40.4949,
670
+ "eval_runtime": 5586.6059,
671
+ "eval_samples_per_second": 1.187,
672
+ "eval_steps_per_second": 0.149,
673
+ "step": 17500
674
+ },
675
+ {
676
+ "epoch": 1.2,
677
+ "learning_rate": 2.9987992795677407e-05,
678
+ "loss": 0.8988,
679
+ "step": 18000
680
+ },
681
+ {
682
+ "epoch": 1.2,
683
+ "eval_gen_len": 126.5643,
684
+ "eval_loss": 1.5120760202407837,
685
+ "eval_rouge1": 45.2764,
686
+ "eval_rouge2": 21.2652,
687
+ "eval_rougeL": 30.944,
688
+ "eval_rougeLsum": 40.3249,
689
+ "eval_runtime": 5558.8098,
690
+ "eval_samples_per_second": 1.193,
691
+ "eval_steps_per_second": 0.149,
692
+ "step": 18000
693
+ },
694
+ {
695
+ "epoch": 1.23,
696
+ "learning_rate": 2.9432103706668445e-05,
697
+ "loss": 0.9027,
698
+ "step": 18500
699
+ },
700
+ {
701
+ "epoch": 1.23,
702
+ "eval_gen_len": 126.8441,
703
+ "eval_loss": 1.5092076063156128,
704
+ "eval_rouge1": 45.4884,
705
+ "eval_rouge2": 21.4334,
706
+ "eval_rougeL": 31.0499,
707
+ "eval_rougeLsum": 40.4796,
708
+ "eval_runtime": 5536.9856,
709
+ "eval_samples_per_second": 1.198,
710
+ "eval_steps_per_second": 0.15,
711
+ "step": 18500
712
+ },
713
+ {
714
+ "epoch": 1.27,
715
+ "learning_rate": 2.8876214617659486e-05,
716
+ "loss": 0.9044,
717
+ "step": 19000
718
+ },
719
+ {
720
+ "epoch": 1.27,
721
+ "eval_gen_len": 126.8737,
722
+ "eval_loss": 1.5079020261764526,
723
+ "eval_rouge1": 45.5708,
724
+ "eval_rouge2": 21.5358,
725
+ "eval_rougeL": 31.1862,
726
+ "eval_rougeLsum": 40.594,
727
+ "eval_runtime": 5524.867,
728
+ "eval_samples_per_second": 1.201,
729
+ "eval_steps_per_second": 0.15,
730
+ "step": 19000
731
+ },
732
+ {
733
+ "epoch": 1.3,
734
+ "learning_rate": 2.8320325528650527e-05,
735
+ "loss": 0.906,
736
+ "step": 19500
737
+ },
738
+ {
739
+ "epoch": 1.3,
740
+ "eval_gen_len": 126.8627,
741
+ "eval_loss": 1.5116254091262817,
742
+ "eval_rouge1": 45.4542,
743
+ "eval_rouge2": 21.4172,
744
+ "eval_rougeL": 31.0754,
745
+ "eval_rougeLsum": 40.439,
746
+ "eval_runtime": 5524.341,
747
+ "eval_samples_per_second": 1.201,
748
+ "eval_steps_per_second": 0.15,
749
+ "step": 19500
750
+ },
751
+ {
752
+ "epoch": 1.33,
753
+ "learning_rate": 2.776443643964157e-05,
754
+ "loss": 0.8994,
755
+ "step": 20000
756
+ },
757
+ {
758
+ "epoch": 1.33,
759
+ "eval_gen_len": 126.8206,
760
+ "eval_loss": 1.5085355043411255,
761
+ "eval_rouge1": 45.5424,
762
+ "eval_rouge2": 21.5009,
763
+ "eval_rougeL": 31.1428,
764
+ "eval_rougeLsum": 40.5667,
765
+ "eval_runtime": 5528.1375,
766
+ "eval_samples_per_second": 1.2,
767
+ "eval_steps_per_second": 0.15,
768
+ "step": 20000
769
+ },
770
+ {
771
+ "epoch": 1.37,
772
+ "learning_rate": 2.7208547350632603e-05,
773
+ "loss": 0.9088,
774
+ "step": 20500
775
+ },
776
+ {
777
+ "epoch": 1.37,
778
+ "eval_gen_len": 126.7414,
779
+ "eval_loss": 1.5124515295028687,
780
+ "eval_rouge1": 45.3129,
781
+ "eval_rouge2": 21.2629,
782
+ "eval_rougeL": 30.9461,
783
+ "eval_rougeLsum": 40.3271,
784
+ "eval_runtime": 5534.3419,
785
+ "eval_samples_per_second": 1.199,
786
+ "eval_steps_per_second": 0.15,
787
+ "step": 20500
788
+ },
789
+ {
790
+ "epoch": 1.4,
791
+ "learning_rate": 2.665265826162364e-05,
792
+ "loss": 0.8983,
793
+ "step": 21000
794
+ },
795
+ {
796
+ "epoch": 1.4,
797
+ "eval_gen_len": 126.357,
798
+ "eval_loss": 1.5135449171066284,
799
+ "eval_rouge1": 45.6846,
800
+ "eval_rouge2": 21.6282,
801
+ "eval_rougeL": 31.2929,
802
+ "eval_rougeLsum": 40.6821,
803
+ "eval_runtime": 5538.2932,
804
+ "eval_samples_per_second": 1.198,
805
+ "eval_steps_per_second": 0.15,
806
+ "step": 21000
807
+ },
808
+ {
809
+ "epoch": 1.43,
810
+ "learning_rate": 2.6096769172614682e-05,
811
+ "loss": 0.907,
812
+ "step": 21500
813
+ },
814
+ {
815
+ "epoch": 1.43,
816
+ "eval_gen_len": 127.0029,
817
+ "eval_loss": 1.5076923370361328,
818
+ "eval_rouge1": 45.4873,
819
+ "eval_rouge2": 21.455,
820
+ "eval_rougeL": 31.1193,
821
+ "eval_rougeLsum": 40.5128,
822
+ "eval_runtime": 5539.9922,
823
+ "eval_samples_per_second": 1.197,
824
+ "eval_steps_per_second": 0.15,
825
+ "step": 21500
826
+ },
827
+ {
828
+ "epoch": 1.47,
829
+ "learning_rate": 2.5540880083605723e-05,
830
+ "loss": 0.9097,
831
+ "step": 22000
832
+ },
833
+ {
834
+ "epoch": 1.47,
835
+ "eval_gen_len": 126.8553,
836
+ "eval_loss": 1.5052434206008911,
837
+ "eval_rouge1": 45.5988,
838
+ "eval_rouge2": 21.6134,
839
+ "eval_rougeL": 31.247,
840
+ "eval_rougeLsum": 40.58,
841
+ "eval_runtime": 5539.7468,
842
+ "eval_samples_per_second": 1.197,
843
+ "eval_steps_per_second": 0.15,
844
+ "step": 22000
845
+ },
846
+ {
847
+ "epoch": 1.5,
848
+ "learning_rate": 2.498499099459676e-05,
849
+ "loss": 0.9033,
850
+ "step": 22500
851
+ },
852
+ {
853
+ "epoch": 1.5,
854
+ "eval_gen_len": 127.0048,
855
+ "eval_loss": 1.5133850574493408,
856
+ "eval_rouge1": 45.3223,
857
+ "eval_rouge2": 21.2968,
858
+ "eval_rougeL": 30.9357,
859
+ "eval_rougeLsum": 40.3813,
860
+ "eval_runtime": 9404.5,
861
+ "eval_samples_per_second": 0.705,
862
+ "eval_steps_per_second": 0.088,
863
+ "step": 22500
864
+ },
865
+ {
866
+ "epoch": 1.53,
867
+ "learning_rate": 2.44291019055878e-05,
868
+ "loss": 0.8925,
869
+ "step": 23000
870
+ },
871
+ {
872
+ "epoch": 1.53,
873
+ "eval_gen_len": 126.7316,
874
+ "eval_loss": 1.510839819908142,
875
+ "eval_rouge1": 45.6747,
876
+ "eval_rouge2": 21.6374,
877
+ "eval_rougeL": 31.31,
878
+ "eval_rougeLsum": 40.7015,
879
+ "eval_runtime": 22732.2519,
880
+ "eval_samples_per_second": 0.292,
881
+ "eval_steps_per_second": 0.037,
882
+ "step": 23000
883
+ },
884
+ {
885
+ "epoch": 1.57,
886
+ "learning_rate": 2.387321281657884e-05,
887
+ "loss": 0.8913,
888
+ "step": 23500
889
+ },
890
+ {
891
+ "epoch": 1.57,
892
+ "eval_gen_len": 126.6869,
893
+ "eval_loss": 1.5129714012145996,
894
+ "eval_rouge1": 45.6531,
895
+ "eval_rouge2": 21.6354,
896
+ "eval_rougeL": 31.2956,
897
+ "eval_rougeLsum": 40.6555,
898
+ "eval_runtime": 6945.8776,
899
+ "eval_samples_per_second": 0.955,
900
+ "eval_steps_per_second": 0.119,
901
+ "step": 23500
902
+ },
903
+ {
904
+ "epoch": 1.6,
905
+ "learning_rate": 2.3317323727569874e-05,
906
+ "loss": 0.8931,
907
+ "step": 24000
908
+ },
909
+ {
910
+ "epoch": 1.6,
911
+ "eval_gen_len": 126.4862,
912
+ "eval_loss": 1.5111068487167358,
913
+ "eval_rouge1": 45.7876,
914
+ "eval_rouge2": 21.7115,
915
+ "eval_rougeL": 31.3274,
916
+ "eval_rougeLsum": 40.7579,
917
+ "eval_runtime": 5539.5619,
918
+ "eval_samples_per_second": 1.197,
919
+ "eval_steps_per_second": 0.15,
920
+ "step": 24000
921
+ },
922
+ {
923
+ "epoch": 1.63,
924
+ "learning_rate": 2.2761434638560915e-05,
925
+ "loss": 0.9009,
926
+ "step": 24500
927
+ },
928
+ {
929
+ "epoch": 1.63,
930
+ "eval_gen_len": 126.6229,
931
+ "eval_loss": 1.5083845853805542,
932
+ "eval_rouge1": 45.6359,
933
+ "eval_rouge2": 21.583,
934
+ "eval_rougeL": 31.2775,
935
+ "eval_rougeLsum": 40.6351,
936
+ "eval_runtime": 5545.0209,
937
+ "eval_samples_per_second": 1.196,
938
+ "eval_steps_per_second": 0.15,
939
+ "step": 24500
940
+ },
941
+ {
942
+ "epoch": 1.67,
943
+ "learning_rate": 2.2205545549551953e-05,
944
+ "loss": 0.8925,
945
+ "step": 25000
946
+ },
947
+ {
948
+ "epoch": 1.67,
949
+ "eval_gen_len": 126.8396,
950
+ "eval_loss": 1.5094473361968994,
951
+ "eval_rouge1": 45.397,
952
+ "eval_rouge2": 21.4266,
953
+ "eval_rougeL": 31.082,
954
+ "eval_rougeLsum": 40.4261,
955
+ "eval_runtime": 5534.6802,
956
+ "eval_samples_per_second": 1.198,
957
+ "eval_steps_per_second": 0.15,
958
+ "step": 25000
959
+ },
960
+ {
961
+ "epoch": 1.7,
962
+ "learning_rate": 2.1649656460542994e-05,
963
+ "loss": 0.8991,
964
+ "step": 25500
965
+ },
966
+ {
967
+ "epoch": 1.7,
968
+ "eval_gen_len": 126.722,
969
+ "eval_loss": 1.512014627456665,
970
+ "eval_rouge1": 45.2851,
971
+ "eval_rouge2": 21.2798,
972
+ "eval_rougeL": 30.8973,
973
+ "eval_rougeLsum": 40.2787,
974
+ "eval_runtime": 5538.1327,
975
+ "eval_samples_per_second": 1.198,
976
+ "eval_steps_per_second": 0.15,
977
+ "step": 25500
978
+ },
979
+ {
980
+ "epoch": 1.73,
981
+ "learning_rate": 2.1093767371534032e-05,
982
+ "loss": 0.9019,
983
+ "step": 26000
984
+ },
985
+ {
986
+ "epoch": 1.73,
987
+ "eval_gen_len": 126.8048,
988
+ "eval_loss": 1.510252833366394,
989
+ "eval_rouge1": 45.2905,
990
+ "eval_rouge2": 21.2992,
991
+ "eval_rougeL": 30.9204,
992
+ "eval_rougeLsum": 40.3262,
993
+ "eval_runtime": 5535.5354,
994
+ "eval_samples_per_second": 1.198,
995
+ "eval_steps_per_second": 0.15,
996
+ "step": 26000
997
+ },
998
+ {
999
+ "epoch": 1.77,
1000
+ "learning_rate": 2.0537878282525073e-05,
1001
+ "loss": 0.891,
1002
+ "step": 26500
1003
+ },
1004
+ {
1005
+ "epoch": 1.77,
1006
+ "eval_gen_len": 126.2902,
1007
+ "eval_loss": 1.5112383365631104,
1008
+ "eval_rouge1": 45.7091,
1009
+ "eval_rouge2": 21.6159,
1010
+ "eval_rougeL": 31.2889,
1011
+ "eval_rougeLsum": 40.6986,
1012
+ "eval_runtime": 5537.5343,
1013
+ "eval_samples_per_second": 1.198,
1014
+ "eval_steps_per_second": 0.15,
1015
+ "step": 26500
1016
+ },
1017
+ {
1018
+ "epoch": 1.8,
1019
+ "learning_rate": 1.998198919351611e-05,
1020
+ "loss": 0.898,
1021
+ "step": 27000
1022
+ },
1023
+ {
1024
+ "epoch": 1.8,
1025
+ "eval_gen_len": 126.5218,
1026
+ "eval_loss": 1.5084278583526611,
1027
+ "eval_rouge1": 45.4964,
1028
+ "eval_rouge2": 21.4702,
1029
+ "eval_rougeL": 31.177,
1030
+ "eval_rougeLsum": 40.5432,
1031
+ "eval_runtime": 5530.5865,
1032
+ "eval_samples_per_second": 1.199,
1033
+ "eval_steps_per_second": 0.15,
1034
+ "step": 27000
1035
+ },
1036
+ {
1037
+ "epoch": 1.83,
1038
+ "learning_rate": 1.942610010450715e-05,
1039
+ "loss": 0.8839,
1040
+ "step": 27500
1041
+ },
1042
+ {
1043
+ "epoch": 1.83,
1044
+ "eval_gen_len": 126.8648,
1045
+ "eval_loss": 1.5090144872665405,
1046
+ "eval_rouge1": 45.6279,
1047
+ "eval_rouge2": 21.5346,
1048
+ "eval_rougeL": 31.252,
1049
+ "eval_rougeLsum": 40.6096,
1050
+ "eval_runtime": 5522.5033,
1051
+ "eval_samples_per_second": 1.201,
1052
+ "eval_steps_per_second": 0.15,
1053
+ "step": 27500
1054
+ },
1055
+ {
1056
+ "epoch": 1.87,
1057
+ "learning_rate": 1.8870211015498187e-05,
1058
+ "loss": 0.8899,
1059
+ "step": 28000
1060
+ },
1061
+ {
1062
+ "epoch": 1.87,
1063
+ "eval_gen_len": 126.8498,
1064
+ "eval_loss": 1.5073039531707764,
1065
+ "eval_rouge1": 45.6406,
1066
+ "eval_rouge2": 21.5551,
1067
+ "eval_rougeL": 31.2519,
1068
+ "eval_rougeLsum": 40.6425,
1069
+ "eval_runtime": 5520.5026,
1070
+ "eval_samples_per_second": 1.202,
1071
+ "eval_steps_per_second": 0.15,
1072
+ "step": 28000
1073
+ },
1074
+ {
1075
+ "epoch": 1.9,
1076
+ "learning_rate": 1.8314321926489228e-05,
1077
+ "loss": 0.8904,
1078
+ "step": 28500
1079
+ },
1080
+ {
1081
+ "epoch": 1.9,
1082
+ "eval_gen_len": 126.689,
1083
+ "eval_loss": 1.5086652040481567,
1084
+ "eval_rouge1": 45.7334,
1085
+ "eval_rouge2": 21.7071,
1086
+ "eval_rougeL": 31.3069,
1087
+ "eval_rougeLsum": 40.6992,
1088
+ "eval_runtime": 5519.0861,
1089
+ "eval_samples_per_second": 1.202,
1090
+ "eval_steps_per_second": 0.15,
1091
+ "step": 28500
1092
+ },
1093
+ {
1094
+ "epoch": 1.93,
1095
+ "learning_rate": 1.7758432837480266e-05,
1096
+ "loss": 0.8958,
1097
+ "step": 29000
1098
+ },
1099
+ {
1100
+ "epoch": 1.93,
1101
+ "eval_gen_len": 126.9157,
1102
+ "eval_loss": 1.5112992525100708,
1103
+ "eval_rouge1": 45.4618,
1104
+ "eval_rouge2": 21.4623,
1105
+ "eval_rougeL": 31.0914,
1106
+ "eval_rougeLsum": 40.4648,
1107
+ "eval_runtime": 5520.0088,
1108
+ "eval_samples_per_second": 1.202,
1109
+ "eval_steps_per_second": 0.15,
1110
+ "step": 29000
1111
+ },
1112
+ {
1113
+ "epoch": 1.97,
1114
+ "learning_rate": 1.7202543748471307e-05,
1115
+ "loss": 0.8991,
1116
+ "step": 29500
1117
+ },
1118
+ {
1119
+ "epoch": 1.97,
1120
+ "eval_gen_len": 126.7855,
1121
+ "eval_loss": 1.5126971006393433,
1122
+ "eval_rouge1": 45.6364,
1123
+ "eval_rouge2": 21.5467,
1124
+ "eval_rougeL": 31.2001,
1125
+ "eval_rougeLsum": 40.5946,
1126
+ "eval_runtime": 5532.4138,
1127
+ "eval_samples_per_second": 1.199,
1128
+ "eval_steps_per_second": 0.15,
1129
+ "step": 29500
1130
+ },
1131
+ {
1132
+ "epoch": 2.0,
1133
+ "learning_rate": 1.6646654659462345e-05,
1134
+ "loss": 0.889,
1135
+ "step": 30000
1136
+ },
1137
+ {
1138
+ "epoch": 2.0,
1139
+ "eval_gen_len": 126.6989,
1140
+ "eval_loss": 1.5128982067108154,
1141
+ "eval_rouge1": 45.3668,
1142
+ "eval_rouge2": 21.3563,
1143
+ "eval_rougeL": 30.998,
1144
+ "eval_rougeLsum": 40.3714,
1145
+ "eval_runtime": 5736.9272,
1146
+ "eval_samples_per_second": 1.156,
1147
+ "eval_steps_per_second": 0.145,
1148
+ "step": 30000
1149
+ }
1150
+ ],
1151
+ "max_steps": 44973,
1152
+ "num_train_epochs": 3,
1153
+ "total_flos": 7.801293866564321e+17,
1154
+ "trial_name": null,
1155
+ "trial_params": null
1156
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71a6e55853e1185615590ece7402cc04f12cebc0d4953dbb1d2e764254b022d1
3
+ size 3055