marksverdhei commited on
Commit
34e6049
1 Parent(s): 05d1180
config.json ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/pegasus-large",
3
+ "activation_dropout": 0.1,
4
+ "activation_function": "relu",
5
+ "add_bias_logits": false,
6
+ "add_final_layer_norm": true,
7
+ "architectures": [
8
+ "PegasusForConditionalGeneration"
9
+ ],
10
+ "attention_dropout": 0.1,
11
+ "bos_token_id": 0,
12
+ "classif_dropout": 0.0,
13
+ "classifier_dropout": 0.0,
14
+ "d_model": 1024,
15
+ "decoder_attention_heads": 16,
16
+ "decoder_ffn_dim": 4096,
17
+ "decoder_layerdrop": 0.0,
18
+ "decoder_layers": 16,
19
+ "decoder_start_token_id": 0,
20
+ "dropout": 0.1,
21
+ "encoder_attention_heads": 16,
22
+ "encoder_ffn_dim": 4096,
23
+ "encoder_layerdrop": 0.0,
24
+ "encoder_layers": 16,
25
+ "eos_token_id": 1,
26
+ "extra_pos_embeddings": 1,
27
+ "force_bos_token_to_be_generated": false,
28
+ "forced_eos_token_id": 1,
29
+ "gradient_checkpointing": false,
30
+ "id2label": {
31
+ "0": "LABEL_0",
32
+ "1": "LABEL_1",
33
+ "2": "LABEL_2"
34
+ },
35
+ "init_std": 0.02,
36
+ "is_encoder_decoder": true,
37
+ "label2id": {
38
+ "LABEL_0": 0,
39
+ "LABEL_1": 1,
40
+ "LABEL_2": 2
41
+ },
42
+ "length_penalty": 0.8,
43
+ "max_length": 256,
44
+ "max_position_embeddings": 1024,
45
+ "model_type": "pegasus",
46
+ "normalize_before": true,
47
+ "normalize_embedding": false,
48
+ "num_beams": 8,
49
+ "num_hidden_layers": 16,
50
+ "pad_token_id": 0,
51
+ "scale_embedding": true,
52
+ "static_position_embeddings": true,
53
+ "task_specific_params": {
54
+ "summarization_aeslc": {
55
+ "length_penalty": 0.6,
56
+ "max_length": 32,
57
+ "max_position_embeddings": 512
58
+ },
59
+ "summarization_arxiv": {
60
+ "length_penalty": 0.8,
61
+ "max_length": 256,
62
+ "max_position_embeddings": 1024
63
+ },
64
+ "summarization_big_patent": {
65
+ "length_penalty": 0.7,
66
+ "max_length": 256,
67
+ "max_position_embeddings": 1024
68
+ },
69
+ "summarization_billsum": {
70
+ "length_penalty": 0.6,
71
+ "max_length": 256,
72
+ "max_position_embeddings": 1024
73
+ },
74
+ "summarization_cnn_dailymail": {
75
+ "length_penalty": 0.8,
76
+ "max_length": 128,
77
+ "max_position_embeddings": 1024
78
+ },
79
+ "summarization_gigaword": {
80
+ "length_penalty": 0.6,
81
+ "max_length": 32,
82
+ "max_position_embeddings": 128
83
+ },
84
+ "summarization_large": {
85
+ "length_penalty": 0.8,
86
+ "max_length": 256,
87
+ "max_position_embeddings": 1024
88
+ },
89
+ "summarization_multi_news": {
90
+ "length_penalty": 0.8,
91
+ "max_length": 256,
92
+ "max_position_embeddings": 1024
93
+ },
94
+ "summarization_newsroom": {
95
+ "length_penalty": 0.8,
96
+ "max_length": 128,
97
+ "max_position_embeddings": 512
98
+ },
99
+ "summarization_pubmed": {
100
+ "length_penalty": 0.8,
101
+ "max_length": 256,
102
+ "max_position_embeddings": 1024
103
+ },
104
+ "summarization_reddit_tifu": {
105
+ "length_penalty": 0.6,
106
+ "max_length": 128,
107
+ "max_position_embeddings": 512
108
+ },
109
+ "summarization_wikihow": {
110
+ "length_penalty": 0.6,
111
+ "max_length": 256,
112
+ "max_position_embeddings": 512
113
+ },
114
+ "summarization_xsum": {
115
+ "length_penalty": 0.8,
116
+ "max_length": 64,
117
+ "max_position_embeddings": 512
118
+ }
119
+ },
120
+ "torch_dtype": "float32",
121
+ "transformers_version": "4.18.0",
122
+ "use_cache": true,
123
+ "vocab_size": 96103
124
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa812010b4ff9f34a67a6daf1c61ed2d3ad277855af06c9b50e3632af6e7b098
3
+ size 5840398
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7e7eac08af01fb1cbaffcb601221e54e1b5d59a7481db359cc0dd1eb531fe81
3
+ size 2283800049
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:932549f91be8319a73a273cbb6990bee1e2b8673c5741865227af0a885a65561
3
+ size 14503
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:116b86d9ff55b5d6b9b20cd33debff773f99432381dcf591bce71e44ae8b042c
3
+ size 623
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask_2>", "additional_special_tokens": ["<mask_1>", "<unk_2>", "<unk_3>", "<unk_4>", "<unk_5>", "<unk_6>", "<unk_7>", "<unk_8>", "<unk_9>", "<unk_10>", "<unk_11>", "<unk_12>", "<unk_13>", "<unk_14>", "<unk_15>", "<unk_16>", "<unk_17>", "<unk_18>", "<unk_19>", "<unk_20>", "<unk_21>", "<unk_22>", "<unk_23>", "<unk_24>", "<unk_25>", "<unk_26>", "<unk_27>", "<unk_28>", "<unk_29>", "<unk_30>", "<unk_31>", "<unk_32>", "<unk_33>", "<unk_34>", "<unk_35>", "<unk_36>", "<unk_37>", "<unk_38>", "<unk_39>", "<unk_40>", "<unk_41>", "<unk_42>", "<unk_43>", "<unk_44>", "<unk_45>", "<unk_46>", "<unk_47>", "<unk_48>", "<unk_49>", "<unk_50>", "<unk_51>", "<unk_52>", "<unk_53>", "<unk_54>", "<unk_55>", "<unk_56>", "<unk_57>", "<unk_58>", "<unk_59>", "<unk_60>", "<unk_61>", "<unk_62>", "<unk_63>", "<unk_64>", "<unk_65>", "<unk_66>", "<unk_67>", "<unk_68>", "<unk_69>", "<unk_70>", "<unk_71>", "<unk_72>", "<unk_73>", "<unk_74>", "<unk_75>", "<unk_76>", "<unk_77>", "<unk_78>", "<unk_79>", "<unk_80>", "<unk_81>", "<unk_82>", "<unk_83>", "<unk_84>", "<unk_85>", "<unk_86>", "<unk_87>", "<unk_88>", "<unk_89>", "<unk_90>", "<unk_91>", "<unk_92>", "<unk_93>", "<unk_94>", "<unk_95>", "<unk_96>", "<unk_97>", "<unk_98>", "<unk_99>", "<unk_100>", "<unk_101>", "<unk_102>"]}
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0015189ef36359283fec8b93cf6d9ce51bca37eb1101defc68a53b394913b96c
3
+ size 1912529
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"pad_token": "<pad>", "eos_token": "</s>", "unk_token": "<unk>", "mask_token": "<mask_2>", "mask_token_sent": "<mask_1>", "offset": 103, "additional_special_tokens": ["<mask_1>", "<unk_2>", "<unk_3>", "<unk_4>", "<unk_5>", "<unk_6>", "<unk_7>", "<unk_8>", "<unk_9>", "<unk_10>", "<unk_11>", "<unk_12>", "<unk_13>", "<unk_14>", "<unk_15>", "<unk_16>", "<unk_17>", "<unk_18>", "<unk_19>", "<unk_20>", "<unk_21>", "<unk_22>", "<unk_23>", "<unk_24>", "<unk_25>", "<unk_26>", "<unk_27>", "<unk_28>", "<unk_29>", "<unk_30>", "<unk_31>", "<unk_32>", "<unk_33>", "<unk_34>", "<unk_35>", "<unk_36>", "<unk_37>", "<unk_38>", "<unk_39>", "<unk_40>", "<unk_41>", "<unk_42>", "<unk_43>", "<unk_44>", "<unk_45>", "<unk_46>", "<unk_47>", "<unk_48>", "<unk_49>", "<unk_50>", "<unk_51>", "<unk_52>", "<unk_53>", "<unk_54>", "<unk_55>", "<unk_56>", "<unk_57>", "<unk_58>", "<unk_59>", "<unk_60>", "<unk_61>", "<unk_62>", "<unk_63>", "<unk_64>", "<unk_65>", "<unk_66>", "<unk_67>", "<unk_68>", "<unk_69>", "<unk_70>", "<unk_71>", "<unk_72>", "<unk_73>", "<unk_74>", "<unk_75>", "<unk_76>", "<unk_77>", "<unk_78>", "<unk_79>", "<unk_80>", "<unk_81>", "<unk_82>", "<unk_83>", "<unk_84>", "<unk_85>", "<unk_86>", "<unk_87>", "<unk_88>", "<unk_89>", "<unk_90>", "<unk_91>", "<unk_92>", "<unk_93>", "<unk_94>", "<unk_95>", "<unk_96>", "<unk_97>", "<unk_98>", "<unk_99>", "<unk_100>", "<unk_101>", "<unk_102>"], "model_max_length": 1024, "special_tokens_map_file": null, "full_tokenizer_file": null, "name_or_path": "google/pegasus-large", "sp_model_kwargs": {}, "tokenizer_class": "PegasusTokenizer"}
trainer_state.json ADDED
@@ -0,0 +1,376 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.2010900974273682,
3
+ "best_model_checkpoint": "./checkpoints/pegasus-large/checkpoint-11412",
4
+ "epoch": 12.0,
5
+ "global_step": 11412,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.27,
12
+ "learning_rate": 5e-06,
13
+ "loss": 8.7673,
14
+ "step": 256
15
+ },
16
+ {
17
+ "epoch": 0.54,
18
+ "learning_rate": 1e-05,
19
+ "loss": 7.6286,
20
+ "step": 512
21
+ },
22
+ {
23
+ "epoch": 0.81,
24
+ "learning_rate": 9.93178426774675e-06,
25
+ "loss": 7.0271,
26
+ "step": 768
27
+ },
28
+ {
29
+ "epoch": 1.0,
30
+ "eval_loss": 6.360767364501953,
31
+ "eval_runtime": 8.5969,
32
+ "eval_samples_per_second": 58.16,
33
+ "eval_steps_per_second": 7.328,
34
+ "step": 951
35
+ },
36
+ {
37
+ "epoch": 1.08,
38
+ "learning_rate": 9.863568535493498e-06,
39
+ "loss": 6.6826,
40
+ "step": 1024
41
+ },
42
+ {
43
+ "epoch": 1.35,
44
+ "learning_rate": 9.795352803240248e-06,
45
+ "loss": 6.0794,
46
+ "step": 1280
47
+ },
48
+ {
49
+ "epoch": 1.62,
50
+ "learning_rate": 9.727137070986997e-06,
51
+ "loss": 4.7646,
52
+ "step": 1536
53
+ },
54
+ {
55
+ "epoch": 1.88,
56
+ "learning_rate": 9.658921338733747e-06,
57
+ "loss": 2.6262,
58
+ "step": 1792
59
+ },
60
+ {
61
+ "epoch": 2.0,
62
+ "eval_loss": 1.3431365489959717,
63
+ "eval_runtime": 8.7742,
64
+ "eval_samples_per_second": 56.985,
65
+ "eval_steps_per_second": 7.18,
66
+ "step": 1902
67
+ },
68
+ {
69
+ "epoch": 2.15,
70
+ "learning_rate": 9.590705606480494e-06,
71
+ "loss": 1.5925,
72
+ "step": 2048
73
+ },
74
+ {
75
+ "epoch": 2.42,
76
+ "learning_rate": 9.522489874227244e-06,
77
+ "loss": 1.4186,
78
+ "step": 2304
79
+ },
80
+ {
81
+ "epoch": 2.69,
82
+ "learning_rate": 9.454274141973993e-06,
83
+ "loss": 1.4248,
84
+ "step": 2560
85
+ },
86
+ {
87
+ "epoch": 2.96,
88
+ "learning_rate": 9.386058409720743e-06,
89
+ "loss": 1.4015,
90
+ "step": 2816
91
+ },
92
+ {
93
+ "epoch": 3.0,
94
+ "eval_loss": 1.2660728693008423,
95
+ "eval_runtime": 8.6854,
96
+ "eval_samples_per_second": 57.568,
97
+ "eval_steps_per_second": 7.254,
98
+ "step": 2853
99
+ },
100
+ {
101
+ "epoch": 3.23,
102
+ "learning_rate": 9.31784267746749e-06,
103
+ "loss": 1.3516,
104
+ "step": 3072
105
+ },
106
+ {
107
+ "epoch": 3.5,
108
+ "learning_rate": 9.24962694521424e-06,
109
+ "loss": 1.2862,
110
+ "step": 3328
111
+ },
112
+ {
113
+ "epoch": 3.77,
114
+ "learning_rate": 9.18141121296099e-06,
115
+ "loss": 1.2847,
116
+ "step": 3584
117
+ },
118
+ {
119
+ "epoch": 4.0,
120
+ "eval_loss": 1.238619327545166,
121
+ "eval_runtime": 8.6335,
122
+ "eval_samples_per_second": 57.914,
123
+ "eval_steps_per_second": 7.297,
124
+ "step": 3804
125
+ },
126
+ {
127
+ "epoch": 4.04,
128
+ "learning_rate": 9.11319548070774e-06,
129
+ "loss": 1.3335,
130
+ "step": 3840
131
+ },
132
+ {
133
+ "epoch": 4.31,
134
+ "learning_rate": 9.044979748454487e-06,
135
+ "loss": 1.3177,
136
+ "step": 4096
137
+ },
138
+ {
139
+ "epoch": 4.58,
140
+ "learning_rate": 8.976764016201236e-06,
141
+ "loss": 1.2456,
142
+ "step": 4352
143
+ },
144
+ {
145
+ "epoch": 4.85,
146
+ "learning_rate": 8.908548283947986e-06,
147
+ "loss": 1.269,
148
+ "step": 4608
149
+ },
150
+ {
151
+ "epoch": 5.0,
152
+ "eval_loss": 1.2249763011932373,
153
+ "eval_runtime": 8.6401,
154
+ "eval_samples_per_second": 57.87,
155
+ "eval_steps_per_second": 7.292,
156
+ "step": 4755
157
+ },
158
+ {
159
+ "epoch": 5.11,
160
+ "learning_rate": 8.840332551694735e-06,
161
+ "loss": 1.2381,
162
+ "step": 4864
163
+ },
164
+ {
165
+ "epoch": 5.38,
166
+ "learning_rate": 8.772116819441483e-06,
167
+ "loss": 1.224,
168
+ "step": 5120
169
+ },
170
+ {
171
+ "epoch": 5.65,
172
+ "learning_rate": 8.703901087188233e-06,
173
+ "loss": 1.2753,
174
+ "step": 5376
175
+ },
176
+ {
177
+ "epoch": 5.92,
178
+ "learning_rate": 8.635685354934982e-06,
179
+ "loss": 1.2279,
180
+ "step": 5632
181
+ },
182
+ {
183
+ "epoch": 6.0,
184
+ "eval_loss": 1.2167253494262695,
185
+ "eval_runtime": 8.6769,
186
+ "eval_samples_per_second": 57.625,
187
+ "eval_steps_per_second": 7.261,
188
+ "step": 5706
189
+ },
190
+ {
191
+ "epoch": 6.19,
192
+ "learning_rate": 8.567469622681732e-06,
193
+ "loss": 1.2545,
194
+ "step": 5888
195
+ },
196
+ {
197
+ "epoch": 6.46,
198
+ "learning_rate": 8.499253890428481e-06,
199
+ "loss": 1.1907,
200
+ "step": 6144
201
+ },
202
+ {
203
+ "epoch": 6.73,
204
+ "learning_rate": 8.431038158175229e-06,
205
+ "loss": 1.2113,
206
+ "step": 6400
207
+ },
208
+ {
209
+ "epoch": 7.0,
210
+ "learning_rate": 8.362822425921979e-06,
211
+ "loss": 1.1956,
212
+ "step": 6656
213
+ },
214
+ {
215
+ "epoch": 7.0,
216
+ "eval_loss": 1.210858941078186,
217
+ "eval_runtime": 8.6349,
218
+ "eval_samples_per_second": 57.904,
219
+ "eval_steps_per_second": 7.296,
220
+ "step": 6657
221
+ },
222
+ {
223
+ "epoch": 7.27,
224
+ "learning_rate": 8.294606693668728e-06,
225
+ "loss": 1.1814,
226
+ "step": 6912
227
+ },
228
+ {
229
+ "epoch": 7.54,
230
+ "learning_rate": 8.226390961415478e-06,
231
+ "loss": 1.1784,
232
+ "step": 7168
233
+ },
234
+ {
235
+ "epoch": 7.81,
236
+ "learning_rate": 8.158175229162227e-06,
237
+ "loss": 1.179,
238
+ "step": 7424
239
+ },
240
+ {
241
+ "epoch": 8.0,
242
+ "eval_loss": 1.2075146436691284,
243
+ "eval_runtime": 8.6767,
244
+ "eval_samples_per_second": 57.625,
245
+ "eval_steps_per_second": 7.261,
246
+ "step": 7608
247
+ },
248
+ {
249
+ "epoch": 8.08,
250
+ "learning_rate": 8.089959496908975e-06,
251
+ "loss": 1.2127,
252
+ "step": 7680
253
+ },
254
+ {
255
+ "epoch": 8.34,
256
+ "learning_rate": 8.021743764655724e-06,
257
+ "loss": 1.1417,
258
+ "step": 7936
259
+ },
260
+ {
261
+ "epoch": 8.61,
262
+ "learning_rate": 7.953528032402474e-06,
263
+ "loss": 1.1988,
264
+ "step": 8192
265
+ },
266
+ {
267
+ "epoch": 8.88,
268
+ "learning_rate": 7.885312300149223e-06,
269
+ "loss": 1.183,
270
+ "step": 8448
271
+ },
272
+ {
273
+ "epoch": 9.0,
274
+ "eval_loss": 1.2050586938858032,
275
+ "eval_runtime": 8.769,
276
+ "eval_samples_per_second": 57.019,
277
+ "eval_steps_per_second": 7.184,
278
+ "step": 8559
279
+ },
280
+ {
281
+ "epoch": 9.15,
282
+ "learning_rate": 7.817096567895973e-06,
283
+ "loss": 1.1557,
284
+ "step": 8704
285
+ },
286
+ {
287
+ "epoch": 9.42,
288
+ "learning_rate": 7.74888083564272e-06,
289
+ "loss": 1.149,
290
+ "step": 8960
291
+ },
292
+ {
293
+ "epoch": 9.69,
294
+ "learning_rate": 7.68066510338947e-06,
295
+ "loss": 1.1279,
296
+ "step": 9216
297
+ },
298
+ {
299
+ "epoch": 9.96,
300
+ "learning_rate": 7.612449371136219e-06,
301
+ "loss": 1.1471,
302
+ "step": 9472
303
+ },
304
+ {
305
+ "epoch": 10.0,
306
+ "eval_loss": 1.203889012336731,
307
+ "eval_runtime": 8.6047,
308
+ "eval_samples_per_second": 58.108,
309
+ "eval_steps_per_second": 7.322,
310
+ "step": 9510
311
+ },
312
+ {
313
+ "epoch": 10.23,
314
+ "learning_rate": 7.544233638882968e-06,
315
+ "loss": 1.1345,
316
+ "step": 9728
317
+ },
318
+ {
319
+ "epoch": 10.5,
320
+ "learning_rate": 7.476017906629717e-06,
321
+ "loss": 1.1573,
322
+ "step": 9984
323
+ },
324
+ {
325
+ "epoch": 10.77,
326
+ "learning_rate": 7.4078021743764664e-06,
327
+ "loss": 1.1148,
328
+ "step": 10240
329
+ },
330
+ {
331
+ "epoch": 11.0,
332
+ "eval_loss": 1.2023481130599976,
333
+ "eval_runtime": 8.7109,
334
+ "eval_samples_per_second": 57.4,
335
+ "eval_steps_per_second": 7.232,
336
+ "step": 10461
337
+ },
338
+ {
339
+ "epoch": 11.04,
340
+ "learning_rate": 7.339586442123215e-06,
341
+ "loss": 1.1417,
342
+ "step": 10496
343
+ },
344
+ {
345
+ "epoch": 11.31,
346
+ "learning_rate": 7.2713707098699646e-06,
347
+ "loss": 1.1067,
348
+ "step": 10752
349
+ },
350
+ {
351
+ "epoch": 11.58,
352
+ "learning_rate": 7.203154977616713e-06,
353
+ "loss": 1.1289,
354
+ "step": 11008
355
+ },
356
+ {
357
+ "epoch": 11.84,
358
+ "learning_rate": 7.134939245363463e-06,
359
+ "loss": 1.1112,
360
+ "step": 11264
361
+ },
362
+ {
363
+ "epoch": 12.0,
364
+ "eval_loss": 1.2010900974273682,
365
+ "eval_runtime": 8.6876,
366
+ "eval_samples_per_second": 57.553,
367
+ "eval_steps_per_second": 7.252,
368
+ "step": 11412
369
+ }
370
+ ],
371
+ "max_steps": 38040,
372
+ "num_train_epochs": 40,
373
+ "total_flos": 2.5573624673245594e+17,
374
+ "trial_name": null,
375
+ "trial_params": null
376
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4641decd73fe73e5d1cbf603b963db004fe0398d1401cc8a5c0821f489515c61
3
+ size 3183