chaojiang06 committed on
Commit
cb847b9
1 Parent(s): 744edd2

Upload 15 files

Browse files
README.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ tags:
4
+ - generated_from_trainer
5
+ metrics:
6
+ - accuracy
7
+ model-index:
8
+ - name: tst-translation354
9
+ results: []
10
+ ---
11
+
12
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
+ should probably proofread and complete it, then remove this comment. -->
14
+
15
+ # tst-translation354
16
+
17
+ This model is a fine-tuned version of [t5-large](https://huggingface.co/t5-large) on an unknown dataset.
18
+ It achieves the following results on the evaluation set (reported for the best checkpoint by accuracy, epoch 8 / step 840):
19
+ - Loss: 0.1871
20
+ - Accuracy: 0.8607
21
+
22
+ ## Model description
23
+
24
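+ More information needed. The uploaded checkpoint is a `T5ForConditionalGeneration` model whose vocabulary is extended to 32104 entries by four added tokens: `[MATH]`, `[EQUATION]`, `[REF]`, and `[CITATION]`.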
25
+
26
+ ## Intended uses & limitations
27
+
28
+ More information needed
29
+
30
+ ## Training and evaluation data
31
+
32
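+ More information needed. The accompanying result files report 1,254 training samples, 438 evaluation samples, and 430 prediction (test) samples.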
33
+
34
+ ## Training procedure
35
+
36
+ ### Training hyperparameters
37
+
38
+ The following hyperparameters were used during training:
39
+ - learning_rate: 5e-05
40
+ - train_batch_size: 12
41
+ - eval_batch_size: 12
42
+ - seed: 42
43
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
+ - lr_scheduler_type: linear
45
+ - num_epochs: 10.0
46
+
47
+ ### Training results
48
+
49
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
50
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
51
+ | No log | 1.0 | 105 | 0.3619 | 0.3699 |
52
+ | No log | 2.0 | 210 | 0.1750 | 0.7352 |
53
+ | No log | 3.0 | 315 | 0.1416 | 0.7922 |
54
+ | No log | 4.0 | 420 | 0.1225 | 0.8425 |
55
+ | 0.3579 | 5.0 | 525 | 0.1310 | 0.8516 |
56
+ | 0.3579 | 6.0 | 630 | 0.1358 | 0.8425 |
57
+ | 0.3579 | 7.0 | 735 | 0.1605 | 0.8493 |
58
+ | 0.3579 | 8.0 | 840 | 0.1871 | 0.8607 |
59
+ | 0.3579 | 9.0 | 945 | 0.1990 | 0.8516 |
60
+ | 0.054 | 10.0 | 1050 | 0.2134 | 0.8607 |
61
+
62
+
63
+ ### Framework versions
64
+
65
+ - Transformers 4.17.0
66
+ - Pytorch 1.11.0+cu113
67
+ - Datasets 1.17.0
68
+ - Tokenizers 0.11.6
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"[EQUATION]": 32101, "[REF]": 32102, "[MATH]": 32100, "[CITATION]": 32103}
all_results.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.860730593607306,
4
+ "eval_loss": 0.1871233731508255,
5
+ "eval_runtime": 15.6788,
6
+ "eval_samples": 438,
7
+ "eval_samples_per_second": 27.936,
8
+ "eval_steps_per_second": 2.36,
9
+ "predict_accuracy": 0.8441860465116279,
10
+ "predict_loss": 0.208589106798172,
11
+ "predict_runtime": 18.0974,
12
+ "predict_samples": 430,
13
+ "predict_samples_per_second": 23.76,
14
+ "predict_steps_per_second": 1.989,
15
+ "train_loss": 0.1974729861531939,
16
+ "train_runtime": 785.0626,
17
+ "train_samples": 1254,
18
+ "train_samples_per_second": 15.973,
19
+ "train_steps_per_second": 1.337
20
+ }
config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "t5-large",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "d_ff": 4096,
7
+ "d_kv": 64,
8
+ "d_model": 1024,
9
+ "decoder_start_token_id": 0,
10
+ "dropout_rate": 0.1,
11
+ "eos_token_id": 1,
12
+ "feed_forward_proj": "relu",
13
+ "initializer_factor": 1.0,
14
+ "is_encoder_decoder": true,
15
+ "layer_norm_epsilon": 1e-06,
16
+ "model_type": "t5",
17
+ "n_positions": 512,
18
+ "num_decoder_layers": 24,
19
+ "num_heads": 16,
20
+ "num_layers": 24,
21
+ "output_past": true,
22
+ "pad_token_id": 0,
23
+ "relative_attention_num_buckets": 32,
24
+ "task_specific_params": {
25
+ "summarization": {
26
+ "early_stopping": true,
27
+ "length_penalty": 2.0,
28
+ "max_length": 200,
29
+ "min_length": 30,
30
+ "no_repeat_ngram_size": 3,
31
+ "num_beams": 4,
32
+ "prefix": "summarize: "
33
+ },
34
+ "translation_en_to_de": {
35
+ "early_stopping": true,
36
+ "max_length": 300,
37
+ "num_beams": 4,
38
+ "prefix": "translate English to German: "
39
+ },
40
+ "translation_en_to_fr": {
41
+ "early_stopping": true,
42
+ "max_length": 300,
43
+ "num_beams": 4,
44
+ "prefix": "translate English to French: "
45
+ },
46
+ "translation_en_to_ro": {
47
+ "early_stopping": true,
48
+ "max_length": 300,
49
+ "num_beams": 4,
50
+ "prefix": "translate English to Romanian: "
51
+ }
52
+ },
53
+ "torch_dtype": "float32",
54
+ "transformers_version": "4.17.0",
55
+ "use_cache": true,
56
+ "vocab_size": 32104
57
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.860730593607306,
4
+ "eval_loss": 0.1871233731508255,
5
+ "eval_runtime": 15.6788,
6
+ "eval_samples": 438,
7
+ "eval_samples_per_second": 27.936,
8
+ "eval_steps_per_second": 2.36
9
+ }
generated_predictions.txt ADDED
@@ -0,0 +1,430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Format
2
+ Content
3
+ Language
4
+ Content
5
+ Content
6
+ Language
7
+ Content
8
+ Format
9
+ Format
10
+ Language
11
+ Format
12
+ Improve-grammar-Typo
13
+ Language
14
+ Content
15
+ Format
16
+ Content
17
+ Improve-grammar-Typo
18
+ Language
19
+ Language
20
+ Content
21
+ Content
22
+ Improve-grammar-Typo
23
+ Language
24
+ Improve-grammar-Typo
25
+ Improve-grammar-Typo
26
+ Format
27
+ Improve-grammar-Typo
28
+ Language
29
+ Content
30
+ Language
31
+ Format
32
+ Improve-grammar-Typo
33
+ Format
34
+ Language
35
+ Content
36
+ Format
37
+ Language
38
+ Format
39
+ Format
40
+ Language
41
+ Language
42
+ Language
43
+ Content
44
+ Content
45
+ Format
46
+ Content
47
+ Format
48
+ Format
49
+ Format
50
+ Format
51
+ Language
52
+ Format
53
+ Improve-grammar-Typo
54
+ Content
55
+ Format
56
+ Language
57
+ Language
58
+ Language
59
+ Content
60
+ Language
61
+ Format
62
+ Content
63
+ Content
64
+ Content
65
+ Content
66
+ Content
67
+ Language
68
+ Format
69
+ Language
70
+ Language
71
+ Language
72
+ Content
73
+ Language
74
+ Improve-grammar-Typo
75
+ Improve-grammar-Typo
76
+ Content
77
+ Improve-grammar-Typo
78
+ Format
79
+ Format
80
+ Improve-grammar-Typo
81
+ Format
82
+ Content
83
+ Content
84
+ Language
85
+ Improve-grammar-Typo
86
+ Format
87
+ Language
88
+ Content
89
+ Format
90
+ Format
91
+ Language
92
+ Improve-grammar-Typo
93
+ Content
94
+ Language
95
+ Format
96
+ Format
97
+ Content
98
+ Language
99
+ Improve-grammar-Typo
100
+ Content
101
+ Format
102
+ Content
103
+ Language
104
+ Language
105
+ Content
106
+ Content
107
+ Content
108
+ Language
109
+ Language
110
+ Format
111
+ Format
112
+ Format
113
+ Format
114
+ Format
115
+ Language
116
+ Content
117
+ Language
118
+ Improve-grammar-Typo
119
+ Format
120
+ Format
121
+ Improve-grammar-Typo
122
+ Content
123
+ Content
124
+ Improve-grammar-Typo
125
+ Language
126
+ Improve-grammar-Typo
127
+ Improve-grammar-Typo
128
+ Content
129
+ Improve-grammar-Typo
130
+ Content
131
+ Improve-grammar-Typo
132
+ Format
133
+ Format
134
+ Content
135
+ Content
136
+ Language
137
+ Content
138
+ Content
139
+ Improve-grammar-Typo
140
+ Content
141
+ Content
142
+ Improve-grammar-Typo
143
+ Improve-grammar-Typo
144
+ Content
145
+ Format
146
+ Language
147
+ Improve-grammar-Typo
148
+ Improve-grammar-Typo
149
+ Content
150
+ Content
151
+ Format
152
+ Content
153
+ Language
154
+ Content
155
+ Improve-grammar-Typo
156
+ Language
157
+ Improve-grammar-Typo
158
+ Content
159
+ Content
160
+ Format
161
+ Improve-grammar-Typo
162
+ Format
163
+ Language
164
+ Content
165
+ Content
166
+ Content
167
+ Improve-grammar-Typo
168
+ Content
169
+ Improve-grammar-Typo
170
+ Format
171
+ Content
172
+ Language
173
+ Format
174
+ Format
175
+ Format
176
+ Language
177
+ Format
178
+ Content
179
+ Content
180
+ Content
181
+ Content
182
+ Content
183
+ Language
184
+ Format
185
+ Language
186
+ Content
187
+ Improve-grammar-Typo
188
+ Format
189
+ Content
190
+ Content
191
+ Language
192
+ Content
193
+ Format
194
+ Format
195
+ Format
196
+ Language
197
+ Language
198
+ Improve-grammar-Typo
199
+ Language
200
+ Format
201
+ Language
202
+ Content
203
+ Language
204
+ Language
205
+ Format
206
+ Content
207
+ Format
208
+ Improve-grammar-Typo
209
+ Format
210
+ Language
211
+ Language
212
+ Content
213
+ Improve-grammar-Typo
214
+ Language
215
+ Format
216
+ Improve-grammar-Typo
217
+ Content
218
+ Content
219
+ Language
220
+ Improve-grammar-Typo
221
+ Format
222
+ Content
223
+ Language
224
+ Format
225
+ Format
226
+ Improve-grammar-Typo
227
+ Language
228
+ Improve-grammar-Typo
229
+ Improve-grammar-Typo
230
+ Language
231
+ Improve-grammar-Typo
232
+ Content
233
+ Language
234
+ Content
235
+ Language
236
+ Language
237
+ Content
238
+ Content
239
+ Content
240
+ Content
241
+ Language
242
+ Content
243
+ Format
244
+ Language
245
+ Language
246
+ Language
247
+ Language
248
+ Language
249
+ Content
250
+ Language
251
+ Format
252
+ Improve-grammar-Typo
253
+ Language
254
+ Language
255
+ Content
256
+ Content
257
+ Language
258
+ Improve-grammar-Typo
259
+ Language
260
+ Format
261
+ Improve-grammar-Typo
262
+ Language
263
+ Improve-grammar-Typo
264
+ Improve-grammar-Typo
265
+ Improve-grammar-Typo
266
+ Improve-grammar-Typo
267
+ Content
268
+ Language
269
+ Improve-grammar-Typo
270
+ Improve-grammar-Typo
271
+ Content
272
+ Improve-grammar-Typo
273
+ Format
274
+ Language
275
+ Language
276
+ Content
277
+ Language
278
+ Language
279
+ Content
280
+ Format
281
+ Content
282
+ Language
283
+ Format
284
+ Language
285
+ Content
286
+ Improve-grammar-Typo
287
+ Improve-grammar-Typo
288
+ Language
289
+ Format
290
+ Content
291
+ Improve-grammar-Typo
292
+ Content
293
+ Content
294
+ Improve-grammar-Typo
295
+ Improve-grammar-Typo
296
+ Format
297
+ Content
298
+ Language
299
+ Improve-grammar-Typo
300
+ Language
301
+ Language
302
+ Improve-grammar-Typo
303
+ Improve-grammar-Typo
304
+ Language
305
+ Language
306
+ Improve-grammar-Typo
307
+ Content
308
+ Content
309
+ Language
310
+ Content
311
+ Content
312
+ Format
313
+ Improve-grammar-Typo
314
+ Format
315
+ Content
316
+ Format
317
+ Format
318
+ Language
319
+ Content
320
+ Improve-grammar-Typo
321
+ Language
322
+ Improve-grammar-Typo
323
+ Language
324
+ Language
325
+ Format
326
+ Content
327
+ Language
328
+ Language
329
+ Language
330
+ Language
331
+ Language
332
+ Improve-grammar-Typo
333
+ Language
334
+ Improve-grammar-Typo
335
+ Improve-grammar-Typo
336
+ Content
337
+ Content
338
+ Language
339
+ Format
340
+ Content
341
+ Improve-grammar-Typo
342
+ Improve-grammar-Typo
343
+ Language
344
+ Content
345
+ Language
346
+ Content
347
+ Content
348
+ Language
349
+ Improve-grammar-Typo
350
+ Improve-grammar-Typo
351
+ Improve-grammar-Typo
352
+ Improve-grammar-Typo
353
+ Content
354
+ Format
355
+ Format
356
+ Language
357
+ Language
358
+ Language
359
+ Content
360
+ Content
361
+ Content
362
+ Content
363
+ Language
364
+ Content
365
+ Language
366
+ Content
367
+ Format
368
+ Improve-grammar-Typo
369
+ Improve-grammar-Typo
370
+ Content
371
+ Format
372
+ Language
373
+ Improve-grammar-Typo
374
+ Improve-grammar-Typo
375
+ Language
376
+ Content
377
+ Content
378
+ Improve-grammar-Typo
379
+ Improve-grammar-Typo
380
+ Language
381
+ Language
382
+ Improve-grammar-Typo
383
+ Improve-grammar-Typo
384
+ Format
385
+ Content
386
+ Language
387
+ Format
388
+ Format
389
+ Content
390
+ Improve-grammar-Typo
391
+ Improve-grammar-Typo
392
+ Language
393
+ Improve-grammar-Typo
394
+ Language
395
+ Language
396
+ Improve-grammar-Typo
397
+ Content
398
+ Improve-grammar-Typo
399
+ Content
400
+ Improve-grammar-Typo
401
+ Language
402
+ Improve-grammar-Typo
403
+ Language
404
+ Improve-grammar-Typo
405
+ Improve-grammar-Typo
406
+ Language
407
+ Language
408
+ Improve-grammar-Typo
409
+ Format
410
+ Format
411
+ Language
412
+ Format
413
+ Language
414
+ Improve-grammar-Typo
415
+ Improve-grammar-Typo
416
+ Improve-grammar-Typo
417
+ Language
418
+ Improve-grammar-Typo
419
+ Language
420
+ Language
421
+ Language
422
+ Improve-grammar-Typo
423
+ Improve-grammar-Typo
424
+ Improve-grammar-Typo
425
+ Content
426
+ Content
427
+ Format
428
+ Content
429
+ Language
430
+ Format
predict_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_accuracy": 0.8441860465116279,
3
+ "predict_loss": 0.208589106798172,
4
+ "predict_runtime": 18.0974,
5
+ "predict_samples": 430,
6
+ "predict_samples_per_second": 23.76,
7
+ "predict_steps_per_second": 1.989
8
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6a0b0c343e10aaf5b5d29d4bcbee26884983ebbd67632995eb8815724e1350e
3
+ size 2950806407
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"]}
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 100, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"], "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "t5-large", "tokenizer_class": "T5Tokenizer"}
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "train_loss": 0.1974729861531939,
4
+ "train_runtime": 785.0626,
5
+ "train_samples": 1254,
6
+ "train_samples_per_second": 15.973,
7
+ "train_steps_per_second": 1.337
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.860730593607306,
3
+ "best_model_checkpoint": "tmp/tst-translation354/checkpoint-840",
4
+ "epoch": 10.0,
5
+ "global_step": 1050,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_accuracy": 0.3698630136986301,
13
+ "eval_loss": 0.3618854582309723,
14
+ "eval_runtime": 11.9681,
15
+ "eval_samples_per_second": 36.597,
16
+ "eval_steps_per_second": 3.092,
17
+ "step": 105
18
+ },
19
+ {
20
+ "epoch": 2.0,
21
+ "eval_accuracy": 0.7351598173515982,
22
+ "eval_loss": 0.1750085949897766,
23
+ "eval_runtime": 13.2291,
24
+ "eval_samples_per_second": 33.109,
25
+ "eval_steps_per_second": 2.797,
26
+ "step": 210
27
+ },
28
+ {
29
+ "epoch": 3.0,
30
+ "eval_accuracy": 0.7922374429223744,
31
+ "eval_loss": 0.14155679941177368,
32
+ "eval_runtime": 12.7875,
33
+ "eval_samples_per_second": 34.252,
34
+ "eval_steps_per_second": 2.893,
35
+ "step": 315
36
+ },
37
+ {
38
+ "epoch": 4.0,
39
+ "eval_accuracy": 0.8424657534246576,
40
+ "eval_loss": 0.12254055589437485,
41
+ "eval_runtime": 14.2052,
42
+ "eval_samples_per_second": 30.834,
43
+ "eval_steps_per_second": 2.605,
44
+ "step": 420
45
+ },
46
+ {
47
+ "epoch": 4.76,
48
+ "learning_rate": 2.6190476190476192e-05,
49
+ "loss": 0.3579,
50
+ "step": 500
51
+ },
52
+ {
53
+ "epoch": 5.0,
54
+ "eval_accuracy": 0.8515981735159818,
55
+ "eval_loss": 0.13104116916656494,
56
+ "eval_runtime": 12.6738,
57
+ "eval_samples_per_second": 34.559,
58
+ "eval_steps_per_second": 2.919,
59
+ "step": 525
60
+ },
61
+ {
62
+ "epoch": 6.0,
63
+ "eval_accuracy": 0.8424657534246576,
64
+ "eval_loss": 0.1357753723859787,
65
+ "eval_runtime": 12.4892,
66
+ "eval_samples_per_second": 35.07,
67
+ "eval_steps_per_second": 2.963,
68
+ "step": 630
69
+ },
70
+ {
71
+ "epoch": 7.0,
72
+ "eval_accuracy": 0.8493150684931506,
73
+ "eval_loss": 0.16049088537693024,
74
+ "eval_runtime": 12.626,
75
+ "eval_samples_per_second": 34.69,
76
+ "eval_steps_per_second": 2.93,
77
+ "step": 735
78
+ },
79
+ {
80
+ "epoch": 8.0,
81
+ "eval_accuracy": 0.860730593607306,
82
+ "eval_loss": 0.1871233731508255,
83
+ "eval_runtime": 12.5302,
84
+ "eval_samples_per_second": 34.956,
85
+ "eval_steps_per_second": 2.953,
86
+ "step": 840
87
+ },
88
+ {
89
+ "epoch": 9.0,
90
+ "eval_accuracy": 0.8515981735159818,
91
+ "eval_loss": 0.19895039498806,
92
+ "eval_runtime": 12.66,
93
+ "eval_samples_per_second": 34.597,
94
+ "eval_steps_per_second": 2.923,
95
+ "step": 945
96
+ },
97
+ {
98
+ "epoch": 9.52,
99
+ "learning_rate": 2.3809523809523808e-06,
100
+ "loss": 0.054,
101
+ "step": 1000
102
+ },
103
+ {
104
+ "epoch": 10.0,
105
+ "eval_accuracy": 0.860730593607306,
106
+ "eval_loss": 0.2134334295988083,
107
+ "eval_runtime": 12.5085,
108
+ "eval_samples_per_second": 35.016,
109
+ "eval_steps_per_second": 2.958,
110
+ "step": 1050
111
+ },
112
+ {
113
+ "epoch": 10.0,
114
+ "step": 1050,
115
+ "total_flos": 8482350845952000.0,
116
+ "train_loss": 0.1974729861531939,
117
+ "train_runtime": 785.0626,
118
+ "train_samples_per_second": 15.973,
119
+ "train_steps_per_second": 1.337
120
+ }
121
+ ],
122
+ "max_steps": 1050,
123
+ "num_train_epochs": 10,
124
+ "total_flos": 8482350845952000.0,
125
+ "trial_name": null,
126
+ "trial_params": null
127
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a23b9a642b4db882ef684755d34a85a97de3a32384696d8bc0c1c6f4a79e7e20
3
+ size 3183