yonathanstwn committed on
Commit
72a3e5a
1 Parent(s): 75bc8f8

End of training

Browse files
.gitattributes CHANGED
@@ -32,7 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
- last-checkpoint/source.spm filter=lfs diff=lfs merge=lfs -text
36
- last-checkpoint/target.spm filter=lfs diff=lfs merge=lfs -text
37
  source.spm filter=lfs diff=lfs merge=lfs -text
38
  target.spm filter=lfs diff=lfs merge=lfs -text
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
35
  source.spm filter=lfs diff=lfs merge=lfs -text
36
  target.spm filter=lfs diff=lfs merge=lfs -text
last-checkpoint/generation_config.json → generation_config.json RENAMED
File without changes
last-checkpoint/config.json DELETED
@@ -1,61 +0,0 @@
1
- {
2
- "_name_or_path": "./models/opus-mt-id-en",
3
- "_num_labels": 3,
4
- "activation_dropout": 0.0,
5
- "activation_function": "swish",
6
- "add_bias_logits": false,
7
- "add_final_layer_norm": false,
8
- "architectures": [
9
- "MarianMTModel"
10
- ],
11
- "attention_dropout": 0.0,
12
- "bad_words_ids": [
13
- [
14
- 54795
15
- ]
16
- ],
17
- "bos_token_id": 0,
18
- "classif_dropout": 0.0,
19
- "classifier_dropout": 0.0,
20
- "d_model": 512,
21
- "decoder_attention_heads": 8,
22
- "decoder_ffn_dim": 2048,
23
- "decoder_layerdrop": 0.0,
24
- "decoder_layers": 6,
25
- "decoder_start_token_id": 54795,
26
- "decoder_vocab_size": 54796,
27
- "dropout": 0.1,
28
- "encoder_attention_heads": 8,
29
- "encoder_ffn_dim": 2048,
30
- "encoder_layerdrop": 0.0,
31
- "encoder_layers": 6,
32
- "eos_token_id": 0,
33
- "forced_eos_token_id": 0,
34
- "id2label": {
35
- "0": "LABEL_0",
36
- "1": "LABEL_1",
37
- "2": "LABEL_2"
38
- },
39
- "init_std": 0.02,
40
- "is_encoder_decoder": true,
41
- "label2id": {
42
- "LABEL_0": 0,
43
- "LABEL_1": 1,
44
- "LABEL_2": 2
45
- },
46
- "max_length": 512,
47
- "max_position_embeddings": 512,
48
- "model_type": "marian",
49
- "normalize_before": false,
50
- "normalize_embedding": false,
51
- "num_beams": 6,
52
- "num_hidden_layers": 6,
53
- "pad_token_id": 54795,
54
- "scale_embedding": true,
55
- "share_encoder_decoder_embeddings": true,
56
- "static_position_embeddings": true,
57
- "torch_dtype": "float32",
58
- "transformers_version": "4.26.1",
59
- "use_cache": true,
60
- "vocab_size": 54796
61
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d2e88c2a463ad4b55f3a38041628644c545d8da712d8235e5396377a447b046
3
- size 577701381
 
 
 
 
last-checkpoint/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d90d2cc10256c2049a4d3985e8a8a5ad38f5450b0a05bd884d4ce75748326e1c
3
- size 289081861
 
 
 
 
last-checkpoint/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bcf060ca49247eb79b04210ee27fddb6c56d83199d13c6317e8667c52583a46
3
- size 14575
 
 
 
 
last-checkpoint/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:368d606ba58025458a7a4928a134d8d02b44d7703e4636b6ec3229ecf6d7599f
3
- size 627
 
 
 
 
last-checkpoint/source.spm DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a8fefe71c7f26cb0c6aa1b9f0cc0f8d18006b20fe41c547af7f25b9c8333465
3
- size 800687
 
 
 
 
last-checkpoint/special_tokens_map.json DELETED
@@ -1,5 +0,0 @@
1
- {
2
- "eos_token": "</s>",
3
- "pad_token": "<pad>",
4
- "unk_token": "<unk>"
5
- }
 
 
 
 
 
 
last-checkpoint/target.spm DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e88300911c2c573ec5526777a1e84bae698d20925b82dcef9c7248bb0e537ed0
3
- size 795925
 
 
 
 
last-checkpoint/tokenizer_config.json DELETED
@@ -1,13 +0,0 @@
1
- {
2
- "eos_token": "</s>",
3
- "model_max_length": 512,
4
- "name_or_path": "./models/opus-mt-id-en",
5
- "pad_token": "<pad>",
6
- "separate_vocabs": false,
7
- "source_lang": "id",
8
- "sp_model_kwargs": {},
9
- "special_tokens_map_file": null,
10
- "target_lang": "en",
11
- "tokenizer_class": "MarianTokenizer",
12
- "unk_token": "<unk>"
13
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/trainer_state.json DELETED
@@ -1,391 +0,0 @@
1
- {
2
- "best_metric": 1.3274328708648682,
3
- "best_model_checkpoint": "models/opus-mt-id-en-open-subtitles/checkpoint-28125",
4
- "epoch": 25.0,
5
- "global_step": 703125,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 1.0,
12
- "learning_rate": 9.654925800107278e-05,
13
- "loss": 1.3533,
14
- "step": 28125
15
- },
16
- {
17
- "epoch": 1.0,
18
- "eval_bleu": 37.6662,
19
- "eval_loss": 1.3274328708648682,
20
- "eval_runtime": 1861.5149,
21
- "eval_samples_per_second": 53.72,
22
- "eval_steps_per_second": 1.679,
23
- "step": 28125
24
- },
25
- {
26
- "epoch": 2.0,
27
- "learning_rate": 9.252637225102807e-05,
28
- "loss": 1.2814,
29
- "step": 56250
30
- },
31
- {
32
- "epoch": 2.0,
33
- "eval_bleu": 37.5909,
34
- "eval_loss": 1.3524662256240845,
35
- "eval_runtime": 1509.1742,
36
- "eval_samples_per_second": 66.261,
37
- "eval_steps_per_second": 2.071,
38
- "step": 56250
39
- },
40
- {
41
- "epoch": 3.0,
42
- "learning_rate": 8.850348650098338e-05,
43
- "loss": 1.2058,
44
- "step": 84375
45
- },
46
- {
47
- "epoch": 3.0,
48
- "eval_bleu": 37.8008,
49
- "eval_loss": 1.3674018383026123,
50
- "eval_runtime": 1551.6085,
51
- "eval_samples_per_second": 64.449,
52
- "eval_steps_per_second": 2.014,
53
- "step": 84375
54
- },
55
- {
56
- "epoch": 4.0,
57
- "learning_rate": 8.448060075093868e-05,
58
- "loss": 1.1415,
59
- "step": 112500
60
- },
61
- {
62
- "epoch": 4.0,
63
- "eval_bleu": 37.4849,
64
- "eval_loss": 1.3721529245376587,
65
- "eval_runtime": 1712.582,
66
- "eval_samples_per_second": 58.391,
67
- "eval_steps_per_second": 1.825,
68
- "step": 112500
69
- },
70
- {
71
- "epoch": 5.0,
72
- "learning_rate": 8.045771500089397e-05,
73
- "loss": 1.0842,
74
- "step": 140625
75
- },
76
- {
77
- "epoch": 5.0,
78
- "eval_bleu": 37.7558,
79
- "eval_loss": 1.3942967653274536,
80
- "eval_runtime": 1531.4782,
81
- "eval_samples_per_second": 65.296,
82
- "eval_steps_per_second": 2.041,
83
- "step": 140625
84
- },
85
- {
86
- "epoch": 6.0,
87
- "learning_rate": 7.643482925084928e-05,
88
- "loss": 1.0309,
89
- "step": 168750
90
- },
91
- {
92
- "epoch": 6.0,
93
- "eval_bleu": 37.6332,
94
- "eval_loss": 1.3994051218032837,
95
- "eval_runtime": 1465.4534,
96
- "eval_samples_per_second": 68.238,
97
- "eval_steps_per_second": 2.132,
98
- "step": 168750
99
- },
100
- {
101
- "epoch": 7.0,
102
- "learning_rate": 7.241194350080458e-05,
103
- "loss": 0.9802,
104
- "step": 196875
105
- },
106
- {
107
- "epoch": 7.0,
108
- "eval_bleu": 37.7529,
109
- "eval_loss": 1.4215837717056274,
110
- "eval_runtime": 1573.1177,
111
- "eval_samples_per_second": 63.568,
112
- "eval_steps_per_second": 1.987,
113
- "step": 196875
114
- },
115
- {
116
- "epoch": 8.0,
117
- "learning_rate": 6.838905775075987e-05,
118
- "loss": 0.9316,
119
- "step": 225000
120
- },
121
- {
122
- "epoch": 8.0,
123
- "eval_bleu": 37.9906,
124
- "eval_loss": 1.4304240942001343,
125
- "eval_runtime": 1471.9189,
126
- "eval_samples_per_second": 67.939,
127
- "eval_steps_per_second": 2.123,
128
- "step": 225000
129
- },
130
- {
131
- "epoch": 9.0,
132
- "learning_rate": 6.436617200071518e-05,
133
- "loss": 0.8838,
134
- "step": 253125
135
- },
136
- {
137
- "epoch": 9.0,
138
- "eval_bleu": 37.7833,
139
- "eval_loss": 1.4462167024612427,
140
- "eval_runtime": 1502.4733,
141
- "eval_samples_per_second": 66.557,
142
- "eval_steps_per_second": 2.08,
143
- "step": 253125
144
- },
145
- {
146
- "epoch": 10.0,
147
- "learning_rate": 6.034328625067048e-05,
148
- "loss": 0.8378,
149
- "step": 281250
150
- },
151
- {
152
- "epoch": 10.0,
153
- "eval_bleu": 37.5971,
154
- "eval_loss": 1.4639370441436768,
155
- "eval_runtime": 1489.8072,
156
- "eval_samples_per_second": 67.123,
157
- "eval_steps_per_second": 2.098,
158
- "step": 281250
159
- },
160
- {
161
- "epoch": 11.0,
162
- "learning_rate": 5.632040050062578e-05,
163
- "loss": 0.7921,
164
- "step": 309375
165
- },
166
- {
167
- "epoch": 11.0,
168
- "eval_bleu": 37.6285,
169
- "eval_loss": 1.485935926437378,
170
- "eval_runtime": 1460.5333,
171
- "eval_samples_per_second": 68.468,
172
- "eval_steps_per_second": 2.14,
173
- "step": 309375
174
- },
175
- {
176
- "epoch": 12.0,
177
- "learning_rate": 5.229751475058109e-05,
178
- "loss": 0.7484,
179
- "step": 337500
180
- },
181
- {
182
- "epoch": 12.0,
183
- "eval_bleu": 37.5413,
184
- "eval_loss": 1.5060349702835083,
185
- "eval_runtime": 1480.0942,
186
- "eval_samples_per_second": 67.563,
187
- "eval_steps_per_second": 2.111,
188
- "step": 337500
189
- },
190
- {
191
- "epoch": 13.0,
192
- "learning_rate": 4.827462900053639e-05,
193
- "loss": 0.7043,
194
- "step": 365625
195
- },
196
- {
197
- "epoch": 13.0,
198
- "eval_bleu": 37.5118,
199
- "eval_loss": 1.5256059169769287,
200
- "eval_runtime": 1462.073,
201
- "eval_samples_per_second": 68.396,
202
- "eval_steps_per_second": 2.137,
203
- "step": 365625
204
- },
205
- {
206
- "epoch": 14.0,
207
- "learning_rate": 4.425174325049169e-05,
208
- "loss": 0.6622,
209
- "step": 393750
210
- },
211
- {
212
- "epoch": 14.0,
213
- "eval_bleu": 37.5092,
214
- "eval_loss": 1.555537223815918,
215
- "eval_runtime": 1474.8089,
216
- "eval_samples_per_second": 67.805,
217
- "eval_steps_per_second": 2.119,
218
- "step": 393750
219
- },
220
- {
221
- "epoch": 15.0,
222
- "learning_rate": 4.022885750044699e-05,
223
- "loss": 0.6208,
224
- "step": 421875
225
- },
226
- {
227
- "epoch": 15.0,
228
- "eval_bleu": 37.2924,
229
- "eval_loss": 1.573325276374817,
230
- "eval_runtime": 1464.8257,
231
- "eval_samples_per_second": 68.268,
232
- "eval_steps_per_second": 2.133,
233
- "step": 421875
234
- },
235
- {
236
- "epoch": 16.0,
237
- "learning_rate": 3.620597175040229e-05,
238
- "loss": 0.5807,
239
- "step": 450000
240
- },
241
- {
242
- "epoch": 16.0,
243
- "eval_bleu": 37.319,
244
- "eval_loss": 1.60484778881073,
245
- "eval_runtime": 1459.7156,
246
- "eval_samples_per_second": 68.506,
247
- "eval_steps_per_second": 2.141,
248
- "step": 450000
249
- },
250
- {
251
- "epoch": 17.0,
252
- "learning_rate": 3.218308600035759e-05,
253
- "loss": 0.542,
254
- "step": 478125
255
- },
256
- {
257
- "epoch": 17.0,
258
- "eval_bleu": 37.0629,
259
- "eval_loss": 1.6434943675994873,
260
- "eval_runtime": 1463.555,
261
- "eval_samples_per_second": 68.327,
262
- "eval_steps_per_second": 2.135,
263
- "step": 478125
264
- },
265
- {
266
- "epoch": 18.0,
267
- "learning_rate": 2.816020025031289e-05,
268
- "loss": 0.5043,
269
- "step": 506250
270
- },
271
- {
272
- "epoch": 18.0,
273
- "eval_bleu": 37.1334,
274
- "eval_loss": 1.6646835803985596,
275
- "eval_runtime": 1448.2184,
276
- "eval_samples_per_second": 69.05,
277
- "eval_steps_per_second": 2.158,
278
- "step": 506250
279
- },
280
- {
281
- "epoch": 19.0,
282
- "learning_rate": 2.4137314500268194e-05,
283
- "loss": 0.4685,
284
- "step": 534375
285
- },
286
- {
287
- "epoch": 19.0,
288
- "eval_bleu": 37.02,
289
- "eval_loss": 1.701406717300415,
290
- "eval_runtime": 1487.6794,
291
- "eval_samples_per_second": 67.219,
292
- "eval_steps_per_second": 2.101,
293
- "step": 534375
294
- },
295
- {
296
- "epoch": 20.0,
297
- "learning_rate": 2.0114428750223493e-05,
298
- "loss": 0.4352,
299
- "step": 562500
300
- },
301
- {
302
- "epoch": 20.0,
303
- "eval_bleu": 36.9514,
304
- "eval_loss": 1.7299559116363525,
305
- "eval_runtime": 1449.7878,
306
- "eval_samples_per_second": 68.976,
307
- "eval_steps_per_second": 2.155,
308
- "step": 562500
309
- },
310
- {
311
- "epoch": 21.0,
312
- "learning_rate": 1.6091543000178796e-05,
313
- "loss": 0.4031,
314
- "step": 590625
315
- },
316
- {
317
- "epoch": 21.0,
318
- "eval_bleu": 36.9637,
319
- "eval_loss": 1.7572004795074463,
320
- "eval_runtime": 1454.2044,
321
- "eval_samples_per_second": 68.766,
322
- "eval_steps_per_second": 2.149,
323
- "step": 590625
324
- },
325
- {
326
- "epoch": 22.0,
327
- "learning_rate": 1.2068657250134097e-05,
328
- "loss": 0.3731,
329
- "step": 618750
330
- },
331
- {
332
- "epoch": 22.0,
333
- "eval_bleu": 36.9821,
334
- "eval_loss": 1.7902287244796753,
335
- "eval_runtime": 1450.2265,
336
- "eval_samples_per_second": 68.955,
337
- "eval_steps_per_second": 2.155,
338
- "step": 618750
339
- },
340
- {
341
- "epoch": 23.0,
342
- "learning_rate": 8.045771500089398e-06,
343
- "loss": 0.346,
344
- "step": 646875
345
- },
346
- {
347
- "epoch": 23.0,
348
- "eval_bleu": 36.9586,
349
- "eval_loss": 1.8111997842788696,
350
- "eval_runtime": 1448.2982,
351
- "eval_samples_per_second": 69.047,
352
- "eval_steps_per_second": 2.158,
353
- "step": 646875
354
- },
355
- {
356
- "epoch": 24.0,
357
- "learning_rate": 4.022885750044699e-06,
358
- "loss": 0.3227,
359
- "step": 675000
360
- },
361
- {
362
- "epoch": 24.0,
363
- "eval_bleu": 36.9286,
364
- "eval_loss": 1.8325022459030151,
365
- "eval_runtime": 1450.793,
366
- "eval_samples_per_second": 68.928,
367
- "eval_steps_per_second": 2.154,
368
- "step": 675000
369
- },
370
- {
371
- "epoch": 25.0,
372
- "learning_rate": 0.0,
373
- "loss": 0.303,
374
- "step": 703125
375
- },
376
- {
377
- "epoch": 25.0,
378
- "eval_bleu": 36.9382,
379
- "eval_loss": 1.8429754972457886,
380
- "eval_runtime": 1448.8258,
381
- "eval_samples_per_second": 69.021,
382
- "eval_steps_per_second": 2.157,
383
- "step": 703125
384
- }
385
- ],
386
- "max_steps": 703125,
387
- "num_train_epochs": 25,
388
- "total_flos": 1.3122279945378202e+17,
389
- "trial_name": null,
390
- "trial_params": null
391
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:096873cb715c0742cc88026a10db5fb803013016fd2eb4790edaefea640a8e20
3
- size 3643
 
 
 
 
last-checkpoint/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d90d2cc10256c2049a4d3985e8a8a5ad38f5450b0a05bd884d4ce75748326e1c
3
  size 289081861
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:476941512fa6d9bf4120703c4b916f3953f36d28ca096772f675e0480ae75f89
3
  size 289081861