marinone94 committed on
Commit
149260c
•
1 Parent(s): e9db825

Training in progress, step 300

Browse files
{checkpoint-3700 → checkpoint-300}/config.json RENAMED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "KBLab/wav2vec2-large-voxrex",
3
  "activation_dropout": 0.1,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
 
1
  {
2
+ "_name_or_path": "marinone94/xls-r-300m-sv-robust",
3
  "activation_dropout": 0.1,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
{checkpoint-3700 → checkpoint-300}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:619caf3673344ab2a56d8920b454d1a102940191430a61b3330186dad60d1659
3
  size 2490337809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4851e5921f7325e240731959feb278f899280909b8e2cb6567d27fae4017c474
3
  size 2490337809
{checkpoint-3700 → checkpoint-300}/preprocessor_config.json RENAMED
File without changes
{checkpoint-3700 → checkpoint-300}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e95c67d08a1a0d2f34fb48babadb769336f0b21eeae723abbb0714176642fdf
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:434bb79de859e935ca1aaf8411c1439a4e24ef8e2396b088b07525e56e0847d8
3
  size 1262063089
{checkpoint-3700 → checkpoint-300}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff21b04ab59e592ca04a33825328c1a76f9615b11b66591c24235296cebfad21
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04e54ce0be1689f12766dccd1bd35050e0f0346ef0556646dc973c608610db2f
3
+ size 14567
{checkpoint-3700 → checkpoint-300}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d78f89f1dff85262347440e00b108cf0d3f1eb1be85989db2947bdafade5f248
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0967b9f865f16344c55f5ccc3cf7d6e8e97ca61dda304e931ca6bad130f48dd1
3
  size 559
{checkpoint-3700 → checkpoint-300}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e5a5afc631f3556ad61fe3b8190b2405aece04ce6c9c704fbd2ff1d06026a9b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32701a4051ea5d746149966b47cdca0f213673b7e546ae3ee3646c1f9bbb83d6
3
  size 623
checkpoint-300/trainer_state.json ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.2950819672131146,
5
+ "global_step": 300,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.22,
12
+ "learning_rate": 2.5e-06,
13
+ "loss": 3.5867,
14
+ "step": 20
15
+ },
16
+ {
17
+ "epoch": 0.44,
18
+ "learning_rate": 5e-06,
19
+ "loss": 3.5457,
20
+ "step": 40
21
+ },
22
+ {
23
+ "epoch": 0.66,
24
+ "learning_rate": 7.5e-06,
25
+ "loss": 3.4513,
26
+ "step": 60
27
+ },
28
+ {
29
+ "epoch": 0.87,
30
+ "learning_rate": 1e-05,
31
+ "loss": 3.3432,
32
+ "step": 80
33
+ },
34
+ {
35
+ "epoch": 1.1,
36
+ "learning_rate": 1.25e-05,
37
+ "loss": 3.3533,
38
+ "step": 100
39
+ },
40
+ {
41
+ "epoch": 1.1,
42
+ "eval_loss": 3.2806732654571533,
43
+ "eval_runtime": 190.4728,
44
+ "eval_samples_per_second": 25.426,
45
+ "eval_steps_per_second": 0.798,
46
+ "eval_wer": 1.0,
47
+ "step": 100
48
+ },
49
+ {
50
+ "epoch": 1.32,
51
+ "learning_rate": 1.5e-05,
52
+ "loss": 3.2217,
53
+ "step": 120
54
+ },
55
+ {
56
+ "epoch": 1.54,
57
+ "learning_rate": 1.7500000000000002e-05,
58
+ "loss": 3.1765,
59
+ "step": 140
60
+ },
61
+ {
62
+ "epoch": 1.75,
63
+ "learning_rate": 2e-05,
64
+ "loss": 3.1408,
65
+ "step": 160
66
+ },
67
+ {
68
+ "epoch": 1.97,
69
+ "learning_rate": 2.2499999999999998e-05,
70
+ "loss": 3.1165,
71
+ "step": 180
72
+ },
73
+ {
74
+ "epoch": 2.2,
75
+ "learning_rate": 2.5e-05,
76
+ "loss": 3.1709,
77
+ "step": 200
78
+ },
79
+ {
80
+ "epoch": 2.2,
81
+ "eval_loss": 3.1325438022613525,
82
+ "eval_runtime": 192.4978,
83
+ "eval_samples_per_second": 25.159,
84
+ "eval_steps_per_second": 0.79,
85
+ "eval_wer": 1.0,
86
+ "step": 200
87
+ },
88
+ {
89
+ "epoch": 2.42,
90
+ "learning_rate": 2.75e-05,
91
+ "loss": 3.079,
92
+ "step": 220
93
+ },
94
+ {
95
+ "epoch": 2.63,
96
+ "learning_rate": 3e-05,
97
+ "loss": 3.0677,
98
+ "step": 240
99
+ },
100
+ {
101
+ "epoch": 2.85,
102
+ "learning_rate": 3.2500000000000004e-05,
103
+ "loss": 3.0656,
104
+ "step": 260
105
+ },
106
+ {
107
+ "epoch": 3.08,
108
+ "learning_rate": 3.5000000000000004e-05,
109
+ "loss": 3.1463,
110
+ "step": 280
111
+ },
112
+ {
113
+ "epoch": 3.3,
114
+ "learning_rate": 3.75e-05,
115
+ "loss": 3.0573,
116
+ "step": 300
117
+ },
118
+ {
119
+ "epoch": 3.3,
120
+ "eval_loss": 3.0614514350891113,
121
+ "eval_runtime": 194.36,
122
+ "eval_samples_per_second": 24.918,
123
+ "eval_steps_per_second": 0.782,
124
+ "eval_wer": 1.0,
125
+ "step": 300
126
+ }
127
+ ],
128
+ "max_steps": 4550,
129
+ "num_train_epochs": 50,
130
+ "total_flos": 4.675293533891495e+18,
131
+ "trial_name": null,
132
+ "trial_params": null
133
+ }
{checkpoint-3700 → checkpoint-300}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fcaa687be5ba49f76fb6ca2521b979e7539ac5b1f99f37017cc7c1ba8c1c2387
3
  size 3055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61c53503eb760641941023d2dcab7bb32b620bf895e31db5c3910b2760135b07
3
  size 3055
checkpoint-3700/trainer_state.json DELETED
@@ -1,1459 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 1.97531028960363,
5
- "global_step": 3700,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.01,
12
- "learning_rate": 0.0002,
13
- "loss": 10.6012,
14
- "step": 20
15
- },
16
- {
17
- "epoch": 0.02,
18
- "learning_rate": 0.0004,
19
- "loss": 3.651,
20
- "step": 40
21
- },
22
- {
23
- "epoch": 0.03,
24
- "learning_rate": 0.00059,
25
- "loss": 3.3911,
26
- "step": 60
27
- },
28
- {
29
- "epoch": 0.04,
30
- "learning_rate": 0.0007491827839825661,
31
- "loss": 3.0481,
32
- "step": 80
33
- },
34
- {
35
- "epoch": 0.05,
36
- "learning_rate": 0.0007453010078997548,
37
- "loss": 3.4039,
38
- "step": 100
39
- },
40
- {
41
- "epoch": 0.05,
42
- "eval_loss": Infinity,
43
- "eval_runtime": 1260.1628,
44
- "eval_samples_per_second": 21.123,
45
- "eval_steps_per_second": 0.66,
46
- "eval_wer": 1.0,
47
- "step": 100
48
- },
49
- {
50
- "epoch": 0.06,
51
- "learning_rate": 0.0007412149278125851,
52
- "loss": 3.0553,
53
- "step": 120
54
- },
55
- {
56
- "epoch": 0.07,
57
- "learning_rate": 0.0007371288477254155,
58
- "loss": 3.2191,
59
- "step": 140
60
- },
61
- {
62
- "epoch": 0.09,
63
- "learning_rate": 0.0007332470716426042,
64
- "loss": 3.4094,
65
- "step": 160
66
- },
67
- {
68
- "epoch": 0.1,
69
- "learning_rate": 0.0007291609915554345,
70
- "loss": 3.0607,
71
- "step": 180
72
- },
73
- {
74
- "epoch": 0.11,
75
- "learning_rate": 0.0007252792154726233,
76
- "loss": 3.4396,
77
- "step": 200
78
- },
79
- {
80
- "epoch": 0.11,
81
- "eval_loss": Infinity,
82
- "eval_runtime": 1235.1686,
83
- "eval_samples_per_second": 21.55,
84
- "eval_steps_per_second": 0.674,
85
- "eval_wer": 1.0,
86
- "step": 200
87
- },
88
- {
89
- "epoch": 0.12,
90
- "learning_rate": 0.0007211931353854536,
91
- "loss": 3.1198,
92
- "step": 220
93
- },
94
- {
95
- "epoch": 0.13,
96
- "learning_rate": 0.0007171070552982838,
97
- "loss": 3.1801,
98
- "step": 240
99
- },
100
- {
101
- "epoch": 0.14,
102
- "learning_rate": 0.0007132252792154726,
103
- "loss": 3.3954,
104
- "step": 260
105
- },
106
- {
107
- "epoch": 0.15,
108
- "learning_rate": 0.000709139199128303,
109
- "loss": 3.0566,
110
- "step": 280
111
- },
112
- {
113
- "epoch": 0.16,
114
- "learning_rate": 0.0007052574230454917,
115
- "loss": 3.483,
116
- "step": 300
117
- },
118
- {
119
- "epoch": 0.16,
120
- "eval_loss": Infinity,
121
- "eval_runtime": 1262.2253,
122
- "eval_samples_per_second": 21.088,
123
- "eval_steps_per_second": 0.659,
124
- "eval_wer": 1.0,
125
- "step": 300
126
- },
127
- {
128
- "epoch": 0.17,
129
- "learning_rate": 0.000701171342958322,
130
- "loss": 3.1616,
131
- "step": 320
132
- },
133
- {
134
- "epoch": 0.18,
135
- "learning_rate": 0.0006970852628711522,
136
- "loss": 3.2017,
137
- "step": 340
138
- },
139
- {
140
- "epoch": 0.19,
141
- "learning_rate": 0.0006932034867883411,
142
- "loss": 3.4474,
143
- "step": 360
144
- },
145
- {
146
- "epoch": 0.2,
147
- "learning_rate": 0.0006891174067011714,
148
- "loss": 3.0656,
149
- "step": 380
150
- },
151
- {
152
- "epoch": 0.21,
153
- "learning_rate": 0.0006852356306183601,
154
- "loss": 3.5014,
155
- "step": 400
156
- },
157
- {
158
- "epoch": 0.21,
159
- "eval_loss": Infinity,
160
- "eval_runtime": 1255.7751,
161
- "eval_samples_per_second": 21.196,
162
- "eval_steps_per_second": 0.663,
163
- "eval_wer": 1.0,
164
- "step": 400
165
- },
166
- {
167
- "epoch": 0.22,
168
- "learning_rate": 0.0006811495505311904,
169
- "loss": 3.1757,
170
- "step": 420
171
- },
172
- {
173
- "epoch": 0.23,
174
- "learning_rate": 0.0006770634704440206,
175
- "loss": 3.2038,
176
- "step": 440
177
- },
178
- {
179
- "epoch": 0.25,
180
- "learning_rate": 0.0006731816943612095,
181
- "loss": 3.4072,
182
- "step": 460
183
- },
184
- {
185
- "epoch": 0.26,
186
- "learning_rate": 0.0006690956142740398,
187
- "loss": 3.0498,
188
- "step": 480
189
- },
190
- {
191
- "epoch": 0.27,
192
- "learning_rate": 0.00066500953418687,
193
- "loss": 3.331,
194
- "step": 500
195
- },
196
- {
197
- "epoch": 0.27,
198
- "eval_loss": Infinity,
199
- "eval_runtime": 1239.2922,
200
- "eval_samples_per_second": 21.478,
201
- "eval_steps_per_second": 0.671,
202
- "eval_wer": 1.0,
203
- "step": 500
204
- },
205
- {
206
- "epoch": 0.28,
207
- "learning_rate": 0.0006609234540997004,
208
- "loss": 3.0833,
209
- "step": 520
210
- },
211
- {
212
- "epoch": 0.29,
213
- "learning_rate": 0.0006568373740125306,
214
- "loss": 3.1989,
215
- "step": 540
216
- },
217
- {
218
- "epoch": 0.3,
219
- "learning_rate": 0.0006531599019340779,
220
- "loss": 3.5001,
221
- "step": 560
222
- },
223
- {
224
- "epoch": 0.31,
225
- "learning_rate": 0.0006490738218469082,
226
- "loss": 3.0685,
227
- "step": 580
228
- },
229
- {
230
- "epoch": 0.32,
231
- "learning_rate": 0.000645192045764097,
232
- "loss": 3.4809,
233
- "step": 600
234
- },
235
- {
236
- "epoch": 0.32,
237
- "eval_loss": Infinity,
238
- "eval_runtime": 1232.3119,
239
- "eval_samples_per_second": 21.6,
240
- "eval_steps_per_second": 0.675,
241
- "eval_wer": 1.0,
242
- "step": 600
243
- },
244
- {
245
- "epoch": 0.33,
246
- "learning_rate": 0.0006411059656769273,
247
- "loss": 3.1877,
248
- "step": 620
249
- },
250
- {
251
- "epoch": 0.34,
252
- "learning_rate": 0.0006370198855897576,
253
- "loss": 3.2365,
254
- "step": 640
255
- },
256
- {
257
- "epoch": 0.35,
258
- "learning_rate": 0.0006333424135113049,
259
- "loss": 3.67,
260
- "step": 660
261
- },
262
- {
263
- "epoch": 0.36,
264
- "learning_rate": 0.0006292563334241351,
265
- "loss": 3.086,
266
- "step": 680
267
- },
268
- {
269
- "epoch": 0.37,
270
- "learning_rate": 0.0006253745573413239,
271
- "loss": 3.4678,
272
- "step": 700
273
- },
274
- {
275
- "epoch": 0.37,
276
- "eval_loss": Infinity,
277
- "eval_runtime": 1236.8236,
278
- "eval_samples_per_second": 21.521,
279
- "eval_steps_per_second": 0.673,
280
- "eval_wer": 1.0,
281
- "step": 700
282
- },
283
- {
284
- "epoch": 0.38,
285
- "learning_rate": 0.0006212884772541542,
286
- "loss": 3.2344,
287
- "step": 720
288
- },
289
- {
290
- "epoch": 0.4,
291
- "learning_rate": 0.0006172023971669845,
292
- "loss": 3.2507,
293
- "step": 740
294
- },
295
- {
296
- "epoch": 0.41,
297
- "learning_rate": 0.0006131163170798147,
298
- "loss": 3.4283,
299
- "step": 760
300
- },
301
- {
302
- "epoch": 0.42,
303
- "learning_rate": 0.0006090302369926451,
304
- "loss": 3.0802,
305
- "step": 780
306
- },
307
- {
308
- "epoch": 0.43,
309
- "learning_rate": 0.0006051484609098339,
310
- "loss": 3.4596,
311
- "step": 800
312
- },
313
- {
314
- "epoch": 0.43,
315
- "eval_loss": Infinity,
316
- "eval_runtime": 1233.785,
317
- "eval_samples_per_second": 21.574,
318
- "eval_steps_per_second": 0.674,
319
- "eval_wer": 1.0,
320
- "step": 800
321
- },
322
- {
323
- "epoch": 0.44,
324
- "learning_rate": 0.0006010623808226641,
325
- "loss": 3.2227,
326
- "step": 820
327
- },
328
- {
329
- "epoch": 0.45,
330
- "learning_rate": 0.0005969763007354945,
331
- "loss": 3.2357,
332
- "step": 840
333
- },
334
- {
335
- "epoch": 0.46,
336
- "learning_rate": 0.0005930945246526832,
337
- "loss": 3.5563,
338
- "step": 860
339
- },
340
- {
341
- "epoch": 0.47,
342
- "learning_rate": 0.0005890084445655135,
343
- "loss": 3.0845,
344
- "step": 880
345
- },
346
- {
347
- "epoch": 0.48,
348
- "learning_rate": 0.0005851266684827023,
349
- "loss": 3.4644,
350
- "step": 900
351
- },
352
- {
353
- "epoch": 0.48,
354
- "eval_loss": Infinity,
355
- "eval_runtime": 1235.1488,
356
- "eval_samples_per_second": 21.55,
357
- "eval_steps_per_second": 0.674,
358
- "eval_wer": 1.0,
359
- "step": 900
360
- },
361
- {
362
- "epoch": 0.49,
363
- "learning_rate": 0.0005810405883955325,
364
- "loss": 3.2092,
365
- "step": 920
366
- },
367
- {
368
- "epoch": 0.5,
369
- "learning_rate": 0.0005769545083083629,
370
- "loss": 3.2157,
371
- "step": 940
372
- },
373
- {
374
- "epoch": 0.51,
375
- "learning_rate": 0.0005730727322255517,
376
- "loss": 3.4679,
377
- "step": 960
378
- },
379
- {
380
- "epoch": 0.52,
381
- "learning_rate": 0.0005689866521383819,
382
- "loss": 3.0662,
383
- "step": 980
384
- },
385
- {
386
- "epoch": 0.53,
387
- "learning_rate": 0.0005651048760555707,
388
- "loss": 3.4671,
389
- "step": 1000
390
- },
391
- {
392
- "epoch": 0.53,
393
- "eval_loss": Infinity,
394
- "eval_runtime": 1241.2249,
395
- "eval_samples_per_second": 21.445,
396
- "eval_steps_per_second": 0.67,
397
- "eval_wer": 1.0,
398
- "step": 1000
399
- },
400
- {
401
- "epoch": 0.54,
402
- "learning_rate": 0.0005610187959684009,
403
- "loss": 3.2428,
404
- "step": 1020
405
- },
406
- {
407
- "epoch": 0.56,
408
- "learning_rate": 0.0005569327158812314,
409
- "loss": 3.2165,
410
- "step": 1040
411
- },
412
- {
413
- "epoch": 0.57,
414
- "learning_rate": 0.0005530509397984201,
415
- "loss": 3.5106,
416
- "step": 1060
417
- },
418
- {
419
- "epoch": 0.58,
420
- "learning_rate": 0.0005489648597112503,
421
- "loss": 3.1137,
422
- "step": 1080
423
- },
424
- {
425
- "epoch": 0.59,
426
- "learning_rate": 0.0005452873876327976,
427
- "loss": 3.6005,
428
- "step": 1100
429
- },
430
- {
431
- "epoch": 0.59,
432
- "eval_loss": Infinity,
433
- "eval_runtime": 1236.2955,
434
- "eval_samples_per_second": 21.53,
435
- "eval_steps_per_second": 0.673,
436
- "eval_wer": 1.0,
437
- "step": 1100
438
- },
439
- {
440
- "epoch": 0.6,
441
- "learning_rate": 0.0005412013075456279,
442
- "loss": 3.2702,
443
- "step": 1120
444
- },
445
- {
446
- "epoch": 0.61,
447
- "learning_rate": 0.0005371152274584582,
448
- "loss": 3.6768,
449
- "step": 1140
450
- },
451
- {
452
- "epoch": 0.62,
453
- "learning_rate": 0.000533233451375647,
454
- "loss": 3.6313,
455
- "step": 1160
456
- },
457
- {
458
- "epoch": 0.63,
459
- "learning_rate": 0.0005291473712884773,
460
- "loss": 3.2456,
461
- "step": 1180
462
- },
463
- {
464
- "epoch": 0.64,
465
- "learning_rate": 0.000525265595205666,
466
- "loss": 3.9182,
467
- "step": 1200
468
- },
469
- {
470
- "epoch": 0.64,
471
- "eval_loss": Infinity,
472
- "eval_runtime": 1229.4316,
473
- "eval_samples_per_second": 21.651,
474
- "eval_steps_per_second": 0.677,
475
- "eval_wer": 1.0,
476
- "step": 1200
477
- },
478
- {
479
- "epoch": 0.65,
480
- "learning_rate": 0.0005211795151184963,
481
- "loss": 3.3805,
482
- "step": 1220
483
- },
484
- {
485
- "epoch": 0.66,
486
- "learning_rate": 0.0005170934350313267,
487
- "loss": 3.4687,
488
- "step": 1240
489
- },
490
- {
491
- "epoch": 0.67,
492
- "learning_rate": 0.000513007354944157,
493
- "loss": 3.4473,
494
- "step": 1260
495
- },
496
- {
497
- "epoch": 0.68,
498
- "learning_rate": 0.0005089212748569872,
499
- "loss": 3.2232,
500
- "step": 1280
501
- },
502
- {
503
- "epoch": 0.69,
504
- "learning_rate": 0.000505039498774176,
505
- "loss": 3.6466,
506
- "step": 1300
507
- },
508
- {
509
- "epoch": 0.69,
510
- "eval_loss": Infinity,
511
- "eval_runtime": 1237.3739,
512
- "eval_samples_per_second": 21.512,
513
- "eval_steps_per_second": 0.672,
514
- "eval_wer": 1.0,
515
- "step": 1300
516
- },
517
- {
518
- "epoch": 0.7,
519
- "learning_rate": 0.0005009534186870062,
520
- "loss": 3.3496,
521
- "step": 1320
522
- },
523
- {
524
- "epoch": 0.72,
525
- "learning_rate": 0.0004968673385998365,
526
- "loss": 3.3918,
527
- "step": 1340
528
- },
529
- {
530
- "epoch": 0.73,
531
- "learning_rate": 0.0004927812585126669,
532
- "loss": 3.3232,
533
- "step": 1360
534
- },
535
- {
536
- "epoch": 0.74,
537
- "learning_rate": 0.0004886951784254972,
538
- "loss": 3.288,
539
- "step": 1380
540
- },
541
- {
542
- "epoch": 0.75,
543
- "learning_rate": 0.00048481340234268587,
544
- "loss": 3.6932,
545
- "step": 1400
546
- },
547
- {
548
- "epoch": 0.75,
549
- "eval_loss": Infinity,
550
- "eval_runtime": 1236.694,
551
- "eval_samples_per_second": 21.524,
552
- "eval_steps_per_second": 0.673,
553
- "eval_wer": 1.0,
554
- "step": 1400
555
- },
556
- {
557
- "epoch": 0.76,
558
- "learning_rate": 0.00048072732225551624,
559
- "loss": 3.5411,
560
- "step": 1420
561
- },
562
- {
563
- "epoch": 0.77,
564
- "learning_rate": 0.0004766412421683465,
565
- "loss": 3.5658,
566
- "step": 1440
567
- },
568
- {
569
- "epoch": 0.78,
570
- "learning_rate": 0.00047275946608553524,
571
- "loss": 3.7666,
572
- "step": 1460
573
- },
574
- {
575
- "epoch": 0.79,
576
- "learning_rate": 0.0004686733859983656,
577
- "loss": 3.7564,
578
- "step": 1480
579
- },
580
- {
581
- "epoch": 0.8,
582
- "learning_rate": 0.00046479160991555436,
583
- "loss": 3.7939,
584
- "step": 1500
585
- },
586
- {
587
- "epoch": 0.8,
588
- "eval_loss": Infinity,
589
- "eval_runtime": 1235.7295,
590
- "eval_samples_per_second": 21.54,
591
- "eval_steps_per_second": 0.673,
592
- "eval_wer": 1.0,
593
- "step": 1500
594
- },
595
- {
596
- "epoch": 0.81,
597
- "learning_rate": 0.00046070552982838467,
598
- "loss": 3.7471,
599
- "step": 1520
600
- },
601
- {
602
- "epoch": 0.82,
603
- "learning_rate": 0.00045661944974121493,
604
- "loss": 3.7457,
605
- "step": 1540
606
- },
607
- {
608
- "epoch": 0.83,
609
- "learning_rate": 0.0004527376736584037,
610
- "loss": 3.798,
611
- "step": 1560
612
- },
613
- {
614
- "epoch": 0.84,
615
- "learning_rate": 0.00044865159357123405,
616
- "loss": 3.7611,
617
- "step": 1580
618
- },
619
- {
620
- "epoch": 0.85,
621
- "learning_rate": 0.0004447698174884228,
622
- "loss": 3.9284,
623
- "step": 1600
624
- },
625
- {
626
- "epoch": 0.85,
627
- "eval_loss": Infinity,
628
- "eval_runtime": 1251.8158,
629
- "eval_samples_per_second": 21.264,
630
- "eval_steps_per_second": 0.665,
631
- "eval_wer": 1.0,
632
- "step": 1600
633
- },
634
- {
635
- "epoch": 0.86,
636
- "learning_rate": 0.0004406837374012531,
637
- "loss": 3.7416,
638
- "step": 1620
639
- },
640
- {
641
- "epoch": 0.88,
642
- "learning_rate": 0.00043659765731408337,
643
- "loss": 3.7416,
644
- "step": 1640
645
- },
646
- {
647
- "epoch": 0.89,
648
- "learning_rate": 0.0004327158812312721,
649
- "loss": 3.8024,
650
- "step": 1660
651
- },
652
- {
653
- "epoch": 0.9,
654
- "learning_rate": 0.0004286298011441025,
655
- "loss": 3.7578,
656
- "step": 1680
657
- },
658
- {
659
- "epoch": 0.91,
660
- "learning_rate": 0.0004247480250612912,
661
- "loss": 3.7859,
662
- "step": 1700
663
- },
664
- {
665
- "epoch": 0.91,
666
- "eval_loss": Infinity,
667
- "eval_runtime": 1235.734,
668
- "eval_samples_per_second": 21.54,
669
- "eval_steps_per_second": 0.673,
670
- "eval_wer": 1.0,
671
- "step": 1700
672
- },
673
- {
674
- "epoch": 0.92,
675
- "learning_rate": 0.0004206619449741215,
676
- "loss": 3.7427,
677
- "step": 1720
678
- },
679
- {
680
- "epoch": 0.93,
681
- "learning_rate": 0.0004165758648869518,
682
- "loss": 3.751,
683
- "step": 1740
684
- },
685
- {
686
- "epoch": 0.94,
687
- "learning_rate": 0.00041248978479978206,
688
- "loss": 3.6701,
689
- "step": 1760
690
- },
691
- {
692
- "epoch": 0.95,
693
- "learning_rate": 0.0004084037047126123,
694
- "loss": 3.7613,
695
- "step": 1780
696
- },
697
- {
698
- "epoch": 0.96,
699
- "learning_rate": 0.00040472623263415966,
700
- "loss": 3.9363,
701
- "step": 1800
702
- },
703
- {
704
- "epoch": 0.96,
705
- "eval_loss": Infinity,
706
- "eval_runtime": 1236.9158,
707
- "eval_samples_per_second": 21.52,
708
- "eval_steps_per_second": 0.673,
709
- "eval_wer": 1.0,
710
- "step": 1800
711
- },
712
- {
713
- "epoch": 0.97,
714
- "learning_rate": 0.0004006401525469899,
715
- "loss": 3.7464,
716
- "step": 1820
717
- },
718
- {
719
- "epoch": 0.98,
720
- "learning_rate": 0.00039655407245982023,
721
- "loss": 3.7414,
722
- "step": 1840
723
- },
724
- {
725
- "epoch": 0.99,
726
- "learning_rate": 0.000392672296377009,
727
- "loss": 3.8036,
728
- "step": 1860
729
- },
730
- {
731
- "epoch": 1.0,
732
- "learning_rate": 0.00038879052029419783,
733
- "loss": 3.8768,
734
- "step": 1880
735
- },
736
- {
737
- "epoch": 1.01,
738
- "learning_rate": 0.0003847044402070281,
739
- "loss": 3.7573,
740
- "step": 1900
741
- },
742
- {
743
- "epoch": 1.01,
744
- "eval_loss": Infinity,
745
- "eval_runtime": 1232.3535,
746
- "eval_samples_per_second": 21.599,
747
- "eval_steps_per_second": 0.675,
748
- "eval_wer": 1.0,
749
- "step": 1900
750
- },
751
- {
752
- "epoch": 1.03,
753
- "learning_rate": 0.00038061836011985835,
754
- "loss": 3.6959,
755
- "step": 1920
756
- },
757
- {
758
- "epoch": 1.04,
759
- "learning_rate": 0.00037653228003268867,
760
- "loss": 3.7313,
761
- "step": 1940
762
- },
763
- {
764
- "epoch": 1.05,
765
- "learning_rate": 0.00037244619994551893,
766
- "loss": 3.7676,
767
- "step": 1960
768
- },
769
- {
770
- "epoch": 1.06,
771
- "learning_rate": 0.0003685644238627077,
772
- "loss": 3.7872,
773
- "step": 1980
774
- },
775
- {
776
- "epoch": 1.07,
777
- "learning_rate": 0.000364478343775538,
778
- "loss": 3.7553,
779
- "step": 2000
780
- },
781
- {
782
- "epoch": 1.07,
783
- "eval_loss": Infinity,
784
- "eval_runtime": 1239.2247,
785
- "eval_samples_per_second": 21.48,
786
- "eval_steps_per_second": 0.671,
787
- "eval_wer": 1.0,
788
- "step": 2000
789
- },
790
- {
791
- "epoch": 1.08,
792
- "learning_rate": 0.0003603922636883683,
793
- "loss": 3.694,
794
- "step": 2020
795
- },
796
- {
797
- "epoch": 1.09,
798
- "learning_rate": 0.0003565104876055571,
799
- "loss": 3.8389,
800
- "step": 2040
801
- },
802
- {
803
- "epoch": 1.1,
804
- "learning_rate": 0.00035242440751838736,
805
- "loss": 3.7573,
806
- "step": 2060
807
- },
808
- {
809
- "epoch": 1.11,
810
- "learning_rate": 0.0003483383274312177,
811
- "loss": 3.6641,
812
- "step": 2080
813
- },
814
- {
815
- "epoch": 1.12,
816
- "learning_rate": 0.00034425224734404794,
817
- "loss": 3.7606,
818
- "step": 2100
819
- },
820
- {
821
- "epoch": 1.12,
822
- "eval_loss": Infinity,
823
- "eval_runtime": 1237.7997,
824
- "eval_samples_per_second": 21.504,
825
- "eval_steps_per_second": 0.672,
826
- "eval_wer": 1.0,
827
- "step": 2100
828
- },
829
- {
830
- "epoch": 1.13,
831
- "learning_rate": 0.00034016616725687825,
832
- "loss": 3.6827,
833
- "step": 2120
834
- },
835
- {
836
- "epoch": 1.14,
837
- "learning_rate": 0.000336284391174067,
838
- "loss": 3.8375,
839
- "step": 2140
840
- },
841
- {
842
- "epoch": 1.15,
843
- "learning_rate": 0.0003321983110868973,
844
- "loss": 3.7527,
845
- "step": 2160
846
- },
847
- {
848
- "epoch": 1.16,
849
- "learning_rate": 0.0003283165350040861,
850
- "loss": 3.7881,
851
- "step": 2180
852
- },
853
- {
854
- "epoch": 1.17,
855
- "learning_rate": 0.0003242304549169164,
856
- "loss": 3.7514,
857
- "step": 2200
858
- },
859
- {
860
- "epoch": 1.17,
861
- "eval_loss": Infinity,
862
- "eval_runtime": 1237.076,
863
- "eval_samples_per_second": 21.517,
864
- "eval_steps_per_second": 0.673,
865
- "eval_wer": 1.0,
866
- "step": 2200
867
- },
868
- {
869
- "epoch": 1.19,
870
- "learning_rate": 0.0003201443748297467,
871
- "loss": 3.6727,
872
- "step": 2220
873
- },
874
- {
875
- "epoch": 1.2,
876
- "learning_rate": 0.00031626259874693543,
877
- "loss": 3.8424,
878
- "step": 2240
879
- },
880
- {
881
- "epoch": 1.21,
882
- "learning_rate": 0.00031217651865976574,
883
- "loss": 3.7594,
884
- "step": 2260
885
- },
886
- {
887
- "epoch": 1.22,
888
- "learning_rate": 0.00030829474257695454,
889
- "loss": 3.7862,
890
- "step": 2280
891
- },
892
- {
893
- "epoch": 1.23,
894
- "learning_rate": 0.0003042086624897848,
895
- "loss": 3.7472,
896
- "step": 2300
897
- },
898
- {
899
- "epoch": 1.23,
900
- "eval_loss": Infinity,
901
- "eval_runtime": 1236.6319,
902
- "eval_samples_per_second": 21.525,
903
- "eval_steps_per_second": 0.673,
904
- "eval_wer": 1.0,
905
- "step": 2300
906
- },
907
- {
908
- "epoch": 1.24,
909
- "learning_rate": 0.0003001225824026151,
910
- "loss": 3.6956,
911
- "step": 2320
912
- },
913
- {
914
- "epoch": 1.25,
915
- "learning_rate": 0.0002960365023154454,
916
- "loss": 3.7248,
917
- "step": 2340
918
- },
919
- {
920
- "epoch": 1.26,
921
- "learning_rate": 0.0002919504222282757,
922
- "loss": 3.7531,
923
- "step": 2360
924
- },
925
- {
926
- "epoch": 1.27,
927
- "learning_rate": 0.0002882729501498229,
928
- "loss": 3.9245,
929
- "step": 2380
930
- },
931
- {
932
- "epoch": 1.28,
933
- "learning_rate": 0.00028418687006265324,
934
- "loss": 3.7478,
935
- "step": 2400
936
- },
937
- {
938
- "epoch": 1.28,
939
- "eval_loss": Infinity,
940
- "eval_runtime": 1236.7725,
941
- "eval_samples_per_second": 21.522,
942
- "eval_steps_per_second": 0.673,
943
- "eval_wer": 1.0,
944
- "step": 2400
945
- },
946
- {
947
- "epoch": 1.29,
948
- "learning_rate": 0.00028010078997548355,
949
- "loss": 3.6826,
950
- "step": 2420
951
- },
952
- {
953
- "epoch": 1.3,
954
- "learning_rate": 0.0002760147098883138,
955
- "loss": 3.7236,
956
- "step": 2440
957
- },
958
- {
959
- "epoch": 1.31,
960
- "learning_rate": 0.00027192862980114413,
961
- "loss": 3.7609,
962
- "step": 2460
963
- },
964
- {
965
- "epoch": 1.32,
966
- "learning_rate": 0.00026804685371833287,
967
- "loss": 3.7846,
968
- "step": 2480
969
- },
970
- {
971
- "epoch": 1.33,
972
- "learning_rate": 0.0002639607736311632,
973
- "loss": 3.7496,
974
- "step": 2500
975
- },
976
- {
977
- "epoch": 1.33,
978
- "eval_loss": Infinity,
979
- "eval_runtime": 1233.5496,
980
- "eval_samples_per_second": 21.578,
981
- "eval_steps_per_second": 0.674,
982
- "eval_wer": 1.0,
983
- "step": 2500
984
- },
985
- {
986
- "epoch": 1.35,
987
- "learning_rate": 0.00025987469354399345,
988
- "loss": 3.6785,
989
- "step": 2520
990
- },
991
- {
992
- "epoch": 1.36,
993
- "learning_rate": 0.00025578861345682376,
994
- "loss": 3.7212,
995
- "step": 2540
996
- },
997
- {
998
- "epoch": 1.37,
999
- "learning_rate": 0.0002517025333696541,
1000
- "loss": 3.7637,
1001
- "step": 2560
1002
- },
1003
- {
1004
- "epoch": 1.38,
1005
- "learning_rate": 0.00024761645328248434,
1006
- "loss": 3.6513,
1007
- "step": 2580
1008
- },
1009
- {
1010
- "epoch": 1.39,
1011
- "learning_rate": 0.00024353037319531465,
1012
- "loss": 3.7513,
1013
- "step": 2600
1014
- },
1015
- {
1016
- "epoch": 1.39,
1017
- "eval_loss": Infinity,
1018
- "eval_runtime": 1231.8132,
1019
- "eval_samples_per_second": 21.609,
1020
- "eval_steps_per_second": 0.675,
1021
- "eval_wer": 1.0,
1022
- "step": 2600
1023
- },
1024
- {
1025
- "epoch": 1.4,
1026
- "learning_rate": 0.00023944429310814494,
1027
- "loss": 3.6818,
1028
- "step": 2620
1029
- },
1030
- {
1031
- "epoch": 1.41,
1032
- "learning_rate": 0.00023556251702533368,
1033
- "loss": 3.8417,
1034
- "step": 2640
1035
- },
1036
- {
1037
- "epoch": 1.42,
1038
- "learning_rate": 0.000231476436938164,
1039
- "loss": 3.7639,
1040
- "step": 2660
1041
- },
1042
- {
1043
- "epoch": 1.43,
1044
- "learning_rate": 0.00022759466085535277,
1045
- "loss": 3.7842,
1046
- "step": 2680
1047
- },
1048
- {
1049
- "epoch": 1.44,
1050
- "learning_rate": 0.00022350858076818309,
1051
- "loss": 3.7497,
1052
- "step": 2700
1053
- },
1054
- {
1055
- "epoch": 1.44,
1056
- "eval_loss": Infinity,
1057
- "eval_runtime": 1234.8437,
1058
- "eval_samples_per_second": 21.556,
1059
- "eval_steps_per_second": 0.674,
1060
- "eval_wer": 1.0,
1061
- "step": 2700
1062
- },
1063
- {
1064
- "epoch": 1.45,
1065
- "learning_rate": 0.00021942250068101335,
1066
- "loss": 3.6795,
1067
- "step": 2720
1068
- },
1069
- {
1070
- "epoch": 1.46,
1071
- "learning_rate": 0.00021554072459820212,
1072
- "loss": 3.8421,
1073
- "step": 2740
1074
- },
1075
- {
1076
- "epoch": 1.47,
1077
- "learning_rate": 0.00021145464451103243,
1078
- "loss": 3.7614,
1079
- "step": 2760
1080
- },
1081
- {
1082
- "epoch": 1.48,
1083
- "learning_rate": 0.0002075728684282212,
1084
- "loss": 3.9259,
1085
- "step": 2780
1086
- },
1087
- {
1088
- "epoch": 1.49,
1089
- "learning_rate": 0.00020348678834105147,
1090
- "loss": 3.7539,
1091
- "step": 2800
1092
- },
1093
- {
1094
- "epoch": 1.49,
1095
- "eval_loss": Infinity,
1096
- "eval_runtime": 1240.1103,
1097
- "eval_samples_per_second": 21.464,
1098
- "eval_steps_per_second": 0.671,
1099
- "eval_wer": 1.0,
1100
- "step": 2800
1101
- },
1102
- {
1103
- "epoch": 1.51,
1104
- "learning_rate": 0.00019940070825388178,
1105
- "loss": 3.6889,
1106
- "step": 2820
1107
- },
1108
- {
1109
- "epoch": 1.52,
1110
- "learning_rate": 0.00019551893217107055,
1111
- "loss": 3.843,
1112
- "step": 2840
1113
- },
1114
- {
1115
- "epoch": 1.53,
1116
- "learning_rate": 0.00019143285208390087,
1117
- "loss": 3.7666,
1118
- "step": 2860
1119
- },
1120
- {
1121
- "epoch": 1.54,
1122
- "learning_rate": 0.00018755107600108964,
1123
- "loss": 3.786,
1124
- "step": 2880
1125
- },
1126
- {
1127
- "epoch": 1.55,
1128
- "learning_rate": 0.0001834649959139199,
1129
- "loss": 3.7581,
1130
- "step": 2900
1131
- },
1132
- {
1133
- "epoch": 1.55,
1134
- "eval_loss": Infinity,
1135
- "eval_runtime": 1231.1699,
1136
- "eval_samples_per_second": 21.62,
1137
- "eval_steps_per_second": 0.676,
1138
- "eval_wer": 1.0,
1139
- "step": 2900
1140
- },
1141
- {
1142
- "epoch": 1.56,
1143
- "learning_rate": 0.00017937891582675021,
1144
- "loss": 3.672,
1145
- "step": 2920
1146
- },
1147
- {
1148
- "epoch": 1.57,
1149
- "learning_rate": 0.00017549713974393899,
1150
- "loss": 3.8539,
1151
- "step": 2940
1152
- },
1153
- {
1154
- "epoch": 1.58,
1155
- "learning_rate": 0.00017141105965676927,
1156
- "loss": 3.7652,
1157
- "step": 2960
1158
- },
1159
- {
1160
- "epoch": 1.59,
1161
- "learning_rate": 0.00016752928357395807,
1162
- "loss": 3.7825,
1163
- "step": 2980
1164
- },
1165
- {
1166
- "epoch": 1.6,
1167
- "learning_rate": 0.00016344320348678833,
1168
- "loss": 3.7572,
1169
- "step": 3000
1170
- },
1171
- {
1172
- "epoch": 1.6,
1173
- "eval_loss": Infinity,
1174
- "eval_runtime": 1241.8499,
1175
- "eval_samples_per_second": 21.434,
1176
- "eval_steps_per_second": 0.67,
1177
- "eval_wer": 1.0,
1178
- "step": 3000
1179
- },
1180
- {
1181
- "epoch": 1.61,
1182
- "learning_rate": 0.00015935712339961862,
1183
- "loss": 3.6906,
1184
- "step": 3020
1185
- },
1186
- {
1187
- "epoch": 1.62,
1188
- "learning_rate": 0.00015547534731680742,
1189
- "loss": 3.845,
1190
- "step": 3040
1191
- },
1192
- {
1193
- "epoch": 1.63,
1194
- "learning_rate": 0.0001513892672296377,
1195
- "loss": 3.763,
1196
- "step": 3060
1197
- },
1198
- {
1199
- "epoch": 1.64,
1200
- "learning_rate": 0.00014750749114682648,
1201
- "loss": 3.779,
1202
- "step": 3080
1203
- },
1204
- {
1205
- "epoch": 1.66,
1206
- "learning_rate": 0.0001434214110596568,
1207
- "loss": 3.7589,
1208
- "step": 3100
1209
- },
1210
- {
1211
- "epoch": 1.66,
1212
- "eval_loss": Infinity,
1213
- "eval_runtime": 1236.916,
1214
- "eval_samples_per_second": 21.52,
1215
- "eval_steps_per_second": 0.673,
1216
- "eval_wer": 1.0,
1217
- "step": 3100
1218
- },
1219
- {
1220
- "epoch": 1.67,
1221
- "learning_rate": 0.00013933533097248705,
1222
- "loss": 3.6948,
1223
- "step": 3120
1224
- },
1225
- {
1226
- "epoch": 1.68,
1227
- "learning_rate": 0.00013524925088531734,
1228
- "loss": 3.7211,
1229
- "step": 3140
1230
- },
1231
- {
1232
- "epoch": 1.69,
1233
- "learning_rate": 0.00013116317079814763,
1234
- "loss": 3.7588,
1235
- "step": 3160
1236
- },
1237
- {
1238
- "epoch": 1.7,
1239
- "learning_rate": 0.00012728139471533643,
1240
- "loss": 3.7898,
1241
- "step": 3180
1242
- },
1243
- {
1244
- "epoch": 1.71,
1245
- "learning_rate": 0.00012319531462816672,
1246
- "loss": 3.7592,
1247
- "step": 3200
1248
- },
1249
- {
1250
- "epoch": 1.71,
1251
- "eval_loss": Infinity,
1252
- "eval_runtime": 1232.1567,
1253
- "eval_samples_per_second": 21.603,
1254
- "eval_steps_per_second": 0.675,
1255
- "eval_wer": 1.0,
1256
- "step": 3200
1257
- },
1258
- {
1259
- "epoch": 1.72,
1260
- "learning_rate": 0.000119109234540997,
1261
- "loss": 3.688,
1262
- "step": 3220
1263
- },
1264
- {
1265
- "epoch": 1.73,
1266
- "learning_rate": 0.00011522745845818579,
1267
- "loss": 3.8419,
1268
- "step": 3240
1269
- },
1270
- {
1271
- "epoch": 1.74,
1272
- "learning_rate": 0.00011114137837101608,
1273
- "loss": 3.7578,
1274
- "step": 3260
1275
- },
1276
- {
1277
- "epoch": 1.75,
1278
- "learning_rate": 0.00010705529828384636,
1279
- "loss": 3.6534,
1280
- "step": 3280
1281
- },
1282
- {
1283
- "epoch": 1.76,
1284
- "learning_rate": 0.00010296921819667667,
1285
- "loss": 3.7531,
1286
- "step": 3300
1287
- },
1288
- {
1289
- "epoch": 1.76,
1290
- "eval_loss": Infinity,
1291
- "eval_runtime": 1237.4955,
1292
- "eval_samples_per_second": 21.51,
1293
- "eval_steps_per_second": 0.672,
1294
- "eval_wer": 1.0,
1295
- "step": 3300
1296
- },
1297
- {
1298
- "epoch": 1.77,
1299
- "learning_rate": 9.888313810950695e-05,
1300
- "loss": 3.691,
1301
- "step": 3320
1302
- },
1303
- {
1304
- "epoch": 1.78,
1305
- "learning_rate": 9.500136202669572e-05,
1306
- "loss": 3.8424,
1307
- "step": 3340
1308
- },
1309
- {
1310
- "epoch": 1.79,
1311
- "learning_rate": 9.091528193952601e-05,
1312
- "loss": 3.7698,
1313
- "step": 3360
1314
- },
1315
- {
1316
- "epoch": 1.8,
1317
- "learning_rate": 8.70335058567148e-05,
1318
- "loss": 3.7836,
1319
- "step": 3380
1320
- },
1321
- {
1322
- "epoch": 1.82,
1323
- "learning_rate": 8.294742576954509e-05,
1324
- "loss": 3.7567,
1325
- "step": 3400
1326
- },
1327
- {
1328
- "epoch": 1.82,
1329
- "eval_loss": Infinity,
1330
- "eval_runtime": 1242.7888,
1331
- "eval_samples_per_second": 21.418,
1332
- "eval_steps_per_second": 0.669,
1333
- "eval_wer": 1.0,
1334
- "step": 3400
1335
- },
1336
- {
1337
- "epoch": 1.83,
1338
- "learning_rate": 7.886134568237537e-05,
1339
- "loss": 3.6741,
1340
- "step": 3420
1341
- },
1342
- {
1343
- "epoch": 1.84,
1344
- "learning_rate": 7.497956959956416e-05,
1345
- "loss": 3.8442,
1346
- "step": 3440
1347
- },
1348
- {
1349
- "epoch": 1.85,
1350
- "learning_rate": 7.089348951239445e-05,
1351
- "loss": 3.7663,
1352
- "step": 3460
1353
- },
1354
- {
1355
- "epoch": 1.86,
1356
- "learning_rate": 6.701171342958322e-05,
1357
- "loss": 3.7789,
1358
- "step": 3480
1359
- },
1360
- {
1361
- "epoch": 1.87,
1362
- "learning_rate": 6.292563334241352e-05,
1363
- "loss": 3.7613,
1364
- "step": 3500
1365
- },
1366
- {
1367
- "epoch": 1.87,
1368
- "eval_loss": Infinity,
1369
- "eval_runtime": 1244.1733,
1370
- "eval_samples_per_second": 21.394,
1371
- "eval_steps_per_second": 0.669,
1372
- "eval_wer": 1.0,
1373
- "step": 3500
1374
- },
1375
- {
1376
- "epoch": 1.88,
1377
- "learning_rate": 5.88395532552438e-05,
1378
- "loss": 3.6884,
1379
- "step": 3520
1380
- },
1381
- {
1382
- "epoch": 1.89,
1383
- "learning_rate": 5.4957777172432585e-05,
1384
- "loss": 3.8268,
1385
- "step": 3540
1386
- },
1387
- {
1388
- "epoch": 1.9,
1389
- "learning_rate": 5.0871697085262866e-05,
1390
- "loss": 3.7517,
1391
- "step": 3560
1392
- },
1393
- {
1394
- "epoch": 1.91,
1395
- "learning_rate": 4.6989921002451645e-05,
1396
- "loss": 3.7802,
1397
- "step": 3580
1398
- },
1399
- {
1400
- "epoch": 1.92,
1401
- "learning_rate": 4.290384091528194e-05,
1402
- "loss": 3.7516,
1403
- "step": 3600
1404
- },
1405
- {
1406
- "epoch": 1.92,
1407
- "eval_loss": Infinity,
1408
- "eval_runtime": 1253.9236,
1409
- "eval_samples_per_second": 21.228,
1410
- "eval_steps_per_second": 0.664,
1411
- "eval_wer": 1.0,
1412
- "step": 3600
1413
- },
1414
- {
1415
- "epoch": 1.93,
1416
- "learning_rate": 3.8817760828112234e-05,
1417
- "loss": 3.6719,
1418
- "step": 3620
1419
- },
1420
- {
1421
- "epoch": 1.94,
1422
- "learning_rate": 3.4935984745301006e-05,
1423
- "loss": 3.8471,
1424
- "step": 3640
1425
- },
1426
- {
1427
- "epoch": 1.95,
1428
- "learning_rate": 3.08499046581313e-05,
1429
- "loss": 3.7568,
1430
- "step": 3660
1431
- },
1432
- {
1433
- "epoch": 1.96,
1434
- "learning_rate": 2.6968128575320075e-05,
1435
- "loss": 3.7887,
1436
- "step": 3680
1437
- },
1438
- {
1439
- "epoch": 1.98,
1440
- "learning_rate": 2.2882048488150366e-05,
1441
- "loss": 3.7581,
1442
- "step": 3700
1443
- },
1444
- {
1445
- "epoch": 1.98,
1446
- "eval_loss": Infinity,
1447
- "eval_runtime": 1242.0033,
1448
- "eval_samples_per_second": 21.432,
1449
- "eval_steps_per_second": 0.67,
1450
- "eval_wer": 1.0,
1451
- "step": 3700
1452
- }
1453
- ],
1454
- "max_steps": 3746,
1455
- "num_train_epochs": 2,
1456
- "total_flos": 7.120999649529697e+19,
1457
- "trial_name": null,
1458
- "trial_params": null
1459
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3abb902c24b9b514e6979e814143734f3ac477116b25bc616e937ac37aa386b6
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:434bb79de859e935ca1aaf8411c1439a4e24ef8e2396b088b07525e56e0847d8
3
  size 1262063089