marinone94 committed on
Commit
3bca5eb
•
1 Parent(s): c756cf3

Training in progress, step 900

Browse files
{checkpoint-600 → checkpoint-900}/config.json RENAMED
File without changes
{checkpoint-600 → checkpoint-900}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94e544ed6fc2d04d9e3d9655de3eea506999b2814daab419e0d050c27a4a370f
3
  size 2490337809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:586c6f70f453bacaef539f6082450d1d6345b28afac567b5164059be9e21c0aa
3
  size 2490337809
{checkpoint-600 → checkpoint-900}/preprocessor_config.json RENAMED
File without changes
{checkpoint-600 → checkpoint-900}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3baffe7a116d58666fc0726fb4fa6188834614eac15e96843270fc53529b943e
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:704e6898ee156f5080bd3c12729eab90778262ef46c752641e041495f274ba56
3
  size 1262063089
{checkpoint-600 → checkpoint-900}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f58a88b760d3a9745a43789af262417186d48a5e0a471e8a2a080c8b3681cf9a
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eb48c4b47c267134052f2e43cc544a219534620acef82e17a3e9be9f1684520
3
+ size 14503
{checkpoint-600 → checkpoint-900}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e8415b86bbce347c0df306b84a695add049c2a3b2d0b6f4dda3bf036d341150
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8695f57df923e22b943b0b0f2b9cc7007008e80b53ccee275b3a35963fe67e9
3
  size 559
{checkpoint-600 → checkpoint-900}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6b4eb516015852d76c10d5b0c139cec1e488aa3b53e560bc99f9c2300e33423
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30116330e344473afc42b675f3adc8ae4b90035dba726f931014800aa6aa5856
3
  size 623
{checkpoint-600 → checkpoint-900}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.590163934426229,
5
- "global_step": 600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -240,11 +240,128 @@
240
  "eval_steps_per_second": 0.785,
241
  "eval_wer": 1.0,
242
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
  }
244
  ],
245
  "max_steps": 4550,
246
  "num_train_epochs": 50,
247
- "total_flos": 9.32024224555606e+18,
248
  "trial_name": null,
249
  "trial_params": null
250
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.885245901639344,
5
+ "global_step": 900,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
240
  "eval_steps_per_second": 0.785,
241
  "eval_wer": 1.0,
242
  "step": 600
243
+ },
244
+ {
245
+ "epoch": 6.81,
246
+ "learning_rate": 7.75e-05,
247
+ "loss": 2.9921,
248
+ "step": 620
249
+ },
250
+ {
251
+ "epoch": 7.03,
252
+ "learning_rate": 8e-05,
253
+ "loss": 3.0665,
254
+ "step": 640
255
+ },
256
+ {
257
+ "epoch": 7.25,
258
+ "learning_rate": 8.25e-05,
259
+ "loss": 2.9826,
260
+ "step": 660
261
+ },
262
+ {
263
+ "epoch": 7.47,
264
+ "learning_rate": 8.5e-05,
265
+ "loss": 2.9689,
266
+ "step": 680
267
+ },
268
+ {
269
+ "epoch": 7.69,
270
+ "learning_rate": 8.75e-05,
271
+ "loss": 2.9602,
272
+ "step": 700
273
+ },
274
+ {
275
+ "epoch": 7.69,
276
+ "eval_loss": 2.9620397090911865,
277
+ "eval_runtime": 193.5851,
278
+ "eval_samples_per_second": 25.017,
279
+ "eval_steps_per_second": 0.785,
280
+ "eval_wer": 1.0,
281
+ "step": 700
282
+ },
283
+ {
284
+ "epoch": 7.91,
285
+ "learning_rate": 8.999999999999999e-05,
286
+ "loss": 2.9639,
287
+ "step": 720
288
+ },
289
+ {
290
+ "epoch": 8.13,
291
+ "learning_rate": 9.25e-05,
292
+ "loss": 3.0215,
293
+ "step": 740
294
+ },
295
+ {
296
+ "epoch": 8.35,
297
+ "learning_rate": 9.5e-05,
298
+ "loss": 2.9454,
299
+ "step": 760
300
+ },
301
+ {
302
+ "epoch": 8.57,
303
+ "learning_rate": 9.750000000000001e-05,
304
+ "loss": 2.9239,
305
+ "step": 780
306
+ },
307
+ {
308
+ "epoch": 8.79,
309
+ "learning_rate": 0.0001,
310
+ "loss": 2.8756,
311
+ "step": 800
312
+ },
313
+ {
314
+ "epoch": 8.79,
315
+ "eval_loss": 2.7302000522613525,
316
+ "eval_runtime": 191.8065,
317
+ "eval_samples_per_second": 25.249,
318
+ "eval_steps_per_second": 0.792,
319
+ "eval_wer": 1.0,
320
+ "step": 800
321
+ },
322
+ {
323
+ "epoch": 9.01,
324
+ "learning_rate": 0.0001025,
325
+ "loss": 2.8933,
326
+ "step": 820
327
+ },
328
+ {
329
+ "epoch": 9.23,
330
+ "learning_rate": 0.000105,
331
+ "loss": 2.7318,
332
+ "step": 840
333
+ },
334
+ {
335
+ "epoch": 9.45,
336
+ "learning_rate": 0.0001075,
337
+ "loss": 2.5941,
338
+ "step": 860
339
+ },
340
+ {
341
+ "epoch": 9.67,
342
+ "learning_rate": 0.00011,
343
+ "loss": 2.4441,
344
+ "step": 880
345
+ },
346
+ {
347
+ "epoch": 9.89,
348
+ "learning_rate": 0.00011250000000000001,
349
+ "loss": 2.2931,
350
+ "step": 900
351
+ },
352
+ {
353
+ "epoch": 9.89,
354
+ "eval_loss": 1.5057899951934814,
355
+ "eval_runtime": 196.4368,
356
+ "eval_samples_per_second": 24.654,
357
+ "eval_steps_per_second": 0.774,
358
+ "eval_wer": 0.9775759296054499,
359
+ "step": 900
360
  }
361
  ],
362
  "max_steps": 4550,
363
  "num_train_epochs": 50,
364
+ "total_flos": 1.3944116272848585e+19,
365
  "trial_name": null,
366
  "trial_params": null
367
  }
{checkpoint-600 → checkpoint-900}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83f2151485570eebb9c2c7ead79b5dbb947c911745136ec08aec4531150fc59d
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:704e6898ee156f5080bd3c12729eab90778262ef46c752641e041495f274ba56
3
  size 1262063089