Baselhany committed
Commit 0481aaf · verified · 1 Parent(s): 75ce36a

Training in progress, step 31000, checkpoint
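This commit only rewrites LFS pointers and the trainer state: the five pointer diffs below change nothing but the sha256 oid of each checkpoint file (the byte sizes stay identical). A minimal sketch (Python; the local path is hypothetical and assumes the checkpoint file has already been downloaded) for checking that a local copy matches its updated pointer:

import hashlib
import os

path = "last-checkpoint/model.safetensors"  # hypothetical local copy of the checkpoint file
# Expected values copied from the new model.safetensors pointer in this commit.
expected_oid = "869a0e57d1d1ee9a2fb1d0c01f9524bb4c80a900cd8117beeba130d80f1bf321"
expected_size = 223144592  # bytes, from the pointer's "size" line

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert sha.hexdigest() == expected_oid, "sha256 mismatch"
print("model.safetensors matches its LFS pointer")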

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aac9fb703442293b19e405bf4164788bf426a64df5cc82596cf5e220aaa680b7
+oid sha256:869a0e57d1d1ee9a2fb1d0c01f9524bb4c80a900cd8117beeba130d80f1bf321
 size 223144592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d2a33825f6a5fff2b41693fbcc0f91ce6163ef73cdbd1c03e6554d0b82c5f42
+oid sha256:eb63c0ce7275a1c9ee86300c27e661af044d42bb26561a6ccd9d69c137ff500d
 size 281574266
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b5cc2e059bf5994720338ae5c6e6dec69e298afa0568c75f8adaffc4768726d
+oid sha256:9dc6f8b922ca5c3d24f7537f685a761eded3c26ca293345adbe507125769101a
 size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c6d46aec3833efc2c92b1486514727c87c4f32a04aeafab142f4a8c156f76f8d
+oid sha256:3931a748c5d903224857a1ab9abba558951fac8f6dc32f736599e211fe96e5ce
 size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ba53b40b2a30a25e436425c9258142131e66d9a345f8a16e78444d8fcb29696
+oid sha256:def63641a6ca9901171b06d19cec2f06e49d8d9307e1c178a24fd97f04041d2a
 size 1064
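The trainer_state.json diff that follows appends the step-30100 through step-31000 training log entries and a new step-31000 eval record. A minimal sketch (Python; assumes a local download of last-checkpoint/trainer_state.json and the standard Trainer layout in which these entries sit under the "log_history" list) for reading back the newest eval metrics:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Eval records are the log entries that carry an "eval_wer" key.
evals = [e for e in state["log_history"] if "eval_wer" in e]
latest = max(evals, key=lambda e: e["step"])

print(f"step {latest['step']}: eval_wer={latest['eval_wer']:.4f}, eval_loss={latest['eval_loss']:.4f}")
print(f"best checkpoint so far: step {state['best_global_step']} (metric {state['best_metric']:.4f})")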
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": 28000,
   "best_metric": 0.18110816386678455,
   "best_model_checkpoint": "./distil-whisper/checkpoint-28000",
-  "epoch": 17.57469244288225,
+  "epoch": 18.16051552431166,
   "eval_steps": 1000,
-  "global_step": 30000,
+  "global_step": 31000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2378,6 +2378,85 @@
       "eval_steps_per_second": 0.428,
       "eval_wer": 0.18670792808723843,
       "step": 30000
+    },
+    {
+      "epoch": 17.63327475102519,
+      "grad_norm": 6.075071334838867,
+      "learning_rate": 2.0157068062827225e-05,
+      "loss": 0.7471,
+      "step": 30100
+    },
+    {
+      "epoch": 17.691857059168132,
+      "grad_norm": 6.894543647766113,
+      "learning_rate": 1.9887191666216872e-05,
+      "loss": 0.7926,
+      "step": 30200
+    },
+    {
+      "epoch": 17.75043936731107,
+      "grad_norm": 11.549782752990723,
+      "learning_rate": 1.9617315269606522e-05,
+      "loss": 0.7308,
+      "step": 30300
+    },
+    {
+      "epoch": 17.809021675454012,
+      "grad_norm": 7.361614227294922,
+      "learning_rate": 1.934743887299617e-05,
+      "loss": 0.7545,
+      "step": 30400
+    },
+    {
+      "epoch": 17.867603983596954,
+      "grad_norm": 12.995938301086426,
+      "learning_rate": 1.9077562476385815e-05,
+      "loss": 0.7809,
+      "step": 30500
+    },
+    {
+      "epoch": 17.926186291739896,
+      "grad_norm": 4.978572368621826,
+      "learning_rate": 1.8807686079775465e-05,
+      "loss": 0.7587,
+      "step": 30600
+    },
+    {
+      "epoch": 17.984768599882834,
+      "grad_norm": 6.543401718139648,
+      "learning_rate": 1.853780968316511e-05,
+      "loss": 0.7432,
+      "step": 30700
+    },
+    {
+      "epoch": 18.043350908025776,
+      "grad_norm": 8.726702690124512,
+      "learning_rate": 1.8267933286554758e-05,
+      "loss": 0.7305,
+      "step": 30800
+    },
+    {
+      "epoch": 18.101933216168717,
+      "grad_norm": 6.32004976272583,
+      "learning_rate": 1.7998056889944405e-05,
+      "loss": 0.7454,
+      "step": 30900
+    },
+    {
+      "epoch": 18.16051552431166,
+      "grad_norm": 5.896217346191406,
+      "learning_rate": 1.7728180493334055e-05,
+      "loss": 0.7166,
+      "step": 31000
+    },
+    {
+      "epoch": 18.16051552431166,
+      "eval_loss": 0.0838567316532135,
+      "eval_runtime": 147.3922,
+      "eval_samples_per_second": 3.392,
+      "eval_steps_per_second": 0.427,
+      "eval_wer": 0.1905393457117595,
+      "step": 31000
     }
   ],
   "logging_steps": 100,
@@ -2397,7 +2476,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.902187903123456e+19,
+  "total_flos": 4.032253787111424e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null