Training in progress, step 31000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 223144592
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:869a0e57d1d1ee9a2fb1d0c01f9524bb4c80a900cd8117beeba130d80f1bf321
|
| 3 |
size 223144592
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 281574266
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb63c0ce7275a1c9ee86300c27e661af044d42bb26561a6ccd9d69c137ff500d
|
| 3 |
size 281574266
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9dc6f8b922ca5c3d24f7537f685a761eded3c26ca293345adbe507125769101a
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3931a748c5d903224857a1ab9abba558951fac8f6dc32f736599e211fe96e5ce
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:def63641a6ca9901171b06d19cec2f06e49d8d9307e1c178a24fd97f04041d2a
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 28000,
|
| 3 |
"best_metric": 0.18110816386678455,
|
| 4 |
"best_model_checkpoint": "./distil-whisper/checkpoint-28000",
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 1000,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2378,6 +2378,85 @@
|
|
| 2378 |
"eval_steps_per_second": 0.428,
|
| 2379 |
"eval_wer": 0.18670792808723843,
|
| 2380 |
"step": 30000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2381 |
}
|
| 2382 |
],
|
| 2383 |
"logging_steps": 100,
|
|
@@ -2397,7 +2476,7 @@
|
|
| 2397 |
"attributes": {}
|
| 2398 |
}
|
| 2399 |
},
|
| 2400 |
-
"total_flos":
|
| 2401 |
"train_batch_size": 8,
|
| 2402 |
"trial_name": null,
|
| 2403 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": 28000,
|
| 3 |
"best_metric": 0.18110816386678455,
|
| 4 |
"best_model_checkpoint": "./distil-whisper/checkpoint-28000",
|
| 5 |
+
"epoch": 18.16051552431166,
|
| 6 |
"eval_steps": 1000,
|
| 7 |
+
"global_step": 31000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2378 |
"eval_steps_per_second": 0.428,
|
| 2379 |
"eval_wer": 0.18670792808723843,
|
| 2380 |
"step": 30000
|
| 2381 |
+
},
|
| 2382 |
+
{
|
| 2383 |
+
"epoch": 17.63327475102519,
|
| 2384 |
+
"grad_norm": 6.075071334838867,
|
| 2385 |
+
"learning_rate": 2.0157068062827225e-05,
|
| 2386 |
+
"loss": 0.7471,
|
| 2387 |
+
"step": 30100
|
| 2388 |
+
},
|
| 2389 |
+
{
|
| 2390 |
+
"epoch": 17.691857059168132,
|
| 2391 |
+
"grad_norm": 6.894543647766113,
|
| 2392 |
+
"learning_rate": 1.9887191666216872e-05,
|
| 2393 |
+
"loss": 0.7926,
|
| 2394 |
+
"step": 30200
|
| 2395 |
+
},
|
| 2396 |
+
{
|
| 2397 |
+
"epoch": 17.75043936731107,
|
| 2398 |
+
"grad_norm": 11.549782752990723,
|
| 2399 |
+
"learning_rate": 1.9617315269606522e-05,
|
| 2400 |
+
"loss": 0.7308,
|
| 2401 |
+
"step": 30300
|
| 2402 |
+
},
|
| 2403 |
+
{
|
| 2404 |
+
"epoch": 17.809021675454012,
|
| 2405 |
+
"grad_norm": 7.361614227294922,
|
| 2406 |
+
"learning_rate": 1.934743887299617e-05,
|
| 2407 |
+
"loss": 0.7545,
|
| 2408 |
+
"step": 30400
|
| 2409 |
+
},
|
| 2410 |
+
{
|
| 2411 |
+
"epoch": 17.867603983596954,
|
| 2412 |
+
"grad_norm": 12.995938301086426,
|
| 2413 |
+
"learning_rate": 1.9077562476385815e-05,
|
| 2414 |
+
"loss": 0.7809,
|
| 2415 |
+
"step": 30500
|
| 2416 |
+
},
|
| 2417 |
+
{
|
| 2418 |
+
"epoch": 17.926186291739896,
|
| 2419 |
+
"grad_norm": 4.978572368621826,
|
| 2420 |
+
"learning_rate": 1.8807686079775465e-05,
|
| 2421 |
+
"loss": 0.7587,
|
| 2422 |
+
"step": 30600
|
| 2423 |
+
},
|
| 2424 |
+
{
|
| 2425 |
+
"epoch": 17.984768599882834,
|
| 2426 |
+
"grad_norm": 6.543401718139648,
|
| 2427 |
+
"learning_rate": 1.853780968316511e-05,
|
| 2428 |
+
"loss": 0.7432,
|
| 2429 |
+
"step": 30700
|
| 2430 |
+
},
|
| 2431 |
+
{
|
| 2432 |
+
"epoch": 18.043350908025776,
|
| 2433 |
+
"grad_norm": 8.726702690124512,
|
| 2434 |
+
"learning_rate": 1.8267933286554758e-05,
|
| 2435 |
+
"loss": 0.7305,
|
| 2436 |
+
"step": 30800
|
| 2437 |
+
},
|
| 2438 |
+
{
|
| 2439 |
+
"epoch": 18.101933216168717,
|
| 2440 |
+
"grad_norm": 6.32004976272583,
|
| 2441 |
+
"learning_rate": 1.7998056889944405e-05,
|
| 2442 |
+
"loss": 0.7454,
|
| 2443 |
+
"step": 30900
|
| 2444 |
+
},
|
| 2445 |
+
{
|
| 2446 |
+
"epoch": 18.16051552431166,
|
| 2447 |
+
"grad_norm": 5.896217346191406,
|
| 2448 |
+
"learning_rate": 1.7728180493334055e-05,
|
| 2449 |
+
"loss": 0.7166,
|
| 2450 |
+
"step": 31000
|
| 2451 |
+
},
|
| 2452 |
+
{
|
| 2453 |
+
"epoch": 18.16051552431166,
|
| 2454 |
+
"eval_loss": 0.0838567316532135,
|
| 2455 |
+
"eval_runtime": 147.3922,
|
| 2456 |
+
"eval_samples_per_second": 3.392,
|
| 2457 |
+
"eval_steps_per_second": 0.427,
|
| 2458 |
+
"eval_wer": 0.1905393457117595,
|
| 2459 |
+
"step": 31000
|
| 2460 |
}
|
| 2461 |
],
|
| 2462 |
"logging_steps": 100,
|
|
|
|
| 2476 |
"attributes": {}
|
| 2477 |
}
|
| 2478 |
},
|
| 2479 |
+
"total_flos": 4.032253787111424e+19,
|
| 2480 |
"train_batch_size": 8,
|
| 2481 |
"trial_name": null,
|
| 2482 |
"trial_params": null
|