"auto-commit"
Browse files- model-bin/finetune/base/{checkpoint-97696 β checkpoint-98319}/config.json +0 -0
- model-bin/finetune/base/{checkpoint-97696 β checkpoint-98319}/optimizer.pt +1 -1
- model-bin/finetune/base/{checkpoint-97696 β checkpoint-98319}/preprocessor_config.json +0 -0
- model-bin/finetune/base/{checkpoint-97696 β checkpoint-98319}/pytorch_model.bin +1 -1
- model-bin/finetune/base/{checkpoint-97696 β checkpoint-98319}/rng_state.pth +1 -1
- model-bin/finetune/base/{checkpoint-97696 β checkpoint-98319}/scaler.pt +1 -1
- model-bin/finetune/base/{checkpoint-97696 β checkpoint-98319}/scheduler.pt +1 -1
- model-bin/finetune/base/{checkpoint-97696 β checkpoint-98319}/trainer_state.json +793 -4
- model-bin/finetune/base/{checkpoint-97696 β checkpoint-98319}/training_args.bin +0 -0
- model-bin/finetune/base/log/1629973389.0556126/events.out.tfevents.1629973389.8e89bd551565.924.151 +3 -0
- model-bin/finetune/base/log/1629973815.1045587/events.out.tfevents.1629973815.8e89bd551565.924.153 +3 -0
- model-bin/finetune/base/log/1629974318.6384456/events.out.tfevents.1629974318.8e89bd551565.924.155 +3 -0
- model-bin/finetune/base/log/1629974780.9713771/events.out.tfevents.1629974782.8e89bd551565.924.157 +3 -0
- model-bin/finetune/base/log/1629975202.6585348/events.out.tfevents.1629975203.8e89bd551565.924.159 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629973389.8e89bd551565.924.150 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629973815.8e89bd551565.924.152 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629974318.8e89bd551565.924.154 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629974780.8e89bd551565.924.156 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629975202.8e89bd551565.924.158 +3 -0
model-bin/finetune/base/{checkpoint-97696 β checkpoint-98319}/config.json
RENAMED
|
File without changes
|
model-bin/finetune/base/{checkpoint-97696 β checkpoint-98319}/optimizer.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 722165393
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:311515da0033dfdb51c4a90c3caf18fdf19d2bdcc686ed9c3b39f515e97a2e4b
|
| 3 |
size 722165393
|
model-bin/finetune/base/{checkpoint-97696 β checkpoint-98319}/preprocessor_config.json
RENAMED
|
File without changes
|
model-bin/finetune/base/{checkpoint-97696 β checkpoint-98319}/pytorch_model.bin
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 377909911
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:baaf5142592aab8967db3f95da57443f80660950cce603545ec60fb40d3c2957
|
| 3 |
size 377909911
|
model-bin/finetune/base/{checkpoint-97696 β checkpoint-98319}/rng_state.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6078434755d07c4f90438c69fc4a5189fa9888ee9334b3f81fbca291ce56e36
|
| 3 |
size 14503
|
model-bin/finetune/base/{checkpoint-97696 β checkpoint-98319}/scaler.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 559
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56d5993def0ea05772fc1c508da3bdf2dfdaf6741594c58e6daab97bc2def632
|
| 3 |
size 559
|
model-bin/finetune/base/{checkpoint-97696 β checkpoint-98319}/scheduler.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9cb14f596e156bdf01a59bba3c7cfbd593bd897fec3c9caf24e6667093a6e1d2
|
| 3 |
size 623
|
model-bin/finetune/base/{checkpoint-97696 β checkpoint-98319}/trainer_state.json
RENAMED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.17565318086415285,
|
| 3 |
"best_model_checkpoint": "./model-bin/finetune/base/checkpoint-94333",
|
| 4 |
-
"epoch": 786.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -223665,11 +223665,800 @@
|
|
| 223665 |
"eval_steps_per_second": 0.693,
|
| 223666 |
"eval_wer": 0.1871849199970775,
|
| 223667 |
"step": 97696
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223668 |
}
|
| 223669 |
],
|
| 223670 |
-
"max_steps":
|
| 223671 |
"num_train_epochs": 5000,
|
| 223672 |
-
"total_flos": 2.
|
| 223673 |
"trial_name": null,
|
| 223674 |
"trial_params": null
|
| 223675 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.17565318086415285,
|
| 3 |
"best_model_checkpoint": "./model-bin/finetune/base/checkpoint-94333",
|
| 4 |
+
"epoch": 786.0,
|
| 5 |
+
"global_step": 98319,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 223665 |
"eval_steps_per_second": 0.693,
|
| 223666 |
"eval_wer": 0.1871849199970775,
|
| 223667 |
"step": 97696
|
| 223668 |
+
},
|
| 223669 |
+
{
|
| 223670 |
+
"epoch": 781.03,
|
| 223671 |
+
"learning_rate": 8.450945512820513e-06,
|
| 223672 |
+
"loss": 0.2919,
|
| 223673 |
+
"step": 97700
|
| 223674 |
+
},
|
| 223675 |
+
{
|
| 223676 |
+
"epoch": 781.07,
|
| 223677 |
+
"learning_rate": 8.450865384615386e-06,
|
| 223678 |
+
"loss": 0.2605,
|
| 223679 |
+
"step": 97705
|
| 223680 |
+
},
|
| 223681 |
+
{
|
| 223682 |
+
"epoch": 781.11,
|
| 223683 |
+
"learning_rate": 8.450785256410257e-06,
|
| 223684 |
+
"loss": 0.3836,
|
| 223685 |
+
"step": 97710
|
| 223686 |
+
},
|
| 223687 |
+
{
|
| 223688 |
+
"epoch": 781.15,
|
| 223689 |
+
"learning_rate": 8.450705128205129e-06,
|
| 223690 |
+
"loss": 0.4065,
|
| 223691 |
+
"step": 97715
|
| 223692 |
+
},
|
| 223693 |
+
{
|
| 223694 |
+
"epoch": 781.19,
|
| 223695 |
+
"learning_rate": 8.450625000000002e-06,
|
| 223696 |
+
"loss": 0.9074,
|
| 223697 |
+
"step": 97720
|
| 223698 |
+
},
|
| 223699 |
+
{
|
| 223700 |
+
"epoch": 781.23,
|
| 223701 |
+
"learning_rate": 8.450544871794873e-06,
|
| 223702 |
+
"loss": 0.5634,
|
| 223703 |
+
"step": 97725
|
| 223704 |
+
},
|
| 223705 |
+
{
|
| 223706 |
+
"epoch": 781.27,
|
| 223707 |
+
"learning_rate": 8.450464743589744e-06,
|
| 223708 |
+
"loss": 0.2907,
|
| 223709 |
+
"step": 97730
|
| 223710 |
+
},
|
| 223711 |
+
{
|
| 223712 |
+
"epoch": 781.31,
|
| 223713 |
+
"learning_rate": 8.450384615384616e-06,
|
| 223714 |
+
"loss": 0.3008,
|
| 223715 |
+
"step": 97735
|
| 223716 |
+
},
|
| 223717 |
+
{
|
| 223718 |
+
"epoch": 781.35,
|
| 223719 |
+
"learning_rate": 8.450304487179489e-06,
|
| 223720 |
+
"loss": 0.4055,
|
| 223721 |
+
"step": 97740
|
| 223722 |
+
},
|
| 223723 |
+
{
|
| 223724 |
+
"epoch": 781.39,
|
| 223725 |
+
"learning_rate": 8.450224358974359e-06,
|
| 223726 |
+
"loss": 0.9335,
|
| 223727 |
+
"step": 97745
|
| 223728 |
+
},
|
| 223729 |
+
{
|
| 223730 |
+
"epoch": 781.43,
|
| 223731 |
+
"learning_rate": 8.450144230769232e-06,
|
| 223732 |
+
"loss": 0.6154,
|
| 223733 |
+
"step": 97750
|
| 223734 |
+
},
|
| 223735 |
+
{
|
| 223736 |
+
"epoch": 781.47,
|
| 223737 |
+
"learning_rate": 8.450064102564103e-06,
|
| 223738 |
+
"loss": 0.3004,
|
| 223739 |
+
"step": 97755
|
| 223740 |
+
},
|
| 223741 |
+
{
|
| 223742 |
+
"epoch": 781.51,
|
| 223743 |
+
"learning_rate": 8.449983974358974e-06,
|
| 223744 |
+
"loss": 0.2874,
|
| 223745 |
+
"step": 97760
|
| 223746 |
+
},
|
| 223747 |
+
{
|
| 223748 |
+
"epoch": 781.55,
|
| 223749 |
+
"learning_rate": 8.449903846153846e-06,
|
| 223750 |
+
"loss": 0.4046,
|
| 223751 |
+
"step": 97765
|
| 223752 |
+
},
|
| 223753 |
+
{
|
| 223754 |
+
"epoch": 781.59,
|
| 223755 |
+
"learning_rate": 8.449823717948719e-06,
|
| 223756 |
+
"loss": 0.928,
|
| 223757 |
+
"step": 97770
|
| 223758 |
+
},
|
| 223759 |
+
{
|
| 223760 |
+
"epoch": 781.63,
|
| 223761 |
+
"learning_rate": 8.44974358974359e-06,
|
| 223762 |
+
"loss": 0.5812,
|
| 223763 |
+
"step": 97775
|
| 223764 |
+
},
|
| 223765 |
+
{
|
| 223766 |
+
"epoch": 781.67,
|
| 223767 |
+
"learning_rate": 8.449663461538461e-06,
|
| 223768 |
+
"loss": 0.3869,
|
| 223769 |
+
"step": 97780
|
| 223770 |
+
},
|
| 223771 |
+
{
|
| 223772 |
+
"epoch": 781.71,
|
| 223773 |
+
"learning_rate": 8.449583333333335e-06,
|
| 223774 |
+
"loss": 0.3412,
|
| 223775 |
+
"step": 97785
|
| 223776 |
+
},
|
| 223777 |
+
{
|
| 223778 |
+
"epoch": 781.75,
|
| 223779 |
+
"learning_rate": 8.449503205128206e-06,
|
| 223780 |
+
"loss": 0.4655,
|
| 223781 |
+
"step": 97790
|
| 223782 |
+
},
|
| 223783 |
+
{
|
| 223784 |
+
"epoch": 781.79,
|
| 223785 |
+
"learning_rate": 8.449423076923077e-06,
|
| 223786 |
+
"loss": 0.8813,
|
| 223787 |
+
"step": 97795
|
| 223788 |
+
},
|
| 223789 |
+
{
|
| 223790 |
+
"epoch": 781.83,
|
| 223791 |
+
"learning_rate": 8.449342948717949e-06,
|
| 223792 |
+
"loss": 0.6633,
|
| 223793 |
+
"step": 97800
|
| 223794 |
+
},
|
| 223795 |
+
{
|
| 223796 |
+
"epoch": 781.87,
|
| 223797 |
+
"learning_rate": 8.449262820512822e-06,
|
| 223798 |
+
"loss": 0.2537,
|
| 223799 |
+
"step": 97805
|
| 223800 |
+
},
|
| 223801 |
+
{
|
| 223802 |
+
"epoch": 781.91,
|
| 223803 |
+
"learning_rate": 8.449182692307693e-06,
|
| 223804 |
+
"loss": 0.3987,
|
| 223805 |
+
"step": 97810
|
| 223806 |
+
},
|
| 223807 |
+
{
|
| 223808 |
+
"epoch": 781.95,
|
| 223809 |
+
"learning_rate": 8.449102564102564e-06,
|
| 223810 |
+
"loss": 0.5047,
|
| 223811 |
+
"step": 97815
|
| 223812 |
+
},
|
| 223813 |
+
{
|
| 223814 |
+
"epoch": 781.99,
|
| 223815 |
+
"learning_rate": 8.449022435897437e-06,
|
| 223816 |
+
"loss": 0.8551,
|
| 223817 |
+
"step": 97820
|
| 223818 |
+
},
|
| 223819 |
+
{
|
| 223820 |
+
"epoch": 782.0,
|
| 223821 |
+
"eval_loss": 0.4274960458278656,
|
| 223822 |
+
"eval_runtime": 38.5501,
|
| 223823 |
+
"eval_samples_per_second": 21.79,
|
| 223824 |
+
"eval_steps_per_second": 0.7,
|
| 223825 |
+
"eval_wer": 0.19067860938211628,
|
| 223826 |
+
"step": 97821
|
| 223827 |
+
},
|
| 223828 |
+
{
|
| 223829 |
+
"epoch": 788.03,
|
| 223830 |
+
"learning_rate": 8.448942307692309e-06,
|
| 223831 |
+
"loss": 0.305,
|
| 223832 |
+
"step": 97825
|
| 223833 |
+
},
|
| 223834 |
+
{
|
| 223835 |
+
"epoch": 788.07,
|
| 223836 |
+
"learning_rate": 8.44886217948718e-06,
|
| 223837 |
+
"loss": 0.2805,
|
| 223838 |
+
"step": 97830
|
| 223839 |
+
},
|
| 223840 |
+
{
|
| 223841 |
+
"epoch": 788.11,
|
| 223842 |
+
"learning_rate": 8.448782051282051e-06,
|
| 223843 |
+
"loss": 0.3658,
|
| 223844 |
+
"step": 97835
|
| 223845 |
+
},
|
| 223846 |
+
{
|
| 223847 |
+
"epoch": 788.15,
|
| 223848 |
+
"learning_rate": 8.448701923076925e-06,
|
| 223849 |
+
"loss": 0.3714,
|
| 223850 |
+
"step": 97840
|
| 223851 |
+
},
|
| 223852 |
+
{
|
| 223853 |
+
"epoch": 788.19,
|
| 223854 |
+
"learning_rate": 8.448621794871796e-06,
|
| 223855 |
+
"loss": 1.0718,
|
| 223856 |
+
"step": 97845
|
| 223857 |
+
},
|
| 223858 |
+
{
|
| 223859 |
+
"epoch": 788.23,
|
| 223860 |
+
"learning_rate": 8.448541666666667e-06,
|
| 223861 |
+
"loss": 0.6729,
|
| 223862 |
+
"step": 97850
|
| 223863 |
+
},
|
| 223864 |
+
{
|
| 223865 |
+
"epoch": 788.27,
|
| 223866 |
+
"learning_rate": 8.448461538461539e-06,
|
| 223867 |
+
"loss": 0.2891,
|
| 223868 |
+
"step": 97855
|
| 223869 |
+
},
|
| 223870 |
+
{
|
| 223871 |
+
"epoch": 788.31,
|
| 223872 |
+
"learning_rate": 8.448381410256412e-06,
|
| 223873 |
+
"loss": 0.3042,
|
| 223874 |
+
"step": 97860
|
| 223875 |
+
},
|
| 223876 |
+
{
|
| 223877 |
+
"epoch": 788.35,
|
| 223878 |
+
"learning_rate": 8.448301282051283e-06,
|
| 223879 |
+
"loss": 0.321,
|
| 223880 |
+
"step": 97865
|
| 223881 |
+
},
|
| 223882 |
+
{
|
| 223883 |
+
"epoch": 788.39,
|
| 223884 |
+
"learning_rate": 8.448221153846154e-06,
|
| 223885 |
+
"loss": 0.8365,
|
| 223886 |
+
"step": 97870
|
| 223887 |
+
},
|
| 223888 |
+
{
|
| 223889 |
+
"epoch": 788.43,
|
| 223890 |
+
"learning_rate": 8.448141025641027e-06,
|
| 223891 |
+
"loss": 0.614,
|
| 223892 |
+
"step": 97875
|
| 223893 |
+
},
|
| 223894 |
+
{
|
| 223895 |
+
"epoch": 788.47,
|
| 223896 |
+
"learning_rate": 8.448060897435899e-06,
|
| 223897 |
+
"loss": 0.3504,
|
| 223898 |
+
"step": 97880
|
| 223899 |
+
},
|
| 223900 |
+
{
|
| 223901 |
+
"epoch": 788.51,
|
| 223902 |
+
"learning_rate": 8.44798076923077e-06,
|
| 223903 |
+
"loss": 0.3619,
|
| 223904 |
+
"step": 97885
|
| 223905 |
+
},
|
| 223906 |
+
{
|
| 223907 |
+
"epoch": 788.55,
|
| 223908 |
+
"learning_rate": 8.447900641025642e-06,
|
| 223909 |
+
"loss": 0.3774,
|
| 223910 |
+
"step": 97890
|
| 223911 |
+
},
|
| 223912 |
+
{
|
| 223913 |
+
"epoch": 788.59,
|
| 223914 |
+
"learning_rate": 8.447820512820515e-06,
|
| 223915 |
+
"loss": 0.9715,
|
| 223916 |
+
"step": 97895
|
| 223917 |
+
},
|
| 223918 |
+
{
|
| 223919 |
+
"epoch": 788.63,
|
| 223920 |
+
"learning_rate": 8.447740384615384e-06,
|
| 223921 |
+
"loss": 0.6846,
|
| 223922 |
+
"step": 97900
|
| 223923 |
+
},
|
| 223924 |
+
{
|
| 223925 |
+
"epoch": 788.67,
|
| 223926 |
+
"learning_rate": 8.447660256410257e-06,
|
| 223927 |
+
"loss": 0.313,
|
| 223928 |
+
"step": 97905
|
| 223929 |
+
},
|
| 223930 |
+
{
|
| 223931 |
+
"epoch": 788.71,
|
| 223932 |
+
"learning_rate": 8.447580128205129e-06,
|
| 223933 |
+
"loss": 0.2738,
|
| 223934 |
+
"step": 97910
|
| 223935 |
+
},
|
| 223936 |
+
{
|
| 223937 |
+
"epoch": 788.76,
|
| 223938 |
+
"learning_rate": 8.4475e-06,
|
| 223939 |
+
"loss": 0.4333,
|
| 223940 |
+
"step": 97915
|
| 223941 |
+
},
|
| 223942 |
+
{
|
| 223943 |
+
"epoch": 788.8,
|
| 223944 |
+
"learning_rate": 8.447419871794873e-06,
|
| 223945 |
+
"loss": 0.8836,
|
| 223946 |
+
"step": 97920
|
| 223947 |
+
},
|
| 223948 |
+
{
|
| 223949 |
+
"epoch": 788.84,
|
| 223950 |
+
"learning_rate": 8.447339743589744e-06,
|
| 223951 |
+
"loss": 0.7105,
|
| 223952 |
+
"step": 97925
|
| 223953 |
+
},
|
| 223954 |
+
{
|
| 223955 |
+
"epoch": 788.88,
|
| 223956 |
+
"learning_rate": 8.447259615384616e-06,
|
| 223957 |
+
"loss": 0.2908,
|
| 223958 |
+
"step": 97930
|
| 223959 |
+
},
|
| 223960 |
+
{
|
| 223961 |
+
"epoch": 788.92,
|
| 223962 |
+
"learning_rate": 8.447179487179487e-06,
|
| 223963 |
+
"loss": 0.3699,
|
| 223964 |
+
"step": 97935
|
| 223965 |
+
},
|
| 223966 |
+
{
|
| 223967 |
+
"epoch": 788.96,
|
| 223968 |
+
"learning_rate": 8.44709935897436e-06,
|
| 223969 |
+
"loss": 0.4936,
|
| 223970 |
+
"step": 97940
|
| 223971 |
+
},
|
| 223972 |
+
{
|
| 223973 |
+
"epoch": 789.0,
|
| 223974 |
+
"learning_rate": 8.447019230769232e-06,
|
| 223975 |
+
"loss": 0.9831,
|
| 223976 |
+
"step": 97945
|
| 223977 |
+
},
|
| 223978 |
+
{
|
| 223979 |
+
"epoch": 789.0,
|
| 223980 |
+
"eval_loss": 0.36342912912368774,
|
| 223981 |
+
"eval_runtime": 39.6989,
|
| 223982 |
+
"eval_samples_per_second": 21.159,
|
| 223983 |
+
"eval_steps_per_second": 0.68,
|
| 223984 |
+
"eval_wer": 0.17981118373275237,
|
| 223985 |
+
"step": 97945
|
| 223986 |
+
},
|
| 223987 |
+
{
|
| 223988 |
+
"epoch": 789.04,
|
| 223989 |
+
"learning_rate": 8.446939102564103e-06,
|
| 223990 |
+
"loss": 0.3496,
|
| 223991 |
+
"step": 97950
|
| 223992 |
+
},
|
| 223993 |
+
{
|
| 223994 |
+
"epoch": 789.08,
|
| 223995 |
+
"learning_rate": 8.446858974358974e-06,
|
| 223996 |
+
"loss": 0.2818,
|
| 223997 |
+
"step": 97955
|
| 223998 |
+
},
|
| 223999 |
+
{
|
| 224000 |
+
"epoch": 789.12,
|
| 224001 |
+
"learning_rate": 8.446778846153847e-06,
|
| 224002 |
+
"loss": 0.3026,
|
| 224003 |
+
"step": 97960
|
| 224004 |
+
},
|
| 224005 |
+
{
|
| 224006 |
+
"epoch": 789.16,
|
| 224007 |
+
"learning_rate": 8.446698717948719e-06,
|
| 224008 |
+
"loss": 0.4188,
|
| 224009 |
+
"step": 97965
|
| 224010 |
+
},
|
| 224011 |
+
{
|
| 224012 |
+
"epoch": 789.2,
|
| 224013 |
+
"learning_rate": 8.44661858974359e-06,
|
| 224014 |
+
"loss": 1.33,
|
| 224015 |
+
"step": 97970
|
| 224016 |
+
},
|
| 224017 |
+
{
|
| 224018 |
+
"epoch": 789.24,
|
| 224019 |
+
"learning_rate": 8.446538461538463e-06,
|
| 224020 |
+
"loss": 0.3258,
|
| 224021 |
+
"step": 97975
|
| 224022 |
+
},
|
| 224023 |
+
{
|
| 224024 |
+
"epoch": 789.28,
|
| 224025 |
+
"learning_rate": 8.446458333333334e-06,
|
| 224026 |
+
"loss": 0.3017,
|
| 224027 |
+
"step": 97980
|
| 224028 |
+
},
|
| 224029 |
+
{
|
| 224030 |
+
"epoch": 789.32,
|
| 224031 |
+
"learning_rate": 8.446378205128206e-06,
|
| 224032 |
+
"loss": 0.3333,
|
| 224033 |
+
"step": 97985
|
| 224034 |
+
},
|
| 224035 |
+
{
|
| 224036 |
+
"epoch": 789.36,
|
| 224037 |
+
"learning_rate": 8.446298076923077e-06,
|
| 224038 |
+
"loss": 0.4465,
|
| 224039 |
+
"step": 97990
|
| 224040 |
+
},
|
| 224041 |
+
{
|
| 224042 |
+
"epoch": 789.4,
|
| 224043 |
+
"learning_rate": 8.44621794871795e-06,
|
| 224044 |
+
"loss": 1.2709,
|
| 224045 |
+
"step": 97995
|
| 224046 |
+
},
|
| 224047 |
+
{
|
| 224048 |
+
"epoch": 789.44,
|
| 224049 |
+
"learning_rate": 8.446137820512822e-06,
|
| 224050 |
+
"loss": 0.2966,
|
| 224051 |
+
"step": 98000
|
| 224052 |
+
},
|
| 224053 |
+
{
|
| 224054 |
+
"epoch": 789.48,
|
| 224055 |
+
"learning_rate": 8.446057692307693e-06,
|
| 224056 |
+
"loss": 0.3206,
|
| 224057 |
+
"step": 98005
|
| 224058 |
+
},
|
| 224059 |
+
{
|
| 224060 |
+
"epoch": 789.52,
|
| 224061 |
+
"learning_rate": 8.445977564102564e-06,
|
| 224062 |
+
"loss": 0.3696,
|
| 224063 |
+
"step": 98010
|
| 224064 |
+
},
|
| 224065 |
+
{
|
| 224066 |
+
"epoch": 789.56,
|
| 224067 |
+
"learning_rate": 8.445897435897437e-06,
|
| 224068 |
+
"loss": 0.4276,
|
| 224069 |
+
"step": 98015
|
| 224070 |
+
},
|
| 224071 |
+
{
|
| 224072 |
+
"epoch": 789.6,
|
| 224073 |
+
"learning_rate": 8.445817307692309e-06,
|
| 224074 |
+
"loss": 1.1847,
|
| 224075 |
+
"step": 98020
|
| 224076 |
+
},
|
| 224077 |
+
{
|
| 224078 |
+
"epoch": 789.65,
|
| 224079 |
+
"learning_rate": 8.44573717948718e-06,
|
| 224080 |
+
"loss": 0.3682,
|
| 224081 |
+
"step": 98025
|
| 224082 |
+
},
|
| 224083 |
+
{
|
| 224084 |
+
"epoch": 789.69,
|
| 224085 |
+
"learning_rate": 8.445657051282053e-06,
|
| 224086 |
+
"loss": 0.2609,
|
| 224087 |
+
"step": 98030
|
| 224088 |
+
},
|
| 224089 |
+
{
|
| 224090 |
+
"epoch": 789.73,
|
| 224091 |
+
"learning_rate": 8.445576923076923e-06,
|
| 224092 |
+
"loss": 0.2844,
|
| 224093 |
+
"step": 98035
|
| 224094 |
+
},
|
| 224095 |
+
{
|
| 224096 |
+
"epoch": 789.77,
|
| 224097 |
+
"learning_rate": 8.445496794871796e-06,
|
| 224098 |
+
"loss": 0.4721,
|
| 224099 |
+
"step": 98040
|
| 224100 |
+
},
|
| 224101 |
+
{
|
| 224102 |
+
"epoch": 789.81,
|
| 224103 |
+
"learning_rate": 8.445416666666667e-06,
|
| 224104 |
+
"loss": 1.279,
|
| 224105 |
+
"step": 98045
|
| 224106 |
+
},
|
| 224107 |
+
{
|
| 224108 |
+
"epoch": 789.85,
|
| 224109 |
+
"learning_rate": 8.445336538461539e-06,
|
| 224110 |
+
"loss": 0.3208,
|
| 224111 |
+
"step": 98050
|
| 224112 |
+
},
|
| 224113 |
+
{
|
| 224114 |
+
"epoch": 789.89,
|
| 224115 |
+
"learning_rate": 8.44525641025641e-06,
|
| 224116 |
+
"loss": 0.2875,
|
| 224117 |
+
"step": 98055
|
| 224118 |
+
},
|
| 224119 |
+
{
|
| 224120 |
+
"epoch": 789.93,
|
| 224121 |
+
"learning_rate": 8.445176282051283e-06,
|
| 224122 |
+
"loss": 0.3356,
|
| 224123 |
+
"step": 98060
|
| 224124 |
+
},
|
| 224125 |
+
{
|
| 224126 |
+
"epoch": 789.97,
|
| 224127 |
+
"learning_rate": 8.445096153846154e-06,
|
| 224128 |
+
"loss": 0.45,
|
| 224129 |
+
"step": 98065
|
| 224130 |
+
},
|
| 224131 |
+
{
|
| 224132 |
+
"epoch": 790.0,
|
| 224133 |
+
"eval_loss": 0.34974199533462524,
|
| 224134 |
+
"eval_runtime": 40.8067,
|
| 224135 |
+
"eval_samples_per_second": 20.413,
|
| 224136 |
+
"eval_steps_per_second": 0.662,
|
| 224137 |
+
"eval_wer": 0.1802901881913518,
|
| 224138 |
+
"step": 98069
|
| 224139 |
+
},
|
| 224140 |
+
{
|
| 224141 |
+
"epoch": 784.01,
|
| 224142 |
+
"learning_rate": 8.445016025641026e-06,
|
| 224143 |
+
"loss": 0.3581,
|
| 224144 |
+
"step": 98070
|
| 224145 |
+
},
|
| 224146 |
+
{
|
| 224147 |
+
"epoch": 784.05,
|
| 224148 |
+
"learning_rate": 8.444935897435899e-06,
|
| 224149 |
+
"loss": 0.3122,
|
| 224150 |
+
"step": 98075
|
| 224151 |
+
},
|
| 224152 |
+
{
|
| 224153 |
+
"epoch": 784.09,
|
| 224154 |
+
"learning_rate": 8.44485576923077e-06,
|
| 224155 |
+
"loss": 0.2598,
|
| 224156 |
+
"step": 98080
|
| 224157 |
+
},
|
| 224158 |
+
{
|
| 224159 |
+
"epoch": 784.13,
|
| 224160 |
+
"learning_rate": 8.444775641025641e-06,
|
| 224161 |
+
"loss": 0.3464,
|
| 224162 |
+
"step": 98085
|
| 224163 |
+
},
|
| 224164 |
+
{
|
| 224165 |
+
"epoch": 784.17,
|
| 224166 |
+
"learning_rate": 8.444695512820513e-06,
|
| 224167 |
+
"loss": 0.4918,
|
| 224168 |
+
"step": 98090
|
| 224169 |
+
},
|
| 224170 |
+
{
|
| 224171 |
+
"epoch": 784.21,
|
| 224172 |
+
"learning_rate": 8.444615384615386e-06,
|
| 224173 |
+
"loss": 1.2073,
|
| 224174 |
+
"step": 98095
|
| 224175 |
+
},
|
| 224176 |
+
{
|
| 224177 |
+
"epoch": 784.25,
|
| 224178 |
+
"learning_rate": 8.444535256410257e-06,
|
| 224179 |
+
"loss": 0.3045,
|
| 224180 |
+
"step": 98100
|
| 224181 |
+
},
|
| 224182 |
+
{
|
| 224183 |
+
"epoch": 784.29,
|
| 224184 |
+
"learning_rate": 8.444455128205129e-06,
|
| 224185 |
+
"loss": 0.3826,
|
| 224186 |
+
"step": 98105
|
| 224187 |
+
},
|
| 224188 |
+
{
|
| 224189 |
+
"epoch": 784.33,
|
| 224190 |
+
"learning_rate": 8.444375e-06,
|
| 224191 |
+
"loss": 0.3352,
|
| 224192 |
+
"step": 98110
|
| 224193 |
+
},
|
| 224194 |
+
{
|
| 224195 |
+
"epoch": 784.37,
|
| 224196 |
+
"learning_rate": 8.444294871794873e-06,
|
| 224197 |
+
"loss": 0.606,
|
| 224198 |
+
"step": 98115
|
| 224199 |
+
},
|
| 224200 |
+
{
|
| 224201 |
+
"epoch": 784.41,
|
| 224202 |
+
"learning_rate": 8.444214743589744e-06,
|
| 224203 |
+
"loss": 1.1556,
|
| 224204 |
+
"step": 98120
|
| 224205 |
+
},
|
| 224206 |
+
{
|
| 224207 |
+
"epoch": 784.45,
|
| 224208 |
+
"learning_rate": 8.444134615384616e-06,
|
| 224209 |
+
"loss": 0.3254,
|
| 224210 |
+
"step": 98125
|
| 224211 |
+
},
|
| 224212 |
+
{
|
| 224213 |
+
"epoch": 784.49,
|
| 224214 |
+
"learning_rate": 8.444054487179489e-06,
|
| 224215 |
+
"loss": 0.3011,
|
| 224216 |
+
"step": 98130
|
| 224217 |
+
},
|
| 224218 |
+
{
|
| 224219 |
+
"epoch": 784.53,
|
| 224220 |
+
"learning_rate": 8.44397435897436e-06,
|
| 224221 |
+
"loss": 0.3503,
|
| 224222 |
+
"step": 98135
|
| 224223 |
+
},
|
| 224224 |
+
{
|
| 224225 |
+
"epoch": 784.57,
|
| 224226 |
+
"learning_rate": 8.443894230769231e-06,
|
| 224227 |
+
"loss": 0.5107,
|
| 224228 |
+
"step": 98140
|
| 224229 |
+
},
|
| 224230 |
+
{
|
| 224231 |
+
"epoch": 784.61,
|
| 224232 |
+
"learning_rate": 8.443814102564103e-06,
|
| 224233 |
+
"loss": 1.1632,
|
| 224234 |
+
"step": 98145
|
| 224235 |
+
},
|
| 224236 |
+
{
|
| 224237 |
+
"epoch": 784.65,
|
| 224238 |
+
"learning_rate": 8.443733974358976e-06,
|
| 224239 |
+
"loss": 0.3098,
|
| 224240 |
+
"step": 98150
|
| 224241 |
+
},
|
| 224242 |
+
{
|
| 224243 |
+
"epoch": 784.69,
|
| 224244 |
+
"learning_rate": 8.443653846153847e-06,
|
| 224245 |
+
"loss": 0.35,
|
| 224246 |
+
"step": 98155
|
| 224247 |
+
},
|
| 224248 |
+
{
|
| 224249 |
+
"epoch": 784.73,
|
| 224250 |
+
"learning_rate": 8.443573717948719e-06,
|
| 224251 |
+
"loss": 0.3411,
|
| 224252 |
+
"step": 98160
|
| 224253 |
+
},
|
| 224254 |
+
{
|
| 224255 |
+
"epoch": 784.77,
|
| 224256 |
+
"learning_rate": 8.44349358974359e-06,
|
| 224257 |
+
"loss": 0.5823,
|
| 224258 |
+
"step": 98165
|
| 224259 |
+
},
|
| 224260 |
+
{
|
| 224261 |
+
"epoch": 784.81,
|
| 224262 |
+
"learning_rate": 8.443429487179487e-06,
|
| 224263 |
+
"loss": 1.13,
|
| 224264 |
+
"step": 98170
|
| 224265 |
+
},
|
| 224266 |
+
{
|
| 224267 |
+
"epoch": 784.85,
|
| 224268 |
+
"learning_rate": 8.44334935897436e-06,
|
| 224269 |
+
"loss": 0.2983,
|
| 224270 |
+
"step": 98175
|
| 224271 |
+
},
|
| 224272 |
+
{
|
| 224273 |
+
"epoch": 784.89,
|
| 224274 |
+
"learning_rate": 8.443269230769232e-06,
|
| 224275 |
+
"loss": 0.2595,
|
| 224276 |
+
"step": 98180
|
| 224277 |
+
},
|
| 224278 |
+
{
|
| 224279 |
+
"epoch": 784.93,
|
| 224280 |
+
"learning_rate": 8.443189102564103e-06,
|
| 224281 |
+
"loss": 0.2895,
|
| 224282 |
+
"step": 98185
|
| 224283 |
+
},
|
| 224284 |
+
{
|
| 224285 |
+
"epoch": 784.97,
|
| 224286 |
+
"learning_rate": 8.443108974358976e-06,
|
| 224287 |
+
"loss": 0.5252,
|
| 224288 |
+
"step": 98190
|
| 224289 |
+
},
|
| 224290 |
+
{
|
| 224291 |
+
"epoch": 785.0,
|
| 224292 |
+
"eval_loss": 0.4434640109539032,
|
| 224293 |
+
"eval_runtime": 38.2793,
|
| 224294 |
+
"eval_samples_per_second": 21.761,
|
| 224295 |
+
"eval_steps_per_second": 0.705,
|
| 224296 |
+
"eval_wer": 0.18793128762296285,
|
| 224297 |
+
"step": 98194
|
| 224298 |
+
},
|
| 224299 |
+
{
|
| 224300 |
+
"epoch": 785.01,
|
| 224301 |
+
"learning_rate": 8.443028846153848e-06,
|
| 224302 |
+
"loss": 0.4674,
|
| 224303 |
+
"step": 98195
|
| 224304 |
+
},
|
| 224305 |
+
{
|
| 224306 |
+
"epoch": 785.05,
|
| 224307 |
+
"learning_rate": 8.442948717948719e-06,
|
| 224308 |
+
"loss": 0.3205,
|
| 224309 |
+
"step": 98200
|
| 224310 |
+
},
|
| 224311 |
+
{
|
| 224312 |
+
"epoch": 785.09,
|
| 224313 |
+
"learning_rate": 8.44286858974359e-06,
|
| 224314 |
+
"loss": 0.2484,
|
| 224315 |
+
"step": 98205
|
| 224316 |
+
},
|
| 224317 |
+
{
|
| 224318 |
+
"epoch": 785.13,
|
| 224319 |
+
"learning_rate": 8.442788461538463e-06,
|
| 224320 |
+
"loss": 0.3657,
|
| 224321 |
+
"step": 98210
|
| 224322 |
+
},
|
| 224323 |
+
{
|
| 224324 |
+
"epoch": 785.17,
|
| 224325 |
+
"learning_rate": 8.442708333333333e-06,
|
| 224326 |
+
"loss": 0.4509,
|
| 224327 |
+
"step": 98215
|
| 224328 |
+
},
|
| 224329 |
+
{
|
| 224330 |
+
"epoch": 785.21,
|
| 224331 |
+
"learning_rate": 8.442628205128206e-06,
|
| 224332 |
+
"loss": 1.0371,
|
| 224333 |
+
"step": 98220
|
| 224334 |
+
},
|
| 224335 |
+
{
|
| 224336 |
+
"epoch": 785.25,
|
| 224337 |
+
"learning_rate": 8.442548076923077e-06,
|
| 224338 |
+
"loss": 0.349,
|
| 224339 |
+
"step": 98225
|
| 224340 |
+
},
|
| 224341 |
+
{
|
| 224342 |
+
"epoch": 785.29,
|
| 224343 |
+
"learning_rate": 8.442467948717949e-06,
|
| 224344 |
+
"loss": 0.3501,
|
| 224345 |
+
"step": 98230
|
| 224346 |
+
},
|
| 224347 |
+
{
|
| 224348 |
+
"epoch": 785.33,
|
| 224349 |
+
"learning_rate": 8.44238782051282e-06,
|
| 224350 |
+
"loss": 0.3634,
|
| 224351 |
+
"step": 98235
|
| 224352 |
+
},
|
| 224353 |
+
{
|
| 224354 |
+
"epoch": 785.37,
|
| 224355 |
+
"learning_rate": 8.442307692307693e-06,
|
| 224356 |
+
"loss": 0.4103,
|
| 224357 |
+
"step": 98240
|
| 224358 |
+
},
|
| 224359 |
+
{
|
| 224360 |
+
"epoch": 785.41,
|
| 224361 |
+
"learning_rate": 8.442227564102565e-06,
|
| 224362 |
+
"loss": 1.0151,
|
| 224363 |
+
"step": 98245
|
| 224364 |
+
},
|
| 224365 |
+
{
|
| 224366 |
+
"epoch": 785.45,
|
| 224367 |
+
"learning_rate": 8.442147435897436e-06,
|
| 224368 |
+
"loss": 0.2994,
|
| 224369 |
+
"step": 98250
|
| 224370 |
+
},
|
| 224371 |
+
{
|
| 224372 |
+
"epoch": 785.49,
|
| 224373 |
+
"learning_rate": 8.442067307692309e-06,
|
| 224374 |
+
"loss": 0.2963,
|
| 224375 |
+
"step": 98255
|
| 224376 |
+
},
|
| 224377 |
+
{
|
| 224378 |
+
"epoch": 785.53,
|
| 224379 |
+
"learning_rate": 8.44198717948718e-06,
|
| 224380 |
+
"loss": 0.3585,
|
| 224381 |
+
"step": 98260
|
| 224382 |
+
},
|
| 224383 |
+
{
|
| 224384 |
+
"epoch": 785.57,
|
| 224385 |
+
"learning_rate": 8.441907051282052e-06,
|
| 224386 |
+
"loss": 0.5456,
|
| 224387 |
+
"step": 98265
|
| 224388 |
+
},
|
| 224389 |
+
{
|
| 224390 |
+
"epoch": 785.61,
|
| 224391 |
+
"learning_rate": 8.441826923076923e-06,
|
| 224392 |
+
"loss": 1.1244,
|
| 224393 |
+
"step": 98270
|
| 224394 |
+
},
|
| 224395 |
+
{
|
| 224396 |
+
"epoch": 785.65,
|
| 224397 |
+
"learning_rate": 8.441746794871796e-06,
|
| 224398 |
+
"loss": 0.2905,
|
| 224399 |
+
"step": 98275
|
| 224400 |
+
},
|
| 224401 |
+
{
|
| 224402 |
+
"epoch": 785.69,
|
| 224403 |
+
"learning_rate": 8.441666666666667e-06,
|
| 224404 |
+
"loss": 0.2842,
|
| 224405 |
+
"step": 98280
|
| 224406 |
+
},
|
| 224407 |
+
{
|
| 224408 |
+
"epoch": 785.73,
|
| 224409 |
+
"learning_rate": 8.441586538461539e-06,
|
| 224410 |
+
"loss": 0.3149,
|
| 224411 |
+
"step": 98285
|
| 224412 |
+
},
|
| 224413 |
+
{
|
| 224414 |
+
"epoch": 785.77,
|
| 224415 |
+
"learning_rate": 8.441506410256412e-06,
|
| 224416 |
+
"loss": 0.5731,
|
| 224417 |
+
"step": 98290
|
| 224418 |
+
},
|
| 224419 |
+
{
|
| 224420 |
+
"epoch": 785.81,
|
| 224421 |
+
"learning_rate": 8.441426282051283e-06,
|
| 224422 |
+
"loss": 1.2097,
|
| 224423 |
+
"step": 98295
|
| 224424 |
+
},
|
| 224425 |
+
{
|
| 224426 |
+
"epoch": 785.85,
|
| 224427 |
+
"learning_rate": 8.441346153846155e-06,
|
| 224428 |
+
"loss": 0.3033,
|
| 224429 |
+
"step": 98300
|
| 224430 |
+
},
|
| 224431 |
+
{
|
| 224432 |
+
"epoch": 785.89,
|
| 224433 |
+
"learning_rate": 8.441266025641026e-06,
|
| 224434 |
+
"loss": 0.2666,
|
| 224435 |
+
"step": 98305
|
| 224436 |
+
},
|
| 224437 |
+
{
|
| 224438 |
+
"epoch": 785.93,
|
| 224439 |
+
"learning_rate": 8.441185897435899e-06,
|
| 224440 |
+
"loss": 0.4129,
|
| 224441 |
+
"step": 98310
|
| 224442 |
+
},
|
| 224443 |
+
{
|
| 224444 |
+
"epoch": 785.97,
|
| 224445 |
+
"learning_rate": 8.44110576923077e-06,
|
| 224446 |
+
"loss": 0.4955,
|
| 224447 |
+
"step": 98315
|
| 224448 |
+
},
|
| 224449 |
+
{
|
| 224450 |
+
"epoch": 786.0,
|
| 224451 |
+
"eval_loss": 0.3902416527271271,
|
| 224452 |
+
"eval_runtime": 37.791,
|
| 224453 |
+
"eval_samples_per_second": 22.042,
|
| 224454 |
+
"eval_steps_per_second": 0.714,
|
| 224455 |
+
"eval_wer": 0.186774099883856,
|
| 224456 |
+
"step": 98319
|
| 224457 |
}
|
| 224458 |
],
|
| 224459 |
+
"max_steps": 625000,
|
| 224460 |
"num_train_epochs": 5000,
|
| 224461 |
+
"total_flos": 2.7668434896102595e+20,
|
| 224462 |
"trial_name": null,
|
| 224463 |
"trial_params": null
|
| 224464 |
}
|
model-bin/finetune/base/{checkpoint-97696 β checkpoint-98319}/training_args.bin
RENAMED
|
File without changes
|
model-bin/finetune/base/log/1629973389.0556126/events.out.tfevents.1629973389.8e89bd551565.924.151
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a71c2ae88d1dbc0739c291414e315de93ccb83f495a760a66d7fccc056d67e3b
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1629973815.1045587/events.out.tfevents.1629973815.8e89bd551565.924.153
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aca11f565f5950335895bb6cb6a84a642f8d592f24c0b664f4b56d65e9ca1e88
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1629974318.6384456/events.out.tfevents.1629974318.8e89bd551565.924.155
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59e00dd02417490537138a47f44c4a5975a0ae4505031ba9b92d4d7b2a5131f1
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1629974780.9713771/events.out.tfevents.1629974782.8e89bd551565.924.157
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:16c12509ae9c71beed118d29f41fd90fbbde6c58b018f392724126dc30d5b92a
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1629975202.6585348/events.out.tfevents.1629975203.8e89bd551565.924.159
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4aeaeccf01eb626fff6c3226ca3fb004afb7982fe5ede6abd24d8730aaf134ef
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/events.out.tfevents.1629973389.8e89bd551565.924.150
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa6d539ecb1eef1ff578b58924d416b556d7ba569b58b5af9640850cfb8cea3a
|
| 3 |
+
size 8622
|
model-bin/finetune/base/log/events.out.tfevents.1629973815.8e89bd551565.924.152
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c406b133dab8e3ff9b434b5348b96644d986acbfffdb6bee31445d687bdb3b7
|
| 3 |
+
size 8622
|
model-bin/finetune/base/log/events.out.tfevents.1629974318.8e89bd551565.924.154
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:244ced347ec2a009af32c42d2fe39a3f8c84726e0c6bfae4a868eb6ff9db4327
|
| 3 |
+
size 8462
|
model-bin/finetune/base/log/events.out.tfevents.1629974780.8e89bd551565.924.156
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a06eb54de941462aca52ae6849ba11e404e03d55cd0744f4d52feb28045e9b6
|
| 3 |
+
size 8622
|
model-bin/finetune/base/log/events.out.tfevents.1629975202.8e89bd551565.924.158
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca02d5ec42de8d3dac0719d78e4add2974260b0228a010b92f78e1c932e11a51
|
| 3 |
+
size 8622
|