marinone94 committed on
Commit
24b7b3f
‒
1 Parent(s): 5ada171
Files changed (31) hide show
  1. .ipynb_checkpoints/eval_results-checkpoint.json +9 -0
  2. {checkpoint-16000 → checkpoint-1000}/config.json +1 -1
  3. {checkpoint-16500 → checkpoint-1000}/optimizer.pt +1 -1
  4. {checkpoint-16000 → checkpoint-1000}/preprocessor_config.json +0 -0
  5. {checkpoint-16500 → checkpoint-1000}/pytorch_model.bin +1 -1
  6. {checkpoint-16500 → checkpoint-1000}/rng_state.pth +2 -2
  7. {checkpoint-16000 → checkpoint-1000}/scaler.pt +1 -1
  8. {checkpoint-17000 → checkpoint-1000}/scheduler.pt +1 -1
  9. checkpoint-1000/trainer_state.json +94 -0
  10. {checkpoint-16500 → checkpoint-1000}/training_args.bin +1 -1
  11. {checkpoint-16500 → checkpoint-1500}/config.json +1 -1
  12. {checkpoint-16000 → checkpoint-1500}/optimizer.pt +1 -1
  13. {checkpoint-16500 → checkpoint-1500}/preprocessor_config.json +0 -0
  14. {checkpoint-17000 → checkpoint-1500}/pytorch_model.bin +1 -1
  15. {checkpoint-17000 → checkpoint-1500}/rng_state.pth +2 -2
  16. {checkpoint-17000 → checkpoint-1500}/scaler.pt +1 -1
  17. {checkpoint-16000 → checkpoint-1500}/scheduler.pt +1 -1
  18. checkpoint-1500/trainer_state.json +133 -0
  19. {checkpoint-17000 → checkpoint-1500}/training_args.bin +1 -1
  20. checkpoint-16000/pytorch_model.bin +0 -3
  21. checkpoint-16000/rng_state.pth +0 -3
  22. checkpoint-16000/trainer_state.json +0 -1264
  23. checkpoint-16000/training_args.bin +0 -3
  24. checkpoint-16500/scaler.pt +0 -3
  25. checkpoint-16500/scheduler.pt +0 -3
  26. checkpoint-16500/trainer_state.json +0 -1303
  27. checkpoint-17000/config.json +0 -107
  28. checkpoint-17000/optimizer.pt +0 -3
  29. checkpoint-17000/preprocessor_config.json +0 -9
  30. checkpoint-17000/trainer_state.json +0 -1342
  31. checkpoint-500/.ipynb_checkpoints/trainer_state-checkpoint.json +55 -0
.ipynb_checkpoints/eval_results-checkpoint.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 50.0,
3
+ "eval_loss": 0.31790396571159363,
4
+ "eval_runtime": 136.0793,
5
+ "eval_samples": 4620,
6
+ "eval_samples_per_second": 33.951,
7
+ "eval_steps_per_second": 4.248,
8
+ "eval_wer": 0.2734810010402007
9
+ }
{checkpoint-16000 → checkpoint-1000}/config.json RENAMED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "facebook/wav2vec2-xls-r-300m",
3
  "activation_dropout": 0.1,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
 
1
  {
2
+ "_name_or_path": "marinone94/xls-r-300m-sv-robust",
3
  "activation_dropout": 0.1,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
{checkpoint-16500 → checkpoint-1000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f228b737be7b0ab54382497d712270d476c95b36ecae1d727785dc7bff0978e
3
  size 2490362385
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:997e39e3068f6334cf85b372c45d1b1aec7a201fbe1f730152e5d4a0b55fe960
3
  size 2490362385
{checkpoint-16000 → checkpoint-1000}/preprocessor_config.json RENAMED
File without changes
{checkpoint-16500 → checkpoint-1000}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c6a62856623bc4550b5f336bd7209b1bd2e3e670b1e7950a79cb005b3fd7534
3
  size 1262075377
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a0d1201f69f8dc32ebd7a780daea905f15db2bbd2da823ebd02ed30aa3bee71
3
  size 1262075377
{checkpoint-16500 → checkpoint-1000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a27ba1fd455e94e8b7a6027e7ed9cf82cd26113cc0bd18ebed10a0b5d449392f
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a3789e7a321dae6b4bf60417d43627b247a6090022ececb8fe9e7931554f2a6
3
+ size 14503
{checkpoint-16000 → checkpoint-1000}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bcb42b3bf1898edd5501a8a54e2a8aeeb1c707564ea3cf64a8e4a8749b091df
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:637182a715f3446bd8163c96f6b2a5376a6865fe60d3b7086f55025f5f89924a
3
  size 559
{checkpoint-17000 → checkpoint-1000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29137f29f94e1209db7d5266845567f6c8e2dcdd80c671249312f489b8a23f6f
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0a14af155fdce6ec463b3e3ef0c4d974d177934cf33badd4dcae71e1061f0d5
3
  size 623
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.9050036258158087,
5
+ "global_step": 1000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.29,
12
+ "learning_rate": 3.675e-06,
13
+ "loss": 1.0197,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.58,
18
+ "learning_rate": 7.425e-06,
19
+ "loss": 1.0171,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 0.87,
24
+ "learning_rate": 1.1137499999999998e-05,
25
+ "loss": 1.026,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 1.16,
30
+ "learning_rate": 1.48875e-05,
31
+ "loss": 1.0383,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 1.45,
36
+ "learning_rate": 1.86375e-05,
37
+ "loss": 1.0296,
38
+ "step": 500
39
+ },
40
+ {
41
+ "epoch": 1.45,
42
+ "eval_loss": 0.3191435635089874,
43
+ "eval_runtime": 138.7872,
44
+ "eval_samples_per_second": 33.288,
45
+ "eval_steps_per_second": 4.165,
46
+ "eval_wer": 0.27421526035611576,
47
+ "step": 500
48
+ },
49
+ {
50
+ "epoch": 1.74,
51
+ "learning_rate": 2.23875e-05,
52
+ "loss": 1.0529,
53
+ "step": 600
54
+ },
55
+ {
56
+ "epoch": 2.03,
57
+ "learning_rate": 2.6137499999999995e-05,
58
+ "loss": 1.0442,
59
+ "step": 700
60
+ },
61
+ {
62
+ "epoch": 2.32,
63
+ "learning_rate": 2.9887499999999998e-05,
64
+ "loss": 1.0632,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 2.61,
69
+ "learning_rate": 3.36375e-05,
70
+ "loss": 1.037,
71
+ "step": 900
72
+ },
73
+ {
74
+ "epoch": 2.91,
75
+ "learning_rate": 3.7387499999999994e-05,
76
+ "loss": 1.0495,
77
+ "step": 1000
78
+ },
79
+ {
80
+ "epoch": 2.91,
81
+ "eval_loss": 0.33204758167266846,
82
+ "eval_runtime": 139.9575,
83
+ "eval_samples_per_second": 33.01,
84
+ "eval_steps_per_second": 4.13,
85
+ "eval_wer": 0.28963470599033225,
86
+ "step": 1000
87
+ }
88
+ ],
89
+ "max_steps": 17200,
90
+ "num_train_epochs": 50,
91
+ "total_flos": 2.9636799545011507e+18,
92
+ "trial_name": null,
93
+ "trial_params": null
94
+ }
{checkpoint-16500 → checkpoint-1000}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a168f126d02648193e16ba893c3e1ef9f2c2de91803928caf5e25532aff1325
3
  size 2991
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0efc7aad7fd151e94de194f50e712cd8d3c82a2cf2ccee51d253c9130af43c3a
3
  size 2991
{checkpoint-16500 → checkpoint-1500}/config.json RENAMED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "facebook/wav2vec2-xls-r-300m",
3
  "activation_dropout": 0.1,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
 
1
  {
2
+ "_name_or_path": "marinone94/xls-r-300m-sv-robust",
3
  "activation_dropout": 0.1,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
{checkpoint-16000 → checkpoint-1500}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23505cec8634d6a983156ca9aaadbd850d5f159d59400b96c32cdb115a378ee4
3
  size 2490362385
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b883c1f26bb60f965959e813f1fda3c9b0ed41e76a86ef50f71624403b758fb8
3
  size 2490362385
{checkpoint-16500 → checkpoint-1500}/preprocessor_config.json RENAMED
File without changes
{checkpoint-17000 → checkpoint-1500}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8f1f33d70822ede911a061ebd9c8ac6adb1b85af8f06aef411beb21f64e5513
3
  size 1262075377
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c440e5ae149a9c96063d6fceaaf311e4a73da0f507f9d87f6781437b8936e2e
3
  size 1262075377
{checkpoint-17000 → checkpoint-1500}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da448dcf17115922df19725e2f419fc357399e18ea720fe14302bdb4ceec2828
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4982f7f3a3d3c9848cfac0ab0e4aff5b6f3cfec2ee0d2bf0be5471908380429e
3
+ size 14503
{checkpoint-17000 → checkpoint-1500}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7084db4c57c4f9d27d144de3844af74c529a53119c951e7d5083f342e8946b0f
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85136f3eae8581fb19c033614c8131d6fa5eb689dca190899d818b25b6fac92d
3
  size 559
{checkpoint-16000 → checkpoint-1500}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:812531ae10018fac0bc1fdeb95a26197edbd60dd570f21800a631a5c4b646bc9
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15ccfdada89267f464b440ff4504b5d32a3dac17d15e8a8d5898e8ba842c4cbe
3
  size 623
checkpoint-1500/trainer_state.json ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 4.359680928208847,
5
+ "global_step": 1500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.29,
12
+ "learning_rate": 3.675e-06,
13
+ "loss": 1.0197,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.58,
18
+ "learning_rate": 7.425e-06,
19
+ "loss": 1.0171,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 0.87,
24
+ "learning_rate": 1.1137499999999998e-05,
25
+ "loss": 1.026,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 1.16,
30
+ "learning_rate": 1.48875e-05,
31
+ "loss": 1.0383,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 1.45,
36
+ "learning_rate": 1.86375e-05,
37
+ "loss": 1.0296,
38
+ "step": 500
39
+ },
40
+ {
41
+ "epoch": 1.45,
42
+ "eval_loss": 0.3191435635089874,
43
+ "eval_runtime": 138.7872,
44
+ "eval_samples_per_second": 33.288,
45
+ "eval_steps_per_second": 4.165,
46
+ "eval_wer": 0.27421526035611576,
47
+ "step": 500
48
+ },
49
+ {
50
+ "epoch": 1.74,
51
+ "learning_rate": 2.23875e-05,
52
+ "loss": 1.0529,
53
+ "step": 600
54
+ },
55
+ {
56
+ "epoch": 2.03,
57
+ "learning_rate": 2.6137499999999995e-05,
58
+ "loss": 1.0442,
59
+ "step": 700
60
+ },
61
+ {
62
+ "epoch": 2.32,
63
+ "learning_rate": 2.9887499999999998e-05,
64
+ "loss": 1.0632,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 2.61,
69
+ "learning_rate": 3.36375e-05,
70
+ "loss": 1.037,
71
+ "step": 900
72
+ },
73
+ {
74
+ "epoch": 2.91,
75
+ "learning_rate": 3.7387499999999994e-05,
76
+ "loss": 1.0495,
77
+ "step": 1000
78
+ },
79
+ {
80
+ "epoch": 2.91,
81
+ "eval_loss": 0.33204758167266846,
82
+ "eval_runtime": 139.9575,
83
+ "eval_samples_per_second": 33.01,
84
+ "eval_steps_per_second": 4.13,
85
+ "eval_wer": 0.28963470599033225,
86
+ "step": 1000
87
+ },
88
+ {
89
+ "epoch": 3.2,
90
+ "learning_rate": 4.11375e-05,
91
+ "loss": 1.0547,
92
+ "step": 1100
93
+ },
94
+ {
95
+ "epoch": 3.49,
96
+ "learning_rate": 4.48875e-05,
97
+ "loss": 1.06,
98
+ "step": 1200
99
+ },
100
+ {
101
+ "epoch": 3.78,
102
+ "learning_rate": 4.8637499999999996e-05,
103
+ "loss": 1.0655,
104
+ "step": 1300
105
+ },
106
+ {
107
+ "epoch": 4.07,
108
+ "learning_rate": 5.23875e-05,
109
+ "loss": 1.0757,
110
+ "step": 1400
111
+ },
112
+ {
113
+ "epoch": 4.36,
114
+ "learning_rate": 5.61375e-05,
115
+ "loss": 1.0532,
116
+ "step": 1500
117
+ },
118
+ {
119
+ "epoch": 4.36,
120
+ "eval_loss": 0.344494104385376,
121
+ "eval_runtime": 139.9249,
122
+ "eval_samples_per_second": 33.018,
123
+ "eval_steps_per_second": 4.131,
124
+ "eval_wer": 0.2935201615370495,
125
+ "step": 1500
126
+ }
127
+ ],
128
+ "max_steps": 17200,
129
+ "num_train_epochs": 50,
130
+ "total_flos": 4.445118706030802e+18,
131
+ "trial_name": null,
132
+ "trial_params": null
133
+ }
{checkpoint-17000 → checkpoint-1500}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a168f126d02648193e16ba893c3e1ef9f2c2de91803928caf5e25532aff1325
3
  size 2991
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0efc7aad7fd151e94de194f50e712cd8d3c82a2cf2ccee51d253c9130af43c3a
3
  size 2991
checkpoint-16000/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4470667bcf97fe5d54dee8281ff4760245200a1adf08a6e672825247d4a08b79
3
- size 1262075377
 
 
 
 
checkpoint-16000/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:61b1412288f80f87d3b78af99d5eeabfcc510fdb01676bb0cd4bcc6470be1c10
3
- size 14567
 
 
 
 
checkpoint-16000/trainer_state.json DELETED
@@ -1,1264 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 46.51051486584481,
5
- "global_step": 16000,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.29,
12
- "learning_rate": 3.7125e-06,
13
- "loss": 12.2713,
14
- "step": 100
15
- },
16
- {
17
- "epoch": 0.58,
18
- "learning_rate": 7.4625e-06,
19
- "loss": 6.2026,
20
- "step": 200
21
- },
22
- {
23
- "epoch": 0.87,
24
- "learning_rate": 1.1212499999999998e-05,
25
- "loss": 4.1374,
26
- "step": 300
27
- },
28
- {
29
- "epoch": 1.16,
30
- "learning_rate": 1.49625e-05,
31
- "loss": 3.6755,
32
- "step": 400
33
- },
34
- {
35
- "epoch": 1.45,
36
- "learning_rate": 1.8712499999999997e-05,
37
- "loss": 3.3332,
38
- "step": 500
39
- },
40
- {
41
- "epoch": 1.45,
42
- "eval_loss": 3.292029857635498,
43
- "eval_runtime": 137.2974,
44
- "eval_samples_per_second": 33.65,
45
- "eval_steps_per_second": 4.21,
46
- "eval_wer": 1.0,
47
- "step": 500
48
- },
49
- {
50
- "epoch": 1.74,
51
- "learning_rate": 2.2462499999999997e-05,
52
- "loss": 3.1418,
53
- "step": 600
54
- },
55
- {
56
- "epoch": 2.03,
57
- "learning_rate": 2.6212499999999997e-05,
58
- "loss": 3.0879,
59
- "step": 700
60
- },
61
- {
62
- "epoch": 2.32,
63
- "learning_rate": 2.99625e-05,
64
- "loss": 3.0216,
65
- "step": 800
66
- },
67
- {
68
- "epoch": 2.61,
69
- "learning_rate": 3.37125e-05,
70
- "loss": 2.9595,
71
- "step": 900
72
- },
73
- {
74
- "epoch": 2.91,
75
- "learning_rate": 3.7462499999999996e-05,
76
- "loss": 2.9269,
77
- "step": 1000
78
- },
79
- {
80
- "epoch": 2.91,
81
- "eval_loss": 2.941540241241455,
82
- "eval_runtime": 178.4388,
83
- "eval_samples_per_second": 25.891,
84
- "eval_steps_per_second": 3.239,
85
- "eval_wer": 0.9966346448020559,
86
- "step": 1000
87
- },
88
- {
89
- "epoch": 3.2,
90
- "learning_rate": 4.12125e-05,
91
- "loss": 2.914,
92
- "step": 1100
93
- },
94
- {
95
- "epoch": 3.49,
96
- "learning_rate": 4.4962499999999995e-05,
97
- "loss": 2.8432,
98
- "step": 1200
99
- },
100
- {
101
- "epoch": 3.78,
102
- "learning_rate": 4.871249999999999e-05,
103
- "loss": 2.6828,
104
- "step": 1300
105
- },
106
- {
107
- "epoch": 4.07,
108
- "learning_rate": 5.2462499999999994e-05,
109
- "loss": 2.355,
110
- "step": 1400
111
- },
112
- {
113
- "epoch": 4.36,
114
- "learning_rate": 5.62125e-05,
115
- "loss": 2.0719,
116
- "step": 1500
117
- },
118
- {
119
- "epoch": 4.36,
120
- "eval_loss": 1.164096713066101,
121
- "eval_runtime": 134.2981,
122
- "eval_samples_per_second": 34.401,
123
- "eval_steps_per_second": 4.304,
124
- "eval_wer": 0.8507923881784251,
125
- "step": 1500
126
- },
127
- {
128
- "epoch": 4.65,
129
- "learning_rate": 5.9962499999999994e-05,
130
- "loss": 1.944,
131
- "step": 1600
132
- },
133
- {
134
- "epoch": 4.94,
135
- "learning_rate": 6.37125e-05,
136
- "loss": 1.8772,
137
- "step": 1700
138
- },
139
- {
140
- "epoch": 5.23,
141
- "learning_rate": 6.746249999999999e-05,
142
- "loss": 1.807,
143
- "step": 1800
144
- },
145
- {
146
- "epoch": 5.52,
147
- "learning_rate": 7.121249999999999e-05,
148
- "loss": 1.7612,
149
- "step": 1900
150
- },
151
- {
152
- "epoch": 5.81,
153
- "learning_rate": 7.492499999999999e-05,
154
- "loss": 1.7404,
155
- "step": 2000
156
- },
157
- {
158
- "epoch": 5.81,
159
- "eval_loss": 0.7280541658401489,
160
- "eval_runtime": 174.7873,
161
- "eval_samples_per_second": 26.432,
162
- "eval_steps_per_second": 3.307,
163
- "eval_wer": 0.6846356238144772,
164
- "step": 2000
165
- },
166
- {
167
- "epoch": 6.1,
168
- "learning_rate": 7.451644736842105e-05,
169
- "loss": 1.6883,
170
- "step": 2100
171
- },
172
- {
173
- "epoch": 6.39,
174
- "learning_rate": 7.402302631578947e-05,
175
- "loss": 1.6463,
176
- "step": 2200
177
- },
178
- {
179
- "epoch": 6.68,
180
- "learning_rate": 7.352960526315788e-05,
181
- "loss": 1.6216,
182
- "step": 2300
183
- },
184
- {
185
- "epoch": 6.97,
186
- "learning_rate": 7.30361842105263e-05,
187
- "loss": 1.586,
188
- "step": 2400
189
- },
190
- {
191
- "epoch": 7.27,
192
- "learning_rate": 7.254276315789473e-05,
193
- "loss": 1.5921,
194
- "step": 2500
195
- },
196
- {
197
- "epoch": 7.27,
198
- "eval_loss": 0.5885757803916931,
199
- "eval_runtime": 137.296,
200
- "eval_samples_per_second": 33.65,
201
- "eval_steps_per_second": 4.21,
202
- "eval_wer": 0.5146545921801383,
203
- "step": 2500
204
- },
205
- {
206
- "epoch": 7.56,
207
- "learning_rate": 7.204934210526316e-05,
208
- "loss": 1.5694,
209
- "step": 2600
210
- },
211
- {
212
- "epoch": 7.85,
213
- "learning_rate": 7.155592105263157e-05,
214
- "loss": 1.5279,
215
- "step": 2700
216
- },
217
- {
218
- "epoch": 8.14,
219
- "learning_rate": 7.10625e-05,
220
- "loss": 1.5435,
221
- "step": 2800
222
- },
223
- {
224
- "epoch": 8.43,
225
- "learning_rate": 7.056907894736841e-05,
226
- "loss": 1.5163,
227
- "step": 2900
228
- },
229
- {
230
- "epoch": 8.72,
231
- "learning_rate": 7.007565789473684e-05,
232
- "loss": 1.4941,
233
- "step": 3000
234
- },
235
- {
236
- "epoch": 8.72,
237
- "eval_loss": 0.5183178186416626,
238
- "eval_runtime": 134.3566,
239
- "eval_samples_per_second": 34.386,
240
- "eval_steps_per_second": 4.302,
241
- "eval_wer": 0.5063023924616044,
242
- "step": 3000
243
- },
244
- {
245
- "epoch": 9.01,
246
- "learning_rate": 6.958223684210525e-05,
247
- "loss": 1.5061,
248
- "step": 3100
249
- },
250
- {
251
- "epoch": 9.3,
252
- "learning_rate": 6.908881578947368e-05,
253
- "loss": 1.4551,
254
- "step": 3200
255
- },
256
- {
257
- "epoch": 9.59,
258
- "learning_rate": 6.859539473684209e-05,
259
- "loss": 1.4674,
260
- "step": 3300
261
- },
262
- {
263
- "epoch": 9.88,
264
- "learning_rate": 6.810197368421052e-05,
265
- "loss": 1.4691,
266
- "step": 3400
267
- },
268
- {
269
- "epoch": 10.17,
270
- "learning_rate": 6.760855263157895e-05,
271
- "loss": 1.4486,
272
- "step": 3500
273
- },
274
- {
275
- "epoch": 10.17,
276
- "eval_loss": 0.47492966055870056,
277
- "eval_runtime": 135.1792,
278
- "eval_samples_per_second": 34.177,
279
- "eval_steps_per_second": 4.276,
280
- "eval_wer": 0.46760080768524753,
281
- "step": 3500
282
- },
283
- {
284
- "epoch": 10.46,
285
- "learning_rate": 6.711513157894736e-05,
286
- "loss": 1.4274,
287
- "step": 3600
288
- },
289
- {
290
- "epoch": 10.75,
291
- "learning_rate": 6.66266447368421e-05,
292
- "loss": 1.4277,
293
- "step": 3700
294
- },
295
- {
296
- "epoch": 11.05,
297
- "learning_rate": 6.613322368421052e-05,
298
- "loss": 1.4445,
299
- "step": 3800
300
- },
301
- {
302
- "epoch": 11.34,
303
- "learning_rate": 6.563980263157894e-05,
304
- "loss": 1.4101,
305
- "step": 3900
306
- },
307
- {
308
- "epoch": 11.63,
309
- "learning_rate": 6.514638157894736e-05,
310
- "loss": 1.3899,
311
- "step": 4000
312
- },
313
- {
314
- "epoch": 11.63,
315
- "eval_loss": 0.4564875364303589,
316
- "eval_runtime": 134.5257,
317
- "eval_samples_per_second": 34.343,
318
- "eval_steps_per_second": 4.297,
319
- "eval_wer": 0.4432172795692345,
320
- "step": 4000
321
- },
322
- {
323
- "epoch": 11.92,
324
- "learning_rate": 6.465296052631578e-05,
325
- "loss": 1.404,
326
- "step": 4100
327
- },
328
- {
329
- "epoch": 12.21,
330
- "learning_rate": 6.415953947368421e-05,
331
- "loss": 1.3911,
332
- "step": 4200
333
- },
334
- {
335
- "epoch": 12.5,
336
- "learning_rate": 6.366611842105262e-05,
337
- "loss": 1.3873,
338
- "step": 4300
339
- },
340
- {
341
- "epoch": 12.79,
342
- "learning_rate": 6.317269736842105e-05,
343
- "loss": 1.3712,
344
- "step": 4400
345
- },
346
- {
347
- "epoch": 13.08,
348
- "learning_rate": 6.267927631578946e-05,
349
- "loss": 1.3881,
350
- "step": 4500
351
- },
352
- {
353
- "epoch": 13.08,
354
- "eval_loss": 0.43156012892723083,
355
- "eval_runtime": 134.756,
356
- "eval_samples_per_second": 34.284,
357
- "eval_steps_per_second": 4.289,
358
- "eval_wer": 0.42278039527626504,
359
- "step": 4500
360
- },
361
- {
362
- "epoch": 13.37,
363
- "learning_rate": 6.218585526315789e-05,
364
- "loss": 1.3538,
365
- "step": 4600
366
- },
367
- {
368
- "epoch": 13.66,
369
- "learning_rate": 6.16924342105263e-05,
370
- "loss": 1.355,
371
- "step": 4700
372
- },
373
- {
374
- "epoch": 13.95,
375
- "learning_rate": 6.119901315789473e-05,
376
- "loss": 1.341,
377
- "step": 4800
378
- },
379
- {
380
- "epoch": 14.24,
381
- "learning_rate": 6.070559210526316e-05,
382
- "loss": 1.3474,
383
- "step": 4900
384
- },
385
- {
386
- "epoch": 14.53,
387
- "learning_rate": 6.021217105263157e-05,
388
- "loss": 1.3572,
389
- "step": 5000
390
- },
391
- {
392
- "epoch": 14.53,
393
- "eval_loss": 0.4194825291633606,
394
- "eval_runtime": 134.9338,
395
- "eval_samples_per_second": 34.239,
396
- "eval_steps_per_second": 4.284,
397
- "eval_wer": 0.3834363335984825,
398
- "step": 5000
399
- },
400
- {
401
- "epoch": 14.82,
402
- "learning_rate": 5.971875e-05,
403
- "loss": 1.3408,
404
- "step": 5100
405
- },
406
- {
407
- "epoch": 15.12,
408
- "learning_rate": 5.922532894736842e-05,
409
- "loss": 1.3357,
410
- "step": 5200
411
- },
412
- {
413
- "epoch": 15.41,
414
- "learning_rate": 5.873190789473683e-05,
415
- "loss": 1.3288,
416
- "step": 5300
417
- },
418
- {
419
- "epoch": 15.7,
420
- "learning_rate": 5.823848684210526e-05,
421
- "loss": 1.3258,
422
- "step": 5400
423
- },
424
- {
425
- "epoch": 15.99,
426
- "learning_rate": 5.7745065789473685e-05,
427
- "loss": 1.3261,
428
- "step": 5500
429
- },
430
- {
431
- "epoch": 15.99,
432
- "eval_loss": 0.3974045217037201,
433
- "eval_runtime": 134.8844,
434
- "eval_samples_per_second": 34.252,
435
- "eval_steps_per_second": 4.285,
436
- "eval_wer": 0.3606742948051153,
437
- "step": 5500
438
- },
439
- {
440
- "epoch": 16.28,
441
- "learning_rate": 5.72516447368421e-05,
442
- "loss": 1.3273,
443
- "step": 5600
444
- },
445
- {
446
- "epoch": 16.57,
447
- "learning_rate": 5.675822368421052e-05,
448
- "loss": 1.3131,
449
- "step": 5700
450
- },
451
- {
452
- "epoch": 16.86,
453
- "learning_rate": 5.6264802631578946e-05,
454
- "loss": 1.2739,
455
- "step": 5800
456
- },
457
- {
458
- "epoch": 17.15,
459
- "learning_rate": 5.577138157894736e-05,
460
- "loss": 1.3141,
461
- "step": 5900
462
- },
463
- {
464
- "epoch": 17.44,
465
- "learning_rate": 5.5277960526315786e-05,
466
- "loss": 1.2809,
467
- "step": 6000
468
- },
469
- {
470
- "epoch": 17.44,
471
- "eval_loss": 0.3844749331474304,
472
- "eval_runtime": 136.3592,
473
- "eval_samples_per_second": 33.881,
474
- "eval_steps_per_second": 4.239,
475
- "eval_wer": 0.3466621795264027,
476
- "step": 6000
477
- },
478
- {
479
- "epoch": 17.73,
480
- "learning_rate": 5.4784539473684206e-05,
481
- "loss": 1.2847,
482
- "step": 6100
483
- },
484
- {
485
- "epoch": 18.02,
486
- "learning_rate": 5.4291118421052626e-05,
487
- "loss": 1.303,
488
- "step": 6200
489
- },
490
- {
491
- "epoch": 18.31,
492
- "learning_rate": 5.3797697368421046e-05,
493
- "loss": 1.2733,
494
- "step": 6300
495
- },
496
- {
497
- "epoch": 18.6,
498
- "learning_rate": 5.330427631578947e-05,
499
- "loss": 1.2707,
500
- "step": 6400
501
- },
502
- {
503
- "epoch": 18.89,
504
- "learning_rate": 5.2810855263157886e-05,
505
- "loss": 1.2713,
506
- "step": 6500
507
- },
508
- {
509
- "epoch": 18.89,
510
- "eval_loss": 0.3831607401371002,
511
- "eval_runtime": 138.2456,
512
- "eval_samples_per_second": 33.419,
513
- "eval_steps_per_second": 4.181,
514
- "eval_wer": 0.3449795019274307,
515
- "step": 6500
516
- },
517
- {
518
- "epoch": 19.19,
519
- "learning_rate": 5.231743421052631e-05,
520
- "loss": 1.2562,
521
- "step": 6600
522
- },
523
- {
524
- "epoch": 19.48,
525
- "learning_rate": 5.182894736842105e-05,
526
- "loss": 1.2721,
527
- "step": 6700
528
- },
529
- {
530
- "epoch": 19.77,
531
- "learning_rate": 5.133552631578947e-05,
532
- "loss": 1.2536,
533
- "step": 6800
534
- },
535
- {
536
- "epoch": 20.06,
537
- "learning_rate": 5.084210526315789e-05,
538
- "loss": 1.2503,
539
- "step": 6900
540
- },
541
- {
542
- "epoch": 20.35,
543
- "learning_rate": 5.0353618421052625e-05,
544
- "loss": 1.257,
545
- "step": 7000
546
- },
547
- {
548
- "epoch": 20.35,
549
- "eval_loss": 0.3778809607028961,
550
- "eval_runtime": 138.2403,
551
- "eval_samples_per_second": 33.42,
552
- "eval_steps_per_second": 4.181,
553
- "eval_wer": 0.33726977911032247,
554
- "step": 7000
555
- },
556
- {
557
- "epoch": 20.64,
558
- "learning_rate": 4.9860197368421045e-05,
559
- "loss": 1.2557,
560
- "step": 7100
561
- },
562
- {
563
- "epoch": 20.93,
564
- "learning_rate": 4.936677631578947e-05,
565
- "loss": 1.2397,
566
- "step": 7200
567
- },
568
- {
569
- "epoch": 21.22,
570
- "learning_rate": 4.8873355263157886e-05,
571
- "loss": 1.2527,
572
- "step": 7300
573
- },
574
- {
575
- "epoch": 21.51,
576
- "learning_rate": 4.837993421052631e-05,
577
- "loss": 1.2273,
578
- "step": 7400
579
- },
580
- {
581
- "epoch": 21.8,
582
- "learning_rate": 4.788651315789473e-05,
583
- "loss": 1.2298,
584
- "step": 7500
585
- },
586
- {
587
- "epoch": 21.8,
588
- "eval_loss": 0.3743567168712616,
589
- "eval_runtime": 136.1375,
590
- "eval_samples_per_second": 33.936,
591
- "eval_steps_per_second": 4.246,
592
- "eval_wer": 0.33913602153827327,
593
- "step": 7500
594
- },
595
- {
596
- "epoch": 22.09,
597
- "learning_rate": 4.739309210526315e-05,
598
- "loss": 1.2444,
599
- "step": 7600
600
- },
601
- {
602
- "epoch": 22.38,
603
- "learning_rate": 4.689967105263157e-05,
604
- "loss": 1.2153,
605
- "step": 7700
606
- },
607
- {
608
- "epoch": 22.67,
609
- "learning_rate": 4.640625e-05,
610
- "loss": 1.2327,
611
- "step": 7800
612
- },
613
- {
614
- "epoch": 22.96,
615
- "learning_rate": 4.591282894736841e-05,
616
- "loss": 1.2404,
617
- "step": 7900
618
- },
619
- {
620
- "epoch": 23.26,
621
- "learning_rate": 4.541940789473684e-05,
622
- "loss": 1.2173,
623
- "step": 8000
624
- },
625
- {
626
- "epoch": 23.26,
627
- "eval_loss": 0.3745496869087219,
628
- "eval_runtime": 139.3565,
629
- "eval_samples_per_second": 33.152,
630
- "eval_steps_per_second": 4.148,
631
- "eval_wer": 0.32619470109527016,
632
- "step": 8000
633
- },
634
- {
635
- "epoch": 23.55,
636
- "learning_rate": 4.492598684210526e-05,
637
- "loss": 1.222,
638
- "step": 8100
639
- },
640
- {
641
- "epoch": 23.84,
642
- "learning_rate": 4.443256578947369e-05,
643
- "loss": 1.1868,
644
- "step": 8200
645
- },
646
- {
647
- "epoch": 24.13,
648
- "learning_rate": 4.39391447368421e-05,
649
- "loss": 1.2272,
650
- "step": 8300
651
- },
652
- {
653
- "epoch": 24.42,
654
- "learning_rate": 4.344572368421052e-05,
655
- "loss": 1.2212,
656
- "step": 8400
657
- },
658
- {
659
- "epoch": 24.71,
660
- "learning_rate": 4.295230263157895e-05,
661
- "loss": 1.1966,
662
- "step": 8500
663
- },
664
- {
665
- "epoch": 24.71,
666
- "eval_loss": 0.36800575256347656,
667
- "eval_runtime": 137.1685,
668
- "eval_samples_per_second": 33.681,
669
- "eval_steps_per_second": 4.214,
670
- "eval_wer": 0.32411429970017747,
671
- "step": 8500
672
- },
673
- {
674
- "epoch": 25.0,
675
- "learning_rate": 4.245888157894736e-05,
676
- "loss": 1.2128,
677
- "step": 8600
678
- },
679
- {
680
- "epoch": 25.29,
681
- "learning_rate": 4.196546052631579e-05,
682
- "loss": 1.2118,
683
- "step": 8700
684
- },
685
- {
686
- "epoch": 25.58,
687
- "learning_rate": 4.147203947368421e-05,
688
- "loss": 1.2009,
689
- "step": 8800
690
- },
691
- {
692
- "epoch": 25.87,
693
- "learning_rate": 4.097861842105263e-05,
694
- "loss": 1.1684,
695
- "step": 8900
696
- },
697
- {
698
- "epoch": 26.16,
699
- "learning_rate": 4.048519736842105e-05,
700
- "loss": 1.1925,
701
- "step": 9000
702
- },
703
- {
704
- "epoch": 26.16,
705
- "eval_loss": 0.3604615330696106,
706
- "eval_runtime": 153.4534,
707
- "eval_samples_per_second": 30.107,
708
- "eval_steps_per_second": 3.767,
709
- "eval_wer": 0.31713883619898425,
710
- "step": 9000
711
- },
712
- {
713
- "epoch": 26.45,
714
- "learning_rate": 3.9991776315789475e-05,
715
- "loss": 1.1759,
716
- "step": 9100
717
- },
718
- {
719
- "epoch": 26.74,
720
- "learning_rate": 3.949835526315789e-05,
721
- "loss": 1.1754,
722
- "step": 9200
723
- },
724
- {
725
- "epoch": 27.03,
726
- "learning_rate": 3.9004934210526315e-05,
727
- "loss": 1.198,
728
- "step": 9300
729
- },
730
- {
731
- "epoch": 27.32,
732
- "learning_rate": 3.8511513157894735e-05,
733
- "loss": 1.1602,
734
- "step": 9400
735
- },
736
- {
737
- "epoch": 27.61,
738
- "learning_rate": 3.801809210526315e-05,
739
- "loss": 1.1692,
740
- "step": 9500
741
- },
742
- {
743
- "epoch": 27.61,
744
- "eval_loss": 0.3512294292449951,
745
- "eval_runtime": 166.9619,
746
- "eval_samples_per_second": 27.671,
747
- "eval_steps_per_second": 3.462,
748
- "eval_wer": 0.31472189928409716,
749
- "step": 9500
750
- },
751
- {
752
- "epoch": 27.91,
753
- "learning_rate": 3.7524671052631575e-05,
754
- "loss": 1.1495,
755
- "step": 9600
756
- },
757
- {
758
- "epoch": 28.2,
759
- "learning_rate": 3.7031249999999995e-05,
760
- "loss": 1.1722,
761
- "step": 9700
762
- },
763
- {
764
- "epoch": 28.49,
765
- "learning_rate": 3.6537828947368416e-05,
766
- "loss": 1.1443,
767
- "step": 9800
768
- },
769
- {
770
- "epoch": 28.78,
771
- "learning_rate": 3.6044407894736836e-05,
772
- "loss": 1.1535,
773
- "step": 9900
774
- },
775
- {
776
- "epoch": 29.07,
777
- "learning_rate": 3.555098684210526e-05,
778
- "loss": 1.1704,
779
- "step": 10000
780
- },
781
- {
782
- "epoch": 29.07,
783
- "eval_loss": 0.3532196581363678,
784
- "eval_runtime": 134.5452,
785
- "eval_samples_per_second": 34.338,
786
- "eval_steps_per_second": 4.296,
787
- "eval_wer": 0.30976564890167047,
788
- "step": 10000
789
- },
790
- {
791
- "epoch": 29.36,
792
- "learning_rate": 3.505756578947368e-05,
793
- "loss": 1.1519,
794
- "step": 10100
795
- },
796
- {
797
- "epoch": 29.65,
798
- "learning_rate": 3.45641447368421e-05,
799
- "loss": 1.1507,
800
- "step": 10200
801
- },
802
- {
803
- "epoch": 29.94,
804
- "learning_rate": 3.407072368421052e-05,
805
- "loss": 1.1517,
806
- "step": 10300
807
- },
808
- {
809
- "epoch": 30.23,
810
- "learning_rate": 3.357730263157894e-05,
811
- "loss": 1.1447,
812
- "step": 10400
813
- },
814
- {
815
- "epoch": 30.52,
816
- "learning_rate": 3.308388157894737e-05,
817
- "loss": 1.1595,
818
- "step": 10500
819
- },
820
- {
821
- "epoch": 30.52,
822
- "eval_loss": 0.34246256947517395,
823
- "eval_runtime": 159.7393,
824
- "eval_samples_per_second": 28.922,
825
- "eval_steps_per_second": 3.618,
826
- "eval_wer": 0.3038915743743499,
827
- "step": 10500
828
- },
829
- {
830
- "epoch": 30.81,
831
- "learning_rate": 3.259046052631579e-05,
832
- "loss": 1.1451,
833
- "step": 10600
834
- },
835
- {
836
- "epoch": 31.1,
837
- "learning_rate": 3.2097039473684203e-05,
838
- "loss": 1.1304,
839
- "step": 10700
840
- },
841
- {
842
- "epoch": 31.39,
843
- "learning_rate": 3.160361842105263e-05,
844
- "loss": 1.1316,
845
- "step": 10800
846
- },
847
- {
848
- "epoch": 31.68,
849
- "learning_rate": 3.111019736842105e-05,
850
- "loss": 1.1301,
851
- "step": 10900
852
- },
853
- {
854
- "epoch": 31.97,
855
- "learning_rate": 3.061677631578947e-05,
856
- "loss": 1.1433,
857
- "step": 11000
858
- },
859
- {
860
- "epoch": 31.97,
861
- "eval_loss": 0.35683709383010864,
862
- "eval_runtime": 158.7503,
863
- "eval_samples_per_second": 29.102,
864
- "eval_steps_per_second": 3.641,
865
- "eval_wer": 0.30257602643333537,
866
- "step": 11000
867
- },
868
- {
869
- "epoch": 32.27,
870
- "learning_rate": 3.0123355263157894e-05,
871
- "loss": 1.1337,
872
- "step": 11100
873
- },
874
- {
875
- "epoch": 32.56,
876
- "learning_rate": 2.9629934210526314e-05,
877
- "loss": 1.1227,
878
- "step": 11200
879
- },
880
- {
881
- "epoch": 32.85,
882
- "learning_rate": 2.913651315789473e-05,
883
- "loss": 1.1281,
884
- "step": 11300
885
- },
886
- {
887
- "epoch": 33.14,
888
- "learning_rate": 2.8643092105263154e-05,
889
- "loss": 1.1203,
890
- "step": 11400
891
- },
892
- {
893
- "epoch": 33.43,
894
- "learning_rate": 2.8149671052631574e-05,
895
- "loss": 1.1295,
896
- "step": 11500
897
- },
898
- {
899
- "epoch": 33.43,
900
- "eval_loss": 0.34605443477630615,
901
- "eval_runtime": 160.1749,
902
- "eval_samples_per_second": 28.843,
903
- "eval_steps_per_second": 3.609,
904
- "eval_wer": 0.2992106712353913,
905
- "step": 11500
906
- },
907
- {
908
- "epoch": 33.72,
909
- "learning_rate": 2.7656249999999998e-05,
910
- "loss": 1.0915,
911
- "step": 11600
912
- },
913
- {
914
- "epoch": 34.01,
915
- "learning_rate": 2.7162828947368418e-05,
916
- "loss": 1.1312,
917
- "step": 11700
918
- },
919
- {
920
- "epoch": 34.3,
921
- "learning_rate": 2.6669407894736838e-05,
922
- "loss": 1.1146,
923
- "step": 11800
924
- },
925
- {
926
- "epoch": 34.59,
927
- "learning_rate": 2.6175986842105262e-05,
928
- "loss": 1.1184,
929
- "step": 11900
930
- },
931
- {
932
- "epoch": 34.88,
933
- "learning_rate": 2.56875e-05,
934
- "loss": 1.1131,
935
- "step": 12000
936
- },
937
- {
938
- "epoch": 34.88,
939
- "eval_loss": 0.3348712623119354,
940
- "eval_runtime": 157.6811,
941
- "eval_samples_per_second": 29.3,
942
- "eval_steps_per_second": 3.666,
943
- "eval_wer": 0.29422382671480146,
944
- "step": 12000
945
- },
946
- {
947
- "epoch": 35.17,
948
- "learning_rate": 2.5194078947368418e-05,
949
- "loss": 1.1025,
950
- "step": 12100
951
- },
952
- {
953
- "epoch": 35.46,
954
- "learning_rate": 2.4700657894736838e-05,
955
- "loss": 1.1069,
956
- "step": 12200
957
- },
958
- {
959
- "epoch": 35.75,
960
- "learning_rate": 2.420723684210526e-05,
961
- "loss": 1.076,
962
- "step": 12300
963
- },
964
- {
965
- "epoch": 36.05,
966
- "learning_rate": 2.371381578947368e-05,
967
- "loss": 1.111,
968
- "step": 12400
969
- },
970
- {
971
- "epoch": 36.34,
972
- "learning_rate": 2.32203947368421e-05,
973
- "loss": 1.1015,
974
- "step": 12500
975
- },
976
- {
977
- "epoch": 36.34,
978
- "eval_loss": 0.3378337025642395,
979
- "eval_runtime": 160.9202,
980
- "eval_samples_per_second": 28.71,
981
- "eval_steps_per_second": 3.592,
982
- "eval_wer": 0.29612066328091535,
983
- "step": 12500
984
- },
985
- {
986
- "epoch": 36.63,
987
- "learning_rate": 2.2726973684210525e-05,
988
- "loss": 1.1047,
989
- "step": 12600
990
- },
991
- {
992
- "epoch": 36.92,
993
- "learning_rate": 2.2233552631578945e-05,
994
- "loss": 1.102,
995
- "step": 12700
996
- },
997
- {
998
- "epoch": 37.21,
999
- "learning_rate": 2.1740131578947365e-05,
1000
- "loss": 1.095,
1001
- "step": 12800
1002
- },
1003
- {
1004
- "epoch": 37.5,
1005
- "learning_rate": 2.124671052631579e-05,
1006
- "loss": 1.0709,
1007
- "step": 12900
1008
- },
1009
- {
1010
- "epoch": 37.79,
1011
- "learning_rate": 2.075328947368421e-05,
1012
- "loss": 1.0835,
1013
- "step": 13000
1014
- },
1015
- {
1016
- "epoch": 37.79,
1017
- "eval_loss": 0.3281959593296051,
1018
- "eval_runtime": 159.3279,
1019
- "eval_samples_per_second": 28.997,
1020
- "eval_steps_per_second": 3.628,
1021
- "eval_wer": 0.2865141038976932,
1022
- "step": 13000
1023
- },
1024
- {
1025
- "epoch": 38.08,
1026
- "learning_rate": 2.0259868421052632e-05,
1027
- "loss": 1.0846,
1028
- "step": 13100
1029
- },
1030
- {
1031
- "epoch": 38.37,
1032
- "learning_rate": 1.9766447368421053e-05,
1033
- "loss": 1.0933,
1034
- "step": 13200
1035
- },
1036
- {
1037
- "epoch": 38.66,
1038
- "learning_rate": 1.927302631578947e-05,
1039
- "loss": 1.0661,
1040
- "step": 13300
1041
- },
1042
- {
1043
- "epoch": 38.95,
1044
- "learning_rate": 1.8779605263157893e-05,
1045
- "loss": 1.0902,
1046
- "step": 13400
1047
- },
1048
- {
1049
- "epoch": 39.24,
1050
- "learning_rate": 1.8286184210526313e-05,
1051
- "loss": 1.083,
1052
- "step": 13500
1053
- },
1054
- {
1055
- "epoch": 39.24,
1056
- "eval_loss": 0.3181643486022949,
1057
- "eval_runtime": 158.3527,
1058
- "eval_samples_per_second": 29.175,
1059
- "eval_steps_per_second": 3.65,
1060
- "eval_wer": 0.2825674600746497,
1061
- "step": 13500
1062
- },
1063
- {
1064
- "epoch": 39.53,
1065
- "learning_rate": 1.7792763157894736e-05,
1066
- "loss": 1.0548,
1067
- "step": 13600
1068
- },
1069
- {
1070
- "epoch": 39.82,
1071
- "learning_rate": 1.7299342105263156e-05,
1072
- "loss": 1.0735,
1073
- "step": 13700
1074
- },
1075
- {
1076
- "epoch": 40.12,
1077
- "learning_rate": 1.6805921052631577e-05,
1078
- "loss": 1.085,
1079
- "step": 13800
1080
- },
1081
- {
1082
- "epoch": 40.41,
1083
- "learning_rate": 1.63125e-05,
1084
- "loss": 1.0531,
1085
- "step": 13900
1086
- },
1087
- {
1088
- "epoch": 40.7,
1089
- "learning_rate": 1.581907894736842e-05,
1090
- "loss": 1.0819,
1091
- "step": 14000
1092
- },
1093
- {
1094
- "epoch": 40.7,
1095
- "eval_loss": 0.32643991708755493,
1096
- "eval_runtime": 159.1099,
1097
- "eval_samples_per_second": 29.037,
1098
- "eval_steps_per_second": 3.633,
1099
- "eval_wer": 0.28504558526586304,
1100
- "step": 14000
1101
- },
1102
- {
1103
- "epoch": 40.99,
1104
- "learning_rate": 1.532565789473684e-05,
1105
- "loss": 1.0679,
1106
- "step": 14100
1107
- },
1108
- {
1109
- "epoch": 41.28,
1110
- "learning_rate": 1.4832236842105262e-05,
1111
- "loss": 1.0773,
1112
- "step": 14200
1113
- },
1114
- {
1115
- "epoch": 41.57,
1116
- "learning_rate": 1.4338815789473682e-05,
1117
- "loss": 1.0587,
1118
- "step": 14300
1119
- },
1120
- {
1121
- "epoch": 41.86,
1122
- "learning_rate": 1.3845394736842104e-05,
1123
- "loss": 1.0287,
1124
- "step": 14400
1125
- },
1126
- {
1127
- "epoch": 42.15,
1128
- "learning_rate": 1.3351973684210524e-05,
1129
- "loss": 1.072,
1130
- "step": 14500
1131
- },
1132
- {
1133
- "epoch": 42.15,
1134
- "eval_loss": 0.32792535424232483,
1135
- "eval_runtime": 155.2521,
1136
- "eval_samples_per_second": 29.758,
1137
- "eval_steps_per_second": 3.723,
1138
- "eval_wer": 0.2817108242060821,
1139
- "step": 14500
1140
- },
1141
- {
1142
- "epoch": 42.44,
1143
- "learning_rate": 1.2863486842105262e-05,
1144
- "loss": 1.0508,
1145
- "step": 14600
1146
- },
1147
- {
1148
- "epoch": 42.73,
1149
- "learning_rate": 1.2370065789473684e-05,
1150
- "loss": 1.0408,
1151
- "step": 14700
1152
- },
1153
- {
1154
- "epoch": 43.02,
1155
- "learning_rate": 1.1876644736842105e-05,
1156
- "loss": 1.0725,
1157
- "step": 14800
1158
- },
1159
- {
1160
- "epoch": 43.31,
1161
- "learning_rate": 1.1383223684210525e-05,
1162
- "loss": 1.0381,
1163
- "step": 14900
1164
- },
1165
- {
1166
- "epoch": 43.6,
1167
- "learning_rate": 1.0889802631578946e-05,
1168
- "loss": 1.0456,
1169
- "step": 15000
1170
- },
1171
- {
1172
- "epoch": 43.6,
1173
- "eval_loss": 0.323445200920105,
1174
- "eval_runtime": 137.3413,
1175
- "eval_samples_per_second": 33.639,
1176
- "eval_steps_per_second": 4.208,
1177
- "eval_wer": 0.2792632931530319,
1178
- "step": 15000
1179
- },
1180
- {
1181
- "epoch": 43.89,
1182
- "learning_rate": 1.0396381578947367e-05,
1183
- "loss": 1.0655,
1184
- "step": 15100
1185
- },
1186
- {
1187
- "epoch": 44.19,
1188
- "learning_rate": 9.90296052631579e-06,
1189
- "loss": 1.0509,
1190
- "step": 15200
1191
- },
1192
- {
1193
- "epoch": 44.48,
1194
- "learning_rate": 9.40953947368421e-06,
1195
- "loss": 1.0357,
1196
- "step": 15300
1197
- },
1198
- {
1199
- "epoch": 44.77,
1200
- "learning_rate": 8.916118421052631e-06,
1201
- "loss": 1.0467,
1202
- "step": 15400
1203
- },
1204
- {
1205
- "epoch": 45.06,
1206
- "learning_rate": 8.422697368421051e-06,
1207
- "loss": 1.0581,
1208
- "step": 15500
1209
- },
1210
- {
1211
- "epoch": 45.06,
1212
- "eval_loss": 0.32200726866722107,
1213
- "eval_runtime": 138.9222,
1214
- "eval_samples_per_second": 33.256,
1215
- "eval_steps_per_second": 4.161,
1216
- "eval_wer": 0.27791715107385423,
1217
- "step": 15500
1218
- },
1219
- {
1220
- "epoch": 45.35,
1221
- "learning_rate": 7.929276315789473e-06,
1222
- "loss": 1.0512,
1223
- "step": 15600
1224
- },
1225
- {
1226
- "epoch": 45.64,
1227
- "learning_rate": 7.435855263157894e-06,
1228
- "loss": 1.0397,
1229
- "step": 15700
1230
- },
1231
- {
1232
- "epoch": 45.93,
1233
- "learning_rate": 6.9473684210526315e-06,
1234
- "loss": 1.0455,
1235
- "step": 15800
1236
- },
1237
- {
1238
- "epoch": 46.22,
1239
- "learning_rate": 6.453947368421052e-06,
1240
- "loss": 1.0347,
1241
- "step": 15900
1242
- },
1243
- {
1244
- "epoch": 46.51,
1245
- "learning_rate": 5.9605263157894735e-06,
1246
- "loss": 1.0406,
1247
- "step": 16000
1248
- },
1249
- {
1250
- "epoch": 46.51,
1251
- "eval_loss": 0.32076749205589294,
1252
- "eval_runtime": 134.5334,
1253
- "eval_samples_per_second": 34.341,
1254
- "eval_steps_per_second": 4.296,
1255
- "eval_wer": 0.2762344734748822,
1256
- "step": 16000
1257
- }
1258
- ],
1259
- "max_steps": 17200,
1260
- "num_train_epochs": 50,
1261
- "total_flos": 4.722636215807744e+19,
1262
- "trial_name": null,
1263
- "trial_params": null
1264
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-16000/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a168f126d02648193e16ba893c3e1ef9f2c2de91803928caf5e25532aff1325
3
- size 2991
 
 
 
 
checkpoint-16500/scaler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9dc60172b032edcc9f74ac3e1b49feca378ebd1f3b02a4d7cfc53aef7447534
3
- size 559
 
 
 
 
checkpoint-16500/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e93700db66c6e0b98be12ee747665bf9e5cb13cb3f69d7765f6d28a5384f3c0b
3
- size 623
 
 
 
 
checkpoint-16500/trainer_state.json DELETED
@@ -1,1303 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 47.963016678752716,
5
- "global_step": 16500,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.29,
12
- "learning_rate": 3.7125e-06,
13
- "loss": 12.2713,
14
- "step": 100
15
- },
16
- {
17
- "epoch": 0.58,
18
- "learning_rate": 7.4625e-06,
19
- "loss": 6.2026,
20
- "step": 200
21
- },
22
- {
23
- "epoch": 0.87,
24
- "learning_rate": 1.1212499999999998e-05,
25
- "loss": 4.1374,
26
- "step": 300
27
- },
28
- {
29
- "epoch": 1.16,
30
- "learning_rate": 1.49625e-05,
31
- "loss": 3.6755,
32
- "step": 400
33
- },
34
- {
35
- "epoch": 1.45,
36
- "learning_rate": 1.8712499999999997e-05,
37
- "loss": 3.3332,
38
- "step": 500
39
- },
40
- {
41
- "epoch": 1.45,
42
- "eval_loss": 3.292029857635498,
43
- "eval_runtime": 137.2974,
44
- "eval_samples_per_second": 33.65,
45
- "eval_steps_per_second": 4.21,
46
- "eval_wer": 1.0,
47
- "step": 500
48
- },
49
- {
50
- "epoch": 1.74,
51
- "learning_rate": 2.2462499999999997e-05,
52
- "loss": 3.1418,
53
- "step": 600
54
- },
55
- {
56
- "epoch": 2.03,
57
- "learning_rate": 2.6212499999999997e-05,
58
- "loss": 3.0879,
59
- "step": 700
60
- },
61
- {
62
- "epoch": 2.32,
63
- "learning_rate": 2.99625e-05,
64
- "loss": 3.0216,
65
- "step": 800
66
- },
67
- {
68
- "epoch": 2.61,
69
- "learning_rate": 3.37125e-05,
70
- "loss": 2.9595,
71
- "step": 900
72
- },
73
- {
74
- "epoch": 2.91,
75
- "learning_rate": 3.7462499999999996e-05,
76
- "loss": 2.9269,
77
- "step": 1000
78
- },
79
- {
80
- "epoch": 2.91,
81
- "eval_loss": 2.941540241241455,
82
- "eval_runtime": 178.4388,
83
- "eval_samples_per_second": 25.891,
84
- "eval_steps_per_second": 3.239,
85
- "eval_wer": 0.9966346448020559,
86
- "step": 1000
87
- },
88
- {
89
- "epoch": 3.2,
90
- "learning_rate": 4.12125e-05,
91
- "loss": 2.914,
92
- "step": 1100
93
- },
94
- {
95
- "epoch": 3.49,
96
- "learning_rate": 4.4962499999999995e-05,
97
- "loss": 2.8432,
98
- "step": 1200
99
- },
100
- {
101
- "epoch": 3.78,
102
- "learning_rate": 4.871249999999999e-05,
103
- "loss": 2.6828,
104
- "step": 1300
105
- },
106
- {
107
- "epoch": 4.07,
108
- "learning_rate": 5.2462499999999994e-05,
109
- "loss": 2.355,
110
- "step": 1400
111
- },
112
- {
113
- "epoch": 4.36,
114
- "learning_rate": 5.62125e-05,
115
- "loss": 2.0719,
116
- "step": 1500
117
- },
118
- {
119
- "epoch": 4.36,
120
- "eval_loss": 1.164096713066101,
121
- "eval_runtime": 134.2981,
122
- "eval_samples_per_second": 34.401,
123
- "eval_steps_per_second": 4.304,
124
- "eval_wer": 0.8507923881784251,
125
- "step": 1500
126
- },
127
- {
128
- "epoch": 4.65,
129
- "learning_rate": 5.9962499999999994e-05,
130
- "loss": 1.944,
131
- "step": 1600
132
- },
133
- {
134
- "epoch": 4.94,
135
- "learning_rate": 6.37125e-05,
136
- "loss": 1.8772,
137
- "step": 1700
138
- },
139
- {
140
- "epoch": 5.23,
141
- "learning_rate": 6.746249999999999e-05,
142
- "loss": 1.807,
143
- "step": 1800
144
- },
145
- {
146
- "epoch": 5.52,
147
- "learning_rate": 7.121249999999999e-05,
148
- "loss": 1.7612,
149
- "step": 1900
150
- },
151
- {
152
- "epoch": 5.81,
153
- "learning_rate": 7.492499999999999e-05,
154
- "loss": 1.7404,
155
- "step": 2000
156
- },
157
- {
158
- "epoch": 5.81,
159
- "eval_loss": 0.7280541658401489,
160
- "eval_runtime": 174.7873,
161
- "eval_samples_per_second": 26.432,
162
- "eval_steps_per_second": 3.307,
163
- "eval_wer": 0.6846356238144772,
164
- "step": 2000
165
- },
166
- {
167
- "epoch": 6.1,
168
- "learning_rate": 7.451644736842105e-05,
169
- "loss": 1.6883,
170
- "step": 2100
171
- },
172
- {
173
- "epoch": 6.39,
174
- "learning_rate": 7.402302631578947e-05,
175
- "loss": 1.6463,
176
- "step": 2200
177
- },
178
- {
179
- "epoch": 6.68,
180
- "learning_rate": 7.352960526315788e-05,
181
- "loss": 1.6216,
182
- "step": 2300
183
- },
184
- {
185
- "epoch": 6.97,
186
- "learning_rate": 7.30361842105263e-05,
187
- "loss": 1.586,
188
- "step": 2400
189
- },
190
- {
191
- "epoch": 7.27,
192
- "learning_rate": 7.254276315789473e-05,
193
- "loss": 1.5921,
194
- "step": 2500
195
- },
196
- {
197
- "epoch": 7.27,
198
- "eval_loss": 0.5885757803916931,
199
- "eval_runtime": 137.296,
200
- "eval_samples_per_second": 33.65,
201
- "eval_steps_per_second": 4.21,
202
- "eval_wer": 0.5146545921801383,
203
- "step": 2500
204
- },
205
- {
206
- "epoch": 7.56,
207
- "learning_rate": 7.204934210526316e-05,
208
- "loss": 1.5694,
209
- "step": 2600
210
- },
211
- {
212
- "epoch": 7.85,
213
- "learning_rate": 7.155592105263157e-05,
214
- "loss": 1.5279,
215
- "step": 2700
216
- },
217
- {
218
- "epoch": 8.14,
219
- "learning_rate": 7.10625e-05,
220
- "loss": 1.5435,
221
- "step": 2800
222
- },
223
- {
224
- "epoch": 8.43,
225
- "learning_rate": 7.056907894736841e-05,
226
- "loss": 1.5163,
227
- "step": 2900
228
- },
229
- {
230
- "epoch": 8.72,
231
- "learning_rate": 7.007565789473684e-05,
232
- "loss": 1.4941,
233
- "step": 3000
234
- },
235
- {
236
- "epoch": 8.72,
237
- "eval_loss": 0.5183178186416626,
238
- "eval_runtime": 134.3566,
239
- "eval_samples_per_second": 34.386,
240
- "eval_steps_per_second": 4.302,
241
- "eval_wer": 0.5063023924616044,
242
- "step": 3000
243
- },
244
- {
245
- "epoch": 9.01,
246
- "learning_rate": 6.958223684210525e-05,
247
- "loss": 1.5061,
248
- "step": 3100
249
- },
250
- {
251
- "epoch": 9.3,
252
- "learning_rate": 6.908881578947368e-05,
253
- "loss": 1.4551,
254
- "step": 3200
255
- },
256
- {
257
- "epoch": 9.59,
258
- "learning_rate": 6.859539473684209e-05,
259
- "loss": 1.4674,
260
- "step": 3300
261
- },
262
- {
263
- "epoch": 9.88,
264
- "learning_rate": 6.810197368421052e-05,
265
- "loss": 1.4691,
266
- "step": 3400
267
- },
268
- {
269
- "epoch": 10.17,
270
- "learning_rate": 6.760855263157895e-05,
271
- "loss": 1.4486,
272
- "step": 3500
273
- },
274
- {
275
- "epoch": 10.17,
276
- "eval_loss": 0.47492966055870056,
277
- "eval_runtime": 135.1792,
278
- "eval_samples_per_second": 34.177,
279
- "eval_steps_per_second": 4.276,
280
- "eval_wer": 0.46760080768524753,
281
- "step": 3500
282
- },
283
- {
284
- "epoch": 10.46,
285
- "learning_rate": 6.711513157894736e-05,
286
- "loss": 1.4274,
287
- "step": 3600
288
- },
289
- {
290
- "epoch": 10.75,
291
- "learning_rate": 6.66266447368421e-05,
292
- "loss": 1.4277,
293
- "step": 3700
294
- },
295
- {
296
- "epoch": 11.05,
297
- "learning_rate": 6.613322368421052e-05,
298
- "loss": 1.4445,
299
- "step": 3800
300
- },
301
- {
302
- "epoch": 11.34,
303
- "learning_rate": 6.563980263157894e-05,
304
- "loss": 1.4101,
305
- "step": 3900
306
- },
307
- {
308
- "epoch": 11.63,
309
- "learning_rate": 6.514638157894736e-05,
310
- "loss": 1.3899,
311
- "step": 4000
312
- },
313
- {
314
- "epoch": 11.63,
315
- "eval_loss": 0.4564875364303589,
316
- "eval_runtime": 134.5257,
317
- "eval_samples_per_second": 34.343,
318
- "eval_steps_per_second": 4.297,
319
- "eval_wer": 0.4432172795692345,
320
- "step": 4000
321
- },
322
- {
323
- "epoch": 11.92,
324
- "learning_rate": 6.465296052631578e-05,
325
- "loss": 1.404,
326
- "step": 4100
327
- },
328
- {
329
- "epoch": 12.21,
330
- "learning_rate": 6.415953947368421e-05,
331
- "loss": 1.3911,
332
- "step": 4200
333
- },
334
- {
335
- "epoch": 12.5,
336
- "learning_rate": 6.366611842105262e-05,
337
- "loss": 1.3873,
338
- "step": 4300
339
- },
340
- {
341
- "epoch": 12.79,
342
- "learning_rate": 6.317269736842105e-05,
343
- "loss": 1.3712,
344
- "step": 4400
345
- },
346
- {
347
- "epoch": 13.08,
348
- "learning_rate": 6.267927631578946e-05,
349
- "loss": 1.3881,
350
- "step": 4500
351
- },
352
- {
353
- "epoch": 13.08,
354
- "eval_loss": 0.43156012892723083,
355
- "eval_runtime": 134.756,
356
- "eval_samples_per_second": 34.284,
357
- "eval_steps_per_second": 4.289,
358
- "eval_wer": 0.42278039527626504,
359
- "step": 4500
360
- },
361
- {
362
- "epoch": 13.37,
363
- "learning_rate": 6.218585526315789e-05,
364
- "loss": 1.3538,
365
- "step": 4600
366
- },
367
- {
368
- "epoch": 13.66,
369
- "learning_rate": 6.16924342105263e-05,
370
- "loss": 1.355,
371
- "step": 4700
372
- },
373
- {
374
- "epoch": 13.95,
375
- "learning_rate": 6.119901315789473e-05,
376
- "loss": 1.341,
377
- "step": 4800
378
- },
379
- {
380
- "epoch": 14.24,
381
- "learning_rate": 6.070559210526316e-05,
382
- "loss": 1.3474,
383
- "step": 4900
384
- },
385
- {
386
- "epoch": 14.53,
387
- "learning_rate": 6.021217105263157e-05,
388
- "loss": 1.3572,
389
- "step": 5000
390
- },
391
- {
392
- "epoch": 14.53,
393
- "eval_loss": 0.4194825291633606,
394
- "eval_runtime": 134.9338,
395
- "eval_samples_per_second": 34.239,
396
- "eval_steps_per_second": 4.284,
397
- "eval_wer": 0.3834363335984825,
398
- "step": 5000
399
- },
400
- {
401
- "epoch": 14.82,
402
- "learning_rate": 5.971875e-05,
403
- "loss": 1.3408,
404
- "step": 5100
405
- },
406
- {
407
- "epoch": 15.12,
408
- "learning_rate": 5.922532894736842e-05,
409
- "loss": 1.3357,
410
- "step": 5200
411
- },
412
- {
413
- "epoch": 15.41,
414
- "learning_rate": 5.873190789473683e-05,
415
- "loss": 1.3288,
416
- "step": 5300
417
- },
418
- {
419
- "epoch": 15.7,
420
- "learning_rate": 5.823848684210526e-05,
421
- "loss": 1.3258,
422
- "step": 5400
423
- },
424
- {
425
- "epoch": 15.99,
426
- "learning_rate": 5.7745065789473685e-05,
427
- "loss": 1.3261,
428
- "step": 5500
429
- },
430
- {
431
- "epoch": 15.99,
432
- "eval_loss": 0.3974045217037201,
433
- "eval_runtime": 134.8844,
434
- "eval_samples_per_second": 34.252,
435
- "eval_steps_per_second": 4.285,
436
- "eval_wer": 0.3606742948051153,
437
- "step": 5500
438
- },
439
- {
440
- "epoch": 16.28,
441
- "learning_rate": 5.72516447368421e-05,
442
- "loss": 1.3273,
443
- "step": 5600
444
- },
445
- {
446
- "epoch": 16.57,
447
- "learning_rate": 5.675822368421052e-05,
448
- "loss": 1.3131,
449
- "step": 5700
450
- },
451
- {
452
- "epoch": 16.86,
453
- "learning_rate": 5.6264802631578946e-05,
454
- "loss": 1.2739,
455
- "step": 5800
456
- },
457
- {
458
- "epoch": 17.15,
459
- "learning_rate": 5.577138157894736e-05,
460
- "loss": 1.3141,
461
- "step": 5900
462
- },
463
- {
464
- "epoch": 17.44,
465
- "learning_rate": 5.5277960526315786e-05,
466
- "loss": 1.2809,
467
- "step": 6000
468
- },
469
- {
470
- "epoch": 17.44,
471
- "eval_loss": 0.3844749331474304,
472
- "eval_runtime": 136.3592,
473
- "eval_samples_per_second": 33.881,
474
- "eval_steps_per_second": 4.239,
475
- "eval_wer": 0.3466621795264027,
476
- "step": 6000
477
- },
478
- {
479
- "epoch": 17.73,
480
- "learning_rate": 5.4784539473684206e-05,
481
- "loss": 1.2847,
482
- "step": 6100
483
- },
484
- {
485
- "epoch": 18.02,
486
- "learning_rate": 5.4291118421052626e-05,
487
- "loss": 1.303,
488
- "step": 6200
489
- },
490
- {
491
- "epoch": 18.31,
492
- "learning_rate": 5.3797697368421046e-05,
493
- "loss": 1.2733,
494
- "step": 6300
495
- },
496
- {
497
- "epoch": 18.6,
498
- "learning_rate": 5.330427631578947e-05,
499
- "loss": 1.2707,
500
- "step": 6400
501
- },
502
- {
503
- "epoch": 18.89,
504
- "learning_rate": 5.2810855263157886e-05,
505
- "loss": 1.2713,
506
- "step": 6500
507
- },
508
- {
509
- "epoch": 18.89,
510
- "eval_loss": 0.3831607401371002,
511
- "eval_runtime": 138.2456,
512
- "eval_samples_per_second": 33.419,
513
- "eval_steps_per_second": 4.181,
514
- "eval_wer": 0.3449795019274307,
515
- "step": 6500
516
- },
517
- {
518
- "epoch": 19.19,
519
- "learning_rate": 5.231743421052631e-05,
520
- "loss": 1.2562,
521
- "step": 6600
522
- },
523
- {
524
- "epoch": 19.48,
525
- "learning_rate": 5.182894736842105e-05,
526
- "loss": 1.2721,
527
- "step": 6700
528
- },
529
- {
530
- "epoch": 19.77,
531
- "learning_rate": 5.133552631578947e-05,
532
- "loss": 1.2536,
533
- "step": 6800
534
- },
535
- {
536
- "epoch": 20.06,
537
- "learning_rate": 5.084210526315789e-05,
538
- "loss": 1.2503,
539
- "step": 6900
540
- },
541
- {
542
- "epoch": 20.35,
543
- "learning_rate": 5.0353618421052625e-05,
544
- "loss": 1.257,
545
- "step": 7000
546
- },
547
- {
548
- "epoch": 20.35,
549
- "eval_loss": 0.3778809607028961,
550
- "eval_runtime": 138.2403,
551
- "eval_samples_per_second": 33.42,
552
- "eval_steps_per_second": 4.181,
553
- "eval_wer": 0.33726977911032247,
554
- "step": 7000
555
- },
556
- {
557
- "epoch": 20.64,
558
- "learning_rate": 4.9860197368421045e-05,
559
- "loss": 1.2557,
560
- "step": 7100
561
- },
562
- {
563
- "epoch": 20.93,
564
- "learning_rate": 4.936677631578947e-05,
565
- "loss": 1.2397,
566
- "step": 7200
567
- },
568
- {
569
- "epoch": 21.22,
570
- "learning_rate": 4.8873355263157886e-05,
571
- "loss": 1.2527,
572
- "step": 7300
573
- },
574
- {
575
- "epoch": 21.51,
576
- "learning_rate": 4.837993421052631e-05,
577
- "loss": 1.2273,
578
- "step": 7400
579
- },
580
- {
581
- "epoch": 21.8,
582
- "learning_rate": 4.788651315789473e-05,
583
- "loss": 1.2298,
584
- "step": 7500
585
- },
586
- {
587
- "epoch": 21.8,
588
- "eval_loss": 0.3743567168712616,
589
- "eval_runtime": 136.1375,
590
- "eval_samples_per_second": 33.936,
591
- "eval_steps_per_second": 4.246,
592
- "eval_wer": 0.33913602153827327,
593
- "step": 7500
594
- },
595
- {
596
- "epoch": 22.09,
597
- "learning_rate": 4.739309210526315e-05,
598
- "loss": 1.2444,
599
- "step": 7600
600
- },
601
- {
602
- "epoch": 22.38,
603
- "learning_rate": 4.689967105263157e-05,
604
- "loss": 1.2153,
605
- "step": 7700
606
- },
607
- {
608
- "epoch": 22.67,
609
- "learning_rate": 4.640625e-05,
610
- "loss": 1.2327,
611
- "step": 7800
612
- },
613
- {
614
- "epoch": 22.96,
615
- "learning_rate": 4.591282894736841e-05,
616
- "loss": 1.2404,
617
- "step": 7900
618
- },
619
- {
620
- "epoch": 23.26,
621
- "learning_rate": 4.541940789473684e-05,
622
- "loss": 1.2173,
623
- "step": 8000
624
- },
625
- {
626
- "epoch": 23.26,
627
- "eval_loss": 0.3745496869087219,
628
- "eval_runtime": 139.3565,
629
- "eval_samples_per_second": 33.152,
630
- "eval_steps_per_second": 4.148,
631
- "eval_wer": 0.32619470109527016,
632
- "step": 8000
633
- },
634
- {
635
- "epoch": 23.55,
636
- "learning_rate": 4.492598684210526e-05,
637
- "loss": 1.222,
638
- "step": 8100
639
- },
640
- {
641
- "epoch": 23.84,
642
- "learning_rate": 4.443256578947369e-05,
643
- "loss": 1.1868,
644
- "step": 8200
645
- },
646
- {
647
- "epoch": 24.13,
648
- "learning_rate": 4.39391447368421e-05,
649
- "loss": 1.2272,
650
- "step": 8300
651
- },
652
- {
653
- "epoch": 24.42,
654
- "learning_rate": 4.344572368421052e-05,
655
- "loss": 1.2212,
656
- "step": 8400
657
- },
658
- {
659
- "epoch": 24.71,
660
- "learning_rate": 4.295230263157895e-05,
661
- "loss": 1.1966,
662
- "step": 8500
663
- },
664
- {
665
- "epoch": 24.71,
666
- "eval_loss": 0.36800575256347656,
667
- "eval_runtime": 137.1685,
668
- "eval_samples_per_second": 33.681,
669
- "eval_steps_per_second": 4.214,
670
- "eval_wer": 0.32411429970017747,
671
- "step": 8500
672
- },
673
- {
674
- "epoch": 25.0,
675
- "learning_rate": 4.245888157894736e-05,
676
- "loss": 1.2128,
677
- "step": 8600
678
- },
679
- {
680
- "epoch": 25.29,
681
- "learning_rate": 4.196546052631579e-05,
682
- "loss": 1.2118,
683
- "step": 8700
684
- },
685
- {
686
- "epoch": 25.58,
687
- "learning_rate": 4.147203947368421e-05,
688
- "loss": 1.2009,
689
- "step": 8800
690
- },
691
- {
692
- "epoch": 25.87,
693
- "learning_rate": 4.097861842105263e-05,
694
- "loss": 1.1684,
695
- "step": 8900
696
- },
697
- {
698
- "epoch": 26.16,
699
- "learning_rate": 4.048519736842105e-05,
700
- "loss": 1.1925,
701
- "step": 9000
702
- },
703
- {
704
- "epoch": 26.16,
705
- "eval_loss": 0.3604615330696106,
706
- "eval_runtime": 153.4534,
707
- "eval_samples_per_second": 30.107,
708
- "eval_steps_per_second": 3.767,
709
- "eval_wer": 0.31713883619898425,
710
- "step": 9000
711
- },
712
- {
713
- "epoch": 26.45,
714
- "learning_rate": 3.9991776315789475e-05,
715
- "loss": 1.1759,
716
- "step": 9100
717
- },
718
- {
719
- "epoch": 26.74,
720
- "learning_rate": 3.949835526315789e-05,
721
- "loss": 1.1754,
722
- "step": 9200
723
- },
724
- {
725
- "epoch": 27.03,
726
- "learning_rate": 3.9004934210526315e-05,
727
- "loss": 1.198,
728
- "step": 9300
729
- },
730
- {
731
- "epoch": 27.32,
732
- "learning_rate": 3.8511513157894735e-05,
733
- "loss": 1.1602,
734
- "step": 9400
735
- },
736
- {
737
- "epoch": 27.61,
738
- "learning_rate": 3.801809210526315e-05,
739
- "loss": 1.1692,
740
- "step": 9500
741
- },
742
- {
743
- "epoch": 27.61,
744
- "eval_loss": 0.3512294292449951,
745
- "eval_runtime": 166.9619,
746
- "eval_samples_per_second": 27.671,
747
- "eval_steps_per_second": 3.462,
748
- "eval_wer": 0.31472189928409716,
749
- "step": 9500
750
- },
751
- {
752
- "epoch": 27.91,
753
- "learning_rate": 3.7524671052631575e-05,
754
- "loss": 1.1495,
755
- "step": 9600
756
- },
757
- {
758
- "epoch": 28.2,
759
- "learning_rate": 3.7031249999999995e-05,
760
- "loss": 1.1722,
761
- "step": 9700
762
- },
763
- {
764
- "epoch": 28.49,
765
- "learning_rate": 3.6537828947368416e-05,
766
- "loss": 1.1443,
767
- "step": 9800
768
- },
769
- {
770
- "epoch": 28.78,
771
- "learning_rate": 3.6044407894736836e-05,
772
- "loss": 1.1535,
773
- "step": 9900
774
- },
775
- {
776
- "epoch": 29.07,
777
- "learning_rate": 3.555098684210526e-05,
778
- "loss": 1.1704,
779
- "step": 10000
780
- },
781
- {
782
- "epoch": 29.07,
783
- "eval_loss": 0.3532196581363678,
784
- "eval_runtime": 134.5452,
785
- "eval_samples_per_second": 34.338,
786
- "eval_steps_per_second": 4.296,
787
- "eval_wer": 0.30976564890167047,
788
- "step": 10000
789
- },
790
- {
791
- "epoch": 29.36,
792
- "learning_rate": 3.505756578947368e-05,
793
- "loss": 1.1519,
794
- "step": 10100
795
- },
796
- {
797
- "epoch": 29.65,
798
- "learning_rate": 3.45641447368421e-05,
799
- "loss": 1.1507,
800
- "step": 10200
801
- },
802
- {
803
- "epoch": 29.94,
804
- "learning_rate": 3.407072368421052e-05,
805
- "loss": 1.1517,
806
- "step": 10300
807
- },
808
- {
809
- "epoch": 30.23,
810
- "learning_rate": 3.357730263157894e-05,
811
- "loss": 1.1447,
812
- "step": 10400
813
- },
814
- {
815
- "epoch": 30.52,
816
- "learning_rate": 3.308388157894737e-05,
817
- "loss": 1.1595,
818
- "step": 10500
819
- },
820
- {
821
- "epoch": 30.52,
822
- "eval_loss": 0.34246256947517395,
823
- "eval_runtime": 159.7393,
824
- "eval_samples_per_second": 28.922,
825
- "eval_steps_per_second": 3.618,
826
- "eval_wer": 0.3038915743743499,
827
- "step": 10500
828
- },
829
- {
830
- "epoch": 30.81,
831
- "learning_rate": 3.259046052631579e-05,
832
- "loss": 1.1451,
833
- "step": 10600
834
- },
835
- {
836
- "epoch": 31.1,
837
- "learning_rate": 3.2097039473684203e-05,
838
- "loss": 1.1304,
839
- "step": 10700
840
- },
841
- {
842
- "epoch": 31.39,
843
- "learning_rate": 3.160361842105263e-05,
844
- "loss": 1.1316,
845
- "step": 10800
846
- },
847
- {
848
- "epoch": 31.68,
849
- "learning_rate": 3.111019736842105e-05,
850
- "loss": 1.1301,
851
- "step": 10900
852
- },
853
- {
854
- "epoch": 31.97,
855
- "learning_rate": 3.061677631578947e-05,
856
- "loss": 1.1433,
857
- "step": 11000
858
- },
859
- {
860
- "epoch": 31.97,
861
- "eval_loss": 0.35683709383010864,
862
- "eval_runtime": 158.7503,
863
- "eval_samples_per_second": 29.102,
864
- "eval_steps_per_second": 3.641,
865
- "eval_wer": 0.30257602643333537,
866
- "step": 11000
867
- },
868
- {
869
- "epoch": 32.27,
870
- "learning_rate": 3.0123355263157894e-05,
871
- "loss": 1.1337,
872
- "step": 11100
873
- },
874
- {
875
- "epoch": 32.56,
876
- "learning_rate": 2.9629934210526314e-05,
877
- "loss": 1.1227,
878
- "step": 11200
879
- },
880
- {
881
- "epoch": 32.85,
882
- "learning_rate": 2.913651315789473e-05,
883
- "loss": 1.1281,
884
- "step": 11300
885
- },
886
- {
887
- "epoch": 33.14,
888
- "learning_rate": 2.8643092105263154e-05,
889
- "loss": 1.1203,
890
- "step": 11400
891
- },
892
- {
893
- "epoch": 33.43,
894
- "learning_rate": 2.8149671052631574e-05,
895
- "loss": 1.1295,
896
- "step": 11500
897
- },
898
- {
899
- "epoch": 33.43,
900
- "eval_loss": 0.34605443477630615,
901
- "eval_runtime": 160.1749,
902
- "eval_samples_per_second": 28.843,
903
- "eval_steps_per_second": 3.609,
904
- "eval_wer": 0.2992106712353913,
905
- "step": 11500
906
- },
907
- {
908
- "epoch": 33.72,
909
- "learning_rate": 2.7656249999999998e-05,
910
- "loss": 1.0915,
911
- "step": 11600
912
- },
913
- {
914
- "epoch": 34.01,
915
- "learning_rate": 2.7162828947368418e-05,
916
- "loss": 1.1312,
917
- "step": 11700
918
- },
919
- {
920
- "epoch": 34.3,
921
- "learning_rate": 2.6669407894736838e-05,
922
- "loss": 1.1146,
923
- "step": 11800
924
- },
925
- {
926
- "epoch": 34.59,
927
- "learning_rate": 2.6175986842105262e-05,
928
- "loss": 1.1184,
929
- "step": 11900
930
- },
931
- {
932
- "epoch": 34.88,
933
- "learning_rate": 2.56875e-05,
934
- "loss": 1.1131,
935
- "step": 12000
936
- },
937
- {
938
- "epoch": 34.88,
939
- "eval_loss": 0.3348712623119354,
940
- "eval_runtime": 157.6811,
941
- "eval_samples_per_second": 29.3,
942
- "eval_steps_per_second": 3.666,
943
- "eval_wer": 0.29422382671480146,
944
- "step": 12000
945
- },
946
- {
947
- "epoch": 35.17,
948
- "learning_rate": 2.5194078947368418e-05,
949
- "loss": 1.1025,
950
- "step": 12100
951
- },
952
- {
953
- "epoch": 35.46,
954
- "learning_rate": 2.4700657894736838e-05,
955
- "loss": 1.1069,
956
- "step": 12200
957
- },
958
- {
959
- "epoch": 35.75,
960
- "learning_rate": 2.420723684210526e-05,
961
- "loss": 1.076,
962
- "step": 12300
963
- },
964
- {
965
- "epoch": 36.05,
966
- "learning_rate": 2.371381578947368e-05,
967
- "loss": 1.111,
968
- "step": 12400
969
- },
970
- {
971
- "epoch": 36.34,
972
- "learning_rate": 2.32203947368421e-05,
973
- "loss": 1.1015,
974
- "step": 12500
975
- },
976
- {
977
- "epoch": 36.34,
978
- "eval_loss": 0.3378337025642395,
979
- "eval_runtime": 160.9202,
980
- "eval_samples_per_second": 28.71,
981
- "eval_steps_per_second": 3.592,
982
- "eval_wer": 0.29612066328091535,
983
- "step": 12500
984
- },
985
- {
986
- "epoch": 36.63,
987
- "learning_rate": 2.2726973684210525e-05,
988
- "loss": 1.1047,
989
- "step": 12600
990
- },
991
- {
992
- "epoch": 36.92,
993
- "learning_rate": 2.2233552631578945e-05,
994
- "loss": 1.102,
995
- "step": 12700
996
- },
997
- {
998
- "epoch": 37.21,
999
- "learning_rate": 2.1740131578947365e-05,
1000
- "loss": 1.095,
1001
- "step": 12800
1002
- },
1003
- {
1004
- "epoch": 37.5,
1005
- "learning_rate": 2.124671052631579e-05,
1006
- "loss": 1.0709,
1007
- "step": 12900
1008
- },
1009
- {
1010
- "epoch": 37.79,
1011
- "learning_rate": 2.075328947368421e-05,
1012
- "loss": 1.0835,
1013
- "step": 13000
1014
- },
1015
- {
1016
- "epoch": 37.79,
1017
- "eval_loss": 0.3281959593296051,
1018
- "eval_runtime": 159.3279,
1019
- "eval_samples_per_second": 28.997,
1020
- "eval_steps_per_second": 3.628,
1021
- "eval_wer": 0.2865141038976932,
1022
- "step": 13000
1023
- },
1024
- {
1025
- "epoch": 38.08,
1026
- "learning_rate": 2.0259868421052632e-05,
1027
- "loss": 1.0846,
1028
- "step": 13100
1029
- },
1030
- {
1031
- "epoch": 38.37,
1032
- "learning_rate": 1.9766447368421053e-05,
1033
- "loss": 1.0933,
1034
- "step": 13200
1035
- },
1036
- {
1037
- "epoch": 38.66,
1038
- "learning_rate": 1.927302631578947e-05,
1039
- "loss": 1.0661,
1040
- "step": 13300
1041
- },
1042
- {
1043
- "epoch": 38.95,
1044
- "learning_rate": 1.8779605263157893e-05,
1045
- "loss": 1.0902,
1046
- "step": 13400
1047
- },
1048
- {
1049
- "epoch": 39.24,
1050
- "learning_rate": 1.8286184210526313e-05,
1051
- "loss": 1.083,
1052
- "step": 13500
1053
- },
1054
- {
1055
- "epoch": 39.24,
1056
- "eval_loss": 0.3181643486022949,
1057
- "eval_runtime": 158.3527,
1058
- "eval_samples_per_second": 29.175,
1059
- "eval_steps_per_second": 3.65,
1060
- "eval_wer": 0.2825674600746497,
1061
- "step": 13500
1062
- },
1063
- {
1064
- "epoch": 39.53,
1065
- "learning_rate": 1.7792763157894736e-05,
1066
- "loss": 1.0548,
1067
- "step": 13600
1068
- },
1069
- {
1070
- "epoch": 39.82,
1071
- "learning_rate": 1.7299342105263156e-05,
1072
- "loss": 1.0735,
1073
- "step": 13700
1074
- },
1075
- {
1076
- "epoch": 40.12,
1077
- "learning_rate": 1.6805921052631577e-05,
1078
- "loss": 1.085,
1079
- "step": 13800
1080
- },
1081
- {
1082
- "epoch": 40.41,
1083
- "learning_rate": 1.63125e-05,
1084
- "loss": 1.0531,
1085
- "step": 13900
1086
- },
1087
- {
1088
- "epoch": 40.7,
1089
- "learning_rate": 1.581907894736842e-05,
1090
- "loss": 1.0819,
1091
- "step": 14000
1092
- },
1093
- {
1094
- "epoch": 40.7,
1095
- "eval_loss": 0.32643991708755493,
1096
- "eval_runtime": 159.1099,
1097
- "eval_samples_per_second": 29.037,
1098
- "eval_steps_per_second": 3.633,
1099
- "eval_wer": 0.28504558526586304,
1100
- "step": 14000
1101
- },
1102
- {
1103
- "epoch": 40.99,
1104
- "learning_rate": 1.532565789473684e-05,
1105
- "loss": 1.0679,
1106
- "step": 14100
1107
- },
1108
- {
1109
- "epoch": 41.28,
1110
- "learning_rate": 1.4832236842105262e-05,
1111
- "loss": 1.0773,
1112
- "step": 14200
1113
- },
1114
- {
1115
- "epoch": 41.57,
1116
- "learning_rate": 1.4338815789473682e-05,
1117
- "loss": 1.0587,
1118
- "step": 14300
1119
- },
1120
- {
1121
- "epoch": 41.86,
1122
- "learning_rate": 1.3845394736842104e-05,
1123
- "loss": 1.0287,
1124
- "step": 14400
1125
- },
1126
- {
1127
- "epoch": 42.15,
1128
- "learning_rate": 1.3351973684210524e-05,
1129
- "loss": 1.072,
1130
- "step": 14500
1131
- },
1132
- {
1133
- "epoch": 42.15,
1134
- "eval_loss": 0.32792535424232483,
1135
- "eval_runtime": 155.2521,
1136
- "eval_samples_per_second": 29.758,
1137
- "eval_steps_per_second": 3.723,
1138
- "eval_wer": 0.2817108242060821,
1139
- "step": 14500
1140
- },
1141
- {
1142
- "epoch": 42.44,
1143
- "learning_rate": 1.2863486842105262e-05,
1144
- "loss": 1.0508,
1145
- "step": 14600
1146
- },
1147
- {
1148
- "epoch": 42.73,
1149
- "learning_rate": 1.2370065789473684e-05,
1150
- "loss": 1.0408,
1151
- "step": 14700
1152
- },
1153
- {
1154
- "epoch": 43.02,
1155
- "learning_rate": 1.1876644736842105e-05,
1156
- "loss": 1.0725,
1157
- "step": 14800
1158
- },
1159
- {
1160
- "epoch": 43.31,
1161
- "learning_rate": 1.1383223684210525e-05,
1162
- "loss": 1.0381,
1163
- "step": 14900
1164
- },
1165
- {
1166
- "epoch": 43.6,
1167
- "learning_rate": 1.0889802631578946e-05,
1168
- "loss": 1.0456,
1169
- "step": 15000
1170
- },
1171
- {
1172
- "epoch": 43.6,
1173
- "eval_loss": 0.323445200920105,
1174
- "eval_runtime": 137.3413,
1175
- "eval_samples_per_second": 33.639,
1176
- "eval_steps_per_second": 4.208,
1177
- "eval_wer": 0.2792632931530319,
1178
- "step": 15000
1179
- },
1180
- {
1181
- "epoch": 43.89,
1182
- "learning_rate": 1.0396381578947367e-05,
1183
- "loss": 1.0655,
1184
- "step": 15100
1185
- },
1186
- {
1187
- "epoch": 44.19,
1188
- "learning_rate": 9.90296052631579e-06,
1189
- "loss": 1.0509,
1190
- "step": 15200
1191
- },
1192
- {
1193
- "epoch": 44.48,
1194
- "learning_rate": 9.40953947368421e-06,
1195
- "loss": 1.0357,
1196
- "step": 15300
1197
- },
1198
- {
1199
- "epoch": 44.77,
1200
- "learning_rate": 8.916118421052631e-06,
1201
- "loss": 1.0467,
1202
- "step": 15400
1203
- },
1204
- {
1205
- "epoch": 45.06,
1206
- "learning_rate": 8.422697368421051e-06,
1207
- "loss": 1.0581,
1208
- "step": 15500
1209
- },
1210
- {
1211
- "epoch": 45.06,
1212
- "eval_loss": 0.32200726866722107,
1213
- "eval_runtime": 138.9222,
1214
- "eval_samples_per_second": 33.256,
1215
- "eval_steps_per_second": 4.161,
1216
- "eval_wer": 0.27791715107385423,
1217
- "step": 15500
1218
- },
1219
- {
1220
- "epoch": 45.35,
1221
- "learning_rate": 7.929276315789473e-06,
1222
- "loss": 1.0512,
1223
- "step": 15600
1224
- },
1225
- {
1226
- "epoch": 45.64,
1227
- "learning_rate": 7.435855263157894e-06,
1228
- "loss": 1.0397,
1229
- "step": 15700
1230
- },
1231
- {
1232
- "epoch": 45.93,
1233
- "learning_rate": 6.9473684210526315e-06,
1234
- "loss": 1.0455,
1235
- "step": 15800
1236
- },
1237
- {
1238
- "epoch": 46.22,
1239
- "learning_rate": 6.453947368421052e-06,
1240
- "loss": 1.0347,
1241
- "step": 15900
1242
- },
1243
- {
1244
- "epoch": 46.51,
1245
- "learning_rate": 5.9605263157894735e-06,
1246
- "loss": 1.0406,
1247
- "step": 16000
1248
- },
1249
- {
1250
- "epoch": 46.51,
1251
- "eval_loss": 0.32076749205589294,
1252
- "eval_runtime": 134.5334,
1253
- "eval_samples_per_second": 34.341,
1254
- "eval_steps_per_second": 4.296,
1255
- "eval_wer": 0.2762344734748822,
1256
- "step": 16000
1257
- },
1258
- {
1259
- "epoch": 46.8,
1260
- "learning_rate": 5.467105263157894e-06,
1261
- "loss": 1.0528,
1262
- "step": 16100
1263
- },
1264
- {
1265
- "epoch": 47.09,
1266
- "learning_rate": 4.973684210526316e-06,
1267
- "loss": 1.045,
1268
- "step": 16200
1269
- },
1270
- {
1271
- "epoch": 47.38,
1272
- "learning_rate": 4.480263157894736e-06,
1273
- "loss": 1.0129,
1274
- "step": 16300
1275
- },
1276
- {
1277
- "epoch": 47.67,
1278
- "learning_rate": 3.986842105263157e-06,
1279
- "loss": 1.0102,
1280
- "step": 16400
1281
- },
1282
- {
1283
- "epoch": 47.96,
1284
- "learning_rate": 3.4934210526315787e-06,
1285
- "loss": 1.0422,
1286
- "step": 16500
1287
- },
1288
- {
1289
- "epoch": 47.96,
1290
- "eval_loss": 0.3183736503124237,
1291
- "eval_runtime": 137.2442,
1292
- "eval_samples_per_second": 33.663,
1293
- "eval_steps_per_second": 4.211,
1294
- "eval_wer": 0.2751942727773359,
1295
- "step": 16500
1296
- }
1297
- ],
1298
- "max_steps": 17200,
1299
- "num_train_epochs": 50,
1300
- "total_flos": 4.869540507003347e+19,
1301
- "trial_name": null,
1302
- "trial_params": null
1303
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-17000/config.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "_name_or_path": "facebook/wav2vec2-xls-r-300m",
3
- "activation_dropout": 0.1,
4
- "adapter_kernel_size": 3,
5
- "adapter_stride": 2,
6
- "add_adapter": false,
7
- "apply_spec_augment": true,
8
- "architectures": [
9
- "Wav2Vec2ForCTC"
10
- ],
11
- "attention_dropout": 0.0,
12
- "bos_token_id": 1,
13
- "classifier_proj_size": 256,
14
- "codevector_dim": 768,
15
- "contrastive_logits_temperature": 0.1,
16
- "conv_bias": true,
17
- "conv_dim": [
18
- 512,
19
- 512,
20
- 512,
21
- 512,
22
- 512,
23
- 512,
24
- 512
25
- ],
26
- "conv_kernel": [
27
- 10,
28
- 3,
29
- 3,
30
- 3,
31
- 3,
32
- 2,
33
- 2
34
- ],
35
- "conv_stride": [
36
- 5,
37
- 2,
38
- 2,
39
- 2,
40
- 2,
41
- 2,
42
- 2
43
- ],
44
- "ctc_loss_reduction": "mean",
45
- "ctc_zero_infinity": false,
46
- "diversity_loss_weight": 0.1,
47
- "do_stable_layer_norm": true,
48
- "eos_token_id": 2,
49
- "feat_extract_activation": "gelu",
50
- "feat_extract_dropout": 0.0,
51
- "feat_extract_norm": "layer",
52
- "feat_proj_dropout": 0.0,
53
- "feat_quantizer_dropout": 0.0,
54
- "final_dropout": 0.0,
55
- "hidden_act": "gelu",
56
- "hidden_dropout": 0.0,
57
- "hidden_size": 1024,
58
- "initializer_range": 0.02,
59
- "intermediate_size": 4096,
60
- "layer_norm_eps": 1e-05,
61
- "layerdrop": 0.0,
62
- "mask_feature_length": 64,
63
- "mask_feature_min_masks": 0,
64
- "mask_feature_prob": 0.25,
65
- "mask_time_length": 10,
66
- "mask_time_min_masks": 2,
67
- "mask_time_prob": 0.75,
68
- "model_type": "wav2vec2",
69
- "num_adapter_layers": 3,
70
- "num_attention_heads": 16,
71
- "num_codevector_groups": 2,
72
- "num_codevectors_per_group": 320,
73
- "num_conv_pos_embedding_groups": 16,
74
- "num_conv_pos_embeddings": 128,
75
- "num_feat_extract_layers": 7,
76
- "num_hidden_layers": 24,
77
- "num_negatives": 100,
78
- "output_hidden_size": 1024,
79
- "pad_token_id": 34,
80
- "proj_codevector_dim": 768,
81
- "tdnn_dilation": [
82
- 1,
83
- 2,
84
- 3,
85
- 1,
86
- 1
87
- ],
88
- "tdnn_dim": [
89
- 512,
90
- 512,
91
- 512,
92
- 512,
93
- 1500
94
- ],
95
- "tdnn_kernel": [
96
- 5,
97
- 3,
98
- 3,
99
- 1,
100
- 1
101
- ],
102
- "torch_dtype": "float32",
103
- "transformers_version": "4.16.0.dev0",
104
- "use_weighted_layer_sum": false,
105
- "vocab_size": 37,
106
- "xvector_output_dim": 512
107
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-17000/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1daeae5d4afe881cb6c0563fb2d056998741e52f136ae2e7acaac074cf332438
3
- size 2490362385
 
 
 
 
checkpoint-17000/preprocessor_config.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "do_normalize": true,
3
- "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
- "feature_size": 1,
5
- "padding_side": "right",
6
- "padding_value": 0,
7
- "return_attention_mask": true,
8
- "sampling_rate": 16000
9
- }
 
 
 
 
 
 
 
 
 
 
checkpoint-17000/trainer_state.json DELETED
@@ -1,1342 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 49.41769398114576,
5
- "global_step": 17000,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.29,
12
- "learning_rate": 3.7125e-06,
13
- "loss": 12.2713,
14
- "step": 100
15
- },
16
- {
17
- "epoch": 0.58,
18
- "learning_rate": 7.4625e-06,
19
- "loss": 6.2026,
20
- "step": 200
21
- },
22
- {
23
- "epoch": 0.87,
24
- "learning_rate": 1.1212499999999998e-05,
25
- "loss": 4.1374,
26
- "step": 300
27
- },
28
- {
29
- "epoch": 1.16,
30
- "learning_rate": 1.49625e-05,
31
- "loss": 3.6755,
32
- "step": 400
33
- },
34
- {
35
- "epoch": 1.45,
36
- "learning_rate": 1.8712499999999997e-05,
37
- "loss": 3.3332,
38
- "step": 500
39
- },
40
- {
41
- "epoch": 1.45,
42
- "eval_loss": 3.292029857635498,
43
- "eval_runtime": 137.2974,
44
- "eval_samples_per_second": 33.65,
45
- "eval_steps_per_second": 4.21,
46
- "eval_wer": 1.0,
47
- "step": 500
48
- },
49
- {
50
- "epoch": 1.74,
51
- "learning_rate": 2.2462499999999997e-05,
52
- "loss": 3.1418,
53
- "step": 600
54
- },
55
- {
56
- "epoch": 2.03,
57
- "learning_rate": 2.6212499999999997e-05,
58
- "loss": 3.0879,
59
- "step": 700
60
- },
61
- {
62
- "epoch": 2.32,
63
- "learning_rate": 2.99625e-05,
64
- "loss": 3.0216,
65
- "step": 800
66
- },
67
- {
68
- "epoch": 2.61,
69
- "learning_rate": 3.37125e-05,
70
- "loss": 2.9595,
71
- "step": 900
72
- },
73
- {
74
- "epoch": 2.91,
75
- "learning_rate": 3.7462499999999996e-05,
76
- "loss": 2.9269,
77
- "step": 1000
78
- },
79
- {
80
- "epoch": 2.91,
81
- "eval_loss": 2.941540241241455,
82
- "eval_runtime": 178.4388,
83
- "eval_samples_per_second": 25.891,
84
- "eval_steps_per_second": 3.239,
85
- "eval_wer": 0.9966346448020559,
86
- "step": 1000
87
- },
88
- {
89
- "epoch": 3.2,
90
- "learning_rate": 4.12125e-05,
91
- "loss": 2.914,
92
- "step": 1100
93
- },
94
- {
95
- "epoch": 3.49,
96
- "learning_rate": 4.4962499999999995e-05,
97
- "loss": 2.8432,
98
- "step": 1200
99
- },
100
- {
101
- "epoch": 3.78,
102
- "learning_rate": 4.871249999999999e-05,
103
- "loss": 2.6828,
104
- "step": 1300
105
- },
106
- {
107
- "epoch": 4.07,
108
- "learning_rate": 5.2462499999999994e-05,
109
- "loss": 2.355,
110
- "step": 1400
111
- },
112
- {
113
- "epoch": 4.36,
114
- "learning_rate": 5.62125e-05,
115
- "loss": 2.0719,
116
- "step": 1500
117
- },
118
- {
119
- "epoch": 4.36,
120
- "eval_loss": 1.164096713066101,
121
- "eval_runtime": 134.2981,
122
- "eval_samples_per_second": 34.401,
123
- "eval_steps_per_second": 4.304,
124
- "eval_wer": 0.8507923881784251,
125
- "step": 1500
126
- },
127
- {
128
- "epoch": 4.65,
129
- "learning_rate": 5.9962499999999994e-05,
130
- "loss": 1.944,
131
- "step": 1600
132
- },
133
- {
134
- "epoch": 4.94,
135
- "learning_rate": 6.37125e-05,
136
- "loss": 1.8772,
137
- "step": 1700
138
- },
139
- {
140
- "epoch": 5.23,
141
- "learning_rate": 6.746249999999999e-05,
142
- "loss": 1.807,
143
- "step": 1800
144
- },
145
- {
146
- "epoch": 5.52,
147
- "learning_rate": 7.121249999999999e-05,
148
- "loss": 1.7612,
149
- "step": 1900
150
- },
151
- {
152
- "epoch": 5.81,
153
- "learning_rate": 7.492499999999999e-05,
154
- "loss": 1.7404,
155
- "step": 2000
156
- },
157
- {
158
- "epoch": 5.81,
159
- "eval_loss": 0.7280541658401489,
160
- "eval_runtime": 174.7873,
161
- "eval_samples_per_second": 26.432,
162
- "eval_steps_per_second": 3.307,
163
- "eval_wer": 0.6846356238144772,
164
- "step": 2000
165
- },
166
- {
167
- "epoch": 6.1,
168
- "learning_rate": 7.451644736842105e-05,
169
- "loss": 1.6883,
170
- "step": 2100
171
- },
172
- {
173
- "epoch": 6.39,
174
- "learning_rate": 7.402302631578947e-05,
175
- "loss": 1.6463,
176
- "step": 2200
177
- },
178
- {
179
- "epoch": 6.68,
180
- "learning_rate": 7.352960526315788e-05,
181
- "loss": 1.6216,
182
- "step": 2300
183
- },
184
- {
185
- "epoch": 6.97,
186
- "learning_rate": 7.30361842105263e-05,
187
- "loss": 1.586,
188
- "step": 2400
189
- },
190
- {
191
- "epoch": 7.27,
192
- "learning_rate": 7.254276315789473e-05,
193
- "loss": 1.5921,
194
- "step": 2500
195
- },
196
- {
197
- "epoch": 7.27,
198
- "eval_loss": 0.5885757803916931,
199
- "eval_runtime": 137.296,
200
- "eval_samples_per_second": 33.65,
201
- "eval_steps_per_second": 4.21,
202
- "eval_wer": 0.5146545921801383,
203
- "step": 2500
204
- },
205
- {
206
- "epoch": 7.56,
207
- "learning_rate": 7.204934210526316e-05,
208
- "loss": 1.5694,
209
- "step": 2600
210
- },
211
- {
212
- "epoch": 7.85,
213
- "learning_rate": 7.155592105263157e-05,
214
- "loss": 1.5279,
215
- "step": 2700
216
- },
217
- {
218
- "epoch": 8.14,
219
- "learning_rate": 7.10625e-05,
220
- "loss": 1.5435,
221
- "step": 2800
222
- },
223
- {
224
- "epoch": 8.43,
225
- "learning_rate": 7.056907894736841e-05,
226
- "loss": 1.5163,
227
- "step": 2900
228
- },
229
- {
230
- "epoch": 8.72,
231
- "learning_rate": 7.007565789473684e-05,
232
- "loss": 1.4941,
233
- "step": 3000
234
- },
235
- {
236
- "epoch": 8.72,
237
- "eval_loss": 0.5183178186416626,
238
- "eval_runtime": 134.3566,
239
- "eval_samples_per_second": 34.386,
240
- "eval_steps_per_second": 4.302,
241
- "eval_wer": 0.5063023924616044,
242
- "step": 3000
243
- },
244
- {
245
- "epoch": 9.01,
246
- "learning_rate": 6.958223684210525e-05,
247
- "loss": 1.5061,
248
- "step": 3100
249
- },
250
- {
251
- "epoch": 9.3,
252
- "learning_rate": 6.908881578947368e-05,
253
- "loss": 1.4551,
254
- "step": 3200
255
- },
256
- {
257
- "epoch": 9.59,
258
- "learning_rate": 6.859539473684209e-05,
259
- "loss": 1.4674,
260
- "step": 3300
261
- },
262
- {
263
- "epoch": 9.88,
264
- "learning_rate": 6.810197368421052e-05,
265
- "loss": 1.4691,
266
- "step": 3400
267
- },
268
- {
269
- "epoch": 10.17,
270
- "learning_rate": 6.760855263157895e-05,
271
- "loss": 1.4486,
272
- "step": 3500
273
- },
274
- {
275
- "epoch": 10.17,
276
- "eval_loss": 0.47492966055870056,
277
- "eval_runtime": 135.1792,
278
- "eval_samples_per_second": 34.177,
279
- "eval_steps_per_second": 4.276,
280
- "eval_wer": 0.46760080768524753,
281
- "step": 3500
282
- },
283
- {
284
- "epoch": 10.46,
285
- "learning_rate": 6.711513157894736e-05,
286
- "loss": 1.4274,
287
- "step": 3600
288
- },
289
- {
290
- "epoch": 10.75,
291
- "learning_rate": 6.66266447368421e-05,
292
- "loss": 1.4277,
293
- "step": 3700
294
- },
295
- {
296
- "epoch": 11.05,
297
- "learning_rate": 6.613322368421052e-05,
298
- "loss": 1.4445,
299
- "step": 3800
300
- },
301
- {
302
- "epoch": 11.34,
303
- "learning_rate": 6.563980263157894e-05,
304
- "loss": 1.4101,
305
- "step": 3900
306
- },
307
- {
308
- "epoch": 11.63,
309
- "learning_rate": 6.514638157894736e-05,
310
- "loss": 1.3899,
311
- "step": 4000
312
- },
313
- {
314
- "epoch": 11.63,
315
- "eval_loss": 0.4564875364303589,
316
- "eval_runtime": 134.5257,
317
- "eval_samples_per_second": 34.343,
318
- "eval_steps_per_second": 4.297,
319
- "eval_wer": 0.4432172795692345,
320
- "step": 4000
321
- },
322
- {
323
- "epoch": 11.92,
324
- "learning_rate": 6.465296052631578e-05,
325
- "loss": 1.404,
326
- "step": 4100
327
- },
328
- {
329
- "epoch": 12.21,
330
- "learning_rate": 6.415953947368421e-05,
331
- "loss": 1.3911,
332
- "step": 4200
333
- },
334
- {
335
- "epoch": 12.5,
336
- "learning_rate": 6.366611842105262e-05,
337
- "loss": 1.3873,
338
- "step": 4300
339
- },
340
- {
341
- "epoch": 12.79,
342
- "learning_rate": 6.317269736842105e-05,
343
- "loss": 1.3712,
344
- "step": 4400
345
- },
346
- {
347
- "epoch": 13.08,
348
- "learning_rate": 6.267927631578946e-05,
349
- "loss": 1.3881,
350
- "step": 4500
351
- },
352
- {
353
- "epoch": 13.08,
354
- "eval_loss": 0.43156012892723083,
355
- "eval_runtime": 134.756,
356
- "eval_samples_per_second": 34.284,
357
- "eval_steps_per_second": 4.289,
358
- "eval_wer": 0.42278039527626504,
359
- "step": 4500
360
- },
361
- {
362
- "epoch": 13.37,
363
- "learning_rate": 6.218585526315789e-05,
364
- "loss": 1.3538,
365
- "step": 4600
366
- },
367
- {
368
- "epoch": 13.66,
369
- "learning_rate": 6.16924342105263e-05,
370
- "loss": 1.355,
371
- "step": 4700
372
- },
373
- {
374
- "epoch": 13.95,
375
- "learning_rate": 6.119901315789473e-05,
376
- "loss": 1.341,
377
- "step": 4800
378
- },
379
- {
380
- "epoch": 14.24,
381
- "learning_rate": 6.070559210526316e-05,
382
- "loss": 1.3474,
383
- "step": 4900
384
- },
385
- {
386
- "epoch": 14.53,
387
- "learning_rate": 6.021217105263157e-05,
388
- "loss": 1.3572,
389
- "step": 5000
390
- },
391
- {
392
- "epoch": 14.53,
393
- "eval_loss": 0.4194825291633606,
394
- "eval_runtime": 134.9338,
395
- "eval_samples_per_second": 34.239,
396
- "eval_steps_per_second": 4.284,
397
- "eval_wer": 0.3834363335984825,
398
- "step": 5000
399
- },
400
- {
401
- "epoch": 14.82,
402
- "learning_rate": 5.971875e-05,
403
- "loss": 1.3408,
404
- "step": 5100
405
- },
406
- {
407
- "epoch": 15.12,
408
- "learning_rate": 5.922532894736842e-05,
409
- "loss": 1.3357,
410
- "step": 5200
411
- },
412
- {
413
- "epoch": 15.41,
414
- "learning_rate": 5.873190789473683e-05,
415
- "loss": 1.3288,
416
- "step": 5300
417
- },
418
- {
419
- "epoch": 15.7,
420
- "learning_rate": 5.823848684210526e-05,
421
- "loss": 1.3258,
422
- "step": 5400
423
- },
424
- {
425
- "epoch": 15.99,
426
- "learning_rate": 5.7745065789473685e-05,
427
- "loss": 1.3261,
428
- "step": 5500
429
- },
430
- {
431
- "epoch": 15.99,
432
- "eval_loss": 0.3974045217037201,
433
- "eval_runtime": 134.8844,
434
- "eval_samples_per_second": 34.252,
435
- "eval_steps_per_second": 4.285,
436
- "eval_wer": 0.3606742948051153,
437
- "step": 5500
438
- },
439
- {
440
- "epoch": 16.28,
441
- "learning_rate": 5.72516447368421e-05,
442
- "loss": 1.3273,
443
- "step": 5600
444
- },
445
- {
446
- "epoch": 16.57,
447
- "learning_rate": 5.675822368421052e-05,
448
- "loss": 1.3131,
449
- "step": 5700
450
- },
451
- {
452
- "epoch": 16.86,
453
- "learning_rate": 5.6264802631578946e-05,
454
- "loss": 1.2739,
455
- "step": 5800
456
- },
457
- {
458
- "epoch": 17.15,
459
- "learning_rate": 5.577138157894736e-05,
460
- "loss": 1.3141,
461
- "step": 5900
462
- },
463
- {
464
- "epoch": 17.44,
465
- "learning_rate": 5.5277960526315786e-05,
466
- "loss": 1.2809,
467
- "step": 6000
468
- },
469
- {
470
- "epoch": 17.44,
471
- "eval_loss": 0.3844749331474304,
472
- "eval_runtime": 136.3592,
473
- "eval_samples_per_second": 33.881,
474
- "eval_steps_per_second": 4.239,
475
- "eval_wer": 0.3466621795264027,
476
- "step": 6000
477
- },
478
- {
479
- "epoch": 17.73,
480
- "learning_rate": 5.4784539473684206e-05,
481
- "loss": 1.2847,
482
- "step": 6100
483
- },
484
- {
485
- "epoch": 18.02,
486
- "learning_rate": 5.4291118421052626e-05,
487
- "loss": 1.303,
488
- "step": 6200
489
- },
490
- {
491
- "epoch": 18.31,
492
- "learning_rate": 5.3797697368421046e-05,
493
- "loss": 1.2733,
494
- "step": 6300
495
- },
496
- {
497
- "epoch": 18.6,
498
- "learning_rate": 5.330427631578947e-05,
499
- "loss": 1.2707,
500
- "step": 6400
501
- },
502
- {
503
- "epoch": 18.89,
504
- "learning_rate": 5.2810855263157886e-05,
505
- "loss": 1.2713,
506
- "step": 6500
507
- },
508
- {
509
- "epoch": 18.89,
510
- "eval_loss": 0.3831607401371002,
511
- "eval_runtime": 138.2456,
512
- "eval_samples_per_second": 33.419,
513
- "eval_steps_per_second": 4.181,
514
- "eval_wer": 0.3449795019274307,
515
- "step": 6500
516
- },
517
- {
518
- "epoch": 19.19,
519
- "learning_rate": 5.231743421052631e-05,
520
- "loss": 1.2562,
521
- "step": 6600
522
- },
523
- {
524
- "epoch": 19.48,
525
- "learning_rate": 5.182894736842105e-05,
526
- "loss": 1.2721,
527
- "step": 6700
528
- },
529
- {
530
- "epoch": 19.77,
531
- "learning_rate": 5.133552631578947e-05,
532
- "loss": 1.2536,
533
- "step": 6800
534
- },
535
- {
536
- "epoch": 20.06,
537
- "learning_rate": 5.084210526315789e-05,
538
- "loss": 1.2503,
539
- "step": 6900
540
- },
541
- {
542
- "epoch": 20.35,
543
- "learning_rate": 5.0353618421052625e-05,
544
- "loss": 1.257,
545
- "step": 7000
546
- },
547
- {
548
- "epoch": 20.35,
549
- "eval_loss": 0.3778809607028961,
550
- "eval_runtime": 138.2403,
551
- "eval_samples_per_second": 33.42,
552
- "eval_steps_per_second": 4.181,
553
- "eval_wer": 0.33726977911032247,
554
- "step": 7000
555
- },
556
- {
557
- "epoch": 20.64,
558
- "learning_rate": 4.9860197368421045e-05,
559
- "loss": 1.2557,
560
- "step": 7100
561
- },
562
- {
563
- "epoch": 20.93,
564
- "learning_rate": 4.936677631578947e-05,
565
- "loss": 1.2397,
566
- "step": 7200
567
- },
568
- {
569
- "epoch": 21.22,
570
- "learning_rate": 4.8873355263157886e-05,
571
- "loss": 1.2527,
572
- "step": 7300
573
- },
574
- {
575
- "epoch": 21.51,
576
- "learning_rate": 4.837993421052631e-05,
577
- "loss": 1.2273,
578
- "step": 7400
579
- },
580
- {
581
- "epoch": 21.8,
582
- "learning_rate": 4.788651315789473e-05,
583
- "loss": 1.2298,
584
- "step": 7500
585
- },
586
- {
587
- "epoch": 21.8,
588
- "eval_loss": 0.3743567168712616,
589
- "eval_runtime": 136.1375,
590
- "eval_samples_per_second": 33.936,
591
- "eval_steps_per_second": 4.246,
592
- "eval_wer": 0.33913602153827327,
593
- "step": 7500
594
- },
595
- {
596
- "epoch": 22.09,
597
- "learning_rate": 4.739309210526315e-05,
598
- "loss": 1.2444,
599
- "step": 7600
600
- },
601
- {
602
- "epoch": 22.38,
603
- "learning_rate": 4.689967105263157e-05,
604
- "loss": 1.2153,
605
- "step": 7700
606
- },
607
- {
608
- "epoch": 22.67,
609
- "learning_rate": 4.640625e-05,
610
- "loss": 1.2327,
611
- "step": 7800
612
- },
613
- {
614
- "epoch": 22.96,
615
- "learning_rate": 4.591282894736841e-05,
616
- "loss": 1.2404,
617
- "step": 7900
618
- },
619
- {
620
- "epoch": 23.26,
621
- "learning_rate": 4.541940789473684e-05,
622
- "loss": 1.2173,
623
- "step": 8000
624
- },
625
- {
626
- "epoch": 23.26,
627
- "eval_loss": 0.3745496869087219,
628
- "eval_runtime": 139.3565,
629
- "eval_samples_per_second": 33.152,
630
- "eval_steps_per_second": 4.148,
631
- "eval_wer": 0.32619470109527016,
632
- "step": 8000
633
- },
634
- {
635
- "epoch": 23.55,
636
- "learning_rate": 4.492598684210526e-05,
637
- "loss": 1.222,
638
- "step": 8100
639
- },
640
- {
641
- "epoch": 23.84,
642
- "learning_rate": 4.443256578947369e-05,
643
- "loss": 1.1868,
644
- "step": 8200
645
- },
646
- {
647
- "epoch": 24.13,
648
- "learning_rate": 4.39391447368421e-05,
649
- "loss": 1.2272,
650
- "step": 8300
651
- },
652
- {
653
- "epoch": 24.42,
654
- "learning_rate": 4.344572368421052e-05,
655
- "loss": 1.2212,
656
- "step": 8400
657
- },
658
- {
659
- "epoch": 24.71,
660
- "learning_rate": 4.295230263157895e-05,
661
- "loss": 1.1966,
662
- "step": 8500
663
- },
664
- {
665
- "epoch": 24.71,
666
- "eval_loss": 0.36800575256347656,
667
- "eval_runtime": 137.1685,
668
- "eval_samples_per_second": 33.681,
669
- "eval_steps_per_second": 4.214,
670
- "eval_wer": 0.32411429970017747,
671
- "step": 8500
672
- },
673
- {
674
- "epoch": 25.0,
675
- "learning_rate": 4.245888157894736e-05,
676
- "loss": 1.2128,
677
- "step": 8600
678
- },
679
- {
680
- "epoch": 25.29,
681
- "learning_rate": 4.196546052631579e-05,
682
- "loss": 1.2118,
683
- "step": 8700
684
- },
685
- {
686
- "epoch": 25.58,
687
- "learning_rate": 4.147203947368421e-05,
688
- "loss": 1.2009,
689
- "step": 8800
690
- },
691
- {
692
- "epoch": 25.87,
693
- "learning_rate": 4.097861842105263e-05,
694
- "loss": 1.1684,
695
- "step": 8900
696
- },
697
- {
698
- "epoch": 26.16,
699
- "learning_rate": 4.048519736842105e-05,
700
- "loss": 1.1925,
701
- "step": 9000
702
- },
703
- {
704
- "epoch": 26.16,
705
- "eval_loss": 0.3604615330696106,
706
- "eval_runtime": 153.4534,
707
- "eval_samples_per_second": 30.107,
708
- "eval_steps_per_second": 3.767,
709
- "eval_wer": 0.31713883619898425,
710
- "step": 9000
711
- },
712
- {
713
- "epoch": 26.45,
714
- "learning_rate": 3.9991776315789475e-05,
715
- "loss": 1.1759,
716
- "step": 9100
717
- },
718
- {
719
- "epoch": 26.74,
720
- "learning_rate": 3.949835526315789e-05,
721
- "loss": 1.1754,
722
- "step": 9200
723
- },
724
- {
725
- "epoch": 27.03,
726
- "learning_rate": 3.9004934210526315e-05,
727
- "loss": 1.198,
728
- "step": 9300
729
- },
730
- {
731
- "epoch": 27.32,
732
- "learning_rate": 3.8511513157894735e-05,
733
- "loss": 1.1602,
734
- "step": 9400
735
- },
736
- {
737
- "epoch": 27.61,
738
- "learning_rate": 3.801809210526315e-05,
739
- "loss": 1.1692,
740
- "step": 9500
741
- },
742
- {
743
- "epoch": 27.61,
744
- "eval_loss": 0.3512294292449951,
745
- "eval_runtime": 166.9619,
746
- "eval_samples_per_second": 27.671,
747
- "eval_steps_per_second": 3.462,
748
- "eval_wer": 0.31472189928409716,
749
- "step": 9500
750
- },
751
- {
752
- "epoch": 27.91,
753
- "learning_rate": 3.7524671052631575e-05,
754
- "loss": 1.1495,
755
- "step": 9600
756
- },
757
- {
758
- "epoch": 28.2,
759
- "learning_rate": 3.7031249999999995e-05,
760
- "loss": 1.1722,
761
- "step": 9700
762
- },
763
- {
764
- "epoch": 28.49,
765
- "learning_rate": 3.6537828947368416e-05,
766
- "loss": 1.1443,
767
- "step": 9800
768
- },
769
- {
770
- "epoch": 28.78,
771
- "learning_rate": 3.6044407894736836e-05,
772
- "loss": 1.1535,
773
- "step": 9900
774
- },
775
- {
776
- "epoch": 29.07,
777
- "learning_rate": 3.555098684210526e-05,
778
- "loss": 1.1704,
779
- "step": 10000
780
- },
781
- {
782
- "epoch": 29.07,
783
- "eval_loss": 0.3532196581363678,
784
- "eval_runtime": 134.5452,
785
- "eval_samples_per_second": 34.338,
786
- "eval_steps_per_second": 4.296,
787
- "eval_wer": 0.30976564890167047,
788
- "step": 10000
789
- },
790
- {
791
- "epoch": 29.36,
792
- "learning_rate": 3.505756578947368e-05,
793
- "loss": 1.1519,
794
- "step": 10100
795
- },
796
- {
797
- "epoch": 29.65,
798
- "learning_rate": 3.45641447368421e-05,
799
- "loss": 1.1507,
800
- "step": 10200
801
- },
802
- {
803
- "epoch": 29.94,
804
- "learning_rate": 3.407072368421052e-05,
805
- "loss": 1.1517,
806
- "step": 10300
807
- },
808
- {
809
- "epoch": 30.23,
810
- "learning_rate": 3.357730263157894e-05,
811
- "loss": 1.1447,
812
- "step": 10400
813
- },
814
- {
815
- "epoch": 30.52,
816
- "learning_rate": 3.308388157894737e-05,
817
- "loss": 1.1595,
818
- "step": 10500
819
- },
820
- {
821
- "epoch": 30.52,
822
- "eval_loss": 0.34246256947517395,
823
- "eval_runtime": 159.7393,
824
- "eval_samples_per_second": 28.922,
825
- "eval_steps_per_second": 3.618,
826
- "eval_wer": 0.3038915743743499,
827
- "step": 10500
828
- },
829
- {
830
- "epoch": 30.81,
831
- "learning_rate": 3.259046052631579e-05,
832
- "loss": 1.1451,
833
- "step": 10600
834
- },
835
- {
836
- "epoch": 31.1,
837
- "learning_rate": 3.2097039473684203e-05,
838
- "loss": 1.1304,
839
- "step": 10700
840
- },
841
- {
842
- "epoch": 31.39,
843
- "learning_rate": 3.160361842105263e-05,
844
- "loss": 1.1316,
845
- "step": 10800
846
- },
847
- {
848
- "epoch": 31.68,
849
- "learning_rate": 3.111019736842105e-05,
850
- "loss": 1.1301,
851
- "step": 10900
852
- },
853
- {
854
- "epoch": 31.97,
855
- "learning_rate": 3.061677631578947e-05,
856
- "loss": 1.1433,
857
- "step": 11000
858
- },
859
- {
860
- "epoch": 31.97,
861
- "eval_loss": 0.35683709383010864,
862
- "eval_runtime": 158.7503,
863
- "eval_samples_per_second": 29.102,
864
- "eval_steps_per_second": 3.641,
865
- "eval_wer": 0.30257602643333537,
866
- "step": 11000
867
- },
868
- {
869
- "epoch": 32.27,
870
- "learning_rate": 3.0123355263157894e-05,
871
- "loss": 1.1337,
872
- "step": 11100
873
- },
874
- {
875
- "epoch": 32.56,
876
- "learning_rate": 2.9629934210526314e-05,
877
- "loss": 1.1227,
878
- "step": 11200
879
- },
880
- {
881
- "epoch": 32.85,
882
- "learning_rate": 2.913651315789473e-05,
883
- "loss": 1.1281,
884
- "step": 11300
885
- },
886
- {
887
- "epoch": 33.14,
888
- "learning_rate": 2.8643092105263154e-05,
889
- "loss": 1.1203,
890
- "step": 11400
891
- },
892
- {
893
- "epoch": 33.43,
894
- "learning_rate": 2.8149671052631574e-05,
895
- "loss": 1.1295,
896
- "step": 11500
897
- },
898
- {
899
- "epoch": 33.43,
900
- "eval_loss": 0.34605443477630615,
901
- "eval_runtime": 160.1749,
902
- "eval_samples_per_second": 28.843,
903
- "eval_steps_per_second": 3.609,
904
- "eval_wer": 0.2992106712353913,
905
- "step": 11500
906
- },
907
- {
908
- "epoch": 33.72,
909
- "learning_rate": 2.7656249999999998e-05,
910
- "loss": 1.0915,
911
- "step": 11600
912
- },
913
- {
914
- "epoch": 34.01,
915
- "learning_rate": 2.7162828947368418e-05,
916
- "loss": 1.1312,
917
- "step": 11700
918
- },
919
- {
920
- "epoch": 34.3,
921
- "learning_rate": 2.6669407894736838e-05,
922
- "loss": 1.1146,
923
- "step": 11800
924
- },
925
- {
926
- "epoch": 34.59,
927
- "learning_rate": 2.6175986842105262e-05,
928
- "loss": 1.1184,
929
- "step": 11900
930
- },
931
- {
932
- "epoch": 34.88,
933
- "learning_rate": 2.56875e-05,
934
- "loss": 1.1131,
935
- "step": 12000
936
- },
937
- {
938
- "epoch": 34.88,
939
- "eval_loss": 0.3348712623119354,
940
- "eval_runtime": 157.6811,
941
- "eval_samples_per_second": 29.3,
942
- "eval_steps_per_second": 3.666,
943
- "eval_wer": 0.29422382671480146,
944
- "step": 12000
945
- },
946
- {
947
- "epoch": 35.17,
948
- "learning_rate": 2.5194078947368418e-05,
949
- "loss": 1.1025,
950
- "step": 12100
951
- },
952
- {
953
- "epoch": 35.46,
954
- "learning_rate": 2.4700657894736838e-05,
955
- "loss": 1.1069,
956
- "step": 12200
957
- },
958
- {
959
- "epoch": 35.75,
960
- "learning_rate": 2.420723684210526e-05,
961
- "loss": 1.076,
962
- "step": 12300
963
- },
964
- {
965
- "epoch": 36.05,
966
- "learning_rate": 2.371381578947368e-05,
967
- "loss": 1.111,
968
- "step": 12400
969
- },
970
- {
971
- "epoch": 36.34,
972
- "learning_rate": 2.32203947368421e-05,
973
- "loss": 1.1015,
974
- "step": 12500
975
- },
976
- {
977
- "epoch": 36.34,
978
- "eval_loss": 0.3378337025642395,
979
- "eval_runtime": 160.9202,
980
- "eval_samples_per_second": 28.71,
981
- "eval_steps_per_second": 3.592,
982
- "eval_wer": 0.29612066328091535,
983
- "step": 12500
984
- },
985
- {
986
- "epoch": 36.63,
987
- "learning_rate": 2.2726973684210525e-05,
988
- "loss": 1.1047,
989
- "step": 12600
990
- },
991
- {
992
- "epoch": 36.92,
993
- "learning_rate": 2.2233552631578945e-05,
994
- "loss": 1.102,
995
- "step": 12700
996
- },
997
- {
998
- "epoch": 37.21,
999
- "learning_rate": 2.1740131578947365e-05,
1000
- "loss": 1.095,
1001
- "step": 12800
1002
- },
1003
- {
1004
- "epoch": 37.5,
1005
- "learning_rate": 2.124671052631579e-05,
1006
- "loss": 1.0709,
1007
- "step": 12900
1008
- },
1009
- {
1010
- "epoch": 37.79,
1011
- "learning_rate": 2.075328947368421e-05,
1012
- "loss": 1.0835,
1013
- "step": 13000
1014
- },
1015
- {
1016
- "epoch": 37.79,
1017
- "eval_loss": 0.3281959593296051,
1018
- "eval_runtime": 159.3279,
1019
- "eval_samples_per_second": 28.997,
1020
- "eval_steps_per_second": 3.628,
1021
- "eval_wer": 0.2865141038976932,
1022
- "step": 13000
1023
- },
1024
- {
1025
- "epoch": 38.08,
1026
- "learning_rate": 2.0259868421052632e-05,
1027
- "loss": 1.0846,
1028
- "step": 13100
1029
- },
1030
- {
1031
- "epoch": 38.37,
1032
- "learning_rate": 1.9766447368421053e-05,
1033
- "loss": 1.0933,
1034
- "step": 13200
1035
- },
1036
- {
1037
- "epoch": 38.66,
1038
- "learning_rate": 1.927302631578947e-05,
1039
- "loss": 1.0661,
1040
- "step": 13300
1041
- },
1042
- {
1043
- "epoch": 38.95,
1044
- "learning_rate": 1.8779605263157893e-05,
1045
- "loss": 1.0902,
1046
- "step": 13400
1047
- },
1048
- {
1049
- "epoch": 39.24,
1050
- "learning_rate": 1.8286184210526313e-05,
1051
- "loss": 1.083,
1052
- "step": 13500
1053
- },
1054
- {
1055
- "epoch": 39.24,
1056
- "eval_loss": 0.3181643486022949,
1057
- "eval_runtime": 158.3527,
1058
- "eval_samples_per_second": 29.175,
1059
- "eval_steps_per_second": 3.65,
1060
- "eval_wer": 0.2825674600746497,
1061
- "step": 13500
1062
- },
1063
- {
1064
- "epoch": 39.53,
1065
- "learning_rate": 1.7792763157894736e-05,
1066
- "loss": 1.0548,
1067
- "step": 13600
1068
- },
1069
- {
1070
- "epoch": 39.82,
1071
- "learning_rate": 1.7299342105263156e-05,
1072
- "loss": 1.0735,
1073
- "step": 13700
1074
- },
1075
- {
1076
- "epoch": 40.12,
1077
- "learning_rate": 1.6805921052631577e-05,
1078
- "loss": 1.085,
1079
- "step": 13800
1080
- },
1081
- {
1082
- "epoch": 40.41,
1083
- "learning_rate": 1.63125e-05,
1084
- "loss": 1.0531,
1085
- "step": 13900
1086
- },
1087
- {
1088
- "epoch": 40.7,
1089
- "learning_rate": 1.581907894736842e-05,
1090
- "loss": 1.0819,
1091
- "step": 14000
1092
- },
1093
- {
1094
- "epoch": 40.7,
1095
- "eval_loss": 0.32643991708755493,
1096
- "eval_runtime": 159.1099,
1097
- "eval_samples_per_second": 29.037,
1098
- "eval_steps_per_second": 3.633,
1099
- "eval_wer": 0.28504558526586304,
1100
- "step": 14000
1101
- },
1102
- {
1103
- "epoch": 40.99,
1104
- "learning_rate": 1.532565789473684e-05,
1105
- "loss": 1.0679,
1106
- "step": 14100
1107
- },
1108
- {
1109
- "epoch": 41.28,
1110
- "learning_rate": 1.4832236842105262e-05,
1111
- "loss": 1.0773,
1112
- "step": 14200
1113
- },
1114
- {
1115
- "epoch": 41.57,
1116
- "learning_rate": 1.4338815789473682e-05,
1117
- "loss": 1.0587,
1118
- "step": 14300
1119
- },
1120
- {
1121
- "epoch": 41.86,
1122
- "learning_rate": 1.3845394736842104e-05,
1123
- "loss": 1.0287,
1124
- "step": 14400
1125
- },
1126
- {
1127
- "epoch": 42.15,
1128
- "learning_rate": 1.3351973684210524e-05,
1129
- "loss": 1.072,
1130
- "step": 14500
1131
- },
1132
- {
1133
- "epoch": 42.15,
1134
- "eval_loss": 0.32792535424232483,
1135
- "eval_runtime": 155.2521,
1136
- "eval_samples_per_second": 29.758,
1137
- "eval_steps_per_second": 3.723,
1138
- "eval_wer": 0.2817108242060821,
1139
- "step": 14500
1140
- },
1141
- {
1142
- "epoch": 42.44,
1143
- "learning_rate": 1.2863486842105262e-05,
1144
- "loss": 1.0508,
1145
- "step": 14600
1146
- },
1147
- {
1148
- "epoch": 42.73,
1149
- "learning_rate": 1.2370065789473684e-05,
1150
- "loss": 1.0408,
1151
- "step": 14700
1152
- },
1153
- {
1154
- "epoch": 43.02,
1155
- "learning_rate": 1.1876644736842105e-05,
1156
- "loss": 1.0725,
1157
- "step": 14800
1158
- },
1159
- {
1160
- "epoch": 43.31,
1161
- "learning_rate": 1.1383223684210525e-05,
1162
- "loss": 1.0381,
1163
- "step": 14900
1164
- },
1165
- {
1166
- "epoch": 43.6,
1167
- "learning_rate": 1.0889802631578946e-05,
1168
- "loss": 1.0456,
1169
- "step": 15000
1170
- },
1171
- {
1172
- "epoch": 43.6,
1173
- "eval_loss": 0.323445200920105,
1174
- "eval_runtime": 137.3413,
1175
- "eval_samples_per_second": 33.639,
1176
- "eval_steps_per_second": 4.208,
1177
- "eval_wer": 0.2792632931530319,
1178
- "step": 15000
1179
- },
1180
- {
1181
- "epoch": 43.89,
1182
- "learning_rate": 1.0396381578947367e-05,
1183
- "loss": 1.0655,
1184
- "step": 15100
1185
- },
1186
- {
1187
- "epoch": 44.19,
1188
- "learning_rate": 9.90296052631579e-06,
1189
- "loss": 1.0509,
1190
- "step": 15200
1191
- },
1192
- {
1193
- "epoch": 44.48,
1194
- "learning_rate": 9.40953947368421e-06,
1195
- "loss": 1.0357,
1196
- "step": 15300
1197
- },
1198
- {
1199
- "epoch": 44.77,
1200
- "learning_rate": 8.916118421052631e-06,
1201
- "loss": 1.0467,
1202
- "step": 15400
1203
- },
1204
- {
1205
- "epoch": 45.06,
1206
- "learning_rate": 8.422697368421051e-06,
1207
- "loss": 1.0581,
1208
- "step": 15500
1209
- },
1210
- {
1211
- "epoch": 45.06,
1212
- "eval_loss": 0.32200726866722107,
1213
- "eval_runtime": 138.9222,
1214
- "eval_samples_per_second": 33.256,
1215
- "eval_steps_per_second": 4.161,
1216
- "eval_wer": 0.27791715107385423,
1217
- "step": 15500
1218
- },
1219
- {
1220
- "epoch": 45.35,
1221
- "learning_rate": 7.929276315789473e-06,
1222
- "loss": 1.0512,
1223
- "step": 15600
1224
- },
1225
- {
1226
- "epoch": 45.64,
1227
- "learning_rate": 7.435855263157894e-06,
1228
- "loss": 1.0397,
1229
- "step": 15700
1230
- },
1231
- {
1232
- "epoch": 45.93,
1233
- "learning_rate": 6.9473684210526315e-06,
1234
- "loss": 1.0455,
1235
- "step": 15800
1236
- },
1237
- {
1238
- "epoch": 46.22,
1239
- "learning_rate": 6.453947368421052e-06,
1240
- "loss": 1.0347,
1241
- "step": 15900
1242
- },
1243
- {
1244
- "epoch": 46.51,
1245
- "learning_rate": 5.9605263157894735e-06,
1246
- "loss": 1.0406,
1247
- "step": 16000
1248
- },
1249
- {
1250
- "epoch": 46.51,
1251
- "eval_loss": 0.32076749205589294,
1252
- "eval_runtime": 134.5334,
1253
- "eval_samples_per_second": 34.341,
1254
- "eval_steps_per_second": 4.296,
1255
- "eval_wer": 0.2762344734748822,
1256
- "step": 16000
1257
- },
1258
- {
1259
- "epoch": 46.8,
1260
- "learning_rate": 5.467105263157894e-06,
1261
- "loss": 1.0528,
1262
- "step": 16100
1263
- },
1264
- {
1265
- "epoch": 47.09,
1266
- "learning_rate": 4.973684210526316e-06,
1267
- "loss": 1.045,
1268
- "step": 16200
1269
- },
1270
- {
1271
- "epoch": 47.38,
1272
- "learning_rate": 4.480263157894736e-06,
1273
- "loss": 1.0129,
1274
- "step": 16300
1275
- },
1276
- {
1277
- "epoch": 47.67,
1278
- "learning_rate": 3.986842105263157e-06,
1279
- "loss": 1.0102,
1280
- "step": 16400
1281
- },
1282
- {
1283
- "epoch": 47.96,
1284
- "learning_rate": 3.4934210526315787e-06,
1285
- "loss": 1.0422,
1286
- "step": 16500
1287
- },
1288
- {
1289
- "epoch": 47.96,
1290
- "eval_loss": 0.3183736503124237,
1291
- "eval_runtime": 137.2442,
1292
- "eval_samples_per_second": 33.663,
1293
- "eval_steps_per_second": 4.211,
1294
- "eval_wer": 0.2751942727773359,
1295
- "step": 16500
1296
- },
1297
- {
1298
- "epoch": 48.26,
1299
- "learning_rate": 2.9999999999999997e-06,
1300
- "loss": 1.0323,
1301
- "step": 16600
1302
- },
1303
- {
1304
- "epoch": 48.55,
1305
- "learning_rate": 2.506578947368421e-06,
1306
- "loss": 1.0292,
1307
- "step": 16700
1308
- },
1309
- {
1310
- "epoch": 48.84,
1311
- "learning_rate": 2.013157894736842e-06,
1312
- "loss": 1.0378,
1313
- "step": 16800
1314
- },
1315
- {
1316
- "epoch": 49.13,
1317
- "learning_rate": 1.519736842105263e-06,
1318
- "loss": 1.035,
1319
- "step": 16900
1320
- },
1321
- {
1322
- "epoch": 49.42,
1323
- "learning_rate": 1.0263157894736841e-06,
1324
- "loss": 1.0099,
1325
- "step": 17000
1326
- },
1327
- {
1328
- "epoch": 49.42,
1329
- "eval_loss": 0.31810811161994934,
1330
- "eval_runtime": 134.533,
1331
- "eval_samples_per_second": 34.341,
1332
- "eval_steps_per_second": 4.296,
1333
- "eval_wer": 0.2734810010402007,
1334
- "step": 17000
1335
- }
1336
- ],
1337
- "max_steps": 17200,
1338
- "num_train_epochs": 50,
1339
- "total_flos": 5.016617790448189e+19,
1340
- "trial_name": null,
1341
- "trial_params": null
1342
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-500/.ipynb_checkpoints/trainer_state-checkpoint.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.4525018129079044,
5
+ "global_step": 500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.29,
12
+ "learning_rate": 3.675e-06,
13
+ "loss": 1.0197,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.58,
18
+ "learning_rate": 7.425e-06,
19
+ "loss": 1.0171,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 0.87,
24
+ "learning_rate": 1.1137499999999998e-05,
25
+ "loss": 1.026,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 1.16,
30
+ "learning_rate": 1.48875e-05,
31
+ "loss": 1.0383,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 1.45,
36
+ "learning_rate": 1.86375e-05,
37
+ "loss": 1.0296,
38
+ "step": 500
39
+ },
40
+ {
41
+ "epoch": 1.45,
42
+ "eval_loss": 0.3191435635089874,
43
+ "eval_runtime": 138.7872,
44
+ "eval_samples_per_second": 33.288,
45
+ "eval_steps_per_second": 4.165,
46
+ "eval_wer": 0.27421526035611576,
47
+ "step": 500
48
+ }
49
+ ],
50
+ "max_steps": 17200,
51
+ "num_train_epochs": 50,
52
+ "total_flos": 1.4827194756605722e+18,
53
+ "trial_name": null,
54
+ "trial_params": null
55
+ }