boumehdi commited on
Commit
8135567
1 Parent(s): 1670723

Upload 9 files

Browse files
Files changed (8) hide show
  1. config.json +1 -1
  2. optimizer.pt +1 -1
  3. pytorch_model.bin +1 -1
  4. rng_state.pth +2 -2
  5. scaler.pt +1 -1
  6. scheduler.pt +1 -1
  7. trainer_state.json +797 -122
  8. training_args.bin +1 -1
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "checkpoint-1200",
3
  "activation_dropout": 0.1,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
 
1
  {
2
+ "_name_or_path": "othrif",
3
  "activation_dropout": 0.1,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd2f774f6cf1ca2bd300966262722c8cde7aab481aed5e40f8b70664af4d8d6c
3
  size 2490594117
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c79e728d98168d85015d00930e5f1bd407f25c2d89b2d55d9c9bb2f99ce3eee
3
  size 2490594117
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d1176a167f49d4d997324e388a10a7e83e555a32144196cecdac19812847823
3
  size 1262195949
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccfbee3b3ae99465c34b8091981ba73de06eaab423aefbdb55c7872677393c70
3
  size 1262195949
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c438dbe3477c483067ae6907d5b0fdcc8f6e66c02f2ef7717fe65f4b5b65eda
3
- size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef516f13eae1058f7d3a5544d2d46b334d6f3f2c0af866334d159bdf2bf78524
3
+ size 14639
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:573c0e110dfe1e7a3fc11065b73453594c7e0321077f7f0120dfed0a0d70518c
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cc746e056a378283285a9fcc1e3f23a267ec6b0193f2c4ba34347b78ae0c98f
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b5d8f13321c8b4e468ca80ef66eaf9552edb236f9798a14f3c7466d6f8495a4
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bf093631f76407e20836696a637f5ab6e0d337c65ba9151883e190682646544
3
  size 627
trainer_state.json CHANGED
@@ -1,211 +1,886 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 26.395939086294415,
5
- "global_step": 5200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 2.03,
12
- "learning_rate": 0.0002,
13
- "loss": 0.2479,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  "step": 400
15
  },
16
  {
17
- "epoch": 2.03,
18
- "eval_loss": 0.47397056221961975,
19
- "eval_runtime": 121.9231,
20
- "eval_samples_per_second": 17.47,
21
- "eval_steps_per_second": 2.19,
22
- "eval_wer": 0.5506920619898261,
23
  "step": 400
24
  },
25
  {
26
- "epoch": 4.06,
27
- "learning_rate": 0.00018605221657613458,
28
- "loss": 0.2306,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  "step": 800
30
  },
31
  {
32
- "epoch": 4.06,
33
- "eval_loss": 0.4647212624549866,
34
- "eval_runtime": 123.4969,
35
- "eval_samples_per_second": 17.247,
36
- "eval_steps_per_second": 2.162,
37
- "eval_wer": 0.5497456524310895,
38
  "step": 800
39
  },
40
  {
41
- "epoch": 6.09,
42
- "learning_rate": 0.00017206938847029964,
43
- "loss": 0.201,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  "step": 1200
45
  },
46
  {
47
- "epoch": 6.09,
48
- "eval_loss": 0.43976637721061707,
49
- "eval_runtime": 125.3202,
50
- "eval_samples_per_second": 16.996,
51
- "eval_steps_per_second": 2.131,
52
- "eval_wer": 0.5441854962735123,
53
  "step": 1200
54
  },
55
  {
56
- "epoch": 8.12,
57
- "learning_rate": 0.0001580515156824952,
58
- "loss": 0.2676,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  "step": 1600
60
  },
61
  {
62
- "epoch": 8.12,
63
- "eval_loss": 0.46590062975883484,
64
- "eval_runtime": 127.6886,
65
- "eval_samples_per_second": 16.681,
66
- "eval_steps_per_second": 2.091,
67
- "eval_wer": 0.5331834851532,
68
  "step": 1600
69
  },
70
  {
71
- "epoch": 10.15,
72
- "learning_rate": 0.00014403364289469075,
73
- "loss": 0.2295,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  "step": 2000
75
  },
76
  {
77
- "epoch": 10.15,
78
- "eval_loss": 0.433741956949234,
79
- "eval_runtime": 126.2574,
80
- "eval_samples_per_second": 16.87,
81
- "eval_steps_per_second": 2.115,
82
- "eval_wer": 0.5302259552821483,
83
  "step": 2000
84
  },
85
  {
86
- "epoch": 12.18,
87
- "learning_rate": 0.00013001577010688628,
88
- "loss": 0.2011,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  "step": 2400
90
  },
91
  {
92
- "epoch": 12.18,
93
- "eval_loss": 0.43763187527656555,
94
- "eval_runtime": 127.2021,
95
- "eval_samples_per_second": 16.745,
96
- "eval_steps_per_second": 2.099,
97
- "eval_wer": 0.5218265704483616,
98
  "step": 2400
99
  },
100
  {
101
- "epoch": 14.21,
102
- "learning_rate": 0.00011599789731908184,
103
- "loss": 0.1883,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  "step": 2800
105
  },
106
  {
107
- "epoch": 14.21,
108
- "eval_loss": 0.4427320659160614,
109
- "eval_runtime": 133.553,
110
- "eval_samples_per_second": 15.949,
111
- "eval_steps_per_second": 1.999,
112
- "eval_wer": 0.5275050278007808,
113
  "step": 2800
114
  },
115
  {
116
- "epoch": 16.24,
117
- "learning_rate": 0.00010198002453127737,
118
- "loss": 0.1599,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  "step": 3200
120
  },
121
  {
122
- "epoch": 16.24,
123
- "eval_loss": 0.4528989791870117,
124
- "eval_runtime": 130.4751,
125
- "eval_samples_per_second": 16.325,
126
- "eval_steps_per_second": 2.046,
127
- "eval_wer": 0.5167396190701526,
128
  "step": 3200
129
  },
130
  {
131
- "epoch": 18.27,
132
- "learning_rate": 8.796215174347293e-05,
133
- "loss": 0.1473,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  "step": 3600
135
  },
136
  {
137
- "epoch": 18.27,
138
- "eval_loss": 0.46878582239151,
139
- "eval_runtime": 130.7961,
140
- "eval_samples_per_second": 16.285,
141
- "eval_steps_per_second": 2.041,
142
- "eval_wer": 0.5125990772506802,
143
  "step": 3600
144
  },
145
  {
146
- "epoch": 20.3,
147
- "learning_rate": 7.394427895566848e-05,
148
- "loss": 0.1306,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  "step": 4000
150
  },
151
  {
152
- "epoch": 20.3,
153
- "eval_loss": 0.46525096893310547,
154
- "eval_runtime": 130.9702,
155
- "eval_samples_per_second": 16.263,
156
- "eval_steps_per_second": 2.039,
157
- "eval_wer": 0.5118892700816279,
158
  "step": 4000
159
  },
160
  {
161
- "epoch": 22.34,
162
- "learning_rate": 5.9926406167864026e-05,
163
- "loss": 0.1222,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  "step": 4400
165
  },
166
  {
167
- "epoch": 22.34,
168
- "eval_loss": 0.4552956223487854,
169
- "eval_runtime": 132.9038,
170
- "eval_samples_per_second": 16.027,
171
- "eval_steps_per_second": 2.009,
172
- "eval_wer": 0.5014787649355259,
173
  "step": 4400
174
  },
175
  {
176
- "epoch": 24.37,
177
- "learning_rate": 4.5908533380059577e-05,
178
- "loss": 0.1097,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  "step": 4800
180
  },
181
  {
182
- "epoch": 24.37,
183
- "eval_loss": 0.4868864417076111,
184
- "eval_runtime": 128.1956,
185
- "eval_samples_per_second": 16.615,
186
- "eval_steps_per_second": 2.083,
187
- "eval_wer": 0.5012421625458418,
188
  "step": 4800
189
  },
190
  {
191
- "epoch": 26.4,
192
- "learning_rate": 3.189066059225513e-05,
193
- "loss": 0.0988,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  "step": 5200
195
  },
196
  {
197
- "epoch": 26.4,
198
- "eval_loss": 0.45241138339042664,
199
- "eval_runtime": 127.9868,
200
- "eval_samples_per_second": 16.642,
201
- "eval_steps_per_second": 2.086,
202
- "eval_wer": 0.4968650183366852,
203
  "step": 5200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  }
205
  ],
206
- "max_steps": 6107,
207
- "num_train_epochs": 31,
208
- "total_flos": 1.2361810216998281e+19,
209
  "trial_name": null,
210
  "trial_params": null
211
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 37.90553745928339,
5
+ "global_step": 5800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.65,
12
+ "learning_rate": 0.0001,
13
+ "loss": 1.0071,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.65,
18
+ "eval_loss": 0.3728577196598053,
19
+ "eval_runtime": 169.6136,
20
+ "eval_samples_per_second": 19.48,
21
+ "eval_steps_per_second": 2.435,
22
+ "eval_wer": 0.5494842925753878,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 1.31,
27
+ "learning_rate": 9.918652891889694e-05,
28
+ "loss": 0.6823,
29
+ "step": 200
30
+ },
31
+ {
32
+ "epoch": 1.31,
33
+ "eval_loss": 0.33882805705070496,
34
+ "eval_runtime": 167.327,
35
+ "eval_samples_per_second": 19.746,
36
+ "eval_steps_per_second": 2.468,
37
+ "eval_wer": 0.5344461066057791,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 1.96,
42
+ "learning_rate": 9.837305783779387e-05,
43
+ "loss": 0.6063,
44
+ "step": 300
45
+ },
46
+ {
47
+ "epoch": 1.96,
48
+ "eval_loss": 0.3208909332752228,
49
+ "eval_runtime": 174.661,
50
+ "eval_samples_per_second": 18.917,
51
+ "eval_steps_per_second": 2.365,
52
+ "eval_wer": 0.5098811117234864,
53
+ "step": 300
54
+ },
55
+ {
56
+ "epoch": 2.61,
57
+ "learning_rate": 9.755958675669081e-05,
58
+ "loss": 0.5326,
59
  "step": 400
60
  },
61
  {
62
+ "epoch": 2.61,
63
+ "eval_loss": 0.30963289737701416,
64
+ "eval_runtime": 177.329,
65
+ "eval_samples_per_second": 18.632,
66
+ "eval_steps_per_second": 2.329,
67
+ "eval_wer": 0.5025588536335721,
68
  "step": 400
69
  },
70
  {
71
+ "epoch": 3.27,
72
+ "learning_rate": 9.674611567558773e-05,
73
+ "loss": 0.5074,
74
+ "step": 500
75
+ },
76
+ {
77
+ "epoch": 3.27,
78
+ "eval_loss": 0.3118290901184082,
79
+ "eval_runtime": 173.1863,
80
+ "eval_samples_per_second": 19.078,
81
+ "eval_steps_per_second": 2.385,
82
+ "eval_wer": 0.4959452011652626,
83
+ "step": 500
84
+ },
85
+ {
86
+ "epoch": 3.92,
87
+ "learning_rate": 9.593264459448467e-05,
88
+ "loss": 0.446,
89
+ "step": 600
90
+ },
91
+ {
92
+ "epoch": 3.92,
93
+ "eval_loss": 0.30445897579193115,
94
+ "eval_runtime": 173.9457,
95
+ "eval_samples_per_second": 18.994,
96
+ "eval_steps_per_second": 2.374,
97
+ "eval_wer": 0.4865758601684907,
98
+ "step": 600
99
+ },
100
+ {
101
+ "epoch": 4.57,
102
+ "learning_rate": 9.51191735133816e-05,
103
+ "loss": 0.4283,
104
+ "step": 700
105
+ },
106
+ {
107
+ "epoch": 4.57,
108
+ "eval_loss": 0.30916285514831543,
109
+ "eval_runtime": 176.2446,
110
+ "eval_samples_per_second": 18.747,
111
+ "eval_steps_per_second": 2.343,
112
+ "eval_wer": 0.48366270372411624,
113
+ "step": 700
114
+ },
115
+ {
116
+ "epoch": 5.23,
117
+ "learning_rate": 9.430570243227855e-05,
118
+ "loss": 0.41,
119
  "step": 800
120
  },
121
  {
122
+ "epoch": 5.23,
123
+ "eval_loss": 0.35956883430480957,
124
+ "eval_runtime": 169.7235,
125
+ "eval_samples_per_second": 19.467,
126
+ "eval_steps_per_second": 2.433,
127
+ "eval_wer": 0.4932682465947563,
128
  "step": 800
129
  },
130
  {
131
+ "epoch": 5.88,
132
+ "learning_rate": 9.349223135117547e-05,
133
+ "loss": 0.3802,
134
+ "step": 900
135
+ },
136
+ {
137
+ "epoch": 5.88,
138
+ "eval_loss": 0.3234783411026001,
139
+ "eval_runtime": 171.4522,
140
+ "eval_samples_per_second": 19.271,
141
+ "eval_steps_per_second": 2.409,
142
+ "eval_wer": 0.47752145500354304,
143
+ "step": 900
144
+ },
145
+ {
146
+ "epoch": 6.53,
147
+ "learning_rate": 9.267876027007241e-05,
148
+ "loss": 0.3852,
149
+ "step": 1000
150
+ },
151
+ {
152
+ "epoch": 6.53,
153
+ "eval_loss": 0.32342973351478577,
154
+ "eval_runtime": 184.4459,
155
+ "eval_samples_per_second": 17.913,
156
+ "eval_steps_per_second": 2.239,
157
+ "eval_wer": 0.47815132666719157,
158
+ "step": 1000
159
+ },
160
+ {
161
+ "epoch": 7.19,
162
+ "learning_rate": 9.186528918896934e-05,
163
+ "loss": 0.3539,
164
+ "step": 1100
165
+ },
166
+ {
167
+ "epoch": 7.19,
168
+ "eval_loss": 0.33684083819389343,
169
+ "eval_runtime": 170.8359,
170
+ "eval_samples_per_second": 19.34,
171
+ "eval_steps_per_second": 2.418,
172
+ "eval_wer": 0.4796472718683568,
173
+ "step": 1100
174
+ },
175
+ {
176
+ "epoch": 7.84,
177
+ "learning_rate": 9.105181810786628e-05,
178
+ "loss": 0.3444,
179
  "step": 1200
180
  },
181
  {
182
+ "epoch": 7.84,
183
+ "eval_loss": 0.3268304169178009,
184
+ "eval_runtime": 170.984,
185
+ "eval_samples_per_second": 19.323,
186
+ "eval_steps_per_second": 2.415,
187
+ "eval_wer": 0.4732698212739154,
188
  "step": 1200
189
  },
190
  {
191
+ "epoch": 8.5,
192
+ "learning_rate": 9.02383470267632e-05,
193
+ "loss": 0.336,
194
+ "step": 1300
195
+ },
196
+ {
197
+ "epoch": 8.5,
198
+ "eval_loss": 0.34285250306129456,
199
+ "eval_runtime": 171.7981,
200
+ "eval_samples_per_second": 19.232,
201
+ "eval_steps_per_second": 2.404,
202
+ "eval_wer": 0.479883473742225,
203
+ "step": 1300
204
+ },
205
+ {
206
+ "epoch": 9.15,
207
+ "learning_rate": 8.942487594566014e-05,
208
+ "loss": 0.3041,
209
+ "step": 1400
210
+ },
211
+ {
212
+ "epoch": 9.15,
213
+ "eval_loss": 0.35453349351882935,
214
+ "eval_runtime": 172.0678,
215
+ "eval_samples_per_second": 19.202,
216
+ "eval_steps_per_second": 2.4,
217
+ "eval_wer": 0.46248326903393433,
218
+ "step": 1400
219
+ },
220
+ {
221
+ "epoch": 9.8,
222
+ "learning_rate": 8.861140486455706e-05,
223
+ "loss": 0.3074,
224
+ "step": 1500
225
+ },
226
+ {
227
+ "epoch": 9.8,
228
+ "eval_loss": 0.3339354693889618,
229
+ "eval_runtime": 172.1803,
230
+ "eval_samples_per_second": 19.189,
231
+ "eval_steps_per_second": 2.399,
232
+ "eval_wer": 0.46319187465553896,
233
+ "step": 1500
234
+ },
235
+ {
236
+ "epoch": 10.46,
237
+ "learning_rate": 8.7797933783454e-05,
238
+ "loss": 0.2948,
239
  "step": 1600
240
  },
241
  {
242
+ "epoch": 10.46,
243
+ "eval_loss": 0.34325212240219116,
244
+ "eval_runtime": 171.9876,
245
+ "eval_samples_per_second": 19.211,
246
+ "eval_steps_per_second": 2.401,
247
+ "eval_wer": 0.4646090858987481,
248
  "step": 1600
249
  },
250
  {
251
+ "epoch": 11.11,
252
+ "learning_rate": 8.698446270235093e-05,
253
+ "loss": 0.2905,
254
+ "step": 1700
255
+ },
256
+ {
257
+ "epoch": 11.11,
258
+ "eval_loss": 0.34282687306404114,
259
+ "eval_runtime": 172.5719,
260
+ "eval_samples_per_second": 19.146,
261
+ "eval_steps_per_second": 2.393,
262
+ "eval_wer": 0.4641366821510117,
263
+ "step": 1700
264
+ },
265
+ {
266
+ "epoch": 11.76,
267
+ "learning_rate": 8.617099162124787e-05,
268
+ "loss": 0.296,
269
+ "step": 1800
270
+ },
271
+ {
272
+ "epoch": 11.76,
273
+ "eval_loss": 0.35734105110168457,
274
+ "eval_runtime": 173.1126,
275
+ "eval_samples_per_second": 19.086,
276
+ "eval_steps_per_second": 2.386,
277
+ "eval_wer": 0.4665774348476498,
278
+ "step": 1800
279
+ },
280
+ {
281
+ "epoch": 12.42,
282
+ "learning_rate": 8.535752054014479e-05,
283
+ "loss": 0.2669,
284
+ "step": 1900
285
+ },
286
+ {
287
+ "epoch": 12.42,
288
+ "eval_loss": 0.34095147252082825,
289
+ "eval_runtime": 172.9162,
290
+ "eval_samples_per_second": 19.108,
291
+ "eval_steps_per_second": 2.388,
292
+ "eval_wer": 0.46224706716006614,
293
+ "step": 1900
294
+ },
295
+ {
296
+ "epoch": 13.07,
297
+ "learning_rate": 8.454404945904173e-05,
298
+ "loss": 0.2778,
299
  "step": 2000
300
  },
301
  {
302
+ "epoch": 13.07,
303
+ "eval_loss": 0.3445983827114105,
304
+ "eval_runtime": 172.7429,
305
+ "eval_samples_per_second": 19.127,
306
+ "eval_steps_per_second": 2.391,
307
+ "eval_wer": 0.4621683332021101,
308
  "step": 2000
309
  },
310
  {
311
+ "epoch": 13.72,
312
+ "learning_rate": 8.373057837793867e-05,
313
+ "loss": 0.2605,
314
+ "step": 2100
315
+ },
316
+ {
317
+ "epoch": 13.72,
318
+ "eval_loss": 0.364580363035202,
319
+ "eval_runtime": 173.3351,
320
+ "eval_samples_per_second": 19.061,
321
+ "eval_steps_per_second": 2.383,
322
+ "eval_wer": 0.4611447917486812,
323
+ "step": 2100
324
+ },
325
+ {
326
+ "epoch": 14.38,
327
+ "learning_rate": 8.291710729683561e-05,
328
+ "loss": 0.2562,
329
+ "step": 2200
330
+ },
331
+ {
332
+ "epoch": 14.38,
333
+ "eval_loss": 0.3529307544231415,
334
+ "eval_runtime": 173.4538,
335
+ "eval_samples_per_second": 19.048,
336
+ "eval_steps_per_second": 2.381,
337
+ "eval_wer": 0.46201086528619795,
338
+ "step": 2200
339
+ },
340
+ {
341
+ "epoch": 15.03,
342
+ "learning_rate": 8.210363621573253e-05,
343
+ "loss": 0.2587,
344
+ "step": 2300
345
+ },
346
+ {
347
+ "epoch": 15.03,
348
+ "eval_loss": 0.35722818970680237,
349
+ "eval_runtime": 173.1723,
350
+ "eval_samples_per_second": 19.079,
351
+ "eval_steps_per_second": 2.385,
352
+ "eval_wer": 0.4694118573340682,
353
+ "step": 2300
354
+ },
355
+ {
356
+ "epoch": 15.68,
357
+ "learning_rate": 8.129016513462947e-05,
358
+ "loss": 0.242,
359
  "step": 2400
360
  },
361
  {
362
+ "epoch": 15.68,
363
+ "eval_loss": 0.36534029245376587,
364
+ "eval_runtime": 173.1065,
365
+ "eval_samples_per_second": 19.087,
366
+ "eval_steps_per_second": 2.386,
367
+ "eval_wer": 0.45894024092591135,
368
  "step": 2400
369
  },
370
  {
371
+ "epoch": 16.34,
372
+ "learning_rate": 8.047669405352641e-05,
373
+ "loss": 0.232,
374
+ "step": 2500
375
+ },
376
+ {
377
+ "epoch": 16.34,
378
+ "eval_loss": 0.34964719414711,
379
+ "eval_runtime": 174.2382,
380
+ "eval_samples_per_second": 18.963,
381
+ "eval_steps_per_second": 2.37,
382
+ "eval_wer": 0.4605149200850327,
383
+ "step": 2500
384
+ },
385
+ {
386
+ "epoch": 16.99,
387
+ "learning_rate": 7.966322297242333e-05,
388
+ "loss": 0.2474,
389
+ "step": 2600
390
+ },
391
+ {
392
+ "epoch": 16.99,
393
+ "eval_loss": 0.3596344590187073,
394
+ "eval_runtime": 174.0298,
395
+ "eval_samples_per_second": 18.985,
396
+ "eval_steps_per_second": 2.373,
397
+ "eval_wer": 0.46783717817494685,
398
+ "step": 2600
399
+ },
400
+ {
401
+ "epoch": 17.64,
402
+ "learning_rate": 7.884975189132027e-05,
403
+ "loss": 0.2137,
404
+ "step": 2700
405
+ },
406
+ {
407
+ "epoch": 17.64,
408
+ "eval_loss": 0.3547351360321045,
409
+ "eval_runtime": 174.6108,
410
+ "eval_samples_per_second": 18.922,
411
+ "eval_steps_per_second": 2.365,
412
+ "eval_wer": 0.4609873238327691,
413
+ "step": 2700
414
+ },
415
+ {
416
+ "epoch": 18.3,
417
+ "learning_rate": 7.80362808102172e-05,
418
+ "loss": 0.2261,
419
  "step": 2800
420
  },
421
  {
422
+ "epoch": 18.3,
423
+ "eval_loss": 0.35713937878608704,
424
+ "eval_runtime": 173.8691,
425
+ "eval_samples_per_second": 19.003,
426
+ "eval_steps_per_second": 2.375,
427
+ "eval_wer": 0.4579954334304385,
428
  "step": 2800
429
  },
430
  {
431
+ "epoch": 18.95,
432
+ "learning_rate": 7.723094443992517e-05,
433
+ "loss": 0.2141,
434
+ "step": 2900
435
+ },
436
+ {
437
+ "epoch": 18.95,
438
+ "eval_loss": 0.36411064863204956,
439
+ "eval_runtime": 174.3463,
440
+ "eval_samples_per_second": 18.951,
441
+ "eval_steps_per_second": 2.369,
442
+ "eval_wer": 0.45563341469175656,
443
+ "step": 2900
444
+ },
445
+ {
446
+ "epoch": 19.61,
447
+ "learning_rate": 7.64174733588221e-05,
448
+ "loss": 0.2201,
449
+ "step": 3000
450
+ },
451
+ {
452
+ "epoch": 19.61,
453
+ "eval_loss": 0.34566032886505127,
454
+ "eval_runtime": 173.9331,
455
+ "eval_samples_per_second": 18.996,
456
+ "eval_steps_per_second": 2.374,
457
+ "eval_wer": 0.45303519407920634,
458
+ "step": 3000
459
+ },
460
+ {
461
+ "epoch": 20.26,
462
+ "learning_rate": 7.560400227771903e-05,
463
+ "loss": 0.2243,
464
+ "step": 3100
465
+ },
466
+ {
467
+ "epoch": 20.26,
468
+ "eval_loss": 0.3523178994655609,
469
+ "eval_runtime": 174.1671,
470
+ "eval_samples_per_second": 18.97,
471
+ "eval_steps_per_second": 2.371,
472
+ "eval_wer": 0.4571293598929218,
473
+ "step": 3100
474
+ },
475
+ {
476
+ "epoch": 20.91,
477
+ "learning_rate": 7.479053119661597e-05,
478
+ "loss": 0.1891,
479
  "step": 3200
480
  },
481
  {
482
+ "epoch": 20.91,
483
+ "eval_loss": 0.337533563375473,
484
+ "eval_runtime": 174.612,
485
+ "eval_samples_per_second": 18.922,
486
+ "eval_steps_per_second": 2.365,
487
+ "eval_wer": 0.4541374694905913,
488
  "step": 3200
489
  },
490
  {
491
+ "epoch": 21.57,
492
+ "learning_rate": 7.39770601155129e-05,
493
+ "loss": 0.2033,
494
+ "step": 3300
495
+ },
496
+ {
497
+ "epoch": 21.57,
498
+ "eval_loss": 0.3634466230869293,
499
+ "eval_runtime": 174.6521,
500
+ "eval_samples_per_second": 18.918,
501
+ "eval_steps_per_second": 2.365,
502
+ "eval_wer": 0.4579166994724825,
503
+ "step": 3300
504
+ },
505
+ {
506
+ "epoch": 22.22,
507
+ "learning_rate": 7.316358903440983e-05,
508
+ "loss": 0.2035,
509
+ "step": 3400
510
+ },
511
+ {
512
+ "epoch": 22.22,
513
+ "eval_loss": 0.3793589174747467,
514
+ "eval_runtime": 174.394,
515
+ "eval_samples_per_second": 18.946,
516
+ "eval_steps_per_second": 2.368,
517
+ "eval_wer": 0.4555546807338005,
518
+ "step": 3400
519
+ },
520
+ {
521
+ "epoch": 22.87,
522
+ "learning_rate": 7.235011795330676e-05,
523
+ "loss": 0.1867,
524
+ "step": 3500
525
+ },
526
+ {
527
+ "epoch": 22.87,
528
+ "eval_loss": 0.37910905480384827,
529
+ "eval_runtime": 174.9971,
530
+ "eval_samples_per_second": 18.88,
531
+ "eval_steps_per_second": 2.36,
532
+ "eval_wer": 0.454924809070152,
533
+ "step": 3500
534
+ },
535
+ {
536
+ "epoch": 23.53,
537
+ "learning_rate": 7.15366468722037e-05,
538
+ "loss": 0.1956,
539
  "step": 3600
540
  },
541
  {
542
+ "epoch": 23.53,
543
+ "eval_loss": 0.3568515479564667,
544
+ "eval_runtime": 174.799,
545
+ "eval_samples_per_second": 18.902,
546
+ "eval_steps_per_second": 2.363,
547
+ "eval_wer": 0.45760176364065824,
548
  "step": 3600
549
  },
550
  {
551
+ "epoch": 24.18,
552
+ "learning_rate": 7.072317579110062e-05,
553
+ "loss": 0.1826,
554
+ "step": 3700
555
+ },
556
+ {
557
+ "epoch": 24.18,
558
+ "eval_loss": 0.3747410178184509,
559
+ "eval_runtime": 175.1918,
560
+ "eval_samples_per_second": 18.859,
561
+ "eval_steps_per_second": 2.357,
562
+ "eval_wer": 0.4543736713644595,
563
+ "step": 3700
564
+ },
565
+ {
566
+ "epoch": 24.83,
567
+ "learning_rate": 6.99178394208086e-05,
568
+ "loss": 0.1867,
569
+ "step": 3800
570
+ },
571
+ {
572
+ "epoch": 24.83,
573
+ "eval_loss": 0.36731651425361633,
574
+ "eval_runtime": 175.3726,
575
+ "eval_samples_per_second": 18.84,
576
+ "eval_steps_per_second": 2.355,
577
+ "eval_wer": 0.45366506574285487,
578
+ "step": 3800
579
+ },
580
+ {
581
+ "epoch": 25.49,
582
+ "learning_rate": 6.910436833970553e-05,
583
+ "loss": 0.1902,
584
+ "step": 3900
585
+ },
586
+ {
587
+ "epoch": 25.49,
588
+ "eval_loss": 0.3835786283016205,
589
+ "eval_runtime": 182.8434,
590
+ "eval_samples_per_second": 18.07,
591
+ "eval_steps_per_second": 2.259,
592
+ "eval_wer": 0.4522478544996457,
593
+ "step": 3900
594
+ },
595
+ {
596
+ "epoch": 26.14,
597
+ "learning_rate": 6.829089725860246e-05,
598
+ "loss": 0.1786,
599
  "step": 4000
600
  },
601
  {
602
+ "epoch": 26.14,
603
+ "eval_loss": 0.3528241813182831,
604
+ "eval_runtime": 182.8588,
605
+ "eval_samples_per_second": 18.069,
606
+ "eval_steps_per_second": 2.259,
607
+ "eval_wer": 0.4485473584757106,
608
  "step": 4000
609
  },
610
  {
611
+ "epoch": 26.79,
612
+ "learning_rate": 6.74774261774994e-05,
613
+ "loss": 0.178,
614
+ "step": 4100
615
+ },
616
+ {
617
+ "epoch": 26.79,
618
+ "eval_loss": 0.3756342828273773,
619
+ "eval_runtime": 183.4843,
620
+ "eval_samples_per_second": 18.007,
621
+ "eval_steps_per_second": 2.251,
622
+ "eval_wer": 0.45303519407920634,
623
+ "step": 4100
624
+ },
625
+ {
626
+ "epoch": 27.45,
627
+ "learning_rate": 6.666395509639632e-05,
628
+ "loss": 0.1783,
629
+ "step": 4200
630
+ },
631
+ {
632
+ "epoch": 27.45,
633
+ "eval_loss": 0.38552403450012207,
634
+ "eval_runtime": 185.3761,
635
+ "eval_samples_per_second": 17.823,
636
+ "eval_steps_per_second": 2.228,
637
+ "eval_wer": 0.4515392488780411,
638
+ "step": 4200
639
+ },
640
+ {
641
+ "epoch": 28.1,
642
+ "learning_rate": 6.585048401529326e-05,
643
+ "loss": 0.1747,
644
+ "step": 4300
645
+ },
646
+ {
647
+ "epoch": 28.1,
648
+ "eval_loss": 0.3594723045825958,
649
+ "eval_runtime": 184.1815,
650
+ "eval_samples_per_second": 17.939,
651
+ "eval_steps_per_second": 2.242,
652
+ "eval_wer": 0.4475238170222817,
653
+ "step": 4300
654
+ },
655
+ {
656
+ "epoch": 28.76,
657
+ "learning_rate": 6.503701293419018e-05,
658
+ "loss": 0.1776,
659
  "step": 4400
660
  },
661
  {
662
+ "epoch": 28.76,
663
+ "eval_loss": 0.3899536728858948,
664
+ "eval_runtime": 183.8028,
665
+ "eval_samples_per_second": 17.976,
666
+ "eval_steps_per_second": 2.247,
667
+ "eval_wer": 0.45303519407920634,
668
  "step": 4400
669
  },
670
  {
671
+ "epoch": 29.41,
672
+ "learning_rate": 6.422354185308712e-05,
673
+ "loss": 0.1615,
674
+ "step": 4500
675
+ },
676
+ {
677
+ "epoch": 29.41,
678
+ "eval_loss": 0.37925612926483154,
679
+ "eval_runtime": 184.3645,
680
+ "eval_samples_per_second": 17.921,
681
+ "eval_steps_per_second": 2.24,
682
+ "eval_wer": 0.4487048263916227,
683
+ "step": 4500
684
+ },
685
+ {
686
+ "epoch": 30.07,
687
+ "learning_rate": 6.341007077198405e-05,
688
+ "loss": 0.1665,
689
+ "step": 4600
690
+ },
691
+ {
692
+ "epoch": 30.07,
693
+ "eval_loss": 0.3769548237323761,
694
+ "eval_runtime": 185.5661,
695
+ "eval_samples_per_second": 17.805,
696
+ "eval_steps_per_second": 2.226,
697
+ "eval_wer": 0.4504369734666562,
698
+ "step": 4600
699
+ },
700
+ {
701
+ "epoch": 30.72,
702
+ "learning_rate": 6.2596599690881e-05,
703
+ "loss": 0.1562,
704
+ "step": 4700
705
+ },
706
+ {
707
+ "epoch": 30.72,
708
+ "eval_loss": 0.38725826144218445,
709
+ "eval_runtime": 184.2843,
710
+ "eval_samples_per_second": 17.929,
711
+ "eval_steps_per_second": 2.241,
712
+ "eval_wer": 0.45090937721439256,
713
+ "step": 4700
714
+ },
715
+ {
716
+ "epoch": 31.37,
717
+ "learning_rate": 6.178312860977793e-05,
718
+ "loss": 0.1558,
719
  "step": 4800
720
  },
721
  {
722
+ "epoch": 31.37,
723
+ "eval_loss": 0.37403690814971924,
724
+ "eval_runtime": 184.7842,
725
+ "eval_samples_per_second": 17.88,
726
+ "eval_steps_per_second": 2.235,
727
+ "eval_wer": 0.4494134320132273,
728
  "step": 4800
729
  },
730
  {
731
+ "epoch": 32.03,
732
+ "learning_rate": 6.0969657528674864e-05,
733
+ "loss": 0.1574,
734
+ "step": 4900
735
+ },
736
+ {
737
+ "epoch": 32.03,
738
+ "eval_loss": 0.38782382011413574,
739
+ "eval_runtime": 185.4497,
740
+ "eval_samples_per_second": 17.816,
741
+ "eval_steps_per_second": 2.227,
742
+ "eval_wer": 0.44933469805527126,
743
+ "step": 4900
744
+ },
745
+ {
746
+ "epoch": 32.68,
747
+ "learning_rate": 6.0156186447571796e-05,
748
+ "loss": 0.152,
749
+ "step": 5000
750
+ },
751
+ {
752
+ "epoch": 32.68,
753
+ "eval_loss": 0.36702463030815125,
754
+ "eval_runtime": 184.067,
755
+ "eval_samples_per_second": 17.95,
756
+ "eval_steps_per_second": 2.244,
757
+ "eval_wer": 0.44933469805527126,
758
+ "step": 5000
759
+ },
760
+ {
761
+ "epoch": 33.33,
762
+ "learning_rate": 5.934271536646873e-05,
763
+ "loss": 0.1477,
764
+ "step": 5100
765
+ },
766
+ {
767
+ "epoch": 33.33,
768
+ "eval_loss": 0.36524683237075806,
769
+ "eval_runtime": 186.7144,
770
+ "eval_samples_per_second": 17.695,
771
+ "eval_steps_per_second": 2.212,
772
+ "eval_wer": 0.4496496338870955,
773
+ "step": 5100
774
+ },
775
+ {
776
+ "epoch": 33.98,
777
+ "learning_rate": 5.852924428536566e-05,
778
+ "loss": 0.1561,
779
  "step": 5200
780
  },
781
  {
782
+ "epoch": 33.98,
783
+ "eval_loss": 0.3987789452075958,
784
+ "eval_runtime": 177.1487,
785
+ "eval_samples_per_second": 18.651,
786
+ "eval_steps_per_second": 2.331,
787
+ "eval_wer": 0.4535863317848988,
788
  "step": 5200
789
+ },
790
+ {
791
+ "epoch": 34.64,
792
+ "learning_rate": 5.771577320426259e-05,
793
+ "loss": 0.1441,
794
+ "step": 5300
795
+ },
796
+ {
797
+ "epoch": 34.64,
798
+ "eval_loss": 0.37290704250335693,
799
+ "eval_runtime": 178.4122,
800
+ "eval_samples_per_second": 18.519,
801
+ "eval_steps_per_second": 2.315,
802
+ "eval_wer": 0.4471301472325014,
803
+ "step": 5300
804
+ },
805
+ {
806
+ "epoch": 35.29,
807
+ "learning_rate": 5.691043683397056e-05,
808
+ "loss": 0.1462,
809
+ "step": 5400
810
+ },
811
+ {
812
+ "epoch": 35.29,
813
+ "eval_loss": 0.3913721740245819,
814
+ "eval_runtime": 175.0751,
815
+ "eval_samples_per_second": 18.872,
816
+ "eval_steps_per_second": 2.359,
817
+ "eval_wer": 0.4488622943075348,
818
+ "step": 5400
819
+ },
820
+ {
821
+ "epoch": 35.94,
822
+ "learning_rate": 5.6096965752867494e-05,
823
+ "loss": 0.1388,
824
+ "step": 5500
825
+ },
826
+ {
827
+ "epoch": 35.94,
828
+ "eval_loss": 0.3886808454990387,
829
+ "eval_runtime": 175.0289,
830
+ "eval_samples_per_second": 18.877,
831
+ "eval_steps_per_second": 2.36,
832
+ "eval_wer": 0.44807495472797415,
833
+ "step": 5500
834
+ },
835
+ {
836
+ "epoch": 36.6,
837
+ "learning_rate": 5.5283494671764426e-05,
838
+ "loss": 0.1362,
839
+ "step": 5600
840
+ },
841
+ {
842
+ "epoch": 36.6,
843
+ "eval_loss": 0.3816515803337097,
844
+ "eval_runtime": 175.1136,
845
+ "eval_samples_per_second": 18.868,
846
+ "eval_steps_per_second": 2.358,
847
+ "eval_wer": 0.445476734115424,
848
+ "step": 5600
849
+ },
850
+ {
851
+ "epoch": 37.25,
852
+ "learning_rate": 5.447002359066136e-05,
853
+ "loss": 0.1439,
854
+ "step": 5700
855
+ },
856
+ {
857
+ "epoch": 37.25,
858
+ "eval_loss": 0.39244014024734497,
859
+ "eval_runtime": 175.9891,
860
+ "eval_samples_per_second": 18.774,
861
+ "eval_steps_per_second": 2.347,
862
+ "eval_wer": 0.446657743484765,
863
+ "step": 5700
864
+ },
865
+ {
866
+ "epoch": 37.91,
867
+ "learning_rate": 5.365655250955829e-05,
868
+ "loss": 0.1299,
869
+ "step": 5800
870
+ },
871
+ {
872
+ "epoch": 37.91,
873
+ "eval_loss": 0.3693729341030121,
874
+ "eval_runtime": 176.2438,
875
+ "eval_samples_per_second": 18.747,
876
+ "eval_steps_per_second": 2.343,
877
+ "eval_wer": 0.4430359814187859,
878
+ "step": 5800
879
  }
880
  ],
881
+ "max_steps": 12393,
882
+ "num_train_epochs": 81,
883
+ "total_flos": 2.5038038866869117e+19,
884
  "trial_name": null,
885
  "trial_params": null
886
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:402a953d59f3d2c66918c47d238ae1d51eea2ac6120f7939459a514169d7a01a
3
  size 3003
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:429d56e7f33237cdebb585dfadbe12372aa7b7c12ffbf8faf5185cef71f533cb
3
  size 3003