Automatic Speech Recognition
Transformers
Safetensors
Welsh
English
wav2vec2
Inference Endpoints
DewiBrynJones committed on
Commit
09e9fbb
1 Parent(s): 3a473cf

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - wer
@@ -15,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # wav2vec2-xlsr-53-ft-ccv-en-cy
17
 
18
- This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.2765
21
  - Wer: 0.2115
 
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
5
+ - automatic-speech-recognition
6
+ - techiaith/commonvoice_16_1_en_cy
7
  - generated_from_trainer
8
  metrics:
9
  - wer
 
17
 
18
  # wav2vec2-xlsr-53-ft-ccv-en-cy
19
 
20
+ This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on the TECHIAITH/COMMONVOICE_16_1_EN_CY - DEFAULT dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.2765
23
  - Wer: 0.2115
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 4.55,
3
- "eval_loss": 0.2908598780632019,
4
- "eval_runtime": 1109.9533,
5
- "eval_samples": 26295,
6
- "eval_samples_per_second": 23.69,
7
- "eval_steps_per_second": 0.741,
8
- "eval_wer": 0.21777283505046477,
9
- "train_loss": 0.47586327913072374,
10
- "train_runtime": 55725.2112,
11
- "train_samples": 126693,
12
- "train_samples_per_second": 10.336,
13
- "train_steps_per_second": 0.162
14
  }
 
1
  {
2
+ "epoch": 4.52,
3
+ "eval_loss": 0.2764733135700226,
4
+ "eval_runtime": 1091.0858,
5
+ "eval_samples": 26228,
6
+ "eval_samples_per_second": 24.038,
7
+ "eval_steps_per_second": 0.752,
8
+ "eval_wer": 0.21145248459613483,
9
+ "train_loss": 0.49683192168341744,
10
+ "train_runtime": 52483.3074,
11
+ "train_samples": 127287,
12
+ "train_samples_per_second": 10.975,
13
+ "train_steps_per_second": 0.171
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 4.55,
3
- "eval_loss": 0.2908598780632019,
4
- "eval_runtime": 1109.9533,
5
- "eval_samples": 26295,
6
- "eval_samples_per_second": 23.69,
7
- "eval_steps_per_second": 0.741,
8
- "eval_wer": 0.21777283505046477
9
  }
 
1
  {
2
+ "epoch": 4.52,
3
+ "eval_loss": 0.2764733135700226,
4
+ "eval_runtime": 1091.0858,
5
+ "eval_samples": 26228,
6
+ "eval_samples_per_second": 24.038,
7
+ "eval_steps_per_second": 0.752,
8
+ "eval_wer": 0.21145248459613483
9
  }
runs/Mar05_08-26-36_6f484a5b3164/events.out.tfevents.1709687641.6f484a5b3164.1068.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71ddd99e3067192b0e3694f4bd27c83f3242d678d541fa16fcc435052292331f
3
+ size 406
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.55,
3
- "train_loss": 0.47586327913072374,
4
- "train_runtime": 55725.2112,
5
- "train_samples": 126693,
6
- "train_samples_per_second": 10.336,
7
- "train_steps_per_second": 0.162
8
  }
 
1
  {
2
+ "epoch": 4.52,
3
+ "train_loss": 0.49683192168341744,
4
+ "train_runtime": 52483.3074,
5
+ "train_samples": 127287,
6
+ "train_samples_per_second": 10.975,
7
+ "train_steps_per_second": 0.171
8
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 0.2908598780632019,
3
  "best_model_checkpoint": "/models/hfhub/DewiBrynJones/wav2vec2-xlsr-53-ft-ccv-en-cy/checkpoint-9000",
4
- "epoch": 4.545454545454545,
5
  "eval_steps": 500,
6
  "global_step": 9000,
7
  "is_hyper_param_search": false,
@@ -10,313 +10,337 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.2,
13
- "learning_rate": 0.00014925,
14
- "loss": 5.8377,
 
15
  "step": 400
16
  },
17
  {
18
  "epoch": 0.25,
19
- "eval_loss": 1.2190359830856323,
20
- "eval_runtime": 1220.4523,
21
- "eval_samples_per_second": 21.545,
22
- "eval_steps_per_second": 0.674,
23
- "eval_wer": 0.8568814654720188,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 0.4,
28
- "learning_rate": 0.00029925,
29
- "loss": 0.9829,
 
30
  "step": 800
31
  },
32
  {
33
- "epoch": 0.51,
34
- "eval_loss": 0.5584714412689209,
35
- "eval_runtime": 1188.1001,
36
- "eval_samples_per_second": 22.132,
37
- "eval_steps_per_second": 0.692,
38
- "eval_wer": 0.4700631596617496,
39
  "step": 1000
40
  },
41
  {
42
- "epoch": 0.61,
43
- "learning_rate": 0.00028543902439024386,
44
- "loss": 0.45,
 
45
  "step": 1200
46
  },
47
  {
48
- "epoch": 0.76,
49
- "eval_loss": 0.4735090434551239,
50
- "eval_runtime": 1202.8188,
51
- "eval_samples_per_second": 21.861,
52
- "eval_steps_per_second": 0.683,
53
- "eval_wer": 0.39008750026229094,
54
  "step": 1500
55
  },
56
  {
57
- "epoch": 0.81,
58
- "learning_rate": 0.00027080487804878047,
59
- "loss": 0.3636,
 
60
  "step": 1600
61
  },
62
  {
63
  "epoch": 1.01,
64
- "learning_rate": 0.00025617073170731703,
65
- "loss": 0.3151,
 
66
  "step": 2000
67
  },
68
  {
69
  "epoch": 1.01,
70
- "eval_loss": 0.4124945402145386,
71
- "eval_runtime": 1160.0644,
72
- "eval_samples_per_second": 22.667,
73
- "eval_steps_per_second": 0.709,
74
- "eval_wer": 0.34183435801666073,
75
  "step": 2000
76
  },
77
  {
78
  "epoch": 1.21,
79
- "learning_rate": 0.00024153658536585365,
80
- "loss": 0.2524,
 
81
  "step": 2400
82
  },
83
  {
84
  "epoch": 1.26,
85
- "eval_loss": 0.3830628991127014,
86
- "eval_runtime": 1409.4338,
87
- "eval_samples_per_second": 18.656,
88
- "eval_steps_per_second": 0.583,
89
- "eval_wer": 0.3117191598296158,
90
  "step": 2500
91
  },
92
  {
93
  "epoch": 1.41,
94
- "learning_rate": 0.00022690243902439024,
95
- "loss": 0.243,
 
96
  "step": 2800
97
  },
98
  {
99
- "epoch": 1.52,
100
- "eval_loss": 0.3661448061466217,
101
- "eval_runtime": 1425.6726,
102
- "eval_samples_per_second": 18.444,
103
- "eval_steps_per_second": 0.577,
104
- "eval_wer": 0.30780368046666806,
105
  "step": 3000
106
  },
107
  {
108
- "epoch": 1.62,
109
- "learning_rate": 0.0002122682926829268,
110
- "loss": 0.2341,
 
111
  "step": 3200
112
  },
113
  {
114
- "epoch": 1.77,
115
- "eval_loss": 0.3488619327545166,
116
- "eval_runtime": 1416.3145,
117
- "eval_samples_per_second": 18.566,
118
- "eval_steps_per_second": 0.58,
119
- "eval_wer": 0.2883060201019787,
120
  "step": 3500
121
  },
122
  {
123
- "epoch": 1.82,
124
- "learning_rate": 0.00019763414634146341,
125
- "loss": 0.2235,
 
126
  "step": 3600
127
  },
128
  {
129
- "epoch": 2.02,
130
- "learning_rate": 0.00018299999999999998,
131
- "loss": 0.211,
 
132
  "step": 4000
133
  },
134
  {
135
- "epoch": 2.02,
136
- "eval_loss": 0.34998372197151184,
137
- "eval_runtime": 1154.299,
138
- "eval_samples_per_second": 22.78,
139
- "eval_steps_per_second": 0.712,
140
- "eval_wer": 0.27384434605619323,
141
  "step": 4000
142
  },
143
  {
144
- "epoch": 2.22,
145
- "learning_rate": 0.0001683658536585366,
146
- "loss": 0.1702,
 
147
  "step": 4400
148
  },
149
  {
150
- "epoch": 2.27,
151
- "eval_loss": 0.34594303369522095,
152
- "eval_runtime": 1230.9263,
153
- "eval_samples_per_second": 21.362,
154
- "eval_steps_per_second": 0.668,
155
- "eval_wer": 0.27035272887508655,
156
  "step": 4500
157
  },
158
  {
159
- "epoch": 2.42,
160
- "learning_rate": 0.00015373170731707315,
161
- "loss": 0.1634,
 
162
  "step": 4800
163
  },
164
  {
165
- "epoch": 2.53,
166
- "eval_loss": 0.33047276735305786,
167
- "eval_runtime": 1138.5459,
168
- "eval_samples_per_second": 23.095,
169
- "eval_steps_per_second": 0.722,
170
- "eval_wer": 0.2583125249176406,
171
  "step": 5000
172
  },
173
  {
174
- "epoch": 2.63,
175
- "learning_rate": 0.00013909756097560974,
176
- "loss": 0.1608,
 
177
  "step": 5200
178
  },
179
  {
180
- "epoch": 2.78,
181
- "eval_loss": 0.3136747479438782,
182
- "eval_runtime": 1199.9023,
183
- "eval_samples_per_second": 21.914,
184
- "eval_steps_per_second": 0.685,
185
- "eval_wer": 0.24792580313490148,
186
  "step": 5500
187
  },
188
  {
189
- "epoch": 2.83,
190
- "learning_rate": 0.00012446341463414633,
191
- "loss": 0.1542,
 
192
  "step": 5600
193
  },
194
  {
195
- "epoch": 3.03,
196
- "learning_rate": 0.00010982926829268292,
197
- "loss": 0.1481,
 
198
  "step": 6000
199
  },
200
  {
201
- "epoch": 3.03,
202
- "eval_loss": 0.32883504033088684,
203
- "eval_runtime": 1167.9676,
204
- "eval_samples_per_second": 22.513,
205
- "eval_steps_per_second": 0.704,
206
- "eval_wer": 0.2562183939400298,
207
  "step": 6000
208
  },
209
  {
210
- "epoch": 3.23,
211
- "learning_rate": 9.519512195121951e-05,
212
- "loss": 0.1216,
 
213
  "step": 6400
214
  },
215
  {
216
- "epoch": 3.28,
217
- "eval_loss": 0.3174259662628174,
218
- "eval_runtime": 1161.665,
219
- "eval_samples_per_second": 22.636,
220
- "eval_steps_per_second": 0.708,
221
- "eval_wer": 0.24461464213022222,
222
  "step": 6500
223
  },
224
  {
225
- "epoch": 3.43,
226
- "learning_rate": 8.05609756097561e-05,
227
- "loss": 0.1181,
 
228
  "step": 6800
229
  },
230
  {
231
- "epoch": 3.54,
232
- "eval_loss": 0.30002185702323914,
233
- "eval_runtime": 1207.6988,
234
- "eval_samples_per_second": 21.773,
235
- "eval_steps_per_second": 0.681,
236
- "eval_wer": 0.2324569318253352,
237
  "step": 7000
238
  },
239
  {
240
- "epoch": 3.64,
241
- "learning_rate": 6.592682926829267e-05,
242
- "loss": 0.1143,
 
243
  "step": 7200
244
  },
245
  {
246
- "epoch": 3.79,
247
- "eval_loss": 0.29287537932395935,
248
- "eval_runtime": 1095.1242,
249
- "eval_samples_per_second": 24.011,
250
- "eval_steps_per_second": 0.751,
251
- "eval_wer": 0.23258702813857357,
252
  "step": 7500
253
  },
254
  {
255
- "epoch": 3.84,
256
- "learning_rate": 5.129268292682927e-05,
257
- "loss": 0.1139,
 
258
  "step": 7600
259
  },
260
  {
261
- "epoch": 4.04,
262
- "learning_rate": 3.665853658536585e-05,
263
- "loss": 0.1049,
 
264
  "step": 8000
265
  },
266
  {
267
- "epoch": 4.04,
268
- "eval_loss": 0.2921387255191803,
269
- "eval_runtime": 1091.9168,
270
- "eval_samples_per_second": 24.082,
271
- "eval_steps_per_second": 0.753,
272
- "eval_wer": 0.22178483748452482,
273
  "step": 8000
274
  },
275
  {
276
- "epoch": 4.24,
277
- "learning_rate": 2.2024390243902437e-05,
278
- "loss": 0.0913,
 
279
  "step": 8400
280
  },
281
  {
282
- "epoch": 4.29,
283
- "eval_loss": 0.2968423664569855,
284
- "eval_runtime": 1104.263,
285
- "eval_samples_per_second": 23.812,
286
- "eval_steps_per_second": 0.744,
287
- "eval_wer": 0.2208196067734016,
288
  "step": 8500
289
  },
290
  {
291
- "epoch": 4.44,
292
- "learning_rate": 7.4268292682926826e-06,
293
- "loss": 0.0883,
 
294
  "step": 8800
295
  },
296
  {
297
- "epoch": 4.55,
298
- "eval_loss": 0.2908598780632019,
299
- "eval_runtime": 1095.7607,
300
- "eval_samples_per_second": 23.997,
301
- "eval_steps_per_second": 0.75,
302
- "eval_wer": 0.21777283505046477,
303
  "step": 9000
304
  },
305
  {
306
- "epoch": 4.55,
307
  "step": 9000,
308
- "total_flos": 8.920242900072627e+19,
309
- "train_loss": 0.47586327913072374,
310
- "train_runtime": 55725.2112,
311
- "train_samples_per_second": 10.336,
312
- "train_steps_per_second": 0.162
313
  }
314
  ],
315
  "logging_steps": 400,
316
  "max_steps": 9000,
 
317
  "num_train_epochs": 5,
318
  "save_steps": 500,
319
- "total_flos": 8.920242900072627e+19,
 
320
  "trial_name": null,
321
  "trial_params": null
322
  }
 
1
  {
2
+ "best_metric": 0.2764733135700226,
3
  "best_model_checkpoint": "/models/hfhub/DewiBrynJones/wav2vec2-xlsr-53-ft-ccv-en-cy/checkpoint-9000",
4
+ "epoch": 4.524886877828054,
5
  "eval_steps": 500,
6
  "global_step": 9000,
7
  "is_hyper_param_search": false,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.2,
13
+ "grad_norm": 1.6815159320831299,
14
+ "learning_rate": 0.00014774999999999999,
15
+ "loss": 5.9898,
16
  "step": 400
17
  },
18
  {
19
  "epoch": 0.25,
20
+ "eval_loss": 1.3093085289001465,
21
+ "eval_runtime": 3176.8808,
22
+ "eval_samples_per_second": 8.256,
23
+ "eval_steps_per_second": 0.258,
24
+ "eval_wer": 0.7970769457237188,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 0.4,
29
+ "grad_norm": 2.3292043209075928,
30
+ "learning_rate": 0.00029775,
31
+ "loss": 1.0749,
32
  "step": 800
33
  },
34
  {
35
+ "epoch": 0.5,
36
+ "eval_loss": 0.5815957188606262,
37
+ "eval_runtime": 1123.0217,
38
+ "eval_samples_per_second": 23.355,
39
+ "eval_steps_per_second": 0.73,
40
+ "eval_wer": 0.4617458414821357,
41
  "step": 1000
42
  },
43
  {
44
+ "epoch": 0.6,
45
+ "grad_norm": 2.9652466773986816,
46
+ "learning_rate": 0.0002855853658536585,
47
+ "loss": 0.4332,
48
  "step": 1200
49
  },
50
  {
51
+ "epoch": 0.75,
52
+ "eval_loss": 0.48338082432746887,
53
+ "eval_runtime": 1101.5414,
54
+ "eval_samples_per_second": 23.81,
55
+ "eval_steps_per_second": 0.744,
56
+ "eval_wer": 0.4091476878430383,
57
  "step": 1500
58
  },
59
  {
60
+ "epoch": 0.8,
61
+ "grad_norm": 3.57124924659729,
62
+ "learning_rate": 0.0002709512195121951,
63
+ "loss": 0.3655,
64
  "step": 1600
65
  },
66
  {
67
  "epoch": 1.01,
68
+ "grad_norm": 1.1706229448318481,
69
+ "learning_rate": 0.0002563170731707317,
70
+ "loss": 0.3303,
71
  "step": 2000
72
  },
73
  {
74
  "epoch": 1.01,
75
+ "eval_loss": 0.42033129930496216,
76
+ "eval_runtime": 1101.1368,
77
+ "eval_samples_per_second": 23.819,
78
+ "eval_steps_per_second": 0.745,
79
+ "eval_wer": 0.3419174394885707,
80
  "step": 2000
81
  },
82
  {
83
  "epoch": 1.21,
84
+ "grad_norm": 0.8928599953651428,
85
+ "learning_rate": 0.0002416829268292683,
86
+ "loss": 0.276,
87
  "step": 2400
88
  },
89
  {
90
  "epoch": 1.26,
91
+ "eval_loss": 0.3909631669521332,
92
+ "eval_runtime": 1098.4606,
93
+ "eval_samples_per_second": 23.877,
94
+ "eval_steps_per_second": 0.746,
95
+ "eval_wer": 0.3186423569490884,
96
  "step": 2500
97
  },
98
  {
99
  "epoch": 1.41,
100
+ "grad_norm": 0.6678148508071899,
101
+ "learning_rate": 0.00022704878048780485,
102
+ "loss": 0.2591,
103
  "step": 2800
104
  },
105
  {
106
+ "epoch": 1.51,
107
+ "eval_loss": 0.39008986949920654,
108
+ "eval_runtime": 1093.6554,
109
+ "eval_samples_per_second": 23.982,
110
+ "eval_steps_per_second": 0.75,
111
+ "eval_wer": 0.3067188190019557,
112
  "step": 3000
113
  },
114
  {
115
+ "epoch": 1.61,
116
+ "grad_norm": 0.7449674606323242,
117
+ "learning_rate": 0.00021241463414634144,
118
+ "loss": 0.2501,
119
  "step": 3200
120
  },
121
  {
122
+ "epoch": 1.76,
123
+ "eval_loss": 0.3645510971546173,
124
+ "eval_runtime": 1101.42,
125
+ "eval_samples_per_second": 23.813,
126
+ "eval_steps_per_second": 0.744,
127
+ "eval_wer": 0.2895379891910079,
128
  "step": 3500
129
  },
130
  {
131
+ "epoch": 1.81,
132
+ "grad_norm": 0.994420051574707,
133
+ "learning_rate": 0.00019778048780487803,
134
+ "loss": 0.2332,
135
  "step": 3600
136
  },
137
  {
138
+ "epoch": 2.01,
139
+ "grad_norm": 0.632382333278656,
140
+ "learning_rate": 0.00018314634146341462,
141
+ "loss": 0.224,
142
  "step": 4000
143
  },
144
  {
145
+ "epoch": 2.01,
146
+ "eval_loss": 0.35174447298049927,
147
+ "eval_runtime": 1113.1837,
148
+ "eval_samples_per_second": 23.561,
149
+ "eval_steps_per_second": 0.737,
150
+ "eval_wer": 0.2805501230206296,
151
  "step": 4000
152
  },
153
  {
154
+ "epoch": 2.21,
155
+ "grad_norm": 0.5861485600471497,
156
+ "learning_rate": 0.00016851219512195123,
157
+ "loss": 0.182,
158
  "step": 4400
159
  },
160
  {
161
+ "epoch": 2.26,
162
+ "eval_loss": 0.33475443720817566,
163
+ "eval_runtime": 1111.5845,
164
+ "eval_samples_per_second": 23.595,
165
+ "eval_steps_per_second": 0.738,
166
+ "eval_wer": 0.2655689441255021,
167
  "step": 4500
168
  },
169
  {
170
+ "epoch": 2.41,
171
+ "grad_norm": 0.585738480091095,
172
+ "learning_rate": 0.0001538780487804878,
173
+ "loss": 0.1777,
174
  "step": 4800
175
  },
176
  {
177
+ "epoch": 2.51,
178
+ "eval_loss": 0.32769647240638733,
179
+ "eval_runtime": 1109.5932,
180
+ "eval_samples_per_second": 23.637,
181
+ "eval_steps_per_second": 0.739,
182
+ "eval_wer": 0.2611948772948079,
183
  "step": 5000
184
  },
185
  {
186
+ "epoch": 2.61,
187
+ "grad_norm": 0.6404664516448975,
188
+ "learning_rate": 0.00013924390243902438,
189
+ "loss": 0.1734,
190
  "step": 5200
191
  },
192
  {
193
+ "epoch": 2.77,
194
+ "eval_loss": 0.33233708143234253,
195
+ "eval_runtime": 1114.1252,
196
+ "eval_samples_per_second": 23.541,
197
+ "eval_steps_per_second": 0.736,
198
+ "eval_wer": 0.2643113999116775,
199
  "step": 5500
200
  },
201
  {
202
+ "epoch": 2.82,
203
+ "grad_norm": 1.567084550857544,
204
+ "learning_rate": 0.00012460975609756097,
205
+ "loss": 0.1704,
206
  "step": 5600
207
  },
208
  {
209
+ "epoch": 3.02,
210
+ "grad_norm": 1.35818612575531,
211
+ "learning_rate": 0.00010997560975609755,
212
+ "loss": 0.1629,
213
  "step": 6000
214
  },
215
  {
216
+ "epoch": 3.02,
217
+ "eval_loss": 0.31713536381721497,
218
+ "eval_runtime": 1084.7842,
219
+ "eval_samples_per_second": 24.178,
220
+ "eval_steps_per_second": 0.756,
221
+ "eval_wer": 0.24851428931928585,
222
  "step": 6000
223
  },
224
  {
225
+ "epoch": 3.22,
226
+ "grad_norm": 1.0975894927978516,
227
+ "learning_rate": 9.534146341463413e-05,
228
+ "loss": 0.1338,
229
  "step": 6400
230
  },
231
  {
232
+ "epoch": 3.27,
233
+ "eval_loss": 0.310283362865448,
234
+ "eval_runtime": 1090.9879,
235
+ "eval_samples_per_second": 24.041,
236
+ "eval_steps_per_second": 0.752,
237
+ "eval_wer": 0.23984186066073643,
238
  "step": 6500
239
  },
240
  {
241
+ "epoch": 3.42,
242
+ "grad_norm": 1.2747470140457153,
243
+ "learning_rate": 8.070731707317072e-05,
244
+ "loss": 0.1292,
245
  "step": 6800
246
  },
247
  {
248
+ "epoch": 3.52,
249
+ "eval_loss": 0.2933865785598755,
250
+ "eval_runtime": 1076.7354,
251
+ "eval_samples_per_second": 24.359,
252
+ "eval_steps_per_second": 0.762,
253
+ "eval_wer": 0.22680798267196603,
254
  "step": 7000
255
  },
256
  {
257
+ "epoch": 3.62,
258
+ "grad_norm": 0.5606548190116882,
259
+ "learning_rate": 6.607317073170731e-05,
260
+ "loss": 0.1264,
261
  "step": 7200
262
  },
263
  {
264
+ "epoch": 3.77,
265
+ "eval_loss": 0.29226595163345337,
266
+ "eval_runtime": 1074.899,
267
+ "eval_samples_per_second": 24.4,
268
+ "eval_steps_per_second": 0.763,
269
+ "eval_wer": 0.22483965259815364,
270
  "step": 7500
271
  },
272
  {
273
+ "epoch": 3.82,
274
+ "grad_norm": 1.5185168981552124,
275
+ "learning_rate": 5.14390243902439e-05,
276
+ "loss": 0.1241,
277
  "step": 7600
278
  },
279
  {
280
+ "epoch": 4.02,
281
+ "grad_norm": 0.7815582156181335,
282
+ "learning_rate": 3.680487804878048e-05,
283
+ "loss": 0.118,
284
  "step": 8000
285
  },
286
  {
287
+ "epoch": 4.02,
288
+ "eval_loss": 0.28800907731056213,
289
+ "eval_runtime": 1083.1518,
290
+ "eval_samples_per_second": 24.215,
291
+ "eval_steps_per_second": 0.757,
292
+ "eval_wer": 0.21931739322440225,
293
  "step": 8000
294
  },
295
  {
296
+ "epoch": 4.22,
297
+ "grad_norm": 1.339690089225769,
298
+ "learning_rate": 2.217073170731707e-05,
299
+ "loss": 0.0996,
300
  "step": 8400
301
  },
302
  {
303
+ "epoch": 4.27,
304
+ "eval_loss": 0.2792861759662628,
305
+ "eval_runtime": 1078.7477,
306
+ "eval_samples_per_second": 24.313,
307
+ "eval_steps_per_second": 0.76,
308
+ "eval_wer": 0.21242403213256786,
309
  "step": 8500
310
  },
311
  {
312
+ "epoch": 4.42,
313
+ "grad_norm": 1.0311238765716553,
314
+ "learning_rate": 7.536585365853659e-06,
315
+ "loss": 0.0969,
316
  "step": 8800
317
  },
318
  {
319
+ "epoch": 4.52,
320
+ "eval_loss": 0.2764733135700226,
321
+ "eval_runtime": 1074.1619,
322
+ "eval_samples_per_second": 24.417,
323
+ "eval_steps_per_second": 0.763,
324
+ "eval_wer": 0.21145248459613483,
325
  "step": 9000
326
  },
327
  {
328
+ "epoch": 4.52,
329
  "step": 9000,
330
+ "total_flos": 8.933861078537978e+19,
331
+ "train_loss": 0.49683192168341744,
332
+ "train_runtime": 52483.3074,
333
+ "train_samples_per_second": 10.975,
334
+ "train_steps_per_second": 0.171
335
  }
336
  ],
337
  "logging_steps": 400,
338
  "max_steps": 9000,
339
+ "num_input_tokens_seen": 0,
340
  "num_train_epochs": 5,
341
  "save_steps": 500,
342
+ "total_flos": 8.933861078537978e+19,
343
+ "train_batch_size": 32,
344
  "trial_name": null,
345
  "trial_params": null
346
  }