saurabhy27-outcomes committed on
Commit
33ff84e
·
verified ·
1 Parent(s): 88d1306

End of training

Browse files
README.md CHANGED
@@ -3,20 +3,33 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: openai/whisper-large-v3
5
  tags:
 
6
  - generated_from_trainer
 
 
7
  metrics:
8
  - wer
9
  model-index:
10
- - name: whisper-large-v3-medical
11
- results: []
 
 
 
 
 
 
 
 
 
 
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
  should probably proofread and complete it, then remove this comment. -->
16
 
17
- # whisper-large-v3-medical
18
 
19
- This model is a fine-tuned version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.1453
22
  - Wer: 3.2636
 
3
  license: apache-2.0
4
  base_model: openai/whisper-large-v3
5
  tags:
6
+ - whisper-event
7
  - generated_from_trainer
8
+ datasets:
9
+ - OUTCOMESAI/medical_speech_corpus
10
  metrics:
11
  - wer
12
  model-index:
13
+ - name: Whisper Large V3 Medical
14
+ results:
15
+ - task:
16
+ name: Automatic Speech Recognition
17
+ type: automatic-speech-recognition
18
+ dataset:
19
+ name: OUTCOMESAI/medical_speech_corpus en
20
+ type: OUTCOMESAI/medical_speech_corpus
21
+ metrics:
22
+ - name: Wer
23
+ type: wer
24
+ value: 3.2635854592980795
25
  ---
26
 
27
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
28
  should probably proofread and complete it, then remove this comment. -->
29
 
30
+ # Whisper Large V3 Medical
31
 
32
+ This model is a fine-tuned version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) on the OUTCOMESAI/medical_speech_corpus en dataset.
33
  It achieves the following results on the evaluation set:
34
  - Loss: 0.1453
35
  - Wer: 3.2636
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.8255547054322876,
3
+ "eval_loss": 0.145263671875,
4
+ "eval_runtime": 998.2773,
5
+ "eval_samples": 5000,
6
+ "eval_samples_per_second": 1.12,
7
+ "eval_steps_per_second": 0.035,
8
+ "eval_wer": 3.2635854592980795,
9
+ "total_flos": 1.0871892140465376e+21,
10
+ "train_loss": 0.9307616455078125,
11
+ "train_runtime": 97022.7442,
12
+ "train_samples_per_second": 3.298,
13
+ "train_steps_per_second": 0.052
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.8255547054322876,
3
+ "eval_loss": 0.145263671875,
4
+ "eval_runtime": 998.2773,
5
+ "eval_samples": 5000,
6
+ "eval_samples_per_second": 1.12,
7
+ "eval_steps_per_second": 0.035,
8
+ "eval_wer": 3.2635854592980795
9
+ }
runs/Dec08_08-51-14_d27baedf9b3d/events.out.tfevents.1733746088.d27baedf9b3d.62779.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:038de83d50b74d2e4017f170901d1e2ccd240953b7d78b6c47f3f154bc027388
3
+ size 406
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.8255547054322876,
3
+ "total_flos": 1.0871892140465376e+21,
4
+ "train_loss": 0.9307616455078125,
5
+ "train_runtime": 97022.7442,
6
+ "train_samples_per_second": 3.298,
7
+ "train_steps_per_second": 0.052
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,617 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 3.2635854592980795,
3
+ "best_model_checkpoint": "OUTCOMESAI/whisper-large-v3-medical/checkpoint-5000",
4
+ "epoch": 3.8255547054322876,
5
+ "eval_steps": 200,
6
+ "global_step": 5000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.07651109410864575,
13
+ "grad_norm": 4.125481605529785,
14
+ "learning_rate": 4.2874883363489934e-07,
15
+ "loss": 6.0929,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.1530221882172915,
20
+ "grad_norm": 4.159045696258545,
21
+ "learning_rate": 4.971255772939331e-07,
22
+ "loss": 4.2439,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.1530221882172915,
27
+ "eval_loss": 0.29345703125,
28
+ "eval_runtime": 988.7646,
29
+ "eval_samples_per_second": 1.131,
30
+ "eval_steps_per_second": 0.035,
31
+ "eval_wer": 4.507771356816114,
32
+ "step": 200
33
+ },
34
+ {
35
+ "epoch": 0.22953328232593725,
36
+ "grad_norm": 2.488393545150757,
37
+ "learning_rate": 4.903124999999999e-07,
38
+ "loss": 3.5952,
39
+ "step": 300
40
+ },
41
+ {
42
+ "epoch": 0.306044376434583,
43
+ "grad_norm": 2.386241912841797,
44
+ "learning_rate": 4.798958333333334e-07,
45
+ "loss": 3.3374,
46
+ "step": 400
47
+ },
48
+ {
49
+ "epoch": 0.306044376434583,
50
+ "eval_loss": 0.2734375,
51
+ "eval_runtime": 1214.5433,
52
+ "eval_samples_per_second": 0.921,
53
+ "eval_steps_per_second": 0.029,
54
+ "eval_wer": 4.696129057058288,
55
+ "step": 400
56
+ },
57
+ {
58
+ "epoch": 0.38255547054322875,
59
+ "grad_norm": 2.348032236099243,
60
+ "learning_rate": 4.6947916666666664e-07,
61
+ "loss": 3.1968,
62
+ "step": 500
63
+ },
64
+ {
65
+ "epoch": 0.4590665646518745,
66
+ "grad_norm": 2.6516611576080322,
67
+ "learning_rate": 4.5906249999999995e-07,
68
+ "loss": 3.0833,
69
+ "step": 600
70
+ },
71
+ {
72
+ "epoch": 0.4590665646518745,
73
+ "eval_loss": 0.267333984375,
74
+ "eval_runtime": 991.4442,
75
+ "eval_samples_per_second": 1.128,
76
+ "eval_steps_per_second": 0.035,
77
+ "eval_wer": 4.273285240188102,
78
+ "step": 600
79
+ },
80
+ {
81
+ "epoch": 0.5355776587605203,
82
+ "grad_norm": 3.4489781856536865,
83
+ "learning_rate": 4.486458333333333e-07,
84
+ "loss": 2.6306,
85
+ "step": 700
86
+ },
87
+ {
88
+ "epoch": 0.612088752869166,
89
+ "grad_norm": 2.9474337100982666,
90
+ "learning_rate": 4.3822916666666667e-07,
91
+ "loss": 1.8243,
92
+ "step": 800
93
+ },
94
+ {
95
+ "epoch": 0.612088752869166,
96
+ "eval_loss": 0.26806640625,
97
+ "eval_runtime": 991.8395,
98
+ "eval_samples_per_second": 1.127,
99
+ "eval_steps_per_second": 0.035,
100
+ "eval_wer": 4.4372973873377495,
101
+ "step": 800
102
+ },
103
+ {
104
+ "epoch": 0.6885998469778117,
105
+ "grad_norm": 2.7142157554626465,
106
+ "learning_rate": 4.278125e-07,
107
+ "loss": 1.382,
108
+ "step": 900
109
+ },
110
+ {
111
+ "epoch": 0.7651109410864575,
112
+ "grad_norm": 2.386582612991333,
113
+ "learning_rate": 4.1739583333333334e-07,
114
+ "loss": 1.1288,
115
+ "step": 1000
116
+ },
117
+ {
118
+ "epoch": 0.7651109410864575,
119
+ "eval_loss": 0.2548828125,
120
+ "eval_runtime": 991.8205,
121
+ "eval_samples_per_second": 1.127,
122
+ "eval_steps_per_second": 0.035,
123
+ "eval_wer": 4.277129274886922,
124
+ "step": 1000
125
+ },
126
+ {
127
+ "epoch": 0.8416220351951033,
128
+ "grad_norm": 2.2802257537841797,
129
+ "learning_rate": 4.0697916666666665e-07,
130
+ "loss": 0.9419,
131
+ "step": 1100
132
+ },
133
+ {
134
+ "epoch": 0.918133129303749,
135
+ "grad_norm": 2.0405514240264893,
136
+ "learning_rate": 3.9656249999999996e-07,
137
+ "loss": 0.8199,
138
+ "step": 1200
139
+ },
140
+ {
141
+ "epoch": 0.918133129303749,
142
+ "eval_loss": 0.2412109375,
143
+ "eval_runtime": 994.3546,
144
+ "eval_samples_per_second": 1.124,
145
+ "eval_steps_per_second": 0.035,
146
+ "eval_wer": 4.204092615609343,
147
+ "step": 1200
148
+ },
149
+ {
150
+ "epoch": 0.9946442234123948,
151
+ "grad_norm": 1.88164222240448,
152
+ "learning_rate": 3.861458333333333e-07,
153
+ "loss": 0.7493,
154
+ "step": 1300
155
+ },
156
+ {
157
+ "epoch": 1.0711553175210407,
158
+ "grad_norm": 1.8802990913391113,
159
+ "learning_rate": 3.7572916666666663e-07,
160
+ "loss": 0.681,
161
+ "step": 1400
162
+ },
163
+ {
164
+ "epoch": 1.0711553175210407,
165
+ "eval_loss": 0.2310791015625,
166
+ "eval_runtime": 990.487,
167
+ "eval_samples_per_second": 1.129,
168
+ "eval_steps_per_second": 0.035,
169
+ "eval_wer": 4.105429058339634,
170
+ "step": 1400
171
+ },
172
+ {
173
+ "epoch": 1.1476664116296864,
174
+ "grad_norm": 1.6826539039611816,
175
+ "learning_rate": 3.6531249999999994e-07,
176
+ "loss": 0.6296,
177
+ "step": 1500
178
+ },
179
+ {
180
+ "epoch": 1.2241775057383322,
181
+ "grad_norm": 1.7766120433807373,
182
+ "learning_rate": 3.5489583333333336e-07,
183
+ "loss": 0.5798,
184
+ "step": 1600
185
+ },
186
+ {
187
+ "epoch": 1.2241775057383322,
188
+ "eval_loss": 0.21923828125,
189
+ "eval_runtime": 989.3653,
190
+ "eval_samples_per_second": 1.13,
191
+ "eval_steps_per_second": 0.035,
192
+ "eval_wer": 4.009328190869136,
193
+ "step": 1600
194
+ },
195
+ {
196
+ "epoch": 1.300688599846978,
197
+ "grad_norm": 1.597964882850647,
198
+ "learning_rate": 3.4447916666666667e-07,
199
+ "loss": 0.5603,
200
+ "step": 1700
201
+ },
202
+ {
203
+ "epoch": 1.3771996939556237,
204
+ "grad_norm": 1.5654646158218384,
205
+ "learning_rate": 3.340625e-07,
206
+ "loss": 0.5233,
207
+ "step": 1800
208
+ },
209
+ {
210
+ "epoch": 1.3771996939556237,
211
+ "eval_loss": 0.2071533203125,
212
+ "eval_runtime": 987.6154,
213
+ "eval_samples_per_second": 1.132,
214
+ "eval_steps_per_second": 0.035,
215
+ "eval_wer": 3.8927258050049334,
216
+ "step": 1800
217
+ },
218
+ {
219
+ "epoch": 1.4537107880642695,
220
+ "grad_norm": 1.5115400552749634,
221
+ "learning_rate": 3.2364583333333334e-07,
222
+ "loss": 0.4919,
223
+ "step": 1900
224
+ },
225
+ {
226
+ "epoch": 1.5302218821729152,
227
+ "grad_norm": 1.5361956357955933,
228
+ "learning_rate": 3.1322916666666665e-07,
229
+ "loss": 0.463,
230
+ "step": 2000
231
+ },
232
+ {
233
+ "epoch": 1.5302218821729152,
234
+ "eval_loss": 0.19921875,
235
+ "eval_runtime": 984.868,
236
+ "eval_samples_per_second": 1.135,
237
+ "eval_steps_per_second": 0.036,
238
+ "eval_wer": 3.8196891457273554,
239
+ "step": 2000
240
+ },
241
+ {
242
+ "epoch": 1.606732976281561,
243
+ "grad_norm": 1.4659453630447388,
244
+ "learning_rate": 3.0281249999999996e-07,
245
+ "loss": 0.4509,
246
+ "step": 2100
247
+ },
248
+ {
249
+ "epoch": 1.6832440703902067,
250
+ "grad_norm": 1.384372591972351,
251
+ "learning_rate": 2.923958333333333e-07,
252
+ "loss": 0.428,
253
+ "step": 2200
254
+ },
255
+ {
256
+ "epoch": 1.6832440703902067,
257
+ "eval_loss": 0.195068359375,
258
+ "eval_runtime": 985.7714,
259
+ "eval_samples_per_second": 1.134,
260
+ "eval_steps_per_second": 0.036,
261
+ "eval_wer": 3.7748420742411235,
262
+ "step": 2200
263
+ },
264
+ {
265
+ "epoch": 1.7597551644988525,
266
+ "grad_norm": 1.451076865196228,
267
+ "learning_rate": 2.8197916666666663e-07,
268
+ "loss": 0.4161,
269
+ "step": 2300
270
+ },
271
+ {
272
+ "epoch": 1.836266258607498,
273
+ "grad_norm": 1.5242278575897217,
274
+ "learning_rate": 2.715625e-07,
275
+ "loss": 0.3944,
276
+ "step": 2400
277
+ },
278
+ {
279
+ "epoch": 1.836266258607498,
280
+ "eval_loss": 0.1866455078125,
281
+ "eval_runtime": 987.2353,
282
+ "eval_samples_per_second": 1.132,
283
+ "eval_steps_per_second": 0.035,
284
+ "eval_wer": 3.67745986187102,
285
+ "step": 2400
286
+ },
287
+ {
288
+ "epoch": 1.9127773527161438,
289
+ "grad_norm": 1.31293785572052,
290
+ "learning_rate": 2.6114583333333336e-07,
291
+ "loss": 0.38,
292
+ "step": 2500
293
+ },
294
+ {
295
+ "epoch": 1.9892884468247896,
296
+ "grad_norm": 1.3785984516143799,
297
+ "learning_rate": 2.5072916666666667e-07,
298
+ "loss": 0.3682,
299
+ "step": 2600
300
+ },
301
+ {
302
+ "epoch": 1.9892884468247896,
303
+ "eval_loss": 0.17919921875,
304
+ "eval_runtime": 989.9094,
305
+ "eval_samples_per_second": 1.129,
306
+ "eval_steps_per_second": 0.035,
307
+ "eval_wer": 3.604423202593442,
308
+ "step": 2600
309
+ },
310
+ {
311
+ "epoch": 2.0657995409334355,
312
+ "grad_norm": 1.3718817234039307,
313
+ "learning_rate": 2.403125e-07,
314
+ "loss": 0.36,
315
+ "step": 2700
316
+ },
317
+ {
318
+ "epoch": 2.1423106350420813,
319
+ "grad_norm": 1.3219904899597168,
320
+ "learning_rate": 2.298958333333333e-07,
321
+ "loss": 0.3543,
322
+ "step": 2800
323
+ },
324
+ {
325
+ "epoch": 2.1423106350420813,
326
+ "eval_loss": 0.1724853515625,
327
+ "eval_runtime": 995.0367,
328
+ "eval_samples_per_second": 1.124,
329
+ "eval_steps_per_second": 0.035,
330
+ "eval_wer": 3.530105198416258,
331
+ "step": 2800
332
+ },
333
+ {
334
+ "epoch": 2.218821729150727,
335
+ "grad_norm": 1.2633302211761475,
336
+ "learning_rate": 2.1947916666666665e-07,
337
+ "loss": 0.3438,
338
+ "step": 2900
339
+ },
340
+ {
341
+ "epoch": 2.295332823259373,
342
+ "grad_norm": 1.2741142511367798,
343
+ "learning_rate": 2.090625e-07,
344
+ "loss": 0.3368,
345
+ "step": 3000
346
+ },
347
+ {
348
+ "epoch": 2.295332823259373,
349
+ "eval_loss": 0.17138671875,
350
+ "eval_runtime": 991.227,
351
+ "eval_samples_per_second": 1.128,
352
+ "eval_steps_per_second": 0.035,
353
+ "eval_wer": 3.490383506528452,
354
+ "step": 3000
355
+ },
356
+ {
357
+ "epoch": 2.371843917368018,
358
+ "grad_norm": 1.2177200317382812,
359
+ "learning_rate": 1.9864583333333332e-07,
360
+ "loss": 0.3218,
361
+ "step": 3100
362
+ },
363
+ {
364
+ "epoch": 2.4483550114766643,
365
+ "grad_norm": 1.24095618724823,
366
+ "learning_rate": 1.8822916666666666e-07,
367
+ "loss": 0.3136,
368
+ "step": 3200
369
+ },
370
+ {
371
+ "epoch": 2.4483550114766643,
372
+ "eval_loss": 0.164794921875,
373
+ "eval_runtime": 995.8265,
374
+ "eval_samples_per_second": 1.123,
375
+ "eval_steps_per_second": 0.035,
376
+ "eval_wer": 3.45706853913868,
377
+ "step": 3200
378
+ },
379
+ {
380
+ "epoch": 2.5248661055853097,
381
+ "grad_norm": 1.146142840385437,
382
+ "learning_rate": 1.778125e-07,
383
+ "loss": 0.3269,
384
+ "step": 3300
385
+ },
386
+ {
387
+ "epoch": 2.601377199693956,
388
+ "grad_norm": 1.1721028089523315,
389
+ "learning_rate": 1.6739583333333333e-07,
390
+ "loss": 0.3121,
391
+ "step": 3400
392
+ },
393
+ {
394
+ "epoch": 2.601377199693956,
395
+ "eval_loss": 0.160400390625,
396
+ "eval_runtime": 990.5861,
397
+ "eval_samples_per_second": 1.129,
398
+ "eval_steps_per_second": 0.035,
399
+ "eval_wer": 3.4237535717489074,
400
+ "step": 3400
401
+ },
402
+ {
403
+ "epoch": 2.677888293802601,
404
+ "grad_norm": 1.5002176761627197,
405
+ "learning_rate": 1.5697916666666666e-07,
406
+ "loss": 0.2999,
407
+ "step": 3500
408
+ },
409
+ {
410
+ "epoch": 2.7543993879112474,
411
+ "grad_norm": 1.250117301940918,
412
+ "learning_rate": 1.465625e-07,
413
+ "loss": 0.2959,
414
+ "step": 3600
415
+ },
416
+ {
417
+ "epoch": 2.7543993879112474,
418
+ "eval_loss": 0.1561279296875,
419
+ "eval_runtime": 997.6176,
420
+ "eval_samples_per_second": 1.121,
421
+ "eval_steps_per_second": 0.035,
422
+ "eval_wer": 3.395563983957562,
423
+ "step": 3600
424
+ },
425
+ {
426
+ "epoch": 2.8309104820198927,
427
+ "grad_norm": 1.2006161212921143,
428
+ "learning_rate": 1.361458333333333e-07,
429
+ "loss": 0.2942,
430
+ "step": 3700
431
+ },
432
+ {
433
+ "epoch": 2.907421576128539,
434
+ "grad_norm": 1.2639755010604858,
435
+ "learning_rate": 1.2572916666666667e-07,
436
+ "loss": 0.2912,
437
+ "step": 3800
438
+ },
439
+ {
440
+ "epoch": 2.907421576128539,
441
+ "eval_loss": 0.15380859375,
442
+ "eval_runtime": 989.6556,
443
+ "eval_samples_per_second": 1.13,
444
+ "eval_steps_per_second": 0.035,
445
+ "eval_wer": 3.3737811206642494,
446
+ "step": 3800
447
+ },
448
+ {
449
+ "epoch": 2.9839326702371842,
450
+ "grad_norm": 1.1383545398712158,
451
+ "learning_rate": 1.153125e-07,
452
+ "loss": 0.2921,
453
+ "step": 3900
454
+ },
455
+ {
456
+ "epoch": 3.06044376434583,
457
+ "grad_norm": 1.1553541421890259,
458
+ "learning_rate": 1.0489583333333332e-07,
459
+ "loss": 0.2767,
460
+ "step": 4000
461
+ },
462
+ {
463
+ "epoch": 3.06044376434583,
464
+ "eval_loss": 0.151123046875,
465
+ "eval_runtime": 994.8808,
466
+ "eval_samples_per_second": 1.124,
467
+ "eval_steps_per_second": 0.035,
468
+ "eval_wer": 3.3455915328729033,
469
+ "step": 4000
470
+ },
471
+ {
472
+ "epoch": 3.1369548584544757,
473
+ "grad_norm": 14.852476119995117,
474
+ "learning_rate": 9.447916666666667e-08,
475
+ "loss": 0.2857,
476
+ "step": 4100
477
+ },
478
+ {
479
+ "epoch": 3.2134659525631215,
480
+ "grad_norm": 3.1388800144195557,
481
+ "learning_rate": 8.406249999999999e-08,
482
+ "loss": 0.2848,
483
+ "step": 4200
484
+ },
485
+ {
486
+ "epoch": 3.2134659525631215,
487
+ "eval_loss": 0.148681640625,
488
+ "eval_runtime": 1017.2976,
489
+ "eval_samples_per_second": 1.099,
490
+ "eval_steps_per_second": 0.034,
491
+ "eval_wer": 3.319964634880771,
492
+ "step": 4200
493
+ },
494
+ {
495
+ "epoch": 3.2899770466717673,
496
+ "grad_norm": 1.152143120765686,
497
+ "learning_rate": 7.364583333333333e-08,
498
+ "loss": 0.2756,
499
+ "step": 4300
500
+ },
501
+ {
502
+ "epoch": 3.366488140780413,
503
+ "grad_norm": 1.1069058179855347,
504
+ "learning_rate": 6.322916666666666e-08,
505
+ "loss": 0.274,
506
+ "step": 4400
507
+ },
508
+ {
509
+ "epoch": 3.366488140780413,
510
+ "eval_loss": 0.1474609375,
511
+ "eval_runtime": 993.2425,
512
+ "eval_samples_per_second": 1.126,
513
+ "eval_steps_per_second": 0.035,
514
+ "eval_wer": 3.284086977691785,
515
+ "step": 4400
516
+ },
517
+ {
518
+ "epoch": 3.442999234889059,
519
+ "grad_norm": 1.054897427558899,
520
+ "learning_rate": 5.291666666666667e-08,
521
+ "loss": 0.2729,
522
+ "step": 4500
523
+ },
524
+ {
525
+ "epoch": 3.5195103289977046,
526
+ "grad_norm": 1.0845283269882202,
527
+ "learning_rate": 4.2500000000000003e-08,
528
+ "loss": 0.2694,
529
+ "step": 4600
530
+ },
531
+ {
532
+ "epoch": 3.5195103289977046,
533
+ "eval_loss": 0.1463623046875,
534
+ "eval_runtime": 1016.9332,
535
+ "eval_samples_per_second": 1.099,
536
+ "eval_steps_per_second": 0.034,
537
+ "eval_wer": 3.282805632792179,
538
+ "step": 4600
539
+ },
540
+ {
541
+ "epoch": 3.5960214231063503,
542
+ "grad_norm": 1.0754883289337158,
543
+ "learning_rate": 3.208333333333333e-08,
544
+ "loss": 0.2635,
545
+ "step": 4700
546
+ },
547
+ {
548
+ "epoch": 3.672532517214996,
549
+ "grad_norm": 1.3197473287582397,
550
+ "learning_rate": 2.1770833333333332e-08,
551
+ "loss": 0.2731,
552
+ "step": 4800
553
+ },
554
+ {
555
+ "epoch": 3.672532517214996,
556
+ "eval_loss": 0.1455078125,
557
+ "eval_runtime": 989.4621,
558
+ "eval_samples_per_second": 1.13,
559
+ "eval_steps_per_second": 0.035,
560
+ "eval_wer": 3.268710838896506,
561
+ "step": 4800
562
+ },
563
+ {
564
+ "epoch": 3.749043611323642,
565
+ "grad_norm": 1.156948208808899,
566
+ "learning_rate": 1.1354166666666667e-08,
567
+ "loss": 0.2589,
568
+ "step": 4900
569
+ },
570
+ {
571
+ "epoch": 3.8255547054322876,
572
+ "grad_norm": 1.1573657989501953,
573
+ "learning_rate": 9.375e-10,
574
+ "loss": 0.2677,
575
+ "step": 5000
576
+ },
577
+ {
578
+ "epoch": 3.8255547054322876,
579
+ "eval_loss": 0.145263671875,
580
+ "eval_runtime": 992.9783,
581
+ "eval_samples_per_second": 1.126,
582
+ "eval_steps_per_second": 0.035,
583
+ "eval_wer": 3.2635854592980795,
584
+ "step": 5000
585
+ },
586
+ {
587
+ "epoch": 3.8255547054322876,
588
+ "step": 5000,
589
+ "total_flos": 1.0871892140465376e+21,
590
+ "train_loss": 0.9307616455078125,
591
+ "train_runtime": 97022.7442,
592
+ "train_samples_per_second": 3.298,
593
+ "train_steps_per_second": 0.052
594
+ }
595
+ ],
596
+ "logging_steps": 100,
597
+ "max_steps": 5000,
598
+ "num_input_tokens_seen": 0,
599
+ "num_train_epochs": 4,
600
+ "save_steps": 200,
601
+ "stateful_callbacks": {
602
+ "TrainerControl": {
603
+ "args": {
604
+ "should_epoch_stop": false,
605
+ "should_evaluate": false,
606
+ "should_log": false,
607
+ "should_save": true,
608
+ "should_training_stop": true
609
+ },
610
+ "attributes": {}
611
+ }
612
+ },
613
+ "total_flos": 1.0871892140465376e+21,
614
+ "train_batch_size": 64,
615
+ "trial_name": null,
616
+ "trial_params": null
617
+ }