utakumi committed
Commit a96ea52
1 Parent(s): f6a9817

End of training

Files changed (5)
  1. README.md +8 -4
  2. all_results.json +15 -0
  3. eval_results.json +9 -0
  4. train_results.json +9 -0
  5. trainer_state.json +623 -0
README.md CHANGED
@@ -1,8 +1,12 @@
 ---
 library_name: transformers
+language:
+- ja
 license: apache-2.0
 base_model: rinna/japanese-hubert-base
 tags:
+- automatic-speech-recognition
+- mozilla-foundation/common_voice_13_0
 - generated_from_trainer
 datasets:
 - common_voice_13_0
@@ -15,11 +19,11 @@ model-index:
       name: Automatic Speech Recognition
       type: automatic-speech-recognition
     dataset:
-      name: common_voice_13_0
+      name: MOZILLA-FOUNDATION/COMMON_VOICE_13_0 - JA
       type: common_voice_13_0
       config: ja
       split: test
-      args: ja
+      args: 'Config: ja, Training split: train+validation, Eval split: test'
     metrics:
     - name: Wer
       type: wer
@@ -31,9 +35,9 @@ should probably proofread and complete it, then remove this comment. -->
 
 # Hubert-common_voice-ja-demo
 
-This model is a fine-tuned version of [rinna/japanese-hubert-base](https://huggingface.co/rinna/japanese-hubert-base) on the common_voice_13_0 dataset.
+This model is a fine-tuned version of [rinna/japanese-hubert-base](https://huggingface.co/rinna/japanese-hubert-base) on the MOZILLA-FOUNDATION/COMMON_VOICE_13_0 - JA dataset.
 It achieves the following results on the evaluation set:
-- Loss: 5.6950
+- Loss: 5.6933
 - Wer: 1.0
 
 ## Model description
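
A model card like this is normally consumed through the transformers pipeline API. The sketch below shows the usual loading pattern; the repository id "utakumi/Hubert-common_voice-ja-demo" is inferred from the commit author and model name and is an assumption, and since the card reports a WER of 1.0 this checkpoint is unlikely to return meaningful transcriptions yet.

# Minimal inference sketch (assumption: the checkpoint is published as
# "utakumi/Hubert-common_voice-ja-demo").
from transformers import pipeline

asr = pipeline(
    "automatic-speech-recognition",
    model="utakumi/Hubert-common_voice-ja-demo",
)

# Any 16 kHz mono audio file with Japanese speech.
print(asr("sample_ja.wav")["text"])
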
all_results.json ADDED
@@ -0,0 +1,15 @@
+{
+    "epoch": 15.0,
+    "eval_loss": 5.693275451660156,
+    "eval_runtime": 205.8981,
+    "eval_samples": 4961,
+    "eval_samples_per_second": 24.094,
+    "eval_steps_per_second": 3.016,
+    "eval_wer": 1.0,
+    "total_flos": 8.043656369376246e+18,
+    "train_loss": 7.281221960622368,
+    "train_runtime": 28890.9511,
+    "train_samples": 12032,
+    "train_samples_per_second": 6.247,
+    "train_steps_per_second": 0.195
+}
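
The throughput fields in all_results.json are derived from the other values in the same file, so they can be cross-checked directly; a minimal sketch, assuming the file has been downloaded locally as all_results.json:

import json

with open("all_results.json") as f:
    r = json.load(f)

# eval_samples_per_second = eval_samples / eval_runtime
print(r["eval_samples"] / r["eval_runtime"])                  # ~24.094

# train_samples_per_second = train_samples * num_epochs / train_runtime
print(r["train_samples"] * r["epoch"] / r["train_runtime"])   # ~6.247
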
eval_results.json ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 15.0,
+    "eval_loss": 5.693275451660156,
+    "eval_runtime": 205.8981,
+    "eval_samples": 4961,
+    "eval_samples_per_second": 24.094,
+    "eval_steps_per_second": 3.016,
+    "eval_wer": 1.0
+}
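
An eval_wer of 1.0 means the hypotheses contribute no correctly aligned words against the references, i.e. the error count equals the reference length. A minimal illustration of how word error rate behaves, using the jiwer package (the training script may use a different implementation or tokenization, especially for Japanese, where character error rate is often preferred):

import jiwer

# WER = (substitutions + deletions + insertions) / reference word count.
reference  = "konnichiwa sekai"   # 2 reference words
hypothesis = "foo bar"            # 2 hypothesis words, none correct
print(jiwer.wer(reference, hypothesis))  # 1.0 -> every word wrong
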
train_results.json ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 15.0,
+    "total_flos": 8.043656369376246e+18,
+    "train_loss": 7.281221960622368,
+    "train_runtime": 28890.9511,
+    "train_samples": 12032,
+    "train_samples_per_second": 6.247,
+    "train_steps_per_second": 0.195
+}
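
The numbers in train_results.json line up with the step counts in trainer_state.json below. A back-of-envelope check, using only values that appear in these files; the split of the effective batch between gradient accumulation and multiple devices is an assumption:

train_samples = 12032
num_epochs    = 15
total_steps   = 5640     # "max_steps" / final "step" in trainer_state.json
per_device_bs = 16       # "train_batch_size" in trainer_state.json

steps_per_epoch = total_steps / num_epochs           # 376.0
effective_batch = train_samples / steps_per_epoch    # 32.0
print(effective_batch / per_device_bs)               # 2.0 -> e.g. grad accumulation of 2
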
trainer_state.json ADDED
@@ -0,0 +1,623 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 15.0,
+  "eval_steps": 100,
+  "global_step": 5640,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.26595744680851063,
+      "eval_loss": 54.1580696105957,
+      "eval_runtime": 202.8833,
+      "eval_samples_per_second": 24.452,
+      "eval_steps_per_second": 3.061,
+      "eval_wer": 1.0,
+      "step": 100
+    },
+    {
+      "epoch": 0.5319148936170213,
+      "eval_loss": 24.32993507385254,
+      "eval_runtime": 201.7271,
+      "eval_samples_per_second": 24.593,
+      "eval_steps_per_second": 3.078,
+      "eval_wer": 1.0,
+      "step": 200
+    },
+    {
+      "epoch": 0.7978723404255319,
+      "eval_loss": 5.720522403717041,
+      "eval_runtime": 202.6755,
+      "eval_samples_per_second": 24.478,
+      "eval_steps_per_second": 3.064,
+      "eval_wer": 1.0,
+      "step": 300
+    },
+    {
+      "epoch": 1.0638297872340425,
+      "eval_loss": 5.728226184844971,
+      "eval_runtime": 201.555,
+      "eval_samples_per_second": 24.614,
+      "eval_steps_per_second": 3.081,
+      "eval_wer": 1.0,
+      "step": 400
+    },
+    {
+      "epoch": 1.3297872340425532,
+      "grad_norm": 0.829672634601593,
+      "learning_rate": 0.0002964,
+      "loss": 23.1666,
+      "step": 500
+    },
+    {
+      "epoch": 1.3297872340425532,
+      "eval_loss": 5.725348949432373,
+      "eval_runtime": 200.3693,
+      "eval_samples_per_second": 24.759,
+      "eval_steps_per_second": 3.099,
+      "eval_wer": 1.0,
+      "step": 500
+    },
+    {
+      "epoch": 1.5957446808510638,
+      "eval_loss": 5.7276530265808105,
+      "eval_runtime": 200.6833,
+      "eval_samples_per_second": 24.721,
+      "eval_steps_per_second": 3.094,
+      "eval_wer": 1.0,
+      "step": 600
+    },
+    {
+      "epoch": 1.8617021276595744,
+      "eval_loss": 5.661632061004639,
+      "eval_runtime": 201.1477,
+      "eval_samples_per_second": 24.663,
+      "eval_steps_per_second": 3.087,
+      "eval_wer": 1.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.127659574468085,
+      "eval_loss": 5.668219089508057,
+      "eval_runtime": 202.0028,
+      "eval_samples_per_second": 24.559,
+      "eval_steps_per_second": 3.074,
+      "eval_wer": 1.0,
+      "step": 800
+    },
+    {
+      "epoch": 2.393617021276596,
+      "eval_loss": 5.672210693359375,
+      "eval_runtime": 201.836,
+      "eval_samples_per_second": 24.579,
+      "eval_steps_per_second": 3.077,
+      "eval_wer": 1.0,
+      "step": 900
+    },
+    {
+      "epoch": 2.6595744680851063,
+      "grad_norm": 1.0530561208724976,
+      "learning_rate": 0.00027116731517509723,
+      "loss": 5.7461,
+      "step": 1000
+    },
+    {
+      "epoch": 2.6595744680851063,
+      "eval_loss": 5.667008876800537,
+      "eval_runtime": 200.8506,
+      "eval_samples_per_second": 24.7,
+      "eval_steps_per_second": 3.092,
+      "eval_wer": 1.0,
+      "step": 1000
+    },
+    {
+      "epoch": 2.925531914893617,
+      "eval_loss": 5.669973373413086,
+      "eval_runtime": 200.4895,
+      "eval_samples_per_second": 24.744,
+      "eval_steps_per_second": 3.097,
+      "eval_wer": 1.0,
+      "step": 1100
+    },
+    {
+      "epoch": 3.1914893617021276,
+      "eval_loss": 5.679357528686523,
+      "eval_runtime": 200.5114,
+      "eval_samples_per_second": 24.742,
+      "eval_steps_per_second": 3.097,
+      "eval_wer": 1.0,
+      "step": 1200
+    },
+    {
+      "epoch": 3.4574468085106385,
+      "eval_loss": 5.708792209625244,
+      "eval_runtime": 201.877,
+      "eval_samples_per_second": 24.574,
+      "eval_steps_per_second": 3.076,
+      "eval_wer": 1.0,
+      "step": 1300
+    },
+    {
+      "epoch": 3.723404255319149,
+      "eval_loss": 5.797519207000732,
+      "eval_runtime": 201.5816,
+      "eval_samples_per_second": 24.61,
+      "eval_steps_per_second": 3.081,
+      "eval_wer": 1.0,
+      "step": 1400
+    },
+    {
+      "epoch": 3.9893617021276597,
+      "grad_norm": 1.8153401613235474,
+      "learning_rate": 0.00024198443579766536,
+      "loss": 5.7384,
+      "step": 1500
+    },
+    {
+      "epoch": 3.9893617021276597,
+      "eval_loss": 5.685624122619629,
+      "eval_runtime": 201.635,
+      "eval_samples_per_second": 24.604,
+      "eval_steps_per_second": 3.08,
+      "eval_wer": 1.0,
+      "step": 1500
+    },
+    {
+      "epoch": 4.25531914893617,
+      "eval_loss": 5.674282550811768,
+      "eval_runtime": 201.113,
+      "eval_samples_per_second": 24.668,
+      "eval_steps_per_second": 3.088,
+      "eval_wer": 1.0,
+      "step": 1600
+    },
+    {
+      "epoch": 4.5212765957446805,
+      "eval_loss": 5.676913261413574,
+      "eval_runtime": 200.8704,
+      "eval_samples_per_second": 24.698,
+      "eval_steps_per_second": 3.092,
+      "eval_wer": 1.0,
+      "step": 1700
+    },
+    {
+      "epoch": 4.787234042553192,
+      "eval_loss": 5.673855781555176,
+      "eval_runtime": 200.8399,
+      "eval_samples_per_second": 24.701,
+      "eval_steps_per_second": 3.092,
+      "eval_wer": 1.0,
+      "step": 1800
+    },
+    {
+      "epoch": 5.053191489361702,
+      "eval_loss": 5.765174388885498,
+      "eval_runtime": 201.5227,
+      "eval_samples_per_second": 24.618,
+      "eval_steps_per_second": 3.082,
+      "eval_wer": 1.0,
+      "step": 1900
+    },
+    {
+      "epoch": 5.319148936170213,
+      "grad_norm": 1.6339465379714966,
+      "learning_rate": 0.00021280155642023347,
+      "loss": 5.7316,
+      "step": 2000
+    },
+    {
+      "epoch": 5.319148936170213,
+      "eval_loss": 5.7473955154418945,
+      "eval_runtime": 202.3782,
+      "eval_samples_per_second": 24.514,
+      "eval_steps_per_second": 3.069,
+      "eval_wer": 1.0,
+      "step": 2000
+    },
+    {
+      "epoch": 5.585106382978723,
+      "eval_loss": 5.7853193283081055,
+      "eval_runtime": 200.4177,
+      "eval_samples_per_second": 24.753,
+      "eval_steps_per_second": 3.099,
+      "eval_wer": 1.0,
+      "step": 2100
+    },
+    {
+      "epoch": 5.851063829787234,
+      "eval_loss": 5.774293422698975,
+      "eval_runtime": 200.6089,
+      "eval_samples_per_second": 24.73,
+      "eval_steps_per_second": 3.096,
+      "eval_wer": 1.0,
+      "step": 2200
+    },
+    {
+      "epoch": 6.117021276595745,
+      "eval_loss": 5.670905590057373,
+      "eval_runtime": 206.5962,
+      "eval_samples_per_second": 24.013,
+      "eval_steps_per_second": 3.006,
+      "eval_wer": 1.0,
+      "step": 2300
+    },
+    {
+      "epoch": 6.382978723404255,
+      "eval_loss": 5.6688079833984375,
+      "eval_runtime": 201.5545,
+      "eval_samples_per_second": 24.614,
+      "eval_steps_per_second": 3.081,
+      "eval_wer": 1.0,
+      "step": 2400
+    },
+    {
+      "epoch": 6.648936170212766,
+      "grad_norm": 1.8275456428527832,
+      "learning_rate": 0.00018361867704280155,
+      "loss": 5.7285,
+      "step": 2500
+    },
+    {
+      "epoch": 6.648936170212766,
+      "eval_loss": 5.670084476470947,
+      "eval_runtime": 204.4912,
+      "eval_samples_per_second": 24.26,
+      "eval_steps_per_second": 3.037,
+      "eval_wer": 1.0,
+      "step": 2500
+    },
+    {
+      "epoch": 6.914893617021277,
+      "eval_loss": 5.669408798217773,
+      "eval_runtime": 203.0292,
+      "eval_samples_per_second": 24.435,
+      "eval_steps_per_second": 3.059,
+      "eval_wer": 1.0,
+      "step": 2600
+    },
+    {
+      "epoch": 7.180851063829787,
+      "eval_loss": 5.733961582183838,
+      "eval_runtime": 203.9578,
+      "eval_samples_per_second": 24.324,
+      "eval_steps_per_second": 3.045,
+      "eval_wer": 1.0,
+      "step": 2700
+    },
+    {
+      "epoch": 7.446808510638298,
+      "eval_loss": 5.733468055725098,
+      "eval_runtime": 204.4453,
+      "eval_samples_per_second": 24.266,
+      "eval_steps_per_second": 3.037,
+      "eval_wer": 1.0,
+      "step": 2800
+    },
+    {
+      "epoch": 7.712765957446808,
+      "eval_loss": 5.7401442527771,
+      "eval_runtime": 205.5376,
+      "eval_samples_per_second": 24.137,
+      "eval_steps_per_second": 3.021,
+      "eval_wer": 1.0,
+      "step": 2900
+    },
+    {
+      "epoch": 7.9787234042553195,
+      "grad_norm": 4.018444538116455,
+      "learning_rate": 0.00015443579766536962,
+      "loss": 5.7214,
+      "step": 3000
+    },
+    {
+      "epoch": 7.9787234042553195,
+      "eval_loss": 5.6974005699157715,
+      "eval_runtime": 204.123,
+      "eval_samples_per_second": 24.304,
+      "eval_steps_per_second": 3.042,
+      "eval_wer": 1.0,
+      "step": 3000
+    },
+    {
+      "epoch": 8.24468085106383,
+      "eval_loss": 5.670176029205322,
+      "eval_runtime": 202.6691,
+      "eval_samples_per_second": 24.478,
+      "eval_steps_per_second": 3.064,
+      "eval_wer": 1.0,
+      "step": 3100
+    },
+    {
+      "epoch": 8.51063829787234,
+      "eval_loss": 5.672538757324219,
+      "eval_runtime": 204.1094,
+      "eval_samples_per_second": 24.306,
+      "eval_steps_per_second": 3.042,
+      "eval_wer": 1.0,
+      "step": 3200
+    },
+    {
+      "epoch": 8.77659574468085,
+      "eval_loss": 5.670905590057373,
+      "eval_runtime": 204.1388,
+      "eval_samples_per_second": 24.302,
+      "eval_steps_per_second": 3.042,
+      "eval_wer": 1.0,
+      "step": 3300
+    },
+    {
+      "epoch": 9.042553191489361,
+      "eval_loss": 5.702911853790283,
+      "eval_runtime": 201.9231,
+      "eval_samples_per_second": 24.569,
+      "eval_steps_per_second": 3.075,
+      "eval_wer": 1.0,
+      "step": 3400
+    },
+    {
+      "epoch": 9.308510638297872,
+      "grad_norm": 2.366471290588379,
+      "learning_rate": 0.00012525291828793773,
+      "loss": 5.8418,
+      "step": 3500
+    },
+    {
+      "epoch": 9.308510638297872,
+      "eval_loss": 5.681683540344238,
+      "eval_runtime": 204.7137,
+      "eval_samples_per_second": 24.234,
+      "eval_steps_per_second": 3.034,
+      "eval_wer": 1.0,
+      "step": 3500
+    },
+    {
+      "epoch": 9.574468085106384,
+      "eval_loss": 5.6594767570495605,
+      "eval_runtime": 204.4351,
+      "eval_samples_per_second": 24.267,
+      "eval_steps_per_second": 3.038,
+      "eval_wer": 1.0,
+      "step": 3600
+    },
+    {
+      "epoch": 9.840425531914894,
+      "eval_loss": 5.6728105545043945,
+      "eval_runtime": 204.2763,
+      "eval_samples_per_second": 24.286,
+      "eval_steps_per_second": 3.04,
+      "eval_wer": 1.0,
+      "step": 3700
+    },
+    {
+      "epoch": 10.106382978723405,
+      "eval_loss": 5.678412437438965,
+      "eval_runtime": 203.5212,
+      "eval_samples_per_second": 24.376,
+      "eval_steps_per_second": 3.051,
+      "eval_wer": 1.0,
+      "step": 3800
+    },
+    {
+      "epoch": 10.372340425531915,
+      "eval_loss": 5.686674118041992,
+      "eval_runtime": 202.6935,
+      "eval_samples_per_second": 24.475,
+      "eval_steps_per_second": 3.064,
+      "eval_wer": 1.0,
+      "step": 3900
+    },
+    {
+      "epoch": 10.638297872340425,
+      "grad_norm": 1.3308906555175781,
+      "learning_rate": 9.607003891050582e-05,
+      "loss": 5.722,
+      "step": 4000
+    },
+    {
+      "epoch": 10.638297872340425,
+      "eval_loss": 5.6920013427734375,
+      "eval_runtime": 204.6922,
+      "eval_samples_per_second": 24.236,
+      "eval_steps_per_second": 3.034,
+      "eval_wer": 1.0,
+      "step": 4000
+    },
+    {
+      "epoch": 10.904255319148936,
+      "eval_loss": 5.686488151550293,
+      "eval_runtime": 203.9263,
+      "eval_samples_per_second": 24.327,
+      "eval_steps_per_second": 3.045,
+      "eval_wer": 1.0,
+      "step": 4100
+    },
+    {
+      "epoch": 11.170212765957446,
+      "eval_loss": 5.661727428436279,
+      "eval_runtime": 205.3332,
+      "eval_samples_per_second": 24.161,
+      "eval_steps_per_second": 3.024,
+      "eval_wer": 1.0,
+      "step": 4200
+    },
+    {
+      "epoch": 11.436170212765958,
+      "eval_loss": 5.66921854019165,
+      "eval_runtime": 204.4223,
+      "eval_samples_per_second": 24.268,
+      "eval_steps_per_second": 3.038,
+      "eval_wer": 1.0,
+      "step": 4300
+    },
+    {
+      "epoch": 11.702127659574469,
+      "eval_loss": 5.665226936340332,
+      "eval_runtime": 205.6671,
+      "eval_samples_per_second": 24.122,
+      "eval_steps_per_second": 3.019,
+      "eval_wer": 1.0,
+      "step": 4400
+    },
+    {
+      "epoch": 11.96808510638298,
+      "grad_norm": 2.1740901470184326,
+      "learning_rate": 6.688715953307392e-05,
+      "loss": 5.7175,
+      "step": 4500
+    },
+    {
+      "epoch": 11.96808510638298,
+      "eval_loss": 5.665287494659424,
+      "eval_runtime": 204.8619,
+      "eval_samples_per_second": 24.216,
+      "eval_steps_per_second": 3.031,
+      "eval_wer": 1.0,
+      "step": 4500
+    },
+    {
+      "epoch": 12.23404255319149,
+      "eval_loss": 5.7036333084106445,
+      "eval_runtime": 204.757,
+      "eval_samples_per_second": 24.229,
+      "eval_steps_per_second": 3.033,
+      "eval_wer": 1.0,
+      "step": 4600
+    },
+    {
+      "epoch": 12.5,
+      "eval_loss": 5.701287746429443,
+      "eval_runtime": 203.8014,
+      "eval_samples_per_second": 24.342,
+      "eval_steps_per_second": 3.047,
+      "eval_wer": 1.0,
+      "step": 4700
+    },
+    {
+      "epoch": 12.76595744680851,
+      "eval_loss": 5.711965560913086,
+      "eval_runtime": 203.7123,
+      "eval_samples_per_second": 24.353,
+      "eval_steps_per_second": 3.048,
+      "eval_wer": 1.0,
+      "step": 4800
+    },
+    {
+      "epoch": 13.03191489361702,
+      "eval_loss": 5.673594951629639,
+      "eval_runtime": 204.4175,
+      "eval_samples_per_second": 24.269,
+      "eval_steps_per_second": 3.038,
+      "eval_wer": 1.0,
+      "step": 4900
+    },
+    {
+      "epoch": 13.297872340425531,
+      "grad_norm": 0.5258080363273621,
+      "learning_rate": 3.770428015564202e-05,
+      "loss": 5.7121,
+      "step": 5000
+    },
+    {
+      "epoch": 13.297872340425531,
+      "eval_loss": 5.671267032623291,
+      "eval_runtime": 205.4915,
+      "eval_samples_per_second": 24.142,
+      "eval_steps_per_second": 3.022,
+      "eval_wer": 1.0,
+      "step": 5000
+    },
+    {
+      "epoch": 13.563829787234042,
+      "eval_loss": 5.684414863586426,
+      "eval_runtime": 203.8885,
+      "eval_samples_per_second": 24.332,
+      "eval_steps_per_second": 3.046,
+      "eval_wer": 1.0,
+      "step": 5100
+    },
+    {
+      "epoch": 13.829787234042554,
+      "eval_loss": 5.685628414154053,
+      "eval_runtime": 203.6343,
+      "eval_samples_per_second": 24.362,
+      "eval_steps_per_second": 3.05,
+      "eval_wer": 1.0,
+      "step": 5200
+    },
+    {
+      "epoch": 14.095744680851064,
+      "eval_loss": 5.7026543617248535,
+      "eval_runtime": 204.5754,
+      "eval_samples_per_second": 24.25,
+      "eval_steps_per_second": 3.036,
+      "eval_wer": 1.0,
+      "step": 5300
+    },
+    {
+      "epoch": 14.361702127659575,
+      "eval_loss": 5.693380832672119,
+      "eval_runtime": 203.4822,
+      "eval_samples_per_second": 24.381,
+      "eval_steps_per_second": 3.052,
+      "eval_wer": 1.0,
+      "step": 5400
+    },
+    {
+      "epoch": 14.627659574468085,
+      "grad_norm": 1.1351114511489868,
+      "learning_rate": 8.521400778210115e-06,
+      "loss": 5.7053,
+      "step": 5500
+    },
+    {
+      "epoch": 14.627659574468085,
+      "eval_loss": 5.6951751708984375,
+      "eval_runtime": 203.7485,
+      "eval_samples_per_second": 24.349,
+      "eval_steps_per_second": 3.048,
+      "eval_wer": 1.0,
+      "step": 5500
+    },
+    {
+      "epoch": 14.893617021276595,
+      "eval_loss": 5.694952487945557,
+      "eval_runtime": 204.1287,
+      "eval_samples_per_second": 24.303,
+      "eval_steps_per_second": 3.042,
+      "eval_wer": 1.0,
+      "step": 5600
+    },
+    {
+      "epoch": 15.0,
+      "step": 5640,
+      "total_flos": 8.043656369376246e+18,
+      "train_loss": 7.281221960622368,
+      "train_runtime": 28890.9511,
+      "train_samples_per_second": 6.247,
+      "train_steps_per_second": 0.195
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 5640,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 400,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8.043656369376246e+18,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}
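
The "log_history" array mixes evaluation logs (every 100 steps, carrying "eval_loss") with training logs (every 500 steps, carrying "grad_norm" and "learning_rate") and a final summary entry. A minimal sketch for extracting the evaluation curve, assuming trainer_state.json has been downloaded locally; it shows eval_loss dropping sharply by step 300 and then plateauing around 5.66-5.80 while eval_wer stays at 1.0 throughout:

import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the evaluation entries from the mixed log history.
evals = [e for e in state["log_history"] if "eval_loss" in e]
for e in evals:
    print(f'step {e["step"]:>5}  eval_loss {e["eval_loss"]:.4f}  wer {e["eval_wer"]:.2f}')
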