AndrewMcDowell commited on
Commit
e855da0
1 Parent(s): 433d77c

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +9 -9
  2. eval_results.json +5 -5
  3. train_results.json +4 -4
  4. trainer_state.json +361 -127
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_loss": 0.9693832397460938,
4
- "eval_runtime": 412.1508,
5
  "eval_samples": 10388,
6
- "eval_samples_per_second": 25.204,
7
- "eval_steps_per_second": 3.152,
8
- "eval_wer": 0.7823963593984128,
9
- "train_loss": 2.3014913469352978,
10
- "train_runtime": 21335.6515,
11
  "train_samples": 38209,
12
- "train_samples_per_second": 17.909,
13
- "train_steps_per_second": 0.14
14
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "eval_loss": 1.5198630094528198,
4
+ "eval_runtime": 420.7858,
5
  "eval_samples": 10388,
6
+ "eval_samples_per_second": 24.687,
7
+ "eval_steps_per_second": 3.087,
8
+ "eval_wer": 0.9682742290545165,
9
+ "train_loss": 2.5334128375029445,
10
+ "train_runtime": 24080.0149,
11
  "train_samples": 38209,
12
+ "train_samples_per_second": 15.868,
13
+ "train_steps_per_second": 0.248
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_loss": 0.9693832397460938,
4
- "eval_runtime": 412.1508,
5
  "eval_samples": 10388,
6
- "eval_samples_per_second": 25.204,
7
- "eval_steps_per_second": 3.152,
8
- "eval_wer": 0.7823963593984128
9
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "eval_loss": 1.5198630094528198,
4
+ "eval_runtime": 420.7858,
5
  "eval_samples": 10388,
6
+ "eval_samples_per_second": 24.687,
7
+ "eval_steps_per_second": 3.087,
8
+ "eval_wer": 0.9682742290545165
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 10.0,
3
- "train_loss": 2.3014913469352978,
4
- "train_runtime": 21335.6515,
5
  "train_samples": 38209,
6
- "train_samples_per_second": 17.909,
7
- "train_steps_per_second": 0.14
8
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "train_loss": 2.5334128375029445,
4
+ "train_runtime": 24080.0149,
5
  "train_samples": 38209,
6
+ "train_samples_per_second": 15.868,
7
+ "train_steps_per_second": 0.248
8
  }
trainer_state.json CHANGED
@@ -1,244 +1,478 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.997489539748955,
5
- "global_step": 2980,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.33,
12
- "learning_rate": 4.9000000000000005e-05,
13
- "loss": 5.3579,
14
  "step": 100
15
  },
16
  {
17
- "epoch": 0.67,
18
- "learning_rate": 9.900000000000001e-05,
19
- "loss": 3.2697,
20
  "step": 200
21
  },
22
  {
23
- "epoch": 1.01,
24
- "learning_rate": 0.000149,
25
- "loss": 2.4711,
26
  "step": 300
27
  },
28
  {
29
- "epoch": 1.34,
30
- "learning_rate": 0.000199,
31
- "loss": 2.1202,
32
  "step": 400
33
  },
34
  {
35
- "epoch": 1.68,
36
- "learning_rate": 0.000249,
37
- "loss": 2.0846,
38
  "step": 500
39
  },
40
  {
41
- "epoch": 1.68,
42
- "eval_loss": 1.1640619039535522,
43
- "eval_runtime": 414.2546,
44
- "eval_samples_per_second": 25.076,
45
- "eval_steps_per_second": 3.136,
46
- "eval_wer": 0.8072035074088462,
47
  "step": 500
48
  },
49
  {
50
- "epoch": 2.01,
51
- "learning_rate": 0.000299,
52
- "loss": 2.1202,
53
  "step": 600
54
  },
55
  {
56
- "epoch": 2.35,
57
- "learning_rate": 0.00034899999999999997,
58
- "loss": 2.1037,
59
  "step": 700
60
  },
61
  {
62
- "epoch": 2.68,
63
- "learning_rate": 0.00039900000000000005,
64
- "loss": 2.0882,
65
  "step": 800
66
  },
67
  {
68
- "epoch": 3.02,
69
- "learning_rate": 0.000449,
70
- "loss": 2.126,
71
  "step": 900
72
  },
73
  {
74
- "epoch": 3.35,
75
- "learning_rate": 0.000499,
76
- "loss": 2.1201,
77
  "step": 1000
78
  },
79
  {
80
- "epoch": 3.35,
81
- "eval_loss": 1.1776171922683716,
82
- "eval_runtime": 424.2371,
83
- "eval_samples_per_second": 24.486,
84
- "eval_steps_per_second": 3.062,
85
- "eval_wer": 0.8329171060177221,
86
  "step": 1000
87
  },
88
  {
89
- "epoch": 3.69,
90
- "learning_rate": 0.000549,
91
- "loss": 2.1435,
92
  "step": 1100
93
  },
94
  {
95
- "epoch": 4.03,
96
- "learning_rate": 0.000599,
97
- "loss": 2.152,
98
  "step": 1200
99
  },
100
  {
101
- "epoch": 4.36,
102
- "learning_rate": 0.0006490000000000001,
103
- "loss": 2.11,
104
  "step": 1300
105
  },
106
  {
107
- "epoch": 4.7,
108
- "learning_rate": 0.000699,
109
- "loss": 2.1503,
110
  "step": 1400
111
  },
112
  {
113
- "epoch": 5.03,
114
- "learning_rate": 0.000749,
115
- "loss": 2.1972,
116
  "step": 1500
117
  },
118
  {
119
- "epoch": 5.03,
120
- "eval_loss": 1.2631869316101074,
121
- "eval_runtime": 410.677,
122
- "eval_samples_per_second": 25.295,
123
- "eval_steps_per_second": 3.163,
124
- "eval_wer": 0.8723754555376732,
125
  "step": 1500
126
  },
127
  {
128
- "epoch": 5.37,
129
- "learning_rate": 0.000799,
130
- "loss": 2.1788,
131
  "step": 1600
132
  },
133
  {
134
- "epoch": 5.7,
135
- "learning_rate": 0.000849,
136
- "loss": 2.1774,
137
  "step": 1700
138
  },
139
  {
140
- "epoch": 6.04,
141
- "learning_rate": 0.0008990000000000001,
142
- "loss": 2.2205,
143
  "step": 1800
144
  },
145
  {
146
- "epoch": 6.37,
147
- "learning_rate": 0.000949,
148
- "loss": 2.2466,
149
  "step": 1900
150
  },
151
  {
152
- "epoch": 6.71,
153
- "learning_rate": 0.000999,
154
- "loss": 2.2643,
155
  "step": 2000
156
  },
157
  {
158
- "epoch": 6.71,
159
- "eval_loss": 1.372324824333191,
160
- "eval_runtime": 411.6031,
161
- "eval_samples_per_second": 25.238,
162
- "eval_steps_per_second": 3.156,
163
- "eval_wer": 0.8982740440645984,
164
  "step": 2000
165
  },
166
  {
167
- "epoch": 7.05,
168
- "learning_rate": 0.0009000000000000001,
169
- "loss": 2.2996,
170
  "step": 2100
171
  },
172
  {
173
- "epoch": 7.38,
174
- "learning_rate": 0.0007979591836734694,
175
- "loss": 2.2775,
176
  "step": 2200
177
  },
178
  {
179
- "epoch": 7.72,
180
- "learning_rate": 0.0006959183673469388,
181
- "loss": 2.2478,
182
  "step": 2300
183
  },
184
  {
185
- "epoch": 8.05,
186
- "learning_rate": 0.0005938775510204082,
187
- "loss": 2.2288,
188
  "step": 2400
189
  },
190
  {
191
- "epoch": 8.39,
192
- "learning_rate": 0.0004918367346938776,
193
- "loss": 2.1649,
194
  "step": 2500
195
  },
196
  {
197
- "epoch": 8.39,
198
- "eval_loss": 1.25503671169281,
199
- "eval_runtime": 416.24,
200
- "eval_samples_per_second": 24.957,
201
- "eval_steps_per_second": 3.121,
202
- "eval_wer": 0.884196311301034,
203
  "step": 2500
204
  },
205
  {
206
- "epoch": 8.72,
207
- "learning_rate": 0.000389795918367347,
208
- "loss": 2.1054,
209
  "step": 2600
210
  },
211
  {
212
- "epoch": 9.06,
213
- "learning_rate": 0.00028775510204081633,
214
- "loss": 2.0872,
215
  "step": 2700
216
  },
217
  {
218
- "epoch": 9.39,
219
- "learning_rate": 0.00018571428571428572,
220
- "loss": 1.9953,
221
  "step": 2800
222
  },
223
  {
224
- "epoch": 9.73,
225
- "learning_rate": 8.36734693877551e-05,
226
- "loss": 1.9474,
227
  "step": 2900
228
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  {
230
  "epoch": 10.0,
231
- "step": 2980,
232
- "total_flos": 1.006981652948512e+20,
233
- "train_loss": 2.3014913469352978,
234
- "train_runtime": 21335.6515,
235
- "train_samples_per_second": 17.909,
236
- "train_steps_per_second": 0.14
237
  }
238
  ],
239
- "max_steps": 2980,
240
  "num_train_epochs": 10,
241
- "total_flos": 1.006981652948512e+20,
242
  "trial_name": null,
243
  "trial_params": null
244
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.999163179916318,
5
+ "global_step": 5970,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.17,
12
+ "learning_rate": 0.000194,
13
+ "loss": 4.6647,
14
  "step": 100
15
  },
16
  {
17
+ "epoch": 0.33,
18
+ "learning_rate": 0.00039400000000000004,
19
+ "loss": 3.2495,
20
  "step": 200
21
  },
22
  {
23
+ "epoch": 0.5,
24
+ "learning_rate": 0.000594,
25
+ "loss": 2.8044,
26
  "step": 300
27
  },
28
  {
29
+ "epoch": 0.67,
30
+ "learning_rate": 0.0007940000000000001,
31
+ "loss": 2.6636,
32
  "step": 400
33
  },
34
  {
35
+ "epoch": 0.84,
36
+ "learning_rate": 0.000994,
37
+ "loss": 2.6638,
38
  "step": 500
39
  },
40
  {
41
+ "epoch": 0.84,
42
+ "eval_loss": 2.3851921558380127,
43
+ "eval_runtime": 415.3375,
44
+ "eval_samples_per_second": 25.011,
45
+ "eval_steps_per_second": 3.128,
46
+ "eval_wer": 0.9974286401391124,
47
  "step": 500
48
  },
49
  {
50
+ "epoch": 1.01,
51
+ "learning_rate": 0.0009822669104204754,
52
+ "loss": 2.6437,
53
  "step": 600
54
  },
55
  {
56
+ "epoch": 1.17,
57
+ "learning_rate": 0.0009639853747714809,
58
+ "loss": 2.6394,
59
  "step": 700
60
  },
61
  {
62
+ "epoch": 1.34,
63
+ "learning_rate": 0.0009457038391224862,
64
+ "loss": 2.6303,
65
  "step": 800
66
  },
67
  {
68
+ "epoch": 1.51,
69
+ "learning_rate": 0.0009274223034734918,
70
+ "loss": 2.6351,
71
  "step": 900
72
  },
73
  {
74
+ "epoch": 1.67,
75
+ "learning_rate": 0.0009091407678244972,
76
+ "loss": 2.6578,
77
  "step": 1000
78
  },
79
  {
80
+ "epoch": 1.67,
81
+ "eval_loss": 2.2796220779418945,
82
+ "eval_runtime": 416.0072,
83
+ "eval_samples_per_second": 24.971,
84
+ "eval_steps_per_second": 3.123,
85
+ "eval_wer": 0.9970586603030135,
86
  "step": 1000
87
  },
88
  {
89
+ "epoch": 1.84,
90
+ "learning_rate": 0.0008908592321755028,
91
+ "loss": 2.6462,
92
  "step": 1100
93
  },
94
  {
95
+ "epoch": 2.01,
96
+ "learning_rate": 0.0008725776965265082,
97
+ "loss": 2.6498,
98
  "step": 1200
99
  },
100
  {
101
+ "epoch": 2.18,
102
+ "learning_rate": 0.0008542961608775137,
103
+ "loss": 2.614,
104
  "step": 1300
105
  },
106
  {
107
+ "epoch": 2.34,
108
+ "learning_rate": 0.0008360146252285192,
109
+ "loss": 2.6209,
110
  "step": 1400
111
  },
112
  {
113
+ "epoch": 2.51,
114
+ "learning_rate": 0.0008177330895795247,
115
+ "loss": 2.6016,
116
  "step": 1500
117
  },
118
  {
119
+ "epoch": 2.51,
120
+ "eval_loss": 2.004575252532959,
121
+ "eval_runtime": 417.037,
122
+ "eval_samples_per_second": 24.909,
123
+ "eval_steps_per_second": 3.115,
124
+ "eval_wer": 0.9960782137373513,
125
  "step": 1500
126
  },
127
  {
128
+ "epoch": 2.68,
129
+ "learning_rate": 0.0007994515539305302,
130
+ "loss": 2.5937,
131
  "step": 1600
132
  },
133
  {
134
+ "epoch": 2.85,
135
+ "learning_rate": 0.0007811700182815357,
136
+ "loss": 2.5909,
137
  "step": 1700
138
  },
139
  {
140
+ "epoch": 3.02,
141
+ "learning_rate": 0.0007628884826325412,
142
+ "loss": 2.5913,
143
  "step": 1800
144
  },
145
  {
146
+ "epoch": 3.18,
147
+ "learning_rate": 0.0007446069469835466,
148
+ "loss": 2.5828,
149
  "step": 1900
150
  },
151
  {
152
+ "epoch": 3.35,
153
+ "learning_rate": 0.0007263254113345521,
154
+ "loss": 2.5752,
155
  "step": 2000
156
  },
157
  {
158
+ "epoch": 3.35,
159
+ "eval_loss": 1.960595726966858,
160
+ "eval_runtime": 416.9162,
161
+ "eval_samples_per_second": 24.916,
162
+ "eval_steps_per_second": 3.116,
163
+ "eval_wer": 0.9961152117209612,
164
  "step": 2000
165
  },
166
  {
167
+ "epoch": 3.52,
168
+ "learning_rate": 0.0007080438756855576,
169
+ "loss": 2.5663,
170
  "step": 2100
171
  },
172
  {
173
+ "epoch": 3.68,
174
+ "learning_rate": 0.0006897623400365632,
175
+ "loss": 2.5729,
176
  "step": 2200
177
  },
178
  {
179
+ "epoch": 3.85,
180
+ "learning_rate": 0.0006714808043875685,
181
+ "loss": 2.5767,
182
  "step": 2300
183
  },
184
  {
185
+ "epoch": 4.02,
186
+ "learning_rate": 0.0006531992687385741,
187
+ "loss": 2.5661,
188
  "step": 2400
189
  },
190
  {
191
+ "epoch": 4.19,
192
+ "learning_rate": 0.0006349177330895795,
193
+ "loss": 2.539,
194
  "step": 2500
195
  },
196
  {
197
+ "epoch": 4.19,
198
+ "eval_loss": 1.8835679292678833,
199
+ "eval_runtime": 417.6452,
200
+ "eval_samples_per_second": 24.873,
201
+ "eval_steps_per_second": 3.11,
202
+ "eval_wer": 0.9939693286715874,
203
  "step": 2500
204
  },
205
  {
206
+ "epoch": 4.35,
207
+ "learning_rate": 0.000616636197440585,
208
+ "loss": 2.5237,
209
  "step": 2600
210
  },
211
  {
212
+ "epoch": 4.52,
213
+ "learning_rate": 0.0005983546617915904,
214
+ "loss": 2.5464,
215
  "step": 2700
216
  },
217
  {
218
+ "epoch": 4.69,
219
+ "learning_rate": 0.000580073126142596,
220
+ "loss": 2.5135,
221
  "step": 2800
222
  },
223
  {
224
+ "epoch": 4.86,
225
+ "learning_rate": 0.0005617915904936015,
226
+ "loss": 2.5058,
227
  "step": 2900
228
  },
229
+ {
230
+ "epoch": 5.03,
231
+ "learning_rate": 0.0005435100548446069,
232
+ "loss": 2.5214,
233
+ "step": 3000
234
+ },
235
+ {
236
+ "epoch": 5.03,
237
+ "eval_loss": 1.859293818473816,
238
+ "eval_runtime": 418.88,
239
+ "eval_samples_per_second": 24.799,
240
+ "eval_steps_per_second": 3.101,
241
+ "eval_wer": 0.9933033649666093,
242
+ "step": 3000
243
+ },
244
+ {
245
+ "epoch": 5.19,
246
+ "learning_rate": 0.0005252285191956125,
247
+ "loss": 2.4984,
248
+ "step": 3100
249
+ },
250
+ {
251
+ "epoch": 5.36,
252
+ "learning_rate": 0.0005069469835466179,
253
+ "loss": 2.4812,
254
+ "step": 3200
255
+ },
256
+ {
257
+ "epoch": 5.53,
258
+ "learning_rate": 0.0004886654478976234,
259
+ "loss": 2.4626,
260
+ "step": 3300
261
+ },
262
+ {
263
+ "epoch": 5.69,
264
+ "learning_rate": 0.0004703839122486289,
265
+ "loss": 2.476,
266
+ "step": 3400
267
+ },
268
+ {
269
+ "epoch": 5.86,
270
+ "learning_rate": 0.00045210237659963436,
271
+ "loss": 2.4684,
272
+ "step": 3500
273
+ },
274
+ {
275
+ "epoch": 5.86,
276
+ "eval_loss": 1.781636357307434,
277
+ "eval_runtime": 415.6525,
278
+ "eval_samples_per_second": 24.992,
279
+ "eval_steps_per_second": 3.125,
280
+ "eval_wer": 0.9884566291137133,
281
+ "step": 3500
282
+ },
283
+ {
284
+ "epoch": 6.03,
285
+ "learning_rate": 0.0004338208409506398,
286
+ "loss": 2.4739,
287
+ "step": 3600
288
+ },
289
+ {
290
+ "epoch": 6.2,
291
+ "learning_rate": 0.0004155393053016453,
292
+ "loss": 2.4494,
293
+ "step": 3700
294
+ },
295
+ {
296
+ "epoch": 6.36,
297
+ "learning_rate": 0.00039725776965265084,
298
+ "loss": 2.4263,
299
+ "step": 3800
300
+ },
301
+ {
302
+ "epoch": 6.53,
303
+ "learning_rate": 0.00037897623400365635,
304
+ "loss": 2.4187,
305
+ "step": 3900
306
+ },
307
+ {
308
+ "epoch": 6.7,
309
+ "learning_rate": 0.0003606946983546618,
310
+ "loss": 2.4134,
311
+ "step": 4000
312
+ },
313
+ {
314
+ "epoch": 6.7,
315
+ "eval_loss": 1.7167690992355347,
316
+ "eval_runtime": 416.8699,
317
+ "eval_samples_per_second": 24.919,
318
+ "eval_steps_per_second": 3.116,
319
+ "eval_wer": 0.9808165454982704,
320
+ "step": 4000
321
+ },
322
+ {
323
+ "epoch": 6.87,
324
+ "learning_rate": 0.0003424131627056673,
325
+ "loss": 2.4008,
326
+ "step": 4100
327
+ },
328
+ {
329
+ "epoch": 7.04,
330
+ "learning_rate": 0.00032413162705667277,
331
+ "loss": 2.4048,
332
+ "step": 4200
333
+ },
334
+ {
335
+ "epoch": 7.2,
336
+ "learning_rate": 0.0003058500914076783,
337
+ "loss": 2.3795,
338
+ "step": 4300
339
+ },
340
+ {
341
+ "epoch": 7.37,
342
+ "learning_rate": 0.00028756855575868374,
343
+ "loss": 2.3803,
344
+ "step": 4400
345
+ },
346
+ {
347
+ "epoch": 7.54,
348
+ "learning_rate": 0.0002692870201096892,
349
+ "loss": 2.3732,
350
+ "step": 4500
351
+ },
352
+ {
353
+ "epoch": 7.54,
354
+ "eval_loss": 1.6406092643737793,
355
+ "eval_runtime": 415.1084,
356
+ "eval_samples_per_second": 25.025,
357
+ "eval_steps_per_second": 3.129,
358
+ "eval_wer": 0.976432284440498,
359
+ "step": 4500
360
+ },
361
+ {
362
+ "epoch": 7.7,
363
+ "learning_rate": 0.0002510054844606947,
364
+ "loss": 2.3657,
365
+ "step": 4600
366
+ },
367
+ {
368
+ "epoch": 7.87,
369
+ "learning_rate": 0.0002327239488117002,
370
+ "loss": 2.3565,
371
+ "step": 4700
372
+ },
373
+ {
374
+ "epoch": 8.04,
375
+ "learning_rate": 0.00021462522851919562,
376
+ "loss": 2.3679,
377
+ "step": 4800
378
+ },
379
+ {
380
+ "epoch": 8.21,
381
+ "learning_rate": 0.0001963436928702011,
382
+ "loss": 2.34,
383
+ "step": 4900
384
+ },
385
+ {
386
+ "epoch": 8.37,
387
+ "learning_rate": 0.00017806215722120658,
388
+ "loss": 2.3371,
389
+ "step": 5000
390
+ },
391
+ {
392
+ "epoch": 8.37,
393
+ "eval_loss": 1.6087424755096436,
394
+ "eval_runtime": 417.7716,
395
+ "eval_samples_per_second": 24.865,
396
+ "eval_steps_per_second": 3.109,
397
+ "eval_wer": 0.9739349205468302,
398
+ "step": 5000
399
+ },
400
+ {
401
+ "epoch": 8.54,
402
+ "learning_rate": 0.00015978062157221207,
403
+ "loss": 2.3216,
404
+ "step": 5100
405
+ },
406
+ {
407
+ "epoch": 8.71,
408
+ "learning_rate": 0.00014149908592321755,
409
+ "loss": 2.3004,
410
+ "step": 5200
411
+ },
412
+ {
413
+ "epoch": 8.88,
414
+ "learning_rate": 0.00012321755027422303,
415
+ "loss": 2.3028,
416
+ "step": 5300
417
+ },
418
+ {
419
+ "epoch": 9.05,
420
+ "learning_rate": 0.00010493601462522852,
421
+ "loss": 2.3099,
422
+ "step": 5400
423
+ },
424
+ {
425
+ "epoch": 9.21,
426
+ "learning_rate": 8.6654478976234e-05,
427
+ "loss": 2.2824,
428
+ "step": 5500
429
+ },
430
+ {
431
+ "epoch": 9.21,
432
+ "eval_loss": 1.5476473569869995,
433
+ "eval_runtime": 417.8751,
434
+ "eval_samples_per_second": 24.859,
435
+ "eval_steps_per_second": 3.109,
436
+ "eval_wer": 0.9695691584808628,
437
+ "step": 5500
438
+ },
439
+ {
440
+ "epoch": 9.38,
441
+ "learning_rate": 6.83729433272395e-05,
442
+ "loss": 2.2888,
443
+ "step": 5600
444
+ },
445
+ {
446
+ "epoch": 9.55,
447
+ "learning_rate": 5.0091407678244975e-05,
448
+ "loss": 2.2764,
449
+ "step": 5700
450
+ },
451
+ {
452
+ "epoch": 9.71,
453
+ "learning_rate": 3.180987202925046e-05,
454
+ "loss": 2.2787,
455
+ "step": 5800
456
+ },
457
+ {
458
+ "epoch": 9.88,
459
+ "learning_rate": 1.3528336380255942e-05,
460
+ "loss": 2.2783,
461
+ "step": 5900
462
+ },
463
  {
464
  "epoch": 10.0,
465
+ "step": 5970,
466
+ "total_flos": 1.0137616284937745e+20,
467
+ "train_loss": 2.5334128375029445,
468
+ "train_runtime": 24080.0149,
469
+ "train_samples_per_second": 15.868,
470
+ "train_steps_per_second": 0.248
471
  }
472
  ],
473
+ "max_steps": 5970,
474
  "num_train_epochs": 10,
475
+ "total_flos": 1.0137616284937745e+20,
476
  "trial_name": null,
477
  "trial_params": null
478
  }