kimbochen commited on
Commit
dc84781
1 Parent(s): d5473ba

End of training

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 6.11,
3
- "eval_loss": 0.32497844099998474,
4
- "eval_runtime": 1112.1106,
5
- "eval_samples_per_second": 4.234,
6
  "eval_steps_per_second": 0.133,
7
- "eval_wer": 32.594792142530835,
8
- "train_loss": 0.23146729975938796,
9
- "train_runtime": 12715.3657,
10
- "train_samples_per_second": 5.033,
11
- "train_steps_per_second": 0.079
12
  }
 
1
  {
2
+ "epoch": 13.04,
3
+ "eval_loss": 0.4333903193473816,
4
+ "eval_runtime": 1109.1543,
5
+ "eval_samples_per_second": 4.246,
6
  "eval_steps_per_second": 0.133,
7
+ "eval_wer": 32.04202832343535,
8
+ "train_loss": 0.006580068808048963,
9
+ "train_runtime": 18902.2125,
10
+ "train_samples_per_second": 6.772,
11
+ "train_steps_per_second": 0.106
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 6.11,
3
- "eval_loss": 0.32497844099998474,
4
- "eval_runtime": 1112.1106,
5
- "eval_samples_per_second": 4.234,
6
  "eval_steps_per_second": 0.133,
7
- "eval_wer": 32.594792142530835
8
  }
 
1
  {
2
+ "epoch": 13.04,
3
+ "eval_loss": 0.4333903193473816,
4
+ "eval_runtime": 1109.1543,
5
+ "eval_samples_per_second": 4.246,
6
  "eval_steps_per_second": 0.133,
7
+ "eval_wer": 32.04202832343535
8
  }
runs/Dec14_01-26-33_129-213-23-45/events.out.tfevents.1671001275.129-213-23-45.2845517.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5080d66f70737b41adf816e2f5508247c99a789dd3fb2b70d2b24bf439b6281
3
+ size 358
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 6.11,
3
- "train_loss": 0.23146729975938796,
4
- "train_runtime": 12715.3657,
5
- "train_samples_per_second": 5.033,
6
- "train_steps_per_second": 0.079
7
  }
 
1
  {
2
+ "epoch": 13.04,
3
+ "train_loss": 0.006580068808048963,
4
+ "train_runtime": 18902.2125,
5
+ "train_samples_per_second": 6.772,
6
+ "train_steps_per_second": 0.106
7
  }
trainer_state.json CHANGED
@@ -1,310 +1,550 @@
1
  {
2
- "best_metric": 32.594792142530835,
3
- "best_model_checkpoint": "./checkpoint-1000",
4
- "epoch": 6.112,
5
- "global_step": 1000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.03,
12
- "learning_rate": 2.75e-07,
13
- "loss": 1.1711,
14
  "step": 25
15
  },
16
  {
17
- "epoch": 0.05,
18
- "learning_rate": 5.875e-07,
19
- "loss": 0.9928,
20
  "step": 50
21
  },
22
  {
23
- "epoch": 0.07,
24
- "learning_rate": 9.000000000000001e-07,
25
- "loss": 0.6807,
26
  "step": 75
27
  },
28
  {
29
- "epoch": 0.1,
30
- "learning_rate": 1.2125e-06,
31
- "loss": 0.4894,
32
  "step": 100
33
  },
34
  {
35
- "epoch": 0.12,
36
- "learning_rate": 1.525e-06,
37
- "loss": 0.4589,
38
  "step": 125
39
  },
40
  {
41
  "epoch": 1.0,
42
- "learning_rate": 1.8375000000000002e-06,
43
- "loss": 0.4241,
44
  "step": 150
45
  },
46
  {
47
- "epoch": 1.03,
48
- "learning_rate": 2.15e-06,
49
- "loss": 0.3693,
50
  "step": 175
51
  },
52
  {
53
- "epoch": 1.05,
54
- "learning_rate": 2.4625e-06,
55
- "loss": 0.3465,
56
- "step": 200
57
- },
58
- {
59
- "epoch": 1.05,
60
- "eval_loss": 0.3499109745025635,
61
- "eval_runtime": 1155.2135,
62
- "eval_samples_per_second": 4.076,
63
- "eval_steps_per_second": 0.128,
64
- "eval_wer": 41.93238921882138,
65
  "step": 200
66
  },
67
  {
68
- "epoch": 1.08,
69
- "learning_rate": 2.7750000000000005e-06,
70
- "loss": 0.3232,
71
  "step": 225
72
  },
73
  {
74
- "epoch": 1.1,
75
- "learning_rate": 3.0875000000000005e-06,
76
- "loss": 0.2946,
77
  "step": 250
78
  },
79
  {
80
- "epoch": 1.13,
81
- "learning_rate": 3.4000000000000005e-06,
82
- "loss": 0.3059,
83
  "step": 275
84
  },
85
  {
86
  "epoch": 2.0,
87
- "learning_rate": 3.7125000000000005e-06,
88
- "loss": 0.2842,
89
  "step": 300
90
  },
91
  {
92
- "epoch": 2.03,
93
- "learning_rate": 4.0250000000000004e-06,
94
- "loss": 0.2499,
95
  "step": 325
96
  },
97
  {
98
- "epoch": 2.05,
99
- "learning_rate": 4.3375e-06,
100
- "loss": 0.2456,
101
  "step": 350
102
  },
103
  {
104
- "epoch": 2.08,
105
- "learning_rate": 4.65e-06,
106
- "loss": 0.2252,
107
  "step": 375
108
  },
109
  {
110
- "epoch": 2.1,
111
- "learning_rate": 4.9625e-06,
112
- "loss": 0.2137,
113
  "step": 400
114
  },
115
  {
116
- "epoch": 2.1,
117
- "eval_loss": 0.2953178584575653,
118
- "eval_runtime": 1162.2242,
119
- "eval_samples_per_second": 4.052,
120
- "eval_steps_per_second": 0.127,
121
- "eval_wer": 36.29511192325263,
122
  "step": 400
123
  },
124
  {
125
- "epoch": 2.13,
126
- "learning_rate": 5.275e-06,
127
- "loss": 0.2182,
128
  "step": 425
129
  },
130
  {
131
- "epoch": 3.01,
132
- "learning_rate": 5.5875e-06,
133
- "loss": 0.1958,
134
  "step": 450
135
  },
136
  {
137
- "epoch": 3.03,
138
- "learning_rate": 5.9e-06,
139
- "loss": 0.1758,
140
  "step": 475
141
  },
142
  {
143
- "epoch": 3.06,
144
- "learning_rate": 6.2125e-06,
145
- "loss": 0.164,
146
  "step": 500
147
  },
148
  {
149
- "epoch": 3.08,
150
- "learning_rate": 6.525e-06,
151
- "loss": 0.154,
152
  "step": 525
153
  },
154
  {
155
- "epoch": 3.11,
156
- "learning_rate": 6.8375e-06,
157
- "loss": 0.1397,
158
  "step": 550
159
  },
160
  {
161
- "epoch": 3.13,
162
- "learning_rate": 7.15e-06,
163
- "loss": 0.1419,
164
  "step": 575
165
  },
166
  {
167
- "epoch": 4.01,
168
- "learning_rate": 7.4625e-06,
169
- "loss": 0.1255,
170
- "step": 600
171
- },
172
- {
173
- "epoch": 4.01,
174
- "eval_loss": 0.29274308681488037,
175
- "eval_runtime": 1130.2455,
176
- "eval_samples_per_second": 4.166,
177
- "eval_steps_per_second": 0.131,
178
- "eval_wer": 33.72316126084971,
179
  "step": 600
180
  },
181
  {
182
- "epoch": 4.03,
183
- "learning_rate": 7.775000000000001e-06,
184
- "loss": 0.1077,
185
  "step": 625
186
  },
187
  {
188
- "epoch": 4.06,
189
- "learning_rate": 8.0875e-06,
190
- "loss": 0.0996,
191
  "step": 650
192
  },
193
  {
194
- "epoch": 4.08,
195
- "learning_rate": 8.400000000000001e-06,
196
- "loss": 0.091,
197
  "step": 675
198
  },
199
  {
200
- "epoch": 4.11,
201
- "learning_rate": 8.7125e-06,
202
- "loss": 0.0804,
203
  "step": 700
204
  },
205
  {
206
- "epoch": 4.13,
207
- "learning_rate": 9.025e-06,
208
- "loss": 0.0781,
209
  "step": 725
210
  },
211
  {
212
- "epoch": 5.01,
213
- "learning_rate": 9.3375e-06,
214
- "loss": 0.0648,
215
  "step": 750
216
  },
217
  {
218
- "epoch": 5.04,
219
- "learning_rate": 9.65e-06,
220
- "loss": 0.0549,
221
  "step": 775
222
  },
223
  {
224
- "epoch": 5.06,
225
- "learning_rate": 9.9625e-06,
226
- "loss": 0.0509,
227
  "step": 800
228
  },
229
  {
230
- "epoch": 5.06,
231
- "eval_loss": 0.3148699104785919,
232
- "eval_runtime": 1070.2676,
233
- "eval_samples_per_second": 4.4,
234
- "eval_steps_per_second": 0.138,
235
- "eval_wer": 34.05664687071722,
236
  "step": 800
237
  },
238
  {
239
- "epoch": 5.08,
240
- "learning_rate": 8.900000000000001e-06,
241
- "loss": 0.0518,
242
  "step": 825
243
  },
244
  {
245
- "epoch": 5.11,
246
- "learning_rate": 7.650000000000001e-06,
247
- "loss": 0.0406,
248
  "step": 850
249
  },
250
  {
251
- "epoch": 5.13,
252
- "learning_rate": 6.4000000000000006e-06,
253
- "loss": 0.0361,
254
  "step": 875
255
  },
256
  {
257
  "epoch": 6.01,
258
- "learning_rate": 5.150000000000001e-06,
259
- "loss": 0.0299,
260
  "step": 900
261
  },
262
  {
263
- "epoch": 6.04,
264
- "learning_rate": 3.900000000000001e-06,
265
- "loss": 0.0246,
266
  "step": 925
267
  },
268
  {
269
- "epoch": 6.06,
270
- "learning_rate": 2.6500000000000005e-06,
271
- "loss": 0.0218,
272
  "step": 950
273
  },
274
  {
275
- "epoch": 6.09,
276
- "learning_rate": 1.4000000000000001e-06,
277
- "loss": 0.0202,
278
  "step": 975
279
  },
280
  {
281
- "epoch": 6.11,
282
- "learning_rate": 1.5000000000000002e-07,
283
- "loss": 0.0164,
284
  "step": 1000
285
  },
286
  {
287
- "epoch": 6.11,
288
- "eval_loss": 0.32497844099998474,
289
- "eval_runtime": 1129.333,
290
- "eval_samples_per_second": 4.17,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  "eval_steps_per_second": 0.131,
292
- "eval_wer": 32.594792142530835,
293
- "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  },
295
  {
296
- "epoch": 6.11,
297
- "step": 1000,
298
- "total_flos": 1.837596395077632e+19,
299
- "train_loss": 0.23146729975938796,
300
- "train_runtime": 12715.3657,
301
- "train_samples_per_second": 5.033,
302
- "train_steps_per_second": 0.079
303
  }
304
  ],
305
- "max_steps": 1000,
306
  "num_train_epochs": 9223372036854775807,
307
- "total_flos": 1.837596395077632e+19,
308
  "trial_name": null,
309
  "trial_params": null
310
  }
 
1
  {
2
+ "best_metric": 32.04202832343535,
3
+ "best_model_checkpoint": "./checkpoint-2000",
4
+ "epoch": 13.038,
5
+ "global_step": 2000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.01,
12
+ "learning_rate": 2.0833333333333333e-07,
13
+ "loss": 0.011,
14
  "step": 25
15
  },
16
  {
17
+ "epoch": 0.03,
18
+ "learning_rate": 4.1666666666666667e-07,
19
+ "loss": 0.0111,
20
  "step": 50
21
  },
22
  {
23
+ "epoch": 0.04,
24
+ "learning_rate": 6.25e-07,
25
+ "loss": 0.0111,
26
  "step": 75
27
  },
28
  {
29
+ "epoch": 0.05,
30
+ "learning_rate": 8.333333333333333e-07,
31
+ "loss": 0.0132,
32
  "step": 100
33
  },
34
  {
35
+ "epoch": 0.06,
36
+ "learning_rate": 1.0416666666666667e-06,
37
+ "loss": 0.0139,
38
  "step": 125
39
  },
40
  {
41
  "epoch": 1.0,
42
+ "learning_rate": 1.25e-06,
43
+ "loss": 0.013,
44
  "step": 150
45
  },
46
  {
47
+ "epoch": 1.01,
48
+ "learning_rate": 1.4583333333333335e-06,
49
+ "loss": 0.0092,
50
  "step": 175
51
  },
52
  {
53
+ "epoch": 1.03,
54
+ "learning_rate": 1.6666666666666667e-06,
55
+ "loss": 0.0098,
 
 
 
 
 
 
 
 
 
56
  "step": 200
57
  },
58
  {
59
+ "epoch": 1.04,
60
+ "learning_rate": 1.8750000000000003e-06,
61
+ "loss": 0.0091,
62
  "step": 225
63
  },
64
  {
65
+ "epoch": 1.05,
66
+ "learning_rate": 2.0833333333333334e-06,
67
+ "loss": 0.0101,
68
  "step": 250
69
  },
70
  {
71
+ "epoch": 1.06,
72
+ "learning_rate": 2.2916666666666666e-06,
73
+ "loss": 0.0092,
74
  "step": 275
75
  },
76
  {
77
  "epoch": 2.0,
78
+ "learning_rate": 2.5e-06,
79
+ "loss": 0.0084,
80
  "step": 300
81
  },
82
  {
83
+ "epoch": 2.01,
84
+ "learning_rate": 2.7083333333333334e-06,
85
+ "loss": 0.0067,
86
  "step": 325
87
  },
88
  {
89
+ "epoch": 2.03,
90
+ "learning_rate": 2.916666666666667e-06,
91
+ "loss": 0.0066,
92
  "step": 350
93
  },
94
  {
95
+ "epoch": 2.04,
96
+ "learning_rate": 3.125e-06,
97
+ "loss": 0.0066,
98
  "step": 375
99
  },
100
  {
101
+ "epoch": 2.05,
102
+ "learning_rate": 3.3333333333333333e-06,
103
+ "loss": 0.0066,
104
  "step": 400
105
  },
106
  {
107
+ "epoch": 2.05,
108
+ "eval_loss": 0.374257892370224,
109
+ "eval_runtime": 1097.1851,
110
+ "eval_samples_per_second": 4.292,
111
+ "eval_steps_per_second": 0.135,
112
+ "eval_wer": 32.910004568296024,
113
  "step": 400
114
  },
115
  {
116
+ "epoch": 2.06,
117
+ "learning_rate": 3.5416666666666673e-06,
118
+ "loss": 0.0059,
119
  "step": 425
120
  },
121
  {
122
+ "epoch": 3.0,
123
+ "learning_rate": 3.7500000000000005e-06,
124
+ "loss": 0.0053,
125
  "step": 450
126
  },
127
  {
128
+ "epoch": 3.02,
129
+ "learning_rate": 3.958333333333333e-06,
130
+ "loss": 0.0046,
131
  "step": 475
132
  },
133
  {
134
+ "epoch": 3.03,
135
+ "learning_rate": 4.166666666666667e-06,
136
+ "loss": 0.0052,
137
  "step": 500
138
  },
139
  {
140
+ "epoch": 3.04,
141
+ "learning_rate": 4.3750000000000005e-06,
142
+ "loss": 0.0049,
143
  "step": 525
144
  },
145
  {
146
+ "epoch": 3.05,
147
+ "learning_rate": 4.583333333333333e-06,
148
+ "loss": 0.0051,
149
  "step": 550
150
  },
151
  {
152
+ "epoch": 3.07,
153
+ "learning_rate": 4.791666666666668e-06,
154
+ "loss": 0.0047,
155
  "step": 575
156
  },
157
  {
158
+ "epoch": 4.0,
159
+ "learning_rate": 5e-06,
160
+ "loss": 0.0052,
 
 
 
 
 
 
 
 
 
161
  "step": 600
162
  },
163
  {
164
+ "epoch": 4.02,
165
+ "learning_rate": 5.208333333333334e-06,
166
+ "loss": 0.0049,
167
  "step": 625
168
  },
169
  {
170
+ "epoch": 4.03,
171
+ "learning_rate": 5.416666666666667e-06,
172
+ "loss": 0.0052,
173
  "step": 650
174
  },
175
  {
176
+ "epoch": 4.04,
177
+ "learning_rate": 5.625e-06,
178
+ "loss": 0.0059,
179
  "step": 675
180
  },
181
  {
182
+ "epoch": 4.05,
183
+ "learning_rate": 5.833333333333334e-06,
184
+ "loss": 0.0084,
185
  "step": 700
186
  },
187
  {
188
+ "epoch": 4.07,
189
+ "learning_rate": 6.041666666666667e-06,
190
+ "loss": 0.0101,
191
  "step": 725
192
  },
193
  {
194
+ "epoch": 5.0,
195
+ "learning_rate": 6.25e-06,
196
+ "loss": 0.0078,
197
  "step": 750
198
  },
199
  {
200
+ "epoch": 5.02,
201
+ "learning_rate": 6.458333333333334e-06,
202
+ "loss": 0.0077,
203
  "step": 775
204
  },
205
  {
206
+ "epoch": 5.03,
207
+ "learning_rate": 6.666666666666667e-06,
208
+ "loss": 0.0084,
209
  "step": 800
210
  },
211
  {
212
+ "epoch": 5.03,
213
+ "eval_loss": 0.3786599338054657,
214
+ "eval_runtime": 1048.12,
215
+ "eval_samples_per_second": 4.493,
216
+ "eval_steps_per_second": 0.141,
217
+ "eval_wer": 33.41708542713568,
218
  "step": 800
219
  },
220
  {
221
+ "epoch": 5.04,
222
+ "learning_rate": 6.875e-06,
223
+ "loss": 0.0098,
224
  "step": 825
225
  },
226
  {
227
+ "epoch": 5.05,
228
+ "learning_rate": 7.083333333333335e-06,
229
+ "loss": 0.0097,
230
  "step": 850
231
  },
232
  {
233
+ "epoch": 5.07,
234
+ "learning_rate": 7.291666666666667e-06,
235
+ "loss": 0.0093,
236
  "step": 875
237
  },
238
  {
239
  "epoch": 6.01,
240
+ "learning_rate": 7.500000000000001e-06,
241
+ "loss": 0.0092,
242
  "step": 900
243
  },
244
  {
245
+ "epoch": 6.02,
246
+ "learning_rate": 7.708333333333334e-06,
247
+ "loss": 0.0086,
248
  "step": 925
249
  },
250
  {
251
+ "epoch": 6.03,
252
+ "learning_rate": 7.916666666666667e-06,
253
+ "loss": 0.0087,
254
  "step": 950
255
  },
256
  {
257
+ "epoch": 6.04,
258
+ "learning_rate": 8.125000000000001e-06,
259
+ "loss": 0.0091,
260
  "step": 975
261
  },
262
  {
263
+ "epoch": 6.06,
264
+ "learning_rate": 8.333333333333334e-06,
265
+ "loss": 0.0119,
266
  "step": 1000
267
  },
268
  {
269
+ "epoch": 6.07,
270
+ "learning_rate": 8.541666666666666e-06,
271
+ "loss": 0.0115,
272
+ "step": 1025
273
+ },
274
+ {
275
+ "epoch": 7.01,
276
+ "learning_rate": 8.750000000000001e-06,
277
+ "loss": 0.0093,
278
+ "step": 1050
279
+ },
280
+ {
281
+ "epoch": 7.02,
282
+ "learning_rate": 8.958333333333334e-06,
283
+ "loss": 0.01,
284
+ "step": 1075
285
+ },
286
+ {
287
+ "epoch": 7.03,
288
+ "learning_rate": 9.166666666666666e-06,
289
+ "loss": 0.0101,
290
+ "step": 1100
291
+ },
292
+ {
293
+ "epoch": 7.04,
294
+ "learning_rate": 9.375000000000001e-06,
295
+ "loss": 0.0095,
296
+ "step": 1125
297
+ },
298
+ {
299
+ "epoch": 7.06,
300
+ "learning_rate": 9.583333333333335e-06,
301
+ "loss": 0.0103,
302
+ "step": 1150
303
+ },
304
+ {
305
+ "epoch": 7.07,
306
+ "learning_rate": 9.791666666666666e-06,
307
+ "loss": 0.0111,
308
+ "step": 1175
309
+ },
310
+ {
311
+ "epoch": 8.01,
312
+ "learning_rate": 1e-05,
313
+ "loss": 0.0098,
314
+ "step": 1200
315
+ },
316
+ {
317
+ "epoch": 8.01,
318
+ "eval_loss": 0.397890567779541,
319
+ "eval_runtime": 1133.829,
320
+ "eval_samples_per_second": 4.153,
321
  "eval_steps_per_second": 0.131,
322
+ "eval_wer": 33.24805847418913,
323
+ "step": 1200
324
+ },
325
+ {
326
+ "epoch": 8.02,
327
+ "learning_rate": 9.6875e-06,
328
+ "loss": 0.0106,
329
+ "step": 1225
330
+ },
331
+ {
332
+ "epoch": 8.03,
333
+ "learning_rate": 9.375000000000001e-06,
334
+ "loss": 0.009,
335
+ "step": 1250
336
+ },
337
+ {
338
+ "epoch": 8.05,
339
+ "learning_rate": 9.0625e-06,
340
+ "loss": 0.0093,
341
+ "step": 1275
342
+ },
343
+ {
344
+ "epoch": 8.06,
345
+ "learning_rate": 8.750000000000001e-06,
346
+ "loss": 0.0087,
347
+ "step": 1300
348
+ },
349
+ {
350
+ "epoch": 8.07,
351
+ "learning_rate": 8.4375e-06,
352
+ "loss": 0.0098,
353
+ "step": 1325
354
+ },
355
+ {
356
+ "epoch": 9.01,
357
+ "learning_rate": 8.125000000000001e-06,
358
+ "loss": 0.0057,
359
+ "step": 1350
360
+ },
361
+ {
362
+ "epoch": 9.02,
363
+ "learning_rate": 7.8125e-06,
364
+ "loss": 0.0053,
365
+ "step": 1375
366
+ },
367
+ {
368
+ "epoch": 9.03,
369
+ "learning_rate": 7.500000000000001e-06,
370
+ "loss": 0.0061,
371
+ "step": 1400
372
+ },
373
+ {
374
+ "epoch": 9.05,
375
+ "learning_rate": 7.1875e-06,
376
+ "loss": 0.007,
377
+ "step": 1425
378
+ },
379
+ {
380
+ "epoch": 9.06,
381
+ "learning_rate": 6.875e-06,
382
+ "loss": 0.0049,
383
+ "step": 1450
384
+ },
385
+ {
386
+ "epoch": 9.07,
387
+ "learning_rate": 6.5625e-06,
388
+ "loss": 0.0042,
389
+ "step": 1475
390
+ },
391
+ {
392
+ "epoch": 10.01,
393
+ "learning_rate": 6.25e-06,
394
+ "loss": 0.003,
395
+ "step": 1500
396
+ },
397
+ {
398
+ "epoch": 10.02,
399
+ "learning_rate": 5.9375e-06,
400
+ "loss": 0.0048,
401
+ "step": 1525
402
+ },
403
+ {
404
+ "epoch": 10.04,
405
+ "learning_rate": 5.625e-06,
406
+ "loss": 0.0025,
407
+ "step": 1550
408
+ },
409
+ {
410
+ "epoch": 10.05,
411
+ "learning_rate": 5.3125e-06,
412
+ "loss": 0.0028,
413
+ "step": 1575
414
+ },
415
+ {
416
+ "epoch": 10.06,
417
+ "learning_rate": 5e-06,
418
+ "loss": 0.0019,
419
+ "step": 1600
420
+ },
421
+ {
422
+ "epoch": 10.06,
423
+ "eval_loss": 0.40835869312286377,
424
+ "eval_runtime": 1079.5315,
425
+ "eval_samples_per_second": 4.362,
426
+ "eval_steps_per_second": 0.137,
427
+ "eval_wer": 32.311557788944725,
428
+ "step": 1600
429
+ },
430
+ {
431
+ "epoch": 10.07,
432
+ "learning_rate": 4.6875000000000004e-06,
433
+ "loss": 0.0024,
434
+ "step": 1625
435
+ },
436
+ {
437
+ "epoch": 11.01,
438
+ "learning_rate": 4.3750000000000005e-06,
439
+ "loss": 0.0013,
440
+ "step": 1650
441
+ },
442
+ {
443
+ "epoch": 11.02,
444
+ "learning_rate": 4.0625000000000005e-06,
445
+ "loss": 0.0017,
446
+ "step": 1675
447
+ },
448
+ {
449
+ "epoch": 11.04,
450
+ "learning_rate": 3.7500000000000005e-06,
451
+ "loss": 0.0012,
452
+ "step": 1700
453
+ },
454
+ {
455
+ "epoch": 11.05,
456
+ "learning_rate": 3.4375e-06,
457
+ "loss": 0.0012,
458
+ "step": 1725
459
+ },
460
+ {
461
+ "epoch": 11.06,
462
+ "learning_rate": 3.125e-06,
463
+ "loss": 0.001,
464
+ "step": 1750
465
+ },
466
+ {
467
+ "epoch": 11.07,
468
+ "learning_rate": 2.8125e-06,
469
+ "loss": 0.0011,
470
+ "step": 1775
471
+ },
472
+ {
473
+ "epoch": 12.01,
474
+ "learning_rate": 2.5e-06,
475
+ "loss": 0.0009,
476
+ "step": 1800
477
+ },
478
+ {
479
+ "epoch": 12.02,
480
+ "learning_rate": 2.1875000000000002e-06,
481
+ "loss": 0.001,
482
+ "step": 1825
483
+ },
484
+ {
485
+ "epoch": 12.04,
486
+ "learning_rate": 1.8750000000000003e-06,
487
+ "loss": 0.0008,
488
+ "step": 1850
489
+ },
490
+ {
491
+ "epoch": 12.05,
492
+ "learning_rate": 1.5625e-06,
493
+ "loss": 0.0008,
494
+ "step": 1875
495
+ },
496
+ {
497
+ "epoch": 12.06,
498
+ "learning_rate": 1.25e-06,
499
+ "loss": 0.0009,
500
+ "step": 1900
501
+ },
502
+ {
503
+ "epoch": 13.0,
504
+ "learning_rate": 9.375000000000001e-07,
505
+ "loss": 0.0009,
506
+ "step": 1925
507
+ },
508
+ {
509
+ "epoch": 13.01,
510
+ "learning_rate": 6.25e-07,
511
+ "loss": 0.0008,
512
+ "step": 1950
513
+ },
514
+ {
515
+ "epoch": 13.03,
516
+ "learning_rate": 3.125e-07,
517
+ "loss": 0.0009,
518
+ "step": 1975
519
+ },
520
+ {
521
+ "epoch": 13.04,
522
+ "learning_rate": 0.0,
523
+ "loss": 0.0008,
524
+ "step": 2000
525
+ },
526
+ {
527
+ "epoch": 13.04,
528
+ "eval_loss": 0.4333903193473816,
529
+ "eval_runtime": 1067.3464,
530
+ "eval_samples_per_second": 4.412,
531
+ "eval_steps_per_second": 0.139,
532
+ "eval_wer": 32.04202832343535,
533
+ "step": 2000
534
  },
535
  {
536
+ "epoch": 13.04,
537
+ "step": 2000,
538
+ "total_flos": 3.673634428993536e+19,
539
+ "train_loss": 0.006580068808048963,
540
+ "train_runtime": 18902.2125,
541
+ "train_samples_per_second": 6.772,
542
+ "train_steps_per_second": 0.106
543
  }
544
  ],
545
+ "max_steps": 2000,
546
  "num_train_epochs": 9223372036854775807,
547
+ "total_flos": 3.673634428993536e+19,
548
  "trial_name": null,
549
  "trial_params": null
550
  }