drmeeseeks committed on
Commit
1aaecd9
1 Parent(s): 15e190c

End of training

Browse files
all_results.json CHANGED
@@ -1,7 +1,13 @@
1
  {
2
- "epoch": 142.86,
3
- "train_loss": 0.4370949484743178,
4
- "train_runtime": 4327.5129,
5
- "train_samples_per_second": 14.789,
6
- "train_steps_per_second": 0.231
 
 
 
 
 
 
7
  }
 
1
  {
2
+ "epoch": 2000.0,
3
+ "eval_loss": 6.801175117492676,
4
+ "eval_runtime": 8.6897,
5
+ "eval_samples": 5,
6
+ "eval_samples_per_second": 0.575,
7
+ "eval_steps_per_second": 0.115,
8
+ "eval_wer": 100.0,
9
+ "train_loss": 0.10164999849759625,
10
+ "train_runtime": 1221.0979,
11
+ "train_samples_per_second": 104.824,
12
+ "train_steps_per_second": 1.638
13
  }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2000.0,
3
+ "eval_loss": 6.801175117492676,
4
+ "eval_runtime": 8.6897,
5
+ "eval_samples": 5,
6
+ "eval_samples_per_second": 0.575,
7
+ "eval_steps_per_second": 0.115,
8
+ "eval_wer": 100.0
9
+ }
runs/Dec25_15-57-57_129-213-85-198/events.out.tfevents.1671985223.129-213-85-198.71965.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dfacc0e460d4c92f158e375c72a33993fb2f825683cf77987f47dea65bf5c59
3
+ size 358
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 142.86,
3
- "train_loss": 0.4370949484743178,
4
- "train_runtime": 4327.5129,
5
- "train_samples_per_second": 14.789,
6
- "train_steps_per_second": 0.231
7
  }
 
1
  {
2
+ "epoch": 2000.0,
3
+ "train_loss": 0.10164999849759625,
4
+ "train_runtime": 1221.0979,
5
+ "train_samples_per_second": 104.824,
6
+ "train_steps_per_second": 1.638
7
  }
trainer_state.json CHANGED
@@ -1,265 +1,685 @@
1
  {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 142.85714285714286,
5
- "global_step": 1000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 3.57,
12
  "learning_rate": 4.2000000000000006e-07,
13
- "loss": 3.0968,
14
  "step": 25
15
  },
16
  {
17
- "epoch": 7.14,
18
  "learning_rate": 9.200000000000001e-07,
19
- "loss": 2.4565,
20
  "step": 50
21
  },
22
  {
23
- "epoch": 10.71,
24
- "learning_rate": 1.42e-06,
25
- "loss": 1.9962,
26
  "step": 75
27
  },
28
  {
29
- "epoch": 14.29,
30
- "learning_rate": 1.9200000000000003e-06,
31
- "loss": 1.7133,
 
 
 
 
 
 
 
 
 
32
  "step": 100
33
  },
34
  {
35
- "epoch": 17.86,
36
- "learning_rate": 2.42e-06,
37
- "loss": 1.5526,
38
  "step": 125
39
  },
40
  {
41
- "epoch": 21.43,
42
- "learning_rate": 2.92e-06,
43
- "loss": 1.4427,
44
  "step": 150
45
  },
46
  {
47
- "epoch": 25.0,
48
- "learning_rate": 3.4200000000000007e-06,
49
- "loss": 1.3382,
50
  "step": 175
51
  },
52
  {
53
- "epoch": 28.57,
54
- "learning_rate": 3.920000000000001e-06,
55
- "loss": 1.178,
56
  "step": 200
57
  },
58
  {
59
- "epoch": 32.14,
60
- "learning_rate": 4.42e-06,
61
- "loss": 0.9536,
 
 
 
 
 
 
 
 
 
62
  "step": 225
63
  },
64
  {
65
- "epoch": 35.71,
66
- "learning_rate": 4.92e-06,
67
- "loss": 0.7107,
68
  "step": 250
69
  },
70
  {
71
- "epoch": 39.29,
72
- "learning_rate": 5.420000000000001e-06,
73
- "loss": 0.4791,
74
  "step": 275
75
  },
76
  {
77
- "epoch": 42.86,
78
- "learning_rate": 5.92e-06,
79
- "loss": 0.2683,
 
 
 
 
 
 
 
 
 
80
  "step": 300
81
  },
82
  {
83
- "epoch": 46.43,
84
- "learning_rate": 6.42e-06,
85
- "loss": 0.1293,
86
  "step": 325
87
  },
88
  {
89
- "epoch": 50.0,
90
- "learning_rate": 6.92e-06,
91
- "loss": 0.0561,
92
  "step": 350
93
  },
94
  {
95
- "epoch": 53.57,
96
- "learning_rate": 7.420000000000001e-06,
97
- "loss": 0.03,
98
  "step": 375
99
  },
100
  {
101
- "epoch": 57.14,
102
- "learning_rate": 7.92e-06,
103
- "loss": 0.0178,
104
  "step": 400
105
  },
106
  {
107
- "epoch": 60.71,
108
- "learning_rate": 8.42e-06,
109
- "loss": 0.0134,
 
 
 
 
 
 
 
 
 
110
  "step": 425
111
  },
112
  {
113
- "epoch": 64.29,
114
- "learning_rate": 8.920000000000001e-06,
115
- "loss": 0.0087,
116
  "step": 450
117
  },
118
  {
119
- "epoch": 67.86,
120
- "learning_rate": 9.42e-06,
121
- "loss": 0.008,
122
  "step": 475
123
  },
124
  {
125
- "epoch": 71.43,
126
- "learning_rate": 9.920000000000002e-06,
127
- "loss": 0.0064,
128
  "step": 500
129
  },
130
  {
131
- "epoch": 75.0,
132
- "learning_rate": 9.58e-06,
133
- "loss": 0.0068,
 
 
 
 
 
 
 
 
 
134
  "step": 525
135
  },
136
  {
137
- "epoch": 78.57,
138
- "learning_rate": 9.080000000000001e-06,
139
- "loss": 0.0049,
140
  "step": 550
141
  },
142
  {
143
- "epoch": 82.14,
144
- "learning_rate": 8.580000000000001e-06,
145
- "loss": 0.0035,
146
  "step": 575
147
  },
148
  {
149
- "epoch": 85.71,
150
- "learning_rate": 8.08e-06,
151
- "loss": 0.0022,
152
  "step": 600
153
  },
154
  {
155
- "epoch": 89.29,
156
- "learning_rate": 7.58e-06,
157
- "loss": 0.0015,
 
 
 
 
 
 
 
 
 
158
  "step": 625
159
  },
160
  {
161
- "epoch": 92.86,
162
- "learning_rate": 7.08e-06,
163
- "loss": 0.0017,
164
  "step": 650
165
  },
166
  {
167
- "epoch": 96.43,
168
- "learning_rate": 6.5800000000000005e-06,
169
- "loss": 0.0014,
170
  "step": 675
171
  },
172
  {
173
- "epoch": 100.0,
174
- "learning_rate": 6.08e-06,
175
- "loss": 0.0013,
176
  "step": 700
177
  },
178
  {
179
- "epoch": 103.57,
180
- "learning_rate": 5.580000000000001e-06,
181
- "loss": 0.0006,
 
 
 
 
 
 
 
 
 
182
  "step": 725
183
  },
184
  {
185
- "epoch": 107.14,
186
- "learning_rate": 5.0800000000000005e-06,
187
- "loss": 0.0004,
188
  "step": 750
189
  },
190
  {
191
- "epoch": 110.71,
192
- "learning_rate": 4.58e-06,
193
- "loss": 0.0004,
194
  "step": 775
195
  },
196
  {
197
- "epoch": 114.29,
198
- "learning_rate": 4.08e-06,
199
- "loss": 0.0004,
200
  "step": 800
201
  },
202
  {
203
- "epoch": 117.86,
204
- "learning_rate": 3.58e-06,
205
- "loss": 0.0004,
 
 
 
 
 
 
 
 
 
206
  "step": 825
207
  },
208
  {
209
- "epoch": 121.43,
210
- "learning_rate": 3.08e-06,
211
- "loss": 0.0004,
212
  "step": 850
213
  },
214
  {
215
- "epoch": 125.0,
216
- "learning_rate": 2.5800000000000003e-06,
217
- "loss": 0.0004,
218
  "step": 875
219
  },
220
  {
221
- "epoch": 128.57,
222
- "learning_rate": 2.08e-06,
223
- "loss": 0.0004,
224
  "step": 900
225
  },
226
  {
227
- "epoch": 132.14,
228
- "learning_rate": 1.5800000000000001e-06,
229
- "loss": 0.0004,
 
 
 
 
 
 
 
 
 
230
  "step": 925
231
  },
232
  {
233
- "epoch": 135.71,
234
- "learning_rate": 1.08e-06,
235
- "loss": 0.0004,
236
  "step": 950
237
  },
238
  {
239
- "epoch": 139.29,
240
- "learning_rate": 5.800000000000001e-07,
241
- "loss": 0.0004,
242
  "step": 975
243
  },
244
  {
245
- "epoch": 142.86,
246
- "learning_rate": 8e-08,
247
- "loss": 0.0004,
248
  "step": 1000
249
  },
250
  {
251
- "epoch": 142.86,
252
- "step": 1000,
253
- "total_flos": 1.838750736678912e+19,
254
- "train_loss": 0.4370949484743178,
255
- "train_runtime": 4327.5129,
256
- "train_samples_per_second": 14.789,
257
- "train_steps_per_second": 0.231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  }
259
  ],
260
- "max_steps": 1000,
261
- "num_train_epochs": 143,
262
- "total_flos": 1.838750736678912e+19,
263
  "trial_name": null,
264
  "trial_params": null
265
  }
 
1
  {
2
+ "best_metric": 100.0,
3
+ "best_model_checkpoint": "./whisper-small-amet/checkpoint-1100",
4
+ "epoch": 2000.0,
5
+ "global_step": 2000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 25.0,
12
  "learning_rate": 4.2000000000000006e-07,
13
+ "loss": 2.9938,
14
  "step": 25
15
  },
16
  {
17
+ "epoch": 50.0,
18
  "learning_rate": 9.200000000000001e-07,
19
+ "loss": 2.2746,
20
  "step": 50
21
  },
22
  {
23
+ "epoch": 75.0,
24
+ "learning_rate": 1.4000000000000001e-06,
25
+ "loss": 1.7055,
26
  "step": 75
27
  },
28
  {
29
+ "epoch": 100.0,
30
+ "learning_rate": 1.9000000000000002e-06,
31
+ "loss": 0.9013,
32
+ "step": 100
33
+ },
34
+ {
35
+ "epoch": 100.0,
36
+ "eval_loss": 2.7051408290863037,
37
+ "eval_runtime": 8.4302,
38
+ "eval_samples_per_second": 0.593,
39
+ "eval_steps_per_second": 0.119,
40
+ "eval_wer": 276.0,
41
  "step": 100
42
  },
43
  {
44
+ "epoch": 125.0,
45
+ "learning_rate": 2.4000000000000003e-06,
46
+ "loss": 0.1244,
47
  "step": 125
48
  },
49
  {
50
+ "epoch": 150.0,
51
+ "learning_rate": 2.9e-06,
52
+ "loss": 0.0031,
53
  "step": 150
54
  },
55
  {
56
+ "epoch": 175.0,
57
+ "learning_rate": 3.4000000000000005e-06,
58
+ "loss": 0.0004,
59
  "step": 175
60
  },
61
  {
62
+ "epoch": 200.0,
63
+ "learning_rate": 3.900000000000001e-06,
64
+ "loss": 0.0002,
65
  "step": 200
66
  },
67
  {
68
+ "epoch": 200.0,
69
+ "eval_loss": 3.741528034210205,
70
+ "eval_runtime": 8.6419,
71
+ "eval_samples_per_second": 0.579,
72
+ "eval_steps_per_second": 0.116,
73
+ "eval_wer": 334.6666666666667,
74
+ "step": 200
75
+ },
76
+ {
77
+ "epoch": 225.0,
78
+ "learning_rate": 4.4e-06,
79
+ "loss": 0.0002,
80
  "step": 225
81
  },
82
  {
83
+ "epoch": 250.0,
84
+ "learning_rate": 4.9000000000000005e-06,
85
+ "loss": 0.0001,
86
  "step": 250
87
  },
88
  {
89
+ "epoch": 275.0,
90
+ "learning_rate": 5.400000000000001e-06,
91
+ "loss": 0.0001,
92
  "step": 275
93
  },
94
  {
95
+ "epoch": 300.0,
96
+ "learning_rate": 5.9e-06,
97
+ "loss": 0.0001,
98
+ "step": 300
99
+ },
100
+ {
101
+ "epoch": 300.0,
102
+ "eval_loss": 3.840170383453369,
103
+ "eval_runtime": 1.5102,
104
+ "eval_samples_per_second": 3.311,
105
+ "eval_steps_per_second": 0.662,
106
+ "eval_wer": 117.33333333333333,
107
  "step": 300
108
  },
109
  {
110
+ "epoch": 325.0,
111
+ "learning_rate": 6.4000000000000006e-06,
112
+ "loss": 0.0001,
113
  "step": 325
114
  },
115
  {
116
+ "epoch": 350.0,
117
+ "learning_rate": 6.9e-06,
118
+ "loss": 0.0001,
119
  "step": 350
120
  },
121
  {
122
+ "epoch": 375.0,
123
+ "learning_rate": 7.4e-06,
124
+ "loss": 0.0001,
125
  "step": 375
126
  },
127
  {
128
+ "epoch": 400.0,
129
+ "learning_rate": 7.9e-06,
130
+ "loss": 0.0001,
131
  "step": 400
132
  },
133
  {
134
+ "epoch": 400.0,
135
+ "eval_loss": 3.8931400775909424,
136
+ "eval_runtime": 8.4556,
137
+ "eval_samples_per_second": 0.591,
138
+ "eval_steps_per_second": 0.118,
139
+ "eval_wer": 340.0,
140
+ "step": 400
141
+ },
142
+ {
143
+ "epoch": 425.0,
144
+ "learning_rate": 8.400000000000001e-06,
145
+ "loss": 0.0001,
146
  "step": 425
147
  },
148
  {
149
+ "epoch": 450.0,
150
+ "learning_rate": 8.900000000000001e-06,
151
+ "loss": 0.0001,
152
  "step": 450
153
  },
154
  {
155
+ "epoch": 475.0,
156
+ "learning_rate": 9.4e-06,
157
+ "loss": 0.0001,
158
  "step": 475
159
  },
160
  {
161
+ "epoch": 500.0,
162
+ "learning_rate": 9.9e-06,
163
+ "loss": 0.0001,
164
  "step": 500
165
  },
166
  {
167
+ "epoch": 500.0,
168
+ "eval_loss": 4.06705904006958,
169
+ "eval_runtime": 8.4836,
170
+ "eval_samples_per_second": 0.589,
171
+ "eval_steps_per_second": 0.118,
172
+ "eval_wer": 397.3333333333333,
173
+ "step": 500
174
+ },
175
+ {
176
+ "epoch": 525.0,
177
+ "learning_rate": 9.866666666666668e-06,
178
+ "loss": 0.0001,
179
  "step": 525
180
  },
181
  {
182
+ "epoch": 550.0,
183
+ "learning_rate": 9.7e-06,
184
+ "loss": 0.0001,
185
  "step": 550
186
  },
187
  {
188
+ "epoch": 575.0,
189
+ "learning_rate": 9.533333333333334e-06,
190
+ "loss": 0.0001,
191
  "step": 575
192
  },
193
  {
194
+ "epoch": 600.0,
195
+ "learning_rate": 9.366666666666668e-06,
196
+ "loss": 0.0001,
197
  "step": 600
198
  },
199
  {
200
+ "epoch": 600.0,
201
+ "eval_loss": 4.284416675567627,
202
+ "eval_runtime": 1.5895,
203
+ "eval_samples_per_second": 3.146,
204
+ "eval_steps_per_second": 0.629,
205
+ "eval_wer": 137.33333333333334,
206
+ "step": 600
207
+ },
208
+ {
209
+ "epoch": 625.0,
210
+ "learning_rate": 9.200000000000002e-06,
211
+ "loss": 0.0001,
212
  "step": 625
213
  },
214
  {
215
+ "epoch": 650.0,
216
+ "learning_rate": 9.033333333333334e-06,
217
+ "loss": 0.0,
218
  "step": 650
219
  },
220
  {
221
+ "epoch": 675.0,
222
+ "learning_rate": 8.866666666666668e-06,
223
+ "loss": 0.0,
224
  "step": 675
225
  },
226
  {
227
+ "epoch": 700.0,
228
+ "learning_rate": 8.700000000000001e-06,
229
+ "loss": 0.0,
230
  "step": 700
231
  },
232
  {
233
+ "epoch": 700.0,
234
+ "eval_loss": 4.469689846038818,
235
+ "eval_runtime": 4.8142,
236
+ "eval_samples_per_second": 1.039,
237
+ "eval_steps_per_second": 0.208,
238
+ "eval_wer": 289.33333333333337,
239
+ "step": 700
240
+ },
241
+ {
242
+ "epoch": 725.0,
243
+ "learning_rate": 8.533333333333335e-06,
244
+ "loss": 0.0,
245
  "step": 725
246
  },
247
  {
248
+ "epoch": 750.0,
249
+ "learning_rate": 8.366666666666667e-06,
250
+ "loss": 0.0,
251
  "step": 750
252
  },
253
  {
254
+ "epoch": 775.0,
255
+ "learning_rate": 8.2e-06,
256
+ "loss": 0.0,
257
  "step": 775
258
  },
259
  {
260
+ "epoch": 800.0,
261
+ "learning_rate": 8.033333333333335e-06,
262
+ "loss": 0.0,
263
  "step": 800
264
  },
265
  {
266
+ "epoch": 800.0,
267
+ "eval_loss": 4.627803325653076,
268
+ "eval_runtime": 8.4484,
269
+ "eval_samples_per_second": 0.592,
270
+ "eval_steps_per_second": 0.118,
271
+ "eval_wer": 449.3333333333333,
272
+ "step": 800
273
+ },
274
+ {
275
+ "epoch": 825.0,
276
+ "learning_rate": 7.866666666666667e-06,
277
+ "loss": 0.0,
278
  "step": 825
279
  },
280
  {
281
+ "epoch": 850.0,
282
+ "learning_rate": 7.7e-06,
283
+ "loss": 0.0,
284
  "step": 850
285
  },
286
  {
287
+ "epoch": 875.0,
288
+ "learning_rate": 7.533333333333334e-06,
289
+ "loss": 0.0,
290
  "step": 875
291
  },
292
  {
293
+ "epoch": 900.0,
294
+ "learning_rate": 7.3666666666666676e-06,
295
+ "loss": 0.0,
296
  "step": 900
297
  },
298
  {
299
+ "epoch": 900.0,
300
+ "eval_loss": 4.779428005218506,
301
+ "eval_runtime": 8.432,
302
+ "eval_samples_per_second": 0.593,
303
+ "eval_steps_per_second": 0.119,
304
+ "eval_wer": 678.6666666666667,
305
+ "step": 900
306
+ },
307
+ {
308
+ "epoch": 925.0,
309
+ "learning_rate": 7.2000000000000005e-06,
310
+ "loss": 0.0,
311
  "step": 925
312
  },
313
  {
314
+ "epoch": 950.0,
315
+ "learning_rate": 7.033333333333334e-06,
316
+ "loss": 0.0,
317
  "step": 950
318
  },
319
  {
320
+ "epoch": 975.0,
321
+ "learning_rate": 6.886666666666667e-06,
322
+ "loss": 0.0753,
323
  "step": 975
324
  },
325
  {
326
+ "epoch": 1000.0,
327
+ "learning_rate": 6.720000000000001e-06,
328
+ "loss": 0.0405,
329
  "step": 1000
330
  },
331
  {
332
+ "epoch": 1000.0,
333
+ "eval_loss": 4.676939487457275,
334
+ "eval_runtime": 8.4726,
335
+ "eval_samples_per_second": 0.59,
336
+ "eval_steps_per_second": 0.118,
337
+ "eval_wer": 261.3333333333333,
338
+ "step": 1000
339
+ },
340
+ {
341
+ "epoch": 1025.0,
342
+ "learning_rate": 6.553333333333334e-06,
343
+ "loss": 0.0057,
344
+ "step": 1025
345
+ },
346
+ {
347
+ "epoch": 1050.0,
348
+ "learning_rate": 6.386666666666668e-06,
349
+ "loss": 0.0001,
350
+ "step": 1050
351
+ },
352
+ {
353
+ "epoch": 1075.0,
354
+ "learning_rate": 6.220000000000001e-06,
355
+ "loss": 0.0001,
356
+ "step": 1075
357
+ },
358
+ {
359
+ "epoch": 1100.0,
360
+ "learning_rate": 6.0533333333333335e-06,
361
+ "loss": 0.0002,
362
+ "step": 1100
363
+ },
364
+ {
365
+ "epoch": 1100.0,
366
+ "eval_loss": 5.499487400054932,
367
+ "eval_runtime": 8.5264,
368
+ "eval_samples_per_second": 0.586,
369
+ "eval_steps_per_second": 0.117,
370
+ "eval_wer": 100.0,
371
+ "step": 1100
372
+ },
373
+ {
374
+ "epoch": 1125.0,
375
+ "learning_rate": 5.886666666666667e-06,
376
+ "loss": 0.0002,
377
+ "step": 1125
378
+ },
379
+ {
380
+ "epoch": 1150.0,
381
+ "learning_rate": 5.72e-06,
382
+ "loss": 0.0002,
383
+ "step": 1150
384
+ },
385
+ {
386
+ "epoch": 1175.0,
387
+ "learning_rate": 5.553333333333334e-06,
388
+ "loss": 0.0002,
389
+ "step": 1175
390
+ },
391
+ {
392
+ "epoch": 1200.0,
393
+ "learning_rate": 5.386666666666667e-06,
394
+ "loss": 0.0002,
395
+ "step": 1200
396
+ },
397
+ {
398
+ "epoch": 1200.0,
399
+ "eval_loss": 6.003348350524902,
400
+ "eval_runtime": 8.4985,
401
+ "eval_samples_per_second": 0.588,
402
+ "eval_steps_per_second": 0.118,
403
+ "eval_wer": 100.0,
404
+ "step": 1200
405
+ },
406
+ {
407
+ "epoch": 1225.0,
408
+ "learning_rate": 5.220000000000001e-06,
409
+ "loss": 0.0002,
410
+ "step": 1225
411
+ },
412
+ {
413
+ "epoch": 1250.0,
414
+ "learning_rate": 5.053333333333334e-06,
415
+ "loss": 0.0002,
416
+ "step": 1250
417
+ },
418
+ {
419
+ "epoch": 1275.0,
420
+ "learning_rate": 4.886666666666668e-06,
421
+ "loss": 0.0002,
422
+ "step": 1275
423
+ },
424
+ {
425
+ "epoch": 1300.0,
426
+ "learning_rate": 4.7200000000000005e-06,
427
+ "loss": 0.0002,
428
+ "step": 1300
429
+ },
430
+ {
431
+ "epoch": 1300.0,
432
+ "eval_loss": 6.288400650024414,
433
+ "eval_runtime": 8.612,
434
+ "eval_samples_per_second": 0.581,
435
+ "eval_steps_per_second": 0.116,
436
+ "eval_wer": 100.0,
437
+ "step": 1300
438
+ },
439
+ {
440
+ "epoch": 1325.0,
441
+ "learning_rate": 4.5533333333333335e-06,
442
+ "loss": 0.0002,
443
+ "step": 1325
444
+ },
445
+ {
446
+ "epoch": 1350.0,
447
+ "learning_rate": 4.3866666666666665e-06,
448
+ "loss": 0.0002,
449
+ "step": 1350
450
+ },
451
+ {
452
+ "epoch": 1375.0,
453
+ "learning_rate": 4.22e-06,
454
+ "loss": 0.0002,
455
+ "step": 1375
456
+ },
457
+ {
458
+ "epoch": 1400.0,
459
+ "learning_rate": 4.053333333333333e-06,
460
+ "loss": 0.0002,
461
+ "step": 1400
462
+ },
463
+ {
464
+ "epoch": 1400.0,
465
+ "eval_loss": 6.474369049072266,
466
+ "eval_runtime": 8.436,
467
+ "eval_samples_per_second": 0.593,
468
+ "eval_steps_per_second": 0.119,
469
+ "eval_wer": 100.0,
470
+ "step": 1400
471
+ },
472
+ {
473
+ "epoch": 1425.0,
474
+ "learning_rate": 3.886666666666667e-06,
475
+ "loss": 0.0002,
476
+ "step": 1425
477
+ },
478
+ {
479
+ "epoch": 1450.0,
480
+ "learning_rate": 3.7200000000000004e-06,
481
+ "loss": 0.0002,
482
+ "step": 1450
483
+ },
484
+ {
485
+ "epoch": 1475.0,
486
+ "learning_rate": 3.5533333333333338e-06,
487
+ "loss": 0.0002,
488
+ "step": 1475
489
+ },
490
+ {
491
+ "epoch": 1500.0,
492
+ "learning_rate": 3.386666666666667e-06,
493
+ "loss": 0.0002,
494
+ "step": 1500
495
+ },
496
+ {
497
+ "epoch": 1500.0,
498
+ "eval_loss": 6.596408843994141,
499
+ "eval_runtime": 8.5317,
500
+ "eval_samples_per_second": 0.586,
501
+ "eval_steps_per_second": 0.117,
502
+ "eval_wer": 100.0,
503
+ "step": 1500
504
+ },
505
+ {
506
+ "epoch": 1525.0,
507
+ "learning_rate": 3.2200000000000005e-06,
508
+ "loss": 0.0002,
509
+ "step": 1525
510
+ },
511
+ {
512
+ "epoch": 1550.0,
513
+ "learning_rate": 3.053333333333334e-06,
514
+ "loss": 0.0001,
515
+ "step": 1550
516
+ },
517
+ {
518
+ "epoch": 1575.0,
519
+ "learning_rate": 2.8866666666666673e-06,
520
+ "loss": 0.0001,
521
+ "step": 1575
522
+ },
523
+ {
524
+ "epoch": 1600.0,
525
+ "learning_rate": 2.7200000000000002e-06,
526
+ "loss": 0.0001,
527
+ "step": 1600
528
+ },
529
+ {
530
+ "epoch": 1600.0,
531
+ "eval_loss": 6.679154872894287,
532
+ "eval_runtime": 8.5665,
533
+ "eval_samples_per_second": 0.584,
534
+ "eval_steps_per_second": 0.117,
535
+ "eval_wer": 100.0,
536
+ "step": 1600
537
+ },
538
+ {
539
+ "epoch": 1625.0,
540
+ "learning_rate": 2.5533333333333336e-06,
541
+ "loss": 0.0001,
542
+ "step": 1625
543
+ },
544
+ {
545
+ "epoch": 1650.0,
546
+ "learning_rate": 2.386666666666667e-06,
547
+ "loss": 0.0001,
548
+ "step": 1650
549
+ },
550
+ {
551
+ "epoch": 1675.0,
552
+ "learning_rate": 2.2200000000000003e-06,
553
+ "loss": 0.0001,
554
+ "step": 1675
555
+ },
556
+ {
557
+ "epoch": 1700.0,
558
+ "learning_rate": 2.0533333333333337e-06,
559
+ "loss": 0.0001,
560
+ "step": 1700
561
+ },
562
+ {
563
+ "epoch": 1700.0,
564
+ "eval_loss": 6.7370285987854,
565
+ "eval_runtime": 8.4426,
566
+ "eval_samples_per_second": 0.592,
567
+ "eval_steps_per_second": 0.118,
568
+ "eval_wer": 100.0,
569
+ "step": 1700
570
+ },
571
+ {
572
+ "epoch": 1725.0,
573
+ "learning_rate": 1.8866666666666669e-06,
574
+ "loss": 0.0001,
575
+ "step": 1725
576
+ },
577
+ {
578
+ "epoch": 1750.0,
579
+ "learning_rate": 1.72e-06,
580
+ "loss": 0.0001,
581
+ "step": 1750
582
+ },
583
+ {
584
+ "epoch": 1775.0,
585
+ "learning_rate": 1.5533333333333334e-06,
586
+ "loss": 0.0001,
587
+ "step": 1775
588
+ },
589
+ {
590
+ "epoch": 1800.0,
591
+ "learning_rate": 1.3866666666666668e-06,
592
+ "loss": 0.0001,
593
+ "step": 1800
594
+ },
595
+ {
596
+ "epoch": 1800.0,
597
+ "eval_loss": 6.773484230041504,
598
+ "eval_runtime": 8.4858,
599
+ "eval_samples_per_second": 0.589,
600
+ "eval_steps_per_second": 0.118,
601
+ "eval_wer": 100.0,
602
+ "step": 1800
603
+ },
604
+ {
605
+ "epoch": 1825.0,
606
+ "learning_rate": 1.2200000000000002e-06,
607
+ "loss": 0.0001,
608
+ "step": 1825
609
+ },
610
+ {
611
+ "epoch": 1850.0,
612
+ "learning_rate": 1.0533333333333333e-06,
613
+ "loss": 0.0001,
614
+ "step": 1850
615
+ },
616
+ {
617
+ "epoch": 1875.0,
618
+ "learning_rate": 8.866666666666668e-07,
619
+ "loss": 0.0001,
620
+ "step": 1875
621
+ },
622
+ {
623
+ "epoch": 1900.0,
624
+ "learning_rate": 7.2e-07,
625
+ "loss": 0.0001,
626
+ "step": 1900
627
+ },
628
+ {
629
+ "epoch": 1900.0,
630
+ "eval_loss": 6.795783042907715,
631
+ "eval_runtime": 8.5107,
632
+ "eval_samples_per_second": 0.587,
633
+ "eval_steps_per_second": 0.117,
634
+ "eval_wer": 100.0,
635
+ "step": 1900
636
+ },
637
+ {
638
+ "epoch": 1925.0,
639
+ "learning_rate": 5.533333333333334e-07,
640
+ "loss": 0.0001,
641
+ "step": 1925
642
+ },
643
+ {
644
+ "epoch": 1950.0,
645
+ "learning_rate": 3.8666666666666674e-07,
646
+ "loss": 0.0001,
647
+ "step": 1950
648
+ },
649
+ {
650
+ "epoch": 1975.0,
651
+ "learning_rate": 2.2e-07,
652
+ "loss": 0.0001,
653
+ "step": 1975
654
+ },
655
+ {
656
+ "epoch": 2000.0,
657
+ "learning_rate": 5.3333333333333334e-08,
658
+ "loss": 0.0001,
659
+ "step": 2000
660
+ },
661
+ {
662
+ "epoch": 2000.0,
663
+ "eval_loss": 6.801175117492676,
664
+ "eval_runtime": 8.5138,
665
+ "eval_samples_per_second": 0.587,
666
+ "eval_steps_per_second": 0.117,
667
+ "eval_wer": 100.0,
668
+ "step": 2000
669
+ },
670
+ {
671
+ "epoch": 2000.0,
672
+ "step": 2000,
673
+ "total_flos": 5.7717080064e+17,
674
+ "train_loss": 0.10164999849759625,
675
+ "train_runtime": 1221.0979,
676
+ "train_samples_per_second": 104.824,
677
+ "train_steps_per_second": 1.638
678
  }
679
  ],
680
+ "max_steps": 2000,
681
+ "num_train_epochs": 2000,
682
+ "total_flos": 5.7717080064e+17,
683
  "trial_name": null,
684
  "trial_params": null
685
  }