marinone94 commited on
Commit
58c7e8e
1 Parent(s): 22780cf

End of training

Browse files
all_results.json CHANGED
@@ -1,33 +1,33 @@
1
  {
2
- "epoch": 1.0,
3
  "eval_loss": 1.6191972494125366,
4
- "eval_pretrained_loss": 1.8532216548919678,
5
- "eval_pretrained_runtime": 33.9735,
6
- "eval_pretrained_samples_per_second": 0.118,
7
- "eval_pretrained_steps_per_second": 0.059,
8
- "eval_pretrained_wer": 123.40425531914893,
9
  "eval_runtime": 56.3363,
10
  "eval_samples_per_second": 0.071,
11
  "eval_steps_per_second": 0.036,
12
  "eval_wer": 153.2258064516129,
13
- "test_finetuned_loss": 1.7792198657989502,
14
- "test_finetuned_runtime": 13.3999,
15
- "test_finetuned_samples_per_second": 0.299,
16
- "test_finetuned_steps_per_second": 0.149,
17
- "test_finetuned_wer": 127.90697674418605,
18
  "test_loss": 1.7568330764770508,
19
- "test_pretrained_loss": 1.7792198657989502,
20
- "test_pretrained_runtime": 33.5108,
21
- "test_pretrained_samples_per_second": 0.119,
22
- "test_pretrained_steps_per_second": 0.06,
23
- "test_pretrained_wer": 127.90697674418605,
24
  "test_runtime": 37.8582,
25
  "test_samples_per_second": 0.106,
26
  "test_steps_per_second": 0.053,
27
  "test_wer": 138.5964912280702,
28
- "total_flos": 196951080960000.0,
29
- "train_loss": 1.7391430735588074,
30
- "train_runtime": 61.5449,
31
- "train_samples_per_second": 0.13,
32
- "train_steps_per_second": 0.032
33
  }
 
1
  {
2
+ "epoch": 2.33,
3
  "eval_loss": 1.6191972494125366,
4
+ "eval_pretrained_loss": 1.715580940246582,
5
+ "eval_pretrained_runtime": 131.9468,
6
+ "eval_pretrained_samples_per_second": 2.501,
7
+ "eval_pretrained_steps_per_second": 0.318,
8
+ "eval_pretrained_wer": 264.5217946670924,
9
  "eval_runtime": 56.3363,
10
  "eval_samples_per_second": 0.071,
11
  "eval_steps_per_second": 0.036,
12
  "eval_wer": 153.2258064516129,
13
+ "test_finetuned_loss": 1.0018435716629028,
14
+ "test_finetuned_runtime": 238.9736,
15
+ "test_finetuned_samples_per_second": 3.176,
16
+ "test_finetuned_steps_per_second": 0.398,
17
+ "test_finetuned_wer": 173.15939719843325,
18
  "test_loss": 1.7568330764770508,
19
+ "test_pretrained_loss": 1.724961757659912,
20
+ "test_pretrained_runtime": 275.9851,
21
+ "test_pretrained_samples_per_second": 2.75,
22
+ "test_pretrained_steps_per_second": 0.344,
23
+ "test_pretrained_wer": 261.9066587001262,
24
  "test_runtime": 37.8582,
25
  "test_samples_per_second": 0.106,
26
  "test_steps_per_second": 0.053,
27
  "test_wer": 138.5964912280702,
28
+ "total_flos": 1.7572960198656e+17,
29
+ "train_loss": 0.8751795228038516,
30
+ "train_runtime": 2133.32,
31
+ "train_samples_per_second": 3.36,
32
+ "train_steps_per_second": 0.21
33
  }
eval_pretrained_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "eval_pretrained_loss": 1.8532216548919678,
3
- "eval_pretrained_runtime": 33.9735,
4
- "eval_pretrained_samples_per_second": 0.118,
5
- "eval_pretrained_steps_per_second": 0.059,
6
- "eval_pretrained_wer": 123.40425531914893
7
  }
 
1
  {
2
+ "eval_pretrained_loss": 1.715580940246582,
3
+ "eval_pretrained_runtime": 131.9468,
4
+ "eval_pretrained_samples_per_second": 2.501,
5
+ "eval_pretrained_steps_per_second": 0.318,
6
+ "eval_pretrained_wer": 264.5217946670924
7
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3843686519777a4550909e8bd4961dcf7425e7183295f03d09a433a271f0887
3
  size 151098921
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68bb0c59266f7c595eaaa4c27443c5b5bde3796bdbf17a2e1ef41723e9e02297
3
  size 151098921
test_finetuned_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "test_finetuned_loss": 1.7792198657989502,
4
- "test_finetuned_runtime": 13.3999,
5
- "test_finetuned_samples_per_second": 0.299,
6
- "test_finetuned_steps_per_second": 0.149,
7
- "test_finetuned_wer": 127.90697674418605
8
  }
 
1
  {
2
+ "epoch": 2.33,
3
+ "test_finetuned_loss": 1.0018435716629028,
4
+ "test_finetuned_runtime": 238.9736,
5
+ "test_finetuned_samples_per_second": 3.176,
6
+ "test_finetuned_steps_per_second": 0.398,
7
+ "test_finetuned_wer": 173.15939719843325
8
  }
test_pretrained_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "test_pretrained_loss": 1.7792198657989502,
3
- "test_pretrained_runtime": 33.5108,
4
- "test_pretrained_samples_per_second": 0.119,
5
- "test_pretrained_steps_per_second": 0.06,
6
- "test_pretrained_wer": 127.90697674418605
7
  }
 
1
  {
2
+ "test_pretrained_loss": 1.724961757659912,
3
+ "test_pretrained_runtime": 275.9851,
4
+ "test_pretrained_samples_per_second": 2.75,
5
+ "test_pretrained_steps_per_second": 0.344,
6
+ "test_pretrained_wer": 261.9066587001262
7
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "total_flos": 196951080960000.0,
4
- "train_loss": 1.7391430735588074,
5
- "train_runtime": 61.5449,
6
- "train_samples_per_second": 0.13,
7
- "train_steps_per_second": 0.032
8
  }
 
1
  {
2
+ "epoch": 2.33,
3
+ "total_flos": 1.7572960198656e+17,
4
+ "train_loss": 0.8751795228038516,
5
+ "train_runtime": 2133.32,
6
+ "train_samples_per_second": 3.36,
7
+ "train_steps_per_second": 0.21
8
  }
trainer_state.json CHANGED
@@ -1,55 +1,787 @@
1
  {
2
- "best_metric": 123.40425531914893,
3
- "best_model_checkpoint": "./whisper-training-blog/checkpoint-1",
4
- "epoch": 1.0,
5
- "global_step": 2,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.5,
12
- "learning_rate": 0.0,
13
- "loss": 1.8019,
14
- "step": 1
15
  },
16
  {
17
- "epoch": 0.5,
18
- "eval_loss": 1.8532216548919678,
19
- "eval_runtime": 6.8084,
20
- "eval_samples_per_second": 0.588,
21
- "eval_steps_per_second": 0.294,
22
- "eval_wer": 123.40425531914893,
23
- "step": 1
24
  },
25
  {
26
- "epoch": 1.0,
27
- "learning_rate": 7.5e-06,
28
- "loss": 1.6763,
29
- "step": 2
30
  },
31
  {
32
- "epoch": 1.0,
33
- "eval_loss": 1.8532216548919678,
34
- "eval_runtime": 5.8615,
35
- "eval_samples_per_second": 0.682,
36
- "eval_steps_per_second": 0.341,
37
- "eval_wer": 123.40425531914893,
38
- "step": 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  },
40
  {
41
  "epoch": 1.0,
42
- "step": 2,
43
- "total_flos": 196951080960000.0,
44
- "train_loss": 1.7391430735588074,
45
- "train_runtime": 61.5449,
46
- "train_samples_per_second": 0.13,
47
- "train_steps_per_second": 0.032
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  }
49
  ],
50
- "max_steps": 2,
51
  "num_train_epochs": 9223372036854775807,
52
- "total_flos": 196951080960000.0,
53
  "trial_name": null,
54
  "trial_params": null
55
  }
 
1
  {
2
+ "best_metric": 173.47916334025228,
3
+ "best_model_checkpoint": "./whisper-training-blog/checkpoint-396",
4
+ "epoch": 2.330357142857143,
5
+ "global_step": 448,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.01,
12
+ "learning_rate": 1.6666666666666668e-07,
13
+ "loss": 1.7185,
14
+ "step": 4
15
  },
16
  {
17
+ "epoch": 0.02,
18
+ "learning_rate": 3.3333333333333335e-07,
19
+ "loss": 1.6808,
20
+ "step": 8
 
 
 
21
  },
22
  {
23
+ "epoch": 0.03,
24
+ "learning_rate": 5.555555555555555e-07,
25
+ "loss": 1.712,
26
+ "step": 12
27
  },
28
  {
29
+ "epoch": 0.04,
30
+ "learning_rate": 7.777777777777778e-07,
31
+ "loss": 1.543,
32
+ "step": 16
33
+ },
34
+ {
35
+ "epoch": 0.04,
36
+ "learning_rate": 1e-06,
37
+ "loss": 1.609,
38
+ "step": 20
39
+ },
40
+ {
41
+ "epoch": 0.05,
42
+ "learning_rate": 1.2222222222222223e-06,
43
+ "loss": 1.5657,
44
+ "step": 24
45
+ },
46
+ {
47
+ "epoch": 0.06,
48
+ "learning_rate": 1.4444444444444445e-06,
49
+ "loss": 1.608,
50
+ "step": 28
51
+ },
52
+ {
53
+ "epoch": 0.07,
54
+ "learning_rate": 1.6666666666666667e-06,
55
+ "loss": 1.5075,
56
+ "step": 32
57
+ },
58
+ {
59
+ "epoch": 0.08,
60
+ "learning_rate": 1.8888888888888888e-06,
61
+ "loss": 1.5183,
62
+ "step": 36
63
+ },
64
+ {
65
+ "epoch": 0.09,
66
+ "learning_rate": 2.1111111111111114e-06,
67
+ "loss": 1.4948,
68
+ "step": 40
69
+ },
70
+ {
71
+ "epoch": 0.1,
72
+ "learning_rate": 2.3333333333333336e-06,
73
+ "loss": 1.4112,
74
+ "step": 44
75
+ },
76
+ {
77
+ "epoch": 0.1,
78
+ "eval_loss": 1.4919109344482422,
79
+ "eval_runtime": 118.4455,
80
+ "eval_samples_per_second": 2.786,
81
+ "eval_steps_per_second": 0.355,
82
+ "eval_wer": 245.29778061631805,
83
+ "step": 44
84
+ },
85
+ {
86
+ "epoch": 0.11,
87
+ "learning_rate": 2.5555555555555557e-06,
88
+ "loss": 1.3192,
89
+ "step": 48
90
+ },
91
+ {
92
+ "epoch": 0.12,
93
+ "learning_rate": 2.7777777777777775e-06,
94
+ "loss": 1.3334,
95
+ "step": 52
96
+ },
97
+ {
98
+ "epoch": 0.12,
99
+ "learning_rate": 3e-06,
100
+ "loss": 1.2895,
101
+ "step": 56
102
+ },
103
+ {
104
+ "epoch": 0.13,
105
+ "learning_rate": 3.2222222222222222e-06,
106
+ "loss": 1.2749,
107
+ "step": 60
108
+ },
109
+ {
110
+ "epoch": 0.14,
111
+ "learning_rate": 3.4444444444444444e-06,
112
+ "loss": 1.3856,
113
+ "step": 64
114
+ },
115
+ {
116
+ "epoch": 0.15,
117
+ "learning_rate": 3.6666666666666666e-06,
118
+ "loss": 1.2078,
119
+ "step": 68
120
+ },
121
+ {
122
+ "epoch": 0.16,
123
+ "learning_rate": 3.888888888888889e-06,
124
+ "loss": 1.2185,
125
+ "step": 72
126
+ },
127
+ {
128
+ "epoch": 0.17,
129
+ "learning_rate": 4.111111111111111e-06,
130
+ "loss": 1.1661,
131
+ "step": 76
132
+ },
133
+ {
134
+ "epoch": 0.18,
135
+ "learning_rate": 4.333333333333333e-06,
136
+ "loss": 1.1351,
137
+ "step": 80
138
+ },
139
+ {
140
+ "epoch": 0.19,
141
+ "learning_rate": 4.555555555555555e-06,
142
+ "loss": 1.0729,
143
+ "step": 84
144
+ },
145
+ {
146
+ "epoch": 0.2,
147
+ "learning_rate": 4.777777777777778e-06,
148
+ "loss": 1.0501,
149
+ "step": 88
150
+ },
151
+ {
152
+ "epoch": 0.2,
153
+ "eval_loss": 1.225467324256897,
154
+ "eval_runtime": 119.277,
155
+ "eval_samples_per_second": 2.767,
156
+ "eval_steps_per_second": 0.352,
157
+ "eval_wer": 219.94251955931662,
158
+ "step": 88
159
+ },
160
+ {
161
+ "epoch": 0.21,
162
+ "learning_rate": 4.9999999999999996e-06,
163
+ "loss": 1.1278,
164
+ "step": 92
165
+ },
166
+ {
167
+ "epoch": 0.21,
168
+ "learning_rate": 5.2222222222222226e-06,
169
+ "loss": 1.0563,
170
+ "step": 96
171
+ },
172
+ {
173
+ "epoch": 0.22,
174
+ "learning_rate": 5.444444444444445e-06,
175
+ "loss": 0.9929,
176
+ "step": 100
177
+ },
178
+ {
179
+ "epoch": 0.23,
180
+ "learning_rate": 5.666666666666667e-06,
181
+ "loss": 1.0879,
182
+ "step": 104
183
+ },
184
+ {
185
+ "epoch": 0.24,
186
+ "learning_rate": 5.888888888888889e-06,
187
+ "loss": 1.0417,
188
+ "step": 108
189
+ },
190
+ {
191
+ "epoch": 0.25,
192
+ "learning_rate": 6.111111111111111e-06,
193
+ "loss": 0.9337,
194
+ "step": 112
195
+ },
196
+ {
197
+ "epoch": 0.26,
198
+ "learning_rate": 6.333333333333333e-06,
199
+ "loss": 0.9688,
200
+ "step": 116
201
+ },
202
+ {
203
+ "epoch": 0.27,
204
+ "learning_rate": 6.555555555555556e-06,
205
+ "loss": 0.9559,
206
+ "step": 120
207
+ },
208
+ {
209
+ "epoch": 0.28,
210
+ "learning_rate": 6.777777777777778e-06,
211
+ "loss": 1.0192,
212
+ "step": 124
213
+ },
214
+ {
215
+ "epoch": 0.29,
216
+ "learning_rate": 7e-06,
217
+ "loss": 0.8509,
218
+ "step": 128
219
+ },
220
+ {
221
+ "epoch": 0.29,
222
+ "learning_rate": 7.222222222222222e-06,
223
+ "loss": 0.9033,
224
+ "step": 132
225
+ },
226
+ {
227
+ "epoch": 0.29,
228
+ "eval_loss": 1.120314359664917,
229
+ "eval_runtime": 111.0003,
230
+ "eval_samples_per_second": 2.973,
231
+ "eval_steps_per_second": 0.378,
232
+ "eval_wer": 205.7799776464953,
233
+ "step": 132
234
+ },
235
+ {
236
+ "epoch": 0.3,
237
+ "learning_rate": 7.444444444444444e-06,
238
+ "loss": 0.9171,
239
+ "step": 136
240
+ },
241
+ {
242
+ "epoch": 0.31,
243
+ "learning_rate": 7.428115015974441e-06,
244
+ "loss": 0.9445,
245
+ "step": 140
246
+ },
247
+ {
248
+ "epoch": 0.32,
249
+ "learning_rate": 7.332268370607029e-06,
250
+ "loss": 0.8926,
251
+ "step": 144
252
+ },
253
+ {
254
+ "epoch": 0.33,
255
+ "learning_rate": 7.236421725239617e-06,
256
+ "loss": 0.9975,
257
+ "step": 148
258
  },
259
  {
260
  "epoch": 1.0,
261
+ "learning_rate": 7.140575079872205e-06,
262
+ "loss": 1.239,
263
+ "step": 152
264
+ },
265
+ {
266
+ "epoch": 1.01,
267
+ "learning_rate": 7.044728434504793e-06,
268
+ "loss": 0.9148,
269
+ "step": 156
270
+ },
271
+ {
272
+ "epoch": 1.02,
273
+ "learning_rate": 6.948881789137381e-06,
274
+ "loss": 1.0117,
275
+ "step": 160
276
+ },
277
+ {
278
+ "epoch": 1.03,
279
+ "learning_rate": 6.853035143769968e-06,
280
+ "loss": 0.9081,
281
+ "step": 164
282
+ },
283
+ {
284
+ "epoch": 1.04,
285
+ "learning_rate": 6.7571884984025565e-06,
286
+ "loss": 0.8434,
287
+ "step": 168
288
+ },
289
+ {
290
+ "epoch": 1.05,
291
+ "learning_rate": 6.6613418530351436e-06,
292
+ "loss": 0.8672,
293
+ "step": 172
294
+ },
295
+ {
296
+ "epoch": 1.06,
297
+ "learning_rate": 6.565495207667732e-06,
298
+ "loss": 0.8142,
299
+ "step": 176
300
+ },
301
+ {
302
+ "epoch": 1.06,
303
+ "eval_loss": 1.0674585103988647,
304
+ "eval_runtime": 102.4925,
305
+ "eval_samples_per_second": 3.22,
306
+ "eval_steps_per_second": 0.41,
307
+ "eval_wer": 192.87881207089254,
308
+ "step": 176
309
+ },
310
+ {
311
+ "epoch": 1.07,
312
+ "learning_rate": 6.469648562300319e-06,
313
+ "loss": 0.8329,
314
+ "step": 180
315
+ },
316
+ {
317
+ "epoch": 1.08,
318
+ "learning_rate": 6.373801916932908e-06,
319
+ "loss": 0.8191,
320
+ "step": 184
321
+ },
322
+ {
323
+ "epoch": 1.08,
324
+ "learning_rate": 6.277955271565495e-06,
325
+ "loss": 0.8659,
326
+ "step": 188
327
+ },
328
+ {
329
+ "epoch": 1.09,
330
+ "learning_rate": 6.182108626198084e-06,
331
+ "loss": 0.821,
332
+ "step": 192
333
+ },
334
+ {
335
+ "epoch": 1.1,
336
+ "learning_rate": 6.086261980830671e-06,
337
+ "loss": 0.8518,
338
+ "step": 196
339
+ },
340
+ {
341
+ "epoch": 1.11,
342
+ "learning_rate": 5.990415335463259e-06,
343
+ "loss": 0.7813,
344
+ "step": 200
345
+ },
346
+ {
347
+ "epoch": 1.12,
348
+ "learning_rate": 5.894568690095847e-06,
349
+ "loss": 0.7541,
350
+ "step": 204
351
+ },
352
+ {
353
+ "epoch": 1.13,
354
+ "learning_rate": 5.798722044728435e-06,
355
+ "loss": 0.7938,
356
+ "step": 208
357
+ },
358
+ {
359
+ "epoch": 1.14,
360
+ "learning_rate": 5.702875399361023e-06,
361
+ "loss": 0.8182,
362
+ "step": 212
363
+ },
364
+ {
365
+ "epoch": 1.15,
366
+ "learning_rate": 5.607028753993611e-06,
367
+ "loss": 0.7461,
368
+ "step": 216
369
+ },
370
+ {
371
+ "epoch": 1.16,
372
+ "learning_rate": 5.5111821086261985e-06,
373
+ "loss": 0.8029,
374
+ "step": 220
375
+ },
376
+ {
377
+ "epoch": 1.16,
378
+ "eval_loss": 1.0393497943878174,
379
+ "eval_runtime": 101.6759,
380
+ "eval_samples_per_second": 3.246,
381
+ "eval_steps_per_second": 0.413,
382
+ "eval_wer": 178.42886795465432,
383
+ "step": 220
384
+ },
385
+ {
386
+ "epoch": 1.17,
387
+ "learning_rate": 5.415335463258786e-06,
388
+ "loss": 0.754,
389
+ "step": 224
390
+ },
391
+ {
392
+ "epoch": 1.17,
393
+ "learning_rate": 5.319488817891374e-06,
394
+ "loss": 0.7662,
395
+ "step": 228
396
+ },
397
+ {
398
+ "epoch": 1.18,
399
+ "learning_rate": 5.223642172523962e-06,
400
+ "loss": 0.7253,
401
+ "step": 232
402
+ },
403
+ {
404
+ "epoch": 1.19,
405
+ "learning_rate": 5.127795527156549e-06,
406
+ "loss": 0.6944,
407
+ "step": 236
408
+ },
409
+ {
410
+ "epoch": 1.2,
411
+ "learning_rate": 5.031948881789138e-06,
412
+ "loss": 0.7281,
413
+ "step": 240
414
+ },
415
+ {
416
+ "epoch": 1.21,
417
+ "learning_rate": 4.936102236421725e-06,
418
+ "loss": 0.7841,
419
+ "step": 244
420
+ },
421
+ {
422
+ "epoch": 1.22,
423
+ "learning_rate": 4.840255591054313e-06,
424
+ "loss": 0.7154,
425
+ "step": 248
426
+ },
427
+ {
428
+ "epoch": 1.23,
429
+ "learning_rate": 4.744408945686901e-06,
430
+ "loss": 0.7077,
431
+ "step": 252
432
+ },
433
+ {
434
+ "epoch": 1.24,
435
+ "learning_rate": 4.648562300319489e-06,
436
+ "loss": 0.7812,
437
+ "step": 256
438
+ },
439
+ {
440
+ "epoch": 1.25,
441
+ "learning_rate": 4.552715654952077e-06,
442
+ "loss": 0.6817,
443
+ "step": 260
444
+ },
445
+ {
446
+ "epoch": 1.25,
447
+ "learning_rate": 4.456869009584665e-06,
448
+ "loss": 0.6324,
449
+ "step": 264
450
+ },
451
+ {
452
+ "epoch": 1.25,
453
+ "eval_loss": 1.0302220582962036,
454
+ "eval_runtime": 114.7389,
455
+ "eval_samples_per_second": 2.876,
456
+ "eval_steps_per_second": 0.366,
457
+ "eval_wer": 216.6054606418649,
458
+ "step": 264
459
+ },
460
+ {
461
+ "epoch": 1.26,
462
+ "learning_rate": 4.361022364217253e-06,
463
+ "loss": 0.6826,
464
+ "step": 268
465
+ },
466
+ {
467
+ "epoch": 1.27,
468
+ "learning_rate": 4.26517571884984e-06,
469
+ "loss": 0.7187,
470
+ "step": 272
471
+ },
472
+ {
473
+ "epoch": 1.28,
474
+ "learning_rate": 4.169329073482428e-06,
475
+ "loss": 0.6699,
476
+ "step": 276
477
+ },
478
+ {
479
+ "epoch": 1.29,
480
+ "learning_rate": 4.0734824281150155e-06,
481
+ "loss": 0.5677,
482
+ "step": 280
483
+ },
484
+ {
485
+ "epoch": 1.3,
486
+ "learning_rate": 3.977635782747604e-06,
487
+ "loss": 0.6833,
488
+ "step": 284
489
+ },
490
+ {
491
+ "epoch": 1.31,
492
+ "learning_rate": 3.881789137380191e-06,
493
+ "loss": 0.6293,
494
+ "step": 288
495
+ },
496
+ {
497
+ "epoch": 1.32,
498
+ "learning_rate": 3.7859424920127796e-06,
499
+ "loss": 0.672,
500
+ "step": 292
501
+ },
502
+ {
503
+ "epoch": 1.33,
504
+ "learning_rate": 3.6900958466453675e-06,
505
+ "loss": 0.7121,
506
+ "step": 296
507
+ },
508
+ {
509
+ "epoch": 1.33,
510
+ "learning_rate": 3.5942492012779555e-06,
511
+ "loss": 0.8766,
512
+ "step": 300
513
+ },
514
+ {
515
+ "epoch": 2.01,
516
+ "learning_rate": 3.4984025559105434e-06,
517
+ "loss": 0.6726,
518
+ "step": 304
519
+ },
520
+ {
521
+ "epoch": 2.02,
522
+ "learning_rate": 3.4025559105431313e-06,
523
+ "loss": 0.6971,
524
+ "step": 308
525
+ },
526
+ {
527
+ "epoch": 2.02,
528
+ "eval_loss": 1.0134836435317993,
529
+ "eval_runtime": 101.0358,
530
+ "eval_samples_per_second": 3.266,
531
+ "eval_steps_per_second": 0.416,
532
+ "eval_wer": 179.3709085102986,
533
+ "step": 308
534
+ },
535
+ {
536
+ "epoch": 2.03,
537
+ "learning_rate": 3.306709265175719e-06,
538
+ "loss": 0.7432,
539
+ "step": 312
540
+ },
541
+ {
542
+ "epoch": 2.04,
543
+ "learning_rate": 3.2108626198083067e-06,
544
+ "loss": 0.6264,
545
+ "step": 316
546
+ },
547
+ {
548
+ "epoch": 2.04,
549
+ "learning_rate": 3.1150159744408946e-06,
550
+ "loss": 0.6604,
551
+ "step": 320
552
+ },
553
+ {
554
+ "epoch": 2.05,
555
+ "learning_rate": 3.0191693290734825e-06,
556
+ "loss": 0.6409,
557
+ "step": 324
558
+ },
559
+ {
560
+ "epoch": 2.06,
561
+ "learning_rate": 2.9233226837060704e-06,
562
+ "loss": 0.6539,
563
+ "step": 328
564
+ },
565
+ {
566
+ "epoch": 2.07,
567
+ "learning_rate": 2.8274760383386583e-06,
568
+ "loss": 0.611,
569
+ "step": 332
570
+ },
571
+ {
572
+ "epoch": 2.08,
573
+ "learning_rate": 2.7316293929712462e-06,
574
+ "loss": 0.6828,
575
+ "step": 336
576
+ },
577
+ {
578
+ "epoch": 2.09,
579
+ "learning_rate": 2.635782747603834e-06,
580
+ "loss": 0.6499,
581
+ "step": 340
582
+ },
583
+ {
584
+ "epoch": 2.1,
585
+ "learning_rate": 2.539936102236422e-06,
586
+ "loss": 0.6642,
587
+ "step": 344
588
+ },
589
+ {
590
+ "epoch": 2.11,
591
+ "learning_rate": 2.44408945686901e-06,
592
+ "loss": 0.6147,
593
+ "step": 348
594
+ },
595
+ {
596
+ "epoch": 2.12,
597
+ "learning_rate": 2.3482428115015974e-06,
598
+ "loss": 0.6051,
599
+ "step": 352
600
+ },
601
+ {
602
+ "epoch": 2.12,
603
+ "eval_loss": 1.0064767599105835,
604
+ "eval_runtime": 107.523,
605
+ "eval_samples_per_second": 3.069,
606
+ "eval_steps_per_second": 0.391,
607
+ "eval_wer": 194.63515886955133,
608
+ "step": 352
609
+ },
610
+ {
611
+ "epoch": 2.12,
612
+ "learning_rate": 2.2523961661341854e-06,
613
+ "loss": 0.6237,
614
+ "step": 356
615
+ },
616
+ {
617
+ "epoch": 2.13,
618
+ "learning_rate": 2.1565495207667733e-06,
619
+ "loss": 0.5992,
620
+ "step": 360
621
+ },
622
+ {
623
+ "epoch": 2.14,
624
+ "learning_rate": 2.060702875399361e-06,
625
+ "loss": 0.6777,
626
+ "step": 364
627
+ },
628
+ {
629
+ "epoch": 2.15,
630
+ "learning_rate": 1.964856230031949e-06,
631
+ "loss": 0.6012,
632
+ "step": 368
633
+ },
634
+ {
635
+ "epoch": 2.16,
636
+ "learning_rate": 1.8690095846645368e-06,
637
+ "loss": 0.622,
638
+ "step": 372
639
+ },
640
+ {
641
+ "epoch": 2.17,
642
+ "learning_rate": 1.7731629392971245e-06,
643
+ "loss": 0.6288,
644
+ "step": 376
645
+ },
646
+ {
647
+ "epoch": 2.18,
648
+ "learning_rate": 1.6773162939297124e-06,
649
+ "loss": 0.6079,
650
+ "step": 380
651
+ },
652
+ {
653
+ "epoch": 2.19,
654
+ "learning_rate": 1.5814696485623003e-06,
655
+ "loss": 0.5887,
656
+ "step": 384
657
+ },
658
+ {
659
+ "epoch": 2.2,
660
+ "learning_rate": 1.4856230031948882e-06,
661
+ "loss": 0.5932,
662
+ "step": 388
663
+ },
664
+ {
665
+ "epoch": 2.21,
666
+ "learning_rate": 1.389776357827476e-06,
667
+ "loss": 0.6373,
668
+ "step": 392
669
+ },
670
+ {
671
+ "epoch": 2.21,
672
+ "learning_rate": 1.2939297124600638e-06,
673
+ "loss": 0.6048,
674
+ "step": 396
675
+ },
676
+ {
677
+ "epoch": 2.21,
678
+ "eval_loss": 1.002966284751892,
679
+ "eval_runtime": 97.8102,
680
+ "eval_samples_per_second": 3.374,
681
+ "eval_steps_per_second": 0.429,
682
+ "eval_wer": 173.47916334025228,
683
+ "step": 396
684
+ },
685
+ {
686
+ "epoch": 2.22,
687
+ "learning_rate": 1.1980830670926517e-06,
688
+ "loss": 0.5774,
689
+ "step": 400
690
+ },
691
+ {
692
+ "epoch": 2.23,
693
+ "learning_rate": 1.1022364217252397e-06,
694
+ "loss": 0.6325,
695
+ "step": 404
696
+ },
697
+ {
698
+ "epoch": 2.24,
699
+ "learning_rate": 1.0063897763578274e-06,
700
+ "loss": 0.601,
701
+ "step": 408
702
+ },
703
+ {
704
+ "epoch": 2.25,
705
+ "learning_rate": 9.105431309904153e-07,
706
+ "loss": 0.5618,
707
+ "step": 412
708
+ },
709
+ {
710
+ "epoch": 2.26,
711
+ "learning_rate": 8.146964856230032e-07,
712
+ "loss": 0.5729,
713
+ "step": 416
714
+ },
715
+ {
716
+ "epoch": 2.27,
717
+ "learning_rate": 7.188498402555911e-07,
718
+ "loss": 0.5755,
719
+ "step": 420
720
+ },
721
+ {
722
+ "epoch": 2.28,
723
+ "learning_rate": 6.230031948881789e-07,
724
+ "loss": 0.6207,
725
+ "step": 424
726
+ },
727
+ {
728
+ "epoch": 2.29,
729
+ "learning_rate": 5.271565495207668e-07,
730
+ "loss": 0.5041,
731
+ "step": 428
732
+ },
733
+ {
734
+ "epoch": 2.29,
735
+ "learning_rate": 4.313099041533546e-07,
736
+ "loss": 0.5544,
737
+ "step": 432
738
+ },
739
+ {
740
+ "epoch": 2.3,
741
+ "learning_rate": 3.3546325878594247e-07,
742
+ "loss": 0.5608,
743
+ "step": 436
744
+ },
745
+ {
746
+ "epoch": 2.31,
747
+ "learning_rate": 2.3961661341853033e-07,
748
+ "loss": 0.585,
749
+ "step": 440
750
+ },
751
+ {
752
+ "epoch": 2.31,
753
+ "eval_loss": 1.0049320459365845,
754
+ "eval_runtime": 105.5439,
755
+ "eval_samples_per_second": 3.127,
756
+ "eval_steps_per_second": 0.398,
757
+ "eval_wer": 186.6677311192719,
758
+ "step": 440
759
+ },
760
+ {
761
+ "epoch": 2.32,
762
+ "learning_rate": 1.437699680511182e-07,
763
+ "loss": 0.5486,
764
+ "step": 444
765
+ },
766
+ {
767
+ "epoch": 2.33,
768
+ "learning_rate": 4.792332268370607e-08,
769
+ "loss": 0.6387,
770
+ "step": 448
771
+ },
772
+ {
773
+ "epoch": 2.33,
774
+ "step": 448,
775
+ "total_flos": 1.7572960198656e+17,
776
+ "train_loss": 0.8751795228038516,
777
+ "train_runtime": 2133.32,
778
+ "train_samples_per_second": 3.36,
779
+ "train_steps_per_second": 0.21
780
  }
781
  ],
782
+ "max_steps": 448,
783
  "num_train_epochs": 9223372036854775807,
784
+ "total_flos": 1.7572960198656e+17,
785
  "trial_name": null,
786
  "trial_params": null
787
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f657957109096465f8089361114a5bcf715d17db0a314d2a9784c7deda67c16
3
  size 3707
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:098eb422614f2b69cada5c370a7aafa45f3e0897c25ca997abbd0fbb3b31385a
3
  size 3707