marinone94 commited on
Commit
8a2f935
1 Parent(s): ded539e

End of training

Browse files
all_results.json CHANGED
@@ -1,32 +1,32 @@
1
  {
2
- "epoch": 1.0,
3
  "eval_loss": 1.6191972494125366,
4
- "eval_pretrained_loss": 1.6191972494125366,
5
- "eval_pretrained_runtime": 59.7459,
6
- "eval_pretrained_samples_per_second": 0.067,
7
- "eval_pretrained_steps_per_second": 0.033,
8
- "eval_pretrained_wer": 153.2258064516129,
9
  "eval_runtime": 56.3363,
10
  "eval_samples_per_second": 0.071,
11
  "eval_steps_per_second": 0.036,
12
  "eval_wer": 153.2258064516129,
13
- "test_finetuned_loss": 1.7568330764770508,
14
- "test_finetuned_runtime": 39.6579,
15
- "test_finetuned_samples_per_second": 0.101,
16
- "test_finetuned_steps_per_second": 0.05,
17
- "test_finetuned_wer": 138.5964912280702,
18
  "test_loss": 1.7568330764770508,
19
- "test_pretrained_loss": 1.7568330764770508,
20
- "test_pretrained_runtime": 42.5376,
21
- "test_pretrained_samples_per_second": 0.094,
22
- "test_pretrained_steps_per_second": 0.047,
23
- "test_pretrained_wer": 138.5964912280702,
24
  "test_runtime": 37.8582,
25
  "test_samples_per_second": 0.106,
26
  "test_steps_per_second": 0.053,
27
  "test_wer": 138.5964912280702,
28
- "train_loss": 1.4339025020599365,
29
- "train_runtime": 102.2429,
30
- "train_samples_per_second": 0.078,
31
- "train_steps_per_second": 0.02
32
  }
 
1
  {
2
+ "epoch": 2.32,
3
  "eval_loss": 1.6191972494125366,
4
+ "eval_pretrained_loss": 1.710707187652588,
5
+ "eval_pretrained_runtime": 113.3653,
6
+ "eval_pretrained_samples_per_second": 2.911,
7
+ "eval_pretrained_steps_per_second": 0.097,
8
+ "eval_pretrained_wer": 258.2308797700783,
9
  "eval_runtime": 56.3363,
10
  "eval_samples_per_second": 0.071,
11
  "eval_steps_per_second": 0.036,
12
  "eval_wer": 153.2258064516129,
13
+ "test_finetuned_loss": 1.0526511669158936,
14
+ "test_finetuned_runtime": 167.4865,
15
+ "test_finetuned_samples_per_second": 4.532,
16
+ "test_finetuned_steps_per_second": 0.143,
17
+ "test_finetuned_wer": 176.51198300471353,
18
  "test_loss": 1.7568330764770508,
19
+ "test_pretrained_loss": 1.723103642463684,
20
+ "test_pretrained_runtime": 171.9859,
21
+ "test_pretrained_samples_per_second": 4.413,
22
+ "test_pretrained_steps_per_second": 0.14,
23
+ "test_pretrained_wer": 261.9265750514506,
24
  "test_runtime": 37.8582,
25
  "test_samples_per_second": 0.106,
26
  "test_steps_per_second": 0.053,
27
  "test_wer": 138.5964912280702,
28
+ "train_loss": 0.9721650715385165,
29
+ "train_runtime": 1491.7987,
30
+ "train_samples_per_second": 4.805,
31
+ "train_steps_per_second": 0.075
32
  }
eval_pretrained_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "eval_pretrained_loss": 1.6191972494125366,
3
- "eval_pretrained_runtime": 59.7459,
4
- "eval_pretrained_samples_per_second": 0.067,
5
- "eval_pretrained_steps_per_second": 0.033,
6
- "eval_pretrained_wer": 153.2258064516129
7
  }
 
1
  {
2
+ "eval_pretrained_loss": 1.710707187652588,
3
+ "eval_pretrained_runtime": 113.3653,
4
+ "eval_pretrained_samples_per_second": 2.911,
5
+ "eval_pretrained_steps_per_second": 0.097,
6
+ "eval_pretrained_wer": 258.2308797700783
7
  }
huggingface_training.py CHANGED
@@ -35,8 +35,8 @@ dataset = load_dataset(dataset_id, dataset_language_code, streaming=True)
35
 
36
  """The first time you run this code, make sure everything works fine using a small sample and low number of training steps. Just uncomment the next cell and run it. One note: since the dataset is loaded in streaming mode, the instruction will not be executed immediately. Instead, the dataset will be subsampled only when data will be needed during training."""
37
 
38
- test_script = True
39
- # test_script = False
40
 
41
  ## Sample dataset for testing
42
  if test_script is True:
 
35
 
36
  """The first time you run this code, make sure everything works fine using a small sample and low number of training steps. Just uncomment the next cell and run it. One note: since the dataset is loaded in streaming mode, the instruction will not be executed immediately. Instead, the dataset will be subsampled only when data will be needed during training."""
37
 
38
+ # test_script = True
39
+ test_script = False
40
 
41
  ## Sample dataset for testing
42
  if test_script is True:
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3843686519777a4550909e8bd4961dcf7425e7183295f03d09a433a271f0887
3
  size 151098921
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c74db4ae6de9cc4fe7ef67b639a61333e374adb566ec56ff72f1129f43c25bf4
3
  size 151098921
test_finetuned_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "test_finetuned_loss": 1.7568330764770508,
4
- "test_finetuned_runtime": 39.6579,
5
- "test_finetuned_samples_per_second": 0.101,
6
- "test_finetuned_steps_per_second": 0.05,
7
- "test_finetuned_wer": 138.5964912280702
8
  }
 
1
  {
2
+ "epoch": 2.32,
3
+ "test_finetuned_loss": 1.0526511669158936,
4
+ "test_finetuned_runtime": 167.4865,
5
+ "test_finetuned_samples_per_second": 4.532,
6
+ "test_finetuned_steps_per_second": 0.143,
7
+ "test_finetuned_wer": 176.51198300471353
8
  }
test_pretrained_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "test_pretrained_loss": 1.7568330764770508,
3
- "test_pretrained_runtime": 42.5376,
4
- "test_pretrained_samples_per_second": 0.094,
5
- "test_pretrained_steps_per_second": 0.047,
6
- "test_pretrained_wer": 138.5964912280702
7
  }
 
1
  {
2
+ "test_pretrained_loss": 1.723103642463684,
3
+ "test_pretrained_runtime": 171.9859,
4
+ "test_pretrained_samples_per_second": 4.413,
5
+ "test_pretrained_steps_per_second": 0.14,
6
+ "test_pretrained_wer": 261.9265750514506
7
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 1.0,
3
- "train_loss": 1.4339025020599365,
4
- "train_runtime": 102.2429,
5
- "train_samples_per_second": 0.078,
6
- "train_steps_per_second": 0.02
7
  }
 
1
  {
2
+ "epoch": 2.32,
3
+ "train_loss": 0.9721650715385165,
4
+ "train_runtime": 1491.7987,
5
+ "train_samples_per_second": 4.805,
6
+ "train_steps_per_second": 0.075
7
  }
trainer_state.json CHANGED
@@ -1,55 +1,787 @@
1
  {
2
- "best_metric": 153.2258064516129,
3
- "best_model_checkpoint": "./checkpoint-1",
4
- "epoch": 1.0,
5
- "global_step": 2,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.5,
12
  "learning_rate": 0.0,
13
- "loss": 1.565,
14
  "step": 1
15
  },
16
  {
17
- "epoch": 0.5,
18
- "eval_loss": 1.6191972494125366,
19
- "eval_runtime": 38.6993,
20
- "eval_samples_per_second": 0.103,
21
- "eval_steps_per_second": 0.052,
22
- "eval_wer": 153.2258064516129,
23
- "step": 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  },
25
  {
26
- "epoch": 1.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  "learning_rate": 1e-05,
28
- "loss": 1.3028,
29
- "step": 2
30
  },
31
  {
32
- "epoch": 1.0,
33
- "eval_loss": 1.6191972494125366,
34
- "eval_runtime": 39.2638,
35
- "eval_samples_per_second": 0.102,
36
- "eval_steps_per_second": 0.051,
37
- "eval_wer": 153.2258064516129,
38
- "step": 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  },
40
  {
41
- "epoch": 1.0,
42
- "step": 2,
43
- "total_flos": 196951080960000.0,
44
- "train_loss": 1.4339025020599365,
45
- "train_runtime": 102.2429,
46
- "train_samples_per_second": 0.078,
47
- "train_steps_per_second": 0.02
48
  }
49
  ],
50
- "max_steps": 2,
51
  "num_train_epochs": 9223372036854775807,
52
- "total_flos": 196951080960000.0,
53
  "trial_name": null,
54
  "trial_params": null
55
  }
 
1
  {
2
+ "best_metric": 158.1031454574485,
3
+ "best_model_checkpoint": "./checkpoint-88",
4
+ "epoch": 2.3214285714285716,
5
+ "global_step": 112,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.01,
12
  "learning_rate": 0.0,
13
+ "loss": 1.6569,
14
  "step": 1
15
  },
16
  {
17
+ "epoch": 0.02,
18
+ "learning_rate": 4.347826086956522e-07,
19
+ "loss": 1.6564,
20
+ "step": 2
21
+ },
22
+ {
23
+ "epoch": 0.03,
24
+ "learning_rate": 8.695652173913044e-07,
25
+ "loss": 1.6518,
26
+ "step": 3
27
+ },
28
+ {
29
+ "epoch": 0.04,
30
+ "learning_rate": 1.3043478260869566e-06,
31
+ "loss": 1.6233,
32
+ "step": 4
33
+ },
34
+ {
35
+ "epoch": 0.04,
36
+ "learning_rate": 1.7391304347826088e-06,
37
+ "loss": 1.712,
38
+ "step": 5
39
+ },
40
+ {
41
+ "epoch": 0.05,
42
+ "learning_rate": 2.173913043478261e-06,
43
+ "loss": 1.6476,
44
+ "step": 6
45
+ },
46
+ {
47
+ "epoch": 0.06,
48
+ "learning_rate": 2.6086956521739132e-06,
49
+ "loss": 1.5716,
50
+ "step": 7
51
+ },
52
+ {
53
+ "epoch": 0.07,
54
+ "learning_rate": 3.043478260869566e-06,
55
+ "loss": 1.5909,
56
+ "step": 8
57
+ },
58
+ {
59
+ "epoch": 0.08,
60
+ "learning_rate": 3.4782608695652175e-06,
61
+ "loss": 1.5905,
62
+ "step": 9
63
+ },
64
+ {
65
+ "epoch": 0.09,
66
+ "learning_rate": 3.91304347826087e-06,
67
+ "loss": 1.5486,
68
+ "step": 10
69
+ },
70
+ {
71
+ "epoch": 0.1,
72
+ "learning_rate": 4.347826086956522e-06,
73
+ "loss": 1.5299,
74
+ "step": 11
75
+ },
76
+ {
77
+ "epoch": 0.1,
78
+ "eval_loss": 1.5621598958969116,
79
+ "eval_runtime": 105.0814,
80
+ "eval_samples_per_second": 3.14,
81
+ "eval_steps_per_second": 0.105,
82
+ "eval_wer": 219.67108414497844,
83
+ "step": 11
84
+ },
85
+ {
86
+ "epoch": 0.11,
87
+ "learning_rate": 4.782608695652174e-06,
88
+ "loss": 1.4537,
89
+ "step": 12
90
+ },
91
+ {
92
+ "epoch": 0.12,
93
+ "learning_rate": 5.2173913043478265e-06,
94
+ "loss": 1.3729,
95
+ "step": 13
96
+ },
97
+ {
98
+ "epoch": 0.12,
99
+ "learning_rate": 5.652173913043479e-06,
100
+ "loss": 1.4921,
101
+ "step": 14
102
+ },
103
+ {
104
+ "epoch": 0.13,
105
+ "learning_rate": 6.086956521739132e-06,
106
+ "loss": 1.5069,
107
+ "step": 15
108
  },
109
  {
110
+ "epoch": 0.14,
111
+ "learning_rate": 6.521739130434783e-06,
112
+ "loss": 1.388,
113
+ "step": 16
114
+ },
115
+ {
116
+ "epoch": 0.15,
117
+ "learning_rate": 6.956521739130435e-06,
118
+ "loss": 1.3857,
119
+ "step": 17
120
+ },
121
+ {
122
+ "epoch": 0.16,
123
+ "learning_rate": 7.391304347826087e-06,
124
+ "loss": 1.3389,
125
+ "step": 18
126
+ },
127
+ {
128
+ "epoch": 0.17,
129
+ "learning_rate": 7.82608695652174e-06,
130
+ "loss": 1.3089,
131
+ "step": 19
132
+ },
133
+ {
134
+ "epoch": 0.18,
135
+ "learning_rate": 8.260869565217392e-06,
136
+ "loss": 1.2118,
137
+ "step": 20
138
+ },
139
+ {
140
+ "epoch": 0.19,
141
+ "learning_rate": 8.695652173913044e-06,
142
+ "loss": 1.1634,
143
+ "step": 21
144
+ },
145
+ {
146
+ "epoch": 0.2,
147
+ "learning_rate": 9.130434782608697e-06,
148
+ "loss": 1.1908,
149
+ "step": 22
150
+ },
151
+ {
152
+ "epoch": 0.2,
153
+ "eval_loss": 1.3651723861694336,
154
+ "eval_runtime": 110.8115,
155
+ "eval_samples_per_second": 2.978,
156
+ "eval_steps_per_second": 0.099,
157
+ "eval_wer": 192.2401405077439,
158
+ "step": 22
159
+ },
160
+ {
161
+ "epoch": 0.21,
162
+ "learning_rate": 9.565217391304349e-06,
163
+ "loss": 1.1192,
164
+ "step": 23
165
+ },
166
+ {
167
+ "epoch": 0.21,
168
  "learning_rate": 1e-05,
169
+ "loss": 1.2042,
170
+ "step": 24
171
  },
172
  {
173
+ "epoch": 0.22,
174
+ "learning_rate": 9.887640449438202e-06,
175
+ "loss": 1.1448,
176
+ "step": 25
177
+ },
178
+ {
179
+ "epoch": 0.23,
180
+ "learning_rate": 9.775280898876405e-06,
181
+ "loss": 1.1393,
182
+ "step": 26
183
+ },
184
+ {
185
+ "epoch": 0.24,
186
+ "learning_rate": 9.662921348314608e-06,
187
+ "loss": 1.1482,
188
+ "step": 27
189
+ },
190
+ {
191
+ "epoch": 0.25,
192
+ "learning_rate": 9.55056179775281e-06,
193
+ "loss": 1.179,
194
+ "step": 28
195
+ },
196
+ {
197
+ "epoch": 0.26,
198
+ "learning_rate": 9.438202247191012e-06,
199
+ "loss": 0.9847,
200
+ "step": 29
201
+ },
202
+ {
203
+ "epoch": 0.27,
204
+ "learning_rate": 9.325842696629213e-06,
205
+ "loss": 1.1149,
206
+ "step": 30
207
+ },
208
+ {
209
+ "epoch": 0.28,
210
+ "learning_rate": 9.213483146067417e-06,
211
+ "loss": 1.0657,
212
+ "step": 31
213
+ },
214
+ {
215
+ "epoch": 0.29,
216
+ "learning_rate": 9.101123595505619e-06,
217
+ "loss": 1.0028,
218
+ "step": 32
219
+ },
220
+ {
221
+ "epoch": 0.29,
222
+ "learning_rate": 8.988764044943822e-06,
223
+ "loss": 1.1161,
224
+ "step": 33
225
+ },
226
+ {
227
+ "epoch": 0.29,
228
+ "eval_loss": 1.1921106576919556,
229
+ "eval_runtime": 103.0628,
230
+ "eval_samples_per_second": 3.202,
231
+ "eval_steps_per_second": 0.107,
232
+ "eval_wer": 200.23950183618075,
233
+ "step": 33
234
+ },
235
+ {
236
+ "epoch": 0.3,
237
+ "learning_rate": 8.876404494382023e-06,
238
+ "loss": 1.0931,
239
+ "step": 34
240
+ },
241
+ {
242
+ "epoch": 0.31,
243
+ "learning_rate": 8.764044943820226e-06,
244
+ "loss": 1.0635,
245
+ "step": 35
246
+ },
247
+ {
248
+ "epoch": 0.32,
249
+ "learning_rate": 8.651685393258428e-06,
250
+ "loss": 1.0671,
251
+ "step": 36
252
+ },
253
+ {
254
+ "epoch": 0.33,
255
+ "learning_rate": 8.53932584269663e-06,
256
+ "loss": 1.0251,
257
+ "step": 37
258
+ },
259
+ {
260
+ "epoch": 0.34,
261
+ "learning_rate": 8.426966292134832e-06,
262
+ "loss": 1.0486,
263
+ "step": 38
264
+ },
265
+ {
266
+ "epoch": 1.01,
267
+ "learning_rate": 8.314606741573035e-06,
268
+ "loss": 0.9926,
269
+ "step": 39
270
+ },
271
+ {
272
+ "epoch": 1.02,
273
+ "learning_rate": 8.202247191011237e-06,
274
+ "loss": 1.0232,
275
+ "step": 40
276
+ },
277
+ {
278
+ "epoch": 1.03,
279
+ "learning_rate": 8.08988764044944e-06,
280
+ "loss": 0.9421,
281
+ "step": 41
282
+ },
283
+ {
284
+ "epoch": 1.04,
285
+ "learning_rate": 7.97752808988764e-06,
286
+ "loss": 0.9541,
287
+ "step": 42
288
+ },
289
+ {
290
+ "epoch": 1.04,
291
+ "learning_rate": 7.865168539325843e-06,
292
+ "loss": 0.9639,
293
+ "step": 43
294
+ },
295
+ {
296
+ "epoch": 1.05,
297
+ "learning_rate": 7.752808988764046e-06,
298
+ "loss": 0.9216,
299
+ "step": 44
300
+ },
301
+ {
302
+ "epoch": 1.05,
303
+ "eval_loss": 1.1263455152511597,
304
+ "eval_runtime": 93.6663,
305
+ "eval_samples_per_second": 3.523,
306
+ "eval_steps_per_second": 0.117,
307
+ "eval_wer": 186.52403001756346,
308
+ "step": 44
309
+ },
310
+ {
311
+ "epoch": 1.06,
312
+ "learning_rate": 7.640449438202247e-06,
313
+ "loss": 0.9097,
314
+ "step": 45
315
+ },
316
+ {
317
+ "epoch": 1.07,
318
+ "learning_rate": 7.5280898876404495e-06,
319
+ "loss": 0.8688,
320
+ "step": 46
321
+ },
322
+ {
323
+ "epoch": 1.08,
324
+ "learning_rate": 7.415730337078652e-06,
325
+ "loss": 0.9019,
326
+ "step": 47
327
+ },
328
+ {
329
+ "epoch": 1.09,
330
+ "learning_rate": 7.303370786516854e-06,
331
+ "loss": 0.9135,
332
+ "step": 48
333
+ },
334
+ {
335
+ "epoch": 1.1,
336
+ "learning_rate": 7.191011235955056e-06,
337
+ "loss": 0.9033,
338
+ "step": 49
339
+ },
340
+ {
341
+ "epoch": 1.11,
342
+ "learning_rate": 7.078651685393258e-06,
343
+ "loss": 0.8575,
344
+ "step": 50
345
+ },
346
+ {
347
+ "epoch": 1.12,
348
+ "learning_rate": 6.966292134831461e-06,
349
+ "loss": 0.8276,
350
+ "step": 51
351
+ },
352
+ {
353
+ "epoch": 1.12,
354
+ "learning_rate": 6.853932584269663e-06,
355
+ "loss": 0.9276,
356
+ "step": 52
357
+ },
358
+ {
359
+ "epoch": 1.13,
360
+ "learning_rate": 6.741573033707865e-06,
361
+ "loss": 0.9186,
362
+ "step": 53
363
+ },
364
+ {
365
+ "epoch": 1.14,
366
+ "learning_rate": 6.629213483146067e-06,
367
+ "loss": 0.8693,
368
+ "step": 54
369
+ },
370
+ {
371
+ "epoch": 1.15,
372
+ "learning_rate": 6.51685393258427e-06,
373
+ "loss": 0.8441,
374
+ "step": 55
375
+ },
376
+ {
377
+ "epoch": 1.15,
378
+ "eval_loss": 1.0945535898208618,
379
+ "eval_runtime": 102.0356,
380
+ "eval_samples_per_second": 3.234,
381
+ "eval_steps_per_second": 0.108,
382
+ "eval_wer": 179.32300814306242,
383
+ "step": 55
384
+ },
385
+ {
386
+ "epoch": 1.16,
387
+ "learning_rate": 6.404494382022472e-06,
388
+ "loss": 0.8189,
389
+ "step": 56
390
+ },
391
+ {
392
+ "epoch": 1.17,
393
+ "learning_rate": 6.292134831460674e-06,
394
+ "loss": 0.8047,
395
+ "step": 57
396
+ },
397
+ {
398
+ "epoch": 1.18,
399
+ "learning_rate": 6.179775280898876e-06,
400
+ "loss": 0.7858,
401
+ "step": 58
402
+ },
403
+ {
404
+ "epoch": 1.19,
405
+ "learning_rate": 6.06741573033708e-06,
406
+ "loss": 0.7312,
407
+ "step": 59
408
+ },
409
+ {
410
+ "epoch": 1.2,
411
+ "learning_rate": 5.955056179775281e-06,
412
+ "loss": 0.7638,
413
+ "step": 60
414
+ },
415
+ {
416
+ "epoch": 1.21,
417
+ "learning_rate": 5.842696629213483e-06,
418
+ "loss": 0.7374,
419
+ "step": 61
420
+ },
421
+ {
422
+ "epoch": 1.21,
423
+ "learning_rate": 5.730337078651685e-06,
424
+ "loss": 0.7986,
425
+ "step": 62
426
+ },
427
+ {
428
+ "epoch": 1.22,
429
+ "learning_rate": 5.617977528089889e-06,
430
+ "loss": 0.762,
431
+ "step": 63
432
+ },
433
+ {
434
+ "epoch": 1.23,
435
+ "learning_rate": 5.50561797752809e-06,
436
+ "loss": 0.785,
437
+ "step": 64
438
+ },
439
+ {
440
+ "epoch": 1.24,
441
+ "learning_rate": 5.393258426966292e-06,
442
+ "loss": 0.8349,
443
+ "step": 65
444
+ },
445
+ {
446
+ "epoch": 1.25,
447
+ "learning_rate": 5.280898876404494e-06,
448
+ "loss": 0.8505,
449
+ "step": 66
450
+ },
451
+ {
452
+ "epoch": 1.25,
453
+ "eval_loss": 1.0748353004455566,
454
+ "eval_runtime": 93.3893,
455
+ "eval_samples_per_second": 3.534,
456
+ "eval_steps_per_second": 0.118,
457
+ "eval_wer": 159.68385757624142,
458
+ "step": 66
459
+ },
460
+ {
461
+ "epoch": 1.26,
462
+ "learning_rate": 5.168539325842698e-06,
463
+ "loss": 0.7309,
464
+ "step": 67
465
+ },
466
+ {
467
+ "epoch": 1.27,
468
+ "learning_rate": 5.0561797752809e-06,
469
+ "loss": 0.8261,
470
+ "step": 68
471
+ },
472
+ {
473
+ "epoch": 1.28,
474
+ "learning_rate": 4.943820224719101e-06,
475
+ "loss": 0.8053,
476
+ "step": 69
477
+ },
478
+ {
479
+ "epoch": 1.29,
480
+ "learning_rate": 4.831460674157304e-06,
481
+ "loss": 0.7672,
482
+ "step": 70
483
+ },
484
+ {
485
+ "epoch": 1.29,
486
+ "learning_rate": 4.719101123595506e-06,
487
+ "loss": 0.8692,
488
+ "step": 71
489
+ },
490
+ {
491
+ "epoch": 1.3,
492
+ "learning_rate": 4.606741573033709e-06,
493
+ "loss": 0.8588,
494
+ "step": 72
495
+ },
496
+ {
497
+ "epoch": 1.31,
498
+ "learning_rate": 4.494382022471911e-06,
499
+ "loss": 0.8277,
500
+ "step": 73
501
+ },
502
+ {
503
+ "epoch": 1.32,
504
+ "learning_rate": 4.382022471910113e-06,
505
+ "loss": 0.8387,
506
+ "step": 74
507
+ },
508
+ {
509
+ "epoch": 1.33,
510
+ "learning_rate": 4.269662921348315e-06,
511
+ "loss": 0.801,
512
+ "step": 75
513
+ },
514
+ {
515
+ "epoch": 1.34,
516
+ "learning_rate": 4.157303370786518e-06,
517
+ "loss": 0.7801,
518
+ "step": 76
519
+ },
520
+ {
521
+ "epoch": 2.01,
522
+ "learning_rate": 4.04494382022472e-06,
523
+ "loss": 0.7844,
524
+ "step": 77
525
+ },
526
+ {
527
+ "epoch": 2.01,
528
+ "eval_loss": 1.0585265159606934,
529
+ "eval_runtime": 87.6428,
530
+ "eval_samples_per_second": 3.765,
531
+ "eval_steps_per_second": 0.126,
532
+ "eval_wer": 163.2923519080313,
533
+ "step": 77
534
+ },
535
+ {
536
+ "epoch": 2.02,
537
+ "learning_rate": 3.932584269662922e-06,
538
+ "loss": 0.8227,
539
+ "step": 78
540
+ },
541
+ {
542
+ "epoch": 2.03,
543
+ "learning_rate": 3.820224719101124e-06,
544
+ "loss": 0.757,
545
+ "step": 79
546
+ },
547
+ {
548
+ "epoch": 2.04,
549
+ "learning_rate": 3.707865168539326e-06,
550
+ "loss": 0.7713,
551
+ "step": 80
552
+ },
553
+ {
554
+ "epoch": 2.04,
555
+ "learning_rate": 3.595505617977528e-06,
556
+ "loss": 0.7782,
557
+ "step": 81
558
+ },
559
+ {
560
+ "epoch": 2.05,
561
+ "learning_rate": 3.4831460674157306e-06,
562
+ "loss": 0.7468,
563
+ "step": 82
564
+ },
565
+ {
566
+ "epoch": 2.06,
567
+ "learning_rate": 3.3707865168539327e-06,
568
+ "loss": 0.7457,
569
+ "step": 83
570
+ },
571
+ {
572
+ "epoch": 2.07,
573
+ "learning_rate": 3.258426966292135e-06,
574
+ "loss": 0.7126,
575
+ "step": 84
576
+ },
577
+ {
578
+ "epoch": 2.08,
579
+ "learning_rate": 3.146067415730337e-06,
580
+ "loss": 0.7476,
581
+ "step": 85
582
+ },
583
+ {
584
+ "epoch": 2.09,
585
+ "learning_rate": 3.03370786516854e-06,
586
+ "loss": 0.76,
587
+ "step": 86
588
+ },
589
+ {
590
+ "epoch": 2.1,
591
+ "learning_rate": 2.9213483146067416e-06,
592
+ "loss": 0.7673,
593
+ "step": 87
594
+ },
595
+ {
596
+ "epoch": 2.11,
597
+ "learning_rate": 2.8089887640449444e-06,
598
+ "loss": 0.7208,
599
+ "step": 88
600
+ },
601
+ {
602
+ "epoch": 2.11,
603
+ "eval_loss": 1.0490810871124268,
604
+ "eval_runtime": 106.8845,
605
+ "eval_samples_per_second": 3.087,
606
+ "eval_steps_per_second": 0.103,
607
+ "eval_wer": 158.1031454574485,
608
+ "step": 88
609
+ },
610
+ {
611
+ "epoch": 2.12,
612
+ "learning_rate": 2.696629213483146e-06,
613
+ "loss": 0.7045,
614
+ "step": 89
615
+ },
616
+ {
617
+ "epoch": 2.12,
618
+ "learning_rate": 2.584269662921349e-06,
619
+ "loss": 0.7887,
620
+ "step": 90
621
+ },
622
+ {
623
+ "epoch": 2.13,
624
+ "learning_rate": 2.4719101123595505e-06,
625
+ "loss": 0.7821,
626
+ "step": 91
627
+ },
628
+ {
629
+ "epoch": 2.14,
630
+ "learning_rate": 2.359550561797753e-06,
631
+ "loss": 0.7487,
632
+ "step": 92
633
+ },
634
+ {
635
+ "epoch": 2.15,
636
+ "learning_rate": 2.2471910112359554e-06,
637
+ "loss": 0.7281,
638
+ "step": 93
639
+ },
640
+ {
641
+ "epoch": 2.16,
642
+ "learning_rate": 2.1348314606741574e-06,
643
+ "loss": 0.7084,
644
+ "step": 94
645
+ },
646
+ {
647
+ "epoch": 2.17,
648
+ "learning_rate": 2.02247191011236e-06,
649
+ "loss": 0.6971,
650
+ "step": 95
651
+ },
652
+ {
653
+ "epoch": 2.18,
654
+ "learning_rate": 1.910112359550562e-06,
655
+ "loss": 0.6869,
656
+ "step": 96
657
+ },
658
+ {
659
+ "epoch": 2.19,
660
+ "learning_rate": 1.797752808988764e-06,
661
+ "loss": 0.6411,
662
+ "step": 97
663
+ },
664
+ {
665
+ "epoch": 2.2,
666
+ "learning_rate": 1.6853932584269663e-06,
667
+ "loss": 0.6658,
668
+ "step": 98
669
+ },
670
+ {
671
+ "epoch": 2.21,
672
+ "learning_rate": 1.5730337078651686e-06,
673
+ "loss": 0.6481,
674
+ "step": 99
675
+ },
676
+ {
677
+ "epoch": 2.21,
678
+ "eval_loss": 1.046801209449768,
679
+ "eval_runtime": 89.0792,
680
+ "eval_samples_per_second": 3.705,
681
+ "eval_steps_per_second": 0.123,
682
+ "eval_wer": 158.51828197349514,
683
+ "step": 99
684
+ },
685
+ {
686
+ "epoch": 2.21,
687
+ "learning_rate": 1.4606741573033708e-06,
688
+ "loss": 0.7064,
689
+ "step": 100
690
+ },
691
+ {
692
+ "epoch": 2.22,
693
+ "learning_rate": 1.348314606741573e-06,
694
+ "loss": 0.6699,
695
+ "step": 101
696
+ },
697
+ {
698
+ "epoch": 2.23,
699
+ "learning_rate": 1.2359550561797752e-06,
700
+ "loss": 0.6974,
701
+ "step": 102
702
+ },
703
+ {
704
+ "epoch": 2.24,
705
+ "learning_rate": 1.1235955056179777e-06,
706
+ "loss": 0.7523,
707
+ "step": 103
708
+ },
709
+ {
710
+ "epoch": 2.25,
711
+ "learning_rate": 1.01123595505618e-06,
712
+ "loss": 0.7621,
713
+ "step": 104
714
+ },
715
+ {
716
+ "epoch": 2.26,
717
+ "learning_rate": 8.98876404494382e-07,
718
+ "loss": 0.657,
719
+ "step": 105
720
+ },
721
+ {
722
+ "epoch": 2.27,
723
+ "learning_rate": 7.865168539325843e-07,
724
+ "loss": 0.7443,
725
+ "step": 106
726
+ },
727
+ {
728
+ "epoch": 2.28,
729
+ "learning_rate": 6.741573033707865e-07,
730
+ "loss": 0.729,
731
+ "step": 107
732
+ },
733
+ {
734
+ "epoch": 2.29,
735
+ "learning_rate": 5.617977528089888e-07,
736
+ "loss": 0.6983,
737
+ "step": 108
738
+ },
739
+ {
740
+ "epoch": 2.29,
741
+ "learning_rate": 4.49438202247191e-07,
742
+ "loss": 0.7963,
743
+ "step": 109
744
+ },
745
+ {
746
+ "epoch": 2.3,
747
+ "learning_rate": 3.3707865168539325e-07,
748
+ "loss": 0.7912,
749
+ "step": 110
750
+ },
751
+ {
752
+ "epoch": 2.3,
753
+ "eval_loss": 1.045613408088684,
754
+ "eval_runtime": 91.8976,
755
+ "eval_samples_per_second": 3.591,
756
+ "eval_steps_per_second": 0.12,
757
+ "eval_wer": 168.6092926712438,
758
+ "step": 110
759
+ },
760
+ {
761
+ "epoch": 2.31,
762
+ "learning_rate": 2.247191011235955e-07,
763
+ "loss": 0.7626,
764
+ "step": 111
765
+ },
766
+ {
767
+ "epoch": 2.32,
768
+ "learning_rate": 1.1235955056179776e-07,
769
+ "loss": 0.7754,
770
+ "step": 112
771
  },
772
  {
773
+ "epoch": 2.32,
774
+ "step": 112,
775
+ "total_flos": 1.7415399333888e+17,
776
+ "train_loss": 0.9721650715385165,
777
+ "train_runtime": 1491.7987,
778
+ "train_samples_per_second": 4.805,
779
+ "train_steps_per_second": 0.075
780
  }
781
  ],
782
+ "max_steps": 112,
783
  "num_train_epochs": 9223372036854775807,
784
+ "total_flos": 1.7415399333888e+17,
785
  "trial_name": null,
786
  "trial_params": null
787
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9cf1b9241b15d95f3310024d7bec9fb6d139a94c9d760c0be51787d41a93fc3
3
  size 3579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23666f2916cd5411d616f8c83fed6cb219a0ba72c70f8943bef9ed5bb45800ed
3
  size 3579