nouamanetazi (HF staff) committed
Commit 789fd07
1 Parent(s): a4180c5

End of training

.ipynb_checkpoints/README-checkpoint.md ADDED
@@ -0,0 +1,51 @@
+ ---
+ license: apache-2.0
+ tags:
+ - generated_from_trainer
+ datasets:
+ - common_voice
+ model-index:
+ - name: wav2vec2-xls-r-300m-ar
+ results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # wav2vec2-xls-r-300m-ar
+
+ This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on the common_voice dataset.
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0005
+ - train_batch_size: 64
+ - eval_batch_size: 64
+ - seed: 42
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_steps: 2000
+ - num_epochs: 5
+ - mixed_precision_training: Native AMP
+
+ ### Framework versions
+
+ - Transformers 4.17.0.dev0
+ - Pytorch 1.10.2+cu102
+ - Datasets 1.18.2.dev0
+ - Tokenizers 0.11.0
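The card above is still a stub, so as a rough usage sketch (not part of this commit): loading the checkpoint for greedy CTC inference with Transformers could look like the following. The repo id `nouamanetazi/wav2vec2-xls-r-300m-ar` is assumed from the author and model name, and `speech` stands in for a 16 kHz mono waveform.

```python
# Hypothetical inference sketch; the repo id is assumed, not confirmed by this commit.
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

model_id = "nouamanetazi/wav2vec2-xls-r-300m-ar"  # assumed repo id
processor = Wav2Vec2Processor.from_pretrained(model_id)
model = Wav2Vec2ForCTC.from_pretrained(model_id)

def transcribe(speech, sampling_rate=16_000):
    """Greedy CTC decode of a single 1-D waveform (XLS-R expects 16 kHz audio)."""
    inputs = processor(speech, sampling_rate=sampling_rate, return_tensors="pt")
    with torch.no_grad():
        logits = model(inputs.input_values).logits
    pred_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(pred_ids)[0]
```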
.ipynb_checkpoints/eval_results-checkpoint.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "epoch": 20.0,
+ "eval_loss": 6.937458515167236,
+ "eval_runtime": 5.7217,
+ "eval_samples": 128,
+ "eval_samples_per_second": 22.371,
+ "eval_steps_per_second": 0.35,
+ "eval_wer": 1.0
+ }
.ipynb_checkpoints/trainer_state-checkpoint.json ADDED
@@ -0,0 +1,52 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 1.0,
+ "global_step": 340,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.15,
+ "learning_rate": 1.1750000000000001e-05,
+ "loss": 15.017,
+ "step": 50
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 2.425e-05,
+ "loss": 6.7134,
+ "step": 100
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 3.675e-05,
+ "loss": 4.3869,
+ "step": 150
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 4.9250000000000004e-05,
+ "loss": 3.6209,
+ "step": 200
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 6.175e-05,
+ "loss": 3.2011,
+ "step": 250
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 7.425e-05,
+ "loss": 3.0513,
+ "step": 300
+ }
+ ],
+ "max_steps": 1700,
+ "num_train_epochs": 5,
+ "total_flos": 1.7302176965482906e+18,
+ "trial_name": null,
+ "trial_params": null
+ }
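Since `trainer_state.json` holds the running loss curve shown above, here is a minimal sketch (assuming the file sits in the current directory) of how the `log_history` entries can be inspected:

```python
# Minimal sketch: read the Trainer state and print the logged training steps.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "loss" in entry:  # skip evaluation-only entries
        print(entry["step"], entry.get("learning_rate"), entry["loss"])
```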
all_results.json CHANGED
@@ -1,8 +1,8 @@
{
"epoch": 20.0,
- "eval_loss": 6.937458515167236,
"eval_runtime": 5.7217,
- "eval_samples": 128,
"eval_samples_per_second": 22.371,
"eval_steps_per_second": 0.35,
"eval_wer": 1.0,

{
"epoch": 20.0,
+ "eval_loss": 3.0191357135772705,
"eval_runtime": 5.7217,
+ "eval_samples": 7622,
"eval_samples_per_second": 22.371,
"eval_steps_per_second": 0.35,
"eval_wer": 1.0,
eval_results.json CHANGED
@@ -1,9 +1,5 @@
{
- "epoch": 20.0,
- "eval_loss": 6.937458515167236,
- "eval_runtime": 5.7217,
- "eval_samples": 128,
- "eval_samples_per_second": 22.371,
- "eval_steps_per_second": 0.35,
"eval_wer": 1.0
}

{
+ "eval_loss": 3.0191357135772705,
+ "eval_samples": 7622,
"eval_wer": 1.0
}
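On the `eval_wer` field: a value of 1.0 means essentially no reference words were recovered, which is plausible this early in training. As a sketch (an assumption, not code from this run), WER is typically computed with the `wer` metric from the Datasets version listed in the card:

```python
# Sketch only: the strings below are placeholders, not outputs from this model.
from datasets import load_metric

wer_metric = load_metric("wer")
predictions = ["placeholder hypothesis"]
references = ["placeholder reference transcript"]
print(wer_metric.compute(predictions=predictions, references=references))
```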
trainer_state.json CHANGED
@@ -1,325 +1,52 @@
{
"best_metric": null,
"best_model_checkpoint": null,
- "epoch": 20.0,
- "global_step": 40,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
- "epoch": 1.0,
- "learning_rate": 7.5e-05,
- "loss": 19.0933,
- "step": 2
},
{
- "epoch": 1.0,
- "eval_loss": 32.55470657348633,
- "eval_runtime": 5.3015,
- "eval_samples_per_second": 24.144,
- "eval_steps_per_second": 0.377,
- "eval_wer": 1.0,
- "step": 2
},
{
- "epoch": 2.0,
- "learning_rate": 7.125e-05,
- "loss": 18.6757,
- "step": 4
},
{
- "epoch": 2.0,
- "eval_loss": 31.040802001953125,
- "eval_runtime": 5.2939,
- "eval_samples_per_second": 24.179,
- "eval_steps_per_second": 0.378,
- "eval_wer": 1.0,
- "step": 4
},
{
- "epoch": 3.0,
- "learning_rate": 6.937499999999999e-05,
- "loss": 17.1649,
- "step": 6
},
{
- "epoch": 3.0,
- "eval_loss": 29.57763671875,
- "eval_runtime": 5.2818,
- "eval_samples_per_second": 24.234,
- "eval_steps_per_second": 0.379,
- "eval_wer": 1.0,
- "step": 6
- },
- {
- "epoch": 4.0,
- "learning_rate": 6.5625e-05,
- "loss": 14.7415,
- "step": 8
- },
- {
- "epoch": 4.0,
- "eval_loss": 22.918315887451172,
- "eval_runtime": 5.4291,
- "eval_samples_per_second": 23.577,
- "eval_steps_per_second": 0.368,
- "eval_wer": 1.0,
- "step": 8
- },
- {
- "epoch": 5.0,
- "learning_rate": 6.187499999999999e-05,
- "loss": 11.8071,
- "step": 10
- },
- {
- "epoch": 5.0,
- "eval_loss": 17.507305145263672,
- "eval_runtime": 5.3146,
- "eval_samples_per_second": 24.085,
- "eval_steps_per_second": 0.376,
- "eval_wer": 1.0,
- "step": 10
- },
- {
- "epoch": 6.0,
- "learning_rate": 5.8124999999999997e-05,
- "loss": 9.7675,
- "step": 12
- },
- {
- "epoch": 6.0,
- "eval_loss": 14.17426872253418,
- "eval_runtime": 5.4054,
- "eval_samples_per_second": 23.68,
- "eval_steps_per_second": 0.37,
- "eval_wer": 1.0,
- "step": 12
- },
- {
- "epoch": 7.0,
- "learning_rate": 5.4374999999999994e-05,
- "loss": 8.4193,
- "step": 14
- },
- {
- "epoch": 7.0,
- "eval_loss": 12.122542381286621,
- "eval_runtime": 5.2782,
- "eval_samples_per_second": 24.251,
- "eval_steps_per_second": 0.379,
- "eval_wer": 1.0,
- "step": 14
- },
- {
- "epoch": 8.0,
- "learning_rate": 5.0625e-05,
- "loss": 7.4746,
- "step": 16
- },
- {
- "epoch": 8.0,
- "eval_loss": 10.744585037231445,
- "eval_runtime": 5.3374,
- "eval_samples_per_second": 23.982,
- "eval_steps_per_second": 0.375,
- "eval_wer": 1.0,
- "step": 16
- },
- {
- "epoch": 9.0,
- "learning_rate": 4.6874999999999994e-05,
- "loss": 6.8442,
- "step": 18
- },
- {
- "epoch": 9.0,
- "eval_loss": 9.794867515563965,
- "eval_runtime": 5.2577,
- "eval_samples_per_second": 24.345,
- "eval_steps_per_second": 0.38,
- "eval_wer": 1.0,
- "step": 18
- },
- {
- "epoch": 10.0,
- "learning_rate": 4.312499999999999e-05,
- "loss": 6.3765,
- "step": 20
- },
- {
- "epoch": 10.0,
- "eval_loss": 9.114768981933594,
- "eval_runtime": 5.2827,
- "eval_samples_per_second": 24.23,
- "eval_steps_per_second": 0.379,
- "eval_wer": 1.0,
- "step": 20
- },
- {
- "epoch": 11.0,
- "learning_rate": 3.9374999999999995e-05,
- "loss": 6.0321,
- "step": 22
- },
- {
- "epoch": 11.0,
- "eval_loss": 8.59341812133789,
- "eval_runtime": 5.2171,
- "eval_samples_per_second": 24.535,
- "eval_steps_per_second": 0.383,
- "eval_wer": 1.0,
- "step": 22
- },
- {
- "epoch": 12.0,
- "learning_rate": 3.5625e-05,
- "loss": 5.7783,
- "step": 24
- },
- {
- "epoch": 12.0,
- "eval_loss": 8.198293685913086,
- "eval_runtime": 5.3204,
- "eval_samples_per_second": 24.058,
- "eval_steps_per_second": 0.376,
- "eval_wer": 1.0,
- "step": 24
- },
- {
- "epoch": 13.0,
- "learning_rate": 3.1874999999999996e-05,
- "loss": 5.5827,
- "step": 26
- },
- {
- "epoch": 13.0,
- "eval_loss": 7.877962112426758,
- "eval_runtime": 5.2188,
- "eval_samples_per_second": 24.527,
- "eval_steps_per_second": 0.383,
- "eval_wer": 1.0,
- "step": 26
- },
- {
- "epoch": 14.0,
- "learning_rate": 2.8125e-05,
- "loss": 5.4249,
- "step": 28
- },
- {
- "epoch": 14.0,
- "eval_loss": 7.628803730010986,
- "eval_runtime": 5.2811,
- "eval_samples_per_second": 24.238,
- "eval_steps_per_second": 0.379,
- "eval_wer": 1.0,
- "step": 28
- },
- {
- "epoch": 15.0,
- "learning_rate": 2.4375e-05,
- "loss": 5.3088,
- "step": 30
- },
- {
- "epoch": 15.0,
- "eval_loss": 7.427917003631592,
- "eval_runtime": 5.236,
- "eval_samples_per_second": 24.446,
- "eval_steps_per_second": 0.382,
- "eval_wer": 1.0,
- "step": 30
- },
- {
- "epoch": 16.0,
- "learning_rate": 2.0625e-05,
- "loss": 5.2078,
- "step": 32
- },
- {
- "epoch": 16.0,
- "eval_loss": 7.268764972686768,
- "eval_runtime": 5.2941,
- "eval_samples_per_second": 24.178,
- "eval_steps_per_second": 0.378,
- "eval_wer": 1.0,
- "step": 32
- },
- {
- "epoch": 17.0,
- "learning_rate": 1.6875e-05,
- "loss": 5.1289,
- "step": 34
- },
- {
- "epoch": 17.0,
- "eval_loss": 7.145933628082275,
- "eval_runtime": 5.2685,
- "eval_samples_per_second": 24.295,
- "eval_steps_per_second": 0.38,
- "eval_wer": 1.0,
- "step": 34
- },
- {
- "epoch": 18.0,
- "learning_rate": 1.3124999999999999e-05,
- "loss": 5.0697,
- "step": 36
- },
- {
- "epoch": 18.0,
- "eval_loss": 7.052780628204346,
- "eval_runtime": 5.263,
- "eval_samples_per_second": 24.321,
- "eval_steps_per_second": 0.38,
- "eval_wer": 1.0,
- "step": 36
- },
- {
- "epoch": 19.0,
- "learning_rate": 9.375e-06,
- "loss": 5.0227,
- "step": 38
- },
- {
- "epoch": 19.0,
- "eval_loss": 6.983470916748047,
- "eval_runtime": 5.2829,
- "eval_samples_per_second": 24.229,
- "eval_steps_per_second": 0.379,
- "eval_wer": 1.0,
- "step": 38
- },
- {
- "epoch": 20.0,
- "learning_rate": 5.6249999999999995e-06,
- "loss": 4.9853,
- "step": 40
- },
- {
- "epoch": 20.0,
- "eval_loss": 6.937458515167236,
- "eval_runtime": 5.3165,
- "eval_samples_per_second": 24.076,
- "eval_steps_per_second": 0.376,
- "eval_wer": 1.0,
- "step": 40
- },
- {
- "epoch": 20.0,
- "step": 40,
- "total_flos": 5.430583918308557e+17,
- "train_loss": 8.69529299736023,
- "train_runtime": 243.8197,
- "train_samples_per_second": 10.5,
- "train_steps_per_second": 0.164
}
],
- "max_steps": 40,
- "num_train_epochs": 20,
- "total_flos": 5.430583918308557e+17,
"trial_name": null,
"trial_params": null
}
 
{
"best_metric": null,
"best_model_checkpoint": null,
+ "epoch": 1.0,
+ "global_step": 340,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
+ "epoch": 0.15,
+ "learning_rate": 1.1750000000000001e-05,
+ "loss": 15.017,
+ "step": 50
},
{
+ "epoch": 0.29,
+ "learning_rate": 2.425e-05,
+ "loss": 6.7134,
+ "step": 100
},
{
+ "epoch": 0.44,
+ "learning_rate": 3.675e-05,
+ "loss": 4.3869,
+ "step": 150
},
{
+ "epoch": 0.59,
+ "learning_rate": 4.9250000000000004e-05,
+ "loss": 3.6209,
+ "step": 200
},
{
+ "epoch": 0.74,
+ "learning_rate": 6.175e-05,
+ "loss": 3.2011,
+ "step": 250
},
{
+ "epoch": 0.88,
+ "learning_rate": 7.425e-05,
+ "loss": 3.0513,
+ "step": 300
}
],
+ "max_steps": 1700,
+ "num_train_epochs": 5,
+ "total_flos": 1.7302176965482906e+18,
"trial_name": null,
"trial_params": null
}