comodoro committed on
Commit
74c4791
1 Parent(s): 1588b12

Added further fine-tuned model

Browse files
Files changed (3)
  1. config.json +4 -4
  2. pytorch_model.bin +1 -1
  3. trainer_state.json +53 -229
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "facebook/wav2vec2-xls-r-300m",
3
  "activation_dropout": 0.0,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
@@ -8,7 +8,7 @@
8
  "architectures": [
9
  "Wav2Vec2ForCTC"
10
  ],
11
- "attention_dropout": 0.0,
12
  "bos_token_id": 1,
13
  "classifier_proj_size": 256,
14
  "codevector_dim": 768,
@@ -54,12 +54,12 @@
54
  "final_dropout": 0.0,
55
  "gradient_checkpointing": false,
56
  "hidden_act": "gelu",
57
- "hidden_dropout": 0.0,
58
  "hidden_size": 1024,
59
  "initializer_range": 0.02,
60
  "intermediate_size": 4096,
61
  "layer_norm_eps": 1e-05,
62
- "layerdrop": 0.0,
63
  "mask_feature_length": 10,
64
  "mask_feature_min_masks": 0,
65
  "mask_feature_prob": 0.0,
 
1
  {
2
+ "_name_or_path": "comodoro/wav2vec2-xls-r-300m-cs-cv8",
3
  "activation_dropout": 0.0,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
 
8
  "architectures": [
9
  "Wav2Vec2ForCTC"
10
  ],
11
+ "attention_dropout": 0.1,
12
  "bos_token_id": 1,
13
  "classifier_proj_size": 256,
14
  "codevector_dim": 768,
 
54
  "final_dropout": 0.0,
55
  "gradient_checkpointing": false,
56
  "hidden_act": "gelu",
57
+ "hidden_dropout": 0.1,
58
  "hidden_size": 1024,
59
  "initializer_range": 0.02,
60
  "intermediate_size": 4096,
61
  "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.1,
63
  "mask_feature_length": 10,
64
  "mask_feature_min_masks": 0,
65
  "mask_feature_prob": 0.0,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bac8b9bb8d3e9f421a115fc0eb137f2bbd90c1eeee93d9a2de231922740eb66d
3
  size 1262112241
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a74367fcdd020adea6363f8737e27690d841dece58ef8b611e02c105cac20db
3
  size 1262112241
trainer_state.json CHANGED
@@ -1,288 +1,112 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 137.09493670886076,
5
- "global_step": 4250,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 8.06,
12
- "learning_rate": 3.5e-05,
13
- "loss": 7.2926,
14
  "step": 250
15
  },
16
  {
17
  "epoch": 8.06,
18
- "eval_cer": 1.0,
19
- "eval_loss": 3.8496713638305664,
20
- "eval_runtime": 271.7106,
21
- "eval_samples_per_second": 26.745,
22
- "eval_steps_per_second": 3.345,
23
- "eval_wer": 1.0,
24
  "step": 250
25
  },
26
  {
27
  "epoch": 16.13,
28
- "learning_rate": 7e-05,
29
- "loss": 3.417,
30
  "step": 500
31
  },
32
  {
33
  "epoch": 16.13,
34
- "eval_cer": 0.9856759584948347,
35
- "eval_loss": 3.285226583480835,
36
- "eval_runtime": 275.8724,
37
- "eval_samples_per_second": 26.342,
38
- "eval_steps_per_second": 3.295,
39
- "eval_wer": 1.0,
40
  "step": 500
41
  },
42
  {
43
  "epoch": 24.19,
44
- "learning_rate": 6.578313253012048e-05,
45
- "loss": 2.0264,
46
  "step": 750
47
  },
48
  {
49
  "epoch": 24.19,
50
- "eval_cer": 0.176804441300443,
51
- "eval_loss": 0.7098603248596191,
52
- "eval_runtime": 269.2535,
53
- "eval_samples_per_second": 26.989,
54
- "eval_steps_per_second": 3.376,
55
- "eval_wer": 0.7342138090806487,
56
  "step": 750
57
  },
58
  {
59
  "epoch": 32.25,
60
- "learning_rate": 6.156626506024095e-05,
61
- "loss": 0.4018,
62
  "step": 1000
63
  },
64
  {
65
  "epoch": 32.25,
66
- "eval_cer": 0.15511300475620768,
67
- "eval_loss": 0.6187673211097717,
68
- "eval_runtime": 271.6934,
69
- "eval_samples_per_second": 26.747,
70
- "eval_steps_per_second": 3.346,
71
- "eval_wer": 0.6415460467694989,
72
  "step": 1000
73
  },
74
  {
75
  "epoch": 40.32,
76
- "learning_rate": 5.734939759036144e-05,
77
- "loss": 0.2444,
78
  "step": 1250
79
  },
80
  {
81
  "epoch": 40.32,
82
- "eval_cer": 0.15995397008055237,
83
- "eval_loss": 0.6631603837013245,
84
- "eval_runtime": 276.2198,
85
- "eval_samples_per_second": 26.309,
86
- "eval_steps_per_second": 3.291,
87
- "eval_wer": 0.6361800289091737,
88
  "step": 1250
89
  },
90
  {
91
  "epoch": 48.38,
92
- "learning_rate": 5.313253012048192e-05,
93
- "loss": 0.1882,
94
  "step": 1500
95
  },
96
  {
97
  "epoch": 48.38,
98
- "eval_cer": 0.13876129966064343,
99
- "eval_loss": 0.6070172190666199,
100
- "eval_runtime": 272.4136,
101
- "eval_samples_per_second": 26.676,
102
- "eval_steps_per_second": 3.337,
103
- "eval_wer": 0.578262677464705,
104
  "step": 1500
105
- },
106
- {
107
- "epoch": 56.44,
108
- "learning_rate": 4.891566265060241e-05,
109
- "loss": 0.153,
110
- "step": 1750
111
- },
112
- {
113
- "epoch": 56.44,
114
- "eval_cer": 0.13767574986063888,
115
- "eval_loss": 0.6425250172615051,
116
- "eval_runtime": 272.6101,
117
- "eval_samples_per_second": 26.657,
118
- "eval_steps_per_second": 3.334,
119
- "eval_wer": 0.5720056234283112,
120
- "step": 1750
121
- },
122
- {
123
- "epoch": 64.51,
124
- "learning_rate": 4.469879518072288e-05,
125
- "loss": 0.1214,
126
- "step": 2000
127
- },
128
- {
129
- "epoch": 64.51,
130
- "eval_cer": 0.1337117001405021,
131
- "eval_loss": 0.6362873315811157,
132
- "eval_runtime": 271.8138,
133
- "eval_samples_per_second": 26.735,
134
- "eval_steps_per_second": 3.344,
135
- "eval_wer": 0.5546007167891016,
136
- "step": 2000
137
- },
138
- {
139
- "epoch": 72.57,
140
- "learning_rate": 4.048192771084337e-05,
141
- "loss": 0.1011,
142
- "step": 2250
143
- },
144
- {
145
- "epoch": 72.57,
146
- "eval_cer": 0.12238677519991394,
147
- "eval_loss": 0.6309632658958435,
148
- "eval_runtime": 276.7543,
149
- "eval_samples_per_second": 26.258,
150
- "eval_steps_per_second": 3.285,
151
- "eval_wer": 0.5221669999801992,
152
- "step": 2250
153
- },
154
- {
155
- "epoch": 80.63,
156
- "learning_rate": 3.6265060240963855e-05,
157
- "loss": 0.0879,
158
- "step": 2500
159
- },
160
- {
161
- "epoch": 80.63,
162
- "eval_cer": 0.12531743366899534,
163
- "eval_loss": 0.6352854371070862,
164
- "eval_runtime": 270.8217,
165
- "eval_samples_per_second": 26.833,
166
- "eval_steps_per_second": 3.356,
167
- "eval_wer": 0.5258301487040374,
168
- "step": 2500
169
- },
170
- {
171
- "epoch": 88.7,
172
- "learning_rate": 3.2048192771084335e-05,
173
- "loss": 0.0782,
174
- "step": 2750
175
- },
176
- {
177
- "epoch": 88.7,
178
- "eval_cer": 0.11265268600227542,
179
- "eval_loss": 0.607792854309082,
180
- "eval_runtime": 270.7843,
181
- "eval_samples_per_second": 26.837,
182
- "eval_steps_per_second": 3.357,
183
- "eval_wer": 0.4904263113082391,
184
- "step": 2750
185
- },
186
- {
187
- "epoch": 96.76,
188
- "learning_rate": 2.783132530120482e-05,
189
- "loss": 0.0709,
190
- "step": 3000
191
- },
192
- {
193
- "epoch": 96.76,
194
- "eval_cer": 0.11539100982210675,
195
- "eval_loss": 0.6464908123016357,
196
- "eval_runtime": 272.9311,
197
- "eval_samples_per_second": 26.626,
198
- "eval_steps_per_second": 3.331,
199
- "eval_wer": 0.49601013801160326,
200
- "step": 3000
201
- },
202
- {
203
- "epoch": 104.82,
204
- "learning_rate": 2.36144578313253e-05,
205
- "loss": 0.0661,
206
- "step": 3250
207
- },
208
- {
209
- "epoch": 104.82,
210
- "eval_cer": 0.11656783708277235,
211
- "eval_loss": 0.6621575951576233,
212
- "eval_runtime": 270.3992,
213
- "eval_samples_per_second": 26.875,
214
- "eval_steps_per_second": 3.362,
215
- "eval_wer": 0.494544878522068,
216
- "step": 3250
217
- },
218
- {
219
- "epoch": 112.89,
220
- "learning_rate": 1.9397590361445782e-05,
221
- "loss": 0.0616,
222
- "step": 3500
223
- },
224
- {
225
- "epoch": 112.89,
226
- "eval_cer": 0.11035119002989337,
227
- "eval_loss": 0.6440250277519226,
228
- "eval_runtime": 271.0109,
229
- "eval_samples_per_second": 26.814,
230
- "eval_steps_per_second": 3.354,
231
- "eval_wer": 0.47860523137239375,
232
- "step": 3500
233
- },
234
- {
235
- "epoch": 120.95,
236
- "learning_rate": 1.5180722891566264e-05,
237
- "loss": 0.0579,
238
- "step": 3750
239
- },
240
- {
241
- "epoch": 120.95,
242
- "eval_cer": 0.11444237621309375,
243
- "eval_loss": 0.6815317273139954,
244
- "eval_runtime": 269.1998,
245
- "eval_samples_per_second": 26.995,
246
- "eval_steps_per_second": 3.377,
247
- "eval_wer": 0.4887432429756648,
248
- "step": 3750
249
- },
250
- {
251
- "epoch": 129.03,
252
- "learning_rate": 1.0963855421686746e-05,
253
- "loss": 0.0549,
254
- "step": 4000
255
- },
256
- {
257
- "epoch": 129.03,
258
- "eval_cer": 0.11051744540466885,
259
- "eval_loss": 0.6602992415428162,
260
- "eval_runtime": 274.1737,
261
- "eval_samples_per_second": 26.505,
262
- "eval_steps_per_second": 3.315,
263
- "eval_wer": 0.47799140645110194,
264
- "step": 4000
265
- },
266
- {
267
- "epoch": 137.09,
268
- "learning_rate": 6.746987951807228e-06,
269
- "loss": 0.0527,
270
- "step": 4250
271
- },
272
- {
273
- "epoch": 137.09,
274
- "eval_cer": 0.10900158757583364,
275
- "eval_loss": 0.6652226448059082,
276
- "eval_runtime": 271.842,
277
- "eval_samples_per_second": 26.732,
278
- "eval_steps_per_second": 3.344,
279
- "eval_wer": 0.47486287943290495,
280
- "step": 4250
281
  }
282
  ],
283
- "max_steps": 4650,
284
- "num_train_epochs": 150,
285
- "total_flos": 3.524250563411945e+20,
286
  "trial_name": null,
287
  "trial_params": null
288
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 48.379746835443036,
5
+ "global_step": 1500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 8.06,
12
+ "learning_rate": 0.0005,
13
+ "loss": 0.576,
14
  "step": 250
15
  },
16
  {
17
  "epoch": 8.06,
18
+ "eval_cer": 0.050238462365976976,
19
+ "eval_loss": 0.24107621610164642,
20
+ "eval_runtime": 380.2463,
21
+ "eval_samples_per_second": 19.111,
22
+ "eval_steps_per_second": 2.391,
23
+ "eval_wer": 0.23400590063956597,
24
  "step": 250
25
  },
26
  {
27
  "epoch": 16.13,
28
+ "learning_rate": 0.001,
29
+ "loss": 0.2564,
30
  "step": 500
31
  },
32
  {
33
  "epoch": 16.13,
34
+ "eval_cer": 0.04923441029870549,
35
+ "eval_loss": 0.23052524030208588,
36
+ "eval_runtime": 267.8454,
37
+ "eval_samples_per_second": 27.131,
38
+ "eval_steps_per_second": 3.394,
39
+ "eval_wer": 0.20967071263093282,
40
  "step": 500
41
  },
42
  {
43
  "epoch": 24.19,
44
+ "learning_rate": 0.0007619047619047619,
45
+ "loss": 0.2018,
46
  "step": 750
47
  },
48
  {
49
  "epoch": 24.19,
50
+ "eval_cer": 0.04937458639900638,
51
+ "eval_loss": 0.23705527186393738,
52
+ "eval_runtime": 264.0504,
53
+ "eval_samples_per_second": 27.521,
54
+ "eval_steps_per_second": 3.443,
55
+ "eval_wer": 0.20590855988753143,
56
  "step": 750
57
  },
58
  {
59
  "epoch": 32.25,
60
+ "learning_rate": 0.0005238095238095238,
61
+ "loss": 0.1549,
62
  "step": 1000
63
  },
64
  {
65
  "epoch": 32.25,
66
+ "eval_cer": 0.04349696991429698,
67
+ "eval_loss": 0.22975854575634003,
68
+ "eval_runtime": 264.3572,
69
+ "eval_samples_per_second": 27.489,
70
+ "eval_steps_per_second": 3.439,
71
+ "eval_wer": 0.18440488683840564,
72
  "step": 1000
73
  },
74
  {
75
  "epoch": 40.32,
76
+ "learning_rate": 0.0002857142857142857,
77
+ "loss": 0.1224,
78
  "step": 1250
79
  },
80
  {
81
  "epoch": 40.32,
82
+ "eval_cer": 0.040729306910681745,
83
+ "eval_loss": 0.22875599563121796,
84
+ "eval_runtime": 265.748,
85
+ "eval_samples_per_second": 27.345,
86
+ "eval_steps_per_second": 3.421,
87
+ "eval_wer": 0.1724650020790844,
88
  "step": 1250
89
  },
90
  {
91
  "epoch": 48.38,
92
+ "learning_rate": 4.761904761904762e-05,
93
+ "loss": 0.1004,
94
  "step": 1500
95
  },
96
  {
97
  "epoch": 48.38,
98
+ "eval_cer": 0.03763565297613421,
99
+ "eval_loss": 0.23267094790935516,
100
+ "eval_runtime": 266.4102,
101
+ "eval_samples_per_second": 27.277,
102
+ "eval_steps_per_second": 3.412,
103
+ "eval_wer": 0.16082212937845278,
104
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  }
106
  ],
107
+ "max_steps": 1550,
108
+ "num_train_epochs": 50,
109
+ "total_flos": 1.244071149131343e+20,
110
  "trial_name": null,
111
  "trial_params": null
112
  }