alvanli committed on
Commit
e78d049
1 Parent(s): bf27d95

Added new model

Browse files
config.json CHANGED
@@ -20,7 +20,7 @@
20
  "ctc_zero_infinity": true,
21
  "diversity_loss_weight": 0.1,
22
  "eos_token_id": 2,
23
- "feat_proj_dropout": 0.0,
24
  "feat_quantizer_dropout": 0.0,
25
  "feature_projection_input_dim": 160,
26
  "final_dropout": 0.1,
@@ -30,14 +30,14 @@
30
  "initializer_range": 0.02,
31
  "intermediate_size": 4096,
32
  "layer_norm_eps": 1e-05,
33
- "layerdrop": 0.0,
34
  "left_max_position_embeddings": 64,
35
  "mask_feature_length": 10,
36
  "mask_feature_min_masks": 0,
37
  "mask_feature_prob": 0.0,
38
  "mask_time_length": 10,
39
  "mask_time_min_masks": 2,
40
- "mask_time_prob": 0.0,
41
  "max_source_positions": 5000,
42
  "model_type": "wav2vec2-bert",
43
  "num_adapter_layers": 1,
 
20
  "ctc_zero_infinity": true,
21
  "diversity_loss_weight": 0.1,
22
  "eos_token_id": 2,
23
+ "feat_proj_dropout": 0.1,
24
  "feat_quantizer_dropout": 0.0,
25
  "feature_projection_input_dim": 160,
26
  "final_dropout": 0.1,
 
30
  "initializer_range": 0.02,
31
  "intermediate_size": 4096,
32
  "layer_norm_eps": 1e-05,
33
+ "layerdrop": 0.1,
34
  "left_max_position_embeddings": 64,
35
  "mask_feature_length": 10,
36
  "mask_feature_min_masks": 0,
37
  "mask_feature_prob": 0.0,
38
  "mask_time_length": 10,
39
  "mask_time_min_masks": 2,
40
+ "mask_time_prob": 0.1,
41
  "max_source_positions": 5000,
42
  "model_type": "wav2vec2-bert",
43
  "num_adapter_layers": 1,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51ae7f6fabf39cfa3fe1eac86982beef0c2680843bea9980722328bca1584e04
3
- size 1474560000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e70f697a1dc0b68ac5aae2a4005b2d13b00a7c87f296f5dd65c9c5757bbcefa
3
+ size 2216165376
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee1dad2d0ef61fbb44f5e7d2ffa4c63aed3ebcfa30984aaf5e43ad06dd8d870f
3
- size 1482948608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebdee21825782bb7083220355f85c380c5e72ac2ae86dac2b8bfbc52e82e3c09
3
+ size 2268332032
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca21d0ee0b847bef6f94dc79b48cc77221aec4d522263edad9f60ef93361bb1c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5ef7d7cab190261ce34c81580dbcbbd7bf22bc26086f9d99ad5d88f185483d0
3
  size 14244
runs/Jan26_18-56-17_18c35eb46719/events.out.tfevents.1706295378.18c35eb46719.4079.0 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9d86d9b9e5f22f62a603b89d5acae12a9a4b5399f3fc736e7338dd63672896f
3
- size 9411
 
 
 
 
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c007b769b1d4621f09a01df6014495eaa14d38d5303d8207ac51729ce97aa45
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65d54da4132a617bfa22d92a5cae04b1e7aa2058d7d8d482e011eb5eed4d79d2
3
  size 1064
trainer_state.json CHANGED
@@ -1,140 +1,500 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.9200410630774494,
5
  "eval_steps": 300,
6
- "global_step": 2400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.37,
13
- "learning_rate": 2.97e-05,
14
- "loss": 11.4962,
15
  "step": 300
16
  },
17
  {
18
  "epoch": 0.37,
19
- "eval_cer": 0.7285700557370748,
20
- "eval_loss": 3.9011037349700928,
21
- "eval_runtime": 45.9839,
22
- "eval_samples_per_second": 48.3,
23
- "eval_steps_per_second": 6.046,
24
  "step": 300
25
  },
26
  {
27
  "epoch": 0.73,
28
- "learning_rate": 4.752929190015283e-05,
29
- "loss": 1.9358,
30
  "step": 600
31
  },
32
  {
33
  "epoch": 0.73,
34
- "eval_cer": 0.3138093407649433,
35
- "eval_loss": 1.294021487236023,
36
- "eval_runtime": 39.5216,
37
- "eval_samples_per_second": 56.197,
38
- "eval_steps_per_second": 7.034,
39
  "step": 600
40
  },
41
  {
42
  "epoch": 1.1,
43
- "learning_rate": 3.9938869077941925e-05,
44
- "loss": 1.1506,
45
  "step": 900
46
  },
47
  {
48
  "epoch": 1.1,
49
- "eval_cer": 0.33158754564674225,
50
- "eval_loss": 1.1284617185592651,
51
- "eval_runtime": 39.8848,
52
- "eval_samples_per_second": 55.685,
53
- "eval_steps_per_second": 6.97,
54
  "step": 900
55
  },
56
  {
57
  "epoch": 1.46,
58
- "learning_rate": 3.234844625573103e-05,
59
- "loss": 0.9373,
60
  "step": 1200
61
  },
62
  {
63
  "epoch": 1.46,
64
- "eval_cer": 0.27407265039400347,
65
- "eval_loss": 0.9184823632240295,
66
- "eval_runtime": 39.7128,
67
- "eval_samples_per_second": 55.927,
68
- "eval_steps_per_second": 7.0,
69
  "step": 1200
70
  },
71
  {
72
  "epoch": 1.83,
73
- "learning_rate": 2.470708099847173e-05,
74
- "loss": 0.8532,
75
  "step": 1500
76
  },
77
  {
78
  "epoch": 1.83,
79
- "eval_cer": 0.28651739381126273,
80
- "eval_loss": 0.8993165493011475,
81
- "eval_runtime": 39.7812,
82
- "eval_samples_per_second": 55.83,
83
- "eval_steps_per_second": 6.988,
84
  "step": 1500
85
  },
86
  {
87
  "epoch": 2.19,
88
- "learning_rate": 1.7065715741212433e-05,
89
- "loss": 0.7554,
90
  "step": 1800
91
  },
92
  {
93
  "epoch": 2.19,
94
- "eval_cer": 0.2535075917739765,
95
- "eval_loss": 0.8039466738700867,
96
- "eval_runtime": 39.8849,
97
- "eval_samples_per_second": 55.685,
98
- "eval_steps_per_second": 6.97,
99
  "step": 1800
100
  },
101
  {
102
  "epoch": 2.56,
103
- "learning_rate": 9.424350483953132e-06,
104
- "loss": 0.6747,
105
  "step": 2100
106
  },
107
  {
108
  "epoch": 2.56,
109
- "eval_cer": 0.24255237363059773,
110
- "eval_loss": 0.7663877606391907,
111
- "eval_runtime": 39.7233,
112
- "eval_samples_per_second": 55.912,
113
- "eval_steps_per_second": 6.998,
114
  "step": 2100
115
  },
116
  {
117
  "epoch": 2.92,
118
- "learning_rate": 1.7829852266938361e-06,
119
- "loss": 0.6314,
120
  "step": 2400
121
  },
122
  {
123
  "epoch": 2.92,
124
- "eval_cer": 0.24130309436863348,
125
- "eval_loss": 0.7546484470367432,
126
- "eval_runtime": 39.7215,
127
- "eval_samples_per_second": 55.914,
128
- "eval_steps_per_second": 6.999,
129
  "step": 2400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  }
131
  ],
132
  "logging_steps": 300,
133
- "max_steps": 2463,
134
  "num_input_tokens_seen": 0,
135
- "num_train_epochs": 3,
136
  "save_steps": 600,
137
- "total_flos": 3.771491125288823e+19,
138
  "train_batch_size": 4,
139
  "trial_name": null,
140
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.680164252309797,
5
  "eval_steps": 300,
6
+ "global_step": 9600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.37,
13
+ "learning_rate": 5.96e-05,
14
+ "loss": 14.3929,
15
  "step": 300
16
  },
17
  {
18
  "epoch": 0.37,
19
+ "eval_cer": 0.9869306169517587,
20
+ "eval_loss": 7.0630784034729,
21
+ "eval_runtime": 45.4918,
22
+ "eval_samples_per_second": 48.822,
23
+ "eval_steps_per_second": 6.111,
24
  "step": 300
25
  },
26
  {
27
  "epoch": 0.73,
28
+ "learning_rate": 9.895209580838324e-05,
29
+ "loss": 8.6544,
30
  "step": 600
31
  },
32
  {
33
  "epoch": 0.73,
34
+ "eval_cer": 0.8301460695752451,
35
+ "eval_loss": 5.885794639587402,
36
+ "eval_runtime": 39.1068,
37
+ "eval_samples_per_second": 56.793,
38
+ "eval_steps_per_second": 7.109,
39
  "step": 600
40
  },
41
  {
42
  "epoch": 1.1,
43
+ "learning_rate": 9.575491873396066e-05,
44
+ "loss": 4.2963,
45
  "step": 900
46
  },
47
  {
48
  "epoch": 1.1,
49
+ "eval_cer": 0.5212857966557756,
50
+ "eval_loss": 2.0850701332092285,
51
+ "eval_runtime": 39.3849,
52
+ "eval_samples_per_second": 56.392,
53
+ "eval_steps_per_second": 7.059,
54
  "step": 900
55
  },
56
  {
57
  "epoch": 1.46,
58
+ "learning_rate": 9.256843455945254e-05,
59
+ "loss": 2.416,
60
  "step": 1200
61
  },
62
  {
63
  "epoch": 1.46,
64
+ "eval_cer": 0.42461080146069574,
65
+ "eval_loss": 1.6146297454833984,
66
+ "eval_runtime": 39.2918,
67
+ "eval_samples_per_second": 56.526,
68
+ "eval_steps_per_second": 7.075,
69
  "step": 1200
70
  },
71
  {
72
  "epoch": 1.83,
73
+ "learning_rate": 8.936056458511549e-05,
74
+ "loss": 2.0879,
75
  "step": 1500
76
  },
77
  {
78
  "epoch": 1.83,
79
+ "eval_cer": 0.38155871612531234,
80
+ "eval_loss": 1.3970342874526978,
81
+ "eval_runtime": 39.3238,
82
+ "eval_samples_per_second": 56.48,
83
+ "eval_steps_per_second": 7.07,
84
  "step": 1500
85
  },
86
  {
87
  "epoch": 2.19,
88
+ "learning_rate": 8.615269461077845e-05,
89
+ "loss": 1.8739,
90
  "step": 1800
91
  },
92
  {
93
  "epoch": 2.19,
94
+ "eval_cer": 0.3361522198731501,
95
+ "eval_loss": 1.259849190711975,
96
+ "eval_runtime": 39.8822,
97
+ "eval_samples_per_second": 55.689,
98
+ "eval_steps_per_second": 6.971,
99
  "step": 1800
100
  },
101
  {
102
  "epoch": 2.56,
103
+ "learning_rate": 8.29448246364414e-05,
104
+ "loss": 1.7464,
105
  "step": 2100
106
  },
107
  {
108
  "epoch": 2.56,
109
+ "eval_cer": 0.3469632904093792,
110
+ "eval_loss": 1.1771619319915771,
111
+ "eval_runtime": 39.4164,
112
+ "eval_samples_per_second": 56.347,
113
+ "eval_steps_per_second": 7.053,
114
  "step": 2100
115
  },
116
  {
117
  "epoch": 2.92,
118
+ "learning_rate": 7.973695466210437e-05,
119
+ "loss": 1.7025,
120
  "step": 2400
121
  },
122
  {
123
  "epoch": 2.92,
124
+ "eval_cer": 0.31635594849125503,
125
+ "eval_loss": 1.1029127836227417,
126
+ "eval_runtime": 39.436,
127
+ "eval_samples_per_second": 56.319,
128
+ "eval_steps_per_second": 7.049,
129
  "step": 2400
130
+ },
131
+ {
132
+ "epoch": 3.29,
133
+ "learning_rate": 7.652908468776732e-05,
134
+ "loss": 1.5982,
135
+ "step": 2700
136
+ },
137
+ {
138
+ "epoch": 3.29,
139
+ "eval_cer": 0.3095329617528349,
140
+ "eval_loss": 1.049662709236145,
141
+ "eval_runtime": 39.3386,
142
+ "eval_samples_per_second": 56.459,
143
+ "eval_steps_per_second": 7.067,
144
+ "step": 2700
145
+ },
146
+ {
147
+ "epoch": 3.65,
148
+ "learning_rate": 7.332121471343029e-05,
149
+ "loss": 1.5632,
150
+ "step": 3000
151
+ },
152
+ {
153
+ "epoch": 3.65,
154
+ "eval_cer": 0.30030751489525276,
155
+ "eval_loss": 1.013907790184021,
156
+ "eval_runtime": 39.4665,
157
+ "eval_samples_per_second": 56.276,
158
+ "eval_steps_per_second": 7.044,
159
+ "step": 3000
160
+ },
161
+ {
162
+ "epoch": 4.02,
163
+ "learning_rate": 7.011334473909324e-05,
164
+ "loss": 1.5063,
165
+ "step": 3300
166
+ },
167
+ {
168
+ "epoch": 4.02,
169
+ "eval_cer": 0.28858350951374206,
170
+ "eval_loss": 0.9488086104393005,
171
+ "eval_runtime": 39.4805,
172
+ "eval_samples_per_second": 56.256,
173
+ "eval_steps_per_second": 7.041,
174
+ "step": 3300
175
+ },
176
+ {
177
+ "epoch": 4.38,
178
+ "learning_rate": 6.69054747647562e-05,
179
+ "loss": 1.4507,
180
+ "step": 3600
181
+ },
182
+ {
183
+ "epoch": 4.38,
184
+ "eval_cer": 0.272583125120123,
185
+ "eval_loss": 0.9192214012145996,
186
+ "eval_runtime": 39.3363,
187
+ "eval_samples_per_second": 56.462,
188
+ "eval_steps_per_second": 7.067,
189
+ "step": 3600
190
+ },
191
+ {
192
+ "epoch": 4.75,
193
+ "learning_rate": 6.369760479041916e-05,
194
+ "loss": 1.4029,
195
+ "step": 3900
196
+ },
197
+ {
198
+ "epoch": 4.75,
199
+ "eval_cer": 0.2764751105131655,
200
+ "eval_loss": 0.9218717217445374,
201
+ "eval_runtime": 39.3534,
202
+ "eval_samples_per_second": 56.437,
203
+ "eval_steps_per_second": 7.064,
204
+ "step": 3900
205
+ },
206
+ {
207
+ "epoch": 5.11,
208
+ "learning_rate": 6.050042771599658e-05,
209
+ "loss": 1.3126,
210
+ "step": 4200
211
+ },
212
+ {
213
+ "epoch": 5.11,
214
+ "eval_cer": 0.27450509321545263,
215
+ "eval_loss": 0.9048557281494141,
216
+ "eval_runtime": 39.4768,
217
+ "eval_samples_per_second": 56.261,
218
+ "eval_steps_per_second": 7.042,
219
+ "step": 4200
220
+ },
221
+ {
222
+ "epoch": 5.48,
223
+ "learning_rate": 5.729255774165954e-05,
224
+ "loss": 1.3053,
225
+ "step": 4500
226
+ },
227
+ {
228
+ "epoch": 5.48,
229
+ "eval_cer": 0.2652315971554872,
230
+ "eval_loss": 0.8531870245933533,
231
+ "eval_runtime": 39.4442,
232
+ "eval_samples_per_second": 56.307,
233
+ "eval_steps_per_second": 7.048,
234
+ "step": 4500
235
+ },
236
+ {
237
+ "epoch": 5.84,
238
+ "learning_rate": 5.40846877673225e-05,
239
+ "loss": 1.2989,
240
+ "step": 4800
241
+ },
242
+ {
243
+ "epoch": 5.84,
244
+ "eval_cer": 0.24903901595233519,
245
+ "eval_loss": 0.8212350010871887,
246
+ "eval_runtime": 39.4345,
247
+ "eval_samples_per_second": 56.321,
248
+ "eval_steps_per_second": 7.05,
249
+ "step": 4800
250
+ },
251
+ {
252
+ "epoch": 6.21,
253
+ "learning_rate": 5.087681779298546e-05,
254
+ "loss": 1.2403,
255
+ "step": 5100
256
+ },
257
+ {
258
+ "epoch": 6.21,
259
+ "eval_cer": 0.26124351335767826,
260
+ "eval_loss": 0.8196715116500854,
261
+ "eval_runtime": 39.3565,
262
+ "eval_samples_per_second": 56.433,
263
+ "eval_steps_per_second": 7.064,
264
+ "step": 5100
265
+ },
266
+ {
267
+ "epoch": 6.57,
268
+ "learning_rate": 4.766894781864842e-05,
269
+ "loss": 1.1903,
270
+ "step": 5400
271
+ },
272
+ {
273
+ "epoch": 6.57,
274
+ "eval_cer": 0.2607630213338459,
275
+ "eval_loss": 0.8173399567604065,
276
+ "eval_runtime": 39.5209,
277
+ "eval_samples_per_second": 56.198,
278
+ "eval_steps_per_second": 7.034,
279
+ "step": 5400
280
+ },
281
+ {
282
+ "epoch": 6.94,
283
+ "learning_rate": 4.446107784431138e-05,
284
+ "loss": 1.2313,
285
+ "step": 5700
286
+ },
287
+ {
288
+ "epoch": 6.94,
289
+ "eval_cer": 0.24995195079761676,
290
+ "eval_loss": 0.8241677284240723,
291
+ "eval_runtime": 39.4127,
292
+ "eval_samples_per_second": 56.352,
293
+ "eval_steps_per_second": 7.054,
294
+ "step": 5700
295
+ },
296
+ {
297
+ "epoch": 7.3,
298
+ "learning_rate": 4.125320786997434e-05,
299
+ "loss": 1.1554,
300
+ "step": 6000
301
+ },
302
+ {
303
+ "epoch": 7.3,
304
+ "eval_cer": 0.24529117816644244,
305
+ "eval_loss": 0.7795117497444153,
306
+ "eval_runtime": 39.3516,
307
+ "eval_samples_per_second": 56.44,
308
+ "eval_steps_per_second": 7.065,
309
+ "step": 6000
310
+ },
311
+ {
312
+ "epoch": 7.67,
313
+ "learning_rate": 3.80453378956373e-05,
314
+ "loss": 1.1243,
315
+ "step": 6300
316
+ },
317
+ {
318
+ "epoch": 7.67,
319
+ "eval_cer": 0.2526427061310782,
320
+ "eval_loss": 0.782616913318634,
321
+ "eval_runtime": 39.456,
322
+ "eval_samples_per_second": 56.291,
323
+ "eval_steps_per_second": 7.046,
324
+ "step": 6300
325
+ },
326
+ {
327
+ "epoch": 8.03,
328
+ "learning_rate": 3.483746792130026e-05,
329
+ "loss": 1.099,
330
+ "step": 6600
331
+ },
332
+ {
333
+ "epoch": 8.03,
334
+ "eval_cer": 0.23010763021333847,
335
+ "eval_loss": 0.7462431192398071,
336
+ "eval_runtime": 39.3849,
337
+ "eval_samples_per_second": 56.392,
338
+ "eval_steps_per_second": 7.059,
339
+ "step": 6600
340
+ },
341
+ {
342
+ "epoch": 8.4,
343
+ "learning_rate": 3.1629597946963216e-05,
344
+ "loss": 1.0777,
345
+ "step": 6900
346
+ },
347
+ {
348
+ "epoch": 8.4,
349
+ "eval_cer": 0.22544685758216412,
350
+ "eval_loss": 0.7633857131004333,
351
+ "eval_runtime": 39.2653,
352
+ "eval_samples_per_second": 56.564,
353
+ "eval_steps_per_second": 7.08,
354
+ "step": 6900
355
+ },
356
+ {
357
+ "epoch": 8.76,
358
+ "learning_rate": 2.8421727972626178e-05,
359
+ "loss": 1.0901,
360
+ "step": 7200
361
+ },
362
+ {
363
+ "epoch": 8.76,
364
+ "eval_cer": 0.23880453584470498,
365
+ "eval_loss": 0.7462579607963562,
366
+ "eval_runtime": 39.2693,
367
+ "eval_samples_per_second": 56.558,
368
+ "eval_steps_per_second": 7.079,
369
+ "step": 7200
370
+ },
371
+ {
372
+ "epoch": 9.13,
373
+ "learning_rate": 2.5224550898203592e-05,
374
+ "loss": 1.0049,
375
+ "step": 7500
376
+ },
377
+ {
378
+ "epoch": 9.13,
379
+ "eval_cer": 0.22155487218912165,
380
+ "eval_loss": 0.7342504858970642,
381
+ "eval_runtime": 39.3738,
382
+ "eval_samples_per_second": 56.408,
383
+ "eval_steps_per_second": 7.061,
384
+ "step": 7500
385
+ },
386
+ {
387
+ "epoch": 9.49,
388
+ "learning_rate": 2.2016680923866555e-05,
389
+ "loss": 1.0011,
390
+ "step": 7800
391
+ },
392
+ {
393
+ "epoch": 9.49,
394
+ "eval_cer": 0.22674418604651161,
395
+ "eval_loss": 0.7101256251335144,
396
+ "eval_runtime": 39.3531,
397
+ "eval_samples_per_second": 56.438,
398
+ "eval_steps_per_second": 7.064,
399
+ "step": 7800
400
+ },
401
+ {
402
+ "epoch": 9.86,
403
+ "learning_rate": 1.8808810949529513e-05,
404
+ "loss": 1.0084,
405
+ "step": 8100
406
+ },
407
+ {
408
+ "epoch": 9.86,
409
+ "eval_cer": 0.22189121660580435,
410
+ "eval_loss": 0.698137640953064,
411
+ "eval_runtime": 39.3187,
412
+ "eval_samples_per_second": 56.487,
413
+ "eval_steps_per_second": 7.07,
414
+ "step": 8100
415
+ },
416
+ {
417
+ "epoch": 10.22,
418
+ "learning_rate": 1.5600940975192476e-05,
419
+ "loss": 0.9547,
420
+ "step": 8400
421
+ },
422
+ {
423
+ "epoch": 10.22,
424
+ "eval_cer": 0.22222756102248703,
425
+ "eval_loss": 0.7049764394760132,
426
+ "eval_runtime": 39.2625,
427
+ "eval_samples_per_second": 56.568,
428
+ "eval_steps_per_second": 7.081,
429
+ "step": 8400
430
+ },
431
+ {
432
+ "epoch": 10.59,
433
+ "learning_rate": 1.2393071000855433e-05,
434
+ "loss": 0.9304,
435
+ "step": 8700
436
+ },
437
+ {
438
+ "epoch": 10.59,
439
+ "eval_cer": 0.2266961368441284,
440
+ "eval_loss": 0.6863571405410767,
441
+ "eval_runtime": 39.4555,
442
+ "eval_samples_per_second": 56.291,
443
+ "eval_steps_per_second": 7.046,
444
+ "step": 8700
445
+ },
446
+ {
447
+ "epoch": 10.95,
448
+ "learning_rate": 9.185201026518392e-06,
449
+ "loss": 0.9044,
450
+ "step": 9000
451
+ },
452
+ {
453
+ "epoch": 10.95,
454
+ "eval_cer": 0.2206419373438401,
455
+ "eval_loss": 0.6961002945899963,
456
+ "eval_runtime": 39.4203,
457
+ "eval_samples_per_second": 56.341,
458
+ "eval_steps_per_second": 7.052,
459
+ "step": 9000
460
+ },
461
+ {
462
+ "epoch": 11.32,
463
+ "learning_rate": 5.977331052181352e-06,
464
+ "loss": 0.9054,
465
+ "step": 9300
466
+ },
467
+ {
468
+ "epoch": 11.32,
469
+ "eval_cer": 0.21146453968864118,
470
+ "eval_loss": 0.6892764568328857,
471
+ "eval_runtime": 39.4357,
472
+ "eval_samples_per_second": 56.32,
473
+ "eval_steps_per_second": 7.049,
474
+ "step": 9300
475
+ },
476
+ {
477
+ "epoch": 11.68,
478
+ "learning_rate": 2.7694610778443115e-06,
479
+ "loss": 0.9067,
480
+ "step": 9600
481
+ },
482
+ {
483
+ "epoch": 11.68,
484
+ "eval_cer": 0.21718239477224677,
485
+ "eval_loss": 0.682004451751709,
486
+ "eval_runtime": 39.4317,
487
+ "eval_samples_per_second": 56.325,
488
+ "eval_steps_per_second": 7.05,
489
+ "step": 9600
490
  }
491
  ],
492
  "logging_steps": 300,
493
+ "max_steps": 9852,
494
  "num_input_tokens_seen": 0,
495
+ "num_train_epochs": 12,
496
  "save_steps": 600,
497
+ "total_flos": 1.5092741663533305e+20,
498
  "train_batch_size": 4,
499
  "trial_name": null,
500
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d21e22c8e12912abbc958057ead57a442298a5529aa3c4095360b661c1355422
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b776f2bd0d7f5772735369f9baf92b4b5776a2e075f0b57ff591c191b5c0666
3
  size 4664