alvanli commited on
Commit
8015d27
1 Parent(s): a5970fc
checkpoint-24600/config.json DELETED
@@ -1,82 +0,0 @@
1
- {
2
- "_name_or_path": "facebook/w2v-bert-2.0",
3
- "activation_dropout": 0.0,
4
- "adapter_act": "relu",
5
- "adapter_kernel_size": 3,
6
- "adapter_stride": 2,
7
- "add_adapter": true,
8
- "apply_spec_augment": false,
9
- "architectures": [
10
- "Wav2Vec2BertForCTC"
11
- ],
12
- "attention_dropout": 0.0,
13
- "bos_token_id": 1,
14
- "classifier_proj_size": 768,
15
- "codevector_dim": 768,
16
- "conformer_conv_dropout": 0.1,
17
- "contrastive_logits_temperature": 0.1,
18
- "conv_depthwise_kernel_size": 31,
19
- "ctc_loss_reduction": "mean",
20
- "ctc_zero_infinity": true,
21
- "diversity_loss_weight": 0.1,
22
- "eos_token_id": 2,
23
- "feat_proj_dropout": 0.1,
24
- "feat_quantizer_dropout": 0.0,
25
- "feature_projection_input_dim": 160,
26
- "final_dropout": 0.1,
27
- "hidden_act": "swish",
28
- "hidden_dropout": 0.0,
29
- "hidden_size": 1024,
30
- "initializer_range": 0.02,
31
- "intermediate_size": 4096,
32
- "layer_norm_eps": 1e-05,
33
- "layerdrop": 0.1,
34
- "left_max_position_embeddings": 64,
35
- "mask_feature_length": 10,
36
- "mask_feature_min_masks": 0,
37
- "mask_feature_prob": 0.0,
38
- "mask_time_length": 10,
39
- "mask_time_min_masks": 2,
40
- "mask_time_prob": 0.1,
41
- "max_source_positions": 5000,
42
- "model_type": "wav2vec2-bert",
43
- "num_adapter_layers": 1,
44
- "num_attention_heads": 16,
45
- "num_codevector_groups": 2,
46
- "num_codevectors_per_group": 320,
47
- "num_hidden_layers": 24,
48
- "num_negatives": 100,
49
- "output_hidden_size": 1024,
50
- "pad_token_id": 2696,
51
- "position_embeddings_type": "relative_key",
52
- "proj_codevector_dim": 768,
53
- "right_max_position_embeddings": 8,
54
- "rotary_embedding_base": 10000,
55
- "tdnn_dilation": [
56
- 1,
57
- 2,
58
- 3,
59
- 1,
60
- 1
61
- ],
62
- "tdnn_dim": [
63
- 512,
64
- 512,
65
- 512,
66
- 512,
67
- 1500
68
- ],
69
- "tdnn_kernel": [
70
- 5,
71
- 3,
72
- 3,
73
- 1,
74
- 1
75
- ],
76
- "torch_dtype": "float32",
77
- "transformers_version": "4.37.1",
78
- "use_intermediate_ffn_before_adapter": false,
79
- "use_weighted_layer_sum": false,
80
- "vocab_size": 2699,
81
- "xvector_output_dim": 512
82
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-24600/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce93fae4806d914ed61bbca586978bf9283d32c224193981a18d91a1fe44316d
3
- size 2433884676
 
 
 
 
checkpoint-24600/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:95740ac80d1a253ebeafa74c918c2e7a75f0cf1eafce8805c28299cabfb13ee8
3
- size 4868221674
 
 
 
 
checkpoint-24600/preprocessor_config.json DELETED
@@ -1,10 +0,0 @@
1
- {
2
- "feature_extractor_type": "SeamlessM4TFeatureExtractor",
3
- "feature_size": 80,
4
- "num_mel_bins": 80,
5
- "padding_side": "right",
6
- "padding_value": 0.0,
7
- "return_attention_mask": true,
8
- "sampling_rate": 16000,
9
- "stride": 2
10
- }
 
 
 
 
 
 
 
 
 
 
 
checkpoint-24600/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb34f676888fd9f6b3eaa3be33a23eae3ac676eb3a085d0292f21810095a2fca
3
- size 14308
 
 
 
 
checkpoint-24600/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebfe7655856e7efb8f9907ba4e84bf10035582fb91bca4a965f55aa11dd46718
3
- size 1064
 
 
 
 
checkpoint-24600/trainer_state.json DELETED
@@ -1,1251 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 29.930420896543858,
5
- "eval_steps": 300,
6
- "global_step": 24600,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.37,
13
- "learning_rate": 9.900000000000002e-06,
14
- "loss": 19.7382,
15
- "step": 300
16
- },
17
- {
18
- "epoch": 0.37,
19
- "eval_cer": 1.0,
20
- "eval_loss": 6.821648120880127,
21
- "eval_runtime": 52.3193,
22
- "eval_samples_per_second": 42.451,
23
- "eval_steps_per_second": 5.314,
24
- "step": 300
25
- },
26
- {
27
- "epoch": 0.73,
28
- "learning_rate": 1.9900000000000003e-05,
29
- "loss": 9.8181,
30
- "step": 600
31
- },
32
- {
33
- "epoch": 0.73,
34
- "eval_cer": 1.0,
35
- "eval_loss": 6.651111602783203,
36
- "eval_runtime": 43.577,
37
- "eval_samples_per_second": 50.967,
38
- "eval_steps_per_second": 6.38,
39
- "step": 600
40
- },
41
- {
42
- "epoch": 1.1,
43
- "learning_rate": 2.9900000000000002e-05,
44
- "loss": 9.5191,
45
- "step": 900
46
- },
47
- {
48
- "epoch": 1.1,
49
- "eval_cer": 0.9806448402826152,
50
- "eval_loss": 6.58424186706543,
51
- "eval_runtime": 43.1655,
52
- "eval_samples_per_second": 51.453,
53
- "eval_steps_per_second": 6.44,
54
- "step": 900
55
- },
56
- {
57
- "epoch": 1.46,
58
- "learning_rate": 3.99e-05,
59
- "loss": 8.6238,
60
- "step": 1200
61
- },
62
- {
63
- "epoch": 1.46,
64
- "eval_cer": 0.9216837496268285,
65
- "eval_loss": 6.142301082611084,
66
- "eval_runtime": 42.9764,
67
- "eval_samples_per_second": 51.68,
68
- "eval_steps_per_second": 6.469,
69
- "step": 1200
70
- },
71
- {
72
- "epoch": 1.83,
73
- "learning_rate": 4.99e-05,
74
- "loss": 6.883,
75
- "step": 1500
76
- },
77
- {
78
- "epoch": 1.83,
79
- "eval_cer": 0.850830928450592,
80
- "eval_loss": 3.596842050552368,
81
- "eval_runtime": 42.8348,
82
- "eval_samples_per_second": 51.85,
83
- "eval_steps_per_second": 6.49,
84
- "step": 1500
85
- },
86
- {
87
- "epoch": 2.19,
88
- "learning_rate": 4.93579766536965e-05,
89
- "loss": 4.0838,
90
- "step": 1800
91
- },
92
- {
93
- "epoch": 2.19,
94
- "eval_cer": 0.48343118718280426,
95
- "eval_loss": 2.5516390800476074,
96
- "eval_runtime": 42.9354,
97
- "eval_samples_per_second": 51.729,
98
- "eval_steps_per_second": 6.475,
99
- "step": 1800
100
- },
101
- {
102
- "epoch": 2.56,
103
- "learning_rate": 4.87094682230869e-05,
104
- "loss": 3.167,
105
- "step": 2100
106
- },
107
- {
108
- "epoch": 2.56,
109
- "eval_cer": 0.4450691611105583,
110
- "eval_loss": 2.2739391326904297,
111
- "eval_runtime": 42.8894,
112
- "eval_samples_per_second": 51.784,
113
- "eval_steps_per_second": 6.482,
114
- "step": 2100
115
- },
116
- {
117
- "epoch": 2.92,
118
- "learning_rate": 4.806312148724601e-05,
119
- "loss": 2.826,
120
- "step": 2400
121
- },
122
- {
123
- "epoch": 2.92,
124
- "eval_cer": 0.4178525226390686,
125
- "eval_loss": 2.0223917961120605,
126
- "eval_runtime": 42.9677,
127
- "eval_samples_per_second": 51.69,
128
- "eval_steps_per_second": 6.47,
129
- "step": 2400
130
- },
131
- {
132
- "epoch": 3.29,
133
- "learning_rate": 4.7414613056636405e-05,
134
- "loss": 2.6955,
135
- "step": 2700
136
- },
137
- {
138
- "epoch": 3.29,
139
- "eval_cer": 0.4174544730818987,
140
- "eval_loss": 1.9600275754928589,
141
- "eval_runtime": 42.8412,
142
- "eval_samples_per_second": 51.843,
143
- "eval_steps_per_second": 6.489,
144
- "step": 2700
145
- },
146
- {
147
- "epoch": 3.65,
148
- "learning_rate": 4.676610462602681e-05,
149
- "loss": 2.5812,
150
- "step": 3000
151
- },
152
- {
153
- "epoch": 3.65,
154
- "eval_cer": 0.40994128769031746,
155
- "eval_loss": 1.769142985343933,
156
- "eval_runtime": 42.9361,
157
- "eval_samples_per_second": 51.728,
158
- "eval_steps_per_second": 6.475,
159
- "step": 3000
160
- },
161
- {
162
- "epoch": 4.02,
163
- "learning_rate": 4.611975789018591e-05,
164
- "loss": 2.4952,
165
- "step": 3300
166
- },
167
- {
168
- "epoch": 4.02,
169
- "eval_cer": 0.4013832222111653,
170
- "eval_loss": 1.8323670625686646,
171
- "eval_runtime": 42.7115,
172
- "eval_samples_per_second": 52.0,
173
- "eval_steps_per_second": 6.509,
174
- "step": 3300
175
- },
176
- {
177
- "epoch": 4.38,
178
- "learning_rate": 4.547124945957631e-05,
179
- "loss": 2.3938,
180
- "step": 3600
181
- },
182
- {
183
- "epoch": 4.38,
184
- "eval_cer": 0.37799781072743555,
185
- "eval_loss": 1.7351980209350586,
186
- "eval_runtime": 42.7436,
187
- "eval_samples_per_second": 51.961,
188
- "eval_steps_per_second": 6.504,
189
- "step": 3600
190
- },
191
- {
192
- "epoch": 4.75,
193
- "learning_rate": 4.482490272373541e-05,
194
- "loss": 2.3584,
195
- "step": 3900
196
- },
197
- {
198
- "epoch": 4.75,
199
- "eval_cer": 0.3678475470196039,
200
- "eval_loss": 1.64540696144104,
201
- "eval_runtime": 42.7445,
202
- "eval_samples_per_second": 51.96,
203
- "eval_steps_per_second": 6.504,
204
- "step": 3900
205
- },
206
- {
207
- "epoch": 5.11,
208
- "learning_rate": 4.417639429312581e-05,
209
- "loss": 2.325,
210
- "step": 4200
211
- },
212
- {
213
- "epoch": 5.11,
214
- "eval_cer": 0.38352074833316746,
215
- "eval_loss": 1.6946874856948853,
216
- "eval_runtime": 42.4882,
217
- "eval_samples_per_second": 52.273,
218
- "eval_steps_per_second": 6.543,
219
- "step": 4200
220
- },
221
- {
222
- "epoch": 5.48,
223
- "learning_rate": 4.3527885862516214e-05,
224
- "loss": 2.2454,
225
- "step": 4500
226
- },
227
- {
228
- "epoch": 5.48,
229
- "eval_cer": 0.34078017713205294,
230
- "eval_loss": 1.5765234231948853,
231
- "eval_runtime": 42.1243,
232
- "eval_samples_per_second": 52.725,
233
- "eval_steps_per_second": 6.6,
234
- "step": 4500
235
- },
236
- {
237
- "epoch": 5.84,
238
- "learning_rate": 4.287937743190661e-05,
239
- "loss": 2.1954,
240
- "step": 4800
241
- },
242
- {
243
- "epoch": 5.84,
244
- "eval_cer": 0.37093243108767043,
245
- "eval_loss": 1.603211760520935,
246
- "eval_runtime": 42.6116,
247
- "eval_samples_per_second": 52.122,
248
- "eval_steps_per_second": 6.524,
249
- "step": 4800
250
- },
251
- {
252
- "epoch": 6.21,
253
- "learning_rate": 4.223086900129702e-05,
254
- "loss": 2.1492,
255
- "step": 5100
256
- },
257
- {
258
- "epoch": 6.21,
259
- "eval_cer": 0.3447606727037516,
260
- "eval_loss": 1.6078611612319946,
261
- "eval_runtime": 42.9188,
262
- "eval_samples_per_second": 51.749,
263
- "eval_steps_per_second": 6.477,
264
- "step": 5100
265
- },
266
- {
267
- "epoch": 6.57,
268
- "learning_rate": 4.1582360570687426e-05,
269
- "loss": 2.1655,
270
- "step": 5400
271
- },
272
- {
273
- "epoch": 6.57,
274
- "eval_cer": 0.33555577669419845,
275
- "eval_loss": 1.4955742359161377,
276
- "eval_runtime": 42.6136,
277
- "eval_samples_per_second": 52.12,
278
- "eval_steps_per_second": 6.524,
279
- "step": 5400
280
- },
281
- {
282
- "epoch": 6.94,
283
- "learning_rate": 4.093385214007782e-05,
284
- "loss": 2.1393,
285
- "step": 5700
286
- },
287
- {
288
- "epoch": 6.94,
289
- "eval_cer": 0.331625037317146,
290
- "eval_loss": 1.4772567749023438,
291
- "eval_runtime": 42.6929,
292
- "eval_samples_per_second": 52.023,
293
- "eval_steps_per_second": 6.512,
294
- "step": 5700
295
- },
296
- {
297
- "epoch": 7.3,
298
- "learning_rate": 4.028534370946823e-05,
299
- "loss": 2.1027,
300
- "step": 6000
301
- },
302
- {
303
- "epoch": 7.3,
304
- "eval_cer": 0.341427007662454,
305
- "eval_loss": 1.5089548826217651,
306
- "eval_runtime": 42.7699,
307
- "eval_samples_per_second": 51.929,
308
- "eval_steps_per_second": 6.5,
309
- "step": 6000
310
- },
311
- {
312
- "epoch": 7.67,
313
- "learning_rate": 3.9636835278858624e-05,
314
- "loss": 2.0824,
315
- "step": 6300
316
- },
317
- {
318
- "epoch": 7.67,
319
- "eval_cer": 0.34575579659667627,
320
- "eval_loss": 1.5948169231414795,
321
- "eval_runtime": 42.6031,
322
- "eval_samples_per_second": 52.132,
323
- "eval_steps_per_second": 6.525,
324
- "step": 6300
325
- },
326
- {
327
- "epoch": 8.03,
328
- "learning_rate": 3.899048854301773e-05,
329
- "loss": 2.061,
330
- "step": 6600
331
- },
332
- {
333
- "epoch": 8.03,
334
- "eval_cer": 0.35058214747736094,
335
- "eval_loss": 1.4923882484436035,
336
- "eval_runtime": 42.6516,
337
- "eval_samples_per_second": 52.073,
338
- "eval_steps_per_second": 6.518,
339
- "step": 6600
340
- },
341
- {
342
- "epoch": 8.4,
343
- "learning_rate": 3.8341980112408135e-05,
344
- "loss": 2.0212,
345
- "step": 6900
346
- },
347
- {
348
- "epoch": 8.4,
349
- "eval_cer": 0.33247089262613194,
350
- "eval_loss": 1.4590569734573364,
351
- "eval_runtime": 42.5489,
352
- "eval_samples_per_second": 52.199,
353
- "eval_steps_per_second": 6.534,
354
- "step": 6900
355
- },
356
- {
357
- "epoch": 8.76,
358
- "learning_rate": 3.769347168179853e-05,
359
- "loss": 2.0504,
360
- "step": 7200
361
- },
362
- {
363
- "epoch": 8.76,
364
- "eval_cer": 0.3344611404119813,
365
- "eval_loss": 1.4551000595092773,
366
- "eval_runtime": 42.7689,
367
- "eval_samples_per_second": 51.93,
368
- "eval_steps_per_second": 6.5,
369
- "step": 7200
370
- },
371
- {
372
- "epoch": 9.13,
373
- "learning_rate": 3.7044963251188936e-05,
374
- "loss": 2.0113,
375
- "step": 7500
376
- },
377
- {
378
- "epoch": 9.13,
379
- "eval_cer": 0.3344113842173351,
380
- "eval_loss": 1.4068984985351562,
381
- "eval_runtime": 42.6741,
382
- "eval_samples_per_second": 52.046,
383
- "eval_steps_per_second": 6.514,
384
- "step": 7500
385
- },
386
- {
387
- "epoch": 9.49,
388
- "learning_rate": 3.639645482057933e-05,
389
- "loss": 2.0057,
390
- "step": 7800
391
- },
392
- {
393
- "epoch": 9.49,
394
- "eval_cer": 0.3230669718379938,
395
- "eval_loss": 1.443265676498413,
396
- "eval_runtime": 42.7855,
397
- "eval_samples_per_second": 51.91,
398
- "eval_steps_per_second": 6.498,
399
- "step": 7800
400
- },
401
- {
402
- "epoch": 9.86,
403
- "learning_rate": 3.574794638996974e-05,
404
- "loss": 1.9741,
405
- "step": 8100
406
- },
407
- {
408
- "epoch": 9.86,
409
- "eval_cer": 0.3216240421932531,
410
- "eval_loss": 1.426885724067688,
411
- "eval_runtime": 42.8301,
412
- "eval_samples_per_second": 51.856,
413
- "eval_steps_per_second": 6.491,
414
- "step": 8100
415
- },
416
- {
417
- "epoch": 10.22,
418
- "learning_rate": 3.509943795936014e-05,
419
- "loss": 1.936,
420
- "step": 8400
421
- },
422
- {
423
- "epoch": 10.22,
424
- "eval_cer": 0.3227186784754702,
425
- "eval_loss": 1.3612221479415894,
426
- "eval_runtime": 43.0542,
427
- "eval_samples_per_second": 51.586,
428
- "eval_steps_per_second": 6.457,
429
- "step": 8400
430
- },
431
- {
432
- "epoch": 10.59,
433
- "learning_rate": 3.4450929528750544e-05,
434
- "loss": 1.9416,
435
- "step": 8700
436
- },
437
- {
438
- "epoch": 10.59,
439
- "eval_cer": 0.3027664444223306,
440
- "eval_loss": 1.363100290298462,
441
- "eval_runtime": 43.1271,
442
- "eval_samples_per_second": 51.499,
443
- "eval_steps_per_second": 6.446,
444
- "step": 8700
445
- },
446
- {
447
- "epoch": 10.95,
448
- "learning_rate": 3.380242109814095e-05,
449
- "loss": 1.9425,
450
- "step": 9000
451
- },
452
- {
453
- "epoch": 10.95,
454
- "eval_cer": 0.3038610807045477,
455
- "eval_loss": 1.3716000318527222,
456
- "eval_runtime": 43.286,
457
- "eval_samples_per_second": 51.31,
458
- "eval_steps_per_second": 6.422,
459
- "step": 9000
460
- },
461
- {
462
- "epoch": 11.32,
463
- "learning_rate": 3.3153912667531345e-05,
464
- "loss": 1.9351,
465
- "step": 9300
466
- },
467
- {
468
- "epoch": 11.32,
469
- "eval_cer": 0.31490695591601153,
470
- "eval_loss": 1.3932286500930786,
471
- "eval_runtime": 43.3029,
472
- "eval_samples_per_second": 51.29,
473
- "eval_steps_per_second": 6.42,
474
- "step": 9300
475
- },
476
- {
477
- "epoch": 11.68,
478
- "learning_rate": 3.250540423692175e-05,
479
- "loss": 1.9046,
480
- "step": 9600
481
- },
482
- {
483
- "epoch": 11.68,
484
- "eval_cer": 0.3329684545725943,
485
- "eval_loss": 1.4470584392547607,
486
- "eval_runtime": 42.9923,
487
- "eval_samples_per_second": 51.66,
488
- "eval_steps_per_second": 6.466,
489
- "step": 9600
490
- },
491
- {
492
- "epoch": 12.05,
493
- "learning_rate": 3.185905750108085e-05,
494
- "loss": 1.8587,
495
- "step": 9900
496
- },
497
- {
498
- "epoch": 12.05,
499
- "eval_cer": 0.3056523037118121,
500
- "eval_loss": 1.3519924879074097,
501
- "eval_runtime": 42.7004,
502
- "eval_samples_per_second": 52.014,
503
- "eval_steps_per_second": 6.51,
504
- "step": 9900
505
- },
506
- {
507
- "epoch": 12.41,
508
- "learning_rate": 3.1210549070471253e-05,
509
- "loss": 1.8699,
510
- "step": 10200
511
- },
512
- {
513
- "epoch": 12.41,
514
- "eval_cer": 0.3289879590008956,
515
- "eval_loss": 1.4434651136398315,
516
- "eval_runtime": 42.904,
517
- "eval_samples_per_second": 51.767,
518
- "eval_steps_per_second": 6.48,
519
- "step": 10200
520
- },
521
- {
522
- "epoch": 12.78,
523
- "learning_rate": 3.056204063986166e-05,
524
- "loss": 1.8328,
525
- "step": 10500
526
- },
527
- {
528
- "epoch": 12.78,
529
- "eval_cer": 0.31356353866056325,
530
- "eval_loss": 1.361649751663208,
531
- "eval_runtime": 42.7673,
532
- "eval_samples_per_second": 51.932,
533
- "eval_steps_per_second": 6.5,
534
- "step": 10500
535
- },
536
- {
537
- "epoch": 13.14,
538
- "learning_rate": 2.9913532209252054e-05,
539
- "loss": 1.8136,
540
- "step": 10800
541
- },
542
- {
543
- "epoch": 13.14,
544
- "eval_cer": 0.2943078913324709,
545
- "eval_loss": 1.3512203693389893,
546
- "eval_runtime": 42.5723,
547
- "eval_samples_per_second": 52.17,
548
- "eval_steps_per_second": 6.53,
549
- "step": 10800
550
- },
551
- {
552
- "epoch": 13.51,
553
- "learning_rate": 2.9265023778642458e-05,
554
- "loss": 1.8099,
555
- "step": 11100
556
- },
557
- {
558
- "epoch": 13.51,
559
- "eval_cer": 0.2956513085879192,
560
- "eval_loss": 1.3534834384918213,
561
- "eval_runtime": 42.854,
562
- "eval_samples_per_second": 51.827,
563
- "eval_steps_per_second": 6.487,
564
- "step": 11100
565
- },
566
- {
567
- "epoch": 13.87,
568
- "learning_rate": 2.861651534803286e-05,
569
- "loss": 1.8021,
570
- "step": 11400
571
- },
572
- {
573
- "epoch": 13.87,
574
- "eval_cer": 0.2981888745148771,
575
- "eval_loss": 1.3732918500900269,
576
- "eval_runtime": 42.5792,
577
- "eval_samples_per_second": 52.162,
578
- "eval_steps_per_second": 6.529,
579
- "step": 11400
580
- },
581
- {
582
- "epoch": 14.24,
583
- "learning_rate": 2.7968006917423263e-05,
584
- "loss": 1.7809,
585
- "step": 11700
586
- },
587
- {
588
- "epoch": 14.24,
589
- "eval_cer": 0.30804060105483133,
590
- "eval_loss": 1.3088232278823853,
591
- "eval_runtime": 42.9072,
592
- "eval_samples_per_second": 51.763,
593
- "eval_steps_per_second": 6.479,
594
- "step": 11700
595
- },
596
- {
597
- "epoch": 14.6,
598
- "learning_rate": 2.731949848681366e-05,
599
- "loss": 1.7734,
600
- "step": 12000
601
- },
602
- {
603
- "epoch": 14.6,
604
- "eval_cer": 0.28858592894815405,
605
- "eval_loss": 1.320089340209961,
606
- "eval_runtime": 42.6671,
607
- "eval_samples_per_second": 52.054,
608
- "eval_steps_per_second": 6.516,
609
- "step": 12000
610
- },
611
- {
612
- "epoch": 14.97,
613
- "learning_rate": 2.6670990056204063e-05,
614
- "loss": 1.7646,
615
- "step": 12300
616
- },
617
- {
618
- "epoch": 14.97,
619
- "eval_cer": 0.3268981988257538,
620
- "eval_loss": 1.3471167087554932,
621
- "eval_runtime": 42.7924,
622
- "eval_samples_per_second": 51.902,
623
- "eval_steps_per_second": 6.496,
624
- "step": 12300
625
- },
626
- {
627
- "epoch": 15.33,
628
- "learning_rate": 2.602248162559447e-05,
629
- "loss": 1.733,
630
- "step": 12600
631
- },
632
- {
633
- "epoch": 15.33,
634
- "eval_cer": 0.30321425017414666,
635
- "eval_loss": 1.3437916040420532,
636
- "eval_runtime": 42.7066,
637
- "eval_samples_per_second": 52.006,
638
- "eval_steps_per_second": 6.51,
639
- "step": 12600
640
- },
641
- {
642
- "epoch": 15.7,
643
- "learning_rate": 2.5373973194984868e-05,
644
- "loss": 1.7182,
645
- "step": 12900
646
- },
647
- {
648
- "epoch": 15.7,
649
- "eval_cer": 0.2999800975221415,
650
- "eval_loss": 1.3310909271240234,
651
- "eval_runtime": 42.787,
652
- "eval_samples_per_second": 51.908,
653
- "eval_steps_per_second": 6.497,
654
- "step": 12900
655
- },
656
- {
657
- "epoch": 16.06,
658
- "learning_rate": 2.472546476437527e-05,
659
- "loss": 1.7071,
660
- "step": 13200
661
- },
662
- {
663
- "epoch": 16.06,
664
- "eval_cer": 0.3073937705244303,
665
- "eval_loss": 1.2641910314559937,
666
- "eval_runtime": 42.6973,
667
- "eval_samples_per_second": 52.017,
668
- "eval_steps_per_second": 6.511,
669
- "step": 13200
670
- },
671
- {
672
- "epoch": 16.43,
673
- "learning_rate": 2.4076956333765675e-05,
674
- "loss": 1.7196,
675
- "step": 13500
676
- },
677
- {
678
- "epoch": 16.43,
679
- "eval_cer": 0.2859488506319037,
680
- "eval_loss": 1.2662409543991089,
681
- "eval_runtime": 42.6819,
682
- "eval_samples_per_second": 52.036,
683
- "eval_steps_per_second": 6.513,
684
- "step": 13500
685
- },
686
- {
687
- "epoch": 16.79,
688
- "learning_rate": 2.3428447903156076e-05,
689
- "loss": 1.7264,
690
- "step": 13800
691
- },
692
- {
693
- "epoch": 16.79,
694
- "eval_cer": 0.2878893422231068,
695
- "eval_loss": 1.2460156679153442,
696
- "eval_runtime": 42.7771,
697
- "eval_samples_per_second": 51.92,
698
- "eval_steps_per_second": 6.499,
699
- "step": 13800
700
- },
701
- {
702
- "epoch": 17.16,
703
- "learning_rate": 2.2782101167315176e-05,
704
- "loss": 1.6875,
705
- "step": 14100
706
- },
707
- {
708
- "epoch": 17.16,
709
- "eval_cer": 0.2931137426609613,
710
- "eval_loss": 1.3022774457931519,
711
- "eval_runtime": 42.5345,
712
- "eval_samples_per_second": 52.216,
713
- "eval_steps_per_second": 6.536,
714
- "step": 14100
715
- },
716
- {
717
- "epoch": 17.52,
718
- "learning_rate": 2.2133592736705577e-05,
719
- "loss": 1.6659,
720
- "step": 14400
721
- },
722
- {
723
- "epoch": 17.52,
724
- "eval_cer": 0.2927654492984377,
725
- "eval_loss": 1.32107675075531,
726
- "eval_runtime": 42.821,
727
- "eval_samples_per_second": 51.867,
728
- "eval_steps_per_second": 6.492,
729
- "step": 14400
730
- },
731
- {
732
- "epoch": 17.89,
733
- "learning_rate": 2.148508430609598e-05,
734
- "loss": 1.6694,
735
- "step": 14700
736
- },
737
- {
738
- "epoch": 17.89,
739
- "eval_cer": 0.2882873917802766,
740
- "eval_loss": 1.3291140794754028,
741
- "eval_runtime": 42.7715,
742
- "eval_samples_per_second": 51.927,
743
- "eval_steps_per_second": 6.5,
744
- "step": 14700
745
- },
746
- {
747
- "epoch": 18.25,
748
- "learning_rate": 2.0836575875486384e-05,
749
- "loss": 1.643,
750
- "step": 15000
751
- },
752
- {
753
- "epoch": 18.25,
754
- "eval_cer": 0.294755697084287,
755
- "eval_loss": 1.2615532875061035,
756
- "eval_runtime": 42.8646,
757
- "eval_samples_per_second": 51.814,
758
- "eval_steps_per_second": 6.486,
759
- "step": 15000
760
- },
761
- {
762
- "epoch": 18.62,
763
- "learning_rate": 2.0188067444876785e-05,
764
- "loss": 1.676,
765
- "step": 15300
766
- },
767
- {
768
- "epoch": 18.62,
769
- "eval_cer": 0.2835107970942382,
770
- "eval_loss": 1.2185758352279663,
771
- "eval_runtime": 42.7823,
772
- "eval_samples_per_second": 51.914,
773
- "eval_steps_per_second": 6.498,
774
- "step": 15300
775
- },
776
- {
777
- "epoch": 18.98,
778
- "learning_rate": 1.9539559014267185e-05,
779
- "loss": 1.6397,
780
- "step": 15600
781
- },
782
- {
783
- "epoch": 18.98,
784
- "eval_cer": 0.2810727435565728,
785
- "eval_loss": 1.3059513568878174,
786
- "eval_runtime": 42.9668,
787
- "eval_samples_per_second": 51.691,
788
- "eval_steps_per_second": 6.47,
789
- "step": 15600
790
- },
791
- {
792
- "epoch": 19.35,
793
- "learning_rate": 1.8893212278426286e-05,
794
- "loss": 1.6347,
795
- "step": 15900
796
- },
797
- {
798
- "epoch": 19.35,
799
- "eval_cer": 0.28838690416956914,
800
- "eval_loss": 1.2377227544784546,
801
- "eval_runtime": 42.8094,
802
- "eval_samples_per_second": 51.881,
803
- "eval_steps_per_second": 6.494,
804
- "step": 15900
805
- },
806
- {
807
- "epoch": 19.71,
808
- "learning_rate": 1.824470384781669e-05,
809
- "loss": 1.6328,
810
- "step": 16200
811
- },
812
- {
813
- "epoch": 19.71,
814
- "eval_cer": 0.27599761170265696,
815
- "eval_loss": 1.2721112966537476,
816
- "eval_runtime": 42.6795,
817
- "eval_samples_per_second": 52.039,
818
- "eval_steps_per_second": 6.514,
819
- "step": 16200
820
- },
821
- {
822
- "epoch": 20.08,
823
- "learning_rate": 1.7596195417207094e-05,
824
- "loss": 1.6092,
825
- "step": 16500
826
- },
827
- {
828
- "epoch": 20.08,
829
- "eval_cer": 0.28231664842272863,
830
- "eval_loss": 1.2696741819381714,
831
- "eval_runtime": 42.6768,
832
- "eval_samples_per_second": 52.042,
833
- "eval_steps_per_second": 6.514,
834
- "step": 16500
835
- },
836
- {
837
- "epoch": 20.44,
838
- "learning_rate": 1.6947686986597494e-05,
839
- "loss": 1.5737,
840
- "step": 16800
841
- },
842
- {
843
- "epoch": 20.44,
844
- "eval_cer": 0.28306299134242213,
845
- "eval_loss": 1.2230887413024902,
846
- "eval_runtime": 42.9425,
847
- "eval_samples_per_second": 51.72,
848
- "eval_steps_per_second": 6.474,
849
- "step": 16800
850
- },
851
- {
852
- "epoch": 20.81,
853
- "learning_rate": 1.6299178555987894e-05,
854
- "loss": 1.6166,
855
- "step": 17100
856
- },
857
- {
858
- "epoch": 20.81,
859
- "eval_cer": 0.2663449099412877,
860
- "eval_loss": 1.2277541160583496,
861
- "eval_runtime": 42.813,
862
- "eval_samples_per_second": 51.877,
863
- "eval_steps_per_second": 6.493,
864
- "step": 17100
865
- },
866
- {
867
- "epoch": 21.17,
868
- "learning_rate": 1.56506701253783e-05,
869
- "loss": 1.5964,
870
- "step": 17400
871
- },
872
- {
873
- "epoch": 21.17,
874
- "eval_cer": 0.27355955816499156,
875
- "eval_loss": 1.2313120365142822,
876
- "eval_runtime": 42.7309,
877
- "eval_samples_per_second": 51.976,
878
- "eval_steps_per_second": 6.506,
879
- "step": 17400
880
- },
881
- {
882
- "epoch": 21.54,
883
- "learning_rate": 1.5002161694768699e-05,
884
- "loss": 1.5237,
885
- "step": 17700
886
- },
887
- {
888
- "epoch": 21.54,
889
- "eval_cer": 0.27863469001890734,
890
- "eval_loss": 1.2411593198776245,
891
- "eval_runtime": 42.9368,
892
- "eval_samples_per_second": 51.727,
893
- "eval_steps_per_second": 6.475,
894
- "step": 17700
895
- },
896
- {
897
- "epoch": 21.9,
898
- "learning_rate": 1.4353653264159101e-05,
899
- "loss": 1.5419,
900
- "step": 18000
901
- },
902
- {
903
- "epoch": 21.9,
904
- "eval_cer": 0.28634690018907355,
905
- "eval_loss": 1.2718561887741089,
906
- "eval_runtime": 42.5781,
907
- "eval_samples_per_second": 52.163,
908
- "eval_steps_per_second": 6.529,
909
- "step": 18000
910
- },
911
- {
912
- "epoch": 22.27,
913
- "learning_rate": 1.3705144833549505e-05,
914
- "loss": 1.5654,
915
- "step": 18300
916
- },
917
- {
918
- "epoch": 22.27,
919
- "eval_cer": 0.26734003383421234,
920
- "eval_loss": 1.2373576164245605,
921
- "eval_runtime": 42.5574,
922
- "eval_samples_per_second": 52.188,
923
- "eval_steps_per_second": 6.532,
924
- "step": 18300
925
- },
926
- {
927
- "epoch": 22.63,
928
- "learning_rate": 1.3060959792477304e-05,
929
- "loss": 1.5331,
930
- "step": 18600
931
- },
932
- {
933
- "epoch": 22.63,
934
- "eval_cer": 0.2708727236540949,
935
- "eval_loss": 1.197614073753357,
936
- "eval_runtime": 42.6921,
937
- "eval_samples_per_second": 52.024,
938
- "eval_steps_per_second": 6.512,
939
- "step": 18600
940
- },
941
- {
942
- "epoch": 23.0,
943
- "learning_rate": 1.2412451361867706e-05,
944
- "loss": 1.5378,
945
- "step": 18900
946
- },
947
- {
948
- "epoch": 23.0,
949
- "eval_cer": 0.26838491392178326,
950
- "eval_loss": 1.1672557592391968,
951
- "eval_runtime": 42.9497,
952
- "eval_samples_per_second": 51.712,
953
- "eval_steps_per_second": 6.473,
954
- "step": 18900
955
- },
956
- {
957
- "epoch": 23.36,
958
- "learning_rate": 1.1763942931258106e-05,
959
- "loss": 1.4972,
960
- "step": 19200
961
- },
962
- {
963
- "epoch": 23.36,
964
- "eval_cer": 0.26938003781470793,
965
- "eval_loss": 1.1548832654953003,
966
- "eval_runtime": 42.5425,
967
- "eval_samples_per_second": 52.207,
968
- "eval_steps_per_second": 6.535,
969
- "step": 19200
970
- },
971
- {
972
- "epoch": 23.73,
973
- "learning_rate": 1.1115434500648508e-05,
974
- "loss": 1.5112,
975
- "step": 19500
976
- },
977
- {
978
- "epoch": 23.73,
979
- "eval_cer": 0.2684844263110757,
980
- "eval_loss": 1.2580962181091309,
981
- "eval_runtime": 43.0178,
982
- "eval_samples_per_second": 51.63,
983
- "eval_steps_per_second": 6.462,
984
- "step": 19500
985
- },
986
- {
987
- "epoch": 24.09,
988
- "learning_rate": 1.046692607003891e-05,
989
- "loss": 1.5026,
990
- "step": 19800
991
- },
992
- {
993
- "epoch": 24.09,
994
- "eval_cer": 0.26957906259329284,
995
- "eval_loss": 1.2475780248641968,
996
- "eval_runtime": 42.8521,
997
- "eval_samples_per_second": 51.829,
998
- "eval_steps_per_second": 6.487,
999
- "step": 19800
1000
- },
1001
- {
1002
- "epoch": 24.46,
1003
- "learning_rate": 9.818417639429313e-06,
1004
- "loss": 1.5062,
1005
- "step": 20100
1006
- },
1007
- {
1008
- "epoch": 24.46,
1009
- "eval_cer": 0.2755995621454871,
1010
- "eval_loss": 1.2111254930496216,
1011
- "eval_runtime": 42.9059,
1012
- "eval_samples_per_second": 51.764,
1013
- "eval_steps_per_second": 6.479,
1014
- "step": 20100
1015
- },
1016
- {
1017
- "epoch": 24.82,
1018
- "learning_rate": 9.169909208819715e-06,
1019
- "loss": 1.4816,
1020
- "step": 20400
1021
- },
1022
- {
1023
- "epoch": 24.82,
1024
- "eval_cer": 0.26345905065180614,
1025
- "eval_loss": 1.2007070779800415,
1026
- "eval_runtime": 42.6871,
1027
- "eval_samples_per_second": 52.03,
1028
- "eval_steps_per_second": 6.513,
1029
- "step": 20400
1030
- },
1031
- {
1032
- "epoch": 25.19,
1033
- "learning_rate": 8.521400778210117e-06,
1034
- "loss": 1.4836,
1035
- "step": 20700
1036
- },
1037
- {
1038
- "epoch": 25.19,
1039
- "eval_cer": 0.27435565727933126,
1040
- "eval_loss": 1.2548900842666626,
1041
- "eval_runtime": 42.6925,
1042
- "eval_samples_per_second": 52.023,
1043
- "eval_steps_per_second": 6.512,
1044
- "step": 20700
1045
- },
1046
- {
1047
- "epoch": 25.55,
1048
- "learning_rate": 7.87289234760052e-06,
1049
- "loss": 1.479,
1050
- "step": 21000
1051
- },
1052
- {
1053
- "epoch": 25.55,
1054
- "eval_cer": 0.26987759976117026,
1055
- "eval_loss": 1.1535056829452515,
1056
- "eval_runtime": 42.5922,
1057
- "eval_samples_per_second": 52.146,
1058
- "eval_steps_per_second": 6.527,
1059
- "step": 21000
1060
- },
1061
- {
1062
- "epoch": 25.92,
1063
- "learning_rate": 7.2243839169909205e-06,
1064
- "loss": 1.493,
1065
- "step": 21300
1066
- },
1067
- {
1068
- "epoch": 25.92,
1069
- "eval_cer": 0.26972833117723155,
1070
- "eval_loss": 1.198728084564209,
1071
- "eval_runtime": 42.6034,
1072
- "eval_samples_per_second": 52.132,
1073
- "eval_steps_per_second": 6.525,
1074
- "step": 21300
1075
- },
1076
- {
1077
- "epoch": 26.28,
1078
- "learning_rate": 6.5758754863813235e-06,
1079
- "loss": 1.4524,
1080
- "step": 21600
1081
- },
1082
- {
1083
- "epoch": 26.28,
1084
- "eval_cer": 0.27470395064185493,
1085
- "eval_loss": 1.2245545387268066,
1086
- "eval_runtime": 42.9242,
1087
- "eval_samples_per_second": 51.742,
1088
- "eval_steps_per_second": 6.477,
1089
- "step": 21600
1090
- },
1091
- {
1092
- "epoch": 26.65,
1093
- "learning_rate": 5.927367055771725e-06,
1094
- "loss": 1.4569,
1095
- "step": 21900
1096
- },
1097
- {
1098
- "epoch": 26.65,
1099
- "eval_cer": 0.2605234351676784,
1100
- "eval_loss": 1.1879122257232666,
1101
- "eval_runtime": 42.634,
1102
- "eval_samples_per_second": 52.095,
1103
- "eval_steps_per_second": 6.521,
1104
- "step": 21900
1105
- },
1106
- {
1107
- "epoch": 27.01,
1108
- "learning_rate": 5.278858625162128e-06,
1109
- "loss": 1.4535,
1110
- "step": 22200
1111
- },
1112
- {
1113
- "epoch": 27.01,
1114
- "eval_cer": 0.27281321524529806,
1115
- "eval_loss": 1.2265853881835938,
1116
- "eval_runtime": 42.7816,
1117
- "eval_samples_per_second": 51.915,
1118
- "eval_steps_per_second": 6.498,
1119
- "step": 22200
1120
- },
1121
- {
1122
- "epoch": 27.38,
1123
- "learning_rate": 4.63035019455253e-06,
1124
- "loss": 1.4452,
1125
- "step": 22500
1126
- },
1127
- {
1128
- "epoch": 27.38,
1129
- "eval_cer": 0.2566424519852722,
1130
- "eval_loss": 1.1812487840652466,
1131
- "eval_runtime": 42.5285,
1132
- "eval_samples_per_second": 52.224,
1133
- "eval_steps_per_second": 6.537,
1134
- "step": 22500
1135
- },
1136
- {
1137
- "epoch": 27.74,
1138
- "learning_rate": 3.981841763942931e-06,
1139
- "loss": 1.4513,
1140
- "step": 22800
1141
- },
1142
- {
1143
- "epoch": 27.74,
1144
- "eval_cer": 0.26286197631605135,
1145
- "eval_loss": 1.1672886610031128,
1146
- "eval_runtime": 43.0029,
1147
- "eval_samples_per_second": 51.648,
1148
- "eval_steps_per_second": 6.465,
1149
- "step": 22800
1150
- },
1151
- {
1152
- "epoch": 28.11,
1153
- "learning_rate": 3.3333333333333333e-06,
1154
- "loss": 1.4561,
1155
- "step": 23100
1156
- },
1157
- {
1158
- "epoch": 28.11,
1159
- "eval_cer": 0.26181709622848043,
1160
- "eval_loss": 1.1963270902633667,
1161
- "eval_runtime": 42.7473,
1162
- "eval_samples_per_second": 51.957,
1163
- "eval_steps_per_second": 6.503,
1164
- "step": 23100
1165
- },
1166
- {
1167
- "epoch": 28.47,
1168
- "learning_rate": 2.6848249027237355e-06,
1169
- "loss": 1.4357,
1170
- "step": 23400
1171
- },
1172
- {
1173
- "epoch": 28.47,
1174
- "eval_cer": 0.26913125684147676,
1175
- "eval_loss": 1.201293706893921,
1176
- "eval_runtime": 42.6759,
1177
- "eval_samples_per_second": 52.043,
1178
- "eval_steps_per_second": 6.514,
1179
- "step": 23400
1180
- },
1181
- {
1182
- "epoch": 28.84,
1183
- "learning_rate": 2.0363164721141376e-06,
1184
- "loss": 1.4427,
1185
- "step": 23700
1186
- },
1187
- {
1188
- "epoch": 28.84,
1189
- "eval_cer": 0.2726141904667131,
1190
- "eval_loss": 1.2448346614837646,
1191
- "eval_runtime": 42.4761,
1192
- "eval_samples_per_second": 52.288,
1193
- "eval_steps_per_second": 6.545,
1194
- "step": 23700
1195
- },
1196
- {
1197
- "epoch": 29.2,
1198
- "learning_rate": 1.3899697362732382e-06,
1199
- "loss": 1.4171,
1200
- "step": 24000
1201
- },
1202
- {
1203
- "epoch": 29.2,
1204
- "eval_cer": 0.26684247188775,
1205
- "eval_loss": 1.2063277959823608,
1206
- "eval_runtime": 42.7033,
1207
- "eval_samples_per_second": 52.01,
1208
- "eval_steps_per_second": 6.51,
1209
- "step": 24000
1210
- },
1211
- {
1212
- "epoch": 29.57,
1213
- "learning_rate": 7.414613056636403e-07,
1214
- "loss": 1.4639,
1215
- "step": 24300
1216
- },
1217
- {
1218
- "epoch": 29.57,
1219
- "eval_cer": 0.26694198427704247,
1220
- "eval_loss": 1.2228556871414185,
1221
- "eval_runtime": 42.5785,
1222
- "eval_samples_per_second": 52.162,
1223
- "eval_steps_per_second": 6.529,
1224
- "step": 24300
1225
- },
1226
- {
1227
- "epoch": 29.93,
1228
- "learning_rate": 9.295287505404236e-08,
1229
- "loss": 1.4234,
1230
- "step": 24600
1231
- },
1232
- {
1233
- "epoch": 29.93,
1234
- "eval_cer": 0.2594785550801075,
1235
- "eval_loss": 1.1955249309539795,
1236
- "eval_runtime": 43.02,
1237
- "eval_samples_per_second": 51.627,
1238
- "eval_steps_per_second": 6.462,
1239
- "step": 24600
1240
- }
1241
- ],
1242
- "logging_steps": 300,
1243
- "max_steps": 24630,
1244
- "num_input_tokens_seen": 0,
1245
- "num_train_epochs": 30,
1246
- "save_steps": 600,
1247
- "total_flos": 3.9184197928838064e+20,
1248
- "train_batch_size": 4,
1249
- "trial_name": null,
1250
- "trial_params": null
1251
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-24600/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:252bac47c7b7828141031a0c886132cdabae6720c7cd2b2c44aa61c53da0b413
3
- size 4664
 
 
 
 
checkpoint-24600/vocab.json DELETED
@@ -1 +0,0 @@
1
- {"(": 1, ")": 2, "*": 3, "1": 4, "2": 5, "6": 6, "A": 7, "B": 8, "C": 9, "D": 10, "G": 11, "H": 12, "I": 13, "J": 14, "K": 15, "M": 16, "O": 17, "P": 18, "Q": 19, "R": 20, "S": 21, "U": 22, "V": 23, "W": 24, "X": 25, "Y": 26, "_": 27, "a": 28, "b": 29, "c": 30, "d": 31, "e": 32, "f": 33, "g": 34, "h": 35, "i": 36, "j": 37, "k": 38, "l": 39, "m": 40, "n": 41, "o": 42, "p": 43, "r": 44, "s": 45, "t": 46, "u": 47, "v": 48, "w": 49, "x": 50, "y": 51, "{": 52, "|": 0, "}": 54, "\u3001": 55, "\u35ad": 56, "\u35ce": 57, "\u4e00": 58, "\u4e01": 59, "\u4e03": 60, "\u4e08": 61, "\u4e09": 62, "\u4e0a": 63, "\u4e0b": 64, "\u4e0d": 65, "\u4e10": 66, "\u4e11": 67, "\u4e14": 68, "\u4e16": 69, "\u4e1f": 70, "\u4e26": 71, "\u4e2d": 72, "\u4e32": 73, "\u4e38": 74, "\u4e39": 75, "\u4e3b": 76, "\u4e43": 77, "\u4e45": 78, "\u4e4b": 79, "\u4e4d": 80, "\u4e4e": 81, "\u4e4f": 82, "\u4e56": 83, "\u4e58": 84, "\u4e59": 85, "\u4e5c": 86, "\u4e5d": 87, "\u4e5e": 88, "\u4e5f": 89, "\u4e73": 90, "\u4e7e": 91, "\u4e82": 92, "\u4e86": 93, "\u4e88": 94, "\u4e8b": 95, "\u4e8c": 96, "\u4e91": 97, "\u4e92": 98, "\u4e94": 99, "\u4e95": 100, "\u4e9b": 101, "\u4e9e": 102, "\u4ea1": 103, "\u4ea4": 104, "\u4ea5": 105, "\u4ea6": 106, "\u4eab": 107, "\u4eac": 108, "\u4ead": 109, "\u4eae": 110, "\u4eba": 111, "\u4ec0": 112, "\u4ec1": 113, "\u4ec6": 114, "\u4ec7": 115, "\u4eca": 116, "\u4ecb": 117, "\u4ecd": 118, "\u4ed4": 119, "\u4ed6": 120, "\u4ed7": 121, "\u4ed8": 122, "\u4ed9": 123, "\u4ee3": 124, "\u4ee4": 125, "\u4ee5": 126, "\u4ef0": 127, "\u4ef2": 128, "\u4ef6": 129, "\u4efb": 130, "\u4efd": 131, "\u4eff": 132, "\u4f01": 133, "\u4f0a": 134, "\u4f0d": 135, "\u4f0f": 136, "\u4f10": 137, "\u4f11": 138, "\u4f19": 139, "\u4f2f": 140, "\u4f30": 141, "\u4f34": 142, "\u4f36": 143, "\u4f38": 144, "\u4f3c": 145, "\u4f46": 146, "\u4f48": 147, "\u4f4d": 148, "\u4f4e": 149, "\u4f4f": 150, "\u4f54": 151, "\u4f55": 152, "\u4f59": 153, "\u4f5b": 154, "\u4f5c": 155, "\u4f60": 156, "\u4f62": 157, "\u4f69": 158, "\u4f6c": 159, "\u4f73": 160, "\u4f75": 161, "\u4f7f": 162, "\u4f84": 163, "\u4f86": 164, "\u4f8b": 165, "\u4f8d": 166, "\u4f9b": 167, "\u4f9d": 168, "\u4fae": 169, "\u4fb5": 170, "\u4fbf": 171, "\u4fc2": 172, "\u4fc3": 173, "\u4fc4": 174, "\u4fca": 175, "\u4fcf": 176, "\u4fd7": 177, "\u4fdd": 178, "\u4fe0": 179, "\u4fe1": 180, "\u4fee": 181, "\u4fef": 182, "\u4ff1": 183, "\u4ffe": 184, "\u5009": 185, "\u500b": 186, "\u500d": 187, "\u5011": 188, "\u5012": 189, "\u5019": 190, "\u501a": 191, "\u501f": 192, "\u5021": 193, "\u5026": 194, "\u5029": 195, "\u502b": 196, "\u503c": 197, "\u5047": 198, "\u5048": 199, "\u5049": 200, "\u504f": 201, "\u505a": 202, "\u505c": 203, "\u5065": 204, "\u5074": 205, "\u5076": 206, "\u5077": 207, "\u507d": 208, "\u5085": 209, "\u508d": 210, "\u5091": 211, "\u5098": 212, "\u5099": 213, "\u50a2": 214, "\u50ac": 215, "\u50b2": 216, "\u50b3": 217, "\u50b5": 218, "\u50b7": 219, "\u50bb": 220, "\u50be": 221, "\u50c5": 222, "\u50cf": 223, "\u50d1": 224, "\u50d5": 225, "\u50da": 226, "\u50f9": 227, "\u50fb": 228, "\u5100": 229, "\u5104": 230, "\u5109": 231, "\u5112": 232, "\u5118": 233, "\u511f": 234, "\u512a": 235, "\u5132": 236, "\u5141": 237, "\u5143": 238, "\u5144": 239, "\u5145": 240, "\u5146": 241, "\u5147": 242, "\u5148": 243, "\u5149": 244, "\u514b": 245, "\u514d": 246, "\u5152": 247, "\u5154": 248, "\u515c": 249, "\u5165": 250, "\u5167": 251, "\u5168": 252, "\u5169": 253, "\u516b": 254, "\u516c": 255, "\u516d": 256, "\u5171": 257, "\u5175": 258, "\u5176": 259, "\u5177": 260, "\u5178": 261, "\u517c": 262, "\u5187": 263, "\u518a": 264, "\u518d": 265, "\u5192": 266, "\u519a": 267, "\u51a0": 268, "\u51a4": 269, "\u51a7": 270, "\u51ac": 271, "\u51b0": 272, "\u51b7": 273, "\u51c6": 274, "\u51cc": 275, "\u51cd": 276, "\u51dd": 277, "\u51e1": 278, "\u51f3": 279, "\u51f8": 280, "\u51f9": 281, "\u51fa": 282, "\u5200": 283, "\u5206": 284, "\u5207": 285, "\u520a": 286, "\u5211": 287, "\u5217": 288, "\u521d": 289, "\u5224": 290, "\u5225": 291, "\u5229": 292, "\u522a": 293, "\u522e": 294, "\u5230": 295, "\u5236": 296, "\u5237": 297, "\u523a": 298, "\u523b": 299, "\u5243": 300, "\u5247": 301, "\u524a": 302, "\u524d": 303, "\u524e": 304, "\u5254": 305, "\u5256": 306, "\u525b": 307, "\u525d": 308, "\u5269": 309, "\u526a": 310, "\u526f": 311, "\u5272": 312, "\u5275": 313, "\u5283": 314, "\u5287": 315, "\u5289": 316, "\u528d": 317, "\u5291": 318, "\u529b": 319, "\u529f": 320, "\u52a0": 321, "\u52a3": 322, "\u52a9": 323, "\u52aa": 324, "\u52ab": 325, "\u52c1": 326, "\u52c3": 327, "\u52c7": 328, "\u52c9": 329, "\u52d2": 330, "\u52d5": 331, "\u52d9": 332, "\u52dd": 333, "\u52de": 334, "\u52e2": 335, "\u52e4": 336, "\u52f5": 337, "\u52f8": 338, "\u52fe": 339, "\u5305": 340, "\u5306": 341, "\u5316": 342, "\u5317": 343, "\u5320": 344, "\u532a": 345, "\u532f": 346, "\u5339": 347, "\u533f": 348, "\u5340": 349, "\u5341": 350, "\u5343": 351, "\u5347": 352, "\u5348": 353, "\u5349": 354, "\u534a": 355, "\u5351": 356, "\u5353": 357, "\u5354": 358, "\u5357": 359, "\u535a": 360, "\u535c": 361, "\u5361": 362, "\u5370": 363, "\u5371": 364, "\u5373": 365, "\u5377": 366, "\u537b": 367, "\u5398": 368, "\u539a": 369, "\u539f": 370, "\u53ad": 371, "\u53b2": 372, "\u53bb": 373, "\u53c3": 374, "\u53c8": 375, "\u53c9": 376, "\u53ca": 377, "\u53cb": 378, "\u53cd": 379, "\u53d4": 380, "\u53d6": 381, "\u53d7": 382, "\u53db": 383, "\u53e2": 384, "\u53e3": 385, "\u53e4": 386, "\u53e5": 387, "\u53e6": 388, "\u53ea": 389, "\u53eb": 390, "\u53ec": 391, "\u53ee": 392, "\u53ef": 393, "\u53f0": 394, "\u53f2": 395, "\u53f3": 396, "\u53f8": 397, "\u53fb": 398, "\u5403": 399, "\u5404": 400, "\u5408": 401, "\u5409": 402, "\u540a": 403, "\u540c": 404, "\u540d": 405, "\u540e": 406, "\u5410": 407, "\u5411": 408, "\u5413": 409, "\u5416": 410, "\u541b": 411, "\u541e": 412, "\u541f": 413, "\u5426": 414, "\u5427": 415, "\u5429": 416, "\u542b": 417, "\u5431": 418, "\u5433": 419, "\u5435": 420, "\u5436": 421, "\u5438": 422, "\u5439": 423, "\u543b": 424, "\u543e": 425, "\u5440": 426, "\u5442": 427, "\u5443": 428, "\u5446": 429, "\u5448": 430, "\u544a": 431, "\u544e": 432, "\u5462": 433, "\u5468": 434, "\u5473": 435, "\u5475": 436, "\u547c": 437, "\u547d": 438, "\u5481": 439, "\u548b": 440, "\u548c": 441, "\u5490": 442, "\u5492": 443, "\u5496": 444, "\u5497": 445, "\u54a9": 446, "\u54aa": 447, "\u54ac": 448, "\u54af": 449, "\u54b3": 450, "\u54b8": 451, "\u54c0": 452, "\u54c1": 453, "\u54c4": 454, "\u54c7": 455, "\u54c8": 456, "\u54c9": 457, "\u54cb": 458, "\u54ce": 459, "\u54e1": 460, "\u54e5": 461, "\u54e6": 462, "\u54ea": 463, "\u54ed": 464, "\u54f2": 465, "\u54fc": 466, "\u5507": 467, "\u5509": 468, "\u550f": 469, "\u5510": 470, "\u5514": 471, "\u552e": 472, "\u552f": 473, "\u5531": 474, "\u5546": 475, "\u554a": 476, "\u554f": 477, "\u5556": 478, "\u555e": 479, "\u555f": 480, "\u5561": 481, "\u5564": 482, "\u5566": 483, "\u5569": 484, "\u556b": 485, "\u5571": 486, "\u5572": 487, "\u5582": 488, "\u5583": 489, "\u5584": 490, "\u5587": 491, "\u5589": 492, "\u558a": 493, "\u558e": 494, "\u5590": 495, "\u5598": 496, "\u559a": 497, "\u559c": 498, "\u559d": 499, "\u55a7": 500, "\u55aa": 501, "\u55ac": 502, "\u55ae": 503, "\u55b3": 504, "\u55ba": 505, "\u55bb": 506, "\u55cc": 507, "\u55ce": 508, "\u55e1": 509, "\u55e6": 510, "\u55ee": 511, "\u55ef": 512, "\u55f0": 513, "\u55f1": 514, "\u5605": 515, "\u5606": 516, "\u5608": 517, "\u5609": 518, "\u5614": 519, "\u5617": 520, "\u561b": 521, "\u561e": 522, "\u5622": 523, "\u5625": 524, "\u5629": 525, "\u5632": 526, "\u5634": 527, "\u563b": 528, "\u5643": 529, "\u5649": 530, "\u5653": 531, "\u5668": 532, "\u5674": 533, "\u5679": 534, "\u5687": 535, "\u569f": 536, "\u56ae": 537, "\u56b4": 538, "\u56b7": 539, "\u56bc": 540, "\u56c9": 541, "\u56ca": 542, "\u56d6": 543, "\u56db": 544, "\u56de": 545, "\u56e0": 546, "\u56ea": 547, "\u56f0": 548, "\u56fa": 549, "\u5708": 550, "\u570b": 551, "\u570d": 552, "\u5712": 553, "\u5713": 554, "\u5716": 555, "\u5718": 556, "\u571f": 557, "\u5728": 558, "\u5730": 559, "\u5733": 560, "\u573e": 561, "\u5740": 562, "\u5747": 563, "\u574a": 564, "\u5750": 565, "\u5751": 566, "\u5761": 567, "\u5764": 568, "\u5766": 569, "\u5782": 570, "\u5783": 571, "\u578b": 572, "\u57c3": 573, "\u57cb": 574, "\u57ce": 575, "\u57df": 576, "\u57f7": 577, "\u57f9": 578, "\u57fa": 579, "\u5802": 580, "\u5805": 581, "\u5806": 582, "\u5821": 583, "\u582a": 584, "\u5831": 585, "\u5834": 586, "\u584a": 587, "\u5851": 588, "\u5854": 589, "\u5857": 590, "\u5858": 591, "\u585e": 592, "\u586b": 593, "\u5875": 594, "\u5883": 595, "\u5893": 596, "\u589e": 597, "\u58a8": 598, "\u58ae": 599, "\u58b3": 600, "\u58bb": 601, "\u58c1": 602, "\u58c7": 603, "\u58d3": 604, "\u58d8": 605, "\u58de": 606, "\u58e4": 607, "\u58eb": 608, "\u58ef": 609, "\u58fa": 610, "\u58fd": 611, "\u590f": 612, "\u5915": 613, "\u5916": 614, "\u591a": 615, "\u591c": 616, "\u5920": 617, "\u5922": 618, "\u5925": 619, "\u5927": 620, "\u5929": 621, "\u592a": 622, "\u592b": 623, "\u592e": 624, "\u5931": 625, "\u593e": 626, "\u5947": 627, "\u5948": 628, "\u5949": 629, "\u594f": 630, "\u5951": 631, "\u5954": 632, "\u5957": 633, "\u5962": 634, "\u5967": 635, "\u596a": 636, "\u596e": 637, "\u5973": 638, "\u5974": 639, "\u5976": 640, "\u5978": 641, "\u5979": 642, "\u597d": 643, "\u5982": 644, "\u5984": 645, "\u5992": 646, "\u5999": 647, "\u599d": 648, "\u59a5": 649, "\u59a8": 650, "\u59ae": 651, "\u59b9": 652, "\u59bb": 653, "\u59c6": 654, "\u59ca": 655, "\u59cb": 656, "\u59d0": 657, "\u59d1": 658, "\u59d3": 659, "\u59d4": 660, "\u59da": 661, "\u59e6": 662, "\u59e8": 663, "\u59fb": 664, "\u59ff": 665, "\u5a01": 666, "\u5a03": 667, "\u5a18": 668, "\u5a1b": 669, "\u5a1c": 670, "\u5a1f": 671, "\u5a23": 672, "\u5a36": 673, "\u5a46": 674, "\u5a5a": 675, "\u5a66": 676, "\u5a92": 677, "\u5a9a": 678, "\u5ab3": 679, "\u5abd": 680, "\u5ac1": 681, "\u5ac2": 682, "\u5acc": 683, "\u5ae9": 684, "\u5af2": 685, "\u5b0b": 686, "\u5b0c": 687, "\u5b30": 688, "\u5b32": 689, "\u5b38": 690, "\u5b50": 691, "\u5b54": 692, "\u5b55": 693, "\u5b57": 694, "\u5b58": 695, "\u5b5d": 696, "\u5b5f": 697, "\u5b63": 698, "\u5b64": 699, "\u5b69": 700, "\u5b6b": 701, "\u5b6d": 702, "\u5b78": 703, "\u5b83": 704, "\u5b85": 705, "\u5b87": 706, "\u5b88": 707, "\u5b89": 708, "\u5b8b": 709, "\u5b8c": 710, "\u5b8f": 711, "\u5b93": 712, "\u5b97": 713, "\u5b98": 714, "\u5b99": 715, "\u5b9a": 716, "\u5b9c": 717, "\u5ba2": 718, "\u5ba3": 719, "\u5ba4": 720, "\u5bae": 721, "\u5bb0": 722, "\u5bb3": 723, "\u5bb4": 724, "\u5bb5": 725, "\u5bb6": 726, "\u5bb9": 727, "\u5bbf": 728, "\u5bc2": 729, "\u5bc4": 730, "\u5bc6": 731, "\u5bcc": 732, "\u5bd2": 733, "\u5bd3": 734, "\u5bde": 735, "\u5bdf": 736, "\u5be1": 737, "\u5be6": 738, "\u5be7": 739, "\u5be9": 740, "\u5beb": 741, "\u5bec": 742, "\u5bf5": 743, "\u5bf6": 744, "\u5bf8": 745, "\u5bfa": 746, "\u5c01": 747, "\u5c04": 748, "\u5c07": 749, "\u5c08": 750, "\u5c0a": 751, "\u5c0b": 752, "\u5c0d": 753, "\u5c0e": 754, "\u5c0f": 755, "\u5c11": 756, "\u5c16": 757, "\u5c1a": 758, "\u5c24": 759, "\u5c2c": 760, "\u5c31": 761, "\u5c37": 762, "\u5c3a": 763, "\u5c3c": 764, "\u5c3e": 765, "\u5c40": 766, "\u5c45": 767, "\u5c46": 768, "\u5c48": 769, "\u5c4b": 770, "\u5c4d": 771, "\u5c4e": 772, "\u5c4f": 773, "\u5c50": 774, "\u5c55": 775, "\u5c60": 776, "\u5c62": 777, "\u5c64": 778, "\u5c6c": 779, "\u5c71": 780, "\u5ca9": 781, "\u5cb3": 782, "\u5cb8": 783, "\u5cf0": 784, "\u5cf6": 785, "\u5d07": 786, "\u5d17": 787, "\u5d29": 788, "\u5dba": 789, "\u5ddd": 790, "\u5dde": 791, "\u5de2": 792, "\u5de5": 793, "\u5de6": 794, "\u5de7": 795, "\u5de8": 796, "\u5dee": 797, "\u5df1": 798, "\u5df2": 799, "\u5df4": 800, "\u5df7": 801, "\u5dfe": 802, "\u5e02": 803, "\u5e03": 804, "\u5e06": 805, "\u5e0c": 806, "\u5e16": 807, "\u5e1d": 808, "\u5e2b": 809, "\u5e2d": 810, "\u5e33": 811, "\u5e36": 812, "\u5e38": 813, "\u5e3d": 814, "\u5e45": 815, "\u5e55": 816, "\u5e5f": 817, "\u5e63": 818, "\u5e6b": 819, "\u5e72": 820, "\u5e73": 821, "\u5e74": 822, "\u5e76": 823, "\u5e78": 824, "\u5e79": 825, "\u5e7b": 826, "\u5e7c": 827, "\u5e7d": 828, "\u5e7e": 829, "\u5e8a": 830, "\u5e8f": 831, "\u5e95": 832, "\u5e97": 833, "\u5e9a": 834, "\u5e9c": 835, "\u5ea6": 836, "\u5ea7": 837, "\u5eab": 838, "\u5ead": 839, "\u5eb7": 840, "\u5eb8": 841, "\u5ec1": 842, "\u5ec2": 843, "\u5ec8": 844, "\u5ec9": 845, "\u5eca": 846, "\u5eda": 847, "\u5edf": 848, "\u5ee0": 849, "\u5ee2": 850, "\u5ee3": 851, "\u5ef3": 852, "\u5ef6": 853, "\u5ef7": 854, "\u5efa": 855, "\u5eff": 856, "\u5f04": 857, "\u5f0a": 858, "\u5f0f": 859, "\u5f15": 860, "\u5f1f": 861, "\u5f26": 862, "\u5f31": 863, "\u5f35": 864, "\u5f37": 865, "\u5f48": 866, "\u5f4c": 867, "\u5f4e": 868, "\u5f59": 869, "\u5f62": 870, "\u5f64": 871, "\u5f65": 872, "\u5f69": 873, "\u5f6d": 874, "\u5f71": 875, "\u5f77": 876, "\u5f79": 877, "\u5f7c": 878, "\u5f7f": 879, "\u5f80": 880, "\u5f81": 881, "\u5f85": 882, "\u5f88": 883, "\u5f8a": 884, "\u5f8b": 885, "\u5f8c": 886, "\u5f90": 887, "\u5f91": 888, "\u5f92": 889, "\u5f97": 890, "\u5f98": 891, "\u5f9e": 892, "\u5fa9": 893, "\u5faa": 894, "\u5fae": 895, "\u5fb5": 896, "\u5fb7": 897, "\u5fb9": 898, "\u5fc3": 899, "\u5fc5": 900, "\u5fcc": 901, "\u5fcd": 902, "\u5fd7": 903, "\u5fd8": 904, "\u5fd9": 905, "\u5fe0": 906, "\u5feb": 907, "\u5ff5": 908, "\u5ffd": 909, "\u600e": 910, "\u6012": 911, "\u6015": 912, "\u6016": 913, "\u601d": 914, "\u6021": 915, "\u6025": 916, "\u6027": 917, "\u6028": 918, "\u602a": 919, "\u6046": 920, "\u6050": 921, "\u6062": 922, "\u6065": 923, "\u6068": 924, "\u6069": 925, "\u606d": 926, "\u606f": 927, "\u6070": 928, "\u6084": 929, "\u6085": 930, "\u6089": 931, "\u6094": 932, "\u609f": 933, "\u60a0": 934, "\u60a3": 935, "\u60b2": 936, "\u60b6": 937, "\u60bc": 938, "\u60c5": 939, "\u60cb": 940, "\u60d1": 941, "\u60d8": 942, "\u60dc": 943, "\u60e0": 944, "\u60e1": 945, "\u60f1": 946, "\u60f3": 947, "\u60f6": 948, "\u60f9": 949, "\u6101": 950, "\u6108": 951, "\u6109": 952, "\u610f": 953, "\u611a": 954, "\u611b": 955, "\u611f": 956, "\u6127": 957, "\u6148": 958, "\u614b": 959, "\u614c": 960, "\u614e": 961, "\u6155": 962, "\u6158": 963, "\u615a": 964, "\u6162": 965, "\u6163": 966, "\u6167": 967, "\u6168": 968, "\u616e": 969, "\u6170": 970, "\u6173": 971, "\u6176": 972, "\u617e": 973, "\u6182": 974, "\u618e": 975, "\u6190": 976, "\u6191": 977, "\u61a4": 978, "\u61b2": 979, "\u61b6": 980, "\u61be": 981, "\u61c2": 982, "\u61c7": 983, "\u61c9": 984, "\u61f2": 985, "\u61f6": 986, "\u61f7": 987, "\u61f8": 988, "\u61fa": 989, "\u61fc": 990, "\u6200": 991, "\u6207": 992, "\u6210": 993, "\u6211": 994, "\u6212": 995, "\u6216": 996, "\u621a": 997, "\u622a": 998, "\u6230": 999, "\u6232": 1000, "\u6234": 1001, "\u6236": 1002, "\u623f": 1003, "\u6240": 1004, "\u6247": 1005, "\u624b": 1006, "\u624d": 1007, "\u624e": 1008, "\u6250": 1009, "\u6253": 1010, "\u6258": 1011, "\u6263": 1012, "\u626d": 1013, "\u626e": 1014, "\u626f": 1015, "\u6276": 1016, "\u6279": 1017, "\u627e": 1018, "\u627f": 1019, "\u6280": 1020, "\u6284": 1021, "\u628a": 1022, "\u6291": 1023, "\u6292": 1024, "\u6293": 1025, "\u6295": 1026, "\u6296": 1027, "\u6297": 1028, "\u6298": 1029, "\u62ab": 1030, "\u62b1": 1031, "\u62b5": 1032, "\u62b9": 1033, "\u62bc": 1034, "\u62bd": 1035, "\u62c2": 1036, "\u62c6": 1037, "\u62c9": 1038, "\u62cb": 1039, "\u62cd": 1040, "\u62ce": 1041, "\u62d0": 1042, "\u62d2": 1043, "\u62d4": 1044, "\u62d6": 1045, "\u62d7": 1046, "\u62d8": 1047, "\u62db": 1048, "\u62dc": 1049, "\u62ec": 1050, "\u62f1": 1051, "\u62f3": 1052, "\u62fc": 1053, "\u62fe": 1054, "\u62ff": 1055, "\u6301": 1056, "\u6307": 1057, "\u6309": 1058, "\u6311": 1059, "\u6316": 1060, "\u632b": 1061, "\u632f": 1062, "\u633a": 1063, "\u633d": 1064, "\u6349": 1065, "\u6350": 1066, "\u6355": 1067, "\u6367": 1068, "\u6368": 1069, "\u6371": 1070, "\u6372": 1071, "\u6377": 1072, "\u6380": 1073, "\u6382": 1074, "\u6383": 1075, "\u6388": 1076, "\u6389": 1077, "\u638c": 1078, "\u638f": 1079, "\u6392": 1080, "\u6398": 1081, "\u6399": 1082, "\u639b": 1083, "\u63a1": 1084, "\u63a2": 1085, "\u63a3": 1086, "\u63a5": 1087, "\u63a7": 1088, "\u63a8": 1089, "\u63a9": 1090, "\u63aa": 1091, "\u63c0": 1092, "\u63cf": 1093, "\u63d0": 1094, "\u63d2": 1095, "\u63da": 1096, "\u63db": 1097, "\u63e1": 1098, "\u63e9": 1099, "\u63ed": 1100, "\u63ee": 1101, "\u63f4": 1102, "\u63f8": 1103, "\u63fe": 1104, "\u640d": 1105, "\u640f": 1106, "\u6416": 1107, "\u641c": 1108, "\u641e": 1109, "\u642c": 1110, "\u642d": 1111, "\u6435": 1112, "\u6436": 1113, "\u6458": 1114, "\u6469": 1115, "\u6478": 1116, "\u647a": 1117, "\u6488": 1118, "\u6490": 1119, "\u6492": 1120, "\u6495": 1121, "\u649e": 1122, "\u64a4": 1123, "\u64a5": 1124, "\u64ab": 1125, "\u64ad": 1126, "\u64b2": 1127, "\u64b3": 1128, "\u64c1": 1129, "\u64c7": 1130, "\u64ca": 1131, "\u64cb": 1132, "\u64cd": 1133, "\u64d4": 1134, "\u64d8": 1135, "\u64da": 1136, "\u64e0": 1137, "\u64e1": 1138, "\u64e6": 1139, "\u64ec": 1140, "\u64f0": 1141, "\u64f4": 1142, "\u64fa": 1143, "\u64fe": 1144, "\u6500": 1145, "\u651d": 1146, "\u651e": 1147, "\u6524": 1148, "\u652c": 1149, "\u652f": 1150, "\u6530": 1151, "\u6536": 1152, "\u6539": 1153, "\u653b": 1154, "\u653e": 1155, "\u653f": 1156, "\u6545": 1157, "\u6548": 1158, "\u654f": 1159, "\u6551": 1160, "\u6557": 1161, "\u6558": 1162, "\u6559": 1163, "\u6562": 1164, "\u6563": 1165, "\u6566": 1166, "\u656c": 1167, "\u6572": 1168, "\u6574": 1169, "\u6575": 1170, "\u6578": 1171, "\u6582": 1172, "\u6587": 1173, "\u6591": 1174, "\u6599": 1175, "\u659c": 1176, "\u65a4": 1177, "\u65a5": 1178, "\u65ac": 1179, "\u65af": 1180, "\u65b0": 1181, "\u65b7": 1182, "\u65b9": 1183, "\u65bc": 1184, "\u65bd": 1185, "\u65c1": 1186, "\u65c5": 1187, "\u65cb": 1188, "\u65cf": 1189, "\u65d7": 1190, "\u65e2": 1191, "\u65e5": 1192, "\u65e6": 1193, "\u65e8": 1194, "\u65e9": 1195, "\u65fa": 1196, "\u6602": 1197, "\u6606": 1198, "\u660e": 1199, "\u660f": 1200, "\u6613": 1201, "\u6614": 1202, "\u661f": 1203, "\u6620": 1204, "\u6625": 1205, "\u6627": 1206, "\u6628": 1207, "\u662f": 1208, "\u6642": 1209, "\u6643": 1210, "\u664f": 1211, "\u6652": 1212, "\u665a": 1213, "\u665d": 1214, "\u6668": 1215, "\u666e": 1216, "\u666f": 1217, "\u6670": 1218, "\u6674": 1219, "\u667a": 1220, "\u6688": 1221, "\u6691": 1222, "\u6696": 1223, "\u6697": 1224, "\u66a2": 1225, "\u66ab": 1226, "\u66b4": 1227, "\u66c6": 1228, "\u66c9": 1229, "\u66ec": 1230, "\u66f2": 1231, "\u66f4": 1232, "\u66f8": 1233, "\u66f9": 1234, "\u66fc": 1235, "\u66fe": 1236, "\u66ff": 1237, "\u6700": 1238, "\u6703": 1239, "\u6708": 1240, "\u6709": 1241, "\u670b": 1242, "\u670d": 1243, "\u6717": 1244, "\u671b": 1245, "\u671d": 1246, "\u671f": 1247, "\u6726": 1248, "\u6727": 1249, "\u6728": 1250, "\u672a": 1251, "\u672b": 1252, "\u672c": 1253, "\u6731": 1254, "\u6735": 1255, "\u674e": 1256, "\u674f": 1257, "\u6750": 1258, "\u6751": 1259, "\u675c": 1260, "\u675f": 1261, "\u676f": 1262, "\u6771": 1263, "\u677e": 1264, "\u677f": 1265, "\u6789": 1266, "\u6790": 1267, "\u6795": 1268, "\u6797": 1269, "\u679c": 1270, "\u679d": 1271, "\u67af": 1272, "\u67b6": 1273, "\u67c4": 1274, "\u67cf": 1275, "\u67d0": 1276, "\u67d3": 1277, "\u67d4": 1278, "\u67d8": 1279, "\u67e5": 1280, "\u67f1": 1281, "\u67f3": 1282, "\u67f4": 1283, "\u6821": 1284, "\u6838": 1285, "\u6839": 1286, "\u683c": 1287, "\u683d": 1288, "\u6842": 1289, "\u6843": 1290, "\u6846": 1291, "\u6848": 1292, "\u684c": 1293, "\u6851": 1294, "\u6876": 1295, "\u687f": 1296, "\u6881": 1297, "\u6885": 1298, "\u6893": 1299, "\u6897": 1300, "\u689d": 1301, "\u68a2": 1302, "\u68a8": 1303, "\u68af": 1304, "\u68b0": 1305, "\u68b3": 1306, "\u68c4": 1307, "\u68c9": 1308, "\u68cd": 1309, "\u68da": 1310, "\u68df": 1311, "\u68e0": 1312, "\u68ee": 1313, "\u68f5": 1314, "\u68fa": 1315, "\u6905": 1316, "\u690d": 1317, "\u6930": 1318, "\u694a": 1319, "\u6953": 1320, "\u695a": 1321, "\u696d": 1322, "\u6975": 1323, "\u6982": 1324, "\u6986": 1325, "\u699c": 1326, "\u69ae": 1327, "\u69cb": 1328, "\u69cd": 1329, "\u69fd": 1330, "\u6a02": 1331, "\u6a11": 1332, "\u6a13": 1333, "\u6a16": 1334, "\u6a19": 1335, "\u6a21": 1336, "\u6a23": 1337, "\u6a38": 1338, "\u6a39": 1339, "\u6a3a": 1340, "\u6a3d": 1341, "\u6a4b": 1342, "\u6a5f": 1343, "\u6a62": 1344, "\u6a6b": 1345, "\u6a94": 1346, "\u6aa2": 1347, "\u6aaf": 1348, "\u6ac3": 1349, "\u6ac8": 1350, "\u6afb": 1351, "\u6b04": 1352, "\u6b0a": 1353, "\u6b20": 1354, "\u6b21": 1355, "\u6b23": 1356, "\u6b32": 1357, "\u6b3a": 1358, "\u6b3e": 1359, "\u6b47": 1360, "\u6b49": 1361, "\u6b4c": 1362, "\u6b4e": 1363, "\u6b50": 1364, "\u6b61": 1365, "\u6b62": 1366, "\u6b63": 1367, "\u6b64": 1368, "\u6b65": 1369, "\u6b66": 1370, "\u6b67": 1371, "\u6b6a": 1372, "\u6b72": 1373, "\u6b77": 1374, "\u6b78": 1375, "\u6b79": 1376, "\u6b7b": 1377, "\u6b8a": 1378, "\u6b96": 1379, "\u6b98": 1380, "\u6bb5": 1381, "\u6bb7": 1382, "\u6bba": 1383, "\u6bbc": 1384, "\u6bbf": 1385, "\u6bc0": 1386, "\u6bc5": 1387, "\u6bcd": 1388, "\u6bcf": 1389, "\u6bd2": 1390, "\u6bd4": 1391, "\u6bdb": 1392, "\u6beb": 1393, "\u6c08": 1394, "\u6c0f": 1395, "\u6c11": 1396, "\u6c1b": 1397, "\u6c23": 1398, "\u6c34": 1399, "\u6c38": 1400, "\u6c42": 1401, "\u6c57": 1402, "\u6c5f": 1403, "\u6c60": 1404, "\u6c61": 1405, "\u6c6a": 1406, "\u6c7a": 1407, "\u6c7d": 1408, "\u6c88": 1409, "\u6c89": 1410, "\u6c92": 1411, "\u6c96": 1412, "\u6c99": 1413, "\u6cab": 1414, "\u6cb3": 1415, "\u6cb9": 1416, "\u6cbb": 1417, "\u6cbe": 1418, "\u6cbf": 1419, "\u6cc1": 1420, "\u6cc9": 1421, "\u6cca": 1422, "\u6cd5": 1423, "\u6cdb": 1424, "\u6ce1": 1425, "\u6ce2": 1426, "\u6ce5": 1427, "\u6ce8": 1428, "\u6cf0": 1429, "\u6cf3": 1430, "\u6d0b": 1431, "\u6d17": 1432, "\u6d1b": 1433, "\u6d1e": 1434, "\u6d25": 1435, "\u6d29": 1436, "\u6d2a": 1437, "\u6d32": 1438, "\u6d36": 1439, "\u6d3b": 1440, "\u6d3e": 1441, "\u6d41": 1442, "\u6d69": 1443, "\u6d6a": 1444, "\u6d6e": 1445, "\u6d74": 1446, "\u6d77": 1447, "\u6d78": 1448, "\u6d88": 1449, "\u6d89": 1450, "\u6d95": 1451, "\u6daf": 1452, "\u6dbc": 1453, "\u6dcb": 1454, "\u6dd1": 1455, "\u6dd2": 1456, "\u6dda": 1457, "\u6de1": 1458, "\u6de8": 1459, "\u6dea": 1460, "\u6df1": 1461, "\u6df5": 1462, "\u6df7": 1463, "\u6dfa": 1464, "\u6dfb": 1465, "\u6e05": 1466, "\u6e1b": 1467, "\u6e20": 1468, "\u6e21": 1469, "\u6e2c": 1470, "\u6e2f": 1471, "\u6e34": 1472, "\u6e38": 1473, "\u6e3a": 1474, "\u6e3e": 1475, "\u6e4a": 1476, "\u6e56": 1477, "\u6e58": 1478, "\u6e67": 1479, "\u6e6f": 1480, "\u6e90": 1481, "\u6e96": 1482, "\u6e9c": 1483, "\u6e9d": 1484, "\u6eaa": 1485, "\u6eab": 1486, "\u6ec5": 1487, "\u6ecb": 1488, "\u6ed1": 1489, "\u6ed4": 1490, "\u6eef": 1491, "\u6ef2": 1492, "\u6ef4": 1493, "\u6efe": 1494, "\u6eff": 1495, "\u6f01": 1496, "\u6f02": 1497, "\u6f06": 1498, "\u6f0f": 1499, "\u6f14": 1500, "\u6f20": 1501, "\u6f22": 1502, "\u6f2b": 1503, "\u6f32": 1504, "\u6f38": 1505, "\u6f51": 1506, "\u6f54": 1507, "\u6f58": 1508, "\u6f5b": 1509, "\u6f64": 1510, "\u6f6d": 1511, "\u6f6e": 1512, "\u6f84": 1513, "\u6fa1": 1514, "\u6fa4": 1515, "\u6fb3": 1516, "\u6fc0": 1517, "\u6fc3": 1518, "\u6fd5": 1519, "\u6fdf": 1520, "\u6fe4": 1521, "\u6feb": 1522, "\u7011": 1523, "\u701f": 1524, "\u7051": 1525, "\u7058": 1526, "\u7063": 1527, "\u706b": 1528, "\u7070": 1529, "\u707d": 1530, "\u708e": 1531, "\u7092": 1532, "\u7095": 1533, "\u70ad": 1534, "\u70ae": 1535, "\u70b8": 1536, "\u70ba": 1537, "\u70c8": 1538, "\u70cf": 1539, "\u7121": 1540, "\u7126": 1541, "\u7136": 1542, "\u7149": 1543, "\u714c": 1544, "\u714e": 1545, "\u7159": 1546, "\u715e": 1547, "\u7164": 1548, "\u7167": 1549, "\u7169": 1550, "\u716e": 1551, "\u7172": 1552, "\u718a": 1553, "\u719f": 1554, "\u71b1": 1555, "\u71c3": 1556, "\u71c8": 1557, "\u71d2": 1558, "\u71d5": 1559, "\u71df": 1560, "\u71e6": 1561, "\u71ed": 1562, "\u7206": 1563, "\u7210": 1564, "\u721b": 1565, "\u722c": 1566, "\u722d": 1567, "\u7232": 1568, "\u7236": 1569, "\u7238": 1570, "\u723a": 1571, "\u723d": 1572, "\u723e": 1573, "\u7246": 1574, "\u7247": 1575, "\u7248": 1576, "\u724c": 1577, "\u7259": 1578, "\u725b": 1579, "\u7262": 1580, "\u7267": 1581, "\u7269": 1582, "\u7272": 1583, "\u7279": 1584, "\u727d": 1585, "\u7280": 1586, "\u72a7": 1587, "\u72af": 1588, "\u72c0": 1589, "\u72c2": 1590, "\u72d0": 1591, "\u72d7": 1592, "\u72e0": 1593, "\u72f8": 1594, "\u72f9": 1595, "\u72fc": 1596, "\u731b": 1597, "\u731c": 1598, "\u7334": 1599, "\u7336": 1600, "\u7344": 1601, "\u7345": 1602, "\u734e": 1603, "\u7368": 1604, "\u7372": 1605, "\u7375": 1606, "\u7378": 1607, "\u737b": 1608, "\u7384": 1609, "\u7387": 1610, "\u7389": 1611, "\u738b": 1612, "\u73a5": 1613, "\u73a9": 1614, "\u73ab": 1615, "\u73b2": 1616, "\u73bb": 1617, "\u73cd": 1618, "\u73e0": 1619, "\u73ed": 1620, "\u73fe": 1621, "\u7403": 1622, "\u7406": 1623, "\u7434": 1624, "\u745e": 1625, "\u7469": 1626, "\u746a": 1627, "\u7470": 1628, "\u7483": 1629, "\u74b0": 1630, "\u74dc": 1631, "\u74e6": 1632, "\u74f6": 1633, "\u74f7": 1634, "\u7518": 1635, "\u751a": 1636, "\u751c": 1637, "\u751f": 1638, "\u7522": 1639, "\u7528": 1640, "\u7529": 1641, "\u7530": 1642, "\u7531": 1643, "\u7532": 1644, "\u7533": 1645, "\u7537": 1646, "\u7540": 1647, "\u754c": 1648, "\u754f": 1649, "\u7559": 1650, "\u7562": 1651, "\u7565": 1652, "\u756a": 1653, "\u756b": 1654, "\u7570": 1655, "\u7576": 1656, "\u758a": 1657, "\u758f": 1658, "\u7591": 1659, "\u75ab": 1660, "\u75b2": 1661, "\u75be": 1662, "\u75c5": 1663, "\u75c7": 1664, "\u75d5": 1665, "\u75db": 1666, "\u760b": 1667, "\u7626": 1668, "\u7642": 1669, "\u764c": 1670, "\u7661": 1671, "\u7672": 1672, "\u767b": 1673, "\u767c": 1674, "\u767d": 1675, "\u767e": 1676, "\u7684": 1677, "\u7686": 1678, "\u7687": 1679, "\u76ae": 1680, "\u76ba": 1681, "\u76c6": 1682, "\u76c8": 1683, "\u76ca": 1684, "\u76d2": 1685, "\u76db": 1686, "\u76dc": 1687, "\u76de": 1688, "\u76e1": 1689, "\u76e3": 1690, "\u76e4": 1691, "\u76e7": 1692, "\u76ee": 1693, "\u76f2": 1694, "\u76f4": 1695, "\u76f8": 1696, "\u76fc": 1697, "\u76fe": 1698, "\u7701": 1699, "\u7709": 1700, "\u770b": 1701, "\u771f": 1702, "\u7720": 1703, "\u7728": 1704, "\u7736": 1705, "\u773c": 1706, "\u773e": 1707, "\u7740": 1708, "\u7747": 1709, "\u775b": 1710, "\u775c": 1711, "\u7761": 1712, "\u7763": 1713, "\u7779": 1714, "\u7784": 1715, "\u7787": 1716, "\u778e": 1717, "\u7793": 1718, "\u779e": 1719, "\u77aa": 1720, "\u77ac": 1721, "\u77db": 1722, "\u77e3": 1723, "\u77e5": 1724, "\u77e9": 1725, "\u77ed": 1726, "\u77ee": 1727, "\u77f3": 1728, "\u7814": 1729, "\u7834": 1730, "\u786c": 1731, "\u788c": 1732, "\u788e": 1733, "\u7891": 1734, "\u7897": 1735, "\u789f": 1736, "\u78a7": 1737, "\u78a9": 1738, "\u78b0": 1739, "\u78ba": 1740, "\u78bc": 1741, "\u78c5": 1742, "\u78cb": 1743, "\u78da": 1744, "\u78e8": 1745, "\u790e": 1746, "\u7919": 1747, "\u7926": 1748, "\u793a": 1749, "\u793e": 1750, "\u7948": 1751, "\u7956": 1752, "\u795d": 1753, "\u795e": 1754, "\u7965": 1755, "\u7968": 1756, "\u796d": 1757, "\u7981": 1758, "\u798d": 1759, "\u798f": 1760, "\u79aa": 1761, "\u79ae": 1762, "\u79b1": 1763, "\u79c0": 1764, "\u79c1": 1765, "\u79cb": 1766, "\u79d1": 1767, "\u79d2": 1768, "\u79d8": 1769, "\u79df": 1770, "\u79e6": 1771, "\u79e7": 1772, "\u79e9": 1773, "\u79fb": 1774, "\u7a00": 1775, "\u7a05": 1776, "\u7a0b": 1777, "\u7a0d": 1778, "\u7a1a": 1779, "\u7a2e": 1780, "\u7a31": 1781, "\u7a3d": 1782, "\u7a3f": 1783, "\u7a46": 1784, "\u7a4c": 1785, "\u7a4d": 1786, "\u7a69": 1787, "\u7a76": 1788, "\u7a7a": 1789, "\u7a7f": 1790, "\u7a81": 1791, "\u7a84": 1792, "\u7a97": 1793, "\u7aa9": 1794, "\u7aae": 1795, "\u7abf": 1796, "\u7acb": 1797, "\u7ad9": 1798, "\u7adf": 1799, "\u7ae0": 1800, "\u7ae5": 1801, "\u7aed": 1802, "\u7aef": 1803, "\u7af6": 1804, "\u7af9": 1805, "\u7b11": 1806, "\u7b1b": 1807, "\u7b26": 1808, "\u7b28": 1809, "\u7b2a": 1810, "\u7b2c": 1811, "\u7b46": 1812, "\u7b49": 1813, "\u7b4b": 1814, "\u7b52": 1815, "\u7b54": 1816, "\u7b56": 1817, "\u7b77": 1818, "\u7b8b": 1819, "\u7b97": 1820, "\u7ba1": 1821, "\u7bad": 1822, "\u7bb1": 1823, "\u7bc0": 1824, "\u7bc4": 1825, "\u7bc7": 1826, "\u7bc9": 1827, "\u7be4": 1828, "\u7bf7": 1829, "\u7c21": 1830, "\u7c37": 1831, "\u7c3d": 1832, "\u7c3e": 1833, "\u7c3f": 1834, "\u7c43": 1835, "\u7c4c": 1836, "\u7c4d": 1837, "\u7c60": 1838, "\u7c6c": 1839, "\u7c73": 1840, "\u7c89": 1841, "\u7c92": 1842, "\u7c97": 1843, "\u7ca5": 1844, "\u7cb5": 1845, "\u7cb9": 1846, "\u7cbe": 1847, "\u7cca": 1848, "\u7cd5": 1849, "\u7cd6": 1850, "\u7cde": 1851, "\u7cdf": 1852, "\u7ce7": 1853, "\u7cfb": 1854, "\u7cfe": 1855, "\u7d00": 1856, "\u7d04": 1857, "\u7d05": 1858, "\u7d0b": 1859, "\u7d0d": 1860, "\u7d10": 1861, "\u7d14": 1862, "\u7d17": 1863, "\u7d19": 1864, "\u7d1a": 1865, "\u7d1b": 1866, "\u7d20": 1867, "\u7d22": 1868, "\u7d2b": 1869, "\u7d2e": 1870, "\u7d2f": 1871, "\u7d30": 1872, "\u7d33": 1873, "\u7d39": 1874, "\u7d42": 1875, "\u7d44": 1876, "\u7d50": 1877, "\u7d55": 1878, "\u7d61": 1879, "\u7d66": 1880, "\u7d71": 1881, "\u7d72": 1882, "\u7d81": 1883, "\u7d93": 1884, "\u7d9c": 1885, "\u7da0": 1886, "\u7da2": 1887, "\u7dad": 1888, "\u7db2": 1889, "\u7dbf": 1890, "\u7dca": 1891, "\u7dd2": 1892, "\u7dda": 1893, "\u7dde": 1894, "\u7de3": 1895, "\u7de8": 1896, "\u7de9": 1897, "\u7df4": 1898, "\u7dfb": 1899, "\u7e23": 1900, "\u7e2b": 1901, "\u7e2e": 1902, "\u7e31": 1903, "\u7e37": 1904, "\u7e3d": 1905, "\u7e3e": 1906, "\u7e41": 1907, "\u7e54": 1908, "\u7e5e": 1909, "\u7e61": 1910, "\u7e69": 1911, "\u7e6a": 1912, "\u7e6b": 1913, "\u7e6d": 1914, "\u7e79": 1915, "\u7e7c": 1916, "\u7e8c": 1917, "\u7e8f": 1918, "\u7e9c": 1919, "\u7f38": 1920, "\u7f3a": 1921, "\u7f50": 1922, "\u7f55": 1923, "\u7f69": 1924, "\u7f6a": 1925, "\u7f6e": 1926, "\u7f70": 1927, "\u7f72": 1928, "\u7f75": 1929, "\u7f77": 1930, "\u7f85": 1931, "\u7f8a": 1932, "\u7f8e": 1933, "\u7f94": 1934, "\u7f9e": 1935, "\u7fa4": 1936, "\u7fa8": 1937, "\u7fa9": 1938, "\u7fbd": 1939, "\u7fc1": 1940, "\u7fc5": 1941, "\u7fd2": 1942, "\u7fe0": 1943, "\u7ff0": 1944, "\u7ffb": 1945, "\u7ffc": 1946, "\u8000": 1947, "\u8001": 1948, "\u8003": 1949, "\u8005": 1950, "\u800c": 1951, "\u800d": 1952, "\u8010": 1953, "\u8017": 1954, "\u8033": 1955, "\u8036": 1956, "\u803d": 1957, "\u8046": 1958, "\u804a": 1959, "\u8056": 1960, "\u8058": 1961, "\u805a": 1962, "\u805e": 1963, "\u806f": 1964, "\u8070": 1965, "\u8072": 1966, "\u8077": 1967, "\u807d": 1968, "\u8085": 1969, "\u8086": 1970, "\u8089": 1971, "\u808c": 1972, "\u809a": 1973, "\u80a1": 1974, "\u80a5": 1975, "\u80a9": 1976, "\u80af": 1977, "\u80b2": 1978, "\u80ba": 1979, "\u80c3": 1980, "\u80cc": 1981, "\u80ce": 1982, "\u80d6": 1983, "\u80de": 1984, "\u80e1": 1985, "\u80ed": 1986, "\u80f8": 1987, "\u80fd": 1988, "\u8102": 1989, "\u8106": 1990, "\u8108": 1991, "\u812b": 1992, "\u8139": 1993, "\u813e": 1994, "\u8150": 1995, "\u8154": 1996, "\u8166": 1997, "\u816b": 1998, "\u8170": 1999, "\u8173": 2000, "\u8178": 2001, "\u8179": 2002, "\u817f": 2003, "\u8180": 2004, "\u818a": 2005, "\u818f": 2006, "\u819a": 2007, "\u81a0": 2008, "\u81bd": 2009, "\u81c2": 2010, "\u81c9": 2011, "\u81d8": 2012, "\u81df": 2013, "\u81e3": 2014, "\u81e5": 2015, "\u81e8": 2016, "\u81ea": 2017, "\u81ed": 2018, "\u81f3": 2019, "\u81f4": 2020, "\u81fa": 2021, "\u8205": 2022, "\u8207": 2023, "\u8208": 2024, "\u8209": 2025, "\u820a": 2026, "\u820c": 2027, "\u820d": 2028, "\u8212": 2029, "\u8216": 2030, "\u821e": 2031, "\u821f": 2032, "\u822a": 2033, "\u822c": 2034, "\u8239": 2035, "\u8247": 2036, "\u8259": 2037, "\u8266": 2038, "\u826f": 2039, "\u8271": 2040, "\u8272": 2041, "\u8277": 2042, "\u829d": 2043, "\u82ac": 2044, "\u82b1": 2045, "\u82b3": 2046, "\u82bd": 2047, "\u82d7": 2048, "\u82e5": 2049, "\u82e6": 2050, "\u82f1": 2051, "\u8305": 2052, "\u832b": 2053, "\u8332": 2054, "\u8335": 2055, "\u8336": 2056, "\u8349": 2057, "\u834a": 2058, "\u8352": 2059, "\u8377": 2060, "\u837b": 2061, "\u838a": 2062, "\u838e": 2063, "\u83ab": 2064, "\u83c7": 2065, "\u83dc": 2066, "\u83ef": 2067, "\u83f2": 2068, "\u8404": 2069, "\u840a": 2070, "\u840d": 2071, "\u842c": 2072, "\u843d": 2073, "\u8449": 2074, "\u8457": 2075, "\u845b": 2076, "\u8461": 2077, "\u8463": 2078, "\u846c": 2079, "\u8482": 2080, "\u8499": 2081, "\u84b2": 2082, "\u84b8": 2083, "\u84bc": 2084, "\u84bf": 2085, "\u84cb": 2086, "\u84ec": 2087, "\u84ee": 2088, "\u8521": 2089, "\u8523": 2090, "\u852d": 2091, "\u8569": 2092, "\u856d": 2093, "\u8584": 2094, "\u8587": 2095, "\u85a6": 2096, "\u85a9": 2097, "\u85aa": 2098, "\u85af": 2099, "\u85b9": 2100, "\u85c9": 2101, "\u85cd": 2102, "\u85cf": 2103, "\u85dd": 2104, "\u85e4": 2105, "\u85e5": 2106, "\u85e9": 2107, "\u85f9": 2108, "\u85fb": 2109, "\u8606": 2110, "\u8607": 2111, "\u860a": 2112, "\u860b": 2113, "\u8611": 2114, "\u862d": 2115, "\u863f": 2116, "\u864e": 2117, "\u8650": 2118, "\u8655": 2119, "\u865b": 2120, "\u865f": 2121, "\u8667": 2122, "\u868a": 2123, "\u86c7": 2124, "\u86cb": 2125, "\u8702": 2126, "\u871c": 2127, "\u8766": 2128, "\u8774": 2129, "\u8776": 2130, "\u878d": 2131, "\u879e": 2132, "\u87ec": 2133, "\u87f2": 2134, "\u87f9": 2135, "\u87fb": 2136, "\u881f": 2137, "\u8822": 2138, "\u883b": 2139, "\u8840": 2140, "\u884c": 2141, "\u884d": 2142, "\u8853": 2143, "\u8857": 2144, "\u885b": 2145, "\u885d": 2146, "\u8861": 2147, "\u8863": 2148, "\u8868": 2149, "\u886b": 2150, "\u8870": 2151, "\u8877": 2152, "\u888b": 2153, "\u888d": 2154, "\u8896": 2155, "\u88ab": 2156, "\u88c1": 2157, "\u88c2": 2158, "\u88cf": 2159, "\u88d5": 2160, "\u88d9": 2161, "\u88dc": 2162, "\u88dd": 2163, "\u88e1": 2164, "\u88f3": 2165, "\u88f9": 2166, "\u88fd": 2167, "\u8907": 2168, "\u8932": 2169, "\u895f": 2170, "\u896a": 2171, "\u896f": 2172, "\u8972": 2173, "\u897f": 2174, "\u8981": 2175, "\u8986": 2176, "\u898b": 2177, "\u898f": 2178, "\u8993": 2179, "\u8996": 2180, "\u89aa": 2181, "\u89ba": 2182, "\u89bd": 2183, "\u89c0": 2184, "\u89d2": 2185, "\u89e3": 2186, "\u89f8": 2187, "\u8a00": 2188, "\u8a02": 2189, "\u8a08": 2190, "\u8a0a": 2191, "\u8a0e": 2192, "\u8a13": 2193, "\u8a17": 2194, "\u8a18": 2195, "\u8a1d": 2196, "\u8a2a": 2197, "\u8a2d": 2198, "\u8a31": 2199, "\u8a34": 2200, "\u8a3b": 2201, "\u8a55": 2202, "\u8a5e": 2203, "\u8a60": 2204, "\u8a62": 2205, "\u8a66": 2206, "\u8a69": 2207, "\u8a6b": 2208, "\u8a71": 2209, "\u8a72": 2210, "\u8a73": 2211, "\u8a87": 2212, "\u8a8c": 2213, "\u8a8d": 2214, "\u8a92": 2215, "\u8a93": 2216, "\u8a95": 2217, "\u8a98": 2218, "\u8a9e": 2219, "\u8aa0": 2220, "\u8aa4": 2221, "\u8aa6": 2222, "\u8aaa": 2223, "\u8ab0": 2224, "\u8ab2": 2225, "\u8abc": 2226, "\u8abf": 2227, "\u8ac7": 2228, "\u8acb": 2229, "\u8ad2": 2230, "\u8ad6": 2231, "\u8ad7": 2232, "\u8ae7": 2233, "\u8af7": 2234, "\u8af8": 2235, "\u8afe": 2236, "\u8b00": 2237, "\u8b02": 2238, "\u8b0e": 2239, "\u8b19": 2240, "\u8b1b": 2241, "\u8b1d": 2242, "\u8b2c": 2243, "\u8b39": 2244, "\u8b49": 2245, "\u8b58": 2246, "\u8b5c": 2247, "\u8b66": 2248, "\u8b6c": 2249, "\u8b6f": 2250, "\u8b70": 2251, "\u8b77": 2252, "\u8b7d": 2253, "\u8b80": 2254, "\u8b8a": 2255, "\u8b93": 2256, "\u8b9a": 2257, "\u8c37": 2258, "\u8c46": 2259, "\u8c48": 2260, "\u8c4e": 2261, "\u8c50": 2262, "\u8c61": 2263, "\u8c6a": 2264, "\u8c6c": 2265, "\u8c8c": 2266, "\u8c93": 2267, "\u8c9d": 2268, "\u8ca0": 2269, "\u8ca1": 2270, "\u8ca2": 2271, "\u8ca7": 2272, "\u8ca8": 2273, "\u8ca9": 2274, "\u8caa": 2275, "\u8cab": 2276, "\u8cac": 2277, "\u8cb4": 2278, "\u8cb6": 2279, "\u8cb7": 2280, "\u8cbb": 2281, "\u8cbc": 2282, "\u8cc0": 2283, "\u8cc7": 2284, "\u8cca": 2285, "\u8cd3": 2286, "\u8cde": 2287, "\u8ce0": 2288, "\u8ce2": 2289, "\u8ce3": 2290, "\u8ce4": 2291, "\u8ce6": 2292, "\u8cea": 2293, "\u8ced": 2294, "\u8cf4": 2295, "\u8cfa": 2296, "\u8cfc": 2297, "\u8cfd": 2298, "\u8d08": 2299, "\u8d0a": 2300, "\u8d0f": 2301, "\u8d64": 2302, "\u8d6b": 2303, "\u8d70": 2304, "\u8d74": 2305, "\u8d77": 2306, "\u8d81": 2307, "\u8d85": 2308, "\u8d8a": 2309, "\u8d95": 2310, "\u8d99": 2311, "\u8da3": 2312, "\u8da8": 2313, "\u8db3": 2314, "\u8dcc": 2315, "\u8dd1": 2316, "\u8ddb": 2317, "\u8ddd": 2318, "\u8ddf": 2319, "\u8de1": 2320, "\u8de8": 2321, "\u8dea": 2322, "\u8def": 2323, "\u8df3": 2324, "\u8e0e": 2325, "\u8e0f": 2326, "\u8e10": 2327, "\u8e22": 2328, "\u8e29": 2329, "\u8e2e": 2330, "\u8e48": 2331, "\u8e64": 2332, "\u8e8d": 2333, "\u8eab": 2334, "\u8eac": 2335, "\u8eb2": 2336, "\u8eba": 2337, "\u8ec0": 2338, "\u8eca": 2339, "\u8ecc": 2340, "\u8ecd": 2341, "\u8ed2": 2342, "\u8edf": 2343, "\u8f03": 2344, "\u8f09": 2345, "\u8f14": 2346, "\u8f15": 2347, "\u8f1d": 2348, "\u8f29": 2349, "\u8f2a": 2350, "\u8f2f": 2351, "\u8f38": 2352, "\u8f3e": 2353, "\u8f49": 2354, "\u8f4e": 2355, "\u8f5f": 2356, "\u8f9b": 2357, "\u8fa3": 2358, "\u8fa6": 2359, "\u8fa8": 2360, "\u8fad": 2361, "\u8fae": 2362, "\u8faf": 2363, "\u8fb1": 2364, "\u8fb2": 2365, "\u8fc5": 2366, "\u8fce": 2367, "\u8fd1": 2368, "\u8fd4": 2369, "\u8feb": 2370, "\u8ff0": 2371, "\u8ff4": 2372, "\u8ff7": 2373, "\u8ffd": 2374, "\u9000": 2375, "\u9001": 2376, "\u9003": 2377, "\u9006": 2378, "\u900f": 2379, "\u9010": 2380, "\u9014": 2381, "\u9017": 2382, "\u9019": 2383, "\u901a": 2384, "\u901b": 2385, "\u901d": 2386, "\u901f": 2387, "\u9020": 2388, "\u9022": 2389, "\u9023": 2390, "\u9031": 2391, "\u9032": 2392, "\u903c": 2393, "\u9047": 2394, "\u904a": 2395, "\u904b": 2396, "\u904d": 2397, "\u904e": 2398, "\u9053": 2399, "\u9054": 2400, "\u9055": 2401, "\u9059": 2402, "\u905c": 2403, "\u905e": 2404, "\u9060": 2405, "\u9063": 2406, "\u9069": 2407, "\u906d": 2408, "\u906e": 2409, "\u9072": 2410, "\u9077": 2411, "\u9078": 2412, "\u907a": 2413, "\u907f": 2414, "\u9080": 2415, "\u9081": 2416, "\u9084": 2417, "\u908a": 2418, "\u908f": 2419, "\u9091": 2420, "\u90a3": 2421, "\u90a6": 2422, "\u90aa": 2423, "\u90c1": 2424, "\u90ca": 2425, "\u90ce": 2426, "\u90e8": 2427, "\u90ed": 2428, "\u90f5": 2429, "\u90fd": 2430, "\u9109": 2431, "\u9127": 2432, "\u912d": 2433, "\u9130": 2434, "\u913a": 2435, "\u914d": 2436, "\u9152": 2437, "\u916c": 2438, "\u9177": 2439, "\u9178": 2440, "\u9189": 2441, "\u9192": 2442, "\u919c": 2443, "\u91ab": 2444, "\u91ac": 2445, "\u91c7": 2446, "\u91cb": 2447, "\u91cc": 2448, "\u91cd": 2449, "\u91ce": 2450, "\u91cf": 2451, "\u91d1": 2452, "\u91d8": 2453, "\u91dd": 2454, "\u91e3": 2455, "\u9234": 2456, "\u9264": 2457, "\u9280": 2458, "\u9285": 2459, "\u9298": 2460, "\u92b3": 2461, "\u92b7": 2462, "\u92d2": 2463, "\u92ea": 2464, "\u92fc": 2465, "\u9304": 2466, "\u9322": 2467, "\u9326": 2468, "\u932b": 2469, "\u932f": 2470, "\u9336": 2471, "\u934b": 2472, "\u9375": 2473, "\u937e": 2474, "\u9396": 2475, "\u93ae": 2476, "\u93e1": 2477, "\u9418": 2478, "\u9435": 2479, "\u944a": 2480, "\u9452": 2481, "\u947c": 2482, "\u947d": 2483, "\u9577": 2484, "\u9580": 2485, "\u9582": 2486, "\u9583": 2487, "\u9589": 2488, "\u958b": 2489, "\u9591": 2490, "\u9592": 2491, "\u9593": 2492, "\u9598": 2493, "\u95a3": 2494, "\u95a5": 2495, "\u95b1": 2496, "\u95bb": 2497, "\u95c6": 2498, "\u95ca": 2499, "\u95d6": 2500, "\u95dc": 2501, "\u9632": 2502, "\u963b": 2503, "\u963f": 2504, "\u9640": 2505, "\u9644": 2506, "\u964b": 2507, "\u964c": 2508, "\u964d": 2509, "\u9650": 2510, "\u9662": 2511, "\u9663": 2512, "\u9664": 2513, "\u966a": 2514, "\u9670": 2515, "\u9673": 2516, "\u9675": 2517, "\u9676": 2518, "\u9677": 2519, "\u9678": 2520, "\u967d": 2521, "\u9686": 2522, "\u968a": 2523, "\u968e": 2524, "\u9694": 2525, "\u9699": 2526, "\u969b": 2527, "\u969c": 2528, "\u96a7": 2529, "\u96a8": 2530, "\u96aa": 2531, "\u96b1": 2532, "\u96b8": 2533, "\u96bb": 2534, "\u96c0": 2535, "\u96c4": 2536, "\u96c5": 2537, "\u96c6": 2538, "\u96d5": 2539, "\u96d6": 2540, "\u96d9": 2541, "\u96dc": 2542, "\u96de": 2543, "\u96e2": 2544, "\u96e3": 2545, "\u96e8": 2546, "\u96ea": 2547, "\u96f2": 2548, "\u96f6": 2549, "\u96f7": 2550, "\u96fb": 2551, "\u9700": 2552, "\u9707": 2553, "\u970d": 2554, "\u9716": 2555, "\u971e": 2556, "\u9727": 2557, "\u9732": 2558, "\u9738": 2559, "\u9748": 2560, "\u9752": 2561, "\u975a": 2562, "\u975c": 2563, "\u975e": 2564, "\u9760": 2565, "\u9762": 2566, "\u9769": 2567, "\u978b": 2568, "\u97a0": 2569, "\u97ad": 2570, "\u97cb": 2571, "\u97d3": 2572, "\u97f3": 2573, "\u97fb": 2574, "\u97ff": 2575, "\u9801": 2576, "\u9802": 2577, "\u9805": 2578, "\u9806": 2579, "\u9808": 2580, "\u980c": 2581, "\u9810": 2582, "\u9811": 2583, "\u9812": 2584, "\u9813": 2585, "\u9817": 2586, "\u9818": 2587, "\u982d": 2588, "\u9838": 2589, "\u9839": 2590, "\u983b": 2591, "\u9846": 2592, "\u984c": 2593, "\u984d": 2594, "\u984f": 2595, "\u9858": 2596, "\u985b": 2597, "\u985e": 2598, "\u9867": 2599, "\u986f": 2600, "\u98a8": 2601, "\u98c4": 2602, "\u98db": 2603, "\u98df": 2604, "\u98e2": 2605, "\u98ef": 2606, "\u98f2": 2607, "\u98fd": 2608, "\u98fe": 2609, "\u9905": 2610, "\u990a": 2611, "\u9910": 2612, "\u9913": 2613, "\u9918": 2614, "\u9928": 2615, "\u9935": 2616, "\u9938": 2617, "\u9945": 2618, "\u9996": 2619, "\u9999": 2620, "\u99a8": 2621, "\u99ac": 2622, "\u99ae": 2623, "\u99b4": 2624, "\u99c1": 2625, "\u99d0": 2626, "\u99d5": 2627, "\u99db": 2628, "\u99f1": 2629, "\u9a0e": 2630, "\u9a19": 2631, "\u9a2e": 2632, "\u9a30": 2633, "\u9a37": 2634, "\u9a45": 2635, "\u9a55": 2636, "\u9a57": 2637, "\u9a5a": 2638, "\u9a5f": 2639, "\u9a62": 2640, "\u9aa8": 2641, "\u9ad4": 2642, "\u9ad8": 2643, "\u9aee": 2644, "\u9b06": 2645, "\u9b0d": 2646, "\u9b1a": 2647, "\u9b25": 2648, "\u9b27": 2649, "\u9b31": 2650, "\u9b3c": 2651, "\u9b42": 2652, "\u9b44": 2653, "\u9b4f": 2654, "\u9b54": 2655, "\u9b5a": 2656, "\u9b6f": 2657, "\u9bae": 2658, "\u9ce5": 2659, "\u9cf3": 2660, "\u9cf4": 2661, "\u9d09": 2662, "\u9d28": 2663, "\u9d3b": 2664, "\u9d51": 2665, "\u9d5d": 2666, "\u9e79": 2667, "\u9e7d": 2668, "\u9e7f": 2669, "\u9e97": 2670, "\u9ea5": 2671, "\u9eb5": 2672, "\u9ebb": 2673, "\u9ebc": 2674, "\u9ec3": 2675, "\u9ece": 2676, "\u9ecf": 2677, "\u9ed0": 2678, "\u9ed1": 2679, "\u9ed8": 2680, "\u9ede": 2681, "\u9ee8": 2682, "\u9f13": 2683, "\u9f20": 2684, "\u9f3b": 2685, "\u9f4a": 2686, "\u9f4b": 2687, "\u9f52": 2688, "\u9f61": 2689, "\u9f8d": 2690, "\u9f90": 2691, "\u9f9c": 2692, "\uff01": 2693, "\ud844\udcc1": 2694, "\ud851\udcd3": 2695, "[UNK]": 2695, "[PAD]": 2696}
 
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "facebook/w2v-bert-2.0",
3
  "activation_dropout": 0.0,
4
  "adapter_act": "relu",
5
  "adapter_kernel_size": 3,
@@ -9,7 +9,7 @@
9
  "architectures": [
10
  "Wav2Vec2BertForCTC"
11
  ],
12
- "attention_dropout": 0.0,
13
  "bos_token_id": 1,
14
  "classifier_proj_size": 768,
15
  "codevector_dim": 768,
@@ -37,7 +37,7 @@
37
  "mask_feature_prob": 0.0,
38
  "mask_time_length": 10,
39
  "mask_time_min_masks": 2,
40
- "mask_time_prob": 0.1,
41
  "max_source_positions": 5000,
42
  "model_type": "wav2vec2-bert",
43
  "num_adapter_layers": 1,
@@ -74,7 +74,7 @@
74
  1
75
  ],
76
  "torch_dtype": "float32",
77
- "transformers_version": "4.37.1",
78
  "use_intermediate_ffn_before_adapter": false,
79
  "use_weighted_layer_sum": false,
80
  "vocab_size": 2699,
 
1
  {
2
+ "_name_or_path": "alvanlii/wav2vec2-BERT-cantonese",
3
  "activation_dropout": 0.0,
4
  "adapter_act": "relu",
5
  "adapter_kernel_size": 3,
 
9
  "architectures": [
10
  "Wav2Vec2BertForCTC"
11
  ],
12
+ "attention_dropout": 0.1,
13
  "bos_token_id": 1,
14
  "classifier_proj_size": 768,
15
  "codevector_dim": 768,
 
37
  "mask_feature_prob": 0.0,
38
  "mask_time_length": 10,
39
  "mask_time_min_masks": 2,
40
+ "mask_time_prob": 0.0,
41
  "max_source_positions": 5000,
42
  "model_type": "wav2vec2-bert",
43
  "num_adapter_layers": 1,
 
74
  1
75
  ],
76
  "torch_dtype": "float32",
77
+ "transformers_version": "4.38.2",
78
  "use_intermediate_ffn_before_adapter": false,
79
  "use_weighted_layer_sum": false,
80
  "vocab_size": 2699,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce93fae4806d914ed61bbca586978bf9283d32c224193981a18d91a1fe44316d
3
- size 2433884676
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86d6e20f1df78e0c88bee14ab1a223b34b069a78c787de82834da417fd459437
3
+ size 2043150336
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95740ac80d1a253ebeafa74c918c2e7a75f0cf1eafce8805c28299cabfb13ee8
3
- size 4868221674
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a420558ee6dd550d0bf57ff4b79087225ff1a0db8441910732ed915ba6187fa
3
+ size 2103705600
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb34f676888fd9f6b3eaa3be33a23eae3ac676eb3a085d0292f21810095a2fca
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d41fc46d0cba18a124097418b7f25a99868be3af2511a03c16b6472d5ff5a56
3
+ size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebfe7655856e7efb8f9907ba4e84bf10035582fb91bca4a965f55aa11dd46718
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5951bb0dce0845c2de727d3fe5cb2c3377d931684f23d2aea053d66c153fdad
3
  size 1064
trainer_state.json CHANGED
@@ -1,1251 +1,341 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 29.930420896543858,
5
  "eval_steps": 300,
6
- "global_step": 24600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.37,
13
- "learning_rate": 9.900000000000002e-06,
14
- "loss": 19.7382,
 
15
  "step": 300
16
  },
17
  {
18
- "epoch": 0.37,
19
- "eval_cer": 1.0,
20
- "eval_loss": 6.821648120880127,
21
- "eval_runtime": 52.3193,
22
- "eval_samples_per_second": 42.451,
23
- "eval_steps_per_second": 5.314,
24
  "step": 300
25
  },
26
  {
27
- "epoch": 0.73,
28
- "learning_rate": 1.9900000000000003e-05,
29
- "loss": 9.8181,
 
30
  "step": 600
31
  },
32
  {
33
- "epoch": 0.73,
34
- "eval_cer": 1.0,
35
- "eval_loss": 6.651111602783203,
36
- "eval_runtime": 43.577,
37
- "eval_samples_per_second": 50.967,
38
- "eval_steps_per_second": 6.38,
39
  "step": 600
40
  },
41
  {
42
- "epoch": 1.1,
43
- "learning_rate": 2.9900000000000002e-05,
44
- "loss": 9.5191,
 
45
  "step": 900
46
  },
47
  {
48
- "epoch": 1.1,
49
- "eval_cer": 0.9806448402826152,
50
- "eval_loss": 6.58424186706543,
51
- "eval_runtime": 43.1655,
52
- "eval_samples_per_second": 51.453,
53
- "eval_steps_per_second": 6.44,
54
  "step": 900
55
  },
56
  {
57
- "epoch": 1.46,
58
- "learning_rate": 3.99e-05,
59
- "loss": 8.6238,
 
60
  "step": 1200
61
  },
62
  {
63
- "epoch": 1.46,
64
- "eval_cer": 0.9216837496268285,
65
- "eval_loss": 6.142301082611084,
66
- "eval_runtime": 42.9764,
67
- "eval_samples_per_second": 51.68,
68
- "eval_steps_per_second": 6.469,
69
  "step": 1200
70
  },
71
  {
72
- "epoch": 1.83,
73
- "learning_rate": 4.99e-05,
74
- "loss": 6.883,
 
75
  "step": 1500
76
  },
77
  {
78
- "epoch": 1.83,
79
- "eval_cer": 0.850830928450592,
80
- "eval_loss": 3.596842050552368,
81
- "eval_runtime": 42.8348,
82
- "eval_samples_per_second": 51.85,
83
- "eval_steps_per_second": 6.49,
84
  "step": 1500
85
  },
86
  {
87
- "epoch": 2.19,
88
- "learning_rate": 4.93579766536965e-05,
89
- "loss": 4.0838,
 
90
  "step": 1800
91
  },
92
  {
93
- "epoch": 2.19,
94
- "eval_cer": 0.48343118718280426,
95
- "eval_loss": 2.5516390800476074,
96
- "eval_runtime": 42.9354,
97
- "eval_samples_per_second": 51.729,
98
- "eval_steps_per_second": 6.475,
99
  "step": 1800
100
  },
101
  {
102
- "epoch": 2.56,
103
- "learning_rate": 4.87094682230869e-05,
104
- "loss": 3.167,
 
105
  "step": 2100
106
  },
107
  {
108
- "epoch": 2.56,
109
- "eval_cer": 0.4450691611105583,
110
- "eval_loss": 2.2739391326904297,
111
- "eval_runtime": 42.8894,
112
- "eval_samples_per_second": 51.784,
113
- "eval_steps_per_second": 6.482,
114
  "step": 2100
115
  },
116
  {
117
- "epoch": 2.92,
118
- "learning_rate": 4.806312148724601e-05,
119
- "loss": 2.826,
 
120
  "step": 2400
121
  },
122
  {
123
- "epoch": 2.92,
124
- "eval_cer": 0.4178525226390686,
125
- "eval_loss": 2.0223917961120605,
126
- "eval_runtime": 42.9677,
127
- "eval_samples_per_second": 51.69,
128
- "eval_steps_per_second": 6.47,
129
  "step": 2400
130
  },
131
  {
132
- "epoch": 3.29,
133
- "learning_rate": 4.7414613056636405e-05,
134
- "loss": 2.6955,
 
135
  "step": 2700
136
  },
137
  {
138
- "epoch": 3.29,
139
- "eval_cer": 0.4174544730818987,
140
- "eval_loss": 1.9600275754928589,
141
- "eval_runtime": 42.8412,
142
- "eval_samples_per_second": 51.843,
143
- "eval_steps_per_second": 6.489,
144
  "step": 2700
145
  },
146
  {
147
- "epoch": 3.65,
148
- "learning_rate": 4.676610462602681e-05,
149
- "loss": 2.5812,
 
150
  "step": 3000
151
  },
152
  {
153
- "epoch": 3.65,
154
- "eval_cer": 0.40994128769031746,
155
- "eval_loss": 1.769142985343933,
156
- "eval_runtime": 42.9361,
157
- "eval_samples_per_second": 51.728,
158
- "eval_steps_per_second": 6.475,
159
  "step": 3000
160
  },
161
  {
162
- "epoch": 4.02,
163
- "learning_rate": 4.611975789018591e-05,
164
- "loss": 2.4952,
 
165
  "step": 3300
166
  },
167
  {
168
- "epoch": 4.02,
169
- "eval_cer": 0.4013832222111653,
170
- "eval_loss": 1.8323670625686646,
171
- "eval_runtime": 42.7115,
172
- "eval_samples_per_second": 52.0,
173
- "eval_steps_per_second": 6.509,
174
  "step": 3300
175
  },
176
  {
177
- "epoch": 4.38,
178
- "learning_rate": 4.547124945957631e-05,
179
- "loss": 2.3938,
 
180
  "step": 3600
181
  },
182
  {
183
- "epoch": 4.38,
184
- "eval_cer": 0.37799781072743555,
185
- "eval_loss": 1.7351980209350586,
186
- "eval_runtime": 42.7436,
187
- "eval_samples_per_second": 51.961,
188
- "eval_steps_per_second": 6.504,
189
  "step": 3600
190
  },
191
  {
192
- "epoch": 4.75,
193
- "learning_rate": 4.482490272373541e-05,
194
- "loss": 2.3584,
 
195
  "step": 3900
196
  },
197
  {
198
- "epoch": 4.75,
199
- "eval_cer": 0.3678475470196039,
200
- "eval_loss": 1.64540696144104,
201
- "eval_runtime": 42.7445,
202
- "eval_samples_per_second": 51.96,
203
- "eval_steps_per_second": 6.504,
204
  "step": 3900
205
  },
206
  {
207
- "epoch": 5.11,
208
- "learning_rate": 4.417639429312581e-05,
209
- "loss": 2.325,
 
210
  "step": 4200
211
  },
212
  {
213
- "epoch": 5.11,
214
- "eval_cer": 0.38352074833316746,
215
- "eval_loss": 1.6946874856948853,
216
- "eval_runtime": 42.4882,
217
- "eval_samples_per_second": 52.273,
218
- "eval_steps_per_second": 6.543,
219
  "step": 4200
220
  },
221
  {
222
- "epoch": 5.48,
223
- "learning_rate": 4.3527885862516214e-05,
224
- "loss": 2.2454,
 
225
  "step": 4500
226
  },
227
  {
228
- "epoch": 5.48,
229
- "eval_cer": 0.34078017713205294,
230
- "eval_loss": 1.5765234231948853,
231
- "eval_runtime": 42.1243,
232
- "eval_samples_per_second": 52.725,
233
- "eval_steps_per_second": 6.6,
234
  "step": 4500
235
  },
236
  {
237
- "epoch": 5.84,
238
- "learning_rate": 4.287937743190661e-05,
239
- "loss": 2.1954,
 
240
  "step": 4800
241
  },
242
  {
243
- "epoch": 5.84,
244
- "eval_cer": 0.37093243108767043,
245
- "eval_loss": 1.603211760520935,
246
- "eval_runtime": 42.6116,
247
- "eval_samples_per_second": 52.122,
248
- "eval_steps_per_second": 6.524,
249
  "step": 4800
250
  },
251
  {
252
- "epoch": 6.21,
253
- "learning_rate": 4.223086900129702e-05,
254
- "loss": 2.1492,
 
255
  "step": 5100
256
  },
257
  {
258
- "epoch": 6.21,
259
- "eval_cer": 0.3447606727037516,
260
- "eval_loss": 1.6078611612319946,
261
- "eval_runtime": 42.9188,
262
- "eval_samples_per_second": 51.749,
263
- "eval_steps_per_second": 6.477,
264
  "step": 5100
265
  },
266
  {
267
- "epoch": 6.57,
268
- "learning_rate": 4.1582360570687426e-05,
269
- "loss": 2.1655,
 
270
  "step": 5400
271
  },
272
  {
273
- "epoch": 6.57,
274
- "eval_cer": 0.33555577669419845,
275
- "eval_loss": 1.4955742359161377,
276
- "eval_runtime": 42.6136,
277
- "eval_samples_per_second": 52.12,
278
- "eval_steps_per_second": 6.524,
279
  "step": 5400
280
  },
281
  {
282
- "epoch": 6.94,
283
- "learning_rate": 4.093385214007782e-05,
284
- "loss": 2.1393,
 
285
  "step": 5700
286
  },
287
  {
288
- "epoch": 6.94,
289
- "eval_cer": 0.331625037317146,
290
- "eval_loss": 1.4772567749023438,
291
- "eval_runtime": 42.6929,
292
- "eval_samples_per_second": 52.023,
293
- "eval_steps_per_second": 6.512,
294
  "step": 5700
295
  },
296
  {
297
- "epoch": 7.3,
298
- "learning_rate": 4.028534370946823e-05,
299
- "loss": 2.1027,
 
300
  "step": 6000
301
  },
302
  {
303
- "epoch": 7.3,
304
- "eval_cer": 0.341427007662454,
305
- "eval_loss": 1.5089548826217651,
306
- "eval_runtime": 42.7699,
307
- "eval_samples_per_second": 51.929,
308
- "eval_steps_per_second": 6.5,
309
  "step": 6000
310
- },
311
- {
312
- "epoch": 7.67,
313
- "learning_rate": 3.9636835278858624e-05,
314
- "loss": 2.0824,
315
- "step": 6300
316
- },
317
- {
318
- "epoch": 7.67,
319
- "eval_cer": 0.34575579659667627,
320
- "eval_loss": 1.5948169231414795,
321
- "eval_runtime": 42.6031,
322
- "eval_samples_per_second": 52.132,
323
- "eval_steps_per_second": 6.525,
324
- "step": 6300
325
- },
326
- {
327
- "epoch": 8.03,
328
- "learning_rate": 3.899048854301773e-05,
329
- "loss": 2.061,
330
- "step": 6600
331
- },
332
- {
333
- "epoch": 8.03,
334
- "eval_cer": 0.35058214747736094,
335
- "eval_loss": 1.4923882484436035,
336
- "eval_runtime": 42.6516,
337
- "eval_samples_per_second": 52.073,
338
- "eval_steps_per_second": 6.518,
339
- "step": 6600
340
- },
341
- {
342
- "epoch": 8.4,
343
- "learning_rate": 3.8341980112408135e-05,
344
- "loss": 2.0212,
345
- "step": 6900
346
- },
347
- {
348
- "epoch": 8.4,
349
- "eval_cer": 0.33247089262613194,
350
- "eval_loss": 1.4590569734573364,
351
- "eval_runtime": 42.5489,
352
- "eval_samples_per_second": 52.199,
353
- "eval_steps_per_second": 6.534,
354
- "step": 6900
355
- },
356
- {
357
- "epoch": 8.76,
358
- "learning_rate": 3.769347168179853e-05,
359
- "loss": 2.0504,
360
- "step": 7200
361
- },
362
- {
363
- "epoch": 8.76,
364
- "eval_cer": 0.3344611404119813,
365
- "eval_loss": 1.4551000595092773,
366
- "eval_runtime": 42.7689,
367
- "eval_samples_per_second": 51.93,
368
- "eval_steps_per_second": 6.5,
369
- "step": 7200
370
- },
371
- {
372
- "epoch": 9.13,
373
- "learning_rate": 3.7044963251188936e-05,
374
- "loss": 2.0113,
375
- "step": 7500
376
- },
377
- {
378
- "epoch": 9.13,
379
- "eval_cer": 0.3344113842173351,
380
- "eval_loss": 1.4068984985351562,
381
- "eval_runtime": 42.6741,
382
- "eval_samples_per_second": 52.046,
383
- "eval_steps_per_second": 6.514,
384
- "step": 7500
385
- },
386
- {
387
- "epoch": 9.49,
388
- "learning_rate": 3.639645482057933e-05,
389
- "loss": 2.0057,
390
- "step": 7800
391
- },
392
- {
393
- "epoch": 9.49,
394
- "eval_cer": 0.3230669718379938,
395
- "eval_loss": 1.443265676498413,
396
- "eval_runtime": 42.7855,
397
- "eval_samples_per_second": 51.91,
398
- "eval_steps_per_second": 6.498,
399
- "step": 7800
400
- },
401
- {
402
- "epoch": 9.86,
403
- "learning_rate": 3.574794638996974e-05,
404
- "loss": 1.9741,
405
- "step": 8100
406
- },
407
- {
408
- "epoch": 9.86,
409
- "eval_cer": 0.3216240421932531,
410
- "eval_loss": 1.426885724067688,
411
- "eval_runtime": 42.8301,
412
- "eval_samples_per_second": 51.856,
413
- "eval_steps_per_second": 6.491,
414
- "step": 8100
415
- },
416
- {
417
- "epoch": 10.22,
418
- "learning_rate": 3.509943795936014e-05,
419
- "loss": 1.936,
420
- "step": 8400
421
- },
422
- {
423
- "epoch": 10.22,
424
- "eval_cer": 0.3227186784754702,
425
- "eval_loss": 1.3612221479415894,
426
- "eval_runtime": 43.0542,
427
- "eval_samples_per_second": 51.586,
428
- "eval_steps_per_second": 6.457,
429
- "step": 8400
430
- },
431
- {
432
- "epoch": 10.59,
433
- "learning_rate": 3.4450929528750544e-05,
434
- "loss": 1.9416,
435
- "step": 8700
436
- },
437
- {
438
- "epoch": 10.59,
439
- "eval_cer": 0.3027664444223306,
440
- "eval_loss": 1.363100290298462,
441
- "eval_runtime": 43.1271,
442
- "eval_samples_per_second": 51.499,
443
- "eval_steps_per_second": 6.446,
444
- "step": 8700
445
- },
446
- {
447
- "epoch": 10.95,
448
- "learning_rate": 3.380242109814095e-05,
449
- "loss": 1.9425,
450
- "step": 9000
451
- },
452
- {
453
- "epoch": 10.95,
454
- "eval_cer": 0.3038610807045477,
455
- "eval_loss": 1.3716000318527222,
456
- "eval_runtime": 43.286,
457
- "eval_samples_per_second": 51.31,
458
- "eval_steps_per_second": 6.422,
459
- "step": 9000
460
- },
461
- {
462
- "epoch": 11.32,
463
- "learning_rate": 3.3153912667531345e-05,
464
- "loss": 1.9351,
465
- "step": 9300
466
- },
467
- {
468
- "epoch": 11.32,
469
- "eval_cer": 0.31490695591601153,
470
- "eval_loss": 1.3932286500930786,
471
- "eval_runtime": 43.3029,
472
- "eval_samples_per_second": 51.29,
473
- "eval_steps_per_second": 6.42,
474
- "step": 9300
475
- },
476
- {
477
- "epoch": 11.68,
478
- "learning_rate": 3.250540423692175e-05,
479
- "loss": 1.9046,
480
- "step": 9600
481
- },
482
- {
483
- "epoch": 11.68,
484
- "eval_cer": 0.3329684545725943,
485
- "eval_loss": 1.4470584392547607,
486
- "eval_runtime": 42.9923,
487
- "eval_samples_per_second": 51.66,
488
- "eval_steps_per_second": 6.466,
489
- "step": 9600
490
- },
491
- {
492
- "epoch": 12.05,
493
- "learning_rate": 3.185905750108085e-05,
494
- "loss": 1.8587,
495
- "step": 9900
496
- },
497
- {
498
- "epoch": 12.05,
499
- "eval_cer": 0.3056523037118121,
500
- "eval_loss": 1.3519924879074097,
501
- "eval_runtime": 42.7004,
502
- "eval_samples_per_second": 52.014,
503
- "eval_steps_per_second": 6.51,
504
- "step": 9900
505
- },
506
- {
507
- "epoch": 12.41,
508
- "learning_rate": 3.1210549070471253e-05,
509
- "loss": 1.8699,
510
- "step": 10200
511
- },
512
- {
513
- "epoch": 12.41,
514
- "eval_cer": 0.3289879590008956,
515
- "eval_loss": 1.4434651136398315,
516
- "eval_runtime": 42.904,
517
- "eval_samples_per_second": 51.767,
518
- "eval_steps_per_second": 6.48,
519
- "step": 10200
520
- },
521
- {
522
- "epoch": 12.78,
523
- "learning_rate": 3.056204063986166e-05,
524
- "loss": 1.8328,
525
- "step": 10500
526
- },
527
- {
528
- "epoch": 12.78,
529
- "eval_cer": 0.31356353866056325,
530
- "eval_loss": 1.361649751663208,
531
- "eval_runtime": 42.7673,
532
- "eval_samples_per_second": 51.932,
533
- "eval_steps_per_second": 6.5,
534
- "step": 10500
535
- },
536
- {
537
- "epoch": 13.14,
538
- "learning_rate": 2.9913532209252054e-05,
539
- "loss": 1.8136,
540
- "step": 10800
541
- },
542
- {
543
- "epoch": 13.14,
544
- "eval_cer": 0.2943078913324709,
545
- "eval_loss": 1.3512203693389893,
546
- "eval_runtime": 42.5723,
547
- "eval_samples_per_second": 52.17,
548
- "eval_steps_per_second": 6.53,
549
- "step": 10800
550
- },
551
- {
552
- "epoch": 13.51,
553
- "learning_rate": 2.9265023778642458e-05,
554
- "loss": 1.8099,
555
- "step": 11100
556
- },
557
- {
558
- "epoch": 13.51,
559
- "eval_cer": 0.2956513085879192,
560
- "eval_loss": 1.3534834384918213,
561
- "eval_runtime": 42.854,
562
- "eval_samples_per_second": 51.827,
563
- "eval_steps_per_second": 6.487,
564
- "step": 11100
565
- },
566
- {
567
- "epoch": 13.87,
568
- "learning_rate": 2.861651534803286e-05,
569
- "loss": 1.8021,
570
- "step": 11400
571
- },
572
- {
573
- "epoch": 13.87,
574
- "eval_cer": 0.2981888745148771,
575
- "eval_loss": 1.3732918500900269,
576
- "eval_runtime": 42.5792,
577
- "eval_samples_per_second": 52.162,
578
- "eval_steps_per_second": 6.529,
579
- "step": 11400
580
- },
581
- {
582
- "epoch": 14.24,
583
- "learning_rate": 2.7968006917423263e-05,
584
- "loss": 1.7809,
585
- "step": 11700
586
- },
587
- {
588
- "epoch": 14.24,
589
- "eval_cer": 0.30804060105483133,
590
- "eval_loss": 1.3088232278823853,
591
- "eval_runtime": 42.9072,
592
- "eval_samples_per_second": 51.763,
593
- "eval_steps_per_second": 6.479,
594
- "step": 11700
595
- },
596
- {
597
- "epoch": 14.6,
598
- "learning_rate": 2.731949848681366e-05,
599
- "loss": 1.7734,
600
- "step": 12000
601
- },
602
- {
603
- "epoch": 14.6,
604
- "eval_cer": 0.28858592894815405,
605
- "eval_loss": 1.320089340209961,
606
- "eval_runtime": 42.6671,
607
- "eval_samples_per_second": 52.054,
608
- "eval_steps_per_second": 6.516,
609
- "step": 12000
610
- },
611
- {
612
- "epoch": 14.97,
613
- "learning_rate": 2.6670990056204063e-05,
614
- "loss": 1.7646,
615
- "step": 12300
616
- },
617
- {
618
- "epoch": 14.97,
619
- "eval_cer": 0.3268981988257538,
620
- "eval_loss": 1.3471167087554932,
621
- "eval_runtime": 42.7924,
622
- "eval_samples_per_second": 51.902,
623
- "eval_steps_per_second": 6.496,
624
- "step": 12300
625
- },
626
- {
627
- "epoch": 15.33,
628
- "learning_rate": 2.602248162559447e-05,
629
- "loss": 1.733,
630
- "step": 12600
631
- },
632
- {
633
- "epoch": 15.33,
634
- "eval_cer": 0.30321425017414666,
635
- "eval_loss": 1.3437916040420532,
636
- "eval_runtime": 42.7066,
637
- "eval_samples_per_second": 52.006,
638
- "eval_steps_per_second": 6.51,
639
- "step": 12600
640
- },
641
- {
642
- "epoch": 15.7,
643
- "learning_rate": 2.5373973194984868e-05,
644
- "loss": 1.7182,
645
- "step": 12900
646
- },
647
- {
648
- "epoch": 15.7,
649
- "eval_cer": 0.2999800975221415,
650
- "eval_loss": 1.3310909271240234,
651
- "eval_runtime": 42.787,
652
- "eval_samples_per_second": 51.908,
653
- "eval_steps_per_second": 6.497,
654
- "step": 12900
655
- },
656
- {
657
- "epoch": 16.06,
658
- "learning_rate": 2.472546476437527e-05,
659
- "loss": 1.7071,
660
- "step": 13200
661
- },
662
- {
663
- "epoch": 16.06,
664
- "eval_cer": 0.3073937705244303,
665
- "eval_loss": 1.2641910314559937,
666
- "eval_runtime": 42.6973,
667
- "eval_samples_per_second": 52.017,
668
- "eval_steps_per_second": 6.511,
669
- "step": 13200
670
- },
671
- {
672
- "epoch": 16.43,
673
- "learning_rate": 2.4076956333765675e-05,
674
- "loss": 1.7196,
675
- "step": 13500
676
- },
677
- {
678
- "epoch": 16.43,
679
- "eval_cer": 0.2859488506319037,
680
- "eval_loss": 1.2662409543991089,
681
- "eval_runtime": 42.6819,
682
- "eval_samples_per_second": 52.036,
683
- "eval_steps_per_second": 6.513,
684
- "step": 13500
685
- },
686
- {
687
- "epoch": 16.79,
688
- "learning_rate": 2.3428447903156076e-05,
689
- "loss": 1.7264,
690
- "step": 13800
691
- },
692
- {
693
- "epoch": 16.79,
694
- "eval_cer": 0.2878893422231068,
695
- "eval_loss": 1.2460156679153442,
696
- "eval_runtime": 42.7771,
697
- "eval_samples_per_second": 51.92,
698
- "eval_steps_per_second": 6.499,
699
- "step": 13800
700
- },
701
- {
702
- "epoch": 17.16,
703
- "learning_rate": 2.2782101167315176e-05,
704
- "loss": 1.6875,
705
- "step": 14100
706
- },
707
- {
708
- "epoch": 17.16,
709
- "eval_cer": 0.2931137426609613,
710
- "eval_loss": 1.3022774457931519,
711
- "eval_runtime": 42.5345,
712
- "eval_samples_per_second": 52.216,
713
- "eval_steps_per_second": 6.536,
714
- "step": 14100
715
- },
716
- {
717
- "epoch": 17.52,
718
- "learning_rate": 2.2133592736705577e-05,
719
- "loss": 1.6659,
720
- "step": 14400
721
- },
722
- {
723
- "epoch": 17.52,
724
- "eval_cer": 0.2927654492984377,
725
- "eval_loss": 1.32107675075531,
726
- "eval_runtime": 42.821,
727
- "eval_samples_per_second": 51.867,
728
- "eval_steps_per_second": 6.492,
729
- "step": 14400
730
- },
731
- {
732
- "epoch": 17.89,
733
- "learning_rate": 2.148508430609598e-05,
734
- "loss": 1.6694,
735
- "step": 14700
736
- },
737
- {
738
- "epoch": 17.89,
739
- "eval_cer": 0.2882873917802766,
740
- "eval_loss": 1.3291140794754028,
741
- "eval_runtime": 42.7715,
742
- "eval_samples_per_second": 51.927,
743
- "eval_steps_per_second": 6.5,
744
- "step": 14700
745
- },
746
- {
747
- "epoch": 18.25,
748
- "learning_rate": 2.0836575875486384e-05,
749
- "loss": 1.643,
750
- "step": 15000
751
- },
752
- {
753
- "epoch": 18.25,
754
- "eval_cer": 0.294755697084287,
755
- "eval_loss": 1.2615532875061035,
756
- "eval_runtime": 42.8646,
757
- "eval_samples_per_second": 51.814,
758
- "eval_steps_per_second": 6.486,
759
- "step": 15000
760
- },
761
- {
762
- "epoch": 18.62,
763
- "learning_rate": 2.0188067444876785e-05,
764
- "loss": 1.676,
765
- "step": 15300
766
- },
767
- {
768
- "epoch": 18.62,
769
- "eval_cer": 0.2835107970942382,
770
- "eval_loss": 1.2185758352279663,
771
- "eval_runtime": 42.7823,
772
- "eval_samples_per_second": 51.914,
773
- "eval_steps_per_second": 6.498,
774
- "step": 15300
775
- },
776
- {
777
- "epoch": 18.98,
778
- "learning_rate": 1.9539559014267185e-05,
779
- "loss": 1.6397,
780
- "step": 15600
781
- },
782
- {
783
- "epoch": 18.98,
784
- "eval_cer": 0.2810727435565728,
785
- "eval_loss": 1.3059513568878174,
786
- "eval_runtime": 42.9668,
787
- "eval_samples_per_second": 51.691,
788
- "eval_steps_per_second": 6.47,
789
- "step": 15600
790
- },
791
- {
792
- "epoch": 19.35,
793
- "learning_rate": 1.8893212278426286e-05,
794
- "loss": 1.6347,
795
- "step": 15900
796
- },
797
- {
798
- "epoch": 19.35,
799
- "eval_cer": 0.28838690416956914,
800
- "eval_loss": 1.2377227544784546,
801
- "eval_runtime": 42.8094,
802
- "eval_samples_per_second": 51.881,
803
- "eval_steps_per_second": 6.494,
804
- "step": 15900
805
- },
806
- {
807
- "epoch": 19.71,
808
- "learning_rate": 1.824470384781669e-05,
809
- "loss": 1.6328,
810
- "step": 16200
811
- },
812
- {
813
- "epoch": 19.71,
814
- "eval_cer": 0.27599761170265696,
815
- "eval_loss": 1.2721112966537476,
816
- "eval_runtime": 42.6795,
817
- "eval_samples_per_second": 52.039,
818
- "eval_steps_per_second": 6.514,
819
- "step": 16200
820
- },
821
- {
822
- "epoch": 20.08,
823
- "learning_rate": 1.7596195417207094e-05,
824
- "loss": 1.6092,
825
- "step": 16500
826
- },
827
- {
828
- "epoch": 20.08,
829
- "eval_cer": 0.28231664842272863,
830
- "eval_loss": 1.2696741819381714,
831
- "eval_runtime": 42.6768,
832
- "eval_samples_per_second": 52.042,
833
- "eval_steps_per_second": 6.514,
834
- "step": 16500
835
- },
836
- {
837
- "epoch": 20.44,
838
- "learning_rate": 1.6947686986597494e-05,
839
- "loss": 1.5737,
840
- "step": 16800
841
- },
842
- {
843
- "epoch": 20.44,
844
- "eval_cer": 0.28306299134242213,
845
- "eval_loss": 1.2230887413024902,
846
- "eval_runtime": 42.9425,
847
- "eval_samples_per_second": 51.72,
848
- "eval_steps_per_second": 6.474,
849
- "step": 16800
850
- },
851
- {
852
- "epoch": 20.81,
853
- "learning_rate": 1.6299178555987894e-05,
854
- "loss": 1.6166,
855
- "step": 17100
856
- },
857
- {
858
- "epoch": 20.81,
859
- "eval_cer": 0.2663449099412877,
860
- "eval_loss": 1.2277541160583496,
861
- "eval_runtime": 42.813,
862
- "eval_samples_per_second": 51.877,
863
- "eval_steps_per_second": 6.493,
864
- "step": 17100
865
- },
866
- {
867
- "epoch": 21.17,
868
- "learning_rate": 1.56506701253783e-05,
869
- "loss": 1.5964,
870
- "step": 17400
871
- },
872
- {
873
- "epoch": 21.17,
874
- "eval_cer": 0.27355955816499156,
875
- "eval_loss": 1.2313120365142822,
876
- "eval_runtime": 42.7309,
877
- "eval_samples_per_second": 51.976,
878
- "eval_steps_per_second": 6.506,
879
- "step": 17400
880
- },
881
- {
882
- "epoch": 21.54,
883
- "learning_rate": 1.5002161694768699e-05,
884
- "loss": 1.5237,
885
- "step": 17700
886
- },
887
- {
888
- "epoch": 21.54,
889
- "eval_cer": 0.27863469001890734,
890
- "eval_loss": 1.2411593198776245,
891
- "eval_runtime": 42.9368,
892
- "eval_samples_per_second": 51.727,
893
- "eval_steps_per_second": 6.475,
894
- "step": 17700
895
- },
896
- {
897
- "epoch": 21.9,
898
- "learning_rate": 1.4353653264159101e-05,
899
- "loss": 1.5419,
900
- "step": 18000
901
- },
902
- {
903
- "epoch": 21.9,
904
- "eval_cer": 0.28634690018907355,
905
- "eval_loss": 1.2718561887741089,
906
- "eval_runtime": 42.5781,
907
- "eval_samples_per_second": 52.163,
908
- "eval_steps_per_second": 6.529,
909
- "step": 18000
910
- },
911
- {
912
- "epoch": 22.27,
913
- "learning_rate": 1.3705144833549505e-05,
914
- "loss": 1.5654,
915
- "step": 18300
916
- },
917
- {
918
- "epoch": 22.27,
919
- "eval_cer": 0.26734003383421234,
920
- "eval_loss": 1.2373576164245605,
921
- "eval_runtime": 42.5574,
922
- "eval_samples_per_second": 52.188,
923
- "eval_steps_per_second": 6.532,
924
- "step": 18300
925
- },
926
- {
927
- "epoch": 22.63,
928
- "learning_rate": 1.3060959792477304e-05,
929
- "loss": 1.5331,
930
- "step": 18600
931
- },
932
- {
933
- "epoch": 22.63,
934
- "eval_cer": 0.2708727236540949,
935
- "eval_loss": 1.197614073753357,
936
- "eval_runtime": 42.6921,
937
- "eval_samples_per_second": 52.024,
938
- "eval_steps_per_second": 6.512,
939
- "step": 18600
940
- },
941
- {
942
- "epoch": 23.0,
943
- "learning_rate": 1.2412451361867706e-05,
944
- "loss": 1.5378,
945
- "step": 18900
946
- },
947
- {
948
- "epoch": 23.0,
949
- "eval_cer": 0.26838491392178326,
950
- "eval_loss": 1.1672557592391968,
951
- "eval_runtime": 42.9497,
952
- "eval_samples_per_second": 51.712,
953
- "eval_steps_per_second": 6.473,
954
- "step": 18900
955
- },
956
- {
957
- "epoch": 23.36,
958
- "learning_rate": 1.1763942931258106e-05,
959
- "loss": 1.4972,
960
- "step": 19200
961
- },
962
- {
963
- "epoch": 23.36,
964
- "eval_cer": 0.26938003781470793,
965
- "eval_loss": 1.1548832654953003,
966
- "eval_runtime": 42.5425,
967
- "eval_samples_per_second": 52.207,
968
- "eval_steps_per_second": 6.535,
969
- "step": 19200
970
- },
971
- {
972
- "epoch": 23.73,
973
- "learning_rate": 1.1115434500648508e-05,
974
- "loss": 1.5112,
975
- "step": 19500
976
- },
977
- {
978
- "epoch": 23.73,
979
- "eval_cer": 0.2684844263110757,
980
- "eval_loss": 1.2580962181091309,
981
- "eval_runtime": 43.0178,
982
- "eval_samples_per_second": 51.63,
983
- "eval_steps_per_second": 6.462,
984
- "step": 19500
985
- },
986
- {
987
- "epoch": 24.09,
988
- "learning_rate": 1.046692607003891e-05,
989
- "loss": 1.5026,
990
- "step": 19800
991
- },
992
- {
993
- "epoch": 24.09,
994
- "eval_cer": 0.26957906259329284,
995
- "eval_loss": 1.2475780248641968,
996
- "eval_runtime": 42.8521,
997
- "eval_samples_per_second": 51.829,
998
- "eval_steps_per_second": 6.487,
999
- "step": 19800
1000
- },
1001
- {
1002
- "epoch": 24.46,
1003
- "learning_rate": 9.818417639429313e-06,
1004
- "loss": 1.5062,
1005
- "step": 20100
1006
- },
1007
- {
1008
- "epoch": 24.46,
1009
- "eval_cer": 0.2755995621454871,
1010
- "eval_loss": 1.2111254930496216,
1011
- "eval_runtime": 42.9059,
1012
- "eval_samples_per_second": 51.764,
1013
- "eval_steps_per_second": 6.479,
1014
- "step": 20100
1015
- },
1016
- {
1017
- "epoch": 24.82,
1018
- "learning_rate": 9.169909208819715e-06,
1019
- "loss": 1.4816,
1020
- "step": 20400
1021
- },
1022
- {
1023
- "epoch": 24.82,
1024
- "eval_cer": 0.26345905065180614,
1025
- "eval_loss": 1.2007070779800415,
1026
- "eval_runtime": 42.6871,
1027
- "eval_samples_per_second": 52.03,
1028
- "eval_steps_per_second": 6.513,
1029
- "step": 20400
1030
- },
1031
- {
1032
- "epoch": 25.19,
1033
- "learning_rate": 8.521400778210117e-06,
1034
- "loss": 1.4836,
1035
- "step": 20700
1036
- },
1037
- {
1038
- "epoch": 25.19,
1039
- "eval_cer": 0.27435565727933126,
1040
- "eval_loss": 1.2548900842666626,
1041
- "eval_runtime": 42.6925,
1042
- "eval_samples_per_second": 52.023,
1043
- "eval_steps_per_second": 6.512,
1044
- "step": 20700
1045
- },
1046
- {
1047
- "epoch": 25.55,
1048
- "learning_rate": 7.87289234760052e-06,
1049
- "loss": 1.479,
1050
- "step": 21000
1051
- },
1052
- {
1053
- "epoch": 25.55,
1054
- "eval_cer": 0.26987759976117026,
1055
- "eval_loss": 1.1535056829452515,
1056
- "eval_runtime": 42.5922,
1057
- "eval_samples_per_second": 52.146,
1058
- "eval_steps_per_second": 6.527,
1059
- "step": 21000
1060
- },
1061
- {
1062
- "epoch": 25.92,
1063
- "learning_rate": 7.2243839169909205e-06,
1064
- "loss": 1.493,
1065
- "step": 21300
1066
- },
1067
- {
1068
- "epoch": 25.92,
1069
- "eval_cer": 0.26972833117723155,
1070
- "eval_loss": 1.198728084564209,
1071
- "eval_runtime": 42.6034,
1072
- "eval_samples_per_second": 52.132,
1073
- "eval_steps_per_second": 6.525,
1074
- "step": 21300
1075
- },
1076
- {
1077
- "epoch": 26.28,
1078
- "learning_rate": 6.5758754863813235e-06,
1079
- "loss": 1.4524,
1080
- "step": 21600
1081
- },
1082
- {
1083
- "epoch": 26.28,
1084
- "eval_cer": 0.27470395064185493,
1085
- "eval_loss": 1.2245545387268066,
1086
- "eval_runtime": 42.9242,
1087
- "eval_samples_per_second": 51.742,
1088
- "eval_steps_per_second": 6.477,
1089
- "step": 21600
1090
- },
1091
- {
1092
- "epoch": 26.65,
1093
- "learning_rate": 5.927367055771725e-06,
1094
- "loss": 1.4569,
1095
- "step": 21900
1096
- },
1097
- {
1098
- "epoch": 26.65,
1099
- "eval_cer": 0.2605234351676784,
1100
- "eval_loss": 1.1879122257232666,
1101
- "eval_runtime": 42.634,
1102
- "eval_samples_per_second": 52.095,
1103
- "eval_steps_per_second": 6.521,
1104
- "step": 21900
1105
- },
1106
- {
1107
- "epoch": 27.01,
1108
- "learning_rate": 5.278858625162128e-06,
1109
- "loss": 1.4535,
1110
- "step": 22200
1111
- },
1112
- {
1113
- "epoch": 27.01,
1114
- "eval_cer": 0.27281321524529806,
1115
- "eval_loss": 1.2265853881835938,
1116
- "eval_runtime": 42.7816,
1117
- "eval_samples_per_second": 51.915,
1118
- "eval_steps_per_second": 6.498,
1119
- "step": 22200
1120
- },
1121
- {
1122
- "epoch": 27.38,
1123
- "learning_rate": 4.63035019455253e-06,
1124
- "loss": 1.4452,
1125
- "step": 22500
1126
- },
1127
- {
1128
- "epoch": 27.38,
1129
- "eval_cer": 0.2566424519852722,
1130
- "eval_loss": 1.1812487840652466,
1131
- "eval_runtime": 42.5285,
1132
- "eval_samples_per_second": 52.224,
1133
- "eval_steps_per_second": 6.537,
1134
- "step": 22500
1135
- },
1136
- {
1137
- "epoch": 27.74,
1138
- "learning_rate": 3.981841763942931e-06,
1139
- "loss": 1.4513,
1140
- "step": 22800
1141
- },
1142
- {
1143
- "epoch": 27.74,
1144
- "eval_cer": 0.26286197631605135,
1145
- "eval_loss": 1.1672886610031128,
1146
- "eval_runtime": 43.0029,
1147
- "eval_samples_per_second": 51.648,
1148
- "eval_steps_per_second": 6.465,
1149
- "step": 22800
1150
- },
1151
- {
1152
- "epoch": 28.11,
1153
- "learning_rate": 3.3333333333333333e-06,
1154
- "loss": 1.4561,
1155
- "step": 23100
1156
- },
1157
- {
1158
- "epoch": 28.11,
1159
- "eval_cer": 0.26181709622848043,
1160
- "eval_loss": 1.1963270902633667,
1161
- "eval_runtime": 42.7473,
1162
- "eval_samples_per_second": 51.957,
1163
- "eval_steps_per_second": 6.503,
1164
- "step": 23100
1165
- },
1166
- {
1167
- "epoch": 28.47,
1168
- "learning_rate": 2.6848249027237355e-06,
1169
- "loss": 1.4357,
1170
- "step": 23400
1171
- },
1172
- {
1173
- "epoch": 28.47,
1174
- "eval_cer": 0.26913125684147676,
1175
- "eval_loss": 1.201293706893921,
1176
- "eval_runtime": 42.6759,
1177
- "eval_samples_per_second": 52.043,
1178
- "eval_steps_per_second": 6.514,
1179
- "step": 23400
1180
- },
1181
- {
1182
- "epoch": 28.84,
1183
- "learning_rate": 2.0363164721141376e-06,
1184
- "loss": 1.4427,
1185
- "step": 23700
1186
- },
1187
- {
1188
- "epoch": 28.84,
1189
- "eval_cer": 0.2726141904667131,
1190
- "eval_loss": 1.2448346614837646,
1191
- "eval_runtime": 42.4761,
1192
- "eval_samples_per_second": 52.288,
1193
- "eval_steps_per_second": 6.545,
1194
- "step": 23700
1195
- },
1196
- {
1197
- "epoch": 29.2,
1198
- "learning_rate": 1.3899697362732382e-06,
1199
- "loss": 1.4171,
1200
- "step": 24000
1201
- },
1202
- {
1203
- "epoch": 29.2,
1204
- "eval_cer": 0.26684247188775,
1205
- "eval_loss": 1.2063277959823608,
1206
- "eval_runtime": 42.7033,
1207
- "eval_samples_per_second": 52.01,
1208
- "eval_steps_per_second": 6.51,
1209
- "step": 24000
1210
- },
1211
- {
1212
- "epoch": 29.57,
1213
- "learning_rate": 7.414613056636403e-07,
1214
- "loss": 1.4639,
1215
- "step": 24300
1216
- },
1217
- {
1218
- "epoch": 29.57,
1219
- "eval_cer": 0.26694198427704247,
1220
- "eval_loss": 1.2228556871414185,
1221
- "eval_runtime": 42.5785,
1222
- "eval_samples_per_second": 52.162,
1223
- "eval_steps_per_second": 6.529,
1224
- "step": 24300
1225
- },
1226
- {
1227
- "epoch": 29.93,
1228
- "learning_rate": 9.295287505404236e-08,
1229
- "loss": 1.4234,
1230
- "step": 24600
1231
- },
1232
- {
1233
- "epoch": 29.93,
1234
- "eval_cer": 0.2594785550801075,
1235
- "eval_loss": 1.1955249309539795,
1236
- "eval_runtime": 43.02,
1237
- "eval_samples_per_second": 51.627,
1238
- "eval_steps_per_second": 6.462,
1239
- "step": 24600
1240
  }
1241
  ],
1242
  "logging_steps": 300,
1243
- "max_steps": 24630,
1244
  "num_input_tokens_seen": 0,
1245
- "num_train_epochs": 30,
1246
  "save_steps": 600,
1247
- "total_flos": 3.9184197928838064e+20,
1248
- "train_batch_size": 4,
1249
  "trial_name": null,
1250
  "trial_params": null
1251
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8165192041489382,
5
  "eval_steps": 300,
6
+ "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.04,
13
+ "grad_norm": 21.039730072021484,
14
+ "learning_rate": 7.4e-06,
15
+ "loss": 2.2676,
16
  "step": 300
17
  },
18
  {
19
+ "epoch": 0.04,
20
+ "eval_cer": 0.2868164937892176,
21
+ "eval_loss": 0.6633031964302063,
22
+ "eval_runtime": 320.6214,
23
+ "eval_samples_per_second": 8.05,
24
+ "eval_steps_per_second": 8.05,
25
  "step": 300
26
  },
27
  {
28
+ "epoch": 0.08,
29
+ "grad_norm": 9.525612831115723,
30
+ "learning_rate": 1.49e-05,
31
+ "loss": 1.8817,
32
  "step": 600
33
  },
34
  {
35
+ "epoch": 0.08,
36
+ "eval_cer": 0.2927711614803432,
37
+ "eval_loss": 0.6335302591323853,
38
+ "eval_runtime": 119.6817,
39
+ "eval_samples_per_second": 21.566,
40
+ "eval_steps_per_second": 21.566,
41
  "step": 600
42
  },
43
  {
44
+ "epoch": 0.12,
45
+ "grad_norm": 7.7516608238220215,
46
+ "learning_rate": 2.2400000000000002e-05,
47
+ "loss": 1.7845,
48
  "step": 900
49
  },
50
  {
51
+ "epoch": 0.12,
52
+ "eval_cer": 0.2928031758227686,
53
+ "eval_loss": 0.5966914296150208,
54
+ "eval_runtime": 123.0968,
55
+ "eval_samples_per_second": 20.967,
56
+ "eval_steps_per_second": 20.967,
57
  "step": 900
58
  },
59
  {
60
+ "epoch": 0.16,
61
+ "grad_norm": 4.020584583282471,
62
+ "learning_rate": 2.9875000000000004e-05,
63
+ "loss": 1.7503,
64
  "step": 1200
65
  },
66
  {
67
+ "epoch": 0.16,
68
+ "eval_cer": 0.29213087463183507,
69
+ "eval_loss": 0.5989494919776917,
70
+ "eval_runtime": 125.3567,
71
+ "eval_samples_per_second": 20.589,
72
+ "eval_steps_per_second": 20.589,
73
  "step": 1200
74
  },
75
  {
76
+ "epoch": 0.2,
77
+ "grad_norm": 3.7881321907043457,
78
+ "learning_rate": 3.737500000000001e-05,
79
+ "loss": 1.7549,
80
  "step": 1500
81
  },
82
  {
83
+ "epoch": 0.2,
84
+ "eval_cer": 0.29901395825329746,
85
+ "eval_loss": 0.6132485866546631,
86
+ "eval_runtime": 132.3811,
87
+ "eval_samples_per_second": 19.497,
88
+ "eval_steps_per_second": 19.497,
89
  "step": 1500
90
  },
91
  {
92
+ "epoch": 0.24,
93
+ "grad_norm": 3.710826873779297,
94
+ "learning_rate": 4.4875e-05,
95
+ "loss": 1.7214,
96
  "step": 1800
97
  },
98
  {
99
+ "epoch": 0.24,
100
+ "eval_cer": 0.29782942758355746,
101
+ "eval_loss": 0.6118220686912537,
102
+ "eval_runtime": 130.5327,
103
+ "eval_samples_per_second": 19.773,
104
+ "eval_steps_per_second": 19.773,
105
  "step": 1800
106
  },
107
  {
108
+ "epoch": 0.29,
109
+ "grad_norm": 4.127965927124023,
110
+ "learning_rate": 4.911181750186986e-05,
111
+ "loss": 1.6742,
112
  "step": 2100
113
  },
114
  {
115
+ "epoch": 0.29,
116
+ "eval_cer": 0.2963567678319887,
117
+ "eval_loss": 0.6121346354484558,
118
+ "eval_runtime": 131.7081,
119
+ "eval_samples_per_second": 19.596,
120
+ "eval_steps_per_second": 19.596,
121
  "step": 2100
122
  },
123
  {
124
+ "epoch": 0.33,
125
+ "grad_norm": 2.801788091659546,
126
+ "learning_rate": 4.630703066566941e-05,
127
+ "loss": 1.6562,
128
  "step": 2400
129
  },
130
  {
131
+ "epoch": 0.33,
132
+ "eval_cer": 0.3039761813292355,
133
+ "eval_loss": 0.6422853469848633,
134
+ "eval_runtime": 132.5985,
135
+ "eval_samples_per_second": 19.465,
136
+ "eval_steps_per_second": 19.465,
137
  "step": 2400
138
  },
139
  {
140
+ "epoch": 0.37,
141
+ "grad_norm": 5.572460174560547,
142
+ "learning_rate": 4.350224382946896e-05,
143
+ "loss": 1.6373,
144
  "step": 2700
145
  },
146
  {
147
+ "epoch": 0.37,
148
+ "eval_cer": 0.2989499295684467,
149
+ "eval_loss": 0.639430046081543,
150
+ "eval_runtime": 133.5523,
151
+ "eval_samples_per_second": 19.326,
152
+ "eval_steps_per_second": 19.326,
153
  "step": 2700
154
  },
155
  {
156
+ "epoch": 0.41,
157
+ "grad_norm": 4.840504169464111,
158
+ "learning_rate": 4.069745699326851e-05,
159
+ "loss": 1.5944,
160
  "step": 3000
161
  },
162
  {
163
+ "epoch": 0.41,
164
+ "eval_cer": 0.2967089255986682,
165
+ "eval_loss": 0.6272587776184082,
166
+ "eval_runtime": 134.0186,
167
+ "eval_samples_per_second": 19.259,
168
+ "eval_steps_per_second": 19.259,
169
  "step": 3000
170
  },
171
  {
172
+ "epoch": 0.45,
173
+ "grad_norm": 2.972968816757202,
174
+ "learning_rate": 3.7892670157068066e-05,
175
+ "loss": 1.5654,
176
  "step": 3300
177
  },
178
  {
179
+ "epoch": 0.45,
180
+ "eval_cer": 0.2903700857984377,
181
+ "eval_loss": 0.5716381669044495,
182
+ "eval_runtime": 134.8224,
183
+ "eval_samples_per_second": 19.144,
184
+ "eval_steps_per_second": 19.144,
185
  "step": 3300
186
  },
187
  {
188
+ "epoch": 0.49,
189
+ "grad_norm": 2.683291435241699,
190
+ "learning_rate": 3.5087883320867614e-05,
191
+ "loss": 1.5333,
192
  "step": 3600
193
  },
194
  {
195
+ "epoch": 0.49,
196
+ "eval_cer": 0.29011397105903447,
197
+ "eval_loss": 0.573725163936615,
198
+ "eval_runtime": 134.5796,
199
+ "eval_samples_per_second": 19.178,
200
+ "eval_steps_per_second": 19.178,
201
  "step": 3600
202
  },
203
  {
204
+ "epoch": 0.53,
205
+ "grad_norm": 8.39251708984375,
206
+ "learning_rate": 3.228309648466717e-05,
207
+ "loss": 1.5252,
208
  "step": 3900
209
  },
210
  {
211
+ "epoch": 0.53,
212
+ "eval_cer": 0.29501216545012166,
213
+ "eval_loss": 0.5687663555145264,
214
+ "eval_runtime": 135.3645,
215
+ "eval_samples_per_second": 19.067,
216
+ "eval_steps_per_second": 19.067,
217
  "step": 3900
218
  },
219
  {
220
+ "epoch": 0.57,
221
+ "grad_norm": 2.761993169784546,
222
+ "learning_rate": 2.9478309648466717e-05,
223
+ "loss": 1.5017,
224
  "step": 4200
225
  },
226
  {
227
+ "epoch": 0.57,
228
+ "eval_cer": 0.2957164809834806,
229
+ "eval_loss": 0.5565311312675476,
230
+ "eval_runtime": 135.2285,
231
+ "eval_samples_per_second": 19.086,
232
+ "eval_steps_per_second": 19.086,
233
  "step": 4200
234
  },
235
  {
236
+ "epoch": 0.61,
237
+ "grad_norm": 23.93025016784668,
238
+ "learning_rate": 2.667352281226627e-05,
239
+ "loss": 1.4707,
240
  "step": 4500
241
  },
242
  {
243
+ "epoch": 0.61,
244
+ "eval_cer": 0.28828915354078627,
245
+ "eval_loss": 0.5579658150672913,
246
+ "eval_runtime": 135.1123,
247
+ "eval_samples_per_second": 19.103,
248
+ "eval_steps_per_second": 19.103,
249
  "step": 4500
250
  },
251
  {
252
+ "epoch": 0.65,
253
+ "grad_norm": 1.7994115352630615,
254
+ "learning_rate": 2.386873597606582e-05,
255
+ "loss": 1.4578,
256
  "step": 4800
257
  },
258
  {
259
+ "epoch": 0.65,
260
+ "eval_cer": 0.2855359200922013,
261
+ "eval_loss": 0.5352594256401062,
262
+ "eval_runtime": 135.7058,
263
+ "eval_samples_per_second": 19.019,
264
+ "eval_steps_per_second": 19.019,
265
  "step": 4800
266
  },
267
  {
268
+ "epoch": 0.69,
269
+ "grad_norm": 1.9403347969055176,
270
+ "learning_rate": 2.1073298429319373e-05,
271
+ "loss": 1.4236,
272
  "step": 5100
273
  },
274
  {
275
+ "epoch": 0.69,
276
+ "eval_cer": 0.28758483800742735,
277
+ "eval_loss": 0.5495265126228333,
278
+ "eval_runtime": 136.3492,
279
+ "eval_samples_per_second": 18.929,
280
+ "eval_steps_per_second": 18.929,
281
  "step": 5100
282
  },
283
  {
284
+ "epoch": 0.73,
285
+ "grad_norm": 3.021031379699707,
286
+ "learning_rate": 1.8268511593118924e-05,
287
+ "loss": 1.3954,
288
  "step": 5400
289
  },
290
  {
291
+ "epoch": 0.73,
292
+ "eval_cer": 0.2876808810347036,
293
+ "eval_loss": 0.5390140414237976,
294
+ "eval_runtime": 138.4653,
295
+ "eval_samples_per_second": 18.64,
296
+ "eval_steps_per_second": 18.64,
297
  "step": 5400
298
  },
299
  {
300
+ "epoch": 0.78,
301
+ "grad_norm": 9.61117172241211,
302
+ "learning_rate": 1.5463724756918475e-05,
303
+ "loss": 1.3947,
304
  "step": 5700
305
  },
306
  {
307
+ "epoch": 0.78,
308
+ "eval_cer": 0.28633627865283645,
309
+ "eval_loss": 0.5187196731567383,
310
+ "eval_runtime": 136.0715,
311
+ "eval_samples_per_second": 18.968,
312
+ "eval_steps_per_second": 18.968,
313
  "step": 5700
314
  },
315
  {
316
+ "epoch": 0.82,
317
+ "grad_norm": 2.387241840362549,
318
+ "learning_rate": 1.2658937920718025e-05,
319
+ "loss": 1.3678,
320
  "step": 6000
321
  },
322
  {
323
+ "epoch": 0.82,
324
+ "eval_cer": 0.27993341016775514,
325
+ "eval_loss": 0.5071456432342529,
326
+ "eval_runtime": 136.2594,
327
+ "eval_samples_per_second": 18.942,
328
+ "eval_steps_per_second": 18.942,
329
  "step": 6000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
  }
331
  ],
332
  "logging_steps": 300,
333
+ "max_steps": 7348,
334
  "num_input_tokens_seen": 0,
335
+ "num_train_epochs": 1,
336
  "save_steps": 600,
337
+ "total_flos": 1.8710735171848015e+20,
338
+ "train_batch_size": 1,
339
  "trial_name": null,
340
  "trial_params": null
341
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:252bac47c7b7828141031a0c886132cdabae6720c7cd2b2c44aa61c53da0b413
3
- size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55d26536955cba130ab0ce5e51b836904166383e2bf233c2c401fde5e5113deb
3
+ size 4856