Initial commit
- README.md +141 -3
- added_tokens.json +1 -0
- all_results.json +15 -0
- config.json +107 -0
- eval.py +169 -0
- eval_results.json +10 -0
- log_speech-recognition-community-v2_dev_data_zh-HK_validation_predictions.txt +10 -0
- log_speech-recognition-community-v2_dev_data_zh-HK_validation_targets.txt +10 -0
- nohup.out +0 -0
- preprocessor_config.json +9 -0
- pytorch_model.bin +3 -0
- run.sh +37 -0
- run_eval.sh +10 -0
- run_speech_recognition_ctc.py +829 -0
- runs/Feb06_16-31-57_job-cb7cc850-8327-4ab0-bdf4-0ebe63e2788c/1644165171.7227242/events.out.tfevents.1644165171.job-cb7cc850-8327-4ab0-bdf4-0ebe63e2788c +3 -0
- runs/Feb06_16-31-57_job-cb7cc850-8327-4ab0-bdf4-0ebe63e2788c/events.out.tfevents.1644165171.job-cb7cc850-8327-4ab0-bdf4-0ebe63e2788c +3 -0
- runs/Feb06_16-31-57_job-cb7cc850-8327-4ab0-bdf4-0ebe63e2788c/events.out.tfevents.1644238077.job-cb7cc850-8327-4ab0-bdf4-0ebe63e2788c +3 -0
- special_tokens_map.json +1 -0
- speech-recognition-community-v2_dev_data_zh-HK_validation_eval_results.txt +2 -0
- tokenizer_config.json +1 -0
- train_results.json +8 -0
- trainer_state.json +3003 -0
- training_args.bin +3 -0
- vocab.json +1 -0
README.md
CHANGED
@@ -1,3 +1,141 @@
---
language:
- zh-HK
license: apache-2.0
tags:
- automatic-speech-recognition
- common_voice
- generated_from_trainer
datasets:
- common_voice
model-index:
- name: ''
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

#

This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on the COMMON_VOICE - ZH-HK dataset.
It achieves the following results on the evaluation set:
- Loss: 0.8089
- Wer: 1.2499
- Cer: 0.3173

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 0.0001
- train_batch_size: 8
- eval_batch_size: 8
- seed: 42
- gradient_accumulation_steps: 4
- total_train_batch_size: 32
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: linear
- lr_scheduler_warmup_steps: 2000
- num_epochs: 100.0
- mixed_precision_training: Native AMP

### Training results

| Training Loss | Epoch | Step | Validation Loss | Wer | Cer |
|:-------------:|:-----:|:-----:|:---------------:|:------:|:------:|
| 69.8341 | 1.34 | 500 | 80.0722 | 1.0 | 1.0 |
| 6.6418 | 2.68 | 1000 | 6.6346 | 1.0 | 1.0 |
| 6.2419 | 4.02 | 1500 | 6.2909 | 1.0 | 1.0 |
| 6.0813 | 5.36 | 2000 | 6.1150 | 1.0 | 1.0 |
| 5.9677 | 6.7 | 2500 | 6.0301 | 1.1386 | 1.0028 |
| 5.9296 | 8.04 | 3000 | 5.8975 | 1.2113 | 1.0058 |
| 5.6434 | 9.38 | 3500 | 5.5404 | 2.1624 | 1.0171 |
| 5.1974 | 10.72 | 4000 | 4.5440 | 2.1702 | 0.9366 |
| 4.3601 | 12.06 | 4500 | 3.3839 | 2.2464 | 0.8998 |
| 3.9321 | 13.4 | 5000 | 2.8785 | 2.3097 | 0.8400 |
| 3.6462 | 14.74 | 5500 | 2.5108 | 1.9623 | 0.6663 |
| 3.5156 | 16.09 | 6000 | 2.2790 | 1.6479 | 0.5706 |
| 3.32 | 17.43 | 6500 | 2.1450 | 1.8337 | 0.6244 |
| 3.1918 | 18.77 | 7000 | 1.8536 | 1.9394 | 0.6017 |
| 3.1139 | 20.11 | 7500 | 1.7205 | 1.9112 | 0.5638 |
| 2.8995 | 21.45 | 8000 | 1.5478 | 1.0624 | 0.3250 |
| 2.7572 | 22.79 | 8500 | 1.4068 | 1.1412 | 0.3367 |
| 2.6881 | 24.13 | 9000 | 1.3312 | 2.0100 | 0.5683 |
| 2.5993 | 25.47 | 9500 | 1.2553 | 2.0039 | 0.6450 |
| 2.5304 | 26.81 | 10000 | 1.2422 | 2.0394 | 0.5789 |
| 2.4352 | 28.15 | 10500 | 1.1582 | 1.9970 | 0.5507 |
| 2.3795 | 29.49 | 11000 | 1.1160 | 1.8255 | 0.4844 |
| 2.3287 | 30.83 | 11500 | 1.0775 | 1.4123 | 0.3780 |
| 2.2622 | 32.17 | 12000 | 1.0704 | 1.7445 | 0.4894 |
| 2.2225 | 33.51 | 12500 | 1.0272 | 1.7237 | 0.5058 |
| 2.1843 | 34.85 | 13000 | 0.9756 | 1.8042 | 0.5028 |
| 2.1 | 36.19 | 13500 | 0.9527 | 1.8909 | 0.6055 |
| 2.0741 | 37.53 | 14000 | 0.9418 | 1.9026 | 0.5880 |
| 2.0179 | 38.87 | 14500 | 0.9363 | 1.7977 | 0.5246 |
| 2.0615 | 40.21 | 15000 | 0.9635 | 1.8112 | 0.5599 |
| 1.9448 | 41.55 | 15500 | 0.9249 | 1.7250 | 0.4914 |
| 1.8966 | 42.89 | 16000 | 0.9023 | 1.5829 | 0.4319 |
| 1.8662 | 44.24 | 16500 | 0.9002 | 1.4833 | 0.4230 |
| 1.8136 | 45.58 | 17000 | 0.9076 | 1.1828 | 0.2987 |
| 1.7908 | 46.92 | 17500 | 0.8774 | 1.5773 | 0.4258 |
| 1.7354 | 48.26 | 18000 | 0.8727 | 1.5037 | 0.4024 |
| 1.6739 | 49.6 | 18500 | 0.8636 | 1.1239 | 0.2789 |
| 1.6457 | 50.94 | 19000 | 0.8516 | 1.2269 | 0.3104 |
| 1.5847 | 52.28 | 19500 | 0.8399 | 1.3309 | 0.3360 |
| 1.5971 | 53.62 | 20000 | 0.8441 | 1.3153 | 0.3335 |
| 1.602 | 54.96 | 20500 | 0.8590 | 1.2932 | 0.3433 |
| 1.5063 | 56.3 | 21000 | 0.8334 | 1.1312 | 0.2875 |
| 1.4631 | 57.64 | 21500 | 0.8474 | 1.1698 | 0.2999 |
| 1.4997 | 58.98 | 22000 | 0.8638 | 1.4279 | 0.3854 |
| 1.4301 | 60.32 | 22500 | 0.8550 | 1.2737 | 0.3300 |
| 1.3798 | 61.66 | 23000 | 0.8266 | 1.1802 | 0.2934 |
| 1.3454 | 63.0 | 23500 | 0.8235 | 1.3816 | 0.3711 |
| 1.3678 | 64.34 | 24000 | 0.8550 | 1.6427 | 0.5035 |
| 1.3761 | 65.68 | 24500 | 0.8510 | 1.6709 | 0.4907 |
| 1.2668 | 67.02 | 25000 | 0.8515 | 1.5842 | 0.4505 |
| 1.2835 | 68.36 | 25500 | 0.8283 | 1.5353 | 0.4221 |
| 1.2961 | 69.7 | 26000 | 0.8339 | 1.5743 | 0.4369 |
| 1.2656 | 71.05 | 26500 | 0.8331 | 1.5331 | 0.4217 |
| 1.2556 | 72.39 | 27000 | 0.8242 | 1.4708 | 0.4109 |
| 1.2043 | 73.73 | 27500 | 0.8245 | 1.4469 | 0.4031 |
| 1.2722 | 75.07 | 28000 | 0.8202 | 1.4924 | 0.4096 |
| 1.202 | 76.41 | 28500 | 0.8290 | 1.3807 | 0.3719 |
| 1.1679 | 77.75 | 29000 | 0.8195 | 1.4097 | 0.3749 |
| 1.1967 | 79.09 | 29500 | 0.8059 | 1.2074 | 0.3077 |
| 1.1241 | 80.43 | 30000 | 0.8137 | 1.2451 | 0.3270 |
| 1.1414 | 81.77 | 30500 | 0.8117 | 1.2031 | 0.3121 |
| 1.132 | 83.11 | 31000 | 0.8234 | 1.4266 | 0.3901 |
| 1.0982 | 84.45 | 31500 | 0.8064 | 1.3712 | 0.3607 |
| 1.0797 | 85.79 | 32000 | 0.8167 | 1.3356 | 0.3562 |
| 1.0119 | 87.13 | 32500 | 0.8215 | 1.2754 | 0.3268 |
| 1.0216 | 88.47 | 33000 | 0.8163 | 1.2512 | 0.3184 |
| 1.0375 | 89.81 | 33500 | 0.8137 | 1.2685 | 0.3290 |
| 0.9794 | 91.15 | 34000 | 0.8220 | 1.2724 | 0.3255 |
| 1.0207 | 92.49 | 34500 | 0.8165 | 1.2906 | 0.3361 |
| 1.0169 | 93.83 | 35000 | 0.8153 | 1.2819 | 0.3305 |
| 1.0127 | 95.17 | 35500 | 0.8187 | 1.2832 | 0.3252 |
| 0.9978 | 96.51 | 36000 | 0.8111 | 1.2612 | 0.3210 |
| 0.9923 | 97.85 | 36500 | 0.8076 | 1.2278 | 0.3122 |
| 1.0451 | 99.2 | 37000 | 0.8086 | 1.2451 | 0.3156 |

### Framework versions

- Transformers 4.17.0.dev0
- Pytorch 1.10.2+cu102
- Datasets 1.18.4.dev0
- Tokenizers 0.11.0
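Usage sketch (not part of the committed files): the fine-tuned checkpoint can be loaded with the 🤗 Transformers ASR pipeline. The repo id below is taken from run_eval.sh in this repository; "sample.wav" is a hypothetical 16 kHz Cantonese recording.

# Minimal inference sketch; repo id from run_eval.sh, "sample.wav" is a placeholder.
from transformers import pipeline

asr = pipeline(
    "automatic-speech-recognition",
    model="w11wo/wav2vec2-xls-r-300m-zh-HK-v2",
)

# Chunked decoding with the same chunk/stride lengths that run_eval.sh uses.
print(asr("sample.wav", chunk_length_s=10, stride_length_s=2)["text"])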
added_tokens.json
ADDED
@@ -0,0 +1 @@
{"<s>": 3653, "</s>": 3654}
all_results.json
ADDED
@@ -0,0 +1,15 @@
{
    "epoch": 100.0,
    "eval_cer": 0.3173159976360413,
    "eval_loss": 0.8089390397071838,
    "eval_runtime": 116.9576,
    "eval_samples": 2302,
    "eval_samples_per_second": 19.682,
    "eval_steps_per_second": 2.462,
    "eval_wer": 1.2498917280207882,
    "train_loss": 3.7852419735087786,
    "train_runtime": 72640.1075,
    "train_samples": 11949,
    "train_samples_per_second": 16.45,
    "train_steps_per_second": 0.513
}
config.json
ADDED
@@ -0,0 +1,107 @@
{
  "_name_or_path": "facebook/wav2vec2-xls-r-300m",
  "activation_dropout": 0.1,
  "adapter_kernel_size": 3,
  "adapter_stride": 2,
  "add_adapter": false,
  "apply_spec_augment": true,
  "architectures": [
    "Wav2Vec2ForCTC"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "classifier_proj_size": 256,
  "codevector_dim": 768,
  "contrastive_logits_temperature": 0.1,
  "conv_bias": true,
  "conv_dim": [
    512,
    512,
    512,
    512,
    512,
    512,
    512
  ],
  "conv_kernel": [
    10,
    3,
    3,
    3,
    3,
    2,
    2
  ],
  "conv_stride": [
    5,
    2,
    2,
    2,
    2,
    2,
    2
  ],
  "ctc_loss_reduction": "mean",
  "ctc_zero_infinity": false,
  "diversity_loss_weight": 0.1,
  "do_stable_layer_norm": true,
  "eos_token_id": 2,
  "feat_extract_activation": "gelu",
  "feat_extract_dropout": 0.0,
  "feat_extract_norm": "layer",
  "feat_proj_dropout": 0.0,
  "feat_quantizer_dropout": 0.0,
  "final_dropout": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "layerdrop": 0.0,
  "mask_feature_length": 64,
  "mask_feature_min_masks": 0,
  "mask_feature_prob": 0.25,
  "mask_time_length": 10,
  "mask_time_min_masks": 2,
  "mask_time_prob": 0.75,
  "model_type": "wav2vec2",
  "num_adapter_layers": 3,
  "num_attention_heads": 16,
  "num_codevector_groups": 2,
  "num_codevectors_per_group": 320,
  "num_conv_pos_embedding_groups": 16,
  "num_conv_pos_embeddings": 128,
  "num_feat_extract_layers": 7,
  "num_hidden_layers": 24,
  "num_negatives": 100,
  "output_hidden_size": 1024,
  "pad_token_id": 3652,
  "proj_codevector_dim": 768,
  "tdnn_dilation": [
    1,
    2,
    3,
    1,
    1
  ],
  "tdnn_dim": [
    512,
    512,
    512,
    512,
    1500
  ],
  "tdnn_kernel": [
    5,
    3,
    3,
    1,
    1
  ],
  "torch_dtype": "float32",
  "transformers_version": "4.17.0.dev0",
  "use_weighted_layer_sum": false,
  "vocab_size": 3655,
  "xvector_output_dim": 512
}
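A small sanity-check sketch (not part of the commit): the SpecAugment and vocabulary values above can be read back with AutoConfig. The repo id is again taken from run_eval.sh.

from transformers import AutoConfig

# Fetch the committed config from the Hub and confirm the key values above.
config = AutoConfig.from_pretrained("w11wo/wav2vec2-xls-r-300m-zh-HK-v2")
assert config.mask_time_prob == 0.75 and config.mask_feature_prob == 0.25
assert config.vocab_size == 3655 and config.pad_token_id == 3652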
eval.py
ADDED
@@ -0,0 +1,169 @@
#!/usr/bin/env python3
import argparse
import re
from typing import Dict

import torch
from datasets import Audio, Dataset, load_dataset, load_metric

from transformers import AutoFeatureExtractor, pipeline


def log_results(result: Dataset, args: Dict[str, str]):
    """DO NOT CHANGE. This function computes and logs the result metrics."""

    log_outputs = args.log_outputs
    dataset_id = "_".join(args.dataset.split("/") + [args.config, args.split])

    # load metric
    wer = load_metric("wer")
    cer = load_metric("cer")

    # compute metrics
    wer_result = wer.compute(
        references=result["target"], predictions=result["prediction"]
    )
    cer_result = cer.compute(
        references=result["target"], predictions=result["prediction"]
    )

    # print & log results
    result_str = f"WER: {wer_result}\n" f"CER: {cer_result}"
    print(result_str)

    with open(f"{dataset_id}_eval_results.txt", "w") as f:
        f.write(result_str)

    # log all results in text file. Possibly interesting for analysis
    if log_outputs is not None:
        pred_file = f"log_{dataset_id}_predictions.txt"
        target_file = f"log_{dataset_id}_targets.txt"

        with open(pred_file, "w") as p, open(target_file, "w") as t:

            # mapping function to write output
            def write_to_file(batch, i):
                p.write(f"{i}" + "\n")
                p.write(batch["prediction"] + "\n")
                t.write(f"{i}" + "\n")
                t.write(batch["target"] + "\n")

            result.map(write_to_file, with_indices=True)


def normalize_text(text: str) -> str:
    """DO ADAPT FOR YOUR USE CASE. this function normalizes the target text."""

    chars_to_ignore_regex = '[,?.!\-\;\:"“%‘”�—’…–!-:– 》,),?;~~…︰,(」‧《﹔、—/,「﹖·]'  # noqa: W605 IMPORTANT: this should correspond to the chars that were ignored during training

    text = re.sub(chars_to_ignore_regex, "", text.lower())

    # In addition, we can normalize the target text, e.g. removing new lines characters etc...
    # note that order is important here!
    token_sequences_to_ignore = ["\n\n", "\n", "   ", "  "]

    for t in token_sequences_to_ignore:
        text = " ".join(text.split(t))

    return text


def main(args):
    # load dataset
    dataset = load_dataset(
        args.dataset, args.config, split=args.split, use_auth_token=True
    )

    # for testing: only process the first two examples as a test
    # dataset = dataset.select(range(10))

    # load processor
    feature_extractor = AutoFeatureExtractor.from_pretrained(args.model_id)
    sampling_rate = feature_extractor.sampling_rate

    # resample audio
    dataset = dataset.cast_column("audio", Audio(sampling_rate=sampling_rate))

    # load eval pipeline
    if args.device is None:
        args.device = 0 if torch.cuda.is_available() else -1
    asr = pipeline(
        "automatic-speech-recognition", model=args.model_id, device=args.device
    )

    # map function to decode audio
    def map_to_pred(batch):
        prediction = asr(
            batch["audio"]["array"],
            chunk_length_s=args.chunk_length_s,
            stride_length_s=args.stride_length_s,
        )

        batch["prediction"] = prediction["text"]
        batch["target"] = normalize_text(batch[args.text_column_name])
        return batch

    # run inference on all examples
    result = dataset.map(map_to_pred, remove_columns=dataset.column_names)

    # compute and log_results
    # do not change function below
    log_results(result, args)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--model_id",
        type=str,
        required=True,
        help="Model identifier. Should be loadable with 🤗 Transformers",
    )
    parser.add_argument(
        "--dataset",
        type=str,
        required=True,
        help="Dataset name to evaluate the `model_id`. Should be loadable with 🤗 Datasets",
    )
    parser.add_argument(
        "--config",
        type=str,
        required=True,
        help="Config of the dataset. *E.g.* `'en'` for Common Voice",
    )
    parser.add_argument(
        "--split", type=str, required=True, help="Split of the dataset. *E.g.* `'test'`"
    )
    parser.add_argument(
        "--text_column_name",
        type=str,
        default="text",
        help="The name of the dataset column containing the text data. Defaults to 'text'",
    )
    parser.add_argument(
        "--chunk_length_s",
        type=float,
        default=None,
        help="Chunk length in seconds. Defaults to 5 seconds.",
    )
    parser.add_argument(
        "--stride_length_s",
        type=float,
        default=None,
        help="Stride of the audio chunks. Defaults to 1 second.",
    )
    parser.add_argument(
        "--log_outputs",
        action="store_true",
        help="If defined, write outputs to log file for analysis.",
    )
    parser.add_argument(
        "--device",
        type=int,
        default=None,
        help="The device to run the pipeline on. -1 for CPU (default), 0 for the first GPU and so on.",
    )
    args = parser.parse_args()

    main(args)
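To make normalize_text concrete, here is an illustrative sketch (not part of the commit) with a made-up sentence; the character class is copied verbatim from the function above.

import re

chars_to_ignore_regex = '[,?.!\-\;\:"“%‘”�—’…–!-:– 》,),?;~~…︰,(」‧《﹔、—/,「﹖·]'  # noqa: W605

sample = "大家好,今日同大家分享!"
# lower() is a no-op for Chinese characters; the fullwidth comma and
# exclamation mark fall inside the class and are stripped.
print(re.sub(chars_to_ignore_regex, "", sample.lower()))  # -> 大家好今日同大家分享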
eval_results.json
ADDED
@@ -0,0 +1,10 @@
{
    "epoch": 100.0,
    "eval_cer": 0.3173159976360413,
    "eval_loss": 0.8089390397071838,
    "eval_runtime": 116.9576,
    "eval_samples": 2302,
    "eval_samples_per_second": 19.682,
    "eval_steps_per_second": 2.462,
    "eval_wer": 1.2498917280207882
}
log_speech-recognition-community-v2_dev_data_zh-HK_validation_predictions.txt
ADDED
@@ -0,0 +1,10 @@
0
大家好啊我阿生人好小企喺同地戰常比較風次嘅係喺文專大地陣至後力觀放鋪同地先阿今日同大家分向我提木係講真話至經過去十幾年喺喺內地彩房呀成日遇多啲天災人冇唔公平日事下覺得好方但好普阿又口你唔因為唉照故屋企兩我小朋友嘅原欣吖就返左嚟香港教書跟住返道嚟香港發港重嚟普果啲嚟普係古佢唔土客以前大陸嗰啲離浦天再人禾你經打咗底客你都知道有啊淨一定係如選啲事點發生正如頭先呀勾加倫楚爽客過去一年好多嘢係另到我哋哋估到公唔到行我哋一班關住國圈子嘅朋友係覺得有個種即急劇嘅鑽懷嘅情況係離得有急遊肯係唔有小小吃不笑所要有啲呀我你朋友都係度講笑話哋簡時啦係大陸比教多新問日時代你一債人到沖哂上跑身文今時今日啦可能我了夫換炮大錄線嘅記者都返離香港跑香港日日管字個再眼客呢個時後可能正係需要佢地我自機阿過去咁當年喺喺德大陸見到㗎撞放一係有好多嘢係好唔書服行可以睇一睇呢一個㗎呀怕我開畀嚿多齊成日見到呢千再人喎成日係啲人喺你前櫃低希望佢平事喺角種渠到新到個冤能夠透過香港我記者可以講得出嚟佢哋有啲係無死有人係諗住畀人打一身多一定要講根咗後黎啦暗至機係因咪照故屋企啦跟住就喺企寶也日就執包服阿咁我就返咗嚟教書咁今話喺啊比教號即緊張埋多令到自經唔書復嘅大陸心文彩放個度一三返黎香港咁企咗讓令我覺得好真好書復嘅就我自己嘅屋企客呀對住太太對住兩我小朋友覺得好多嘢係好親字呀唔做假客喺上面彩放啲假嘢係太多客士佢地嘅即啲摧怪即佢你扮嘢扮假嘢都扮得真個人果謝咁樣嘅我唔知咪海屎係假㗎嗰個咁香嘅你個係咪巴斯光年係咁話成面烏租辣達你都角得佢係可外客因咪佢係佢係唔收扮鬼蝦囉n都係硬你覺得好窩心啊呢幾年同小朋友我自機呀比教眼栽嘅時間係制同佢地去夜晚林訓係時候講股仔其中啦一個古仔叫文自工廠阿我個小朋友係呀白停不驗晚到成我講同有故時個古仔係講咩咁就冇講到啦拍從前係喺個國家硬如果你講嘢個話你就要畀錢去買文字㗎有錢人呢先可以講多一啲呀好有假直嘅慈會客呢啲豐附嘅語言謝有錢人講到會人窮人吖佢地就好慘呀嚇呀買唔到啲咩問字啦就時係無嘢講而有時啦就係要講埋啲真係呀唔好唉熱大家認為係好三教九留嘅嘢麗如係硬山羊個煲肚仔嘅皮古等今日喺呢個文自公廠暗麵吖呢啲咁樣嘅唉公廠呀面嘅車煙女公呀呢基係人日就錢呢民字出嚟賣喇咁咁日係好舞咁程呀呀咁就係流水作業㗎咁今日班同人點算啦呢個古仔一面一個呀主角際個非嚟坐喺係個老台嗰道老隻腳出離嗰講佢就喺個昌邊我度啦老台嗰度攞咁網就網呢文字客就希望呀望多一啲人地唔要個文字出黎啦就係嘢晚留返食飯日時後同屋企人講今日日黃呢個非歷商佢深而一個唉女仔西貝兒吖佢就好人生日喇今日非歷室特等草起一炸咁文字就想備同呢個西背兒講呀今晚口你點知佢呢啲嗰問文字都係頭先我鎖講日吐仔嘅㗎客屁古山陽個布布咁樣我呢啲淨簡淨係烏糟立撻嘅雨然係咁咁淨當佢要去搵呢個紅醫呀美笑裡西背兒嘅時候想同佢講行佢真心我說話嘅時後點知就出現咗佢嘅秤滌阿哩買喺呢度後面呀蒙絕面安忍叫做咩名呀有咩名我仔係前麵佢嘅續痾茶安茶佢啦就好有錢買要好多嘅語言有好嘅語然藝術跟住啦佢又對呢個西背講講拍硬我仔要我呢個通日係話我全心全意咁愛你嘅西背兒知道有一日地將每結分阿當騰同日非力十買唔起呢啲文字呀唔好有錢嘅呢個默安茶講出呢啲愛表白嘅時候甩飛嚟時就話係係係我死得 客有個情力當錢咁行佢去準備走嘅時候跟住先發覺原來呢個呀細貝兒啦冇畀個安茶打同跟住叻客佢就緊有個文字除育個啲杯咕同埋被估力佢仲喺啲垃圾防到啲文字就係亞灰診車嚟子同埋燈喺佢武魚言嘅情放抵吓佢就盡地一煲阿合埋傷眼牙好傾心咁樣將佢心物面真正有愛意向佢心移嘅呢有西背去講佢就話我車嚟子杯陳同埋燈西貝兒聽到之後啦客發覺聽到佢文字背後嘅愛聽到佢嘅愛意超月文呢個時後佢接受咗呢個主國飛力唔係因為佢嘅語然係術因為嘅真話為佢真深行我仔你最上台你相識一生係日停我婆布你一咁咩跟住你講你咁呢講咁行跟住阿我仔就成日要我同佢用唉我哋嘅雨言去講我哋一就同福年咗仔我佢咗玩車車唔你嗱跟住佢又佢又車車我咁人咁先人佢冇好風富嘅慈會但係呀我覺得呢一個真已經係超月哂一齊比起勇有好多持會嘅人係更加可鬼返度黎香港金時今日我發角阿我哋好多官園好多達官貴人佢地擁有話與權擁有國加基氣但係地真心佢無羅輯可以條可以跑真又得㗎啱當我你見到嗰炮跑開唔容日嗱香當係見到呀我地嘅斯局長幅綠受一齊出嚟推燒正是機已經成熟立目標非場非常清識行係時候有相有良但字普算咁目錶可能任面有啲協定嘅嘢冇講自己成熟喇我哋要針對西損打激嘅木標非常清識攪電咗呢啲人之後我哋就去做正我地睇官方界語然藝術呀睇觀文見隨咗睇佢要寫咩出嚟之外仲特別睇佢冇寫乜愛國愛講呢樣嘢係冇寫喺觀方呢個文見入面但係邊個到知道愛國愛講呢幾個折根本就係有寧價性㗎㗎作容係度我你見到所有硬內哋終鴦嘅一啲酸存港特守嘅後選人嘅條件行喺理非個港話之後第一句係話香港得守後選人係要愛國愛講呢個說話唔醒落去啦其實真係問題有關見就係語然近系術我哋都見島呀一啲電士百權行將台慶同捐錢比前線基舊係掛澳跟住時性機九去叫自己嘅元工去睇電事支持捐款我地至需有問一句係咪力年一台慶都係圈錢畀前線基舊點解捐錢比前基舊係三百幾晚捐畀七間捐得咁少如果呢個係扶合羅輯係年年都捐嘅今你你有叫人去睇嘅講得通如果年年都唔做年突然間要做客呢樣嘢就有問題啱你講你噉係人咁有係我講啱我自機成日記得句雪話就喺耐地硬好多人破和齊話一句真話架力量比整個世界仲要重當我哋香港依家睇到年理加成都要出利話香港係要有發政府運用佢嘅權力嘅時候唔可以選擇性當佢開重新再嚟嘅時後佢會選擇參預政節如果連呢啲近時護淘喺心陣日時候佢曾經對你加加成講過你生我無論點忙我一定要見一件你一個咁樣地為嘅人到金時今日佢竟然係要講出呢咁樣說話去做反激我你知道其實我你政府係出咗啲咩問題唔單只係我哋文間連上面的有已見嘅時候行我你知道其實硬香港我問題係相當嚴重阿我鋪留約小先生嘅一個廣法呀近年吖愛地去針對一啲嘅反動哂力啦所講我講法個持會上已經係有好明顯嘅唔同同喺意千年投賭囉客外地講呢啲個反動哂力仲係叫做外國勢力大佢寧四年到啦就叫做愛來力最近兩年改成外部世力又愛國專來最後進外報世力係咩意思啦即係話內地將敵人嘅圈子越滑大自己人越呱月勢外國勢力淨係只一啲國家機關是領館愛來細力只一啲包括台讀呀可我文問人事聽知老等等以愛報世力你唔我支期你唔係我自己有㗎話你已經係外部力客呢一樣野係見到嗰個敵人嘅圈子係月或月大時今日我你見到國防大學做嘅一個個關於間碟反動世力嘅宣片入面已經啄將呢個接中埋國文教育等係同外報世力係去或上等號行我哋都聽到黃維基先生室卻歌電視牌照亦都有呢咁樣可能嘅欣數制同泥資應先生氹係同中尖上邊係比人教睇睇呢段雪話黑愛地一個唔講話大家去估係邊個講阿阿我哋要噉嘩噉管下噉於兩劍對嗰啲靚惡意公擊黨阿令度公嘅社會主義制度歪曲黨士國士做遙生是嘅然論一切報安集折港台論壇會以會場可能呢個有分電影電事廣播電台冇台劇場都不能為知提公空間睇係咩時那呢個量劍係內地重福播左好多次嘅電時劇係居守三先幾次重幫五年來廣播電台不能為知提公空間電仙不能維知提供空間無台劇山文隔氣不能為知提公空間呢個說話制集近評八一九針對住傳國知賞宣全工作會意所講呀話呢有講話喺係全國黨員你入面學習得追多嘅一返日講話上面攪緊意識形體豆爭攪緊國家安全關係度黨嘅生死全黃港到有無限嘅危險香港幾近成為力黎最嚴重日拆返機地喺呢個咁呢咁緊逼嘅情況抵下啦我地關住中國圈之有朋友係感受到嗰種聲風血語咁受到支前所俾人啄嘅人唔為一嘅人任面所冇講嘅愛國講客上面所港嘅淨愛國愛甚至係愛國黨愛嚴啱一啲問題我哋個政改觀方入面鋪咗橋曉洋夫咗利非呢啲人大發律嘅工作嘅領道但係條曉樣佢係唔冇讀個發律嘅佢真係讀過幾年西班牙愛雨佢令百風嘅待然人可能你飛佢都係冇正識受過發律嘅分年佢個本科係阿殼日雨媽茶係殼學世界經濟後黎啦佢用咗大概一兩年時間喺入本呀學雙業發之後佢就成為咗為我哋香港七百萬人解識前圖決定命運嘅一個陰踩大神呢一樣嘢我相信好多香港人都唔會怪心招香港人繼續堅持要聽真話雞續堅持政府唔可以講假話我相信我地嘅政府或者北徑都唔可以太過分多食個
1
各位我琴晚發咗個忙我夢見天堂今期有個倫會大休三萬日中極精由咁木標客戶係乜嘢啦就係就黎出世嘅俾個朝係點㗎啦就係如果你肯根領到嘅指時跟住大隊走保正你一生行程緊全請六聲級嘅食熟人生嘅所有嘅正材竟點都俾你玩如果你有呢個選咋你會唔會想車我諗返氣人生大佢有八支十年一千個月三萬我過咗大半諗返起我大半生其實我就係參家咗呢個團我至西好聽爸媽麻媽嘅安排係畀心記讀書攞到好嘅成職有 幼至院至研究院我低名校讀書不該要新步一點我保博時學亦都冇係非女品讀書我入帶學關使我就揀咗一科好有用藥便記工程同咁點解電基工程就我見到所有叻人都讀呢科㗎咁讀得呢貨當年都係x人吖讀完書之後就好似八十年代長春層大學嘅好多同學一養我去咗華已街工作做咗兩年份聽做喺嚟就同其他同柴一樣去咗呀比塞高獨一b你地返返黎香港做咗幾年眼本你顧文參多時間就結分生仔而家有兩個好可愛嘅而子睇番轉頭愛咁講講我嘅成長實現咗我相上香港大部分爸爸媽或者老師校長嘅元網攪新今日同各位再座嘅青年人講千祈唔好咁聽話我希望今待嘅青年人畀我更家努力噉搵到同埋堅持到自己極激程所去泊常心你學有過蓋念叫科壓我澳集陽撈嘅二思就係苗寫一個人喺極端專主嘅撞太乾時佢全情投入好有同力參多可以網機晒所有旁邊發生嘅時求先你見到古手提到你日本打古案屎打到自機原來已經脫晒水就嚟搵都唔覺我世仔係一個好叻嘅手門園同我講佢手十二馬該時佢係可以完全聽唔到旁邊嘅集音人佢見到嘅淨係喺十二馬點個波同咪即章射頭嘅嗰個對手求員佢完全睇唔到聽唔到周圍發生嘅時我記多讀大學該時我最積極獨嘅唔係我嘅主收科係我好有興趣有關四十代中日戰爭同埋六十年代美國今禮的政府嘅一啲歷時我有興趣到個科未開始愛經去圖書管提嗰啲書唔係希望攞更好嘅成職以係我真心地好有興趣呢幾段歷市我好尊主搵知料搵力使日黨案寫文將都特別快藥成職亦都當然特別好雖然係咁我高我同當時嘅而家嘅青年日一樣覺得歷屎記搵唔到食有幫唔到我不葉有朝少醜張一就係點解里想同埋燕實好似永園溝唔埋咁㗎北過我後來聽到蘋過電腦嘅創扮人橋普師層經係一角呀大後不業嚟咁唔講過人生日鬼積啦往我係試後先會睇到佢嘅例子就係佢連稱更市係一個寫區學苑獨過一科有關字既書乾士其實冇乜用去淨係覺得啲字體好有翠好靚但係呢個咁樣嘅科技佢學島之後制電定咗將來蘋果電腦比人選治睇呢個咁創生入毛識相信再在連基大啲有人記得議錢挨邊喺電腦係無呢嗰咁樣嘅通記得有平論有平兩話講孩特質周喺唔塞知考唔爭取佢哋嘅典營搭案就係唔知今點樣今時今日我哋點樣教清年人搵到自機嘅一啲中意嘅嘢我記得我仔十八歲乾時我曾經咁同佢講我唔希望佢行返爸爸爸媽嘅腳報同一時間我亦都唔希望爸爸媽行過嘅佢就笪燈唔節學家離採以都咁講過人生佢話全統常藝全說中凍物有七層但係人就扑咗七十倍七層日愛殼亦公未必搵到自己佢教稱年人府心自萬問自己層經心愛過啲乜嘢乜嘢能夠令自己全情頭入任無比門燭如果將自己嘅自愛一自排開重佢地一黐災同埋本質請年人會晚搵屬於自己嘅真或啫返返嚟天堂呢個輪會大家圍三萬日中極正環遊有人會根大隊做好應應該做嘅時有人唔會選擇自由行自由行你可能有時會行錯路晒就啲時間但係同一時間你亦都可能會搵到一啲令你經起令到呢角得北往持生日經驗無論你係根團或者係自由行我到祝各位能夠搵都一個真正令到令滿足正屬於自己嘅精採人生女情當隻覺
2
何搵聽到呢個三過字呢咁名大家會聯想起咩嘅一物睇有無多手東菇小穿車其實我想話畀大架聽我係真正身粉係一隻白老鼠同我好都係白老鼠一讓啦其實我都有死粵嘅我個死月呢就係籠入便嗰個碌即呢一本老鼠件到個碌你無會沖上爽跑嘅我有呢個碌嘅我個碌個名叫做唔得碌支係簡單呢講啦就係每當有人嚟同我面前統講唔得我就會即刻好似果啲老鼠咁俾人咁左仔通就去嗰輪度爽跑嘅篇篇我又進入左一個最多呢啲碌嘅地方就係香港原來圈喺我嘅士業上面出現嘅第一個呢個唔得碌叻係一九八年當時我啱啱入巷黎為一嘅葉即拉就係正遇頭台先個一朋友所講好灰氣地唱咗一手卡通畀日主提曲叫措小院這的心事果然係街支學文呀咁呢就係喺為叻我重得咁灰氣原欄刻咁眼說為有鬼嘅絲兄佢橋彩一自佢就咁樣真係唔咪辦法我真要大你出去直數嘈下院唔係一你就永願咁回啦你都搞唔掂嘅咁我結果呢就根咗句去一個嘢掂就呀入到去啦見代一位渡然咁我就係好流啦坐帝同佢開始去好面強哋嘅寒圈咁好唔成共第因為寒圈咗一陣一佢已經同我講佢呀屈你個樣咁鬼權真地你我睇你都係做唔場日呢個圈度我圈你杯如係改下量成隔喇你喺呢個圈度咁樣叻係唔得架第一咁唔得得依該唔得喺若乾年後呀我已經積趕人時率開始出到我嘅兩三就點啱正當我哋開始尖自起個得應該到一風信知濟我嘅場面公司出現一個同堂所有嘅原本嘅工換晒高層一個新嘅入離咁我華而一佢睇我唔順眼人因為甩佢聽日返工就叫中入佢講度一坐低踢頭第一句佢就話嗱上面公司都畀你玩咗咁耐啦你個都玩舊呀喇以後我叫你唱你就唱我你做乜你就乜包你紅佢話左哥手係呢個魚落圈係你有惡壇咁令耐係唔得架第而咁唔其實點樣我哋有哋二方咁都唔得㗎啦好次得係我哋一出勢嘅時候就已經付送咗一個唔得嘅青蛋咁呢青單有啲咩呢仔係唔得篇嚟嗰個仔度啦唔得對抗制唔得同人地唔同天天又有好多人佢哋都好因然地接受角種嘅啲唔得因為我估係因為大家活鏡淨我哋會驚如果我要對抗呢啲唔得嘅話我可能會室去一啲嘢我可能會變成其他人晏中嘅一個壞拖再為一個經常地喺人定眼中自我位滅同賣自位前情嘅一個係反半嘅白腦鼠呢其實我都經常會問點解點解會搞齊咁日呢真係我夠竟發生咗啲咩嘅出咗啲咩挫啦結個果畀我搵到兩個原頭原頭一呢位男子佢星青自己係有位老事但係我驗唔重哋思兒其就佢有位大佬嚟因係佢返學喺校人一非常咁票仗揸呢一假金識尖咀嘅列火展車返學仲要不停咁乜修靚重點係大嘅都唔秀嘅佢最有收位個醉要大嘅學生單啲俾人踢出校就保住佢嗰引咁名叫導文習願透意呢好大家見下睇下佢個龍裝妝企褲嘅列女碌佢並不士一個煙啲啤嘅落球其實佢亦都係一位老師嚟嘅但係唔知解佢就選扎咗喺用一個啫反全統係莊速每一日u返學着到登台今乜款你喇佢一學生喺大點無耐嘅你見呢兩我喺嚇頭站麵作絕咗反半兩個辭嘅老師一位聖學另阿位而家饕係盛河今咪坐大你頭今日比個低調佢地兩多嘅鐵合生咗我呢一隻中極反伴日叉紹我中於明白咗啦原來係啲嘢n你出錯除所啲呢知來啦其實係我相信你喺元圖遇到啲咩係人呢亦人非支重要嘅咁我完討呢識到冇做咁多個壞朋友呢睇話朋友一不單次古黎我去跳賤角種嘅佢啲廣輪啦佢仲陪我一齊事企中一覺啦我唔會透路佢嘅全名我清佢圍黃x明睇以零一前係時候經我佢嘅鼓黎同碼種擁我決定出櫃再家咗兩連我地兩個嘅實驗再星我哋唔知點解一齊參與楚一個香港市上最大型嘅一個學生運動必兩個中極嘅唔得嘅滾輪我去挑賤咗之後果然係大利一啲唔同嘅後果嘅硬我鼻風法啦更全我失去長一個市場失去咗啲工助嘅機會但係萬晚我有發現原來呢一啲嘅決定係令到我可以更產然哋面對自己更產現地去面對呢個世界因為冇咗個時場令到我會去諗底啲方法去第啲嘅時場記果我發現原來冇咗期中一個係真係會餓死我不斷今蚊用我自己嘅士業用我自己就係個倒住其實就係我唔金心我唔今心去接受角種嘅呢啲唔得當然喺個努情上面我係會失敗過我會有 咀喪過甚至我有試過想放棄但係我就係想正實俾自嘅體其實路係唔可能得一條喺人地眼中不斷有乜自以位前情其實對我嚟講我係喺度打舖緊我舊有嘅啲路緊啲舊有嘅抗關常士去搵出一條更架識合自嘅嘅條努咁當然有好多人會問喂唔驚嘅咩真點解你你唔拍失去一齊嘅咩我諗其就係就魚呢一句說話所講你出嘩呢一句說話嚟子喺位美國嘅作家佢叫做標協唉作為一隻不段不由自主地跑上呢啲搵輪嘅白老鼠我一老跑一老跑跑阿跑阿我發現其實原來呢啲滾輪佢萬晚影便成咗一個過山車入便嘅嗰地圈圈我採喺呢個山車常面望返落去地面上好安全好平搵嘅其他人愛發現原來喺呢個過山車上面有高有低有起有跌更家好玩用我發現我比其他嘅人睇嘅風竟架多我噉受到嘅都更加強列同埋我嘅膽量亦都麵得更家強大如果我哋會一個人都可以將人生檔咗一個實驗將我哋自己便成自己嘢隻百老書我相順你唔會再假意做嘅嘢係啱係錯係好定壞你淨係會去諗究竟呢個實驗會成圍令到地成為一個盡化成我一個點樣嘅人聽期阿根住落嚟啦我想請出令我一隻嘅白佬鼠出有要桑年佢係係雞蛋精育餅嘅一結巴手我地係同女你有睇個投知人識下域咪左好道坊米呃扭啦啊唉呢一隻哥啦我喺望送畀上班唔我得大小仆冰呢隻個兩我希望呀送畀每一個得自己係怪拖嗰得己唔非呢個界世嘅人呀呢隻哥入變係講緊一個英紅佢叫做風件字狼走你畫職復到命知直照都知欣山講相記生五人世間知怪一點令唔見都山唔再避均姐溫室加重日今熱用秒燒的竹抗條嘅生啦知得啦人粒粒粒心醬飛官真魚得句好 唔涼泰的身區好比要一天一天氣識家姐不他今心去乜見都裝次你順間知張算打見蛋路尾車唔入細衰都都生鬧沙開乾聲長最聲地格蟲也呼置用最春的方法用d畀算下錢啦你一些些一得出多啤安的結果怕都不咁團就蒜跨啦唔他都珍式他的方一��一天氣色家者得怕今心去人唔間放聲法一聲出手唔怕終於的一聲家者她今心句數聽條
3
啦今隻落嚟呢手到奶到食場鏡我餐佢去哋你腸格好冇呀就咩啦好嘩都波卷做都句都吊d都都都都m住都都吊段度啦都好好魚魚係碌車㗎絕多的生美左地科喺屯尾越都必呢珠過左喺做但到到家見真但咩係撞人嘅唉高拉爹照會裝啦到魚條係奶小該家解五五晒嘅方但並鹽窩嘅的沙長到邊滘你叫你定但解見嘅出喜對要人氣哋對要知不難園車今添路高拉先隻真聲啦見聲啦做隻知渣尾尾杯隻㗎啦日啡d姐打人赤架一吊邊魚段隻備巴實爹都叫嗲做隻揸一段吊要晒車㗎姐路知多知家一段晒車定寫地約瓜又漏丸會姐都算呀聲業先聲定香成定神紅原樹連都該花炒硬徑硬靚兩成車你定點想書船唔該信點西界同有d點越書人有都地爹相大車地橋會去成都裝上比病訊淡見小底到環相個好啲本里成奶見但蛇人中時時再興山影但味對但車多你揚龍越會哋哋長d相都時過重定哋解天白白魚分方命炸去仙花現偷裝室街品乜車過社應死時中啱妹d本靚啦記一啊東中都都都知天老中當到當工極下知得士上真士五到極咩魚車到爆上炸多做做見周行上真心金上去但到門旺環叫竟車車竟事住姐人到呢生唔長長知多天句過唔唔旺到知車時會照姐到爆爆得唔長越見見見見見
4
今拍哥就一種可以係係一中運動呀走令到你以贏贏快涌有校咁樣去以同係嗱咁跟住你就會馬甩其實熱呢啲嘢咁揀得我都做到啦咁其就係嘅你做到係又好都好機本嘅動作咁我你唉有請我你香港非度協會嘅府原啦學原啦去幫嚟時繁咁第一過叻真係撈靈啦咁真一啲返搵嘅動作咁期語係日呢家係唉睇落好難啦大一係個好基用動就去幫咗你射嚟嘅咁就啲咩你有金評啦靚定係一啲跳藥嘅作通常呢動咗喺就你片度睇度惡好靚呀好犀利近人係咁緊住最口呢一啲平行力嘅練集啦咁其就你都做好多烈方面嘅亦好咁呢幾種講完之有一其實就係成件時最初最贏我地靚個嘅治無最初嘅早成部分咁我你將呢種所喝可以叫做靚二動嘅藝實啦兒動嘅靚句哂咁日咁係其頭應太片到知啦贏我開始嘅由叻有個朋友叫我去一喂好玩日嚟玩下喇呀咁第二朝起真就出時就真係痛呀我落d啲攞以把姐返學呀撐怪上返學原全郁唔討呀樣咁緊要我就發覺其實而我你係好唔知㗎係你唉大家支唔知贏係平時我連嘅有睇能係興物咁你你都要諗客人其實你對上一次做運動係幾時今日唉拍哥展轉反質物熟自顯轉係令到自己係係令到我邊左唉個見世教年呀咁最多人一做問我點點樣可以有錄嚿福技呀樣點樣可通記大啲人樣其仔一你將個成我康隻掉鑽咗咁我我喺呢度一就想跳戰大家對運動係睇法大家都知要運動係好㗎咁大你對上一次做運人幾時睇我堂咁企企氣係好嘅咁大大係冇論做因為身邊人冇冇人做啦冇時間啦係太忙忙啦咁咁係嘅大一係大家應該好好咁樣現啡至下自己嘅身題其實呢嘢好但嘅佢你電咗可能一連兩年三 唔係一啲或由世個開始電起呀物無啲嘅嘅時嘅我都好大個開始年曉你五年木類前見到我同里哋個生材一無養係制架係實咁呢係今日嘅主大係條邊有碌條別咁贏對我你講喺呢個係靚唔係個選擇唔選習我街別宜呀點解一應為贏我你嘅生睇一其大好味妙我你嘅身睇係最福集最福站嘅機氣力咁一我唔咪叫你亞你一定要嚟撐教我哋嘅活動呀唔需要係我太度人呀咁大唉你可以做啲咩一仔真係諗下自己可以係唔係真係去隻跑步呀成係跳醒硬搵就算看門成咁唉大你開以諗下呀有冇其他下放快運動力係條冇呀可能一再座嘅會位可能入多你會一喺個明聖運動員可能你係睇涌黃時康你係排嗰咪告隻神你唔知係大育你唔士有一你泳園都唔會知道係啲人係陣係咁呀呢要貼企位都係年有幾年嘅咁日而係唔使擔想話子太始開始我呢個返叻二十四歲先你學成嘅係嚟多地大骨魚嬲
log_speech-recognition-community-v2_dev_data_zh-HK_validation_targets.txt
ADDED
@@ -0,0 +1,10 @@
0
大家好 我一生人好少企喺紅地毯上 比較諷刺嘅係 喺汶川大地震之後呢官方係鋪紅地毯 今日同大家分享嘅題目係講真話 自己過去十幾年呢喺內地採訪啦 成日遇到一啲天災人禍唔公平嘅事 吓。覺得好荒誕好離譜 之後後嚟因為照顧屋企 兩個小朋友嘅原因呢 就返咗嚟香港教書 跟住返到嚟香港發覺仲離譜 嗰啲離譜係估佢唔到嘅 吓。以前大陸嗰嘅離譜天災人禍 你已經打咗底 吓。你都知道有 即一定嘅預算啲事點發生 正如頭先呀區家麟所講 吓。過去一年好多嘢 係令到我哋啫係估都估唔到 吓。我哋一班關注中國圈子嘅朋友 係覺得有嗰種啫係急劇嘅轉壞嘅情況 係嚟得又急又恨 係有少少吃不消 所以有一啲我哋嘅朋友都喺度講笑話 啫係近時呢大陸比較多新聞嘅時代呢 我哋一乍人就衝晒上去跑新聞 今時今日呢我哋可能要呼喚啲 跑大陸線嘅記者都返嚟香港 跑香港嘅管治嘅災難 吓。呢個時候可能正係需要佢哋 吓。我自己過去噉多年呢 喺大陸見到嘅狀況呢係 有好多嘢係好唔舒服 吓。可以睇一睇呢一個嘅powerpoint 吓。比較多就係成日見到啲天災人禍 成日就係啲人喺你面前跪低 希望佢平時喺各種渠道 申唔到嘅冤能夠透過香港嘅記者 可以講得出嚟 佢哋有啲人係冒死 有啲人係諗住俾人打一生都一定要講 跟住後嚟呢自己係因為照顧屋企啦 跟住就喺cable有一日就執包伏 吓。噉我就返咗嚟教書噉 喺比較緊張同埋多令到自己唔舒服嘅 大陸新聞採訪嗰度呢返嚟香港 咁其中一樣令我覺得好真好舒服嘅 就係我自己嘅屋企 吓。對住太太 對住兩個小朋友 覺得好多嘢係好真摯唔做假 吓。喺上面採訪嗰啲假嘢係太多 吓。吓。即使佢哋嘅即係啲趣怪 吓。即使佢哋扮嘢 扮假嘢都扮得真過人 吓。嗰隻噉樣嘅我唔知係咪海獅 係假嘅 吓。嗰個噉樣嘅 呢個係咩呀 巴斯光年係假嘅 噉呀成面污糟邋遢你都覺得佢係可愛 吓。因為佢係真 佢係唔修飾。吓 扮鬼halloween都係你覺得好窩心 吓。呢幾年同小朋友 我自己比較enjoy嘅時間呢 就係同佢哋去夜晚臨瞓嘅時候講故仔 其中呢一個故仔叫文字工廠 我個小朋友係百聽不厭 成日晚晚都嗌我講同一個故事 個故仔係講咩呢 噉就講到呢吓從前呢有一個國家 如果你講嘢嘅話呢 你就要俾錢去買文字嘅 有錢人呢先可以講到 一啲好有價值嘅詞彙 吓。一啲豐富嘅詞言 剩係有錢人講到架咋 窮人呢佢哋就好慘嘅 吓。買唔到啲咩文字啦吓 有時係冇嘢講 而有時呢就係要講埋啲 啫係大家認為係好三教九流嘅嘢 例如係山羊嘅呠呠 兔仔嘅屁股等等 噉喺呢個文字工廠入邊呢 呢啲噉樣嘅工廠入邊嘅車衣女工啦吓 呢啲機械人 日日就剪啲文字出嚟賣啦噉 噉就係好冇感情嘅 吓。噉就係流水作業呀噉 噉一班窮人點算呢 呢個故仔入面嘅一個主角 就係felix坐喺個露台嗰度 露咗隻腳出嚟嗰個 吓。佢就喺個窗邊嗰度呢露台嗰度 攞個網就網啲文字 吓。就希望呢 網到一啲人哋唔要嘅文字出嚟呢 就係夜晚留返食飯嘅時候同屋企人講 噉有一日喎 呢個felix呢佢心儀一個女仔西貝兒呢 佢就生日呀 噉呀felix呢特登蓄起咗一咋嘅文字 就想同呢個西貝兒講呀 咁呀後嚟佢呢啲文字 都係頭先我所講嘅 兔仔嘅吓屁股 山羊嘅呠呠噉樣 哇呢啲真係簡直係 污糟邋遢嘅語言呀噉 噉正當佢要去搵呢個 紅姨美少女西貝兒嘅時候 想同佢講。吓。佢真心嘅說話嘅時候 點知就出現咗佢嘅情敵 吓。匿埋喺呢度後面蒙住面嗰個人 叫做咩名話叫咩名呀 我仔喺前面 佢叫做oscar oscar呢佢呢就好有錢 買咗好多嘅語言 有好多嘅語言藝術 跟住呢佢就對呢個西貝兒講 吓。我個仔成日要我呢個tone呢 就係話 我全心全意噉愛你我嘅西貝兒 我知道有一日我哋將會結緍。 哦﹗當貧窮嘅felix買唔起呢啲文字 好有錢嘅呢個咁嘅oscar講出 呢啲愛嘅表白嘅時候呢 felix就話唉。我死得啦。 吓。有個情敵當前噉 吓。佢準備走嘅時候 跟住先發覺原來呢個西貝兒呢 冇俾個oscar打動 跟住呢。吓。佢就僅有嘅文字 除咗嗰啲呠呠同埋屁股呢 佢仲喺啲垃圾房執到啲文字 就係灰塵車厘子同埋凳 喺佢冇語言嘅情況底下 佢就盡地一鋪 合埋雙眼 好傾心噉樣將佢心入邊真正嘅愛意 向佢心儀嘅呢個西貝兒去講 佢就係話 我車厘子灰塵同埋凳你。 西貝兒聽到之後呢 吓。發覺聽到佢文字背後嘅愛意 聽到佢嘅愛意超越文字 呢個時候 吓。佢接受咗呢個主角felix 吓。唔係因為佢嘅語言藝術 因為佢嘅真話 因為佢嘅真心 吓。我個仔你鍾意上台你上呀 你上嚟呀嚟呀你上嚟呀。呢度 係咪呀 係啦 我抱你呀 噉呢跟住呢 你講呀 你襟呢個你襟啦 吓。跟住呢我個仔就成日 要我同佢用我哋嘅語言去講 我哋呢就重複練咗就係我 佢好鍾意玩車 我車車你。 跟住佢又佢又車車我。噉樣 噉雖然佢冇好豐富嘅詞彙 但係我覺得呢一個真已經係超越晒一切 比起擁有好多詞彙嘅人係更加可貴 返到嚟香港 今時今日我發覺我哋 好多官員好多達官貴人 佢哋擁有話語權 擁有國家機器 但係佢哋冇真心 佢哋冇邏輯 可以跳嘅可以跑嘅 真就得架啦 當我哋見到 個powerpoint可以唔用架啦 當我哋見到我哋嘅司局長 福祿壽一齊出嚟推銷政改 時機已經成熟啦 目標非常清晰。 係時候有商有量逹至普選嘅目標。 可能入邊有啲hidden嘅嘢冇講 時機成熟啦 我哋要針對篩選打擊嘅目標非常清晰。 攪掂咗呢啲人之後 我哋就可以做政改。 我哋睇官方嘅語言藝術睇官方嘅文件 除咗睇佢要寫咩出嚟之外 仲特別睇佢冇寫乜嘢 愛國愛港呢樣嘢 係冇寫喺官方呢個文件入邊 但係邊個都知道愛國愛港呢幾個字 根本就係有凌駕性嘅作用喺度 我哋見到所有內地中央嘅一啲宣傳 講特首嘅候選人嘅條件 吓。喺李飛嗰個講話之後第一句 就係話香港特首候選人係要愛國愛港 呢句說話唔寫落去呢 其實就係問題嘅關鍵 就係語言嘅偽術 我哋都見到一啲電視霸權 吓。將台慶同捐錢俾慈善機構係掛鉤 吓。跟住慈善機構去叫自己嘅員工 去睇電視支持捐款 我哋只需要問一句 係咪歷年嘅台慶都係捐錢俾慈善機構 點解捐錢俾慈善機構係三百幾萬 捐俾七間 捐得咁少呢 如果呢個係符合邏輯 係年年都捐嘅 今年你又叫人去睇嘅 講得通 如果年年都唔做 今年突然間要做 吓。呢樣嘢就有問題 你講呀你講你講呀 仔仔你講 講者係呀噉又係我講 我自己成日記得一句說話 就喺內地 好多人quote嘅 就係話 一句真話嘅力量比整個世界仲要重 當我哋香港依家睇到 連李嘉誠都要出嚟話香港係要有法治 政府運用佢嘅權力嘅時候唔可以選擇性 當佢可以重新再嚟嘅時候 佢會選擇參與政治 如果連呢啲 有梗時胡錦濤喺深圳嘅時候 佢曾經對李嘉誠講過 李生我無論點忙 我都一定要見一見你。 一個噉樣地位嘅人到今時今日 佢竟然係要講出啲噉樣嘅說話去做反擊 我哋知道其實我哋政府係出咗啲咩問題 唔單止係我哋民間 連上面都有意見嘅時候 吓。我哋都知道 其實香港嘅問題係相當嚴重 我quote劉悅紹先生嘅一個講法 近年呢內地去針對一啲嘅反動勢力呢 所講嘅講法 個詞彙上已經係有好明顯嘅唔同 喺二千年頭度啦吓 內地講呢啲嘅反動勢力呢 仲係叫做外國勢力 大概零四年度呢 就叫做外來勢力 最近兩年呢改成外部勢力 由外國轉外來 最後轉外部勢力 係咩意思呢 即係話內地將敵人嘅圈子愈畫愈大 自己人愈框愈細 外國勢力正係指 一啲國家機關使領館 外來勢力指一啲包括台獨 海外民運人士ngo等等 而外部勢力 你唔係我支旗你唔係我自己友嘅話 你已經係外部勢力啦 吓呢一樣嘢 係見到嗰個敵人嘅圈子係愈畫愈大 今時今日我哋見到國防大學做嘅 一個關於間諜反動勢力嘅宣傳片入邊 已經將呢個佔中同埋國民教育等等呢 係同外部勢力係去劃上等號 吓。我哋都聽到 王維基先生失卻嗰個電視牌照 亦都有呢啲噉樣可能嘅因素 就係同黎智英先生 同埋同佔中沾上邊係俾人搞 睇一睇呢段說話 吓。內地呢一個講話 大家可以估下係邊個講 吓。我哋要敢抓敢管 敢於亮劍 對嗰啲呢惡意攻擊黨嘅領導 攻擊社會主義制度 歪曲黨史國史做遙生事嘅言論 一切報刊雜誌講台論壇 會議會場 可能呢個都有份 電影電視 廣播電台舞台劇場 都不能為之提供空間。 睇係咩事啦 呢個亮劍 係內地重複播咗好多次嘅電視劇 係居首呀 三千幾次重播五年內 廣播電台不能為之提供空間 電視不能為之提供空間 舞台劇刪文革戲不能為之提供空間 呢句說話就係習近平819 針對住全國思想宣傳工作會議 所講嘅講話 呢個講話呢係全國黨員呢 入面學習得最多嘅一番嘅講話嚟架 上面搞梗意識形態鬥爭 搞梗國家安全關係到黨嘅生死存亡 講到有無限嘅危險 香港幾近成為歷嚟最嚴重嘅拆彈基地 喺呢個咁啫係咁緊迫嘅情況底下呢 我哋關注中國圈子嘅朋友 係感受到嗰種腥風血雨 
感受到之前所俾人啄嘅人唔係唯一嘅人 入面所冇講嘅愛國愛港 吓。上面所講嘅就係愛國愛黨 甚至係愛國愛黨愛鹽廠 有一啲問題 我哋嘅政改 官方入面quote咗喬曉陽 quote咗李飛 呢啲人大法律嘅工作嘅領導 但係喬曉陽佢係冇讀過法律嘅 佢只係讀過幾年西班牙外語 佢係靈格風嘅代言人呀可能 李飛佢都係冇正式受過法律嘅訓練 佢嘅本科係學日語 master係學世界經濟 後嚟呢佢用咗大概一兩年時間 喺日本呢學商業法 之後佢就成為咗為我哋香港七百萬人 解釋前途決定命運嘅一個欽差大臣 呢一樣嘢我相信好多香港人都唔會關心 只要香港人繼續堅持要聽真話 繼續堅持政府唔可以講假話 我相信我哋嘅政府或者北京 都唔可以太過過分 多謝各位
1
各位 我琴晚發咗個夢 我夢見天堂今期 有個輪迴大優惠 三萬日 終極精華遊 嗰目標客戶係乜嘢呢 就係就嚟出世嘅bb 個deal係點嘅呢 就係 如果你肯跟領隊嘅指示 跟住大隊走 保證你一生行程緊湊 全程 六星級嘅食宿 人生嘅所有嘅 精彩景點都俾你玩盡 如果你有呢個選擇 你會唔會上車呢 我諗番起 人生大概有八十年 一千個月三萬日 我過咗大半 諗番起我大半生 其實我就係參加咗呢個團囖 我自細好聽爸爸媽媽嘅安排 畀心機讀書攞到好好嘅成績 由幼稚園至研究院 我都喺名校讀書 不過要申報一點 我冇博士學位 亦都冇喺菲律賓讀過書 我入大學嗰陣時 我就揀咗一科 好有用嘅電機工程讀 咁點解電機工程呢 就係我見到所有叻人都讀呢科嘅 咁讀得呢科當然都係叻人喇 讀完書之後 就好似八十年代 長春藤大學嘅好多同學一樣 我去咗華爾街工作 做咗兩年financialanalyst 就同其他同袍一樣 去咗businessschool 讀mba 跟住返番嚟香港 做咗幾年嘅管理顧問 差不多時間就結婚生仔 宜家有兩個好可愛嘅兒子 睇番轉頭 我敢講我嘅成長 實現咗我相信香港大部分爸爸媽媽 或者老師校長嘅願望 但我想今日同各位在座嘅青年人講 千祈唔好咁聽話 我希望 今代嘅青年人比我更加努力咁 搵到同埋堅持到自己嘅激情 所謂嘅passion 心理學有個概念叫心流 心流嘅意思就係描寫一個人 喺極端專注嘅狀態個陣時 佢全程投入 好有動力 差唔多可以忘記曬所有旁邊發生嘅事 頭先你見到鼓手提到 喺日本打鼓嗰時打到自己原來已經脫曬水 就來暈都唔覺嘅 我細仔係一個好叻嘅守門員 佢同我講 佢守十二碼嗰陣時呢 佢係可以完全聽唔到旁邊嘅雜音嘅 佢見到嘅淨係喺十二碼點嗰波 同埋即將 射球嘅嗰個對手球員 佢完全睇唔到聽唔到 周圍發生嘅事 我記得我讀大學嗰陣時 我最積極讀嘅唔係我嘅主修科 係我好有興趣 有關四十年代中日戰爭 同埋六十年代美國 甘迺迪政府嘅一啲歷史 我有興趣到嗰科未開始 我已經去圖書館睇嗰啲書㗎喇 唔係希望攞更好嘅成績 而係我真心地好有興趣呢幾段歷史 我好專注 搵資料搵歷史檔案 寫文章都特別快嘅 成績亦都當然特別好 雖然係咁 我估我同當時或宜家嘅青年人一樣 覺得歷史既搵唔到食 又幫唔到我畢業 有少少惆悵嘅就係 點解理想同埋現實 好似永遠溝唔埋咁嘅呢 不過我後來聽到 蘋果電腦嘅創辦人喬布斯 曾經喺一個大學畢業禮咁樣講過 人生嘅軌跡呢往往係事後先會睇到嘅 佢嘅例子就係佢年青嗰時 喺一個社區學院 讀過一科有關字體嘅書 個陣時其實冇咩用 佢淨係覺得嗰啲字體好有趣好靚 但係呢個咁樣嘅科技 佢學到之後呢 就喺奠定咗將來 蘋果電腦畀人選字體呢嗰咁創新嘅模式 我相信在座年紀大啲嘅人記得 以前ibm電腦係冇呢個 咁樣嘅function 其實有評論話 有評論話係港孩有個特質 就係 唔在乎 唔思考 唔爭取 佢哋嘅典型答案就係 唔知道 今時今日我哋點樣教青年人 搵到自己嘅一啲鍾意嘅嘢呢 我記得 我個仔十八歲嗰陣時我曾經咁同佢講過 我唔希望佢行番爸爸媽媽嘅腳步 同一時間 我亦都唔希望爸爸媽媽行過嘅 佢就特登唔行 哲學家尼采亦都咁講過嘅 人生 傳說中 動物有七層皮 但係人 就剝咗七十倍七層嘅外殼 亦都未必搵到自己 佢教青年人撫心自問 問自己曾經深愛過啲乜嘢嘢 乜嘢能夠令自己全情投入又無比滿足 如果將自己嘅至愛一字排開 從佢哋嘅次序同埋本質 青年人會慢慢搵到屬於自己嘅真愛 或者返番嚟天堂 呢個輪迴 大優惠 三萬日終極精華遊 有人會跟大隊做好應該做嘅事 有人會選擇自由行 自由行你可能有時會行錯路 嘥咗啲時間 但係同一時間 你亦都可能會搵到一啲令你驚喜 令到你覺得不枉此生嘅經驗 無論你係跟團 或者係自由行 我都祝各位 能夠搵到一個真正令到你滿足 真正屬於自己嘅精彩人生旅程 多謝各位
2
何韻詩 聽到呢三個字呢個名 大家會聯想起咩嘅一個物體 有冇 歌手冬菇小丸子 其實我想話畀大家聽 我嘅真正身分係一隻白老鼠 同好多嘅白老鼠一樣 其實我都有死穴 我嗰個死穴呢就係籠入邊嗰個轆 啫係一般老鼠見到個轆 係唔係會衝上去喪跑 我都有呢個轆 我個轆個名叫做唔得 唔得轆 啫簡單啲講 就係每當有人嚟到我面前 同我���唔得 我就會即刻好似嗰個老鼠 被人襟咗個掣衝上去嗰個輪喪跑 偏偏呢我又進入咗一個 最多呢啲轆嘅地方 就係香港娛樂圈 喺我嘅事業上面嘅第一個 呢個唔得轆呢係1998年 當時我啱啱入行 唯一嘅業績呢就係 正如頭先嗰位朋友所講 好悔氣地唱咗一首卡通片嘅主題曲 叫做小丸子的心事 果然係街知巷聞 咁呢就因為呢我唱得咁悔氣嘅原因 咁跟住有一位嘅師兄佢叫蔡一智 佢就喂咁樣啫係唔係辦法 我真係要帶你出去social下 如果唔係呢你就會永遠咁霉呢 你都搞唔掂嘅。 咁我結果呢跟咗佢去一個夜店 入到去啦見到一位導演 咁我就話好啦好啦。 坐低同佢開始去好勉強地去寒喧 咁好唔成功嘅因為寒喧咗一陣 佢已經同我講話 喂你個樣咁鬼寸 啫係我睇你做唔長架啦呢個圈度。 我勸你不如都係改下你個性格啦 你喺呢個圈度咁樣呢係唔得架。 第一個唔得 第二個唔得喺若干年後 我已經開始出到我嘅兩三隻片 正當我開始沾沾自喜 覺得應該都一帆風順之際 我嘅唱片公司出現一個動盪 所有原本嘅員工換曬 高層一個新嘅入嚟 咁我懷疑呢佢都係睇我唔順眼架 因為呢佢第一日返工 就叫咗我入去間房度 一坐低劈頭第一句佢就話啦 嗱唱片公司都畀你玩咗咁耐啦 你應該都玩夠架啦。 以後我叫你唱乜你就唱乜 我叫你做乜你就做乜 包你紅嘅。 佢話做一個歌手 喺呢個娛樂圈喺呢個樂壇 咁另類係唔得架。 第二個唔得 唉其實點解我哋嘅地方 咁多唔得架 好似呢係我哋一出生嘅時候 就已經付送咗一個唔得嘅清單 咁呢個清單有啲咩 唔得偏離呢個制度 唔得對抗個制度 唔得同人哋唔同 偏偏又有好多人 佢哋都好欣然地接受嘅呢啲唔得 我估係因為大家會驚 我哋會驚如果我要對抗 呢啲唔得嘅話 可能會失去一啲嘢 我可能會變成其他人眼中嘅一個怪胎 作為一個經常地喺別人眼中 自我毀滅同埋自毀前程嘅 一個反叛嘅白老鼠 其實我都經常會問點解 點解我會搞成咁架 啫我究竟發生咗啲咩出咗啲咩錯 結果俾我搵到兩個源頭 源頭一 呢位男子佢聲稱自己係一位老師 但係我嚴重地思疑 佢係一位大佬 因為呢佢返學佢喺校園呢非常咁囂張 渣呢架金色尖咀嘅烈火戰車返學 仲要不停咁收𡃁 重點係唔曳嘅佢都唔收嘅 佢收過一個最曳嘅學生 淨啲俾人踢出校 就係佢保著佢 嗰個人個名叫杜汶澤 源頭二 呢一位大家見下 睇下佢呢個濃妝皮褲嘅烈女look 佢並不是一個indieband嘅rocker 其實佢亦都係一位老師 但係唔知點解佢就選擇咗呢 用一個反傳統嘅裝束 每一日呢返學著到登台咁嘅款 你睇下佢嘅學生係帶一點無奈嘅 你見到 呢兩位額頭上面鑿著咗 反叛兩個字嘅老師 一位姓何另外一位宜家都係姓何 今日座咗喺度今日比較低調 佢哋兩個嘅結合 生咗我呢一隻終極反叛嘅叉燒 我終於明白咗啦原來係dna出錯 除咗dna之外呢 其實我相信你喺沿途遇到一啲咩人 亦都係非常之重要 我沿途識到無數咁多個壞朋友 呢啲壞朋友呢不單止鼓勵我 去挑戰各種嘅嗰啲滾輪 佢仲陪我一齊去試 其中一個呢我唔會透露佢嘅全名 我稱佢為黃x明 喺2012年嘅時候 經過佢嘅鼓勵同埋聳恿我決定出櫃 再隔咗兩年我哋兩個嘅實驗再升呢 我哋唔知點解一齊參與咗一個 香港史上最大型嘅一個學生運動 呢兩個終極嘅唔得嘅滾輪 我去挑戰咗之後 果然係帶嚟一啲唔同嘅後果 我被封殺 跟住我失去咗一個市場 失去咗一啲工作嘅機會 但係慢慢我又發現原來呢一啲嘅決定 係令到我可以更坦然地面對自己 更坦然地去面對呢個世界 因為冇咗一個市場 令到我會去諗第二啲方法 去搵第二啲嘅市場 結果我發現原來冇咗其中一個市場 係真係唔會餓死 我不斷咁樣用我自己嘅事業 用我自己作為一個賭注 其實就係我唔甘心 我唔甘心去接受各種嘅呢啲唔得 當然喺嗰路程上面 我係會有失敗過我會有沮喪過 甚至我有試過放棄 但係我就係想證實畀自己睇 其實路係唔可能得一條 喺別人眼中不斷咁樣自毀前程 其實對我嚟講 我係喺度打破梗我舊有嘅一啲路 打破梗一啲舊有嘅框框 嘗試去搵出一條更加適合自己嘅一條路 咁當然有好多人會問 喂你唔驚架咩 啫點解你唔怕失去一切架咩 我諗其實係就如呢一句說話所講 it'sjustaride 呢一句說話嚟自一位美國嘅作家 佢叫做billhicks 作為一隻不斷不由自主地 跑上呢啲滾輪嘅白老鼠 我一路跑一路跑跑下跑下 我發現其實原來呢啲滾輪 佢慢慢已經變成咗 一個過山車入邊嘅嗰啲圈圈 我坐喺呢個過山車上邊 望返落去地面上面 好安全好平穩嘅其他人 我會發現原來喺呢個過山車上面 有高有低有起有跌係更加好玩架 我發現我比其他嘅人睇到嘅風景更加多 我感受到嘅更加強烈 同埋我嘅膽量亦都變得更加強大 如果我哋每一個人都可以 將人生當作一個實驗 將我哋自己變成自己嘅一隻白老鼠 我相信你唔會再介意 你做嘅係啱定係錯係好定係壞 你剩係會去諗究竟呢個實驗 令到你成為一個 進化成為一個點樣嘅人 thankyou 跟著落嚟 我想請出另外一隻嘅白老鼠 佢叫soni 佢係雞蛋蒸肉餅嘅一個結他手 我哋係同類你睇個頭就知 呢一隻歌我希望送畀 呢隻歌我希望送畀每一個 覺得自己係怪胎 覺得自己唔fitin呢個世界嘅人 呢隻歌入邊係講梗一個英雄 佢叫做風見志郎 送畀你哋 ♫沒名字的臉♫ ♫夕照的剪影♫ ♫因山水抗戰♫ ♫像奇異生物♫ ♫任世間指點♫ ♫怪一點點♫ ♫寧願孤身在戰♫ ♫被困這溫室甲蟲也感染♫ ♫用渺小的觸角力抗這偏見♫ ♫沙粒也顫抖♫ ♫一粒一粒一粒拯救♫ ♫伸張非一般的正義得一對手♫ ♫可能他的身軀好比妖獸♫ ♫一天一天稀釋價值♫ ♫得他甘心去守♫ ♫默然稻草田♫ ♫被瞬間剪短♫ ♫將磚片搭建♫ ♫但幪面青年♫ ♫入世雖短淺♫ ♫都死守流失的海岸線♫ ♫像最小的甲蟲都苦戰♫ ♫用最蠢的方法但勇敢依然♫ ♫冰川也顫抖♫ ♫一些一些一些拯救♫ ♫得出多悲哀的結局♫ ♫他都不甘脫勾♫ ♫就算昆蟲他都珍惜牠的罕有♫ ♫一天一天稀釋價值♫ ♫得他甘心去守♫ ♫人間風景♫ ♫沒法一些一些出手拯救♫ ♫他忠於的一些價值♫ ♫得他甘心去守♫ thankyou
3
哈囉 哈囉 跟住落嚟呢首歌呢大家都識唱嘅 我哋唱一句你哋唱一句好冇呀 準備囉喎 onetwothreefour woo 一齊唱喇 ♪allmovelikejagger♪ ♪shootforthestarsifitfeelsright♪ ♪justaimformyheartifyoufeellike♪ ♪justtakeitawayandmakeitokay♪ ♪isweari'llbehave♪ ♪youwantedcontrolsowewaited♪ ♪iputonashownowimakeit♪ ♪yousayi'makidmyegoisbig♪ ♪idon'tgiveashit♪ ♪anditgoeslikethis♪ ♪takemebythetongue andi'llknowyou♪ ♪kissmetillyou'redrunk andi'llshowyou♪ ♪allthemoveslikejagger♪ ♪igotthemoveslikejagger♪ ♪igotthemooooooveslikejagger♪ ♪maybeit'shardwhenyoufeellike♪ ♪you'rebrokenandscarred♪ ♪nothingfeelsright♪ ♪butwhenyou'rewithme♪ ♪imakeyoubelievethat i'vegotthekey♪ ♪sogetinthecarwecanrideit♪ ♪whereveryouwantgetinsideit♪ ♪andyouwanttosteer♪ ♪buti'mshiftinggears♪ ♪i'lltakeitfromhere♪ ♪anditgoeslikethis♪ ♪takemebythetongue andi'llknowyou♪ ♪kissmetillyou'redrunk andi'llshowyou♪ ♪allthemoveslikejagger♪ ♪igotthemoveslikejagger♪ ♪igotthemooooooveslikejagger♪ ♪takemebythetongue andi'llknowyou♪ 一齊拍手啦 ♪kissmetillyou'redrunk andi'llshowyou♪ ♪allthemoveslikejagger♪ ♪igotthemoveslikejagger♪ ♪igotthemooooooveslikejagger♪ ♪youwannaknow howtomakemesmile♪ ♪takecontrol ownmejustforthenight♪ ♪andifisharemysecret♪ ♪you'regonnahavetokeepit♪ ♪nobodyelsecanseethis♪ ♪heyheyheyhey♪ ♪sowatchandlearn♪ ♪iwon'tshowyoutwice♪ ♪headtotoeoohbabyrollmeright♪ ♪andifisharemysecret♪ ♪you'regonnahavetokeepit♪ ♪nobodyelsecanseethis♪ ♪hey♪ ♪takemebythetongue andi'llknowyou♪ ♪kissmetillyou'redrunk andi'llshowyou♪ ♪allthemoveslikejagger♪ ♪igotthemoveslikejagger♪ ♪igotthemooooooveslikejagger♪ ♪lookintomyeyesandi'llownyou♪ ♪idon'tneedtotrytocontrolyou♪ ♪allthemoveslikejagger♪ ♪igotthemoveslikejagger♪ ♪igotthemooooooveslikejagger♪ thankyou ♪這地球若果有樂園 會像這般嗎♪ ♪那些胭脂色的 香檳色的伸手可折的♪ ♪段段艷遇處處有染 都放在眼前♪ ♪害怕採花天黑路遠♪ ♪情願對路邊燈飾眷戀♪ ♪那些玻璃鑲的 水晶雕的一觸即碎的♪ ♪逐步逐步進佔世界 通向沒有完♪ ♪地厚天高如寂寞難免♪ ♪誰家有後園修補破損♪ ♪燕尾蝶疲倦了 在偉大佈景下♪ ♪這地球若果有樂園 會像這般嗎♪ ♪摘去鮮花然後種出大廈♪ ♪層層疊的進化♪ ♪摩天都市大放煙花♪ ♪耀眼煙花隨著記憶落下♪ ♪繁華像幅廣告畫♪ ♪蝴蝶夢裡醒來♪ ♪記不起對花蕊的牽掛♪ ♪那些山中開的 天邊飛的不知所措的♪ ♪漸漸熟習世界會變 不再受驚怕♪ ♪為免犧牲情願被同化♪ ♪移徙到鬧市找一個家♪ ♪燕尾蝶存活了 在發射塔之下♪ ♪這地球若果有樂園 會像這般嗎♪ ♪摘去鮮花然後種出大廈♪ ♪層層疊的進化♪ ♪摩天都市大放煙花♪ ♪耀眼煙花隨著記憶落下♪ ♪繁華像幅廣告畫♪ ♪蝴蝶夢裡醒來♪ ♪記不起對花蕊的牽掛♪ ♪再也不怕♪ ♪懷念昨日餘香 百合花芬芳嗎♪ ♪摘去鮮花然後種出大廈♪ ♪文明是種進化♪ ♪儘管適應別制止它♪ ♪力歇聲沙 情懷承受不起風化♪ ♪叢林不割下如何建造繁華♪ ♪別問怎麼不愛它♪ ♪蝴蝶夢裡醒來♪ ♪記不起對花蕊有過牽掛♪ ♪天地悠悠過客匆匆 潮起又潮落♪ ♪恩恩怨怨生死白頭 幾人能看透♪ ♪紅塵啊滾滾痴痴啊情深♪ ♪聚散終有時♪ ♪留一半清醒留一半醉♪ ♪至少夢裡有你追隨♪ ♪我拿青春賭明天♪ ♪你用真情換此生♪ ♪歲月不知人間多少的憂傷♪ ♪何不瀟灑走一回♪ ♪天地悠悠過客匆匆 潮起又潮落♪ ♪恩恩怨怨生死白頭 幾人能看透♪ ♪紅塵啊滾滾痴痴啊情深♪ ♪聚散終有時♪ ♪留一半清醒留一半醉♪ ♪至少夢裡有你追隨♪ ♪我拿青春賭明天♪ ♪你用真情換此生♪ ♪歲月不知人間多少的憂傷♪ ♪何不瀟灑走一回♪ ♪蛤啊啊蛤啊哎咿呀♪ ♪紅塵啊滾滾痴痴啊情深♪ ♪聚散終有時♪ ♪留一半清醒留一半醉♪ ♪至少夢裡有你追隨♪ ♪我拿青春賭明天♪ ♪你用真情換此生♪ ♪歲月不知人間多少的憂傷♪ ♪何不瀟灑走一回♪
4
噉parkour呢就係一種運動啦 就可以令到你可以快速有效噉樣去移動嘅 係啦噉跟著你哋就會問啦 其實咦 呢啲嘢噉簡單我都做到啦 噉其實係嘅你做到嘅 好多好基本嘅動作噉我哋有請我哋 香港飛躍道協會嘅會員啦學員啦 去幫我哋示範嘅 噉第一個呢就係rolling啦 噉啫係一啲翻滾嘅動作 噉其餘呢 係啦呢個係睇落好難啦 但係呢係一個好基本嘅動作 可以幫助你解力嘅 噉仲有啲咩呢 有jumping啦landing啦 一啲跳躍嘅動作 通常呢啲動作呢就係你片度睇到 喔好靚呀好犀利呀噉樣啦 係啦噉跟住最後呢就係 一啲平衡力嘅練習啦 噉其實我哋都做好多呢一方面嘅嘢 好啦噉呢幾種講完之後呢 其實就係成件事最初最初嘅 我哋language嘅字母 最初嘅組成部分 噉我哋將呢種所謂 可以叫做移動嘅藝術啦 移動嘅language呀噉樣樣 噉其實頭先睇片都知啦 我開始嘅時候呢 有個朋友叫我去 喂好玩架你嚟玩下啦噉樣 噉第二朝起身就出事啦 就真係痛啦 攞住把遮返學呀撐拐杖返學 完全郁唔到話噉樣 噉跟住我發覺其實 咦我哋係好唔fit個喎 係啦大家知唔知 平時我哋練幾多體能呢噉樣 噉你就要諗下其實你 上一次做運動係幾時呀 噉呢parkour 輾轉令到我自己變咗個健身嘅教練啦 噉最多人呢就會問我 點樣可以有六舊腹肌呀噉樣 點樣可以胸肌大啲呀噉樣 其實呢你哋將成個concept調轉咗 噉樣我喺呢度呢 就想挑戰大家對運動嘅睇法 大家都知道運動係好架嘛 噉但係你對上一次做運動係幾時呢 體育堂吓 噉係好嘅 但係係冇人做個喎 因為身邊嘅人冇人做啦 冇時間啦太忙啦噉噉係嘅 但係呢大家應該好好噉 invest一下自己嘅身體 其實呢啲嘢好簡單嘅 佢哋練咗可能一年兩年三年 唔係一啲話由細個開始練起呀噉樣 冇啲噉嘅事嘅 我都好大個開始練嘅 如果你五六年前見到我 同你哋身材一模一樣嘅 係架真係架 係啦噉呢今日嘅主題係 tobeornottobe啦 噉對我黎講呢 呢個唔係一個選擇唔選擇 我梗係be啦點解呢 因為我哋嘅身體呢其實係好美妙嘅 我哋個身體係最複雜最複雜嘅機器呢架 噉呢我唔係叫你吓 你一定要嚟參加我哋嘅活動吓 唔需要係呀太多人呀 噉但係你可以做啲咩呢 就係真係諗下自己可以 唔係真係去剩係跑步呀 剩係去跳繩呀噉樣就算 可能都冇人跳繩啦 噉但你可以諗下 有冇其他嘅方法去運動呢 跳舞呀 可能呢在座嘅每一位 可能入咗我哋會呢係一個明星運動員 可能你係體操王子 可能你係下一個michaeljackson 你唔知架 但如果你唔試嘅話呢 你永遠都唔會知道架 呢樣嘢係真嘅係啦 噉呢度依幾位呢都係練咗幾年架啫 噉呢唔使擔心話自己太遲開始嘅 我呢個翻呢二十四歲先至學成嘅 係啦多謝大家 喂完嗱喂
nohup.out
ADDED
The diff for this file is too large to render.
preprocessor_config.json
ADDED
@@ -0,0 +1,9 @@
{
  "do_normalize": true,
  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
  "feature_size": 1,
  "padding_side": "right",
  "padding_value": 0,
  "return_attention_mask": true,
  "sampling_rate": 16000
}
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ae41c55a314634e2374826b51e8a05dc0fd69a8df6da3d3be13de5d8b209df23
size 1276909233
run.sh
ADDED
@@ -0,0 +1,37 @@
python run_speech_recognition_ctc.py \
    --dataset_name="common_voice" \
    --model_name_or_path="facebook/wav2vec2-xls-r-300m" \
    --dataset_config_name="zh-HK" \
    --output_dir="./" \
    --overwrite_output_dir \
    --num_train_epochs="100" \
    --per_device_train_batch_size="8" \
    --per_device_eval_batch_size="8" \
    --gradient_accumulation_steps="4" \
    --learning_rate="1e-4" \
    --warmup_steps="2000" \
    --length_column_name="input_length" \
    --max_duration_in_seconds="7" \
    --max_eval_samples="3000" \
    --evaluation_strategy="steps" \
    --text_column_name="sentence" \
    --chars_to_ignore , ? . ! \- \; \: \" “ % ‘ ” � — ’ … – ! - : – 。 》 , ) , ? ; ~ ~ … ︰ , ( 」 ‧ 《 ﹔ 、 — / , 「 ﹖ · \
    --save_steps="500" \
    --eval_steps="500" \
    --logging_steps="100" \
    --layerdrop="0.0" \
    --activation_dropout="0.1" \
    --save_total_limit="3" \
    --freeze_feature_encoder \
    --feat_proj_dropout="0.0" \
    --mask_time_prob="0.75" \
    --mask_time_length="10" \
    --mask_feature_prob="0.25" \
    --mask_feature_length="64" \
    --gradient_checkpointing \
    --use_auth_token \
    --fp16 \
    --group_by_length \
    --do_train --do_eval \
    --report_to="tensorboard" \
    --push_to_hub
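One relationship worth spelling out (sketch, not part of the commit): the README's total_train_batch_size of 32 follows from the two batching flags in run.sh, assuming a single training device; the device count is not stated anywhere in the commit and is inferred from that total.

# Effective batch size implied by run.sh.
per_device_train_batch_size = 8   # --per_device_train_batch_size
gradient_accumulation_steps = 4   # --gradient_accumulation_steps
num_devices = 1                   # assumption, inferred from the README's total of 32

total_train_batch_size = (
    per_device_train_batch_size * gradient_accumulation_steps * num_devices
)
assert total_train_batch_size == 32  # matches "total_train_batch_size: 32" in README.md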
run_eval.sh
ADDED
@@ -0,0 +1,10 @@
python eval.py \
    --model_id="w11wo/wav2vec2-xls-r-300m-zh-HK-v2" \
    --dataset="speech-recognition-community-v2/dev_data" \
    --config="zh-HK" \
    --split="validation" \
    --text_column_name="sentence" \
    --chunk_length_s="10" \
    --stride_length_s="2" \
    --log_outputs \
    --device="0"
run_speech_recognition_ctc.py
ADDED
@@ -0,0 +1,829 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
# coding=utf-8
|
3 |
+
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
|
4 |
+
#
|
5 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
6 |
+
# you may not use this file except in compliance with the License.
|
7 |
+
# You may obtain a copy of the License at
|
8 |
+
#
|
9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10 |
+
#
|
11 |
+
# Unless required by applicable law or agreed to in writing, software
|
12 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14 |
+
# See the License for the specific language governing permissions and
|
15 |
+
|
16 |
+
""" Fine-tuning a 🤗 Transformers CTC model for automatic speech recognition"""
|
17 |
+
|
18 |
+
import functools
|
19 |
+
import json
|
20 |
+
import logging
|
21 |
+
import os
|
22 |
+
import re
|
23 |
+
import sys
|
24 |
+
import warnings
|
25 |
+
from dataclasses import dataclass, field
|
26 |
+
from typing import Dict, List, Optional, Union
|
27 |
+
|
28 |
+
import datasets
|
29 |
+
import numpy as np
|
30 |
+
import torch
|
31 |
+
from datasets import DatasetDict, load_dataset, load_metric
|
32 |
+
|
33 |
+
import transformers
|
34 |
+
from transformers import (
|
35 |
+
AutoConfig,
|
36 |
+
AutoFeatureExtractor,
|
37 |
+
AutoModelForCTC,
|
38 |
+
AutoProcessor,
|
39 |
+
AutoTokenizer,
|
40 |
+
HfArgumentParser,
|
41 |
+
Trainer,
|
42 |
+
TrainingArguments,
|
43 |
+
Wav2Vec2Processor,
|
44 |
+
set_seed,
|
45 |
+
)
|
46 |
+
from transformers.trainer_utils import get_last_checkpoint, is_main_process
|
47 |
+
from transformers.utils import check_min_version
|
48 |
+
from transformers.utils.versions import require_version
|
49 |
+
|
50 |
+
|
51 |
+
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
52 |
+
check_min_version("4.17.0.dev0")
|
53 |
+
|
54 |
+
require_version(
|
55 |
+
"datasets>=1.13.3",
|
56 |
+
"To fix: pip install -r examples/pytorch/text-classification/requirements.txt",
|
57 |
+
)
|
58 |
+
|
59 |
+
|
60 |
+
logger = logging.getLogger(__name__)
|
61 |
+
|
62 |
+
|
63 |
+
def list_field(default=None, metadata=None):
|
64 |
+
return field(default_factory=lambda: default, metadata=metadata)
|
65 |
+
|
66 |
+
|
67 |
+
@dataclass
|
68 |
+
class ModelArguments:
|
69 |
+
"""
|
70 |
+
Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
|
71 |
+
"""
|
72 |
+
|
73 |
+
model_name_or_path: str = field(
|
74 |
+
metadata={
|
75 |
+
"help": "Path to pretrained model or model identifier from huggingface.co/models"
|
76 |
+
}
|
77 |
+
)
|
78 |
+
tokenizer_name_or_path: Optional[str] = field(
|
79 |
+
default=None,
|
80 |
+
metadata={
|
81 |
+
"help": "Path to pretrained tokenizer or tokenizer identifier from huggingface.co/models"
|
82 |
+
},
|
83 |
+
)
|
84 |
+
cache_dir: Optional[str] = field(
|
85 |
+
default=None,
|
86 |
+
metadata={
|
87 |
+
"help": "Where do you want to store the pretrained models downloaded from huggingface.co"
|
88 |
+
},
|
89 |
+
)
|
90 |
+
freeze_feature_encoder: bool = field(
|
91 |
+
default=True,
|
92 |
+
metadata={"help": "Whether to freeze the feature encoder layers of the model."},
|
93 |
+
)
|
94 |
+
attention_dropout: float = field(
|
95 |
+
default=0.0,
|
96 |
+
metadata={"help": "The dropout ratio for the attention probabilities."},
|
97 |
+
)
|
98 |
+
activation_dropout: float = field(
|
99 |
+
default=0.0,
|
100 |
+
metadata={
|
101 |
+
"help": "The dropout ratio for activations inside the fully connected layer."
|
102 |
+
},
|
103 |
+
)
|
104 |
+
feat_proj_dropout: float = field(
|
105 |
+
default=0.0, metadata={"help": "The dropout ratio for the projected features."}
|
106 |
+
)
|
107 |
+
hidden_dropout: float = field(
|
108 |
+
default=0.0,
|
109 |
+
metadata={
|
110 |
+
"help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler."
|
111 |
+
},
|
112 |
+
)
|
113 |
+
final_dropout: float = field(
|
114 |
+
default=0.0,
|
115 |
+
metadata={"help": "The dropout probability for the final projection layer."},
|
116 |
+
)
|
117 |
+
mask_time_prob: float = field(
|
118 |
+
default=0.05,
|
119 |
+
metadata={
|
120 |
+
"help": "Probability of each feature vector along the time axis to be chosen as the start of the vector"
|
121 |
+
"span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature"
|
122 |
+
"vectors will be masked along the time axis."
|
123 |
+
},
|
124 |
+
)
|
125 |
+
mask_time_length: int = field(
|
126 |
+
default=10,
|
127 |
+
metadata={"help": "Length of vector span to mask along the time axis."},
|
128 |
+
)
|
129 |
+
mask_feature_prob: float = field(
|
130 |
+
default=0.0,
|
131 |
+
metadata={
|
132 |
+
"help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector"
|
133 |
+
"span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis."
|
134 |
+
},
|
135 |
+
)
|
136 |
+
mask_feature_length: int = field(
|
137 |
+
default=10,
|
138 |
+
metadata={"help": "Length of vector span to mask along the feature axis."},
|
139 |
+
)
|
140 |
+
layerdrop: float = field(
|
141 |
+
default=0.0, metadata={"help": "The LayerDrop probability."}
|
142 |
+
)
|
143 |
+
ctc_loss_reduction: Optional[str] = field(
|
144 |
+
default="mean",
|
145 |
+
metadata={
|
146 |
+
"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."
|
147 |
+
},
|
148 |
+
)
|
149 |
+
|
150 |
+
|
151 |
+
@dataclass
|
152 |
+
class DataTrainingArguments:
|
153 |
+
"""
|
154 |
+
Arguments pertaining to what data we are going to input our model for training and eval.
|
155 |
+
|
156 |
+
Using `HfArgumentParser` we can turn this class
|
157 |
+
into argparse arguments to be able to specify them on
|
158 |
+
the command line.
|
159 |
+
"""
|
160 |
+
|
161 |
+
dataset_name: str = field(
|
162 |
+
metadata={
|
163 |
+
"help": "The configuration name of the dataset to use (via the datasets library)."
|
164 |
+
}
|
165 |
+
)
|
166 |
+
dataset_config_name: str = field(
|
167 |
+
default=None,
|
168 |
+
metadata={
|
169 |
+
"help": "The configuration name of the dataset to use (via the datasets library)."
|
170 |
+
},
|
171 |
+
)
|
172 |
+
train_split_name: str = field(
|
173 |
+
default="train+validation",
|
174 |
+
metadata={
|
175 |
+
"help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'"
|
176 |
+
},
|
177 |
+
)
|
178 |
+
eval_split_name: str = field(
|
179 |
+
default="test",
|
180 |
+
metadata={
|
181 |
+
"help": "The name of the training data set split to use (via the datasets library). Defaults to 'test'"
|
182 |
+
},
|
183 |
+
)
|
184 |
+
audio_column_name: str = field(
|
185 |
+
default="audio",
|
186 |
+
metadata={
|
187 |
+
"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"
|
188 |
+
},
|
189 |
+
)
|
190 |
+
text_column_name: str = field(
|
191 |
+
default="text",
|
192 |
+
metadata={
|
193 |
+
"help": "The name of the dataset column containing the text data. Defaults to 'text'"
|
194 |
+
},
|
195 |
+
)
|
196 |
+
overwrite_cache: bool = field(
|
197 |
+
default=False,
|
198 |
+
metadata={"help": "Overwrite the cached preprocessed datasets or not."},
|
199 |
+
)
|
200 |
+
preprocessing_num_workers: Optional[int] = field(
|
201 |
+
default=None,
|
202 |
+
metadata={"help": "The number of processes to use for the preprocessing."},
|
203 |
+
)
|
204 |
+
max_train_samples: Optional[int] = field(
|
205 |
+
default=None,
|
206 |
+
metadata={
|
207 |
+
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
|
208 |
+
"value if set."
|
209 |
+
},
|
210 |
+
)
|
211 |
+
max_eval_samples: Optional[int] = field(
|
212 |
+
default=None,
|
213 |
+
metadata={
|
214 |
+
"help": "For debugging purposes or quicker training, truncate the number of validation examples to this "
|
215 |
+
"value if set."
|
216 |
+
},
|
217 |
+
)
|
218 |
+
chars_to_ignore: Optional[List[str]] = list_field(
|
219 |
+
default=None,
|
220 |
+
metadata={"help": "A list of characters to remove from the transcripts."},
|
221 |
+
)
|
222 |
+
eval_metrics: List[str] = list_field(
|
223 |
+
default=["wer", "cer"],
|
224 |
+
metadata={
|
225 |
+
"help": "A list of metrics the model should be evaluated on. E.g. `'wer cer'`"
|
226 |
+
},
|
227 |
+
)
|
228 |
+
max_duration_in_seconds: float = field(
|
229 |
+
default=20.0,
|
230 |
+
metadata={
|
231 |
+
"help": "Filter audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`"
|
232 |
+
},
|
233 |
+
)
|
234 |
+
min_duration_in_seconds: float = field(
|
235 |
+
default=0.0,
|
236 |
+
metadata={
|
237 |
+
"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"
|
238 |
+
},
|
239 |
+
)
|
240 |
+
preprocessing_only: bool = field(
|
241 |
+
default=False,
|
242 |
+
metadata={
|
243 |
+
"help": "Whether to only do data preprocessing and skip training. "
|
244 |
+
"This is especially useful when data preprocessing errors out in distributed training due to timeout. "
|
245 |
+
"In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` "
|
246 |
+
"so that the cached datasets can consequently be loaded in distributed training"
|
247 |
+
},
|
248 |
+
)
|
249 |
+
use_auth_token: bool = field(
|
250 |
+
default=False,
|
251 |
+
metadata={
|
252 |
+
"help": "If :obj:`True`, will use the token generated when running"
|
253 |
+
":obj:`transformers-cli login` as HTTP bearer authorization for remote files."
|
254 |
+
},
|
255 |
+
)
|
256 |
+
unk_token: str = field(
|
257 |
+
default="[UNK]", metadata={"help": "The unk token for the tokenizer"},
|
258 |
+
)
|
259 |
+
pad_token: str = field(
|
260 |
+
default="[PAD]", metadata={"help": "The padding token for the tokenizer"},
|
261 |
+
)
|
262 |
+
word_delimiter_token: str = field(
|
263 |
+
default="|", metadata={"help": "The word delimiter token for the tokenizer"},
|
264 |
+
)
|
265 |
+
phoneme_language: Optional[str] = field(
|
266 |
+
default=None,
|
267 |
+
metadata={
|
268 |
+
"help": "The target language that should be used be"
|
269 |
+
" passed to the tokenizer for tokenization. Note that"
|
270 |
+
" this is only relevant if the model classifies the"
|
271 |
+
" input audio to a sequence of phoneme sequences."
|
272 |
+
},
|
273 |
+
)
|
274 |
+
|
275 |
+
|
276 |
+
@dataclass
class DataCollatorCTCWithPadding:
    """
    Data collator that will dynamically pad the inputs received.
    Args:
        processor (:class:`~transformers.AutoProcessor`)
            The processor used for processing the data.
        padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
            Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
            among:
            * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
              sequence is provided).
            * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the
              maximum acceptable input length for the model if that argument is not provided.
            * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
              different lengths).
        max_length (:obj:`int`, `optional`):
            Maximum length of the ``input_values`` of the returned list and optionally padding length (see above).
        max_length_labels (:obj:`int`, `optional`):
            Maximum length of the ``labels`` returned list and optionally padding length (see above).
        pad_to_multiple_of (:obj:`int`, `optional`):
            If set will pad the sequence to a multiple of the provided value.
            This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >=
            7.5 (Volta).
    """

    processor: AutoProcessor
    padding: Union[bool, str] = "longest"
    pad_to_multiple_of: Optional[int] = None
    pad_to_multiple_of_labels: Optional[int] = None

    def __call__(
        self, features: List[Dict[str, Union[List[int], torch.Tensor]]]
    ) -> Dict[str, torch.Tensor]:
        # split inputs and labels since they have to be of different lengths and need
        # different padding methods
        input_features = [{"input_values": feature["input_values"]} for feature in features]
        label_features = [{"input_ids": feature["labels"]} for feature in features]

        batch = self.processor.pad(
            input_features,
            padding=self.padding,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="pt",
        )

        with self.processor.as_target_processor():
            labels_batch = self.processor.pad(
                label_features,
                padding=self.padding,
                pad_to_multiple_of=self.pad_to_multiple_of_labels,
                return_tensors="pt",
            )

        # replace padding with -100 to ignore loss correctly
        labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100)

        batch["labels"] = labels

        return batch

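# Example usage (an illustrative sketch, not part of the original script): given two
# features of unequal length, the collator pads `input_values` to the longest sequence
# and masks the label padding with -100 so that CTC loss ignores the padded positions:
#
#   collator = DataCollatorCTCWithPadding(processor=processor)
#   batch = collator([
#       {"input_values": [0.1, 0.2, 0.3], "labels": [5, 2]},
#       {"input_values": [0.4], "labels": [7]},
#   ])
#   # batch["labels"] -> tensor([[5, 2], [7, -100]])
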
def create_vocabulary_from_data(
    datasets: DatasetDict,
    word_delimiter_token: Optional[str] = None,
    unk_token: Optional[str] = None,
    pad_token: Optional[str] = None,
):
    # Given training and test labels create vocabulary
    def extract_all_chars(batch):
        all_text = " ".join(batch["target_text"])
        vocab = list(set(all_text))
        return {"vocab": [vocab], "all_text": [all_text]}

    vocabs = datasets.map(
        extract_all_chars,
        batched=True,
        batch_size=-1,
        keep_in_memory=True,
        remove_columns=datasets["train"].column_names,
    )

    # take union of all unique characters in each dataset
    vocab_set = functools.reduce(
        lambda vocab_1, vocab_2: set(vocab_1["vocab"][0]) | set(vocab_2["vocab"][0]),
        vocabs.values(),
    )

    vocab_dict = {v: k for k, v in enumerate(sorted(list(vocab_set)))}

    # replace white space with delimiter token
    if word_delimiter_token is not None:
        vocab_dict[word_delimiter_token] = vocab_dict[" "]
        del vocab_dict[" "]

    # add unk and pad token
    if unk_token is not None:
        vocab_dict[unk_token] = len(vocab_dict)

    if pad_token is not None:
        vocab_dict[pad_token] = len(vocab_dict)

    return vocab_dict

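# Example (hypothetical transcripts, for illustration only): with train target texts
# "ab ba" and eval target text "ac", the function above returns
#   {"|": 0, "a": 1, "b": 2, "c": 3, "[UNK]": 4, "[PAD]": 5}
# since " " sorts first and is remapped to the word delimiter token before
# [UNK] and [PAD] are appended.
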
def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.

    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # Detecting last checkpoint.
    last_checkpoint = None
    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome."
            )
        elif last_checkpoint is not None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)

    # Log on each process the small summary:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    # Set the verbosity to info of the Transformers logger (on main process only):
    if is_main_process(training_args.local_rank):
        transformers.utils.logging.set_verbosity_info()
    logger.info("Training/evaluation parameters %s", training_args)

    # Set seed before initializing model.
    set_seed(training_args.seed)

    # 1. First, let's load the dataset
    raw_datasets = DatasetDict()

    if training_args.do_train:
        raw_datasets["train"] = load_dataset(
            data_args.dataset_name,
            data_args.dataset_config_name,
            split=data_args.train_split_name,
            use_auth_token=data_args.use_auth_token,
        )

        if data_args.audio_column_name not in raw_datasets["train"].column_names:
            raise ValueError(
                f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
                "Make sure to set `--audio_column_name` to the correct audio column - one of "
                f"{', '.join(raw_datasets['train'].column_names)}."
            )

        if data_args.text_column_name not in raw_datasets["train"].column_names:
            raise ValueError(
                f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
                "Make sure to set `--text_column_name` to the correct text column - one of "
                f"{', '.join(raw_datasets['train'].column_names)}."
            )

        if data_args.max_train_samples is not None:
            raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples))

    if training_args.do_eval:
        raw_datasets["eval"] = load_dataset(
            data_args.dataset_name,
            data_args.dataset_config_name,
            split=data_args.eval_split_name,
            use_auth_token=data_args.use_auth_token,
        )

        if data_args.max_eval_samples is not None:
            raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples))

    # 2. We remove some special characters from the datasets
    # that make training complicated and do not help in transcribing the speech
    # E.g. characters such as `,` and `.` do not really have an acoustic characteristic
    # that could be easily picked up by the model
    chars_to_ignore_regex = (
        f'[{"".join(data_args.chars_to_ignore)}]' if data_args.chars_to_ignore is not None else None
    )
    text_column_name = data_args.text_column_name

    def remove_special_characters(batch):
        if chars_to_ignore_regex is not None:
            batch["target_text"] = re.sub(chars_to_ignore_regex, "", batch[text_column_name]).lower() + " "
        else:
            batch["target_text"] = batch[text_column_name].lower() + " "
        return batch

    with training_args.main_process_first(desc="dataset map special characters removal"):
        raw_datasets = raw_datasets.map(
            remove_special_characters,
            remove_columns=[text_column_name],
            desc="remove special characters from datasets",
        )

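    # Illustration (not from the original script): with `--chars_to_ignore , ? . ! ; :`
    # the regex above becomes '[,?.!;:]', so a row whose text column reads
    # "Hello, world!" is mapped to target_text "hello world ".
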
    # save special tokens for tokenizer
    word_delimiter_token = data_args.word_delimiter_token
    unk_token = data_args.unk_token
    pad_token = data_args.pad_token

    # 3. Next, let's load the config as we might need it to create
    # the tokenizer
    # load config
    config = AutoConfig.from_pretrained(
        model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        use_auth_token=data_args.use_auth_token,
    )

    # 4. Next, if no tokenizer file is defined,
    # we create the vocabulary of the model by extracting all unique characters from
    # the training and evaluation datasets
    # We need to make sure that only first rank saves vocabulary
    # make sure all processes wait until vocab is created
    tokenizer_name_or_path = model_args.tokenizer_name_or_path
    tokenizer_kwargs = {}
    if tokenizer_name_or_path is None:
        # save vocab in training output dir
        tokenizer_name_or_path = training_args.output_dir

        vocab_file = os.path.join(tokenizer_name_or_path, "vocab.json")

        with training_args.main_process_first():
            if training_args.overwrite_output_dir and os.path.isfile(vocab_file):
                os.remove(vocab_file)

        with training_args.main_process_first(desc="dataset map vocabulary creation"):
            if not os.path.isfile(vocab_file):
                os.makedirs(tokenizer_name_or_path, exist_ok=True)
                vocab_dict = create_vocabulary_from_data(
                    raw_datasets,
                    word_delimiter_token=word_delimiter_token,
                    unk_token=unk_token,
                    pad_token=pad_token,
                )

                # save vocab dict to be loaded into tokenizer
                with open(vocab_file, "w") as file:
                    json.dump(vocab_dict, file)

        # if tokenizer has just been created
        # it is defined by `tokenizer_class` if present in config else by `model_type`
        tokenizer_kwargs = {
            "config": config if config.tokenizer_class is not None else None,
            "tokenizer_type": config.model_type if config.tokenizer_class is None else None,
            "unk_token": unk_token,
            "pad_token": pad_token,
            "word_delimiter_token": word_delimiter_token,
        }

    # 5. Now we can instantiate the feature extractor, tokenizer and model
    # Note for distributed training, the .from_pretrained methods guarantee that only
    # one local process can concurrently download model & vocab.

    # load feature_extractor and tokenizer
    tokenizer = AutoTokenizer.from_pretrained(
        tokenizer_name_or_path,
        use_auth_token=data_args.use_auth_token,
        **tokenizer_kwargs,
    )
    feature_extractor = AutoFeatureExtractor.from_pretrained(
        model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        use_auth_token=data_args.use_auth_token,
    )

    # adapt config
    config.update(
        {
            "feat_proj_dropout": model_args.feat_proj_dropout,
            "attention_dropout": model_args.attention_dropout,
            "hidden_dropout": model_args.hidden_dropout,
            "final_dropout": model_args.final_dropout,
            "mask_time_prob": model_args.mask_time_prob,
            "mask_time_length": model_args.mask_time_length,
            "mask_feature_prob": model_args.mask_feature_prob,
            "mask_feature_length": model_args.mask_feature_length,
            "gradient_checkpointing": training_args.gradient_checkpointing,
            "layerdrop": model_args.layerdrop,
            "ctc_loss_reduction": model_args.ctc_loss_reduction,
            "pad_token_id": tokenizer.pad_token_id,
            "vocab_size": len(tokenizer),
            "activation_dropout": model_args.activation_dropout,
        }
    )

    # create model
    model = AutoModelForCTC.from_pretrained(
        model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        config=config,
        use_auth_token=data_args.use_auth_token,
    )

    # freeze encoder
    if model_args.freeze_feature_encoder:
        model.freeze_feature_encoder()

    # 6. Now we preprocess the datasets including loading the audio, resampling and normalization
    # Thankfully, `datasets` takes care of automatically loading and resampling the audio,
    # so that we just need to set the correct target sampling rate and normalize the input
    # via the `feature_extractor`

    # make sure that dataset decodes audio with correct sampling rate
    dataset_sampling_rate = (
        next(iter(raw_datasets.values())).features[data_args.audio_column_name].sampling_rate
    )
    if dataset_sampling_rate != feature_extractor.sampling_rate:
        raw_datasets = raw_datasets.cast_column(
            data_args.audio_column_name,
            datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate),
        )

    # derive max & min input length for sample rate & max duration
    max_input_length = data_args.max_duration_in_seconds * feature_extractor.sampling_rate
    min_input_length = data_args.min_duration_in_seconds * feature_extractor.sampling_rate
    audio_column_name = data_args.audio_column_name
    num_workers = data_args.preprocessing_num_workers

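    # Worked numbers (assuming the usual 16 kHz wav2vec2 feature extractor): with
    # max_duration_in_seconds=20.0, max_input_length = 20.0 * 16000 = 320000 samples,
    # so any utterance longer than 20 s is dropped by the length filter below, and
    # min_duration_in_seconds=0.0 keeps everything else.
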
    # `phoneme_language` is only relevant if the model is fine-tuned on phoneme classification
    phoneme_language = data_args.phoneme_language

    # Preprocessing the datasets.
    # We need to read the audio files as arrays and tokenize the targets.
    def prepare_dataset(batch):
        # load audio
        sample = batch[audio_column_name]

        inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"])
        batch["input_values"] = inputs.input_values[0]
        batch["input_length"] = len(batch["input_values"])

        # encode targets
        additional_kwargs = {}
        if phoneme_language is not None:
            additional_kwargs["phonemizer_lang"] = phoneme_language

        batch["labels"] = tokenizer(batch["target_text"], **additional_kwargs).input_ids
        return batch

    with training_args.main_process_first(desc="dataset map preprocessing"):
        vectorized_datasets = raw_datasets.map(
            prepare_dataset,
            remove_columns=next(iter(raw_datasets.values())).column_names,
            num_proc=num_workers,
            desc="preprocess datasets",
        )

    def is_audio_in_length_range(length):
        return length > min_input_length and length < max_input_length

    # filter data that is shorter than min_input_length or longer than max_input_length
    vectorized_datasets = vectorized_datasets.filter(
        is_audio_in_length_range,
        num_proc=num_workers,
        input_columns=["input_length"],
    )

    # 7. Next, we can prepare the training.
    # Let's use word error rate (WER) as our evaluation metric,
    # instantiate a data collator and the trainer

    # Define evaluation metrics during training, *i.e.* word error rate, character error rate
    eval_metrics = {metric: load_metric(metric) for metric in data_args.eval_metrics}

    # for large datasets it is advised to run the preprocessing on a
    # single machine first with ``args.preprocessing_only`` since there will most likely
    # be a timeout when running the script in distributed mode.
    # In a second step ``args.preprocessing_only`` can then be set to `False` to load the
    # cached dataset
    if data_args.preprocessing_only:
        logger.info(f"Data preprocessing finished. Files cached at {vectorized_datasets.cache_files}")
        return

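    # Example two-step workflow (commands are illustrative, not taken from this repo):
    #   1) cache the features on a single machine:
    #        python run_speech_recognition_ctc.py <args...> --preprocessing_only
    #   2) relaunch in the distributed setup without --preprocessing_only; the cached
    #      datasets are then loaded instead of being recomputed.
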
    def compute_metrics(pred):
        pred_logits = pred.predictions
        pred_ids = np.argmax(pred_logits, axis=-1)

        pred.label_ids[pred.label_ids == -100] = tokenizer.pad_token_id

        pred_str = tokenizer.batch_decode(pred_ids)
        # we do not want to group tokens when computing the metrics
        label_str = tokenizer.batch_decode(pred.label_ids, group_tokens=False)

        metrics = {k: v.compute(predictions=pred_str, references=label_str) for k, v in eval_metrics.items()}

        return metrics

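    # Toy check (illustrative): for reference "hello world" and prediction
    # "helo world", the wer metric yields 1/2 = 0.5 (one substituted word out of two)
    # and the cer metric yields 1/11 ≈ 0.09 (one deleted character out of eleven).
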
    # Now save everything to be able to create a single processor later
    if is_main_process(training_args.local_rank):
        # save feature extractor, tokenizer and config
        feature_extractor.save_pretrained(training_args.output_dir)
        tokenizer.save_pretrained(training_args.output_dir)
        config.save_pretrained(training_args.output_dir)

    try:
        processor = AutoProcessor.from_pretrained(training_args.output_dir)
    except (OSError, KeyError):
        warnings.warn(
            "Loading a processor from a feature extractor config that does not"
            " include a `processor_class` attribute is deprecated and will be removed in v5. Please add the following "
            " attribute to your `preprocessor_config.json` file to suppress this warning: "
            " `'processor_class': 'Wav2Vec2Processor'`",
            FutureWarning,
        )
        processor = Wav2Vec2Processor.from_pretrained(training_args.output_dir)

    # Instantiate custom data collator
    data_collator = DataCollatorCTCWithPadding(processor=processor)

    # Initialize Trainer
    trainer = Trainer(
        model=model,
        data_collator=data_collator,
        args=training_args,
        compute_metrics=compute_metrics,
        train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
        eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
        tokenizer=feature_extractor,
    )

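    # Example invocation (flags are illustrative; <model> stands for whatever
    # pretrained checkpoint is being fine-tuned):
    #   python run_speech_recognition_ctc.py --model_name_or_path <model> \
    #       --dataset_name common_voice --dataset_config_name zh-HK \
    #       --output_dir ./ --do_train --do_eval
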
    # 8. Finally, we can start training

    # Training
    if training_args.do_train:

        # use last checkpoint if it exists
        if last_checkpoint is not None:
            checkpoint = last_checkpoint
        elif os.path.isdir(model_args.model_name_or_path):
            checkpoint = model_args.model_name_or_path
        else:
            checkpoint = None

        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        trainer.save_model()

        metrics = train_result.metrics
        max_train_samples = (
            data_args.max_train_samples
            if data_args.max_train_samples is not None
            else len(vectorized_datasets["train"])
        )
        metrics["train_samples"] = min(max_train_samples, len(vectorized_datasets["train"]))

        trainer.log_metrics("train", metrics)
        trainer.save_metrics("train", metrics)
        trainer.save_state()

    # Evaluation
    results = {}
    if training_args.do_eval:
        logger.info("*** Evaluate ***")
        metrics = trainer.evaluate()
        max_eval_samples = (
            data_args.max_eval_samples if data_args.max_eval_samples is not None else len(vectorized_datasets["eval"])
        )
        metrics["eval_samples"] = min(max_eval_samples, len(vectorized_datasets["eval"]))

        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Write model card and (optionally) push to hub
    config_name = data_args.dataset_config_name if data_args.dataset_config_name is not None else "na"
    kwargs = {
        "finetuned_from": model_args.model_name_or_path,
        "tasks": "speech-recognition",
        "tags": ["automatic-speech-recognition", data_args.dataset_name],
        "dataset_args": f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split: {data_args.eval_split_name}",
        "dataset": f"{data_args.dataset_name.upper()} - {config_name.upper()}",
    }
    if "common_voice" in data_args.dataset_name:
        kwargs["language"] = config_name

    if training_args.push_to_hub:
        trainer.push_to_hub(**kwargs)
    else:
        trainer.create_model_card(**kwargs)

    return results


if __name__ == "__main__":
    main()
runs/Feb06_16-31-57_job-cb7cc850-8327-4ab0-bdf4-0ebe63e2788c/1644165171.7227242/events.out.tfevents.1644165171.job-cb7cc850-8327-4ab0-bdf4-0ebe63e2788c
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c98151adf5ee89a0d86283e74254aa6f1e4356e6d7ca722e0529d9870e9c55e6
size 4564

runs/Feb06_16-31-57_job-cb7cc850-8327-4ab0-bdf4-0ebe63e2788c/events.out.tfevents.1644165171.job-cb7cc850-8327-4ab0-bdf4-0ebe63e2788c
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:80dd0250f4078a862419fb4878d968d5e50c614fe277c6d63fe4bd8d0391b9ed
size 91538

runs/Feb06_16-31-57_job-cb7cc850-8327-4ab0-bdf4-0ebe63e2788c/events.out.tfevents.1644238077.job-cb7cc850-8327-4ab0-bdf4-0ebe63e2788c
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5581c99d14fb2e9928d90d56c652e2392ac3325e216b4d8b4fbe6e56078e5682
size 412
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
speech-recognition-community-v2_dev_data_zh-HK_validation_eval_results.txt
ADDED
@@ -0,0 +1,2 @@
WER: 1.0
CER: 0.5659636239433011
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
train_results.json
ADDED
@@ -0,0 +1,8 @@
{
    "epoch": 100.0,
    "train_loss": 3.7852419735087786,
    "train_runtime": 72640.1075,
    "train_samples": 11949,
    "train_samples_per_second": 16.45,
    "train_steps_per_second": 0.513
}
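As a quick consistency check on these numbers (a sketch, not part of the repo):

    # train_samples_per_second ≈ train_samples * epochs / train_runtime
    print(11949 * 100.0 / 72640.1075)  # ≈ 16.45, matching the value logged above
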
trainer_state.json
ADDED
@@ -0,0 +1,3003 @@
{
    "best_metric": null,
    "best_model_checkpoint": null,
    "epoch": 99.99866131191432,
    "global_step": 37300,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
    "log_history": [
        {"epoch": 0.27, "learning_rate": 4.85e-06, "loss": 153.5094, "step": 100},
        {"epoch": 0.54, "learning_rate": 9.85e-06, "loss": 108.8648, "step": 200},
        {"epoch": 0.8, "learning_rate": 1.48e-05, "loss": 92.5714, "step": 300},
        {"epoch": 1.07, "learning_rate": 1.9800000000000004e-05, "loss": 79.9356, "step": 400},
        {"epoch": 1.34, "learning_rate": 2.48e-05, "loss": 69.8341, "step": 500},
        {"epoch": 1.34, "eval_cer": 1.0, "eval_loss": 80.07215118408203, "eval_runtime": 130.8213, "eval_samples_per_second": 17.597, "eval_steps_per_second": 2.201, "eval_wer": 1.0, "step": 500},
        {"epoch": 1.61, "learning_rate": 2.98e-05, "loss": 54.2478, "step": 600},
        {"epoch": 1.88, "learning_rate": 3.48e-05, "loss": 35.5793, "step": 700},
        {"epoch": 2.14, "learning_rate": 3.9800000000000005e-05, "loss": 17.7978, "step": 800},
        {"epoch": 2.41, "learning_rate": 4.4800000000000005e-05, "loss": 8.1204, "step": 900},
        {"epoch": 2.68, "learning_rate": 4.9800000000000004e-05, "loss": 6.6418, "step": 1000},
        {"epoch": 2.68, "eval_cer": 1.0, "eval_loss": 6.634645938873291, "eval_runtime": 117.0489, "eval_samples_per_second": 19.667, "eval_steps_per_second": 2.461, "eval_wer": 1.0, "step": 1000},
        {"epoch": 2.95, "learning_rate": 5.4800000000000004e-05, "loss": 6.3633, "step": 1100},
        {"epoch": 3.22, "learning_rate": 5.9800000000000003e-05, "loss": 6.364, "step": 1200},
        {"epoch": 3.48, "learning_rate": 6.48e-05, "loss": 6.2461, "step": 1300},
        {"epoch": 3.75, "learning_rate": 6.98e-05, "loss": 6.2242, "step": 1400},
        {"epoch": 4.02, "learning_rate": 7.48e-05, "loss": 6.2419, "step": 1500},
        {"epoch": 4.02, "eval_cer": 1.0, "eval_loss": 6.290937423706055, "eval_runtime": 117.0781, "eval_samples_per_second": 19.662, "eval_steps_per_second": 2.46, "eval_wer": 1.0, "step": 1500},
        {"epoch": 4.29, "learning_rate": 7.98e-05, "loss": 6.1691, "step": 1600},
        {"epoch": 4.56, "learning_rate": 8.48e-05, "loss": 6.1668, "step": 1700},
        {"epoch": 4.82, "learning_rate": 8.98e-05, "loss": 6.1623, "step": 1800},
        {"epoch": 5.09, "learning_rate": 9.48e-05, "loss": 6.1884, "step": 1900},
        {"epoch": 5.36, "learning_rate": 9.98e-05, "loss": 6.0813, "step": 2000},
        {"epoch": 5.36, "eval_cer": 1.0, "eval_loss": 6.115033149719238, "eval_runtime": 117.9948, "eval_samples_per_second": 19.509, "eval_steps_per_second": 2.441, "eval_wer": 1.0, "step": 2000},
        {"epoch": 5.63, "learning_rate": 9.972804532577904e-05, "loss": 6.1027, "step": 2100},
        {"epoch": 5.9, "learning_rate": 9.944475920679887e-05, "loss": 6.0586, "step": 2200},
        {"epoch": 6.17, "learning_rate": 9.91614730878187e-05, "loss": 6.0399, "step": 2300},
        {"epoch": 6.43, "learning_rate": 9.887818696883852e-05, "loss": 6.0035, "step": 2400},
        {"epoch": 6.7, "learning_rate": 9.859490084985836e-05, "loss": 5.9677, "step": 2500},
        {"epoch": 6.7, "eval_cer": 1.002818566168114, "eval_loss": 6.030123233795166, "eval_runtime": 116.8078, "eval_samples_per_second": 19.708, "eval_steps_per_second": 2.466, "eval_wer": 1.1385881333910783, "step": 2500},
        {"epoch": 6.97, "learning_rate": 9.831161473087818e-05, "loss": 5.9617, "step": 2600},
        {"epoch": 7.24, "learning_rate": 9.802832861189802e-05, "loss": 5.9736, "step": 2700},
        {"epoch": 7.51, "learning_rate": 9.774504249291784e-05, "loss": 5.9098, "step": 2800},
        {"epoch": 7.77, "learning_rate": 9.746175637393768e-05, "loss": 5.9069, "step": 2900},
        {"epoch": 8.04, "learning_rate": 9.717847025495752e-05, "loss": 5.9296, "step": 3000},
        {"epoch": 8.04, "eval_cer": 1.0057735145701687, "eval_loss": 5.897457599639893, "eval_runtime": 116.427, "eval_samples_per_second": 19.772, "eval_steps_per_second": 2.474, "eval_wer": 1.2113469034213946, "step": 3000},
        {"epoch": 8.31, "learning_rate": 9.689518413597734e-05, "loss": 5.8213, "step": 3100},
        {"epoch": 8.58, "learning_rate": 9.661189801699718e-05, "loss": 5.8241, "step": 3200},
        {"epoch": 8.85, "learning_rate": 9.632861189801701e-05, "loss": 5.7787, "step": 3300},
        {"epoch": 9.12, "learning_rate": 9.604532577903684e-05, "loss": 5.7529, "step": 3400},
        {"epoch": 9.38, "learning_rate": 9.576203966005666e-05, "loss": 5.6434, "step": 3500},
        {"epoch": 9.38, "eval_cer": 1.017093239987271, "eval_loss": 5.540365219116211, "eval_runtime": 116.7079, "eval_samples_per_second": 19.724, "eval_steps_per_second": 2.468, "eval_wer": 2.16240796881767, "step": 3500},
        {"epoch": 9.65, "learning_rate": 9.54787535410765e-05, "loss": 5.6259, "step": 3600},
        {"epoch": 9.92, "learning_rate": 9.519546742209632e-05, "loss": 5.5488, "step": 3700},
        {"epoch": 10.19, "learning_rate": 9.491218130311616e-05, "loss": 5.5068, "step": 3800},
        {"epoch": 10.46, "learning_rate": 9.462889518413598e-05, "loss": 5.3439, "step": 3900},
        {"epoch": 10.72, "learning_rate": 9.434560906515582e-05, "loss": 5.1974, "step": 4000},
        {"epoch": 10.72, "eval_cer": 0.9365822612174387, "eval_loss": 4.543997287750244, "eval_runtime": 115.8257, "eval_samples_per_second": 19.875, "eval_steps_per_second": 2.486, "eval_wer": 2.170203551320918, "step": 4000},
        {"epoch": 10.99, "learning_rate": 9.406232294617564e-05, "loss": 4.8692, "step": 4100},
        {"epoch": 11.26, "learning_rate": 9.377903682719548e-05, "loss": 4.7155, "step": 4200},
        {"epoch": 11.53, "learning_rate": 9.34957507082153e-05, "loss": 4.4978, "step": 4300},
        {"epoch": 11.8, "learning_rate": 9.321246458923513e-05, "loss": 4.4105, "step": 4400},
        {"epoch": 12.06, "learning_rate": 9.292917847025496e-05, "loss": 4.3601, "step": 4500},
        {"epoch": 12.06, "eval_cer": 0.8998045187980179, "eval_loss": 3.383898973464966, "eval_runtime": 117.0791, "eval_samples_per_second": 19.662, "eval_steps_per_second": 2.46, "eval_wer": 2.246427024686011, "step": 4500},
        {"epoch": 12.33, "learning_rate": 9.264589235127479e-05, "loss": 4.1745, "step": 4600},
        {"epoch": 12.6, "learning_rate": 9.236260623229462e-05, "loss": 4.1194, "step": 4700},
        {"epoch": 12.87, "learning_rate": 9.207932011331445e-05, "loss": 4.0809, "step": 4800},
        {"epoch": 13.14, "learning_rate": 9.179603399433428e-05, "loss": 3.9556, "step": 4900},
        {"epoch": 13.4, "learning_rate": 9.151558073654392e-05, "loss": 3.9321, "step": 5000},
        {"epoch": 13.4, "eval_cer": 0.8400236395872165, "eval_loss": 2.8784573078155518, "eval_runtime": 117.9053, "eval_samples_per_second": 19.524, "eval_steps_per_second": 2.443, "eval_wer": 2.3096578605456908, "step": 5000},
        {"epoch": 13.67, "learning_rate": 9.123229461756374e-05, "loss": 3.8826, "step": 5100},
        {"epoch": 13.94, "learning_rate": 9.094900849858358e-05, "loss": 3.7975, "step": 5200},
        {"epoch": 14.21, "learning_rate": 9.06657223796034e-05, "loss": 3.7704, "step": 5300},
        {"epoch": 14.48, "learning_rate": 9.038243626062324e-05, "loss": 3.6848, "step": 5400},
        {"epoch": 14.74, "learning_rate": 9.009915014164306e-05, "loss": 3.6462, "step": 5500},
        {"epoch": 14.74, "eval_cer": 0.6663181342910397, "eval_loss": 2.510770797729492, "eval_runtime": 116.5338, "eval_samples_per_second": 19.754, "eval_steps_per_second": 2.471, "eval_wer": 1.9623213512343005, "step": 5500},
        {"epoch": 15.01, "learning_rate": 8.98158640226629e-05, "loss": 3.584, "step": 5600},
        {"epoch": 15.28, "learning_rate": 8.953257790368272e-05, "loss": 3.531, "step": 5700},
        {"epoch": 15.55, "learning_rate": 8.925212464589235e-05, "loss": 3.5509, "step": 5800},
        {"epoch": 15.82, "learning_rate": 8.896883852691219e-05, "loss": 3.524, "step": 5900},
        {"epoch": 16.09, "learning_rate": 8.868555240793201e-05, "loss": 3.5156, "step": 6000},
        {"epoch": 16.09, "eval_cer": 0.5705778060644633, "eval_loss": 2.2789571285247803, "eval_runtime": 115.731, "eval_samples_per_second": 19.891, "eval_steps_per_second": 2.489, "eval_wer": 1.6478995236032914, "step": 6000},
        {"epoch": 16.35, "learning_rate": 8.840226628895184e-05, "loss": 3.4294, "step": 6100},
        {"epoch": 16.62, "learning_rate": 8.811898016997168e-05, "loss": 3.4156, "step": 6200},
        {"epoch": 16.89, "learning_rate": 8.78356940509915e-05, "loss": 3.3933, "step": 6300},
        {"epoch": 17.16, "learning_rate": 8.755240793201134e-05, "loss": 3.3293, "step": 6400},
        {"epoch": 17.43, "learning_rate": 8.726912181303116e-05, "loss": 3.32, "step": 6500},
        {"epoch": 17.43, "eval_cer": 0.6244033277265082, "eval_loss": 2.1449646949768066, "eval_runtime": 119.2852, "eval_samples_per_second": 19.298, "eval_steps_per_second": 2.414, "eval_wer": 1.833694239930706, "step": 6500},
        {"epoch": 17.69, "learning_rate": 8.6985835694051e-05, "loss": 3.3019, "step": 6600},
        {"epoch": 17.96, "learning_rate": 8.670538243626063e-05, "loss": 3.3058, "step": 6700},
        {"epoch": 18.23, "learning_rate": 8.642209631728045e-05, "loss": 3.289, "step": 6800},
        {"epoch": 18.5, "learning_rate": 8.613881019830029e-05, "loss": 3.208, "step": 6900},
        {"epoch": 18.77, "learning_rate": 8.585552407932011e-05, "loss": 3.1918, "step": 7000},
        {"epoch": 18.77, "eval_cer": 0.6017184161476565, "eval_loss": 1.8536365032196045, "eval_runtime": 116.4854, "eval_samples_per_second": 19.762, "eval_steps_per_second": 2.472, "eval_wer": 1.939367691641403, "step": 7000},
        {"epoch": 19.03, "learning_rate": 8.557223796033995e-05, "loss": 3.1877, "step": 7100},
        {"epoch": 19.3, "learning_rate": 8.528895184135977e-05, "loss": 3.0893, "step": 7200},
        {"epoch": 19.57, "learning_rate": 8.500566572237961e-05, "loss": 3.102, "step": 7300},
        {"epoch": 19.84, "learning_rate": 8.472521246458924e-05, "loss": 3.0536, "step": 7400},
        {"epoch": 20.11, "learning_rate": 8.444192634560907e-05, "loss": 3.1139, "step": 7500},
        {"epoch": 20.11, "eval_cer": 0.5638496158567078, "eval_loss": 1.7204933166503906, "eval_runtime": 116.1457, "eval_samples_per_second": 19.82, "eval_steps_per_second": 2.48, "eval_wer": 1.9112169770463403, "step": 7500},
        {"epoch": 20.37, "learning_rate": 8.41586402266289e-05, "loss": 2.9958, "step": 7600},
        {"epoch": 20.64, "learning_rate": 8.387535410764873e-05, "loss": 3.0055, "step": 7700},
        {"epoch": 20.91, "learning_rate": 8.359206798866855e-05, "loss": 2.9673, "step": 7800},
        {"epoch": 21.18, "learning_rate": 8.330878186968839e-05, "loss": 2.9276, "step": 7900},
        {"epoch": 21.45, "learning_rate": 8.302549575070821e-05, "loss": 2.8995, "step": 8000},
        {"epoch": 21.45, "eval_cer": 0.3250443242260308, "eval_loss": 1.5478395223617554, "eval_runtime": 120.8897, "eval_samples_per_second": 19.042, "eval_steps_per_second": 2.382, "eval_wer": 1.0623646600259853, "step": 8000},
        {"epoch": 21.71, "learning_rate": 8.274220963172805e-05, "loss": 2.8602, "step": 8100},
        {"epoch": 21.98, "learning_rate": 8.245892351274787e-05, "loss": 2.877, "step": 8200},
        {"epoch": 22.25, "learning_rate": 8.217563739376771e-05, "loss": 2.8283, "step": 8300},
        {"epoch": 22.52, "learning_rate": 8.189235127478753e-05, "loss": 2.7887, "step": 8400},
        {"epoch": 22.79, "learning_rate": 8.160906515580737e-05, "loss": 2.7572, "step": 8500},
        {"epoch": 22.79, "eval_cer": 0.33668227485566216, "eval_loss": 1.406813144683838, "eval_runtime": 117.2331, "eval_samples_per_second": 19.636, "eval_steps_per_second": 2.457, "eval_wer": 1.141186660892161, "step": 8500},
        {"epoch": 23.06, "learning_rate": 8.132577903682719e-05, "loss": 2.7576, "step": 8600},
        {"epoch": 23.32, "learning_rate": 8.104249291784703e-05, "loss": 2.7336, "step": 8700},
        {"epoch": 23.59, "learning_rate": 8.075920679886687e-05, "loss": 2.6792, "step": 8800},
        {"epoch": 23.86, "learning_rate": 8.047592067988669e-05, "loss": 2.6983, "step": 8900},
        {"epoch": 24.13, "learning_rate": 8.019263456090653e-05, "loss": 2.6881, "step": 9000},
        {"epoch": 24.13, "eval_cer": 0.5683047688321134, "eval_loss": 1.3311798572540283, "eval_runtime": 116.7773, "eval_samples_per_second": 19.713, "eval_steps_per_second": 2.466, "eval_wer": 2.009961022087484, "step": 9000},
        {"epoch": 24.4, "learning_rate": 7.990934844192635e-05, "loss": 2.6439, "step": 9100},
        {"epoch": 24.66, "learning_rate": 7.962606232294619e-05, "loss": 2.6563, "step": 9200},
        {"epoch": 24.93, "learning_rate": 7.934277620396601e-05, "loss": 2.6792, "step": 9300},
        {"epoch": 25.2, "learning_rate": 7.905949008498585e-05, "loss": 2.6905, "step": 9400},
        {"epoch": 25.47, "learning_rate": 7.877620396600567e-05, "loss": 2.5993, "step": 9500},
        {"epoch": 25.47, "eval_cer": 0.6450425057962449, "eval_loss": 1.2552706003189087, "eval_runtime": 117.844, "eval_samples_per_second": 19.534, "eval_steps_per_second": 2.444, "eval_wer": 2.003897791251624, "step": 9500},
        {"epoch": 25.74, "learning_rate": 7.849291784702551e-05, "loss": 2.6243, "step": 9600},
        {"epoch": 26.01, "learning_rate": 7.820963172804533e-05, "loss": 2.5753, "step": 9700},
        {"epoch": 26.27, "learning_rate": 7.792634560906516e-05, "loss": 2.521, "step": 9800},
        {"epoch": 26.54, "learning_rate": 7.76458923512748e-05, "loss": 2.5546, "step": 9900},
        {"epoch": 26.81, "learning_rate": 7.736260623229463e-05, "loss": 2.5304, "step": 10000},
        {"epoch": 26.81, "eval_cer": 0.5788971223348638, "eval_loss": 1.242166519165039, "eval_runtime": 116.3994, "eval_samples_per_second": 19.777, "eval_steps_per_second": 2.474, "eval_wer": 2.039411000433088, "step": 10000},
        {"epoch": 27.08, "learning_rate": 7.707932011331445e-05, "loss": 2.5599, "step": 10100},
        {"epoch": 27.35, "learning_rate": 7.679603399433429e-05, "loss": 2.4878, "step": 10200},
        {"epoch": 27.61, "learning_rate": 7.651274787535411e-05, "loss": 2.4684, "step": 10300},
        {"epoch": 27.88, "learning_rate": 7.622946175637395e-05, "loss": 2.4647, "step": 10400},
        {"epoch": 28.15, "learning_rate": 7.594617563739377e-05, "loss": 2.4352, "step": 10500},
        {"epoch": 28.15, "eval_cer": 0.5506659999090785, "eval_loss": 1.1581844091415405, "eval_runtime": 116.2358, "eval_samples_per_second": 19.805, "eval_steps_per_second": 2.478, "eval_wer": 1.9969683845820703, "step": 10500},
        {"epoch": 28.42, "learning_rate": 7.56628895184136e-05, "loss": 2.4437, "step": 10600},
        {"epoch": 28.69, "learning_rate": 7.537960339943343e-05, "loss": 2.44, "step": 10700},
        {"epoch": 28.95, "learning_rate": 7.509631728045327e-05, "loss": 2.447, "step": 10800},
        {"epoch": 29.22, "learning_rate": 7.481303116147309e-05, "loss": 2.4203, "step": 10900},
        {"epoch": 29.49, "learning_rate": 7.452974504249293e-05, "loss": 2.3795, "step": 11000},
        {
+
"epoch": 29.49,
|
882 |
+
"eval_cer": 0.4843842342137564,
|
883 |
+
"eval_loss": 1.1159536838531494,
|
884 |
+
"eval_runtime": 117.3261,
|
885 |
+
"eval_samples_per_second": 19.621,
|
886 |
+
"eval_steps_per_second": 2.455,
|
887 |
+
"eval_wer": 1.8254655695106106,
|
888 |
+
"step": 11000
|
889 |
+
},
|
890 |
+
{
|
891 |
+
"epoch": 29.76,
|
892 |
+
"learning_rate": 7.424645892351275e-05,
|
893 |
+
"loss": 2.3967,
|
894 |
+
"step": 11100
|
895 |
+
},
|
896 |
+
{
|
897 |
+
"epoch": 30.03,
|
898 |
+
"learning_rate": 7.396317280453257e-05,
|
899 |
+
"loss": 2.3546,
|
900 |
+
"step": 11200
|
901 |
+
},
|
902 |
+
{
|
903 |
+
"epoch": 30.29,
|
904 |
+
"learning_rate": 7.367988668555241e-05,
|
905 |
+
"loss": 2.343,
|
906 |
+
"step": 11300
|
907 |
+
},
|
908 |
+
{
|
909 |
+
"epoch": 30.56,
|
910 |
+
"learning_rate": 7.339660056657224e-05,
|
911 |
+
"loss": 2.3377,
|
912 |
+
"step": 11400
|
913 |
+
},
|
914 |
+
{
|
915 |
+
"epoch": 30.83,
|
916 |
+
"learning_rate": 7.311331444759207e-05,
|
917 |
+
"loss": 2.3287,
|
918 |
+
"step": 11500
|
919 |
+
},
|
920 |
+
{
|
921 |
+
"epoch": 30.83,
|
922 |
+
"eval_cer": 0.3780060917397827,
|
923 |
+
"eval_loss": 1.0775071382522583,
|
924 |
+
"eval_runtime": 118.2979,
|
925 |
+
"eval_samples_per_second": 19.459,
|
926 |
+
"eval_steps_per_second": 2.435,
|
927 |
+
"eval_wer": 1.4122996968384582,
|
928 |
+
"step": 11500
|
929 |
+
},
|
930 |
+
{
|
931 |
+
"epoch": 31.1,
|
932 |
+
"learning_rate": 7.28300283286119e-05,
|
933 |
+
"loss": 2.341,
|
934 |
+
"step": 11600
|
935 |
+
},
|
936 |
+
{
|
937 |
+
"epoch": 31.37,
|
938 |
+
"learning_rate": 7.254674220963173e-05,
|
939 |
+
"loss": 2.3039,
|
940 |
+
"step": 11700
|
941 |
+
},
|
942 |
+
{
|
943 |
+
"epoch": 31.63,
|
944 |
+
"learning_rate": 7.226345609065156e-05,
|
945 |
+
"loss": 2.2769,
|
946 |
+
"step": 11800
|
947 |
+
},
|
948 |
+
{
|
949 |
+
"epoch": 31.9,
|
950 |
+
"learning_rate": 7.198300283286119e-05,
|
951 |
+
"loss": 2.323,
|
952 |
+
"step": 11900
|
953 |
+
},
|
954 |
+
{
|
955 |
+
"epoch": 32.17,
|
956 |
+
"learning_rate": 7.169971671388103e-05,
|
957 |
+
"loss": 2.2622,
|
958 |
+
"step": 12000
|
959 |
+
},
|
960 |
+
{
|
961 |
+
"epoch": 32.17,
|
962 |
+
"eval_cer": 0.48938491612492613,
|
963 |
+
"eval_loss": 1.0703905820846558,
|
964 |
+
"eval_runtime": 116.0515,
|
965 |
+
"eval_samples_per_second": 19.836,
|
966 |
+
"eval_steps_per_second": 2.482,
|
967 |
+
"eval_wer": 1.7444781290601992,
|
968 |
+
"step": 12000
|
969 |
+
},
|
970 |
+
{
|
971 |
+
"epoch": 32.44,
|
972 |
+
"learning_rate": 7.141643059490085e-05,
|
973 |
+
"loss": 2.2663,
|
974 |
+
"step": 12100
|
975 |
+
},
|
976 |
+
{
|
977 |
+
"epoch": 32.71,
|
978 |
+
"learning_rate": 7.113314447592069e-05,
|
979 |
+
"loss": 2.2797,
|
980 |
+
"step": 12200
|
981 |
+
},
|
982 |
+
{
|
983 |
+
"epoch": 32.97,
|
984 |
+
"learning_rate": 7.084985835694051e-05,
|
985 |
+
"loss": 2.264,
|
986 |
+
"step": 12300
|
987 |
+
},
|
988 |
+
{
|
989 |
+
"epoch": 33.24,
|
990 |
+
"learning_rate": 7.056657223796033e-05,
|
991 |
+
"loss": 2.2497,
|
992 |
+
"step": 12400
|
993 |
+
},
|
994 |
+
{
|
995 |
+
"epoch": 33.51,
|
996 |
+
"learning_rate": 7.028328611898017e-05,
|
997 |
+
"loss": 2.2225,
|
998 |
+
"step": 12500
|
999 |
+
},
|
1000 |
+
{
|
1001 |
+
"epoch": 33.51,
|
1002 |
+
"eval_cer": 0.5057962449424922,
|
1003 |
+
"eval_loss": 1.0272445678710938,
|
1004 |
+
"eval_runtime": 118.2938,
|
1005 |
+
"eval_samples_per_second": 19.46,
|
1006 |
+
"eval_steps_per_second": 2.435,
|
1007 |
+
"eval_wer": 1.7236899090515374,
|
1008 |
+
"step": 12500
|
1009 |
+
},
|
1010 |
+
{
|
1011 |
+
"epoch": 33.78,
|
1012 |
+
"learning_rate": 7e-05,
|
1013 |
+
"loss": 2.2025,
|
1014 |
+
"step": 12600
|
1015 |
+
},
|
1016 |
+
{
|
1017 |
+
"epoch": 34.05,
|
1018 |
+
"learning_rate": 6.971671388101983e-05,
|
1019 |
+
"loss": 2.1892,
|
1020 |
+
"step": 12700
|
1021 |
+
},
|
1022 |
+
{
|
1023 |
+
"epoch": 34.32,
|
1024 |
+
"learning_rate": 6.943342776203965e-05,
|
1025 |
+
"loss": 2.1498,
|
1026 |
+
"step": 12800
|
1027 |
+
},
|
1028 |
+
{
|
1029 |
+
"epoch": 34.58,
|
1030 |
+
"learning_rate": 6.915014164305949e-05,
|
1031 |
+
"loss": 2.1819,
|
1032 |
+
"step": 12900
|
1033 |
+
},
|
1034 |
+
{
|
1035 |
+
"epoch": 34.85,
|
1036 |
+
"learning_rate": 6.886685552407931e-05,
|
1037 |
+
"loss": 2.1843,
|
1038 |
+
"step": 13000
|
1039 |
+
},
|
1040 |
+
{
|
1041 |
+
"epoch": 34.85,
|
1042 |
+
"eval_cer": 0.5028412965404373,
|
1043 |
+
"eval_loss": 0.9756352305412292,
|
1044 |
+
"eval_runtime": 117.2229,
|
1045 |
+
"eval_samples_per_second": 19.638,
|
1046 |
+
"eval_steps_per_second": 2.457,
|
1047 |
+
"eval_wer": 1.8042442615851018,
|
1048 |
+
"step": 13000
|
1049 |
+
},
|
1050 |
+
{
|
1051 |
+
"epoch": 35.12,
|
1052 |
+
"learning_rate": 6.858356940509915e-05,
|
1053 |
+
"loss": 2.1578,
|
1054 |
+
"step": 13100
|
1055 |
+
},
|
1056 |
+
{
|
1057 |
+
"epoch": 35.39,
|
1058 |
+
"learning_rate": 6.830028328611899e-05,
|
1059 |
+
"loss": 2.1083,
|
1060 |
+
"step": 13200
|
1061 |
+
},
|
1062 |
+
{
|
1063 |
+
"epoch": 35.66,
|
1064 |
+
"learning_rate": 6.801699716713881e-05,
|
1065 |
+
"loss": 2.1531,
|
1066 |
+
"step": 13300
|
1067 |
+
},
|
1068 |
+
{
|
1069 |
+
"epoch": 35.92,
|
1070 |
+
"learning_rate": 6.773371104815865e-05,
|
1071 |
+
"loss": 2.11,
|
1072 |
+
"step": 13400
|
1073 |
+
},
|
1074 |
+
{
|
1075 |
+
"epoch": 36.19,
|
1076 |
+
"learning_rate": 6.745042492917847e-05,
|
1077 |
+
"loss": 2.1,
|
1078 |
+
"step": 13500
|
1079 |
+
},
|
1080 |
+
{
|
1081 |
+
"epoch": 36.19,
|
1082 |
+
"eval_cer": 0.6055371186980043,
|
1083 |
+
"eval_loss": 0.9526697993278503,
|
1084 |
+
"eval_runtime": 118.3448,
|
1085 |
+
"eval_samples_per_second": 19.452,
|
1086 |
+
"eval_steps_per_second": 2.434,
|
1087 |
+
"eval_wer": 1.8908618449545258,
|
1088 |
+
"step": 13500
|
1089 |
+
},
|
1090 |
+
{
|
1091 |
+
"epoch": 36.46,
|
1092 |
+
"learning_rate": 6.716713881019831e-05,
|
1093 |
+
"loss": 2.0948,
|
1094 |
+
"step": 13600
|
1095 |
+
},
|
1096 |
+
{
|
1097 |
+
"epoch": 36.73,
|
1098 |
+
"learning_rate": 6.688385269121813e-05,
|
1099 |
+
"loss": 2.071,
|
1100 |
+
"step": 13700
|
1101 |
+
},
|
1102 |
+
{
|
1103 |
+
"epoch": 37.0,
|
1104 |
+
"learning_rate": 6.660056657223797e-05,
|
1105 |
+
"loss": 2.1179,
|
1106 |
+
"step": 13800
|
1107 |
+
},
|
1108 |
+
{
|
1109 |
+
"epoch": 37.27,
|
1110 |
+
"learning_rate": 6.63172804532578e-05,
|
1111 |
+
"loss": 2.0444,
|
1112 |
+
"step": 13900
|
1113 |
+
},
|
1114 |
+
{
|
1115 |
+
"epoch": 37.53,
|
1116 |
+
"learning_rate": 6.603399433427763e-05,
|
1117 |
+
"loss": 2.0741,
|
1118 |
+
"step": 14000
|
1119 |
+
},
|
1120 |
+
{
|
1121 |
+
"epoch": 37.53,
|
1122 |
+
"eval_cer": 0.5880347320089103,
|
1123 |
+
"eval_loss": 0.941799521446228,
|
1124 |
+
"eval_runtime": 117.0385,
|
1125 |
+
"eval_samples_per_second": 19.669,
|
1126 |
+
"eval_steps_per_second": 2.461,
|
1127 |
+
"eval_wer": 1.902555218709398,
|
1128 |
+
"step": 14000
|
1129 |
+
},
|
1130 |
+
{
|
1131 |
+
"epoch": 37.8,
|
1132 |
+
"learning_rate": 6.575070821529745e-05,
|
1133 |
+
"loss": 2.0937,
|
1134 |
+
"step": 14100
|
1135 |
+
},
|
1136 |
+
{
|
1137 |
+
"epoch": 38.07,
|
1138 |
+
"learning_rate": 6.546742209631729e-05,
|
1139 |
+
"loss": 2.0848,
|
1140 |
+
"step": 14200
|
1141 |
+
},
|
1142 |
+
{
|
1143 |
+
"epoch": 38.34,
|
1144 |
+
"learning_rate": 6.518413597733712e-05,
|
1145 |
+
"loss": 2.0235,
|
1146 |
+
"step": 14300
|
1147 |
+
},
|
1148 |
+
{
|
1149 |
+
"epoch": 38.61,
|
1150 |
+
"learning_rate": 6.490084985835695e-05,
|
1151 |
+
"loss": 2.0165,
|
1152 |
+
"step": 14400
|
1153 |
+
},
|
1154 |
+
{
|
1155 |
+
"epoch": 38.87,
|
1156 |
+
"learning_rate": 6.461756373937678e-05,
|
1157 |
+
"loss": 2.0179,
|
1158 |
+
"step": 14500
|
1159 |
+
},
|
1160 |
+
{
|
1161 |
+
"epoch": 38.87,
|
1162 |
+
"eval_cer": 0.5245715324817021,
|
1163 |
+
"eval_loss": 0.93625807762146,
|
1164 |
+
"eval_runtime": 117.33,
|
1165 |
+
"eval_samples_per_second": 19.62,
|
1166 |
+
"eval_steps_per_second": 2.455,
|
1167 |
+
"eval_wer": 1.797747942832395,
|
1168 |
+
"step": 14500
|
1169 |
+
},
|
1170 |
+
{
|
1171 |
+
"epoch": 39.14,
|
1172 |
+
"learning_rate": 6.43342776203966e-05,
|
1173 |
+
"loss": 1.9771,
|
1174 |
+
"step": 14600
|
1175 |
+
},
|
1176 |
+
{
|
1177 |
+
"epoch": 39.41,
|
1178 |
+
"learning_rate": 6.405099150141644e-05,
|
1179 |
+
"loss": 1.9721,
|
1180 |
+
"step": 14700
|
1181 |
+
},
|
1182 |
+
{
|
1183 |
+
"epoch": 39.68,
|
1184 |
+
"learning_rate": 6.376770538243626e-05,
|
1185 |
+
"loss": 2.0099,
|
1186 |
+
"step": 14800
|
1187 |
+
},
|
1188 |
+
{
|
1189 |
+
"epoch": 39.95,
|
1190 |
+
"learning_rate": 6.34844192634561e-05,
|
1191 |
+
"loss": 2.0237,
|
1192 |
+
"step": 14900
|
1193 |
+
},
|
1194 |
+
{
|
1195 |
+
"epoch": 40.21,
|
1196 |
+
"learning_rate": 6.320113314447592e-05,
|
1197 |
+
"loss": 2.0615,
|
1198 |
+
"step": 15000
|
1199 |
+
},
|
1200 |
+
{
|
1201 |
+
"epoch": 40.21,
|
1202 |
+
"eval_cer": 0.5598945310724189,
|
1203 |
+
"eval_loss": 0.9634870886802673,
|
1204 |
+
"eval_runtime": 118.3611,
|
1205 |
+
"eval_samples_per_second": 19.449,
|
1206 |
+
"eval_steps_per_second": 2.433,
|
1207 |
+
"eval_wer": 1.8111736682546558,
|
1208 |
+
"step": 15000
|
1209 |
+
},
|
1210 |
+
{
|
1211 |
+
"epoch": 40.48,
|
1212 |
+
"learning_rate": 6.291784702549576e-05,
|
1213 |
+
"loss": 1.9647,
|
1214 |
+
"step": 15100
|
1215 |
+
},
|
1216 |
+
{
|
1217 |
+
"epoch": 40.75,
|
1218 |
+
"learning_rate": 6.263456090651558e-05,
|
1219 |
+
"loss": 1.9683,
|
1220 |
+
"step": 15200
|
1221 |
+
},
|
1222 |
+
{
|
1223 |
+
"epoch": 41.02,
|
1224 |
+
"learning_rate": 6.235127478753542e-05,
|
1225 |
+
"loss": 1.9311,
|
1226 |
+
"step": 15300
|
1227 |
+
},
|
1228 |
+
{
|
1229 |
+
"epoch": 41.29,
|
1230 |
+
"learning_rate": 6.206798866855524e-05,
|
1231 |
+
"loss": 1.9126,
|
1232 |
+
"step": 15400
|
1233 |
+
},
|
1234 |
+
{
|
1235 |
+
"epoch": 41.55,
|
1236 |
+
"learning_rate": 6.178470254957506e-05,
|
1237 |
+
"loss": 1.9448,
|
1238 |
+
"step": 15500
|
1239 |
+
},
|
1240 |
+
{
|
1241 |
+
"epoch": 41.55,
|
1242 |
+
"eval_cer": 0.491430649634041,
|
1243 |
+
"eval_loss": 0.9248816967010498,
|
1244 |
+
"eval_runtime": 116.7415,
|
1245 |
+
"eval_samples_per_second": 19.719,
|
1246 |
+
"eval_steps_per_second": 2.467,
|
1247 |
+
"eval_wer": 1.7249891728020788,
|
1248 |
+
"step": 15500
|
1249 |
+
},
|
1250 |
+
{
|
1251 |
+
"epoch": 41.82,
|
1252 |
+
"learning_rate": 6.15014164305949e-05,
|
1253 |
+
"loss": 1.934,
|
1254 |
+
"step": 15600
|
1255 |
+
},
|
1256 |
+
{
|
1257 |
+
"epoch": 42.09,
|
1258 |
+
"learning_rate": 6.121813031161473e-05,
|
1259 |
+
"loss": 2.0163,
|
1260 |
+
"step": 15700
|
1261 |
+
},
|
1262 |
+
{
|
1263 |
+
"epoch": 42.36,
|
1264 |
+
"learning_rate": 6.093484419263457e-05,
|
1265 |
+
"loss": 1.8725,
|
1266 |
+
"step": 15800
|
1267 |
+
},
|
1268 |
+
{
|
1269 |
+
"epoch": 42.63,
|
1270 |
+
"learning_rate": 6.065155807365439e-05,
|
1271 |
+
"loss": 1.9246,
|
1272 |
+
"step": 15900
|
1273 |
+
},
|
1274 |
+
{
|
1275 |
+
"epoch": 42.89,
|
1276 |
+
"learning_rate": 6.036827195467423e-05,
|
1277 |
+
"loss": 1.8966,
|
1278 |
+
"step": 16000
|
1279 |
+
},
|
1280 |
+
{
|
1281 |
+
"epoch": 42.89,
|
1282 |
+
"eval_cer": 0.4318770741464745,
|
1283 |
+
"eval_loss": 0.9022775888442993,
|
1284 |
+
"eval_runtime": 116.8996,
|
1285 |
+
"eval_samples_per_second": 19.692,
|
1286 |
+
"eval_steps_per_second": 2.464,
|
1287 |
+
"eval_wer": 1.5829363360762234,
|
1288 |
+
"step": 16000
|
1289 |
+
},
|
1290 |
+
{
|
1291 |
+
"epoch": 43.16,
|
1292 |
+
"learning_rate": 6.008498583569405e-05,
|
1293 |
+
"loss": 1.8316,
|
1294 |
+
"step": 16100
|
1295 |
+
},
|
1296 |
+
{
|
1297 |
+
"epoch": 43.43,
|
1298 |
+
"learning_rate": 5.9804532577903686e-05,
|
1299 |
+
"loss": 1.8786,
|
1300 |
+
"step": 16200
|
1301 |
+
},
|
1302 |
+
{
|
1303 |
+
"epoch": 43.7,
|
1304 |
+
"learning_rate": 5.9521246458923516e-05,
|
1305 |
+
"loss": 1.84,
|
1306 |
+
"step": 16300
|
1307 |
+
},
|
1308 |
+
{
|
1309 |
+
"epoch": 43.97,
|
1310 |
+
"learning_rate": 5.9237960339943346e-05,
|
1311 |
+
"loss": 1.875,
|
1312 |
+
"step": 16400
|
1313 |
+
},
|
1314 |
+
{
|
1315 |
+
"epoch": 44.24,
|
1316 |
+
"learning_rate": 5.895750708215297e-05,
|
1317 |
+
"loss": 1.8662,
|
1318 |
+
"step": 16500
|
1319 |
+
},
|
1320 |
+
{
|
1321 |
+
"epoch": 44.24,
|
1322 |
+
"eval_cer": 0.42301222894031004,
|
1323 |
+
"eval_loss": 0.9001737236976624,
|
1324 |
+
"eval_runtime": 118.0444,
|
1325 |
+
"eval_samples_per_second": 19.501,
|
1326 |
+
"eval_steps_per_second": 2.44,
|
1327 |
+
"eval_wer": 1.483326115201386,
|
1328 |
+
"step": 16500
|
1329 |
+
},
|
1330 |
+
{
|
1331 |
+
"epoch": 44.5,
|
1332 |
+
"learning_rate": 5.867422096317281e-05,
|
1333 |
+
"loss": 1.8645,
|
1334 |
+
"step": 16600
|
1335 |
+
},
|
1336 |
+
{
|
1337 |
+
"epoch": 44.77,
|
1338 |
+
"learning_rate": 5.839093484419263e-05,
|
1339 |
+
"loss": 1.8243,
|
1340 |
+
"step": 16700
|
1341 |
+
},
|
1342 |
+
{
|
1343 |
+
"epoch": 45.04,
|
1344 |
+
"learning_rate": 5.810764872521247e-05,
|
1345 |
+
"loss": 1.7991,
|
1346 |
+
"step": 16800
|
1347 |
+
},
|
1348 |
+
{
|
1349 |
+
"epoch": 45.31,
|
1350 |
+
"learning_rate": 5.78243626062323e-05,
|
1351 |
+
"loss": 1.7956,
|
1352 |
+
"step": 16900
|
1353 |
+
},
|
1354 |
+
{
|
1355 |
+
"epoch": 45.58,
|
1356 |
+
"learning_rate": 5.754107648725213e-05,
|
1357 |
+
"loss": 1.8136,
|
1358 |
+
"step": 17000
|
1359 |
+
},
|
1360 |
+
{
|
1361 |
+
"epoch": 45.58,
|
1362 |
+
"eval_cer": 0.2986770923307724,
|
1363 |
+
"eval_loss": 0.9075531959533691,
|
1364 |
+
"eval_runtime": 118.7643,
|
1365 |
+
"eval_samples_per_second": 19.383,
|
1366 |
+
"eval_steps_per_second": 2.425,
|
1367 |
+
"eval_wer": 1.1827631009094846,
|
1368 |
+
"step": 17000
|
1369 |
+
},
|
1370 |
+
{
|
1371 |
+
"epoch": 45.84,
|
1372 |
+
"learning_rate": 5.725779036827196e-05,
|
1373 |
+
"loss": 1.8,
|
1374 |
+
"step": 17100
|
1375 |
+
},
|
1376 |
+
{
|
1377 |
+
"epoch": 46.11,
|
1378 |
+
"learning_rate": 5.6974504249291784e-05,
|
1379 |
+
"loss": 1.8339,
|
1380 |
+
"step": 17200
|
1381 |
+
},
|
1382 |
+
{
|
1383 |
+
"epoch": 46.38,
|
1384 |
+
"learning_rate": 5.669121813031162e-05,
|
1385 |
+
"loss": 1.7869,
|
1386 |
+
"step": 17300
|
1387 |
+
},
|
1388 |
+
{
|
1389 |
+
"epoch": 46.65,
|
1390 |
+
"learning_rate": 5.6407932011331444e-05,
|
1391 |
+
"loss": 1.8145,
|
1392 |
+
"step": 17400
|
1393 |
+
},
|
1394 |
+
{
|
1395 |
+
"epoch": 46.92,
|
1396 |
+
"learning_rate": 5.612464589235128e-05,
|
1397 |
+
"loss": 1.7908,
|
1398 |
+
"step": 17500
|
1399 |
+
},
|
1400 |
+
{
|
1401 |
+
"epoch": 46.92,
|
1402 |
+
"eval_cer": 0.42578533436377686,
|
1403 |
+
"eval_loss": 0.8774313926696777,
|
1404 |
+
"eval_runtime": 118.9119,
|
1405 |
+
"eval_samples_per_second": 19.359,
|
1406 |
+
"eval_steps_per_second": 2.422,
|
1407 |
+
"eval_wer": 1.577306193157211,
|
1408 |
+
"step": 17500
|
1409 |
+
},
|
1410 |
+
{
|
1411 |
+
"epoch": 47.18,
|
1412 |
+
"learning_rate": 5.5841359773371105e-05,
|
1413 |
+
"loss": 1.7488,
|
1414 |
+
"step": 17600
|
1415 |
+
},
|
1416 |
+
{
|
1417 |
+
"epoch": 47.45,
|
1418 |
+
"learning_rate": 5.555807365439094e-05,
|
1419 |
+
"loss": 1.7289,
|
1420 |
+
"step": 17700
|
1421 |
+
},
|
1422 |
+
{
|
1423 |
+
"epoch": 47.72,
|
1424 |
+
"learning_rate": 5.5274787535410765e-05,
|
1425 |
+
"loss": 1.7722,
|
1426 |
+
"step": 17800
|
1427 |
+
},
|
1428 |
+
{
|
1429 |
+
"epoch": 47.99,
|
1430 |
+
"learning_rate": 5.49915014164306e-05,
|
1431 |
+
"loss": 1.7659,
|
1432 |
+
"step": 17900
|
1433 |
+
},
|
1434 |
+
{
|
1435 |
+
"epoch": 48.26,
|
1436 |
+
"learning_rate": 5.4708215297450426e-05,
|
1437 |
+
"loss": 1.7354,
|
1438 |
+
"step": 18000
|
1439 |
+
},
|
1440 |
+
{
|
1441 |
+
"epoch": 48.26,
|
1442 |
+
"eval_cer": 0.40241851161522024,
|
1443 |
+
"eval_loss": 0.8727295398712158,
|
1444 |
+
"eval_runtime": 117.7378,
|
1445 |
+
"eval_samples_per_second": 19.552,
|
1446 |
+
"eval_steps_per_second": 2.446,
|
1447 |
+
"eval_wer": 1.5036812472932006,
|
1448 |
+
"step": 18000
|
1449 |
+
},
|
1450 |
+
{
|
1451 |
+
"epoch": 48.52,
|
1452 |
+
"learning_rate": 5.442492917847026e-05,
|
1453 |
+
"loss": 1.7538,
|
1454 |
+
"step": 18100
|
1455 |
+
},
|
1456 |
+
{
|
1457 |
+
"epoch": 48.79,
|
1458 |
+
"learning_rate": 5.4141643059490086e-05,
|
1459 |
+
"loss": 1.7304,
|
1460 |
+
"step": 18200
|
1461 |
+
},
|
1462 |
+
{
|
1463 |
+
"epoch": 49.06,
|
1464 |
+
"learning_rate": 5.385835694050991e-05,
|
1465 |
+
"loss": 1.7194,
|
1466 |
+
"step": 18300
|
1467 |
+
},
|
1468 |
+
{
|
1469 |
+
"epoch": 49.33,
|
1470 |
+
"learning_rate": 5.357507082152975e-05,
|
1471 |
+
"loss": 1.6824,
|
1472 |
+
"step": 18400
|
1473 |
+
},
|
1474 |
+
{
|
1475 |
+
"epoch": 49.6,
|
1476 |
+
"learning_rate": 5.329178470254958e-05,
|
1477 |
+
"loss": 1.6739,
|
1478 |
+
"step": 18500
|
1479 |
+
},
|
1480 |
+
{
|
1481 |
+
"epoch": 49.6,
|
1482 |
+
"eval_cer": 0.27890166840932856,
|
1483 |
+
"eval_loss": 0.8635693788528442,
|
1484 |
+
"eval_runtime": 118.5578,
|
1485 |
+
"eval_samples_per_second": 19.417,
|
1486 |
+
"eval_steps_per_second": 2.429,
|
1487 |
+
"eval_wer": 1.1238631442182763,
|
1488 |
+
"step": 18500
|
1489 |
+
},
|
1490 |
+
{
|
1491 |
+
"epoch": 49.86,
|
1492 |
+
"learning_rate": 5.300849858356941e-05,
|
1493 |
+
"loss": 1.6807,
|
1494 |
+
"step": 18600
|
1495 |
+
},
|
1496 |
+
{
|
1497 |
+
"epoch": 50.13,
|
1498 |
+
"learning_rate": 5.272521246458924e-05,
|
1499 |
+
"loss": 1.6651,
|
1500 |
+
"step": 18700
|
1501 |
+
},
|
1502 |
+
{
|
1503 |
+
"epoch": 50.4,
|
1504 |
+
"learning_rate": 5.2441926345609075e-05,
|
1505 |
+
"loss": 1.7008,
|
1506 |
+
"step": 18800
|
1507 |
+
},
|
1508 |
+
{
|
1509 |
+
"epoch": 50.67,
|
1510 |
+
"learning_rate": 5.21586402266289e-05,
|
1511 |
+
"loss": 1.6183,
|
1512 |
+
"step": 18900
|
1513 |
+
},
|
1514 |
+
{
|
1515 |
+
"epoch": 50.94,
|
1516 |
+
"learning_rate": 5.1875354107648735e-05,
|
1517 |
+
"loss": 1.6457,
|
1518 |
+
"step": 19000
|
1519 |
+
},
|
1520 |
+
{
|
1521 |
+
"epoch": 50.94,
|
1522 |
+
"eval_cer": 0.3103605037050507,
|
1523 |
+
"eval_loss": 0.8516315221786499,
|
1524 |
+
"eval_runtime": 117.1957,
|
1525 |
+
"eval_samples_per_second": 19.642,
|
1526 |
+
"eval_steps_per_second": 2.457,
|
1527 |
+
"eval_wer": 1.2269380684278908,
|
1528 |
+
"step": 19000
|
1529 |
+
},
|
1530 |
+
{
|
1531 |
+
"epoch": 51.21,
|
1532 |
+
"learning_rate": 5.159206798866856e-05,
|
1533 |
+
"loss": 1.6506,
|
1534 |
+
"step": 19100
|
1535 |
+
},
|
1536 |
+
{
|
1537 |
+
"epoch": 51.47,
|
1538 |
+
"learning_rate": 5.130878186968838e-05,
|
1539 |
+
"loss": 1.6612,
|
1540 |
+
"step": 19200
|
1541 |
+
},
|
1542 |
+
{
|
1543 |
+
"epoch": 51.74,
|
1544 |
+
"learning_rate": 5.102549575070822e-05,
|
1545 |
+
"loss": 1.6339,
|
1546 |
+
"step": 19300
|
1547 |
+
},
|
1548 |
+
{
|
1549 |
+
"epoch": 52.01,
|
1550 |
+
"learning_rate": 5.074220963172804e-05,
|
1551 |
+
"loss": 1.6134,
|
1552 |
+
"step": 19400
|
1553 |
+
},
|
1554 |
+
{
|
1555 |
+
"epoch": 52.28,
|
1556 |
+
"learning_rate": 5.045892351274788e-05,
|
1557 |
+
"loss": 1.5847,
|
1558 |
+
"step": 19500
|
1559 |
+
},
|
1560 |
+
{
|
1561 |
+
"epoch": 52.28,
|
1562 |
+
"eval_cer": 0.33600036368595715,
|
1563 |
+
"eval_loss": 0.8398524522781372,
|
1564 |
+
"eval_runtime": 116.7512,
|
1565 |
+
"eval_samples_per_second": 19.717,
|
1566 |
+
"eval_steps_per_second": 2.467,
|
1567 |
+
"eval_wer": 1.3308791684711996,
|
1568 |
+
"step": 19500
|
1569 |
+
},
|
1570 |
+
{
|
1571 |
+
"epoch": 52.55,
|
1572 |
+
"learning_rate": 5.01756373937677e-05,
|
1573 |
+
"loss": 1.5839,
|
1574 |
+
"step": 19600
|
1575 |
+
},
|
1576 |
+
{
|
1577 |
+
"epoch": 52.81,
|
1578 |
+
"learning_rate": 4.9892351274787533e-05,
|
1579 |
+
"loss": 1.5887,
|
1580 |
+
"step": 19700
|
1581 |
+
},
|
1582 |
+
{
|
1583 |
+
"epoch": 53.08,
|
1584 |
+
"learning_rate": 4.9609065155807364e-05,
|
1585 |
+
"loss": 1.6578,
|
1586 |
+
"step": 19800
|
1587 |
+
},
|
1588 |
+
{
|
1589 |
+
"epoch": 53.35,
|
1590 |
+
"learning_rate": 4.9325779036827194e-05,
|
1591 |
+
"loss": 1.5896,
|
1592 |
+
"step": 19900
|
1593 |
+
},
|
1594 |
+
{
|
1595 |
+
"epoch": 53.62,
|
1596 |
+
"learning_rate": 4.9042492917847024e-05,
|
1597 |
+
"loss": 1.5971,
|
1598 |
+
"step": 20000
|
1599 |
+
},
|
1600 |
+
{
|
1601 |
+
"epoch": 53.62,
|
1602 |
+
"eval_cer": 0.3334545619857253,
|
1603 |
+
"eval_loss": 0.844145655632019,
|
1604 |
+
"eval_runtime": 117.5266,
|
1605 |
+
"eval_samples_per_second": 19.587,
|
1606 |
+
"eval_steps_per_second": 2.451,
|
1607 |
+
"eval_wer": 1.3152880034647034,
|
1608 |
+
"step": 20000
|
1609 |
+
},
|
1610 |
+
{
|
1611 |
+
"epoch": 53.89,
|
1612 |
+
"learning_rate": 4.8759206798866854e-05,
|
1613 |
+
"loss": 1.5645,
|
1614 |
+
"step": 20100
|
1615 |
+
},
|
1616 |
+
{
|
1617 |
+
"epoch": 54.16,
|
1618 |
+
"learning_rate": 4.847592067988669e-05,
|
1619 |
+
"loss": 1.481,
|
1620 |
+
"step": 20200
|
1621 |
+
},
|
1622 |
+
{
|
1623 |
+
"epoch": 54.42,
|
1624 |
+
"learning_rate": 4.819263456090652e-05,
|
1625 |
+
"loss": 1.5474,
|
1626 |
+
"step": 20300
|
1627 |
+
},
|
1628 |
+
{
|
1629 |
+
"epoch": 54.69,
|
1630 |
+
"learning_rate": 4.790934844192635e-05,
|
1631 |
+
"loss": 1.576,
|
1632 |
+
"step": 20400
|
1633 |
+
},
|
1634 |
+
{
|
1635 |
+
"epoch": 54.96,
|
1636 |
+
"learning_rate": 4.762606232294618e-05,
|
1637 |
+
"loss": 1.602,
|
1638 |
+
"step": 20500
|
1639 |
+
},
|
1640 |
+
{
|
1641 |
+
"epoch": 54.96,
|
1642 |
+
"eval_cer": 0.34331954357412375,
|
1643 |
+
"eval_loss": 0.8589980602264404,
|
1644 |
+
"eval_runtime": 117.5211,
|
1645 |
+
"eval_samples_per_second": 19.588,
|
1646 |
+
"eval_steps_per_second": 2.451,
|
1647 |
+
"eval_wer": 1.2932005197055003,
|
1648 |
+
"step": 20500
|
1649 |
+
},
|
1650 |
+
{
|
1651 |
+
"epoch": 55.23,
|
1652 |
+
"learning_rate": 4.734277620396601e-05,
|
1653 |
+
"loss": 1.6106,
|
1654 |
+
"step": 20600
|
1655 |
+
},
|
1656 |
+
{
|
1657 |
+
"epoch": 55.5,
|
1658 |
+
"learning_rate": 4.7059490084985836e-05,
|
1659 |
+
"loss": 1.551,
|
1660 |
+
"step": 20700
|
1661 |
+
},
|
1662 |
+
{
|
1663 |
+
"epoch": 55.76,
|
1664 |
+
"learning_rate": 4.6776203966005666e-05,
|
1665 |
+
"loss": 1.5118,
|
1666 |
+
"step": 20800
|
1667 |
+
},
|
1668 |
+
{
|
1669 |
+
"epoch": 56.03,
|
1670 |
+
"learning_rate": 4.64957507082153e-05,
|
1671 |
+
"loss": 1.5028,
|
1672 |
+
"step": 20900
|
1673 |
+
},
|
1674 |
+
{
|
1675 |
+
"epoch": 56.3,
|
1676 |
+
"learning_rate": 4.621246458923513e-05,
|
1677 |
+
"loss": 1.5063,
|
1678 |
+
"step": 21000
|
1679 |
+
},
|
1680 |
+
{
|
1681 |
+
"epoch": 56.3,
|
1682 |
+
"eval_cer": 0.28749374914761106,
|
1683 |
+
"eval_loss": 0.8333584070205688,
|
1684 |
+
"eval_runtime": 116.6165,
|
1685 |
+
"eval_samples_per_second": 19.74,
|
1686 |
+
"eval_steps_per_second": 2.47,
|
1687 |
+
"eval_wer": 1.1312256388046773,
|
1688 |
+
"step": 21000
|
1689 |
+
},
|
1690 |
+
{
|
1691 |
+
"epoch": 56.57,
|
1692 |
+
"learning_rate": 4.592917847025496e-05,
|
1693 |
+
"loss": 1.5195,
|
1694 |
+
"step": 21100
|
1695 |
+
},
|
1696 |
+
{
|
1697 |
+
"epoch": 56.84,
|
1698 |
+
"learning_rate": 4.564589235127479e-05,
|
1699 |
+
"loss": 1.549,
|
1700 |
+
"step": 21200
|
1701 |
+
},
|
1702 |
+
{
|
1703 |
+
"epoch": 57.1,
|
1704 |
+
"learning_rate": 4.536260623229462e-05,
|
1705 |
+
"loss": 1.5636,
|
1706 |
+
"step": 21300
|
1707 |
+
},
|
1708 |
+
{
|
1709 |
+
"epoch": 57.37,
|
1710 |
+
"learning_rate": 4.507932011331445e-05,
|
1711 |
+
"loss": 1.482,
|
1712 |
+
"step": 21400
|
1713 |
+
},
|
1714 |
+
{
|
1715 |
+
"epoch": 57.64,
|
1716 |
+
"learning_rate": 4.479603399433428e-05,
|
1717 |
+
"loss": 1.4631,
|
1718 |
+
"step": 21500
|
1719 |
+
},
|
1720 |
+
{
|
1721 |
+
"epoch": 57.64,
|
1722 |
+
"eval_cer": 0.2999045324362413,
|
1723 |
+
"eval_loss": 0.8474038243293762,
|
1724 |
+
"eval_runtime": 118.5932,
|
1725 |
+
"eval_samples_per_second": 19.411,
|
1726 |
+
"eval_steps_per_second": 2.428,
|
1727 |
+
"eval_wer": 1.169770463404071,
|
1728 |
+
"step": 21500
|
1729 |
+
},
|
1730 |
+
{
|
1731 |
+
"epoch": 57.91,
|
1732 |
+
"learning_rate": 4.451274787535411e-05,
|
1733 |
+
"loss": 1.4869,
|
1734 |
+
"step": 21600
|
1735 |
+
},
|
1736 |
+
{
|
1737 |
+
"epoch": 58.18,
|
1738 |
+
"learning_rate": 4.422946175637394e-05,
|
1739 |
+
"loss": 1.4692,
|
1740 |
+
"step": 21700
|
1741 |
+
},
|
1742 |
+
{
|
1743 |
+
"epoch": 58.44,
|
1744 |
+
"learning_rate": 4.394617563739377e-05,
|
1745 |
+
"loss": 1.4673,
|
1746 |
+
"step": 21800
|
1747 |
+
},
|
1748 |
+
{
|
1749 |
+
"epoch": 58.71,
|
1750 |
+
"learning_rate": 4.36628895184136e-05,
|
1751 |
+
"loss": 1.501,
|
1752 |
+
"step": 21900
|
1753 |
+
},
|
1754 |
+
{
|
1755 |
+
"epoch": 58.98,
|
1756 |
+
"learning_rate": 4.3379603399433425e-05,
|
1757 |
+
"loss": 1.4997,
|
1758 |
+
"step": 22000
|
1759 |
+
},
|
1760 |
+
{
|
1761 |
+
"epoch": 58.98,
|
1762 |
+
"eval_cer": 0.38541619311724323,
|
1763 |
+
"eval_loss": 0.8637779355049133,
|
1764 |
+
"eval_runtime": 116.453,
|
1765 |
+
"eval_samples_per_second": 19.768,
|
1766 |
+
"eval_steps_per_second": 2.473,
|
1767 |
+
"eval_wer": 1.4278908618449546,
|
1768 |
+
"step": 22000
|
1769 |
+
},
|
1770 |
+
{
|
1771 |
+
"epoch": 59.25,
|
1772 |
+
"learning_rate": 4.3096317280453255e-05,
|
1773 |
+
"loss": 1.4404,
|
1774 |
+
"step": 22100
|
1775 |
+
},
|
1776 |
+
{
|
1777 |
+
"epoch": 59.52,
|
1778 |
+
"learning_rate": 4.2815864022662894e-05,
|
1779 |
+
"loss": 1.4639,
|
1780 |
+
"step": 22200
|
1781 |
+
},
|
1782 |
+
{
|
1783 |
+
"epoch": 59.78,
|
1784 |
+
"learning_rate": 4.2532577903682725e-05,
|
1785 |
+
"loss": 1.4724,
|
1786 |
+
"step": 22300
|
1787 |
+
},
|
1788 |
+
{
|
1789 |
+
"epoch": 60.05,
|
1790 |
+
"learning_rate": 4.224929178470255e-05,
|
1791 |
+
"loss": 1.4146,
|
1792 |
+
"step": 22400
|
1793 |
+
},
|
1794 |
+
{
|
1795 |
+
"epoch": 60.32,
|
1796 |
+
"learning_rate": 4.196600566572238e-05,
|
1797 |
+
"loss": 1.4301,
|
1798 |
+
"step": 22500
|
1799 |
+
},
|
1800 |
+
{
|
1801 |
+
"epoch": 60.32,
|
1802 |
+
"eval_cer": 0.32995408464790654,
|
1803 |
+
"eval_loss": 0.8549993634223938,
|
1804 |
+
"eval_runtime": 118.194,
|
1805 |
+
"eval_samples_per_second": 19.476,
|
1806 |
+
"eval_steps_per_second": 2.437,
|
1807 |
+
"eval_wer": 1.27371156344738,
|
1808 |
+
"step": 22500
|
1809 |
+
},
|
1810 |
+
{
|
1811 |
+
"epoch": 60.59,
|
1812 |
+
"learning_rate": 4.168271954674221e-05,
|
1813 |
+
"loss": 1.4288,
|
1814 |
+
"step": 22600
|
1815 |
+
},
|
1816 |
+
{
|
1817 |
+
"epoch": 60.86,
|
1818 |
+
"learning_rate": 4.139943342776204e-05,
|
1819 |
+
"loss": 1.4183,
|
1820 |
+
"step": 22700
|
1821 |
+
},
|
1822 |
+
{
|
1823 |
+
"epoch": 61.13,
|
1824 |
+
"learning_rate": 4.111614730878187e-05,
|
1825 |
+
"loss": 1.3995,
|
1826 |
+
"step": 22800
|
1827 |
+
},
|
1828 |
+
{
|
1829 |
+
"epoch": 61.39,
|
1830 |
+
"learning_rate": 4.08328611898017e-05,
|
1831 |
+
"loss": 1.3967,
|
1832 |
+
"step": 22900
|
1833 |
+
},
|
1834 |
+
{
|
1835 |
+
"epoch": 61.66,
|
1836 |
+
"learning_rate": 4.054957507082153e-05,
|
1837 |
+
"loss": 1.3798,
|
1838 |
+
"step": 23000
|
1839 |
+
},
|
1840 |
+
{
|
1841 |
+
"epoch": 61.66,
|
1842 |
+
"eval_cer": 0.2934491066963677,
|
1843 |
+
"eval_loss": 0.8265963792800903,
|
1844 |
+
"eval_runtime": 118.5302,
|
1845 |
+
"eval_samples_per_second": 19.421,
|
1846 |
+
"eval_steps_per_second": 2.43,
|
1847 |
+
"eval_wer": 1.1801645734084019,
|
1848 |
+
"step": 23000
|
1849 |
+
},
|
1850 |
+
{
|
1851 |
+
"epoch": 61.93,
|
1852 |
+
"learning_rate": 4.026628895184136e-05,
|
1853 |
+
"loss": 1.3781,
|
1854 |
+
"step": 23100
|
1855 |
+
},
|
1856 |
+
{
|
1857 |
+
"epoch": 62.2,
|
1858 |
+
"learning_rate": 3.99830028328612e-05,
|
1859 |
+
"loss": 1.412,
|
1860 |
+
"step": 23200
|
1861 |
+
},
|
1862 |
+
{
|
1863 |
+
"epoch": 62.47,
|
1864 |
+
"learning_rate": 3.969971671388103e-05,
|
1865 |
+
"loss": 1.3643,
|
1866 |
+
"step": 23300
|
1867 |
+
},
|
1868 |
+
{
|
1869 |
+
"epoch": 62.73,
|
1870 |
+
"learning_rate": 3.941643059490085e-05,
|
1871 |
+
"loss": 1.3848,
|
1872 |
+
"step": 23400
|
1873 |
+
},
|
1874 |
+
{
|
1875 |
+
"epoch": 63.0,
|
1876 |
+
"learning_rate": 3.913314447592068e-05,
|
1877 |
+
"loss": 1.3454,
|
1878 |
+
"step": 23500
|
1879 |
+
},
|
1880 |
+
{
|
1881 |
+
"epoch": 63.0,
|
1882 |
+
"eval_cer": 0.3711415192980861,
|
1883 |
+
"eval_loss": 0.8234531879425049,
|
1884 |
+
"eval_runtime": 118.9878,
|
1885 |
+
"eval_samples_per_second": 19.347,
|
1886 |
+
"eval_steps_per_second": 2.42,
|
1887 |
+
"eval_wer": 1.3815504547423128,
|
1888 |
+
"step": 23500
|
1889 |
+
},
|
1890 |
+
{
|
1891 |
+
"epoch": 63.27,
|
1892 |
+
"learning_rate": 3.884985835694051e-05,
|
1893 |
+
"loss": 1.3549,
|
1894 |
+
"step": 23600
|
1895 |
+
},
|
1896 |
+
{
|
1897 |
+
"epoch": 63.54,
|
1898 |
+
"learning_rate": 3.856657223796034e-05,
|
1899 |
+
"loss": 1.3746,
|
1900 |
+
"step": 23700
|
1901 |
+
},
|
1902 |
+
{
|
1903 |
+
"epoch": 63.81,
|
1904 |
+
"learning_rate": 3.828328611898017e-05,
|
1905 |
+
"loss": 1.3619,
|
1906 |
+
"step": 23800
|
1907 |
+
},
|
1908 |
+
{
|
1909 |
+
"epoch": 64.07,
|
1910 |
+
"learning_rate": 3.8e-05,
|
1911 |
+
"loss": 1.4683,
|
1912 |
+
"step": 23900
|
1913 |
+
},
|
1914 |
+
{
|
1915 |
+
"epoch": 64.34,
|
1916 |
+
"learning_rate": 3.771671388101983e-05,
|
1917 |
+
"loss": 1.3678,
|
1918 |
+
"step": 24000
|
1919 |
+
},
|
1920 |
+
{
|
1921 |
+
"epoch": 64.34,
|
1922 |
+
"eval_cer": 0.5034777469654953,
|
1923 |
+
"eval_loss": 0.8549569249153137,
|
1924 |
+
"eval_runtime": 117.2623,
|
1925 |
+
"eval_samples_per_second": 19.631,
|
1926 |
+
"eval_steps_per_second": 2.456,
|
1927 |
+
"eval_wer": 1.642702468601126,
|
1928 |
+
"step": 24000
|
1929 |
+
},
|
1930 |
+
{
|
1931 |
+
"epoch": 64.61,
|
1932 |
+
"learning_rate": 3.743342776203966e-05,
|
1933 |
+
"loss": 1.3534,
|
1934 |
+
"step": 24100
|
1935 |
+
},
|
1936 |
+
{
|
1937 |
+
"epoch": 64.88,
|
1938 |
+
"learning_rate": 3.715014164305949e-05,
|
1939 |
+
"loss": 1.341,
|
1940 |
+
"step": 24200
|
1941 |
+
},
|
1942 |
+
{
|
1943 |
+
"epoch": 65.15,
|
1944 |
+
"learning_rate": 3.686685552407932e-05,
|
1945 |
+
"loss": 1.2738,
|
1946 |
+
"step": 24300
|
1947 |
+
},
|
1948 |
+
{
|
1949 |
+
"epoch": 65.41,
|
1950 |
+
"learning_rate": 3.658356940509915e-05,
|
1951 |
+
"loss": 1.3237,
|
1952 |
+
"step": 24400
|
1953 |
+
},
|
1954 |
+
{
|
1955 |
+
"epoch": 65.68,
|
1956 |
+
"learning_rate": 3.630028328611898e-05,
|
1957 |
+
"loss": 1.3761,
|
1958 |
+
"step": 24500
|
1959 |
+
},
|
1960 |
+
{
|
1961 |
+
"epoch": 65.68,
|
1962 |
+
"eval_cer": 0.490703277719689,
|
1963 |
+
"eval_loss": 0.8510046601295471,
|
1964 |
+
"eval_runtime": 120.5712,
|
1965 |
+
"eval_samples_per_second": 19.092,
|
1966 |
+
"eval_steps_per_second": 2.389,
|
1967 |
+
"eval_wer": 1.6708531831961888,
|
1968 |
+
"step": 24500
|
1969 |
+
},
|
1970 |
+
{
|
1971 |
+
"epoch": 65.95,
|
1972 |
+
"learning_rate": 3.6016997167138814e-05,
|
1973 |
+
"loss": 1.3209,
|
1974 |
+
"step": 24600
|
1975 |
+
},
|
1976 |
+
{
|
1977 |
+
"epoch": 66.22,
|
1978 |
+
"learning_rate": 3.5733711048158644e-05,
|
1979 |
+
"loss": 1.4141,
|
1980 |
+
"step": 24700
|
1981 |
+
},
|
1982 |
+
{
|
1983 |
+
"epoch": 66.49,
|
1984 |
+
"learning_rate": 3.5450424929178474e-05,
|
1985 |
+
"loss": 1.3229,
|
1986 |
+
"step": 24800
|
1987 |
+
},
|
1988 |
+
{
|
1989 |
+
"epoch": 66.76,
|
1990 |
+
"learning_rate": 3.5167138810198305e-05,
|
1991 |
+
"loss": 1.3413,
|
1992 |
+
"step": 24900
|
1993 |
+
},
|
1994 |
+
{
|
1995 |
+
"epoch": 67.02,
|
1996 |
+
"learning_rate": 3.4883852691218135e-05,
|
1997 |
+
"loss": 1.2668,
|
1998 |
+
"step": 25000
|
1999 |
+
},
|
2000 |
+
{
|
2001 |
+
"epoch": 67.02,
|
2002 |
+
"eval_cer": 0.45051597945174343,
|
2003 |
+
"eval_loss": 0.8514528274536133,
|
2004 |
+
"eval_runtime": 118.6832,
|
2005 |
+
"eval_samples_per_second": 19.396,
|
2006 |
+
"eval_steps_per_second": 2.427,
|
2007 |
+
"eval_wer": 1.5842355998267648,
|
2008 |
+
"step": 25000
|
2009 |
+
},
|
2010 |
+
{
|
2011 |
+
"epoch": 67.29,
|
2012 |
+
"learning_rate": 3.4600566572237965e-05,
|
2013 |
+
"loss": 1.3151,
|
2014 |
+
"step": 25100
|
2015 |
+
},
|
2016 |
+
{
|
2017 |
+
"epoch": 67.56,
|
2018 |
+
"learning_rate": 3.4317280453257796e-05,
|
2019 |
+
"loss": 1.3491,
|
2020 |
+
"step": 25200
|
2021 |
+
},
|
2022 |
+
{
|
2023 |
+
"epoch": 67.83,
|
2024 |
+
"learning_rate": 3.4033994334277626e-05,
|
2025 |
+
"loss": 1.3392,
|
2026 |
+
"step": 25300
|
2027 |
+
},
|
2028 |
+
{
|
2029 |
+
"epoch": 68.1,
|
2030 |
+
"learning_rate": 3.375070821529745e-05,
|
2031 |
+
"loss": 1.3551,
|
2032 |
+
"step": 25400
|
2033 |
+
},
|
2034 |
+
{
|
2035 |
+
"epoch": 68.36,
|
2036 |
+
"learning_rate": 3.346742209631728e-05,
|
2037 |
+
"loss": 1.2835,
|
2038 |
+
"step": 25500
|
2039 |
+
},
|
2040 |
+
{
|
2041 |
+
"epoch": 68.36,
|
2042 |
+
"eval_cer": 0.4221030140473701,
|
2043 |
+
"eval_loss": 0.8283268213272095,
|
2044 |
+
"eval_runtime": 118.4861,
|
2045 |
+
"eval_samples_per_second": 19.428,
|
2046 |
+
"eval_steps_per_second": 2.431,
|
2047 |
+
"eval_wer": 1.5352966652230402,
|
2048 |
+
"step": 25500
|
2049 |
+
},
|
2050 |
+
{
|
2051 |
+
"epoch": 68.63,
|
2052 |
+
"learning_rate": 3.318413597733711e-05,
|
2053 |
+
"loss": 1.2847,
|
2054 |
+
"step": 25600
|
2055 |
+
},
|
2056 |
+
{
|
2057 |
+
"epoch": 68.9,
|
2058 |
+
"learning_rate": 3.290084985835694e-05,
|
2059 |
+
"loss": 1.3164,
|
2060 |
+
"step": 25700
|
2061 |
+
},
|
2062 |
+
{
|
2063 |
+
"epoch": 69.17,
|
2064 |
+
"learning_rate": 3.261756373937677e-05,
|
2065 |
+
"loss": 1.2624,
|
2066 |
+
"step": 25800
|
2067 |
+
},
|
2068 |
+
{
|
2069 |
+
"epoch": 69.44,
|
2070 |
+
"learning_rate": 3.23342776203966e-05,
|
2071 |
+
"loss": 1.3301,
|
2072 |
+
"step": 25900
|
2073 |
+
},
|
2074 |
+
{
|
2075 |
+
"epoch": 69.7,
|
2076 |
+
"learning_rate": 3.205099150141643e-05,
|
2077 |
+
"loss": 1.2961,
|
2078 |
+
"step": 26000
|
2079 |
+
},
|
2080 |
+
{
|
2081 |
+
"epoch": 69.7,
|
2082 |
+
"eval_cer": 0.43692321680229124,
|
2083 |
+
"eval_loss": 0.8339292407035828,
|
2084 |
+
"eval_runtime": 119.6696,
|
2085 |
+
"eval_samples_per_second": 19.236,
|
2086 |
+
"eval_steps_per_second": 2.407,
|
2087 |
+
"eval_wer": 1.574274577739281,
|
2088 |
+
"step": 26000
|
2089 |
+
},
|
2090 |
+
{
|
2091 |
+
"epoch": 69.97,
|
2092 |
+
"learning_rate": 3.176770538243626e-05,
|
2093 |
+
"loss": 1.2716,
|
2094 |
+
"step": 26100
|
2095 |
+
},
|
2096 |
+
{
|
2097 |
+
"epoch": 70.24,
|
2098 |
+
"learning_rate": 3.148441926345609e-05,
|
2099 |
+
"loss": 1.2832,
|
2100 |
+
"step": 26200
|
2101 |
+
},
|
2102 |
+
{
|
2103 |
+
"epoch": 70.51,
|
2104 |
+
"learning_rate": 3.120113314447592e-05,
|
2105 |
+
"loss": 1.2607,
|
2106 |
+
"step": 26300
|
2107 |
+
},
|
2108 |
+
{
|
2109 |
+
"epoch": 70.78,
|
2110 |
+
"learning_rate": 3.091784702549575e-05,
|
2111 |
+
"loss": 1.2774,
|
2112 |
+
"step": 26400
|
2113 |
+
},
|
2114 |
+
{
|
2115 |
+
"epoch": 71.05,
|
2116 |
+
"learning_rate": 3.0637393767705384e-05,
|
2117 |
+
"loss": 1.2656,
|
2118 |
+
"step": 26500
|
2119 |
+
},
|
2120 |
+
{
|
2121 |
+
"epoch": 71.05,
|
2122 |
+
"eval_cer": 0.42169386734554715,
|
2123 |
+
"eval_loss": 0.8330555558204651,
|
2124 |
+
"eval_runtime": 120.4651,
|
2125 |
+
"eval_samples_per_second": 19.109,
|
2126 |
+
"eval_steps_per_second": 2.391,
|
2127 |
+
"eval_wer": 1.5331312256388048,
|
2128 |
+
"step": 26500
|
2129 |
+
},
|
2130 |
+
{
|
2131 |
+
"epoch": 71.31,
|
2132 |
+
"learning_rate": 3.0354107648725215e-05,
|
2133 |
+
"loss": 1.2885,
|
2134 |
+
"step": 26600
|
2135 |
+
},
|
2136 |
+
{
|
2137 |
+
"epoch": 71.58,
|
2138 |
+
"learning_rate": 3.007082152974504e-05,
|
2139 |
+
"loss": 1.2552,
|
2140 |
+
"step": 26700
|
2141 |
+
},
|
2142 |
+
{
|
2143 |
+
"epoch": 71.85,
|
2144 |
+
"learning_rate": 2.9787535410764872e-05,
|
2145 |
+
"loss": 1.2682,
|
2146 |
+
"step": 26800
|
2147 |
+
},
|
2148 |
+
{
|
2149 |
+
"epoch": 72.12,
|
2150 |
+
"learning_rate": 2.9504249291784702e-05,
|
2151 |
+
"loss": 1.2665,
|
2152 |
+
"step": 26900
|
2153 |
+
},
|
2154 |
+
{
|
2155 |
+
"epoch": 72.39,
|
2156 |
+
"learning_rate": 2.9220963172804532e-05,
|
2157 |
+
"loss": 1.2556,
|
2158 |
+
"step": 27000
|
2159 |
+
},
|
2160 |
+
{
|
2161 |
+
"epoch": 72.39,
|
2162 |
+
"eval_cer": 0.4109196708642088,
|
2163 |
+
"eval_loss": 0.8242233991622925,
|
2164 |
+
"eval_runtime": 118.9857,
|
2165 |
+
"eval_samples_per_second": 19.347,
|
2166 |
+
"eval_steps_per_second": 2.42,
|
2167 |
+
"eval_wer": 1.4707665656128195,
|
2168 |
+
"step": 27000
|
2169 |
+
},
|
2170 |
+
{
|
2171 |
+
"epoch": 72.65,
|
2172 |
+
"learning_rate": 2.8937677053824363e-05,
|
2173 |
+
"loss": 1.2125,
|
2174 |
+
"step": 27100
|
2175 |
+
},
|
2176 |
+
{
|
2177 |
+
"epoch": 72.92,
|
2178 |
+
"learning_rate": 2.8654390934844193e-05,
|
2179 |
+
"loss": 1.2157,
|
2180 |
+
"step": 27200
|
2181 |
+
},
|
2182 |
+
{
|
2183 |
+
"epoch": 73.19,
|
2184 |
+
"learning_rate": 2.8371104815864023e-05,
|
2185 |
+
"loss": 1.2664,
|
2186 |
+
"step": 27300
|
2187 |
+
},
|
2188 |
+
{
|
2189 |
+
"epoch": 73.46,
|
2190 |
+
"learning_rate": 2.8087818696883857e-05,
|
2191 |
+
"loss": 1.2075,
|
2192 |
+
"step": 27400
|
2193 |
+
},
|
2194 |
+
{
|
2195 |
+
"epoch": 73.73,
|
2196 |
+
"learning_rate": 2.7804532577903687e-05,
|
2197 |
+
"loss": 1.2043,
|
2198 |
+
"step": 27500
|
2199 |
+
},
|
2200 |
+
{
|
2201 |
+
"epoch": 73.73,
|
2202 |
+
"eval_cer": 0.40305496204027824,
|
2203 |
+
"eval_loss": 0.8244912624359131,
|
2204 |
+
"eval_runtime": 118.8221,
|
2205 |
+
"eval_samples_per_second": 19.373,
|
2206 |
+
"eval_steps_per_second": 2.424,
|
2207 |
+
"eval_wer": 1.4469467301862278,
|
2208 |
+
"step": 27500
|
2209 |
+
},
|
2210 |
+
{
|
2211 |
+
"epoch": 73.99,
|
2212 |
+
"learning_rate": 2.7521246458923517e-05,
|
2213 |
+
"loss": 1.2218,
|
2214 |
+
"step": 27600
|
2215 |
+
},
|
2216 |
+
{
|
2217 |
+
"epoch": 74.26,
|
2218 |
+
"learning_rate": 2.723796033994334e-05,
|
2219 |
+
"loss": 1.2257,
|
2220 |
+
"step": 27700
|
2221 |
+
},
|
2222 |
+
{
|
2223 |
+
"epoch": 74.53,
|
2224 |
+
"learning_rate": 2.695467422096317e-05,
|
2225 |
+
"loss": 1.1943,
|
2226 |
+
"step": 27800
|
2227 |
+
},
|
2228 |
+
{
|
2229 |
+
"epoch": 74.8,
|
2230 |
+
"learning_rate": 2.6671388101983e-05,
|
2231 |
+
"loss": 1.2292,
|
2232 |
+
"step": 27900
|
2233 |
+
},
|
2234 |
+
{
|
2235 |
+
"epoch": 75.07,
|
2236 |
+
"learning_rate": 2.638810198300283e-05,
|
2237 |
+
"loss": 1.2722,
|
2238 |
+
"step": 28000
|
2239 |
+
},
|
2240 |
+
{
|
2241 |
+
"epoch": 75.07,
|
2242 |
+
"eval_cer": 0.4095558485247988,
|
2243 |
+
"eval_loss": 0.8202398419380188,
|
2244 |
+
"eval_runtime": 118.3165,
|
2245 |
+
"eval_samples_per_second": 19.456,
|
2246 |
+
"eval_steps_per_second": 2.434,
|
2247 |
+
"eval_wer": 1.4924209614551753,
|
2248 |
+
"step": 28000
|
2249 |
+
},
|
2250 |
+
{
|
2251 |
+
"epoch": 75.33,
|
2252 |
+
"learning_rate": 2.6104815864022665e-05,
|
2253 |
+
"loss": 1.2,
|
2254 |
+
"step": 28100
|
2255 |
+
},
|
2256 |
+
{
|
2257 |
+
"epoch": 75.6,
|
2258 |
+
"learning_rate": 2.5821529745042495e-05,
|
2259 |
+
"loss": 1.1984,
|
2260 |
+
"step": 28200
|
2261 |
+
},
|
2262 |
+
{
|
2263 |
+
"epoch": 75.87,
|
2264 |
+
"learning_rate": 2.5538243626062326e-05,
|
2265 |
+
"loss": 1.204,
|
2266 |
+
"step": 28300
|
2267 |
+
},
|
2268 |
+
{
|
2269 |
+
"epoch": 76.14,
|
2270 |
+
"learning_rate": 2.5254957507082156e-05,
|
2271 |
+
"loss": 1.103,
|
2272 |
+
"step": 28400
|
2273 |
+
},
|
2274 |
+
{
|
2275 |
+
"epoch": 76.41,
|
2276 |
+
"learning_rate": 2.4971671388101983e-05,
|
2277 |
+
"loss": 1.202,
|
2278 |
+
"step": 28500
|
2279 |
+
},
|
2280 |
+
{
|
2281 |
+
"epoch": 76.41,
|
2282 |
+
"eval_cer": 0.37186889121243805,
|
2283 |
+
"eval_loss": 0.8290452361106873,
|
2284 |
+
"eval_runtime": 117.9552,
|
2285 |
+
"eval_samples_per_second": 19.516,
|
2286 |
+
"eval_steps_per_second": 2.442,
|
2287 |
+
"eval_wer": 1.3806842789086184,
|
2288 |
+
"step": 28500
|
2289 |
+
},
|
2290 |
+
{
|
2291 |
+
"epoch": 76.67,
|
2292 |
+
"learning_rate": 2.4691218130311615e-05,
|
2293 |
+
"loss": 1.1915,
|
2294 |
+
"step": 28600
|
2295 |
+
},
|
2296 |
+
{
|
2297 |
+
"epoch": 76.94,
|
2298 |
+
"learning_rate": 2.4407932011331446e-05,
|
2299 |
+
"loss": 1.1898,
|
2300 |
+
"step": 28700
|
2301 |
+
},
|
2302 |
+
{
|
2303 |
+
"epoch": 77.21,
|
2304 |
+
"learning_rate": 2.4124645892351276e-05,
|
2305 |
+
"loss": 1.2433,
|
2306 |
+
"step": 28800
|
2307 |
+
},
|
2308 |
+
{
|
2309 |
+
"epoch": 77.48,
|
2310 |
+
"learning_rate": 2.3841359773371106e-05,
|
2311 |
+
"loss": 1.1887,
|
2312 |
+
"step": 28900
|
2313 |
+
},
|
2314 |
+
{
|
2315 |
+
"epoch": 77.75,
|
2316 |
+
"learning_rate": 2.3558073654390936e-05,
|
2317 |
+
"loss": 1.1679,
|
2318 |
+
"step": 29000
|
2319 |
+
},
|
2320 |
+
{
|
2321 |
+
"epoch": 77.75,
|
2322 |
+
"eval_cer": 0.3748693003591399,
|
2323 |
+
"eval_loss": 0.8194963335990906,
|
2324 |
+
"eval_runtime": 117.7002,
|
2325 |
+
"eval_samples_per_second": 19.558,
|
2326 |
+
"eval_steps_per_second": 2.447,
|
2327 |
+
"eval_wer": 1.4097011693373755,
|
2328 |
+
"step": 29000
|
2329 |
+
},
|
2330 |
+
{
|
2331 |
+
"epoch": 78.02,
|
2332 |
+
"learning_rate": 2.3274787535410767e-05,
|
2333 |
+
"loss": 1.1151,
|
2334 |
+
"step": 29100
|
2335 |
+
},
|
2336 |
+
{
|
2337 |
+
"epoch": 78.28,
|
2338 |
+
"learning_rate": 2.2991501416430597e-05,
|
2339 |
+
"loss": 1.1638,
|
2340 |
+
"step": 29200
|
2341 |
+
},
|
2342 |
+
{
|
2343 |
+
"epoch": 78.55,
|
2344 |
+
"learning_rate": 2.2708215297450424e-05,
|
2345 |
+
"loss": 1.1516,
|
2346 |
+
"step": 29300
|
2347 |
+
},
|
2348 |
+
{
|
2349 |
+
"epoch": 78.82,
|
2350 |
+
"learning_rate": 2.2424929178470254e-05,
|
2351 |
+
"loss": 1.1652,
|
2352 |
+
"step": 29400
|
2353 |
+
},
|
2354 |
+
{
|
2355 |
+
"epoch": 79.09,
|
2356 |
+
"learning_rate": 2.2141643059490084e-05,
|
2357 |
+
"loss": 1.1967,
|
2358 |
+
"step": 29500
|
2359 |
+
},
|
2360 |
+
{
|
2361 |
+
"epoch": 79.09,
|
2362 |
+
"eval_cer": 0.30767831977087784,
|
2363 |
+
"eval_loss": 0.8058642148971558,
|
2364 |
+
"eval_runtime": 118.6259,
|
2365 |
+
"eval_samples_per_second": 19.406,
|
2366 |
+
"eval_steps_per_second": 2.428,
|
2367 |
+
"eval_wer": 1.2074491121697704,
|
2368 |
+
"step": 29500
|
2369 |
+
},
|
2370 |
+
{
|
2371 |
+
"epoch": 79.36,
|
2372 |
+
"learning_rate": 2.1858356940509918e-05,
|
2373 |
+
"loss": 1.1425,
|
2374 |
+
"step": 29600
|
2375 |
+
},
|
2376 |
+
{
|
2377 |
+
"epoch": 79.62,
|
2378 |
+
"learning_rate": 2.1575070821529748e-05,
|
2379 |
+
"loss": 1.1489,
|
2380 |
+
"step": 29700
|
2381 |
+
},
|
2382 |
+
{
|
2383 |
+
"epoch": 79.89,
|
2384 |
+
"learning_rate": 2.1291784702549575e-05,
|
2385 |
+
"loss": 1.136,
|
2386 |
+
"step": 29800
|
2387 |
+
},
|
2388 |
+
{
|
2389 |
+
"epoch": 80.16,
|
2390 |
+
"learning_rate": 2.1008498583569405e-05,
|
2391 |
+
"loss": 1.0913,
|
2392 |
+
"step": 29900
|
2393 |
+
},
|
2394 |
+
{
|
2395 |
+
"epoch": 80.43,
|
2396 |
+
"learning_rate": 2.0725212464589236e-05,
|
2397 |
+
"loss": 1.1241,
|
2398 |
+
"step": 30000
|
2399 |
+
},
|
2400 |
+
{
|
2401 |
+
"epoch": 80.43,
|
2402 |
+
"eval_cer": 0.3270445969904987,
|
2403 |
+
"eval_loss": 0.8137025833129883,
|
2404 |
+
"eval_runtime": 117.4298,
|
2405 |
+
"eval_samples_per_second": 19.603,
|
2406 |
+
"eval_steps_per_second": 2.453,
|
2407 |
+
"eval_wer": 1.2451277609354698,
|
2408 |
+
"step": 30000
|
2409 |
+
},
|
2410 |
+
{
|
2411 |
+
"epoch": 80.7,
|
2412 |
+
"learning_rate": 2.0441926345609066e-05,
|
2413 |
+
"loss": 1.1314,
|
2414 |
+
"step": 30100
|
2415 |
+
},
|
2416 |
+
{
|
2417 |
+
"epoch": 80.96,
|
2418 |
+
"learning_rate": 2.0158640226628896e-05,
|
2419 |
+
"loss": 1.1315,
|
2420 |
+
"step": 30200
|
2421 |
+
},
|
2422 |
+
{
|
2423 |
+
"epoch": 81.23,
|
2424 |
+
"learning_rate": 1.9875354107648726e-05,
|
2425 |
+
"loss": 1.1535,
|
2426 |
+
"step": 30300
|
2427 |
+
},
|
2428 |
+
{
|
2429 |
+
"epoch": 81.5,
|
2430 |
+
"learning_rate": 1.9592067988668557e-05,
|
2431 |
+
"loss": 1.1567,
|
2432 |
+
"step": 30400
|
2433 |
+
},
|
2434 |
+
{
|
2435 |
+
"epoch": 81.77,
|
2436 |
+
"learning_rate": 1.931161473087819e-05,
|
2437 |
+
"loss": 1.1414,
|
2438 |
+
"step": 30500
|
2439 |
+
},
|
2440 |
+
{
|
2441 |
+
"epoch": 81.77,
|
2442 |
+
"eval_cer": 0.3120880120016366,
|
2443 |
+
"eval_loss": 0.8117419481277466,
|
2444 |
+
"eval_runtime": 116.2752,
|
2445 |
+
"eval_samples_per_second": 19.798,
|
2446 |
+
"eval_steps_per_second": 2.477,
|
2447 |
+
"eval_wer": 1.2031182330012993,
|
2448 |
+
"step": 30500
|
2449 |
+
},
|
2450 |
+
{
|
2451 |
+
"epoch": 82.04,
|
2452 |
+
"learning_rate": 1.902832861189802e-05,
|
2453 |
+
"loss": 1.0878,
|
2454 |
+
"step": 30600
|
2455 |
+
},
|
2456 |
+
{
|
2457 |
+
"epoch": 82.31,
|
2458 |
+
"learning_rate": 1.8745042492917846e-05,
|
2459 |
+
"loss": 1.0806,
|
2460 |
+
"step": 30700
|
2461 |
+
},
|
2462 |
+
{
|
2463 |
+
"epoch": 82.57,
|
2464 |
+
"learning_rate": 1.8461756373937677e-05,
|
2465 |
+
"loss": 1.0914,
|
2466 |
+
"step": 30800
|
2467 |
+
},
|
2468 |
+
{
|
2469 |
+
"epoch": 82.84,
|
2470 |
+
"learning_rate": 1.8178470254957507e-05,
|
2471 |
+
"loss": 1.1274,
|
2472 |
+
"step": 30900
|
2473 |
+
},
|
2474 |
+
{
|
2475 |
+
"epoch": 83.11,
|
2476 |
+
"learning_rate": 1.7895184135977337e-05,
|
2477 |
+
"loss": 1.132,
|
2478 |
+
"step": 31000
|
2479 |
+
},
|
2480 |
+
{
|
2481 |
+
"epoch": 83.11,
|
2482 |
+
"eval_cer": 0.390053189071237,
|
2483 |
+
"eval_loss": 0.823433518409729,
|
2484 |
+
"eval_runtime": 118.2321,
|
2485 |
+
"eval_samples_per_second": 19.47,
|
2486 |
+
"eval_steps_per_second": 2.436,
|
2487 |
+
"eval_wer": 1.4265915980944133,
|
2488 |
+
"step": 31000
|
2489 |
+
},
|
2490 |
+
{
|
2491 |
+
"epoch": 83.38,
|
2492 |
+
"learning_rate": 1.761189801699717e-05,
|
2493 |
+
"loss": 1.0891,
|
2494 |
+
"step": 31100
|
2495 |
+
},
|
2496 |
+
{
|
2497 |
+
"epoch": 83.65,
|
2498 |
+
"learning_rate": 1.7328611898016998e-05,
|
2499 |
+
"loss": 1.1041,
|
2500 |
+
"step": 31200
|
2501 |
+
},
|
2502 |
+
{
|
2503 |
+
"epoch": 83.91,
|
2504 |
+
"learning_rate": 1.7045325779036828e-05,
|
2505 |
+
"loss": 1.0895,
|
2506 |
+
"step": 31300
|
2507 |
+
},
|
2508 |
+
{
|
2509 |
+
"epoch": 84.18,
|
2510 |
+
"learning_rate": 1.6762039660056658e-05,
|
2511 |
+
"loss": 1.0966,
|
2512 |
+
"step": 31400
|
2513 |
+
},
|
2514 |
+
{
|
2515 |
+
"epoch": 84.45,
|
2516 |
+
"learning_rate": 1.647875354107649e-05,
|
2517 |
+
"loss": 1.0982,
|
2518 |
+
"step": 31500
|
2519 |
+
},
|
2520 |
+
{
|
2521 |
+
"epoch": 84.45,
|
2522 |
+
"eval_cer": 0.3606855480292767,
|
2523 |
+
"eval_loss": 0.8063952326774597,
|
2524 |
+
"eval_runtime": 119.0446,
|
2525 |
+
"eval_samples_per_second": 19.337,
|
2526 |
+
"eval_steps_per_second": 2.419,
|
2527 |
+
"eval_wer": 1.3711563447379818,
|
2528 |
+
"step": 31500
|
2529 |
+
},
|
2530 |
+
{
|
2531 |
+
"epoch": 84.72,
|
2532 |
+
"learning_rate": 1.619546742209632e-05,
|
2533 |
+
"loss": 1.0969,
|
2534 |
+
"step": 31600
|
2535 |
+
},
|
2536 |
+
{
|
2537 |
+
"epoch": 84.99,
|
2538 |
+
"learning_rate": 1.5912181303116146e-05,
|
2539 |
+
"loss": 1.0552,
|
2540 |
+
"step": 31700
|
2541 |
+
},
|
2542 |
+
{
|
2543 |
+
"epoch": 85.25,
|
2544 |
+
"learning_rate": 1.562889518413598e-05,
|
2545 |
+
"loss": 1.079,
|
2546 |
+
"step": 31800
|
2547 |
+
},
|
2548 |
+
{
|
2549 |
+
"epoch": 85.52,
|
2550 |
+
"learning_rate": 1.534560906515581e-05,
|
2551 |
+
"loss": 1.0959,
|
2552 |
+
"step": 31900
|
2553 |
+
},
|
2554 |
+
{
|
2555 |
+
"epoch": 85.79,
|
2556 |
+
"learning_rate": 1.5062322946175638e-05,
|
2557 |
+
"loss": 1.0797,
|
2558 |
+
"step": 32000
|
2559 |
+
},
|
2560 |
+
{
|
2561 |
+
"epoch": 85.79,
|
2562 |
+
"eval_cer": 0.356184934309224,
|
2563 |
+
"eval_loss": 0.8166823983192444,
|
2564 |
+
"eval_runtime": 117.1158,
|
2565 |
+
"eval_samples_per_second": 19.656,
|
2566 |
+
"eval_steps_per_second": 2.459,
|
2567 |
+
"eval_wer": 1.335643135556518,
|
2568 |
+
"step": 32000
|
2569 |
+
},
|
2570 |
+
{
|
2571 |
+
"epoch": 86.06,
|
2572 |
+
"learning_rate": 1.477903682719547e-05,
|
2573 |
+
"loss": 1.1358,
|
2574 |
+
"step": 32100
|
2575 |
+
},
|
2576 |
+
{
|
2577 |
+
"epoch": 86.33,
|
2578 |
+
"learning_rate": 1.4495750708215297e-05,
|
2579 |
+
"loss": 1.0718,
|
2580 |
+
"step": 32200
|
2581 |
+
},
|
2582 |
+
{
|
2583 |
+
"epoch": 86.59,
|
2584 |
+
"learning_rate": 1.4212464589235127e-05,
|
2585 |
+
"loss": 1.0725,
|
2586 |
+
"step": 32300
|
2587 |
+
},
|
2588 |
+
{
|
2589 |
+
"epoch": 86.86,
|
2590 |
+
"learning_rate": 1.3929178470254959e-05,
|
2591 |
+
"loss": 1.0705,
|
2592 |
+
"step": 32400
|
2593 |
+
},
|
2594 |
+
{
|
2595 |
+
"epoch": 87.13,
|
2596 |
+
"learning_rate": 1.364589235127479e-05,
|
2597 |
+
"loss": 1.0119,
|
2598 |
+
"step": 32500
|
2599 |
+
},
|
2600 |
+
{
|
2601 |
+
"epoch": 87.13,
|
2602 |
+
"eval_cer": 0.3267718325226167,
|
2603 |
+
"eval_loss": 0.8214733600616455,
|
2604 |
+
"eval_runtime": 118.8447,
|
2605 |
+
"eval_samples_per_second": 19.37,
|
2606 |
+
"eval_steps_per_second": 2.423,
|
2607 |
+
"eval_wer": 1.2754439151147683,
|
2608 |
+
"step": 32500
|
2609 |
+
},
|
2610 |
+
{
|
2611 |
+
"epoch": 87.4,
|
2612 |
+
"learning_rate": 1.336260623229462e-05,
|
2613 |
+
"loss": 1.0571,
|
2614 |
+
"step": 32600
|
2615 |
+
},
|
2616 |
+
{
|
2617 |
+
"epoch": 87.67,
|
2618 |
+
"learning_rate": 1.3079320113314446e-05,
|
2619 |
+
"loss": 1.0636,
|
2620 |
+
"step": 32700
|
2621 |
+
},
|
2622 |
+
{
|
2623 |
+
"epoch": 87.93,
|
2624 |
+
"learning_rate": 1.2796033994334278e-05,
|
2625 |
+
"loss": 1.0892,
|
2626 |
+
"step": 32800
|
2627 |
+
},
|
2628 |
+
{
|
2629 |
+
"epoch": 88.2,
|
2630 |
+
"learning_rate": 1.2512747875354109e-05,
|
2631 |
+
"loss": 1.1356,
|
2632 |
+
"step": 32900
|
2633 |
+
},
|
2634 |
+
{
|
2635 |
+
"epoch": 88.47,
|
2636 |
+
"learning_rate": 1.2229461756373939e-05,
|
2637 |
+
"loss": 1.0216,
|
2638 |
+
"step": 33000
|
2639 |
+
},
|
2640 |
+
{
|
2641 |
+
"epoch": 88.47,
|
2642 |
+
"eval_cer": 0.3183615947629222,
|
2643 |
+
"eval_loss": 0.816307008266449,
|
2644 |
+
"eval_runtime": 118.8553,
|
2645 |
+
"eval_samples_per_second": 19.368,
|
2646 |
+
"eval_steps_per_second": 2.423,
|
2647 |
+
"eval_wer": 1.2511909917713295,
|
2648 |
+
"step": 33000
|
2649 |
+
},
|
2650 |
+
{
|
2651 |
+
"epoch": 88.74,
|
2652 |
+
"learning_rate": 1.1946175637393768e-05,
|
2653 |
+
"loss": 1.0226,
|
2654 |
+
"step": 33100
|
2655 |
+
},
|
2656 |
+
{
|
2657 |
+
"epoch": 89.01,
|
2658 |
+
"learning_rate": 1.1662889518413598e-05,
|
2659 |
+
"loss": 1.0002,
|
2660 |
+
"step": 33200
|
2661 |
+
},
|
2662 |
+
{
|
2663 |
+
"epoch": 89.28,
|
2664 |
+
"learning_rate": 1.1379603399433428e-05,
|
2665 |
+
"loss": 1.0296,
|
2666 |
+
"step": 33300
|
2667 |
+
},
|
2668 |
+
{
|
2669 |
+
"epoch": 89.54,
|
2670 |
+
"learning_rate": 1.1096317280453258e-05,
|
2671 |
+
"loss": 1.0635,
|
2672 |
+
"step": 33400
|
2673 |
+
},
|
2674 |
+
{
|
2675 |
+
"epoch": 89.81,
|
2676 |
+
"learning_rate": 1.0813031161473089e-05,
|
2677 |
+
"loss": 1.0375,
|
2678 |
+
"step": 33500
|
2679 |
+
},
|
2680 |
+
{
|
2681 |
+
"epoch": 89.81,
|
2682 |
+
"eval_cer": 0.3290448697549666,
|
2683 |
+
"eval_loss": 0.8136931657791138,
|
2684 |
+
"eval_runtime": 119.3202,
|
2685 |
+
"eval_samples_per_second": 19.293,
|
2686 |
+
"eval_steps_per_second": 2.414,
|
2687 |
+
"eval_wer": 1.2685145084452143,
|
2688 |
+
"step": 33500
|
2689 |
+
},
|
2690 |
+
{
|
2691 |
+
"epoch": 90.08,
|
2692 |
+
"learning_rate": 1.0529745042492919e-05,
|
2693 |
+
"loss": 1.096,
|
2694 |
+
"step": 33600
|
2695 |
+
},
|
2696 |
+
{
|
2697 |
+
"epoch": 90.35,
|
2698 |
+
"learning_rate": 1.0246458923512749e-05,
|
2699 |
+
"loss": 1.0465,
|
2700 |
+
"step": 33700
|
2701 |
+
},
|
2702 |
+
{
|
2703 |
+
"epoch": 90.62,
|
2704 |
+
"learning_rate": 9.963172804532578e-06,
|
2705 |
+
"loss": 1.0637,
|
2706 |
+
"step": 33800
|
2707 |
+
},
|
2708 |
+
{
|
2709 |
+
"epoch": 90.88,
|
2710 |
+
"learning_rate": 9.679886685552408e-06,
|
2711 |
+
"loss": 1.0514,
|
2712 |
+
"step": 33900
|
2713 |
+
},
|
2714 |
+
{
|
2715 |
+
"epoch": 91.15,
|
2716 |
+
"learning_rate": 9.396600566572238e-06,
|
2717 |
+
"loss": 0.9794,
|
2718 |
+
"step": 34000
|
2719 |
+
},
|
2720 |
+
{
|
2721 |
+
"epoch": 91.15,
|
2722 |
+
"eval_cer": 0.32549893167250077,
|
2723 |
+
"eval_loss": 0.8219542503356934,
|
2724 |
+
"eval_runtime": 117.5416,
|
2725 |
+
"eval_samples_per_second": 19.585,
|
2726 |
+
"eval_steps_per_second": 2.45,
|
2727 |
+
"eval_wer": 1.2724122996968386,
|
2728 |
+
"step": 34000
|
2729 |
+
},
|
2730 |
+
{
|
2731 |
+
"epoch": 91.42,
|
2732 |
+
"learning_rate": 9.113314447592068e-06,
|
2733 |
+
"loss": 1.0268,
|
2734 |
+
"step": 34100
|
2735 |
+
},
|
2736 |
+
{
|
2737 |
+
"epoch": 91.69,
|
2738 |
+
"learning_rate": 8.830028328611899e-06,
|
2739 |
+
"loss": 1.0211,
|
2740 |
+
"step": 34200
|
2741 |
+
},
|
2742 |
+
{
|
2743 |
+
"epoch": 91.96,
|
2744 |
+
"learning_rate": 8.546742209631727e-06,
|
2745 |
+
"loss": 1.0557,
|
2746 |
+
"step": 34300
|
2747 |
+
},
|
2748 |
+
{
|
2749 |
+
"epoch": 92.22,
|
2750 |
+
"learning_rate": 8.26345609065156e-06,
|
2751 |
+
"loss": 1.0814,
|
2752 |
+
"step": 34400
|
2753 |
+
},
|
2754 |
+
{
|
2755 |
+
"epoch": 92.49,
|
2756 |
+
"learning_rate": 7.98016997167139e-06,
|
2757 |
+
"loss": 1.0207,
|
2758 |
+
"step": 34500
|
2759 |
+
},
|
2760 |
+
{
|
2761 |
+
"epoch": 92.49,
|
2762 |
+
"eval_cer": 0.33609128517525116,
|
2763 |
+
"eval_loss": 0.8165063261985779,
|
2764 |
+
"eval_runtime": 117.4538,
|
2765 |
+
"eval_samples_per_second": 19.599,
|
2766 |
+
"eval_steps_per_second": 2.452,
|
2767 |
+
"eval_wer": 1.2906019922044174,
|
2768 |
+
"step": 34500
|
2769 |
+
},
|
2770 |
+
{
|
2771 |
+
"epoch": 92.76,
|
2772 |
+
"learning_rate": 7.696883852691218e-06,
|
2773 |
+
"loss": 1.0183,
|
2774 |
+
"step": 34600
|
2775 |
+
},
|
2776 |
+
{
|
2777 |
+
"epoch": 93.03,
|
2778 |
+
"learning_rate": 7.413597733711048e-06,
|
2779 |
+
"loss": 0.9889,
|
2780 |
+
"step": 34700
|
2781 |
+
},
|
2782 |
+
{
|
2783 |
+
"epoch": 93.3,
|
2784 |
+
"learning_rate": 7.130311614730878e-06,
|
2785 |
+
"loss": 1.0047,
|
2786 |
+
"step": 34800
|
2787 |
+
},
|
2788 |
+
{
|
2789 |
+
"epoch": 93.56,
|
2790 |
+
"learning_rate": 6.847025495750709e-06,
|
2791 |
+
"loss": 1.0318,
|
2792 |
+
"step": 34900
|
2793 |
+
},
|
2794 |
+
{
|
2795 |
+
"epoch": 93.83,
|
2796 |
+
"learning_rate": 6.563739376770539e-06,
|
2797 |
+
"loss": 1.0169,
|
2798 |
+
"step": 35000
|
2799 |
+
},
|
2800 |
+
{
|
2801 |
+
"epoch": 93.83,
|
2802 |
+
"eval_cer": 0.3305450743283175,
|
2803 |
+
"eval_loss": 0.8153378367424011,
|
2804 |
+
"eval_runtime": 118.1425,
|
2805 |
+
"eval_samples_per_second": 19.485,
|
2806 |
+
"eval_steps_per_second": 2.438,
|
2807 |
+
"eval_wer": 1.281940233867475,
|
2808 |
+
"step": 35000
|
2809 |
+
},
|
2810 |
+
{
|
2811 |
+
"epoch": 94.1,
|
2812 |
+
"learning_rate": 6.2804532577903686e-06,
|
2813 |
+
"loss": 1.0584,
|
2814 |
+
"step": 35100
|
2815 |
+
},
|
2816 |
+
{
|
2817 |
+
"epoch": 94.37,
|
2818 |
+
"learning_rate": 5.997167138810199e-06,
|
2819 |
+
"loss": 1.0074,
|
2820 |
+
"step": 35200
|
2821 |
+
},
|
2822 |
+
{
|
2823 |
+
"epoch": 94.64,
|
2824 |
+
"learning_rate": 5.71671388101983e-06,
|
2825 |
+
"loss": 1.0278,
|
2826 |
+
"step": 35300
|
2827 |
+
},
|
2828 |
+
{
|
2829 |
+
"epoch": 94.9,
|
2830 |
+
"learning_rate": 5.433427762039661e-06,
|
2831 |
+
"loss": 1.0251,
|
2832 |
+
"step": 35400
|
2833 |
+
},
|
2834 |
+
{
|
2835 |
+
"epoch": 95.17,
|
2836 |
+
"learning_rate": 5.15014164305949e-06,
|
2837 |
+
"loss": 1.0127,
|
2838 |
+
"step": 35500
|
2839 |
+
},
|
2840 |
+
{
|
2841 |
+
"epoch": 95.17,
|
2842 |
+
"eval_cer": 0.3251807064599718,
|
2843 |
+
"eval_loss": 0.8187472224235535,
|
2844 |
+
"eval_runtime": 117.413,
|
2845 |
+
"eval_samples_per_second": 19.606,
|
2846 |
+
"eval_steps_per_second": 2.453,
|
2847 |
+
"eval_wer": 1.2832394976180164,
|
2848 |
+
"step": 35500
|
2849 |
+
},
|
2850 |
+
{
|
2851 |
+
"epoch": 95.44,
|
2852 |
+
"learning_rate": 4.86685552407932e-06,
|
2853 |
+
"loss": 1.0153,
|
2854 |
+
"step": 35600
|
2855 |
+
},
|
2856 |
+
{
|
2857 |
+
"epoch": 95.71,
|
2858 |
+
"learning_rate": 4.58356940509915e-06,
|
2859 |
+
"loss": 1.0098,
|
2860 |
+
"step": 35700
|
2861 |
+
},
|
2862 |
+
{
|
2863 |
+
"epoch": 95.98,
|
2864 |
+
"learning_rate": 4.300283286118981e-06,
|
2865 |
+
"loss": 1.0034,
|
2866 |
+
"step": 35800
|
2867 |
+
},
|
2868 |
+
{
|
2869 |
+
"epoch": 96.25,
|
2870 |
+
"learning_rate": 4.01699716713881e-06,
|
2871 |
+
"loss": 1.017,
|
2872 |
+
"step": 35900
|
2873 |
+
},
|
2874 |
+
{
|
2875 |
+
"epoch": 96.51,
|
2876 |
+
"learning_rate": 3.7337110481586406e-06,
|
2877 |
+
"loss": 0.9978,
|
2878 |
+
"step": 36000
|
2879 |
+
},
|
2880 |
+
{
|
2881 |
+
"epoch": 96.51,
|
2882 |
+
"eval_cer": 0.3209528572078011,
|
2883 |
+
"eval_loss": 0.811066746711731,
|
2884 |
+
"eval_runtime": 117.24,
|
2885 |
+
"eval_samples_per_second": 19.635,
|
2886 |
+
"eval_steps_per_second": 2.456,
|
2887 |
+
"eval_wer": 1.2611520138588133,
|
2888 |
+
"step": 36000
|
2889 |
+
},
|
2890 |
+
{
|
2891 |
+
"epoch": 96.78,
|
2892 |
+
"learning_rate": 3.4504249291784704e-06,
|
2893 |
+
"loss": 1.0111,
|
2894 |
+
"step": 36100
|
2895 |
+
},
|
2896 |
+
{
|
2897 |
+
"epoch": 97.05,
|
2898 |
+
"learning_rate": 3.1671388101983003e-06,
|
2899 |
+
"loss": 1.0185,
|
2900 |
+
"step": 36200
|
2901 |
+
},
|
2902 |
+
{
|
2903 |
+
"epoch": 97.32,
|
2904 |
+
"learning_rate": 2.8838526912181305e-06,
|
2905 |
+
"loss": 0.9678,
|
2906 |
+
"step": 36300
|
2907 |
+
},
|
2908 |
+
{
|
2909 |
+
"epoch": 97.59,
|
2910 |
+
"learning_rate": 2.6005665722379608e-06,
|
2911 |
+
"loss": 1.0271,
|
2912 |
+
"step": 36400
|
2913 |
+
},
|
2914 |
+
{
|
2915 |
+
"epoch": 97.85,
|
2916 |
+
"learning_rate": 2.3172804532577906e-06,
|
2917 |
+
"loss": 0.9923,
|
2918 |
+
"step": 36500
|
2919 |
+
},
|
2920 |
+
{
|
2921 |
+
"epoch": 97.85,
|
2922 |
+
"eval_cer": 0.3122243942355776,
|
2923 |
+
"eval_loss": 0.8076378703117371,
|
2924 |
+
"eval_runtime": 116.5522,
|
2925 |
+
"eval_samples_per_second": 19.751,
|
2926 |
+
"eval_steps_per_second": 2.471,
|
2927 |
+
"eval_wer": 1.227804244261585,
|
2928 |
+
"step": 36500
|
2929 |
+
},
|
2930 |
+
{
|
2931 |
+
"epoch": 98.12,
|
2932 |
+
"learning_rate": 2.0339943342776205e-06,
|
2933 |
+
"loss": 0.9955,
|
2934 |
+
"step": 36600
|
2935 |
+
},
|
2936 |
+
{
|
2937 |
+
"epoch": 98.39,
|
2938 |
+
"learning_rate": 1.7507082152974505e-06,
|
2939 |
+
"loss": 1.0009,
|
2940 |
+
"step": 36700
|
2941 |
+
},
|
2942 |
+
{
|
2943 |
+
"epoch": 98.66,
|
2944 |
+
"learning_rate": 1.4674220963172806e-06,
|
2945 |
+
"loss": 0.9855,
|
2946 |
+
"step": 36800
|
2947 |
+
},
|
2948 |
+
{
|
2949 |
+
"epoch": 98.93,
|
2950 |
+
"learning_rate": 1.1869688385269122e-06,
|
2951 |
+
"loss": 1.0056,
|
2952 |
+
"step": 36900
|
2953 |
+
},
|
2954 |
+
{
|
2955 |
+
"epoch": 99.2,
|
2956 |
+
"learning_rate": 9.036827195467423e-07,
|
2957 |
+
"loss": 1.0451,
|
2958 |
+
"step": 37000
|
2959 |
+
},
|
2960 |
+
{
|
2961 |
+
"epoch": 99.2,
|
2962 |
+
"eval_cer": 0.3155884893394554,
|
2963 |
+
"eval_loss": 0.8086187243461609,
|
2964 |
+
"eval_runtime": 119.7199,
|
2965 |
+
"eval_samples_per_second": 19.228,
|
2966 |
+
"eval_steps_per_second": 2.406,
|
2967 |
+
"eval_wer": 1.2451277609354698,
|
2968 |
+
"step": 37000
|
2969 |
+
},
|
2970 |
+
{
|
2971 |
+
"epoch": 99.46,
|
2972 |
+
"learning_rate": 6.203966005665723e-07,
|
2973 |
+
"loss": 1.0189,
|
2974 |
+
"step": 37100
|
2975 |
+
},
|
2976 |
+
{
|
2977 |
+
"epoch": 99.73,
|
2978 |
+
"learning_rate": 3.371104815864023e-07,
|
2979 |
+
"loss": 0.9878,
|
2980 |
+
"step": 37200
|
2981 |
+
},
|
2982 |
+
{
|
2983 |
+
"epoch": 100.0,
|
2984 |
+
"learning_rate": 5.3824362606232296e-08,
|
2985 |
+
"loss": 0.96,
|
2986 |
+
"step": 37300
|
2987 |
+
},
|
2988 |
+
{
|
2989 |
+
"epoch": 100.0,
|
2990 |
+
"step": 37300,
|
2991 |
+
"total_flos": 1.39626030021533e+20,
|
2992 |
+
"train_loss": 3.7852419735087786,
|
2993 |
+
"train_runtime": 72640.1075,
|
2994 |
+
"train_samples_per_second": 16.45,
|
2995 |
+
"train_steps_per_second": 0.513
|
2996 |
+
}
|
2997 |
+
],
|
2998 |
+
"max_steps": 37300,
|
2999 |
+
"num_train_epochs": 100,
|
3000 |
+
"total_flos": 1.39626030021533e+20,
|
3001 |
+
"trial_name": null,
|
3002 |
+
"trial_params": null
|
3003 |
+
}
|
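Note: `trainer_state.json` is the running log that the 🤗 `Trainer` writes during training, so the evaluation trajectory recorded above can be inspected programmatically rather than read by eye. A minimal sketch (not part of this commit; it assumes the file has been downloaded to the working directory):

```python
# Sketch: extract the evaluation rows from trainer_state.json.
# Training rows carry "loss"/"learning_rate"; evaluation rows carry "eval_*".
import json

with open("trainer_state.json") as f:
    state = json.load(f)

for record in state["log_history"]:
    if "eval_cer" in record:
        print(
            f'step {record["step"]:>6}  '
            f'CER {record["eval_cer"]:.4f}  '
            f'WER {record["eval_wer"]:.4f}  '
            f'eval_loss {record["eval_loss"]:.4f}'
        )
```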
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:472e2ebcb99d59b6b693f009ff1df20cb7c55629d4fab148f61d3dc117b7c960
+size 2991
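Note: the blob above is a Git LFS pointer, not the file itself; the actual `training_args.bin` (2991 bytes) is a pickled `transformers.TrainingArguments` object. A hedged sketch for inspecting it once the LFS object has been fetched (unpickling executes code on load, so only do this for repos you trust):

```python
# Sketch: inspect the pickled TrainingArguments behind the LFS pointer.
import torch

# Recent PyTorch versions default torch.load to weights_only=True, which
# rejects arbitrary pickles; pass weights_only=False explicitly if needed.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.gradient_accumulation_steps)
```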
vocab.json
ADDED
@@ -0,0 +1 @@
{"a": 1, "b": 2, "c": 3, "d": 4, "f": 5, "g": 6, "h": 7, "i": 8, "j": 9, "l": 10, "m": 11, "n": 12, "p": 13, "q": 14, "r": 15, "s": 16, "t": 17, "u": 18, "v": 19, "w": 20, "x": 21, "y": 22, "~": 23, "⋯": 24, "⠀": 25, "⻣": 26, "ㄧ": 27, "㗎": 28, "㩒": 29, "㩿": 30, "㪐": 31, "䒏": 32, "䒐": 33, "䰧": 34, "一": 35, "丁": 36, "七": 37, "丈": 38, "三": 39, "上": 40, "下": 41, "不": 42, "丑": 43, "且": 44, "丕": 45, "世": 46, "丘": 47, "丙": 48, "丟": 49, "両": 50, "並": 51, "丫": 52, "中": 53, "丰": 54, "串": 55, "丶": 56, "丸": 57, "丹": 58, "主": 59, "丼": 60, "乃": 61, "久": 62, "义": 63, "之": 64, "乍": 65, "乎": 66, "乏": 67, "乒": 68, "乓": 69, "乖": 70, "乘": 71, "乙": 72, "乜": 73, "九": 74, "乞": 75, "也": 76, "乳": 77, "乸": 78, "乾": 79, "亂": 80, "了": 81, "予": 82, "事": 83, "二": 84, "于": 85, "互": 86, "五": 87, "井": 88, "些": 89, "亞": 90, "亡": 91, "亢": 92, "交": 93, "亦": 94, "亨": 95, "享": 96, "京": 97, "亭": 98, "亮": 99, "人": 100, "什": 101, "仁": 102, "仆": 103, "仇": 104, "今": 105, "介": 106, "仍": 107, "仔": 108, "仕": 109, "他": 110, "仗": 111, "付": 112, "仙": 113, "仞": 114, "代": 115, "令": 116, "以": 117, "仰": 118, "仲": 119, "件": 120, "任": 121, "份": 122, "仿": 123, "企": 124, "伊": 125, "伏": 126, "伐": 127, "休": 128, "伙": 129, "伯": 130, "估": 131, "伴": 132, "伶": 133, "伸": 134, "似": 135, "伽": 136, "佃": 137, "但": 138, "佈": 139, "位": 140, "低": 141, "住": 142, "佐": 143, "佑": 144, "佔": 145, "何": 146, "佗": 147, "余": 148, "佚": 149, "佛": 150, "作": 151, "你": 152, "佢": 153, "佣": 154, "佩": 155, "佬": 156, "佳": 157, "併": 158, "佻": 159, "使": 160, "侄": 161, "來": 162, "例": 163, "侍": 164, "供": 165, "依": 166, "侮": 167, "侯": 168, "侵": 169, "侶": 170, "便": 171, "係": 172, "促": 173, "俄": 174, "俊": 175, "俎": 176, "俏": 177, "俐": 178, "俗": 179, "俚": 180, "保": 181, "俠": 182, "信": 183, "俬": 184, "修": 185, "俸": 186, "俾": 187, "倉": 188, "個": 189, "倍": 190, "們": 191, "倒": 192, "候": 193, "倚": 194, "借": 195, "倦": 196, "倫": 197, "值": 198, "假": 199, "偈": 200, "偉": 201, "偏": 202, "偕": 203, "做": 204, "停": 205, "健": 206, "側": 207, "偶": 208, "偷": 209, "偽": 210, "傅": 211, "傍": 212, "傑": 213, "傘": 214, "備": 215, "傢": 216, "催": 217, "傭": 218, "傲": 219, "傳": 220, "債": 221, "傷": 222, "傻": 223, "傾": 224, "僅": 225, "像": 226, "僑": 227, "僕": 228, "僭": 229, "僱": 230, "價": 231, "僻": 232, "儀": 233, "億": 234, "儈": 235, "儍": 236, "儒": 237, "儘": 238, "優": 239, "儲": 240, "允": 241, "元": 242, "兄": 243, "充": 244, "兆": 245, "兇": 246, "先": 247, "光": 248, "克": 249, "兌": 250, "免": 251, "兒": 252, "兔": 253, "兜": 254, "入": 255, "內": 256, "全": 257, "兩": 258, "八": 259, "公": 260, "六": 261, "兮": 262, "共": 263, "兵": 264, "其": 265, "具": 266, "典": 267, "兼": 268, "内": 269, "冇": 270, "冊": 271, "再": 272, "冒": 273, "冕": 274, "冗": 275, "冚": 276, "冠": 277, "冤": 278, "冧": 279, "冬": 280, "冰": 281, "冷": 282, "准": 283, "凈": 284, "凌": 285, "凍": 286, "凝": 287, "凡": 288, "凰": 289, "凱": 290, "凳": 291, "凶": 292, "凹": 293, "出": 294, "函": 295, "刀": 296, "刁": 297, "刃": 298, "分": 299, "切": 300, "刑": 301, "划": 302, "列": 303, "初": 304, "判": 305, "別": 306, "刨": 307, "利": 308, "刮": 309, "到": 310, "制": 311, "刷": 312, "券": 313, "刺": 314, "刻": 315, "則": 316, "前": 317, "剎": 318, "剔": 319, "剛": 320, "剝": 321, "剩": 322, "剪": 323, "副": 324, "割": 325, "創": 326, "剷": 327, "劃": 328, "劇": 329, "劉": 330, "劊": 331, "劍": 332, "劑": 333, "劖": 334, "力": 335, "功": 336, "加": 337, "劣": 338, "助": 339, "努": 340, "勁": 341, "勃": 342, "勇": 343, "勉": 344, "勒": 345, "動": 346, "勘": 347, "務": 348, "勝": 349, "勞": 350, "勢": 351, "勤": 352, "勳": 353, "勵": 354, "勸": 355, "勻": 356, "勾": 357, "勿": 358, "包": 359, "匈": 360, "化": 361, "北": 362, "匙": 363, "匡": 364, "匯": 365, "匹": 366, 
"匿": 367, "區": 368, "十": 369, "千": 370, "升": 371, "午": 372, "半": 373, "卑": 374, "卒": 375, "卓": 376, "協": 377, "南": 378, "博": 379, "卜": 380, "卡": 381, "卦": 382, "卧": 383, "印": 384, "危": 385, "即": 386, "卵": 387, "卷": 388, "卸": 389, "卻": 390, "卿": 391, "厄": 392, "厘": 393, "厚": 394, "原": 395, "厥": 396, "厭": 397, "厲": 398, "厴": 399, "去": 400, "參": 401, "又": 402, "叉": 403, "及": 404, "友": 405, "反": 406, "叔": 407, "取": 408, "受": 409, "叛": 410, "叢": 411, "口": 412, "古": 413, "句": 414, "另": 415, "叨": 416, "只": 417, "叫": 418, "召": 419, "叭": 420, "叮": 421, "可": 422, "台": 423, "史": 424, "右": 425, "司": 426, "叻": 427, "吃": 428, "各": 429, "合": 430, "吉": 431, "吊": 432, "吋": 433, "同": 434, "名": 435, "后": 436, "吐": 437, "向": 438, "���": 439, "吖": 440, "君": 441, "吝": 442, "吞": 443, "吟": 444, "吠": 445, "否": 446, "吧": 447, "吩": 448, "含": 449, "吱": 450, "吳": 451, "吵": 452, "吶": 453, "吸": 454, "吹": 455, "吻": 456, "吼": 457, "吽": 458, "吾": 459, "呀": 460, "呂": 461, "呃": 462, "呆": 463, "呈": 464, "告": 465, "呎": 466, "呔": 467, "呢": 468, "周": 469, "呱": 470, "味": 471, "呷": 472, "呻": 473, "呼": 474, "命": 475, "咀": 476, "咁": 477, "咄": 478, "咇": 479, "咋": 480, "和": 481, "咐": 482, "咕": 483, "咖": 484, "咗": 485, "咦": 486, "咧": 487, "咩": 488, "咪": 489, "咬": 490, "咯": 491, "咳": 492, "咸": 493, "咽": 494, "咿": 495, "哀": 496, "品": 497, "哂": 498, "哄": 499, "哇": 500, "哈": 501, "哉": 502, "哋": 503, "响": 504, "哎": 505, "員": 506, "哣": 507, "哥": 508, "哦": 509, "哨": 510, "哩": 511, "哪": 512, "哭": 513, "哲": 514, "哺": 515, "哼": 516, "唇": 517, "唈": 518, "唉": 519, "唎": 520, "唏": 521, "唐": 522, "唔": 523, "唞": 524, "唥": 525, "唧": 526, "唪": 527, "售": 528, "唯": 529, "唱": 530, "唸": 531, "啄": 532, "啅": 533, "商": 534, "啊": 535, "啋": 536, "問": 537, "啕": 538, "啖": 539, "啜": 540, "啞": 541, "啟": 542, "啡": 543, "啤": 544, "啦": 545, "啩": 546, "啪": 547, "啫": 548, "啱": 549, "啲": 550, "啵": 551, "喀": 552, "喂": 553, "喃": 554, "善": 555, "喇": 556, "喉": 557, "喊": 558, "喎": 559, "喐": 560, "喔": 561, "喙": 562, "喚": 563, "喜": 564, "喝": 565, "喪": 566, "喫": 567, "喬": 568, "單": 569, "喱": 570, "喳": 571, "喺": 572, "喻": 573, "喼": 574, "嗅": 575, "嗇": 576, "嗌": 577, "嗎": 578, "嗒": 579, "嗚": 580, "嗜": 581, "嗡": 582, "嗤": 583, "嗦": 584, "嗰": 585, "嗱": 586, "嗲": 587, "嗶": 588, "嗷": 589, "嗽": 590, "嘅": 591, "嘆": 592, "嘈": 593, "嘉": 594, "嘔": 595, "嘗": 596, "嘛": 597, "嘜": 598, "嘞": 599, "嘟": 600, "嘢": 601, "嘥": 602, "嘩": 603, "嘲": 604, "嘴": 605, "嘸": 606, "噁": 607, "噃": 608, "噄": 609, "噉": 610, "噌": 611, "噎": 612, "噏": 613, "噓": 614, "噚": 615, "噤": 616, "器": 617, "噪": 618, "噬": 619, "噴": 620, "噶": 621, "噹": 622, "嚇": 623, "嚎": 624, "嚐": 625, "嚕": 626, "嚟": 627, "嚡": 628, "嚢": 629, "嚥": 630, "嚨": 631, "嚴": 632, "嚷": 633, "嚼": 634, "嚿": 635, "囉": 636, "囊": 637, "囌": 638, "囍": 639, "囑": 640, "囚": 641, "四": 642, "囝": 643, "回": 644, "因": 645, "囡": 646, "囪": 647, "困": 648, "固": 649, "圃": 650, "圈": 651, "國": 652, "圍": 653, "圑": 654, "園": 655, "圓": 656, "圖": 657, "團": 658, "土": 659, "在": 660, "圭": 661, "地": 662, "圳": 663, "圾": 664, "址": 665, "均": 666, "坊": 667, "坎": 668, "坐": 669, "坑": 670, "坡": 671, "坤": 672, "坦": 673, "坪": 674, "坭": 675, "坳": 676, "垂": 677, "垃": 678, "型": 679, "垢": 680, "埃": 681, "埋": 682, "城": 683, "埔": 684, "埗": 685, "埞": 686, "域": 687, "埠": 688, "埲": 689, "執": 690, "培": 691, "基": 692, "堂": 693, "堅": 694, "堆": 695, "堡": 696, "堤": 697, "堪": 698, "報": 699, "場": 700, "堵": 701, "塊": 702, "塑": 703, "塔": 704, "塗": 705, "塘": 706, "塞": 707, "塢": 708, "填": 709, "塱": 710, "塵": 711, "塾": 712, "境": 713, "墅": 714, "墊": 715, "墓": 716, "墜": 717, "增": 718, "墟": 719, "墨": 720, "墩": 721, 
"墮": 722, "墳": 723, "壁": 724, "壆": 725, "壇": 726, "壓": 727, "壘": 728, "壞": 729, "壟": 730, "壩": 731, "士": 732, "壯": 733, "壹": 734, "壺": 735, "壽": 736, "夏": 737, "夕": 738, "外": 739, "多": 740, "夜": 741, "夠": 742, "夢": 743, "夥": 744, "大": 745, "天": 746, "太": 747, "夫": 748, "央": 749, "失": 750, "夷": 751, "夾": 752, "奀": 753, "奄": 754, "奇": 755, "奈": 756, "奉": 757, "奏": 758, "契": 759, "奔": 760, "奕": 761, "套": 762, "奚": 763, "奧": 764, "奪": 765, "奮": 766, "女": 767, "奴": 768, "奶": 769, "奸": 770, "她": 771, "好": 772, "如": 773, "妄": 774, "妒": 775, "妓": 776, "妙": 777, "妝": 778, "妥": 779, "妨": 780, "妳": 781, "妹": 782, "妻": 783, "姆": 784, "姊": 785, "始": 786, "姐": 787, "姑": 788, "姓": 789, "委": 790, "姣": 791, "姦": 792, "姨": 793, "姬": 794, "姻": 795, "姿": 796, "威": 797, "娃": 798, "娘": 799, "娛": 800, "娜": 801, "娥": 802, "娶": 803, "婆": 804, "婚": 805, "婦": 806, "媒": 807, "媽": 808, "媾": 809, "嫁": 810, "嫂": 811, "嫉": 812, "嫌": 813, "嫩": 814, "嫪": 815, "嫲": 816, "嫻": 817, "嬉": 818, "嬲": 819, "嬸": 820, "子": 821, "孔": 822, "孖": 823, "字": 824, "存": 825, "孚": 826, "孝": 827, "孟": 828, "季": 829, "孤": 830, "孥": 831, "孩": 832, "孫": 833, "孭": 834, "孰": 835, "孱": 836, "學": 837, "孽": 838, "它": 839, "宅": 840, "宇": 841, "守": 842, "安": 843, "宋": 844, "完": 845, "宏": 846, "宗": 847, "官": 848, "宙": 849, "定": 850, "宛": 851, "宜": 852, "客": 853, "宣": 854, "室": 855, "宮": 856, "宰": 857, "害": 858, "宴": 859, "宵": 860, "家": 861, "宸": 862, "容": 863, "宿": 864, "寂": 865, "寃": 866, "寄": 867, "寅": 868, "密": 869, "寇": 870, "富": 871, "寒": 872, "寓": 873, "寞": 874, "察": 875, "寡": 876, "寢": 877, "實": 878, "寧": 879, "寨": 880, "審": 881, "寫": 882, "寬": 883, "寮": 884, "寶": 885, "寸": 886, "寺": 887, "封": 888, "射": 889, "將": 890, "專": 891, "尊": 892, "尋": 893, "對": 894, "導": 895, "小": 896, "少": 897, "尖": 898, "尚": 899, "尤": 900, "尬": 901, "就": 902, "尷": 903, "尺": 904, "尼": 905, "尾": 906, "尿": 907, "局": 908, "屁": 909, "居": 910, "屆": 911, "屈": 912, "屋": 913, "屌": 914, "屍": 915, "屎": 916, "屏": 917, "屑": 918, "展": 919, "屙": 920, "屠": 921, "層": 922, "履": 923, "屬": 924, "屯": 925, "山": 926, "屹": 927, "岀": 928, "岡": 929, "岩": 930, "岬": 931, "岳": 932, "岸": 933, "峒": 934, "峯": 935, "峰": 936, "島": 937, "峻": 938, "峽": 939, "崆": 940, "崇": 941, "崗": 942, "崙": 943, "崧": 944, "崩": 945, "嵌": 946, "嶄": 947, "嶙": 948, "嶺": 949, "嶼": 950, "巉": 951, "巒": 952, "川": 953, "州": 954, "巡": 955, "巢": 956, "工": 957, "左": 958, "巧": 959, "巨": 960, "巫": 961, "差": 962, "己": 963, "已": 964, "巴": 965, "巷": 966, "巾": 967, "市": 968, "布": 969, "帆": 970, "希": 971, "帖": 972, "帚": 973, "帝": 974, "帥": 975, "師": 976, "席": 977, "帳": 978, "帶": 979, "常": 980, "帽": 981, "幅": 982, "幕": 983, "幡": 984, "幢": 985, "幣": 986, "幫": 987, "干": 988, "平": 989, "年": 990, "幸": 991, "幹": 992, "幻": 993, "幼": 994, "幽": 995, "幾": 996, "庇": 997, "床": 998, "序": 999, "底": 1000, "店": 1001, "庚": 1002, "府": 1003, "度": 1004, "座": 1005, "庫": 1006, "庭": 1007, "庵": 1008, "庶": 1009, "康": 1010, "庸": 1011, "廁": 1012, "廂": 1013, "廈": 1014, "廉": 1015, "廊": 1016, "廖": 1017, "廚": 1018, "廟": 1019, "廠": 1020, "廢": 1021, "廣": 1022, "廬": 1023, "廳": 1024, "延": 1025, "廷": 1026, "建": 1027, "廿": 1028, "弄": 1029, "弊": 1030, "弍": 1031, "式": 1032, "弓": 1033, "引": 1034, "弟": 1035, "弱": 1036, "張": 1037, "強": 1038, "弸": 1039, "强": 1040, "弼": 1041, "彈": 1042, "彌": 1043, "彎": 1044, "彗": 1045, "彙": 1046, "形": 1047, "彤": 1048, "彥": 1049, "彩": 1050, "彪": 1051, "彭": 1052, "影": 1053, "彷": 1054, "役": 1055, "彼": 1056, "彿": 1057, "往": 1058, "征": 1059, "待": 1060, "徇": 1061, "很": 1062, "徊": 1063, "律": 1064, "後": 1065, "徐": 1066, "徑": 1067, "徒": 1068, "得": 1069, "徘": 
1070, "從": 1071, "御": 1072, "復": 1073, "循": 1074, "微": 1075, "徵": 1076, "德": 1077, "徹": 1078, "徽": 1079, "心": 1080, "必": 1081, "忌": 1082, "忍": 1083, "志": 1084, "忘": 1085, "忙": 1086, "忠": 1087, "快": 1088, "念": 1089, "忽": 1090, "忿": 1091, "怎": 1092, "怒": 1093, "怕": 1094, "思": 1095, "怡": 1096, "急": 1097, "怦": 1098, "性": 1099, "怨": 1100, "怪": 1101, "怯": 1102, "恃": 1103, "恆": 1104, "恐": 1105, "恒": 1106, "恕": 1107, "恙": 1108, "恢": 1109, "恤": 1110, "恥": 1111, "恨": 1112, "恩": 1113, "恭": 1114, "息": 1115, "恰": 1116, "悅": 1117, "悉": 1118, "悒": 1119, "悔": 1120, "悖": 1121, "悗": 1122, "悟": 1123, "悠": 1124, "患": 1125, "您": 1126, "悲": 1127, "悶": 1128, "情": 1129, "惇": 1130, "惑": 1131, "惘": 1132, "惜": 1133, "惟": 1134, "惠": 1135, "惡": 1136, "惦": 1137, "惰": 1138, "惱": 1139, "想": 1140, "惶": 1141, "惹": 1142, "愁": 1143, "愈": 1144, "愉": 1145, "意": 1146, "愚": 1147, "愛": 1148, "感": 1149, "愧": 1150, "慈": 1151, "態": 1152, "慌": 1153, "慎": 1154, "慕": 1155, "慘": 1156, "慚": 1157, "慢": 1158, "慣": 1159, "慤": 1160, "慧": 1161, "慨": 1162, "慮": 1163, "慰": 1164, "慳": 1165, "慶": 1166, "慷": 1167, "慾": 1168, "憂": 1169, "憎": 1170, "憐": 1171, "憑": 1172, "憚": 1173, "憤": 1174, "憧": 1175, "憩": 1176, "憫": 1177, "憬": 1178, "憶": 1179, "憾": 1180, "懂": 1181, "懇": 1182, "應": 1183, "懊": 1184, "懞": 1185, "懣": 1186, "懵": 1187, "懶": 1188, "懷": 1189, "懺": 1190, "懼": 1191, "懿": 1192, "戀": 1193, "戇": 1194, "戊": 1195, "戎": 1196, "成": 1197, "我": 1198, "戒": 1199, "戕": 1200, "或": 1201, "戚": 1202, "戟": 1203, "戥": 1204, "截": 1205, "戰": 1206, "戲": 1207, "戴": 1208, "戶": 1209, "戽": 1210, "戾": 1211, "房": 1212, "所": 1213, "扂": 1214, "扇": 1215, "手": 1216, "才": 1217, "扎": 1218, "扑": 1219, "扒": 1220, "打": 1221, "托": 1222, "扣": 1223, "扭": 1224, "扮": 1225, "扯": 1226, "扶": 1227, "批": 1228, "扻": 1229, "扼": 1230, "找": 1231, "承": 1232, "技": 1233, "抄": 1234, "抆": 1235, "把": 1236, "抑": 1237, "抓": 1238, "投": 1239, "抖": 1240, "抗": 1241, "折": 1242, "抦": 1243, "抬": 1244, "抱": 1245, "抵": 1246, "抹": 1247, "押": 1248, "抽": 1249, "抿": 1250, "拂": 1251, "拃": 1252, "拆": 1253, "拉": 1254, "拋": 1255, "拌": 1256, "拍": 1257, "拎": 1258, "拐": 1259, "拒": 1260, "拓": 1261, "拔": 1262, "拖": 1263, "拗": 1264, "拘": 1265, "拙": 1266, "招": 1267, "拜": 1268, "括": 1269, "拮": 1270, "拯": 1271, "拱": 1272, "拳": 1273, "拼": 1274, "拾": 1275, "拿": 1276, "持": 1277, "指": 1278, "挈": 1279, "按": 1280, "挑": 1281, "挖": 1282, "挨": 1283, "挪": 1284, "挫": 1285, "振": 1286, "挺": 1287, "挽": 1288, "挾": 1289, "捉": 1290, "捋": 1291, "捌": 1292, "捐": 1293, "捕": 1294, "捨": 1295, "捩": 1296, "据": 1297, "捱": 1298, "捲": 1299, "捶": 1300, "捷": 1301, "捺": 1302, "捽": 1303, "掂": 1304, "掃": 1305, "掅": 1306, "授": 1307, "掉": 1308, "掌": 1309, "排": 1310, "掕": 1311, "掗": 1312, "掘": 1313, "掙": 1314, "掛": 1315, "掟": 1316, "掠": 1317, "採": 1318, "探": 1319, "掣": 1320, "接": 1321, "控": 1322, "推": 1323, "掩": 1324, "措": 1325, "揀": 1326, "揇": 1327, "揈": 1328, "揉": 1329, "提": 1330, "插": 1331, "揗": 1332, "揚": 1333, "換": 1334, "揞": 1335, "握": 1336, "揣": 1337, "揦": 1338, "揩": 1339, "揪": 1340, "揭": 1341, "揮": 1342, "揳": 1343, "援": 1344, "揸": 1345, "揼": 1346, "揾": 1347, "損": 1348, "搏": 1349, "搖": 1350, "搗": 1351, "搜": 1352, "搞": 1353, "搣": 1354, "搬": 1355, "搭": 1356, "搵": 1357, "搶": 1358, "搽": 1359, "摑": 1360, "摘": 1361, "摙": 1362, "摞": 1363, "摧": 1364, "摩": 1365, "摯": 1366, "摳": 1367, "摷": 1368, "摸": 1369, "摺": 1370, "撇": 1371, "撈": 1372, "撐": 1373, "撒": 1374, "撓": 1375, "撕": 1376, "撚": 1377, "撞": 1378, "撤": 1379, "撥": 1380, "撩": 1381, "撫": 1382, "播": 1383, "撮": 1384, "撲": 1385, "撳": 1386, "撻": 1387, "撼": 1388, "撿": 1389, "擁": 1390, "擂": 1391, "擅": 1392, "擇": 
1393, "擊": 1394, "擋": 1395, "操": 1396, "擎": 1397, "擒": 1398, "擔": 1399, "擘": 1400, "據": 1401, "擤": 1402, "擦": 1403, "擬": 1404, "擰": 1405, "擲": 1406, "擴": 1407, "擸": 1408, "擺": 1409, "擾": 1410, "攀": 1411, "攋": 1412, "攏": 1413, "攔": 1414, "攘": 1415, "攝": 1416, "攞": 1417, "攣": 1418, "攤": 1419, "攪": 1420, "攬": 1421, "支": 1422, "攰": 1423, "收": 1424, "攸": 1425, "改": 1426, "攻": 1427, "放": 1428, "政": 1429, "故": 1430, "效": 1431, "敏": 1432, "救": 1433, "敗": 1434, "敘": 1435, "教": 1436, "敝": 1437, "敢": 1438, "散": 1439, "敦": 1440, "敬": 1441, "敲": 1442, "整": 1443, "敵": 1444, "敷": 1445, "數": 1446, "斂": 1447, "斃": 1448, "文": 1449, "斐": 1450, "斑": 1451, "斗": 1452, "料": 1453, "斜": 1454, "斟": 1455, "斤": 1456, "斧": 1457, "斬": 1458, "斯": 1459, "新": 1460, "斷": 1461, "方": 1462, "於": 1463, "施": 1464, "旁": 1465, "旅": 1466, "旋": 1467, "族": 1468, "旗": 1469, "既": 1470, "旣": 1471, "日": 1472, "旦": 1473, "旨": 1474, "早": 1475, "旬": 1476, "旭": 1477, "旳": 1478, "旺": 1479, "昂": 1480, "昃": 1481, "昆": 1482, "昇": 1483, "昌": 1484, "明": 1485, "昏": 1486, "昐": 1487, "易": 1488, "昔": 1489, "星": 1490, "映": 1491, "春": 1492, "昧": 1493, "昨": 1494, "昭": 1495, "是": 1496, "昺": 1497, "時": 1498, "晃": 1499, "晉": 1500, "晌": 1501, "晏": 1502, "晒": 1503, "晚": 1504, "晝": 1505, "晤": 1506, "晨": 1507, "普": 1508, "景": 1509, "晴": 1510, "晶": 1511, "智": 1512, "晾": 1513, "暇": 1514, "暈": 1515, "暉": 1516, "暑": 1517, "暖": 1518, "暗": 1519, "暢": 1520, "暨": 1521, "暫": 1522, "暮": 1523, "暴": 1524, "暸": 1525, "曆": 1526, "曉": 1527, "曖": 1528, "曜": 1529, "曬": 1530, "曱": 1531, "曲": 1532, "曳": 1533, "更": 1534, "書": 1535, "曹": 1536, "曼": 1537, "曾": 1538, "替": 1539, "最": 1540, "會": 1541, "月": 1542, "有": 1543, "朋": 1544, "服": 1545, "朕": 1546, "朗": 1547, "望": 1548, "朝": 1549, "期": 1550, "朦": 1551, "朧": 1552, "木": 1553, "未": 1554, "末": 1555, "本": 1556, "札": 1557, "朱": 1558, "朴": 1559, "朵": 1560, "朽": 1561, "杆": 1562, "杉": 1563, "李": 1564, "杏": 1565, "材": 1566, "村": 1567, "杖": 1568, "杜": 1569, "杞": 1570, "束": 1571, "来": 1572, "杭": 1573, "杯": 1574, "杰": 1575, "東": 1576, "杷": 1577, "松": 1578, "板": 1579, "枇": 1580, "枉": 1581, "枕": 1582, "林": 1583, "枚": 1584, "果": 1585, "枝": 1586, "枯": 1587, "枱": 1588, "架": 1589, "柄": 1590, "柏": 1591, "某": 1592, "柑": 1593, "柒": 1594, "染": 1595, "柔": 1596, "柚": 1597, "柞": 1598, "查": 1599, "柯": 1600, "柱": 1601, "柳": 1602, "柴": 1603, "柵": 1604, "柺": 1605, "柿": 1606, "栗": 1607, "校": 1608, "栢": 1609, "核": 1610, "根": 1611, "格": 1612, "栽": 1613, "桂": 1614, "桃": 1615, "桅": 1616, "案": 1617, "桌": 1618, "桐": 1619, "桑": 1620, "桔": 1621, "桶": 1622, "桿": 1623, "梁": 1624, "梅": 1625, "梓": 1626, "梗": 1627, "梘": 1628, "條": 1629, "梧": 1630, "梨": 1631, "梯": 1632, "械": 1633, "梳": 1634, "梵": 1635, "棄": 1636, "棉": 1637, "棋": 1638, "棍": 1639, "棒": 1640, "棕": 1641, "棖": 1642, "棗": 1643, "棘": 1644, "棚": 1645, "棟": 1646, "棠": 1647, "棧": 1648, "森": 1649, "棲": 1650, "棺": 1651, "椅": 1652, "植": 1653, "椏": 1654, "椒": 1655, "椰": 1656, "楂": 1657, "楊": 1658, "楋": 1659, "楓": 1660, "楚": 1661, "楣": 1662, "業": 1663, "極": 1664, "概": 1665, "榆": 1666, "榕": 1667, "榚": 1668, "榛": 1669, "榜": 1670, "榨": 1671, "榮": 1672, "榴": 1673, "構": 1674, "槍": 1675, "槐": 1676, "槤": 1677, "槽": 1678, "樂": 1679, "樊": 1680, "樑": 1681, "樓": 1682, "標": 1683, "樞": 1684, "樟": 1685, "模": 1686, "樣": 1687, "樸": 1688, "樹": 1689, "樺": 1690, "樽": 1691, "橋": 1692, "橘": 1693, "橙": 1694, "機": 1695, "橡": 1696, "橢": 1697, "橫": 1698, "檀": 1699, "檔": 1700, "檢": 1701, "檬": 1702, "檯": 1703, "檳": 1704, "檸": 1705, "檻": 1706, "櫃": 1707, "櫈": 1708, "櫚": 1709, "櫸": 1710, "櫻": 1711, "欄": 1712, "權": 1713, "欖": 1714, "欠": 1715, "次": 
1716, "欣": 1717, "欲": 1718, "欺": 1719, "欽": 1720, "款": 1721, "歇": 1722, "歉": 1723, "歌": 1724, "歎": 1725, "歐": 1726, "歛": 1727, "歡": 1728, "止": 1729, "正": 1730, "此": 1731, "步": 1732, "武": 1733, "歧": 1734, "歪": 1735, "歲": 1736, "歷": 1737, "歸": 1738, "歹": 1739, "死": 1740, "殄": 1741, "殆": 1742, "殊": 1743, "殖": 1744, "殘": 1745, "殮": 1746, "段": 1747, "殷": 1748, "殺": 1749, "殼": 1750, "殿": 1751, "毀": 1752, "毅": 1753, "毋": 1754, "母": 1755, "每": 1756, "毒": 1757, "毓": 1758, "比": 1759, "毛": 1760, "毡": 1761, "毫": 1762, "氏": 1763, "民": 1764, "氓": 1765, "氛": 1766, "氣": 1767, "氧": 1768, "氯": 1769, "水": 1770, "永": 1771, "氹": 1772, "汀": 1773, "汁": 1774, "求": 1775, "汕": 1776, "汗": 1777, "汝": 1778, "江": 1779, "池": 1780, "污": 1781, "汪": 1782, "汰": 1783, "汶": 1784, "決": 1785, "汽": 1786, "沃": 1787, "沈": 1788, "沉": 1789, "沐": 1790, "沒": 1791, "沖": 1792, "沙": 1793, "沛": 1794, "沫": 1795, "沮": 1796, "沱": 1797, "河": 1798, "油": 1799, "治": 1800, "沽": 1801, "沾": 1802, "沿": 1803, "況": 1804, "泄": 1805, "泉": 1806, "泊": 1807, "泌": 1808, "泓": 1809, "法": 1810, "泛": 1811, "泡": 1812, "波": 1813, "泥": 1814, "注": 1815, "泮": 1816, "泰": 1817, "泳": 1818, "洋": 1819, "洗": 1820, "洛": 1821, "洞": 1822, "津": 1823, "洪": 1824, "洱": 1825, "洲": 1826, "洶": 1827, "活": 1828, "洽": 1829, "派": 1830, "流": 1831, "浙": 1832, "浚": 1833, "浣": 1834, "浦": 1835, "浩": 1836, "浪": 1837, "浮": 1838, "浴": 1839, "海": 1840, "浸": 1841, "涂": 1842, "消": 1843, "涉": 1844, "涌": 1845, "涕": 1846, "涯": 1847, "液": 1848, "涷": 1849, "涼": 1850, "淋": 1851, "淒": 1852, "淘": 1853, "淚": 1854, "淡": 1855, "淥": 1856, "淨": 1857, "淩": 1858, "淪": 1859, "淫": 1860, "深": 1861, "混": 1862, "淸": 1863, "淺": 1864, "添": 1865, "清": 1866, "減": 1867, "渝": 1868, "渠": 1869, "渡": 1870, "渣": 1871, "渦": 1872, "温": 1873, "測": 1874, "渭": 1875, "港": 1876, "渴": 1877, "游": 1878, "渺": 1879, "渾": 1880, "湃": 1881, "湖": 1882, "湘": 1883, "湧": 1884, "湯": 1885, "溋": 1886, "源": 1887, "準": 1888, "溜": 1889, "溝": 1890, "溢": 1891, "溪": 1892, "溫": 1893, "溶": 1894, "滂": 1895, "滄": 1896, "滅": 1897, "滋": 1898, "滌": 1899, "滑": 1900, "滔": 1901, "滘": 1902, "滙": 1903, "滯": 1904, "滷": 1905, "滾": 1906, "滿": 1907, "漁": 1908, "漂": 1909, "漆": 1910, "漏": 1911, "漓": 1912, "演": 1913, "漠": 1914, "漢": 1915, "漫": 1916, "漬": 1917, "漲": 1918, "漸": 1919, "漾": 1920, "漿": 1921, "潑": 1922, "潔": 1923, "潛": 1924, "潤": 1925, "潭": 1926, "潮": 1927, "潰": 1928, "潲": 1929, "潷": 1930, "潺": 1931, "澄": 1932, "澍": 1933, "澎": 1934, "澡": 1935, "澤": 1936, "澩": 1937, "澱": 1938, "澳": 1939, "激": 1940, "濃": 1941, "濕": 1942, "濛": 1943, "濟": 1944, "濠": 1945, "濤": 1946, "濫": 1947, "濱": 1948, "濾": 1949, "瀉": 1950, "瀚": 1951, "瀝": 1952, "瀟": 1953, "瀨": 1954, "瀾": 1955, "灑": 1956, "灘": 1957, "灣": 1958, "火": 1959, "灰": 1960, "灼": 1961, "災": 1962, "炆": 1963, "炊": 1964, "炎": 1965, "炒": 1966, "炕": 1967, "炙": 1968, "炭": 1969, "炮": 1970, "炳": 1971, "炸": 1972, "為": 1973, "烈": 1974, "烏": 1975, "烘": 1976, "烙": 1977, "烟": 1978, "烤": 1979, "烹": 1980, "焉": 1981, "焗": 1982, "焚": 1983, "無": 1984, "焦": 1985, "然": 1986, "煉": 1987, "煎": 1988, "煖": 1989, "煙": 1990, "煞": 1991, "煤": 1992, "照": 1993, "煨": 1994, "煩": 1995, "煮": 1996, "煲": 1997, "煽": 1998, "熄": 1999, "熊": 2000, "熒": 2001, "熔": 2002, "熙": 2003, "熟": 2004, "熬": 2005, "熱": 2006, "熾": 2007, "燃": 2008, "燈": 2009, "燉": 2010, "燒": 2011, "燕": 2012, "燜": 2013, "營": 2014, "燥": 2015, "燭": 2016, "燴": 2017, "燶": 2018, "爆": 2019, "爐": 2020, "爛": 2021, "爪": 2022, "爬": 2023, "爭": 2024, "爲": 2025, "爵": 2026, "父": 2027, "爸": 2028, "爹": 2029, "爺": 2030, "爽": 2031, "爾": 2032, "牀": 2033, "牆": 2034, "片": 2035, "版": 2036, "牌": 2037, "牘": 2038, "牙": 
2039, "牛": 2040, "牡": 2041, "牢": 2042, "牧": 2043, "物": 2044, "牯": 2045, "牲": 2046, "特": 2047, "牽": 2048, "犀": 2049, "犧": 2050, "犬": 2051, "犯": 2052, "狀": 2053, "狂": 2054, "狄": 2055, "狐": 2056, "狗": 2057, "狠": 2058, "狡": 2059, "狩": 2060, "狸": 2061, "狹": 2062, "狼": 2063, "猄": 2064, "猛": 2065, "猜": 2066, "猴": 2067, "猶": 2068, "猾": 2069, "獄": 2070, "獅": 2071, "獎": 2072, "獠": 2073, "獨": 2074, "獲": 2075, "獵": 2076, "獸": 2077, "獻": 2078, "玄": 2079, "率": 2080, "玉": 2081, "王": 2082, "玟": 2083, "玩": 2084, "玫": 2085, "玻": 2086, "珀": 2087, "珊": 2088, "珍": 2089, "珏": 2090, "珒": 2091, "珠": 2092, "班": 2093, "現": 2094, "球": 2095, "理": 2096, "琉": 2097, "琛": 2098, "琦": 2099, "琳": 2100, "琴": 2101, "琵": 2102, "琶": 2103, "瑕": 2104, "瑙": 2105, "瑜": 2106, "瑞": 2107, "瑟": 2108, "瑤": 2109, "瑧": 2110, "瑪": 2111, "瑰": 2112, "璀": 2113, "璃": 2114, "璇": 2115, "璉": 2116, "璐": 2117, "璟": 2118, "璧": 2119, "璨": 2120, "環": 2121, "璵": 2122, "璽": 2123, "瓊": 2124, "瓏": 2125, "瓜": 2126, "瓦": 2127, "瓶": 2128, "甘": 2129, "甚": 2130, "甜": 2131, "生": 2132, "產": 2133, "甥": 2134, "用": 2135, "甩": 2136, "甫": 2137, "田": 2138, "由": 2139, "甲": 2140, "申": 2141, "甴": 2142, "男": 2143, "甸": 2144, "畀": 2145, "畋": 2146, "界": 2147, "畏": 2148, "畐": 2149, "畔": 2150, "留": 2151, "畜": 2152, "畢": 2153, "略": 2154, "番": 2155, "畫": 2156, "異": 2157, "當": 2158, "畿": 2159, "疆": 2160, "疇": 2161, "疊": 2162, "疏": 2163, "疑": 2164, "疤": 2165, "疫": 2166, "疲": 2167, "疵": 2168, "疹": 2169, "疼": 2170, "疾": 2171, "病": 2172, "症": 2173, "痕": 2174, "痛": 2175, "痢": 2176, "痰": 2177, "痱": 2178, "痴": 2179, "痺": 2180, "痾": 2181, "瘀": 2182, "瘁": 2183, "瘋": 2184, "瘓": 2185, "瘟": 2186, "瘡": 2187, "瘦": 2188, "療": 2189, "癆": 2190, "癌": 2191, "癡": 2192, "癢": 2193, "癩": 2194, "癮": 2195, "癱": 2196, "癲": 2197, "登": 2198, "發": 2199, "白": 2200, "百": 2201, "皂": 2202, "的": 2203, "皆": 2204, "皇": 2205, "皚": 2206, "皮": 2207, "皺": 2208, "盃": 2209, "盅": 2210, "盆": 2211, "盈": 2212, "益": 2213, "盏": 2214, "盒": 2215, "盔": 2216, "盛": 2217, "盜": 2218, "盞": 2219, "盟": 2220, "盡": 2221, "監": 2222, "盤": 2223, "盧": 2224, "盪": 2225, "目": 2226, "盲": 2227, "直": 2228, "相": 2229, "盼": 2230, "盾": 2231, "省": 2232, "眉": 2233, "看": 2234, "眞": 2235, "真": 2236, "眠": 2237, "眨": 2238, "眯": 2239, "眶": 2240, "眼": 2241, "眾": 2242, "着": 2243, "睄": 2244, "睇": 2245, "睏": 2246, "睛": 2247, "睜": 2248, "睡": 2249, "督": 2250, "睥": 2251, "睦": 2252, "睨": 2253, "睬": 2254, "睹": 2255, "瞅": 2256, "瞌": 2257, "瞓": 2258, "瞞": 2259, "瞬": 2260, "瞭": 2261, "矛": 2262, "知": 2263, "矩": 2264, "短": 2265, "矮": 2266, "石": 2267, "砂": 2268, "砌": 2269, "砍": 2270, "研": 2271, "砰": 2272, "砲": 2273, "破": 2274, "砵": 2275, "砸": 2276, "硤": 2277, "硬": 2278, "碇": 2279, "碉": 2280, "碌": 2281, "碎": 2282, "碑": 2283, "碗": 2284, "碘": 2285, "碟": 2286, "碧": 2287, "碰": 2288, "確": 2289, "碼": 2290, "磅": 2291, "磐": 2292, "磚": 2293, "磡": 2294, "磨": 2295, "磯": 2296, "礎": 2297, "礙": 2298, "礦": 2299, "礫": 2300, "示": 2301, "社": 2302, "祈": 2303, "祐": 2304, "祖": 2305, "祝": 2306, "神": 2307, "祟": 2308, "祠": 2309, "祥": 2310, "票": 2311, "祭": 2312, "祿": 2313, "禁": 2314, "禍": 2315, "福": 2316, "禡": 2317, "禧": 2318, "禪": 2319, "禮": 2320, "禱": 2321, "禽": 2322, "禾": 2323, "秀": 2324, "私": 2325, "秅": 2326, "秉": 2327, "秋": 2328, "科": 2329, "秒": 2330, "秘": 2331, "租": 2332, "秤": 2333, "秦": 2334, "秧": 2335, "秩": 2336, "移": 2337, "稀": 2338, "稅": 2339, "稈": 2340, "程": 2341, "稍": 2342, "稔": 2343, "稚": 2344, "稠": 2345, "種": 2346, "稱": 2347, "稻": 2348, "稿": 2349, "穀": 2350, "穌": 2351, "積": 2352, "穎": 2353, "穗": 2354, "穢": 2355, "穩": 2356, "穫": 2357, "穴": 2358, "究": 2359, "空": 2360, "穿": 2361, "突": 
2362, "窄": 2363, "窒": 2364, "窗": 2365, "窠": 2366, "窩": 2367, "窮": 2368, "窰": 2369, "窿": 2370, "竄": 2371, "竅": 2372, "竇": 2373, "竊": 2374, "立": 2375, "站": 2376, "竟": 2377, "章": 2378, "童": 2379, "端": 2380, "競": 2381, "竹": 2382, "笆": 2383, "笈": 2384, "笏": 2385, "笑": 2386, "笛": 2387, "笠": 2388, "符": 2389, "笨": 2390, "笪": 2391, "第": 2392, "筆": 2393, "等": 2394, "筋": 2395, "筍": 2396, "筏": 2397, "筒": 2398, "答": 2399, "策": 2400, "筲": 2401, "筵": 2402, "筷": 2403, "箋": 2404, "箍": 2405, "箕": 2406, "算": 2407, "管": 2408, "箭": 2409, "箱": 2410, "箴": 2411, "節": 2412, "範": 2413, "篇": 2414, "築": 2415, "篋": 2416, "篙": 2417, "篤": 2418, "篳": 2419, "簍": 2420, "簡": 2421, "簽": 2422, "簾": 2423, "簿": 2424, "籃": 2425, "籌": 2426, "籍": 2427, "籐": 2428, "籠": 2429, "籤": 2430, "籬": 2431, "籮": 2432, "籲": 2433, "米": 2434, "籽": 2435, "粉": 2436, "粒": 2437, "粗": 2438, "粟": 2439, "粥": 2440, "粳": 2441, "粵": 2442, "粹": 2443, "粼": 2444, "精": 2445, "粿": 2446, "糉": 2447, "糊": 2448, "糍": 2449, "糕": 2450, "糖": 2451, "糞": 2452, "糟": 2453, "糧": 2454, "糯": 2455, "糰": 2456, "糴": 2457, "系": 2458, "糾": 2459, "紀": 2460, "約": 2461, "紅": 2462, "納": 2463, "紐": 2464, "紓": 2465, "純": 2466, "紗": 2467, "紙": 2468, "級": 2469, "紛": 2470, "素": 2471, "索": 2472, "紥": 2473, "紫": 2474, "紮": 2475, "累": 2476, "細": 2477, "紳": 2478, "紹": 2479, "終": 2480, "組": 2481, "結": 2482, "絕": 2483, "絞": 2484, "絡": 2485, "給": 2486, "絨": 2487, "統": 2488, "絲": 2489, "絶": 2490, "綁": 2491, "經": 2492, "綜": 2493, "綠": 2494, "綫": 2495, "維": 2496, "網": 2497, "綿": 2498, "緊": 2499, "緒": 2500, "緘": 2501, "線": 2502, "緣": 2503, "編": 2504, "緩": 2505, "緬": 2506, "練": 2507, "緻": 2508, "縉": 2509, "縊": 2510, "縛": 2511, "縫": 2512, "縮": 2513, "縱": 2514, "縷": 2515, "總": 2516, "績": 2517, "繁": 2518, "織": 2519, "繞": 2520, "繩": 2521, "繫": 2522, "繳": 2523, "繼": 2524, "續": 2525, "纏": 2526, "纔": 2527, "纖": 2528, "纜": 2529, "缸": 2530, "缺": 2531, "缽": 2532, "罅": 2533, "罐": 2534, "罔": 2535, "罕": 2536, "罟": 2537, "罩": 2538, "罪": 2539, "置": 2540, "罰": 2541, "署": 2542, "罵": 2543, "罷": 2544, "羅": 2545, "羈": 2546, "羊": 2547, "羌": 2548, "美": 2549, "羞": 2550, "羣": 2551, "群": 2552, "義": 2553, "羲": 2554, "羹": 2555, "羽": 2556, "翁": 2557, "翅": 2558, "翌": 2559, "習": 2560, "翔": 2561, "翠": 2562, "翡": 2563, "翩": 2564, "翰": 2565, "翱": 2566, "翻": 2567, "翼": 2568, "耀": 2569, "老": 2570, "考": 2571, "者": 2572, "而": 2573, "耍": 2574, "耐": 2575, "耕": 2576, "耗": 2577, "耘": 2578, "耳": 2579, "耶": 2580, "耷": 2581, "聆": 2582, "聊": 2583, "聖": 2584, "聘": 2585, "聚": 2586, "聞": 2587, "聯": 2588, "聰": 2589, "聲": 2590, "聳": 2591, "聶": 2592, "職": 2593, "聽": 2594, "肅": 2595, "肆": 2596, "肇": 2597, "肉": 2598, "肋": 2599, "肌": 2600, "肓": 2601, "肖": 2602, "肘": 2603, "肚": 2604, "肛": 2605, "肝": 2606, "股": 2607, "肥": 2608, "肨": 2609, "肩": 2610, "肯": 2611, "育": 2612, "肴": 2613, "肺": 2614, "胃": 2615, "背": 2616, "胎": 2617, "胚": 2618, "胡": 2619, "胭": 2620, "胸": 2621, "胺": 2622, "能": 2623, "脂": 2624, "脅": 2625, "脆": 2626, "脈": 2627, "脊": 2628, "脫": 2629, "脷": 2630, "脹": 2631, "脾": 2632, "腋": 2633, "腍": 2634, "腎": 2635, "腐": 2636, "腔": 2637, "腕": 2638, "腥": 2639, "腦": 2640, "腩": 2641, "腫": 2642, "腰": 2643, "腳": 2644, "腸": 2645, "腺": 2646, "腿": 2647, "膀": 2648, "膊": 2649, "膏": 2650, "膚": 2651, "膜": 2652, "膝": 2653, "膠": 2654, "膨": 2655, "膩": 2656, "膳": 2657, "膺": 2658, "膽": 2659, "臂": 2660, "臉": 2661, "臘": 2662, "臟": 2663, "臣": 2664, "臨": 2665, "自": 2666, "臭": 2667, "至": 2668, "致": 2669, "臺": 2670, "臻": 2671, "臼": 2672, "舂": 2673, "舅": 2674, "與": 2675, "興": 2676, "舉": 2677, "舊": 2678, "舌": 2679, "舍": 2680, "舐": 2681, "舒": 2682, "舔": 2683, "舖": 2684, "舞": 
2685, "舟": 2686, "舢": 2687, "舨": 2688, "航": 2689, "般": 2690, "舶": 2691, "船": 2692, "艇": 2693, "艦": 2694, "良": 2695, "艱": 2696, "色": 2697, "艷": 2698, "芋": 2699, "芒": 2700, "芙": 2701, "芝": 2702, "芥": 2703, "芬": 2704, "芭": 2705, "芯": 2706, "花": 2707, "芳": 2708, "芹": 2709, "芽": 2710, "苑": 2711, "苔": 2712, "苗": 2713, "苟": 2714, "苣": 2715, "若": 2716, "苦": 2717, "英": 2718, "茂": 2719, "范": 2720, "茄": 2721, "茅": 2722, "茫": 2723, "茵": 2724, "茶": 2725, "茸": 2726, "荃": 2727, "草": 2728, "荊": 2729, "荒": 2730, "荔": 2731, "荷": 2732, "莆": 2733, "莉": 2734, "莊": 2735, "莎": 2736, "莓": 2737, "莞": 2738, "莫": 2739, "莽": 2740, "菁": 2741, "菇": 2742, "菊": 2743, "菌": 2744, "菓": 2745, "菜": 2746, "菠": 2747, "菩": 2748, "華": 2749, "菱": 2750, "菲": 2751, "菴": 2752, "萃": 2753, "萄": 2754, "萊": 2755, "萍": 2756, "萬": 2757, "萺": 2758, "落": 2759, "葉": 2760, "著": 2761, "葛": 2762, "葡": 2763, "董": 2764, "葫": 2765, "葬": 2766, "葳": 2767, "葵": 2768, "蒂": 2769, "蒙": 2770, "蒜": 2771, "蒡": 2772, "蒲": 2773, "蒸": 2774, "蒼": 2775, "蓀": 2776, "蓆": 2777, "蓉": 2778, "蓋": 2779, "蓓": 2780, "蓬": 2781, "蓮": 2782, "蓺": 2783, "蔓": 2784, "蔔": 2785, "蔗": 2786, "蔥": 2787, "蔫": 2788, "蔬": 2789, "蔭": 2790, "蔽": 2791, "蕃": 2792, "蕉": 2793, "蕎": 2794, "蕙": 2795, "蕩": 2796, "蕪": 2797, "蕭": 2798, "蕾": 2799, "薄": 2800, "薇": 2801, "薈": 2802, "薏": 2803, "薑": 2804, "薩": 2805, "薪": 2806, "薯": 2807, "薰": 2808, "藉": 2809, "藍": 2810, "藏": 2811, "藐": 2812, "藕": 2813, "藝": 2814, "藤": 2815, "藥": 2816, "藹": 2817, "蘅": 2818, "蘆": 2819, "蘇": 2820, "蘋": 2821, "蘑": 2822, "蘭": 2823, "蘸": 2824, "蘿": 2825, "虎": 2826, "虐": 2827, "虓": 2828, "處": 2829, "虛": 2830, "號": 2831, "虧": 2832, "虱": 2833, "虹": 2834, "蚊": 2835, "蚌": 2836, "蚝": 2837, "蚵": 2838, "蚺": 2839, "蛇": 2840, "蛋": 2841, "蛛": 2842, "蛟": 2843, "蛤": 2844, "蜂": 2845, "蜆": 2846, "蜊": 2847, "蜘": 2848, "蜜": 2849, "蜢": 2850, "蝕": 2851, "蝗": 2852, "蝦": 2853, "蝨": 2854, "蝴": 2855, "蝶": 2856, "蝸": 2857, "融": 2858, "螞": 2859, "螢": 2860, "螺": 2861, "蟀": 2862, "蟆": 2863, "蟋": 2864, "蟠": 2865, "蟬": 2866, "蟲": 2867, "蟹": 2868, "蟻": 2869, "蠅": 2870, "蠔": 2871, "蠟": 2872, "蠢": 2873, "蠱": 2874, "蠻": 2875, "血": 2876, "衆": 2877, "行": 2878, "衍": 2879, "術": 2880, "街": 2881, "衙": 2882, "衛": 2883, "衝": 2884, "衞": 2885, "衡": 2886, "衣": 2887, "表": 2888, "衫": 2889, "衰": 2890, "衲": 2891, "衷": 2892, "袁": 2893, "袋": 2894, "袖": 2895, "被": 2896, "裁": 2897, "裏": 2898, "裔": 2899, "裕": 2900, "裙": 2901, "補": 2902, "裝": 2903, "裡": 2904, "裴": 2905, "製": 2906, "複": 2907, "褒": 2908, "褦": 2909, "褪": 2910, "褲": 2911, "褸": 2912, "襟": 2913, "襪": 2914, "襯": 2915, "襲": 2916, "西": 2917, "要": 2918, "覆": 2919, "見": 2920, "規": 2921, "覓": 2922, "視": 2923, "親": 2924, "覲": 2925, "覺": 2926, "覽": 2927, "觀": 2928, "角": 2929, "解": 2930, "觸": 2931, "言": 2932, "訂": 2933, "計": 2934, "訊": 2935, "討": 2936, "訓": 2937, "訕": 2938, "託": 2939, "記": 2940, "訝": 2941, "訪": 2942, "設": 2943, "許": 2944, "訴": 2945, "診": 2946, "註": 2947, "証": 2948, "詆": 2949, "詐": 2950, "評": 2951, "詞": 2952, "詢": 2953, "試": 2954, "詩": 2955, "詭": 2956, "話": 2957, "該": 2958, "詳": 2959, "詹": 2960, "誅": 2961, "誇": 2962, "誌": 2963, "認": 2964, "誓": 2965, "誕": 2966, "誘": 2967, "語": 2968, "誠": 2969, "誡": 2970, "誤": 2971, "誨": 2972, "說": 2973, "説": 2974, "誰": 2975, "課": 2976, "誼": 2977, "調": 2978, "談": 2979, "請": 2980, "諒": 2981, "論": 2982, "諗": 2983, "諜": 2984, "諦": 2985, "諧": 2986, "諫": 2987, "諷": 2988, "諸": 2989, "諺": 2990, "諾": 2991, "謀": 2992, "謁": 2993, "謂": 2994, "謊": 2995, "謎": 2996, "謙": 2997, "講": 2998, "謝": 2999, "謢": 3000, "謬": 3001, "謹": 3002, "謾": 3003, "證": 3004, "譎": 3005, "譖": 3006, "識": 3007, "譚": 
3008, "譜": 3009, "警": 3010, "譬": 3011, "譯": 3012, "議": 3013, "譴": 3014, "護": 3015, "譽": 3016, "讀": 3017, "變": 3018, "讎": 3019, "讓": 3020, "讚": 3021, "谷": 3022, "豁": 3023, "豂": 3024, "豆": 3025, "豈": 3026, "豉": 3027, "豎": 3028, "豐": 3029, "豚": 3030, "象": 3031, "豪": 3032, "豫": 3033, "豬": 3034, "豹": 3035, "貂": 3036, "貌": 3037, "貓": 3038, "貝": 3039, "負": 3040, "財": 3041, "貢": 3042, "貧": 3043, "貨": 3044, "販": 3045, "貪": 3046, "貫": 3047, "責": 3048, "貴": 3049, "貶": 3050, "買": 3051, "貸": 3052, "費": 3053, "貼": 3054, "貿": 3055, "賀": 3056, "賃": 3057, "資": 3058, "賈": 3059, "賊": 3060, "賒": 3061, "賓": 3062, "賜": 3063, "賞": 3064, "賢": 3065, "賣": 3066, "賤": 3067, "賦": 3068, "質": 3069, "賬": 3070, "賭": 3071, "賴": 3072, "賺": 3073, "購": 3074, "賽": 3075, "贅": 3076, "贈": 3077, "贊": 3078, "贏": 3079, "贼": 3080, "赤": 3081, "赫": 3082, "走": 3083, "赴": 3084, "起": 3085, "趁": 3086, "超": 3087, "越": 3088, "趌": 3089, "趕": 3090, "趙": 3091, "趣": 3092, "趨": 3093, "足": 3094, "趴": 3095, "趺": 3096, "趾": 3097, "跋": 3098, "跌": 3099, "跑": 3100, "跛": 3101, "距": 3102, "跟": 3103, "跡": 3104, "跣": 3105, "跨": 3106, "跪": 3107, "路": 3108, "跳": 3109, "踎": 3110, "踏": 3111, "踐": 3112, "踢": 3113, "踩": 3114, "踪": 3115, "踱": 3116, "踹": 3117, "蹄": 3118, "蹈": 3119, "蹋": 3120, "蹟": 3121, "蹤": 3122, "蹲": 3123, "蹺": 3124, "躁": 3125, "躉": 3126, "躍": 3127, "躝": 3128, "身": 3129, "躬": 3130, "躲": 3131, "車": 3132, "軌": 3133, "軍": 3134, "軒": 3135, "軟": 3136, "較": 3137, "載": 3138, "輊": 3139, "輋": 3140, "輔": 3141, "輕": 3142, "輘": 3143, "輝": 3144, "輟": 3145, "輩": 3146, "輪": 3147, "輯": 3148, "輷": 3149, "輸": 3150, "輻": 3151, "輾": 3152, "轄": 3153, "轆": 3154, "轉": 3155, "轍": 3156, "轡": 3157, "辛": 3158, "辜": 3159, "辣": 3160, "辦": 3161, "辨": 3162, "辭": 3163, "辯": 3164, "辰": 3165, "辱": 3166, "農": 3167, "迂": 3168, "迅": 3169, "迍": 3170, "迎": 3171, "近": 3172, "返": 3173, "迦": 3174, "迪": 3175, "迫": 3176, "述": 3177, "迴": 3178, "迷": 3179, "追": 3180, "迾": 3181, "退": 3182, "送": 3183, "逃": 3184, "逆": 3185, "透": 3186, "逐": 3187, "途": 3188, "逗": 3189, "這": 3190, "通": 3191, "逝": 3192, "逞": 3193, "速": 3194, "造": 3195, "逢": 3196, "連": 3197, "週": 3198, "進": 3199, "逸": 3200, "逹": 3201, "逼": 3202, "逾": 3203, "遂": 3204, "遇": 3205, "遊": 3206, "運": 3207, "遍": 3208, "過": 3209, "遏": 3210, "道": 3211, "達": 3212, "違": 3213, "遙": 3214, "遜": 3215, "遞": 3216, "遠": 3217, "遢": 3218, "遣": 3219, "適": 3220, "遭": 3221, "遮": 3222, "遲": 3223, "遴": 3224, "遵": 3225, "遷": 3226, "選": 3227, "遺": 3228, "避": 3229, "邀": 3230, "還": 3231, "邊": 3232, "邋": 3233, "邏": 3234, "那": 3235, "邦": 3236, "邨": 3237, "邪": 3238, "��": 3239, "邵": 3240, "邸": 3241, "郁": 3242, "郊": 3243, "郎": 3244, "郝": 3245, "部": 3246, "郭": 3247, "郵": 3248, "都": 3249, "鄂": 3250, "鄉": 3251, "鄙": 3252, "鄧": 3253, "鄭": 3254, "鄰": 3255, "酌": 3256, "配": 3257, "酒": 3258, "酥": 3259, "酪": 3260, "酬": 3261, "酮": 3262, "酱": 3263, "酷": 3264, "酸": 3265, "醇": 3266, "醉": 3267, "醋": 3268, "醒": 3269, "醜": 3270, "醫": 3271, "醬": 3272, "醺": 3273, "釀": 3274, "采": 3275, "釋": 3276, "里": 3277, "重": 3278, "野": 3279, "量": 3280, "金": 3281, "釗": 3282, "釘": 3283, "釜": 3284, "針": 3285, "釣": 3286, "釵": 3287, "鈍": 3288, "鈔": 3289, "鈕": 3290, "鈴": 3291, "鉛": 3292, "鉤": 3293, "鉸": 3294, "銀": 3295, "銅": 3296, "銘": 3297, "銳": 3298, "銷": 3299, "鋁": 3300, "鋒": 3301, "鋪": 3302, "鋼": 3303, "錄": 3304, "錐": 3305, "錢": 3306, "錦": 3307, "錫": 3308, "錯": 3309, "錶": 3310, "鍊": 3311, "鍋": 3312, "鍚": 3313, "鍵": 3314, "鍾": 3315, "鎖": 3316, "鎗": 3317, "鎭": 3318, "鎮": 3319, "鏈": 3320, "鏟": 3321, "鏡": 3322, "鏰": 3323, "鐘": 3324, "鐡": 3325, "鐵": 3326, "鐸": 3327, "鑄": 3328, "鑊": 3329, "鑑": 3330, "鑫": 
3331, "鑲": 3332, "鑼": 3333, "鑽": 3334, "鑿": 3335, "長": 3336, "門": 3337, "閂": 3338, "閃": 3339, "閉": 3340, "開": 3341, "閏": 3342, "閒": 3343, "間": 3344, "閘": 3345, "閣": 3346, "閨": 3347, "閩": 3348, "閱": 3349, "閻": 3350, "闆": 3351, "闊": 3352, "闌": 3353, "闔": 3354, "闖": 3355, "關": 3356, "闢": 3357, "阜": 3358, "阪": 3359, "阱": 3360, "防": 3361, "阻": 3362, "阿": 3363, "陀": 3364, "陂": 3365, "附": 3366, "陌": 3367, "降": 3368, "限": 3369, "陞": 3370, "院": 3371, "陣": 3372, "除": 3373, "陪": 3374, "陰": 3375, "陳": 3376, "陶": 3377, "陷": 3378, "陸": 3379, "陽": 3380, "隆": 3381, "隊": 3382, "階": 3383, "隔": 3384, "隙": 3385, "際": 3386, "障": 3387, "隧": 3388, "隨": 3389, "險": 3390, "隱": 3391, "隴": 3392, "隸": 3393, "隻": 3394, "雀": 3395, "雁": 3396, "雄": 3397, "雅": 3398, "集": 3399, "雋": 3400, "雌": 3401, "雍": 3402, "雖": 3403, "雙": 3404, "雜": 3405, "雞": 3406, "離": 3407, "難": 3408, "雨": 3409, "雪": 3410, "雲": 3411, "零": 3412, "雷": 3413, "電": 3414, "需": 3415, "霄": 3416, "震": 3417, "霉": 3418, "霎": 3419, "霖": 3420, "霜": 3421, "霧": 3422, "露": 3423, "霸": 3424, "靈": 3425, "青": 3426, "靖": 3427, "靚": 3428, "靜": 3429, "非": 3430, "靠": 3431, "面": 3432, "革": 3433, "靴": 3434, "靶": 3435, "鞋": 3436, "鞍": 3437, "鞦": 3438, "鞭": 3439, "韆": 3440, "韌": 3441, "韓": 3442, "音": 3443, "韻": 3444, "響": 3445, "頁": 3446, "頂": 3447, "項": 3448, "順": 3449, "須": 3450, "頌": 3451, "預": 3452, "頒": 3453, "頓": 3454, "頗": 3455, "領": 3456, "頤": 3457, "頭": 3458, "頸": 3459, "頻": 3460, "題": 3461, "額": 3462, "顏": 3463, "顔": 3464, "願": 3465, "顛": 3466, "類": 3467, "顧": 3468, "顯": 3469, "顱": 3470, "風": 3471, "颱": 3472, "飄": 3473, "飛": 3474, "食": 3475, "飢": 3476, "飯": 3477, "飲": 3478, "飼": 3479, "飽": 3480, "飾": 3481, "餃": 3482, "餅": 3483, "餉": 3484, "養": 3485, "餋": 3486, "餐": 3487, "餒": 3488, "餓": 3489, "餘": 3490, "館": 3491, "餵": 3492, "餸": 3493, "餼": 3494, "饅": 3495, "饌": 3496, "饑": 3497, "饒": 3498, "饕": 3499, "首": 3500, "香": 3501, "馨": 3502, "馬": 3503, "馮": 3504, "馳": 3505, "駁": 3506, "駐": 3507, "駒": 3508, "駕": 3509, "駛": 3510, "駝": 3511, "駟": 3512, "駱": 3513, "駿": 3514, "騅": 3515, "騎": 3516, "騙": 3517, "騭": 3518, "騮": 3519, "騰": 3520, "騷": 3521, "騾": 3522, "驅": 3523, "驕": 3524, "驗": 3525, "驚": 3526, "驟": 3527, "驥": 3528, "骨": 3529, "骹": 3530, "髀": 3531, "髓": 3532, "體": 3533, "高": 3534, "髮": 3535, "髻": 3536, "鬆": 3537, "鬚": 3538, "鬠": 3539, "鬢": 3540, "鬥": 3541, "鬧": 3542, "鬱": 3543, "鬼": 3544, "魁": 3545, "魂": 3546, "魄": 3547, "魅": 3548, "魏": 3549, "魔": 3550, "魚": 3551, "魯": 3552, "魷": 3553, "鮑": 3554, "鮟": 3555, "鮫": 3556, "鮭": 3557, "鮮": 3558, "鯇": 3559, "鯉": 3560, "鯊": 3561, "鯖": 3562, "鯛": 3563, "鯪": 3564, "鰂": 3565, "鰭": 3566, "鰻": 3567, "鱇": 3568, "鱈": 3569, "鱔": 3570, "鱗": 3571, "鱲": 3572, "鱷": 3573, "鱸": 3574, "鲁": 3575, "鳥": 3576, "鳩": 3577, "鳳": 3578, "鳴": 3579, "鳶": 3580, "鴉": 3581, "鴛": 3582, "鴦": 3583, "鴨": 3584, "鴻": 3585, "鴿": 3586, "鵝": 3587, "鵪": 3588, "鵬": 3589, "鵲": 3590, "鶉": 3591, "鶴": 3592, "鷄": 3593, "鷯": 3594, "鷹": 3595, "鸞": 3596, "鹅": 3597, "鹹": 3598, "鹼": 3599, "鹽": 3600, "鹿": 3601, "麒": 3602, "麗": 3603, "麝": 3604, "麟": 3605, "麥": 3606, "麪": 3607, "麵": 3608, "麻": 3609, "麼": 3610, "黃": 3611, "黎": 3612, "黏": 3613, "黐": 3614, "黑": 3615, "默": 3616, "黚": 3617, "黛": 3618, "黜": 3619, "點": 3620, "黨": 3621, "黯": 3622, "鼆": 3623, "鼎": 3624, "鼓": 3625, "鼠": 3626, "鼻": 3627, "齊": 3628, "齋": 3629, "齒": 3630, "齡": 3631, "齪": 3632, "齷": 3633, "龍": 3634, "龐": 3635, "龜": 3636, "龢": 3637, "更": 3638, "來": 3639, "不": 3640, "年": 3641, "聯": 3642, "料": 3643, "利": 3644, "立": 3645, "行": 3646, ".": 3647, "a": 3648, "b": 3649, "": 3650, "|": 0, "[UNK]": 3651, "[PAD]": 3652}
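Note: `vocab.json` is a flat character-to-id map for CTC decoding: the word delimiter `"|"` sits at id 0 and `[UNK]`/`[PAD]` close out the table at ids 3651/3652, one output unit per character plus the specials. A minimal sketch of rebuilding the tokenizer directly from this file (assuming `transformers` is installed and the file is local):

```python
# Sketch: reconstruct the CTC tokenizer from the committed vocab.json.
from transformers import Wav2Vec2CTCTokenizer

tokenizer = Wav2Vec2CTCTokenizer(
    "vocab.json",
    unk_token="[UNK]",
    pad_token="[PAD]",
    word_delimiter_token="|",
)
# Prints the vocabulary size: 3653 ids in vocab.json, plus any added specials.
print(len(tokenizer))
```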