comodoro committed on
Commit c6f57da
1 Parent(s): 1ec219f

Initial model
README.md CHANGED
@@ -1,3 +1,99 @@
- ---
- license: apache-2.0
- ---
+ ---
+ license: apache-2.0
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: wav2vec2-xls-r-300m-west-slavic-cv8
+ results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # wav2vec2-xls-r-300m-west-slavic-cv8
+
+ This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on the None dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 2.3462
+ - Wer: 0.8556
+ - Cer: 0.2799
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
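The card's usage section is still empty, so here is a minimal inference sketch. It assumes the checkpoint is published under the committer's namespace as `comodoro/wav2vec2-xls-r-300m-west-slavic-cv8` (inferred from the model name, not stated in the commit) and that `librosa` is available for resampling to the 16 kHz rate set in preprocessor_config.json:

```python
import torch
import librosa
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

# Assumed repo id; adjust to wherever this checkpoint is actually hosted.
MODEL_ID = "comodoro/wav2vec2-xls-r-300m-west-slavic-cv8"

processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)

# Load any speech file as 16 kHz mono, matching preprocessor_config.json.
speech, _ = librosa.load("sample.wav", sr=16_000, mono=True)

inputs = processor(speech, sampling_rate=16_000, return_tensors="pt", padding=True)
with torch.no_grad():
    logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits

# Greedy CTC decoding over the character vocabulary in vocab.json.
predicted_ids = torch.argmax(logits, dim=-1)
print(processor.batch_decode(predicted_ids)[0])
```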
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training (see the sketch after this list):
+ - learning_rate: 0.0003
+ - train_batch_size: 32
+ - eval_batch_size: 16
+ - seed: 42
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_steps: 500
+ - num_epochs: 50
+ - mixed_precision_training: Native AMP
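For reproducibility, the hyperparameters above correspond roughly to the following `TrainingArguments`. This is a hedged reconstruction (the dataset, the CTC data collator and the `Wav2Vec2ForCTC` setup are omitted, and the 400-step evaluation cadence is read off the results table below), not the exact training script:

```python
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="wav2vec2-xls-r-300m-west-slavic-cv8",
    learning_rate=3e-4,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=16,
    seed=42,
    lr_scheduler_type="linear",
    warmup_steps=500,
    num_train_epochs=50,
    fp16=True,                    # "Native AMP" mixed precision
    evaluation_strategy="steps",
    eval_steps=400,               # evaluation every 400 steps, as in the table below
    logging_steps=400,
    save_steps=400,
)
# The Adam betas/epsilon and the linear schedule listed above are the transformers
# defaults, so they need no explicit arguments here.
```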
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Wer | Cer |
+ |:-------------:|:-----:|:-----:|:---------------:|:------:|:------:|
+ | 6.548 | 1.23 | 400 | 3.4763 | 1.0 | 1.0 |
+ | 3.42 | 2.45 | 800 | 3.3156 | 1.0 | 1.0 |
+ | 3.291 | 3.68 | 1200 | 3.2396 | 1.0 | 1.0 |
+ | 2.6515 | 4.91 | 1600 | 2.0422 | 0.9997 | 0.5835 |
+ | 1.7019 | 6.13 | 2000 | 1.6337 | 0.9893 | 0.4797 |
+ | 1.3604 | 7.36 | 2400 | 1.5221 | 0.9875 | 0.4463 |
+ | 1.1965 | 8.59 | 2800 | 1.5284 | 0.9766 | 0.4247 |
+ | 1.069 | 9.82 | 3200 | 1.5228 | 0.9672 | 0.4124 |
+ | 0.9536 | 11.04 | 3600 | 1.4059 | 0.9600 | 0.3868 |
+ | 0.8487 | 12.27 | 4000 | 1.4083 | 0.9501 | 0.3739 |
+ | 0.7655 | 13.5 | 4400 | 1.4079 | 0.9369 | 0.3612 |
+ | 0.6956 | 14.72 | 4800 | 1.4170 | 0.9411 | 0.3459 |
+ | 0.6287 | 15.95 | 5200 | 1.4000 | 0.9235 | 0.3384 |
+ | 0.561 | 17.18 | 5600 | 1.4735 | 0.9023 | 0.3295 |
+ | 0.5155 | 18.4 | 6000 | 1.5386 | 0.9202 | 0.3223 |
+ | 0.4864 | 19.63 | 6400 | 1.6186 | 0.9073 | 0.3259 |
+ | 0.4261 | 20.86 | 6800 | 1.6417 | 0.9217 | 0.3130 |
+ | 0.4051 | 22.09 | 7200 | 1.6295 | 0.8954 | 0.3026 |
+ | 0.3779 | 23.31 | 7600 | 1.8218 | 0.8979 | 0.3153 |
+ | 0.35 | 24.54 | 8000 | 1.7790 | 0.8921 | 0.3036 |
+ | 0.3343 | 25.77 | 8400 | 1.8588 | 0.9114 | 0.3072 |
+ | 0.3137 | 26.99 | 8800 | 1.8096 | 0.8756 | 0.2935 |
+ | 0.299 | 28.22 | 9200 | 1.9721 | 0.8863 | 0.3023 |
+ | 0.2894 | 29.45 | 9600 | 1.9907 | 0.8872 | 0.2958 |
+ | 0.2784 | 30.67 | 10000 | 1.9494 | 0.9090 | 0.2945 |
+ | 0.2662 | 31.9 | 10400 | 1.9952 | 0.8978 | 0.2935 |
+ | 0.2614 | 33.13 | 10800 | 2.0600 | 0.8949 | 0.2979 |
+ | 0.2401 | 34.36 | 11200 | 2.1180 | 0.8914 | 0.2950 |
+ | 0.2392 | 35.58 | 11600 | 2.1197 | 0.8713 | 0.2895 |
+ | 0.23 | 36.81 | 12000 | 2.1680 | 0.8713 | 0.2941 |
+ | 0.2246 | 38.04 | 12400 | 2.1526 | 0.8741 | 0.2879 |
+ | 0.2152 | 39.26 | 12800 | 2.2631 | 0.8790 | 0.2889 |
+ | 0.212 | 40.49 | 13200 | 2.2724 | 0.8661 | 0.2843 |
+ | 0.2044 | 41.72 | 13600 | 2.2438 | 0.8691 | 0.2878 |
+ | 0.2029 | 42.94 | 14000 | 2.2519 | 0.8577 | 0.2833 |
+ | 0.1972 | 44.17 | 14400 | 2.2697 | 0.8604 | 0.2813 |
+ | 0.1884 | 45.4 | 14800 | 2.3294 | 0.8662 | 0.2847 |
+ | 0.1877 | 46.63 | 15200 | 2.3077 | 0.8561 | 0.2793 |
+ | 0.1871 | 47.85 | 15600 | 2.3518 | 0.8563 | 0.2801 |
+ | 0.1838 | 49.08 | 16000 | 2.3462 | 0.8556 | 0.2799 |
+
+
+ ### Framework versions
+
+ - Transformers 4.16.0.dev0
+ - Pytorch 1.10.1+cu102
+ - Datasets 1.18.3
+ - Tokenizers 0.11.0
added_tokens.json ADDED
@@ -0,0 +1 @@
+ {"<s>": 55, "</s>": 56}
config.json ADDED
@@ -0,0 +1,108 @@
+ {
+ "_name_or_path": "facebook/wav2vec2-xls-r-300m",
+ "activation_dropout": 0.0,
+ "adapter_kernel_size": 3,
+ "adapter_stride": 2,
+ "add_adapter": false,
+ "apply_spec_augment": true,
+ "architectures": [
+ "Wav2Vec2ForCTC"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "codevector_dim": 768,
+ "contrastive_logits_temperature": 0.1,
+ "conv_bias": true,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "mean",
+ "ctc_zero_infinity": false,
+ "diversity_loss_weight": 0.1,
+ "do_stable_layer_norm": true,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_dropout": 0.0,
+ "feat_extract_norm": "layer",
+ "feat_proj_dropout": 0.0,
+ "feat_quantizer_dropout": 0.0,
+ "final_dropout": 0.0,
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.1,
+ "hidden_size": 1024,
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.1,
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_prob": 0.05,
+ "model_type": "wav2vec2",
+ "num_adapter_layers": 3,
+ "num_attention_heads": 16,
+ "num_codevector_groups": 2,
+ "num_codevectors_per_group": 320,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 24,
+ "num_negatives": 100,
+ "output_hidden_size": 1024,
+ "pad_token_id": 54,
+ "proj_codevector_dim": 768,
+ "tdnn_dilation": [
+ 1,
+ 2,
+ 3,
+ 1,
+ 1
+ ],
+ "tdnn_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 1500
+ ],
+ "tdnn_kernel": [
+ 5,
+ 3,
+ 3,
+ 1,
+ 1
+ ],
+ "torch_dtype": "float32",
+ "transformers_version": "4.16.0.dev0",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 57,
+ "xvector_output_dim": 512
+ }
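One practical consequence of the `conv_kernel`/`conv_stride` values above is the model's output frame rate: the feature encoder downsamples the 16 kHz waveform by the product of the strides, so each CTC frame covers 320 samples (20 ms). A small sketch of that arithmetic, with the values copied from config.json and preprocessor_config.json:

```python
from math import prod

conv_stride = [5, 2, 2, 2, 2, 2, 2]    # config.json
sampling_rate = 16_000                 # preprocessor_config.json

samples_per_frame = prod(conv_stride)            # 5 * 2**6 = 320 samples
frames_per_second = sampling_rate / samples_per_frame
print(samples_per_frame, frames_per_second)      # 320 50.0 -> one CTC frame per 20 ms
```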
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
+ {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "processor_class": "Wav2Vec2Processor",
+ "return_attention_mask": true,
+ "sampling_rate": 16000
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9c6846c706f81b178e7a856cc5831baf4feb947ba1a60f83d8d2289e840883c0
+ size 1262157361
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer", "processor_class": "Wav2Vec2Processor"}
trainer_state.json ADDED
@@ -0,0 +1,656 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 49.079754601226995,
+ "global_step": 16000,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.0002376,
+ "loss": 6.548,
+ "step": 400
+ },
+ {
+ "epoch": 1.23,
+ "eval_cer": 1.0,
+ "eval_loss": 3.476283073425293,
+ "eval_runtime": 60.3486,
+ "eval_samples_per_second": 26.513,
+ "eval_steps_per_second": 1.657,
+ "eval_wer": 1.0,
+ "step": 400
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.000294379746835443,
+ "loss": 3.42,
+ "step": 800
+ },
+ {
+ "epoch": 2.45,
+ "eval_cer": 1.0,
+ "eval_loss": 3.3155558109283447,
+ "eval_runtime": 60.3859,
+ "eval_samples_per_second": 26.496,
+ "eval_steps_per_second": 1.656,
+ "eval_wer": 1.0,
+ "step": 800
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.00028678481012658224,
+ "loss": 3.291,
+ "step": 1200
+ },
+ {
+ "epoch": 3.68,
+ "eval_cer": 1.0,
+ "eval_loss": 3.2396233081817627,
+ "eval_runtime": 60.8505,
+ "eval_samples_per_second": 26.294,
+ "eval_steps_per_second": 1.643,
+ "eval_wer": 1.0,
+ "step": 1200
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.0002791898734177215,
+ "loss": 2.6515,
+ "step": 1600
+ },
+ {
+ "epoch": 4.91,
+ "eval_cer": 0.5835108495504928,
+ "eval_loss": 2.042246103286743,
+ "eval_runtime": 60.4335,
+ "eval_samples_per_second": 26.475,
+ "eval_steps_per_second": 1.655,
+ "eval_wer": 0.999671772428884,
+ "step": 1600
+ },
+ {
+ "epoch": 6.13,
+ "learning_rate": 0.00027159493670886076,
+ "loss": 1.7019,
+ "step": 2000
+ },
+ {
+ "epoch": 6.13,
+ "eval_cer": 0.4797450987471567,
+ "eval_loss": 1.6337121725082397,
+ "eval_runtime": 60.2878,
+ "eval_samples_per_second": 26.539,
+ "eval_steps_per_second": 1.659,
+ "eval_wer": 0.9892778993435448,
+ "step": 2000
+ },
+ {
+ "epoch": 7.36,
+ "learning_rate": 0.00026399999999999997,
+ "loss": 1.3604,
+ "step": 2400
+ },
+ {
+ "epoch": 7.36,
+ "eval_cer": 0.44627576993898255,
+ "eval_loss": 1.5220645666122437,
+ "eval_runtime": 60.8256,
+ "eval_samples_per_second": 26.305,
+ "eval_steps_per_second": 1.644,
+ "eval_wer": 0.987527352297593,
+ "step": 2400
+ },
+ {
+ "epoch": 8.59,
+ "learning_rate": 0.0002564050632911392,
+ "loss": 1.1965,
+ "step": 2800
+ },
+ {
+ "epoch": 8.59,
+ "eval_cer": 0.4246669314366177,
+ "eval_loss": 1.528436303138733,
+ "eval_runtime": 60.5379,
+ "eval_samples_per_second": 26.43,
+ "eval_steps_per_second": 1.652,
+ "eval_wer": 0.9765864332603938,
+ "step": 2800
+ },
+ {
+ "epoch": 9.82,
+ "learning_rate": 0.00024881012658227843,
+ "loss": 1.069,
+ "step": 3200
+ },
+ {
+ "epoch": 9.82,
+ "eval_cer": 0.41239123370762176,
+ "eval_loss": 1.5227978229522705,
+ "eval_runtime": 60.2014,
+ "eval_samples_per_second": 26.577,
+ "eval_steps_per_second": 1.661,
+ "eval_wer": 0.9671772428884027,
+ "step": 3200
+ },
+ {
+ "epoch": 11.04,
+ "learning_rate": 0.00024121518987341772,
+ "loss": 0.9536,
+ "step": 3600
+ },
+ {
+ "epoch": 11.04,
+ "eval_cer": 0.38684695093331406,
+ "eval_loss": 1.4059038162231445,
+ "eval_runtime": 60.2947,
+ "eval_samples_per_second": 26.536,
+ "eval_steps_per_second": 1.659,
+ "eval_wer": 0.9599562363238512,
+ "step": 3600
+ },
+ {
+ "epoch": 12.27,
+ "learning_rate": 0.00023362025316455695,
+ "loss": 0.8487,
+ "step": 4000
+ },
+ {
+ "epoch": 12.27,
+ "eval_cer": 0.37390331082788747,
+ "eval_loss": 1.4082870483398438,
+ "eval_runtime": 60.1938,
+ "eval_samples_per_second": 26.581,
+ "eval_steps_per_second": 1.661,
+ "eval_wer": 0.950109409190372,
+ "step": 4000
+ },
+ {
+ "epoch": 13.5,
+ "learning_rate": 0.00022602531645569618,
+ "loss": 0.7655,
+ "step": 4400
+ },
+ {
+ "epoch": 13.5,
+ "eval_cer": 0.36121240567570495,
+ "eval_loss": 1.40787935256958,
+ "eval_runtime": 60.2423,
+ "eval_samples_per_second": 26.559,
+ "eval_steps_per_second": 1.66,
+ "eval_wer": 0.9368708971553611,
+ "step": 4400
+ },
+ {
+ "epoch": 14.72,
+ "learning_rate": 0.0002184303797468354,
+ "loss": 0.6956,
+ "step": 4800
+ },
+ {
+ "epoch": 14.72,
+ "eval_cer": 0.34590388850778064,
+ "eval_loss": 1.417035698890686,
+ "eval_runtime": 60.1538,
+ "eval_samples_per_second": 26.598,
+ "eval_steps_per_second": 1.662,
+ "eval_wer": 0.9411378555798687,
+ "step": 4800
+ },
+ {
+ "epoch": 15.95,
+ "learning_rate": 0.00021083544303797464,
+ "loss": 0.6287,
+ "step": 5200
+ },
+ {
+ "epoch": 15.95,
+ "eval_cer": 0.3383579449037802,
+ "eval_loss": 1.3999766111373901,
+ "eval_runtime": 60.2038,
+ "eval_samples_per_second": 26.576,
+ "eval_steps_per_second": 1.661,
+ "eval_wer": 0.9235229759299781,
+ "step": 5200
+ },
+ {
+ "epoch": 17.18,
+ "learning_rate": 0.00020324050632911393,
+ "loss": 0.561,
+ "step": 5600
+ },
+ {
+ "epoch": 17.18,
+ "eval_cer": 0.32953027403689933,
+ "eval_loss": 1.4734982252120972,
+ "eval_runtime": 60.3387,
+ "eval_samples_per_second": 26.517,
+ "eval_steps_per_second": 1.657,
+ "eval_wer": 0.9022975929978119,
+ "step": 5600
+ },
+ {
+ "epoch": 18.4,
+ "learning_rate": 0.00019564556962025316,
+ "loss": 0.5155,
+ "step": 6000
+ },
+ {
+ "epoch": 18.4,
+ "eval_cer": 0.32232732786944435,
+ "eval_loss": 1.538634181022644,
+ "eval_runtime": 69.1066,
+ "eval_samples_per_second": 23.153,
+ "eval_steps_per_second": 1.447,
+ "eval_wer": 0.9202407002188184,
+ "step": 6000
+ },
+ {
+ "epoch": 19.63,
+ "learning_rate": 0.0001880506329113924,
+ "loss": 0.4864,
+ "step": 6400
+ },
+ {
+ "epoch": 19.63,
+ "eval_cer": 0.32590172220818137,
+ "eval_loss": 1.618619680404663,
+ "eval_runtime": 69.0557,
+ "eval_samples_per_second": 23.17,
+ "eval_steps_per_second": 1.448,
+ "eval_wer": 0.9073304157549235,
+ "step": 6400
+ },
+ {
+ "epoch": 20.86,
+ "learning_rate": 0.00018045569620253163,
+ "loss": 0.4261,
+ "step": 6800
+ },
+ {
+ "epoch": 20.86,
+ "eval_cer": 0.313030292089396,
+ "eval_loss": 1.6417021751403809,
+ "eval_runtime": 68.875,
+ "eval_samples_per_second": 23.23,
+ "eval_steps_per_second": 1.452,
+ "eval_wer": 0.9216630196936543,
+ "step": 6800
+ },
+ {
+ "epoch": 22.09,
+ "learning_rate": 0.00017286075949367088,
+ "loss": 0.4051,
+ "step": 7200
+ },
+ {
+ "epoch": 22.09,
+ "eval_cer": 0.30263205401307003,
+ "eval_loss": 1.6295086145401,
+ "eval_runtime": 69.3362,
+ "eval_samples_per_second": 23.076,
+ "eval_steps_per_second": 1.442,
+ "eval_wer": 0.8954048140043763,
+ "step": 7200
+ },
+ {
+ "epoch": 23.31,
+ "learning_rate": 0.00016526582278481012,
+ "loss": 0.3779,
+ "step": 7600
+ },
+ {
+ "epoch": 23.31,
+ "eval_cer": 0.31534101166191286,
+ "eval_loss": 1.8218317031860352,
+ "eval_runtime": 68.8091,
+ "eval_samples_per_second": 23.253,
+ "eval_steps_per_second": 1.453,
+ "eval_wer": 0.8979212253829322,
+ "step": 7600
+ },
+ {
+ "epoch": 24.54,
+ "learning_rate": 0.00015767088607594935,
+ "loss": 0.35,
+ "step": 8000
+ },
+ {
+ "epoch": 24.54,
+ "eval_cer": 0.303570783839405,
+ "eval_loss": 1.779032588005066,
+ "eval_runtime": 68.3457,
+ "eval_samples_per_second": 23.41,
+ "eval_steps_per_second": 1.463,
+ "eval_wer": 0.8921225382932166,
+ "step": 8000
+ },
+ {
+ "epoch": 25.77,
+ "learning_rate": 0.00015007594936708858,
+ "loss": 0.3343,
+ "step": 8400
+ },
+ {
+ "epoch": 25.77,
+ "eval_cer": 0.3071812831714626,
+ "eval_loss": 1.8588141202926636,
+ "eval_runtime": 60.0463,
+ "eval_samples_per_second": 26.646,
+ "eval_steps_per_second": 1.665,
+ "eval_wer": 0.9113785557986871,
+ "step": 8400
+ },
+ {
+ "epoch": 26.99,
+ "learning_rate": 0.00014248101265822784,
+ "loss": 0.3137,
+ "step": 8800
+ },
+ {
+ "epoch": 26.99,
+ "eval_cer": 0.29351554319962453,
+ "eval_loss": 1.8095606565475464,
+ "eval_runtime": 59.7272,
+ "eval_samples_per_second": 26.788,
+ "eval_steps_per_second": 1.674,
+ "eval_wer": 0.8756017505470459,
+ "step": 8800
+ },
+ {
+ "epoch": 28.22,
+ "learning_rate": 0.00013488607594936707,
+ "loss": 0.299,
+ "step": 9200
+ },
+ {
+ "epoch": 28.22,
+ "eval_cer": 0.3023251615698451,
+ "eval_loss": 1.9720637798309326,
+ "eval_runtime": 60.067,
+ "eval_samples_per_second": 26.637,
+ "eval_steps_per_second": 1.665,
+ "eval_wer": 0.8863238512035011,
+ "step": 9200
+ },
+ {
+ "epoch": 29.45,
+ "learning_rate": 0.00012729113924050633,
+ "loss": 0.2894,
+ "step": 9600
+ },
+ {
+ "epoch": 29.45,
+ "eval_cer": 0.29584431526880167,
+ "eval_loss": 1.9907439947128296,
+ "eval_runtime": 68.8361,
+ "eval_samples_per_second": 23.244,
+ "eval_steps_per_second": 1.453,
+ "eval_wer": 0.887199124726477,
+ "step": 9600
+ },
+ {
+ "epoch": 30.67,
+ "learning_rate": 0.00011969620253164556,
+ "loss": 0.2784,
+ "step": 10000
+ },
+ {
+ "epoch": 30.67,
+ "eval_cer": 0.2944542730259595,
+ "eval_loss": 1.9494301080703735,
+ "eval_runtime": 68.9865,
+ "eval_samples_per_second": 23.193,
+ "eval_steps_per_second": 1.45,
+ "eval_wer": 0.9089715536105033,
+ "step": 10000
+ },
+ {
+ "epoch": 31.9,
+ "learning_rate": 0.00011210126582278481,
+ "loss": 0.2662,
+ "step": 10400
+ },
+ {
+ "epoch": 31.9,
+ "eval_cer": 0.29346138570964364,
+ "eval_loss": 1.995172142982483,
+ "eval_runtime": 68.9796,
+ "eval_samples_per_second": 23.195,
+ "eval_steps_per_second": 1.45,
+ "eval_wer": 0.8978118161925602,
+ "step": 10400
+ },
+ {
+ "epoch": 33.13,
+ "learning_rate": 0.00010450632911392404,
+ "loss": 0.2614,
+ "step": 10800
+ },
+ {
+ "epoch": 33.13,
+ "eval_cer": 0.29790229988807454,
+ "eval_loss": 2.0600392818450928,
+ "eval_runtime": 68.6491,
+ "eval_samples_per_second": 23.307,
+ "eval_steps_per_second": 1.457,
+ "eval_wer": 0.8948577680525164,
+ "step": 10800
+ },
+ {
+ "epoch": 34.36,
+ "learning_rate": 9.691139240506327e-05,
+ "loss": 0.2401,
+ "step": 11200
+ },
+ {
+ "epoch": 34.36,
+ "eval_cer": 0.29495974293244753,
+ "eval_loss": 2.118035316467285,
+ "eval_runtime": 68.8384,
+ "eval_samples_per_second": 23.243,
+ "eval_steps_per_second": 1.453,
+ "eval_wer": 0.8913566739606127,
+ "step": 11200
+ },
+ {
+ "epoch": 35.58,
+ "learning_rate": 8.933544303797467e-05,
+ "loss": 0.2392,
+ "step": 11600
+ },
+ {
+ "epoch": 35.58,
+ "eval_cer": 0.28950788894104057,
+ "eval_loss": 2.1196768283843994,
+ "eval_runtime": 68.573,
+ "eval_samples_per_second": 23.333,
+ "eval_steps_per_second": 1.458,
+ "eval_wer": 0.8713347921225383,
+ "step": 11600
+ },
+ {
+ "epoch": 36.81,
+ "learning_rate": 8.175949367088606e-05,
+ "loss": 0.23,
+ "step": 12000
+ },
+ {
+ "epoch": 36.81,
+ "eval_cer": 0.2940932230927537,
+ "eval_loss": 2.168043851852417,
+ "eval_runtime": 68.6136,
+ "eval_samples_per_second": 23.319,
+ "eval_steps_per_second": 1.457,
+ "eval_wer": 0.8713347921225383,
+ "step": 12000
+ },
+ {
+ "epoch": 38.04,
+ "learning_rate": 7.416455696202532e-05,
+ "loss": 0.2246,
+ "step": 12400
+ },
+ {
+ "epoch": 38.04,
+ "eval_cer": 0.2879192692349352,
+ "eval_loss": 2.1525843143463135,
+ "eval_runtime": 59.7644,
+ "eval_samples_per_second": 26.772,
+ "eval_steps_per_second": 1.673,
+ "eval_wer": 0.874070021881838,
+ "step": 12400
+ },
+ {
+ "epoch": 39.26,
+ "learning_rate": 6.656962025316455e-05,
+ "loss": 0.2152,
+ "step": 12800
+ },
+ {
+ "epoch": 39.26,
+ "eval_cer": 0.28893020904791133,
+ "eval_loss": 2.263143301010132,
+ "eval_runtime": 60.4724,
+ "eval_samples_per_second": 26.458,
+ "eval_steps_per_second": 1.654,
+ "eval_wer": 0.8789934354485777,
+ "step": 12800
+ },
+ {
+ "epoch": 40.49,
+ "learning_rate": 5.897468354430379e-05,
+ "loss": 0.212,
+ "step": 13200
+ },
+ {
+ "epoch": 40.49,
+ "eval_cer": 0.28430876990287757,
+ "eval_loss": 2.2723913192749023,
+ "eval_runtime": 60.2908,
+ "eval_samples_per_second": 26.538,
+ "eval_steps_per_second": 1.659,
+ "eval_wer": 0.8660831509846827,
+ "step": 13200
+ },
+ {
+ "epoch": 41.72,
+ "learning_rate": 5.1379746835443034e-05,
+ "loss": 0.2044,
+ "step": 13600
+ },
+ {
+ "epoch": 41.72,
+ "eval_cer": 0.2877929017583132,
+ "eval_loss": 2.2438297271728516,
+ "eval_runtime": 70.0914,
+ "eval_samples_per_second": 22.827,
+ "eval_steps_per_second": 1.427,
+ "eval_wer": 0.8691466083150985,
+ "step": 13600
+ },
+ {
+ "epoch": 42.94,
+ "learning_rate": 4.380379746835442e-05,
+ "loss": 0.2029,
+ "step": 14000
+ },
+ {
+ "epoch": 42.94,
+ "eval_cer": 0.28327977759324113,
+ "eval_loss": 2.2518999576568604,
+ "eval_runtime": 69.2505,
+ "eval_samples_per_second": 23.105,
+ "eval_steps_per_second": 1.444,
+ "eval_wer": 0.8576586433260394,
+ "step": 14000
+ },
+ {
+ "epoch": 44.17,
+ "learning_rate": 3.6208860759493666e-05,
+ "loss": 0.1972,
+ "step": 14400
+ },
+ {
+ "epoch": 44.17,
+ "eval_cer": 0.28133010795393004,
+ "eval_loss": 2.2697150707244873,
+ "eval_runtime": 68.5533,
+ "eval_samples_per_second": 23.34,
+ "eval_steps_per_second": 1.459,
+ "eval_wer": 0.8603938730853392,
+ "step": 14400
+ },
+ {
+ "epoch": 45.4,
+ "learning_rate": 2.861392405063291e-05,
+ "loss": 0.1884,
+ "step": 14800
+ },
+ {
+ "epoch": 45.4,
+ "eval_cer": 0.2846878723327436,
+ "eval_loss": 2.3294308185577393,
+ "eval_runtime": 60.5028,
+ "eval_samples_per_second": 26.445,
+ "eval_steps_per_second": 1.653,
+ "eval_wer": 0.8661925601750547,
+ "step": 14800
+ },
+ {
+ "epoch": 46.63,
+ "learning_rate": 2.101898734177215e-05,
+ "loss": 0.1877,
+ "step": 15200
+ },
+ {
+ "epoch": 46.63,
+ "eval_cer": 0.2792721233346572,
+ "eval_loss": 2.3077094554901123,
+ "eval_runtime": 60.8923,
+ "eval_samples_per_second": 26.276,
+ "eval_steps_per_second": 1.642,
+ "eval_wer": 0.8561269146608315,
+ "step": 15200
+ },
+ {
+ "epoch": 47.85,
+ "learning_rate": 1.3424050632911391e-05,
+ "loss": 0.1871,
+ "step": 15600
+ },
+ {
+ "epoch": 47.85,
+ "eval_cer": 0.2801025381810304,
+ "eval_loss": 2.351794481277466,
+ "eval_runtime": 60.3104,
+ "eval_samples_per_second": 26.529,
+ "eval_steps_per_second": 1.658,
+ "eval_wer": 0.8563457330415755,
+ "step": 15600
+ },
+ {
+ "epoch": 49.08,
+ "learning_rate": 5.829113924050632e-06,
+ "loss": 0.1838,
+ "step": 16000
+ },
+ {
+ "epoch": 49.08,
+ "eval_cer": 0.27992201321442756,
+ "eval_loss": 2.3462095260620117,
+ "eval_runtime": 60.2933,
+ "eval_samples_per_second": 26.537,
+ "eval_steps_per_second": 1.659,
+ "eval_wer": 0.8555798687089715,
+ "step": 16000
+ }
+ ],
+ "max_steps": 16300,
+ "num_train_epochs": 50,
+ "total_flos": 6.654066355887293e+19,
+ "trial_name": null,
+ "trial_params": null
+ }
vocab.json ADDED
@@ -0,0 +1 @@
+ {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "g": 7, "h": 8, "i": 9, "j": 10, "k": 11, "l": 12, "m": 13, "n": 14, "o": 15, "p": 16, "q": 17, "r": 18, "s": 19, "t": 20, "u": 21, "v": 22, "w": 23, "x": 24, "y": 25, "z": 26, "á": 27, "ä": 28, "è": 29, "é": 30, "í": 31, "ó": 32, "ô": 33, "ú": 34, "ý": 35, "č": 36, "ď": 37, "ě": 38, "ĺ": 39, "ľ": 40, "ł": 41, "ń": 42, "ň": 43, "ŕ": 44, "ř": 45, "ś": 46, "š": 47, "ť": 48, "ů": 49, "ź": 50, "ż": 51, "ž": 52, "|": 0, "[UNK]": 53, "[PAD]": 54}