Yehor Smoliakov committed on
Commit
a7ae7cb
1 Parent(s): 16f43bd
.gitattributes CHANGED
@@ -24,4 +24,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.xz filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zstandard filter=lfs diff=lfs merge=lfs -text
+ *.arpa filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,85 @@
- ---
- license: apache-2.0
- ---
+ ---
+ language:
+ - uk
+ license: apache-2.0
+ tags:
+ - automatic-speech-recognition
+ - mozilla-foundation/common_voice_7_0
+ - generated_from_trainer
+ - uk
+ - robust-speech-event
+ datasets:
+ - common_voice
+ model-index:
+ - name: wav2vec2-xls-r-1b-uk-with-lm
+   results: []
+ ---
+
+ # Ukrainian STT model (with Language Model)
+
+ This model is a fine-tuned version of [facebook/wav2vec2-xls-r-1b](https://huggingface.co/facebook/wav2vec2-xls-r-1b) on the MOZILLA-FOUNDATION/COMMON_VOICE_7_0 - UK dataset.
+
+ It achieves the following results on the evaluation set without the language model:
+
+ - Loss: 0.1875
+ - Wer: 0.2033
+ - Cer: 0.0384
+
+ Follow our community on Telegram: https://t.me/speech_recognition_uk
+
+ ## Model description
+
+ On 100 test examples the model shows the following results:
+
+ Without LM:
+
+ - WER: 0.1862
+ - CER: 0.0277
+
+ With LM:
+
+ - WER: 0.1218
+ - CER: 0.0190
+
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-05
+ - train_batch_size: 8
+ - eval_batch_size: 8
+ - seed: 42
+ - gradient_accumulation_steps: 20
+ - total_train_batch_size: 160
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_steps: 500
+ - num_epochs: 100.0
+ - mixed_precision_training: Native AMP
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Wer | Cer |
+ |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|
+ | 1.2815 | 7.93 | 500 | 0.3536 | 0.4753 | 0.1009 |
+ | 1.0869 | 15.86 | 1000 | 0.2317 | 0.3111 | 0.0614 |
+ | 0.9984 | 23.8 | 1500 | 0.2022 | 0.2676 | 0.0521 |
+ | 0.975 | 31.74 | 2000 | 0.1948 | 0.2469 | 0.0487 |
+ | 0.9306 | 39.67 | 2500 | 0.1916 | 0.2377 | 0.0464 |
+ | 0.8868 | 47.61 | 3000 | 0.1903 | 0.2257 | 0.0439 |
+ | 0.8424 | 55.55 | 3500 | 0.1786 | 0.2206 | 0.0423 |
+ | 0.8126 | 63.49 | 4000 | 0.1849 | 0.2160 | 0.0416 |
+ | 0.7901 | 71.42 | 4500 | 0.1869 | 0.2138 | 0.0413 |
+ | 0.7671 | 79.36 | 5000 | 0.1855 | 0.2075 | 0.0394 |
+ | 0.7467 | 87.3 | 5500 | 0.1884 | 0.2049 | 0.0389 |
+ | 0.731 | 95.24 | 6000 | 0.1877 | 0.2060 | 0.0387 |
+
+
+ ### Framework versions
+
+ - Transformers 4.16.0.dev0
+ - Pytorch 1.10.1+cu102
+ - Datasets 1.18.1.dev0
+ - Tokenizers 0.11.0
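The card above quotes WER/CER both without and with the n-gram language model. A minimal inference sketch for the two decoding paths, assuming the model is published under a repo id such as `Yehor/wav2vec2-xls-r-1b-uk-with-lm` (the repo id and audio file name are assumptions, not taken from the commit):

```python
# Usage sketch: greedy CTC decoding vs. beam search with the bundled KenLM.
import torch
import torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2ProcessorWithLM

MODEL_ID = "Yehor/wav2vec2-xls-r-1b-uk-with-lm"  # assumed repo id

# The processor bundles the feature extractor, the CTC tokenizer and the pyctcdecode decoder.
processor = Wav2Vec2ProcessorWithLM.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
model.eval()

waveform, sr = torchaudio.load("sample.wav")                     # placeholder audio file
speech = torchaudio.functional.resample(waveform[0], sr, 16_000) # model expects 16 kHz mono

inputs = processor(speech.numpy(), sampling_rate=16_000, return_tensors="pt")
with torch.no_grad():
    logits = model(inputs.input_values).logits

# "Without LM": greedy argmax over the CTC logits.
pred_ids = torch.argmax(logits, dim=-1)
greedy_text = processor.tokenizer.batch_decode(pred_ids)[0]

# "With LM": beam search rescored by the 5-gram model in language_model/.
lm_text = processor.batch_decode(logits.numpy()).text[0]

print("greedy :", greedy_text)
print("with LM:", lm_text)
```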
added_tokens.json ADDED
@@ -0,0 +1 @@
+ {"<s>": 51, "</s>": 52}
all_results.json ADDED
@@ -0,0 +1,15 @@
+ {
+ "epoch": 99.99,
+ "eval_cer": 0.03843434091927693,
+ "eval_loss": 0.18747110664844513,
+ "eval_runtime": 268.1183,
+ "eval_samples": 4332,
+ "eval_samples_per_second": 16.157,
+ "eval_steps_per_second": 2.021,
+ "eval_wer": 0.20326104163368688,
+ "train_loss": 1.049089940994505,
+ "train_runtime": 95054.1856,
+ "train_samples": 10193,
+ "train_samples_per_second": 10.723,
+ "train_steps_per_second": 0.066
+ }
alphabet.json ADDED
@@ -0,0 +1 @@
+ {"labels": [" ", "a", "c", "e", "i", "j", "k", "l", "m", "n", "o", "p", "u", "x", "y", "\u0301", "\u0430", "\u0431", "\u0432", "\u0433", "\u0434", "\u0435", "\u0436", "\u0437", "\u0438", "\u0439", "\u043a", "\u043b", "\u043c", "\u043d", "\u043e", "\u043f", "\u0440", "\u0441", "\u0442", "\u0443", "\u0444", "\u0445", "\u0446", "\u0447", "\u0448", "\u0449", "\u044c", "\u044e", "\u044f", "\u0454", "\u0456", "\u0457", "\u0491", "\u2047", "", "<s>", "</s>"], "is_bpe": false}
config.json ADDED
@@ -0,0 +1,107 @@
+ {
+ "_name_or_path": "facebook/wav2vec2-xls-r-1b",
+ "activation_dropout": 0.1,
+ "adapter_kernel_size": 3,
+ "adapter_stride": 2,
+ "add_adapter": false,
+ "apply_spec_augment": true,
+ "architectures": [
+ "Wav2Vec2ForCTC"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "codevector_dim": 1024,
+ "contrastive_logits_temperature": 0.1,
+ "conv_bias": true,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "mean",
+ "ctc_zero_infinity": false,
+ "diversity_loss_weight": 0.1,
+ "do_stable_layer_norm": true,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_dropout": 0.0,
+ "feat_extract_norm": "layer",
+ "feat_proj_dropout": 0.0,
+ "feat_quantizer_dropout": 0.0,
+ "final_dropout": 0.0,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.0,
+ "hidden_size": 1280,
+ "initializer_range": 0.02,
+ "intermediate_size": 5120,
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.0,
+ "mask_feature_length": 64,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.25,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_prob": 0.75,
+ "model_type": "wav2vec2",
+ "num_adapter_layers": 3,
+ "num_attention_heads": 16,
+ "num_codevector_groups": 2,
+ "num_codevectors_per_group": 320,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 48,
+ "num_negatives": 100,
+ "output_hidden_size": 1280,
+ "pad_token_id": 50,
+ "proj_codevector_dim": 1024,
+ "tdnn_dilation": [
+ 1,
+ 2,
+ 3,
+ 1,
+ 1
+ ],
+ "tdnn_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 1500
+ ],
+ "tdnn_kernel": [
+ 5,
+ 3,
+ 3,
+ 1,
+ 1
+ ],
+ "torch_dtype": "float32",
+ "transformers_version": "4.16.0.dev0",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 53,
+ "xvector_output_dim": 512
+ }
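The `vocab_size` of 53 and `pad_token_id` of 50 in this config line up with vocab.json and added_tokens.json later in the commit, and the SpecAugment settings (`mask_time_prob: 0.75`, `mask_feature_prob: 0.25`) describe the masking used during fine-tuning. A small sanity-check sketch, assuming the repo has been cloned locally (a hub repo id would work the same way):

```python
# Sketch: inspect the fine-tuning config with Transformers.
from transformers import Wav2Vec2Config

config = Wav2Vec2Config.from_pretrained(".")          # "." = local clone of this repo
assert config.vocab_size == 53                        # 51 vocab.json entries + <s>/</s>
assert config.pad_token_id == 50                      # [PAD] doubles as the CTC blank
print(config.hidden_size, config.num_hidden_layers)   # 1280, 48 -- the XLS-R 1B backbone
print(config.mask_time_prob, config.mask_feature_prob)  # 0.75, 0.25 SpecAugment masking
```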
eval_results.json ADDED
@@ -0,0 +1,10 @@
+ {
+ "epoch": 99.99,
+ "eval_cer": 0.03843434091927693,
+ "eval_loss": 0.18747110664844513,
+ "eval_runtime": 268.1183,
+ "eval_samples": 4332,
+ "eval_samples_per_second": 16.157,
+ "eval_steps_per_second": 2.021,
+ "eval_wer": 0.20326104163368688
+ }
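`eval_wer` and `eval_cer` here are corpus-level word and character error rates over the 4332 evaluation samples. The training script likely computed them with the metrics bundled in `datasets`; as an illustration only, the same kind of numbers can be reproduced with the `jiwer` package (the transcript pairs below are invented):

```python
# Sketch: corpus-level WER/CER as reported in eval_results.json, using jiwer.
import jiwer

references = ["добрий день", "як справи"]   # made-up reference transcripts
hypotheses = ["добрий день", "якс прави"]   # made-up model outputs

print("WER:", jiwer.wer(references, hypotheses))
print("CER:", jiwer.cer(references, hypotheses))
```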
language_model/5gram_correct.arpa ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8720d88d4f86f9bdd24a806713c0d4be2eb694227daed83bedade8ba69c16516
+ size 28311207
language_model/attrs.json ADDED
@@ -0,0 +1 @@
+ {"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": true}
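attrs.json stores the decoder weights used by `pyctcdecode`: `alpha` scales the language-model score, `beta` is a word-insertion bonus, and `unk_score_offset` penalizes out-of-vocabulary words. A sketch of building an equivalent decoder by hand from the files in this folder (paths assume a local clone of the repo):

```python
# Sketch: the beam-search decoder that Wav2Vec2ProcessorWithLM wraps, built directly.
import json
from pyctcdecode import build_ctcdecoder

with open("alphabet.json") as f:
    labels = json.load(f)["labels"]

decoder = build_ctcdecoder(
    labels,
    kenlm_model_path="language_model/5gram_correct.arpa",
    alpha=0.5,               # LM weight, as in attrs.json
    beta=1.5,                # word insertion bonus
    unk_score_offset=-10.0,  # penalty for <unk> words
    lm_score_boundary=True,  # "score_boundary" in attrs.json
)
# decoder.decode(logits) then expects a (time, vocab) numpy array of CTC logits.
```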
language_model/unigrams.txt ADDED
The diff for this file is too large to render. See raw diff
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
+ {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0,
+ "processor_class": "Wav2Vec2ProcessorWithLM",
+ "return_attention_mask": true,
+ "sampling_rate": 16000
+ }
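`do_normalize: true` means the feature extractor rescales each raw 16 kHz waveform to zero mean and unit variance before it reaches the model. A sketch of that behaviour (the epsilon used for numerical stability is an assumption about the library internals, not read from this file):

```python
# Sketch: per-utterance normalization implied by do_normalize=true.
import numpy as np
from transformers import Wav2Vec2FeatureExtractor

extractor = Wav2Vec2FeatureExtractor.from_pretrained(".")  # "." = local clone of this repo

rng = np.random.default_rng(0)
speech = rng.normal(scale=0.1, size=16_000).astype(np.float32)  # 1 s of synthetic 16 kHz audio

manual = (speech - speech.mean()) / np.sqrt(speech.var() + 1e-7)  # assumed epsilon
auto = extractor(speech, sampling_rate=16_000).input_values[0]

print(np.allclose(manual, auto, atol=1e-4))  # expected: True
```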
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6933edff2d809c0035d6ce0fffe2bf975c5d0d3f2932f2c4545f47a769d04ce4
+ size 3850584305
runs/Jan26_11-22-42_job-df329b21-d243-4736-8f96-d11192aeb370/1643196546.6195295/events.out.tfevents.1643196546.job-df329b21-d243-4736-8f96-d11192aeb370.13810.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dccefa87dfb9f1a70748307d90324380c39d8b93d9af111a89d51ba81de70d24
+ size 4778
runs/Jan26_11-22-42_job-df329b21-d243-4736-8f96-d11192aeb370/events.out.tfevents.1643196546.job-df329b21-d243-4736-8f96-d11192aeb370.13810.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c1dd1de0119ae5b997385fc2a7feb994debd200ac3055775715f8355a07eb0d2
+ size 29217
runs/Jan26_11-22-42_job-df329b21-d243-4736-8f96-d11192aeb370/events.out.tfevents.1643291875.job-df329b21-d243-4736-8f96-d11192aeb370.13810.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cddb9d99a97b8c4323dfd96a1f6fef25c3c7ff9009b73aa856629224e0ce4325
+ size 405
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "final", "tokenizer_class": "Wav2Vec2CTCTokenizer", "processor_class": "Wav2Vec2ProcessorWithLM"}
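The tokenizer declared here is a character-level `Wav2Vec2CTCTokenizer` with `|` as the word delimiter and `[PAD]` (id 50 in vocab.json) doubling as the CTC blank. A small sketch of how a CTC id sequence collapses back to text (the id sequence below is invented purely for illustration):

```python
# Sketch: decoding a CTC id sequence with this repo's tokenizer.
from transformers import Wav2Vec2CTCTokenizer

tok = Wav2Vec2CTCTokenizer.from_pretrained(".")  # "." = local clone of this repo

# "так" = т(34) а(16) к(26); 50 is the [PAD]/blank id, 0 is "|" (word delimiter -> space).
ids = [34, 34, 50, 16, 50, 26, 0, 50]
print(tok.decode(ids))  # repeats and blanks are collapsed; expected: "так"
```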
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 99.99,
+ "train_loss": 1.049089940994505,
+ "train_runtime": 95054.1856,
+ "train_samples": 10193,
+ "train_samples_per_second": 10.723,
+ "train_steps_per_second": 0.066
+ }
trainer_state.json ADDED
@@ -0,0 +1,901 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 99.98823529411764,
5
+ "global_step": 6300,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.78,
12
+ "learning_rate": 4.9000000000000005e-06,
13
+ "loss": 11.1133,
14
+ "step": 50
15
+ },
16
+ {
17
+ "epoch": 1.58,
18
+ "learning_rate": 9.900000000000002e-06,
19
+ "loss": 3.3967,
20
+ "step": 100
21
+ },
22
+ {
23
+ "epoch": 2.38,
24
+ "learning_rate": 1.49e-05,
25
+ "loss": 3.2205,
26
+ "step": 150
27
+ },
28
+ {
29
+ "epoch": 3.17,
30
+ "learning_rate": 1.9900000000000003e-05,
31
+ "loss": 2.8143,
32
+ "step": 200
33
+ },
34
+ {
35
+ "epoch": 3.96,
36
+ "learning_rate": 2.4900000000000002e-05,
37
+ "loss": 1.9249,
38
+ "step": 250
39
+ },
40
+ {
41
+ "epoch": 4.75,
42
+ "learning_rate": 2.9900000000000002e-05,
43
+ "loss": 1.6708,
44
+ "step": 300
45
+ },
46
+ {
47
+ "epoch": 5.55,
48
+ "learning_rate": 3.49e-05,
49
+ "loss": 1.5501,
50
+ "step": 350
51
+ },
52
+ {
53
+ "epoch": 6.35,
54
+ "learning_rate": 3.99e-05,
55
+ "loss": 1.4258,
56
+ "step": 400
57
+ },
58
+ {
59
+ "epoch": 7.14,
60
+ "learning_rate": 4.49e-05,
61
+ "loss": 1.332,
62
+ "step": 450
63
+ },
64
+ {
65
+ "epoch": 7.93,
66
+ "learning_rate": 4.99e-05,
67
+ "loss": 1.2815,
68
+ "step": 500
69
+ },
70
+ {
71
+ "epoch": 7.93,
72
+ "eval_cer": 0.10093122852447588,
73
+ "eval_loss": 0.35359087586402893,
74
+ "eval_runtime": 257.0651,
75
+ "eval_samples_per_second": 16.852,
76
+ "eval_steps_per_second": 2.108,
77
+ "eval_wer": 0.47525724236188066,
78
+ "step": 500
79
+ },
80
+ {
81
+ "epoch": 8.72,
82
+ "learning_rate": 4.957758620689655e-05,
83
+ "loss": 1.2632,
84
+ "step": 550
85
+ },
86
+ {
87
+ "epoch": 9.52,
88
+ "learning_rate": 4.9146551724137934e-05,
89
+ "loss": 1.2239,
90
+ "step": 600
91
+ },
92
+ {
93
+ "epoch": 10.31,
94
+ "learning_rate": 4.871551724137931e-05,
95
+ "loss": 1.2044,
96
+ "step": 650
97
+ },
98
+ {
99
+ "epoch": 11.11,
100
+ "learning_rate": 4.828448275862069e-05,
101
+ "loss": 1.1918,
102
+ "step": 700
103
+ },
104
+ {
105
+ "epoch": 11.89,
106
+ "learning_rate": 4.785344827586207e-05,
107
+ "loss": 1.1641,
108
+ "step": 750
109
+ },
110
+ {
111
+ "epoch": 12.69,
112
+ "learning_rate": 4.742241379310345e-05,
113
+ "loss": 1.1718,
114
+ "step": 800
115
+ },
116
+ {
117
+ "epoch": 13.49,
118
+ "learning_rate": 4.699137931034483e-05,
119
+ "loss": 1.1638,
120
+ "step": 850
121
+ },
122
+ {
123
+ "epoch": 14.28,
124
+ "learning_rate": 4.656034482758621e-05,
125
+ "loss": 1.1317,
126
+ "step": 900
127
+ },
128
+ {
129
+ "epoch": 15.08,
130
+ "learning_rate": 4.612931034482759e-05,
131
+ "loss": 1.1334,
132
+ "step": 950
133
+ },
134
+ {
135
+ "epoch": 15.86,
136
+ "learning_rate": 4.569827586206897e-05,
137
+ "loss": 1.0869,
138
+ "step": 1000
139
+ },
140
+ {
141
+ "epoch": 15.86,
142
+ "eval_cer": 0.06135152631841044,
143
+ "eval_loss": 0.23165984451770782,
144
+ "eval_runtime": 262.4285,
145
+ "eval_samples_per_second": 16.507,
146
+ "eval_steps_per_second": 2.065,
147
+ "eval_wer": 0.3110653791356657,
148
+ "step": 1000
149
+ },
150
+ {
151
+ "epoch": 16.66,
152
+ "learning_rate": 4.526724137931035e-05,
153
+ "loss": 1.104,
154
+ "step": 1050
155
+ },
156
+ {
157
+ "epoch": 17.45,
158
+ "learning_rate": 4.4836206896551726e-05,
159
+ "loss": 1.109,
160
+ "step": 1100
161
+ },
162
+ {
163
+ "epoch": 18.25,
164
+ "learning_rate": 4.440517241379311e-05,
165
+ "loss": 1.0902,
166
+ "step": 1150
167
+ },
168
+ {
169
+ "epoch": 19.05,
170
+ "learning_rate": 4.397413793103449e-05,
171
+ "loss": 1.0676,
172
+ "step": 1200
173
+ },
174
+ {
175
+ "epoch": 19.83,
176
+ "learning_rate": 4.3543103448275865e-05,
177
+ "loss": 1.0453,
178
+ "step": 1250
179
+ },
180
+ {
181
+ "epoch": 20.63,
182
+ "learning_rate": 4.311206896551725e-05,
183
+ "loss": 1.0489,
184
+ "step": 1300
185
+ },
186
+ {
187
+ "epoch": 21.42,
188
+ "learning_rate": 4.268103448275862e-05,
189
+ "loss": 1.0495,
190
+ "step": 1350
191
+ },
192
+ {
193
+ "epoch": 22.22,
194
+ "learning_rate": 4.2250000000000004e-05,
195
+ "loss": 1.0325,
196
+ "step": 1400
197
+ },
198
+ {
199
+ "epoch": 23.02,
200
+ "learning_rate": 4.181896551724138e-05,
201
+ "loss": 1.0298,
202
+ "step": 1450
203
+ },
204
+ {
205
+ "epoch": 23.8,
206
+ "learning_rate": 4.138793103448276e-05,
207
+ "loss": 0.9984,
208
+ "step": 1500
209
+ },
210
+ {
211
+ "epoch": 23.8,
212
+ "eval_cer": 0.052054180568696776,
213
+ "eval_loss": 0.20215292274951935,
214
+ "eval_runtime": 259.0562,
215
+ "eval_samples_per_second": 16.722,
216
+ "eval_steps_per_second": 2.092,
217
+ "eval_wer": 0.26762703815102107,
218
+ "step": 1500
219
+ },
220
+ {
221
+ "epoch": 24.6,
222
+ "learning_rate": 4.0956896551724136e-05,
223
+ "loss": 1.0118,
224
+ "step": 1550
225
+ },
226
+ {
227
+ "epoch": 25.39,
228
+ "learning_rate": 4.053448275862069e-05,
229
+ "loss": 1.0165,
230
+ "step": 1600
231
+ },
232
+ {
233
+ "epoch": 26.19,
234
+ "learning_rate": 4.0103448275862074e-05,
235
+ "loss": 1.0075,
236
+ "step": 1650
237
+ },
238
+ {
239
+ "epoch": 26.97,
240
+ "learning_rate": 3.967241379310345e-05,
241
+ "loss": 1.003,
242
+ "step": 1700
243
+ },
244
+ {
245
+ "epoch": 27.77,
246
+ "learning_rate": 3.924137931034483e-05,
247
+ "loss": 0.9905,
248
+ "step": 1750
249
+ },
250
+ {
251
+ "epoch": 28.56,
252
+ "learning_rate": 3.8810344827586206e-05,
253
+ "loss": 1.0019,
254
+ "step": 1800
255
+ },
256
+ {
257
+ "epoch": 29.36,
258
+ "learning_rate": 3.837931034482759e-05,
259
+ "loss": 1.0085,
260
+ "step": 1850
261
+ },
262
+ {
263
+ "epoch": 30.16,
264
+ "learning_rate": 3.794827586206896e-05,
265
+ "loss": 0.9868,
266
+ "step": 1900
267
+ },
268
+ {
269
+ "epoch": 30.94,
270
+ "learning_rate": 3.7517241379310345e-05,
271
+ "loss": 0.9816,
272
+ "step": 1950
273
+ },
274
+ {
275
+ "epoch": 31.74,
276
+ "learning_rate": 3.708620689655173e-05,
277
+ "loss": 0.975,
278
+ "step": 2000
279
+ },
280
+ {
281
+ "epoch": 31.74,
282
+ "eval_cer": 0.04868781435187491,
283
+ "eval_loss": 0.19483695924282074,
284
+ "eval_runtime": 259.0795,
285
+ "eval_samples_per_second": 16.721,
286
+ "eval_steps_per_second": 2.092,
287
+ "eval_wer": 0.24688934620864333,
288
+ "step": 2000
289
+ },
290
+ {
291
+ "epoch": 32.53,
292
+ "learning_rate": 3.66551724137931e-05,
293
+ "loss": 0.9552,
294
+ "step": 2050
295
+ },
296
+ {
297
+ "epoch": 33.33,
298
+ "learning_rate": 3.6224137931034484e-05,
299
+ "loss": 0.9649,
300
+ "step": 2100
301
+ },
302
+ {
303
+ "epoch": 34.13,
304
+ "learning_rate": 3.5793103448275866e-05,
305
+ "loss": 0.9632,
306
+ "step": 2150
307
+ },
308
+ {
309
+ "epoch": 34.91,
310
+ "learning_rate": 3.536206896551724e-05,
311
+ "loss": 0.9542,
312
+ "step": 2200
313
+ },
314
+ {
315
+ "epoch": 35.71,
316
+ "learning_rate": 3.493103448275862e-05,
317
+ "loss": 0.9686,
318
+ "step": 2250
319
+ },
320
+ {
321
+ "epoch": 36.5,
322
+ "learning_rate": 3.45e-05,
323
+ "loss": 0.9418,
324
+ "step": 2300
325
+ },
326
+ {
327
+ "epoch": 37.3,
328
+ "learning_rate": 3.406896551724138e-05,
329
+ "loss": 0.9295,
330
+ "step": 2350
331
+ },
332
+ {
333
+ "epoch": 38.09,
334
+ "learning_rate": 3.363793103448276e-05,
335
+ "loss": 0.9372,
336
+ "step": 2400
337
+ },
338
+ {
339
+ "epoch": 38.88,
340
+ "learning_rate": 3.320689655172414e-05,
341
+ "loss": 0.9205,
342
+ "step": 2450
343
+ },
344
+ {
345
+ "epoch": 39.67,
346
+ "learning_rate": 3.277586206896552e-05,
347
+ "loss": 0.9306,
348
+ "step": 2500
349
+ },
350
+ {
351
+ "epoch": 39.67,
352
+ "eval_cer": 0.046377172451571136,
353
+ "eval_loss": 0.19161736965179443,
354
+ "eval_runtime": 258.3157,
355
+ "eval_samples_per_second": 16.77,
356
+ "eval_steps_per_second": 2.098,
357
+ "eval_wer": 0.2377394332752889,
358
+ "step": 2500
359
+ },
360
+ {
361
+ "epoch": 40.47,
362
+ "learning_rate": 3.23448275862069e-05,
363
+ "loss": 0.9331,
364
+ "step": 2550
365
+ },
366
+ {
367
+ "epoch": 41.27,
368
+ "learning_rate": 3.1913793103448276e-05,
369
+ "loss": 0.8936,
370
+ "step": 2600
371
+ },
372
+ {
373
+ "epoch": 42.06,
374
+ "learning_rate": 3.148275862068966e-05,
375
+ "loss": 0.8987,
376
+ "step": 2650
377
+ },
378
+ {
379
+ "epoch": 42.85,
380
+ "learning_rate": 3.105172413793104e-05,
381
+ "loss": 0.8853,
382
+ "step": 2700
383
+ },
384
+ {
385
+ "epoch": 43.64,
386
+ "learning_rate": 3.0620689655172415e-05,
387
+ "loss": 0.9106,
388
+ "step": 2750
389
+ },
390
+ {
391
+ "epoch": 44.44,
392
+ "learning_rate": 3.0189655172413794e-05,
393
+ "loss": 0.8932,
394
+ "step": 2800
395
+ },
396
+ {
397
+ "epoch": 45.24,
398
+ "learning_rate": 2.9758620689655176e-05,
399
+ "loss": 0.9096,
400
+ "step": 2850
401
+ },
402
+ {
403
+ "epoch": 46.03,
404
+ "learning_rate": 2.932758620689655e-05,
405
+ "loss": 0.8919,
406
+ "step": 2900
407
+ },
408
+ {
409
+ "epoch": 46.82,
410
+ "learning_rate": 2.8896551724137933e-05,
411
+ "loss": 0.8744,
412
+ "step": 2950
413
+ },
414
+ {
415
+ "epoch": 47.61,
416
+ "learning_rate": 2.8465517241379315e-05,
417
+ "loss": 0.8868,
418
+ "step": 3000
419
+ },
420
+ {
421
+ "epoch": 47.61,
422
+ "eval_cer": 0.04391713560081669,
423
+ "eval_loss": 0.19031885266304016,
424
+ "eval_runtime": 265.4438,
425
+ "eval_samples_per_second": 16.32,
426
+ "eval_steps_per_second": 2.042,
427
+ "eval_wer": 0.2257400664872566,
428
+ "step": 3000
429
+ },
430
+ {
431
+ "epoch": 48.41,
432
+ "learning_rate": 2.803448275862069e-05,
433
+ "loss": 0.8793,
434
+ "step": 3050
435
+ },
436
+ {
437
+ "epoch": 49.2,
438
+ "learning_rate": 2.7603448275862072e-05,
439
+ "loss": 0.8739,
440
+ "step": 3100
441
+ },
442
+ {
443
+ "epoch": 49.99,
444
+ "learning_rate": 2.717241379310345e-05,
445
+ "loss": 0.8696,
446
+ "step": 3150
447
+ },
448
+ {
449
+ "epoch": 50.78,
450
+ "learning_rate": 2.674137931034483e-05,
451
+ "loss": 0.863,
452
+ "step": 3200
453
+ },
454
+ {
455
+ "epoch": 51.58,
456
+ "learning_rate": 2.6310344827586207e-05,
457
+ "loss": 0.8612,
458
+ "step": 3250
459
+ },
460
+ {
461
+ "epoch": 52.38,
462
+ "learning_rate": 2.587931034482759e-05,
463
+ "loss": 0.8639,
464
+ "step": 3300
465
+ },
466
+ {
467
+ "epoch": 53.17,
468
+ "learning_rate": 2.5448275862068964e-05,
469
+ "loss": 0.8523,
470
+ "step": 3350
471
+ },
472
+ {
473
+ "epoch": 53.96,
474
+ "learning_rate": 2.5017241379310346e-05,
475
+ "loss": 0.8577,
476
+ "step": 3400
477
+ },
478
+ {
479
+ "epoch": 54.75,
480
+ "learning_rate": 2.4586206896551725e-05,
481
+ "loss": 0.8465,
482
+ "step": 3450
483
+ },
484
+ {
485
+ "epoch": 55.55,
486
+ "learning_rate": 2.4155172413793103e-05,
487
+ "loss": 0.8424,
488
+ "step": 3500
489
+ },
490
+ {
491
+ "epoch": 55.55,
492
+ "eval_cer": 0.042293710472586024,
493
+ "eval_loss": 0.17861121892929077,
494
+ "eval_runtime": 259.0288,
495
+ "eval_samples_per_second": 16.724,
496
+ "eval_steps_per_second": 2.092,
497
+ "eval_wer": 0.22061104954883648,
498
+ "step": 3500
499
+ },
500
+ {
501
+ "epoch": 56.35,
502
+ "learning_rate": 2.3724137931034485e-05,
503
+ "loss": 0.8436,
504
+ "step": 3550
505
+ },
506
+ {
507
+ "epoch": 57.14,
508
+ "learning_rate": 2.3293103448275864e-05,
509
+ "loss": 0.8404,
510
+ "step": 3600
511
+ },
512
+ {
513
+ "epoch": 57.93,
514
+ "learning_rate": 2.2862068965517242e-05,
515
+ "loss": 0.8304,
516
+ "step": 3650
517
+ },
518
+ {
519
+ "epoch": 58.72,
520
+ "learning_rate": 2.2431034482758624e-05,
521
+ "loss": 0.8331,
522
+ "step": 3700
523
+ },
524
+ {
525
+ "epoch": 59.52,
526
+ "learning_rate": 2.2000000000000003e-05,
527
+ "loss": 0.824,
528
+ "step": 3750
529
+ },
530
+ {
531
+ "epoch": 60.31,
532
+ "learning_rate": 2.1568965517241378e-05,
533
+ "loss": 0.8328,
534
+ "step": 3800
535
+ },
536
+ {
537
+ "epoch": 61.11,
538
+ "learning_rate": 2.113793103448276e-05,
539
+ "loss": 0.8234,
540
+ "step": 3850
541
+ },
542
+ {
543
+ "epoch": 61.89,
544
+ "learning_rate": 2.070689655172414e-05,
545
+ "loss": 0.8098,
546
+ "step": 3900
547
+ },
548
+ {
549
+ "epoch": 62.69,
550
+ "learning_rate": 2.0275862068965517e-05,
551
+ "loss": 0.8287,
552
+ "step": 3950
553
+ },
554
+ {
555
+ "epoch": 63.49,
556
+ "learning_rate": 1.98448275862069e-05,
557
+ "loss": 0.8126,
558
+ "step": 4000
559
+ },
560
+ {
561
+ "epoch": 63.49,
562
+ "eval_cer": 0.04164135252228475,
563
+ "eval_loss": 0.18486249446868896,
564
+ "eval_runtime": 261.7127,
565
+ "eval_samples_per_second": 16.553,
566
+ "eval_steps_per_second": 2.071,
567
+ "eval_wer": 0.2159886021845813,
568
+ "step": 4000
569
+ },
570
+ {
571
+ "epoch": 64.28,
572
+ "learning_rate": 1.9413793103448277e-05,
573
+ "loss": 0.8089,
574
+ "step": 4050
575
+ },
576
+ {
577
+ "epoch": 65.08,
578
+ "learning_rate": 1.8982758620689656e-05,
579
+ "loss": 0.8126,
580
+ "step": 4100
581
+ },
582
+ {
583
+ "epoch": 65.86,
584
+ "learning_rate": 1.8551724137931034e-05,
585
+ "loss": 0.7975,
586
+ "step": 4150
587
+ },
588
+ {
589
+ "epoch": 66.66,
590
+ "learning_rate": 1.8120689655172416e-05,
591
+ "loss": 0.8049,
592
+ "step": 4200
593
+ },
594
+ {
595
+ "epoch": 67.45,
596
+ "learning_rate": 1.7698275862068966e-05,
597
+ "loss": 0.8088,
598
+ "step": 4250
599
+ },
600
+ {
601
+ "epoch": 68.25,
602
+ "learning_rate": 1.7267241379310344e-05,
603
+ "loss": 0.8038,
604
+ "step": 4300
605
+ },
606
+ {
607
+ "epoch": 69.05,
608
+ "learning_rate": 1.6836206896551726e-05,
609
+ "loss": 0.7886,
610
+ "step": 4350
611
+ },
612
+ {
613
+ "epoch": 69.83,
614
+ "learning_rate": 1.6405172413793105e-05,
615
+ "loss": 0.7735,
616
+ "step": 4400
617
+ },
618
+ {
619
+ "epoch": 70.63,
620
+ "learning_rate": 1.5974137931034483e-05,
621
+ "loss": 0.7837,
622
+ "step": 4450
623
+ },
624
+ {
625
+ "epoch": 71.42,
626
+ "learning_rate": 1.5543103448275865e-05,
627
+ "loss": 0.7901,
628
+ "step": 4500
629
+ },
630
+ {
631
+ "epoch": 71.42,
632
+ "eval_cer": 0.04126786514615806,
633
+ "eval_loss": 0.18691900372505188,
634
+ "eval_runtime": 261.5464,
635
+ "eval_samples_per_second": 16.563,
636
+ "eval_steps_per_second": 2.072,
637
+ "eval_wer": 0.21383568149438023,
638
+ "step": 4500
639
+ },
640
+ {
641
+ "epoch": 72.22,
642
+ "learning_rate": 1.5112068965517242e-05,
643
+ "loss": 0.7949,
644
+ "step": 4550
645
+ },
646
+ {
647
+ "epoch": 73.02,
648
+ "learning_rate": 1.468103448275862e-05,
649
+ "loss": 0.7893,
650
+ "step": 4600
651
+ },
652
+ {
653
+ "epoch": 73.8,
654
+ "learning_rate": 1.4249999999999999e-05,
655
+ "loss": 0.7603,
656
+ "step": 4650
657
+ },
658
+ {
659
+ "epoch": 74.6,
660
+ "learning_rate": 1.3818965517241381e-05,
661
+ "loss": 0.776,
662
+ "step": 4700
663
+ },
664
+ {
665
+ "epoch": 75.39,
666
+ "learning_rate": 1.338793103448276e-05,
667
+ "loss": 0.7755,
668
+ "step": 4750
669
+ },
670
+ {
671
+ "epoch": 76.19,
672
+ "learning_rate": 1.2956896551724138e-05,
673
+ "loss": 0.7751,
674
+ "step": 4800
675
+ },
676
+ {
677
+ "epoch": 76.97,
678
+ "learning_rate": 1.2525862068965518e-05,
679
+ "loss": 0.7608,
680
+ "step": 4850
681
+ },
682
+ {
683
+ "epoch": 77.77,
684
+ "learning_rate": 1.2094827586206897e-05,
685
+ "loss": 0.7663,
686
+ "step": 4900
687
+ },
688
+ {
689
+ "epoch": 78.56,
690
+ "learning_rate": 1.1663793103448277e-05,
691
+ "loss": 0.7656,
692
+ "step": 4950
693
+ },
694
+ {
695
+ "epoch": 79.36,
696
+ "learning_rate": 1.1232758620689656e-05,
697
+ "loss": 0.7671,
698
+ "step": 5000
699
+ },
700
+ {
701
+ "epoch": 79.36,
702
+ "eval_cer": 0.03937054927543449,
703
+ "eval_loss": 0.18550464510917664,
704
+ "eval_runtime": 260.3539,
705
+ "eval_samples_per_second": 16.639,
706
+ "eval_steps_per_second": 2.082,
707
+ "eval_wer": 0.20747190121893302,
708
+ "step": 5000
709
+ },
710
+ {
711
+ "epoch": 80.16,
712
+ "learning_rate": 1.0801724137931036e-05,
713
+ "loss": 0.7694,
714
+ "step": 5050
715
+ },
716
+ {
717
+ "epoch": 80.94,
718
+ "learning_rate": 1.0370689655172414e-05,
719
+ "loss": 0.7672,
720
+ "step": 5100
721
+ },
722
+ {
723
+ "epoch": 81.74,
724
+ "learning_rate": 9.939655172413793e-06,
725
+ "loss": 0.7444,
726
+ "step": 5150
727
+ },
728
+ {
729
+ "epoch": 82.53,
730
+ "learning_rate": 9.508620689655173e-06,
731
+ "loss": 0.7534,
732
+ "step": 5200
733
+ },
734
+ {
735
+ "epoch": 83.33,
736
+ "learning_rate": 9.077586206896552e-06,
737
+ "loss": 0.7453,
738
+ "step": 5250
739
+ },
740
+ {
741
+ "epoch": 84.13,
742
+ "learning_rate": 8.646551724137932e-06,
743
+ "loss": 0.7494,
744
+ "step": 5300
745
+ },
746
+ {
747
+ "epoch": 84.91,
748
+ "learning_rate": 8.224137931034483e-06,
749
+ "loss": 0.7425,
750
+ "step": 5350
751
+ },
752
+ {
753
+ "epoch": 85.71,
754
+ "learning_rate": 7.793103448275863e-06,
755
+ "loss": 0.7499,
756
+ "step": 5400
757
+ },
758
+ {
759
+ "epoch": 86.5,
760
+ "learning_rate": 7.370689655172413e-06,
761
+ "loss": 0.735,
762
+ "step": 5450
763
+ },
764
+ {
765
+ "epoch": 87.3,
766
+ "learning_rate": 6.939655172413794e-06,
767
+ "loss": 0.7467,
768
+ "step": 5500
769
+ },
770
+ {
771
+ "epoch": 87.3,
772
+ "eval_cer": 0.03894228375080922,
773
+ "eval_loss": 0.18841499090194702,
774
+ "eval_runtime": 261.43,
775
+ "eval_samples_per_second": 16.57,
776
+ "eval_steps_per_second": 2.073,
777
+ "eval_wer": 0.20490739274972297,
778
+ "step": 5500
779
+ },
780
+ {
781
+ "epoch": 88.09,
782
+ "learning_rate": 6.508620689655173e-06,
783
+ "loss": 0.7348,
784
+ "step": 5550
785
+ },
786
+ {
787
+ "epoch": 88.88,
788
+ "learning_rate": 6.0775862068965515e-06,
789
+ "loss": 0.7244,
790
+ "step": 5600
791
+ },
792
+ {
793
+ "epoch": 89.67,
794
+ "learning_rate": 5.646551724137932e-06,
795
+ "loss": 0.7394,
796
+ "step": 5650
797
+ },
798
+ {
799
+ "epoch": 90.47,
800
+ "learning_rate": 5.21551724137931e-06,
801
+ "loss": 0.7423,
802
+ "step": 5700
803
+ },
804
+ {
805
+ "epoch": 91.27,
806
+ "learning_rate": 4.78448275862069e-06,
807
+ "loss": 0.7251,
808
+ "step": 5750
809
+ },
810
+ {
811
+ "epoch": 92.06,
812
+ "learning_rate": 4.353448275862069e-06,
813
+ "loss": 0.7304,
814
+ "step": 5800
815
+ },
816
+ {
817
+ "epoch": 92.85,
818
+ "learning_rate": 3.9224137931034484e-06,
819
+ "loss": 0.7153,
820
+ "step": 5850
821
+ },
822
+ {
823
+ "epoch": 93.64,
824
+ "learning_rate": 3.491379310344828e-06,
825
+ "loss": 0.7287,
826
+ "step": 5900
827
+ },
828
+ {
829
+ "epoch": 94.44,
830
+ "learning_rate": 3.0603448275862068e-06,
831
+ "loss": 0.7349,
832
+ "step": 5950
833
+ },
834
+ {
835
+ "epoch": 95.24,
836
+ "learning_rate": 2.6293103448275866e-06,
837
+ "loss": 0.731,
838
+ "step": 6000
839
+ },
840
+ {
841
+ "epoch": 95.24,
842
+ "eval_cer": 0.03871819132513321,
843
+ "eval_loss": 0.1877404898405075,
844
+ "eval_runtime": 259.3367,
845
+ "eval_samples_per_second": 16.704,
846
+ "eval_steps_per_second": 2.09,
847
+ "eval_wer": 0.2059838530948235,
848
+ "step": 6000
849
+ },
850
+ {
851
+ "epoch": 96.03,
852
+ "learning_rate": 2.1982758620689655e-06,
853
+ "loss": 0.7151,
854
+ "step": 6050
855
+ },
856
+ {
857
+ "epoch": 96.82,
858
+ "learning_rate": 1.7672413793103449e-06,
859
+ "loss": 0.713,
860
+ "step": 6100
861
+ },
862
+ {
863
+ "epoch": 97.61,
864
+ "learning_rate": 1.3362068965517243e-06,
865
+ "loss": 0.7257,
866
+ "step": 6150
867
+ },
868
+ {
869
+ "epoch": 98.41,
870
+ "learning_rate": 9.051724137931035e-07,
871
+ "loss": 0.7287,
872
+ "step": 6200
873
+ },
874
+ {
875
+ "epoch": 99.2,
876
+ "learning_rate": 4.7413793103448276e-07,
877
+ "loss": 0.7273,
878
+ "step": 6250
879
+ },
880
+ {
881
+ "epoch": 99.99,
882
+ "learning_rate": 4.310344827586207e-08,
883
+ "loss": 0.7082,
884
+ "step": 6300
885
+ },
886
+ {
887
+ "epoch": 99.99,
888
+ "step": 6300,
889
+ "total_flos": 4.0887685530877926e+20,
890
+ "train_loss": 1.049089940994505,
891
+ "train_runtime": 95054.1856,
892
+ "train_samples_per_second": 10.723,
893
+ "train_steps_per_second": 0.066
894
+ }
895
+ ],
896
+ "max_steps": 6300,
897
+ "num_train_epochs": 100,
898
+ "total_flos": 4.0887685530877926e+20,
899
+ "trial_name": null,
900
+ "trial_params": null
901
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d28ad45d3721baa2d9d1ae37754a308d7348290cd7784d5d9c00aa9c1ffe0061
+ size 3055
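training_args.bin is the serialized `TrainingArguments` object saved by the Trainer (via `torch.save`). A sketch of arguments consistent with the hyperparameters quoted in the README and the 50-step logging / 500-step evaluation cadence visible in trainer_state.json; the output directory and any field not listed in the card are assumptions:

```python
# Sketch: TrainingArguments matching the hyperparameters in the README (not the exact saved object).
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="wav2vec2-xls-r-1b-uk-with-lm",  # assumed
    learning_rate=5e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=20,   # 8 * 20 = total_train_batch_size of 160 on one device
    num_train_epochs=100.0,
    warmup_steps=500,
    lr_scheduler_type="linear",
    seed=42,
    fp16=True,                        # "Native AMP" mixed precision
    evaluation_strategy="steps",
    eval_steps=500,                   # matches the 500-step eval cadence in the results table
    save_steps=500,                   # assumed
    logging_steps=50,                 # matches the 50-step loss logging in trainer_state.json
)
```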
vocab.json ADDED
@@ -0,0 +1 @@
+ {"a": 1, "c": 2, "e": 3, "i": 4, "j": 5, "k": 6, "l": 7, "m": 8, "n": 9, "o": 10, "p": 11, "u": 12, "x": 13, "y": 14, "́": 15, "а": 16, "б": 17, "в": 18, "г": 19, "д": 20, "е": 21, "ж": 22, "з": 23, "и": 24, "й": 25, "к": 26, "л": 27, "м": 28, "н": 29, "о": 30, "п": 31, "р": 32, "с": 33, "т": 34, "у": 35, "ф": 36, "х": 37, "ц": 38, "ч": 39, "ш": 40, "щ": 41, "ь": 42, "ю": 43, "я": 44, "є": 45, "і": 46, "ї": 47, "ґ": 48, "|": 0, "[UNK]": 49, "[PAD]": 50}