Yehor Smoliakov committed on
Commit
a7390db
1 Parent(s): a041cd2

Update the AM

Browse files
added_tokens.json CHANGED
@@ -1 +1,4 @@
1
- {"<s>": 37, "</s>": 38}
 
 
 
1
+ {
2
+ "</s>": 39,
3
+ "<s>": 38
4
+ }
alphabet.json CHANGED
@@ -1 +1 @@
1
- {"labels": [" ", "\u0430", "\u0431", "\u0432", "\u0433", "\u0434", "\u0435", "\u0436", "\u0437", "\u0438", "\u0439", "\u043a", "\u043b", "\u043c", "\u043d", "\u043e", "\u043f", "\u0440", "\u0441", "\u0442", "\u0443", "\u0444", "\u0445", "\u0446", "\u0447", "\u0448", "\u0449", "\u044c", "\u044e", "\u044f", "\u0454", "\u0456", "\u0457", "'", "-", "\u2047", "", "<s>", "</s>"], "is_bpe": false}
1
+ {"labels": ["'", "-", "", "\u2047", " ", "\u0430", "\u0431", "\u0432", "\u0433", "\u0434", "\u0435", "\u0436", "\u0437", "\u0438", "\u0439", "\u043a", "\u043b", "\u043c", "\u043d", "\u043e", "\u043f", "\u0440", "\u0441", "\u0442", "\u0443", "\u0444", "\u0445", "\u0446", "\u0447", "\u0448", "\u0449", "\u044c", "\u044e", "\u044f", "\u0454", "\u0456", "\u0457", "\u0491", "<s>", "</s>"], "is_bpe": false}
config.json CHANGED
@@ -77,7 +77,7 @@
77
  "num_hidden_layers": 24,
78
  "num_negatives": 100,
79
  "output_hidden_size": 1024,
80
- "pad_token_id": 36,
81
  "proj_codevector_dim": 768,
82
  "tdnn_dilation": [
83
  1,
@@ -101,8 +101,8 @@
101
  1
102
  ],
103
  "torch_dtype": "float32",
104
- "transformers_version": "4.20.0.dev0",
105
  "use_weighted_layer_sum": false,
106
- "vocab_size": 39,
107
  "xvector_output_dim": 512
108
  }
77
  "num_hidden_layers": 24,
78
  "num_negatives": 100,
79
  "output_hidden_size": 1024,
80
+ "pad_token_id": 2,
81
  "proj_codevector_dim": 768,
82
  "tdnn_dilation": [
83
  1,
101
  1
102
  ],
103
  "torch_dtype": "float32",
104
+ "transformers_version": "4.20.1",
105
  "use_weighted_layer_sum": false,
106
+ "vocab_size": 40,
107
  "xvector_output_dim": 512
108
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b618404ca0a65d2e3cc80eaa58de06663ed98986395485c918ef5acc23c2bca1
3
- size 1262058609
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e39730846db353ab2c7d5fa1bcf158b42c9310443b7eeca4afc4be41864e96ee
3
+ size 1262062705
special_tokens_map.json CHANGED
@@ -1 +1,22 @@
1
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "</s>",
12
+ "lstrip": false,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "bos_token": "<s>",
19
+ "eos_token": "</s>",
20
+ "pad_token": "[PAD]",
21
+ "unk_token": "[UNK]"
22
+ }
tokenizer_config.json CHANGED
@@ -1 +1,13 @@
1
- {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "replace_word_delimiter_char": " ", "name_or_path": "Yehor/wav2vec2-xls-r-300m-uk-with-small-lm", "processor_class": "Wav2Vec2ProcessorWithLM", "special_tokens_map_file": null, "trust_remote_code": false, "tokenizer_class": "Wav2Vec2CTCTokenizer"}
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "do_lower_case": false,
4
+ "eos_token": "</s>",
5
+ "name_or_path": "./",
6
+ "pad_token": "[PAD]",
7
+ "processor_class": "Wav2Vec2Processor",
8
+ "replace_word_delimiter_char": " ",
9
+ "special_tokens_map_file": null,
10
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
11
+ "unk_token": "[UNK]",
12
+ "word_delimiter_token": "|"
13
+ }
vocab.json CHANGED
@@ -1 +1,40 @@
1
- {"'": 33, "-": 34, "[PAD]": 36, "[UNK]": 35, "|": 0, "а": 1, "б": 2, "в": 3, "г": 4, "д": 5, "е": 6, "ж": 7, "з": 8, "и": 9, "й": 10, "к": 11, "л": 12, "м": 13, "н": 14, "о": 15, "п": 16, "р": 17, "с": 18, "т": 19, "у": 20, "ф": 21, "х": 22, "ц": 23, "ч": 24, "ш": 25, "щ": 26, "ь": 27, "ю": 28, "я": 29, "є": 30, "і": 31, "ї": 32}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "'": 0,
3
+ "-": 1,
4
+ "[PAD]": 2,
5
+ "[UNK]": 3,
6
+ "|": 4,
7
+ "а": 5,
8
+ "б": 6,
9
+ "в": 7,
10
+ "г": 8,
11
+ "д": 9,
12
+ "е": 10,
13
+ "ж": 11,
14
+ "з": 12,
15
+ "и": 13,
16
+ "й": 14,
17
+ "к": 15,
18
+ "л": 16,
19
+ "м": 17,
20
+ "н": 18,
21
+ "о": 19,
22
+ "п": 20,
23
+ "р": 21,
24
+ "с": 22,
25
+ "т": 23,
26
+ "у": 24,
27
+ "ф": 25,
28
+ "х": 26,
29
+ "ц": 27,
30
+ "ч": 28,
31
+ "ш": 29,
32
+ "щ": 30,
33
+ "ь": 31,
34
+ "ю": 32,
35
+ "я": 33,
36
+ "є": 34,
37
+ "і": 35,
38
+ "ї": 36,
39
+ "ґ": 37
40
+ }