spsither committed on
Commit
a77b0c3
1 Parent(s): 9864140

Upload processor

Browse files
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</s>": 106,
3
+ "<s>": 105
4
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2Processor",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000
10
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": true,
19
+ "normalized": false,
20
+ "rstrip": true,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "[UNK]",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": true,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "103": {
4
+ "content": "[UNK]",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": true,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "104": {
12
+ "content": "[PAD]",
13
+ "lstrip": true,
14
+ "normalized": false,
15
+ "rstrip": true,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "105": {
20
+ "content": "<s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "106": {
28
+ "content": "</s>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ }
35
+ },
36
+ "bos_token": "<s>",
37
+ "clean_up_tokenization_spaces": true,
38
+ "do_lower_case": false,
39
+ "eos_token": "</s>",
40
+ "model_max_length": 1000000000000000019884624838656,
41
+ "pad_token": "[PAD]",
42
+ "processor_class": "Wav2Vec2Processor",
43
+ "replace_word_delimiter_char": " ",
44
+ "target_lang": null,
45
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
46
+ "unk_token": "[UNK]",
47
+ "word_delimiter_token": "|"
48
+ }
vocab.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[PAD]": 104,
3
+ "[UNK]": 103,
4
+ "|": 0,
5
+ "ༀ": 59,
6
+ "་": 31,
7
+ "།": 42,
8
+ "༡": 23,
9
+ "༢": 2,
10
+ "༣": 15,
11
+ "༤": 81,
12
+ "༥": 70,
13
+ "༦": 34,
14
+ "༧": 25,
15
+ "༨": 52,
16
+ "༩": 29,
17
+ "ཀ": 44,
18
+ "ཁ": 11,
19
+ "ག": 17,
20
+ "གྷ": 96,
21
+ "ང": 74,
22
+ "ཅ": 56,
23
+ "ཆ": 69,
24
+ "ཇ": 45,
25
+ "ཉ": 67,
26
+ "ཊ": 101,
27
+ "ཋ": 76,
28
+ "ཌ": 83,
29
+ "ཌྷ": 68,
30
+ "ཎ": 13,
31
+ "ཏ": 10,
32
+ "ཐ": 16,
33
+ "ད": 63,
34
+ "དྷ": 72,
35
+ "ན": 91,
36
+ "པ": 86,
37
+ "ཕ": 61,
38
+ "བ": 26,
39
+ "བྷ": 87,
40
+ "མ": 28,
41
+ "ཙ": 92,
42
+ "ཚ": 37,
43
+ "ཛ": 39,
44
+ "ཝ": 64,
45
+ "ཞ": 65,
46
+ "ཟ": 19,
47
+ "འ": 47,
48
+ "ཡ": 97,
49
+ "ར": 57,
50
+ "ལ": 33,
51
+ "ཤ": 22,
52
+ "ཥ": 18,
53
+ "ས": 54,
54
+ "ཧ": 100,
55
+ "ཨ": 93,
56
+ "ཀྵ": 20,
57
+ "ཪ": 50,
58
+ "ཱ": 71,
59
+ "ི": 9,
60
+ "ཱི": 41,
61
+ "ུ": 6,
62
+ "ཱུ": 98,
63
+ "ཷ": 35,
64
+ "ེ": 32,
65
+ "ོ": 58,
66
+ "ཾ": 84,
67
+ "ྀ": 4,
68
+ "ཱྀ": 12,
69
+ "ྃ": 24,
70
+ "ྐ": 46,
71
+ "ྑ": 21,
72
+ "ྒ": 53,
73
+ "ྔ": 1,
74
+ "ྕ": 14,
75
+ "ྖ": 66,
76
+ "ྗ": 40,
77
+ "ྙ": 79,
78
+ "ྚ": 75,
79
+ "ྛ": 102,
80
+ "ྜ": 49,
81
+ "ྞ": 43,
82
+ "ྟ": 82,
83
+ "ྠ": 38,
84
+ "ྡ": 90,
85
+ "ྡྷ": 51,
86
+ "ྣ": 7,
87
+ "ྤ": 48,
88
+ "ྥ": 80,
89
+ "ྦ": 36,
90
+ "ྦྷ": 95,
91
+ "ྨ": 77,
92
+ "ྩ": 5,
93
+ "ྪ": 62,
94
+ "ྫ": 94,
95
+ "ྭ": 99,
96
+ "ྰ": 27,
97
+ "ྱ": 89,
98
+ "ྲ": 73,
99
+ "ླ": 85,
100
+ "ྴ": 3,
101
+ "ྵ": 30,
102
+ "ྶ": 60,
103
+ "ྷ": 88,
104
+ "ྸ": 8,
105
+ "ྻ": 55,
106
+ "ྼ": 78
107
+ }