nbaden committed on
Commit
3194be8
1 Parent(s): c807540

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +43 -43
vocab.json CHANGED
@@ -1,48 +1,48 @@
1
  {
2
  "[PAD]": 45,
3
  "[UNK]": 44,
4
- "c": 20,
5
- "i": 15,
6
- "o": 30,
7
- "y": 35,
8
- "|": 36,
9
- "«": 37,
10
- "»": 31,
11
- "а": 18,
12
- "б": 22,
13
- "в": 23,
14
- "г": 42,
15
- "д": 41,
16
- "е": 17,
17
- "ж": 2,
18
- "з": 32,
19
- "и": 0,
20
- "й": 34,
21
- "к": 3,
22
- "л": 38,
23
- "м": 14,
24
- "н": 28,
25
- "о": 33,
26
- "п": 12,
27
- "р": 11,
28
  "с": 1,
29
- "т": 24,
30
- "у": 7,
31
- "ф": 40,
32
- "х": 16,
33
- "ц": 9,
34
- "ч": 21,
35
- "ш": 6,
36
- "щ": 19,
37
- "ы": 39,
38
- "ь": 27,
39
- "э": 8,
40
- "ю": 5,
41
- "я": 4,
42
- "і": 26,
43
- "ҕ": 43,
44
- "ҥ": 25,
45
- "ү": 13,
46
- "һ": 10,
47
- "ө": 29
48
  }
 
1
  {
2
  "[PAD]": 45,
3
  "[UNK]": 44,
4
+ "c": 34,
5
+ "i": 42,
6
+ "o": 9,
7
+ "y": 16,
8
+ "|": 8,
9
+ "«": 0,
10
+ "»": 12,
11
+ "а": 40,
12
+ "б": 25,
13
+ "в": 39,
14
+ "г": 24,
15
+ "д": 21,
16
+ "е": 29,
17
+ "ж": 6,
18
+ "з": 33,
19
+ "и": 7,
20
+ "й": 13,
21
+ "к": 10,
22
+ "л": 31,
23
+ "м": 23,
24
+ "н": 22,
25
+ "о": 11,
26
+ "п": 20,
27
+ "р": 15,
28
  "с": 1,
29
+ "т": 32,
30
+ "у": 5,
31
+ "ф": 26,
32
+ "х": 35,
33
+ "ц": 41,
34
+ "ч": 38,
35
+ "ш": 4,
36
+ "щ": 37,
37
+ "ы": 43,
38
+ "ь": 28,
39
+ "э": 3,
40
+ "ю": 17,
41
+ "я": 30,
42
+ "і": 18,
43
+ "ҕ": 27,
44
+ "ҥ": 14,
45
+ "ү": 36,
46
+ "һ": 2,
47
+ "ө": 19
48
  }