Lasion commited on
Commit
7651e3d
1 Parent(s): 2430f23

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +90 -90
vocab.json CHANGED
@@ -1,94 +1,94 @@
1
  {
2
  "[PAD]": 91,
3
  "[UNK]": 90,
4
- "a": 41,
5
- "b": 44,
6
- "c": 4,
7
- "d": 85,
8
- "e": 12,
9
- "g": 7,
10
- "h": 34,
11
- "i": 58,
12
- "k": 52,
13
- "l": 89,
14
- "m": 80,
15
- "n": 64,
16
- "o": 26,
17
- "p": 25,
18
- "q": 10,
19
- "r": 5,
20
- "s": 81,
21
- "t": 14,
22
- "u": 88,
23
- "v": 55,
24
- "x": 23,
25
- "y": 73,
26
- "|": 24,
27
- "à": 2,
28
- "á": 48,
29
- "â": 36,
30
- "ã": 67,
31
- "è": 75,
32
- "é": 32,
33
- "ê": 65,
34
- "ì": 29,
35
- "í": 51,
36
- "ò": 66,
37
- "ó": 1,
38
- "ô": 79,
39
- "õ": 71,
40
- "ù": 68,
41
- "ú": 59,
42
- "ý": 40,
43
- "ă": 0,
44
- "đ": 3,
45
- "ĩ": 87,
46
- "ũ": 78,
47
- "ơ": 77,
48
- "ư": 42,
49
- "ạ": 54,
50
- "ả": 33,
51
- "ấ": 76,
52
- "ầ": 37,
53
- "ẩ": 31,
54
- "ẫ": 13,
55
- "ậ": 60,
56
- "ắ": 49,
57
- "ằ": 15,
58
- "ẳ": 46,
59
- "ẵ": 18,
60
- "ặ": 35,
61
- "ẹ": 27,
62
- "ẻ": 70,
63
- "ẽ": 53,
64
- "ế": 45,
65
- "ề": 39,
66
- "ể": 43,
67
- "ễ": 84,
68
- "ệ": 17,
69
- "ỉ": 47,
70
- "ị": 28,
71
- "ọ": 57,
72
- "ỏ": 21,
73
- "ố": 22,
74
- "ồ": 19,
75
- "ổ": 9,
76
- "ỗ": 20,
77
- "ộ": 69,
78
- "ớ": 6,
79
- "ờ": 56,
80
- "ở": 50,
81
- "ỡ": 82,
82
- "ợ": 30,
83
- "ụ": 62,
84
- "ủ": 8,
85
- "ứ": 83,
86
- "ừ": 61,
87
- "ử": 72,
88
- "ữ": 38,
89
- "ự": 74,
90
- "ỳ": 16,
91
- "ỵ": 63,
92
- "ỷ": 86,
93
- "ỹ": 11
94
  }
 
1
  {
2
  "[PAD]": 91,
3
  "[UNK]": 90,
4
+ "a": 80,
5
+ "b": 26,
6
+ "c": 78,
7
+ "d": 47,
8
+ "e": 41,
9
+ "g": 37,
10
+ "h": 64,
11
+ "i": 85,
12
+ "k": 5,
13
+ "l": 38,
14
+ "m": 43,
15
+ "n": 81,
16
+ "o": 14,
17
+ "p": 21,
18
+ "q": 16,
19
+ "r": 55,
20
+ "s": 10,
21
+ "t": 40,
22
+ "u": 9,
23
+ "v": 25,
24
+ "x": 69,
25
+ "y": 62,
26
+ "|": 58,
27
+ "à": 23,
28
+ "á": 7,
29
+ "â": 42,
30
+ "ã": 59,
31
+ "è": 6,
32
+ "é": 63,
33
+ "ê": 36,
34
+ "ì": 28,
35
+ "í": 89,
36
+ "ò": 3,
37
+ "ó": 34,
38
+ "ô": 75,
39
+ "õ": 35,
40
+ "ù": 27,
41
+ "ú": 39,
42
+ "ý": 19,
43
+ "ă": 86,
44
+ "đ": 45,
45
+ "ĩ": 18,
46
+ "ũ": 54,
47
+ "ơ": 68,
48
+ "ư": 77,
49
+ "ạ": 84,
50
+ "ả": 56,
51
+ "ấ": 71,
52
+ "ầ": 15,
53
+ "ẩ": 0,
54
+ "ẫ": 72,
55
+ "ậ": 87,
56
+ "ắ": 82,
57
+ "ằ": 61,
58
+ "ẳ": 1,
59
+ "ẵ": 33,
60
+ "ặ": 88,
61
+ "ẹ": 70,
62
+ "ẻ": 20,
63
+ "ẽ": 76,
64
+ "ế": 2,
65
+ "ề": 44,
66
+ "ể": 46,
67
+ "ễ": 31,
68
+ "ệ": 29,
69
+ "ỉ": 22,
70
+ "ị": 12,
71
+ "ọ": 50,
72
+ "ỏ": 4,
73
+ "ố": 57,
74
+ "ồ": 13,
75
+ "ổ": 24,
76
+ "ỗ": 48,
77
+ "ộ": 32,
78
+ "ớ": 51,
79
+ "ờ": 83,
80
+ "ở": 17,
81
+ "ỡ": 79,
82
+ "ợ": 52,
83
+ "ụ": 67,
84
+ "ủ": 53,
85
+ "ứ": 30,
86
+ "ừ": 65,
87
+ "ử": 60,
88
+ "ữ": 66,
89
+ "ự": 11,
90
+ "ỳ": 8,
91
+ "ỵ": 73,
92
+ "ỷ": 49,
93
+ "ỹ": 74
94
  }