paruwka committed on
Commit
38662ed
1 Parent(s): 1e4d23c

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +88 -88
vocab.json CHANGED
@@ -1,92 +1,92 @@
1
  {
2
- "&": 34,
3
- "'": 58,
4
- "(": 56,
5
- ")": 62,
6
- "=": 22,
7
  "[PAD]": 89,
8
  "[UNK]": 88,
9
- "a": 21,
10
- "b": 50,
11
- "c": 59,
12
- "d": 15,
13
- "e": 36,
14
- "f": 71,
15
- "g": 5,
16
- "h": 75,
17
- "i": 23,
18
- "j": 29,
19
- "k": 82,
20
- "l": 83,
21
- "m": 16,
22
- "n": 37,
23
- "o": 9,
24
- "p": 44,
25
- "q": 80,
26
- "r": 3,
27
- "s": 14,
28
- "t": 66,
29
- "u": 27,
30
- "v": 52,
31
- "w": 4,
32
- "x": 41,
33
- "y": 12,
34
- "z": 72,
35
- "|": 53,
36
- "«": 6,
37
- "´": 31,
38
- "»": 1,
39
- "ß": 74,
40
- "à": 13,
41
- "á": 26,
42
- "â": 79,
43
- "ã": 64,
44
- "ä": 54,
45
- "å": 48,
46
- "æ": 8,
47
- "ç": 10,
48
- "è": 63,
49
- "é": 20,
50
- "ê": 60,
51
- "ë": 65,
52
- "ì": 32,
53
- "î": 78,
54
- "ï": 30,
55
- "ð": 76,
56
- "ñ": 7,
57
- "ó": 61,
58
- "ô": 25,
59
- "ö": 0,
60
- "ø": 40,
61
- "ù": 33,
62
- "ú": 81,
63
- "û": 69,
64
- "ü": 84,
65
- "þ": 43,
66
- "ÿ": 24,
67
- "ă": 18,
68
- "ć": 28,
69
- "č": 47,
70
- "ē": 57,
71
- "ğ": 39,
72
- "ō": 49,
73
- "œ": 46,
74
- "ş": 86,
75
- "š": 73,
76
- "ū": 19,
77
- "ž": 51,
78
- "ș": 17,
79
- "ʻ": 70,
80
- "̀": 68,
81
- "́": 87,
82
- "̂": 11,
83
- "̧": 77,
84
- "–": 67,
85
- "—": 42,
86
- "’": 38,
87
- "“": 35,
88
- "”": 55,
89
- "…": 2,
90
- "の": 85,
91
- "ひ": 45
92
  }
 
1
  {
2
+ "&": 80,
3
+ "'": 37,
4
+ "(": 54,
5
+ ")": 8,
6
+ "=": 67,
7
  "[PAD]": 89,
8
  "[UNK]": 88,
9
+ "a": 74,
10
+ "b": 33,
11
+ "c": 82,
12
+ "d": 47,
13
+ "e": 49,
14
+ "f": 68,
15
+ "g": 58,
16
+ "h": 13,
17
+ "i": 63,
18
+ "j": 77,
19
+ "k": 61,
20
+ "l": 81,
21
+ "m": 24,
22
+ "n": 17,
23
+ "o": 12,
24
+ "p": 16,
25
+ "q": 69,
26
+ "r": 39,
27
+ "s": 20,
28
+ "t": 1,
29
+ "u": 53,
30
+ "v": 11,
31
+ "w": 29,
32
+ "x": 86,
33
+ "y": 27,
34
+ "z": 64,
35
+ "|": 48,
36
+ "«": 40,
37
+ "´": 10,
38
+ "»": 7,
39
+ "ß": 76,
40
+ "à": 25,
41
+ "á": 14,
42
+ "â": 44,
43
+ "ã": 79,
44
+ "ä": 15,
45
+ "å": 4,
46
+ "æ": 18,
47
+ "ç": 42,
48
+ "è": 71,
49
+ "é": 85,
50
+ "ê": 5,
51
+ "ë": 83,
52
+ "ì": 45,
53
+ "î": 35,
54
+ "ï": 46,
55
+ "ð": 22,
56
+ "ñ": 84,
57
+ "ó": 72,
58
+ "ô": 6,
59
+ "ö": 66,
60
+ "ø": 59,
61
+ "ù": 9,
62
+ "ú": 38,
63
+ "û": 78,
64
+ "ü": 31,
65
+ "þ": 23,
66
+ "ÿ": 26,
67
+ "ă": 3,
68
+ "ć": 87,
69
+ "č": 28,
70
+ "ē": 65,
71
+ "ğ": 52,
72
+ "ō": 73,
73
+ "œ": 56,
74
+ "ş": 62,
75
+ "š": 19,
76
+ "ū": 51,
77
+ "ž": 50,
78
+ "ș": 57,
79
+ "ʻ": 41,
80
+ "̀": 30,
81
+ "́": 36,
82
+ "̂": 70,
83
+ "̧": 75,
84
+ "–": 2,
85
+ "—": 43,
86
+ "’": 55,
87
+ "“": 34,
88
+ "”": 60,
89
+ "…": 21,
90
+ "の": 0,
91
+ "ひ": 32
92
  }