ad019el committed on
Commit
1ea1168
1 Parent(s): a245345

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +46 -37
vocab.json CHANGED
@@ -1,41 +1,50 @@
1
  {
2
- "[PAD]": 38,
3
- "[UNK]": 37,
4
- "a": 33,
5
- "b": 19,
6
- "c": 20,
7
- "e": 0,
8
- "f": 5,
9
- "g": 23,
10
- "j": 18,
 
 
 
11
  "k": 28,
12
- "l": 34,
13
- "m": 15,
14
- "o": 1,
15
- "q": 26,
16
- "r": 10,
17
- "t": 32,
18
- "u": 29,
 
 
 
 
19
  "w": 31,
20
- "x": 27,
21
- "y": 25,
22
- "z": 4,
23
- "|": 35,
24
- "ĉ": 13,
25
- "č": 24,
26
- "ğ": 36,
27
- "ţ": 7,
28
- "ǧ": 22,
29
- "ɛ": 17,
30
- "ɣ": 30,
31
- "̣": 11,
32
- "γ": 14,
33
- "ε": 6,
34
- "ԑ": 9,
35
- "": 12,
36
- "": 2,
37
- "": 21,
38
- "": 8,
39
- "": 3,
40
- "": 16
 
 
41
  }
 
1
  {
2
+ "[PAD]": 47,
3
+ "[UNK]": 46,
4
+ "a": 9,
5
+ "b": 1,
6
+ "c": 25,
7
+ "d": 8,
8
+ "e": 4,
9
+ "f": 33,
10
+ "g": 2,
11
+ "h": 41,
12
+ "i": 23,
13
+ "j": 35,
14
  "k": 28,
15
+ "l": 19,
16
+ "m": 30,
17
+ "n": 21,
18
+ "o": 11,
19
+ "p": 13,
20
+ "q": 37,
21
+ "r": 5,
22
+ "s": 16,
23
+ "t": 40,
24
+ "u": 45,
25
+ "v": 29,
26
  "w": 31,
27
+ "x": 38,
28
+ "y": 7,
29
+ "z": 34,
30
+ "|": 22,
31
+ " ": 44,
32
+ "ĉ": 20,
33
+ "č": 27,
34
+ "ğ": 15,
35
+ "ţ": 24,
36
+ "ǧ": 18,
37
+ "ɛ": 32,
38
+ "ɣ": 12,
39
+ "̣": 39,
40
+ "γ": 43,
41
+ "ε": 0,
42
+ "ԑ": 42,
43
+ "چ": 14,
44
+ "": 10,
45
+ "": 26,
46
+ "": 6,
47
+ "": 17,
48
+ "ṭ": 36,
49
+ "ẓ": 3
50
  }