ntsema committed on
Commit
b82a113
1 Parent(s): 06c7d44

Upload tokenizer

Browse files
Files changed (2) hide show
  1. added_tokens.json +2 -2
  2. vocab.json +38 -63
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 67,
3
- "<s>": 66
4
  }
 
1
  {
2
+ "</s>": 42,
3
+ "<s>": 41
4
  }
vocab.json CHANGED
@@ -1,68 +1,43 @@
1
  {
2
- "[PAD]": 65,
3
- "[UNK]": 64,
4
  "a": 1,
5
  "b": 2,
6
- "d": 3,
7
- "e": 4,
8
- "f": 5,
9
- "g": 6,
10
- "h": 7,
11
- "i": 8,
12
- "j": 9,
13
- "k": 10,
14
- "l": 11,
15
- "m": 12,
16
- "n": 13,
17
- "o": 14,
18
- "p": 15,
19
- "q": 16,
20
- "r": 17,
21
- "s": 18,
22
- "t": 19,
23
- "u": 20,
24
- "v": 21,
25
- "w": 22,
26
- "x": 23,
27
- "z": 24,
 
 
28
  "|": 0,
29
- "æ": 25,
30
- "ð": 26,
31
- "ø": 27,
32
- "ü": 28,
33
- "ŋ": 29,
34
- "ɐ": 30,
35
- "ɑ": 31,
36
- "ɒ": 32,
37
- "ɔ": 33,
38
- "ɕ": 34,
39
- "ə": 35,
40
- "ɛ": 36,
41
- "ɜ": 37,
42
- "ɡ": 38,
43
- "ɣ": 39,
44
- "ɨ": 40,
45
- "ɪ": 41,
46
- "ɯ": 42,
47
- "ɵ": 43,
48
- "ɸ": 44,
49
- "ʃ": 45,
50
- "ʉ": 46,
51
- "ʊ": 47,
52
- "ʌ": 48,
53
- "ʐ": 49,
54
- "ʑ": 50,
55
- "ʔ": 51,
56
- "ː": 52,
57
- "ˑ": 53,
58
- "̆": 54,
59
- "̈": 55,
60
- "̞": 56,
61
- "̟": 57,
62
- "̠": 58,
63
- "̥": 59,
64
- "̬": 60,
65
- "͡": 61,
66
- "β": 62,
67
- "θ": 63
68
  }
 
1
  {
2
+ "[PAD]": 40,
3
+ "[UNK]": 39,
4
  "a": 1,
5
  "b": 2,
6
+ "c": 3,
7
+ "d": 4,
8
+ "e": 5,
9
+ "f": 6,
10
+ "g": 7,
11
+ "h": 8,
12
+ "i": 9,
13
+ "j": 10,
14
+ "k": 11,
15
+ "l": 12,
16
+ "m": 13,
17
+ "n": 14,
18
+ "o": 15,
19
+ "p": 16,
20
+ "q": 17,
21
+ "r": 18,
22
+ "s": 19,
23
+ "t": 20,
24
+ "u": 21,
25
+ "v": 22,
26
+ "w": 23,
27
+ "x": 24,
28
+ "y": 25,
29
+ "z": 26,
30
  "|": 0,
31
+ "æ": 27,
32
+ "ð": 28,
33
+ "ü": 29,
34
+ "ŋ": 30,
35
+ "œ": 31,
36
+ "ə": 32,
37
+ "ɣ": 33,
38
+ "ʃ": 34,
39
+ "ʒ": 35,
40
+ "ʔ": 36,
41
+ "͡": 37,
42
+ "θ": 38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  }