ntsema committed on
Commit
b0e9342
1 Parent(s): cedd29a

Upload tokenizer

Browse files
Files changed (2) hide show
  1. added_tokens.json +2 -2
  2. vocab.json +17 -7
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 45,
3
- "<s>": 44
4
  }
 
1
  {
2
+ "</s>": 55,
3
+ "<s>": 54
4
  }
vocab.json CHANGED
@@ -1,26 +1,39 @@
1
  {
2
- "[PAD]": 43,
3
- "[UNK]": 42,
4
  "a": 1,
 
 
5
  "b": 2,
6
  "c": 3,
7
  "d": 4,
 
8
  "e": 5,
 
9
  "f": 6,
10
  "g": 7,
11
  "h": 8,
12
  "i": 9,
 
13
  "j": 10,
14
  "k": 11,
15
  "l": 12,
16
  "m": 13,
 
17
  "n": 14,
 
 
18
  "o": 15,
 
19
  "p": 16,
20
  "r": 17,
21
  "s": 18,
 
22
  "t": 19,
 
 
23
  "u": 20,
 
24
  "v": 21,
25
  "w": 22,
26
  "x": 23,
@@ -32,15 +45,12 @@
32
  "ɒ": 28,
33
  "ɔ": 29,
34
  "ə": 30,
 
35
  "ɣ": 31,
36
  "ɨ": 32,
37
  "ɲ": 33,
38
  "ʃ": 34,
39
  "ʒ": 35,
40
  "ʲ": 36,
41
- "ː": 37,
42
- "́": 38,
43
- "̄": 39,
44
- "͡": 40,
45
- "χ": 41
46
  }
 
1
  {
2
+ "[PAD]": 53,
3
+ "[UNK]": 52,
4
  "a": 1,
5
+ "aː": 41,
6
+ "ā": 49,
7
  "b": 2,
8
  "c": 3,
9
  "d": 4,
10
+ "dʲ": 50,
11
  "e": 5,
12
+ "eː": 43,
13
  "f": 6,
14
  "g": 7,
15
  "h": 8,
16
  "i": 9,
17
+ "iː": 39,
18
  "j": 10,
19
  "k": 11,
20
  "l": 12,
21
  "m": 13,
22
+ "mʲ": 47,
23
  "n": 14,
24
+ "nʲ": 37,
25
+ "ń": 44,
26
  "o": 15,
27
+ "oː": 42,
28
  "p": 16,
29
  "r": 17,
30
  "s": 18,
31
+ "sʲ": 38,
32
  "t": 19,
33
+ "tʲ": 45,
34
+ "t͡ʃ": 46,
35
  "u": 20,
36
+ "uː": 40,
37
  "v": 21,
38
  "w": 22,
39
  "x": 23,
 
45
  "ɒ": 28,
46
  "ɔ": 29,
47
  "ə": 30,
48
+ "ə̄": 48,
49
  "ɣ": 31,
50
  "ɨ": 32,
51
  "ɲ": 33,
52
  "ʃ": 34,
53
  "ʒ": 35,
54
  "ʲ": 36,
55
+ "χ": 51
 
 
 
 
56
  }