aaniket commited on
Commit
c5bd509
1 Parent(s): a235995

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +31 -63
vocab.json CHANGED
@@ -1,65 +1,33 @@
1
  {
2
- "[PAD]": 62,
3
- "[UNK]": 61,
4
- "aa": 16,
5
- "ae": 36,
6
- "ah": 32,
7
- "ao": 50,
8
- "aw": 18,
9
- "ax": 34,
10
- "ax-h": 29,
11
- "axr": 54,
12
- "ay": 58,
13
- "b": 25,
14
- "bcl": 49,
15
- "ch": 33,
16
- "d": 0,
17
- "dcl": 53,
18
- "dh": 56,
19
- "dx": 47,
20
- "eh": 23,
21
- "el": 55,
22
- "em": 44,
23
- "en": 7,
24
- "eng": 59,
25
- "epi": 6,
26
- "er": 26,
27
- "ey": 22,
28
- "f": 51,
29
- "g": 5,
30
- "gcl": 40,
31
- "h#": 12,
32
- "hh": 43,
33
- "hv": 9,
34
- "ih": 31,
35
- "ix": 1,
36
- "iy": 48,
37
- "jh": 30,
38
- "k": 39,
39
- "kcl": 52,
40
- "l": 24,
41
- "m": 14,
42
- "n": 28,
43
- "ng": 57,
44
- "nx": 35,
45
- "ow": 45,
46
- "oy": 60,
47
- "p": 19,
48
- "pau": 17,
49
- "pcl": 11,
50
- "q": 4,
51
- "r": 21,
52
- "s": 38,
53
- "sh": 8,
54
- "t": 3,
55
- "tcl": 42,
56
- "th": 46,
57
- "uh": 41,
58
- "uw": 13,
59
- "ux": 15,
60
- "v": 37,
61
- "w": 20,
62
- "y": 27,
63
- "z": 2,
64
- "zh": 10
65
  }
 
1
  {
2
+ " ": 1,
3
+ "#": 28,
4
+ "-": 8,
5
+ "[PAD]": 30,
6
+ "[UNK]": 29,
7
+ "a": 9,
8
+ "b": 22,
9
+ "c": 13,
10
+ "d": 15,
11
+ "e": 20,
12
+ "f": 18,
13
+ "g": 17,
14
+ "h": 26,
15
+ "i": 16,
16
+ "j": 23,
17
+ "k": 25,
18
+ "l": 27,
19
+ "m": 5,
20
+ "n": 10,
21
+ "o": 19,
22
+ "p": 12,
23
+ "q": 24,
24
+ "r": 4,
25
+ "s": 21,
26
+ "t": 0,
27
+ "u": 11,
28
+ "v": 3,
29
+ "w": 2,
30
+ "x": 7,
31
+ "y": 6,
32
+ "z": 14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  }