aaniket commited on
Commit
43acdc6
1 Parent(s): 229e238

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +63 -31
vocab.json CHANGED
@@ -1,33 +1,65 @@
1
  {
2
- " ": 15,
3
- "#": 17,
4
- "-": 12,
5
- "[PAD]": 30,
6
- "[UNK]": 29,
7
- "a": 7,
8
- "b": 6,
9
- "c": 20,
10
- "d": 26,
11
- "e": 5,
12
- "f": 8,
13
- "g": 10,
14
- "h": 25,
15
- "i": 28,
16
- "j": 21,
17
- "k": 23,
18
- "l": 0,
19
- "m": 1,
20
- "n": 11,
21
- "o": 27,
22
- "p": 4,
23
- "q": 14,
24
- "r": 13,
25
- "s": 2,
26
- "t": 22,
27
- "u": 18,
28
- "v": 19,
29
- "w": 3,
30
- "x": 16,
31
- "y": 24,
32
- "z": 9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  }
 
1
  {
2
+ "[PAD]": 62,
3
+ "[UNK]": 61,
4
+ "aa": 45,
5
+ "ae": 7,
6
+ "ah": 12,
7
+ "ao": 52,
8
+ "aw": 43,
9
+ "ax": 11,
10
+ "ax-h": 38,
11
+ "axr": 60,
12
+ "ay": 9,
13
+ "b": 47,
14
+ "bcl": 42,
15
+ "ch": 36,
16
+ "d": 2,
17
+ "dcl": 59,
18
+ "dh": 20,
19
+ "dx": 3,
20
+ "eh": 44,
21
+ "el": 49,
22
+ "em": 16,
23
+ "en": 37,
24
+ "eng": 27,
25
+ "epi": 25,
26
+ "er": 8,
27
+ "ey": 55,
28
+ "f": 29,
29
+ "g": 54,
30
+ "gcl": 57,
31
+ "h#": 19,
32
+ "hh": 51,
33
+ "hv": 22,
34
+ "ih": 32,
35
+ "ix": 53,
36
+ "iy": 26,
37
+ "jh": 28,
38
+ "k": 30,
39
+ "kcl": 34,
40
+ "l": 21,
41
+ "m": 13,
42
+ "n": 50,
43
+ "ng": 15,
44
+ "nx": 31,
45
+ "ow": 17,
46
+ "oy": 10,
47
+ "p": 0,
48
+ "pau": 5,
49
+ "pcl": 41,
50
+ "q": 48,
51
+ "r": 56,
52
+ "s": 18,
53
+ "sh": 14,
54
+ "t": 58,
55
+ "tcl": 24,
56
+ "th": 1,
57
+ "uh": 23,
58
+ "uw": 33,
59
+ "ux": 4,
60
+ "v": 35,
61
+ "w": 40,
62
+ "y": 46,
63
+ "z": 6,
64
+ "zh": 39
65
  }