nathanhunt committed on
Commit
ee36210
1 Parent(s): 949de36

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +3 -2
  2. special_tokens_map.json +7 -0
  3. vocab.json +3 -5
added_tokens.json CHANGED
@@ -1,4 +1,5 @@
1
  {
2
- "</s>": 29,
3
- "<s>": 28
 
4
  }
 
1
  {
2
+ "</s>": 26,
3
+ "<s>": 25,
4
+ "[PAD]": 27
5
  }
special_tokens_map.json CHANGED
@@ -13,6 +13,13 @@
13
  "normalized": true,
14
  "rstrip": false,
15
  "single_word": false
 
 
 
 
 
 
 
16
  }
17
  ],
18
  "bos_token": "<s>",
 
13
  "normalized": true,
14
  "rstrip": false,
15
  "single_word": false
16
+ },
17
+ {
18
+ "content": "[PAD]",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
  }
24
  ],
25
  "bos_token": "<s>",
vocab.json CHANGED
@@ -1127,8 +1127,7 @@
1127
  "ꞌ": 7
1128
  },
1129
  "aka": {
1130
- " ": 0,
1131
- "[PAD]": 27,
1132
  "[UNK]": 26,
1133
  "a": 1,
1134
  "b": 2,
@@ -1150,11 +1149,10 @@
1150
  "u": 18,
1151
  "w": 19,
1152
  "y": 20,
 
1153
  "á": 21,
1154
  "ɔ": 22,
1155
- "ɛ": 23,
1156
- "’": 24,
1157
- "•": 25
1158
  },
1159
  "akb": {
1160
  "'": 25,
 
1127
  "ꞌ": 7
1128
  },
1129
  "aka": {
1130
+ "[PAD]": 26,
 
1131
  "[UNK]": 26,
1132
  "a": 1,
1133
  "b": 2,
 
1149
  "u": 18,
1150
  "w": 19,
1151
  "y": 20,
1152
+ "|": 0,
1153
  "á": 21,
1154
  "ɔ": 22,
1155
+ "ɛ": 23
 
 
1156
  },
1157
  "akb": {
1158
  "'": 25,