tuanio committed on
Commit
6015fd8
1 Parent(s): faf4d9c

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +2 -3
  2. special_tokens_map.json +0 -7
  3. vocab.json +3 -4
added_tokens.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "</s>": 146,
3
- "<s>": 145,
4
- "[PAD]": 147
5
  }
 
1
  {
2
+ "</s>": 145,
3
+ "<s>": 144
 
4
  }
special_tokens_map.json CHANGED
@@ -13,13 +13,6 @@
13
  "normalized": true,
14
  "rstrip": false,
15
  "single_word": false
16
- },
17
- {
18
- "content": "[PAD]",
19
- "lstrip": false,
20
- "normalized": true,
21
- "rstrip": false,
22
- "single_word": false
23
  }
24
  ],
25
  "bos_token": "<s>",
 
13
  "normalized": true,
14
  "rstrip": false,
15
  "single_word": false
 
 
 
 
 
 
 
16
  }
17
  ],
18
  "bos_token": "<s>",
vocab.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "!": 1,
3
  "#": 2,
4
  "&": 3,
5
  "'": 4,
@@ -24,6 +23,8 @@
24
  ">": 23,
25
  "?": 24,
26
  "@": 25,
 
 
27
  "_": 26,
28
  "a": 27,
29
  "b": 28,
@@ -136,12 +137,10 @@
136
  "ỵ": 134,
137
  "ỷ": 135,
138
  "ỹ": 136,
139
- "‎": 137,
140
  "–": 138,
141
  "‘": 139,
142
  "’": 140,
143
  "“": 141,
144
  "”": 142,
145
- "…": 143,
146
- "": 144
147
  }
 
1
  {
 
2
  "#": 2,
3
  "&": 3,
4
  "'": 4,
 
23
  ">": 23,
24
  "?": 24,
25
  "@": 25,
26
+ "[PAD]": 146,
27
+ "[UNK]": 145,
28
  "_": 26,
29
  "a": 27,
30
  "b": 28,
 
137
  "ỵ": 134,
138
  "ỷ": 135,
139
  "ỹ": 136,
 
140
  "–": 138,
141
  "‘": 139,
142
  "’": 140,
143
  "“": 141,
144
  "”": 142,
145
+ "…": 143
 
146
  }