update tokenizer configs
Browse files- .gitattributes +1 -0
- tokenizer.json +3 -0
- tokenizer_config.json +241 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15eb04bc5ad609fb26533e8525302c5640a945e5f67f65b7c849900acda7d99
|
3 |
+
size 17518497
|
tokenizer_config.json
CHANGED
@@ -1114,6 +1114,246 @@
|
|
1114 |
"single_word": false,
|
1115 |
"special": false
|
1116 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1117 |
"169": {
|
1118 |
"content": "<table>",
|
1119 |
"lstrip": false,
|
@@ -1504,6 +1744,7 @@
|
|
1504 |
"<end_of_turn>"
|
1505 |
],
|
1506 |
"bos_token": "<bos>",
|
|
|
1507 |
"clean_up_tokenization_spaces": false,
|
1508 |
"eos_token": "<eos>",
|
1509 |
"model_max_length": 1000000000000000019884624838656,
|
|
|
1114 |
"single_word": false,
|
1115 |
"special": false
|
1116 |
},
|
1117 |
+
"139": {
|
1118 |
+
"content": "▁▁",
|
1119 |
+
"lstrip": false,
|
1120 |
+
"normalized": false,
|
1121 |
+
"rstrip": false,
|
1122 |
+
"single_word": false,
|
1123 |
+
"special": false
|
1124 |
+
},
|
1125 |
+
"140": {
|
1126 |
+
"content": "▁▁▁",
|
1127 |
+
"lstrip": false,
|
1128 |
+
"normalized": false,
|
1129 |
+
"rstrip": false,
|
1130 |
+
"single_word": false,
|
1131 |
+
"special": false
|
1132 |
+
},
|
1133 |
+
"141": {
|
1134 |
+
"content": "▁▁▁▁",
|
1135 |
+
"lstrip": false,
|
1136 |
+
"normalized": false,
|
1137 |
+
"rstrip": false,
|
1138 |
+
"single_word": false,
|
1139 |
+
"special": false
|
1140 |
+
},
|
1141 |
+
"142": {
|
1142 |
+
"content": "▁▁▁▁▁",
|
1143 |
+
"lstrip": false,
|
1144 |
+
"normalized": false,
|
1145 |
+
"rstrip": false,
|
1146 |
+
"single_word": false,
|
1147 |
+
"special": false
|
1148 |
+
},
|
1149 |
+
"143": {
|
1150 |
+
"content": "▁▁▁▁▁▁",
|
1151 |
+
"lstrip": false,
|
1152 |
+
"normalized": false,
|
1153 |
+
"rstrip": false,
|
1154 |
+
"single_word": false,
|
1155 |
+
"special": false
|
1156 |
+
},
|
1157 |
+
"144": {
|
1158 |
+
"content": "▁▁▁▁▁▁▁",
|
1159 |
+
"lstrip": false,
|
1160 |
+
"normalized": false,
|
1161 |
+
"rstrip": false,
|
1162 |
+
"single_word": false,
|
1163 |
+
"special": false
|
1164 |
+
},
|
1165 |
+
"145": {
|
1166 |
+
"content": "▁▁▁▁▁▁▁▁",
|
1167 |
+
"lstrip": false,
|
1168 |
+
"normalized": false,
|
1169 |
+
"rstrip": false,
|
1170 |
+
"single_word": false,
|
1171 |
+
"special": false
|
1172 |
+
},
|
1173 |
+
"146": {
|
1174 |
+
"content": "▁▁▁▁▁▁▁▁▁",
|
1175 |
+
"lstrip": false,
|
1176 |
+
"normalized": false,
|
1177 |
+
"rstrip": false,
|
1178 |
+
"single_word": false,
|
1179 |
+
"special": false
|
1180 |
+
},
|
1181 |
+
"147": {
|
1182 |
+
"content": "▁▁▁▁▁▁▁▁▁▁",
|
1183 |
+
"lstrip": false,
|
1184 |
+
"normalized": false,
|
1185 |
+
"rstrip": false,
|
1186 |
+
"single_word": false,
|
1187 |
+
"special": false
|
1188 |
+
},
|
1189 |
+
"148": {
|
1190 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁",
|
1191 |
+
"lstrip": false,
|
1192 |
+
"normalized": false,
|
1193 |
+
"rstrip": false,
|
1194 |
+
"single_word": false,
|
1195 |
+
"special": false
|
1196 |
+
},
|
1197 |
+
"149": {
|
1198 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁",
|
1199 |
+
"lstrip": false,
|
1200 |
+
"normalized": false,
|
1201 |
+
"rstrip": false,
|
1202 |
+
"single_word": false,
|
1203 |
+
"special": false
|
1204 |
+
},
|
1205 |
+
"150": {
|
1206 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
1207 |
+
"lstrip": false,
|
1208 |
+
"normalized": false,
|
1209 |
+
"rstrip": false,
|
1210 |
+
"single_word": false,
|
1211 |
+
"special": false
|
1212 |
+
},
|
1213 |
+
"151": {
|
1214 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
1215 |
+
"lstrip": false,
|
1216 |
+
"normalized": false,
|
1217 |
+
"rstrip": false,
|
1218 |
+
"single_word": false,
|
1219 |
+
"special": false
|
1220 |
+
},
|
1221 |
+
"152": {
|
1222 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
1223 |
+
"lstrip": false,
|
1224 |
+
"normalized": false,
|
1225 |
+
"rstrip": false,
|
1226 |
+
"single_word": false,
|
1227 |
+
"special": false
|
1228 |
+
},
|
1229 |
+
"153": {
|
1230 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
1231 |
+
"lstrip": false,
|
1232 |
+
"normalized": false,
|
1233 |
+
"rstrip": false,
|
1234 |
+
"single_word": false,
|
1235 |
+
"special": false
|
1236 |
+
},
|
1237 |
+
"154": {
|
1238 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
1239 |
+
"lstrip": false,
|
1240 |
+
"normalized": false,
|
1241 |
+
"rstrip": false,
|
1242 |
+
"single_word": false,
|
1243 |
+
"special": false
|
1244 |
+
},
|
1245 |
+
"155": {
|
1246 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
1247 |
+
"lstrip": false,
|
1248 |
+
"normalized": false,
|
1249 |
+
"rstrip": false,
|
1250 |
+
"single_word": false,
|
1251 |
+
"special": false
|
1252 |
+
},
|
1253 |
+
"156": {
|
1254 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
1255 |
+
"lstrip": false,
|
1256 |
+
"normalized": false,
|
1257 |
+
"rstrip": false,
|
1258 |
+
"single_word": false,
|
1259 |
+
"special": false
|
1260 |
+
},
|
1261 |
+
"157": {
|
1262 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
1263 |
+
"lstrip": false,
|
1264 |
+
"normalized": false,
|
1265 |
+
"rstrip": false,
|
1266 |
+
"single_word": false,
|
1267 |
+
"special": false
|
1268 |
+
},
|
1269 |
+
"158": {
|
1270 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
1271 |
+
"lstrip": false,
|
1272 |
+
"normalized": false,
|
1273 |
+
"rstrip": false,
|
1274 |
+
"single_word": false,
|
1275 |
+
"special": false
|
1276 |
+
},
|
1277 |
+
"159": {
|
1278 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
1279 |
+
"lstrip": false,
|
1280 |
+
"normalized": false,
|
1281 |
+
"rstrip": false,
|
1282 |
+
"single_word": false,
|
1283 |
+
"special": false
|
1284 |
+
},
|
1285 |
+
"160": {
|
1286 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
1287 |
+
"lstrip": false,
|
1288 |
+
"normalized": false,
|
1289 |
+
"rstrip": false,
|
1290 |
+
"single_word": false,
|
1291 |
+
"special": false
|
1292 |
+
},
|
1293 |
+
"161": {
|
1294 |
+
"content": "ββββββββββββββββββββββββ",
|
1295 |
+
"lstrip": false,
|
1296 |
+
"normalized": false,
|
1297 |
+
"rstrip": false,
|
1298 |
+
"single_word": false,
|
1299 |
+
"special": false
|
1300 |
+
},
|
1301 |
+
"162": {
|
1302 |
+
"content": "βββββββββββββββββββββββββ",
|
1303 |
+
"lstrip": false,
|
1304 |
+
"normalized": false,
|
1305 |
+
"rstrip": false,
|
1306 |
+
"single_word": false,
|
1307 |
+
"special": false
|
1308 |
+
},
|
1309 |
+
"163": {
|
1310 |
+
"content": "ββββββββββββββββββββββββββ",
|
1311 |
+
"lstrip": false,
|
1312 |
+
"normalized": false,
|
1313 |
+
"rstrip": false,
|
1314 |
+
"single_word": false,
|
1315 |
+
"special": false
|
1316 |
+
},
|
1317 |
+
"164": {
|
1318 |
+
"content": "βββββββββββββββββββββββββββ",
|
1319 |
+
"lstrip": false,
|
1320 |
+
"normalized": false,
|
1321 |
+
"rstrip": false,
|
1322 |
+
"single_word": false,
|
1323 |
+
"special": false
|
1324 |
+
},
|
1325 |
+
"165": {
|
1326 |
+
"content": "ββββββββββββββββββββββββββββ",
|
1327 |
+
"lstrip": false,
|
1328 |
+
"normalized": false,
|
1329 |
+
"rstrip": false,
|
1330 |
+
"single_word": false,
|
1331 |
+
"special": false
|
1332 |
+
},
|
1333 |
+
"166": {
|
1334 |
+
"content": "βββββββββββββββββββββββββββββ",
|
1335 |
+
"lstrip": false,
|
1336 |
+
"normalized": false,
|
1337 |
+
"rstrip": false,
|
1338 |
+
"single_word": false,
|
1339 |
+
"special": false
|
1340 |
+
},
|
1341 |
+
"167": {
|
1342 |
+
"content": "ββββββββββββββββββββββββββββββ",
|
1343 |
+
"lstrip": false,
|
1344 |
+
"normalized": false,
|
1345 |
+
"rstrip": false,
|
1346 |
+
"single_word": false,
|
1347 |
+
"special": false
|
1348 |
+
},
|
1349 |
+
"168": {
|
1350 |
+
"content": "βββββββββββββββββββββββββββββββ",
|
1351 |
+
"lstrip": false,
|
1352 |
+
"normalized": false,
|
1353 |
+
"rstrip": false,
|
1354 |
+
"single_word": false,
|
1355 |
+
"special": false
|
1356 |
+
},
|
1357 |
"169": {
|
1358 |
"content": "<table>",
|
1359 |
"lstrip": false,
|
|
|
1744 |
"<end_of_turn>"
|
1745 |
],
|
1746 |
"bos_token": "<bos>",
|
1747 |
+
"chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
|
1748 |
"clean_up_tokenization_spaces": false,
|
1749 |
"eos_token": "<eos>",
|
1750 |
"model_max_length": 1000000000000000019884624838656,
|