EddieChen372
committed on
Commit
•
ef59ff8
1
Parent(s):
39598bf
add tokenizer
Browse files
- special_tokens_map.json +8 -1
- tokenizer.json +59 -8
special_tokens_map.json
CHANGED
@@ -1,3 +1,10 @@
|
|
1 |
{
|
2 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
}
|
1 |
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
"[react]",
|
4 |
+
"[end]",
|
5 |
+
"[jest]"
|
6 |
+
],
|
7 |
+
"bos_token": "<|endoftext|>",
|
8 |
+
"eos_token": "<|endoftext|>",
|
9 |
+
"pad_token": "<|endoftext|>"
|
10 |
}
|
tokenizer.json
CHANGED
@@ -2324,6 +2324,33 @@
|
|
2324 |
"rstrip": false,
|
2325 |
"normalized": false,
|
2326 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2327 |
}
|
2328 |
],
|
2329 |
"normalizer": null,
|
@@ -2350,14 +2377,14 @@
|
|
2350 |
"type": "TemplateProcessing",
|
2351 |
"single": [
|
2352 |
{
|
2353 |
-
"
|
2354 |
-
"id": "
|
2355 |
"type_id": 0
|
2356 |
}
|
2357 |
},
|
2358 |
{
|
2359 |
-
"
|
2360 |
-
"id": "
|
2361 |
"type_id": 0
|
2362 |
}
|
2363 |
}
|
@@ -2365,7 +2392,7 @@
|
|
2365 |
"pair": [
|
2366 |
{
|
2367 |
"SpecialToken": {
|
2368 |
-
"id": "
|
2369 |
"type_id": 0
|
2370 |
}
|
2371 |
},
|
@@ -2377,14 +2404,20 @@
|
|
2377 |
},
|
2378 |
{
|
2379 |
"SpecialToken": {
|
2380 |
-
"id": "
|
2381 |
-
"type_id":
|
2382 |
}
|
2383 |
},
|
2384 |
{
|
2385 |
"Sequence": {
|
2386 |
"id": "B",
|
2387 |
-
"type_id":
|
|
|
|
|
|
|
|
|
|
|
|
|
2388 |
}
|
2389 |
}
|
2390 |
],
|
@@ -2397,6 +2430,24 @@
|
|
2397 |
"tokens": [
|
2398 |
"<|endoftext|>"
|
2399 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2400 |
}
|
2401 |
}
|
2402 |
},
|
2324 |
"rstrip": false,
|
2325 |
"normalized": false,
|
2326 |
"special": true
|
2327 |
+
},
|
2328 |
+
{
|
2329 |
+
"id": 52000,
|
2330 |
+
"content": "[react]",
|
2331 |
+
"single_word": false,
|
2332 |
+
"lstrip": false,
|
2333 |
+
"rstrip": false,
|
2334 |
+
"normalized": false,
|
2335 |
+
"special": true
|
2336 |
+
},
|
2337 |
+
{
|
2338 |
+
"id": 52001,
|
2339 |
+
"content": "[end]",
|
2340 |
+
"single_word": false,
|
2341 |
+
"lstrip": false,
|
2342 |
+
"rstrip": false,
|
2343 |
+
"normalized": false,
|
2344 |
+
"special": true
|
2345 |
+
},
|
2346 |
+
{
|
2347 |
+
"id": 52002,
|
2348 |
+
"content": "[jest]",
|
2349 |
+
"single_word": false,
|
2350 |
+
"lstrip": false,
|
2351 |
+
"rstrip": false,
|
2352 |
+
"normalized": false,
|
2353 |
+
"special": true
|
2354 |
}
|
2355 |
],
|
2356 |
"normalizer": null,
|
2377 |
"type": "TemplateProcessing",
|
2378 |
"single": [
|
2379 |
{
|
2380 |
+
"Sequence": {
|
2381 |
+
"id": "A",
|
2382 |
"type_id": 0
|
2383 |
}
|
2384 |
},
|
2385 |
{
|
2386 |
+
"SpecialToken": {
|
2387 |
+
"id": "<|endoftext|>",
|
2388 |
"type_id": 0
|
2389 |
}
|
2390 |
}
|
2392 |
"pair": [
|
2393 |
{
|
2394 |
"SpecialToken": {
|
2395 |
+
"id": "[react]",
|
2396 |
"type_id": 0
|
2397 |
}
|
2398 |
},
|
2404 |
},
|
2405 |
{
|
2406 |
"SpecialToken": {
|
2407 |
+
"id": "[jest]",
|
2408 |
+
"type_id": 0
|
2409 |
}
|
2410 |
},
|
2411 |
{
|
2412 |
"Sequence": {
|
2413 |
"id": "B",
|
2414 |
+
"type_id": 0
|
2415 |
+
}
|
2416 |
+
},
|
2417 |
+
{
|
2418 |
+
"SpecialToken": {
|
2419 |
+
"id": "<|endoftext|>",
|
2420 |
+
"type_id": 0
|
2421 |
}
|
2422 |
}
|
2423 |
],
|
2430 |
"tokens": [
|
2431 |
"<|endoftext|>"
|
2432 |
]
|
2433 |
+
},
|
2434 |
+
"[jest]": {
|
2435 |
+
"id": "[jest]",
|
2436 |
+
"ids": [
|
2437 |
+
52002
|
2438 |
+
],
|
2439 |
+
"tokens": [
|
2440 |
+
"[jest]"
|
2441 |
+
]
|
2442 |
+
},
|
2443 |
+
"[react]": {
|
2444 |
+
"id": "[react]",
|
2445 |
+
"ids": [
|
2446 |
+
52000
|
2447 |
+
],
|
2448 |
+
"tokens": [
|
2449 |
+
"[react]"
|
2450 |
+
]
|
2451 |
}
|
2452 |
}
|
2453 |
},
|