lonestar108
committed on
Commit
•
ed665c0
1
Parent(s):
29f2bf6
Upload tokenizer
Browse files
- special_tokens_map.json +1 -1
- tokenizer.json +7 -7
- tokenizer_config.json +1 -3
special_tokens_map.json
CHANGED
@@ -3,4 +3,4 @@
|
|
3 |
"eos_token": "</s>",
|
4 |
"pad_token": "</s>",
|
5 |
"unk_token": "<unk>"
|
6 |
-
}
|
|
|
3 |
"eos_token": "</s>",
|
4 |
"pad_token": "</s>",
|
5 |
"unk_token": "<unk>"
|
6 |
+
}
|
tokenizer.json
CHANGED
@@ -8790,7 +8790,7 @@
|
|
8790 |
"\"?": 8652,
|
8791 |
"▁>>>": 8653,
|
8792 |
"Que": 8654,
|
8793 |
-
"
|
8794 |
"▁plain": 8656,
|
8795 |
"ativa": 8657,
|
8796 |
"ocker": 8658,
|
@@ -18026,7 +18026,7 @@
|
|
18026 |
"▁farm": 17888,
|
18027 |
"▁rôle": 17889,
|
18028 |
"▁статьи": 17890,
|
18029 |
-
"
|
18030 |
"subfigure": 17892,
|
18031 |
"èces": 17893,
|
18032 |
"ziel": 17894,
|
@@ -20381,7 +20381,7 @@
|
|
20381 |
"▁gcc": 20243,
|
20382 |
"▁scène": 20244,
|
20383 |
"Navigation": 20245,
|
20384 |
-
"▁
|
20385 |
"▁кан": 20247,
|
20386 |
"▁towns": 20248,
|
20387 |
"Username": 20249,
|
@@ -30216,7 +30216,7 @@
|
|
30216 |
"æ": 30078,
|
30217 |
"њ": 30079,
|
30218 |
" ": 30080,
|
30219 |
-
"
|
30220 |
"Э": 30082,
|
30221 |
"ë": 30083,
|
30222 |
"õ": 30084,
|
@@ -51054,7 +51054,7 @@
|
|
51054 |
"▁>> >",
|
51055 |
"Qu e",
|
51056 |
"Q ue",
|
51057 |
-
"
|
51058 |
"▁p lain",
|
51059 |
"▁pl ain",
|
51060 |
"▁pla in",
|
@@ -70279,7 +70279,7 @@
|
|
70279 |
"▁fa rm",
|
70280 |
"▁r ôle",
|
70281 |
"▁стать и",
|
70282 |
-
"
|
70283 |
"sub figure",
|
70284 |
"èce s",
|
70285 |
"è ces",
|
@@ -74942,7 +74942,7 @@
|
|
74942 |
"▁ gcc",
|
74943 |
"▁sc ène",
|
74944 |
"N avigation",
|
74945 |
-
"▁
|
74946 |
"▁к ан",
|
74947 |
"▁ка н",
|
74948 |
"▁ кан",
|
|
|
8790 |
"\"?": 8652,
|
8791 |
"▁>>>": 8653,
|
8792 |
"Que": 8654,
|
8793 |
+
" ": 8655,
|
8794 |
"▁plain": 8656,
|
8795 |
"ativa": 8657,
|
8796 |
"ocker": 8658,
|
|
|
18026 |
"▁farm": 17888,
|
18027 |
"▁rôle": 17889,
|
18028 |
"▁статьи": 17890,
|
18029 |
+
" ": 17891,
|
18030 |
"subfigure": 17892,
|
18031 |
"èces": 17893,
|
18032 |
"ziel": 17894,
|
|
|
20381 |
"▁gcc": 20243,
|
20382 |
"▁scène": 20244,
|
20383 |
"Navigation": 20245,
|
20384 |
+
"▁ ": 20246,
|
20385 |
"▁кан": 20247,
|
20386 |
"▁towns": 20248,
|
20387 |
"Username": 20249,
|
|
|
30216 |
"æ": 30078,
|
30217 |
"њ": 30079,
|
30218 |
" ": 30080,
|
30219 |
+
" ": 30081,
|
30220 |
"Э": 30082,
|
30221 |
"ë": 30083,
|
30222 |
"õ": 30084,
|
|
|
51054 |
"▁>> >",
|
51055 |
"Qu e",
|
51056 |
"Q ue",
|
51057 |
+
" ",
|
51058 |
"▁p lain",
|
51059 |
"▁pl ain",
|
51060 |
"▁pla in",
|
|
|
70279 |
"▁fa rm",
|
70280 |
"▁r ôle",
|
70281 |
"▁стать и",
|
70282 |
+
" ",
|
70283 |
"sub figure",
|
70284 |
"èce s",
|
70285 |
"è ces",
|
|
|
74942 |
"▁ gcc",
|
74943 |
"▁sc ène",
|
74944 |
"N avigation",
|
74945 |
+
"▁ ",
|
74946 |
"▁к ан",
|
74947 |
"▁ка н",
|
74948 |
"▁ кан",
|
tokenizer_config.json
CHANGED
@@ -1,6 +1,4 @@
|
|
1 |
{
|
2 |
-
"add_bos_token": true,
|
3 |
-
"add_eos_token": false,
|
4 |
"bos_token": {
|
5 |
"__type": "AddedToken",
|
6 |
"content": "<s>",
|
@@ -36,4 +34,4 @@
|
|
36 |
},
|
37 |
"use_default_system_prompt": true,
|
38 |
"use_fast": true
|
39 |
-
}
|
|
|
1 |
{
|
|
|
|
|
2 |
"bos_token": {
|
3 |
"__type": "AddedToken",
|
4 |
"content": "<s>",
|
|
|
34 |
},
|
35 |
"use_default_system_prompt": true,
|
36 |
"use_fast": true
|
37 |
+
}
|