lonestar108 committed on
Commit
ed665c0
1 Parent(s): 29f2bf6

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -3,4 +3,4 @@
3
  "eos_token": "</s>",
4
  "pad_token": "</s>",
5
  "unk_token": "<unk>"
6
- }
 
3
  "eos_token": "</s>",
4
  "pad_token": "</s>",
5
  "unk_token": "<unk>"
6
+ }
tokenizer.json CHANGED
@@ -8790,7 +8790,7 @@
8790
  "\"?": 8652,
8791
  "▁>>>": 8653,
8792
  "Que": 8654,
8793
- " ": 8655,
8794
  "▁plain": 8656,
8795
  "ativa": 8657,
8796
  "ocker": 8658,
@@ -18026,7 +18026,7 @@
18026
  "▁farm": 17888,
18027
  "▁rôle": 17889,
18028
  "▁статьи": 17890,
18029
- " ": 17891,
18030
  "subfigure": 17892,
18031
  "èces": 17893,
18032
  "ziel": 17894,
@@ -20381,7 +20381,7 @@
20381
  "▁gcc": 20243,
20382
  "▁scène": 20244,
20383
  "Navigation": 20245,
20384
- "▁ ": 20246,
20385
  "▁кан": 20247,
20386
  "▁towns": 20248,
20387
  "Username": 20249,
@@ -30216,7 +30216,7 @@
30216
  "æ": 30078,
30217
  "њ": 30079,
30218
  " ": 30080,
30219
- " ": 30081,
30220
  "Э": 30082,
30221
  "ë": 30083,
30222
  "õ": 30084,
@@ -51054,7 +51054,7 @@
51054
  "▁>> >",
51055
  "Qu e",
51056
  "Q ue",
51057
- " ",
51058
  "▁p lain",
51059
  "▁pl ain",
51060
  "▁pla in",
@@ -70279,7 +70279,7 @@
70279
  "▁fa rm",
70280
  "▁r ôle",
70281
  "▁стать и",
70282
- " ",
70283
  "sub figure",
70284
  "èce s",
70285
  "è ces",
@@ -74942,7 +74942,7 @@
74942
  "▁ gcc",
74943
  "▁sc ène",
74944
  "N avigation",
74945
- "▁ ",
74946
  "▁к ан",
74947
  "▁ка н",
74948
  "▁ кан",
 
8790
  "\"?": 8652,
8791
  "▁>>>": 8653,
8792
  "Que": 8654,
8793
+ "  ": 8655,
8794
  "▁plain": 8656,
8795
  "ativa": 8657,
8796
  "ocker": 8658,
 
18026
  "▁farm": 17888,
18027
  "▁rôle": 17889,
18028
  "▁статьи": 17890,
18029
+ "    ": 17891,
18030
  "subfigure": 17892,
18031
  "èces": 17893,
18032
  "ziel": 17894,
 
20381
  "▁gcc": 20243,
20382
  "▁scène": 20244,
20383
  "Navigation": 20245,
20384
+ "▁ ": 20246,
20385
  "▁кан": 20247,
20386
  "▁towns": 20248,
20387
  "Username": 20249,
 
30216
  "æ": 30078,
30217
  "њ": 30079,
30218
  " ": 30080,
30219
+ " ": 30081,
30220
  "Э": 30082,
30221
  "ë": 30083,
30222
  "õ": 30084,
 
51054
  "▁>> >",
51055
  "Qu e",
51056
  "Q ue",
51057
+ "   ",
51058
  "▁p lain",
51059
  "▁pl ain",
51060
  "▁pla in",
 
70279
  "▁fa rm",
70280
  "▁r ôle",
70281
  "▁стать и",
70282
+ "     ",
70283
  "sub figure",
70284
  "èce s",
70285
  "è ces",
 
74942
  "▁ gcc",
74943
  "▁sc ène",
74944
  "N avigation",
74945
+ "▁  ",
74946
  "▁к ан",
74947
  "▁ка н",
74948
  "▁ кан",
tokenizer_config.json CHANGED
@@ -1,6 +1,4 @@
1
  {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
  "bos_token": {
5
  "__type": "AddedToken",
6
  "content": "<s>",
@@ -36,4 +34,4 @@
36
  },
37
  "use_default_system_prompt": true,
38
  "use_fast": true
39
- }
 
1
  {
 
 
2
  "bos_token": {
3
  "__type": "AddedToken",
4
  "content": "<s>",
 
34
  },
35
  "use_default_system_prompt": true,
36
  "use_fast": true
37
+ }