codelion committed on
Commit
4082ea6
•
1 Parent(s): 914b382

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +25 -3
  2. tokenizer_config.json +7 -1
special_tokens_map.json CHANGED
@@ -11,9 +11,31 @@
11
  "▁<PRE>",
12
  "▁<MID>",
13
  "▁<SUF>",
 
 
 
 
14
  "▁<EOT>"
15
  ],
16
- "bos_token": "<s>",
17
- "eos_token": "</s>",
18
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
 
11
  "▁<PRE>",
12
  "▁<MID>",
13
  "▁<SUF>",
14
+ "▁<EOT>",
15
+ "▁<PRE>",
16
+ "▁<MID>",
17
+ "▁<SUF>",
18
  "▁<EOT>"
19
  ],
20
+ "bos_token": {
21
+ "content": "<s>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "eos_token": {
28
+ "content": "</s>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ },
34
+ "unk_token": {
35
+ "content": "<unk>",
36
+ "lstrip": false,
37
+ "normalized": false,
38
+ "rstrip": false,
39
+ "single_word": false
40
+ }
41
  }
tokenizer_config.json CHANGED
@@ -1,4 +1,6 @@
1
  {
 
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<unk>",
@@ -69,6 +71,10 @@
69
  "▁<PRE>",
70
  "▁<MID>",
71
  "▁<SUF>",
 
 
 
 
72
  "▁<EOT>"
73
  ],
74
  "bos_token": "<s>",
@@ -76,7 +82,7 @@
76
  "eos_token": "</s>",
77
  "eot_token": "▁<EOT>",
78
  "fill_token": "<FILL_ME>",
79
- "legacy": null,
80
  "middle_token": "▁<MID>",
81
  "model_max_length": 1000000000000000019884624838656,
82
  "pad_token": null,
 
1
  {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
 
71
  "▁<PRE>",
72
  "▁<MID>",
73
  "▁<SUF>",
74
+ "▁<EOT>",
75
+ "▁<PRE>",
76
+ "▁<MID>",
77
+ "▁<SUF>",
78
  "▁<EOT>"
79
  ],
80
  "bos_token": "<s>",
 
82
  "eos_token": "</s>",
83
  "eot_token": "▁<EOT>",
84
  "fill_token": "<FILL_ME>",
85
+ "legacy": false,
86
  "middle_token": "▁<MID>",
87
  "model_max_length": 1000000000000000019884624838656,
88
  "pad_token": null,