santyzenith committed on
Commit
4eb8a8b
1 Parent(s): 2798c01

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -1,7 +1,4 @@
1
  {
2
- "additional_special_tokens": [
3
- "<|endoftext|>"
4
- ],
5
  "bos_token": "<|endoftext|>",
6
  "eos_token": "<|endoftext|>",
7
  "pad_token": "<|endoftext|>",
 
1
  {
 
 
 
2
  "bos_token": "<|endoftext|>",
3
  "eos_token": "<|endoftext|>",
4
  "pad_token": "<|endoftext|>",
tokenizer.json CHANGED
@@ -9,7 +9,7 @@
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
- "normalized": false,
13
  "special": true
14
  },
15
  {
 
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
+ "normalized": true,
13
  "special": true
14
  },
15
  {
tokenizer_config.json CHANGED
@@ -5,7 +5,7 @@
5
  "0": {
6
  "content": "<|endoftext|>",
7
  "lstrip": false,
8
- "normalized": false,
9
  "rstrip": false,
10
  "single_word": false,
11
  "special": true
@@ -27,9 +27,7 @@
27
  "special": false
28
  }
29
  },
30
- "additional_special_tokens": [
31
- "<|endoftext|>"
32
- ],
33
  "bos_token": "<|endoftext|>",
34
  "clean_up_tokenization_spaces": true,
35
  "eos_token": "<|endoftext|>",
 
5
  "0": {
6
  "content": "<|endoftext|>",
7
  "lstrip": false,
8
+ "normalized": true,
9
  "rstrip": false,
10
  "single_word": false,
11
  "special": true
 
27
  "special": false
28
  }
29
  },
30
+ "additional_special_tokens": [],
 
 
31
  "bos_token": "<|endoftext|>",
32
  "clean_up_tokenization_spaces": true,
33
  "eos_token": "<|endoftext|>",