fix(tokenizer_config): Adjusts `rstrip` of special tokens.

#53
Files changed (1)
  1. tokenizer_config.json +13 -13
tokenizer_config.json CHANGED
@@ -22,9 +22,9 @@
  "content": "</s>",
  "lstrip": false,
  "normalized": false,
- "rstrip": false,
+ "rstrip": true,
  "single_word": false,
- "special": true
+ "special": false
  },
  "32000": {
  "content": "<|endoftext|>",
@@ -38,7 +38,7 @@
  "content": "<|assistant|>",
  "lstrip": false,
  "normalized": false,
- "rstrip": false,
+ "rstrip": true,
  "single_word": false,
  "special": true
  },
@@ -46,7 +46,7 @@
  "content": "<|placeholder1|>",
  "lstrip": false,
  "normalized": false,
- "rstrip": false,
+ "rstrip": true,
  "single_word": false,
  "special": true
  },
@@ -54,7 +54,7 @@
  "content": "<|placeholder2|>",
  "lstrip": false,
  "normalized": false,
- "rstrip": false,
+ "rstrip": true,
  "single_word": false,
  "special": true
  },
@@ -62,7 +62,7 @@
  "content": "<|placeholder3|>",
  "lstrip": false,
  "normalized": false,
- "rstrip": false,
+ "rstrip": true,
  "single_word": false,
  "special": true
  },
@@ -70,7 +70,7 @@
  "content": "<|placeholder4|>",
  "lstrip": false,
  "normalized": false,
- "rstrip": false,
+ "rstrip": true,
  "single_word": false,
  "special": true
  },
@@ -78,7 +78,7 @@
  "content": "<|system|>",
  "lstrip": false,
  "normalized": false,
- "rstrip": false,
+ "rstrip": true,
  "single_word": false,
  "special": true
  },
@@ -86,7 +86,7 @@
  "content": "<|end|>",
  "lstrip": false,
  "normalized": false,
- "rstrip": false,
+ "rstrip": true,
  "single_word": false,
  "special": true
  },
@@ -94,7 +94,7 @@
  "content": "<|placeholder5|>",
  "lstrip": false,
  "normalized": false,
- "rstrip": false,
+ "rstrip": true,
  "single_word": false,
  "special": true
  },
@@ -102,7 +102,7 @@
  "content": "<|placeholder6|>",
  "lstrip": false,
  "normalized": false,
- "rstrip": false,
+ "rstrip": true,
  "single_word": false,
  "special": true
  },
@@ -110,7 +110,7 @@
  "content": "<|user|>",
  "lstrip": false,
  "normalized": false,
- "rstrip": false,
+ "rstrip": true,
  "single_word": false,
  "special": true
  }
@@ -127,4 +127,4 @@
  "tokenizer_class": "LlamaTokenizer",
  "unk_token": "<unk>",
  "use_default_system_prompt": false
- }
+ }
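
For reviewers who want to sanity-check the new flags locally, a minimal sketch is below. The repo id `your-org/your-model` is a placeholder, not part of this PR, and the snippet assumes a recent `transformers` release in which `added_tokens_decoder` exposes the `rstrip` and `special` attributes. It prints how each added token is registered and then shows the practical effect of `"rstrip": true`: whitespace immediately following a special token should be absorbed by the token rather than encoded separately.

```python
# Verification sketch; "your-org/your-model" is a placeholder repo id.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("your-org/your-model")

# Show how each added token is registered after this change.
for token_id, added in sorted(tok.added_tokens_decoder.items()):
    print(token_id, repr(added.content),
          "rstrip:", added.rstrip,
          "special:", added.special)

# With "rstrip": true, trailing whitespace after a special token is consumed
# by the token itself, so both encodings below should yield the same ids.
print(tok("<|end|>", add_special_tokens=False)["input_ids"])
print(tok("<|end|>\n", add_special_tokens=False)["input_ids"])
```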