Update tokenizer_config.json
Browse files- tokenizer_config.json +5 -5
tokenizer_config.json
CHANGED
@@ -113,7 +113,7 @@
|
|
113 |
"special": true
|
114 |
},
|
115 |
"128014": {
|
116 |
-
"content": "<|
|
117 |
"lstrip": false,
|
118 |
"normalized": false,
|
119 |
"rstrip": false,
|
@@ -121,7 +121,7 @@
|
|
121 |
"special": true
|
122 |
},
|
123 |
"128015": {
|
124 |
-
"content": "<|
|
125 |
"lstrip": false,
|
126 |
"normalized": false,
|
127 |
"rstrip": false,
|
@@ -129,7 +129,7 @@
|
|
129 |
"special": true
|
130 |
},
|
131 |
"128016": {
|
132 |
-
"content": "<|
|
133 |
"lstrip": false,
|
134 |
"normalized": false,
|
135 |
"rstrip": false,
|
@@ -2050,7 +2050,7 @@
|
|
2050 |
}
|
2051 |
},
|
2052 |
"bos_token": "<|begin_of_text|>",
|
2053 |
-
"chat_template": "{{-
|
2054 |
"clean_up_tokenization_spaces": true,
|
2055 |
"eos_token": "<|eot_id|>",
|
2056 |
"model_input_names": [
|
@@ -2061,4 +2061,4 @@
|
|
2061 |
"pad_token": "<|finetune_right_pad_id|>",
|
2062 |
"padding_side": "left",
|
2063 |
"tokenizer_class": "PreTrainedTokenizerFast"
|
2064 |
-
}
|
|
|
113 |
"special": true
|
114 |
},
|
115 |
"128014": {
|
116 |
+
"content": "<|fim_prefix|>",
|
117 |
"lstrip": false,
|
118 |
"normalized": false,
|
119 |
"rstrip": false,
|
|
|
121 |
"special": true
|
122 |
},
|
123 |
"128015": {
|
124 |
+
"content": "<|fim_hole|>",
|
125 |
"lstrip": false,
|
126 |
"normalized": false,
|
127 |
"rstrip": false,
|
|
|
129 |
"special": true
|
130 |
},
|
131 |
"128016": {
|
132 |
+
"content": "<|fim_suffix|>",
|
133 |
"lstrip": false,
|
134 |
"normalized": false,
|
135 |
"rstrip": false,
|
|
|
2050 |
}
|
2051 |
},
|
2052 |
"bos_token": "<|begin_of_text|>",
|
2053 |
+
"chat_template": "{{- if .Suffix }}<|fim_begin|>{{ .Prompt }}<|fim_hole|>{{ .Suffix }}<|fim_end|>\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\n\n\" }}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n{%- for message in messages %}\n{{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}\n{%- endfor %}\n{%- if add_generation_prompt %}\n{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}\n{%- endif %}\n",
|
2054 |
"clean_up_tokenization_spaces": true,
|
2055 |
"eos_token": "<|eot_id|>",
|
2056 |
"model_input_names": [
|
|
|
2061 |
"pad_token": "<|finetune_right_pad_id|>",
|
2062 |
"padding_side": "left",
|
2063 |
"tokenizer_class": "PreTrainedTokenizerFast"
|
2064 |
+
}
|