bjoernp committed on
Commit
eac5191
1 Parent(s): c5acfca

Add chat_template

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +261 -1
tokenizer_config.json CHANGED
@@ -2049,13 +2049,273 @@
2049
  "special": true
2050
  }
2051
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2052
  "bos_token": "<|begin_of_text|>",
 
2053
  "clean_up_tokenization_spaces": true,
2054
- "eos_token": "<|end_of_text|>",
2055
  "model_input_names": [
2056
  "input_ids",
2057
  "attention_mask"
2058
  ],
2059
  "model_max_length": 1000000000000000019884624838656,
 
2060
  "tokenizer_class": "PreTrainedTokenizerFast"
2061
  }
 
2049
  "special": true
2050
  }
2051
  },
2052
+ "additional_special_tokens": [
2053
+ "<|begin_of_text|>",
2054
+ "<|end_of_text|>",
2055
+ "<|reserved_special_token_0|>",
2056
+ "<|reserved_special_token_1|>",
2057
+ "<|reserved_special_token_2|>",
2058
+ "<|reserved_special_token_3|>",
2059
+ "<|start_header_id|>",
2060
+ "<|end_header_id|>",
2061
+ "<|reserved_special_token_4|>",
2062
+ "<|eot_id|>",
2063
+ "<|reserved_special_token_5|>",
2064
+ "<|reserved_special_token_6|>",
2065
+ "<|reserved_special_token_7|>",
2066
+ "<|reserved_special_token_8|>",
2067
+ "<|reserved_special_token_9|>",
2068
+ "<|reserved_special_token_10|>",
2069
+ "<|reserved_special_token_11|>",
2070
+ "<|reserved_special_token_12|>",
2071
+ "<|reserved_special_token_13|>",
2072
+ "<|reserved_special_token_14|>",
2073
+ "<|reserved_special_token_15|>",
2074
+ "<|reserved_special_token_16|>",
2075
+ "<|reserved_special_token_17|>",
2076
+ "<|reserved_special_token_18|>",
2077
+ "<|reserved_special_token_19|>",
2078
+ "<|reserved_special_token_20|>",
2079
+ "<|reserved_special_token_21|>",
2080
+ "<|reserved_special_token_22|>",
2081
+ "<|reserved_special_token_23|>",
2082
+ "<|reserved_special_token_24|>",
2083
+ "<|reserved_special_token_25|>",
2084
+ "<|reserved_special_token_26|>",
2085
+ "<|reserved_special_token_27|>",
2086
+ "<|reserved_special_token_28|>",
2087
+ "<|reserved_special_token_29|>",
2088
+ "<|reserved_special_token_30|>",
2089
+ "<|reserved_special_token_31|>",
2090
+ "<|reserved_special_token_32|>",
2091
+ "<|reserved_special_token_33|>",
2092
+ "<|reserved_special_token_34|>",
2093
+ "<|reserved_special_token_35|>",
2094
+ "<|reserved_special_token_36|>",
2095
+ "<|reserved_special_token_37|>",
2096
+ "<|reserved_special_token_38|>",
2097
+ "<|reserved_special_token_39|>",
2098
+ "<|reserved_special_token_40|>",
2099
+ "<|reserved_special_token_41|>",
2100
+ "<|reserved_special_token_42|>",
2101
+ "<|reserved_special_token_43|>",
2102
+ "<|reserved_special_token_44|>",
2103
+ "<|reserved_special_token_45|>",
2104
+ "<|reserved_special_token_46|>",
2105
+ "<|reserved_special_token_47|>",
2106
+ "<|reserved_special_token_48|>",
2107
+ "<|reserved_special_token_49|>",
2108
+ "<|reserved_special_token_50|>",
2109
+ "<|reserved_special_token_51|>",
2110
+ "<|reserved_special_token_52|>",
2111
+ "<|reserved_special_token_53|>",
2112
+ "<|reserved_special_token_54|>",
2113
+ "<|reserved_special_token_55|>",
2114
+ "<|reserved_special_token_56|>",
2115
+ "<|reserved_special_token_57|>",
2116
+ "<|reserved_special_token_58|>",
2117
+ "<|reserved_special_token_59|>",
2118
+ "<|reserved_special_token_60|>",
2119
+ "<|reserved_special_token_61|>",
2120
+ "<|reserved_special_token_62|>",
2121
+ "<|reserved_special_token_63|>",
2122
+ "<|reserved_special_token_64|>",
2123
+ "<|reserved_special_token_65|>",
2124
+ "<|reserved_special_token_66|>",
2125
+ "<|reserved_special_token_67|>",
2126
+ "<|reserved_special_token_68|>",
2127
+ "<|reserved_special_token_69|>",
2128
+ "<|reserved_special_token_70|>",
2129
+ "<|reserved_special_token_71|>",
2130
+ "<|reserved_special_token_72|>",
2131
+ "<|reserved_special_token_73|>",
2132
+ "<|reserved_special_token_74|>",
2133
+ "<|reserved_special_token_75|>",
2134
+ "<|reserved_special_token_76|>",
2135
+ "<|reserved_special_token_77|>",
2136
+ "<|reserved_special_token_78|>",
2137
+ "<|reserved_special_token_79|>",
2138
+ "<|reserved_special_token_80|>",
2139
+ "<|reserved_special_token_81|>",
2140
+ "<|reserved_special_token_82|>",
2141
+ "<|reserved_special_token_83|>",
2142
+ "<|reserved_special_token_84|>",
2143
+ "<|reserved_special_token_85|>",
2144
+ "<|reserved_special_token_86|>",
2145
+ "<|reserved_special_token_87|>",
2146
+ "<|reserved_special_token_88|>",
2147
+ "<|reserved_special_token_89|>",
2148
+ "<|reserved_special_token_90|>",
2149
+ "<|reserved_special_token_91|>",
2150
+ "<|reserved_special_token_92|>",
2151
+ "<|reserved_special_token_93|>",
2152
+ "<|reserved_special_token_94|>",
2153
+ "<|reserved_special_token_95|>",
2154
+ "<|reserved_special_token_96|>",
2155
+ "<|reserved_special_token_97|>",
2156
+ "<|reserved_special_token_98|>",
2157
+ "<|reserved_special_token_99|>",
2158
+ "<|reserved_special_token_100|>",
2159
+ "<|reserved_special_token_101|>",
2160
+ "<|reserved_special_token_102|>",
2161
+ "<|reserved_special_token_103|>",
2162
+ "<|reserved_special_token_104|>",
2163
+ "<|reserved_special_token_105|>",
2164
+ "<|reserved_special_token_106|>",
2165
+ "<|reserved_special_token_107|>",
2166
+ "<|reserved_special_token_108|>",
2167
+ "<|reserved_special_token_109|>",
2168
+ "<|reserved_special_token_110|>",
2169
+ "<|reserved_special_token_111|>",
2170
+ "<|reserved_special_token_112|>",
2171
+ "<|reserved_special_token_113|>",
2172
+ "<|reserved_special_token_114|>",
2173
+ "<|reserved_special_token_115|>",
2174
+ "<|reserved_special_token_116|>",
2175
+ "<|reserved_special_token_117|>",
2176
+ "<|reserved_special_token_118|>",
2177
+ "<|reserved_special_token_119|>",
2178
+ "<|reserved_special_token_120|>",
2179
+ "<|reserved_special_token_121|>",
2180
+ "<|reserved_special_token_122|>",
2181
+ "<|reserved_special_token_123|>",
2182
+ "<|reserved_special_token_124|>",
2183
+ "<|reserved_special_token_125|>",
2184
+ "<|reserved_special_token_126|>",
2185
+ "<|reserved_special_token_127|>",
2186
+ "<|reserved_special_token_128|>",
2187
+ "<|reserved_special_token_129|>",
2188
+ "<|reserved_special_token_130|>",
2189
+ "<|reserved_special_token_131|>",
2190
+ "<|reserved_special_token_132|>",
2191
+ "<|reserved_special_token_133|>",
2192
+ "<|reserved_special_token_134|>",
2193
+ "<|reserved_special_token_135|>",
2194
+ "<|reserved_special_token_136|>",
2195
+ "<|reserved_special_token_137|>",
2196
+ "<|reserved_special_token_138|>",
2197
+ "<|reserved_special_token_139|>",
2198
+ "<|reserved_special_token_140|>",
2199
+ "<|reserved_special_token_141|>",
2200
+ "<|reserved_special_token_142|>",
2201
+ "<|reserved_special_token_143|>",
2202
+ "<|reserved_special_token_144|>",
2203
+ "<|reserved_special_token_145|>",
2204
+ "<|reserved_special_token_146|>",
2205
+ "<|reserved_special_token_147|>",
2206
+ "<|reserved_special_token_148|>",
2207
+ "<|reserved_special_token_149|>",
2208
+ "<|reserved_special_token_150|>",
2209
+ "<|reserved_special_token_151|>",
2210
+ "<|reserved_special_token_152|>",
2211
+ "<|reserved_special_token_153|>",
2212
+ "<|reserved_special_token_154|>",
2213
+ "<|reserved_special_token_155|>",
2214
+ "<|reserved_special_token_156|>",
2215
+ "<|reserved_special_token_157|>",
2216
+ "<|reserved_special_token_158|>",
2217
+ "<|reserved_special_token_159|>",
2218
+ "<|reserved_special_token_160|>",
2219
+ "<|reserved_special_token_161|>",
2220
+ "<|reserved_special_token_162|>",
2221
+ "<|reserved_special_token_163|>",
2222
+ "<|reserved_special_token_164|>",
2223
+ "<|reserved_special_token_165|>",
2224
+ "<|reserved_special_token_166|>",
2225
+ "<|reserved_special_token_167|>",
2226
+ "<|reserved_special_token_168|>",
2227
+ "<|reserved_special_token_169|>",
2228
+ "<|reserved_special_token_170|>",
2229
+ "<|reserved_special_token_171|>",
2230
+ "<|reserved_special_token_172|>",
2231
+ "<|reserved_special_token_173|>",
2232
+ "<|reserved_special_token_174|>",
2233
+ "<|reserved_special_token_175|>",
2234
+ "<|reserved_special_token_176|>",
2235
+ "<|reserved_special_token_177|>",
2236
+ "<|reserved_special_token_178|>",
2237
+ "<|reserved_special_token_179|>",
2238
+ "<|reserved_special_token_180|>",
2239
+ "<|reserved_special_token_181|>",
2240
+ "<|reserved_special_token_182|>",
2241
+ "<|reserved_special_token_183|>",
2242
+ "<|reserved_special_token_184|>",
2243
+ "<|reserved_special_token_185|>",
2244
+ "<|reserved_special_token_186|>",
2245
+ "<|reserved_special_token_187|>",
2246
+ "<|reserved_special_token_188|>",
2247
+ "<|reserved_special_token_189|>",
2248
+ "<|reserved_special_token_190|>",
2249
+ "<|reserved_special_token_191|>",
2250
+ "<|reserved_special_token_192|>",
2251
+ "<|reserved_special_token_193|>",
2252
+ "<|reserved_special_token_194|>",
2253
+ "<|reserved_special_token_195|>",
2254
+ "<|reserved_special_token_196|>",
2255
+ "<|reserved_special_token_197|>",
2256
+ "<|reserved_special_token_198|>",
2257
+ "<|reserved_special_token_199|>",
2258
+ "<|reserved_special_token_200|>",
2259
+ "<|reserved_special_token_201|>",
2260
+ "<|reserved_special_token_202|>",
2261
+ "<|reserved_special_token_203|>",
2262
+ "<|reserved_special_token_204|>",
2263
+ "<|reserved_special_token_205|>",
2264
+ "<|reserved_special_token_206|>",
2265
+ "<|reserved_special_token_207|>",
2266
+ "<|reserved_special_token_208|>",
2267
+ "<|reserved_special_token_209|>",
2268
+ "<|reserved_special_token_210|>",
2269
+ "<|reserved_special_token_211|>",
2270
+ "<|reserved_special_token_212|>",
2271
+ "<|reserved_special_token_213|>",
2272
+ "<|reserved_special_token_214|>",
2273
+ "<|reserved_special_token_215|>",
2274
+ "<|reserved_special_token_216|>",
2275
+ "<|reserved_special_token_217|>",
2276
+ "<|reserved_special_token_218|>",
2277
+ "<|reserved_special_token_219|>",
2278
+ "<|reserved_special_token_220|>",
2279
+ "<|reserved_special_token_221|>",
2280
+ "<|reserved_special_token_222|>",
2281
+ "<|reserved_special_token_223|>",
2282
+ "<|reserved_special_token_224|>",
2283
+ "<|reserved_special_token_225|>",
2284
+ "<|reserved_special_token_226|>",
2285
+ "<|reserved_special_token_227|>",
2286
+ "<|reserved_special_token_228|>",
2287
+ "<|reserved_special_token_229|>",
2288
+ "<|reserved_special_token_230|>",
2289
+ "<|reserved_special_token_231|>",
2290
+ "<|reserved_special_token_232|>",
2291
+ "<|reserved_special_token_233|>",
2292
+ "<|reserved_special_token_234|>",
2293
+ "<|reserved_special_token_235|>",
2294
+ "<|reserved_special_token_236|>",
2295
+ "<|reserved_special_token_237|>",
2296
+ "<|reserved_special_token_238|>",
2297
+ "<|reserved_special_token_239|>",
2298
+ "<|reserved_special_token_240|>",
2299
+ "<|reserved_special_token_241|>",
2300
+ "<|reserved_special_token_242|>",
2301
+ "<|reserved_special_token_243|>",
2302
+ "<|reserved_special_token_244|>",
2303
+ "<|reserved_special_token_245|>",
2304
+ "<|reserved_special_token_246|>",
2305
+ "<|reserved_special_token_247|>",
2306
+ "<|reserved_special_token_248|>",
2307
+ "<|reserved_special_token_249|>",
2308
+ "<|reserved_special_token_250|>"
2309
+ ],
2310
  "bos_token": "<|begin_of_text|>",
2311
+ "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
2312
  "clean_up_tokenization_spaces": true,
2313
+ "eos_token": "<|eot_id|>",
2314
  "model_input_names": [
2315
  "input_ids",
2316
  "attention_mask"
2317
  ],
2318
  "model_max_length": 1000000000000000019884624838656,
2319
+ "pad_token": "<|end_of_text|>",
2320
  "tokenizer_class": "PreTrainedTokenizerFast"
2321
  }