Ba2han committed on
Commit
9a37f01
·
verified ·
1 Parent(s): 12d2055

Upload model trained with Unsloth

Browse files

Upload model trained with Unsloth 2x faster

Files changed (4) hide show
  1. chat_template.jinja +49 -328
  2. processor_config.json +28 -64
  3. tokenizer.json +2 -2
  4. tokenizer_config.json +23 -92
chat_template.jinja CHANGED
@@ -1,344 +1,65 @@
1
- {%- macro format_parameters(properties, required) -%}
2
- {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
3
- {%- set ns = namespace(found_first=false) -%}
4
- {%- for key, value in properties | dictsort -%}
5
- {%- set add_comma = false -%}
6
- {%- if key not in standard_keys -%}
7
- {%- if ns.found_first %},{% endif -%}
8
- {%- set ns.found_first = true -%}
9
- {{ key }}:{
10
- {%- if value['description'] -%}
11
- description:<|"|>{{ value['description'] }}<|"|>
12
- {%- set add_comma = true -%}
13
- {%- endif -%}
14
- {%- if value['type'] | upper == 'STRING' -%}
15
- {%- if value['enum'] -%}
16
- {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
17
- enum:{{ format_argument(value['enum']) }}
18
- {%- endif -%}
19
- {%- elif value['type'] | upper == 'ARRAY' -%}
20
- {%- if value['items'] is mapping and value['items'] -%}
21
- {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
22
- items:{
23
- {%- set ns_items = namespace(found_first=false) -%}
24
- {%- for item_key, item_value in value['items'] | dictsort -%}
25
- {%- if item_value is not none -%}
26
- {%- if ns_items.found_first %},{% endif -%}
27
- {%- set ns_items.found_first = true -%}
28
- {%- if item_key == 'properties' -%}
29
- properties:{
30
- {%- if item_value is mapping -%}
31
- {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
32
- {%- endif -%}
33
- }
34
- {%- elif item_key == 'required' -%}
35
- required:[
36
- {%- for req_item in item_value -%}
37
- <|"|>{{- req_item -}}<|"|>
38
- {%- if not loop.last %},{% endif -%}
39
- {%- endfor -%}
40
- ]
41
- {%- elif item_key == 'type' -%}
42
- {%- if item_value is string -%}
43
- type:{{ format_argument(item_value | upper) }}
44
- {%- else -%}
45
- type:{{ format_argument(item_value | map('upper') | list) }}
46
- {%- endif -%}
47
- {%- else -%}
48
- {{ item_key }}:{{ format_argument(item_value) }}
49
- {%- endif -%}
50
- {%- endif -%}
51
- {%- endfor -%}
52
- }
53
- {%- endif -%}
54
- {%- endif -%}
55
- {%- if value['nullable'] %}
56
- {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
57
- nullable:true
58
- {%- endif -%}
59
- {%- if value['type'] | upper == 'OBJECT' -%}
60
- {%- if value['properties'] is defined and value['properties'] is mapping -%}
61
- {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
62
- properties:{
63
- {{- format_parameters(value['properties'], value['required'] | default([])) -}}
64
- }
65
- {%- elif value is mapping -%}
66
- {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
67
- properties:{
68
- {{- format_parameters(value, value['required'] | default([])) -}}
69
- }
70
- {%- endif -%}
71
- {%- if value['required'] -%}
72
- {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
73
- required:[
74
- {%- for item in value['required'] | default([]) -%}
75
- <|"|>{{- item -}}<|"|>
76
- {%- if not loop.last %},{% endif -%}
77
- {%- endfor -%}
78
- ]
79
- {%- endif -%}
80
- {%- endif -%}
81
- {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
82
- type:<|"|>{{ value['type'] | upper }}<|"|>}
83
- {%- endif -%}
84
- {%- endfor -%}
85
- {%- endmacro -%}
86
- {%- macro format_function_declaration(tool_data) -%}
87
- declaration:{{- tool_data['function']['name'] -}}{description:<|"|>{{- tool_data['function']['description'] -}}<|"|>
88
- {%- set params = tool_data['function']['parameters'] -%}
89
- {%- if params -%}
90
- ,parameters:{
91
- {%- if params['properties'] -%}
92
- properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
93
- {%- endif -%}
94
- {%- if params['required'] -%}
95
- required:[
96
- {%- for item in params['required'] -%}
97
- <|"|>{{- item -}}<|"|>
98
- {{- ',' if not loop.last -}}
99
- {%- endfor -%}
100
- ],
101
- {%- endif -%}
102
- {%- if params['type'] -%}
103
- type:<|"|>{{- params['type'] | upper -}}<|"|>}
104
- {%- endif -%}
105
- {%- endif -%}
106
- {%- if 'response' in tool_data['function'] -%}
107
- {%- set response_declaration = tool_data['function']['response'] -%}
108
- ,response:{
109
- {%- if response_declaration['description'] -%}
110
- description:<|"|>{{- response_declaration['description'] -}}<|"|>,
111
- {%- endif -%}
112
- {%- if response_declaration['type'] | upper == 'OBJECT' -%}
113
- type:<|"|>{{- response_declaration['type'] | upper -}}<|"|>}
114
- {%- endif -%}
115
- {%- endif -%}
116
- }
117
- {%- endmacro -%}
118
- {%- macro format_argument(argument, escape_keys=True) -%}
119
- {%- if argument is string -%}
120
- {{- '<|"|>' + argument + '<|"|>' -}}
121
- {%- elif argument is boolean -%}
122
- {{- 'true' if argument else 'false' -}}
123
- {%- elif argument is mapping -%}
124
- {{- '{' -}}
125
- {%- set ns = namespace(found_first=false) -%}
126
- {%- for key, value in argument | dictsort -%}
127
- {%- if ns.found_first %},{% endif -%}
128
- {%- set ns.found_first = true -%}
129
- {%- if escape_keys -%}
130
- {{- '<|"|>' + key + '<|"|>' -}}
131
- {%- else -%}
132
- {{- key -}}
133
  {%- endif -%}
134
- :{{- format_argument(value, escape_keys=escape_keys) -}}
135
- {%- endfor -%}
136
- {{- '}' -}}
137
- {%- elif argument is sequence -%}
138
- {{- '[' -}}
139
- {%- for item in argument -%}
140
- {{- format_argument(item, escape_keys=escape_keys) -}}
141
- {%- if not loop.last %},{% endif -%}
142
  {%- endfor -%}
143
- {{- ']' -}}
144
  {%- else -%}
145
- {{- argument -}}
146
  {%- endif -%}
147
- {%- endmacro -%}
148
- {%- macro strip_thinking(text) -%}
149
- {%- set ns = namespace(result='') -%}
150
- {%- for part in text.split('<channel|>') -%}
151
- {%- if '<|channel>' in part -%}
152
- {%- set ns.result = ns.result + part.split('<|channel>')[0] -%}
153
- {%- else -%}
154
- {%- set ns.result = ns.result + part -%}
 
 
 
155
  {%- endif -%}
156
  {%- endfor -%}
157
- {{- ns.result | trim -}}
158
- {%- endmacro -%}
159
-
160
- {%- macro format_tool_response_block(tool_name, response) -%}
161
- {{- '<|tool_response>' -}}
162
- {%- if response is mapping -%}
163
- {{- 'response:' + tool_name + '{' -}}
164
- {%- for key, value in response | dictsort -%}
165
- {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
166
- {%- if not loop.last %},{% endif -%}
167
- {%- endfor -%}
168
- {{- '}' -}}
169
- {%- else -%}
170
- {{- 'response:' + tool_name + '{value:' + format_argument(response, escape_keys=False) + '}' -}}
171
- {%- endif -%}
172
- {{- '<tool_response|>' -}}
173
- {%- endmacro -%}
174
-
175
- {%- set ns = namespace(prev_message_type=None) -%}
176
- {%- set loop_messages = messages -%}
177
- {{- bos_token -}}
178
- {#- Handle System/Tool Definitions Block -#}
179
- {%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
180
- {{- '<|turn>system\n' -}}
181
-
182
- {#- Inject Thinking token at the very top of the FIRST system turn -#}
183
- {%- if enable_thinking is defined and enable_thinking -%}
184
- {{- '<|think|>\n' -}}
185
- {%- set ns.prev_message_type = 'think' -%}
186
- {%- endif -%}
187
-
188
- {%- if messages[0]['role'] in ['system', 'developer'] -%}
189
- {{- messages[0]['content'] | trim -}}
190
- {%- set loop_messages = messages[1:] -%}
191
- {%- endif -%}
192
-
193
- {%- if tools -%}
194
- {%- for tool in tools %}
195
- {{- '<|tool>' -}}
196
- {{- format_function_declaration(tool) | trim -}}
197
- {{- '<tool|>' -}}
198
- {%- endfor %}
199
- {%- set ns.prev_message_type = 'tool' -%}
200
- {%- endif -%}
201
-
202
- {{- '<turn|>\n' -}}
203
- {%- endif %}
204
-
205
- {#- Pre-scan: find last user message index for reasoning guard -#}
206
- {%- set ns_turn = namespace(last_user_idx=-1) -%}
207
- {%- for i in range(loop_messages | length) -%}
208
- {%- if loop_messages[i]['role'] == 'user' -%}
209
- {%- set ns_turn.last_user_idx = i -%}
210
  {%- endif -%}
211
  {%- endfor -%}
212
-
213
- {#- Loop through messages -#}
214
- {%- for message in loop_messages -%}
215
- {%- if message['role'] != 'tool' -%}
216
- {%- set ns.prev_message_type = None -%}
217
- {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
218
- {#- Detect continuation: suppress duplicate <|turn>model when previous non-tool message was also assistant -#}
219
- {%- set prev_nt = namespace(role=None, found=false) -%}
220
- {%- if loop.index0 > 0 -%}
221
- {%- for j in range(loop.index0 - 1, -1, -1) -%}
222
- {%- if not prev_nt.found -%}
223
- {%- if loop_messages[j]['role'] != 'tool' -%}
224
- {%- set prev_nt.role = loop_messages[j]['role'] -%}
225
- {%- set prev_nt.found = true -%}
226
- {%- endif -%}
227
  {%- endif -%}
228
  {%- endfor -%}
 
 
 
229
  {%- endif -%}
230
- {%- set continue_same_model_turn = (role == 'model' and prev_nt.role == 'assistant') -%}
231
- {%- if not continue_same_model_turn -%}
232
- {{- '<|turn>' + role + '\n' }}
233
- {%- endif -%}
234
-
235
- {#- Render reasoning/reasoning_content as thinking channel -#}
236
- {%- set thinking_text = message.get('reasoning') or message.get('reasoning_content') -%}
237
- {%- if thinking_text and loop.index0 > ns_turn.last_user_idx and message.get('tool_calls') -%}
238
- {{- '<|channel>thought\n' + thinking_text + '\n<channel|>' -}}
239
- {%- endif -%}
240
-
241
- {%- if message['tool_calls'] -%}
242
- {%- for tool_call in message['tool_calls'] -%}
243
- {%- set function = tool_call['function'] -%}
244
- {{- '<|tool_call>call:' + function['name'] + '{' -}}
245
- {%- if function['arguments'] is mapping -%}
246
- {%- set ns_args = namespace(found_first=false) -%}
247
- {%- for key, value in function['arguments'] | dictsort -%}
248
- {%- if ns_args.found_first %},{% endif -%}
249
- {%- set ns_args.found_first = true -%}
250
- {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
251
- {%- endfor -%}
252
- {%- elif function['arguments'] is string -%}
253
- {{- function['arguments'] -}}
254
- {%- endif -%}
255
- {{- '}<tool_call|>' -}}
256
- {%- endfor -%}
257
- {%- set ns.prev_message_type = 'tool_call' -%}
258
- {%- endif -%}
259
-
260
- {%- set ns_tr_out = namespace(flag=false) -%}
261
- {%- if message.get('tool_responses') -%}
262
- {#- Legacy: tool_responses embedded on the assistant message (Google/Gemma native) -#}
263
- {%- for tool_response in message['tool_responses'] -%}
264
- {{- format_tool_response_block(tool_response['name'] | default('unknown'), tool_response['response']) -}}
265
- {%- set ns_tr_out.flag = true -%}
266
- {%- set ns.prev_message_type = 'tool_response' -%}
267
- {%- endfor -%}
268
- {%- elif message.get('tool_calls') -%}
269
- {#- OpenAI Chat Completions: forward-scan consecutive role:tool messages -#}
270
- {%- set ns_tool_scan = namespace(stopped=false) -%}
271
- {%- for k in range(loop.index0 + 1, loop_messages | length) -%}
272
- {%- if ns_tool_scan.stopped -%}
273
- {%- elif loop_messages[k]['role'] != 'tool' -%}
274
- {%- set ns_tool_scan.stopped = true -%}
275
- {%- else -%}
276
- {%- set follow = loop_messages[k] -%}
277
- {#- Resolve tool_call_id to function name -#}
278
- {%- set ns_tname = namespace(name=follow.get('name') | default('unknown')) -%}
279
- {%- for tc in message['tool_calls'] -%}
280
- {%- if tc.get('id') == follow.get('tool_call_id') -%}
281
- {%- set ns_tname.name = tc['function']['name'] -%}
282
- {%- endif -%}
283
- {%- endfor -%}
284
- {#- Handle content as string or content-parts array -#}
285
- {%- set tool_body = follow.get('content') -%}
286
- {%- if tool_body is string -%}
287
- {{- format_tool_response_block(ns_tname.name, tool_body) -}}
288
- {%- elif tool_body is sequence and tool_body is not string -%}
289
- {%- set ns_txt = namespace(s='') -%}
290
- {%- for part in tool_body -%}
291
- {%- if part.get('type') == 'text' -%}
292
- {%- set ns_txt.s = ns_txt.s + (part.get('text') | default('')) -%}
293
- {%- endif -%}
294
- {%- endfor -%}
295
- {{- format_tool_response_block(ns_tname.name, ns_txt.s) -}}
296
- {%- else -%}
297
- {{- format_tool_response_block(ns_tname.name, tool_body) -}}
298
- {%- endif -%}
299
- {%- set ns_tr_out.flag = true -%}
300
- {%- set ns.prev_message_type = 'tool_response' -%}
301
- {%- endif -%}
302
- {%- endfor -%}
303
- {%- endif -%}
304
-
305
- {%- if message['content'] is string -%}
306
- {%- if role == 'model' -%}
307
- {{- strip_thinking(message['content']) -}}
308
- {%- else -%}
309
- {{- message['content'] | trim -}}
310
- {%- endif -%}
311
- {%- elif message['content'] is sequence -%}
312
- {%- for item in message['content'] -%}
313
- {%- if item['type'] == 'text' -%}
314
- {%- if role == 'model' -%}
315
- {{- strip_thinking(item['text']) -}}
316
- {%- else -%}
317
- {{- item['text'] | trim -}}
318
- {%- endif -%}
319
- {%- elif item['type'] == 'image' -%}
320
- {{- '<|image|>' -}}
321
- {%- set ns.prev_message_type = 'image' -%}
322
- {%- elif item['type'] == 'audio' -%}
323
- {{- '<|audio|>' -}}
324
- {%- set ns.prev_message_type = 'audio' -%}
325
- {%- elif item['type'] == 'video' -%}
326
- {{- '<|video|>' -}}
327
- {%- set ns.prev_message_type = 'video' -%}
328
- {%- endif -%}
329
- {%- endfor -%}
330
- {%- endif -%}
331
-
332
- {%- if ns.prev_message_type == 'tool_call' and not ns_tr_out.flag -%}
333
- {{- '<|tool_response>' -}}
334
- {%- elif not (ns_tr_out.flag and not message.get('content')) -%}
335
- {{- '<turn|>\n' -}}
336
  {%- endif -%}
337
  {%- endif -%}
 
338
  {%- endfor -%}
339
-
340
  {%- if add_generation_prompt -%}
341
- {%- if ns.prev_message_type != 'tool_response' and ns.prev_message_type != 'tool_call' -%}
342
- {{- '<|turn>model\n' -}}
343
- {%- endif -%}
344
  {%- endif -%}
 
1
+ {{- bos_token -}}
2
+ {%- set keep_past_thinking = keep_past_thinking | default(false) -%}
3
+ {%- set ns = namespace(system_prompt="") -%}
4
+ {%- if messages[0]["role"] == "system" -%}
5
+ {%- set sys_content = messages[0]["content"] -%}
6
+ {%- if sys_content is not string -%}
7
+ {%- for item in sys_content -%}
8
+ {%- if item["type"] == "text" -%}
9
+ {%- set ns.system_prompt = ns.system_prompt + item["text"] -%}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {%- endif -%}
 
 
 
 
 
 
 
 
11
  {%- endfor -%}
 
12
  {%- else -%}
13
+ {%- set ns.system_prompt = sys_content -%}
14
  {%- endif -%}
15
+ {%- set messages = messages[1:] -%}
16
+ {%- endif -%}
17
+ {%- if tools -%}
18
+ {%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "List of tools: [" -%}
19
+ {%- for tool in tools -%}
20
+ {%- if tool is not string -%}
21
+ {%- set tool = tool | tojson -%}
22
+ {%- endif -%}
23
+ {%- set ns.system_prompt = ns.system_prompt + tool -%}
24
+ {%- if not loop.last -%}
25
+ {%- set ns.system_prompt = ns.system_prompt + ", " -%}
26
  {%- endif -%}
27
  {%- endfor -%}
28
+ {%- set ns.system_prompt = ns.system_prompt + "]" -%}
29
+ {%- endif -%}
30
+ {%- if ns.system_prompt -%}
31
+ {{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}}
32
+ {%- endif -%}
33
+ {%- set ns.last_assistant_index = -1 -%}
34
+ {%- for message in messages -%}
35
+ {%- if message["role"] == "assistant" -%}
36
+ {%- set ns.last_assistant_index = loop.index0 -%}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  {%- endif -%}
38
  {%- endfor -%}
39
+ {%- for message in messages -%}
40
+ {{- "<|im_start|>" + message["role"] + "\n" -}}
41
+ {%- if message["content"] is not string -%}
42
+ {%- set ns.content = "" -%}
43
+ {%- for item in message["content"] -%}
44
+ {%- if item["type"] == "image" -%}
45
+ {%- set ns.content = ns.content + "<image>" -%}
46
+ {%- elif item["type"] == "text" -%}
47
+ {%- set ns.content = ns.content + item["text"] -%}
48
+ {%- else -%}
49
+ {%- set ns.content = ns.content + item | tojson -%}
 
 
 
 
50
  {%- endif -%}
51
  {%- endfor -%}
52
+ {%- set content = ns.content -%}
53
+ {%- else -%}
54
+ {%- set content = message["content"] -%}
55
  {%- endif -%}
56
+ {%- if message["role"] == "assistant" and not keep_past_thinking and loop.index0 != ns.last_assistant_index -%}
57
+ {%- if "</think>" in content -%}
58
+ {%- set content = content.split("</think>")[-1] | trim -%}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  {%- endif -%}
60
  {%- endif -%}
61
+ {{- content + "<|im_end|>\n" -}}
62
  {%- endfor -%}
 
63
  {%- if add_generation_prompt -%}
64
+ {{- "<|im_start|>assistant\n" -}}
 
 
65
  {%- endif -%}
processor_config.json CHANGED
@@ -1,75 +1,39 @@
1
  {
2
- "audio_ms_per_token": 40,
3
- "audio_seq_length": 750,
4
- "feature_extractor": {
5
- "dither": 0.0,
6
- "feature_extractor_type": "Gemma4AudioFeatureExtractor",
7
- "feature_size": 128,
8
- "fft_length": 512,
9
- "fft_overdrive": false,
10
- "frame_length": 320,
11
- "hop_length": 160,
12
- "input_scale_factor": 1.0,
13
- "max_frequency": 8000.0,
14
- "mel_floor": 0.001,
15
- "min_frequency": 0.0,
16
- "padding_side": "right",
17
- "padding_value": 0.0,
18
- "per_bin_mean": null,
19
- "per_bin_stddev": null,
20
- "preemphasis": 0.0,
21
- "preemphasis_htk_flavor": true,
22
- "return_attention_mask": true,
23
- "sampling_rate": 16000
24
- },
25
  "image_processor": {
26
- "do_convert_rgb": true,
27
- "do_normalize": false,
28
- "do_rescale": true,
29
- "do_resize": true,
30
- "image_mean": [
31
- 0.0,
32
- 0.0,
33
- 0.0
34
- ],
35
- "image_processor_type": "Gemma4ImageProcessor",
36
- "image_seq_length": 280,
37
- "image_std": [
38
- 1.0,
39
- 1.0,
40
- 1.0
41
- ],
42
- "max_soft_tokens": 280,
43
- "patch_size": 16,
44
- "pooling_kernel_size": 3,
45
- "resample": 3,
46
- "rescale_factor": 0.00392156862745098
47
- },
48
- "image_seq_length": 280,
49
- "processor_class": "Gemma4Processor",
50
- "video_processor": {
51
- "do_convert_rgb": true,
52
  "do_normalize": true,
 
53
  "do_rescale": true,
54
  "do_resize": true,
55
- "do_sample_frames": true,
 
56
  "image_mean": [
57
- 0.0,
58
- 0.0,
59
- 0.0
60
  ],
 
61
  "image_std": [
62
- 1.0,
63
- 1.0,
64
- 1.0
65
  ],
66
- "max_soft_tokens": 70,
67
- "num_frames": 32,
68
- "patch_size": 16,
69
- "pooling_kernel_size": 3,
70
- "resample": 3,
 
 
71
  "rescale_factor": 0.00392156862745098,
72
- "return_metadata": false,
73
- "video_processor_type": "Gemma4VideoProcessor"
74
- }
 
 
 
 
 
 
75
  }
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "image_processor": {
3
+ "data_format": "channels_first",
4
+ "do_image_splitting": true,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "do_normalize": true,
6
+ "do_pad": true,
7
  "do_rescale": true,
8
  "do_resize": true,
9
+ "downsample_factor": 2,
10
+ "encoder_patch_size": 16,
11
  "image_mean": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
  ],
16
+ "image_processor_type": "Lfm2VlImageProcessor",
17
  "image_std": [
18
+ 0.5,
19
+ 0.5,
20
+ 0.5
21
  ],
22
+ "max_image_tokens": 256,
23
+ "max_num_patches": 1024,
24
+ "max_pixels_tolerance": 2.0,
25
+ "max_tiles": 10,
26
+ "min_image_tokens": 64,
27
+ "min_tiles": 2,
28
+ "resample": 2,
29
  "rescale_factor": 0.00392156862745098,
30
+ "return_row_col_info": true,
31
+ "size": {
32
+ "height": 512,
33
+ "width": 512
34
+ },
35
+ "tile_size": 512,
36
+ "use_thumbnail": true
37
+ },
38
+ "processor_class": "Lfm2VlProcessor"
39
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
3
- size 32169626
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3910942aa907c48b0cc20ec426ee38bfa8dcda8feecf035ced981918cb30f14
3
+ size 4733040
tokenizer_config.json CHANGED
@@ -1,96 +1,27 @@
1
  {
2
- "audio_token": "<|audio|>",
3
  "backend": "tokenizers",
4
- "boa_token": "<|audio>",
5
- "boi_token": "<|image>",
6
- "bos_token": "<bos>",
7
- "eoa_token": "<audio|>",
8
- "eoc_token": "<channel|>",
9
- "eoi_token": "<image|>",
10
- "eos_token": "<turn|>",
11
- "eot_token": "<turn|>",
12
- "escape_token": "<|\"|>",
13
- "etc_token": "<tool_call|>",
14
- "etd_token": "<tool|>",
15
- "etr_token": "<tool_response|>",
16
- "extra_special_tokens": [
17
- "<|video|>"
18
- ],
19
- "image_token": "<|image|>",
20
- "is_local": true,
21
- "mask_token": "<mask>",
22
- "model_max_length": 131072,
23
  "model_specific_special_tokens": {
24
- "audio_token": "<|audio|>",
25
- "boa_token": "<|audio>",
26
- "boi_token": "<|image>",
27
- "eoa_token": "<audio|>",
28
- "eoc_token": "<channel|>",
29
- "eoi_token": "<image|>",
30
- "eot_token": "<turn|>",
31
- "escape_token": "<|\"|>",
32
- "etc_token": "<tool_call|>",
33
- "etd_token": "<tool|>",
34
- "etr_token": "<tool_response|>",
35
- "image_token": "<|image|>",
36
- "soc_token": "<|channel>",
37
- "sot_token": "<|turn>",
38
- "stc_token": "<|tool_call>",
39
- "std_token": "<|tool>",
40
- "str_token": "<|tool_response>",
41
- "think_token": "<|think|>"
42
  },
43
- "pad_token": "<pad>",
44
- "padding_side": "left",
45
- "processor_class": "Gemma4Processor",
46
- "response_schema": {
47
- "properties": {
48
- "content": {
49
- "type": "string"
50
- },
51
- "role": {
52
- "const": "assistant"
53
- },
54
- "thinking": {
55
- "type": "string"
56
- },
57
- "tool_calls": {
58
- "items": {
59
- "properties": {
60
- "function": {
61
- "properties": {
62
- "arguments": {
63
- "additionalProperties": {},
64
- "type": "object",
65
- "x-parser": "gemma4-tool-call"
66
- },
67
- "name": {
68
- "type": "string"
69
- }
70
- },
71
- "type": "object",
72
- "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})"
73
- },
74
- "type": {
75
- "const": "function"
76
- }
77
- },
78
- "type": "object"
79
- },
80
- "type": "array",
81
- "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>"
82
- }
83
- },
84
- "type": "object",
85
- "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?P<content>(?:(?!\\<turn\\|\\>)(?!\\<\\|tool_response\\>).)+)?(?:\\<turn\\|\\>|\\<\\|tool_response\\>)?"
86
- },
87
- "soc_token": "<|channel>",
88
- "sot_token": "<|turn>",
89
- "stc_token": "<|tool_call>",
90
- "std_token": "<|tool>",
91
- "str_token": "<|tool_response>",
92
- "think_token": "<|think|>",
93
- "tokenizer_class": "GemmaTokenizer",
94
- "unk_token": "<unk>",
95
- "chat_template": "{%- macro format_parameters(properties, required) -%}\n {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}\n {%- set ns = namespace(found_first=false) -%}\n {%- for key, value in properties | dictsort -%}\n {%- set add_comma = false -%}\n {%- if key not in standard_keys -%}\n {%- if ns.found_first %},{% endif -%}\n {%- set ns.found_first = true -%}\n {{ key }}:{\n {%- if value['description'] -%}\n description:<|\"|>{{ value['description'] }}<|\"|>\n {%- set add_comma = true -%}\n {%- endif -%}\n {%- if value['type'] | upper == 'STRING' -%}\n {%- if value['enum'] -%}\n {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}\n enum:{{ format_argument(value['enum']) }}\n {%- endif -%}\n {%- elif value['type'] | upper == 'ARRAY' -%}\n {%- if value['items'] is mapping and value['items'] -%}\n {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}\n items:{\n {%- set ns_items = namespace(found_first=false) -%}\n {%- for item_key, item_value in value['items'] | dictsort -%}\n {%- if item_value is not none -%}\n {%- if ns_items.found_first %},{% endif -%}\n {%- set ns_items.found_first = true -%}\n {%- if item_key == 'properties' -%}\n properties:{\n {%- if item_value is mapping -%}\n {{- format_parameters(item_value, value['items']['required'] | default([])) -}}\n {%- endif -%}\n }\n {%- elif item_key == 'required' -%}\n required:[\n {%- for req_item in item_value -%}\n <|\"|>{{- req_item -}}<|\"|>\n {%- if not loop.last %},{% endif -%}\n {%- endfor -%}\n ]\n {%- elif item_key == 'type' -%}\n {%- if item_value is string -%}\n type:{{ format_argument(item_value | upper) }}\n {%- else -%}\n type:{{ format_argument(item_value | map('upper') | list) }}\n {%- endif -%}\n {%- else -%}\n {{ item_key }}:{{ format_argument(item_value) }}\n {%- endif -%}\n {%- endif -%}\n {%- endfor -%}\n }\n {%- endif -%}\n {%- endif -%}\n {%- if value['nullable'] %}\n {%- if add_comma %},{%- else -%} 
{%- set add_comma = true -%} {% endif -%}\n nullable:true\n {%- endif -%}\n {%- if value['type'] | upper == 'OBJECT' -%}\n {%- if value['properties'] is defined and value['properties'] is mapping -%}\n {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}\n properties:{\n {{- format_parameters(value['properties'], value['required'] | default([])) -}}\n }\n {%- elif value is mapping -%}\n {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}\n properties:{\n {{- format_parameters(value, value['required'] | default([])) -}}\n }\n {%- endif -%}\n {%- if value['required'] -%}\n {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}\n required:[\n {%- for item in value['required'] | default([]) -%}\n <|\"|>{{- item -}}<|\"|>\n {%- if not loop.last %},{% endif -%}\n {%- endfor -%}\n ]\n {%- endif -%}\n {%- endif -%}\n {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}\n type:<|\"|>{{ value['type'] | upper }}<|\"|>}\n {%- endif -%}\n {%- endfor -%}\n{%- endmacro -%}\n{%- macro format_function_declaration(tool_data) -%}\n declaration:{{- tool_data['function']['name'] -}}{description:<|\"|>{{- tool_data['function']['description'] -}}<|\"|>\n {%- set params = tool_data['function']['parameters'] -%}\n {%- if params -%}\n ,parameters:{\n {%- if params['properties'] -%}\n properties:{ {{- format_parameters(params['properties'], params['required']) -}} },\n {%- endif -%}\n {%- if params['required'] -%}\n required:[\n {%- for item in params['required'] -%}\n <|\"|>{{- item -}}<|\"|>\n {{- ',' if not loop.last -}}\n {%- endfor -%}\n ],\n {%- endif -%}\n {%- if params['type'] -%}\n type:<|\"|>{{- params['type'] | upper -}}<|\"|>}\n {%- endif -%}\n {%- endif -%}\n {%- if 'response' in tool_data['function'] -%}\n {%- set response_declaration = tool_data['function']['response'] -%}\n ,response:{\n {%- if response_declaration['description'] -%}\n description:<|\"|>{{- 
response_declaration['description'] -}}<|\"|>,\n {%- endif -%}\n {%- if response_declaration['type'] | upper == 'OBJECT' -%}\n type:<|\"|>{{- response_declaration['type'] | upper -}}<|\"|>}\n {%- endif -%}\n {%- endif -%}\n }\n{%- endmacro -%}\n{%- macro format_argument(argument, escape_keys=True) -%}\n {%- if argument is string -%}\n {{- '<|\"|>' + argument + '<|\"|>' -}}\n {%- elif argument is boolean -%}\n {{- 'true' if argument else 'false' -}}\n {%- elif argument is mapping -%}\n {{- '{' -}}\n {%- set ns = namespace(found_first=false) -%}\n {%- for key, value in argument | dictsort -%}\n {%- if ns.found_first %},{% endif -%}\n {%- set ns.found_first = true -%}\n {%- if escape_keys -%}\n {{- '<|\"|>' + key + '<|\"|>' -}}\n {%- else -%}\n {{- key -}}\n {%- endif -%}\n :{{- format_argument(value, escape_keys=escape_keys) -}}\n {%- endfor -%}\n {{- '}' -}}\n {%- elif argument is sequence -%}\n {{- '[' -}}\n {%- for item in argument -%}\n {{- format_argument(item, escape_keys=escape_keys) -}}\n {%- if not loop.last %},{% endif -%}\n {%- endfor -%}\n {{- ']' -}}\n {%- else -%}\n {{- argument -}}\n {%- endif -%}\n{%- endmacro -%}\n{%- macro strip_thinking(text) -%}\n {%- set ns = namespace(result='') -%}\n {%- for part in text.split('<channel|>') -%}\n {%- if '<|channel>' in part -%}\n {%- set ns.result = ns.result + part.split('<|channel>')[0] -%}\n {%- else -%}\n {%- set ns.result = ns.result + part -%}\n {%- endif -%}\n {%- endfor -%}\n {{- ns.result | trim -}}\n{%- endmacro -%}\n\n{%- macro format_tool_response_block(tool_name, response) -%}\n {{- '<|tool_response>' -}}\n {%- if response is mapping -%}\n {{- 'response:' + tool_name + '{' -}}\n {%- for key, value in response | dictsort -%}\n {{- key -}}:{{- format_argument(value, escape_keys=False) -}}\n {%- if not loop.last %},{% endif -%}\n {%- endfor -%}\n {{- '}' -}}\n {%- else -%}\n {{- 'response:' + tool_name + '{value:' + format_argument(response, escape_keys=False) + '}' -}}\n {%- endif -%}\n {{- 
'<tool_response|>' -}}\n{%- endmacro -%}\n\n{%- set ns = namespace(prev_message_type=None) -%}\n{%- set loop_messages = messages -%}\n{{- bos_token -}}\n{#- Handle System/Tool Definitions Block -#}\n{%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}\n {{- '<|turn>system\\n' -}}\n\n {#- Inject Thinking token at the very top of the FIRST system turn -#}\n {%- if enable_thinking is defined and enable_thinking -%}\n {{- '<|think|>\\n' -}}\n {%- set ns.prev_message_type = 'think' -%}\n {%- endif -%}\n\n {%- if messages[0]['role'] in ['system', 'developer'] -%}\n {{- messages[0]['content'] | trim -}}\n {%- set loop_messages = messages[1:] -%}\n {%- endif -%}\n\n {%- if tools -%}\n {%- for tool in tools %}\n {{- '<|tool>' -}}\n {{- format_function_declaration(tool) | trim -}}\n {{- '<tool|>' -}}\n {%- endfor %}\n {%- set ns.prev_message_type = 'tool' -%}\n {%- endif -%}\n\n {{- '<turn|>\\n' -}}\n{%- endif %}\n\n{#- Pre-scan: find last user message index for reasoning guard -#}\n{%- set ns_turn = namespace(last_user_idx=-1) -%}\n{%- for i in range(loop_messages | length) -%}\n {%- if loop_messages[i]['role'] == 'user' -%}\n {%- set ns_turn.last_user_idx = i -%}\n {%- endif -%}\n{%- endfor -%}\n\n{#- Loop through messages -#}\n{%- for message in loop_messages -%}\n {%- if message['role'] != 'tool' -%}\n {%- set ns.prev_message_type = None -%}\n {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}\n {#- Detect continuation: suppress duplicate <|turn>model when previous non-tool message was also assistant -#}\n {%- set prev_nt = namespace(role=None, found=false) -%}\n {%- if loop.index0 > 0 -%}\n {%- for j in range(loop.index0 - 1, -1, -1) -%}\n {%- if not prev_nt.found -%}\n {%- if loop_messages[j]['role'] != 'tool' -%}\n {%- set prev_nt.role = loop_messages[j]['role'] -%}\n {%- set prev_nt.found = true -%}\n {%- endif -%}\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n {%- 
set continue_same_model_turn = (role == 'model' and prev_nt.role == 'assistant') -%}\n {%- if not continue_same_model_turn -%}\n {{- '<|turn>' + role + '\\n' }}\n {%- endif -%}\n\n {#- Render reasoning/reasoning_content as thinking channel -#}\n {%- set thinking_text = message.get('reasoning') or message.get('reasoning_content') -%}\n {%- if thinking_text and loop.index0 > ns_turn.last_user_idx and message.get('tool_calls') -%}\n {{- '<|channel>thought\\n' + thinking_text + '\\n<channel|>' -}}\n {%- endif -%}\n\n {%- if message['tool_calls'] -%}\n {%- for tool_call in message['tool_calls'] -%}\n {%- set function = tool_call['function'] -%}\n {{- '<|tool_call>call:' + function['name'] + '{' -}}\n {%- if function['arguments'] is mapping -%}\n {%- set ns_args = namespace(found_first=false) -%}\n {%- for key, value in function['arguments'] | dictsort -%}\n {%- if ns_args.found_first %},{% endif -%}\n {%- set ns_args.found_first = true -%}\n {{- key -}}:{{- format_argument(value, escape_keys=False) -}}\n {%- endfor -%}\n {%- elif function['arguments'] is string -%}\n {{- function['arguments'] -}}\n {%- endif -%}\n {{- '}<tool_call|>' -}}\n {%- endfor -%}\n {%- set ns.prev_message_type = 'tool_call' -%}\n {%- endif -%}\n\n {%- set ns_tr_out = namespace(flag=false) -%}\n {%- if message.get('tool_responses') -%}\n {#- Legacy: tool_responses embedded on the assistant message (Google/Gemma native) -#}\n {%- for tool_response in message['tool_responses'] -%}\n {{- format_tool_response_block(tool_response['name'] | default('unknown'), tool_response['response']) -}}\n {%- set ns_tr_out.flag = true -%}\n {%- set ns.prev_message_type = 'tool_response' -%}\n {%- endfor -%}\n {%- elif message.get('tool_calls') -%}\n {#- OpenAI Chat Completions: forward-scan consecutive role:tool messages -#}\n {%- set ns_tool_scan = namespace(stopped=false) -%}\n {%- for k in range(loop.index0 + 1, loop_messages | length) -%}\n {%- if ns_tool_scan.stopped -%}\n {%- elif loop_messages[k]['role'] != 
'tool' -%}\n {%- set ns_tool_scan.stopped = true -%}\n {%- else -%}\n {%- set follow = loop_messages[k] -%}\n {#- Resolve tool_call_id to function name -#}\n {%- set ns_tname = namespace(name=follow.get('name') | default('unknown')) -%}\n {%- for tc in message['tool_calls'] -%}\n {%- if tc.get('id') == follow.get('tool_call_id') -%}\n {%- set ns_tname.name = tc['function']['name'] -%}\n {%- endif -%}\n {%- endfor -%}\n {#- Handle content as string or content-parts array -#}\n {%- set tool_body = follow.get('content') -%}\n {%- if tool_body is string -%}\n {{- format_tool_response_block(ns_tname.name, tool_body) -}}\n {%- elif tool_body is sequence and tool_body is not string -%}\n {%- set ns_txt = namespace(s='') -%}\n {%- for part in tool_body -%}\n {%- if part.get('type') == 'text' -%}\n {%- set ns_txt.s = ns_txt.s + (part.get('text') | default('')) -%}\n {%- endif -%}\n {%- endfor -%}\n {{- format_tool_response_block(ns_tname.name, ns_txt.s) -}}\n {%- else -%}\n {{- format_tool_response_block(ns_tname.name, tool_body) -}}\n {%- endif -%}\n {%- set ns_tr_out.flag = true -%}\n {%- set ns.prev_message_type = 'tool_response' -%}\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n\n {%- if message['content'] is string -%}\n {%- if role == 'model' -%}\n {{- strip_thinking(message['content']) -}}\n {%- else -%}\n {{- message['content'] | trim -}}\n {%- endif -%}\n {%- elif message['content'] is sequence -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'text' -%}\n {%- if role == 'model' -%}\n {{- strip_thinking(item['text']) -}}\n {%- else -%}\n {{- item['text'] | trim -}}\n {%- endif -%}\n {%- elif item['type'] == 'image' -%}\n {{- '<|image|>' -}}\n {%- set ns.prev_message_type = 'image' -%}\n {%- elif item['type'] == 'audio' -%}\n {{- '<|audio|>' -}}\n {%- set ns.prev_message_type = 'audio' -%}\n {%- elif item['type'] == 'video' -%}\n {{- '<|video|>' -}}\n {%- set ns.prev_message_type = 'video' -%}\n {%- endif -%}\n {%- endfor -%}\n {%- endif 
-%}\n\n {%- if ns.prev_message_type == 'tool_call' and not ns_tr_out.flag -%}\n {{- '<|tool_response>' -}}\n {%- elif not (ns_tr_out.flag and not message.get('content')) -%}\n {{- '<turn|>\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n\n{%- if add_generation_prompt -%}\n {%- if ns.prev_message_type != 'tool_response' and ns.prev_message_type != 'tool_call' -%}\n {{- '<|turn>model\\n' -}}\n {%- endif -%}\n{%- endif -%}"
96
- }
 
1
  {
 
2
  "backend": "tokenizers",
3
+ "bos_token": "<|startoftext|>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "eos_token": "<|im_end|>",
6
+ "image_end_token": "<|image_end|>",
7
+ "image_start_token": "<|image_start|>",
8
+ "image_thumbnail": "<|img_thumbnail|>",
9
+ "image_token": "<image>",
10
+ "is_local": false,
11
+ "legacy": false,
12
+ "model_max_length": 1000000000000000019884624838656,
 
 
 
 
 
 
 
 
 
13
  "model_specific_special_tokens": {
14
+ "image_end_token": "<|image_end|>",
15
+ "image_start_token": "<|image_start|>",
16
+ "image_token": "<image>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  },
18
+ "pad_token": "<|pad|>",
19
+ "padding_side": "right",
20
+ "processor_class": "Lfm2VlProcessor",
21
+ "return_token_type_ids": false,
22
+ "sp_model_kwargs": {},
23
+ "spaces_between_special_tokens": false,
24
+ "tokenizer_class": "TokenizersBackend",
25
+ "use_default_system_prompt": false,
26
+ "use_fast": true
27
+ }