mudler committed on
Commit
dac778a
·
verified ·
1 Parent(s): d772624

Upload finetuned FunctionGemma for Italian function calling

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +5 -0
  2. README.md +58 -0
  3. chat_template.jinja +279 -0
  4. checkpoint-154/chat_template.jinja +279 -0
  5. checkpoint-154/config.json +62 -0
  6. checkpoint-154/generation_config.json +15 -0
  7. checkpoint-154/model.safetensors +3 -0
  8. checkpoint-154/optimizer.pt +3 -0
  9. checkpoint-154/rng_state.pth +3 -0
  10. checkpoint-154/scheduler.pt +3 -0
  11. checkpoint-154/tokenizer.json +3 -0
  12. checkpoint-154/tokenizer_config.json +26 -0
  13. checkpoint-154/trainer_state.json +97 -0
  14. checkpoint-154/training_args.bin +3 -0
  15. checkpoint-231/chat_template.jinja +279 -0
  16. checkpoint-231/config.json +62 -0
  17. checkpoint-231/generation_config.json +15 -0
  18. checkpoint-231/model.safetensors +3 -0
  19. checkpoint-231/optimizer.pt +3 -0
  20. checkpoint-231/rng_state.pth +3 -0
  21. checkpoint-231/scheduler.pt +3 -0
  22. checkpoint-231/tokenizer.json +3 -0
  23. checkpoint-231/tokenizer_config.json +26 -0
  24. checkpoint-231/trainer_state.json +118 -0
  25. checkpoint-231/training_args.bin +3 -0
  26. checkpoint-308/chat_template.jinja +279 -0
  27. checkpoint-308/config.json +62 -0
  28. checkpoint-308/generation_config.json +15 -0
  29. checkpoint-308/model.safetensors +3 -0
  30. checkpoint-308/optimizer.pt +3 -0
  31. checkpoint-308/rng_state.pth +3 -0
  32. checkpoint-308/scheduler.pt +3 -0
  33. checkpoint-308/tokenizer.json +3 -0
  34. checkpoint-308/tokenizer_config.json +26 -0
  35. checkpoint-308/trainer_state.json +171 -0
  36. checkpoint-308/training_args.bin +3 -0
  37. checkpoint-77/chat_template.jinja +279 -0
  38. checkpoint-77/config.json +62 -0
  39. checkpoint-77/generation_config.json +15 -0
  40. checkpoint-77/model.safetensors +3 -0
  41. checkpoint-77/optimizer.pt +3 -0
  42. checkpoint-77/rng_state.pth +3 -0
  43. checkpoint-77/scheduler.pt +3 -0
  44. checkpoint-77/tokenizer.json +3 -0
  45. checkpoint-77/tokenizer_config.json +26 -0
  46. checkpoint-77/trainer_state.json +55 -0
  47. checkpoint-77/training_args.bin +3 -0
  48. config.json +62 -0
  49. generation_config.json +15 -0
  50. model.safetensors +3 -0
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoint-154/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ checkpoint-231/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ checkpoint-308/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ checkpoint-77/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: google/functiongemma-270m-it
3
+ library_name: transformers
4
+ model_name: outputs
5
+ tags:
6
+ - generated_from_trainer
7
+ - sft
8
+ - trl
9
+ license: gemma
10
+ ---
11
+
12
+ # Model Card for outputs
13
+
14
+ This model is a fine-tuned version of [google/functiongemma-270m-it](https://huggingface.co/google/functiongemma-270m-it) for Italian function calling.
15
+ It has been trained using [TRL](https://github.com/huggingface/trl).
16
+
17
+ ## Quick start
18
+
19
+ ```python
20
+ from transformers import pipeline
21
+
22
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
23
+ generator = pipeline("text-generation", model="None", device="cuda")
24
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
25
+ print(output["generated_text"])
26
+ ```
27
+
28
+ ## Training procedure
29
+
30
+
31
+
32
+
33
+
34
+ This model was trained with supervised fine-tuning (SFT).
35
+
36
+ ### Framework versions
37
+
38
+ - TRL: 1.0.0
39
+ - Transformers: 5.5.1
40
+ - Pytorch: 2.11.0
41
+ - Datasets: 4.8.4
42
+ - Tokenizers: 0.22.2
43
+
44
+ ## Citations
45
+
46
+
47
+
48
+ Cite TRL as:
49
+
50
+ ```bibtex
51
+ @software{vonwerra2020trl,
52
+ title = {{TRL: Transformers Reinforcement Learning}},
53
+ author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
54
+ license = {Apache-2.0},
55
+ url = {https://github.com/huggingface/trl},
56
+ year = {2020}
57
+ }
58
+ ```
chat_template.jinja ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- macro format_parameters(properties, required) -%}
2
+ {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
3
+ {%- set ns = namespace(found_first=false) -%}
4
+ {%- for key, value in properties | dictsort -%}
5
+ {%- if key not in standard_keys -%}
6
+ {%- if ns.found_first %},{% endif -%}
7
+ {%- set ns.found_first = true -%}
8
+ {{- key }}:{description:<escape>{{ value['description'] }}<escape>
9
+ {%- if value['type'] | upper == 'STRING' -%}
10
+ {%- if value['enum'] -%}
11
+ ,enum:{{ format_argument(value['enum']) }}
12
+ {%- endif -%}
13
+ {%- elif value['type'] | upper == 'OBJECT' -%}
14
+ ,properties:{
15
+ {%- if value['properties'] is defined and value['properties'] is mapping -%}
16
+ {{- format_parameters(value['properties'], value['required'] | default([])) -}}
17
+ {%- elif value is mapping -%}
18
+ {{- format_parameters(value, value['required'] | default([])) -}}
19
+ {%- endif -%}
20
+ }
21
+ {%- if value['required'] -%}
22
+ ,required:[
23
+ {%- for item in value['required'] | default([]) -%}
24
+ <escape>{{- item -}}<escape>
25
+ {%- if not loop.last %},{% endif -%}
26
+ {%- endfor -%}
27
+ ]
28
+ {%- endif -%}
29
+ {%- elif value['type'] | upper == 'ARRAY' -%}
30
+ {%- if value['items'] is mapping and value['items'] -%}
31
+ ,items:{
32
+ {%- set ns_items = namespace(found_first=false) -%}
33
+ {%- for item_key, item_value in value['items'] | dictsort -%}
34
+ {%- if item_value is not none -%}
35
+ {%- if ns_items.found_first %},{% endif -%}
36
+ {%- set ns_items.found_first = true -%}
37
+ {%- if item_key == 'properties' -%}
38
+ properties:{
39
+ {%- if item_value is mapping -%}
40
+ {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
41
+ {%- endif -%}
42
+ }
43
+ {%- elif item_key == 'required' -%}
44
+ required:[
45
+ {%- for req_item in item_value -%}
46
+ <escape>{{- req_item -}}<escape>
47
+ {%- if not loop.last %},{% endif -%}
48
+ {%- endfor -%}
49
+ ]
50
+ {%- elif item_key == 'type' -%}
51
+ {%- if item_value is string -%}
52
+ type:{{ format_argument(item_value | upper) }}
53
+ {%- else -%}
54
+ type:{{ format_argument(item_value | map('upper') | list) }}
55
+ {%- endif -%}
56
+ {%- else -%}
57
+ {{ item_key }}:{{ format_argument(item_value) }}
58
+ {%- endif -%}
59
+ {%- endif -%}
60
+ {%- endfor -%}
61
+ }
62
+ {%- endif -%}
63
+ {%- endif -%}
64
+ ,type:<escape>{{ value['type'] | upper }}<escape>}
65
+ {%- endif -%}
66
+ {%- endfor -%}
67
+ {%- endmacro -%}
68
+ {% macro format_function_declaration(tool_data) -%}
69
+ declaration:{{- tool_data['function']['name'] -}}
70
+ {description:<escape>{{- tool_data['function']['description'] -}}<escape>
71
+ {%- set params = tool_data['function']['parameters'] -%}
72
+ {%- if params -%}
73
+ ,parameters:{
74
+ {%- if params['properties'] -%}
75
+ properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
76
+ {%- endif -%}
77
+ {%- if params['required'] -%}
78
+ required:[
79
+ {%- for item in params['required'] -%}
80
+ <escape>{{- item -}}<escape>
81
+ {{- ',' if not loop.last -}}
82
+ {%- endfor -%}
83
+ ],
84
+ {%- endif -%}
85
+ {%- if params['type'] -%}
86
+ type:<escape>{{- params['type'] | upper -}}<escape>}
87
+ {%- endif -%}
88
+ {%- endif -%}
89
+ }
90
+ {%- endmacro -%}
91
+ {% macro format_argument(argument, escape_keys=True) -%}
92
+ {%- if argument is string -%}
93
+ {{- '<escape>' + argument + '<escape>' -}}
94
+ {%- elif argument is boolean -%}
95
+ {%- if argument -%}
96
+ {{- 'true' -}}
97
+ {%- else -%}
98
+ {{- 'false' -}}
99
+ {%- endif -%}
100
+ {%- elif argument is mapping -%}
101
+ {{- '{' -}}
102
+ {%- set ns = namespace(found_first=false) -%}
103
+ {%- for key, value in argument | dictsort -%}
104
+ {%- if ns.found_first %},{% endif -%}
105
+ {%- set ns.found_first = true -%}
106
+ {%- if escape_keys -%}
107
+ {{- '<escape>' + key + '<escape>' -}}
108
+ {%- else -%}
109
+ {{- key -}}
110
+ {%- endif -%}
111
+ :{{- format_argument(value, escape_keys=escape_keys) -}}
112
+ {%- endfor -%}
113
+ {{- '}' -}}
114
+ {%- elif argument is sequence -%}
115
+ {{- '[' -}}
116
+ {%- for item in argument -%}
117
+ {{- format_argument(item, escape_keys=escape_keys) -}}
118
+ {%- if not loop.last %},{% endif -%}
119
+ {%- endfor -%}
120
+ {{- ']' -}}
121
+ {%- else -%}
122
+ {{- argument -}}
123
+ {%- endif -%}
124
+ {%- endmacro -%}
125
+ {{ bos_token }}
126
+ {%- set ns = namespace(prev_message_type=None) -%}
127
+ {#- Tool Declarations -#}
128
+ {%- set loop_messages = messages -%}
129
+ {%- if tools or messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
130
+ {{- '<start_of_turn>developer\n' -}}
131
+ {%- if messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
132
+ {%- if messages[0]['content'] is string -%}
133
+ {{- messages[0]['content'] | trim -}}
134
+ {%- elif messages[0]['content'] is sequence -%}
135
+ {%- for item in messages[0]['content'] -%}
136
+ {%- if item['type'] == 'text' -%}
137
+ {{- item['text'] | trim -}}
138
+ {%- endif -%}
139
+ {%- endfor -%}
140
+ {%- endif -%}
141
+ {%- set loop_messages = messages[1:] -%}
142
+ {%- endif -%}
143
+ {%- if tools -%}
144
+ {%- for tool in tools %}
145
+ {{- '<start_function_declaration>' -}}
146
+ {{- format_function_declaration(tool) | trim }}
147
+ {{- '<end_function_declaration>' -}}
148
+ {%- endfor %}
149
+ {%- endif -%}
150
+ {{- '<end_of_turn>\n' }}
151
+ {%- endif %}
152
+ {#- Loop through messages. -#}
153
+ {%- for message in loop_messages -%}
154
+ {%- if (message['role'] == 'assistant') -%}
155
+ {#- Rename "assistant" to "model". -#}
156
+ {%- set role = "model" -%}
157
+ {%- else -%}
158
+ {%- set role = message['role'] -%}
159
+ {%- endif -%}
160
+ {%- if role != 'tool' -%}
161
+ {%- if ns.prev_message_type != 'tool_response' -%}
162
+ {{- '<start_of_turn>' + role + '\n' }}
163
+ {%- endif -%}
164
+ {%- set ns.prev_message_type = None -%}
165
+ {%- if 'content' in message and message['content'] is not none -%}
166
+ {%- if message['content'] is string -%}
167
+ {{ message['content'] | trim }}
168
+ {%- elif message['content'] is sequence -%}
169
+ {%- for item in message['content'] -%}
170
+ {%- if item['type'] == 'image' -%}
171
+ {{ '<start_of_image>' }}
172
+ {%- elif item['type'] == 'text' -%}
173
+ {{ item['text'] | trim }}
174
+ {%- endif -%}
175
+ {%- endfor -%}
176
+ {%- else -%}
177
+ {{ raise_exception("Invalid content type in user/assistant message") }}
178
+ {%- endif -%}
179
+ {%- set ns.prev_message_type = 'content' -%}
180
+ {%- endif -%}
181
+ {%- if 'tool_calls' in message and message['tool_calls'] and message['tool_calls'] is iterable -%}
182
+ {#- Tool Calls -#}
183
+ {%- for tool_call in message['tool_calls'] -%}
184
+ {% set function = tool_call['function'] %}
185
+ {{- '<start_function_call>call:' + function['name'] + '{' -}}
186
+ {%- if 'arguments' in function -%}
187
+ {%- if function['arguments'] is mapping -%}
188
+ {%- set ns = namespace(found_first=false) -%}
189
+ {%- for key, value in function['arguments'] | dictsort -%}
190
+ {%- if ns.found_first %},{% endif -%}
191
+ {%- set ns.found_first = true -%}
192
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
193
+ {%- endfor -%}
194
+ {%- elif function['arguments'] is string -%}
195
+ {# This handles string-JSON, just in case #}
196
+ {{ function['arguments'] }}
197
+ {%- endif %}
198
+ {%- endif -%}
199
+ {{- '}<end_function_call>' -}}
200
+ {%- endfor -%}
201
+ {%- if loop.last -%}
202
+ {{ '<start_function_response>' }}
203
+ {%- endif -%}
204
+ {%- set ns.prev_message_type = 'tool_call' -%}
205
+ {%- endif -%}
206
+ {%- else -%}
207
+ {#- Tool Responses -#}
208
+ {%- if 'content' in message and message['content'] -%}
209
+ {%- if message['content'] is mapping -%}
210
+ {%- if 'name' in message['content'] and 'response' in message['content'] -%}
211
+ {{ '<start_function_response>response:' + message['content']['name'] | trim + '{' }}
212
+ {%- set response_ns = namespace(found_first=false) -%}
213
+ {%- for key, value in message['content']['response'] | dictsort -%}
214
+ {%- if response_ns.found_first %},{% endif -%}
215
+ {%- set response_ns.found_first = true -%}
216
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
217
+ {%- endfor -%}
218
+ {{- '}<end_function_response>' -}}
219
+ {%- elif 'name' in message -%}
220
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
221
+ {%- set response_ns = namespace(found_first=false) -%}
222
+ {%- for key, value in message['content'] | dictsort -%}
223
+ {%- if response_ns.found_first %},{% endif -%}
224
+ {%- set response_ns.found_first = true -%}
225
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
226
+ {%- endfor -%}
227
+ {{- '}<end_function_response>' -}}
228
+ {%- else -%}
229
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
230
+ {%- endif -%}
231
+ {%- elif message['content'] is string -%}
232
+ {%- if 'name' in message -%}
233
+ {{ '<start_function_response>response:' + message['name'] | trim + '{value:' + format_argument(message['content'], escape_keys=False) + '}<end_function_response>' }}
234
+ {%- else -%}
235
+ {{ raise_exception("Invalid tool response: 'name' must be provided.") }}
236
+ {%- endif -%}
237
+ {%- elif message['content'] is sequence -%}
238
+ {%- for item in message['content'] -%}
239
+ {%- if item is mapping -%}
240
+ {%- if 'name' in item and 'response' in item -%}
241
+ {{ '<start_function_response>response:' + item['name'] | trim + '{' }}
242
+ {%- set response_ns = namespace(found_first=false) -%}
243
+ {%- for key, value in item['response'] | dictsort -%}
244
+ {%- if response_ns.found_first %},{% endif -%}
245
+ {%- set response_ns.found_first = true -%}
246
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
247
+ {%- endfor -%}
248
+ {{- '}<end_function_response>' -}}
249
+ {%- elif 'name' in message -%}
250
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
251
+ {%- set response_ns = namespace(found_first=false) -%}
252
+ {%- for key, value in item | dictsort -%}
253
+ {%- if response_ns.found_first %},{% endif -%}
254
+ {%- set response_ns.found_first = true -%}
255
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
256
+ {%- endfor -%}
257
+ {{- '}<end_function_response>' -}}
258
+ {%- else -%}
259
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
260
+ {%- endif -%}
261
+ {%- else -%}
262
+ {{ raise_exception("Invalid tool response message: multiple responses must all be mappings") }}
263
+ {%- endif -%}
264
+ {%- endfor -%}
265
+ {%- else -%}
266
+ {{ raise_exception("Invalid content type in tool message: must be mapping, sequence of mappings, or string.") }}
267
+ {%- endif -%}
268
+ {%- endif -%}
269
+ {%- set ns.prev_message_type = 'tool_response' -%}
270
+ {%- endif -%}
271
+ {%- if ns.prev_message_type not in ['tool_call', 'tool_response'] -%}
272
+ {{ '<end_of_turn>\n' }}
273
+ {%- endif -%}
274
+ {%- endfor -%}
275
+ {%- if add_generation_prompt -%}
276
+ {%- if ns.prev_message_type != 'tool_response' -%}
277
+ {{- '<start_of_turn>model\n' -}}
278
+ {%- endif -%}
279
+ {%- endif -%}
checkpoint-154/chat_template.jinja ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- macro format_parameters(properties, required) -%}
2
+ {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
3
+ {%- set ns = namespace(found_first=false) -%}
4
+ {%- for key, value in properties | dictsort -%}
5
+ {%- if key not in standard_keys -%}
6
+ {%- if ns.found_first %},{% endif -%}
7
+ {%- set ns.found_first = true -%}
8
+ {{- key }}:{description:<escape>{{ value['description'] }}<escape>
9
+ {%- if value['type'] | upper == 'STRING' -%}
10
+ {%- if value['enum'] -%}
11
+ ,enum:{{ format_argument(value['enum']) }}
12
+ {%- endif -%}
13
+ {%- elif value['type'] | upper == 'OBJECT' -%}
14
+ ,properties:{
15
+ {%- if value['properties'] is defined and value['properties'] is mapping -%}
16
+ {{- format_parameters(value['properties'], value['required'] | default([])) -}}
17
+ {%- elif value is mapping -%}
18
+ {{- format_parameters(value, value['required'] | default([])) -}}
19
+ {%- endif -%}
20
+ }
21
+ {%- if value['required'] -%}
22
+ ,required:[
23
+ {%- for item in value['required'] | default([]) -%}
24
+ <escape>{{- item -}}<escape>
25
+ {%- if not loop.last %},{% endif -%}
26
+ {%- endfor -%}
27
+ ]
28
+ {%- endif -%}
29
+ {%- elif value['type'] | upper == 'ARRAY' -%}
30
+ {%- if value['items'] is mapping and value['items'] -%}
31
+ ,items:{
32
+ {%- set ns_items = namespace(found_first=false) -%}
33
+ {%- for item_key, item_value in value['items'] | dictsort -%}
34
+ {%- if item_value is not none -%}
35
+ {%- if ns_items.found_first %},{% endif -%}
36
+ {%- set ns_items.found_first = true -%}
37
+ {%- if item_key == 'properties' -%}
38
+ properties:{
39
+ {%- if item_value is mapping -%}
40
+ {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
41
+ {%- endif -%}
42
+ }
43
+ {%- elif item_key == 'required' -%}
44
+ required:[
45
+ {%- for req_item in item_value -%}
46
+ <escape>{{- req_item -}}<escape>
47
+ {%- if not loop.last %},{% endif -%}
48
+ {%- endfor -%}
49
+ ]
50
+ {%- elif item_key == 'type' -%}
51
+ {%- if item_value is string -%}
52
+ type:{{ format_argument(item_value | upper) }}
53
+ {%- else -%}
54
+ type:{{ format_argument(item_value | map('upper') | list) }}
55
+ {%- endif -%}
56
+ {%- else -%}
57
+ {{ item_key }}:{{ format_argument(item_value) }}
58
+ {%- endif -%}
59
+ {%- endif -%}
60
+ {%- endfor -%}
61
+ }
62
+ {%- endif -%}
63
+ {%- endif -%}
64
+ ,type:<escape>{{ value['type'] | upper }}<escape>}
65
+ {%- endif -%}
66
+ {%- endfor -%}
67
+ {%- endmacro -%}
68
+ {% macro format_function_declaration(tool_data) -%}
69
+ declaration:{{- tool_data['function']['name'] -}}
70
+ {description:<escape>{{- tool_data['function']['description'] -}}<escape>
71
+ {%- set params = tool_data['function']['parameters'] -%}
72
+ {%- if params -%}
73
+ ,parameters:{
74
+ {%- if params['properties'] -%}
75
+ properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
76
+ {%- endif -%}
77
+ {%- if params['required'] -%}
78
+ required:[
79
+ {%- for item in params['required'] -%}
80
+ <escape>{{- item -}}<escape>
81
+ {{- ',' if not loop.last -}}
82
+ {%- endfor -%}
83
+ ],
84
+ {%- endif -%}
85
+ {%- if params['type'] -%}
86
+ type:<escape>{{- params['type'] | upper -}}<escape>}
87
+ {%- endif -%}
88
+ {%- endif -%}
89
+ }
90
+ {%- endmacro -%}
91
+ {% macro format_argument(argument, escape_keys=True) -%}
92
+ {%- if argument is string -%}
93
+ {{- '<escape>' + argument + '<escape>' -}}
94
+ {%- elif argument is boolean -%}
95
+ {%- if argument -%}
96
+ {{- 'true' -}}
97
+ {%- else -%}
98
+ {{- 'false' -}}
99
+ {%- endif -%}
100
+ {%- elif argument is mapping -%}
101
+ {{- '{' -}}
102
+ {%- set ns = namespace(found_first=false) -%}
103
+ {%- for key, value in argument | dictsort -%}
104
+ {%- if ns.found_first %},{% endif -%}
105
+ {%- set ns.found_first = true -%}
106
+ {%- if escape_keys -%}
107
+ {{- '<escape>' + key + '<escape>' -}}
108
+ {%- else -%}
109
+ {{- key -}}
110
+ {%- endif -%}
111
+ :{{- format_argument(value, escape_keys=escape_keys) -}}
112
+ {%- endfor -%}
113
+ {{- '}' -}}
114
+ {%- elif argument is sequence -%}
115
+ {{- '[' -}}
116
+ {%- for item in argument -%}
117
+ {{- format_argument(item, escape_keys=escape_keys) -}}
118
+ {%- if not loop.last %},{% endif -%}
119
+ {%- endfor -%}
120
+ {{- ']' -}}
121
+ {%- else -%}
122
+ {{- argument -}}
123
+ {%- endif -%}
124
+ {%- endmacro -%}
125
+ {{ bos_token }}
126
+ {%- set ns = namespace(prev_message_type=None) -%}
127
+ {#- Tool Declarations -#}
128
+ {%- set loop_messages = messages -%}
129
+ {%- if tools or messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
130
+ {{- '<start_of_turn>developer\n' -}}
131
+ {%- if messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
132
+ {%- if messages[0]['content'] is string -%}
133
+ {{- messages[0]['content'] | trim -}}
134
+ {%- elif messages[0]['content'] is sequence -%}
135
+ {%- for item in messages[0]['content'] -%}
136
+ {%- if item['type'] == 'text' -%}
137
+ {{- item['text'] | trim -}}
138
+ {%- endif -%}
139
+ {%- endfor -%}
140
+ {%- endif -%}
141
+ {%- set loop_messages = messages[1:] -%}
142
+ {%- endif -%}
143
+ {%- if tools -%}
144
+ {%- for tool in tools %}
145
+ {{- '<start_function_declaration>' -}}
146
+ {{- format_function_declaration(tool) | trim }}
147
+ {{- '<end_function_declaration>' -}}
148
+ {%- endfor %}
149
+ {%- endif -%}
150
+ {{- '<end_of_turn>\n' }}
151
+ {%- endif %}
152
+ {#- Loop through messages. -#}
153
+ {%- for message in loop_messages -%}
154
+ {%- if (message['role'] == 'assistant') -%}
155
+ {#- Rename "assistant" to "model". -#}
156
+ {%- set role = "model" -%}
157
+ {%- else -%}
158
+ {%- set role = message['role'] -%}
159
+ {%- endif -%}
160
+ {%- if role != 'tool' -%}
161
+ {%- if ns.prev_message_type != 'tool_response' -%}
162
+ {{- '<start_of_turn>' + role + '\n' }}
163
+ {%- endif -%}
164
+ {%- set ns.prev_message_type = None -%}
165
+ {%- if 'content' in message and message['content'] is not none -%}
166
+ {%- if message['content'] is string -%}
167
+ {{ message['content'] | trim }}
168
+ {%- elif message['content'] is sequence -%}
169
+ {%- for item in message['content'] -%}
170
+ {%- if item['type'] == 'image' -%}
171
+ {{ '<start_of_image>' }}
172
+ {%- elif item['type'] == 'text' -%}
173
+ {{ item['text'] | trim }}
174
+ {%- endif -%}
175
+ {%- endfor -%}
176
+ {%- else -%}
177
+ {{ raise_exception("Invalid content type in user/assistant message") }}
178
+ {%- endif -%}
179
+ {%- set ns.prev_message_type = 'content' -%}
180
+ {%- endif -%}
181
+ {%- if 'tool_calls' in message and message['tool_calls'] and message['tool_calls'] is iterable -%}
182
+ {#- Tool Calls -#}
183
+ {%- for tool_call in message['tool_calls'] -%}
184
+ {% set function = tool_call['function'] %}
185
+ {{- '<start_function_call>call:' + function['name'] + '{' -}}
186
+ {%- if 'arguments' in function -%}
187
+ {%- if function['arguments'] is mapping -%}
188
+ {%- set ns = namespace(found_first=false) -%}
189
+ {%- for key, value in function['arguments'] | dictsort -%}
190
+ {%- if ns.found_first %},{% endif -%}
191
+ {%- set ns.found_first = true -%}
192
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
193
+ {%- endfor -%}
194
+ {%- elif function['arguments'] is string -%}
195
+ {# This handles string-JSON, just in case #}
196
+ {{ function['arguments'] }}
197
+ {%- endif %}
198
+ {%- endif -%}
199
+ {{- '}<end_function_call>' -}}
200
+ {%- endfor -%}
201
+ {%- if loop.last -%}
202
+ {{ '<start_function_response>' }}
203
+ {%- endif -%}
204
+ {%- set ns.prev_message_type = 'tool_call' -%}
205
+ {%- endif -%}
206
+ {%- else -%}
207
+ {#- Tool Responses -#}
208
+ {%- if 'content' in message and message['content'] -%}
209
+ {%- if message['content'] is mapping -%}
210
+ {%- if 'name' in message['content'] and 'response' in message['content'] -%}
211
+ {{ '<start_function_response>response:' + message['content']['name'] | trim + '{' }}
212
+ {%- set response_ns = namespace(found_first=false) -%}
213
+ {%- for key, value in message['content']['response'] | dictsort -%}
214
+ {%- if response_ns.found_first %},{% endif -%}
215
+ {%- set response_ns.found_first = true -%}
216
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
217
+ {%- endfor -%}
218
+ {{- '}<end_function_response>' -}}
219
+ {%- elif 'name' in message -%}
220
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
221
+ {%- set response_ns = namespace(found_first=false) -%}
222
+ {%- for key, value in message['content'] | dictsort -%}
223
+ {%- if response_ns.found_first %},{% endif -%}
224
+ {%- set response_ns.found_first = true -%}
225
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
226
+ {%- endfor -%}
227
+ {{- '}<end_function_response>' -}}
228
+ {%- else -%}
229
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
230
+ {%- endif -%}
231
+ {%- elif message['content'] is string -%}
232
+ {%- if 'name' in message -%}
233
+ {{ '<start_function_response>response:' + message['name'] | trim + '{value:' + format_argument(message['content'], escape_keys=False) + '}<end_function_response>' }}
234
+ {%- else -%}
235
+ {{ raise_exception("Invalid tool response: 'name' must be provided.") }}
236
+ {%- endif -%}
237
+ {%- elif message['content'] is sequence -%}
238
+ {%- for item in message['content'] -%}
239
+ {%- if item is mapping -%}
240
+ {%- if 'name' in item and 'response' in item -%}
241
+ {{ '<start_function_response>response:' + item['name'] | trim + '{' }}
242
+ {%- set response_ns = namespace(found_first=false) -%}
243
+ {%- for key, value in item['response'] | dictsort -%}
244
+ {%- if response_ns.found_first %},{% endif -%}
245
+ {%- set response_ns.found_first = true -%}
246
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
247
+ {%- endfor -%}
248
+ {{- '}<end_function_response>' -}}
249
+ {%- elif 'name' in message -%}
250
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
251
+ {%- set response_ns = namespace(found_first=false) -%}
252
+ {%- for key, value in item | dictsort -%}
253
+ {%- if response_ns.found_first %},{% endif -%}
254
+ {%- set response_ns.found_first = true -%}
255
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
256
+ {%- endfor -%}
257
+ {{- '}<end_function_response>' -}}
258
+ {%- else -%}
259
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
260
+ {%- endif -%}
261
+ {%- else -%}
262
+ {{ raise_exception("Invalid tool response message: multiple responses must all be mappings") }}
263
+ {%- endif -%}
264
+ {%- endfor -%}
265
+ {%- else -%}
266
+ {{ raise_exception("Invalid content type in tool message: must be mapping, sequence of mappings, or string.") }}
267
+ {%- endif -%}
268
+ {%- endif -%}
269
+ {%- set ns.prev_message_type = 'tool_response' -%}
270
+ {%- endif -%}
271
+ {%- if ns.prev_message_type not in ['tool_call', 'tool_response'] -%}
272
+ {{ '<end_of_turn>\n' }}
273
+ {%- endif -%}
274
+ {%- endfor -%}
275
+ {%- if add_generation_prompt -%}
276
+ {%- if ns.prev_message_type != 'tool_response' -%}
277
+ {{- '<start_of_turn>model\n' -}}
278
+ {%- endif -%}
279
+ {%- endif -%}
checkpoint-154/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_sliding_window_pattern": 6,
3
+ "architectures": [
4
+ "Gemma3ForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "attn_logit_softcapping": null,
9
+ "bos_token_id": 2,
10
+ "dtype": "bfloat16",
11
+ "eos_token_id": 1,
12
+ "final_logit_softcapping": null,
13
+ "head_dim": 256,
14
+ "hidden_activation": "gelu_pytorch_tanh",
15
+ "hidden_size": 640,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 2048,
18
+ "layer_types": [
19
+ "sliding_attention",
20
+ "sliding_attention",
21
+ "sliding_attention",
22
+ "sliding_attention",
23
+ "sliding_attention",
24
+ "full_attention",
25
+ "sliding_attention",
26
+ "sliding_attention",
27
+ "sliding_attention",
28
+ "sliding_attention",
29
+ "sliding_attention",
30
+ "full_attention",
31
+ "sliding_attention",
32
+ "sliding_attention",
33
+ "sliding_attention",
34
+ "sliding_attention",
35
+ "sliding_attention",
36
+ "full_attention"
37
+ ],
38
+ "max_position_embeddings": 32768,
39
+ "model_type": "gemma3_text",
40
+ "num_attention_heads": 4,
41
+ "num_hidden_layers": 18,
42
+ "num_key_value_heads": 1,
43
+ "pad_token_id": 0,
44
+ "query_pre_attn_scalar": 256,
45
+ "rms_norm_eps": 1e-06,
46
+ "rope_parameters": {
47
+ "full_attention": {
48
+ "rope_theta": 1000000.0,
49
+ "rope_type": "default"
50
+ },
51
+ "sliding_attention": {
52
+ "rope_theta": 10000.0,
53
+ "rope_type": "default"
54
+ }
55
+ },
56
+ "sliding_window": 512,
57
+ "tie_word_embeddings": true,
58
+ "transformers_version": "5.5.1",
59
+ "use_bidirectional_attention": false,
60
+ "use_cache": false,
61
+ "vocab_size": 262144
62
+ }
checkpoint-154/generation_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 2,
3
+ "cache_implementation": "hybrid",
4
+ "do_sample": true,
5
+ "eos_token_id": [
6
+ 1,
7
+ 1,
8
+ 50,
9
+ 106
10
+ ],
11
+ "pad_token_id": 0,
12
+ "top_k": 64,
13
+ "top_p": 0.95,
14
+ "transformers_version": "5.5.1"
15
+ }
checkpoint-154/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93fb534a4891753c3ebb1a9de98b341c468d2af48358952ad356dba5d9a6c73a
3
+ size 536223056
checkpoint-154/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6462a18da18cd52b1d0a0207bad998d42dc86e0f7df8492a865e5145bb25c45
3
+ size 1072594443
checkpoint-154/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:718a0f3db00824213036a2c0441849791319b7d9cf189065873bb26a7020738e
3
+ size 14645
checkpoint-154/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1e83b8ffb0782b40f36dd8317e0757ffe7f134c174b4c60d0bd74dcd9d506e7
3
+ size 1465
checkpoint-154/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80d7f800b949accd7eb940bac75e642f9468e4df157403032a55bf54ed23b650
3
+ size 33384898
checkpoint-154/tokenizer_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "boi_token": "<start_of_image>",
4
+ "bos_token": "<bos>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "eoi_token": "<end_of_image>",
7
+ "eos_token": "<eos>",
8
+ "image_token": "<image_soft_token>",
9
+ "is_local": false,
10
+ "mask_token": "<mask>",
11
+ "model_max_length": 1000000000000000019884624838656,
12
+ "model_specific_special_tokens": {
13
+ "boi_token": "<start_of_image>",
14
+ "eoi_token": "<end_of_image>",
15
+ "image_token": "<image_soft_token>",
16
+ "sfr_token": "<start_function_response>"
17
+ },
18
+ "pad_token": "<pad>",
19
+ "padding_side": "left",
20
+ "sfr_token": "<start_function_response>",
21
+ "sp_model_kwargs": null,
22
+ "spaces_between_special_tokens": false,
23
+ "tokenizer_class": "GemmaTokenizer",
24
+ "unk_token": "<unk>",
25
+ "use_default_system_prompt": false
26
+ }
checkpoint-154/trainer_state.json ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
+ "eval_steps": 50,
7
+ "global_step": 154,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "entropy": 0.8290567851066589,
14
+ "epoch": 0.6568144499178982,
15
+ "grad_norm": 0.75390625,
16
+ "learning_rate": 9.388394947836278e-06,
17
+ "loss": 0.11062156677246093,
18
+ "mean_token_accuracy": 0.9778690934181213,
19
+ "num_tokens": 389732.0,
20
+ "step": 50
21
+ },
22
+ {
23
+ "epoch": 0.6568144499178982,
24
+ "eval_entropy": 0.7784549756483599,
25
+ "eval_loss": 0.03614399954676628,
26
+ "eval_mean_token_accuracy": 0.9917981918756064,
27
+ "eval_num_tokens": 389732.0,
28
+ "eval_runtime": 12.9713,
29
+ "eval_samples_per_second": 23.513,
30
+ "eval_steps_per_second": 5.936,
31
+ "step": 50
32
+ },
33
+ {
34
+ "entropy": 0.7608775209834557,
35
+ "epoch": 1.3021346469622332,
36
+ "grad_norm": 0.6796875,
37
+ "learning_rate": 7.660160382576683e-06,
38
+ "loss": 0.022044627666473388,
39
+ "mean_token_accuracy": 0.9940849557784374,
40
+ "num_tokens": 771178.0,
41
+ "step": 100
42
+ },
43
+ {
44
+ "epoch": 1.3021346469622332,
45
+ "eval_entropy": 0.7180899474527929,
46
+ "eval_loss": 0.019734159111976624,
47
+ "eval_mean_token_accuracy": 0.9949120081864394,
48
+ "eval_num_tokens": 771178.0,
49
+ "eval_runtime": 13.0354,
50
+ "eval_samples_per_second": 23.398,
51
+ "eval_steps_per_second": 5.907,
52
+ "step": 100
53
+ },
54
+ {
55
+ "entropy": 0.7120695394277573,
56
+ "epoch": 1.9589490968801315,
57
+ "grad_norm": 0.4453125,
58
+ "learning_rate": 5.25488887635095e-06,
59
+ "loss": 0.013736556768417358,
60
+ "mean_token_accuracy": 0.9967586588859558,
61
+ "num_tokens": 1165084.0,
62
+ "step": 150
63
+ },
64
+ {
65
+ "epoch": 1.9589490968801315,
66
+ "eval_entropy": 0.6842893903905695,
67
+ "eval_loss": 0.013124481774866581,
68
+ "eval_mean_token_accuracy": 0.9965388542645938,
69
+ "eval_num_tokens": 1165084.0,
70
+ "eval_runtime": 13.0811,
71
+ "eval_samples_per_second": 23.316,
72
+ "eval_steps_per_second": 5.886,
73
+ "step": 150
74
+ }
75
+ ],
76
+ "logging_steps": 50,
77
+ "max_steps": 308,
78
+ "num_input_tokens_seen": 0,
79
+ "num_train_epochs": 4,
80
+ "save_steps": 500,
81
+ "stateful_callbacks": {
82
+ "TrainerControl": {
83
+ "args": {
84
+ "should_epoch_stop": false,
85
+ "should_evaluate": false,
86
+ "should_log": false,
87
+ "should_save": true,
88
+ "should_training_stop": false
89
+ },
90
+ "attributes": {}
91
+ }
92
+ },
93
+ "total_flos": 1006601818940928.0,
94
+ "train_batch_size": 4,
95
+ "trial_name": null,
96
+ "trial_params": null
97
+ }
checkpoint-154/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5363ce9d99dd9dbf1bc2c5ff6c4b0ce553147005c7381cbde8798f93d5971fb
3
+ size 5649
checkpoint-231/chat_template.jinja ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- macro format_parameters(properties, required) -%}{#- Renders every entry of properties, sorted by key, as key:{description:...,...,type:...}, separated by commas. STRING entries may add enum, OBJECT entries add nested properties and required, ARRAY entries add items. The required argument is accepted but is not referenced inside this macro. -#}
2
+ {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
3
+ {%- set ns = namespace(found_first=false) -%}
4
+ {%- for key, value in properties | dictsort -%}
5
+ {%- if key not in standard_keys -%}{#- Entries whose key is listed in standard_keys are skipped entirely. -#}
6
+ {%- if ns.found_first %},{% endif -%}
7
+ {%- set ns.found_first = true -%}
8
+ {{- key }}:{description:<escape>{{ value['description'] }}<escape>
9
+ {%- if value['type'] | upper == 'STRING' -%}
10
+ {%- if value['enum'] -%}
11
+ ,enum:{{ format_argument(value['enum']) }}
12
+ {%- endif -%}
13
+ {%- elif value['type'] | upper == 'OBJECT' -%}
14
+ ,properties:{
15
+ {%- if value['properties'] is defined and value['properties'] is mapping -%}
16
+ {{- format_parameters(value['properties'], value['required'] | default([])) -}}
17
+ {%- elif value is mapping -%}{#- No nested properties mapping: the value itself is passed as the property map. -#}
18
+ {{- format_parameters(value, value['required'] | default([])) -}}
19
+ {%- endif -%}
20
+ }
21
+ {%- if value['required'] -%}
22
+ ,required:[
23
+ {%- for item in value['required'] | default([]) -%}
24
+ <escape>{{- item -}}<escape>
25
+ {%- if not loop.last %},{% endif -%}
26
+ {%- endfor -%}
27
+ ]
28
+ {%- endif -%}
29
+ {%- elif value['type'] | upper == 'ARRAY' -%}
30
+ {%- if value['items'] is mapping and value['items'] -%}
31
+ ,items:{
32
+ {%- set ns_items = namespace(found_first=false) -%}
33
+ {%- for item_key, item_value in value['items'] | dictsort -%}
34
+ {%- if item_value is not none -%}
35
+ {%- if ns_items.found_first %},{% endif -%}
36
+ {%- set ns_items.found_first = true -%}
37
+ {%- if item_key == 'properties' -%}
38
+ properties:{
39
+ {%- if item_value is mapping -%}
40
+ {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
41
+ {%- endif -%}
42
+ }
43
+ {%- elif item_key == 'required' -%}
44
+ required:[
45
+ {%- for req_item in item_value -%}
46
+ <escape>{{- req_item -}}<escape>
47
+ {%- if not loop.last %},{% endif -%}
48
+ {%- endfor -%}
49
+ ]
50
+ {%- elif item_key == 'type' -%}
51
+ {%- if item_value is string -%}
52
+ type:{{ format_argument(item_value | upper) }}
53
+ {%- else -%}
54
+ type:{{ format_argument(item_value | map('upper') | list) }}
55
+ {%- endif -%}
56
+ {%- else -%}
57
+ {{ item_key }}:{{ format_argument(item_value) }}
58
+ {%- endif -%}
59
+ {%- endif -%}
60
+ {%- endfor -%}
61
+ }
62
+ {%- endif -%}
63
+ {%- endif -%}
64
+ ,type:<escape>{{ value['type'] | upper }}<escape>}
65
+ {%- endif -%}
66
+ {%- endfor -%}
67
+ {%- endmacro -%}
68
+ {% macro format_function_declaration(tool_data) -%}{#- Renders one tool as declaration:NAME{description:...,parameters:{properties:{...},required:[...],type:...}}. The parameters object is always closed, whether or not the schema carries a top-level type. -#}
69
+ declaration:{{- tool_data['function']['name'] -}}
70
+ {description:<escape>{{- tool_data['function']['description'] -}}<escape>
71
+ {%- set params = tool_data['function']['parameters'] -%}
72
+ {%- if params -%}
73
+ ,parameters:{
74
+ {%- if params['properties'] -%}
75
+ properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
76
+ {%- endif -%}
77
+ {%- if params['required'] -%}
78
+ required:[
79
+ {%- for item in params['required'] -%}
80
+ <escape>{{- item -}}<escape>
81
+ {{- ',' if not loop.last -}}
82
+ {%- endfor -%}
83
+ ],
84
+ {%- endif -%}
85
+ {%- if params['type'] -%}
86
+ type:<escape>{{- params['type'] | upper -}}<escape>
87
+ {%- endif -%}{{- '}' -}}{#- Closes parameters:{ unconditionally; this brace used to be emitted only when a type was present, leaving the declaration unbalanced otherwise. -#}
88
+ {%- endif -%}
89
+ }
90
+ {%- endmacro -%}
91
+ {% macro format_argument(argument, escape_keys=True) -%}{#- Serialises one value: strings are wrapped in <escape> markers, booleans become true or false, mappings (sorted by key) and sequences are rendered recursively, and any other value is emitted as-is. -#}
92
+ {%- if argument is string -%}
93
+ {{- '<escape>' + argument + '<escape>' -}}
94
+ {%- elif argument is boolean -%}
95
+ {%- if argument -%}
96
+ {{- 'true' -}}
97
+ {%- else -%}
98
+ {{- 'false' -}}
99
+ {%- endif -%}
100
+ {%- elif argument is mapping -%}
101
+ {{- '{' -}}
102
+ {%- set ns = namespace(found_first=false) -%}
103
+ {%- for key, value in argument | dictsort -%}
104
+ {%- if ns.found_first %},{% endif -%}
105
+ {%- set ns.found_first = true -%}
106
+ {%- if escape_keys -%}{#- escape_keys decides whether mapping keys are wrapped in <escape> markers; it is forwarded unchanged to nested calls. -#}
107
+ {{- '<escape>' + key + '<escape>' -}}
108
+ {%- else -%}
109
+ {{- key -}}
110
+ {%- endif -%}
111
+ :{{- format_argument(value, escape_keys=escape_keys) -}}
112
+ {%- endfor -%}
113
+ {{- '}' -}}
114
+ {%- elif argument is sequence -%}
115
+ {{- '[' -}}
116
+ {%- for item in argument -%}
117
+ {{- format_argument(item, escape_keys=escape_keys) -}}
118
+ {%- if not loop.last %},{% endif -%}
119
+ {%- endfor -%}
120
+ {{- ']' -}}
121
+ {%- else -%}
122
+ {{- argument -}}
123
+ {%- endif -%}
124
+ {%- endmacro -%}
125
+ {{ bos_token }}
126
+ {%- set ns = namespace(prev_message_type=None) -%}
127
+ {#- Tool Declarations -#}
128
+ {%- set loop_messages = messages -%}
129
+ {%- if tools or messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}{#- A developer turn is opened when tools are supplied or when the first message has the system or developer role. -#}
130
+ {{- '<start_of_turn>developer\n' -}}
131
+ {%- if messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
132
+ {%- if messages[0]['content'] is string -%}
133
+ {{- messages[0]['content'] | trim -}}
134
+ {%- elif messages[0]['content'] is sequence -%}
135
+ {%- for item in messages[0]['content'] -%}
136
+ {%- if item['type'] == 'text' -%}
137
+ {{- item['text'] | trim -}}
138
+ {%- endif -%}
139
+ {%- endfor -%}
140
+ {%- endif -%}
141
+ {%- set loop_messages = messages[1:] -%}{#- The leading system or developer message was rendered above, so it is excluded from the message loop. -#}
142
+ {%- endif -%}
143
+ {%- if tools -%}
144
+ {%- for tool in tools %}
145
+ {{- '<start_function_declaration>' -}}
146
+ {{- format_function_declaration(tool) | trim }}
147
+ {{- '<end_function_declaration>' -}}
148
+ {%- endfor %}
149
+ {%- endif -%}
150
+ {{- '<end_of_turn>\n' }}
151
+ {%- endif %}
152
+ {#- Loop through messages. -#}
153
+ {%- for message in loop_messages -%}
154
+ {%- if (message['role'] == 'assistant') -%}
155
+ {#- Rename "assistant" to "model". -#}
156
+ {%- set role = "model" -%}
157
+ {%- else -%}
158
+ {%- set role = message['role'] -%}
159
+ {%- endif -%}
160
+ {%- if role != 'tool' -%}
161
+ {%- if ns.prev_message_type != 'tool_response' -%}
162
+ {{- '<start_of_turn>' + role + '\n' }}
163
+ {%- endif -%}
164
+ {%- set ns.prev_message_type = None -%}
165
+ {%- if 'content' in message and message['content'] is not none -%}
166
+ {%- if message['content'] is string -%}
167
+ {{ message['content'] | trim }}
168
+ {%- elif message['content'] is sequence -%}
169
+ {%- for item in message['content'] -%}
170
+ {%- if item['type'] == 'image' -%}
171
+ {{ '<start_of_image>' }}
172
+ {%- elif item['type'] == 'text' -%}
173
+ {{ item['text'] | trim }}
174
+ {%- endif -%}
175
+ {%- endfor -%}
176
+ {%- else -%}
177
+ {{ raise_exception("Invalid content type in user/assistant message") }}
178
+ {%- endif -%}
179
+ {%- set ns.prev_message_type = 'content' -%}
180
+ {%- endif -%}
181
+ {%- if 'tool_calls' in message and message['tool_calls'] and message['tool_calls'] is iterable -%}
182
+ {#- Tool Calls -#}
183
+ {%- for tool_call in message['tool_calls'] -%}
184
+ {% set function = tool_call['function'] %}
185
+ {{- '<start_function_call>call:' + function['name'] + '{' -}}
186
+ {%- if 'arguments' in function -%}
187
+ {%- if function['arguments'] is mapping -%}
188
+ {%- set ns = namespace(found_first=false) -%}{#- This ns is assigned inside the tool_call loop; under Jinja loop scoping it does not replace the outer ns that carries prev_message_type. -#}
189
+ {%- for key, value in function['arguments'] | dictsort -%}
190
+ {%- if ns.found_first %},{% endif -%}
191
+ {%- set ns.found_first = true -%}
192
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
193
+ {%- endfor -%}
194
+ {%- elif function['arguments'] is string -%}
195
+ {# This handles string-JSON, just in case #}
196
+ {{ function['arguments'] }}
197
+ {%- endif %}
198
+ {%- endif -%}
199
+ {{- '}<end_function_call>' -}}
200
+ {%- endfor -%}
201
+ {%- if loop.last -%}{#- The tool_call loop has already ended, so loop refers to the message loop here: when the final message carries tool calls, an opening function-response marker is appended. -#}
202
+ {{ '<start_function_response>' }}
203
+ {%- endif -%}
204
+ {%- set ns.prev_message_type = 'tool_call' -%}
205
+ {%- endif -%}
206
+ {%- else -%}
207
+ {#- Tool Responses -#}
208
+ {%- if 'content' in message and message['content'] -%}
209
+ {%- if message['content'] is mapping -%}
210
+ {%- if 'name' in message['content'] and 'response' in message['content'] -%}
211
+ {{ '<start_function_response>response:' + message['content']['name'] | trim + '{' }}
212
+ {%- set response_ns = namespace(found_first=false) -%}
213
+ {%- for key, value in message['content']['response'] | dictsort -%}
214
+ {%- if response_ns.found_first %},{% endif -%}
215
+ {%- set response_ns.found_first = true -%}
216
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
217
+ {%- endfor -%}
218
+ {{- '}<end_function_response>' -}}
219
+ {%- elif 'name' in message -%}
220
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
221
+ {%- set response_ns = namespace(found_first=false) -%}
222
+ {%- for key, value in message['content'] | dictsort -%}
223
+ {%- if response_ns.found_first %},{% endif -%}
224
+ {%- set response_ns.found_first = true -%}
225
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
226
+ {%- endfor -%}
227
+ {{- '}<end_function_response>' -}}
228
+ {%- else -%}
229
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
230
+ {%- endif -%}
231
+ {%- elif message['content'] is string -%}
232
+ {%- if 'name' in message -%}
233
+ {{ '<start_function_response>response:' + message['name'] | trim + '{value:' + format_argument(message['content'], escape_keys=False) + '}<end_function_response>' }}
234
+ {%- else -%}
235
+ {{ raise_exception("Invalid tool response: 'name' must be provided.") }}
236
+ {%- endif -%}
237
+ {%- elif message['content'] is sequence -%}
238
+ {%- for item in message['content'] -%}
239
+ {%- if item is mapping -%}
240
+ {%- if 'name' in item and 'response' in item -%}
241
+ {{ '<start_function_response>response:' + item['name'] | trim + '{' }}
242
+ {%- set response_ns = namespace(found_first=false) -%}
243
+ {%- for key, value in item['response'] | dictsort -%}
244
+ {%- if response_ns.found_first %},{% endif -%}
245
+ {%- set response_ns.found_first = true -%}
246
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
247
+ {%- endfor -%}
248
+ {{- '}<end_function_response>' -}}
249
+ {%- elif 'name' in message -%}
250
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
251
+ {%- set response_ns = namespace(found_first=false) -%}
252
+ {%- for key, value in item | dictsort -%}
253
+ {%- if response_ns.found_first %},{% endif -%}
254
+ {%- set response_ns.found_first = true -%}
255
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
256
+ {%- endfor -%}
257
+ {{- '}<end_function_response>' -}}
258
+ {%- else -%}
259
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
260
+ {%- endif -%}
261
+ {%- else -%}
262
+ {{ raise_exception("Invalid tool response message: multiple responses must all be mappings") }}
263
+ {%- endif -%}
264
+ {%- endfor -%}
265
+ {%- else -%}
266
+ {{ raise_exception("Invalid content type in tool message: must be mapping, sequence of mappings, or string.") }}
267
+ {%- endif -%}
268
+ {%- endif -%}
269
+ {%- set ns.prev_message_type = 'tool_response' -%}
270
+ {%- endif -%}
271
+ {%- if ns.prev_message_type not in ['tool_call', 'tool_response'] -%}{#- Turns that end in a tool call or a tool response are left open; every other turn is closed here. -#}
272
+ {{ '<end_of_turn>\n' }}
273
+ {%- endif -%}
274
+ {%- endfor -%}
275
+ {%- if add_generation_prompt -%}
276
+ {%- if ns.prev_message_type != 'tool_response' -%}
277
+ {{- '<start_of_turn>model\n' -}}
278
+ {%- endif -%}
279
+ {%- endif -%}
checkpoint-231/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_sliding_window_pattern": 6,
3
+ "architectures": [
4
+ "Gemma3ForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "attn_logit_softcapping": null,
9
+ "bos_token_id": 2,
10
+ "dtype": "bfloat16",
11
+ "eos_token_id": 1,
12
+ "final_logit_softcapping": null,
13
+ "head_dim": 256,
14
+ "hidden_activation": "gelu_pytorch_tanh",
15
+ "hidden_size": 640,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 2048,
18
+ "layer_types": [
19
+ "sliding_attention",
20
+ "sliding_attention",
21
+ "sliding_attention",
22
+ "sliding_attention",
23
+ "sliding_attention",
24
+ "full_attention",
25
+ "sliding_attention",
26
+ "sliding_attention",
27
+ "sliding_attention",
28
+ "sliding_attention",
29
+ "sliding_attention",
30
+ "full_attention",
31
+ "sliding_attention",
32
+ "sliding_attention",
33
+ "sliding_attention",
34
+ "sliding_attention",
35
+ "sliding_attention",
36
+ "full_attention"
37
+ ],
38
+ "max_position_embeddings": 32768,
39
+ "model_type": "gemma3_text",
40
+ "num_attention_heads": 4,
41
+ "num_hidden_layers": 18,
42
+ "num_key_value_heads": 1,
43
+ "pad_token_id": 0,
44
+ "query_pre_attn_scalar": 256,
45
+ "rms_norm_eps": 1e-06,
46
+ "rope_parameters": {
47
+ "full_attention": {
48
+ "rope_theta": 1000000.0,
49
+ "rope_type": "default"
50
+ },
51
+ "sliding_attention": {
52
+ "rope_theta": 10000.0,
53
+ "rope_type": "default"
54
+ }
55
+ },
56
+ "sliding_window": 512,
57
+ "tie_word_embeddings": true,
58
+ "transformers_version": "5.5.1",
59
+ "use_bidirectional_attention": false,
60
+ "use_cache": false,
61
+ "vocab_size": 262144
62
+ }
checkpoint-231/generation_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 2,
3
+ "cache_implementation": "hybrid",
4
+ "do_sample": true,
5
+ "eos_token_id": [
6
+ 1,
7
+ 1,
8
+ 50,
9
+ 106
10
+ ],
11
+ "pad_token_id": 0,
12
+ "top_k": 64,
13
+ "top_p": 0.95,
14
+ "transformers_version": "5.5.1"
15
+ }
checkpoint-231/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55b9632e5cf5540c78cd35b1e0e220c54e92aff80343623998b0b23885f9b141
3
+ size 536223056
checkpoint-231/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ea1cca6a4544a5b70c2fd53427f81667a461bda7b06d55017ebe5de993aa40c
3
+ size 1072594443
checkpoint-231/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f196323d7423b60f8e4ceb7dbf8715ee326c0d068e5ff164f13c63b279b9f1a0
3
+ size 14645
checkpoint-231/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2c2b23c7e4352465f050c1d63ce488c1582e84995535f53d01e1408547e53ea
3
+ size 1465
checkpoint-231/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80d7f800b949accd7eb940bac75e642f9468e4df157403032a55bf54ed23b650
3
+ size 33384898
checkpoint-231/tokenizer_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "boi_token": "<start_of_image>",
4
+ "bos_token": "<bos>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "eoi_token": "<end_of_image>",
7
+ "eos_token": "<eos>",
8
+ "image_token": "<image_soft_token>",
9
+ "is_local": false,
10
+ "mask_token": "<mask>",
11
+ "model_max_length": 1000000000000000019884624838656,
12
+ "model_specific_special_tokens": {
13
+ "boi_token": "<start_of_image>",
14
+ "eoi_token": "<end_of_image>",
15
+ "image_token": "<image_soft_token>",
16
+ "sfr_token": "<start_function_response>"
17
+ },
18
+ "pad_token": "<pad>",
19
+ "padding_side": "left",
20
+ "sfr_token": "<start_function_response>",
21
+ "sp_model_kwargs": null,
22
+ "spaces_between_special_tokens": false,
23
+ "tokenizer_class": "GemmaTokenizer",
24
+ "unk_token": "<unk>",
25
+ "use_default_system_prompt": false
26
+ }
checkpoint-231/trainer_state.json ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
+ "eval_steps": 50,
7
+ "global_step": 231,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "entropy": 0.8290567851066589,
14
+ "epoch": 0.6568144499178982,
15
+ "grad_norm": 0.75390625,
16
+ "learning_rate": 9.388394947836278e-06,
17
+ "loss": 0.11062156677246093,
18
+ "mean_token_accuracy": 0.9778690934181213,
19
+ "num_tokens": 389732.0,
20
+ "step": 50
21
+ },
22
+ {
23
+ "epoch": 0.6568144499178982,
24
+ "eval_entropy": 0.7784549756483599,
25
+ "eval_loss": 0.03614399954676628,
26
+ "eval_mean_token_accuracy": 0.9917981918756064,
27
+ "eval_num_tokens": 389732.0,
28
+ "eval_runtime": 12.9713,
29
+ "eval_samples_per_second": 23.513,
30
+ "eval_steps_per_second": 5.936,
31
+ "step": 50
32
+ },
33
+ {
34
+ "entropy": 0.7608775209834557,
35
+ "epoch": 1.3021346469622332,
36
+ "grad_norm": 0.6796875,
37
+ "learning_rate": 7.660160382576683e-06,
38
+ "loss": 0.022044627666473388,
39
+ "mean_token_accuracy": 0.9940849557784374,
40
+ "num_tokens": 771178.0,
41
+ "step": 100
42
+ },
43
+ {
44
+ "epoch": 1.3021346469622332,
45
+ "eval_entropy": 0.7180899474527929,
46
+ "eval_loss": 0.019734159111976624,
47
+ "eval_mean_token_accuracy": 0.9949120081864394,
48
+ "eval_num_tokens": 771178.0,
49
+ "eval_runtime": 13.0354,
50
+ "eval_samples_per_second": 23.398,
51
+ "eval_steps_per_second": 5.907,
52
+ "step": 100
53
+ },
54
+ {
55
+ "entropy": 0.7120695394277573,
56
+ "epoch": 1.9589490968801315,
57
+ "grad_norm": 0.4453125,
58
+ "learning_rate": 5.25488887635095e-06,
59
+ "loss": 0.013736556768417358,
60
+ "mean_token_accuracy": 0.9967586588859558,
61
+ "num_tokens": 1165084.0,
62
+ "step": 150
63
+ },
64
+ {
65
+ "epoch": 1.9589490968801315,
66
+ "eval_entropy": 0.6842893903905695,
67
+ "eval_loss": 0.013124481774866581,
68
+ "eval_mean_token_accuracy": 0.9965388542645938,
69
+ "eval_num_tokens": 1165084.0,
70
+ "eval_runtime": 13.0811,
71
+ "eval_samples_per_second": 23.316,
72
+ "eval_steps_per_second": 5.886,
73
+ "step": 150
74
+ },
75
+ {
76
+ "entropy": 0.6924438694961198,
77
+ "epoch": 2.6042692939244665,
78
+ "grad_norm": 0.423828125,
79
+ "learning_rate": 2.7847456480060476e-06,
80
+ "loss": 0.008616942763328552,
81
+ "mean_token_accuracy": 0.9980061287795011,
82
+ "num_tokens": 1550848.0,
83
+ "step": 200
84
+ },
85
+ {
86
+ "epoch": 2.6042692939244665,
87
+ "eval_entropy": 0.678199358961799,
88
+ "eval_loss": 0.012337171472609043,
89
+ "eval_mean_token_accuracy": 0.996950343831793,
90
+ "eval_num_tokens": 1550848.0,
91
+ "eval_runtime": 13.0386,
92
+ "eval_samples_per_second": 23.392,
93
+ "eval_steps_per_second": 5.906,
94
+ "step": 200
95
+ }
96
+ ],
97
+ "logging_steps": 50,
98
+ "max_steps": 308,
99
+ "num_input_tokens_seen": 0,
100
+ "num_train_epochs": 4,
101
+ "save_steps": 500,
102
+ "stateful_callbacks": {
103
+ "TrainerControl": {
104
+ "args": {
105
+ "should_epoch_stop": false,
106
+ "should_evaluate": false,
107
+ "should_log": false,
108
+ "should_save": true,
109
+ "should_training_stop": false
110
+ },
111
+ "attributes": {}
112
+ }
113
+ },
114
+ "total_flos": 1508776468555776.0,
115
+ "train_batch_size": 4,
116
+ "trial_name": null,
117
+ "trial_params": null
118
+ }
checkpoint-231/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5363ce9d99dd9dbf1bc2c5ff6c4b0ce553147005c7381cbde8798f93d5971fb
3
+ size 5649
checkpoint-308/chat_template.jinja ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- macro format_parameters(properties, required) -%}{#- Renders every entry of properties, sorted by key, as key:{description:...,...,type:...}, separated by commas. STRING entries may add enum, OBJECT entries add nested properties and required, ARRAY entries add items. The required argument is accepted but is not referenced inside this macro. -#}
2
+ {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
3
+ {%- set ns = namespace(found_first=false) -%}
4
+ {%- for key, value in properties | dictsort -%}
5
+ {%- if key not in standard_keys -%}{#- Entries whose key is listed in standard_keys are skipped entirely. -#}
6
+ {%- if ns.found_first %},{% endif -%}
7
+ {%- set ns.found_first = true -%}
8
+ {{- key }}:{description:<escape>{{ value['description'] }}<escape>
9
+ {%- if value['type'] | upper == 'STRING' -%}
10
+ {%- if value['enum'] -%}
11
+ ,enum:{{ format_argument(value['enum']) }}
12
+ {%- endif -%}
13
+ {%- elif value['type'] | upper == 'OBJECT' -%}
14
+ ,properties:{
15
+ {%- if value['properties'] is defined and value['properties'] is mapping -%}
16
+ {{- format_parameters(value['properties'], value['required'] | default([])) -}}
17
+ {%- elif value is mapping -%}{#- No nested properties mapping: the value itself is passed as the property map. -#}
18
+ {{- format_parameters(value, value['required'] | default([])) -}}
19
+ {%- endif -%}
20
+ }
21
+ {%- if value['required'] -%}
22
+ ,required:[
23
+ {%- for item in value['required'] | default([]) -%}
24
+ <escape>{{- item -}}<escape>
25
+ {%- if not loop.last %},{% endif -%}
26
+ {%- endfor -%}
27
+ ]
28
+ {%- endif -%}
29
+ {%- elif value['type'] | upper == 'ARRAY' -%}
30
+ {%- if value['items'] is mapping and value['items'] -%}
31
+ ,items:{
32
+ {%- set ns_items = namespace(found_first=false) -%}
33
+ {%- for item_key, item_value in value['items'] | dictsort -%}
34
+ {%- if item_value is not none -%}
35
+ {%- if ns_items.found_first %},{% endif -%}
36
+ {%- set ns_items.found_first = true -%}
37
+ {%- if item_key == 'properties' -%}
38
+ properties:{
39
+ {%- if item_value is mapping -%}
40
+ {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
41
+ {%- endif -%}
42
+ }
43
+ {%- elif item_key == 'required' -%}
44
+ required:[
45
+ {%- for req_item in item_value -%}
46
+ <escape>{{- req_item -}}<escape>
47
+ {%- if not loop.last %},{% endif -%}
48
+ {%- endfor -%}
49
+ ]
50
+ {%- elif item_key == 'type' -%}
51
+ {%- if item_value is string -%}
52
+ type:{{ format_argument(item_value | upper) }}
53
+ {%- else -%}
54
+ type:{{ format_argument(item_value | map('upper') | list) }}
55
+ {%- endif -%}
56
+ {%- else -%}
57
+ {{ item_key }}:{{ format_argument(item_value) }}
58
+ {%- endif -%}
59
+ {%- endif -%}
60
+ {%- endfor -%}
61
+ }
62
+ {%- endif -%}
63
+ {%- endif -%}
64
+ ,type:<escape>{{ value['type'] | upper }}<escape>}
65
+ {%- endif -%}
66
+ {%- endfor -%}
67
+ {%- endmacro -%}
68
+ {% macro format_function_declaration(tool_data) -%}{#- Renders one tool as declaration:NAME{description:...,parameters:{properties:{...},required:[...],type:...}}. The parameters object is always closed, whether or not the schema carries a top-level type. -#}
69
+ declaration:{{- tool_data['function']['name'] -}}
70
+ {description:<escape>{{- tool_data['function']['description'] -}}<escape>
71
+ {%- set params = tool_data['function']['parameters'] -%}
72
+ {%- if params -%}
73
+ ,parameters:{
74
+ {%- if params['properties'] -%}
75
+ properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
76
+ {%- endif -%}
77
+ {%- if params['required'] -%}
78
+ required:[
79
+ {%- for item in params['required'] -%}
80
+ <escape>{{- item -}}<escape>
81
+ {{- ',' if not loop.last -}}
82
+ {%- endfor -%}
83
+ ],
84
+ {%- endif -%}
85
+ {%- if params['type'] -%}
86
+ type:<escape>{{- params['type'] | upper -}}<escape>
87
+ {%- endif -%}{{- '}' -}}{#- Closes parameters:{ unconditionally; this brace used to be emitted only when a type was present, leaving the declaration unbalanced otherwise. -#}
88
+ {%- endif -%}
89
+ }
90
+ {%- endmacro -%}
91
+ {% macro format_argument(argument, escape_keys=True) -%}{#- Serialises one value: strings are wrapped in <escape> markers, booleans become true or false, mappings (sorted by key) and sequences are rendered recursively, and any other value is emitted as-is. -#}
92
+ {%- if argument is string -%}
93
+ {{- '<escape>' + argument + '<escape>' -}}
94
+ {%- elif argument is boolean -%}
95
+ {%- if argument -%}
96
+ {{- 'true' -}}
97
+ {%- else -%}
98
+ {{- 'false' -}}
99
+ {%- endif -%}
100
+ {%- elif argument is mapping -%}
101
+ {{- '{' -}}
102
+ {%- set ns = namespace(found_first=false) -%}
103
+ {%- for key, value in argument | dictsort -%}
104
+ {%- if ns.found_first %},{% endif -%}
105
+ {%- set ns.found_first = true -%}
106
+ {%- if escape_keys -%}{#- escape_keys decides whether mapping keys are wrapped in <escape> markers; it is forwarded unchanged to nested calls. -#}
107
+ {{- '<escape>' + key + '<escape>' -}}
108
+ {%- else -%}
109
+ {{- key -}}
110
+ {%- endif -%}
111
+ :{{- format_argument(value, escape_keys=escape_keys) -}}
112
+ {%- endfor -%}
113
+ {{- '}' -}}
114
+ {%- elif argument is sequence -%}
115
+ {{- '[' -}}
116
+ {%- for item in argument -%}
117
+ {{- format_argument(item, escape_keys=escape_keys) -}}
118
+ {%- if not loop.last %},{% endif -%}
119
+ {%- endfor -%}
120
+ {{- ']' -}}
121
+ {%- else -%}
122
+ {{- argument -}}
123
+ {%- endif -%}
124
+ {%- endmacro -%}
125
+ {{ bos_token }}
126
+ {%- set ns = namespace(prev_message_type=None) -%}
127
+ {#- Tool Declarations -#}
128
+ {%- set loop_messages = messages -%}
129
+ {%- if tools or messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
130
+ {{- '<start_of_turn>developer\n' -}}
131
+ {%- if messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
132
+ {%- if messages[0]['content'] is string -%}
133
+ {{- messages[0]['content'] | trim -}}
134
+ {%- elif messages[0]['content'] is sequence -%}
135
+ {%- for item in messages[0]['content'] -%}
136
+ {%- if item['type'] == 'text' -%}
137
+ {{- item['text'] | trim -}}
138
+ {%- endif -%}
139
+ {%- endfor -%}
140
+ {%- endif -%}
141
+ {%- set loop_messages = messages[1:] -%}
142
+ {%- endif -%}
143
+ {%- if tools -%}
144
+ {%- for tool in tools %}
145
+ {{- '<start_function_declaration>' -}}
146
+ {{- format_function_declaration(tool) | trim }}
147
+ {{- '<end_function_declaration>' -}}
148
+ {%- endfor %}
149
+ {%- endif -%}
150
+ {{- '<end_of_turn>\n' }}
151
+ {%- endif %}
152
+ {#- Loop through messages. -#}
153
+ {%- for message in loop_messages -%}
154
+ {%- if (message['role'] == 'assistant') -%}
155
+ {#- Rename "assistant" to "model". -#}
156
+ {%- set role = "model" -%}
157
+ {%- else -%}
158
+ {%- set role = message['role'] -%}
159
+ {%- endif -%}
160
+ {%- if role != 'tool' -%}
161
+ {%- if ns.prev_message_type != 'tool_response' -%}
162
+ {{- '<start_of_turn>' + role + '\n' }}
163
+ {%- endif -%}
164
+ {%- set ns.prev_message_type = None -%}
165
+ {%- if 'content' in message and message['content'] is not none -%}
166
+ {%- if message['content'] is string -%}
167
+ {{ message['content'] | trim }}
168
+ {%- elif message['content'] is sequence -%}
169
+ {%- for item in message['content'] -%}
170
+ {%- if item['type'] == 'image' -%}
171
+ {{ '<start_of_image>' }}
172
+ {%- elif item['type'] == 'text' -%}
173
+ {{ item['text'] | trim }}
174
+ {%- endif -%}
175
+ {%- endfor -%}
176
+ {%- else -%}
177
+ {{ raise_exception("Invalid content type in user/assistant message") }}
178
+ {%- endif -%}
179
+ {%- set ns.prev_message_type = 'content' -%}
180
+ {%- endif -%}
181
+ {%- if 'tool_calls' in message and message['tool_calls'] and message['tool_calls'] is iterable -%}
182
+ {#- Tool Calls -#}
183
+ {%- for tool_call in message['tool_calls'] -%}
184
+ {% set function = tool_call['function'] %}
185
+ {{- '<start_function_call>call:' + function['name'] + '{' -}}
186
+ {%- if 'arguments' in function -%}
187
+ {%- if function['arguments'] is mapping -%}
188
+ {%- set ns = namespace(found_first=false) -%}
189
+ {%- for key, value in function['arguments'] | dictsort -%}
190
+ {%- if ns.found_first %},{% endif -%}
191
+ {%- set ns.found_first = true -%}
192
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
193
+ {%- endfor -%}
194
+ {%- elif function['arguments'] is string -%}
195
+ {# This handles string-JSON, just in case #}
196
+ {{ function['arguments'] }}
197
+ {%- endif %}
198
+ {%- endif -%}
199
+ {{- '}<end_function_call>' -}}
200
+ {%- endfor -%}
201
+ {%- if loop.last -%}
202
+ {{ '<start_function_response>' }}
203
+ {%- endif -%}
204
+ {%- set ns.prev_message_type = 'tool_call' -%}
205
+ {%- endif -%}
206
+ {%- else -%}
207
+ {#- Tool Responses -#}
208
+ {%- if 'content' in message and message['content'] -%}
209
+ {%- if message['content'] is mapping -%}
210
+ {%- if 'name' in message['content'] and 'response' in message['content'] -%}
211
+ {{ '<start_function_response>response:' + message['content']['name'] | trim + '{' }}
212
+ {%- set response_ns = namespace(found_first=false) -%}
213
+ {%- for key, value in message['content']['response'] | dictsort -%}
214
+ {%- if response_ns.found_first %},{% endif -%}
215
+ {%- set response_ns.found_first = true -%}
216
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
217
+ {%- endfor -%}
218
+ {{- '}<end_function_response>' -}}
219
+ {%- elif 'name' in message -%}
220
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
221
+ {%- set response_ns = namespace(found_first=false) -%}
222
+ {%- for key, value in message['content'] | dictsort -%}
223
+ {%- if response_ns.found_first %},{% endif -%}
224
+ {%- set response_ns.found_first = true -%}
225
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
226
+ {%- endfor -%}
227
+ {{- '}<end_function_response>' -}}
228
+ {%- else -%}
229
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
230
+ {%- endif -%}
231
+ {%- elif message['content'] is string -%}
232
+ {%- if 'name' in message -%}
233
+ {{ '<start_function_response>response:' + message['name'] | trim + '{value:' + format_argument(message['content'], escape_keys=False) + '}<end_function_response>' }}
234
+ {%- else -%}
235
+ {{ raise_exception("Invalid tool response: 'name' must be provided.") }}
236
+ {%- endif -%}
237
+ {%- elif message['content'] is sequence -%}
238
+ {%- for item in message['content'] -%}
239
+ {%- if item is mapping -%}
240
+ {%- if 'name' in item and 'response' in item -%}
241
+ {{ '<start_function_response>response:' + item['name'] | trim + '{' }}
242
+ {%- set response_ns = namespace(found_first=false) -%}
243
+ {%- for key, value in item['response'] | dictsort -%}
244
+ {%- if response_ns.found_first %},{% endif -%}
245
+ {%- set response_ns.found_first = true -%}
246
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
247
+ {%- endfor -%}
248
+ {{- '}<end_function_response>' -}}
249
+ {%- elif 'name' in message -%}
250
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
251
+ {%- set response_ns = namespace(found_first=false) -%}
252
+ {%- for key, value in item | dictsort -%}
253
+ {%- if response_ns.found_first %},{% endif -%}
254
+ {%- set response_ns.found_first = true -%}
255
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
256
+ {%- endfor -%}
257
+ {{- '}<end_function_response>' -}}
258
+ {%- else -%}
259
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
260
+ {%- endif -%}
261
+ {%- else -%}
262
+ {{ raise_exception("Invalid tool response message: multiple responses must all be mappings") }}
263
+ {%- endif -%}
264
+ {%- endfor -%}
265
+ {%- else -%}
266
+ {{ raise_exception("Invalid content type in tool message: must be mapping, sequence of mappings, or string.") }}
267
+ {%- endif -%}
268
+ {%- endif -%}
269
+ {%- set ns.prev_message_type = 'tool_response' -%}
270
+ {%- endif -%}
271
+ {%- if ns.prev_message_type not in ['tool_call', 'tool_response'] -%}
272
+ {{ '<end_of_turn>\n' }}
273
+ {%- endif -%}
274
+ {%- endfor -%}
275
+ {%- if add_generation_prompt -%}
276
+ {%- if ns.prev_message_type != 'tool_response' -%}
277
+ {{- '<start_of_turn>model\n' -}}
278
+ {%- endif -%}
279
+ {%- endif -%}
checkpoint-308/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_sliding_window_pattern": 6,
3
+ "architectures": [
4
+ "Gemma3ForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "attn_logit_softcapping": null,
9
+ "bos_token_id": 2,
10
+ "dtype": "bfloat16",
11
+ "eos_token_id": 1,
12
+ "final_logit_softcapping": null,
13
+ "head_dim": 256,
14
+ "hidden_activation": "gelu_pytorch_tanh",
15
+ "hidden_size": 640,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 2048,
18
+ "layer_types": [
19
+ "sliding_attention",
20
+ "sliding_attention",
21
+ "sliding_attention",
22
+ "sliding_attention",
23
+ "sliding_attention",
24
+ "full_attention",
25
+ "sliding_attention",
26
+ "sliding_attention",
27
+ "sliding_attention",
28
+ "sliding_attention",
29
+ "sliding_attention",
30
+ "full_attention",
31
+ "sliding_attention",
32
+ "sliding_attention",
33
+ "sliding_attention",
34
+ "sliding_attention",
35
+ "sliding_attention",
36
+ "full_attention"
37
+ ],
38
+ "max_position_embeddings": 32768,
39
+ "model_type": "gemma3_text",
40
+ "num_attention_heads": 4,
41
+ "num_hidden_layers": 18,
42
+ "num_key_value_heads": 1,
43
+ "pad_token_id": 0,
44
+ "query_pre_attn_scalar": 256,
45
+ "rms_norm_eps": 1e-06,
46
+ "rope_parameters": {
47
+ "full_attention": {
48
+ "rope_theta": 1000000.0,
49
+ "rope_type": "default"
50
+ },
51
+ "sliding_attention": {
52
+ "rope_theta": 10000.0,
53
+ "rope_type": "default"
54
+ }
55
+ },
56
+ "sliding_window": 512,
57
+ "tie_word_embeddings": true,
58
+ "transformers_version": "5.5.1",
59
+ "use_bidirectional_attention": false,
60
+ "use_cache": false,
61
+ "vocab_size": 262144
62
+ }
checkpoint-308/generation_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 2,
3
+ "cache_implementation": "hybrid",
4
+ "do_sample": true,
5
+ "eos_token_id": [
6
+ 1,
7
+ 1,
8
+ 50,
9
+ 106
10
+ ],
11
+ "pad_token_id": 0,
12
+ "top_k": 64,
13
+ "top_p": 0.95,
14
+ "transformers_version": "5.5.1"
15
+ }
checkpoint-308/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fdad25a78c297aab9ec2b809949a6f0b1968f7a6a09d486343d48fe3f5c51da
3
+ size 536223056
checkpoint-308/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:251ca04ead3677dedb4846271e2205f3277a2eca493a068019cb64e3e4754342
3
+ size 1072594443
checkpoint-308/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea11996454b5587fcf33ae0ab5cf14b2031bf5f53f8c2ed5a48e87de31e29c84
3
+ size 14645
checkpoint-308/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46bf8c059ac0682006a9531cce7258bd0ef62fc0ab3b4eceb1892efacaf6680b
3
+ size 1465
checkpoint-308/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80d7f800b949accd7eb940bac75e642f9468e4df157403032a55bf54ed23b650
3
+ size 33384898
checkpoint-308/tokenizer_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "boi_token": "<start_of_image>",
4
+ "bos_token": "<bos>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "eoi_token": "<end_of_image>",
7
+ "eos_token": "<eos>",
8
+ "image_token": "<image_soft_token>",
9
+ "is_local": false,
10
+ "mask_token": "<mask>",
11
+ "model_max_length": 1000000000000000019884624838656,
12
+ "model_specific_special_tokens": {
13
+ "boi_token": "<start_of_image>",
14
+ "eoi_token": "<end_of_image>",
15
+ "image_token": "<image_soft_token>",
16
+ "sfr_token": "<start_function_response>"
17
+ },
18
+ "pad_token": "<pad>",
19
+ "padding_side": "left",
20
+ "sfr_token": "<start_function_response>",
21
+ "sp_model_kwargs": null,
22
+ "spaces_between_special_tokens": false,
23
+ "tokenizer_class": "GemmaTokenizer",
24
+ "unk_token": "<unk>",
25
+ "use_default_system_prompt": false
26
+ }
checkpoint-308/trainer_state.json ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 4.0,
6
+ "eval_steps": 50,
7
+ "global_step": 308,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "entropy": 0.8290567851066589,
14
+ "epoch": 0.6568144499178982,
15
+ "grad_norm": 0.75390625,
16
+ "learning_rate": 9.388394947836278e-06,
17
+ "loss": 0.11062156677246093,
18
+ "mean_token_accuracy": 0.9778690934181213,
19
+ "num_tokens": 389732.0,
20
+ "step": 50
21
+ },
22
+ {
23
+ "epoch": 0.6568144499178982,
24
+ "eval_entropy": 0.7784549756483599,
25
+ "eval_loss": 0.03614399954676628,
26
+ "eval_mean_token_accuracy": 0.9917981918756064,
27
+ "eval_num_tokens": 389732.0,
28
+ "eval_runtime": 12.9713,
29
+ "eval_samples_per_second": 23.513,
30
+ "eval_steps_per_second": 5.936,
31
+ "step": 50
32
+ },
33
+ {
34
+ "entropy": 0.7608775209834557,
35
+ "epoch": 1.3021346469622332,
36
+ "grad_norm": 0.6796875,
37
+ "learning_rate": 7.660160382576683e-06,
38
+ "loss": 0.022044627666473388,
39
+ "mean_token_accuracy": 0.9940849557784374,
40
+ "num_tokens": 771178.0,
41
+ "step": 100
42
+ },
43
+ {
44
+ "epoch": 1.3021346469622332,
45
+ "eval_entropy": 0.7180899474527929,
46
+ "eval_loss": 0.019734159111976624,
47
+ "eval_mean_token_accuracy": 0.9949120081864394,
48
+ "eval_num_tokens": 771178.0,
49
+ "eval_runtime": 13.0354,
50
+ "eval_samples_per_second": 23.398,
51
+ "eval_steps_per_second": 5.907,
52
+ "step": 100
53
+ },
54
+ {
55
+ "entropy": 0.7120695394277573,
56
+ "epoch": 1.9589490968801315,
57
+ "grad_norm": 0.4453125,
58
+ "learning_rate": 5.25488887635095e-06,
59
+ "loss": 0.013736556768417358,
60
+ "mean_token_accuracy": 0.9967586588859558,
61
+ "num_tokens": 1165084.0,
62
+ "step": 150
63
+ },
64
+ {
65
+ "epoch": 1.9589490968801315,
66
+ "eval_entropy": 0.6842893903905695,
67
+ "eval_loss": 0.013124481774866581,
68
+ "eval_mean_token_accuracy": 0.9965388542645938,
69
+ "eval_num_tokens": 1165084.0,
70
+ "eval_runtime": 13.0811,
71
+ "eval_samples_per_second": 23.316,
72
+ "eval_steps_per_second": 5.886,
73
+ "step": 150
74
+ },
75
+ {
76
+ "entropy": 0.6924438694961198,
77
+ "epoch": 2.6042692939244665,
78
+ "grad_norm": 0.423828125,
79
+ "learning_rate": 2.7847456480060476e-06,
80
+ "loss": 0.008616942763328552,
81
+ "mean_token_accuracy": 0.9980061287795011,
82
+ "num_tokens": 1550848.0,
83
+ "step": 200
84
+ },
85
+ {
86
+ "epoch": 2.6042692939244665,
87
+ "eval_entropy": 0.678199358961799,
88
+ "eval_loss": 0.012337171472609043,
89
+ "eval_mean_token_accuracy": 0.996950343831793,
90
+ "eval_num_tokens": 1550848.0,
91
+ "eval_runtime": 13.0386,
92
+ "eval_samples_per_second": 23.392,
93
+ "eval_steps_per_second": 5.906,
94
+ "step": 200
95
+ },
96
+ {
97
+ "entropy": 0.6876007438312657,
98
+ "epoch": 3.2495894909688015,
99
+ "grad_norm": 0.6484375,
100
+ "learning_rate": 8.784064067287057e-07,
101
+ "loss": 0.008523799180984497,
102
+ "mean_token_accuracy": 0.9980273214915326,
103
+ "num_tokens": 1935501.0,
104
+ "step": 250
105
+ },
106
+ {
107
+ "epoch": 3.2495894909688015,
108
+ "eval_entropy": 0.6709755380432327,
109
+ "eval_loss": 0.011750386096537113,
110
+ "eval_mean_token_accuracy": 0.9970997378423616,
111
+ "eval_num_tokens": 1935501.0,
112
+ "eval_runtime": 13.0005,
113
+ "eval_samples_per_second": 23.461,
114
+ "eval_steps_per_second": 5.923,
115
+ "step": 250
116
+ },
117
+ {
118
+ "entropy": 0.6889240379631519,
119
+ "epoch": 3.9064039408866993,
120
+ "grad_norm": 0.6015625,
121
+ "learning_rate": 2.1053210266875346e-08,
122
+ "loss": 0.00826053500175476,
123
+ "mean_token_accuracy": 0.9980920545756817,
124
+ "num_tokens": 2322603.0,
125
+ "step": 300
126
+ },
127
+ {
128
+ "epoch": 3.9064039408866993,
129
+ "eval_entropy": 0.6722380197667456,
130
+ "eval_loss": 0.011939619667828083,
131
+ "eval_mean_token_accuracy": 0.996877890128594,
132
+ "eval_num_tokens": 2322603.0,
133
+ "eval_runtime": 13.2474,
134
+ "eval_samples_per_second": 23.023,
135
+ "eval_steps_per_second": 5.812,
136
+ "step": 300
137
+ },
138
+ {
139
+ "epoch": 4.0,
140
+ "eval_entropy": 0.6724205961475125,
141
+ "eval_loss": 0.01190057210624218,
142
+ "eval_mean_token_accuracy": 0.9969809620411365,
143
+ "eval_num_tokens": 2378596.0,
144
+ "eval_runtime": 13.6271,
145
+ "eval_samples_per_second": 22.382,
146
+ "eval_steps_per_second": 5.651,
147
+ "step": 308
148
+ }
149
+ ],
150
+ "logging_steps": 50,
151
+ "max_steps": 308,
152
+ "num_input_tokens_seen": 0,
153
+ "num_train_epochs": 4,
154
+ "save_steps": 500,
155
+ "stateful_callbacks": {
156
+ "TrainerControl": {
157
+ "args": {
158
+ "should_epoch_stop": false,
159
+ "should_evaluate": false,
160
+ "should_log": false,
161
+ "should_save": true,
162
+ "should_training_stop": true
163
+ },
164
+ "attributes": {}
165
+ }
166
+ },
167
+ "total_flos": 2009014625409792.0,
168
+ "train_batch_size": 4,
169
+ "trial_name": null,
170
+ "trial_params": null
171
+ }
checkpoint-308/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5363ce9d99dd9dbf1bc2c5ff6c4b0ce553147005c7381cbde8798f93d5971fb
3
+ size 5649
checkpoint-77/chat_template.jinja ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- macro format_parameters(properties, required) -%}
2
+ {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
3
+ {%- set ns = namespace(found_first=false) -%}
4
+ {%- for key, value in properties | dictsort -%}
5
+ {%- if key not in standard_keys -%}
6
+ {%- if ns.found_first %},{% endif -%}
7
+ {%- set ns.found_first = true -%}
8
+ {{- key }}:{description:<escape>{{ value['description'] }}<escape>
9
+ {%- if value['type'] | upper == 'STRING' -%}
10
+ {%- if value['enum'] -%}
11
+ ,enum:{{ format_argument(value['enum']) }}
12
+ {%- endif -%}
13
+ {%- elif value['type'] | upper == 'OBJECT' -%}
14
+ ,properties:{
15
+ {%- if value['properties'] is defined and value['properties'] is mapping -%}
16
+ {{- format_parameters(value['properties'], value['required'] | default([])) -}}
17
+ {%- elif value is mapping -%}
18
+ {{- format_parameters(value, value['required'] | default([])) -}}
19
+ {%- endif -%}
20
+ }
21
+ {%- if value['required'] -%}
22
+ ,required:[
23
+ {%- for item in value['required'] | default([]) -%}
24
+ <escape>{{- item -}}<escape>
25
+ {%- if not loop.last %},{% endif -%}
26
+ {%- endfor -%}
27
+ ]
28
+ {%- endif -%}
29
+ {%- elif value['type'] | upper == 'ARRAY' -%}
30
+ {%- if value['items'] is mapping and value['items'] -%}
31
+ ,items:{
32
+ {%- set ns_items = namespace(found_first=false) -%}
33
+ {%- for item_key, item_value in value['items'] | dictsort -%}
34
+ {%- if item_value is not none -%}
35
+ {%- if ns_items.found_first %},{% endif -%}
36
+ {%- set ns_items.found_first = true -%}
37
+ {%- if item_key == 'properties' -%}
38
+ properties:{
39
+ {%- if item_value is mapping -%}
40
+ {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
41
+ {%- endif -%}
42
+ }
43
+ {%- elif item_key == 'required' -%}
44
+ required:[
45
+ {%- for req_item in item_value -%}
46
+ <escape>{{- req_item -}}<escape>
47
+ {%- if not loop.last %},{% endif -%}
48
+ {%- endfor -%}
49
+ ]
50
+ {%- elif item_key == 'type' -%}
51
+ {%- if item_value is string -%}
52
+ type:{{ format_argument(item_value | upper) }}
53
+ {%- else -%}
54
+ type:{{ format_argument(item_value | map('upper') | list) }}
55
+ {%- endif -%}
56
+ {%- else -%}
57
+ {{ item_key }}:{{ format_argument(item_value) }}
58
+ {%- endif -%}
59
+ {%- endif -%}
60
+ {%- endfor -%}
61
+ }
62
+ {%- endif -%}
63
+ {%- endif -%}
64
+ ,type:<escape>{{ value['type'] | upper }}<escape>}
65
+ {%- endif -%}
66
+ {%- endfor -%}
67
+ {%- endmacro -%}
68
+ {% macro format_function_declaration(tool_data) -%}
69
+ declaration:{{- tool_data['function']['name'] -}}
70
+ {description:<escape>{{- tool_data['function']['description'] -}}<escape>
71
+ {%- set params = tool_data['function']['parameters'] -%}
72
+ {%- if params -%}
73
+ ,parameters:{
74
+ {%- if params['properties'] -%}
75
+ properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
76
+ {%- endif -%}
77
+ {%- if params['required'] -%}
78
+ required:[
79
+ {%- for item in params['required'] -%}
80
+ <escape>{{- item -}}<escape>
81
+ {{- ',' if not loop.last -}}
82
+ {%- endfor -%}
83
+ ],
84
+ {%- endif -%}
85
+ {%- if params['type'] -%}
86
+ type:<escape>{{- params['type'] | upper -}}<escape>}
87
+ {%- endif -%}
88
+ {%- endif -%}
89
+ }
90
+ {%- endmacro -%}
91
+ {% macro format_argument(argument, escape_keys=True) -%}
92
+ {%- if argument is string -%}
93
+ {{- '<escape>' + argument + '<escape>' -}}
94
+ {%- elif argument is boolean -%}
95
+ {%- if argument -%}
96
+ {{- 'true' -}}
97
+ {%- else -%}
98
+ {{- 'false' -}}
99
+ {%- endif -%}
100
+ {%- elif argument is mapping -%}
101
+ {{- '{' -}}
102
+ {%- set ns = namespace(found_first=false) -%}
103
+ {%- for key, value in argument | dictsort -%}
104
+ {%- if ns.found_first %},{% endif -%}
105
+ {%- set ns.found_first = true -%}
106
+ {%- if escape_keys -%}
107
+ {{- '<escape>' + key + '<escape>' -}}
108
+ {%- else -%}
109
+ {{- key -}}
110
+ {%- endif -%}
111
+ :{{- format_argument(value, escape_keys=escape_keys) -}}
112
+ {%- endfor -%}
113
+ {{- '}' -}}
114
+ {%- elif argument is sequence -%}
115
+ {{- '[' -}}
116
+ {%- for item in argument -%}
117
+ {{- format_argument(item, escape_keys=escape_keys) -}}
118
+ {%- if not loop.last %},{% endif -%}
119
+ {%- endfor -%}
120
+ {{- ']' -}}
121
+ {%- else -%}
122
+ {{- argument -}}
123
+ {%- endif -%}
124
+ {%- endmacro -%}
125
+ {{ bos_token }}
126
+ {%- set ns = namespace(prev_message_type=None) -%}
127
+ {#- Tool Declarations -#}
128
+ {%- set loop_messages = messages -%}
129
+ {%- if tools or messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
130
+ {{- '<start_of_turn>developer\n' -}}
131
+ {%- if messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
132
+ {%- if messages[0]['content'] is string -%}
133
+ {{- messages[0]['content'] | trim -}}
134
+ {%- elif messages[0]['content'] is sequence -%}
135
+ {%- for item in messages[0]['content'] -%}
136
+ {%- if item['type'] == 'text' -%}
137
+ {{- item['text'] | trim -}}
138
+ {%- endif -%}
139
+ {%- endfor -%}
140
+ {%- endif -%}
141
+ {%- set loop_messages = messages[1:] -%}
142
+ {%- endif -%}
143
+ {%- if tools -%}
144
+ {%- for tool in tools %}
145
+ {{- '<start_function_declaration>' -}}
146
+ {{- format_function_declaration(tool) | trim }}
147
+ {{- '<end_function_declaration>' -}}
148
+ {%- endfor %}
149
+ {%- endif -%}
150
+ {{- '<end_of_turn>\n' }}
151
+ {%- endif %}
152
+ {#- Loop through messages. -#}
153
+ {%- for message in loop_messages -%}
154
+ {%- if (message['role'] == 'assistant') -%}
155
+ {#- Rename "assistant" to "model". -#}
156
+ {%- set role = "model" -%}
157
+ {%- else -%}
158
+ {%- set role = message['role'] -%}
159
+ {%- endif -%}
160
+ {%- if role != 'tool' -%}
161
+ {%- if ns.prev_message_type != 'tool_response' -%}
162
+ {{- '<start_of_turn>' + role + '\n' }}
163
+ {%- endif -%}
164
+ {%- set ns.prev_message_type = None -%}
165
+ {%- if 'content' in message and message['content'] is not none -%}
166
+ {%- if message['content'] is string -%}
167
+ {{ message['content'] | trim }}
168
+ {%- elif message['content'] is sequence -%}
169
+ {%- for item in message['content'] -%}
170
+ {%- if item['type'] == 'image' -%}
171
+ {{ '<start_of_image>' }}
172
+ {%- elif item['type'] == 'text' -%}
173
+ {{ item['text'] | trim }}
174
+ {%- endif -%}
175
+ {%- endfor -%}
176
+ {%- else -%}
177
+ {{ raise_exception("Invalid content type in user/assistant message") }}
178
+ {%- endif -%}
179
+ {%- set ns.prev_message_type = 'content' -%}
180
+ {%- endif -%}
181
+ {%- if 'tool_calls' in message and message['tool_calls'] and message['tool_calls'] is iterable -%}
182
+ {#- Tool Calls -#}
183
+ {%- for tool_call in message['tool_calls'] -%}
184
+ {% set function = tool_call['function'] %}
185
+ {{- '<start_function_call>call:' + function['name'] + '{' -}}
186
+ {%- if 'arguments' in function -%}
187
+ {%- if function['arguments'] is mapping -%}
188
+ {%- set ns = namespace(found_first=false) -%}
189
+ {%- for key, value in function['arguments'] | dictsort -%}
190
+ {%- if ns.found_first %},{% endif -%}
191
+ {%- set ns.found_first = true -%}
192
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
193
+ {%- endfor -%}
194
+ {%- elif function['arguments'] is string -%}
195
+ {# This handles string-JSON, just in case #}
196
+ {{ function['arguments'] }}
197
+ {%- endif %}
198
+ {%- endif -%}
199
+ {{- '}<end_function_call>' -}}
200
+ {%- endfor -%}
201
+ {%- if loop.last -%}
202
+ {{ '<start_function_response>' }}
203
+ {%- endif -%}
204
+ {%- set ns.prev_message_type = 'tool_call' -%}
205
+ {%- endif -%}
206
+ {%- else -%}
207
+ {#- Tool Responses -#}
208
+ {%- if 'content' in message and message['content'] -%}
209
+ {%- if message['content'] is mapping -%}
210
+ {%- if 'name' in message['content'] and 'response' in message['content'] -%}
211
+ {{ '<start_function_response>response:' + message['content']['name'] | trim + '{' }}
212
+ {%- set response_ns = namespace(found_first=false) -%}
213
+ {%- for key, value in message['content']['response'] | dictsort -%}
214
+ {%- if response_ns.found_first %},{% endif -%}
215
+ {%- set response_ns.found_first = true -%}
216
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
217
+ {%- endfor -%}
218
+ {{- '}<end_function_response>' -}}
219
+ {%- elif 'name' in message -%}
220
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
221
+ {%- set response_ns = namespace(found_first=false) -%}
222
+ {%- for key, value in message['content'] | dictsort -%}
223
+ {%- if response_ns.found_first %},{% endif -%}
224
+ {%- set response_ns.found_first = true -%}
225
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
226
+ {%- endfor -%}
227
+ {{- '}<end_function_response>' -}}
228
+ {%- else -%}
229
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
230
+ {%- endif -%}
231
+ {%- elif message['content'] is string -%}
232
+ {%- if 'name' in message -%}
233
+ {{ '<start_function_response>response:' + message['name'] | trim + '{value:' + format_argument(message['content'], escape_keys=False) + '}<end_function_response>' }}
234
+ {%- else -%}
235
+ {{ raise_exception("Invalid tool response: 'name' must be provided.") }}
236
+ {%- endif -%}
237
+ {%- elif message['content'] is sequence -%}
238
+ {%- for item in message['content'] -%}
239
+ {%- if item is mapping -%}
240
+ {%- if 'name' in item and 'response' in item -%}
241
+ {{ '<start_function_response>response:' + item['name'] | trim + '{' }}
242
+ {%- set response_ns = namespace(found_first=false) -%}
243
+ {%- for key, value in item['response'] | dictsort -%}
244
+ {%- if response_ns.found_first %},{% endif -%}
245
+ {%- set response_ns.found_first = true -%}
246
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
247
+ {%- endfor -%}
248
+ {{- '}<end_function_response>' -}}
249
+ {%- elif 'name' in message -%}
250
+ {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
251
+ {%- set response_ns = namespace(found_first=false) -%}
252
+ {%- for key, value in item | dictsort -%}
253
+ {%- if response_ns.found_first %},{% endif -%}
254
+ {%- set response_ns.found_first = true -%}
255
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
256
+ {%- endfor -%}
257
+ {{- '}<end_function_response>' -}}
258
+ {%- else -%}
259
+ {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
260
+ {%- endif -%}
261
+ {%- else -%}
262
+ {{ raise_exception("Invalid tool response message: multiple responses must all be mappings") }}
263
+ {%- endif -%}
264
+ {%- endfor -%}
265
+ {%- else -%}
266
+ {{ raise_exception("Invalid content type in tool message: must be mapping, sequence of mappings, or string.") }}
267
+ {%- endif -%}
268
+ {%- endif -%}
269
+ {%- set ns.prev_message_type = 'tool_response' -%}
270
+ {%- endif -%}
271
+ {%- if ns.prev_message_type not in ['tool_call', 'tool_response'] -%}
272
+ {{ '<end_of_turn>\n' }}
273
+ {%- endif -%}
274
+ {%- endfor -%}
275
+ {%- if add_generation_prompt -%}
276
+ {%- if ns.prev_message_type != 'tool_response' -%}
277
+ {{- '<start_of_turn>model\n' -}}
278
+ {%- endif -%}
279
+ {%- endif -%}
checkpoint-77/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_sliding_window_pattern": 6,
3
+ "architectures": [
4
+ "Gemma3ForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "attn_logit_softcapping": null,
9
+ "bos_token_id": 2,
10
+ "dtype": "bfloat16",
11
+ "eos_token_id": 1,
12
+ "final_logit_softcapping": null,
13
+ "head_dim": 256,
14
+ "hidden_activation": "gelu_pytorch_tanh",
15
+ "hidden_size": 640,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 2048,
18
+ "layer_types": [
19
+ "sliding_attention",
20
+ "sliding_attention",
21
+ "sliding_attention",
22
+ "sliding_attention",
23
+ "sliding_attention",
24
+ "full_attention",
25
+ "sliding_attention",
26
+ "sliding_attention",
27
+ "sliding_attention",
28
+ "sliding_attention",
29
+ "sliding_attention",
30
+ "full_attention",
31
+ "sliding_attention",
32
+ "sliding_attention",
33
+ "sliding_attention",
34
+ "sliding_attention",
35
+ "sliding_attention",
36
+ "full_attention"
37
+ ],
38
+ "max_position_embeddings": 32768,
39
+ "model_type": "gemma3_text",
40
+ "num_attention_heads": 4,
41
+ "num_hidden_layers": 18,
42
+ "num_key_value_heads": 1,
43
+ "pad_token_id": 0,
44
+ "query_pre_attn_scalar": 256,
45
+ "rms_norm_eps": 1e-06,
46
+ "rope_parameters": {
47
+ "full_attention": {
48
+ "rope_theta": 1000000.0,
49
+ "rope_type": "default"
50
+ },
51
+ "sliding_attention": {
52
+ "rope_theta": 10000.0,
53
+ "rope_type": "default"
54
+ }
55
+ },
56
+ "sliding_window": 512,
57
+ "tie_word_embeddings": true,
58
+ "transformers_version": "5.5.1",
59
+ "use_bidirectional_attention": false,
60
+ "use_cache": false,
61
+ "vocab_size": 262144
62
+ }
checkpoint-77/generation_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 2,
3
+ "cache_implementation": "hybrid",
4
+ "do_sample": true,
5
+ "eos_token_id": [
6
+ 1,
7
+ 1,
8
+ 50,
9
+ 106
10
+ ],
11
+ "pad_token_id": 0,
12
+ "top_k": 64,
13
+ "top_p": 0.95,
14
+ "transformers_version": "5.5.1"
15
+ }
checkpoint-77/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:394256bfee05b42e5f2df4d6d70a8cc4803914120d528782a9e2906a5796970f
3
+ size 536223056
checkpoint-77/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eb7d1d4bed0c5d4d8629496b7d67361f2a17dec7dd93d23d32503c6aa170495
3
+ size 1072594443
checkpoint-77/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:098b29492211804ab324a36f37466821d948280bb74fce4ba895c03f13ecd878
3
+ size 14645
checkpoint-77/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41252c4518652c4d654b361ca3ba34bc3d27477f1e95b590d2d97028117662b4
3
+ size 1465
checkpoint-77/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80d7f800b949accd7eb940bac75e642f9468e4df157403032a55bf54ed23b650
3
+ size 33384898
checkpoint-77/tokenizer_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "boi_token": "<start_of_image>",
4
+ "bos_token": "<bos>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "eoi_token": "<end_of_image>",
7
+ "eos_token": "<eos>",
8
+ "image_token": "<image_soft_token>",
9
+ "is_local": false,
10
+ "mask_token": "<mask>",
11
+ "model_max_length": 1000000000000000019884624838656,
12
+ "model_specific_special_tokens": {
13
+ "boi_token": "<start_of_image>",
14
+ "eoi_token": "<end_of_image>",
15
+ "image_token": "<image_soft_token>",
16
+ "sfr_token": "<start_function_response>"
17
+ },
18
+ "pad_token": "<pad>",
19
+ "padding_side": "left",
20
+ "sfr_token": "<start_function_response>",
21
+ "sp_model_kwargs": null,
22
+ "spaces_between_special_tokens": false,
23
+ "tokenizer_class": "GemmaTokenizer",
24
+ "unk_token": "<unk>",
25
+ "use_default_system_prompt": false
26
+ }
checkpoint-77/trainer_state.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 50,
7
+ "global_step": 77,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "entropy": 0.8290567851066589,
14
+ "epoch": 0.6568144499178982,
15
+ "grad_norm": 0.75390625,
16
+ "learning_rate": 9.388394947836278e-06,
17
+ "loss": 0.11062156677246093,
18
+ "mean_token_accuracy": 0.9778690934181213,
19
+ "num_tokens": 389732.0,
20
+ "step": 50
21
+ },
22
+ {
23
+ "epoch": 0.6568144499178982,
24
+ "eval_entropy": 0.7784549756483599,
25
+ "eval_loss": 0.03614399954676628,
26
+ "eval_mean_token_accuracy": 0.9917981918756064,
27
+ "eval_num_tokens": 389732.0,
28
+ "eval_runtime": 12.9713,
29
+ "eval_samples_per_second": 23.513,
30
+ "eval_steps_per_second": 5.936,
31
+ "step": 50
32
+ }
33
+ ],
34
+ "logging_steps": 50,
35
+ "max_steps": 308,
36
+ "num_input_tokens_seen": 0,
37
+ "num_train_epochs": 4,
38
+ "save_steps": 500,
39
+ "stateful_callbacks": {
40
+ "TrainerControl": {
41
+ "args": {
42
+ "should_epoch_stop": false,
43
+ "should_evaluate": false,
44
+ "should_log": false,
45
+ "should_save": true,
46
+ "should_training_stop": false
47
+ },
48
+ "attributes": {}
49
+ }
50
+ },
51
+ "total_flos": 503646432269568.0,
52
+ "train_batch_size": 4,
53
+ "trial_name": null,
54
+ "trial_params": null
55
+ }
checkpoint-77/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5363ce9d99dd9dbf1bc2c5ff6c4b0ce553147005c7381cbde8798f93d5971fb
3
+ size 5649
config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_sliding_window_pattern": 6,
3
+ "architectures": [
4
+ "Gemma3ForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "attn_logit_softcapping": null,
9
+ "bos_token_id": 2,
10
+ "dtype": "bfloat16",
11
+ "eos_token_id": 1,
12
+ "final_logit_softcapping": null,
13
+ "head_dim": 256,
14
+ "hidden_activation": "gelu_pytorch_tanh",
15
+ "hidden_size": 640,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 2048,
18
+ "layer_types": [
19
+ "sliding_attention",
20
+ "sliding_attention",
21
+ "sliding_attention",
22
+ "sliding_attention",
23
+ "sliding_attention",
24
+ "full_attention",
25
+ "sliding_attention",
26
+ "sliding_attention",
27
+ "sliding_attention",
28
+ "sliding_attention",
29
+ "sliding_attention",
30
+ "full_attention",
31
+ "sliding_attention",
32
+ "sliding_attention",
33
+ "sliding_attention",
34
+ "sliding_attention",
35
+ "sliding_attention",
36
+ "full_attention"
37
+ ],
38
+ "max_position_embeddings": 32768,
39
+ "model_type": "gemma3_text",
40
+ "num_attention_heads": 4,
41
+ "num_hidden_layers": 18,
42
+ "num_key_value_heads": 1,
43
+ "pad_token_id": 0,
44
+ "query_pre_attn_scalar": 256,
45
+ "rms_norm_eps": 1e-06,
46
+ "rope_parameters": {
47
+ "full_attention": {
48
+ "rope_theta": 1000000.0,
49
+ "rope_type": "default"
50
+ },
51
+ "sliding_attention": {
52
+ "rope_theta": 10000.0,
53
+ "rope_type": "default"
54
+ }
55
+ },
56
+ "sliding_window": 512,
57
+ "tie_word_embeddings": true,
58
+ "transformers_version": "5.5.1",
59
+ "use_bidirectional_attention": false,
60
+ "use_cache": false,
61
+ "vocab_size": 262144
62
+ }
generation_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 2,
3
+ "cache_implementation": "hybrid",
4
+ "do_sample": true,
5
+ "eos_token_id": [
6
+ 1,
7
+ 1,
8
+ 50,
9
+ 106
10
+ ],
11
+ "pad_token_id": 0,
12
+ "top_k": 64,
13
+ "top_p": 0.95,
14
+ "transformers_version": "5.5.1"
15
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fdad25a78c297aab9ec2b809949a6f0b1968f7a6a09d486343d48fe3f5c51da
3
+ size 536223056