JackeyLai committed on
Commit
4a63656
·
verified ·
1 Parent(s): 92ac392

Upload 6 files

Browse files
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
+ "errors": "replace",
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "padding_side": "right",
205
+ "split_special_tokens": false,
206
+ "tokenizer_class": "Qwen2Tokenizer",
207
+ "unk_token": null
208
+ }
trainer_state.json ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 44.44444444444444,
5
+ "eval_steps": 500,
6
+ "global_step": 150,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.4814814814814814,
13
+ "grad_norm": null,
14
+ "learning_rate": 4.995066821070679e-05,
15
+ "loss": 3.9182,
16
+ "num_input_tokens_seen": 11376,
17
+ "step": 5
18
+ },
19
+ {
20
+ "epoch": 2.962962962962963,
21
+ "grad_norm": 4.32528018951416,
22
+ "learning_rate": 4.964990092676263e-05,
23
+ "loss": 2.9902,
24
+ "num_input_tokens_seen": 23120,
25
+ "step": 10
26
+ },
27
+ {
28
+ "epoch": 4.444444444444445,
29
+ "grad_norm": 3.1273458003997803,
30
+ "learning_rate": 4.907906416994146e-05,
31
+ "loss": 2.0558,
32
+ "num_input_tokens_seen": 34960,
33
+ "step": 15
34
+ },
35
+ {
36
+ "epoch": 5.925925925925926,
37
+ "grad_norm": 3.0323095321655273,
38
+ "learning_rate": 4.8244412147206284e-05,
39
+ "loss": 1.5538,
40
+ "num_input_tokens_seen": 46368,
41
+ "step": 20
42
+ },
43
+ {
44
+ "epoch": 7.407407407407407,
45
+ "grad_norm": 1.668092131614685,
46
+ "learning_rate": 4.715508948078037e-05,
47
+ "loss": 1.2808,
48
+ "num_input_tokens_seen": 58112,
49
+ "step": 25
50
+ },
51
+ {
52
+ "epoch": 8.88888888888889,
53
+ "grad_norm": 1.1815176010131836,
54
+ "learning_rate": 4.5823031017752485e-05,
55
+ "loss": 1.0824,
56
+ "num_input_tokens_seen": 69600,
57
+ "step": 30
58
+ },
59
+ {
60
+ "epoch": 10.37037037037037,
61
+ "grad_norm": 1.2929940223693848,
62
+ "learning_rate": 4.426283106939474e-05,
63
+ "loss": 0.9639,
64
+ "num_input_tokens_seen": 81392,
65
+ "step": 35
66
+ },
67
+ {
68
+ "epoch": 11.851851851851851,
69
+ "grad_norm": 0.8027080297470093,
70
+ "learning_rate": 4.249158351283414e-05,
71
+ "loss": 0.8585,
72
+ "num_input_tokens_seen": 92784,
73
+ "step": 40
74
+ },
75
+ {
76
+ "epoch": 13.333333333333334,
77
+ "grad_norm": 0.9231483936309814,
78
+ "learning_rate": 4.052869450695776e-05,
79
+ "loss": 0.7936,
80
+ "num_input_tokens_seen": 104656,
81
+ "step": 45
82
+ },
83
+ {
84
+ "epoch": 14.814814814814815,
85
+ "grad_norm": 0.8608518838882446,
86
+ "learning_rate": 3.8395669874474915e-05,
87
+ "loss": 0.6884,
88
+ "num_input_tokens_seen": 116352,
89
+ "step": 50
90
+ },
91
+ {
92
+ "epoch": 16.296296296296298,
93
+ "grad_norm": 1.0098791122436523,
94
+ "learning_rate": 3.611587947962319e-05,
95
+ "loss": 0.6035,
96
+ "num_input_tokens_seen": 127968,
97
+ "step": 55
98
+ },
99
+ {
100
+ "epoch": 17.77777777777778,
101
+ "grad_norm": 0.9739498496055603,
102
+ "learning_rate": 3.3714301183045385e-05,
103
+ "loss": 0.5181,
104
+ "num_input_tokens_seen": 139184,
105
+ "step": 60
106
+ },
107
+ {
108
+ "epoch": 19.25925925925926,
109
+ "grad_norm": 1.0032066106796265,
110
+ "learning_rate": 3.121724717912138e-05,
111
+ "loss": 0.4617,
112
+ "num_input_tokens_seen": 150848,
113
+ "step": 65
114
+ },
115
+ {
116
+ "epoch": 20.74074074074074,
117
+ "grad_norm": 0.943274974822998,
118
+ "learning_rate": 2.8652075714060295e-05,
119
+ "loss": 0.3967,
120
+ "num_input_tokens_seen": 162544,
121
+ "step": 70
122
+ },
123
+ {
124
+ "epoch": 22.22222222222222,
125
+ "grad_norm": 1.0475025177001953,
126
+ "learning_rate": 2.604689134322999e-05,
127
+ "loss": 0.3591,
128
+ "num_input_tokens_seen": 174128,
129
+ "step": 75
130
+ },
131
+ {
132
+ "epoch": 23.703703703703702,
133
+ "grad_norm": 0.9079996943473816,
134
+ "learning_rate": 2.3430237011767167e-05,
135
+ "loss": 0.2662,
136
+ "num_input_tokens_seen": 185968,
137
+ "step": 80
138
+ },
139
+ {
140
+ "epoch": 25.185185185185187,
141
+ "grad_norm": 1.1758606433868408,
142
+ "learning_rate": 2.0830781332097446e-05,
143
+ "loss": 0.2432,
144
+ "num_input_tokens_seen": 197680,
145
+ "step": 85
146
+ },
147
+ {
148
+ "epoch": 26.666666666666668,
149
+ "grad_norm": 1.1381499767303467,
150
+ "learning_rate": 1.827700448461836e-05,
151
+ "loss": 0.2029,
152
+ "num_input_tokens_seen": 209504,
153
+ "step": 90
154
+ },
155
+ {
156
+ "epoch": 28.14814814814815,
157
+ "grad_norm": 0.9022475481033325,
158
+ "learning_rate": 1.5796886182883053e-05,
159
+ "loss": 0.1476,
160
+ "num_input_tokens_seen": 220880,
161
+ "step": 95
162
+ },
163
+ {
164
+ "epoch": 29.62962962962963,
165
+ "grad_norm": 1.102629542350769,
166
+ "learning_rate": 1.3417599122003464e-05,
167
+ "loss": 0.1575,
168
+ "num_input_tokens_seen": 232544,
169
+ "step": 100
170
+ },
171
+ {
172
+ "epoch": 31.11111111111111,
173
+ "grad_norm": 0.8161982297897339,
174
+ "learning_rate": 1.11652112689164e-05,
175
+ "loss": 0.1061,
176
+ "num_input_tokens_seen": 244144,
177
+ "step": 105
178
+ },
179
+ {
180
+ "epoch": 32.592592592592595,
181
+ "grad_norm": 0.6333702802658081,
182
+ "learning_rate": 9.064400256282757e-06,
183
+ "loss": 0.1196,
184
+ "num_input_tokens_seen": 255952,
185
+ "step": 110
186
+ },
187
+ {
188
+ "epoch": 34.074074074074076,
189
+ "grad_norm": 0.6209620833396912,
190
+ "learning_rate": 7.138183009179922e-06,
191
+ "loss": 0.0896,
192
+ "num_input_tokens_seen": 267328,
193
+ "step": 115
194
+ },
195
+ {
196
+ "epoch": 35.55555555555556,
197
+ "grad_norm": 0.5725650787353516,
198
+ "learning_rate": 5.4076635668540075e-06,
199
+ "loss": 0.1,
200
+ "num_input_tokens_seen": 279264,
201
+ "step": 120
202
+ },
203
+ {
204
+ "epoch": 37.03703703703704,
205
+ "grad_norm": 0.5420427918434143,
206
+ "learning_rate": 3.891801862449629e-06,
207
+ "loss": 0.0777,
208
+ "num_input_tokens_seen": 290736,
209
+ "step": 125
210
+ },
211
+ {
212
+ "epoch": 38.51851851851852,
213
+ "grad_norm": 0.608285129070282,
214
+ "learning_rate": 2.6072059940146775e-06,
215
+ "loss": 0.0705,
216
+ "num_input_tokens_seen": 302128,
217
+ "step": 130
218
+ },
219
+ {
220
+ "epoch": 40.0,
221
+ "grad_norm": 0.6441460251808167,
222
+ "learning_rate": 1.5679502627027136e-06,
223
+ "loss": 0.0845,
224
+ "num_input_tokens_seen": 314000,
225
+ "step": 135
226
+ },
227
+ {
228
+ "epoch": 41.48148148148148,
229
+ "grad_norm": 0.6497734189033508,
230
+ "learning_rate": 7.854209717842231e-07,
231
+ "loss": 0.0748,
232
+ "num_input_tokens_seen": 325600,
233
+ "step": 140
234
+ },
235
+ {
236
+ "epoch": 42.96296296296296,
237
+ "grad_norm": 0.6788354516029358,
238
+ "learning_rate": 2.681916759252917e-07,
239
+ "loss": 0.0746,
240
+ "num_input_tokens_seen": 337232,
241
+ "step": 145
242
+ },
243
+ {
244
+ "epoch": 44.44444444444444,
245
+ "grad_norm": 0.6091296076774597,
246
+ "learning_rate": 2.192924752854042e-08,
247
+ "loss": 0.0824,
248
+ "num_input_tokens_seen": 349136,
249
+ "step": 150
250
+ },
251
+ {
252
+ "epoch": 44.44444444444444,
253
+ "num_input_tokens_seen": 349136,
254
+ "step": 150,
255
+ "total_flos": 5843993170542592.0,
256
+ "train_loss": 0.6807288352648417,
257
+ "train_runtime": 762.8508,
258
+ "train_samples_per_second": 7.079,
259
+ "train_steps_per_second": 0.197
260
+ }
261
+ ],
262
+ "logging_steps": 5,
263
+ "max_steps": 150,
264
+ "num_input_tokens_seen": 349136,
265
+ "num_train_epochs": 50,
266
+ "save_steps": 100,
267
+ "stateful_callbacks": {
268
+ "TrainerControl": {
269
+ "args": {
270
+ "should_epoch_stop": false,
271
+ "should_evaluate": false,
272
+ "should_log": false,
273
+ "should_save": true,
274
+ "should_training_stop": true
275
+ },
276
+ "attributes": {}
277
+ }
278
+ },
279
+ "total_flos": 5843993170542592.0,
280
+ "train_batch_size": 2,
281
+ "trial_name": null,
282
+ "trial_params": null
283
+ }
training_args.yaml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adapter_name_or_path: saves/Qwen2.5-3B-Instruct/lora/train_2024-10-31-11-14-25
2
+ cutoff_len: 1024
3
+ dataset: self_cognition
4
+ dataset_dir: data
5
+ ddp_timeout: 180000000
6
+ do_train: true
7
+ finetuning_type: lora
8
+ flash_attn: auto
9
+ fp16: true
10
+ gradient_accumulation_steps: 8
11
+ include_num_input_tokens_seen: true
12
+ learning_rate: 5.0e-05
13
+ logging_steps: 5
14
+ lora_alpha: 16
15
+ lora_dropout: 0
16
+ lora_rank: 8
17
+ lora_target: all
18
+ lr_scheduler_type: cosine
19
+ max_grad_norm: 1.0
20
+ max_samples: 100000
21
+ model_name_or_path: /mnt/w2/lzq/nex_LLM/weights/Qwen2.5-3B-Instruct
22
+ num_train_epochs: 100.0
23
+ optim: adamw_torch
24
+ output_dir: saves/Qwen2.5-3B-Instruct/lora/train_2024-10-31-11-14-25
25
+ packing: false
26
+ per_device_train_batch_size: 2
27
+ plot_loss: true
28
+ preprocessing_num_workers: 16
29
+ report_to: none
30
+ save_steps: 100
31
+ stage: sft
32
+ template: qwen
33
+ warmup_steps: 0
vocab.json ADDED
The diff for this file is too large to render. See raw diff