YangZhoumill committed on
Commit
b9b59cc
·
verified ·
1 Parent(s): 4a3ffeb

Model save

Browse files
Files changed (4) hide show
  1. README.md +4 -6
  2. all_results.json +6 -6
  3. train_results.json +6 -6
  4. trainer_state.json +143 -108
README.md CHANGED
@@ -1,19 +1,17 @@
1
  ---
2
  base_model: Qwen/Qwen2.5-0.5B-Instruct
3
- datasets: YangZhoumill/bestofn
4
  library_name: transformers
5
- model_name: Qwen2.5-0.5B-Instruct-4230297
6
  tags:
7
  - generated_from_trainer
8
- - open-r1
9
  - trl
10
  - sft
11
  licence: license
12
  ---
13
 
14
- # Model Card for Qwen2.5-0.5B-Instruct-4230297
15
 
16
- This model is a fine-tuned version of [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) on the [YangZhoumill/bestofn](https://huggingface.co/datasets/YangZhoumill/bestofn) dataset.
17
  It has been trained using [TRL](https://github.com/huggingface/trl).
18
 
19
  ## Quick start
@@ -29,7 +27,7 @@ print(output["generated_text"])
29
 
30
  ## Training procedure
31
 
32
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/stevenzhou0816100/huggingface/runs/uag1gn9n)
33
 
34
 
35
  This model was trained with SFT.
 
1
  ---
2
  base_model: Qwen/Qwen2.5-0.5B-Instruct
 
3
  library_name: transformers
4
+ model_name: Qwen2.5-0.5B-Instruct
5
  tags:
6
  - generated_from_trainer
 
7
  - trl
8
  - sft
9
  licence: license
10
  ---
11
 
12
+ # Model Card for Qwen2.5-0.5B-Instruct
13
 
14
+ This model is a fine-tuned version of [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/stevenzhou0816100/huggingface/runs/qm9rbkf1)
31
 
32
 
33
  This model was trained with SFT.
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "total_flos": 3.486616950459597e+16,
3
- "train_loss": 0.9243612613677978,
4
- "train_runtime": 251.2658,
5
- "train_samples": 7473,
6
- "train_samples_per_second": 3.98,
7
- "train_steps_per_second": 0.497
8
  }
 
1
  {
2
+ "total_flos": 4.211382991139635e+16,
3
+ "train_loss": 0.7568846257527669,
4
+ "train_runtime": 323.9308,
5
+ "train_samples": 9308,
6
+ "train_samples_per_second": 3.704,
7
+ "train_steps_per_second": 0.463
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "total_flos": 3.486616950459597e+16,
3
- "train_loss": 0.9243612613677978,
4
- "train_runtime": 251.2658,
5
- "train_samples": 7473,
6
- "train_samples_per_second": 3.98,
7
- "train_steps_per_second": 0.497
8
  }
 
1
  {
2
+ "total_flos": 4.211382991139635e+16,
3
+ "train_loss": 0.7568846257527669,
4
+ "train_runtime": 323.9308,
5
+ "train_samples": 9308,
6
+ "train_samples_per_second": 3.704,
7
+ "train_steps_per_second": 0.463
8
  }
trainer_state.json CHANGED
@@ -4,198 +4,233 @@
4
  "best_model_checkpoint": null,
5
  "epoch": 1.0,
6
  "eval_steps": 500,
7
- "global_step": 125,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "epoch": 0.04,
14
- "grad_norm": 59.25,
15
- "learning_rate": 2.857142857142857e-05,
16
- "loss": 3.2542,
17
  "step": 5
18
  },
19
  {
20
- "epoch": 0.08,
21
- "grad_norm": 9.75,
22
- "learning_rate": 4.996811065272715e-05,
23
- "loss": 1.5907,
24
  "step": 10
25
  },
26
  {
27
- "epoch": 0.12,
28
- "grad_norm": 7.21875,
29
- "learning_rate": 4.9610392803331726e-05,
30
- "loss": 1.2725,
31
  "step": 15
32
  },
33
  {
34
- "epoch": 0.16,
35
- "grad_norm": 6.21875,
36
- "learning_rate": 4.8861446190538576e-05,
37
- "loss": 1.1719,
38
  "step": 20
39
  },
40
  {
41
- "epoch": 0.2,
42
- "grad_norm": 6.84375,
43
- "learning_rate": 4.7734522928852436e-05,
44
- "loss": 1.1011,
45
  "step": 25
46
  },
47
  {
48
- "epoch": 0.24,
49
- "grad_norm": 7.8125,
50
- "learning_rate": 4.624956317935659e-05,
51
- "loss": 1.0381,
52
  "step": 30
53
  },
54
  {
55
- "epoch": 0.28,
56
- "grad_norm": 33.25,
57
- "learning_rate": 4.443284232176311e-05,
58
- "loss": 0.9895,
59
  "step": 35
60
  },
61
  {
62
- "epoch": 0.32,
63
- "grad_norm": 62.0,
64
- "learning_rate": 4.2316506028963374e-05,
65
- "loss": 0.9336,
66
  "step": 40
67
  },
68
  {
69
- "epoch": 0.36,
70
- "grad_norm": 57.75,
71
- "learning_rate": 3.993800147062685e-05,
72
- "loss": 0.9134,
73
  "step": 45
74
  },
75
  {
76
- "epoch": 0.4,
77
- "grad_norm": 17.375,
78
- "learning_rate": 3.733941471032425e-05,
79
- "loss": 0.8617,
80
  "step": 50
81
  },
82
  {
83
- "epoch": 0.44,
84
- "grad_norm": 92.0,
85
- "learning_rate": 3.4566726020493854e-05,
86
- "loss": 0.8964,
87
  "step": 55
88
  },
89
  {
90
- "epoch": 0.48,
91
- "grad_norm": 8.9375,
92
- "learning_rate": 3.1668996291960073e-05,
93
- "loss": 0.7954,
94
  "step": 60
95
  },
96
  {
97
- "epoch": 0.52,
98
- "grad_norm": 12.6875,
99
- "learning_rate": 2.869749893394902e-05,
100
- "loss": 0.7644,
101
  "step": 65
102
  },
103
  {
104
- "epoch": 0.56,
105
- "grad_norm": 25.5,
106
- "learning_rate": 2.570481262505563e-05,
107
- "loss": 0.7105,
108
  "step": 70
109
  },
110
  {
111
- "epoch": 0.6,
112
- "grad_norm": 12.0625,
113
- "learning_rate": 2.2743890968333453e-05,
114
- "loss": 0.6844,
115
  "step": 75
116
  },
117
  {
118
- "epoch": 0.64,
119
- "grad_norm": 11.125,
120
- "learning_rate": 1.986712551234432e-05,
121
- "loss": 0.673,
122
  "step": 80
123
  },
124
  {
125
- "epoch": 0.68,
126
- "grad_norm": 10.0,
127
- "learning_rate": 1.7125418717390167e-05,
128
- "loss": 0.6426,
129
  "step": 85
130
  },
131
  {
132
- "epoch": 0.72,
133
- "grad_norm": 9.6875,
134
- "learning_rate": 1.4567283270175847e-05,
135
- "loss": 0.6182,
136
  "step": 90
137
  },
138
  {
139
- "epoch": 0.76,
140
- "grad_norm": 11.25,
141
- "learning_rate": 1.2237983683933638e-05,
142
- "loss": 0.6088,
143
  "step": 95
144
  },
145
  {
146
- "epoch": 0.8,
147
- "grad_norm": 11.25,
148
- "learning_rate": 1.0178735372827107e-05,
149
- "loss": 0.6127,
150
  "step": 100
151
  },
152
  {
153
- "epoch": 0.84,
154
- "grad_norm": 9.75,
155
- "learning_rate": 8.425975372482405e-06,
156
- "loss": 0.5978,
157
  "step": 105
158
  },
159
  {
160
- "epoch": 0.88,
161
- "grad_norm": 13.6875,
162
- "learning_rate": 7.010717610764453e-06,
163
- "loss": 0.6071,
164
  "step": 110
165
  },
166
  {
167
- "epoch": 0.92,
168
  "grad_norm": 10.25,
169
- "learning_rate": 5.9580041368548775e-06,
170
- "loss": 0.5962,
171
  "step": 115
172
  },
173
  {
174
- "epoch": 0.96,
175
- "grad_norm": 9.3125,
176
- "learning_rate": 5.286462018769748e-06,
177
- "loss": 0.5897,
178
  "step": 120
179
  },
180
  {
181
- "epoch": 1.0,
182
- "grad_norm": 11.0625,
183
- "learning_rate": 5.007973749722316e-06,
184
- "loss": 0.5851,
185
  "step": 125
186
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  {
188
  "epoch": 1.0,
189
- "step": 125,
190
- "total_flos": 3.486616950459597e+16,
191
- "train_loss": 0.9243612613677978,
192
- "train_runtime": 251.2658,
193
- "train_samples_per_second": 3.98,
194
- "train_steps_per_second": 0.497
195
  }
196
  ],
197
  "logging_steps": 5,
198
- "max_steps": 125,
199
  "num_input_tokens_seen": 0,
200
  "num_train_epochs": 1,
201
  "save_steps": 500,
@@ -211,7 +246,7 @@
211
  "attributes": {}
212
  }
213
  },
214
- "total_flos": 3.486616950459597e+16,
215
  "train_batch_size": 2,
216
  "trial_name": null,
217
  "trial_params": null
 
4
  "best_model_checkpoint": null,
5
  "epoch": 1.0,
6
  "eval_steps": 500,
7
+ "global_step": 150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "epoch": 0.03333333333333333,
14
+ "grad_norm": 61.0,
15
+ "learning_rate": 2.5e-05,
16
+ "loss": 3.1111,
17
  "step": 5
18
  },
19
  {
20
+ "epoch": 0.06666666666666667,
21
+ "grad_norm": 12.75,
22
+ "learning_rate": 4.9994493725417974e-05,
23
+ "loss": 1.4101,
24
  "step": 10
25
  },
26
  {
27
+ "epoch": 0.1,
28
+ "grad_norm": 7.59375,
29
+ "learning_rate": 4.980205694523683e-05,
30
+ "loss": 1.0893,
31
  "step": 15
32
  },
33
  {
34
+ "epoch": 0.13333333333333333,
35
+ "grad_norm": 376.0,
36
+ "learning_rate": 4.933699554028027e-05,
37
+ "loss": 1.0086,
38
  "step": 20
39
  },
40
  {
41
+ "epoch": 0.16666666666666666,
42
+ "grad_norm": 6.90625,
43
+ "learning_rate": 4.8604994510426774e-05,
44
+ "loss": 0.9507,
45
  "step": 25
46
  },
47
  {
48
+ "epoch": 0.2,
49
+ "grad_norm": 6.8125,
50
+ "learning_rate": 4.761500197676621e-05,
51
+ "loss": 0.9555,
52
  "step": 30
53
  },
54
  {
55
+ "epoch": 0.23333333333333334,
56
+ "grad_norm": 9.125,
57
+ "learning_rate": 4.63791197980501e-05,
58
+ "loss": 0.8328,
59
  "step": 35
60
  },
61
  {
62
+ "epoch": 0.26666666666666666,
63
+ "grad_norm": 10.125,
64
+ "learning_rate": 4.49124556352474e-05,
65
+ "loss": 0.8174,
66
  "step": 40
67
  },
68
  {
69
+ "epoch": 0.3,
70
+ "grad_norm": 8.8125,
71
+ "learning_rate": 4.323293827259707e-05,
72
+ "loss": 0.7522,
73
  "step": 45
74
  },
75
  {
76
+ "epoch": 0.3333333333333333,
77
+ "grad_norm": 8.4375,
78
+ "learning_rate": 4.13610984527082e-05,
79
+ "loss": 0.7246,
80
  "step": 50
81
  },
82
  {
83
+ "epoch": 0.36666666666666664,
84
+ "grad_norm": 13.875,
85
+ "learning_rate": 3.931981790482172e-05,
86
+ "loss": 0.7451,
87
  "step": 55
88
  },
89
  {
90
+ "epoch": 0.4,
91
+ "grad_norm": 13.125,
92
+ "learning_rate": 3.713404963416025e-05,
93
+ "loss": 0.671,
94
  "step": 60
95
  },
96
  {
97
+ "epoch": 0.43333333333333335,
98
+ "grad_norm": 59.0,
99
+ "learning_rate": 3.483051289160265e-05,
100
+ "loss": 0.6582,
101
  "step": 65
102
  },
103
  {
104
+ "epoch": 0.4666666666666667,
105
+ "grad_norm": 10.0,
106
+ "learning_rate": 3.243736655243287e-05,
107
+ "loss": 0.6431,
108
  "step": 70
109
  },
110
  {
111
+ "epoch": 0.5,
112
+ "grad_norm": 15.375,
113
+ "learning_rate": 2.9983864896843578e-05,
114
+ "loss": 0.5837,
115
  "step": 75
116
  },
117
  {
118
+ "epoch": 0.5333333333333333,
119
+ "grad_norm": 22.0,
120
+ "learning_rate": 2.7500000000000004e-05,
121
+ "loss": 0.6217,
122
  "step": 80
123
  },
124
  {
125
+ "epoch": 0.5666666666666667,
126
+ "grad_norm": 14.5,
127
+ "learning_rate": 2.5016135103156434e-05,
128
+ "loss": 0.5935,
129
  "step": 85
130
  },
131
  {
132
+ "epoch": 0.6,
133
+ "grad_norm": 13.8125,
134
+ "learning_rate": 2.2562633447567137e-05,
135
+ "loss": 0.563,
136
  "step": 90
137
  },
138
  {
139
+ "epoch": 0.6333333333333333,
140
+ "grad_norm": 9.4375,
141
+ "learning_rate": 2.0169487108397363e-05,
142
+ "loss": 0.5695,
143
  "step": 95
144
  },
145
  {
146
+ "epoch": 0.6666666666666666,
147
+ "grad_norm": 63.0,
148
+ "learning_rate": 1.7865950365839762e-05,
149
+ "loss": 0.5332,
150
  "step": 100
151
  },
152
  {
153
+ "epoch": 0.7,
154
+ "grad_norm": 14.4375,
155
+ "learning_rate": 1.568018209517828e-05,
156
+ "loss": 0.5306,
157
  "step": 105
158
  },
159
  {
160
+ "epoch": 0.7333333333333333,
161
+ "grad_norm": 10.75,
162
+ "learning_rate": 1.3638901547291804e-05,
163
+ "loss": 0.4998,
164
  "step": 110
165
  },
166
  {
167
+ "epoch": 0.7666666666666667,
168
  "grad_norm": 10.25,
169
+ "learning_rate": 1.1767061727402935e-05,
170
+ "loss": 0.4856,
171
  "step": 115
172
  },
173
  {
174
+ "epoch": 0.8,
175
+ "grad_norm": 16.625,
176
+ "learning_rate": 1.0087544364752604e-05,
177
+ "loss": 0.4837,
178
  "step": 120
179
  },
180
  {
181
+ "epoch": 0.8333333333333334,
182
+ "grad_norm": 15.0,
183
+ "learning_rate": 8.62088020194991e-06,
184
+ "loss": 0.4811,
185
  "step": 125
186
  },
187
+ {
188
+ "epoch": 0.8666666666666667,
189
+ "grad_norm": 9.375,
190
+ "learning_rate": 7.3849980232337995e-06,
191
+ "loss": 0.4875,
192
+ "step": 130
193
+ },
194
+ {
195
+ "epoch": 0.9,
196
+ "grad_norm": 8.75,
197
+ "learning_rate": 6.3950054895732334e-06,
198
+ "loss": 0.4877,
199
+ "step": 135
200
+ },
201
+ {
202
+ "epoch": 0.9333333333333333,
203
+ "grad_norm": 11.5625,
204
+ "learning_rate": 5.663004459719738e-06,
205
+ "loss": 0.4812,
206
+ "step": 140
207
+ },
208
+ {
209
+ "epoch": 0.9666666666666667,
210
+ "grad_norm": 11.0,
211
+ "learning_rate": 5.197943054763173e-06,
212
+ "loss": 0.4589,
213
+ "step": 145
214
+ },
215
+ {
216
+ "epoch": 1.0,
217
+ "grad_norm": 10.375,
218
+ "learning_rate": 5.005506274582033e-06,
219
+ "loss": 0.4762,
220
+ "step": 150
221
+ },
222
  {
223
  "epoch": 1.0,
224
+ "step": 150,
225
+ "total_flos": 4.211382991139635e+16,
226
+ "train_loss": 0.7568846257527669,
227
+ "train_runtime": 323.9308,
228
+ "train_samples_per_second": 3.704,
229
+ "train_steps_per_second": 0.463
230
  }
231
  ],
232
  "logging_steps": 5,
233
+ "max_steps": 150,
234
  "num_input_tokens_seen": 0,
235
  "num_train_epochs": 1,
236
  "save_steps": 500,
 
246
  "attributes": {}
247
  }
248
  },
249
+ "total_flos": 4.211382991139635e+16,
250
  "train_batch_size": 2,
251
  "trial_name": null,
252
  "trial_params": null