fats-fme committed on
Commit
01afe28
·
verified ·
1 Parent(s): 913a1b1

Training in progress, step 32, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcc8412abfdd26b4fdc11436b83cc9ce406a97cc9f5426a26ceb58c2a845927f
3
  size 63592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16a0fa8ff10e2b47c91f216fca7ae09ce7d67db73074f8e58c5084d72a1497e5
3
  size 63592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d07392bdc566ee30c0eb06765a5c2d248b27113aa2c3dce9bc74e22b0015d19
3
  size 136814
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68c036e462fc01b009e149797ee73941675bd15cf7f6ca35412a8358dceba4e3
3
  size 136814
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:caa27e6740300163cb9718d2eaf886a9f832addc97756f5df0931b049feebcc5
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f435e2cf5af084ff87f38ac364aa32f5f9250d67a5fd3cb62ee9bb2e9e48da7
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0dbd690c0e1bfde4a7db1bd4ec971148018ad862429cc2318689bd0d458baddc
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dfdc610d6111b3c5b2c5f58f7b4bc723b1377af735ffdde336defa263ece09d
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:605d23f463d574da26fdafa2a2fc396d0b00160053ea75ca175c5c8e6f2990e2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a4ecc45073a35262869a0d9392090bbde50163f9775b63db54f8daf68eadbe7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.256,
5
  "eval_steps": 16,
6
- "global_step": 16,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -135,6 +135,126 @@
135
  "eval_samples_per_second": 129.18,
136
  "eval_steps_per_second": 32.904,
137
  "step": 16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  }
139
  ],
140
  "logging_steps": 1,
@@ -154,7 +274,7 @@
154
  "attributes": {}
155
  }
156
  },
157
- "total_flos": 479157288960.0,
158
  "train_batch_size": 2,
159
  "trial_name": null,
160
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.512,
5
  "eval_steps": 16,
6
+ "global_step": 32,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
135
  "eval_samples_per_second": 129.18,
136
  "eval_steps_per_second": 32.904,
137
  "step": 16
138
+ },
139
+ {
140
+ "epoch": 0.272,
141
+ "grad_norm": 1.444272756576538,
142
+ "learning_rate": 0.00017,
143
+ "loss": 88.719,
144
+ "step": 17
145
+ },
146
+ {
147
+ "epoch": 0.288,
148
+ "grad_norm": 1.4536869525909424,
149
+ "learning_rate": 0.00018,
150
+ "loss": 88.7756,
151
+ "step": 18
152
+ },
153
+ {
154
+ "epoch": 0.304,
155
+ "grad_norm": 1.6990916728973389,
156
+ "learning_rate": 0.00019,
157
+ "loss": 88.719,
158
+ "step": 19
159
+ },
160
+ {
161
+ "epoch": 0.32,
162
+ "grad_norm": 1.6907556056976318,
163
+ "learning_rate": 0.0002,
164
+ "loss": 88.6939,
165
+ "step": 20
166
+ },
167
+ {
168
+ "epoch": 0.336,
169
+ "grad_norm": 1.7854983806610107,
170
+ "learning_rate": 0.00019972037971811802,
171
+ "loss": 88.6928,
172
+ "step": 21
173
+ },
174
+ {
175
+ "epoch": 0.352,
176
+ "grad_norm": 1.7495782375335693,
177
+ "learning_rate": 0.00019888308262251285,
178
+ "loss": 88.7139,
179
+ "step": 22
180
+ },
181
+ {
182
+ "epoch": 0.368,
183
+ "grad_norm": 1.8135170936584473,
184
+ "learning_rate": 0.00019749279121818235,
185
+ "loss": 88.6808,
186
+ "step": 23
187
+ },
188
+ {
189
+ "epoch": 0.384,
190
+ "grad_norm": 1.9571936130523682,
191
+ "learning_rate": 0.0001955572805786141,
192
+ "loss": 88.7278,
193
+ "step": 24
194
+ },
195
+ {
196
+ "epoch": 0.4,
197
+ "grad_norm": 1.9375219345092773,
198
+ "learning_rate": 0.00019308737486442045,
199
+ "loss": 88.661,
200
+ "step": 25
201
+ },
202
+ {
203
+ "epoch": 0.416,
204
+ "grad_norm": 2.0492427349090576,
205
+ "learning_rate": 0.0001900968867902419,
206
+ "loss": 88.6735,
207
+ "step": 26
208
+ },
209
+ {
210
+ "epoch": 0.432,
211
+ "grad_norm": 2.057321786880493,
212
+ "learning_rate": 0.00018660254037844388,
213
+ "loss": 88.5661,
214
+ "step": 27
215
+ },
216
+ {
217
+ "epoch": 0.448,
218
+ "grad_norm": 2.227236747741699,
219
+ "learning_rate": 0.0001826238774315995,
220
+ "loss": 88.6781,
221
+ "step": 28
222
+ },
223
+ {
224
+ "epoch": 0.464,
225
+ "grad_norm": 2.3857967853546143,
226
+ "learning_rate": 0.000178183148246803,
227
+ "loss": 88.6794,
228
+ "step": 29
229
+ },
230
+ {
231
+ "epoch": 0.48,
232
+ "grad_norm": 2.7329936027526855,
233
+ "learning_rate": 0.00017330518718298264,
234
+ "loss": 88.6323,
235
+ "step": 30
236
+ },
237
+ {
238
+ "epoch": 0.496,
239
+ "grad_norm": 3.2216603755950928,
240
+ "learning_rate": 0.00016801727377709194,
241
+ "loss": 88.5737,
242
+ "step": 31
243
+ },
244
+ {
245
+ "epoch": 0.512,
246
+ "grad_norm": 1.5606772899627686,
247
+ "learning_rate": 0.00016234898018587337,
248
+ "loss": 88.6774,
249
+ "step": 32
250
+ },
251
+ {
252
+ "epoch": 0.512,
253
+ "eval_loss": 11.075674057006836,
254
+ "eval_runtime": 0.8047,
255
+ "eval_samples_per_second": 131.72,
256
+ "eval_steps_per_second": 33.551,
257
+ "step": 32
258
  }
259
  ],
260
  "logging_steps": 1,
 
274
  "attributes": {}
275
  }
276
  },
277
+ "total_flos": 958314577920.0,
278
  "train_batch_size": 2,
279
  "trial_name": null,
280
  "trial_params": null