mdsam33r committed on
Commit
f0365c9
·
verified ·
1 Parent(s): f85d733

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b3aefb0fbd710b7cd994f4c43892e2b7be9c484498cffcf26bcbe066348a36f
3
  size 121537408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20491829e0be4b1b4e8e0e277797b9bf1c8ad29983a72101f2fc3f64084fa5b8
3
  size 121537408
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c798f26631e369bb9d847535b11306542209fa62620793e0cea66711e56a76b
3
  size 61998229
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:310a9f3c78b1bf175abf83bdf7f5bcbe5a72b117dc5643746a9e552cbaf5b1dc
3
  size 61998229
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c800b778fa7e115e4c34de8529902de8b61c9a1b4bab3eb8295d06dafff030e
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:181c5f0270cf39930062ddfa3767a2481d0c360f120b11f8e25dbf533a1cdaba
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bf60e7c9d9858c9c78135386b6f7a9a021fd02a4d9f1a54b5e62667812d6cc3
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37d835d6083fdb1e5238226cd4cbca157d080c178a650e6d8ecd9ca4cd32b543
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.9190399999999999,
6
  "eval_steps": 200,
7
- "global_step": 1500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -218,6 +218,76 @@
218
  "learning_rate": 1.8855743544078363e-05,
219
  "loss": 1.2614,
220
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  }
222
  ],
223
  "logging_steps": 50,
@@ -237,7 +307,7 @@
237
  "attributes": {}
238
  }
239
  },
240
- "total_flos": 1.790044790658048e+16,
241
  "train_batch_size": 8,
242
  "trial_name": null,
243
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.55808,
6
  "eval_steps": 200,
7
+ "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
218
  "learning_rate": 1.8855743544078363e-05,
219
  "loss": 1.2614,
220
  "step": 1500
221
+ },
222
+ {
223
+ "epoch": 1.98304,
224
+ "grad_norm": 3.592540979385376,
225
+ "learning_rate": 1.77426536064114e-05,
226
+ "loss": 1.2626,
227
+ "step": 1550
228
+ },
229
+ {
230
+ "epoch": 2.04608,
231
+ "grad_norm": 3.5584168434143066,
232
+ "learning_rate": 1.6629563668744434e-05,
233
+ "loss": 1.2448,
234
+ "step": 1600
235
+ },
236
+ {
237
+ "epoch": 2.11008,
238
+ "grad_norm": 3.483705759048462,
239
+ "learning_rate": 1.5516473731077473e-05,
240
+ "loss": 1.2606,
241
+ "step": 1650
242
+ },
243
+ {
244
+ "epoch": 2.17408,
245
+ "grad_norm": 3.3508358001708984,
246
+ "learning_rate": 1.4403383793410507e-05,
247
+ "loss": 1.2423,
248
+ "step": 1700
249
+ },
250
+ {
251
+ "epoch": 2.23808,
252
+ "grad_norm": 3.4544994831085205,
253
+ "learning_rate": 1.3290293855743544e-05,
254
+ "loss": 1.2494,
255
+ "step": 1750
256
+ },
257
+ {
258
+ "epoch": 2.30208,
259
+ "grad_norm": 3.5390095710754395,
260
+ "learning_rate": 1.2177203918076581e-05,
261
+ "loss": 1.2459,
262
+ "step": 1800
263
+ },
264
+ {
265
+ "epoch": 2.36608,
266
+ "grad_norm": 3.303083658218384,
267
+ "learning_rate": 1.1064113980409617e-05,
268
+ "loss": 1.2496,
269
+ "step": 1850
270
+ },
271
+ {
272
+ "epoch": 2.4300800000000002,
273
+ "grad_norm": 3.295470714569092,
274
+ "learning_rate": 9.951024042742654e-06,
275
+ "loss": 1.2363,
276
+ "step": 1900
277
+ },
278
+ {
279
+ "epoch": 2.49408,
280
+ "grad_norm": 3.5985326766967773,
281
+ "learning_rate": 8.837934105075691e-06,
282
+ "loss": 1.2258,
283
+ "step": 1950
284
+ },
285
+ {
286
+ "epoch": 2.55808,
287
+ "grad_norm": 3.7451696395874023,
288
+ "learning_rate": 7.724844167408728e-06,
289
+ "loss": 1.2202,
290
+ "step": 2000
291
  }
292
  ],
293
  "logging_steps": 50,
 
307
  "attributes": {}
308
  }
309
  },
310
+ "total_flos": 2.3883581752743936e+16,
311
  "train_batch_size": 8,
312
  "trial_name": null,
313
  "trial_params": null