iamnguyen commited on
Commit
83b4c31
·
verified ·
1 Parent(s): 52e9323

Training in progress, step 5904, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51ac3bb4c5fcc7547d13a6f3177921af8b0ac323a92c1b2ebd751378b49005c5
3
  size 479769104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8b79b765163726683a5b97fb0c1abea490722bbbf1fc391f9b7a985bcdd16c3
3
  size 479769104
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c8d921bc3819800c6bfb305e9ed76511a11b74bf2e480cf600d82fb81c1fd2d
3
  size 240728404
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c4be8aec95d333750d7914e18caa878b08e168adf924ebf31647ab744346164
3
  size 240728404
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e44d2265428e258699e7753397e520393bda7a4c701384d0238f0149ff68231
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d08e0bef61039a4f769cc5e4c3c08f715d445eab2242d23ea2e8a9e30cd2439d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3806660686433761,
5
  "eval_steps": 500,
6
- "global_step": 5888,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -41223,6 +41223,118 @@
41223
  "learning_rate": 6.921943831625517e-06,
41224
  "loss": 1.087,
41225
  "step": 5888
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41226
  }
41227
  ],
41228
  "logging_steps": 1,
@@ -41242,7 +41354,7 @@
41242
  "attributes": {}
41243
  }
41244
  },
41245
- "total_flos": 5.332207877349507e+18,
41246
  "train_batch_size": 4,
41247
  "trial_name": null,
41248
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.38170048730816786,
5
  "eval_steps": 500,
6
+ "global_step": 5904,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
41223
  "learning_rate": 6.921943831625517e-06,
41224
  "loss": 1.087,
41225
  "step": 5888
41226
+ },
41227
+ {
41228
+ "epoch": 0.38073071980992557,
41229
+ "grad_norm": 2.504150390625,
41230
+ "learning_rate": 6.92099674695922e-06,
41231
+ "loss": 1.1731,
41232
+ "step": 5889
41233
+ },
41234
+ {
41235
+ "epoch": 0.38079537097647503,
41236
+ "grad_norm": 2.604978322982788,
41237
+ "learning_rate": 6.920049581427559e-06,
41238
+ "loss": 1.0603,
41239
+ "step": 5890
41240
+ },
41241
+ {
41242
+ "epoch": 0.38086002214302456,
41243
+ "grad_norm": 2.4367499351501465,
41244
+ "learning_rate": 6.91910233507041e-06,
41245
+ "loss": 1.1082,
41246
+ "step": 5891
41247
+ },
41248
+ {
41249
+ "epoch": 0.380924673309574,
41250
+ "grad_norm": 2.471052885055542,
41251
+ "learning_rate": 6.918155007927646e-06,
41252
+ "loss": 1.164,
41253
+ "step": 5892
41254
+ },
41255
+ {
41256
+ "epoch": 0.3809893244761235,
41257
+ "grad_norm": 2.616631507873535,
41258
+ "learning_rate": 6.917207600039144e-06,
41259
+ "loss": 1.2193,
41260
+ "step": 5893
41261
+ },
41262
+ {
41263
+ "epoch": 0.381053975642673,
41264
+ "grad_norm": 2.8797757625579834,
41265
+ "learning_rate": 6.9162601114447855e-06,
41266
+ "loss": 1.1593,
41267
+ "step": 5894
41268
+ },
41269
+ {
41270
+ "epoch": 0.3811186268092225,
41271
+ "grad_norm": 2.8945086002349854,
41272
+ "learning_rate": 6.915312542184458e-06,
41273
+ "loss": 1.0885,
41274
+ "step": 5895
41275
+ },
41276
+ {
41277
+ "epoch": 0.38118327797577195,
41278
+ "grad_norm": 3.071239709854126,
41279
+ "learning_rate": 6.91436489229805e-06,
41280
+ "loss": 1.1257,
41281
+ "step": 5896
41282
+ },
41283
+ {
41284
+ "epoch": 0.3812479291423215,
41285
+ "grad_norm": 2.603156805038452,
41286
+ "learning_rate": 6.913417161825449e-06,
41287
+ "loss": 1.1818,
41288
+ "step": 5897
41289
+ },
41290
+ {
41291
+ "epoch": 0.38131258030887094,
41292
+ "grad_norm": 2.5029022693634033,
41293
+ "learning_rate": 6.912469350806554e-06,
41294
+ "loss": 1.0913,
41295
+ "step": 5898
41296
+ },
41297
+ {
41298
+ "epoch": 0.3813772314754204,
41299
+ "grad_norm": 2.4416632652282715,
41300
+ "learning_rate": 6.911521459281265e-06,
41301
+ "loss": 1.2458,
41302
+ "step": 5899
41303
+ },
41304
+ {
41305
+ "epoch": 0.38144188264196993,
41306
+ "grad_norm": 2.3060193061828613,
41307
+ "learning_rate": 6.910573487289479e-06,
41308
+ "loss": 1.0826,
41309
+ "step": 5900
41310
+ },
41311
+ {
41312
+ "epoch": 0.3815065338085194,
41313
+ "grad_norm": 2.7792911529541016,
41314
+ "learning_rate": 6.909625434871104e-06,
41315
+ "loss": 1.2411,
41316
+ "step": 5901
41317
+ },
41318
+ {
41319
+ "epoch": 0.38157118497506887,
41320
+ "grad_norm": 2.7977821826934814,
41321
+ "learning_rate": 6.90867730206605e-06,
41322
+ "loss": 1.0796,
41323
+ "step": 5902
41324
+ },
41325
+ {
41326
+ "epoch": 0.3816358361416184,
41327
+ "grad_norm": 2.568824529647827,
41328
+ "learning_rate": 6.907729088914228e-06,
41329
+ "loss": 1.1366,
41330
+ "step": 5903
41331
+ },
41332
+ {
41333
+ "epoch": 0.38170048730816786,
41334
+ "grad_norm": 2.810534954071045,
41335
+ "learning_rate": 6.906780795455553e-06,
41336
+ "loss": 1.2322,
41337
+ "step": 5904
41338
  }
41339
  ],
41340
  "logging_steps": 1,
 
41354
  "attributes": {}
41355
  }
41356
  },
41357
+ "total_flos": 5.346276383970312e+18,
41358
  "train_batch_size": 4,
41359
  "trial_name": null,
41360
  "trial_params": null