ncbateman committed on
Commit
5c62a22
1 Parent(s): 0dbb875

Training in progress, step 635, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3e2031f7a699e8ef5bcbb1fa8c3e083bf4011adc2087af257fdd7fcd1d2160e
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d9ca924d8d90e795e86de8369f2c6d48dc26135b122cf5d4b6c82671b86a4ed
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:071fd869a99418f429ea9573dbc75263ba429d3b76f73105073623a19e90d13e
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:129495fdf41d513360a3ce9d47abca649f2bf128467121d3f900e1098d9d5e75
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2d7c573c8b86ae1dd5b87ff18be79cde55bc2e7808d0b786a684f7321060ea8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:098658a8b5f74b202fbb5e29652962f5112c3b574b4ce341756735375ee72208
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2580ef175e3eaadc95b4a9970cf624730cbbd91e97d33400a87932d531bfdd4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17863c6ebccc1ff43b06a14aeb4d77d5b06180d5676f46d6243e5056d6bc48af
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8152701391135555,
5
  "eval_steps": 386,
6
- "global_step": 630,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4433,6 +4433,41 @@
4433
  "learning_rate": 9.439013679899262e-05,
4434
  "loss": 0.8106,
4435
  "step": 630
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4436
  }
4437
  ],
4438
  "logging_steps": 1,
@@ -4452,7 +4487,7 @@
4452
  "attributes": {}
4453
  }
4454
  },
4455
- "total_flos": 7.043004181669478e+17,
4456
  "train_batch_size": 4,
4457
  "trial_name": null,
4458
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8217405370430282,
5
  "eval_steps": 386,
6
+ "global_step": 635,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4433
  "learning_rate": 9.439013679899262e-05,
4434
  "loss": 0.8106,
4435
  "step": 630
4436
+ },
4437
+ {
4438
+ "epoch": 0.8165642186994501,
4439
+ "grad_norm": 0.7175205945968628,
4440
+ "learning_rate": 9.437114747270612e-05,
4441
+ "loss": 0.8622,
4442
+ "step": 631
4443
+ },
4444
+ {
4445
+ "epoch": 0.8178582982853445,
4446
+ "grad_norm": 0.8083619475364685,
4447
+ "learning_rate": 9.435212797814798e-05,
4448
+ "loss": 0.8608,
4449
+ "step": 632
4450
+ },
4451
+ {
4452
+ "epoch": 0.8191523778712391,
4453
+ "grad_norm": 0.9113181233406067,
4454
+ "learning_rate": 9.433307832824974e-05,
4455
+ "loss": 0.9445,
4456
+ "step": 633
4457
+ },
4458
+ {
4459
+ "epoch": 0.8204464574571336,
4460
+ "grad_norm": 0.7915831208229065,
4461
+ "learning_rate": 9.431399853596336e-05,
4462
+ "loss": 0.8651,
4463
+ "step": 634
4464
+ },
4465
+ {
4466
+ "epoch": 0.8217405370430282,
4467
+ "grad_norm": 0.7277971506118774,
4468
+ "learning_rate": 9.429488861426137e-05,
4469
+ "loss": 0.8799,
4470
+ "step": 635
4471
  }
4472
  ],
4473
  "logging_steps": 1,
 
4487
  "attributes": {}
4488
  }
4489
  },
4490
+ "total_flos": 7.098901040254157e+17,
4491
  "train_batch_size": 4,
4492
  "trial_name": null,
4493
  "trial_params": null