ncbateman committed on
Commit
f7a2414
·
verified ·
1 Parent(s): 29862d3

Training in progress, step 545, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c96ebe863ff3466272b3ab2c8447b3aca22869b17cb07ec332ff2f920b6b49e
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34dff9b81c5a01b4e58e26af9ff3d94726dcf3fbfd8df467a5b2b3ca91f55c31
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf43448dcb4f245500eca4da91ce018a0ed0cb1998dc0a1fa8423feb43ece1aa
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f5055a21ea311851fcf516b2248e5a06f15c0402c2bc16301d1bcb3390e648c
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fedba96dcac876f28e2be6306578220e85fa67624b620e4b7d8b2b9b53c25230
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0baef92f29724762003fb5a5e60a068b25f0dcaabdd1738999cdd71657bcdcc2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8032cf65e0ff4317d8e367449a794976c91fc02ffa62a4c4ec4e22022cce0247
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c07f2bd0b59249f39d32534072619919b7f36bea7e48a381937d6e08de9e282
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6988029763830476,
5
  "eval_steps": 386,
6
- "global_step": 540,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3803,6 +3803,41 @@
3803
  "learning_rate": 9.597407348941865e-05,
3804
  "loss": 0.7678,
3805
  "step": 540
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3806
  }
3807
  ],
3808
  "logging_steps": 1,
@@ -3822,7 +3857,7 @@
3822
  "attributes": {}
3823
  }
3824
  },
3825
- "total_flos": 6.036860727145267e+17,
3826
  "train_batch_size": 4,
3827
  "trial_name": null,
3828
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7052733743125202,
5
  "eval_steps": 386,
6
+ "global_step": 545,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3803
  "learning_rate": 9.597407348941865e-05,
3804
  "loss": 0.7678,
3805
  "step": 540
3806
+ },
3807
+ {
3808
+ "epoch": 0.7000970559689421,
3809
+ "grad_norm": 0.7815922498703003,
3810
+ "learning_rate": 9.59578496623617e-05,
3811
+ "loss": 0.8866,
3812
+ "step": 541
3813
+ },
3814
+ {
3815
+ "epoch": 0.7013911355548367,
3816
+ "grad_norm": 0.9351438283920288,
3817
+ "learning_rate": 9.594159458822257e-05,
3818
+ "loss": 0.9607,
3819
+ "step": 542
3820
+ },
3821
+ {
3822
+ "epoch": 0.7026852151407311,
3823
+ "grad_norm": 0.8216230869293213,
3824
+ "learning_rate": 9.592530827805322e-05,
3825
+ "loss": 1.0062,
3826
+ "step": 543
3827
+ },
3828
+ {
3829
+ "epoch": 0.7039792947266257,
3830
+ "grad_norm": 0.8280504941940308,
3831
+ "learning_rate": 9.59089907429268e-05,
3832
+ "loss": 0.8715,
3833
+ "step": 544
3834
+ },
3835
+ {
3836
+ "epoch": 0.7052733743125202,
3837
+ "grad_norm": 0.9679670929908752,
3838
+ "learning_rate": 9.589264199393776e-05,
3839
+ "loss": 1.0543,
3840
+ "step": 545
3841
  }
3842
  ],
3843
  "logging_steps": 1,
 
3857
  "attributes": {}
3858
  }
3859
  },
3860
+ "total_flos": 6.092757585729946e+17,
3861
  "train_batch_size": 4,
3862
  "trial_name": null,
3863
  "trial_params": null