Femboyuwu2000 committed on
Commit
7a3f619
1 Parent(s): 928a5be

Training in progress, step 2660, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54c37478ba6579cff2d0646434ea7d1892cb48a4219f90126afbdf7bde329abb
3
  size 13982248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9d5da627a83cc018d4bf0bfc59c1098cf129633a1abc41bd7d51fb13114c0b1
3
  size 13982248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b0e5872ff2188ce6710b4b1015cc3fcaf69768dd90c4fa6ab9ac4aecfb9a52a
3
  size 7062522
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:682f21df5b619b8fc7a877dafd3f697aa66e4f2b393f8b846f0e3646f7ba1e83
3
  size 7062522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27177ac056c4d9dbf19f5cb10b2bae9ee9392da9370d73a04aa8b3b9bc1e770d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfdb4aa20ae8b96f35fdee422b168c0d1ffe465638f08b59f076576138bbe18e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea196c4dba55ede2883451624fdfc54856529c0e1f9c30be6a7f36bcba4e1179
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6164b3c850dce9a36a31f30919cac3506bfad9cadb5f9ef73ee2c76d96b38ff
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2112,
5
  "eval_steps": 500,
6
- "global_step": 2640,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -931,6 +931,13 @@
931
  "learning_rate": 2.918891825699857e-05,
932
  "loss": 3.6067,
933
  "step": 2640
 
 
 
 
 
 
 
934
  }
935
  ],
936
  "logging_steps": 20,
@@ -938,7 +945,7 @@
938
  "num_input_tokens_seen": 0,
939
  "num_train_epochs": 2,
940
  "save_steps": 20,
941
- "total_flos": 6257137317445632.0,
942
  "train_batch_size": 8,
943
  "trial_name": null,
944
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.2128,
5
  "eval_steps": 500,
6
+ "global_step": 2660,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
931
  "learning_rate": 2.918891825699857e-05,
932
  "loss": 3.6067,
933
  "step": 2640
934
+ },
935
+ {
936
+ "epoch": 0.21,
937
+ "grad_norm": 40.04263687133789,
938
+ "learning_rate": 2.9173085201008144e-05,
939
+ "loss": 3.5365,
940
+ "step": 2660
941
  }
942
  ],
943
  "logging_steps": 20,
 
945
  "num_input_tokens_seen": 0,
946
  "num_train_epochs": 2,
947
  "save_steps": 20,
948
+ "total_flos": 6299096079630336.0,
949
  "train_batch_size": 8,
950
  "trial_name": null,
951
  "trial_params": null