ncbateman commited on
Commit
dbe703a
·
verified ·
1 Parent(s): 144c668

Training in progress, step 145, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:feee0a69dbcd321bf78ea0b4b76e9c5124d68ccf8b93018334bb8f2f734b7ad3
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aafad848e81bd1d81e2af2ea9fafb99b49a6d95ebc286c86b8ee31aa5edbccf4
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d7c39e6e6222cb74622e3254d42bf0e9b0b3b058c628cd0ef84b4a2e1322e35
3
  size 49846260
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9d939b609dab234b7bb9d7daace8308f1bf7b81dd32b9f6d084edcd20ac6ce3
3
  size 49846260
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fb787d7b71a8f1899c8d99ef06e195074c24a984dea6c9926cd15c76a124153
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:deccb07511b8131b8a085ace65a43ba4c84d902fddc80cd24f284c9d76e3a5f6
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80407f1621aa96065dd58d5f53331105dff3bac7a38e8f9d80f24f4c3788046c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c82db81101935c1ef6b1e22d89d4bc0889a9580697f0068651662ac9609b43a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.18117114202523454,
5
  "eval_steps": 386,
6
- "global_step": 140,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -995,6 +995,41 @@
995
  "learning_rate": 9.986238191873874e-05,
996
  "loss": 0.875,
997
  "step": 140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
998
  }
999
  ],
1000
  "logging_steps": 1,
@@ -1014,7 +1049,7 @@
1014
  "attributes": {}
1015
  }
1016
  },
1017
- "total_flos": 1.5651120403709952e+17,
1018
  "train_batch_size": 4,
1019
  "trial_name": null,
1020
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.18764153995470723,
5
  "eval_steps": 386,
6
+ "global_step": 145,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
995
  "learning_rate": 9.986238191873874e-05,
996
  "loss": 0.875,
997
  "step": 140
998
+ },
999
+ {
1000
+ "epoch": 0.18246522161112907,
1001
+ "grad_norm": 0.8143295645713806,
1002
+ "learning_rate": 9.985930819224247e-05,
1003
+ "loss": 0.8454,
1004
+ "step": 141
1005
+ },
1006
+ {
1007
+ "epoch": 0.1837593011970236,
1008
+ "grad_norm": 0.8755755424499512,
1009
+ "learning_rate": 9.985620056603348e-05,
1010
+ "loss": 0.8029,
1011
+ "step": 142
1012
+ },
1013
+ {
1014
+ "epoch": 0.18505338078291814,
1015
+ "grad_norm": 0.899174690246582,
1016
+ "learning_rate": 9.985305904222469e-05,
1017
+ "loss": 0.9608,
1018
+ "step": 143
1019
+ },
1020
+ {
1021
+ "epoch": 0.1863474603688127,
1022
+ "grad_norm": 0.920137882232666,
1023
+ "learning_rate": 9.984988362295203e-05,
1024
+ "loss": 0.9022,
1025
+ "step": 144
1026
+ },
1027
+ {
1028
+ "epoch": 0.18764153995470723,
1029
+ "grad_norm": 1.1012908220291138,
1030
+ "learning_rate": 9.984667431037447e-05,
1031
+ "loss": 0.9621,
1032
+ "step": 145
1033
  }
1034
  ],
1035
  "logging_steps": 1,
 
1049
  "attributes": {}
1050
  }
1051
  },
1052
+ "total_flos": 1.6210088989556736e+17,
1053
  "train_batch_size": 4,
1054
  "trial_name": null,
1055
  "trial_params": null