mgh6 committed on
Commit
87f470f
1 Parent(s): 4571aea

Training in progress, step 21000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c3635274eee21ba1976640f8142d5c1fbf66c82e539af94ae87a1f3ee9eb023
3
  size 136000488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c303733e68b4d6a3464f154a0cef27f187ffb143a75e316fdc2ae74d008e2ebe
3
  size 136000488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36b31c0839e033c8fdb5f6e0bfc78e2d9735a0fe9264510a0e2f8d12a0d58d55
3
  size 268176506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e33ab760cf381a1b3364cd0d214e029ee42b9595779e052acbaab68a7bbff730
3
  size 268176506
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ce1f6957d6020f16416f4fd345e9fae1e2983c0b203bfade0ebefc01a476151
3
  size 14942
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fac704f203b889a3f255581a24033dc58a68d7b38d0c3e2670b79fd002470370
3
  size 14942
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d84e3c52aa4a82118b4d8d3c4b8e8c4226ad3a7764da780159cac8984f3a66d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8323278f0ff55c37775f3c33790ba47e5be2f0255a208e4fc7ee51b8d608539
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.817535830422739,
5
  "eval_steps": 500,
6
- "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -147,6 +147,13 @@
147
  "learning_rate": 0.0008393961294467197,
148
  "loss": 1.068,
149
  "step": 20000
 
 
 
 
 
 
 
150
  }
151
  ],
152
  "logging_steps": 1000,
@@ -166,7 +173,7 @@
166
  "attributes": {}
167
  }
168
  },
169
- "total_flos": 2.0007346657191526e+17,
170
  "train_batch_size": 64,
171
  "trial_name": null,
172
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.058412621943876,
5
  "eval_steps": 500,
6
+ "global_step": 21000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
147
  "learning_rate": 0.0008393961294467197,
148
  "loss": 1.068,
149
  "step": 20000
150
+ },
151
+ {
152
+ "epoch": 5.058412621943876,
153
+ "grad_norm": 0.4861578345298767,
154
+ "learning_rate": 0.0008313659359190556,
155
+ "loss": 1.0596,
156
+ "step": 21000
157
  }
158
  ],
159
  "logging_steps": 1000,
 
173
  "attributes": {}
174
  }
175
  },
176
+ "total_flos": 2.1007632729269862e+17,
177
  "train_batch_size": 64,
178
  "trial_name": null,
179
  "trial_params": null