brixeus committed on
Commit
8861665
·
verified ·
1 Parent(s): 0bc8cb1

Training in progress, step 70, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91af94eb4c5415d1a468c9b94c24f6f4eb665b5e18798cc0709c272b261e4b01
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9149805fd0ff6c9295a21f0ceed866ec5e19b987dc9b024c5713996cbccee03
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9a0ed33acc4b1549d8d32d9a92007a622c069ce765f2bb5b27b3224b50e48fa
3
  size 85723284
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e2f761486d2d2277401dace4f44681830fd254ea3d41966c9a17c77fec745aa
3
  size 85723284
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45fa2328ce571cd92a5087de8545d212e2efaf113a696e08cab907cc8ae2b386
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4abd8c451028780b83d24be580224b85633dd02b3c6970e7c2e5f6a76f5c96a5
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5747c56edca12686176331b1c30c5158c6a70283eb79bb7b20753e8e93d73f7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34cc44421081656693b70701aad2c568b7fd366e841bbfc436ac5b6eb6c2b321
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.597938144329897,
5
  "eval_steps": 7,
6
- "global_step": 63,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -234,6 +234,28 @@
234
  "eval_samples_per_second": 13.589,
235
  "eval_steps_per_second": 1.989,
236
  "step": 63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  }
238
  ],
239
  "logging_steps": 3,
@@ -253,7 +275,7 @@
253
  "attributes": {}
254
  }
255
  },
256
- "total_flos": 9.301411817639117e+16,
257
  "train_batch_size": 8,
258
  "trial_name": null,
259
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.88659793814433,
5
  "eval_steps": 7,
6
+ "global_step": 70,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
234
  "eval_samples_per_second": 13.589,
235
  "eval_steps_per_second": 1.989,
236
  "step": 63
237
+ },
238
+ {
239
+ "epoch": 2.7216494845360826,
240
+ "grad_norm": 1.3034956455230713,
241
+ "learning_rate": 3.0153689607045845e-06,
242
+ "loss": 0.8017,
243
+ "step": 66
244
+ },
245
+ {
246
+ "epoch": 2.845360824742268,
247
+ "grad_norm": 1.2311569452285767,
248
+ "learning_rate": 9.913756075728087e-07,
249
+ "loss": 0.8266,
250
+ "step": 69
251
+ },
252
+ {
253
+ "epoch": 2.88659793814433,
254
+ "eval_loss": 1.205170750617981,
255
+ "eval_runtime": 3.0134,
256
+ "eval_samples_per_second": 13.606,
257
+ "eval_steps_per_second": 1.991,
258
+ "step": 70
259
  }
260
  ],
261
  "logging_steps": 3,
 
275
  "attributes": {}
276
  }
277
  },
278
+ "total_flos": 1.0340054014623744e+17,
279
  "train_batch_size": 8,
280
  "trial_name": null,
281
  "trial_params": null