HiTruong committed on
Commit
0ecf281
1 Parent(s): 7c8d6f8

Training in progress, epoch 14, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8755cbe7e2398d1959416d6feb911384858a351204adb4f45a8e698f8710fd55
3
  size 30026872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7415a33eadc4a72f7ee8a58c72bed36a025bbcc1ca1e89dfa2d2ce164b000c5c
3
  size 30026872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f66a5ce1a780e3e1ff27f566dc0f0ffc09cedbb81984f67263cfbe2c21bb457c
3
  size 60252034
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32fbae7221f081ebb469450f3a8f3fa245312ed697db0b4357b8f97b88f52fb4
3
  size 60252034
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:235ab0e10f5c7cdfbbd49cc3926a6ca3af658975caba14cd01311559832e2b6e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e51e3e2f1bea9b7e703a1b7ac2c1ee4d3b75a36bbde79aa557a24cb0493a46b1
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bbd78e2bc5c115f006119ec58fcb69b8d548f43ab2f01e37b0763759baab3bc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02613a6afb86ae474ea068b51099dfd9dc4abfb57724ef7ae1f766d5528370f2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.0,
5
  "eval_steps": 500,
6
- "global_step": 3474,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -245,6 +245,27 @@
245
  "learning_rate": 0.0002,
246
  "loss": 0.1555,
247
  "step": 3400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  }
249
  ],
250
  "logging_steps": 100,
@@ -259,12 +280,12 @@
259
  "should_evaluate": false,
260
  "should_log": false,
261
  "should_save": true,
262
- "should_training_stop": false
263
  },
264
  "attributes": {}
265
  }
266
  },
267
- "total_flos": 5.235863557546967e+17,
268
  "train_batch_size": 4,
269
  "trial_name": null,
270
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 14.989939637826962,
5
  "eval_steps": 500,
6
+ "global_step": 3720,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
245
  "learning_rate": 0.0002,
246
  "loss": 0.1555,
247
  "step": 3400
248
+ },
249
+ {
250
+ "epoch": 14.104627766599597,
251
+ "grad_norm": 0.3850514888763428,
252
+ "learning_rate": 0.0002,
253
+ "loss": 0.1537,
254
+ "step": 3500
255
+ },
256
+ {
257
+ "epoch": 14.507042253521126,
258
+ "grad_norm": 0.3872847855091095,
259
+ "learning_rate": 0.0002,
260
+ "loss": 0.1427,
261
+ "step": 3600
262
+ },
263
+ {
264
+ "epoch": 14.909456740442655,
265
+ "grad_norm": 0.3384864032268524,
266
+ "learning_rate": 0.0002,
267
+ "loss": 0.1471,
268
+ "step": 3700
269
  }
270
  ],
271
  "logging_steps": 100,
 
280
  "should_evaluate": false,
281
  "should_log": false,
282
  "should_save": true,
283
+ "should_training_stop": true
284
  },
285
  "attributes": {}
286
  }
287
  },
288
+ "total_flos": 5.606624189428531e+17,
289
  "train_batch_size": 4,
290
  "trial_name": null,
291
  "trial_params": null