dixedus committed on
Commit 50e5fc1 · verified · 1 Parent(s): 86b3fb1

Training in progress, step 284, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:47d0a48cb21ecf7db40a461679c66c506d6cfce3cd3eb8bf6e5d0f056870dcad
+ oid sha256:26226a0ed5fc078123a27e5d0e3a363288fca9118e63f2f7f87d0122233668ee
 size 1521616
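The adapter weights themselves live behind this Git LFS pointer, so the diff only records a new object id; the ~1.5 MB size suggests a small LoRA-style adapter, though that is an inference, not something the diff states. A minimal sketch of inspecting the checkpointed adapter once the real file (not the pointer) has been downloaded, using the `safetensors` library; the local path is taken from the diff above:

```python
# Sketch: list the tensors stored in the checkpointed adapter file.
# Assumes the actual LFS object has been fetched to this path.
from safetensors.torch import load_file

state_dict = load_file("last-checkpoint/adapter_model.safetensors")
for name, tensor in state_dict.items():
    print(name, tuple(tensor.shape), tensor.dtype)
```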
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:87ce9374edcd3515eb5bae4c9f5b33ce8a590226d39f6b9a7aa63808bc1b8f87
- size 1923578
+ oid sha256:b0909eeb4115aadf68e80281a69764465be936d01c1d760552f9a95d387f12b9
+ size 1923706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:f0e78a99081077a5b76d0235937a3b74eb8046bd77c8cc2ff3a6d6cab0d236cc
+ oid sha256:e04b0f2df816607a4c8831ab4eb641e4ebe05967ddc2acfa4a47402d04dc09e3
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:7dbf4abce9ef0842fe48244ef30ddf3b876559fbed9651b267b9cce55ab7a6d9
+ oid sha256:1e782346571a35cc87bfc48f2a321ddc2384cd7948f88ca7d7f2b32f169c07e6
 size 1064
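Each binary file above (adapter, optimizer, RNG state, scheduler) is tracked as a Git LFS pointer, so the only visible change per checkpoint is the SHA-256 object id and byte size. A minimal sketch of verifying a downloaded file against its pointer; the oid and size values are copied from the scheduler.pt diff above, and the helper function is a hypothetical name, not part of any library:

```python
# Sketch: confirm a downloaded checkpoint file matches the oid/size
# recorded in its Git LFS pointer.
import hashlib
import os

def matches_lfs_pointer(path, expected_oid, expected_size):
    # Cheap check first: the byte size recorded in the pointer.
    if os.path.getsize(path) != expected_size:
        return False
    # Then hash the file contents and compare against the pointer oid.
    sha = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
    return sha.hexdigest() == expected_oid

print(matches_lfs_pointer(
    "last-checkpoint/scheduler.pt",
    "1e782346571a35cc87bfc48f2a321ddc2384cd7948f88ca7d7f2b32f169c07e6",
    1064,
))
```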
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 6.470786094665527,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
4
- "epoch": 2.1164021164021163,
5
  "eval_steps": 100,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -171,6 +171,62 @@
171
  "eval_samples_per_second": 223.132,
172
  "eval_steps_per_second": 55.783,
173
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  }
175
  ],
176
  "logging_steps": 10,
@@ -194,12 +250,12 @@
194
  "should_evaluate": false,
195
  "should_log": false,
196
  "should_save": true,
197
- "should_training_stop": false
198
  },
199
  "attributes": {}
200
  }
201
  },
202
- "total_flos": 117420732973056.0,
203
  "train_batch_size": 8,
204
  "trial_name": null,
205
  "trial_params": null
 
1
  {
2
  "best_metric": 6.470786094665527,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
4
+ "epoch": 3.005291005291005,
5
  "eval_steps": 100,
6
+ "global_step": 284,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
171
  "eval_samples_per_second": 223.132,
172
  "eval_steps_per_second": 55.783,
173
  "step": 200
174
+ },
175
+ {
176
+ "epoch": 2.2222222222222223,
177
+ "grad_norm": 1.4395525455474854,
178
+ "learning_rate": 3.3886016253128326e-05,
179
+ "loss": 6.2536,
180
+ "step": 210
181
+ },
182
+ {
183
+ "epoch": 2.328042328042328,
184
+ "grad_norm": 1.4456923007965088,
185
+ "learning_rate": 2.5736686013646228e-05,
186
+ "loss": 6.6578,
187
+ "step": 220
188
+ },
189
+ {
190
+ "epoch": 2.433862433862434,
191
+ "grad_norm": 1.5295335054397583,
192
+ "learning_rate": 1.8562563466230576e-05,
193
+ "loss": 6.5134,
194
+ "step": 230
195
+ },
196
+ {
197
+ "epoch": 2.5396825396825395,
198
+ "grad_norm": 1.9139333963394165,
199
+ "learning_rate": 1.2457857435084408e-05,
200
+ "loss": 6.5716,
201
+ "step": 240
202
+ },
203
+ {
204
+ "epoch": 2.6455026455026456,
205
+ "grad_norm": 1.5985289812088013,
206
+ "learning_rate": 7.502733428044683e-06,
207
+ "loss": 6.5857,
208
+ "step": 250
209
+ },
210
+ {
211
+ "epoch": 2.751322751322751,
212
+ "grad_norm": 1.7326596975326538,
213
+ "learning_rate": 3.7622609227231818e-06,
214
+ "loss": 6.4011,
215
+ "step": 260
216
+ },
217
+ {
218
+ "epoch": 2.857142857142857,
219
+ "grad_norm": 1.2264372110366821,
220
+ "learning_rate": 1.2855588900269056e-06,
221
+ "loss": 6.6658,
222
+ "step": 270
223
+ },
224
+ {
225
+ "epoch": 2.962962962962963,
226
+ "grad_norm": 1.4076132774353027,
227
+ "learning_rate": 1.0515077583498344e-07,
228
+ "loss": 6.3814,
229
+ "step": 280
230
  }
231
  ],
232
  "logging_steps": 10,
 
250
  "should_evaluate": false,
251
  "should_log": false,
252
  "should_save": true,
253
+ "should_training_stop": true
254
  },
255
  "attributes": {}
256
  }
257
  },
258
+ "total_flos": 166705538924544.0,
259
  "train_batch_size": 8,
260
  "trial_name": null,
261
  "trial_params": null