eeeebbb2 committed
Commit e072d45 · verified · 1 Parent(s): a8b2b81

Training in progress, step 45, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e891d7f472c05578b1e11cb9a8f155ba81deba816fd82fed601222d321ee8b55
+oid sha256:269e57ed8bb717336b1b94c6127320c63ef4406a0cd07dc09c234c950fb9213b
 size 493712
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:03200b4cf7ceef10384b84de8a6a658803bf9aef86f41309d72a31e0d43645d9
+oid sha256:ac1b57314896570d62b32d47220e17f58e732161b49fd928802c6602f4bd0a76
 size 997038
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1013fab26d9f10b2cd7ce9ca6fce490010f925e040647938cd66d039dead508
+oid sha256:2470326e63720b9c2b532e840ba2aa37daeb6c106b95fcdb0e29979c4af37d07
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd4a43979577f417f5f52529ccd29b04d89e0c6f7076c4680bb2b1c2398c53dc
+oid sha256:ce7d6723f6bf70a0e9b9b2444eaa4e83cdcce7ababf578c2fa2a2d5d73b7405f
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3469e9e26082b3400296dbe6c557ba60f63fb947c8d998e7ebc102c0d4ef9a43
+oid sha256:fcb48e3a2249fe0e2cda8ec792bb13f5c38195c570365a1757dd81a2cbac5436
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43fe536ec202674af63e7f35ddde3a5d1cee1ab1d10debc7f3f085a607de6d11
+oid sha256:d896ece4807ef77f7639aae20bd1c38d22d912978931ea25e03186050002542f
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3177ea4b394d20689f047a52c65a4c736ff1c10b2534d22548525087dc2bae5a
+oid sha256:69229c49401e88b3b3378c31d82dab5bb5c4af56b04c5c6fd06f1abbc7474757
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 10.823450088500977,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 1.7341772151898733,
+  "epoch": 3.1772151898734178,
   "eval_steps": 25,
-  "global_step": 25,
+  "global_step": 45,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,146 @@
       "eval_samples_per_second": 475.75,
       "eval_steps_per_second": 123.695,
       "step": 25
+    },
+    {
+      "epoch": 1.8016877637130801,
+      "grad_norm": 12.02143383026123,
+      "learning_rate": 4.682634170592537e-05,
+      "loss": 173.4692,
+      "step": 26
+    },
+    {
+      "epoch": 1.869198312236287,
+      "grad_norm": 12.537296295166016,
+      "learning_rate": 4.3617997792374365e-05,
+      "loss": 172.9671,
+      "step": 27
+    },
+    {
+      "epoch": 1.9367088607594938,
+      "grad_norm": 12.867912292480469,
+      "learning_rate": 4.0470381785284936e-05,
+      "loss": 172.5768,
+      "step": 28
+    },
+    {
+      "epoch": 2.050632911392405,
+      "grad_norm": 11.798124313354492,
+      "learning_rate": 3.7400287577942993e-05,
+      "loss": 173.2267,
+      "step": 29
+    },
+    {
+      "epoch": 2.1181434599156117,
+      "grad_norm": 11.980865478515625,
+      "learning_rate": 3.4424095451291274e-05,
+      "loss": 172.9524,
+      "step": 30
+    },
+    {
+      "epoch": 2.1856540084388185,
+      "grad_norm": 12.464712142944336,
+      "learning_rate": 3.155768467804314e-05,
+      "loss": 172.4342,
+      "step": 31
+    },
+    {
+      "epoch": 2.2531645569620253,
+      "grad_norm": 12.398478507995605,
+      "learning_rate": 2.8816348799892133e-05,
+      "loss": 172.3243,
+      "step": 32
+    },
+    {
+      "epoch": 2.320675105485232,
+      "grad_norm": 11.792886734008789,
+      "learning_rate": 2.621471402984991e-05,
+      "loss": 172.8272,
+      "step": 33
+    },
+    {
+      "epoch": 2.388185654008439,
+      "grad_norm": 11.965614318847656,
+      "learning_rate": 2.3766661215071475e-05,
+      "loss": 172.4491,
+      "step": 34
+    },
+    {
+      "epoch": 2.4556962025316453,
+      "grad_norm": 12.57142162322998,
+      "learning_rate": 2.148525177652982e-05,
+      "loss": 171.9388,
+      "step": 35
+    },
+    {
+      "epoch": 2.523206751054852,
+      "grad_norm": 11.402908325195312,
+      "learning_rate": 1.938265802068357e-05,
+      "loss": 172.6287,
+      "step": 36
+    },
+    {
+      "epoch": 2.590717299578059,
+      "grad_norm": 11.887864112854004,
+      "learning_rate": 1.74700981949555e-05,
+      "loss": 172.3424,
+      "step": 37
+    },
+    {
+      "epoch": 2.6582278481012658,
+      "grad_norm": 12.034871101379395,
+      "learning_rate": 1.5757776633528655e-05,
+      "loss": 172.0158,
+      "step": 38
+    },
+    {
+      "epoch": 2.7257383966244726,
+      "grad_norm": 12.191680908203125,
+      "learning_rate": 1.4254829312808404e-05,
+      "loss": 171.8472,
+      "step": 39
+    },
+    {
+      "epoch": 2.7932489451476794,
+      "grad_norm": 11.363740921020508,
+      "learning_rate": 1.2969275107035345e-05,
+      "loss": 172.5804,
+      "step": 40
+    },
+    {
+      "epoch": 2.8607594936708862,
+      "grad_norm": 11.75307559967041,
+      "learning_rate": 1.1907973004121738e-05,
+      "loss": 172.0639,
+      "step": 41
+    },
+    {
+      "epoch": 2.928270042194093,
+      "grad_norm": 12.174725532531738,
+      "learning_rate": 1.1076585509983283e-05,
+      "loss": 171.687,
+      "step": 42
+    },
+    {
+      "epoch": 3.042194092827004,
+      "grad_norm": 11.464780807495117,
+      "learning_rate": 1.0479548436620041e-05,
+      "loss": 172.3345,
+      "step": 43
+    },
+    {
+      "epoch": 3.109704641350211,
+      "grad_norm": 11.445940017700195,
+      "learning_rate": 1.0120047235140178e-05,
+      "loss": 172.2803,
+      "step": 44
+    },
+    {
+      "epoch": 3.1772151898734178,
+      "grad_norm": 11.784713745117188,
+      "learning_rate": 1e-05,
+      "loss": 171.8565,
+      "step": 45
     }
   ],
   "logging_steps": 1,
@@ -221,12 +361,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 19055561932800.0,
+  "total_flos": 34300011479040.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null