Krish356 commited on
Commit
63a2ab3
·
verified ·
1 Parent(s): 369fd09

Training in progress, step 240, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7c6b2007d17f2c05a1ff04511bd6c420c2fa1d7dac0716249ba3481abf3fe1b
3
  size 3380768360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b9979eaa45a61fd8cebcbeaa53ecd0411b03747b0f7cb88d56f45fa404f1050
3
  size 3380768360
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:313c9668e74d9a6b584989bb8166e2be73c567a816346feb31eeb827ebe378fb
3
  size 1855337587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33cc08aed51b001f8c2bffba8429bca64232aa72d2436c4d76f03b9157a32060
3
  size 1855337587
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d391cbd422024319290b62fae06f5beac24520f574ab878e69073735038bbc28
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e714f56ef384df6bb4ffceaa2247da8c000483c6f7f2f5dce28e97af82a6ac6
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.1437699680511182,
6
  "eval_steps": 500,
7
- "global_step": 210,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -309,6 +309,48 @@
309
  "learning_rate": 8.087098307042959e-06,
310
  "loss": 0.2071,
311
  "step": 210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
  }
313
  ],
314
  "logging_steps": 5,
@@ -328,7 +370,7 @@
328
  "attributes": {}
329
  }
330
  },
331
- "total_flos": 2.539219665713701e+18,
332
  "train_batch_size": 16,
333
  "trial_name": null,
334
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.4313099041533546,
6
  "eval_steps": 500,
7
+ "global_step": 240,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
309
  "learning_rate": 8.087098307042959e-06,
310
  "loss": 0.2071,
311
  "step": 210
312
+ },
313
+ {
314
+ "epoch": 1.1916932907348243,
315
+ "grad_norm": 0.06395677477121353,
316
+ "learning_rate": 7.410974280025551e-06,
317
+ "loss": 0.205,
318
+ "step": 215
319
+ },
320
+ {
321
+ "epoch": 1.2396166134185305,
322
+ "grad_norm": 0.06315111368894577,
323
+ "learning_rate": 6.754974984940941e-06,
324
+ "loss": 0.232,
325
+ "step": 220
326
+ },
327
+ {
328
+ "epoch": 1.2875399361022364,
329
+ "grad_norm": 0.06133367493748665,
330
+ "learning_rate": 6.12084001399091e-06,
331
+ "loss": 0.2382,
332
+ "step": 225
333
+ },
334
+ {
335
+ "epoch": 1.3354632587859425,
336
+ "grad_norm": 0.05861750617623329,
337
+ "learning_rate": 5.5102509791201946e-06,
338
+ "loss": 0.205,
339
+ "step": 230
340
+ },
341
+ {
342
+ "epoch": 1.3833865814696487,
343
+ "grad_norm": 0.06707581132650375,
344
+ "learning_rate": 4.924827052683887e-06,
345
+ "loss": 0.2436,
346
+ "step": 235
347
+ },
348
+ {
349
+ "epoch": 1.4313099041533546,
350
+ "grad_norm": 0.06414289027452469,
351
+ "learning_rate": 4.366120673693442e-06,
352
+ "loss": 0.1972,
353
+ "step": 240
354
  }
355
  ],
356
  "logging_steps": 5,
 
370
  "attributes": {}
371
  }
372
  },
373
+ "total_flos": 2.7690417101519217e+18,
374
  "train_batch_size": 16,
375
  "trial_name": null,
376
  "trial_params": null