dq158 commited on
Commit
b8b39e7
1 Parent(s): ffbc819

Training in progress, step 30000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1162f3dd69f25f8696965e77a13ee78f76a56faa207df54198db0aa2c1ff8d34
3
  size 18915040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4eb840687881657e048d395c385a9084b7bcca678b6abb14165c472542383ceb
3
  size 18915040
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a392000d9293e83d945e935d13a886a99f359ba416db00324a34f83ac689a60
3
  size 37990394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df5f5ee2b56d6c0f93fb801be312fc1bd48ab0eacd78bb34294fb243c2b7397a
3
  size 37990394
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:052cb623d333c55cb6aa932d620c4240d6da273a0db3bf72bfdbcc2be7693707
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbfd5efb5880d038e40ef818b6a478489100d8537842fb87344a0d7f88275ee0
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c007ea3613e729374f49a992d59f46f0bb1762290dd86d4c3105289d354272e2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71d106c12a183d47d2349d6d228d20595c1cad95f8d19fec2a8622032de302f5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7267124980015406,
5
  "eval_steps": 500,
6
- "global_step": 25000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -307,13 +307,73 @@
307
  "learning_rate": 7.617450650796032e-05,
308
  "loss": 3.1865,
309
  "step": 25000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  }
311
  ],
312
  "logging_steps": 500,
313
  "max_steps": 172005,
314
  "num_train_epochs": 5,
315
  "save_steps": 5000,
316
- "total_flos": 8.566811394048e+17,
317
  "trial_name": null,
318
  "trial_params": null
319
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8720549976018488,
5
  "eval_steps": 500,
6
+ "global_step": 30000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
307
  "learning_rate": 7.617450650796032e-05,
308
  "loss": 3.1865,
309
  "step": 25000
310
+ },
311
+ {
312
+ "epoch": 0.74,
313
+ "learning_rate": 7.601617768363678e-05,
314
+ "loss": 3.2224,
315
+ "step": 25500
316
+ },
317
+ {
318
+ "epoch": 0.76,
319
+ "learning_rate": 7.58548099572581e-05,
320
+ "loss": 3.1192,
321
+ "step": 26000
322
+ },
323
+ {
324
+ "epoch": 0.77,
325
+ "learning_rate": 7.569041694439229e-05,
326
+ "loss": 3.1802,
327
+ "step": 26500
328
+ },
329
+ {
330
+ "epoch": 0.78,
331
+ "learning_rate": 7.552301251586894e-05,
332
+ "loss": 3.1781,
333
+ "step": 27000
334
+ },
335
+ {
336
+ "epoch": 0.8,
337
+ "learning_rate": 7.5352610796609e-05,
338
+ "loss": 3.1921,
339
+ "step": 27500
340
+ },
341
+ {
342
+ "epoch": 0.81,
343
+ "learning_rate": 7.517922616443289e-05,
344
+ "loss": 3.1896,
345
+ "step": 28000
346
+ },
347
+ {
348
+ "epoch": 0.83,
349
+ "learning_rate": 7.500287324884736e-05,
350
+ "loss": 3.1911,
351
+ "step": 28500
352
+ },
353
+ {
354
+ "epoch": 0.84,
355
+ "learning_rate": 7.482356692981116e-05,
356
+ "loss": 3.1367,
357
+ "step": 29000
358
+ },
359
+ {
360
+ "epoch": 0.86,
361
+ "learning_rate": 7.464132233647945e-05,
362
+ "loss": 3.1416,
363
+ "step": 29500
364
+ },
365
+ {
366
+ "epoch": 0.87,
367
+ "learning_rate": 7.445615484592736e-05,
368
+ "loss": 3.1682,
369
+ "step": 30000
370
  }
371
  ],
372
  "logging_steps": 500,
373
  "max_steps": 172005,
374
  "num_train_epochs": 5,
375
  "save_steps": 5000,
376
+ "total_flos": 1.02801736728576e+18,
377
  "trial_name": null,
378
  "trial_params": null
379
  }