leixa commited on
Commit
8c9051c
·
verified ·
1 Parent(s): 353dba9

Training in progress, step 210, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a2a60feb1e96d790294c0b5eae9acc5fb705f4771269491fe8bf9cd16a835e5
3
  size 201892112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bc6d404f7fd69fe907c7d54d9d2ba1b802a5e0867a87634f546ea28477a5f17
3
  size 201892112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7cbcfaf89e28391d7c82167b07cd38f9662d61b8bf262f3ab44b55dd4fde9bc5
3
  size 102864548
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32ee2b6ae3076a559fcb80a8ff9862fd211c95d781fac1e407e8f1030ac38660
3
  size 102864548
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98eb662dc5bc7d04a24c041329941502e3aed4d1fcb64bac57ca17d20f66e522
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a58fcd0d4e63bda996273295cf19629f45f15b4f31eb5cecaee6502ebb86992
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f516a6e4e8a8eba956b80cb2ea416b9fd98f0dec12d7d9d9a36274d0eef4a63
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bd95a1db0e917ddf11b12a343f06e907fcec4b81104002e2471b4778587b465
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.03521643433602348,
5
  "eval_steps": 42,
6
- "global_step": 168,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -439,6 +439,112 @@
439
  "eval_samples_per_second": 48.695,
440
  "eval_steps_per_second": 6.091,
441
  "step": 168
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
  }
443
  ],
444
  "logging_steps": 3,
@@ -458,7 +564,7 @@
458
  "attributes": {}
459
  }
460
  },
461
- "total_flos": 3.5836883921534976e+16,
462
  "train_batch_size": 8,
463
  "trial_name": null,
464
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.04402054292002935,
5
  "eval_steps": 42,
6
+ "global_step": 210,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
439
  "eval_samples_per_second": 48.695,
440
  "eval_steps_per_second": 6.091,
441
  "step": 168
442
+ },
443
+ {
444
+ "epoch": 0.03584529923488104,
445
+ "grad_norm": 0.3241373300552368,
446
+ "learning_rate": 7.564496387029532e-05,
447
+ "loss": 1.1243,
448
+ "step": 171
449
+ },
450
+ {
451
+ "epoch": 0.0364741641337386,
452
+ "grad_norm": 0.33574438095092773,
453
+ "learning_rate": 7.481469062821252e-05,
454
+ "loss": 1.1031,
455
+ "step": 174
456
+ },
457
+ {
458
+ "epoch": 0.037103029032596165,
459
+ "grad_norm": 0.38013267517089844,
460
+ "learning_rate": 7.39752373250527e-05,
461
+ "loss": 1.1191,
462
+ "step": 177
463
+ },
464
+ {
465
+ "epoch": 0.03773189393145373,
466
+ "grad_norm": 0.35769525170326233,
467
+ "learning_rate": 7.312691451204178e-05,
468
+ "loss": 1.0992,
469
+ "step": 180
470
+ },
471
+ {
472
+ "epoch": 0.03836075883031129,
473
+ "grad_norm": 0.32820338010787964,
474
+ "learning_rate": 7.227003602163295e-05,
475
+ "loss": 1.0621,
476
+ "step": 183
477
+ },
478
+ {
479
+ "epoch": 0.03898962372916885,
480
+ "grad_norm": 0.32962238788604736,
481
+ "learning_rate": 7.14049188514063e-05,
482
+ "loss": 1.0828,
483
+ "step": 186
484
+ },
485
+ {
486
+ "epoch": 0.03961848862802641,
487
+ "grad_norm": 0.34197545051574707,
488
+ "learning_rate": 7.05318830467969e-05,
489
+ "loss": 1.1168,
490
+ "step": 189
491
+ },
492
+ {
493
+ "epoch": 0.040247353526883974,
494
+ "grad_norm": 0.37021368741989136,
495
+ "learning_rate": 6.965125158269619e-05,
496
+ "loss": 1.1374,
497
+ "step": 192
498
+ },
499
+ {
500
+ "epoch": 0.04087621842574154,
501
+ "grad_norm": 0.3301369547843933,
502
+ "learning_rate": 6.876335024396872e-05,
503
+ "loss": 1.054,
504
+ "step": 195
505
+ },
506
+ {
507
+ "epoch": 0.041505083324599096,
508
+ "grad_norm": 0.35479190945625305,
509
+ "learning_rate": 6.786850750493006e-05,
510
+ "loss": 1.0923,
511
+ "step": 198
512
+ },
513
+ {
514
+ "epoch": 0.04213394822345666,
515
+ "grad_norm": 0.35211730003356934,
516
+ "learning_rate": 6.696705440782938e-05,
517
+ "loss": 1.1124,
518
+ "step": 201
519
+ },
520
+ {
521
+ "epoch": 0.042762813122314225,
522
+ "grad_norm": 0.4026525616645813,
523
+ "learning_rate": 6.605932444038229e-05,
524
+ "loss": 1.1229,
525
+ "step": 204
526
+ },
527
+ {
528
+ "epoch": 0.04339167802117178,
529
+ "grad_norm": 0.40407177805900574,
530
+ "learning_rate": 6.514565341239861e-05,
531
+ "loss": 1.066,
532
+ "step": 207
533
+ },
534
+ {
535
+ "epoch": 0.04402054292002935,
536
+ "grad_norm": 0.3534255027770996,
537
+ "learning_rate": 6.422637933155162e-05,
538
+ "loss": 1.0462,
539
+ "step": 210
540
+ },
541
+ {
542
+ "epoch": 0.04402054292002935,
543
+ "eval_loss": 1.0946003198623657,
544
+ "eval_runtime": 165.0602,
545
+ "eval_samples_per_second": 48.679,
546
+ "eval_steps_per_second": 6.089,
547
+ "step": 210
548
  }
549
  ],
550
  "logging_steps": 3,
 
564
  "attributes": {}
565
  }
566
  },
567
+ "total_flos": 4.479610490191872e+16,
568
  "train_batch_size": 8,
569
  "trial_name": null,
570
  "trial_params": null