diagonalge committed
Commit 2f29494
1 Parent(s): edfb86a

Training in progress, step 90, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af267734d45ab8807778ad6c8034835e5516c44f08ff54b6b0b37983658ad08d
+oid sha256:432eec1fba0080d963fdc6ae82a834cb1e734041f7aff1118b89ad7b18c207db
 size 45118424
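
The entry above is a Git LFS pointer file, not the weights themselves: it records only the LFS spec version, the SHA-256 object id of the stored blob, and its size in bytes. A minimal sketch of checking a downloaded blob against the new pointer, assuming the file has already been fetched to a local path (the path and chunk size here are illustrative):

import hashlib
import os

# Hypothetical local path to the downloaded checkpoint blob.
path = "last-checkpoint/adapter_model.safetensors"

# Values taken from the new LFS pointer above.
expected_oid = "432eec1fba0080d963fdc6ae82a834cb1e734041f7aff1118b89ad7b18c207db"
expected_size = 45118424

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha256.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert sha256.hexdigest() == expected_oid, "sha256 mismatch"
print("blob matches the LFS pointer")
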
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:247139f56b8ec3e96f45512ce47e9bbbde14d0c26d612c653a70b2b450d5f3e0
+oid sha256:05128a102a188bf1805db3f87861a8ec32a704f4a163c27ce580f009fdd12730
 size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6fb86d30a39fedc74b03df6b4c27c7a36b5a0080347bc29b002aed513ce38fe7
+oid sha256:5181f6310a2ee8021297af86cebbb89fed67729cda31b24d40e1d23f86a348a4
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:49b8a1dbbf2c2a0b7fde326d57c34bd6c5e5d17e0aaf8b19016c1f721c049db1
+oid sha256:cb97b24f34ff3e53eec5be9cf35c1a7161c58dbc2fed7dda160fb3eb64e5f353
 size 1064
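
Together these four updated files carry the state needed to continue the run: the adapter weights (adapter_model.safetensors, which suggests a PEFT-style adapter), the optimizer state, the RNG state, and the LR scheduler state; in practice one would resume by passing resume_from_checkpoint="last-checkpoint" to Trainer.train(). A minimal inspection sketch, assuming a local copy of this checkpoint directory (the base model and training setup are not recorded in this diff):

import torch
from safetensors import safe_open

ckpt = "last-checkpoint"  # hypothetical local copy of the checkpoint directory

# optimizer.pt, scheduler.pt and rng_state.pth are ordinary torch pickles
# (weights_only=False because these trusted local files contain non-tensor objects).
optimizer_state = torch.load(f"{ckpt}/optimizer.pt", map_location="cpu", weights_only=False)
scheduler_state = torch.load(f"{ckpt}/scheduler.pt", map_location="cpu", weights_only=False)
rng_state = torch.load(f"{ckpt}/rng_state.pth", map_location="cpu", weights_only=False)
print(scheduler_state)  # e.g. last_epoch should read 90 for this checkpoint

# adapter_model.safetensors holds the adapter tensors; list names and shapes
# without materializing the full tensors.
with safe_open(f"{ckpt}/adapter_model.safetensors", framework="pt", device="cpu") as f:
    for name in f.keys():
        print(name, f.get_slice(name).get_shape())
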
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.1909307875894988,
+  "epoch": 0.21479713603818615,
   "eval_steps": 25,
-  "global_step": 80,
+  "global_step": 90,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -599,6 +599,76 @@
       "learning_rate": 2.339555568810221e-05,
       "loss": 0.6797,
       "step": 80
+    },
+    {
+      "epoch": 0.19331742243436753,
+      "grad_norm": 0.3806484341621399,
+      "learning_rate": 2.119892463932781e-05,
+      "loss": 0.6941,
+      "step": 81
+    },
+    {
+      "epoch": 0.1957040572792363,
+      "grad_norm": 0.5402078032493591,
+      "learning_rate": 1.9098300562505266e-05,
+      "loss": 0.7054,
+      "step": 82
+    },
+    {
+      "epoch": 0.19809069212410502,
+      "grad_norm": 0.5552183985710144,
+      "learning_rate": 1.7096242744495837e-05,
+      "loss": 0.6607,
+      "step": 83
+    },
+    {
+      "epoch": 0.20047732696897375,
+      "grad_norm": 0.5727123022079468,
+      "learning_rate": 1.5195190384357404e-05,
+      "loss": 0.8021,
+      "step": 84
+    },
+    {
+      "epoch": 0.20286396181384247,
+      "grad_norm": 0.45054370164871216,
+      "learning_rate": 1.339745962155613e-05,
+      "loss": 0.9153,
+      "step": 85
+    },
+    {
+      "epoch": 0.2052505966587112,
+      "grad_norm": 0.43339234590530396,
+      "learning_rate": 1.1705240714107302e-05,
+      "loss": 0.675,
+      "step": 86
+    },
+    {
+      "epoch": 0.20763723150357996,
+      "grad_norm": 0.5368764996528625,
+      "learning_rate": 1.0120595370083318e-05,
+      "loss": 0.6827,
+      "step": 87
+    },
+    {
+      "epoch": 0.2100238663484487,
+      "grad_norm": 0.5663527846336365,
+      "learning_rate": 8.645454235739903e-06,
+      "loss": 0.9642,
+      "step": 88
+    },
+    {
+      "epoch": 0.21241050119331742,
+      "grad_norm": 0.46338632702827454,
+      "learning_rate": 7.281614543321269e-06,
+      "loss": 0.9681,
+      "step": 89
+    },
+    {
+      "epoch": 0.21479713603818615,
+      "grad_norm": 0.4531870186328888,
+      "learning_rate": 6.030737921409169e-06,
+      "loss": 0.8122,
+      "step": 90
     }
   ],
   "logging_steps": 1,
@@ -618,7 +688,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.548356111302656e+16,
+  "total_flos": 1.741900625215488e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null