besimray committed
Commit e3db09e · verified · 1 Parent(s): cfcb77b

Training in progress, step 100, checkpoint

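This checkpoint bundles a PEFT-style LoRA adapter (adapter_model.safetensors) together with the Trainer resume state (optimizer.pt, scheduler.pt, rng_state.pth, trainer_state.json). Below is a minimal sketch for loading the saved adapter for inference; the base model name is a placeholder (the commit does not record it), and it assumes last-checkpoint/ also contains the adapter_config.json written alongside the adapter weights.

```python
# Hypothetical sketch: load the LoRA adapter saved in this checkpoint on top
# of its base model. BASE_MODEL is a placeholder; this commit does not say
# which base model the adapter was trained against.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL = "your-base-model"      # placeholder, not part of this commit
ADAPTER_DIR = "last-checkpoint"     # directory updated by this commit

base = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
model = PeftModel.from_pretrained(base, ADAPTER_DIR)   # reads adapter_model.safetensors
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
```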
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:91a3b29856143aecc3a1fd76b949ca0dd86fb486460a8825bb04011135aea6da
+ oid sha256:e54408fb2a33f5c91595c277d43af9ca7442d78555f6734133f909076eb5ee0b
  size 90207248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:efdd3a8541cf52ef664e6919d0834665f7aee03a1cc90078eecd7f50255b8cf0
+ oid sha256:ef3fcc665ceb8166c03b90dbe8b812cc65aef67e5c4040511438f0bcec2d036b
  size 46057082
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:63c9cc0f3c04b2c00adeda1f5d60d230e61e99e3404c96ea1e859f8d4e96fa90
+ oid sha256:8e023d57ffc9febc8fefa58b1faee1161f6172e0c816bc8ad1dc30d22145a166
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:23b27ab0ae2b9af6f3d4c84cdaf8b0fc887acf71f8f726b270a3bce2845000a9
+ oid sha256:d5c84ec0ff3c8c6aa13b25568668096db118f67ce80a9fa015a625446606f15d
  size 1064
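Each of the blocks above is a Git LFS pointer file (git-lfs spec v1): the repository stores only the version line, the sha256 oid, and the byte size, while the actual blob lives in LFS storage. A small sketch, assuming the real files have been pulled locally, for checking a downloaded artifact against its pointer:

```python
# Sketch: verify a downloaded file against its Git LFS pointer by comparing
# its SHA-256 digest and byte size with the pointer's oid and size fields.
import hashlib
import os

def verify_lfs_object(path: str, expected_oid: str, expected_size: int) -> bool:
    """Return True if the file matches the pointer's sha256 oid and size."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid and os.path.getsize(path) == expected_size

# Example using the new scheduler.pt pointer from this commit:
ok = verify_lfs_object(
    "last-checkpoint/scheduler.pt",
    "d5c84ec0ff3c8c6aa13b25568668096db118f67ce80a9fa015a625446606f15d",
    1064,
)
```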
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 1.602448582649231,
- "best_model_checkpoint": "miner_id_24/checkpoint-80",
- "epoch": 0.025414754677020826,
+ "best_metric": 1.6011497974395752,
+ "best_model_checkpoint": "miner_id_24/checkpoint-100",
+ "epoch": 0.028238616307800918,
  "eval_steps": 10,
- "global_step": 90,
+ "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -717,6 +717,84 @@
  "eval_samples_per_second": 5.584,
  "eval_steps_per_second": 5.584,
  "step": 90
+ },
+ {
+ "epoch": 0.025697140840098835,
+ "grad_norm": 1.4146398305892944,
+ "learning_rate": 0.00018681546242521786,
+ "loss": 1.1993,
+ "step": 91
+ },
+ {
+ "epoch": 0.025979527003176843,
+ "grad_norm": 2.2005670070648193,
+ "learning_rate": 0.00018649548579446936,
+ "loss": 1.9517,
+ "step": 92
+ },
+ {
+ "epoch": 0.026261913166254855,
+ "grad_norm": 1.241758108139038,
+ "learning_rate": 0.0001861719536730795,
+ "loss": 2.0892,
+ "step": 93
+ },
+ {
+ "epoch": 0.026544299329332863,
+ "grad_norm": 1.4617339372634888,
+ "learning_rate": 0.00018584487936018661,
+ "loss": 2.2815,
+ "step": 94
+ },
+ {
+ "epoch": 0.026826685492410872,
+ "grad_norm": 1.677581548690796,
+ "learning_rate": 0.00018551427630053463,
+ "loss": 1.75,
+ "step": 95
+ },
+ {
+ "epoch": 0.02710907165548888,
+ "grad_norm": 3.2750422954559326,
+ "learning_rate": 0.00018518015808392045,
+ "loss": 1.8473,
+ "step": 96
+ },
+ {
+ "epoch": 0.02739145781856689,
+ "grad_norm": 1.7410293817520142,
+ "learning_rate": 0.00018484253844463526,
+ "loss": 1.2498,
+ "step": 97
+ },
+ {
+ "epoch": 0.0276738439816449,
+ "grad_norm": 1.0431251525878906,
+ "learning_rate": 0.00018450143126090015,
+ "loss": 2.3196,
+ "step": 98
+ },
+ {
+ "epoch": 0.02795623014472291,
+ "grad_norm": 2.758586883544922,
+ "learning_rate": 0.00018415685055429533,
+ "loss": 1.9701,
+ "step": 99
+ },
+ {
+ "epoch": 0.028238616307800918,
+ "grad_norm": 1.7685903310775757,
+ "learning_rate": 0.00018380881048918405,
+ "loss": 1.4758,
+ "step": 100
+ },
+ {
+ "epoch": 0.028238616307800918,
+ "eval_loss": 1.6011497974395752,
+ "eval_runtime": 133.8006,
+ "eval_samples_per_second": 5.575,
+ "eval_steps_per_second": 5.575,
+ "step": 100
  }
  ],
  "logging_steps": 1,
@@ -731,7 +809,7 @@
  "early_stopping_threshold": 0.0
  },
  "attributes": {
- "early_stopping_patience_counter": 1
+ "early_stopping_patience_counter": 0
  }
  },
  "TrainerControl": {
@@ -745,7 +823,7 @@
  "attributes": {}
  }
  },
- "total_flos": 8809232266690560.0,
+ "total_flos": 9788035851878400.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null