Alphatao commited on
Commit
86958e7
·
verified ·
1 Parent(s): cd22aac

Training in progress, step 2520, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79dbc21c20485148f3d5103ccb74438d521b3e24f1af3ff2eb11ab60a18d09c6
3
  size 50503544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd41ebaf0126bcbc9cb857ac5378df1eabfc3bdefaf0428fc58c9548b03fb716
3
  size 50503544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30cdb38be8ef4f72aaf8bd15a8abf5ecc17f7245cde262cdfc919b1aedf403fc
3
  size 25986468
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e1fc2cc37d953fa48873d103c237d567951d0ea49cb5dad104417190acaae8c
3
  size 25986468
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cad609a9755c89c6b502f894c0f517f77e1ef69f34e4a31f5b485de169798ec
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7f712a67228f3bcffa37c019c35e988ecd34b25e0960d7fa983f4ae67e4c1e5
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b638b56b9dc8398196af3daa4ccf42a12d198b3dc3ab675be9fa49de3b5e01a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfd166862c66a199e9571546b71d0678fd9a94525d7d5a7b2b2e955704972144
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.023890294134616852,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-2500",
4
- "epoch": 1.3958682300390843,
5
  "eval_steps": 100,
6
- "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -17715,6 +17715,146 @@
17715
  "eval_samples_per_second": 27.795,
17716
  "eval_steps_per_second": 6.949,
17717
  "step": 2500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17718
  }
17719
  ],
17720
  "logging_steps": 1,
@@ -17738,12 +17878,12 @@
17738
  "should_evaluate": false,
17739
  "should_log": false,
17740
  "should_save": true,
17741
- "should_training_stop": false
17742
  },
17743
  "attributes": {}
17744
  }
17745
  },
17746
- "total_flos": 1.02936877203456e+18,
17747
  "train_batch_size": 4,
17748
  "trial_name": null,
17749
  "trial_params": null
 
1
  {
2
  "best_metric": 0.023890294134616852,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-2500",
4
+ "epoch": 1.4070351758793969,
5
  "eval_steps": 100,
6
+ "global_step": 2520,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
17715
  "eval_samples_per_second": 27.795,
17716
  "eval_steps_per_second": 6.949,
17717
  "step": 2500
17718
+ },
17719
+ {
17720
+ "epoch": 1.3964265773311,
17721
+ "grad_norm": 0.06104286387562752,
17722
+ "learning_rate": 2.8275418474443814e-08,
17723
+ "loss": 0.0183,
17724
+ "step": 2501
17725
+ },
17726
+ {
17727
+ "epoch": 1.3969849246231156,
17728
+ "grad_norm": 0.06953344494104385,
17729
+ "learning_rate": 2.537750647535475e-08,
17730
+ "loss": 0.0209,
17731
+ "step": 2502
17732
+ },
17733
+ {
17734
+ "epoch": 1.3975432719151313,
17735
+ "grad_norm": 0.08410750329494476,
17736
+ "learning_rate": 2.26362126006352e-08,
17737
+ "loss": 0.0195,
17738
+ "step": 2503
17739
+ },
17740
+ {
17741
+ "epoch": 1.398101619207147,
17742
+ "grad_norm": 0.07485437393188477,
17743
+ "learning_rate": 2.0051541144749943e-08,
17744
+ "loss": 0.0232,
17745
+ "step": 2504
17746
+ },
17747
+ {
17748
+ "epoch": 1.3986599664991624,
17749
+ "grad_norm": 0.06569929420948029,
17750
+ "learning_rate": 1.762349615677117e-08,
17751
+ "loss": 0.0212,
17752
+ "step": 2505
17753
+ },
17754
+ {
17755
+ "epoch": 1.3992183137911782,
17756
+ "grad_norm": 0.05091328173875809,
17757
+ "learning_rate": 1.5352081440422884e-08,
17758
+ "loss": 0.016,
17759
+ "step": 2506
17760
+ },
17761
+ {
17762
+ "epoch": 1.3997766610831937,
17763
+ "grad_norm": 0.045666009187698364,
17764
+ "learning_rate": 1.3237300554069798e-08,
17765
+ "loss": 0.0142,
17766
+ "step": 2507
17767
+ },
17768
+ {
17769
+ "epoch": 1.4003350083752093,
17770
+ "grad_norm": 0.05261168256402016,
17771
+ "learning_rate": 1.1279156810684034e-08,
17772
+ "loss": 0.0145,
17773
+ "step": 2508
17774
+ },
17775
+ {
17776
+ "epoch": 1.400893355667225,
17777
+ "grad_norm": 0.05364307388663292,
17778
+ "learning_rate": 9.477653277834009e-09,
17779
+ "loss": 0.0143,
17780
+ "step": 2509
17781
+ },
17782
+ {
17783
+ "epoch": 1.4014517029592406,
17784
+ "grad_norm": 0.07531214505434036,
17785
+ "learning_rate": 7.83279277773996e-09,
17786
+ "loss": 0.0189,
17787
+ "step": 2510
17788
+ },
17789
+ {
17790
+ "epoch": 1.4020100502512562,
17791
+ "grad_norm": 0.07607190310955048,
17792
+ "learning_rate": 6.344577887185121e-09,
17793
+ "loss": 0.0206,
17794
+ "step": 2511
17795
+ },
17796
+ {
17797
+ "epoch": 1.402568397543272,
17798
+ "grad_norm": 0.08191350847482681,
17799
+ "learning_rate": 5.01301093758233e-09,
17800
+ "loss": 0.0211,
17801
+ "step": 2512
17802
+ },
17803
+ {
17804
+ "epoch": 1.4031267448352875,
17805
+ "grad_norm": 0.07381971925497055,
17806
+ "learning_rate": 3.838094014940729e-09,
17807
+ "loss": 0.0184,
17808
+ "step": 2513
17809
+ },
17810
+ {
17811
+ "epoch": 1.4036850921273032,
17812
+ "grad_norm": 0.06222749873995781,
17813
+ "learning_rate": 2.8198289598435625e-09,
17814
+ "loss": 0.0184,
17815
+ "step": 2514
17816
+ },
17817
+ {
17818
+ "epoch": 1.4042434394193188,
17819
+ "grad_norm": 0.07456585019826889,
17820
+ "learning_rate": 1.958217367514781e-09,
17821
+ "loss": 0.0239,
17822
+ "step": 2515
17823
+ },
17824
+ {
17825
+ "epoch": 1.4048017867113345,
17826
+ "grad_norm": 0.07716374844312668,
17827
+ "learning_rate": 1.2532605877080273e-09,
17828
+ "loss": 0.0228,
17829
+ "step": 2516
17830
+ },
17831
+ {
17832
+ "epoch": 1.4053601340033501,
17833
+ "grad_norm": 0.08897180110216141,
17834
+ "learning_rate": 7.049597248065532e-10,
17835
+ "loss": 0.0247,
17836
+ "step": 2517
17837
+ },
17838
+ {
17839
+ "epoch": 1.4059184812953658,
17840
+ "grad_norm": 0.061453305184841156,
17841
+ "learning_rate": 3.13315637756606e-10,
17842
+ "loss": 0.0199,
17843
+ "step": 2518
17844
+ },
17845
+ {
17846
+ "epoch": 1.4064768285873814,
17847
+ "grad_norm": 0.07491685450077057,
17848
+ "learning_rate": 7.832894011183811e-11,
17849
+ "loss": 0.0209,
17850
+ "step": 2519
17851
+ },
17852
+ {
17853
+ "epoch": 1.4070351758793969,
17854
+ "grad_norm": 0.10247643291950226,
17855
+ "learning_rate": 0.0,
17856
+ "loss": 0.0198,
17857
+ "step": 2520
17858
  }
17859
  ],
17860
  "logging_steps": 1,
 
17878
  "should_evaluate": false,
17879
  "should_log": false,
17880
  "should_save": true,
17881
+ "should_training_stop": true
17882
  },
17883
  "attributes": {}
17884
  }
17885
  },
17886
+ "total_flos": 1.0376037222108365e+18,
17887
  "train_batch_size": 4,
17888
  "trial_name": null,
17889
  "trial_params": null