ben81828 committed
Commit 3256785 · verified · 1 Parent(s): 022a7ff

Training in progress, step 3250, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bf91cb70edf4762f7eda2712d2bbbe2e8db267086af287aae6ab31428da6d543
+ oid sha256:bcd2ef8b4ec22a6a44f487539bb38bf8a3cfbf6e4cda8dbbad18255c7b196a6b
  size 18516456
last-checkpoint/global_step3249/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:239c5f72e7fca7b1304d8a4c31d62020adbd183c86ac72f269fbd1590d050199
+ size 27700976
last-checkpoint/global_step3249/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:707499cb541bde5ccd0e2d3dcaebca558ae0a1e6e96f9fde4fb8fab003e298fd
+ size 27700976
last-checkpoint/global_step3249/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9b0ddf2cfb1aa13d296518d2f9bf7111853dd8ea3e978909b2b973f065667360
+ size 27700976
last-checkpoint/global_step3249/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d4f2bcf01b5213ab6bc2bb0e2ed279c82a77dcc5ce2197e6e4e891a361a66fc
+ size 27700976
last-checkpoint/global_step3249/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:802974d7bd7e6d8cf5121193e585f69c25cd6ef721c08f9970ec32bf943c6565
+ size 411571
last-checkpoint/global_step3249/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3156e0133b88013b5a3b37d156dc87b2a279a71326ba4e26f91af72251348a3a
+ size 411507
last-checkpoint/global_step3249/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8747465b7428b359cc78c7bfb11588184857000949ea2c257aac39f8711d174a
+ size 411507
last-checkpoint/global_step3249/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f93606d190c084485ab366f86527d0690c2836a92abfc67250c9bdf2a28ef839
+ size 411507
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step3199
+ global_step3249
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fbe41a7cf0c0baf2cbd9c6a8c2d572d7807492b0cef50528f3655343ea1b64d5
+ oid sha256:9d8a584abdea9bed1d2dc22d8c9034ba07ae85ad6668d6cd55747af816dcecc1
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a52ad24d6a9f3bd519fc26476a8d09236301af97836545a4d2ac6bddc629489e
+ oid sha256:7862a59bb6480df16b96e840172772c30d9d3037819d18063df33360e83b04a2
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:67e13ea1a63841414e59ea8fa8ddcdbabf6d8e38c6583e14a24c83f676a1e986
+ oid sha256:3b509c445a6a6ed6d0215c0e232a66489dc20e39b5202d274561183fd43ef3e5
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c233218300ca098f4464c8e75ae36c65a8a1eaae6f304c77db92627a8b385448
+ oid sha256:1cf1a6a26872b9918c1086ace4fcda88a76d9b6eef501300fd67bf0cc8946d81
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d4756a584dac90cc9bdfc6529aec3d745541ed8f41536120e43dc084a360376c
+ oid sha256:513043e75e41275111fc2a7e55fb4c3a862256ab8b8062676e12ebb4154e419d
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 0.6319106221199036,
  "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-1600",
- "epoch": 1.6479526139582796,
+ "epoch": 1.6737058975019314,
  "eval_steps": 50,
- "global_step": 3200,
+ "global_step": 3250,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -5703,11 +5703,100 @@
  "eval_steps_per_second": 0.928,
  "num_input_tokens_seen": 37421416,
  "step": 3200
+ },
+ {
+ "epoch": 1.6505279423126449,
+ "grad_norm": 12.027460444161642,
+ "learning_rate": 8.966060675892951e-07,
+ "loss": 0.2865,
+ "num_input_tokens_seen": 37479848,
+ "step": 3205
+ },
+ {
+ "epoch": 1.65310327066701,
+ "grad_norm": 6.851221931248735,
+ "learning_rate": 8.513450158049108e-07,
+ "loss": 0.3299,
+ "num_input_tokens_seen": 37538312,
+ "step": 3210
+ },
+ {
+ "epoch": 1.6556785990213752,
+ "grad_norm": 6.971651790450948,
+ "learning_rate": 8.072463369597993e-07,
+ "loss": 0.3218,
+ "num_input_tokens_seen": 37596800,
+ "step": 3215
+ },
+ {
+ "epoch": 1.6582539273757404,
+ "grad_norm": 10.994527310957624,
+ "learning_rate": 7.643110739942172e-07,
+ "loss": 0.2593,
+ "num_input_tokens_seen": 37655312,
+ "step": 3220
+ },
+ {
+ "epoch": 1.6608292557301056,
+ "grad_norm": 13.542379224085927,
+ "learning_rate": 7.225402423334693e-07,
+ "loss": 0.3072,
+ "num_input_tokens_seen": 37713800,
+ "step": 3225
+ },
+ {
+ "epoch": 1.663404584084471,
+ "grad_norm": 5.442561929450427,
+ "learning_rate": 6.819348298638839e-07,
+ "loss": 0.2276,
+ "num_input_tokens_seen": 37772280,
+ "step": 3230
+ },
+ {
+ "epoch": 1.665979912438836,
+ "grad_norm": 8.128386248398428,
+ "learning_rate": 6.424957969094536e-07,
+ "loss": 0.2489,
+ "num_input_tokens_seen": 37830800,
+ "step": 3235
+ },
+ {
+ "epoch": 1.6685552407932012,
+ "grad_norm": 3.9766881915113266,
+ "learning_rate": 6.0422407620912e-07,
+ "loss": 0.2552,
+ "num_input_tokens_seen": 37889280,
+ "step": 3240
+ },
+ {
+ "epoch": 1.6711305691475662,
+ "grad_norm": 5.555365927504982,
+ "learning_rate": 5.671205728947305e-07,
+ "loss": 0.226,
+ "num_input_tokens_seen": 37947728,
+ "step": 3245
+ },
+ {
+ "epoch": 1.6737058975019314,
+ "grad_norm": 5.733028191926084,
+ "learning_rate": 5.311861644696048e-07,
+ "loss": 0.2785,
+ "num_input_tokens_seen": 38006200,
+ "step": 3250
+ },
+ {
+ "epoch": 1.6737058975019314,
+ "eval_loss": 0.8640011548995972,
+ "eval_runtime": 16.0965,
+ "eval_samples_per_second": 3.728,
+ "eval_steps_per_second": 0.932,
+ "num_input_tokens_seen": 38006200,
+ "step": 3250
  }
  ],
  "logging_steps": 5,
  "max_steps": 3400,
- "num_input_tokens_seen": 37421416,
+ "num_input_tokens_seen": 38006200,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
@@ -5722,7 +5811,7 @@
  "attributes": {}
  }
  },
- "total_flos": 2101393008361472.0,
+ "total_flos": 2134231948460032.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null