ben81828 commited on
Commit
b2ba723
·
verified ·
1 Parent(s): 2052fb4

Training in progress, step 2650, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:273fcf9e0b0197d44caacef12013e816c21f5d50361370148cdbf9380973f3f1
3
  size 18516456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:827ad766e3a13984fdb182a9e1ff3663479a4a48e1ebea8bd0fa17625232d440
3
  size 18516456
last-checkpoint/global_step2649/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2b8b863f065477a0c40a5eede162578edbe6fd787a379d77ea32c01096bd2f0
3
+ size 27700976
last-checkpoint/global_step2649/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccd9ed8c3f4189afca7b9b42f08145b379927ea4d344c52f2c32782785b9f665
3
+ size 27700976
last-checkpoint/global_step2649/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:671300e3ae3ba0a81972b9ff040af49caa347ab4420c349f8ab1d9062a74d019
3
+ size 27700976
last-checkpoint/global_step2649/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b641c32a40965f349b0d85732aa748416355167ab1f70e4a8e58128db5bddc0
3
+ size 27700976
last-checkpoint/global_step2649/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2fba811b3d3680bbb95ae1d35a4a8ff72f871b681760e1f8c2a1331494a1d85
3
+ size 411571
last-checkpoint/global_step2649/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab0edc40941eb6c7b6893992d22c466389a0f4c35083eecf5e4b3ef79cd43ce5
3
+ size 411507
last-checkpoint/global_step2649/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:392c683814beae79e971d20e2dff1be4d32e72639e37b8144ae68462dee3fed6
3
+ size 411507
last-checkpoint/global_step2649/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1ea766ce87c859064beb69b922a66ee40f6159fdc3d2a718f2ac2dd7b599ff6
3
+ size 411507
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2599
 
1
+ global_step2649
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bf1e520c0ffd0912cdaf36292baf0834dd187b6292436dd343acbd7d39f37df
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e599331812a34463d102d64a4034a0b702a893f362f752003aa577fe71dcc1d
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:480a3937172137fc1b4a3886bc929d7dd5dd0507aebd6b20448e5d03ffe9f33f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ed431e5e71393a0174ad2fd492755f8c1142596f1af3bfe7827c1f8f815dd80
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cc7d6afeed03ecf805019b6cd001604bbcd5e4ef0cbc38e4dc6f5587e48dbf2
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2e70789f26a9f56b6b779e87cb1a405615af81562a256e5afe579f40972e827
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48c0ace59112a18d7f177314d1a9eb2b6b894f2028df1dd368b8c67c8732e18d
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c8c18bc74d5211e761da269c814d7da0687633993838ec22e81ac939a14e91b
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51d19c5591b92170b594a14288df1f8fed02718a1409cb25856483f8dff91ef9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfcf85c736e61a53c653b2d1f3342ce104fa9dc3f2c57b7753ecb2c25635e267
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.6319106221199036,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-1600",
4
- "epoch": 1.338913211434458,
5
  "eval_steps": 50,
6
- "global_step": 2600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4635,11 +4635,100 @@
4635
  "eval_steps_per_second": 0.923,
4636
  "num_input_tokens_seen": 30403640,
4637
  "step": 2600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4638
  }
4639
  ],
4640
  "logging_steps": 5,
4641
  "max_steps": 3400,
4642
- "num_input_tokens_seen": 30403640,
4643
  "num_train_epochs": 2,
4644
  "save_steps": 50,
4645
  "stateful_callbacks": {
@@ -4654,7 +4743,7 @@
4654
  "attributes": {}
4655
  }
4656
  },
4657
- "total_flos": 1707311142797312.0,
4658
  "train_batch_size": 1,
4659
  "trial_name": null,
4660
  "trial_params": null
 
1
  {
2
  "best_metric": 0.6319106221199036,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-1600",
4
+ "epoch": 1.3646664949781098,
5
  "eval_steps": 50,
6
+ "global_step": 2650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4635
  "eval_steps_per_second": 0.923,
4636
  "num_input_tokens_seen": 30403640,
4637
  "step": 2600
4638
+ },
4639
+ {
4640
+ "epoch": 1.341488539788823,
4641
+ "grad_norm": 6.900087606750275,
4642
+ "learning_rate": 1.4217439014387251e-05,
4643
+ "loss": 0.3037,
4644
+ "num_input_tokens_seen": 30462128,
4645
+ "step": 2605
4646
+ },
4647
+ {
4648
+ "epoch": 1.3440638681431882,
4649
+ "grad_norm": 9.361737062462586,
4650
+ "learning_rate": 1.404802767838176e-05,
4651
+ "loss": 0.2905,
4652
+ "num_input_tokens_seen": 30520616,
4653
+ "step": 2610
4654
+ },
4655
+ {
4656
+ "epoch": 1.3466391964975535,
4657
+ "grad_norm": 11.101564672040755,
4658
+ "learning_rate": 1.3879466611824199e-05,
4659
+ "loss": 0.317,
4660
+ "num_input_tokens_seen": 30579024,
4661
+ "step": 2615
4662
+ },
4663
+ {
4664
+ "epoch": 1.3492145248519187,
4665
+ "grad_norm": 5.213355428878847,
4666
+ "learning_rate": 1.371175980120864e-05,
4667
+ "loss": 0.2794,
4668
+ "num_input_tokens_seen": 30637464,
4669
+ "step": 2620
4670
+ },
4671
+ {
4672
+ "epoch": 1.3517898532062838,
4673
+ "grad_norm": 4.8688198861459915,
4674
+ "learning_rate": 1.3544911212825906e-05,
4675
+ "loss": 0.3056,
4676
+ "num_input_tokens_seen": 30695936,
4677
+ "step": 2625
4678
+ },
4679
+ {
4680
+ "epoch": 1.354365181560649,
4681
+ "grad_norm": 9.002025840794365,
4682
+ "learning_rate": 1.337892479266974e-05,
4683
+ "loss": 0.2712,
4684
+ "num_input_tokens_seen": 30754408,
4685
+ "step": 2630
4686
+ },
4687
+ {
4688
+ "epoch": 1.356940509915014,
4689
+ "grad_norm": 4.793656741683869,
4690
+ "learning_rate": 1.3213804466343421e-05,
4691
+ "loss": 0.2615,
4692
+ "num_input_tokens_seen": 30812848,
4693
+ "step": 2635
4694
+ },
4695
+ {
4696
+ "epoch": 1.3595158382693793,
4697
+ "grad_norm": 5.128300113893045,
4698
+ "learning_rate": 1.3049554138967051e-05,
4699
+ "loss": 0.2661,
4700
+ "num_input_tokens_seen": 30871344,
4701
+ "step": 2640
4702
+ },
4703
+ {
4704
+ "epoch": 1.3620911666237445,
4705
+ "grad_norm": 6.038434247454305,
4706
+ "learning_rate": 1.2886177695085078e-05,
4707
+ "loss": 0.3272,
4708
+ "num_input_tokens_seen": 30929824,
4709
+ "step": 2645
4710
+ },
4711
+ {
4712
+ "epoch": 1.3646664949781098,
4713
+ "grad_norm": 5.501317116522042,
4714
+ "learning_rate": 1.2723678998574512e-05,
4715
+ "loss": 0.2962,
4716
+ "num_input_tokens_seen": 30988344,
4717
+ "step": 2650
4718
+ },
4719
+ {
4720
+ "epoch": 1.3646664949781098,
4721
+ "eval_loss": 0.7657458186149597,
4722
+ "eval_runtime": 16.0821,
4723
+ "eval_samples_per_second": 3.731,
4724
+ "eval_steps_per_second": 0.933,
4725
+ "num_input_tokens_seen": 30988344,
4726
+ "step": 2650
4727
  }
4728
  ],
4729
  "logging_steps": 5,
4730
  "max_steps": 3400,
4731
+ "num_input_tokens_seen": 30988344,
4732
  "num_train_epochs": 2,
4733
  "save_steps": 50,
4734
  "stateful_callbacks": {
 
4743
  "attributes": {}
4744
  }
4745
  },
4746
+ "total_flos": 1740143807954944.0,
4747
  "train_batch_size": 1,
4748
  "trial_name": null,
4749
  "trial_params": null