ben81828 committed on
Commit
b638d19
·
verified ·
1 Parent(s): 31ad751

Training in progress, step 2250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc6d99739705e6763281ee09273da0b205242e8f372f5506b947c515878799f9
3
  size 29034840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:637d017e8d4d94ab07a703145f2af78722084a2aaf476d5f6576729ec37c71f4
3
  size 29034840
last-checkpoint/global_step2250/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2292ed571eafd83f38d033cab54f76cba090db77842fe87c2c26e30d37e86f4
3
+ size 43429616
last-checkpoint/global_step2250/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25330de14a2a7a8c32c9e128ee1666207ef374a8fae1f7543ac205757c4b9033
3
+ size 43429616
last-checkpoint/global_step2250/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d84a2cf4489ad6e1e259923528baa41542f5de61a667d47f2b3306f6dad6c821
3
+ size 43429616
last-checkpoint/global_step2250/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0a6a4f2b365bf38afd016141f04a6496ed68d5ceafc6d208c94a9654a28e613
3
+ size 43429616
last-checkpoint/global_step2250/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f99379505febcadc237a59cecf2552528eb390c3978101368a8ac5b8853c9b5
3
+ size 637299
last-checkpoint/global_step2250/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d49f0d970a2b9df29ffe9f492743d2307bf9fe6bd2e38143249cdb7f5f0e293
3
+ size 637171
last-checkpoint/global_step2250/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7c8b014ca30d5b1dc72587375b9f455cff11745e3176748b2272acb7119967e
3
+ size 637171
last-checkpoint/global_step2250/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd9bfd1dc4266f394a09f9aad5bd7f5df2494ff6223d028f74d91a136e6fce80
3
+ size 637171
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2200
 
1
+ global_step2250
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c2f72d01585273766959f0cc9805fab753b53f20e581399855a293176ace988
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3187a61ccc2722c440dc24ae4a6eefe6b9e5daccf9e92473bbb4483c7751ea77
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fd1ecda2bb159be37a2a23800e098324f5b0334e7189df47c343ca6cb7605a2
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0f2a0df922fb3337cf2562745ebe8d5adf433ca45cb4e3da33a21b48183c000
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf71c84ea2995fbc545b918d03f7f94c92293ca2e33343f177e6fd04531b7b19
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4d84b5276f687f44c9af60b1e41cd7b93a6d1659e36831a7bc021b5635d663b
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72c53116f0f4c80841c24cd681d5fbd5a5992b259583a4cfb493f8f3e4544d82
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d69159433c88b97106cf21b92eb5a3f66f0c826aa268d82a47b3faed1ac86cd
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01d30aec100967976eb875a41f6c605190fbb2f410e1523b990ce51daf9cd1a5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f09364fdb885abeed72e22e296309c763b98e8380e640753aee0cb72764dc9df
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.25809118151664734,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_direction_then_DetectAndClassify_scale6/lora/sft/checkpoint-2150",
4
- "epoch": 0.6499261447562777,
5
  "eval_steps": 50,
6
- "global_step": 2200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3923,11 +3923,100 @@
3923
  "eval_steps_per_second": 0.782,
3924
  "num_input_tokens_seen": 22849552,
3925
  "step": 2200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3926
  }
3927
  ],
3928
  "logging_steps": 5,
3929
  "max_steps": 6770,
3930
- "num_input_tokens_seen": 22849552,
3931
  "num_train_epochs": 2,
3932
  "save_steps": 50,
3933
  "stateful_callbacks": {
@@ -3942,7 +4031,7 @@
3942
  "attributes": {}
3943
  }
3944
  },
3945
- "total_flos": 1507548125986816.0,
3946
  "train_batch_size": 1,
3947
  "trial_name": null,
3948
  "trial_params": null
 
1
  {
2
  "best_metric": 0.25809118151664734,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_direction_then_DetectAndClassify_scale6/lora/sft/checkpoint-2150",
4
+ "epoch": 0.6646971935007385,
5
  "eval_steps": 50,
6
+ "global_step": 2250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3923
  "eval_steps_per_second": 0.782,
3924
  "num_input_tokens_seen": 22849552,
3925
  "step": 2200
3926
+ },
3927
+ {
3928
+ "epoch": 0.6514032496307238,
3929
+ "grad_norm": 1.9654706291736397,
3930
+ "learning_rate": 8.062585611681758e-05,
3931
+ "loss": 0.3016,
3932
+ "num_input_tokens_seen": 22900184,
3933
+ "step": 2205
3934
+ },
3935
+ {
3936
+ "epoch": 0.6528803545051699,
3937
+ "grad_norm": 4.7148538513910285,
3938
+ "learning_rate": 8.052922874540722e-05,
3939
+ "loss": 0.2661,
3940
+ "num_input_tokens_seen": 22951816,
3941
+ "step": 2210
3942
+ },
3943
+ {
3944
+ "epoch": 0.654357459379616,
3945
+ "grad_norm": 15.154253210273424,
3946
+ "learning_rate": 8.043241923692436e-05,
3947
+ "loss": 0.2698,
3948
+ "num_input_tokens_seen": 23003952,
3949
+ "step": 2215
3950
+ },
3951
+ {
3952
+ "epoch": 0.6558345642540621,
3953
+ "grad_norm": 1.544288424619005,
3954
+ "learning_rate": 8.03354281689335e-05,
3955
+ "loss": 0.2683,
3956
+ "num_input_tokens_seen": 23056272,
3957
+ "step": 2220
3958
+ },
3959
+ {
3960
+ "epoch": 0.6573116691285081,
3961
+ "grad_norm": 1.7746096025671347,
3962
+ "learning_rate": 8.023825612008242e-05,
3963
+ "loss": 0.3237,
3964
+ "num_input_tokens_seen": 23106472,
3965
+ "step": 2225
3966
+ },
3967
+ {
3968
+ "epoch": 0.6587887740029542,
3969
+ "grad_norm": 8.974887056755184,
3970
+ "learning_rate": 8.014090367009859e-05,
3971
+ "loss": 0.2906,
3972
+ "num_input_tokens_seen": 23158064,
3973
+ "step": 2230
3974
+ },
3975
+ {
3976
+ "epoch": 0.6602658788774003,
3977
+ "grad_norm": 0.9042498724932153,
3978
+ "learning_rate": 8.004337139978574e-05,
3979
+ "loss": 0.2067,
3980
+ "num_input_tokens_seen": 23210584,
3981
+ "step": 2235
3982
+ },
3983
+ {
3984
+ "epoch": 0.6617429837518464,
3985
+ "grad_norm": 1.7463381173392456,
3986
+ "learning_rate": 7.994565989102042e-05,
3987
+ "loss": 0.2648,
3988
+ "num_input_tokens_seen": 23262864,
3989
+ "step": 2240
3990
+ },
3991
+ {
3992
+ "epoch": 0.6632200886262924,
3993
+ "grad_norm": 2.846956174007256,
3994
+ "learning_rate": 7.98477697267485e-05,
3995
+ "loss": 0.2496,
3996
+ "num_input_tokens_seen": 23314568,
3997
+ "step": 2245
3998
+ },
3999
+ {
4000
+ "epoch": 0.6646971935007385,
4001
+ "grad_norm": 4.665885044883532,
4002
+ "learning_rate": 7.974970149098174e-05,
4003
+ "loss": 0.2332,
4004
+ "num_input_tokens_seen": 23366784,
4005
+ "step": 2250
4006
+ },
4007
+ {
4008
+ "epoch": 0.6646971935007385,
4009
+ "eval_loss": 0.4966147541999817,
4010
+ "eval_runtime": 19.1792,
4011
+ "eval_samples_per_second": 3.128,
4012
+ "eval_steps_per_second": 0.782,
4013
+ "num_input_tokens_seen": 23366784,
4014
+ "step": 2250
4015
  }
4016
  ],
4017
  "logging_steps": 5,
4018
  "max_steps": 6770,
4019
+ "num_input_tokens_seen": 23366784,
4020
  "num_train_epochs": 2,
4021
  "save_steps": 50,
4022
  "stateful_callbacks": {
 
4031
  "attributes": {}
4032
  }
4033
  },
4034
+ "total_flos": 1541699557392384.0,
4035
  "train_batch_size": 1,
4036
  "trial_name": null,
4037
  "trial_params": null