Training in progress, step 2800, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step2799/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2799/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2799/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2799/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2799/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2799/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2799/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2799/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 18516456
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d8105e08b44734413954313ee3f59582f2323ad431f91a8d42d31e834f41c4c
|
3 |
size 18516456
|
last-checkpoint/global_step2799/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed61ae05c7e207208a57e99e9c7b23ccf0adef6c711c3daf1878f7f7baeb82d4
|
3 |
+
size 27700976
|
last-checkpoint/global_step2799/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d77f154dbfad53dd9c46f2c8758ac2dc27a1b017f9406698b5b26e99636df7d
|
3 |
+
size 27700976
|
last-checkpoint/global_step2799/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f77ae3efa9f4d34bc4fddefb730de1851905c8f19a59dd89433a3198a01db13
|
3 |
+
size 27700976
|
last-checkpoint/global_step2799/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de1f156dd1c74f4abed59f4cb92efe3d6c7bb3c87f2005b1a6d359c059e54d10
|
3 |
+
size 27700976
|
last-checkpoint/global_step2799/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0c0be6cc674bb639b4ec7baf63317da27aa182e28279625298e4fb57664d53a
|
3 |
+
size 411571
|
last-checkpoint/global_step2799/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69dd93b13a7c83a007ed01f8158aa0d3c23abff7686d47b321a7585318ae6439
|
3 |
+
size 411507
|
last-checkpoint/global_step2799/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9abca242f770cca89ea6a43c472921b374458558880f50808f2e470bff246a17
|
3 |
+
size 411507
|
last-checkpoint/global_step2799/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f52198786ecf09c894a82afe94ca79b2505b075200315fa9dc59e7cb879e72db
|
3 |
+
size 411507
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step2799
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff8dba2341c0517760edfde50521977f02a5bd982ffd3bc03de6109439c4f478
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2bf831df9fbade9ac2a8db79798bc2a7b1afb85a78a6e463ec7a7db4acc0f8e
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8602ff0a0fa366d46b61c0ef2b23ce468387898cf2bc1027e5450de73ddf647f
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4bb51d675cf23603b1b765cd645f53d6b66ddb104d56d48674e9c798e086f696
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61a7c605cf80a46d2e0c661d5469c16671b681f268e3ecd5d1d64188653910db
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.6319106221199036,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-1600",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4902,11 +4902,100 @@
|
|
4902 |
"eval_steps_per_second": 0.929,
|
4903 |
"num_input_tokens_seen": 32158144,
|
4904 |
"step": 2750
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4905 |
}
|
4906 |
],
|
4907 |
"logging_steps": 5,
|
4908 |
"max_steps": 3400,
|
4909 |
-
"num_input_tokens_seen":
|
4910 |
"num_train_epochs": 2,
|
4911 |
"save_steps": 50,
|
4912 |
"stateful_callbacks": {
|
@@ -4921,7 +5010,7 @@
|
|
4921 |
"attributes": {}
|
4922 |
}
|
4923 |
},
|
4924 |
-
"total_flos":
|
4925 |
"train_batch_size": 1,
|
4926 |
"trial_name": null,
|
4927 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.6319106221199036,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-1600",
|
4 |
+
"epoch": 1.4419263456090652,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 2800,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4902 |
"eval_steps_per_second": 0.929,
|
4903 |
"num_input_tokens_seen": 32158144,
|
4904 |
"step": 2750
|
4905 |
+
},
|
4906 |
+
{
|
4907 |
+
"epoch": 1.4187483904197786,
|
4908 |
+
"grad_norm": 6.584628814510667,
|
4909 |
+
"learning_rate": 9.520584561109864e-06,
|
4910 |
+
"loss": 0.3333,
|
4911 |
+
"num_input_tokens_seen": 32216656,
|
4912 |
+
"step": 2755
|
4913 |
+
},
|
4914 |
+
{
|
4915 |
+
"epoch": 1.4213237187741437,
|
4916 |
+
"grad_norm": 7.509676086247465,
|
4917 |
+
"learning_rate": 9.378330907169386e-06,
|
4918 |
+
"loss": 0.2993,
|
4919 |
+
"num_input_tokens_seen": 32275168,
|
4920 |
+
"step": 2760
|
4921 |
+
},
|
4922 |
+
{
|
4923 |
+
"epoch": 1.423899047128509,
|
4924 |
+
"grad_norm": 5.1775193353141535,
|
4925 |
+
"learning_rate": 9.237037961694223e-06,
|
4926 |
+
"loss": 0.2683,
|
4927 |
+
"num_input_tokens_seen": 32333664,
|
4928 |
+
"step": 2765
|
4929 |
+
},
|
4930 |
+
{
|
4931 |
+
"epoch": 1.4264743754828741,
|
4932 |
+
"grad_norm": 7.856433365965151,
|
4933 |
+
"learning_rate": 9.096709066283354e-06,
|
4934 |
+
"loss": 0.3145,
|
4935 |
+
"num_input_tokens_seen": 32392088,
|
4936 |
+
"step": 2770
|
4937 |
+
},
|
4938 |
+
{
|
4939 |
+
"epoch": 1.4290497038372392,
|
4940 |
+
"grad_norm": 8.252870521534577,
|
4941 |
+
"learning_rate": 8.957347539735872e-06,
|
4942 |
+
"loss": 0.3092,
|
4943 |
+
"num_input_tokens_seen": 32450584,
|
4944 |
+
"step": 2775
|
4945 |
+
},
|
4946 |
+
{
|
4947 |
+
"epoch": 1.4316250321916044,
|
4948 |
+
"grad_norm": 9.74883489294415,
|
4949 |
+
"learning_rate": 8.818956677972406e-06,
|
4950 |
+
"loss": 0.2993,
|
4951 |
+
"num_input_tokens_seen": 32509096,
|
4952 |
+
"step": 2780
|
4953 |
+
},
|
4954 |
+
{
|
4955 |
+
"epoch": 1.4342003605459697,
|
4956 |
+
"grad_norm": 4.008158818829899,
|
4957 |
+
"learning_rate": 8.681539753957269e-06,
|
4958 |
+
"loss": 0.326,
|
4959 |
+
"num_input_tokens_seen": 32567560,
|
4960 |
+
"step": 2785
|
4961 |
+
},
|
4962 |
+
{
|
4963 |
+
"epoch": 1.436775688900335,
|
4964 |
+
"grad_norm": 3.4229494980881174,
|
4965 |
+
"learning_rate": 8.545100017620988e-06,
|
4966 |
+
"loss": 0.2494,
|
4967 |
+
"num_input_tokens_seen": 32626056,
|
4968 |
+
"step": 2790
|
4969 |
+
},
|
4970 |
+
{
|
4971 |
+
"epoch": 1.4393510172547,
|
4972 |
+
"grad_norm": 4.425295787830864,
|
4973 |
+
"learning_rate": 8.409640695783443e-06,
|
4974 |
+
"loss": 0.2691,
|
4975 |
+
"num_input_tokens_seen": 32684520,
|
4976 |
+
"step": 2795
|
4977 |
+
},
|
4978 |
+
{
|
4979 |
+
"epoch": 1.4419263456090652,
|
4980 |
+
"grad_norm": 5.132559476583136,
|
4981 |
+
"learning_rate": 8.275164992077556e-06,
|
4982 |
+
"loss": 0.2939,
|
4983 |
+
"num_input_tokens_seen": 32743032,
|
4984 |
+
"step": 2800
|
4985 |
+
},
|
4986 |
+
{
|
4987 |
+
"epoch": 1.4419263456090652,
|
4988 |
+
"eval_loss": 0.791334331035614,
|
4989 |
+
"eval_runtime": 16.1142,
|
4990 |
+
"eval_samples_per_second": 3.723,
|
4991 |
+
"eval_steps_per_second": 0.931,
|
4992 |
+
"num_input_tokens_seen": 32743032,
|
4993 |
+
"step": 2800
|
4994 |
}
|
4995 |
],
|
4996 |
"logging_steps": 5,
|
4997 |
"max_steps": 3400,
|
4998 |
+
"num_input_tokens_seen": 32743032,
|
4999 |
"num_train_epochs": 2,
|
5000 |
"save_steps": 50,
|
5001 |
"stateful_callbacks": {
|
|
|
5010 |
"attributes": {}
|
5011 |
}
|
5012 |
},
|
5013 |
+
"total_flos": 1838675721715712.0,
|
5014 |
"train_batch_size": 1,
|
5015 |
"trial_name": null,
|
5016 |
"trial_params": null
|