ben81828 committed on
Commit e0c219a · verified · 1 Parent(s): 617078b

Training in progress, step 1950, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c8a8e3aa0c4d9915ebbc3628a83afcf846f6259c463c3f47358f3fd82d6ae549
+ oid sha256:d060453114c8a92294690d88c0be250011d18ae38a75a4c22a50983826ea4699
  size 18516456
last-checkpoint/global_step1949/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:546a92216c72a39eb99efb82fc516e32c365a8b47c5a16d1c7ede2ff3a5c8ebd
+ size 27700976
last-checkpoint/global_step1949/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:10454cba4a71ff7a7f07c94ca6fc955485a69adfb150fb23cc1f4e9ecb5853cb
+ size 27700976
last-checkpoint/global_step1949/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1bb8e7dab67e60af7f03b74c0bff9c97dbae5d9d755056c3a83ed823431c8abe
+ size 27700976
last-checkpoint/global_step1949/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7ab6baa177c7b02fad53332d6d2cb2dbb2ae790f310f190e0c36ae0890fc3344
+ size 27700976
last-checkpoint/global_step1949/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a9f9bd37f82947efc95f1000b293fa467734181ba4157eea186f29a31a7874c1
+ size 411571
last-checkpoint/global_step1949/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9d9050283d257c09c592616965f3dcd05bae03562a630e5f55ce8de4b4d0210c
+ size 411507
last-checkpoint/global_step1949/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9cd86a7d8a219c63d3f9ff2bd6c566da8bffa04efd4e9027356e9f53e81699fe
+ size 411507
last-checkpoint/global_step1949/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bebe5c8f10aa68dabcefd3cf8787e0fb4b4b8a5760e0ac5282ec7769035ff073
+ size 411507
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step1900
+ global_step1949
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:585b8316bac42fce8431ad71be5542adc79fdd5ff1839401f8374f6ab8226086
+ oid sha256:76c7a1c4ad326ceb79f3afbc6d47975b14a4cb17c9f8fb7483b37b11ee134aac
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cc4c7cc343f6d985da07523b86ffd7b02d0b8ae40ff925936c48aff8a7385f39
+ oid sha256:364e129a29ed2420756ce71165221396b3418a310a60e2d96548d62cc7590232
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b364386e5b3618cb74f99527a8e31fa3c7d8d93018d2d0cf0b819ae4c3c1794b
+ oid sha256:a89d078b36f7a96070fb2b399b9fe9bc0196d5110cb8255158e3354d836845a5
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ed71fe0f2fd47c414d9fd305b847e65b2f95946543d7b867318fdc05f761e6bc
+ oid sha256:10672d33daa64ff34468d947c3c30b17fe906ae6c3d9ace60ba1c3e119c8efa4
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:61ff63be94031aef4e0bc87499ed4f6f68ae7e907768f3e33f975dfe48df5202
+ oid sha256:83645aef3795b979dc9a5c77e06d9eebfefa4998e2a5da58eb5de75d197fb29f
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 0.6319106221199036,
  "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4_frozenVision/lora/sft/checkpoint-1600",
- "epoch": 0.978624774658769,
+ "epoch": 1.0041205253669843,
  "eval_steps": 50,
- "global_step": 1900,
+ "global_step": 1950,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -3389,11 +3389,100 @@
  "eval_steps_per_second": 0.938,
  "num_input_tokens_seen": 22222632,
  "step": 1900
+ },
+ {
+ "epoch": 0.9812001030131342,
+ "grad_norm": 6.997624273550619,
+ "learning_rate": 4.41774648053257e-05,
+ "loss": 0.4437,
+ "num_input_tokens_seen": 22281080,
+ "step": 1905
+ },
+ {
+ "epoch": 0.9837754313674993,
+ "grad_norm": 5.030616381143982,
+ "learning_rate": 4.3936031610520124e-05,
+ "loss": 0.465,
+ "num_input_tokens_seen": 22339552,
+ "step": 1910
+ },
+ {
+ "epoch": 0.9863507597218646,
+ "grad_norm": 5.025845260709186,
+ "learning_rate": 4.3694741829461336e-05,
+ "loss": 0.4975,
+ "num_input_tokens_seen": 22398056,
+ "step": 1915
+ },
+ {
+ "epoch": 0.9889260880762297,
+ "grad_norm": 6.43843242330618,
+ "learning_rate": 4.345360116868823e-05,
+ "loss": 0.4504,
+ "num_input_tokens_seen": 22456520,
+ "step": 1920
+ },
+ {
+ "epoch": 0.9915014164305949,
+ "grad_norm": 5.281203851622467,
+ "learning_rate": 4.321261533121303e-05,
+ "loss": 0.4528,
+ "num_input_tokens_seen": 22515024,
+ "step": 1925
+ },
+ {
+ "epoch": 0.9940767447849601,
+ "grad_norm": 6.158304256456398,
+ "learning_rate": 4.2971790016386286e-05,
+ "loss": 0.441,
+ "num_input_tokens_seen": 22573480,
+ "step": 1930
+ },
+ {
+ "epoch": 0.9966520731393252,
+ "grad_norm": 3.898263595049965,
+ "learning_rate": 4.273113091976225e-05,
+ "loss": 0.4678,
+ "num_input_tokens_seen": 22631960,
+ "step": 1935
+ },
+ {
+ "epoch": 0.9992274014936905,
+ "grad_norm": 6.266433889699235,
+ "learning_rate": 4.249064373296403e-05,
+ "loss": 0.4352,
+ "num_input_tokens_seen": 22690432,
+ "step": 1940
+ },
+ {
+ "epoch": 1.001545197012619,
+ "grad_norm": 2.4601530377865695,
+ "learning_rate": 4.225033414354908e-05,
+ "loss": 0.3792,
+ "num_input_tokens_seen": 22743048,
+ "step": 1945
+ },
+ {
+ "epoch": 1.0041205253669843,
+ "grad_norm": 4.761740260797231,
+ "learning_rate": 4.201020783487464e-05,
+ "loss": 0.3783,
+ "num_input_tokens_seen": 22801512,
+ "step": 1950
+ },
+ {
+ "epoch": 1.0041205253669843,
+ "eval_loss": 0.6983156204223633,
+ "eval_runtime": 16.3172,
+ "eval_samples_per_second": 3.677,
+ "eval_steps_per_second": 0.919,
+ "num_input_tokens_seen": 22801512,
+ "step": 1950
  }
  ],
  "logging_steps": 5,
  "max_steps": 3400,
- "num_input_tokens_seen": 22222632,
+ "num_input_tokens_seen": 22801512,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
@@ -3408,7 +3497,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1247860954234880.0,
+ "total_flos": 1280406731358208.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null