PEFT
Safetensors
English
German
vidore
multimodal_embedding
colqwen2-2b-v1.0 / checkpoint-1516 /trainer_state.json
tattrongvu's picture
Upload 57 files
7e6afe8 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 50,
"global_step": 1516,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002638522427440633,
"eval_loss": 0.3697243332862854,
"eval_runtime": 31.4109,
"eval_samples_per_second": 63.672,
"eval_steps_per_second": 0.255,
"step": 1
},
{
"epoch": 0.052770448548812667,
"grad_norm": 0.26953125,
"learning_rate": 0.00010526315789473685,
"loss": 0.3823,
"step": 20
},
{
"epoch": 0.10554089709762533,
"grad_norm": 0.201171875,
"learning_rate": 0.0001997845988152935,
"loss": 0.2239,
"step": 40
},
{
"epoch": 0.13192612137203166,
"eval_loss": 0.11808302253484726,
"eval_runtime": 29.4538,
"eval_samples_per_second": 67.903,
"eval_steps_per_second": 0.272,
"step": 50
},
{
"epoch": 0.158311345646438,
"grad_norm": 0.1962890625,
"learning_rate": 0.00019763058696822833,
"loss": 0.1799,
"step": 60
},
{
"epoch": 0.21108179419525067,
"grad_norm": 0.1943359375,
"learning_rate": 0.0001954765751211632,
"loss": 0.1651,
"step": 80
},
{
"epoch": 0.2638522427440633,
"grad_norm": 0.2255859375,
"learning_rate": 0.00019332256327409802,
"loss": 0.1571,
"step": 100
},
{
"epoch": 0.2638522427440633,
"eval_loss": 0.09250890463590622,
"eval_runtime": 28.2273,
"eval_samples_per_second": 70.853,
"eval_steps_per_second": 0.283,
"step": 100
},
{
"epoch": 0.316622691292876,
"grad_norm": 0.2333984375,
"learning_rate": 0.00019116855142703286,
"loss": 0.1535,
"step": 120
},
{
"epoch": 0.36939313984168864,
"grad_norm": 0.1611328125,
"learning_rate": 0.00018901453957996772,
"loss": 0.1456,
"step": 140
},
{
"epoch": 0.39577836411609496,
"eval_loss": 0.08707328885793686,
"eval_runtime": 27.6259,
"eval_samples_per_second": 72.396,
"eval_steps_per_second": 0.29,
"step": 150
},
{
"epoch": 0.42216358839050133,
"grad_norm": 0.1884765625,
"learning_rate": 0.00018686052773290255,
"loss": 0.1402,
"step": 160
},
{
"epoch": 0.47493403693931396,
"grad_norm": 0.2109375,
"learning_rate": 0.0001847065158858374,
"loss": 0.142,
"step": 180
},
{
"epoch": 0.5277044854881267,
"grad_norm": 0.1533203125,
"learning_rate": 0.00018255250403877222,
"loss": 0.1318,
"step": 200
},
{
"epoch": 0.5277044854881267,
"eval_loss": 0.080934077501297,
"eval_runtime": 27.3743,
"eval_samples_per_second": 73.061,
"eval_steps_per_second": 0.292,
"step": 200
},
{
"epoch": 0.5804749340369393,
"grad_norm": 0.216796875,
"learning_rate": 0.00018039849219170706,
"loss": 0.1301,
"step": 220
},
{
"epoch": 0.633245382585752,
"grad_norm": 0.162109375,
"learning_rate": 0.0001782444803446419,
"loss": 0.1317,
"step": 240
},
{
"epoch": 0.6596306068601583,
"eval_loss": 0.0750429555773735,
"eval_runtime": 27.7505,
"eval_samples_per_second": 72.071,
"eval_steps_per_second": 0.288,
"step": 250
},
{
"epoch": 0.6860158311345647,
"grad_norm": 0.185546875,
"learning_rate": 0.00017609046849757676,
"loss": 0.1269,
"step": 260
},
{
"epoch": 0.7387862796833773,
"grad_norm": 0.203125,
"learning_rate": 0.0001739364566505116,
"loss": 0.1267,
"step": 280
},
{
"epoch": 0.7915567282321899,
"grad_norm": 0.1455078125,
"learning_rate": 0.00017178244480344642,
"loss": 0.1226,
"step": 300
},
{
"epoch": 0.7915567282321899,
"eval_loss": 0.07792137563228607,
"eval_runtime": 27.3248,
"eval_samples_per_second": 73.194,
"eval_steps_per_second": 0.293,
"step": 300
},
{
"epoch": 0.8443271767810027,
"grad_norm": 0.1630859375,
"learning_rate": 0.00016962843295638126,
"loss": 0.1222,
"step": 320
},
{
"epoch": 0.8970976253298153,
"grad_norm": 0.173828125,
"learning_rate": 0.0001674744211093161,
"loss": 0.1254,
"step": 340
},
{
"epoch": 0.9234828496042217,
"eval_loss": 0.07484881579875946,
"eval_runtime": 27.8135,
"eval_samples_per_second": 71.907,
"eval_steps_per_second": 0.288,
"step": 350
},
{
"epoch": 0.9498680738786279,
"grad_norm": 0.1728515625,
"learning_rate": 0.00016532040926225093,
"loss": 0.1177,
"step": 360
},
{
"epoch": 1.0026385224274406,
"grad_norm": 0.1220703125,
"learning_rate": 0.0001631663974151858,
"loss": 0.1207,
"step": 380
},
{
"epoch": 1.0554089709762533,
"grad_norm": 0.1591796875,
"learning_rate": 0.00016101238556812063,
"loss": 0.1046,
"step": 400
},
{
"epoch": 1.0554089709762533,
"eval_loss": 0.0715707540512085,
"eval_runtime": 27.7758,
"eval_samples_per_second": 72.005,
"eval_steps_per_second": 0.288,
"step": 400
},
{
"epoch": 1.108179419525066,
"grad_norm": 0.1142578125,
"learning_rate": 0.0001588583737210555,
"loss": 0.1041,
"step": 420
},
{
"epoch": 1.1609498680738786,
"grad_norm": 0.177734375,
"learning_rate": 0.00015670436187399032,
"loss": 0.1034,
"step": 440
},
{
"epoch": 1.187335092348285,
"eval_loss": 0.0693235993385315,
"eval_runtime": 27.7658,
"eval_samples_per_second": 72.031,
"eval_steps_per_second": 0.288,
"step": 450
},
{
"epoch": 1.2137203166226913,
"grad_norm": 0.1630859375,
"learning_rate": 0.00015455035002692516,
"loss": 0.1042,
"step": 460
},
{
"epoch": 1.266490765171504,
"grad_norm": 0.1611328125,
"learning_rate": 0.00015239633817986,
"loss": 0.1032,
"step": 480
},
{
"epoch": 1.3192612137203166,
"grad_norm": 0.169921875,
"learning_rate": 0.00015024232633279485,
"loss": 0.1021,
"step": 500
},
{
"epoch": 1.3192612137203166,
"eval_loss": 0.06579812616109848,
"eval_runtime": 27.42,
"eval_samples_per_second": 72.939,
"eval_steps_per_second": 0.292,
"step": 500
},
{
"epoch": 1.3720316622691293,
"grad_norm": 0.1611328125,
"learning_rate": 0.0001480883144857297,
"loss": 0.1041,
"step": 520
},
{
"epoch": 1.424802110817942,
"grad_norm": 0.11474609375,
"learning_rate": 0.00014593430263866452,
"loss": 0.1006,
"step": 540
},
{
"epoch": 1.4511873350923483,
"eval_loss": 0.06417644023895264,
"eval_runtime": 27.5371,
"eval_samples_per_second": 72.629,
"eval_steps_per_second": 0.291,
"step": 550
},
{
"epoch": 1.4775725593667546,
"grad_norm": 0.1259765625,
"learning_rate": 0.00014378029079159936,
"loss": 0.1001,
"step": 560
},
{
"epoch": 1.5303430079155673,
"grad_norm": 0.146484375,
"learning_rate": 0.0001416262789445342,
"loss": 0.1013,
"step": 580
},
{
"epoch": 1.58311345646438,
"grad_norm": 0.1591796875,
"learning_rate": 0.00013947226709746903,
"loss": 0.1,
"step": 600
},
{
"epoch": 1.58311345646438,
"eval_loss": 0.06583409756422043,
"eval_runtime": 28.0223,
"eval_samples_per_second": 71.372,
"eval_steps_per_second": 0.285,
"step": 600
},
{
"epoch": 1.6358839050131926,
"grad_norm": 0.1611328125,
"learning_rate": 0.0001373182552504039,
"loss": 0.1021,
"step": 620
},
{
"epoch": 1.6886543535620053,
"grad_norm": 0.14453125,
"learning_rate": 0.00013516424340333873,
"loss": 0.1002,
"step": 640
},
{
"epoch": 1.7150395778364116,
"eval_loss": 0.06498919427394867,
"eval_runtime": 28.3581,
"eval_samples_per_second": 70.527,
"eval_steps_per_second": 0.282,
"step": 650
},
{
"epoch": 1.741424802110818,
"grad_norm": 0.111328125,
"learning_rate": 0.00013301023155627356,
"loss": 0.0967,
"step": 660
},
{
"epoch": 1.7941952506596306,
"grad_norm": 0.1884765625,
"learning_rate": 0.0001308562197092084,
"loss": 0.1004,
"step": 680
},
{
"epoch": 1.8469656992084431,
"grad_norm": 0.13671875,
"learning_rate": 0.00012870220786214323,
"loss": 0.0992,
"step": 700
},
{
"epoch": 1.8469656992084431,
"eval_loss": 0.06491042673587799,
"eval_runtime": 27.748,
"eval_samples_per_second": 72.077,
"eval_steps_per_second": 0.288,
"step": 700
},
{
"epoch": 1.899736147757256,
"grad_norm": 0.15234375,
"learning_rate": 0.0001265481960150781,
"loss": 0.0967,
"step": 720
},
{
"epoch": 1.9525065963060686,
"grad_norm": 0.12451171875,
"learning_rate": 0.00012439418416801293,
"loss": 0.0956,
"step": 740
},
{
"epoch": 1.978891820580475,
"eval_loss": 0.06425958126783371,
"eval_runtime": 27.654,
"eval_samples_per_second": 72.322,
"eval_steps_per_second": 0.289,
"step": 750
},
{
"epoch": 2.005277044854881,
"grad_norm": 0.12060546875,
"learning_rate": 0.0001222401723209478,
"loss": 0.0934,
"step": 760
},
{
"epoch": 2.058047493403694,
"grad_norm": 0.171875,
"learning_rate": 0.00012008616047388261,
"loss": 0.0907,
"step": 780
},
{
"epoch": 2.1108179419525066,
"grad_norm": 0.16796875,
"learning_rate": 0.00011793214862681745,
"loss": 0.0861,
"step": 800
},
{
"epoch": 2.1108179419525066,
"eval_loss": 0.06223862245678902,
"eval_runtime": 27.4046,
"eval_samples_per_second": 72.981,
"eval_steps_per_second": 0.292,
"step": 800
},
{
"epoch": 2.163588390501319,
"grad_norm": 0.134765625,
"learning_rate": 0.0001157781367797523,
"loss": 0.0864,
"step": 820
},
{
"epoch": 2.216358839050132,
"grad_norm": 0.123046875,
"learning_rate": 0.00011362412493268713,
"loss": 0.0842,
"step": 840
},
{
"epoch": 2.242744063324538,
"eval_loss": 0.060463495552539825,
"eval_runtime": 27.4597,
"eval_samples_per_second": 72.834,
"eval_steps_per_second": 0.291,
"step": 850
},
{
"epoch": 2.2691292875989446,
"grad_norm": 0.1435546875,
"learning_rate": 0.00011147011308562199,
"loss": 0.0863,
"step": 860
},
{
"epoch": 2.321899736147757,
"grad_norm": 0.1494140625,
"learning_rate": 0.00010931610123855683,
"loss": 0.0858,
"step": 880
},
{
"epoch": 2.37467018469657,
"grad_norm": 0.1259765625,
"learning_rate": 0.00010716208939149166,
"loss": 0.0866,
"step": 900
},
{
"epoch": 2.37467018469657,
"eval_loss": 0.06099672615528107,
"eval_runtime": 27.7635,
"eval_samples_per_second": 72.037,
"eval_steps_per_second": 0.288,
"step": 900
},
{
"epoch": 2.4274406332453826,
"grad_norm": 0.1376953125,
"learning_rate": 0.0001050080775444265,
"loss": 0.0873,
"step": 920
},
{
"epoch": 2.480211081794195,
"grad_norm": 0.158203125,
"learning_rate": 0.00010285406569736133,
"loss": 0.0853,
"step": 940
},
{
"epoch": 2.5065963060686016,
"eval_loss": 0.06115744262933731,
"eval_runtime": 27.8521,
"eval_samples_per_second": 71.808,
"eval_steps_per_second": 0.287,
"step": 950
},
{
"epoch": 2.532981530343008,
"grad_norm": 0.1259765625,
"learning_rate": 0.00010070005385029618,
"loss": 0.0849,
"step": 960
},
{
"epoch": 2.5857519788918206,
"grad_norm": 0.1318359375,
"learning_rate": 9.854604200323103e-05,
"loss": 0.0814,
"step": 980
},
{
"epoch": 2.638522427440633,
"grad_norm": 0.1376953125,
"learning_rate": 9.639203015616588e-05,
"loss": 0.0864,
"step": 1000
},
{
"epoch": 2.638522427440633,
"eval_loss": 0.05968466028571129,
"eval_runtime": 27.6897,
"eval_samples_per_second": 72.229,
"eval_steps_per_second": 0.289,
"step": 1000
},
{
"epoch": 2.691292875989446,
"grad_norm": 0.16015625,
"learning_rate": 9.423801830910071e-05,
"loss": 0.0869,
"step": 1020
},
{
"epoch": 2.7440633245382586,
"grad_norm": 0.12890625,
"learning_rate": 9.208400646203555e-05,
"loss": 0.0821,
"step": 1040
},
{
"epoch": 2.7704485488126647,
"eval_loss": 0.059157080948352814,
"eval_runtime": 27.7435,
"eval_samples_per_second": 72.089,
"eval_steps_per_second": 0.288,
"step": 1050
},
{
"epoch": 2.796833773087071,
"grad_norm": 0.1337890625,
"learning_rate": 8.99299946149704e-05,
"loss": 0.0842,
"step": 1060
},
{
"epoch": 2.849604221635884,
"grad_norm": 0.1513671875,
"learning_rate": 8.777598276790523e-05,
"loss": 0.0846,
"step": 1080
},
{
"epoch": 2.9023746701846966,
"grad_norm": 0.1328125,
"learning_rate": 8.562197092084006e-05,
"loss": 0.0841,
"step": 1100
},
{
"epoch": 2.9023746701846966,
"eval_loss": 0.05879725515842438,
"eval_runtime": 27.612,
"eval_samples_per_second": 72.432,
"eval_steps_per_second": 0.29,
"step": 1100
},
{
"epoch": 2.955145118733509,
"grad_norm": 0.1455078125,
"learning_rate": 8.346795907377491e-05,
"loss": 0.0809,
"step": 1120
},
{
"epoch": 3.007915567282322,
"grad_norm": 0.1259765625,
"learning_rate": 8.131394722670975e-05,
"loss": 0.0815,
"step": 1140
},
{
"epoch": 3.034300791556728,
"eval_loss": 0.05831225588917732,
"eval_runtime": 27.6258,
"eval_samples_per_second": 72.396,
"eval_steps_per_second": 0.29,
"step": 1150
},
{
"epoch": 3.0606860158311346,
"grad_norm": 0.130859375,
"learning_rate": 7.91599353796446e-05,
"loss": 0.0793,
"step": 1160
},
{
"epoch": 3.113456464379947,
"grad_norm": 0.1435546875,
"learning_rate": 7.700592353257944e-05,
"loss": 0.0775,
"step": 1180
},
{
"epoch": 3.16622691292876,
"grad_norm": 0.1357421875,
"learning_rate": 7.485191168551428e-05,
"loss": 0.0795,
"step": 1200
},
{
"epoch": 3.16622691292876,
"eval_loss": 0.0580158606171608,
"eval_runtime": 27.9777,
"eval_samples_per_second": 71.485,
"eval_steps_per_second": 0.286,
"step": 1200
},
{
"epoch": 3.2189973614775726,
"grad_norm": 0.1220703125,
"learning_rate": 7.269789983844911e-05,
"loss": 0.0766,
"step": 1220
},
{
"epoch": 3.271767810026385,
"grad_norm": 0.1318359375,
"learning_rate": 7.054388799138396e-05,
"loss": 0.0732,
"step": 1240
},
{
"epoch": 3.2981530343007917,
"eval_loss": 0.057783834636211395,
"eval_runtime": 28.6683,
"eval_samples_per_second": 69.763,
"eval_steps_per_second": 0.279,
"step": 1250
},
{
"epoch": 3.324538258575198,
"grad_norm": 0.130859375,
"learning_rate": 6.83898761443188e-05,
"loss": 0.0754,
"step": 1260
},
{
"epoch": 3.3773087071240107,
"grad_norm": 0.1611328125,
"learning_rate": 6.623586429725363e-05,
"loss": 0.0793,
"step": 1280
},
{
"epoch": 3.430079155672823,
"grad_norm": 0.1181640625,
"learning_rate": 6.408185245018848e-05,
"loss": 0.076,
"step": 1300
},
{
"epoch": 3.430079155672823,
"eval_loss": 0.05801219865679741,
"eval_runtime": 28.2125,
"eval_samples_per_second": 70.891,
"eval_steps_per_second": 0.284,
"step": 1300
},
{
"epoch": 3.4828496042216357,
"grad_norm": 0.1611328125,
"learning_rate": 6.192784060312333e-05,
"loss": 0.0745,
"step": 1320
},
{
"epoch": 3.5356200527704487,
"grad_norm": 0.1142578125,
"learning_rate": 5.9773828756058156e-05,
"loss": 0.0766,
"step": 1340
},
{
"epoch": 3.5620052770448547,
"eval_loss": 0.05800151824951172,
"eval_runtime": 27.919,
"eval_samples_per_second": 71.636,
"eval_steps_per_second": 0.287,
"step": 1350
},
{
"epoch": 3.588390501319261,
"grad_norm": 0.140625,
"learning_rate": 5.7619816908993005e-05,
"loss": 0.0753,
"step": 1360
},
{
"epoch": 3.641160949868074,
"grad_norm": 0.1328125,
"learning_rate": 5.5465805061927846e-05,
"loss": 0.0772,
"step": 1380
},
{
"epoch": 3.6939313984168867,
"grad_norm": 0.1328125,
"learning_rate": 5.331179321486268e-05,
"loss": 0.0716,
"step": 1400
},
{
"epoch": 3.6939313984168867,
"eval_loss": 0.057653266936540604,
"eval_runtime": 28.2955,
"eval_samples_per_second": 70.683,
"eval_steps_per_second": 0.283,
"step": 1400
},
{
"epoch": 3.746701846965699,
"grad_norm": 0.1513671875,
"learning_rate": 5.115778136779753e-05,
"loss": 0.0744,
"step": 1420
},
{
"epoch": 3.7994722955145117,
"grad_norm": 0.1259765625,
"learning_rate": 4.9003769520732365e-05,
"loss": 0.0777,
"step": 1440
},
{
"epoch": 3.825857519788918,
"eval_loss": 0.05697743222117424,
"eval_runtime": 28.2563,
"eval_samples_per_second": 70.781,
"eval_steps_per_second": 0.283,
"step": 1450
},
{
"epoch": 3.8522427440633247,
"grad_norm": 0.1640625,
"learning_rate": 4.6849757673667206e-05,
"loss": 0.0736,
"step": 1460
},
{
"epoch": 3.905013192612137,
"grad_norm": 0.1318359375,
"learning_rate": 4.469574582660205e-05,
"loss": 0.0753,
"step": 1480
},
{
"epoch": 3.9577836411609497,
"grad_norm": 0.12255859375,
"learning_rate": 4.254173397953689e-05,
"loss": 0.0745,
"step": 1500
},
{
"epoch": 3.9577836411609497,
"eval_loss": 0.05676369369029999,
"eval_runtime": 27.6767,
"eval_samples_per_second": 72.263,
"eval_steps_per_second": 0.289,
"step": 1500
}
],
"logging_steps": 20,
"max_steps": 1895,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.03702605821971e+19,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}