|
{ |
|
"best_metric": 0.7874015748031497, |
|
"best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-y728az4t/checkpoint-800", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 800, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.025, |
|
"grad_norm": 4.511281490325928, |
|
"learning_rate": 5.742539561589771e-05, |
|
"loss": 0.5537, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 21.07503890991211, |
|
"learning_rate": 5.713682578868214e-05, |
|
"loss": 0.635, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.075, |
|
"grad_norm": 6.964919567108154, |
|
"learning_rate": 5.6848255961466576e-05, |
|
"loss": 0.6774, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.1289658546447754, |
|
"learning_rate": 5.6559686134251004e-05, |
|
"loss": 0.5411, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.125, |
|
"grad_norm": 7.270464897155762, |
|
"learning_rate": 5.627111630703544e-05, |
|
"loss": 0.6009, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 16.581676483154297, |
|
"learning_rate": 5.598254647981987e-05, |
|
"loss": 0.5943, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.175, |
|
"grad_norm": 23.3575382232666, |
|
"learning_rate": 5.569397665260431e-05, |
|
"loss": 0.647, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 10.81038761138916, |
|
"learning_rate": 5.540540682538874e-05, |
|
"loss": 0.4295, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.225, |
|
"grad_norm": 19.09532928466797, |
|
"learning_rate": 5.5116836998173175e-05, |
|
"loss": 0.6079, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 19.97694206237793, |
|
"learning_rate": 5.482826717095761e-05, |
|
"loss": 0.6182, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.275, |
|
"grad_norm": 19.958236694335938, |
|
"learning_rate": 5.453969734374204e-05, |
|
"loss": 0.5946, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 6.271239757537842, |
|
"learning_rate": 5.425112751652647e-05, |
|
"loss": 0.26, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.325, |
|
"grad_norm": 3.8671107292175293, |
|
"learning_rate": 5.396255768931091e-05, |
|
"loss": 0.346, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 4.9892425537109375, |
|
"learning_rate": 5.367398786209535e-05, |
|
"loss": 0.3437, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 0.6231233477592468, |
|
"learning_rate": 5.338541803487978e-05, |
|
"loss": 0.2564, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 7.22336483001709, |
|
"learning_rate": 5.3096848207664216e-05, |
|
"loss": 0.8952, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.425, |
|
"grad_norm": 10.588624000549316, |
|
"learning_rate": 5.280827838044865e-05, |
|
"loss": 0.4772, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.097969651222229, |
|
"learning_rate": 5.251970855323308e-05, |
|
"loss": 0.3356, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.475, |
|
"grad_norm": 19.84050941467285, |
|
"learning_rate": 5.223113872601751e-05, |
|
"loss": 0.647, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 8.249490737915039, |
|
"learning_rate": 5.1942568898801946e-05, |
|
"loss": 0.4706, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.525, |
|
"grad_norm": 2.187814712524414, |
|
"learning_rate": 5.165399907158638e-05, |
|
"loss": 0.4385, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 7.703945159912109, |
|
"learning_rate": 5.1365429244370815e-05, |
|
"loss": 0.3925, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.575, |
|
"grad_norm": 9.645771980285645, |
|
"learning_rate": 5.107685941715525e-05, |
|
"loss": 0.4914, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 5.81449556350708, |
|
"learning_rate": 5.0788289589939684e-05, |
|
"loss": 0.5944, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 12.817668914794922, |
|
"learning_rate": 5.049971976272411e-05, |
|
"loss": 0.4712, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 4.410495758056641, |
|
"learning_rate": 5.0211149935508546e-05, |
|
"loss": 0.451, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.675, |
|
"grad_norm": 2.6431968212127686, |
|
"learning_rate": 4.992258010829298e-05, |
|
"loss": 0.1548, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.6477805972099304, |
|
"learning_rate": 4.9634010281077414e-05, |
|
"loss": 0.5608, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.725, |
|
"grad_norm": 5.920861721038818, |
|
"learning_rate": 4.934544045386185e-05, |
|
"loss": 0.4278, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 5.376490116119385, |
|
"learning_rate": 4.905687062664628e-05, |
|
"loss": 0.2859, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.775, |
|
"grad_norm": 17.03923988342285, |
|
"learning_rate": 4.876830079943072e-05, |
|
"loss": 0.2231, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 15.971346855163574, |
|
"learning_rate": 4.8479730972215145e-05, |
|
"loss": 0.5975, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.825, |
|
"grad_norm": 72.75434112548828, |
|
"learning_rate": 4.819116114499958e-05, |
|
"loss": 0.652, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 41.155269622802734, |
|
"learning_rate": 4.7902591317784014e-05, |
|
"loss": 0.6102, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 17.774869918823242, |
|
"learning_rate": 4.761402149056845e-05, |
|
"loss": 0.4959, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 8.649765014648438, |
|
"learning_rate": 4.732545166335288e-05, |
|
"loss": 0.7816, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.925, |
|
"grad_norm": 6.404572486877441, |
|
"learning_rate": 4.703688183613732e-05, |
|
"loss": 0.4796, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 13.620793342590332, |
|
"learning_rate": 4.674831200892175e-05, |
|
"loss": 0.3894, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.975, |
|
"grad_norm": 10.787906646728516, |
|
"learning_rate": 4.6459742181706185e-05, |
|
"loss": 0.2826, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.09124770760536194, |
|
"learning_rate": 4.617117235449062e-05, |
|
"loss": 0.3121, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8175, |
|
"eval_f1": 0.7345454545454545, |
|
"eval_loss": 0.5034022331237793, |
|
"eval_precision": 0.6644736842105263, |
|
"eval_recall": 0.8211382113821138, |
|
"eval_runtime": 1.5181, |
|
"eval_samples_per_second": 263.484, |
|
"eval_steps_per_second": 16.468, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.025, |
|
"grad_norm": 8.29990291595459, |
|
"learning_rate": 4.5882602527275054e-05, |
|
"loss": 0.4983, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 1.2915505170822144, |
|
"learning_rate": 4.559403270005949e-05, |
|
"loss": 0.3114, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.075, |
|
"grad_norm": 36.152198791503906, |
|
"learning_rate": 4.530546287284392e-05, |
|
"loss": 0.5468, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 17.699785232543945, |
|
"learning_rate": 4.501689304562836e-05, |
|
"loss": 0.3794, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.125, |
|
"grad_norm": 0.3559480607509613, |
|
"learning_rate": 4.4728323218412785e-05, |
|
"loss": 0.4284, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 6.532162666320801, |
|
"learning_rate": 4.443975339119722e-05, |
|
"loss": 0.4065, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.175, |
|
"grad_norm": 0.3651299774646759, |
|
"learning_rate": 4.4151183563981653e-05, |
|
"loss": 0.4627, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.09214647114276886, |
|
"learning_rate": 4.386261373676609e-05, |
|
"loss": 0.2161, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.225, |
|
"grad_norm": 35.60824203491211, |
|
"learning_rate": 4.357404390955052e-05, |
|
"loss": 0.4197, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.518726110458374, |
|
"learning_rate": 4.3285474082334956e-05, |
|
"loss": 0.3998, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.275, |
|
"grad_norm": 0.9324079155921936, |
|
"learning_rate": 4.299690425511939e-05, |
|
"loss": 0.5359, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.24253502488136292, |
|
"learning_rate": 4.270833442790382e-05, |
|
"loss": 0.2544, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.325, |
|
"grad_norm": 58.45193099975586, |
|
"learning_rate": 4.241976460068825e-05, |
|
"loss": 0.3068, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.18596920371055603, |
|
"learning_rate": 4.213119477347269e-05, |
|
"loss": 0.1352, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.375, |
|
"grad_norm": 0.09809605032205582, |
|
"learning_rate": 4.184262494625712e-05, |
|
"loss": 0.2002, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 99.6656723022461, |
|
"learning_rate": 4.1554055119041556e-05, |
|
"loss": 0.5991, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.425, |
|
"grad_norm": 0.20418468117713928, |
|
"learning_rate": 4.126548529182599e-05, |
|
"loss": 0.1033, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 6.0724196434021, |
|
"learning_rate": 4.0976915464610424e-05, |
|
"loss": 0.2963, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.475, |
|
"grad_norm": 32.76648712158203, |
|
"learning_rate": 4.068834563739485e-05, |
|
"loss": 0.5506, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 6.148083686828613, |
|
"learning_rate": 4.0399775810179286e-05, |
|
"loss": 0.4567, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.525, |
|
"grad_norm": 0.12815533578395844, |
|
"learning_rate": 4.011120598296372e-05, |
|
"loss": 0.0237, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 0.6783960461616516, |
|
"learning_rate": 3.9822636155748155e-05, |
|
"loss": 0.5228, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.575, |
|
"grad_norm": 0.062089353799819946, |
|
"learning_rate": 3.9534066328532596e-05, |
|
"loss": 0.2315, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 4.188582420349121, |
|
"learning_rate": 3.924549650131703e-05, |
|
"loss": 0.0173, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.625, |
|
"grad_norm": 0.3549104928970337, |
|
"learning_rate": 3.8956926674101465e-05, |
|
"loss": 0.3312, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 0.1310347616672516, |
|
"learning_rate": 3.866835684688589e-05, |
|
"loss": 0.1958, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.675, |
|
"grad_norm": 19.055049896240234, |
|
"learning_rate": 3.837978701967033e-05, |
|
"loss": 0.5, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.1129627674818039, |
|
"learning_rate": 3.809121719245476e-05, |
|
"loss": 0.2486, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.725, |
|
"grad_norm": 0.21243779361248016, |
|
"learning_rate": 3.7802647365239195e-05, |
|
"loss": 0.0712, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.06997116655111313, |
|
"learning_rate": 3.751407753802363e-05, |
|
"loss": 0.48, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.775, |
|
"grad_norm": 0.12350662797689438, |
|
"learning_rate": 3.7225507710808064e-05, |
|
"loss": 0.1861, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 9.771814346313477, |
|
"learning_rate": 3.69369378835925e-05, |
|
"loss": 0.5302, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.825, |
|
"grad_norm": 1.4791642427444458, |
|
"learning_rate": 3.6648368056376926e-05, |
|
"loss": 0.4248, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 9.070107460021973, |
|
"learning_rate": 3.635979822916136e-05, |
|
"loss": 0.4693, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.875, |
|
"grad_norm": 25.12293815612793, |
|
"learning_rate": 3.6071228401945795e-05, |
|
"loss": 0.3694, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.19772209227085114, |
|
"learning_rate": 3.578265857473023e-05, |
|
"loss": 0.3738, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.925, |
|
"grad_norm": 18.277198791503906, |
|
"learning_rate": 3.5494088747514663e-05, |
|
"loss": 0.3531, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.32979539036750793, |
|
"learning_rate": 3.52055189202991e-05, |
|
"loss": 0.0431, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.975, |
|
"grad_norm": 0.03908811882138252, |
|
"learning_rate": 3.491694909308353e-05, |
|
"loss": 0.4406, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.19753161072731018, |
|
"learning_rate": 3.462837926586796e-05, |
|
"loss": 0.2476, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.865, |
|
"eval_f1": 0.7874015748031497, |
|
"eval_loss": 0.5722174644470215, |
|
"eval_precision": 0.7633587786259542, |
|
"eval_recall": 0.8130081300813008, |
|
"eval_runtime": 1.5153, |
|
"eval_samples_per_second": 263.971, |
|
"eval_steps_per_second": 16.498, |
|
"step": 800 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 423630740901888.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": { |
|
"_wandb": {}, |
|
"assignments": {}, |
|
"learning_rate": 5.771396544311327e-05, |
|
"metric": "eval/loss", |
|
"num_train_epochs": 5, |
|
"per_device_train_batch_size": 4, |
|
"seed": 4 |
|
} |
|
} |
|
|