{
  "best_metric": 0.5765425562858582,
  "best_model_checkpoint": "v23/checkpoint-358",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 358,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03910614525139665,
      "grad_norm": Infinity,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 2.8181,
      "step": 7
    },
    {
      "epoch": 0.0782122905027933,
      "grad_norm": 48.7786750793457,
      "learning_rate": 5.555555555555556e-06,
      "loss": 2.4279,
      "step": 14
    },
    {
      "epoch": 0.11731843575418995,
      "grad_norm": 16.421829223632812,
      "learning_rate": 9.444444444444445e-06,
      "loss": 1.9836,
      "step": 21
    },
    {
      "epoch": 0.1564245810055866,
      "grad_norm": 37.26057052612305,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 1.637,
      "step": 28
    },
    {
      "epoch": 0.19553072625698323,
      "grad_norm": 24.32502555847168,
      "learning_rate": 1.7222222222222224e-05,
      "loss": 1.4235,
      "step": 35
    },
    {
      "epoch": 0.2346368715083799,
      "grad_norm": 4.514742374420166,
      "learning_rate": 2.111111111111111e-05,
      "loss": 0.8819,
      "step": 42
    },
    {
      "epoch": 0.2737430167597765,
      "grad_norm": 17.023542404174805,
      "learning_rate": 2.5e-05,
      "loss": 0.847,
      "step": 49
    },
    {
      "epoch": 0.3128491620111732,
      "grad_norm": 18.13689422607422,
      "learning_rate": 2.8888888888888888e-05,
      "loss": 0.7611,
      "step": 56
    },
    {
      "epoch": 0.35195530726256985,
      "grad_norm": 21.782562255859375,
      "learning_rate": 3.277777777777778e-05,
      "loss": 0.7189,
      "step": 63
    },
    {
      "epoch": 0.39106145251396646,
      "grad_norm": 13.720254898071289,
      "learning_rate": 3.6666666666666666e-05,
      "loss": 0.7178,
      "step": 70
    },
    {
      "epoch": 0.4301675977653631,
      "grad_norm": 4.693215847015381,
      "learning_rate": 4.055555555555556e-05,
      "loss": 0.7092,
      "step": 77
    },
    {
      "epoch": 0.4692737430167598,
      "grad_norm": 14.818086624145508,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 0.682,
      "step": 84
    },
    {
      "epoch": 0.5083798882681564,
      "grad_norm": 26.538074493408203,
      "learning_rate": 4.8333333333333334e-05,
      "loss": 0.6824,
      "step": 91
    },
    {
      "epoch": 0.547486033519553,
      "grad_norm": 29.219240188598633,
      "learning_rate": 4.975155279503106e-05,
      "loss": 0.681,
      "step": 98
    },
    {
      "epoch": 0.5865921787709497,
      "grad_norm": 8.444730758666992,
      "learning_rate": 4.93167701863354e-05,
      "loss": 0.7867,
      "step": 105
    },
    {
      "epoch": 0.6256983240223464,
      "grad_norm": 8.663554191589355,
      "learning_rate": 4.888198757763975e-05,
      "loss": 0.7689,
      "step": 112
    },
    {
      "epoch": 0.664804469273743,
      "grad_norm": 15.8271484375,
      "learning_rate": 4.8447204968944106e-05,
      "loss": 0.8417,
      "step": 119
    },
    {
      "epoch": 0.7039106145251397,
      "grad_norm": 6.195271968841553,
      "learning_rate": 4.801242236024845e-05,
      "loss": 0.629,
      "step": 126
    },
    {
      "epoch": 0.7430167597765364,
      "grad_norm": 4.593315601348877,
      "learning_rate": 4.75776397515528e-05,
      "loss": 0.6039,
      "step": 133
    },
    {
      "epoch": 0.7821229050279329,
      "grad_norm": 22.80195426940918,
      "learning_rate": 4.714285714285714e-05,
      "loss": 0.5824,
      "step": 140
    },
    {
      "epoch": 0.8212290502793296,
      "grad_norm": 13.558725357055664,
      "learning_rate": 4.6770186335403726e-05,
      "loss": 0.876,
      "step": 147
    },
    {
      "epoch": 0.8603351955307262,
      "grad_norm": 4.1830668449401855,
      "learning_rate": 4.633540372670807e-05,
      "loss": 0.7458,
      "step": 154
    },
    {
      "epoch": 0.8994413407821229,
      "grad_norm": 14.1422119140625,
      "learning_rate": 4.590062111801243e-05,
      "loss": 0.6289,
      "step": 161
    },
    {
      "epoch": 0.9385474860335196,
      "grad_norm": 15.986943244934082,
      "learning_rate": 4.546583850931677e-05,
      "loss": 0.8139,
      "step": 168
    },
    {
      "epoch": 0.9776536312849162,
      "grad_norm": 10.396794319152832,
      "learning_rate": 4.5031055900621124e-05,
      "loss": 0.7859,
      "step": 175
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6,
      "eval_f1_macro": 0.4025140193447718,
      "eval_f1_micro": 0.6,
      "eval_f1_weighted": 0.4689184747817452,
      "eval_loss": 0.7313841581344604,
      "eval_precision_macro": 0.5837690631808279,
      "eval_precision_micro": 0.6,
      "eval_precision_weighted": 0.5306172839506172,
      "eval_recall_macro": 0.43363545726457,
      "eval_recall_micro": 0.6,
      "eval_recall_weighted": 0.6,
      "eval_runtime": 0.8236,
      "eval_samples_per_second": 382.454,
      "eval_steps_per_second": 24.283,
      "step": 179
    },
    {
      "epoch": 1.0167597765363128,
      "grad_norm": 15.96173095703125,
      "learning_rate": 4.4596273291925465e-05,
      "loss": 0.5898,
      "step": 182
    },
    {
      "epoch": 1.0558659217877095,
      "grad_norm": 21.0263729095459,
      "learning_rate": 4.416149068322982e-05,
      "loss": 0.6724,
      "step": 189
    },
    {
      "epoch": 1.094972067039106,
      "grad_norm": 8.833733558654785,
      "learning_rate": 4.372670807453416e-05,
      "loss": 0.5225,
      "step": 196
    },
    {
      "epoch": 1.1340782122905029,
      "grad_norm": 13.83945083618164,
      "learning_rate": 4.3291925465838515e-05,
      "loss": 0.6694,
      "step": 203
    },
    {
      "epoch": 1.1731843575418994,
      "grad_norm": 13.772929191589355,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 0.5541,
      "step": 210
    },
    {
      "epoch": 1.2122905027932962,
      "grad_norm": 8.899124145507812,
      "learning_rate": 4.2422360248447204e-05,
      "loss": 0.713,
      "step": 217
    },
    {
      "epoch": 1.2513966480446927,
      "grad_norm": 8.794002532958984,
      "learning_rate": 4.198757763975156e-05,
      "loss": 0.5879,
      "step": 224
    },
    {
      "epoch": 1.2905027932960893,
      "grad_norm": 4.091240882873535,
      "learning_rate": 4.15527950310559e-05,
      "loss": 0.5424,
      "step": 231
    },
    {
      "epoch": 1.329608938547486,
      "grad_norm": 16.86524772644043,
      "learning_rate": 4.1118012422360255e-05,
      "loss": 0.6323,
      "step": 238
    },
    {
      "epoch": 1.3687150837988826,
      "grad_norm": 10.920906066894531,
      "learning_rate": 4.0683229813664596e-05,
      "loss": 0.5727,
      "step": 245
    },
    {
      "epoch": 1.4078212290502794,
      "grad_norm": 17.164987564086914,
      "learning_rate": 4.024844720496895e-05,
      "loss": 0.5847,
      "step": 252
    },
    {
      "epoch": 1.446927374301676,
      "grad_norm": 12.27508544921875,
      "learning_rate": 3.981366459627329e-05,
      "loss": 0.4845,
      "step": 259
    },
    {
      "epoch": 1.4860335195530725,
      "grad_norm": 12.798267364501953,
      "learning_rate": 3.9378881987577646e-05,
      "loss": 0.4762,
      "step": 266
    },
    {
      "epoch": 1.5251396648044693,
      "grad_norm": 3.783871650695801,
      "learning_rate": 3.894409937888199e-05,
      "loss": 0.4889,
      "step": 273
    },
    {
      "epoch": 1.564245810055866,
      "grad_norm": 17.070810317993164,
      "learning_rate": 3.8509316770186335e-05,
      "loss": 0.5382,
      "step": 280
    },
    {
      "epoch": 1.6033519553072626,
      "grad_norm": 27.77778434753418,
      "learning_rate": 3.807453416149068e-05,
      "loss": 0.5336,
      "step": 287
    },
    {
      "epoch": 1.6424581005586592,
      "grad_norm": 52.91617202758789,
      "learning_rate": 3.763975155279503e-05,
      "loss": 0.6025,
      "step": 294
    },
    {
      "epoch": 1.6815642458100557,
      "grad_norm": 17.698348999023438,
      "learning_rate": 3.7204968944099385e-05,
      "loss": 0.3589,
      "step": 301
    },
    {
      "epoch": 1.7206703910614525,
      "grad_norm": 14.64693832397461,
      "learning_rate": 3.6770186335403726e-05,
      "loss": 0.3654,
      "step": 308
    },
    {
      "epoch": 1.7597765363128492,
      "grad_norm": 13.599970817565918,
      "learning_rate": 3.633540372670808e-05,
      "loss": 0.6611,
      "step": 315
    },
    {
      "epoch": 1.7988826815642458,
      "grad_norm": 6.364068984985352,
      "learning_rate": 3.590062111801242e-05,
      "loss": 0.5289,
      "step": 322
    },
    {
      "epoch": 1.8379888268156424,
      "grad_norm": 10.628365516662598,
      "learning_rate": 3.546583850931677e-05,
      "loss": 0.6834,
      "step": 329
    },
    {
      "epoch": 1.8770949720670391,
      "grad_norm": 7.663080215454102,
      "learning_rate": 3.503105590062112e-05,
      "loss": 0.4576,
      "step": 336
    },
    {
      "epoch": 1.916201117318436,
      "grad_norm": 9.863435745239258,
      "learning_rate": 3.4596273291925466e-05,
      "loss": 0.4385,
      "step": 343
    },
    {
      "epoch": 1.9553072625698324,
      "grad_norm": 7.04995059967041,
      "learning_rate": 3.4161490683229814e-05,
      "loss": 0.3589,
      "step": 350
    },
    {
      "epoch": 1.994413407821229,
      "grad_norm": 12.553130149841309,
      "learning_rate": 3.372670807453416e-05,
      "loss": 0.6421,
      "step": 357
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7492063492063492,
      "eval_f1_macro": 0.7349736157447978,
      "eval_f1_micro": 0.7492063492063492,
      "eval_f1_weighted": 0.7506575340838828,
      "eval_loss": 0.5765425562858582,
      "eval_precision_macro": 0.7150911360799,
      "eval_precision_micro": 0.7492063492063492,
      "eval_precision_weighted": 0.7534924677486475,
      "eval_recall_macro": 0.7595969666757304,
      "eval_recall_micro": 0.7492063492063492,
      "eval_recall_weighted": 0.7492063492063492,
      "eval_runtime": 0.8337,
      "eval_samples_per_second": 377.837,
      "eval_steps_per_second": 23.99,
      "step": 358
    }
  ],
  "logging_steps": 7,
  "max_steps": 895,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "total_flos": 188389207093248.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}