{ "best_metric": 0.4904100298881531, "best_model_checkpoint": "strategydisofmaterialimpactsv1/checkpoint-410", "epoch": 5.0, "eval_steps": 500, "global_step": 410, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04878048780487805, "grad_norm": 5.26182746887207, "learning_rate": 9.75609756097561e-07, "loss": 2.0959, "step": 4 }, { "epoch": 0.0975609756097561, "grad_norm": 3.954611301422119, "learning_rate": 1.951219512195122e-06, "loss": 2.084, "step": 8 }, { "epoch": 0.14634146341463414, "grad_norm": 2.762833595275879, "learning_rate": 2.926829268292683e-06, "loss": 2.1114, "step": 12 }, { "epoch": 0.1951219512195122, "grad_norm": 3.979111909866333, "learning_rate": 3.902439024390244e-06, "loss": 2.0699, "step": 16 }, { "epoch": 0.24390243902439024, "grad_norm": 2.9182474613189697, "learning_rate": 4.8780487804878055e-06, "loss": 2.0512, "step": 20 }, { "epoch": 0.2926829268292683, "grad_norm": 4.365296840667725, "learning_rate": 5.853658536585366e-06, "loss": 2.049, "step": 24 }, { "epoch": 0.34146341463414637, "grad_norm": 3.4509384632110596, "learning_rate": 6.829268292682928e-06, "loss": 2.0762, "step": 28 }, { "epoch": 0.3902439024390244, "grad_norm": 3.717407703399658, "learning_rate": 7.804878048780489e-06, "loss": 2.1205, "step": 32 }, { "epoch": 0.43902439024390244, "grad_norm": 3.0547749996185303, "learning_rate": 8.78048780487805e-06, "loss": 2.0718, "step": 36 }, { "epoch": 0.4878048780487805, "grad_norm": 5.152350902557373, "learning_rate": 9.756097560975611e-06, "loss": 2.0393, "step": 40 }, { "epoch": 0.5365853658536586, "grad_norm": 3.143453359603882, "learning_rate": 9.91869918699187e-06, "loss": 2.075, "step": 44 }, { "epoch": 0.5853658536585366, "grad_norm": 3.0932018756866455, "learning_rate": 9.81029810298103e-06, "loss": 2.0555, "step": 48 }, { "epoch": 0.6341463414634146, "grad_norm": 3.708493232727051, "learning_rate": 9.70189701897019e-06, "loss": 2.0697, "step": 52 }, { "epoch": 0.6829268292682927, "grad_norm": 5.804870128631592, "learning_rate": 9.59349593495935e-06, "loss": 2.0829, "step": 56 }, { "epoch": 0.7317073170731707, "grad_norm": 4.4362616539001465, "learning_rate": 9.485094850948512e-06, "loss": 2.0122, "step": 60 }, { "epoch": 0.7804878048780488, "grad_norm": 3.124617099761963, "learning_rate": 9.37669376693767e-06, "loss": 2.0341, "step": 64 }, { "epoch": 0.8292682926829268, "grad_norm": 6.305838108062744, "learning_rate": 9.268292682926831e-06, "loss": 2.006, "step": 68 }, { "epoch": 0.8780487804878049, "grad_norm": 4.479095458984375, "learning_rate": 9.15989159891599e-06, "loss": 1.9879, "step": 72 }, { "epoch": 0.926829268292683, "grad_norm": 4.230350494384766, "learning_rate": 9.051490514905151e-06, "loss": 2.0266, "step": 76 }, { "epoch": 0.975609756097561, "grad_norm": 4.699102401733398, "learning_rate": 8.94308943089431e-06, "loss": 1.9846, "step": 80 }, { "epoch": 1.0, "eval_accuracy": 0.4329268292682927, "eval_f1_macro": 0.3427293862494566, "eval_f1_micro": 0.4329268292682927, "eval_f1_weighted": 0.34234078365494675, "eval_loss": 1.8970143795013428, "eval_precision_macro": 0.5188128944226504, "eval_precision_micro": 0.4329268292682927, "eval_precision_weighted": 0.51827396280103, "eval_recall_macro": 0.43154761904761907, "eval_recall_micro": 0.4329268292682927, "eval_recall_weighted": 0.4329268292682927, "eval_runtime": 0.9572, "eval_samples_per_second": 171.331, "eval_steps_per_second": 11.492, "step": 82 }, { "epoch": 1.024390243902439, "grad_norm": 5.648143768310547, "learning_rate": 8.834688346883469e-06, "loss": 1.8587, "step": 84 }, { "epoch": 1.0731707317073171, "grad_norm": 5.579315185546875, "learning_rate": 8.726287262872629e-06, "loss": 1.8257, "step": 88 }, { "epoch": 1.1219512195121952, "grad_norm": 6.962705135345459, "learning_rate": 8.617886178861789e-06, "loss": 1.8158, "step": 92 }, { "epoch": 1.170731707317073, "grad_norm": 7.015571117401123, "learning_rate": 8.509485094850949e-06, "loss": 1.6853, "step": 96 }, { "epoch": 1.2195121951219512, "grad_norm": 6.109464168548584, "learning_rate": 8.401084010840109e-06, "loss": 1.8065, "step": 100 }, { "epoch": 1.2682926829268293, "grad_norm": 7.725495338439941, "learning_rate": 8.292682926829268e-06, "loss": 1.7683, "step": 104 }, { "epoch": 1.3170731707317074, "grad_norm": 8.51897144317627, "learning_rate": 8.184281842818428e-06, "loss": 1.5712, "step": 108 }, { "epoch": 1.3658536585365852, "grad_norm": 7.310000419616699, "learning_rate": 8.075880758807588e-06, "loss": 1.5885, "step": 112 }, { "epoch": 1.4146341463414633, "grad_norm": 6.727824687957764, "learning_rate": 7.967479674796748e-06, "loss": 1.4469, "step": 116 }, { "epoch": 1.4634146341463414, "grad_norm": 5.8941450119018555, "learning_rate": 7.859078590785908e-06, "loss": 1.441, "step": 120 }, { "epoch": 1.5121951219512195, "grad_norm": 8.939291954040527, "learning_rate": 7.750677506775068e-06, "loss": 1.3494, "step": 124 }, { "epoch": 1.5609756097560976, "grad_norm": 6.784910678863525, "learning_rate": 7.64227642276423e-06, "loss": 1.4556, "step": 128 }, { "epoch": 1.6097560975609757, "grad_norm": 5.674899578094482, "learning_rate": 7.5338753387533885e-06, "loss": 1.1797, "step": 132 }, { "epoch": 1.6585365853658538, "grad_norm": 6.600795269012451, "learning_rate": 7.425474254742548e-06, "loss": 1.3146, "step": 136 }, { "epoch": 1.7073170731707317, "grad_norm": 7.859283447265625, "learning_rate": 7.317073170731707e-06, "loss": 1.2627, "step": 140 }, { "epoch": 1.7560975609756098, "grad_norm": 7.052417278289795, "learning_rate": 7.208672086720868e-06, "loss": 1.144, "step": 144 }, { "epoch": 1.8048780487804879, "grad_norm": 5.840285301208496, "learning_rate": 7.100271002710027e-06, "loss": 1.1237, "step": 148 }, { "epoch": 1.8536585365853657, "grad_norm": 7.636930465698242, "learning_rate": 6.991869918699188e-06, "loss": 1.0481, "step": 152 }, { "epoch": 1.9024390243902438, "grad_norm": 7.1155877113342285, "learning_rate": 6.883468834688347e-06, "loss": 1.2339, "step": 156 }, { "epoch": 1.951219512195122, "grad_norm": 6.952579975128174, "learning_rate": 6.775067750677508e-06, "loss": 0.9287, "step": 160 }, { "epoch": 2.0, "grad_norm": 10.576449394226074, "learning_rate": 6.666666666666667e-06, "loss": 1.1756, "step": 164 }, { "epoch": 2.0, "eval_accuracy": 0.8048780487804879, "eval_f1_macro": 0.7928380685977737, "eval_f1_micro": 0.8048780487804879, "eval_f1_weighted": 0.7953137507177211, "eval_loss": 0.9494345784187317, "eval_precision_macro": 0.8125730994152047, "eval_precision_micro": 0.8048780487804879, "eval_precision_weighted": 0.8141135358722009, "eval_recall_macro": 0.8014880952380952, "eval_recall_micro": 0.8048780487804879, "eval_recall_weighted": 0.8048780487804879, "eval_runtime": 0.942, "eval_samples_per_second": 174.105, "eval_steps_per_second": 11.678, "step": 164 }, { "epoch": 2.048780487804878, "grad_norm": 7.403735637664795, "learning_rate": 6.558265582655827e-06, "loss": 1.0034, "step": 168 }, { "epoch": 2.097560975609756, "grad_norm": 6.475048542022705, "learning_rate": 6.449864498644986e-06, "loss": 0.8662, "step": 172 }, { "epoch": 2.1463414634146343, "grad_norm": 8.434925079345703, "learning_rate": 6.368563685636857e-06, "loss": 1.1553, "step": 176 }, { "epoch": 2.1951219512195124, "grad_norm": 5.570111274719238, "learning_rate": 6.260162601626017e-06, "loss": 0.8746, "step": 180 }, { "epoch": 2.2439024390243905, "grad_norm": 8.765731811523438, "learning_rate": 6.1517615176151765e-06, "loss": 1.0089, "step": 184 }, { "epoch": 2.292682926829268, "grad_norm": 4.403049468994141, "learning_rate": 6.043360433604336e-06, "loss": 0.8467, "step": 188 }, { "epoch": 2.341463414634146, "grad_norm": 9.121601104736328, "learning_rate": 5.934959349593496e-06, "loss": 0.9069, "step": 192 }, { "epoch": 2.3902439024390243, "grad_norm": 6.2799811363220215, "learning_rate": 5.826558265582656e-06, "loss": 0.7733, "step": 196 }, { "epoch": 2.4390243902439024, "grad_norm": 6.013058185577393, "learning_rate": 5.718157181571816e-06, "loss": 0.8735, "step": 200 }, { "epoch": 2.4878048780487805, "grad_norm": 6.162946701049805, "learning_rate": 5.609756097560977e-06, "loss": 0.6666, "step": 204 }, { "epoch": 2.5365853658536586, "grad_norm": 4.041454792022705, "learning_rate": 5.501355013550136e-06, "loss": 0.6262, "step": 208 }, { "epoch": 2.5853658536585367, "grad_norm": 6.468296051025391, "learning_rate": 5.3929539295392965e-06, "loss": 0.8259, "step": 212 }, { "epoch": 2.6341463414634148, "grad_norm": 9.301799774169922, "learning_rate": 5.2845528455284555e-06, "loss": 0.8074, "step": 216 }, { "epoch": 2.682926829268293, "grad_norm": 9.052480697631836, "learning_rate": 5.176151761517616e-06, "loss": 0.7582, "step": 220 }, { "epoch": 2.7317073170731705, "grad_norm": 10.193408012390137, "learning_rate": 5.067750677506775e-06, "loss": 0.7229, "step": 224 }, { "epoch": 2.7804878048780486, "grad_norm": 9.648282051086426, "learning_rate": 4.959349593495935e-06, "loss": 0.714, "step": 228 }, { "epoch": 2.8292682926829267, "grad_norm": 5.158674240112305, "learning_rate": 4.850948509485095e-06, "loss": 0.701, "step": 232 }, { "epoch": 2.8780487804878048, "grad_norm": 6.773287296295166, "learning_rate": 4.742547425474256e-06, "loss": 0.579, "step": 236 }, { "epoch": 2.926829268292683, "grad_norm": 4.932857513427734, "learning_rate": 4.634146341463416e-06, "loss": 0.6223, "step": 240 }, { "epoch": 2.975609756097561, "grad_norm": 6.144374370574951, "learning_rate": 4.5257452574525755e-06, "loss": 0.7543, "step": 244 }, { "epoch": 3.0, "eval_accuracy": 0.8536585365853658, "eval_f1_macro": 0.8338492833656121, "eval_f1_micro": 0.8536585365853658, "eval_f1_weighted": 0.8362103254837173, "eval_loss": 0.6258153915405273, "eval_precision_macro": 0.8495264546035806, "eval_precision_micro": 0.8536585365853658, "eval_precision_weighted": 0.8505702817977668, "eval_recall_macro": 0.850297619047619, "eval_recall_micro": 0.8536585365853658, "eval_recall_weighted": 0.8536585365853658, "eval_runtime": 0.964, "eval_samples_per_second": 170.127, "eval_steps_per_second": 11.411, "step": 246 }, { "epoch": 3.024390243902439, "grad_norm": 7.524605751037598, "learning_rate": 4.4173441734417345e-06, "loss": 0.7291, "step": 248 }, { "epoch": 3.073170731707317, "grad_norm": 6.1862945556640625, "learning_rate": 4.308943089430894e-06, "loss": 0.664, "step": 252 }, { "epoch": 3.1219512195121952, "grad_norm": 4.109325885772705, "learning_rate": 4.200542005420054e-06, "loss": 0.5575, "step": 256 }, { "epoch": 3.1707317073170733, "grad_norm": 5.083375453948975, "learning_rate": 4.092140921409214e-06, "loss": 0.5287, "step": 260 }, { "epoch": 3.2195121951219514, "grad_norm": 4.418028354644775, "learning_rate": 3.983739837398374e-06, "loss": 0.4596, "step": 264 }, { "epoch": 3.2682926829268295, "grad_norm": 8.103421211242676, "learning_rate": 3.875338753387534e-06, "loss": 0.5384, "step": 268 }, { "epoch": 3.317073170731707, "grad_norm": 9.259288787841797, "learning_rate": 3.7669376693766942e-06, "loss": 0.6603, "step": 272 }, { "epoch": 3.3658536585365852, "grad_norm": 8.9814453125, "learning_rate": 3.6585365853658537e-06, "loss": 0.614, "step": 276 }, { "epoch": 3.4146341463414633, "grad_norm": 7.275993824005127, "learning_rate": 3.5501355013550136e-06, "loss": 0.6409, "step": 280 }, { "epoch": 3.4634146341463414, "grad_norm": 7.694216251373291, "learning_rate": 3.4417344173441734e-06, "loss": 0.5708, "step": 284 }, { "epoch": 3.5121951219512195, "grad_norm": 4.590734481811523, "learning_rate": 3.3333333333333333e-06, "loss": 0.4449, "step": 288 }, { "epoch": 3.5609756097560976, "grad_norm": 9.011459350585938, "learning_rate": 3.224932249322493e-06, "loss": 0.764, "step": 292 }, { "epoch": 3.6097560975609757, "grad_norm": 6.224943161010742, "learning_rate": 3.116531165311653e-06, "loss": 0.5983, "step": 296 }, { "epoch": 3.658536585365854, "grad_norm": 7.525081157684326, "learning_rate": 3.0081300813008134e-06, "loss": 0.6081, "step": 300 }, { "epoch": 3.7073170731707314, "grad_norm": 7.639159202575684, "learning_rate": 2.8997289972899733e-06, "loss": 0.5492, "step": 304 }, { "epoch": 3.7560975609756095, "grad_norm": 5.162847518920898, "learning_rate": 2.791327913279133e-06, "loss": 0.489, "step": 308 }, { "epoch": 3.8048780487804876, "grad_norm": 6.004736423492432, "learning_rate": 2.682926829268293e-06, "loss": 0.4218, "step": 312 }, { "epoch": 3.8536585365853657, "grad_norm": 5.897964000701904, "learning_rate": 2.574525745257453e-06, "loss": 0.5438, "step": 316 }, { "epoch": 3.902439024390244, "grad_norm": 7.25752592086792, "learning_rate": 2.4661246612466128e-06, "loss": 0.5559, "step": 320 }, { "epoch": 3.951219512195122, "grad_norm": 6.005576133728027, "learning_rate": 2.3577235772357727e-06, "loss": 0.6216, "step": 324 }, { "epoch": 4.0, "grad_norm": 13.720139503479004, "learning_rate": 2.2493224932249325e-06, "loss": 0.6314, "step": 328 }, { "epoch": 4.0, "eval_accuracy": 0.8658536585365854, "eval_f1_macro": 0.851601435352396, "eval_f1_micro": 0.8658536585365854, "eval_f1_weighted": 0.8538194199208925, "eval_loss": 0.5200314521789551, "eval_precision_macro": 0.8594329005283454, "eval_precision_micro": 0.8658536585365854, "eval_precision_weighted": 0.8606490578892111, "eval_recall_macro": 0.862797619047619, "eval_recall_micro": 0.8658536585365854, "eval_recall_weighted": 0.8658536585365854, "eval_runtime": 0.943, "eval_samples_per_second": 173.915, "eval_steps_per_second": 11.665, "step": 328 }, { "epoch": 4.048780487804878, "grad_norm": 6.544665336608887, "learning_rate": 2.1409214092140924e-06, "loss": 0.5041, "step": 332 }, { "epoch": 4.097560975609756, "grad_norm": 7.109484672546387, "learning_rate": 2.0325203252032523e-06, "loss": 0.4561, "step": 336 }, { "epoch": 4.146341463414634, "grad_norm": 4.8075127601623535, "learning_rate": 1.924119241192412e-06, "loss": 0.4234, "step": 340 }, { "epoch": 4.195121951219512, "grad_norm": 8.269268035888672, "learning_rate": 1.8157181571815718e-06, "loss": 0.3753, "step": 344 }, { "epoch": 4.2439024390243905, "grad_norm": 11.827970504760742, "learning_rate": 1.707317073170732e-06, "loss": 0.6272, "step": 348 }, { "epoch": 4.2926829268292686, "grad_norm": 6.7040228843688965, "learning_rate": 1.5989159891598918e-06, "loss": 0.4453, "step": 352 }, { "epoch": 4.341463414634147, "grad_norm": 4.821525573730469, "learning_rate": 1.4905149051490517e-06, "loss": 0.4374, "step": 356 }, { "epoch": 4.390243902439025, "grad_norm": 5.777258396148682, "learning_rate": 1.3821138211382116e-06, "loss": 0.4603, "step": 360 }, { "epoch": 4.439024390243903, "grad_norm": 4.959315299987793, "learning_rate": 1.2737127371273714e-06, "loss": 0.49, "step": 364 }, { "epoch": 4.487804878048781, "grad_norm": 5.7751898765563965, "learning_rate": 1.1653116531165313e-06, "loss": 0.5202, "step": 368 }, { "epoch": 4.536585365853659, "grad_norm": 8.084724426269531, "learning_rate": 1.0569105691056912e-06, "loss": 0.5615, "step": 372 }, { "epoch": 4.585365853658536, "grad_norm": 4.794303894042969, "learning_rate": 9.485094850948511e-07, "loss": 0.5217, "step": 376 }, { "epoch": 4.634146341463414, "grad_norm": 6.632653713226318, "learning_rate": 8.401084010840109e-07, "loss": 0.4886, "step": 380 }, { "epoch": 4.682926829268292, "grad_norm": 3.7842323780059814, "learning_rate": 7.317073170731707e-07, "loss": 0.3898, "step": 384 }, { "epoch": 4.7317073170731705, "grad_norm": 7.341838836669922, "learning_rate": 6.233062330623307e-07, "loss": 0.4803, "step": 388 }, { "epoch": 4.780487804878049, "grad_norm": 5.619211196899414, "learning_rate": 5.149051490514906e-07, "loss": 0.4797, "step": 392 }, { "epoch": 4.829268292682927, "grad_norm": 6.9567742347717285, "learning_rate": 4.0650406504065046e-07, "loss": 0.4595, "step": 396 }, { "epoch": 4.878048780487805, "grad_norm": 4.7969136238098145, "learning_rate": 2.9810298102981034e-07, "loss": 0.3441, "step": 400 }, { "epoch": 4.926829268292683, "grad_norm": 6.794863700866699, "learning_rate": 1.897018970189702e-07, "loss": 0.6099, "step": 404 }, { "epoch": 4.975609756097561, "grad_norm": 3.76300311088562, "learning_rate": 8.130081300813009e-08, "loss": 0.4751, "step": 408 }, { "epoch": 5.0, "eval_accuracy": 0.8658536585365854, "eval_f1_macro": 0.851601435352396, "eval_f1_micro": 0.8658536585365854, "eval_f1_weighted": 0.8538194199208925, "eval_loss": 0.4904100298881531, "eval_precision_macro": 0.8594329005283454, "eval_precision_micro": 0.8658536585365854, "eval_precision_weighted": 0.8606490578892111, "eval_recall_macro": 0.862797619047619, "eval_recall_micro": 0.8658536585365854, "eval_recall_weighted": 0.8658536585365854, "eval_runtime": 0.9624, "eval_samples_per_second": 170.414, "eval_steps_per_second": 11.43, "step": 410 } ], "logging_steps": 4, "max_steps": 410, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 431889927536640.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }