{ "best_metric": 0.9948348999023438, "best_model_checkpoint": "xblock-social-screenshots-2/checkpoint-1737", "epoch": 3.0, "eval_steps": 500, "global_step": 1737, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 12.944127082824707, "learning_rate": 7.4712643678160925e-06, "loss": 2.5595, "step": 28 }, { "epoch": 0.1, "grad_norm": 10.593692779541016, "learning_rate": 1.5517241379310346e-05, "loss": 2.1472, "step": 56 }, { "epoch": 0.15, "grad_norm": 9.828369140625, "learning_rate": 2.327586206896552e-05, "loss": 1.7244, "step": 84 }, { "epoch": 0.19, "grad_norm": 8.050122261047363, "learning_rate": 3.132183908045977e-05, "loss": 1.5718, "step": 112 }, { "epoch": 0.24, "grad_norm": 7.566161155700684, "learning_rate": 3.936781609195402e-05, "loss": 1.4015, "step": 140 }, { "epoch": 0.29, "grad_norm": 8.954880714416504, "learning_rate": 4.741379310344828e-05, "loss": 1.3705, "step": 168 }, { "epoch": 0.34, "grad_norm": 8.316436767578125, "learning_rate": 4.9392194497760715e-05, "loss": 1.3727, "step": 196 }, { "epoch": 0.39, "grad_norm": 4.580837726593018, "learning_rate": 4.8496481126039675e-05, "loss": 1.2824, "step": 224 }, { "epoch": 0.44, "grad_norm": 12.312744140625, "learning_rate": 4.760076775431862e-05, "loss": 1.3119, "step": 252 }, { "epoch": 0.48, "grad_norm": 10.076079368591309, "learning_rate": 4.670505438259757e-05, "loss": 1.252, "step": 280 }, { "epoch": 0.53, "grad_norm": 9.073230743408203, "learning_rate": 4.580934101087652e-05, "loss": 1.3154, "step": 308 }, { "epoch": 0.58, "grad_norm": 9.829570770263672, "learning_rate": 4.491362763915547e-05, "loss": 1.3688, "step": 336 }, { "epoch": 0.63, "grad_norm": 7.165198802947998, "learning_rate": 4.4017914267434426e-05, "loss": 1.4376, "step": 364 }, { "epoch": 0.68, "grad_norm": 8.911142349243164, "learning_rate": 4.312220089571337e-05, "loss": 1.3533, "step": 392 }, { "epoch": 0.73, "grad_norm": 10.684996604919434, "learning_rate": 4.2226487523992326e-05, "loss": 1.2825, "step": 420 }, { "epoch": 0.77, "grad_norm": 10.35905647277832, "learning_rate": 4.133077415227127e-05, "loss": 1.3382, "step": 448 }, { "epoch": 0.82, "grad_norm": 9.137449264526367, "learning_rate": 4.0435060780550225e-05, "loss": 1.3054, "step": 476 }, { "epoch": 0.87, "grad_norm": 6.174429416656494, "learning_rate": 3.953934740882918e-05, "loss": 1.4294, "step": 504 }, { "epoch": 0.92, "grad_norm": 9.742380142211914, "learning_rate": 3.864363403710813e-05, "loss": 1.2502, "step": 532 }, { "epoch": 0.97, "grad_norm": 7.022644519805908, "learning_rate": 3.774792066538708e-05, "loss": 1.2926, "step": 560 }, { "epoch": 1.0, "eval_accuracy": 0.5591026747195859, "eval_f1_macro": 0.17116779008569874, "eval_f1_micro": 0.5591026747195859, "eval_f1_weighted": 0.47555675422704724, "eval_loss": 1.1608415842056274, "eval_precision_macro": 0.17146675898922856, "eval_precision_micro": 0.5591026747195859, "eval_precision_weighted": 0.4635728502463956, "eval_recall_macro": 0.1928392780592314, "eval_recall_micro": 0.5591026747195859, "eval_recall_weighted": 0.5591026747195859, "eval_runtime": 47.359, "eval_samples_per_second": 24.473, "eval_steps_per_second": 1.541, "step": 579 }, { "epoch": 1.02, "grad_norm": 7.127846717834473, "learning_rate": 3.685220729366603e-05, "loss": 1.161, "step": 588 }, { "epoch": 1.06, "grad_norm": 6.150296211242676, "learning_rate": 3.5956493921944976e-05, "loss": 1.1594, "step": 616 }, { "epoch": 1.11, "grad_norm": 6.171022415161133, "learning_rate": 3.506078055022393e-05, "loss": 1.1253, "step": 644 }, { "epoch": 1.16, "grad_norm": 12.212858200073242, "learning_rate": 3.416506717850288e-05, "loss": 1.2149, "step": 672 }, { "epoch": 1.21, "grad_norm": 8.228123664855957, "learning_rate": 3.3269353806781835e-05, "loss": 1.1905, "step": 700 }, { "epoch": 1.26, "grad_norm": 6.095876693725586, "learning_rate": 3.237364043506078e-05, "loss": 1.1387, "step": 728 }, { "epoch": 1.31, "grad_norm": 10.43623161315918, "learning_rate": 3.1477927063339734e-05, "loss": 1.2243, "step": 756 }, { "epoch": 1.35, "grad_norm": 6.94878625869751, "learning_rate": 3.058221369161868e-05, "loss": 1.0566, "step": 784 }, { "epoch": 1.4, "grad_norm": 10.651735305786133, "learning_rate": 2.9686500319897637e-05, "loss": 1.1425, "step": 812 }, { "epoch": 1.45, "grad_norm": 8.426492691040039, "learning_rate": 2.8790786948176586e-05, "loss": 1.173, "step": 840 }, { "epoch": 1.5, "grad_norm": 7.550307273864746, "learning_rate": 2.789507357645554e-05, "loss": 1.2052, "step": 868 }, { "epoch": 1.55, "grad_norm": 9.533821105957031, "learning_rate": 2.6999360204734485e-05, "loss": 1.2043, "step": 896 }, { "epoch": 1.6, "grad_norm": 7.776467323303223, "learning_rate": 2.6103646833013435e-05, "loss": 1.0199, "step": 924 }, { "epoch": 1.64, "grad_norm": 9.101223945617676, "learning_rate": 2.5207933461292388e-05, "loss": 1.1681, "step": 952 }, { "epoch": 1.69, "grad_norm": 7.713497161865234, "learning_rate": 2.4312220089571338e-05, "loss": 1.1177, "step": 980 }, { "epoch": 1.74, "grad_norm": 7.732503414154053, "learning_rate": 2.341650671785029e-05, "loss": 1.1099, "step": 1008 }, { "epoch": 1.79, "grad_norm": 9.252326965332031, "learning_rate": 2.252079334612924e-05, "loss": 1.2187, "step": 1036 }, { "epoch": 1.84, "grad_norm": 7.301546096801758, "learning_rate": 2.162507997440819e-05, "loss": 1.0414, "step": 1064 }, { "epoch": 1.89, "grad_norm": 8.406270027160645, "learning_rate": 2.0729366602687143e-05, "loss": 1.0479, "step": 1092 }, { "epoch": 1.93, "grad_norm": 7.392611980438232, "learning_rate": 1.9833653230966092e-05, "loss": 1.2113, "step": 1120 }, { "epoch": 1.98, "grad_norm": 7.991569995880127, "learning_rate": 1.8937939859245045e-05, "loss": 1.1279, "step": 1148 }, { "epoch": 2.0, "eval_accuracy": 0.548748921484038, "eval_f1_macro": 0.23354766712716346, "eval_f1_micro": 0.548748921484038, "eval_f1_weighted": 0.5206705245021934, "eval_loss": 1.076399326324463, "eval_precision_macro": 0.2591242803710448, "eval_precision_micro": 0.548748921484038, "eval_precision_weighted": 0.5086032820438354, "eval_recall_macro": 0.2392300811955355, "eval_recall_micro": 0.548748921484038, "eval_recall_weighted": 0.548748921484038, "eval_runtime": 45.7915, "eval_samples_per_second": 25.31, "eval_steps_per_second": 1.594, "step": 1158 }, { "epoch": 2.03, "grad_norm": 8.247747421264648, "learning_rate": 1.8042226487523995e-05, "loss": 1.0308, "step": 1176 }, { "epoch": 2.08, "grad_norm": 5.560178279876709, "learning_rate": 1.7146513115802944e-05, "loss": 1.0764, "step": 1204 }, { "epoch": 2.13, "grad_norm": 6.093762397766113, "learning_rate": 1.6250799744081894e-05, "loss": 1.1365, "step": 1232 }, { "epoch": 2.18, "grad_norm": 4.060959815979004, "learning_rate": 1.5355086372360844e-05, "loss": 1.0535, "step": 1260 }, { "epoch": 2.22, "grad_norm": 7.9812822341918945, "learning_rate": 1.4459373000639795e-05, "loss": 1.0241, "step": 1288 }, { "epoch": 2.27, "grad_norm": 5.996217250823975, "learning_rate": 1.3563659628918746e-05, "loss": 1.0137, "step": 1316 }, { "epoch": 2.32, "grad_norm": 10.663773536682129, "learning_rate": 1.2667946257197696e-05, "loss": 1.0274, "step": 1344 }, { "epoch": 2.37, "grad_norm": 9.829933166503906, "learning_rate": 1.1772232885476649e-05, "loss": 0.9893, "step": 1372 }, { "epoch": 2.42, "grad_norm": 9.35245418548584, "learning_rate": 1.0876519513755598e-05, "loss": 1.0497, "step": 1400 }, { "epoch": 2.47, "grad_norm": 9.116876602172852, "learning_rate": 9.98080614203455e-06, "loss": 0.972, "step": 1428 }, { "epoch": 2.51, "grad_norm": 8.650375366210938, "learning_rate": 9.085092770313499e-06, "loss": 1.0058, "step": 1456 }, { "epoch": 2.56, "grad_norm": 9.76453971862793, "learning_rate": 8.18937939859245e-06, "loss": 1.0451, "step": 1484 }, { "epoch": 2.61, "grad_norm": 5.405215263366699, "learning_rate": 7.293666026871402e-06, "loss": 0.9628, "step": 1512 }, { "epoch": 2.66, "grad_norm": 10.716626167297363, "learning_rate": 6.397952655150352e-06, "loss": 1.1384, "step": 1540 }, { "epoch": 2.71, "grad_norm": 4.47056770324707, "learning_rate": 5.502239283429303e-06, "loss": 0.9374, "step": 1568 }, { "epoch": 2.76, "grad_norm": 7.823177814483643, "learning_rate": 4.606525911708254e-06, "loss": 1.019, "step": 1596 }, { "epoch": 2.8, "grad_norm": 10.679405212402344, "learning_rate": 3.7108125399872046e-06, "loss": 1.0615, "step": 1624 }, { "epoch": 2.85, "grad_norm": 16.2053165435791, "learning_rate": 2.815099168266155e-06, "loss": 0.959, "step": 1652 }, { "epoch": 2.9, "grad_norm": 8.753532409667969, "learning_rate": 1.9193857965451054e-06, "loss": 1.0041, "step": 1680 }, { "epoch": 2.95, "grad_norm": 7.493074417114258, "learning_rate": 1.0236724248240563e-06, "loss": 0.998, "step": 1708 }, { "epoch": 3.0, "grad_norm": 7.273109436035156, "learning_rate": 1.2795905310300704e-07, "loss": 0.9851, "step": 1736 }, { "epoch": 3.0, "eval_accuracy": 0.5823986194995686, "eval_f1_macro": 0.23526955685803672, "eval_f1_micro": 0.5823986194995686, "eval_f1_weighted": 0.5233223208450835, "eval_loss": 0.9948348999023438, "eval_precision_macro": 0.2973178657776674, "eval_precision_micro": 0.5823986194995686, "eval_precision_weighted": 0.5462756602202016, "eval_recall_macro": 0.24224828041633692, "eval_recall_micro": 0.5823986194995686, "eval_recall_weighted": 0.5823986194995686, "eval_runtime": 46.2253, "eval_samples_per_second": 25.073, "eval_steps_per_second": 1.579, "step": 1737 } ], "logging_steps": 28, "max_steps": 1737, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.0764602068237517e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }