{ "best_metric": 0.7649385333061218, "best_model_checkpoint": "xblock-social-screenshots-4/checkpoint-1542", "epoch": 3.0, "eval_steps": 500, "global_step": 1542, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 12.289417266845703, "learning_rate": 8.064516129032258e-06, "loss": 1.0277, "step": 25 }, { "epoch": 0.1, "grad_norm": 9.727346420288086, "learning_rate": 1.5806451612903226e-05, "loss": 1.1841, "step": 50 }, { "epoch": 0.15, "grad_norm": 14.582966804504395, "learning_rate": 2.3870967741935486e-05, "loss": 1.1487, "step": 75 }, { "epoch": 0.19, "grad_norm": 13.005010604858398, "learning_rate": 3.193548387096774e-05, "loss": 1.2848, "step": 100 }, { "epoch": 0.24, "grad_norm": 7.13414192199707, "learning_rate": 4e-05, "loss": 1.2483, "step": 125 }, { "epoch": 0.29, "grad_norm": 10.042699813842773, "learning_rate": 4.806451612903226e-05, "loss": 1.0897, "step": 150 }, { "epoch": 0.34, "grad_norm": 11.965900421142578, "learning_rate": 4.935111751982697e-05, "loss": 1.1733, "step": 175 }, { "epoch": 0.39, "grad_norm": 6.2085347175598145, "learning_rate": 4.844989185291997e-05, "loss": 1.2392, "step": 200 }, { "epoch": 0.44, "grad_norm": 7.71270751953125, "learning_rate": 4.7548666186012983e-05, "loss": 1.1152, "step": 225 }, { "epoch": 0.49, "grad_norm": 17.0334529876709, "learning_rate": 4.6647440519105984e-05, "loss": 1.1423, "step": 250 }, { "epoch": 0.54, "grad_norm": 9.85234546661377, "learning_rate": 4.574621485219899e-05, "loss": 1.1417, "step": 275 }, { "epoch": 0.58, "grad_norm": 7.187493801116943, "learning_rate": 4.4844989185292e-05, "loss": 1.1791, "step": 300 }, { "epoch": 0.63, "grad_norm": 8.927938461303711, "learning_rate": 4.394376351838501e-05, "loss": 1.1275, "step": 325 }, { "epoch": 0.68, "grad_norm": 10.718963623046875, "learning_rate": 4.304253785147801e-05, "loss": 1.0876, "step": 350 }, { "epoch": 0.73, "grad_norm": 9.936092376708984, "learning_rate": 4.214131218457102e-05, "loss": 1.0834, "step": 375 }, { "epoch": 0.78, "grad_norm": 11.561012268066406, "learning_rate": 4.124008651766402e-05, "loss": 0.9958, "step": 400 }, { "epoch": 0.83, "grad_norm": 8.201454162597656, "learning_rate": 4.033886085075703e-05, "loss": 1.1471, "step": 425 }, { "epoch": 0.88, "grad_norm": 10.632408142089844, "learning_rate": 3.943763518385004e-05, "loss": 1.1176, "step": 450 }, { "epoch": 0.92, "grad_norm": 13.286883354187012, "learning_rate": 3.8536409516943046e-05, "loss": 1.1054, "step": 475 }, { "epoch": 0.97, "grad_norm": 9.36621379852295, "learning_rate": 3.763518385003605e-05, "loss": 1.1142, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.7281553398058253, "eval_f1_macro": 0.3571554737240677, "eval_f1_micro": 0.7281553398058251, "eval_f1_weighted": 0.6902205394370374, "eval_loss": 0.8712501525878906, "eval_precision_macro": 0.4901931986654834, "eval_precision_micro": 0.7281553398058253, "eval_precision_weighted": 0.698922565321561, "eval_recall_macro": 0.33272931310043147, "eval_recall_micro": 0.7281553398058253, "eval_recall_weighted": 0.7281553398058253, "eval_runtime": 41.8624, "eval_samples_per_second": 24.604, "eval_steps_per_second": 1.553, "step": 514 }, { "epoch": 1.02, "grad_norm": 2.906583786010742, "learning_rate": 3.673395818312906e-05, "loss": 1.0156, "step": 525 }, { "epoch": 1.07, "grad_norm": 4.102613925933838, "learning_rate": 3.583273251622206e-05, "loss": 0.962, "step": 550 }, { "epoch": 1.12, "grad_norm": 8.407520294189453, "learning_rate": 3.493150684931507e-05, "loss": 0.9899, "step": 575 }, { "epoch": 1.17, "grad_norm": 7.069764137268066, "learning_rate": 3.403028118240808e-05, "loss": 1.0278, "step": 600 }, { "epoch": 1.22, "grad_norm": 15.905261993408203, "learning_rate": 3.3129055515501085e-05, "loss": 1.0595, "step": 625 }, { "epoch": 1.26, "grad_norm": 6.158019542694092, "learning_rate": 3.2227829848594086e-05, "loss": 1.1077, "step": 650 }, { "epoch": 1.31, "grad_norm": 9.805268287658691, "learning_rate": 3.13266041816871e-05, "loss": 0.8815, "step": 675 }, { "epoch": 1.36, "grad_norm": 15.077588081359863, "learning_rate": 3.04253785147801e-05, "loss": 0.967, "step": 700 }, { "epoch": 1.41, "grad_norm": 9.116120338439941, "learning_rate": 2.952415284787311e-05, "loss": 0.9285, "step": 725 }, { "epoch": 1.46, "grad_norm": 6.24492883682251, "learning_rate": 2.8622927180966113e-05, "loss": 0.8676, "step": 750 }, { "epoch": 1.51, "grad_norm": 8.600728988647461, "learning_rate": 2.7721701514059124e-05, "loss": 1.0795, "step": 775 }, { "epoch": 1.56, "grad_norm": 12.541471481323242, "learning_rate": 2.6820475847152128e-05, "loss": 0.7429, "step": 800 }, { "epoch": 1.61, "grad_norm": 7.928451061248779, "learning_rate": 2.5919250180245136e-05, "loss": 0.8436, "step": 825 }, { "epoch": 1.65, "grad_norm": 12.788630485534668, "learning_rate": 2.501802451333814e-05, "loss": 1.0718, "step": 850 }, { "epoch": 1.7, "grad_norm": 12.828117370605469, "learning_rate": 2.4116798846431148e-05, "loss": 1.0588, "step": 875 }, { "epoch": 1.75, "grad_norm": 8.542132377624512, "learning_rate": 2.3215573179524152e-05, "loss": 0.9365, "step": 900 }, { "epoch": 1.8, "grad_norm": 6.623122692108154, "learning_rate": 2.231434751261716e-05, "loss": 0.788, "step": 925 }, { "epoch": 1.85, "grad_norm": 10.277917861938477, "learning_rate": 2.1413121845710167e-05, "loss": 0.7988, "step": 950 }, { "epoch": 1.9, "grad_norm": 10.783781051635742, "learning_rate": 2.051189617880317e-05, "loss": 0.9515, "step": 975 }, { "epoch": 1.95, "grad_norm": 9.338010787963867, "learning_rate": 1.961067051189618e-05, "loss": 0.8298, "step": 1000 }, { "epoch": 1.99, "grad_norm": 8.552199363708496, "learning_rate": 1.8709444844989187e-05, "loss": 1.0186, "step": 1025 }, { "epoch": 2.0, "eval_accuracy": 0.7495145631067961, "eval_f1_macro": 0.4164105966638123, "eval_f1_micro": 0.7495145631067962, "eval_f1_weighted": 0.7192389597003817, "eval_loss": 0.7742837071418762, "eval_precision_macro": 0.6024626428820864, "eval_precision_micro": 0.7495145631067961, "eval_precision_weighted": 0.7393294285471907, "eval_recall_macro": 0.374337750947193, "eval_recall_micro": 0.7495145631067961, "eval_recall_weighted": 0.7495145631067961, "eval_runtime": 42.4523, "eval_samples_per_second": 24.263, "eval_steps_per_second": 1.531, "step": 1028 }, { "epoch": 2.04, "grad_norm": 9.136883735656738, "learning_rate": 1.780821917808219e-05, "loss": 0.779, "step": 1050 }, { "epoch": 2.09, "grad_norm": 10.608307838439941, "learning_rate": 1.6943042537851476e-05, "loss": 0.9101, "step": 1075 }, { "epoch": 2.14, "grad_norm": 7.822091579437256, "learning_rate": 1.6041816870944484e-05, "loss": 0.892, "step": 1100 }, { "epoch": 2.19, "grad_norm": 8.77257251739502, "learning_rate": 1.5140591204037492e-05, "loss": 0.6383, "step": 1125 }, { "epoch": 2.24, "grad_norm": 5.078084468841553, "learning_rate": 1.4239365537130498e-05, "loss": 0.842, "step": 1150 }, { "epoch": 2.29, "grad_norm": 6.022009372711182, "learning_rate": 1.3338139870223503e-05, "loss": 0.6754, "step": 1175 }, { "epoch": 2.33, "grad_norm": 9.004426002502441, "learning_rate": 1.2436914203316511e-05, "loss": 0.9429, "step": 1200 }, { "epoch": 2.38, "grad_norm": 8.876343727111816, "learning_rate": 1.1535688536409517e-05, "loss": 0.801, "step": 1225 }, { "epoch": 2.43, "grad_norm": 7.864119052886963, "learning_rate": 1.0634462869502523e-05, "loss": 0.7809, "step": 1250 }, { "epoch": 2.48, "grad_norm": 9.604496955871582, "learning_rate": 9.73323720259553e-06, "loss": 0.7273, "step": 1275 }, { "epoch": 2.53, "grad_norm": 10.21464729309082, "learning_rate": 8.832011535688536e-06, "loss": 0.8211, "step": 1300 }, { "epoch": 2.58, "grad_norm": 10.150245666503906, "learning_rate": 7.930785868781542e-06, "loss": 0.7916, "step": 1325 }, { "epoch": 2.63, "grad_norm": 8.246322631835938, "learning_rate": 7.029560201874549e-06, "loss": 0.6806, "step": 1350 }, { "epoch": 2.68, "grad_norm": 5.490677356719971, "learning_rate": 6.128334534967556e-06, "loss": 0.6664, "step": 1375 }, { "epoch": 2.72, "grad_norm": 9.993110656738281, "learning_rate": 5.227108868060563e-06, "loss": 0.8779, "step": 1400 }, { "epoch": 2.77, "grad_norm": 11.303633689880371, "learning_rate": 4.325883201153569e-06, "loss": 0.8084, "step": 1425 }, { "epoch": 2.82, "grad_norm": 12.19831657409668, "learning_rate": 3.4246575342465754e-06, "loss": 0.7864, "step": 1450 }, { "epoch": 2.87, "grad_norm": 7.894024848937988, "learning_rate": 2.523431867339582e-06, "loss": 0.8498, "step": 1475 }, { "epoch": 2.92, "grad_norm": 7.604086399078369, "learning_rate": 1.6222062004325884e-06, "loss": 0.7161, "step": 1500 }, { "epoch": 2.97, "grad_norm": 9.083874702453613, "learning_rate": 7.209805335255948e-07, "loss": 0.9195, "step": 1525 }, { "epoch": 3.0, "eval_accuracy": 0.7640776699029126, "eval_f1_macro": 0.4616966462629057, "eval_f1_micro": 0.7640776699029127, "eval_f1_weighted": 0.7363203004281954, "eval_loss": 0.7649385333061218, "eval_precision_macro": 0.6252686879910994, "eval_precision_micro": 0.7640776699029126, "eval_precision_weighted": 0.7552425547875681, "eval_recall_macro": 0.4119139234072332, "eval_recall_micro": 0.7640776699029126, "eval_recall_weighted": 0.7640776699029126, "eval_runtime": 41.5633, "eval_samples_per_second": 24.781, "eval_steps_per_second": 1.564, "step": 1542 } ], "logging_steps": 25, "max_steps": 1542, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 9.553208099078246e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }