{ "best_metric": 0.7544093132019043, "best_model_checkpoint": "xblock-social-screenshots-3/checkpoint-1245", "epoch": 3.0, "eval_steps": 500, "global_step": 1245, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 13.465712547302246, "learning_rate": 7.2e-06, "loss": 2.4572, "step": 20 }, { "epoch": 0.1, "grad_norm": 9.6524076461792, "learning_rate": 1.52e-05, "loss": 2.079, "step": 40 }, { "epoch": 0.14, "grad_norm": 9.261948585510254, "learning_rate": 2.32e-05, "loss": 1.7089, "step": 60 }, { "epoch": 0.19, "grad_norm": 9.193083763122559, "learning_rate": 3.12e-05, "loss": 1.4402, "step": 80 }, { "epoch": 0.24, "grad_norm": 7.320693492889404, "learning_rate": 3.9200000000000004e-05, "loss": 1.407, "step": 100 }, { "epoch": 0.29, "grad_norm": 7.089232444763184, "learning_rate": 4.72e-05, "loss": 1.2287, "step": 120 }, { "epoch": 0.34, "grad_norm": 9.907115936279297, "learning_rate": 4.941964285714286e-05, "loss": 1.2205, "step": 140 }, { "epoch": 0.39, "grad_norm": 6.660067081451416, "learning_rate": 4.852678571428572e-05, "loss": 1.0867, "step": 160 }, { "epoch": 0.43, "grad_norm": 8.426629066467285, "learning_rate": 4.7633928571428573e-05, "loss": 1.097, "step": 180 }, { "epoch": 0.48, "grad_norm": 9.229801177978516, "learning_rate": 4.674107142857143e-05, "loss": 1.4158, "step": 200 }, { "epoch": 0.53, "grad_norm": 6.415766716003418, "learning_rate": 4.584821428571429e-05, "loss": 1.2353, "step": 220 }, { "epoch": 0.58, "grad_norm": 9.118013381958008, "learning_rate": 4.4955357142857144e-05, "loss": 1.0926, "step": 240 }, { "epoch": 0.63, "grad_norm": 8.883289337158203, "learning_rate": 4.40625e-05, "loss": 1.1642, "step": 260 }, { "epoch": 0.67, "grad_norm": 6.868738651275635, "learning_rate": 4.3169642857142864e-05, "loss": 1.0041, "step": 280 }, { "epoch": 0.72, "grad_norm": 2.424619436264038, "learning_rate": 4.2276785714285714e-05, "loss": 1.1074, "step": 300 }, { "epoch": 0.77, "grad_norm": 10.195758819580078, "learning_rate": 4.138392857142857e-05, "loss": 0.8968, "step": 320 }, { "epoch": 0.82, "grad_norm": 8.857401847839355, "learning_rate": 4.0491071428571434e-05, "loss": 1.2484, "step": 340 }, { "epoch": 0.87, "grad_norm": 7.8076605796813965, "learning_rate": 3.9598214285714284e-05, "loss": 0.998, "step": 360 }, { "epoch": 0.92, "grad_norm": 6.418033599853516, "learning_rate": 3.870535714285715e-05, "loss": 0.9212, "step": 380 }, { "epoch": 0.96, "grad_norm": 7.953289985656738, "learning_rate": 3.78125e-05, "loss": 1.1387, "step": 400 }, { "epoch": 1.0, "eval_accuracy": 0.7156626506024096, "eval_f1_macro": 0.24337876592111682, "eval_f1_micro": 0.7156626506024096, "eval_f1_weighted": 0.6701704805513728, "eval_loss": 0.9069176912307739, "eval_precision_macro": 0.28735578588970384, "eval_precision_micro": 0.7156626506024096, "eval_precision_weighted": 0.6398455360285946, "eval_recall_macro": 0.2455658658820232, "eval_recall_micro": 0.7156626506024096, "eval_recall_weighted": 0.7156626506024096, "eval_runtime": 32.7637, "eval_samples_per_second": 25.333, "eval_steps_per_second": 1.587, "step": 415 }, { "epoch": 1.01, "grad_norm": 10.716949462890625, "learning_rate": 3.691964285714286e-05, "loss": 0.974, "step": 420 }, { "epoch": 1.06, "grad_norm": 6.424170970916748, "learning_rate": 3.602678571428572e-05, "loss": 0.9156, "step": 440 }, { "epoch": 1.11, "grad_norm": 5.7230000495910645, "learning_rate": 3.513392857142857e-05, "loss": 1.0882, "step": 460 }, { "epoch": 1.16, "grad_norm": 6.781800746917725, "learning_rate": 3.424107142857143e-05, "loss": 0.8575, "step": 480 }, { "epoch": 1.2, "grad_norm": 7.278835296630859, "learning_rate": 3.334821428571429e-05, "loss": 0.8842, "step": 500 }, { "epoch": 1.25, "grad_norm": 8.899985313415527, "learning_rate": 3.2455357142857145e-05, "loss": 1.0324, "step": 520 }, { "epoch": 1.3, "grad_norm": 10.547853469848633, "learning_rate": 3.15625e-05, "loss": 0.9993, "step": 540 }, { "epoch": 1.35, "grad_norm": 5.870782852172852, "learning_rate": 3.066964285714286e-05, "loss": 1.0766, "step": 560 }, { "epoch": 1.4, "grad_norm": 7.413377285003662, "learning_rate": 2.9776785714285715e-05, "loss": 0.8224, "step": 580 }, { "epoch": 1.45, "grad_norm": 8.232254981994629, "learning_rate": 2.888392857142857e-05, "loss": 0.8699, "step": 600 }, { "epoch": 1.49, "grad_norm": 7.721341609954834, "learning_rate": 2.7991071428571432e-05, "loss": 1.0413, "step": 620 }, { "epoch": 1.54, "grad_norm": 10.697331428527832, "learning_rate": 2.7098214285714285e-05, "loss": 0.995, "step": 640 }, { "epoch": 1.59, "grad_norm": 5.213534355163574, "learning_rate": 2.6205357142857145e-05, "loss": 1.1162, "step": 660 }, { "epoch": 1.64, "grad_norm": 4.949966907501221, "learning_rate": 2.53125e-05, "loss": 0.7447, "step": 680 }, { "epoch": 1.69, "grad_norm": 8.088377952575684, "learning_rate": 2.441964285714286e-05, "loss": 0.7466, "step": 700 }, { "epoch": 1.73, "grad_norm": 6.961575984954834, "learning_rate": 2.3526785714285715e-05, "loss": 0.8619, "step": 720 }, { "epoch": 1.78, "grad_norm": 12.139084815979004, "learning_rate": 2.2633928571428572e-05, "loss": 0.6702, "step": 740 }, { "epoch": 1.83, "grad_norm": 4.518425941467285, "learning_rate": 2.174107142857143e-05, "loss": 0.8649, "step": 760 }, { "epoch": 1.88, "grad_norm": 7.617171764373779, "learning_rate": 2.084821428571429e-05, "loss": 0.7682, "step": 780 }, { "epoch": 1.93, "grad_norm": 9.569235801696777, "learning_rate": 1.9955357142857146e-05, "loss": 0.9026, "step": 800 }, { "epoch": 1.98, "grad_norm": 8.944718360900879, "learning_rate": 1.90625e-05, "loss": 0.8327, "step": 820 }, { "epoch": 2.0, "eval_accuracy": 0.744578313253012, "eval_f1_macro": 0.328500713177248, "eval_f1_micro": 0.744578313253012, "eval_f1_weighted": 0.7121093039405768, "eval_loss": 0.7984326481819153, "eval_precision_macro": 0.4303939410339066, "eval_precision_micro": 0.744578313253012, "eval_precision_weighted": 0.7177621227889628, "eval_recall_macro": 0.30809268775929033, "eval_recall_micro": 0.744578313253012, "eval_recall_weighted": 0.744578313253012, "eval_runtime": 30.47, "eval_samples_per_second": 27.24, "eval_steps_per_second": 1.707, "step": 830 }, { "epoch": 2.02, "grad_norm": 6.886695861816406, "learning_rate": 1.8169642857142856e-05, "loss": 0.8599, "step": 840 }, { "epoch": 2.07, "grad_norm": 3.803279399871826, "learning_rate": 1.7276785714285716e-05, "loss": 0.7697, "step": 860 }, { "epoch": 2.12, "grad_norm": 8.693037033081055, "learning_rate": 1.6383928571428573e-05, "loss": 0.7916, "step": 880 }, { "epoch": 2.17, "grad_norm": 10.645078659057617, "learning_rate": 1.549107142857143e-05, "loss": 0.6787, "step": 900 }, { "epoch": 2.22, "grad_norm": 6.220460414886475, "learning_rate": 1.4598214285714288e-05, "loss": 0.7976, "step": 920 }, { "epoch": 2.27, "grad_norm": 10.7758150100708, "learning_rate": 1.3705357142857145e-05, "loss": 0.6825, "step": 940 }, { "epoch": 2.31, "grad_norm": 11.92375373840332, "learning_rate": 1.28125e-05, "loss": 0.853, "step": 960 }, { "epoch": 2.36, "grad_norm": 8.55823040008545, "learning_rate": 1.1919642857142858e-05, "loss": 0.7647, "step": 980 }, { "epoch": 2.41, "grad_norm": 10.385089874267578, "learning_rate": 1.1026785714285715e-05, "loss": 0.7351, "step": 1000 }, { "epoch": 2.46, "grad_norm": 6.002354621887207, "learning_rate": 1.0133928571428572e-05, "loss": 0.8779, "step": 1020 }, { "epoch": 2.51, "grad_norm": 7.847404956817627, "learning_rate": 9.24107142857143e-06, "loss": 0.7776, "step": 1040 }, { "epoch": 2.55, "grad_norm": 5.796429634094238, "learning_rate": 8.348214285714285e-06, "loss": 0.743, "step": 1060 }, { "epoch": 2.6, "grad_norm": 8.282535552978516, "learning_rate": 7.455357142857143e-06, "loss": 0.8694, "step": 1080 }, { "epoch": 2.65, "grad_norm": 9.527463912963867, "learning_rate": 6.5625e-06, "loss": 0.7111, "step": 1100 }, { "epoch": 2.7, "grad_norm": 7.947423458099365, "learning_rate": 5.669642857142857e-06, "loss": 0.9326, "step": 1120 }, { "epoch": 2.75, "grad_norm": 8.990558624267578, "learning_rate": 4.776785714285715e-06, "loss": 0.7542, "step": 1140 }, { "epoch": 2.8, "grad_norm": 8.609009742736816, "learning_rate": 3.883928571428571e-06, "loss": 0.7247, "step": 1160 }, { "epoch": 2.84, "grad_norm": 3.701921224594116, "learning_rate": 2.991071428571429e-06, "loss": 0.6855, "step": 1180 }, { "epoch": 2.89, "grad_norm": 6.839054584503174, "learning_rate": 2.0982142857142857e-06, "loss": 0.7191, "step": 1200 }, { "epoch": 2.94, "grad_norm": 5.237525939941406, "learning_rate": 1.205357142857143e-06, "loss": 0.592, "step": 1220 }, { "epoch": 2.99, "grad_norm": 6.806934356689453, "learning_rate": 3.5714285714285716e-07, "loss": 0.7655, "step": 1240 }, { "epoch": 3.0, "eval_accuracy": 0.7626506024096386, "eval_f1_macro": 0.34243943617676403, "eval_f1_micro": 0.7626506024096386, "eval_f1_weighted": 0.7295452326605777, "eval_loss": 0.7544093132019043, "eval_precision_macro": 0.5376629516266399, "eval_precision_micro": 0.7626506024096386, "eval_precision_weighted": 0.7581766519951105, "eval_recall_macro": 0.3164834046818783, "eval_recall_micro": 0.7626506024096386, "eval_recall_weighted": 0.7626506024096386, "eval_runtime": 30.5059, "eval_samples_per_second": 27.208, "eval_steps_per_second": 1.705, "step": 1245 } ], "logging_steps": 20, "max_steps": 1245, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 7.702550117363405e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }