|
{ |
|
"best_metric": 0.7544093132019043, |
|
"best_model_checkpoint": "xblock-social-screenshots-3/checkpoint-1245", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 1245, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 13.465712547302246, |
|
"learning_rate": 7.2e-06, |
|
"loss": 2.4572, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 9.6524076461792, |
|
"learning_rate": 1.52e-05, |
|
"loss": 2.079, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 9.261948585510254, |
|
"learning_rate": 2.32e-05, |
|
"loss": 1.7089, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 9.193083763122559, |
|
"learning_rate": 3.12e-05, |
|
"loss": 1.4402, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 7.320693492889404, |
|
"learning_rate": 3.9200000000000004e-05, |
|
"loss": 1.407, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 7.089232444763184, |
|
"learning_rate": 4.72e-05, |
|
"loss": 1.2287, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 9.907115936279297, |
|
"learning_rate": 4.941964285714286e-05, |
|
"loss": 1.2205, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 6.660067081451416, |
|
"learning_rate": 4.852678571428572e-05, |
|
"loss": 1.0867, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 8.426629066467285, |
|
"learning_rate": 4.7633928571428573e-05, |
|
"loss": 1.097, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 9.229801177978516, |
|
"learning_rate": 4.674107142857143e-05, |
|
"loss": 1.4158, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 6.415766716003418, |
|
"learning_rate": 4.584821428571429e-05, |
|
"loss": 1.2353, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 9.118013381958008, |
|
"learning_rate": 4.4955357142857144e-05, |
|
"loss": 1.0926, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 8.883289337158203, |
|
"learning_rate": 4.40625e-05, |
|
"loss": 1.1642, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 6.868738651275635, |
|
"learning_rate": 4.3169642857142864e-05, |
|
"loss": 1.0041, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 2.424619436264038, |
|
"learning_rate": 4.2276785714285714e-05, |
|
"loss": 1.1074, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 10.195758819580078, |
|
"learning_rate": 4.138392857142857e-05, |
|
"loss": 0.8968, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 8.857401847839355, |
|
"learning_rate": 4.0491071428571434e-05, |
|
"loss": 1.2484, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 7.8076605796813965, |
|
"learning_rate": 3.9598214285714284e-05, |
|
"loss": 0.998, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 6.418033599853516, |
|
"learning_rate": 3.870535714285715e-05, |
|
"loss": 0.9212, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 7.953289985656738, |
|
"learning_rate": 3.78125e-05, |
|
"loss": 1.1387, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7156626506024096, |
|
"eval_f1_macro": 0.24337876592111682, |
|
"eval_f1_micro": 0.7156626506024096, |
|
"eval_f1_weighted": 0.6701704805513728, |
|
"eval_loss": 0.9069176912307739, |
|
"eval_precision_macro": 0.28735578588970384, |
|
"eval_precision_micro": 0.7156626506024096, |
|
"eval_precision_weighted": 0.6398455360285946, |
|
"eval_recall_macro": 0.2455658658820232, |
|
"eval_recall_micro": 0.7156626506024096, |
|
"eval_recall_weighted": 0.7156626506024096, |
|
"eval_runtime": 32.7637, |
|
"eval_samples_per_second": 25.333, |
|
"eval_steps_per_second": 1.587, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 10.716949462890625, |
|
"learning_rate": 3.691964285714286e-05, |
|
"loss": 0.974, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 6.424170970916748, |
|
"learning_rate": 3.602678571428572e-05, |
|
"loss": 0.9156, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 5.7230000495910645, |
|
"learning_rate": 3.513392857142857e-05, |
|
"loss": 1.0882, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 6.781800746917725, |
|
"learning_rate": 3.424107142857143e-05, |
|
"loss": 0.8575, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 7.278835296630859, |
|
"learning_rate": 3.334821428571429e-05, |
|
"loss": 0.8842, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 8.899985313415527, |
|
"learning_rate": 3.2455357142857145e-05, |
|
"loss": 1.0324, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 10.547853469848633, |
|
"learning_rate": 3.15625e-05, |
|
"loss": 0.9993, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 5.870782852172852, |
|
"learning_rate": 3.066964285714286e-05, |
|
"loss": 1.0766, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 7.413377285003662, |
|
"learning_rate": 2.9776785714285715e-05, |
|
"loss": 0.8224, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 8.232254981994629, |
|
"learning_rate": 2.888392857142857e-05, |
|
"loss": 0.8699, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 7.721341609954834, |
|
"learning_rate": 2.7991071428571432e-05, |
|
"loss": 1.0413, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 10.697331428527832, |
|
"learning_rate": 2.7098214285714285e-05, |
|
"loss": 0.995, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 5.213534355163574, |
|
"learning_rate": 2.6205357142857145e-05, |
|
"loss": 1.1162, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 4.949966907501221, |
|
"learning_rate": 2.53125e-05, |
|
"loss": 0.7447, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 8.088377952575684, |
|
"learning_rate": 2.441964285714286e-05, |
|
"loss": 0.7466, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 6.961575984954834, |
|
"learning_rate": 2.3526785714285715e-05, |
|
"loss": 0.8619, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 12.139084815979004, |
|
"learning_rate": 2.2633928571428572e-05, |
|
"loss": 0.6702, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 4.518425941467285, |
|
"learning_rate": 2.174107142857143e-05, |
|
"loss": 0.8649, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 7.617171764373779, |
|
"learning_rate": 2.084821428571429e-05, |
|
"loss": 0.7682, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 9.569235801696777, |
|
"learning_rate": 1.9955357142857146e-05, |
|
"loss": 0.9026, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 8.944718360900879, |
|
"learning_rate": 1.90625e-05, |
|
"loss": 0.8327, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.744578313253012, |
|
"eval_f1_macro": 0.328500713177248, |
|
"eval_f1_micro": 0.744578313253012, |
|
"eval_f1_weighted": 0.7121093039405768, |
|
"eval_loss": 0.7984326481819153, |
|
"eval_precision_macro": 0.4303939410339066, |
|
"eval_precision_micro": 0.744578313253012, |
|
"eval_precision_weighted": 0.7177621227889628, |
|
"eval_recall_macro": 0.30809268775929033, |
|
"eval_recall_micro": 0.744578313253012, |
|
"eval_recall_weighted": 0.744578313253012, |
|
"eval_runtime": 30.47, |
|
"eval_samples_per_second": 27.24, |
|
"eval_steps_per_second": 1.707, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 6.886695861816406, |
|
"learning_rate": 1.8169642857142856e-05, |
|
"loss": 0.8599, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 3.803279399871826, |
|
"learning_rate": 1.7276785714285716e-05, |
|
"loss": 0.7697, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 8.693037033081055, |
|
"learning_rate": 1.6383928571428573e-05, |
|
"loss": 0.7916, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 10.645078659057617, |
|
"learning_rate": 1.549107142857143e-05, |
|
"loss": 0.6787, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 6.220460414886475, |
|
"learning_rate": 1.4598214285714288e-05, |
|
"loss": 0.7976, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 10.7758150100708, |
|
"learning_rate": 1.3705357142857145e-05, |
|
"loss": 0.6825, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 11.92375373840332, |
|
"learning_rate": 1.28125e-05, |
|
"loss": 0.853, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 8.55823040008545, |
|
"learning_rate": 1.1919642857142858e-05, |
|
"loss": 0.7647, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 10.385089874267578, |
|
"learning_rate": 1.1026785714285715e-05, |
|
"loss": 0.7351, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 6.002354621887207, |
|
"learning_rate": 1.0133928571428572e-05, |
|
"loss": 0.8779, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 7.847404956817627, |
|
"learning_rate": 9.24107142857143e-06, |
|
"loss": 0.7776, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 5.796429634094238, |
|
"learning_rate": 8.348214285714285e-06, |
|
"loss": 0.743, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 8.282535552978516, |
|
"learning_rate": 7.455357142857143e-06, |
|
"loss": 0.8694, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 9.527463912963867, |
|
"learning_rate": 6.5625e-06, |
|
"loss": 0.7111, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 7.947423458099365, |
|
"learning_rate": 5.669642857142857e-06, |
|
"loss": 0.9326, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 8.990558624267578, |
|
"learning_rate": 4.776785714285715e-06, |
|
"loss": 0.7542, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 8.609009742736816, |
|
"learning_rate": 3.883928571428571e-06, |
|
"loss": 0.7247, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"grad_norm": 3.701921224594116, |
|
"learning_rate": 2.991071428571429e-06, |
|
"loss": 0.6855, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"grad_norm": 6.839054584503174, |
|
"learning_rate": 2.0982142857142857e-06, |
|
"loss": 0.7191, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 5.237525939941406, |
|
"learning_rate": 1.205357142857143e-06, |
|
"loss": 0.592, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"grad_norm": 6.806934356689453, |
|
"learning_rate": 3.5714285714285716e-07, |
|
"loss": 0.7655, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7626506024096386, |
|
"eval_f1_macro": 0.34243943617676403, |
|
"eval_f1_micro": 0.7626506024096386, |
|
"eval_f1_weighted": 0.7295452326605777, |
|
"eval_loss": 0.7544093132019043, |
|
"eval_precision_macro": 0.5376629516266399, |
|
"eval_precision_micro": 0.7626506024096386, |
|
"eval_precision_weighted": 0.7581766519951105, |
|
"eval_recall_macro": 0.3164834046818783, |
|
"eval_recall_micro": 0.7626506024096386, |
|
"eval_recall_weighted": 0.7626506024096386, |
|
"eval_runtime": 30.5059, |
|
"eval_samples_per_second": 27.208, |
|
"eval_steps_per_second": 1.705, |
|
"step": 1245 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 1245, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 7.702550117363405e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|