|
{ |
|
"best_metric": 0.7887323943661972, |
|
"best_model_checkpoint": "./model_out/checkpoint-480", |
|
"epoch": 15.737704918032787, |
|
"eval_steps": 60, |
|
"global_step": 480, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.32786885245901637, |
|
"grad_norm": 3.083059549331665, |
|
"learning_rate": 9.833333333333333e-05, |
|
"loss": 1.3429, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.6557377049180327, |
|
"grad_norm": 5.27333927154541, |
|
"learning_rate": 9.666666666666667e-05, |
|
"loss": 0.9792, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.9836065573770492, |
|
"grad_norm": 6.412991046905518, |
|
"learning_rate": 9.5e-05, |
|
"loss": 0.8173, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.3114754098360657, |
|
"grad_norm": 4.924794673919678, |
|
"learning_rate": 9.333333333333334e-05, |
|
"loss": 0.5787, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.639344262295082, |
|
"grad_norm": 7.327770233154297, |
|
"learning_rate": 9.166666666666667e-05, |
|
"loss": 0.3744, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.9672131147540983, |
|
"grad_norm": 8.928845405578613, |
|
"learning_rate": 9e-05, |
|
"loss": 0.4815, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.9672131147540983, |
|
"eval_accuracy": 0.7897310513447433, |
|
"eval_f1": 0.6728971962616822, |
|
"eval_loss": 0.5841665267944336, |
|
"eval_precision": 0.6666666666666666, |
|
"eval_recall": 0.6792452830188679, |
|
"eval_runtime": 0.9794, |
|
"eval_samples_per_second": 61.26, |
|
"eval_steps_per_second": 8.168, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.2950819672131146, |
|
"grad_norm": 5.491725444793701, |
|
"learning_rate": 8.833333333333333e-05, |
|
"loss": 0.2658, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.6229508196721314, |
|
"grad_norm": 5.113138675689697, |
|
"learning_rate": 8.666666666666667e-05, |
|
"loss": 0.2402, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.9508196721311473, |
|
"grad_norm": 6.449514865875244, |
|
"learning_rate": 8.5e-05, |
|
"loss": 0.1607, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 3.278688524590164, |
|
"grad_norm": 5.699795246124268, |
|
"learning_rate": 8.333333333333334e-05, |
|
"loss": 0.0852, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.6065573770491803, |
|
"grad_norm": 3.382863998413086, |
|
"learning_rate": 8.166666666666667e-05, |
|
"loss": 0.1513, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 3.9344262295081966, |
|
"grad_norm": 2.5515336990356445, |
|
"learning_rate": 8e-05, |
|
"loss": 0.2612, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.9344262295081966, |
|
"eval_accuracy": 0.8166259168704156, |
|
"eval_f1": 0.7009345794392523, |
|
"eval_loss": 0.7145981192588806, |
|
"eval_precision": 0.6944444444444444, |
|
"eval_recall": 0.7075471698113207, |
|
"eval_runtime": 0.9765, |
|
"eval_samples_per_second": 61.442, |
|
"eval_steps_per_second": 8.192, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.262295081967213, |
|
"grad_norm": 3.4330668449401855, |
|
"learning_rate": 7.833333333333333e-05, |
|
"loss": 0.0857, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 4.590163934426229, |
|
"grad_norm": 0.7570230960845947, |
|
"learning_rate": 7.666666666666667e-05, |
|
"loss": 0.0339, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.918032786885246, |
|
"grad_norm": 13.973723411560059, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.0939, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 5.245901639344262, |
|
"grad_norm": 6.072541236877441, |
|
"learning_rate": 7.333333333333333e-05, |
|
"loss": 0.0753, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 5.573770491803279, |
|
"grad_norm": 6.689024925231934, |
|
"learning_rate": 7.166666666666667e-05, |
|
"loss": 0.0497, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 5.901639344262295, |
|
"grad_norm": 2.9444985389709473, |
|
"learning_rate": 7e-05, |
|
"loss": 0.0535, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 5.901639344262295, |
|
"eval_accuracy": 0.8166259168704156, |
|
"eval_f1": 0.6697247706422018, |
|
"eval_loss": 0.7874237298965454, |
|
"eval_precision": 0.6517857142857143, |
|
"eval_recall": 0.6886792452830188, |
|
"eval_runtime": 0.9781, |
|
"eval_samples_per_second": 61.346, |
|
"eval_steps_per_second": 8.179, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 6.229508196721311, |
|
"grad_norm": 1.6225782632827759, |
|
"learning_rate": 6.833333333333333e-05, |
|
"loss": 0.0258, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 6.557377049180328, |
|
"grad_norm": 0.07454058527946472, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.0273, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.885245901639344, |
|
"grad_norm": 5.249931812286377, |
|
"learning_rate": 6.500000000000001e-05, |
|
"loss": 0.023, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 7.213114754098361, |
|
"grad_norm": 0.14014603197574615, |
|
"learning_rate": 6.333333333333333e-05, |
|
"loss": 0.0162, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 7.540983606557377, |
|
"grad_norm": 0.024733418598771095, |
|
"learning_rate": 6.166666666666667e-05, |
|
"loss": 0.0099, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 7.868852459016393, |
|
"grad_norm": 1.4897031784057617, |
|
"learning_rate": 6e-05, |
|
"loss": 0.0141, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 7.868852459016393, |
|
"eval_accuracy": 0.8557457212713936, |
|
"eval_f1": 0.780269058295964, |
|
"eval_loss": 0.8452157378196716, |
|
"eval_precision": 0.7435897435897436, |
|
"eval_recall": 0.8207547169811321, |
|
"eval_runtime": 0.9777, |
|
"eval_samples_per_second": 61.371, |
|
"eval_steps_per_second": 8.183, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 8.19672131147541, |
|
"grad_norm": 6.675982475280762, |
|
"learning_rate": 5.833333333333334e-05, |
|
"loss": 0.0201, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 8.524590163934427, |
|
"grad_norm": 0.21795310080051422, |
|
"learning_rate": 5.666666666666667e-05, |
|
"loss": 0.0098, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 8.852459016393443, |
|
"grad_norm": 1.0106713771820068, |
|
"learning_rate": 5.500000000000001e-05, |
|
"loss": 0.0162, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 9.180327868852459, |
|
"grad_norm": 1.9501103162765503, |
|
"learning_rate": 5.333333333333333e-05, |
|
"loss": 0.0166, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 9.508196721311476, |
|
"grad_norm": 0.09368854761123657, |
|
"learning_rate": 5.166666666666667e-05, |
|
"loss": 0.0178, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 9.836065573770492, |
|
"grad_norm": 5.705907821655273, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0102, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 9.836065573770492, |
|
"eval_accuracy": 0.8068459657701712, |
|
"eval_f1": 0.7296137339055794, |
|
"eval_loss": 1.098240613937378, |
|
"eval_precision": 0.6692913385826772, |
|
"eval_recall": 0.8018867924528302, |
|
"eval_runtime": 0.9802, |
|
"eval_samples_per_second": 61.211, |
|
"eval_steps_per_second": 8.161, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 10.163934426229508, |
|
"grad_norm": 0.016423122957348824, |
|
"learning_rate": 4.8333333333333334e-05, |
|
"loss": 0.0048, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 10.491803278688524, |
|
"grad_norm": 0.013950828462839127, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.0023, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 10.819672131147541, |
|
"grad_norm": 0.0043854909017682076, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.0049, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 11.147540983606557, |
|
"grad_norm": 0.5211315751075745, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 0.0218, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 11.475409836065573, |
|
"grad_norm": 0.0140004837885499, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.0085, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 11.80327868852459, |
|
"grad_norm": 1.8254053592681885, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0099, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 11.80327868852459, |
|
"eval_accuracy": 0.80440097799511, |
|
"eval_f1": 0.7203791469194313, |
|
"eval_loss": 1.3635536432266235, |
|
"eval_precision": 0.7238095238095238, |
|
"eval_recall": 0.7169811320754716, |
|
"eval_runtime": 0.9823, |
|
"eval_samples_per_second": 61.081, |
|
"eval_steps_per_second": 8.144, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 12.131147540983607, |
|
"grad_norm": 0.1549140065908432, |
|
"learning_rate": 3.8333333333333334e-05, |
|
"loss": 0.0004, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 12.459016393442623, |
|
"grad_norm": 4.475136756896973, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 0.0026, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 12.78688524590164, |
|
"grad_norm": 11.31729507446289, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0024, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 13.114754098360656, |
|
"grad_norm": 0.003214745782315731, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.0012, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 13.442622950819672, |
|
"grad_norm": 0.9436277747154236, |
|
"learning_rate": 3.1666666666666666e-05, |
|
"loss": 0.0145, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 13.770491803278688, |
|
"grad_norm": 5.284872055053711, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0087, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 13.770491803278688, |
|
"eval_accuracy": 0.8166259168704156, |
|
"eval_f1": 0.7751196172248804, |
|
"eval_loss": 1.346622347831726, |
|
"eval_precision": 0.7864077669902912, |
|
"eval_recall": 0.7641509433962265, |
|
"eval_runtime": 0.9819, |
|
"eval_samples_per_second": 61.108, |
|
"eval_steps_per_second": 8.148, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 14.098360655737705, |
|
"grad_norm": 0.003362592775374651, |
|
"learning_rate": 2.8333333333333335e-05, |
|
"loss": 0.0011, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 14.426229508196721, |
|
"grad_norm": 0.011366785503923893, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.0003, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 14.754098360655737, |
|
"grad_norm": 0.003698177170008421, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0002, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 15.081967213114755, |
|
"grad_norm": 0.0021139541640877724, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.0047, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 15.40983606557377, |
|
"grad_norm": 0.0036122933961451054, |
|
"learning_rate": 2.1666666666666667e-05, |
|
"loss": 0.0002, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 15.737704918032787, |
|
"grad_norm": 0.010879104025661945, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0002, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 15.737704918032787, |
|
"eval_accuracy": 0.8288508557457213, |
|
"eval_f1": 0.7887323943661972, |
|
"eval_loss": 1.271989107131958, |
|
"eval_precision": 0.7850467289719626, |
|
"eval_recall": 0.7924528301886793, |
|
"eval_runtime": 0.9809, |
|
"eval_samples_per_second": 61.168, |
|
"eval_steps_per_second": 8.156, |
|
"step": 480 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 60, |
|
"total_flos": 995567645030400.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|