|
{ |
|
"best_metric": 0.8011033681765389, |
|
"best_model_checkpoint": "./output_4/checkpoint-270", |
|
"epoch": 16.451612903225808, |
|
"eval_steps": 30, |
|
"global_step": 510, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.758064516129034e-06, |
|
"loss": 0.4653, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_f1": 0.47104247104247104, |
|
"eval_loss": 0.34549078345298767, |
|
"eval_precision": 0.44525547445255476, |
|
"eval_recall": 0.5, |
|
"eval_runtime": 0.8351, |
|
"eval_samples_per_second": 164.048, |
|
"eval_steps_per_second": 5.987, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 9.516129032258065e-06, |
|
"loss": 0.3365, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_f1": 0.47104247104247104, |
|
"eval_loss": 0.33970290422439575, |
|
"eval_precision": 0.44525547445255476, |
|
"eval_recall": 0.5, |
|
"eval_runtime": 0.8593, |
|
"eval_samples_per_second": 159.439, |
|
"eval_steps_per_second": 5.819, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 9.274193548387097e-06, |
|
"loss": 0.3174, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_f1": 0.47104247104247104, |
|
"eval_loss": 0.3178149461746216, |
|
"eval_precision": 0.44525547445255476, |
|
"eval_recall": 0.5, |
|
"eval_runtime": 0.8461, |
|
"eval_samples_per_second": 161.924, |
|
"eval_steps_per_second": 5.91, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 9.03225806451613e-06, |
|
"loss": 0.3154, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_f1": 0.787531017369727, |
|
"eval_loss": 0.27127787470817566, |
|
"eval_precision": 0.8358585858585859, |
|
"eval_recall": 0.7543715846994535, |
|
"eval_runtime": 0.8477, |
|
"eval_samples_per_second": 161.61, |
|
"eval_steps_per_second": 5.898, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 8.790322580645163e-06, |
|
"loss": 0.2477, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"eval_f1": 0.7579116465863454, |
|
"eval_loss": 0.2217591255903244, |
|
"eval_precision": 0.8185039370078739, |
|
"eval_recall": 0.7210382513661202, |
|
"eval_runtime": 0.8342, |
|
"eval_samples_per_second": 164.232, |
|
"eval_steps_per_second": 5.994, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 8.548387096774194e-06, |
|
"loss": 0.1818, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"eval_f1": 0.7754098360655737, |
|
"eval_loss": 0.2334146499633789, |
|
"eval_precision": 0.7754098360655737, |
|
"eval_recall": 0.7754098360655737, |
|
"eval_runtime": 0.833, |
|
"eval_samples_per_second": 164.466, |
|
"eval_steps_per_second": 6.002, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 8.306451612903227e-06, |
|
"loss": 0.1681, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"eval_f1": 0.6914948900051966, |
|
"eval_loss": 0.28801918029785156, |
|
"eval_precision": 0.7737403100775193, |
|
"eval_recall": 0.6543715846994536, |
|
"eval_runtime": 0.8432, |
|
"eval_samples_per_second": 162.485, |
|
"eval_steps_per_second": 5.93, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 8.064516129032258e-06, |
|
"loss": 0.1128, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"eval_f1": 0.726, |
|
"eval_loss": 0.2648162841796875, |
|
"eval_precision": 0.7981770833333333, |
|
"eval_recall": 0.6877049180327869, |
|
"eval_runtime": 0.8446, |
|
"eval_samples_per_second": 162.204, |
|
"eval_steps_per_second": 5.92, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 7.822580645161291e-06, |
|
"loss": 0.0989, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"eval_f1": 0.8011033681765389, |
|
"eval_loss": 0.2688542306423187, |
|
"eval_precision": 0.8219602977667494, |
|
"eval_recall": 0.7836065573770492, |
|
"eval_runtime": 0.8386, |
|
"eval_samples_per_second": 163.369, |
|
"eval_steps_per_second": 5.962, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 7.580645161290323e-06, |
|
"loss": 0.09, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"eval_f1": 0.7716666666666667, |
|
"eval_loss": 0.27322623133659363, |
|
"eval_precision": 0.8576388888888888, |
|
"eval_recall": 0.7251366120218579, |
|
"eval_runtime": 0.8354, |
|
"eval_samples_per_second": 163.993, |
|
"eval_steps_per_second": 5.985, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"learning_rate": 7.338709677419356e-06, |
|
"loss": 0.0772, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"eval_f1": 0.7740290898185634, |
|
"eval_loss": 0.30010420083999634, |
|
"eval_precision": 0.8053333333333332, |
|
"eval_recall": 0.7502732240437158, |
|
"eval_runtime": 0.8358, |
|
"eval_samples_per_second": 163.909, |
|
"eval_steps_per_second": 5.982, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 7.096774193548388e-06, |
|
"loss": 0.0616, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"eval_f1": 0.726, |
|
"eval_loss": 0.3184939920902252, |
|
"eval_precision": 0.7981770833333333, |
|
"eval_recall": 0.6877049180327869, |
|
"eval_runtime": 0.8427, |
|
"eval_samples_per_second": 162.575, |
|
"eval_steps_per_second": 5.933, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 12.58, |
|
"learning_rate": 6.854838709677419e-06, |
|
"loss": 0.0476, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 12.58, |
|
"eval_f1": 0.7215447154471545, |
|
"eval_loss": 0.36287298798561096, |
|
"eval_precision": 0.7369727047146402, |
|
"eval_recall": 0.708743169398907, |
|
"eval_runtime": 0.8413, |
|
"eval_samples_per_second": 162.849, |
|
"eval_steps_per_second": 5.943, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 13.55, |
|
"learning_rate": 6.612903225806452e-06, |
|
"loss": 0.0331, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 13.55, |
|
"eval_f1": 0.7878958479943701, |
|
"eval_loss": 0.3389006555080414, |
|
"eval_precision": 0.7970383275261324, |
|
"eval_recall": 0.7795081967213114, |
|
"eval_runtime": 0.8333, |
|
"eval_samples_per_second": 164.406, |
|
"eval_steps_per_second": 6.0, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 14.52, |
|
"learning_rate": 6.370967741935485e-06, |
|
"loss": 0.0473, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 14.52, |
|
"eval_f1": 0.7389572146197818, |
|
"eval_loss": 0.3224264681339264, |
|
"eval_precision": 0.8401162790697674, |
|
"eval_recall": 0.6918032786885246, |
|
"eval_runtime": 0.8344, |
|
"eval_samples_per_second": 164.2, |
|
"eval_steps_per_second": 5.993, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 6.129032258064517e-06, |
|
"loss": 0.0342, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"eval_f1": 0.787531017369727, |
|
"eval_loss": 0.3013747036457062, |
|
"eval_precision": 0.8358585858585859, |
|
"eval_recall": 0.7543715846994535, |
|
"eval_runtime": 0.8339, |
|
"eval_samples_per_second": 164.295, |
|
"eval_steps_per_second": 5.996, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 16.45, |
|
"learning_rate": 5.887096774193549e-06, |
|
"loss": 0.0374, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 16.45, |
|
"eval_f1": 0.726, |
|
"eval_loss": 0.34248507022857666, |
|
"eval_precision": 0.7981770833333333, |
|
"eval_recall": 0.6877049180327869, |
|
"eval_runtime": 0.8368, |
|
"eval_samples_per_second": 163.722, |
|
"eval_steps_per_second": 5.975, |
|
"step": 510 |
|
} |
|
], |
|
"logging_steps": 30, |
|
"max_steps": 1240, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 30, |
|
"total_flos": 415559245279680.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|