{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.11376428604126, "learning_rate": 4.75e-05, "loss": 0.5647, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.706766917293233, "eval_f1": 0.6297262783854312, "eval_loss": 0.5166164636611938, "eval_precision": 0.6379598662207357, "eval_recall": 0.6250227314057101, "eval_runtime": 5.1151, "eval_samples_per_second": 78.005, "eval_steps_per_second": 9.775, "step": 122 }, { "epoch": 2.0, "grad_norm": 4.067341327667236, "learning_rate": 4.5e-05, "loss": 0.5067, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7343358395989975, "eval_f1": 0.6926129426129426, "eval_loss": 0.4954279363155365, "eval_precision": 0.6870370370370371, "eval_recall": 0.7020367339516276, "eval_runtime": 5.0465, "eval_samples_per_second": 79.065, "eval_steps_per_second": 9.908, "step": 244 }, { "epoch": 3.0, "grad_norm": 4.497439861297607, "learning_rate": 4.25e-05, "loss": 0.4617, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.7919799498746867, "eval_f1": 0.7496919995465023, "eval_loss": 0.4390866756439209, "eval_precision": 0.7490801616502805, "eval_recall": 0.7503182396799418, "eval_runtime": 5.0495, "eval_samples_per_second": 79.017, "eval_steps_per_second": 9.902, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.5452804565429688, "learning_rate": 4e-05, "loss": 0.4044, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8145363408521303, "eval_f1": 0.7773840400506664, "eval_loss": 0.39114564657211304, "eval_precision": 0.7760504201680672, "eval_recall": 0.7787779596290234, "eval_runtime": 5.0465, "eval_samples_per_second": 79.065, "eval_steps_per_second": 9.908, "step": 488 }, { "epoch": 5.0, "grad_norm": 5.4987263679504395, "learning_rate": 3.7500000000000003e-05, "loss": 0.382, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8195488721804511, "eval_f1": 0.7962206332992849, "eval_loss": 0.38273975253105164, "eval_precision": 0.7848639455782312, "eval_recall": 0.8198308783415167, "eval_runtime": 5.0801, "eval_samples_per_second": 78.542, "eval_steps_per_second": 9.842, "step": 610 }, { "epoch": 6.0, "grad_norm": 6.1503801345825195, "learning_rate": 3.5e-05, "loss": 0.3494, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8421052631578947, "eval_f1": 0.8100071803786705, "eval_loss": 0.3528314530849457, "eval_precision": 0.8092466373122624, "eval_recall": 0.8107837788688852, "eval_runtime": 5.0684, "eval_samples_per_second": 78.723, "eval_steps_per_second": 9.865, "step": 732 }, { "epoch": 7.0, "grad_norm": 1.3256051540374756, "learning_rate": 3.2500000000000004e-05, "loss": 0.3423, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8255172205802521, "eval_loss": 0.3441900908946991, "eval_precision": 0.8239495798319327, "eval_recall": 0.8271503909801782, "eval_runtime": 5.0527, "eval_samples_per_second": 78.968, "eval_steps_per_second": 9.896, "step": 854 }, { "epoch": 8.0, "grad_norm": 3.73527455329895, "learning_rate": 3e-05, "loss": 0.33, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8341632880321839, "eval_loss": 0.3399864733219147, "eval_precision": 0.8479139504563233, "eval_recall": 0.8235133660665576, "eval_runtime": 5.0846, "eval_samples_per_second": 78.473, "eval_steps_per_second": 9.834, "step": 976 }, { "epoch": 9.0, "grad_norm": 7.524757385253906, "learning_rate": 2.7500000000000004e-05, "loss": 0.3296, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.849624060150376, "eval_f1": 0.8127815315315315, "eval_loss": 0.3349246084690094, "eval_precision": 0.8244897959183674, "eval_recall": 0.8036006546644845, "eval_runtime": 5.0487, "eval_samples_per_second": 79.03, "eval_steps_per_second": 9.904, "step": 1098 }, { "epoch": 10.0, "grad_norm": 6.317535877227783, "learning_rate": 2.5e-05, "loss": 0.3074, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8249232119350592, "eval_loss": 0.33487534523010254, "eval_precision": 0.8467014712861889, "eval_recall": 0.8099654482633206, "eval_runtime": 5.0732, "eval_samples_per_second": 78.649, "eval_steps_per_second": 9.856, "step": 1220 }, { "epoch": 11.0, "grad_norm": 3.027658224105835, "learning_rate": 2.25e-05, "loss": 0.2911, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8377439939939939, "eval_loss": 0.32399529218673706, "eval_precision": 0.8503401360544218, "eval_recall": 0.8277868703400618, "eval_runtime": 5.0441, "eval_samples_per_second": 79.102, "eval_steps_per_second": 9.913, "step": 1342 }, { "epoch": 12.0, "grad_norm": 9.344508171081543, "learning_rate": 2e-05, "loss": 0.2855, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8454251965513313, "eval_loss": 0.3273135721683502, "eval_precision": 0.8463049835506276, "eval_recall": 0.8445626477541371, "eval_runtime": 5.049, "eval_samples_per_second": 79.026, "eval_steps_per_second": 9.903, "step": 1464 }, { "epoch": 13.0, "grad_norm": 4.517082214355469, "learning_rate": 1.75e-05, "loss": 0.2903, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8295950648528947, "eval_loss": 0.32846182584762573, "eval_precision": 0.8472157618446409, "eval_recall": 0.816739407164939, "eval_runtime": 5.0968, "eval_samples_per_second": 78.285, "eval_steps_per_second": 9.81, "step": 1586 }, { "epoch": 14.0, "grad_norm": 8.310128211975098, "learning_rate": 1.5e-05, "loss": 0.2896, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8341632880321839, "eval_loss": 0.3254058063030243, "eval_precision": 0.8479139504563233, "eval_recall": 0.8235133660665576, "eval_runtime": 5.0513, "eval_samples_per_second": 78.99, "eval_steps_per_second": 9.898, "step": 1708 }, { "epoch": 15.0, "grad_norm": 1.1135729551315308, "learning_rate": 1.25e-05, "loss": 0.2744, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8359175094431583, "eval_loss": 0.3240545392036438, "eval_precision": 0.8376607470912432, "eval_recall": 0.8342425895617385, "eval_runtime": 5.0649, "eval_samples_per_second": 78.778, "eval_steps_per_second": 9.872, "step": 1830 }, { "epoch": 16.0, "grad_norm": 6.9109649658203125, "learning_rate": 1e-05, "loss": 0.2691, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8263588263588264, "eval_loss": 0.3209517002105713, "eval_precision": 0.8289473684210527, "eval_recall": 0.8239225313693399, "eval_runtime": 5.0874, "eval_samples_per_second": 78.429, "eval_steps_per_second": 9.828, "step": 1952 }, { "epoch": 17.0, "grad_norm": 6.5280585289001465, "learning_rate": 7.5e-06, "loss": 0.2671, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8377439939939939, "eval_loss": 0.3208070397377014, "eval_precision": 0.8503401360544218, "eval_recall": 0.8277868703400618, "eval_runtime": 5.0622, "eval_samples_per_second": 78.82, "eval_steps_per_second": 9.877, "step": 2074 }, { "epoch": 18.0, "grad_norm": 3.9085135459899902, "learning_rate": 5e-06, "loss": 0.2736, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8421640488656195, "eval_loss": 0.31788739562034607, "eval_precision": 0.8512313860252005, "eval_recall": 0.8345608292416803, "eval_runtime": 5.0487, "eval_samples_per_second": 79.031, "eval_steps_per_second": 9.904, "step": 2196 }, { "epoch": 19.0, "grad_norm": 3.626889705657959, "learning_rate": 2.5e-06, "loss": 0.2662, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8404212771630449, "eval_loss": 0.3179715573787689, "eval_precision": 0.854416558018253, "eval_recall": 0.8295599199854519, "eval_runtime": 5.0563, "eval_samples_per_second": 78.912, "eval_steps_per_second": 9.889, "step": 2318 }, { "epoch": 20.0, "grad_norm": 9.089592933654785, "learning_rate": 0.0, "loss": 0.2664, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8413023981282901, "eval_loss": 0.3167513608932495, "eval_precision": 0.8527593534677056, "eval_recall": 0.8320603746135662, "eval_runtime": 5.0535, "eval_samples_per_second": 78.955, "eval_steps_per_second": 9.894, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7609911792720000.0, "train_loss": 0.33757916747546585, "train_runtime": 1952.408, "train_samples_per_second": 37.267, "train_steps_per_second": 1.25 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7609911792720000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }