{ "best_metric": 0.2918669879436493, "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_logit_kd_pretrain_stsb/checkpoint-675", "epoch": 20.0, "global_step": 900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 1.1501, "step": 45 }, { "epoch": 1.0, "eval_combined_score": 0.7848155232626217, "eval_loss": 0.47263744473457336, "eval_pearson": 0.7774458991104952, "eval_runtime": 2.8147, "eval_samples_per_second": 532.917, "eval_spearmanr": 0.7921851474147482, "eval_steps_per_second": 4.263, "step": 45 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.364, "step": 90 }, { "epoch": 2.0, "eval_combined_score": 0.8455802664916386, "eval_loss": 0.34795352816581726, "eval_pearson": 0.8456719627096524, "eval_runtime": 2.8104, "eval_samples_per_second": 533.726, "eval_spearmanr": 0.8454885702736249, "eval_steps_per_second": 4.27, "step": 90 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.259, "step": 135 }, { "epoch": 3.0, "eval_combined_score": 0.8585617866351674, "eval_loss": 0.3156156539916992, "eval_pearson": 0.858172801547449, "eval_runtime": 2.841, "eval_samples_per_second": 527.982, "eval_spearmanr": 0.8589507717228858, "eval_steps_per_second": 4.224, "step": 135 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.2054, "step": 180 }, { "epoch": 4.0, "eval_combined_score": 0.8549704899730026, "eval_loss": 0.423094242811203, "eval_pearson": 0.8550544334442006, "eval_runtime": 2.8159, "eval_samples_per_second": 532.695, "eval_spearmanr": 0.8548865465018045, "eval_steps_per_second": 4.262, "step": 180 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.1629, "step": 225 }, { "epoch": 5.0, "eval_combined_score": 0.8661052960846953, "eval_loss": 0.3244686424732208, "eval_pearson": 0.8668446586721916, "eval_runtime": 2.8146, "eval_samples_per_second": 532.932, "eval_spearmanr": 0.8653659334971991, "eval_steps_per_second": 4.263, "step": 225 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.1263, "step": 270 }, { "epoch": 6.0, "eval_combined_score": 0.8637024825465704, "eval_loss": 0.31924131512641907, "eval_pearson": 0.8648820058548446, "eval_runtime": 2.8102, "eval_samples_per_second": 533.763, "eval_spearmanr": 0.8625229592382962, "eval_steps_per_second": 4.27, "step": 270 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.1021, "step": 315 }, { "epoch": 7.0, "eval_combined_score": 0.8641855325700929, "eval_loss": 0.33367273211479187, "eval_pearson": 0.8655048921422074, "eval_runtime": 2.824, "eval_samples_per_second": 531.159, "eval_spearmanr": 0.8628661729979783, "eval_steps_per_second": 4.249, "step": 315 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 0.0841, "step": 360 }, { "epoch": 8.0, "eval_combined_score": 0.8589275349426662, "eval_loss": 0.3060952126979828, "eval_pearson": 0.8601387509705535, "eval_runtime": 2.8107, "eval_samples_per_second": 533.667, "eval_spearmanr": 0.8577163189147787, "eval_steps_per_second": 4.269, "step": 360 }, { "epoch": 9.0, "learning_rate": 4.1e-05, "loss": 0.0713, "step": 405 }, { "epoch": 9.0, "eval_combined_score": 0.8565713298409787, "eval_loss": 0.36002376675605774, "eval_pearson": 0.8576436109786436, "eval_runtime": 2.8144, "eval_samples_per_second": 532.978, "eval_spearmanr": 0.8554990487033137, "eval_steps_per_second": 4.264, "step": 405 }, { "epoch": 10.0, "learning_rate": 4e-05, "loss": 0.0587, "step": 450 }, { "epoch": 10.0, "eval_combined_score": 0.8609998923935287, "eval_loss": 0.31353628635406494, "eval_pearson": 0.8619690194372541, "eval_runtime": 2.8144, "eval_samples_per_second": 532.966, "eval_spearmanr": 0.8600307653498033, "eval_steps_per_second": 4.264, "step": 450 }, { "epoch": 11.0, "learning_rate": 3.9000000000000006e-05, "loss": 0.0488, "step": 495 }, { "epoch": 11.0, "eval_combined_score": 0.8630528258320453, "eval_loss": 0.3006461262702942, "eval_pearson": 0.8640987141337612, "eval_runtime": 2.8106, "eval_samples_per_second": 533.7, "eval_spearmanr": 0.8620069375303294, "eval_steps_per_second": 4.27, "step": 495 }, { "epoch": 12.0, "learning_rate": 3.8e-05, "loss": 0.0441, "step": 540 }, { "epoch": 12.0, "eval_combined_score": 0.8633072379827798, "eval_loss": 0.3308115303516388, "eval_pearson": 0.8645046486846006, "eval_runtime": 2.8091, "eval_samples_per_second": 533.981, "eval_spearmanr": 0.8621098272809588, "eval_steps_per_second": 4.272, "step": 540 }, { "epoch": 13.0, "learning_rate": 3.7e-05, "loss": 0.0385, "step": 585 }, { "epoch": 13.0, "eval_combined_score": 0.8610427119151247, "eval_loss": 0.34676018357276917, "eval_pearson": 0.8619973007896685, "eval_runtime": 2.8063, "eval_samples_per_second": 534.504, "eval_spearmanr": 0.860088123040581, "eval_steps_per_second": 4.276, "step": 585 }, { "epoch": 14.0, "learning_rate": 3.6e-05, "loss": 0.0346, "step": 630 }, { "epoch": 14.0, "eval_combined_score": 0.864625467802823, "eval_loss": 0.3174898028373718, "eval_pearson": 0.8658273441962089, "eval_runtime": 2.8116, "eval_samples_per_second": 533.507, "eval_spearmanr": 0.8634235914094371, "eval_steps_per_second": 4.268, "step": 630 }, { "epoch": 15.0, "learning_rate": 3.5e-05, "loss": 0.0298, "step": 675 }, { "epoch": 15.0, "eval_combined_score": 0.8653611315954421, "eval_loss": 0.2918669879436493, "eval_pearson": 0.8665001034932058, "eval_runtime": 2.812, "eval_samples_per_second": 533.434, "eval_spearmanr": 0.8642221596976783, "eval_steps_per_second": 4.267, "step": 675 }, { "epoch": 16.0, "learning_rate": 3.4000000000000007e-05, "loss": 0.0299, "step": 720 }, { "epoch": 16.0, "eval_combined_score": 0.8638818647743551, "eval_loss": 0.310304194688797, "eval_pearson": 0.8649166348556964, "eval_runtime": 2.8123, "eval_samples_per_second": 533.376, "eval_spearmanr": 0.862847094693014, "eval_steps_per_second": 4.267, "step": 720 }, { "epoch": 17.0, "learning_rate": 3.3e-05, "loss": 0.0263, "step": 765 }, { "epoch": 17.0, "eval_combined_score": 0.8609287949422196, "eval_loss": 0.3324874937534332, "eval_pearson": 0.8619612623497133, "eval_runtime": 2.8136, "eval_samples_per_second": 533.131, "eval_spearmanr": 0.859896327534726, "eval_steps_per_second": 4.265, "step": 765 }, { "epoch": 18.0, "learning_rate": 3.2000000000000005e-05, "loss": 0.0237, "step": 810 }, { "epoch": 18.0, "eval_combined_score": 0.8623362746505621, "eval_loss": 0.3092164397239685, "eval_pearson": 0.8635793188062354, "eval_runtime": 2.8105, "eval_samples_per_second": 533.72, "eval_spearmanr": 0.8610932304948888, "eval_steps_per_second": 4.27, "step": 810 }, { "epoch": 19.0, "learning_rate": 3.1e-05, "loss": 0.0213, "step": 855 }, { "epoch": 19.0, "eval_combined_score": 0.8641800158406565, "eval_loss": 0.31693407893180847, "eval_pearson": 0.8652586147489575, "eval_runtime": 2.8089, "eval_samples_per_second": 534.009, "eval_spearmanr": 0.8631014169323554, "eval_steps_per_second": 4.272, "step": 855 }, { "epoch": 20.0, "learning_rate": 3e-05, "loss": 0.0196, "step": 900 }, { "epoch": 20.0, "eval_combined_score": 0.8635686954927306, "eval_loss": 0.29848989844322205, "eval_pearson": 0.8647368126144308, "eval_runtime": 2.8089, "eval_samples_per_second": 534.023, "eval_spearmanr": 0.8624005783710303, "eval_steps_per_second": 4.272, "step": 900 }, { "epoch": 20.0, "step": 900, "total_flos": 3651409731911680.0, "train_loss": 0.14503458228376176, "train_runtime": 766.7055, "train_samples_per_second": 374.916, "train_steps_per_second": 2.935 } ], "max_steps": 2250, "num_train_epochs": 50, "total_flos": 3651409731911680.0, "trial_name": null, "trial_params": null }