{ "best_metric": 0.5894856058137782, "best_model_checkpoint": "./runtime-text-classification/deberta-v3-xsmall-CoLA/checkpoint-134", "epoch": 3.0, "global_step": 201, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 2.5714285714285714e-05, "loss": 0.6778, "step": 3 }, { "epoch": 0.09, "learning_rate": 5.142857142857143e-05, "loss": 0.6609, "step": 6 }, { "epoch": 0.13, "learning_rate": 5.998426707136545e-05, "loss": 0.6124, "step": 9 }, { "epoch": 0.18, "learning_rate": 5.990171431423709e-05, "loss": 0.5988, "step": 12 }, { "epoch": 0.22, "learning_rate": 5.9748603036792754e-05, "loss": 0.5704, "step": 15 }, { "epoch": 0.27, "learning_rate": 5.952529453287223e-05, "loss": 0.5851, "step": 18 }, { "epoch": 0.31, "learning_rate": 5.923231573940778e-05, "loss": 0.5969, "step": 21 }, { "epoch": 0.36, "learning_rate": 5.887035799302091e-05, "loss": 0.5408, "step": 24 }, { "epoch": 0.4, "learning_rate": 5.844027539868812e-05, "loss": 0.5148, "step": 27 }, { "epoch": 0.45, "learning_rate": 5.794308281432527e-05, "loss": 0.4907, "step": 30 }, { "epoch": 0.49, "learning_rate": 5.7379953456046206e-05, "loss": 0.4503, "step": 33 }, { "epoch": 0.54, "learning_rate": 5.675221612974643e-05, "loss": 0.4701, "step": 36 }, { "epoch": 0.58, "learning_rate": 5.606135209554454e-05, "loss": 0.449, "step": 39 }, { "epoch": 0.63, "learning_rate": 5.530899157248014e-05, "loss": 0.4227, "step": 42 }, { "epoch": 0.67, "learning_rate": 5.449690989171633e-05, "loss": 0.3914, "step": 45 }, { "epoch": 0.72, "learning_rate": 5.3627023307323634e-05, "loss": 0.4338, "step": 48 }, { "epoch": 0.76, "learning_rate": 5.2701384474530855e-05, "loss": 0.3944, "step": 51 }, { "epoch": 0.81, "learning_rate": 5.172217760611265e-05, "loss": 0.4302, "step": 54 }, { "epoch": 0.85, "learning_rate": 5.0691713318343134e-05, "loss": 0.4251, "step": 57 }, { "epoch": 0.9, "learning_rate": 4.961242317867758e-05, "loss": 0.3713, "step": 60 }, { "epoch": 0.94, "learning_rate": 4.848685396802782e-05, "loss": 0.4146, "step": 63 }, { "epoch": 0.99, "learning_rate": 4.731766167117059e-05, "loss": 0.3945, "step": 66 }, { "epoch": 1.0, "eval_loss": 0.43225371837615967, "eval_matthews_correlation": 0.5778184033685675, "eval_runtime": 5.0494, "eval_samples_per_second": 206.558, "eval_steps_per_second": 51.689, "step": 67 }, { "epoch": 1.03, "learning_rate": 4.610760520946952e-05, "loss": 0.3873, "step": 69 }, { "epoch": 1.07, "learning_rate": 4.4859539930699636e-05, "loss": 0.33, "step": 72 }, { "epoch": 1.12, "learning_rate": 4.357641087133617e-05, "loss": 0.3265, "step": 75 }, { "epoch": 1.16, "learning_rate": 4.226124580720669e-05, "loss": 0.3306, "step": 78 }, { "epoch": 1.21, "learning_rate": 4.091714810890463e-05, "loss": 0.3695, "step": 81 }, { "epoch": 1.25, "learning_rate": 3.9547289418823325e-05, "loss": 0.3862, "step": 84 }, { "epoch": 1.3, "learning_rate": 3.815490216709029e-05, "loss": 0.3557, "step": 87 }, { "epoch": 1.34, "learning_rate": 3.6743271944061884e-05, "loss": 0.2995, "step": 90 }, { "epoch": 1.39, "learning_rate": 3.5315729747376686e-05, "loss": 0.2953, "step": 93 }, { "epoch": 1.43, "learning_rate": 3.3875644121862195e-05, "loss": 0.3356, "step": 96 }, { "epoch": 1.48, "learning_rate": 3.242641321084213e-05, "loss": 0.3182, "step": 99 }, { "epoch": 1.52, "learning_rate": 3.097145673760068e-05, "loss": 0.358, "step": 102 }, { "epoch": 1.57, "learning_rate": 2.9514207935924957e-05, "loss": 0.3351, "step": 105 }, { "epoch": 1.61, "learning_rate": 2.805810544876693e-05, "loss": 0.3057, "step": 108 }, { "epoch": 1.66, "learning_rate": 2.6606585214141475e-05, "loss": 0.3138, "step": 111 }, { "epoch": 1.7, "learning_rate": 2.5163072357407205e-05, "loss": 0.335, "step": 114 }, { "epoch": 1.75, "learning_rate": 2.373097310906175e-05, "loss": 0.3209, "step": 117 }, { "epoch": 1.79, "learning_rate": 2.231366676712283e-05, "loss": 0.3483, "step": 120 }, { "epoch": 1.84, "learning_rate": 2.0914497723061486e-05, "loss": 0.2824, "step": 123 }, { "epoch": 1.88, "learning_rate": 1.953676757010353e-05, "loss": 0.2709, "step": 126 }, { "epoch": 1.93, "learning_rate": 1.8183727312521392e-05, "loss": 0.3404, "step": 129 }, { "epoch": 1.97, "learning_rate": 1.685856969429953e-05, "loss": 0.3214, "step": 132 }, { "epoch": 2.0, "eval_loss": 0.423650860786438, "eval_matthews_correlation": 0.5894856058137782, "eval_runtime": 5.1798, "eval_samples_per_second": 201.361, "eval_steps_per_second": 50.388, "step": 134 }, { "epoch": 2.01, "learning_rate": 1.556442166527585e-05, "loss": 0.2703, "step": 135 }, { "epoch": 2.06, "learning_rate": 1.4304337002536097e-05, "loss": 0.2356, "step": 138 }, { "epoch": 2.1, "learning_rate": 1.3081289104472803e-05, "loss": 0.2812, "step": 141 }, { "epoch": 2.15, "learning_rate": 1.189816397451226e-05, "loss": 0.2759, "step": 144 }, { "epoch": 2.19, "learning_rate": 1.075775341106581e-05, "loss": 0.25, "step": 147 }, { "epoch": 2.24, "learning_rate": 9.662748419774903e-06, "loss": 0.3233, "step": 150 }, { "epoch": 2.28, "learning_rate": 8.61573286359513e-06, "loss": 0.2499, "step": 153 }, { "epoch": 2.33, "learning_rate": 7.619177365702619e-06, "loss": 0.2734, "step": 156 }, { "epoch": 2.37, "learning_rate": 6.6754334796105274e-06, "loss": 0.2456, "step": 159 }, { "epoch": 2.42, "learning_rate": 5.786728140251719e-06, "loss": 0.2782, "step": 162 }, { "epoch": 2.46, "learning_rate": 4.95515840912187e-06, "loss": 0.248, "step": 165 }, { "epoch": 2.51, "learning_rate": 4.182686525882242e-06, "loss": 0.2766, "step": 168 }, { "epoch": 2.55, "learning_rate": 3.4711352780992968e-06, "loss": 0.2192, "step": 171 }, { "epoch": 2.6, "learning_rate": 2.822183700046593e-06, "loss": 0.1969, "step": 174 }, { "epoch": 2.64, "learning_rate": 2.237363110718876e-06, "loss": 0.2931, "step": 177 }, { "epoch": 2.69, "learning_rate": 1.718053500406972e-06, "loss": 0.2551, "step": 180 }, { "epoch": 2.73, "learning_rate": 1.2654802743603888e-06, "loss": 0.246, "step": 183 }, { "epoch": 2.78, "learning_rate": 8.807113612212248e-07, "loss": 0.2474, "step": 186 }, { "epoch": 2.82, "learning_rate": 5.646546930528284e-07, "loss": 0.2691, "step": 189 }, { "epoch": 2.87, "learning_rate": 3.1805606290931987e-07, "loss": 0.3154, "step": 192 }, { "epoch": 2.91, "learning_rate": 1.414973650015816e-07, "loss": 0.2823, "step": 195 }, { "epoch": 2.96, "learning_rate": 3.5395221612279924e-08, "loss": 0.2711, "step": 198 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 0.3059, "step": 201 }, { "epoch": 3.0, "eval_loss": 0.4636174738407135, "eval_matthews_correlation": 0.5794528111058918, "eval_runtime": 5.0259, "eval_samples_per_second": 207.524, "eval_steps_per_second": 51.931, "step": 201 }, { "epoch": 3.0, "step": 201, "total_flos": 422473413165056.0, "train_loss": 0.3622648042825917, "train_runtime": 157.4171, "train_samples_per_second": 162.962, "train_steps_per_second": 1.277 } ], "max_steps": 201, "num_train_epochs": 3, "total_flos": 422473413165056.0, "trial_name": null, "trial_params": null }