{ "best_metric": 0.8932172851340642, "best_model_checkpoint": "./save_models/qqp/roberta-base_lr1e-05/checkpoint-12800", "epoch": 10.0, "eval_steps": 500, "global_step": 12800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.39, "learning_rate": 6.510416666666667e-06, "loss": 0.488, "step": 500 }, { "epoch": 0.78, "learning_rate": 9.807180851063832e-06, "loss": 0.3263, "step": 1000 }, { "epoch": 1.0, "eval_accuracy": 0.879235948880033, "eval_averaged_scores": 0.8598257175939353, "eval_f1": 0.8404154863078377, "eval_loss": 0.27544060349464417, "eval_runtime": 30.2577, "eval_samples_per_second": 1202.503, "eval_steps_per_second": 4.726, "step": 1280 }, { "epoch": 1.17, "learning_rate": 9.391622340425532e-06, "loss": 0.2866, "step": 1500 }, { "epoch": 1.56, "learning_rate": 8.976063829787235e-06, "loss": 0.2674, "step": 2000 }, { "epoch": 1.95, "learning_rate": 8.560505319148937e-06, "loss": 0.2554, "step": 2500 }, { "epoch": 2.0, "eval_accuracy": 0.891768585955751, "eval_averaged_scores": 0.8743571206442515, "eval_f1": 0.8569456553327521, "eval_loss": 0.2498372197151184, "eval_runtime": 27.6277, "eval_samples_per_second": 1316.974, "eval_steps_per_second": 5.176, "step": 2560 }, { "epoch": 2.34, "learning_rate": 8.144946808510639e-06, "loss": 0.2339, "step": 3000 }, { "epoch": 2.73, "learning_rate": 7.729388297872341e-06, "loss": 0.2263, "step": 3500 }, { "epoch": 3.0, "eval_accuracy": 0.8935825202693417, "eval_averaged_scores": 0.8724041088244425, "eval_f1": 0.8512256973795435, "eval_loss": 0.2527249753475189, "eval_runtime": 27.9, "eval_samples_per_second": 1304.123, "eval_steps_per_second": 5.125, "step": 3840 }, { "epoch": 3.12, "learning_rate": 7.313829787234044e-06, "loss": 0.2159, "step": 4000 }, { "epoch": 3.52, "learning_rate": 6.898271276595745e-06, "loss": 0.2026, "step": 4500 }, { "epoch": 3.91, "learning_rate": 6.482712765957447e-06, "loss": 0.2011, "step": 5000 }, { "epoch": 4.0, "eval_accuracy": 0.9005634189913426, "eval_averaged_scores": 0.8845524747016884, "eval_f1": 0.8685415304120341, "eval_loss": 0.23971907794475555, "eval_runtime": 27.6643, "eval_samples_per_second": 1315.233, "eval_steps_per_second": 5.169, "step": 5120 }, { "epoch": 4.3, "learning_rate": 6.06715425531915e-06, "loss": 0.1871, "step": 5500 }, { "epoch": 4.69, "learning_rate": 5.651595744680851e-06, "loss": 0.1803, "step": 6000 }, { "epoch": 5.0, "eval_accuracy": 0.9041088360588154, "eval_averaged_scores": 0.8878877207108364, "eval_f1": 0.8716666053628573, "eval_loss": 0.2404341995716095, "eval_runtime": 27.871, "eval_samples_per_second": 1305.48, "eval_steps_per_second": 5.131, "step": 6400 }, { "epoch": 5.08, "learning_rate": 5.236037234042554e-06, "loss": 0.1775, "step": 6500 }, { "epoch": 5.47, "learning_rate": 4.820478723404256e-06, "loss": 0.1644, "step": 7000 }, { "epoch": 5.86, "learning_rate": 4.404920212765958e-06, "loss": 0.1654, "step": 7500 }, { "epoch": 6.0, "eval_accuracy": 0.9068297375292016, "eval_averaged_scores": 0.8913133079820112, "eval_f1": 0.8757968784348208, "eval_loss": 0.24361343681812286, "eval_runtime": 28.0056, "eval_samples_per_second": 1299.207, "eval_steps_per_second": 5.106, "step": 7680 }, { "epoch": 6.25, "learning_rate": 3.98936170212766e-06, "loss": 0.1527, "step": 8000 }, { "epoch": 6.64, "learning_rate": 3.5738031914893617e-06, "loss": 0.1519, "step": 8500 }, { "epoch": 7.0, "eval_accuracy": 0.9057303833997526, "eval_averaged_scores": 0.8910411109500566, "eval_f1": 0.8763518385003605, "eval_loss": 0.24579738080501556, "eval_runtime": 28.1352, "eval_samples_per_second": 1293.219, "eval_steps_per_second": 5.083, "step": 8960 }, { "epoch": 7.03, "learning_rate": 3.1582446808510644e-06, "loss": 0.1534, "step": 9000 }, { "epoch": 7.42, "learning_rate": 2.7426861702127662e-06, "loss": 0.138, "step": 9500 }, { "epoch": 7.81, "learning_rate": 2.327127659574468e-06, "loss": 0.1418, "step": 10000 }, { "epoch": 8.0, "eval_accuracy": 0.9066923182630204, "eval_averaged_scores": 0.8916345948024302, "eval_f1": 0.8765768713418401, "eval_loss": 0.25888094305992126, "eval_runtime": 27.7859, "eval_samples_per_second": 1309.477, "eval_steps_per_second": 5.146, "step": 10240 }, { "epoch": 8.2, "learning_rate": 1.9115691489361704e-06, "loss": 0.1366, "step": 10500 }, { "epoch": 8.59, "learning_rate": 1.4960106382978725e-06, "loss": 0.1307, "step": 11000 }, { "epoch": 8.98, "learning_rate": 1.0804521276595746e-06, "loss": 0.1327, "step": 11500 }, { "epoch": 9.0, "eval_accuracy": 0.9079840593651229, "eval_averaged_scores": 0.8931944703014327, "eval_f1": 0.8784048812377424, "eval_loss": 0.25855743885040283, "eval_runtime": 27.685, "eval_samples_per_second": 1314.247, "eval_steps_per_second": 5.165, "step": 11520 }, { "epoch": 9.38, "learning_rate": 6.648936170212766e-07, "loss": 0.1271, "step": 12000 }, { "epoch": 9.77, "learning_rate": 2.4933510638297876e-07, "loss": 0.1257, "step": 12500 }, { "epoch": 10.0, "eval_accuracy": 0.9080665109248317, "eval_averaged_scores": 0.8932172851340642, "eval_f1": 0.8783680593432966, "eval_loss": 0.26402151584625244, "eval_runtime": 28.0618, "eval_samples_per_second": 1296.601, "eval_steps_per_second": 5.096, "step": 12800 }, { "epoch": 10.0, "step": 12800, "total_flos": 1.5831238702432614e+17, "train_loss": 0.1970924124121666, "train_runtime": 4395.9489, "train_samples_per_second": 744.915, "train_steps_per_second": 2.912 } ], "logging_steps": 500, "max_steps": 12800, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.5831238702432614e+17, "train_batch_size": 256, "trial_name": null, "trial_params": null }