{ "best_metric": 0.95920731151508, "best_model_checkpoint": "/scicore/home/lauerg/cerque0000/models/nlp-job-ads/gbert-large-ft-edu-redux/checkpoint-1400", "epoch": 9.0, "global_step": 2664, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.34, "eval_loss": 0.29267561435699463, "eval_lrap": 0.7186470358938737, "eval_runtime": 22.3251, "eval_samples_per_second": 30.101, "eval_steps_per_second": 3.763, "step": 100 }, { "epoch": 0.68, "eval_loss": 0.19173942506313324, "eval_lrap": 0.8874944283175044, "eval_runtime": 22.732, "eval_samples_per_second": 29.562, "eval_steps_per_second": 3.695, "step": 200 }, { "epoch": 1.01, "eval_loss": 0.1740952730178833, "eval_lrap": 0.9086508545541496, "eval_runtime": 22.6942, "eval_samples_per_second": 29.611, "eval_steps_per_second": 3.701, "step": 300 }, { "epoch": 1.35, "eval_loss": 0.19585496187210083, "eval_lrap": 0.9103675474295027, "eval_runtime": 23.3607, "eval_samples_per_second": 28.766, "eval_steps_per_second": 3.596, "step": 400 }, { "epoch": 1.69, "learning_rate": 2.193243243243243e-05, "loss": 0.2242, "step": 500 }, { "epoch": 1.69, "eval_loss": 0.1878644824028015, "eval_lrap": 0.9258567343567102, "eval_runtime": 22.7639, "eval_samples_per_second": 29.52, "eval_steps_per_second": 3.69, "step": 500 }, { "epoch": 2.03, "eval_loss": 0.16258566081523895, "eval_lrap": 0.9308544571355302, "eval_runtime": 23.3248, "eval_samples_per_second": 28.81, "eval_steps_per_second": 3.601, "step": 600 }, { "epoch": 2.36, "eval_loss": 0.15402589738368988, "eval_lrap": 0.9306379874972384, "eval_runtime": 22.759, "eval_samples_per_second": 29.527, "eval_steps_per_second": 3.691, "step": 700 }, { "epoch": 2.7, "eval_loss": 0.1420990228652954, "eval_lrap": 0.9364025033506702, "eval_runtime": 23.3898, "eval_samples_per_second": 28.73, "eval_steps_per_second": 3.591, "step": 800 }, { "epoch": 3.04, "eval_loss": 0.14033490419387817, "eval_lrap": 0.9166296066698628, "eval_runtime": 22.7034, "eval_samples_per_second": 29.599, "eval_steps_per_second": 3.7, "step": 900 }, { "epoch": 3.38, "learning_rate": 1.6864864864864864e-05, "loss": 0.1316, "step": 1000 }, { "epoch": 3.38, "eval_loss": 0.13963457942008972, "eval_lrap": 0.9372152596624732, "eval_runtime": 23.3136, "eval_samples_per_second": 28.824, "eval_steps_per_second": 3.603, "step": 1000 }, { "epoch": 3.72, "eval_loss": 0.1491432934999466, "eval_lrap": 0.9427461184278115, "eval_runtime": 22.6955, "eval_samples_per_second": 29.609, "eval_steps_per_second": 3.701, "step": 1100 }, { "epoch": 4.05, "eval_loss": 0.14092977344989777, "eval_lrap": 0.9405212229801448, "eval_runtime": 22.7532, "eval_samples_per_second": 29.534, "eval_steps_per_second": 3.692, "step": 1200 }, { "epoch": 4.39, "eval_loss": 0.13760367035865784, "eval_lrap": 0.930910014527521, "eval_runtime": 22.795, "eval_samples_per_second": 29.48, "eval_steps_per_second": 3.685, "step": 1300 }, { "epoch": 4.73, "eval_loss": 0.13160455226898193, "eval_lrap": 0.95920731151508, "eval_runtime": 22.6631, "eval_samples_per_second": 29.652, "eval_steps_per_second": 3.706, "step": 1400 }, { "epoch": 5.07, "learning_rate": 1.1797297297297297e-05, "loss": 0.0757, "step": 1500 }, { "epoch": 5.07, "eval_loss": 0.12442280352115631, "eval_lrap": 0.9517553489380385, "eval_runtime": 23.4283, "eval_samples_per_second": 28.683, "eval_steps_per_second": 3.585, "step": 1500 }, { "epoch": 5.41, "eval_loss": 0.12935081124305725, "eval_lrap": 0.9447551279048475, "eval_runtime": 23.3876, "eval_samples_per_second": 28.733, "eval_steps_per_second": 3.592, "step": 1600 }, { "epoch": 5.74, "eval_loss": 0.13968029618263245, "eval_lrap": 0.9520049098544898, "eval_runtime": 23.3148, "eval_samples_per_second": 28.823, "eval_steps_per_second": 3.603, "step": 1700 }, { "epoch": 6.08, "eval_loss": 0.1321108192205429, "eval_lrap": 0.9493008222774945, "eval_runtime": 22.7141, "eval_samples_per_second": 29.585, "eval_steps_per_second": 3.698, "step": 1800 }, { "epoch": 6.42, "eval_loss": 0.1369735449552536, "eval_lrap": 0.9438046186531343, "eval_runtime": 22.7987, "eval_samples_per_second": 29.475, "eval_steps_per_second": 3.684, "step": 1900 }, { "epoch": 6.76, "learning_rate": 6.729729729729729e-06, "loss": 0.04, "step": 2000 }, { "epoch": 6.76, "eval_loss": 0.13185060024261475, "eval_lrap": 0.9582819439486873, "eval_runtime": 22.7045, "eval_samples_per_second": 29.598, "eval_steps_per_second": 3.7, "step": 2000 }, { "epoch": 7.09, "eval_loss": 0.14138825237751007, "eval_lrap": 0.9504156521289109, "eval_runtime": 23.4267, "eval_samples_per_second": 28.685, "eval_steps_per_second": 3.586, "step": 2100 }, { "epoch": 7.43, "eval_loss": 0.14279799163341522, "eval_lrap": 0.9578134572863043, "eval_runtime": 22.7296, "eval_samples_per_second": 29.565, "eval_steps_per_second": 3.696, "step": 2200 }, { "epoch": 7.77, "eval_loss": 0.14730122685432434, "eval_lrap": 0.9416550938849539, "eval_runtime": 22.7712, "eval_samples_per_second": 29.511, "eval_steps_per_second": 3.689, "step": 2300 }, { "epoch": 8.11, "eval_loss": 0.14928565919399261, "eval_lrap": 0.9514275899253632, "eval_runtime": 23.4136, "eval_samples_per_second": 28.701, "eval_steps_per_second": 3.588, "step": 2400 }, { "epoch": 8.45, "learning_rate": 1.662162162162162e-06, "loss": 0.0204, "step": 2500 }, { "epoch": 8.45, "eval_loss": 0.14889824390411377, "eval_lrap": 0.955463596740468, "eval_runtime": 23.4183, "eval_samples_per_second": 28.695, "eval_steps_per_second": 3.587, "step": 2500 }, { "epoch": 8.78, "eval_loss": 0.14893263578414917, "eval_lrap": 0.9515173886897298, "eval_runtime": 23.311, "eval_samples_per_second": 28.828, "eval_steps_per_second": 3.603, "step": 2600 }, { "epoch": 9.0, "step": 2664, "total_flos": 1.9853269227408384e+16, "train_loss": 0.09327275080007834, "train_runtime": 2835.2925, "train_samples_per_second": 7.514, "train_steps_per_second": 0.94 } ], "max_steps": 2664, "num_train_epochs": 9, "total_flos": 1.9853269227408384e+16, "trial_name": null, "trial_params": null }