{ "best_metric": 0.8582048416137695, "best_model_checkpoint": "/content/drive/My Drive/Colab Notebooks/aai520-project/checkpoints/roberta-finetuned/checkpoint-1600", "epoch": 3.9215686274509802, "eval_steps": 100, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 1.811764705882353e-05, "loss": 2.9129, "step": 100 }, { "epoch": 0.2, "eval_loss": 1.4699586629867554, "eval_runtime": 17.6815, "eval_samples_per_second": 676.131, "eval_steps_per_second": 10.576, "step": 100 }, { "epoch": 0.39, "learning_rate": 1.6176470588235296e-05, "loss": 1.4395, "step": 200 }, { "epoch": 0.39, "eval_loss": 1.240740418434143, "eval_runtime": 17.6813, "eval_samples_per_second": 676.14, "eval_steps_per_second": 10.576, "step": 200 }, { "epoch": 0.59, "learning_rate": 1.4215686274509805e-05, "loss": 1.2356, "step": 300 }, { "epoch": 0.59, "eval_loss": 1.0324599742889404, "eval_runtime": 17.7184, "eval_samples_per_second": 674.723, "eval_steps_per_second": 10.554, "step": 300 }, { "epoch": 0.78, "learning_rate": 1.2254901960784315e-05, "loss": 1.1284, "step": 400 }, { "epoch": 0.78, "eval_loss": 0.9749970436096191, "eval_runtime": 17.7173, "eval_samples_per_second": 674.763, "eval_steps_per_second": 10.555, "step": 400 }, { "epoch": 0.98, "learning_rate": 1.0294117647058823e-05, "loss": 1.0821, "step": 500 }, { "epoch": 0.98, "eval_loss": 0.9345471858978271, "eval_runtime": 17.7101, "eval_samples_per_second": 675.038, "eval_steps_per_second": 10.559, "step": 500 }, { "epoch": 1.18, "learning_rate": 8.333333333333334e-06, "loss": 0.9978, "step": 600 }, { "epoch": 1.18, "eval_loss": 0.9892988801002502, "eval_runtime": 17.6864, "eval_samples_per_second": 675.943, "eval_steps_per_second": 10.573, "step": 600 }, { "epoch": 1.37, "learning_rate": 6.372549019607843e-06, "loss": 0.9697, "step": 700 }, { "epoch": 1.37, "eval_loss": 0.9299613237380981, "eval_runtime": 17.7225, "eval_samples_per_second": 674.565, "eval_steps_per_second": 10.552, "step": 700 }, { "epoch": 1.57, "learning_rate": 4.411764705882353e-06, "loss": 0.9455, "step": 800 }, { "epoch": 1.57, "eval_loss": 0.9351199865341187, "eval_runtime": 17.7361, "eval_samples_per_second": 674.051, "eval_steps_per_second": 10.543, "step": 800 }, { "epoch": 1.76, "learning_rate": 2.450980392156863e-06, "loss": 0.9322, "step": 900 }, { "epoch": 1.76, "eval_loss": 0.9450510144233704, "eval_runtime": 17.7032, "eval_samples_per_second": 675.301, "eval_steps_per_second": 10.563, "step": 900 }, { "epoch": 1.96, "learning_rate": 4.901960784313725e-07, "loss": 0.9269, "step": 1000 }, { "epoch": 1.96, "eval_loss": 0.9063502550125122, "eval_runtime": 17.6937, "eval_samples_per_second": 675.663, "eval_steps_per_second": 10.569, "step": 1000 }, { "epoch": 2.16, "learning_rate": 9.284313725490197e-06, "loss": 0.9105, "step": 1100 }, { "epoch": 2.16, "eval_loss": 0.8837365508079529, "eval_runtime": 17.5298, "eval_samples_per_second": 681.981, "eval_steps_per_second": 10.668, "step": 1100 }, { "epoch": 2.35, "learning_rate": 8.303921568627452e-06, "loss": 0.8805, "step": 1200 }, { "epoch": 2.35, "eval_loss": 0.8875929713249207, "eval_runtime": 17.5814, "eval_samples_per_second": 679.978, "eval_steps_per_second": 10.636, "step": 1200 }, { "epoch": 2.55, "learning_rate": 7.333333333333333e-06, "loss": 0.8703, "step": 1300 }, { "epoch": 2.55, "eval_loss": 0.9852611422538757, "eval_runtime": 17.5854, "eval_samples_per_second": 679.824, "eval_steps_per_second": 10.634, "step": 1300 }, { "epoch": 2.75, "learning_rate": 6.352941176470589e-06, "loss": 0.8699, "step": 1400 }, { "epoch": 2.75, "eval_loss": 0.9235011339187622, "eval_runtime": 17.5815, "eval_samples_per_second": 679.975, "eval_steps_per_second": 10.636, "step": 1400 }, { "epoch": 2.94, "learning_rate": 5.372549019607843e-06, "loss": 0.8633, "step": 1500 }, { "epoch": 2.94, "eval_loss": 0.8929564356803894, "eval_runtime": 17.5589, "eval_samples_per_second": 680.85, "eval_steps_per_second": 10.65, "step": 1500 }, { "epoch": 3.14, "learning_rate": 4.392156862745098e-06, "loss": 0.828, "step": 1600 }, { "epoch": 3.14, "eval_loss": 0.8582048416137695, "eval_runtime": 17.5663, "eval_samples_per_second": 680.564, "eval_steps_per_second": 10.645, "step": 1600 }, { "epoch": 3.33, "learning_rate": 3.421568627450981e-06, "loss": 0.8284, "step": 1700 }, { "epoch": 3.33, "eval_loss": 0.920342743396759, "eval_runtime": 17.6216, "eval_samples_per_second": 678.428, "eval_steps_per_second": 10.612, "step": 1700 }, { "epoch": 3.53, "learning_rate": 2.4411764705882356e-06, "loss": 0.8076, "step": 1800 }, { "epoch": 3.53, "eval_loss": 0.8865646719932556, "eval_runtime": 17.6165, "eval_samples_per_second": 678.626, "eval_steps_per_second": 10.615, "step": 1800 }, { "epoch": 3.73, "learning_rate": 1.4607843137254903e-06, "loss": 0.7805, "step": 1900 }, { "epoch": 3.73, "eval_loss": 0.9098581075668335, "eval_runtime": 17.5589, "eval_samples_per_second": 680.85, "eval_steps_per_second": 10.65, "step": 1900 }, { "epoch": 3.92, "learning_rate": 4.901960784313725e-07, "loss": 0.7974, "step": 2000 }, { "epoch": 3.92, "eval_loss": 0.8746156096458435, "eval_runtime": 17.5409, "eval_samples_per_second": 681.548, "eval_steps_per_second": 10.661, "step": 2000 }, { "epoch": 3.92, "step": 2000, "total_flos": 1.3377688443640013e+17, "train_loss": 0.0, "train_runtime": 0.6732, "train_samples_per_second": 775720.158, "train_steps_per_second": 1515.183 }, { "epoch": 3.92, "eval_loss": 0.8582085371017456, "eval_runtime": 17.3486, "eval_samples_per_second": 689.103, "eval_steps_per_second": 5.418, "step": 2000 } ], "logging_steps": 100, "max_steps": 1020, "num_train_epochs": 4, "save_steps": 100, "total_flos": 1.3377688443640013e+17, "trial_name": null, "trial_params": null }