{ "best_metric": 0.5091743119266054, "best_model_checkpoint": "outputs/soft_prompt/roberta-base/sst2/checkpoint-200", "epoch": 3.0, "global_step": 6315, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.2113580703735352, "eval_runtime": 1.5734, "eval_samples_per_second": 554.211, "eval_steps_per_second": 17.796, "step": 200 }, { "epoch": 0.19, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.1833394765853882, "eval_runtime": 1.3274, "eval_samples_per_second": 656.934, "eval_steps_per_second": 21.094, "step": 400 }, { "epoch": 0.24, "learning_rate": 0.027624703087885987, "loss": 1.7048, "step": 500 }, { "epoch": 0.29, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.4068197011947632, "eval_runtime": 1.6024, "eval_samples_per_second": 544.198, "eval_steps_per_second": 17.474, "step": 600 }, { "epoch": 0.38, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.2358722686767578, "eval_runtime": 1.3312, "eval_samples_per_second": 655.031, "eval_steps_per_second": 21.033, "step": 800 }, { "epoch": 0.48, "learning_rate": 0.02524940617577197, "loss": 1.4721, "step": 1000 }, { "epoch": 0.48, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.5192540884017944, "eval_runtime": 1.4576, "eval_samples_per_second": 598.233, "eval_steps_per_second": 19.209, "step": 1000 }, { "epoch": 0.57, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.0336543321609497, "eval_runtime": 1.7394, "eval_samples_per_second": 501.315, "eval_steps_per_second": 16.097, "step": 1200 }, { "epoch": 0.67, "eval_accuracy": 0.5091743119266054, "eval_loss": 2.6292548179626465, "eval_runtime": 1.5911, "eval_samples_per_second": 548.047, "eval_steps_per_second": 17.598, "step": 1400 }, { "epoch": 0.71, "learning_rate": 0.022874109263657957, "loss": 1.821, "step": 1500 }, { "epoch": 0.76, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.309380054473877, "eval_runtime": 1.4381, "eval_samples_per_second": 606.369, "eval_steps_per_second": 19.471, "step": 1600 }, { "epoch": 0.86, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.0032501220703125, "eval_runtime": 1.3929, "eval_samples_per_second": 626.051, "eval_steps_per_second": 20.103, "step": 1800 }, { "epoch": 0.95, "learning_rate": 0.02049881235154394, "loss": 1.6669, "step": 2000 }, { "epoch": 0.95, "eval_accuracy": 0.5091743119266054, "eval_loss": 2.3615148067474365, "eval_runtime": 1.4549, "eval_samples_per_second": 599.336, "eval_steps_per_second": 19.245, "step": 2000 }, { "epoch": 1.05, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.1986268758773804, "eval_runtime": 1.518, "eval_samples_per_second": 574.444, "eval_steps_per_second": 18.445, "step": 2200 }, { "epoch": 1.14, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.9300673007965088, "eval_runtime": 1.4024, "eval_samples_per_second": 621.772, "eval_steps_per_second": 19.965, "step": 2400 }, { "epoch": 1.19, "learning_rate": 0.01812351543942993, "loss": 1.5726, "step": 2500 }, { "epoch": 1.24, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.1378525495529175, "eval_runtime": 1.3734, "eval_samples_per_second": 634.927, "eval_steps_per_second": 20.388, "step": 2600 }, { "epoch": 1.33, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.8935280442237854, "eval_runtime": 1.4758, "eval_samples_per_second": 590.85, "eval_steps_per_second": 18.972, "step": 2800 }, { "epoch": 1.43, "learning_rate": 0.015748218527315914, "loss": 2.2264, "step": 3000 }, { "epoch": 1.43, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.8003481030464172, "eval_runtime": 1.4567, "eval_samples_per_second": 598.601, "eval_steps_per_second": 19.221, "step": 3000 }, { "epoch": 1.52, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.1951367855072021, "eval_runtime": 1.2323, "eval_samples_per_second": 707.612, "eval_steps_per_second": 22.721, "step": 3200 }, { "epoch": 1.62, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.1763222217559814, "eval_runtime": 1.3939, "eval_samples_per_second": 625.563, "eval_steps_per_second": 20.087, "step": 3400 }, { "epoch": 1.66, "learning_rate": 0.0133729216152019, "loss": 1.5653, "step": 3500 }, { "epoch": 1.71, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.1043964624404907, "eval_runtime": 1.3672, "eval_samples_per_second": 637.781, "eval_steps_per_second": 20.479, "step": 3600 }, { "epoch": 1.81, "eval_accuracy": 0.4908256880733945, "eval_loss": 2.281111717224121, "eval_runtime": 1.6251, "eval_samples_per_second": 536.597, "eval_steps_per_second": 17.23, "step": 3800 }, { "epoch": 1.9, "learning_rate": 0.010997624703087886, "loss": 1.3918, "step": 4000 }, { "epoch": 1.9, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.7353538274765015, "eval_runtime": 1.3011, "eval_samples_per_second": 670.193, "eval_steps_per_second": 21.52, "step": 4000 }, { "epoch": 2.0, "eval_accuracy": 0.5091743119266054, "eval_loss": 2.1966099739074707, "eval_runtime": 1.3231, "eval_samples_per_second": 659.044, "eval_steps_per_second": 21.162, "step": 4200 }, { "epoch": 2.09, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.9623039364814758, "eval_runtime": 1.428, "eval_samples_per_second": 610.649, "eval_steps_per_second": 19.608, "step": 4400 }, { "epoch": 2.14, "learning_rate": 0.008622327790973872, "loss": 1.3465, "step": 4500 }, { "epoch": 2.19, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.7837328314781189, "eval_runtime": 1.6722, "eval_samples_per_second": 521.453, "eval_steps_per_second": 16.744, "step": 4600 }, { "epoch": 2.28, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.6955246329307556, "eval_runtime": 1.5518, "eval_samples_per_second": 561.939, "eval_steps_per_second": 18.044, "step": 4800 }, { "epoch": 2.38, "learning_rate": 0.006247030878859857, "loss": 1.0154, "step": 5000 }, { "epoch": 2.38, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.5205951929092407, "eval_runtime": 1.4264, "eval_samples_per_second": 611.325, "eval_steps_per_second": 19.63, "step": 5000 }, { "epoch": 2.47, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.8486016988754272, "eval_runtime": 1.5808, "eval_samples_per_second": 551.633, "eval_steps_per_second": 17.713, "step": 5200 }, { "epoch": 2.57, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.806239128112793, "eval_runtime": 1.0228, "eval_samples_per_second": 852.573, "eval_steps_per_second": 27.376, "step": 5400 }, { "epoch": 2.61, "learning_rate": 0.003871733966745843, "loss": 0.892, "step": 5500 }, { "epoch": 2.66, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.7133888602256775, "eval_runtime": 1.4851, "eval_samples_per_second": 587.161, "eval_steps_per_second": 18.854, "step": 5600 }, { "epoch": 2.76, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.8787677884101868, "eval_runtime": 1.0808, "eval_samples_per_second": 806.785, "eval_steps_per_second": 25.906, "step": 5800 }, { "epoch": 2.85, "learning_rate": 0.001496437054631829, "loss": 0.784, "step": 6000 }, { "epoch": 2.85, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.8426281809806824, "eval_runtime": 1.6584, "eval_samples_per_second": 525.823, "eval_steps_per_second": 16.884, "step": 6000 }, { "epoch": 2.95, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.8049508333206177, "eval_runtime": 1.4457, "eval_samples_per_second": 603.185, "eval_steps_per_second": 19.368, "step": 6200 }, { "epoch": 3.0, "step": 6315, "total_flos": 1.330211706755328e+16, "train_loss": 1.4194242838348365, "train_runtime": 1125.0881, "train_samples_per_second": 179.583, "train_steps_per_second": 5.613 } ], "max_steps": 6315, "num_train_epochs": 3, "total_flos": 1.330211706755328e+16, "trial_name": null, "trial_params": null }