|
{ |
|
"best_metric": 0.5091743119266054, |
|
"best_model_checkpoint": "outputs/soft_prompt/roberta-base/sst2/checkpoint-200", |
|
"epoch": 3.0, |
|
"global_step": 6315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.2113580703735352, |
|
"eval_runtime": 1.5734, |
|
"eval_samples_per_second": 554.211, |
|
"eval_steps_per_second": 17.796, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.1833394765853882, |
|
"eval_runtime": 1.3274, |
|
"eval_samples_per_second": 656.934, |
|
"eval_steps_per_second": 21.094, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.027624703087885987, |
|
"loss": 1.7048, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.4068197011947632, |
|
"eval_runtime": 1.6024, |
|
"eval_samples_per_second": 544.198, |
|
"eval_steps_per_second": 17.474, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.2358722686767578, |
|
"eval_runtime": 1.3312, |
|
"eval_samples_per_second": 655.031, |
|
"eval_steps_per_second": 21.033, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.02524940617577197, |
|
"loss": 1.4721, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.5192540884017944, |
|
"eval_runtime": 1.4576, |
|
"eval_samples_per_second": 598.233, |
|
"eval_steps_per_second": 19.209, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.0336543321609497, |
|
"eval_runtime": 1.7394, |
|
"eval_samples_per_second": 501.315, |
|
"eval_steps_per_second": 16.097, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 2.6292548179626465, |
|
"eval_runtime": 1.5911, |
|
"eval_samples_per_second": 548.047, |
|
"eval_steps_per_second": 17.598, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.022874109263657957, |
|
"loss": 1.821, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.309380054473877, |
|
"eval_runtime": 1.4381, |
|
"eval_samples_per_second": 606.369, |
|
"eval_steps_per_second": 19.471, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.0032501220703125, |
|
"eval_runtime": 1.3929, |
|
"eval_samples_per_second": 626.051, |
|
"eval_steps_per_second": 20.103, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.02049881235154394, |
|
"loss": 1.6669, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 2.3615148067474365, |
|
"eval_runtime": 1.4549, |
|
"eval_samples_per_second": 599.336, |
|
"eval_steps_per_second": 19.245, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.1986268758773804, |
|
"eval_runtime": 1.518, |
|
"eval_samples_per_second": 574.444, |
|
"eval_steps_per_second": 18.445, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.9300673007965088, |
|
"eval_runtime": 1.4024, |
|
"eval_samples_per_second": 621.772, |
|
"eval_steps_per_second": 19.965, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.01812351543942993, |
|
"loss": 1.5726, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.1378525495529175, |
|
"eval_runtime": 1.3734, |
|
"eval_samples_per_second": 634.927, |
|
"eval_steps_per_second": 20.388, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.8935280442237854, |
|
"eval_runtime": 1.4758, |
|
"eval_samples_per_second": 590.85, |
|
"eval_steps_per_second": 18.972, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.015748218527315914, |
|
"loss": 2.2264, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.8003481030464172, |
|
"eval_runtime": 1.4567, |
|
"eval_samples_per_second": 598.601, |
|
"eval_steps_per_second": 19.221, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.1951367855072021, |
|
"eval_runtime": 1.2323, |
|
"eval_samples_per_second": 707.612, |
|
"eval_steps_per_second": 22.721, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.1763222217559814, |
|
"eval_runtime": 1.3939, |
|
"eval_samples_per_second": 625.563, |
|
"eval_steps_per_second": 20.087, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.0133729216152019, |
|
"loss": 1.5653, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.1043964624404907, |
|
"eval_runtime": 1.3672, |
|
"eval_samples_per_second": 637.781, |
|
"eval_steps_per_second": 20.479, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 2.281111717224121, |
|
"eval_runtime": 1.6251, |
|
"eval_samples_per_second": 536.597, |
|
"eval_steps_per_second": 17.23, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.010997624703087886, |
|
"loss": 1.3918, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.7353538274765015, |
|
"eval_runtime": 1.3011, |
|
"eval_samples_per_second": 670.193, |
|
"eval_steps_per_second": 21.52, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 2.1966099739074707, |
|
"eval_runtime": 1.3231, |
|
"eval_samples_per_second": 659.044, |
|
"eval_steps_per_second": 21.162, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.9623039364814758, |
|
"eval_runtime": 1.428, |
|
"eval_samples_per_second": 610.649, |
|
"eval_steps_per_second": 19.608, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.008622327790973872, |
|
"loss": 1.3465, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.7837328314781189, |
|
"eval_runtime": 1.6722, |
|
"eval_samples_per_second": 521.453, |
|
"eval_steps_per_second": 16.744, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.6955246329307556, |
|
"eval_runtime": 1.5518, |
|
"eval_samples_per_second": 561.939, |
|
"eval_steps_per_second": 18.044, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.006247030878859857, |
|
"loss": 1.0154, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 1.5205951929092407, |
|
"eval_runtime": 1.4264, |
|
"eval_samples_per_second": 611.325, |
|
"eval_steps_per_second": 19.63, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 1.8486016988754272, |
|
"eval_runtime": 1.5808, |
|
"eval_samples_per_second": 551.633, |
|
"eval_steps_per_second": 17.713, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.806239128112793, |
|
"eval_runtime": 1.0228, |
|
"eval_samples_per_second": 852.573, |
|
"eval_steps_per_second": 27.376, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.003871733966745843, |
|
"loss": 0.892, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.7133888602256775, |
|
"eval_runtime": 1.4851, |
|
"eval_samples_per_second": 587.161, |
|
"eval_steps_per_second": 18.854, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.8787677884101868, |
|
"eval_runtime": 1.0808, |
|
"eval_samples_per_second": 806.785, |
|
"eval_steps_per_second": 25.906, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.001496437054631829, |
|
"loss": 0.784, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_accuracy": 0.4908256880733945, |
|
"eval_loss": 0.8426281809806824, |
|
"eval_runtime": 1.6584, |
|
"eval_samples_per_second": 525.823, |
|
"eval_steps_per_second": 16.884, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_accuracy": 0.5091743119266054, |
|
"eval_loss": 0.8049508333206177, |
|
"eval_runtime": 1.4457, |
|
"eval_samples_per_second": 603.185, |
|
"eval_steps_per_second": 19.368, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 6315, |
|
"total_flos": 1.330211706755328e+16, |
|
"train_loss": 1.4194242838348365, |
|
"train_runtime": 1125.0881, |
|
"train_samples_per_second": 179.583, |
|
"train_steps_per_second": 5.613 |
|
} |
|
], |
|
"max_steps": 6315, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.330211706755328e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|