|
{ |
|
"best_metric": 0.95920731151508, |
|
"best_model_checkpoint": "/scicore/home/lauerg/cerque0000/models/nlp-job-ads/gbert-large-ft-edu-redux/checkpoint-1400", |
|
"epoch": 9.0, |
|
"global_step": 2664, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 0.29267561435699463, |
|
"eval_lrap": 0.7186470358938737, |
|
"eval_runtime": 22.3251, |
|
"eval_samples_per_second": 30.101, |
|
"eval_steps_per_second": 3.763, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 0.19173942506313324, |
|
"eval_lrap": 0.8874944283175044, |
|
"eval_runtime": 22.732, |
|
"eval_samples_per_second": 29.562, |
|
"eval_steps_per_second": 3.695, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 0.1740952730178833, |
|
"eval_lrap": 0.9086508545541496, |
|
"eval_runtime": 22.6942, |
|
"eval_samples_per_second": 29.611, |
|
"eval_steps_per_second": 3.701, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 0.19585496187210083, |
|
"eval_lrap": 0.9103675474295027, |
|
"eval_runtime": 23.3607, |
|
"eval_samples_per_second": 28.766, |
|
"eval_steps_per_second": 3.596, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.193243243243243e-05, |
|
"loss": 0.2242, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_loss": 0.1878644824028015, |
|
"eval_lrap": 0.9258567343567102, |
|
"eval_runtime": 22.7639, |
|
"eval_samples_per_second": 29.52, |
|
"eval_steps_per_second": 3.69, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 0.16258566081523895, |
|
"eval_lrap": 0.9308544571355302, |
|
"eval_runtime": 23.3248, |
|
"eval_samples_per_second": 28.81, |
|
"eval_steps_per_second": 3.601, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 0.15402589738368988, |
|
"eval_lrap": 0.9306379874972384, |
|
"eval_runtime": 22.759, |
|
"eval_samples_per_second": 29.527, |
|
"eval_steps_per_second": 3.691, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 0.1420990228652954, |
|
"eval_lrap": 0.9364025033506702, |
|
"eval_runtime": 23.3898, |
|
"eval_samples_per_second": 28.73, |
|
"eval_steps_per_second": 3.591, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_loss": 0.14033490419387817, |
|
"eval_lrap": 0.9166296066698628, |
|
"eval_runtime": 22.7034, |
|
"eval_samples_per_second": 29.599, |
|
"eval_steps_per_second": 3.7, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.6864864864864864e-05, |
|
"loss": 0.1316, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_loss": 0.13963457942008972, |
|
"eval_lrap": 0.9372152596624732, |
|
"eval_runtime": 23.3136, |
|
"eval_samples_per_second": 28.824, |
|
"eval_steps_per_second": 3.603, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"eval_loss": 0.1491432934999466, |
|
"eval_lrap": 0.9427461184278115, |
|
"eval_runtime": 22.6955, |
|
"eval_samples_per_second": 29.609, |
|
"eval_steps_per_second": 3.701, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"eval_loss": 0.14092977344989777, |
|
"eval_lrap": 0.9405212229801448, |
|
"eval_runtime": 22.7532, |
|
"eval_samples_per_second": 29.534, |
|
"eval_steps_per_second": 3.692, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"eval_loss": 0.13760367035865784, |
|
"eval_lrap": 0.930910014527521, |
|
"eval_runtime": 22.795, |
|
"eval_samples_per_second": 29.48, |
|
"eval_steps_per_second": 3.685, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"eval_loss": 0.13160455226898193, |
|
"eval_lrap": 0.95920731151508, |
|
"eval_runtime": 22.6631, |
|
"eval_samples_per_second": 29.652, |
|
"eval_steps_per_second": 3.706, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 1.1797297297297297e-05, |
|
"loss": 0.0757, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"eval_loss": 0.12442280352115631, |
|
"eval_lrap": 0.9517553489380385, |
|
"eval_runtime": 23.4283, |
|
"eval_samples_per_second": 28.683, |
|
"eval_steps_per_second": 3.585, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"eval_loss": 0.12935081124305725, |
|
"eval_lrap": 0.9447551279048475, |
|
"eval_runtime": 23.3876, |
|
"eval_samples_per_second": 28.733, |
|
"eval_steps_per_second": 3.592, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"eval_loss": 0.13968029618263245, |
|
"eval_lrap": 0.9520049098544898, |
|
"eval_runtime": 23.3148, |
|
"eval_samples_per_second": 28.823, |
|
"eval_steps_per_second": 3.603, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"eval_loss": 0.1321108192205429, |
|
"eval_lrap": 0.9493008222774945, |
|
"eval_runtime": 22.7141, |
|
"eval_samples_per_second": 29.585, |
|
"eval_steps_per_second": 3.698, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"eval_loss": 0.1369735449552536, |
|
"eval_lrap": 0.9438046186531343, |
|
"eval_runtime": 22.7987, |
|
"eval_samples_per_second": 29.475, |
|
"eval_steps_per_second": 3.684, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 6.729729729729729e-06, |
|
"loss": 0.04, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"eval_loss": 0.13185060024261475, |
|
"eval_lrap": 0.9582819439486873, |
|
"eval_runtime": 22.7045, |
|
"eval_samples_per_second": 29.598, |
|
"eval_steps_per_second": 3.7, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"eval_loss": 0.14138825237751007, |
|
"eval_lrap": 0.9504156521289109, |
|
"eval_runtime": 23.4267, |
|
"eval_samples_per_second": 28.685, |
|
"eval_steps_per_second": 3.586, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"eval_loss": 0.14279799163341522, |
|
"eval_lrap": 0.9578134572863043, |
|
"eval_runtime": 22.7296, |
|
"eval_samples_per_second": 29.565, |
|
"eval_steps_per_second": 3.696, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"eval_loss": 0.14730122685432434, |
|
"eval_lrap": 0.9416550938849539, |
|
"eval_runtime": 22.7712, |
|
"eval_samples_per_second": 29.511, |
|
"eval_steps_per_second": 3.689, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"eval_loss": 0.14928565919399261, |
|
"eval_lrap": 0.9514275899253632, |
|
"eval_runtime": 23.4136, |
|
"eval_samples_per_second": 28.701, |
|
"eval_steps_per_second": 3.588, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 1.662162162162162e-06, |
|
"loss": 0.0204, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"eval_loss": 0.14889824390411377, |
|
"eval_lrap": 0.955463596740468, |
|
"eval_runtime": 23.4183, |
|
"eval_samples_per_second": 28.695, |
|
"eval_steps_per_second": 3.587, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"eval_loss": 0.14893263578414917, |
|
"eval_lrap": 0.9515173886897298, |
|
"eval_runtime": 23.311, |
|
"eval_samples_per_second": 28.828, |
|
"eval_steps_per_second": 3.603, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 2664, |
|
"total_flos": 1.9853269227408384e+16, |
|
"train_loss": 0.09327275080007834, |
|
"train_runtime": 2835.2925, |
|
"train_samples_per_second": 7.514, |
|
"train_steps_per_second": 0.94 |
|
} |
|
], |
|
"max_steps": 2664, |
|
"num_train_epochs": 9, |
|
"total_flos": 1.9853269227408384e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|