|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.954646181119811, |
|
"global_step": 100000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 6.249924149859892e-05, |
|
"loss": 2.9841, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.880531072616577, |
|
"eval_runtime": 5041.8037, |
|
"eval_samples_per_second": 11.934, |
|
"eval_steps_per_second": 2.984, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 6.249460454641857e-05, |
|
"loss": 2.9159, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.843543291091919, |
|
"eval_runtime": 5041.0801, |
|
"eval_samples_per_second": 11.936, |
|
"eval_steps_per_second": 2.984, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 6.248574782066793e-05, |
|
"loss": 2.8874, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.8224122524261475, |
|
"eval_runtime": 5041.8102, |
|
"eval_samples_per_second": 11.934, |
|
"eval_steps_per_second": 2.984, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.24726796015593e-05, |
|
"loss": 2.867, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 2.805563449859619, |
|
"eval_runtime": 5045.7151, |
|
"eval_samples_per_second": 11.925, |
|
"eval_steps_per_second": 2.981, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.245539895286441e-05, |
|
"loss": 2.8462, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.243390820601403e-05, |
|
"loss": 2.8343, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 6.240821026044725e-05, |
|
"loss": 2.8232, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 6.237830858322031e-05, |
|
"loss": 2.8154, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 2.7630834579467773, |
|
"eval_runtime": 5044.7761, |
|
"eval_samples_per_second": 11.927, |
|
"eval_steps_per_second": 2.982, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.234420720853886e-05, |
|
"loss": 2.8085, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 6.230591073721361e-05, |
|
"loss": 2.7959, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 6.22634243360397e-05, |
|
"loss": 2.7934, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.221675373709958e-05, |
|
"loss": 2.7856, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 2.7356297969818115, |
|
"eval_runtime": 5045.7013, |
|
"eval_samples_per_second": 11.925, |
|
"eval_steps_per_second": 2.981, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.216590523698961e-05, |
|
"loss": 2.7796, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 6.211090854583099e-05, |
|
"loss": 2.7579, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 6.205172705145689e-05, |
|
"loss": 2.7374, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 6.198836374494218e-05, |
|
"loss": 2.7324, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 2.7184464931488037, |
|
"eval_runtime": 5041.2928, |
|
"eval_samples_per_second": 11.935, |
|
"eval_steps_per_second": 2.984, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 6.192087786506709e-05, |
|
"loss": 2.7345, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.18492540023217e-05, |
|
"loss": 2.7306, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 6.177350181988941e-05, |
|
"loss": 2.7291, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 6.169363153792874e-05, |
|
"loss": 2.7255, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 2.7048535346984863, |
|
"eval_runtime": 5061.5104, |
|
"eval_samples_per_second": 11.888, |
|
"eval_steps_per_second": 2.972, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 6.160961950708177e-05, |
|
"loss": 2.725, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 6.152154427075951e-05, |
|
"loss": 2.7212, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 6.142938492793726e-05, |
|
"loss": 2.7196, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 6.133315391235702e-05, |
|
"loss": 2.7169, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_loss": 2.6941745281219482, |
|
"eval_runtime": 5046.2528, |
|
"eval_samples_per_second": 11.924, |
|
"eval_steps_per_second": 2.981, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 6.12328232645584e-05, |
|
"loss": 2.7174, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 6.112848678433687e-05, |
|
"loss": 2.7153, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 6.102011922724016e-05, |
|
"loss": 2.7117, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 6.090778098663474e-05, |
|
"loss": 2.6718, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_loss": 2.6850435733795166, |
|
"eval_runtime": 5045.0709, |
|
"eval_samples_per_second": 11.926, |
|
"eval_steps_per_second": 2.982, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 6.0791397277177804e-05, |
|
"loss": 2.6687, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 6.0671027969511556e-05, |
|
"loss": 2.6724, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 6.0546638755690396e-05, |
|
"loss": 2.6703, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.041839805391616e-05, |
|
"loss": 2.6717, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 2.67754864692688, |
|
"eval_runtime": 5038.9811, |
|
"eval_samples_per_second": 11.941, |
|
"eval_steps_per_second": 2.985, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 6.028617152972819e-05, |
|
"loss": 2.6733, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 6.0150027570214874e-05, |
|
"loss": 2.6751, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 6.000998454333341e-05, |
|
"loss": 2.6738, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.9866061343086405e-05, |
|
"loss": 2.6747, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_loss": 2.6707887649536133, |
|
"eval_runtime": 5038.9349, |
|
"eval_samples_per_second": 11.941, |
|
"eval_steps_per_second": 2.985, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.971821747960996e-05, |
|
"loss": 2.676, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 5.9566591173123494e-05, |
|
"loss": 2.6739, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 5.941120747883403e-05, |
|
"loss": 2.6756, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 5.925196295535967e-05, |
|
"loss": 2.6714, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_loss": 2.663831949234009, |
|
"eval_runtime": 5038.524, |
|
"eval_samples_per_second": 11.942, |
|
"eval_steps_per_second": 2.986, |
|
"step": 100000 |
|
} |
|
], |
|
"max_steps": 676900, |
|
"num_train_epochs": 20, |
|
"total_flos": 2.418000801633927e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|