|
{ |
|
"best_metric": 0.027055351063609123, |
|
"best_model_checkpoint": "mgh6/TCS_Pairing/checkpoint-25000", |
|
"epoch": 2.9836496001909536, |
|
"eval_steps": 1000, |
|
"global_step": 25000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.602180053307873e-05, |
|
"loss": 0.0929, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 0.0718587264418602, |
|
"eval_runtime": 142.087, |
|
"eval_samples_per_second": 178.778, |
|
"eval_steps_per_second": 22.353, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.204360106615746e-05, |
|
"loss": 0.0654, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 0.06243297830224037, |
|
"eval_runtime": 142.08, |
|
"eval_samples_per_second": 178.787, |
|
"eval_steps_per_second": 22.354, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.806540159923618e-05, |
|
"loss": 0.056, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 0.05325521528720856, |
|
"eval_runtime": 142.1931, |
|
"eval_samples_per_second": 178.644, |
|
"eval_steps_per_second": 22.336, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.408720213231491e-05, |
|
"loss": 0.0494, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 0.0536990687251091, |
|
"eval_runtime": 142.149, |
|
"eval_samples_per_second": 178.7, |
|
"eval_steps_per_second": 22.343, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.010900266539365e-05, |
|
"loss": 0.0456, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 0.04832467809319496, |
|
"eval_runtime": 141.7755, |
|
"eval_samples_per_second": 179.171, |
|
"eval_steps_per_second": 22.402, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.613080319847238e-05, |
|
"loss": 0.0423, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 0.043777432292699814, |
|
"eval_runtime": 142.1073, |
|
"eval_samples_per_second": 178.752, |
|
"eval_steps_per_second": 22.349, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.21526037315511e-05, |
|
"loss": 0.0392, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 0.04013779014348984, |
|
"eval_runtime": 142.123, |
|
"eval_samples_per_second": 178.733, |
|
"eval_steps_per_second": 22.347, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.817440426462983e-05, |
|
"loss": 0.0383, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 0.04025523364543915, |
|
"eval_runtime": 142.0835, |
|
"eval_samples_per_second": 178.782, |
|
"eval_steps_per_second": 22.353, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 6.419620479770856e-05, |
|
"loss": 0.0308, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 0.03931814804673195, |
|
"eval_runtime": 142.1394, |
|
"eval_samples_per_second": 178.712, |
|
"eval_steps_per_second": 22.344, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 6.021800533078728e-05, |
|
"loss": 0.0279, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 0.037444014102220535, |
|
"eval_runtime": 142.0806, |
|
"eval_samples_per_second": 178.786, |
|
"eval_steps_per_second": 22.354, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5.6239805863866016e-05, |
|
"loss": 0.0272, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 0.037211526185274124, |
|
"eval_runtime": 142.1393, |
|
"eval_samples_per_second": 178.712, |
|
"eval_steps_per_second": 22.344, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.226160639694474e-05, |
|
"loss": 0.0262, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_loss": 0.03615099936723709, |
|
"eval_runtime": 142.2278, |
|
"eval_samples_per_second": 178.601, |
|
"eval_steps_per_second": 22.33, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.8283406930023476e-05, |
|
"loss": 0.0245, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 0.03587919846177101, |
|
"eval_runtime": 142.4557, |
|
"eval_samples_per_second": 178.315, |
|
"eval_steps_per_second": 22.295, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.43052074631022e-05, |
|
"loss": 0.0243, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_loss": 0.0344834141433239, |
|
"eval_runtime": 142.1512, |
|
"eval_samples_per_second": 178.697, |
|
"eval_steps_per_second": 22.342, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 4.032700799618093e-05, |
|
"loss": 0.0226, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 0.03409423679113388, |
|
"eval_runtime": 142.2177, |
|
"eval_samples_per_second": 178.613, |
|
"eval_steps_per_second": 22.332, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.634880852925966e-05, |
|
"loss": 0.0231, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_loss": 0.03197489306330681, |
|
"eval_runtime": 140.7435, |
|
"eval_samples_per_second": 180.484, |
|
"eval_steps_per_second": 22.566, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 3.237060906233839e-05, |
|
"loss": 0.0198, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 0.03260968625545502, |
|
"eval_runtime": 142.0512, |
|
"eval_samples_per_second": 178.823, |
|
"eval_steps_per_second": 22.358, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 2.839240959541711e-05, |
|
"loss": 0.013, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_loss": 0.03267335891723633, |
|
"eval_runtime": 141.695, |
|
"eval_samples_per_second": 179.272, |
|
"eval_steps_per_second": 22.414, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.441421012849584e-05, |
|
"loss": 0.0132, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 0.03128109499812126, |
|
"eval_runtime": 142.0978, |
|
"eval_samples_per_second": 178.764, |
|
"eval_steps_per_second": 22.351, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.043601066157457e-05, |
|
"loss": 0.0123, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_loss": 0.03134540468454361, |
|
"eval_runtime": 142.1226, |
|
"eval_samples_per_second": 178.733, |
|
"eval_steps_per_second": 22.347, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.64578111946533e-05, |
|
"loss": 0.0123, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_loss": 0.030218619853258133, |
|
"eval_runtime": 141.8386, |
|
"eval_samples_per_second": 179.091, |
|
"eval_steps_per_second": 22.392, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.247961172773203e-05, |
|
"loss": 0.012, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 0.028801346197724342, |
|
"eval_runtime": 142.0191, |
|
"eval_samples_per_second": 178.863, |
|
"eval_steps_per_second": 22.363, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 8.501412260810758e-06, |
|
"loss": 0.0116, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_loss": 0.027987737208604813, |
|
"eval_runtime": 141.3339, |
|
"eval_samples_per_second": 179.73, |
|
"eval_steps_per_second": 22.472, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 4.523212793889486e-06, |
|
"loss": 0.0107, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_loss": 0.027928264811635017, |
|
"eval_runtime": 141.818, |
|
"eval_samples_per_second": 179.117, |
|
"eval_steps_per_second": 22.395, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 5.450133269682142e-07, |
|
"loss": 0.0115, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_loss": 0.027055351063609123, |
|
"eval_runtime": 142.8238, |
|
"eval_samples_per_second": 177.856, |
|
"eval_steps_per_second": 22.237, |
|
"step": 25000 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 25137, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1000, |
|
"total_flos": 6.252295780604789e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|