|
{ |
|
"best_metric": 0.40839770436286926, |
|
"best_model_checkpoint": "/content/drive/MyDrive/colab/checkpoint-1700", |
|
"epoch": 1.6346153846153846, |
|
"global_step": 1700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.7596153846153844e-05, |
|
"loss": 1.033, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 0.6874200105667114, |
|
"eval_runtime": 57.2656, |
|
"eval_samples_per_second": 16.153, |
|
"eval_steps_per_second": 2.026, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.519230769230769e-05, |
|
"loss": 0.6703, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 0.5938639044761658, |
|
"eval_runtime": 57.2626, |
|
"eval_samples_per_second": 16.154, |
|
"eval_steps_per_second": 2.026, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.278846153846154e-05, |
|
"loss": 0.6579, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 0.49521076679229736, |
|
"eval_runtime": 57.2701, |
|
"eval_samples_per_second": 16.152, |
|
"eval_steps_per_second": 2.025, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.038461538461539e-05, |
|
"loss": 0.5068, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 0.5416558384895325, |
|
"eval_runtime": 57.2833, |
|
"eval_samples_per_second": 16.148, |
|
"eval_steps_per_second": 2.025, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.798076923076923e-05, |
|
"loss": 0.5439, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 0.5001481175422668, |
|
"eval_runtime": 57.2802, |
|
"eval_samples_per_second": 16.149, |
|
"eval_steps_per_second": 2.025, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.557692307692308e-05, |
|
"loss": 0.5429, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 0.47204354405403137, |
|
"eval_runtime": 57.2593, |
|
"eval_samples_per_second": 16.155, |
|
"eval_steps_per_second": 2.026, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.3173076923076926e-05, |
|
"loss": 0.4817, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 0.5970540642738342, |
|
"eval_runtime": 57.2593, |
|
"eval_samples_per_second": 16.155, |
|
"eval_steps_per_second": 2.026, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.0769230769230774e-05, |
|
"loss": 0.5023, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 0.44290876388549805, |
|
"eval_runtime": 57.2393, |
|
"eval_samples_per_second": 16.16, |
|
"eval_steps_per_second": 2.027, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.8365384615384616e-05, |
|
"loss": 0.4756, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 0.49230891466140747, |
|
"eval_runtime": 57.0994, |
|
"eval_samples_per_second": 16.2, |
|
"eval_steps_per_second": 2.032, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.5961538461538464e-05, |
|
"loss": 0.4772, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 0.4519612491130829, |
|
"eval_runtime": 57.1095, |
|
"eval_samples_per_second": 16.197, |
|
"eval_steps_per_second": 2.031, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.355769230769231e-05, |
|
"loss": 0.336, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_loss": 0.41091373562812805, |
|
"eval_runtime": 57.1093, |
|
"eval_samples_per_second": 16.197, |
|
"eval_steps_per_second": 2.031, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.1153846153846154e-05, |
|
"loss": 0.2225, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 0.4540548324584961, |
|
"eval_runtime": 57.116, |
|
"eval_samples_per_second": 16.195, |
|
"eval_steps_per_second": 2.031, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 0.2296, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 0.4974042475223541, |
|
"eval_runtime": 57.1073, |
|
"eval_samples_per_second": 16.198, |
|
"eval_steps_per_second": 2.031, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.6346153846153847e-05, |
|
"loss": 0.2511, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 0.48081809282302856, |
|
"eval_runtime": 57.1059, |
|
"eval_samples_per_second": 16.198, |
|
"eval_steps_per_second": 2.031, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.3942307692307693e-05, |
|
"loss": 0.3349, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 0.4536380171775818, |
|
"eval_runtime": 57.096, |
|
"eval_samples_per_second": 16.201, |
|
"eval_steps_per_second": 2.032, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.153846153846154e-05, |
|
"loss": 0.2529, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_loss": 0.4738934636116028, |
|
"eval_runtime": 57.1337, |
|
"eval_samples_per_second": 16.19, |
|
"eval_steps_per_second": 2.03, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.134615384615384e-06, |
|
"loss": 0.2064, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_loss": 0.40839770436286926, |
|
"eval_runtime": 57.1173, |
|
"eval_samples_per_second": 16.195, |
|
"eval_steps_per_second": 2.031, |
|
"step": 1700 |
|
} |
|
], |
|
"max_steps": 2080, |
|
"num_train_epochs": 2, |
|
"total_flos": 3578535250329600.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|