|
{ |
|
"best_metric": 1.1207506656646729, |
|
"best_model_checkpoint": "./outputs/checkpoint-4100", |
|
"epoch": 2.987249544626594, |
|
"eval_steps": 100, |
|
"global_step": 4100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2669, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.180582046508789, |
|
"eval_runtime": 550.6875, |
|
"eval_samples_per_second": 11.393, |
|
"eval_steps_per_second": 1.425, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1546, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.131606101989746, |
|
"eval_runtime": 550.6761, |
|
"eval_samples_per_second": 11.393, |
|
"eval_steps_per_second": 1.426, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1084, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.089244842529297, |
|
"eval_runtime": 551.3766, |
|
"eval_samples_per_second": 11.379, |
|
"eval_steps_per_second": 1.424, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002, |
|
"loss": 2.068, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 2.043504476547241, |
|
"eval_runtime": 551.782, |
|
"eval_samples_per_second": 11.37, |
|
"eval_steps_per_second": 1.423, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0238, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 2.0026352405548096, |
|
"eval_runtime": 552.6008, |
|
"eval_samples_per_second": 11.354, |
|
"eval_steps_per_second": 1.421, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9746, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.9627352952957153, |
|
"eval_runtime": 553.4908, |
|
"eval_samples_per_second": 11.335, |
|
"eval_steps_per_second": 1.418, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9436, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.9262831211090088, |
|
"eval_runtime": 553.1509, |
|
"eval_samples_per_second": 11.342, |
|
"eval_steps_per_second": 1.419, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9026, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.8915894031524658, |
|
"eval_runtime": 553.4095, |
|
"eval_samples_per_second": 11.337, |
|
"eval_steps_per_second": 1.418, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8633, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.8577481508255005, |
|
"eval_runtime": 553.9449, |
|
"eval_samples_per_second": 11.326, |
|
"eval_steps_per_second": 1.417, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8404, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.8280558586120605, |
|
"eval_runtime": 554.6062, |
|
"eval_samples_per_second": 11.313, |
|
"eval_steps_per_second": 1.415, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8207, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.7947672605514526, |
|
"eval_runtime": 555.1319, |
|
"eval_samples_per_second": 11.302, |
|
"eval_steps_per_second": 1.414, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002, |
|
"loss": 1.769, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 1.761672019958496, |
|
"eval_runtime": 555.3259, |
|
"eval_samples_per_second": 11.298, |
|
"eval_steps_per_second": 1.414, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7687, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 1.7306458950042725, |
|
"eval_runtime": 554.8996, |
|
"eval_samples_per_second": 11.307, |
|
"eval_steps_per_second": 1.415, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6906, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 1.6910051107406616, |
|
"eval_runtime": 555.5658, |
|
"eval_samples_per_second": 11.293, |
|
"eval_steps_per_second": 1.413, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6215, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 1.6626144647598267, |
|
"eval_runtime": 555.8884, |
|
"eval_samples_per_second": 11.286, |
|
"eval_steps_per_second": 1.412, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0002, |
|
"loss": 1.601, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 1.6326826810836792, |
|
"eval_runtime": 555.8386, |
|
"eval_samples_per_second": 11.287, |
|
"eval_steps_per_second": 1.412, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0002, |
|
"loss": 1.595, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 1.6032005548477173, |
|
"eval_runtime": 555.9827, |
|
"eval_samples_per_second": 11.285, |
|
"eval_steps_per_second": 1.412, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5417, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 1.581026315689087, |
|
"eval_runtime": 556.3673, |
|
"eval_samples_per_second": 11.277, |
|
"eval_steps_per_second": 1.411, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5177, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 1.5540947914123535, |
|
"eval_runtime": 556.1818, |
|
"eval_samples_per_second": 11.28, |
|
"eval_steps_per_second": 1.411, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5071, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 1.529414415359497, |
|
"eval_runtime": 556.1478, |
|
"eval_samples_per_second": 11.281, |
|
"eval_steps_per_second": 1.411, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4879, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 1.5049669742584229, |
|
"eval_runtime": 556.064, |
|
"eval_samples_per_second": 11.283, |
|
"eval_steps_per_second": 1.412, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4477, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 1.47505521774292, |
|
"eval_runtime": 555.9558, |
|
"eval_samples_per_second": 11.285, |
|
"eval_steps_per_second": 1.412, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4289, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 1.4537893533706665, |
|
"eval_runtime": 556.1711, |
|
"eval_samples_per_second": 11.281, |
|
"eval_steps_per_second": 1.411, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4179, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 1.4315266609191895, |
|
"eval_runtime": 556.1848, |
|
"eval_samples_per_second": 11.28, |
|
"eval_steps_per_second": 1.411, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3847, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 1.4086532592773438, |
|
"eval_runtime": 556.2903, |
|
"eval_samples_per_second": 11.278, |
|
"eval_steps_per_second": 1.411, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3664, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 1.3867732286453247, |
|
"eval_runtime": 556.1989, |
|
"eval_samples_per_second": 11.28, |
|
"eval_steps_per_second": 1.411, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3493, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 1.361178994178772, |
|
"eval_runtime": 556.384, |
|
"eval_samples_per_second": 11.276, |
|
"eval_steps_per_second": 1.411, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2768, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 1.3487073183059692, |
|
"eval_runtime": 556.4859, |
|
"eval_samples_per_second": 11.274, |
|
"eval_steps_per_second": 1.411, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2273, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 1.3275220394134521, |
|
"eval_runtime": 556.9562, |
|
"eval_samples_per_second": 11.265, |
|
"eval_steps_per_second": 1.409, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2451, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 1.307883858680725, |
|
"eval_runtime": 555.8716, |
|
"eval_samples_per_second": 11.287, |
|
"eval_steps_per_second": 1.412, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0002, |
|
"loss": 1.208, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 1.2866222858428955, |
|
"eval_runtime": 556.1597, |
|
"eval_samples_per_second": 11.281, |
|
"eval_steps_per_second": 1.411, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2102, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 1.27306067943573, |
|
"eval_runtime": 765.8818, |
|
"eval_samples_per_second": 8.192, |
|
"eval_steps_per_second": 1.025, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1778, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 1.2513889074325562, |
|
"eval_runtime": 556.1919, |
|
"eval_samples_per_second": 11.28, |
|
"eval_steps_per_second": 1.411, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1666, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 1.2403820753097534, |
|
"eval_runtime": 555.8669, |
|
"eval_samples_per_second": 11.287, |
|
"eval_steps_per_second": 1.412, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1521, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 1.2223913669586182, |
|
"eval_runtime": 556.0099, |
|
"eval_samples_per_second": 11.284, |
|
"eval_steps_per_second": 1.412, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1431, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 1.2031002044677734, |
|
"eval_runtime": 556.2202, |
|
"eval_samples_per_second": 11.28, |
|
"eval_steps_per_second": 1.411, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1138, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 1.1843565702438354, |
|
"eval_runtime": 556.3313, |
|
"eval_samples_per_second": 11.277, |
|
"eval_steps_per_second": 1.411, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1002, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_loss": 1.1716701984405518, |
|
"eval_runtime": 556.4009, |
|
"eval_samples_per_second": 11.276, |
|
"eval_steps_per_second": 1.411, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0883, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_loss": 1.1533775329589844, |
|
"eval_runtime": 556.2301, |
|
"eval_samples_per_second": 11.28, |
|
"eval_steps_per_second": 1.411, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0899, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_loss": 1.139776587486267, |
|
"eval_runtime": 556.1925, |
|
"eval_samples_per_second": 11.28, |
|
"eval_steps_per_second": 1.411, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0699, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 1.1207506656646729, |
|
"eval_runtime": 916.3506, |
|
"eval_samples_per_second": 6.847, |
|
"eval_steps_per_second": 0.857, |
|
"step": 4100 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 4116, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 1.0672136421373379e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|