{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 4.935064935064935, | |
"eval_steps": 500, | |
"global_step": 190, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.13, | |
"learning_rate": 0.0001, | |
"loss": 2.0038, | |
"step": 5 | |
}, | |
{ | |
"epoch": 0.26, | |
"learning_rate": 0.0001, | |
"loss": 1.9796, | |
"step": 10 | |
}, | |
{ | |
"epoch": 0.39, | |
"learning_rate": 0.0001, | |
"loss": 1.9617, | |
"step": 15 | |
}, | |
{ | |
"epoch": 0.52, | |
"learning_rate": 0.0001, | |
"loss": 2.0267, | |
"step": 20 | |
}, | |
{ | |
"epoch": 0.65, | |
"learning_rate": 0.0001, | |
"loss": 1.9767, | |
"step": 25 | |
}, | |
{ | |
"epoch": 0.78, | |
"learning_rate": 0.0001, | |
"loss": 1.9649, | |
"step": 30 | |
}, | |
{ | |
"epoch": 0.91, | |
"learning_rate": 0.0001, | |
"loss": 2.0785, | |
"step": 35 | |
}, | |
{ | |
"epoch": 1.04, | |
"learning_rate": 0.0001, | |
"loss": 1.8929, | |
"step": 40 | |
}, | |
{ | |
"epoch": 1.17, | |
"learning_rate": 0.0001, | |
"loss": 1.9942, | |
"step": 45 | |
}, | |
{ | |
"epoch": 1.3, | |
"learning_rate": 0.0001, | |
"loss": 1.8956, | |
"step": 50 | |
}, | |
{ | |
"epoch": 1.43, | |
"learning_rate": 0.0001, | |
"loss": 1.8971, | |
"step": 55 | |
}, | |
{ | |
"epoch": 1.56, | |
"learning_rate": 0.0001, | |
"loss": 1.8397, | |
"step": 60 | |
}, | |
{ | |
"epoch": 1.69, | |
"learning_rate": 0.0001, | |
"loss": 1.9135, | |
"step": 65 | |
}, | |
{ | |
"epoch": 1.82, | |
"learning_rate": 0.0001, | |
"loss": 1.8782, | |
"step": 70 | |
}, | |
{ | |
"epoch": 1.95, | |
"learning_rate": 0.0001, | |
"loss": 1.8756, | |
"step": 75 | |
}, | |
{ | |
"epoch": 2.08, | |
"learning_rate": 0.0001, | |
"loss": 1.887, | |
"step": 80 | |
}, | |
{ | |
"epoch": 2.21, | |
"learning_rate": 0.0001, | |
"loss": 1.8408, | |
"step": 85 | |
}, | |
{ | |
"epoch": 2.34, | |
"learning_rate": 0.0001, | |
"loss": 1.8352, | |
"step": 90 | |
}, | |
{ | |
"epoch": 2.47, | |
"learning_rate": 0.0001, | |
"loss": 1.8403, | |
"step": 95 | |
}, | |
{ | |
"epoch": 2.6, | |
"learning_rate": 0.0001, | |
"loss": 1.8519, | |
"step": 100 | |
}, | |
{ | |
"epoch": 2.73, | |
"learning_rate": 0.0001, | |
"loss": 1.8683, | |
"step": 105 | |
}, | |
{ | |
"epoch": 2.86, | |
"learning_rate": 0.0001, | |
"loss": 1.8082, | |
"step": 110 | |
}, | |
{ | |
"epoch": 2.99, | |
"learning_rate": 0.0001, | |
"loss": 1.811, | |
"step": 115 | |
}, | |
{ | |
"epoch": 3.12, | |
"learning_rate": 0.0001, | |
"loss": 1.8451, | |
"step": 120 | |
}, | |
{ | |
"epoch": 3.25, | |
"learning_rate": 0.0001, | |
"loss": 1.7562, | |
"step": 125 | |
}, | |
{ | |
"epoch": 3.38, | |
"learning_rate": 0.0001, | |
"loss": 1.8435, | |
"step": 130 | |
}, | |
{ | |
"epoch": 3.51, | |
"learning_rate": 0.0001, | |
"loss": 1.7416, | |
"step": 135 | |
}, | |
{ | |
"epoch": 3.64, | |
"learning_rate": 0.0001, | |
"loss": 1.8242, | |
"step": 140 | |
}, | |
{ | |
"epoch": 3.77, | |
"learning_rate": 0.0001, | |
"loss": 1.7191, | |
"step": 145 | |
}, | |
{ | |
"epoch": 3.9, | |
"learning_rate": 0.0001, | |
"loss": 1.8372, | |
"step": 150 | |
}, | |
{ | |
"epoch": 4.03, | |
"learning_rate": 0.0001, | |
"loss": 1.6897, | |
"step": 155 | |
}, | |
{ | |
"epoch": 4.16, | |
"learning_rate": 0.0001, | |
"loss": 1.7764, | |
"step": 160 | |
}, | |
{ | |
"epoch": 4.29, | |
"learning_rate": 0.0001, | |
"loss": 1.7044, | |
"step": 165 | |
}, | |
{ | |
"epoch": 4.42, | |
"learning_rate": 0.0001, | |
"loss": 1.6948, | |
"step": 170 | |
}, | |
{ | |
"epoch": 4.55, | |
"learning_rate": 0.0001, | |
"loss": 1.7268, | |
"step": 175 | |
}, | |
{ | |
"epoch": 4.68, | |
"learning_rate": 0.0001, | |
"loss": 1.7703, | |
"step": 180 | |
}, | |
{ | |
"epoch": 4.81, | |
"learning_rate": 0.0001, | |
"loss": 1.7836, | |
"step": 185 | |
}, | |
{ | |
"epoch": 4.94, | |
"learning_rate": 0.0001, | |
"loss": 1.7871, | |
"step": 190 | |
}, | |
{ | |
"epoch": 4.94, | |
"step": 190, | |
"total_flos": 697572311040000.0, | |
"train_loss": 1.8531885046707957, | |
"train_runtime": 316.8119, | |
"train_samples_per_second": 4.861, | |
"train_steps_per_second": 0.6 | |
} | |
], | |
"logging_steps": 5, | |
"max_steps": 190, | |
"num_train_epochs": 5, | |
"save_steps": 1000, | |
"total_flos": 697572311040000.0, | |
"trial_name": null, | |
"trial_params": null | |
} | |