|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.8777365254128793, |
|
"global_step": 11000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00025, |
|
"loss": 1.6044, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0005, |
|
"loss": 0.957, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 0.7217180132865906, |
|
"eval_phone_accuracy": 0.7656547891757681, |
|
"eval_runtime": 9.9001, |
|
"eval_samples_per_second": 20.202, |
|
"eval_steps_per_second": 1.717, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004766703994027622, |
|
"loss": 0.8156, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0004533407988055245, |
|
"loss": 0.7327, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 0.5681599974632263, |
|
"eval_phone_accuracy": 0.8123568272172383, |
|
"eval_runtime": 9.9031, |
|
"eval_samples_per_second": 20.196, |
|
"eval_steps_per_second": 1.717, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00043001119820828664, |
|
"loss": 0.6781, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0004066815976110489, |
|
"loss": 0.6426, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 0.4999035894870758, |
|
"eval_phone_accuracy": 0.8317913778579135, |
|
"eval_runtime": 9.8493, |
|
"eval_samples_per_second": 20.306, |
|
"eval_steps_per_second": 1.726, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0003833519970138111, |
|
"loss": 0.6148, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0003600223964165734, |
|
"loss": 0.5924, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 0.4667314887046814, |
|
"eval_phone_accuracy": 0.8421219869056944, |
|
"eval_runtime": 10.1485, |
|
"eval_samples_per_second": 19.707, |
|
"eval_steps_per_second": 1.675, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0003366927958193356, |
|
"loss": 0.5743, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0003133631952220978, |
|
"loss": 0.559, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 0.43349266052246094, |
|
"eval_phone_accuracy": 0.8515954066476509, |
|
"eval_runtime": 10.2876, |
|
"eval_samples_per_second": 19.441, |
|
"eval_steps_per_second": 1.652, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00029003359462486, |
|
"loss": 0.5442, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00026670399402762223, |
|
"loss": 0.5294, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 0.4208233654499054, |
|
"eval_phone_accuracy": 0.8561843252590042, |
|
"eval_runtime": 10.0364, |
|
"eval_samples_per_second": 19.928, |
|
"eval_steps_per_second": 1.694, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00024337439343038447, |
|
"loss": 0.5128, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0002200447928331467, |
|
"loss": 0.5037, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 0.40133553743362427, |
|
"eval_phone_accuracy": 0.8622585460295878, |
|
"eval_runtime": 10.294, |
|
"eval_samples_per_second": 19.429, |
|
"eval_steps_per_second": 1.651, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00019671519223590892, |
|
"loss": 0.497, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00017338559163867115, |
|
"loss": 0.4918, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 0.3905256986618042, |
|
"eval_phone_accuracy": 0.8658424840754918, |
|
"eval_runtime": 10.3057, |
|
"eval_samples_per_second": 19.407, |
|
"eval_steps_per_second": 1.65, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0001500559910414334, |
|
"loss": 0.4832, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0001267263904441956, |
|
"loss": 0.475, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_loss": 0.37418654561042786, |
|
"eval_phone_accuracy": 0.8692195143579209, |
|
"eval_runtime": 10.1391, |
|
"eval_samples_per_second": 19.726, |
|
"eval_steps_per_second": 1.677, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00010339678984695783, |
|
"loss": 0.4684, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.006718924972005e-05, |
|
"loss": 0.4647, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_loss": 0.36982452869415283, |
|
"eval_phone_accuracy": 0.8727960628408435, |
|
"eval_runtime": 9.9379, |
|
"eval_samples_per_second": 20.125, |
|
"eval_steps_per_second": 1.711, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.678424785367675e-05, |
|
"loss": 0.4579, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.345464725643897e-05, |
|
"loss": 0.4556, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_loss": 0.3634908199310303, |
|
"eval_phone_accuracy": 0.875249397750617, |
|
"eval_runtime": 10.1013, |
|
"eval_samples_per_second": 19.799, |
|
"eval_steps_per_second": 1.683, |
|
"step": 11000 |
|
} |
|
], |
|
"max_steps": 11716, |
|
"num_train_epochs": 2, |
|
"total_flos": 1.8523977459584336e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|