{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 14.28415300546448,
  "global_step": 1300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.22,
      "learning_rate": 2.5e-06,
      "loss": 3.5867,
      "step": 20
    },
    {
      "epoch": 0.44,
      "learning_rate": 5e-06,
      "loss": 3.5457,
      "step": 40
    },
    {
      "epoch": 0.66,
      "learning_rate": 7.5e-06,
      "loss": 3.4513,
      "step": 60
    },
    {
      "epoch": 0.87,
      "learning_rate": 1e-05,
      "loss": 3.3432,
      "step": 80
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.25e-05,
      "loss": 3.3533,
      "step": 100
    },
    {
      "epoch": 1.1,
      "eval_loss": 3.2806732654571533,
      "eval_runtime": 190.4728,
      "eval_samples_per_second": 25.426,
      "eval_steps_per_second": 0.798,
      "eval_wer": 1.0,
      "step": 100
    },
    {
      "epoch": 1.32,
      "learning_rate": 1.5e-05,
      "loss": 3.2217,
      "step": 120
    },
    {
      "epoch": 1.54,
      "learning_rate": 1.7500000000000002e-05,
      "loss": 3.1765,
      "step": 140
    },
    {
      "epoch": 1.75,
      "learning_rate": 2e-05,
      "loss": 3.1408,
      "step": 160
    },
    {
      "epoch": 1.97,
      "learning_rate": 2.2499999999999998e-05,
      "loss": 3.1165,
      "step": 180
    },
    {
      "epoch": 2.2,
      "learning_rate": 2.5e-05,
      "loss": 3.1709,
      "step": 200
    },
    {
      "epoch": 2.2,
      "eval_loss": 3.1325438022613525,
      "eval_runtime": 192.4978,
      "eval_samples_per_second": 25.159,
      "eval_steps_per_second": 0.79,
      "eval_wer": 1.0,
      "step": 200
    },
    {
      "epoch": 2.42,
      "learning_rate": 2.75e-05,
      "loss": 3.079,
      "step": 220
    },
    {
      "epoch": 2.63,
      "learning_rate": 3e-05,
      "loss": 3.0677,
      "step": 240
    },
    {
      "epoch": 2.85,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 3.0656,
      "step": 260
    },
    {
      "epoch": 3.08,
      "learning_rate": 3.5000000000000004e-05,
      "loss": 3.1463,
      "step": 280
    },
    {
      "epoch": 3.3,
      "learning_rate": 3.75e-05,
      "loss": 3.0573,
      "step": 300
    },
    {
      "epoch": 3.3,
      "eval_loss": 3.0614514350891113,
      "eval_runtime": 194.36,
      "eval_samples_per_second": 24.918,
      "eval_steps_per_second": 0.782,
      "eval_wer": 1.0,
      "step": 300
    },
    {
      "epoch": 3.51,
      "learning_rate": 4e-05,
      "loss": 3.0511,
      "step": 320
    },
    {
      "epoch": 3.73,
      "learning_rate": 4.25e-05,
      "loss": 3.0358,
      "step": 340
    },
    {
      "epoch": 3.95,
      "learning_rate": 4.4999999999999996e-05,
      "loss": 3.0416,
      "step": 360
    },
    {
      "epoch": 4.17,
      "learning_rate": 4.75e-05,
      "loss": 3.1053,
      "step": 380
    },
    {
      "epoch": 4.39,
      "learning_rate": 5e-05,
      "loss": 3.0314,
      "step": 400
    },
    {
      "epoch": 4.39,
      "eval_loss": 3.0990231037139893,
      "eval_runtime": 198.3688,
      "eval_samples_per_second": 24.414,
      "eval_steps_per_second": 0.766,
      "eval_wer": 1.0,
      "step": 400
    },
    {
      "epoch": 4.61,
      "learning_rate": 5.25e-05,
      "loss": 3.0309,
      "step": 420
    },
    {
      "epoch": 4.83,
      "learning_rate": 5.5e-05,
      "loss": 3.0259,
      "step": 440
    },
    {
      "epoch": 5.05,
      "learning_rate": 5.75e-05,
      "loss": 3.0998,
      "step": 460
    },
    {
      "epoch": 5.27,
      "learning_rate": 6e-05,
      "loss": 3.0152,
      "step": 480
    },
    {
      "epoch": 5.49,
      "learning_rate": 6.25e-05,
      "loss": 3.0129,
      "step": 500
    },
    {
      "epoch": 5.49,
      "eval_loss": 3.039973497390747,
      "eval_runtime": 190.8567,
      "eval_samples_per_second": 25.375,
      "eval_steps_per_second": 0.796,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 5.71,
      "learning_rate": 6.500000000000001e-05,
      "loss": 3.0088,
      "step": 520
    },
    {
      "epoch": 5.93,
      "learning_rate": 6.75e-05,
      "loss": 3.0051,
      "step": 540
    },
    {
      "epoch": 6.15,
      "learning_rate": 7.000000000000001e-05,
      "loss": 3.073,
      "step": 560
    },
    {
      "epoch": 6.37,
      "learning_rate": 7.25e-05,
      "loss": 3.0031,
      "step": 580
    },
    {
      "epoch": 6.59,
      "learning_rate": 7.5e-05,
      "loss": 2.9964,
      "step": 600
    },
    {
      "epoch": 6.59,
      "eval_loss": 2.998962640762329,
      "eval_runtime": 193.5213,
      "eval_samples_per_second": 25.026,
      "eval_steps_per_second": 0.785,
      "eval_wer": 1.0,
      "step": 600
    },
    {
      "epoch": 6.81,
      "learning_rate": 7.75e-05,
      "loss": 2.9921,
      "step": 620
    },
    {
      "epoch": 7.03,
      "learning_rate": 8e-05,
      "loss": 3.0665,
      "step": 640
    },
    {
      "epoch": 7.25,
      "learning_rate": 8.25e-05,
      "loss": 2.9826,
      "step": 660
    },
    {
      "epoch": 7.47,
      "learning_rate": 8.5e-05,
      "loss": 2.9689,
      "step": 680
    },
    {
      "epoch": 7.69,
      "learning_rate": 8.75e-05,
      "loss": 2.9602,
      "step": 700
    },
    {
      "epoch": 7.69,
      "eval_loss": 2.9620397090911865,
      "eval_runtime": 193.5851,
      "eval_samples_per_second": 25.017,
      "eval_steps_per_second": 0.785,
      "eval_wer": 1.0,
      "step": 700
    },
    {
      "epoch": 7.91,
      "learning_rate": 8.999999999999999e-05,
      "loss": 2.9639,
      "step": 720
    },
    {
      "epoch": 8.13,
      "learning_rate": 9.25e-05,
      "loss": 3.0215,
      "step": 740
    },
    {
      "epoch": 8.35,
      "learning_rate": 9.5e-05,
      "loss": 2.9454,
      "step": 760
    },
    {
      "epoch": 8.57,
      "learning_rate": 9.750000000000001e-05,
      "loss": 2.9239,
      "step": 780
    },
    {
      "epoch": 8.79,
      "learning_rate": 0.0001,
      "loss": 2.8756,
      "step": 800
    },
    {
      "epoch": 8.79,
      "eval_loss": 2.7302000522613525,
      "eval_runtime": 191.8065,
      "eval_samples_per_second": 25.249,
      "eval_steps_per_second": 0.792,
      "eval_wer": 1.0,
      "step": 800
    },
    {
      "epoch": 9.01,
      "learning_rate": 0.0001025,
      "loss": 2.8933,
      "step": 820
    },
    {
      "epoch": 9.23,
      "learning_rate": 0.000105,
      "loss": 2.7318,
      "step": 840
    },
    {
      "epoch": 9.45,
      "learning_rate": 0.0001075,
      "loss": 2.5941,
      "step": 860
    },
    {
      "epoch": 9.67,
      "learning_rate": 0.00011,
      "loss": 2.4441,
      "step": 880
    },
    {
      "epoch": 9.89,
      "learning_rate": 0.00011250000000000001,
      "loss": 2.2931,
      "step": 900
    },
    {
      "epoch": 9.89,
      "eval_loss": 1.5057899951934814,
      "eval_runtime": 196.4368,
      "eval_samples_per_second": 24.654,
      "eval_steps_per_second": 0.774,
      "eval_wer": 0.9775759296054499,
      "step": 900
    },
    {
      "epoch": 10.11,
      "learning_rate": 0.000115,
      "loss": 2.1999,
      "step": 920
    },
    {
      "epoch": 10.33,
      "learning_rate": 0.0001175,
      "loss": 2.0574,
      "step": 940
    },
    {
      "epoch": 10.55,
      "learning_rate": 0.00012,
      "loss": 1.9672,
      "step": 960
    },
    {
      "epoch": 10.77,
      "learning_rate": 0.0001225,
      "loss": 1.9015,
      "step": 980
    },
    {
      "epoch": 10.98,
      "learning_rate": 0.000125,
      "loss": 1.8427,
      "step": 1000
    },
    {
      "epoch": 10.98,
      "eval_loss": 0.9154536724090576,
      "eval_runtime": 203.1168,
      "eval_samples_per_second": 23.843,
      "eval_steps_per_second": 0.748,
      "eval_wer": 0.7832245245529378,
      "step": 1000
    },
    {
      "epoch": 11.21,
      "learning_rate": 0.0001275,
      "loss": 1.8155,
      "step": 1020
    },
    {
      "epoch": 11.43,
      "learning_rate": 0.00013000000000000002,
      "loss": 1.6767,
      "step": 1040
    },
    {
      "epoch": 11.64,
      "learning_rate": 0.00013250000000000002,
      "loss": 1.5184,
      "step": 1060
    },
    {
      "epoch": 11.86,
      "learning_rate": 0.000135,
      "loss": 1.4505,
      "step": 1080
    },
    {
      "epoch": 12.09,
      "learning_rate": 0.0001375,
      "loss": 1.4286,
      "step": 1100
    },
    {
      "epoch": 12.09,
      "eval_loss": 0.4074769914150238,
      "eval_runtime": 194.7798,
      "eval_samples_per_second": 24.864,
      "eval_steps_per_second": 0.78,
      "eval_wer": 0.37964802724950325,
      "step": 1100
    },
    {
      "epoch": 12.31,
      "learning_rate": 0.00014000000000000001,
      "loss": 1.3478,
      "step": 1120
    },
    {
      "epoch": 12.52,
      "learning_rate": 0.0001425,
      "loss": 1.3164,
      "step": 1140
    },
    {
      "epoch": 12.74,
      "learning_rate": 0.000145,
      "loss": 1.269,
      "step": 1160
    },
    {
      "epoch": 12.96,
      "learning_rate": 0.0001475,
      "loss": 1.2292,
      "step": 1180
    },
    {
      "epoch": 13.19,
      "learning_rate": 0.00015,
      "loss": 1.2229,
      "step": 1200
    },
    {
      "epoch": 13.19,
      "eval_loss": 0.28933778405189514,
      "eval_runtime": 192.6398,
      "eval_samples_per_second": 25.14,
      "eval_steps_per_second": 0.789,
      "eval_wer": 0.26520011353959694,
      "step": 1200
    },
    {
      "epoch": 13.4,
      "learning_rate": 0.0001525,
      "loss": 1.182,
      "step": 1220
    },
    {
      "epoch": 13.62,
      "learning_rate": 0.000155,
      "loss": 1.1768,
      "step": 1240
    },
    {
      "epoch": 13.84,
      "learning_rate": 0.0001575,
      "loss": 1.1534,
      "step": 1260
    },
    {
      "epoch": 14.07,
      "learning_rate": 0.00016,
      "loss": 1.1832,
      "step": 1280
    },
    {
      "epoch": 14.28,
      "learning_rate": 0.00016250000000000002,
      "loss": 1.1106,
      "step": 1300
    },
    {
      "epoch": 14.28,
      "eval_loss": 0.24685777723789215,
      "eval_runtime": 198.5067,
      "eval_samples_per_second": 24.397,
      "eval_steps_per_second": 0.766,
      "eval_wer": 0.2253760999148453,
      "step": 1300
    }
  ],
  "max_steps": 4550,
  "num_train_epochs": 50,
  "total_flos": 2.013668332353175e+19,
  "trial_name": null,
  "trial_params": null
}