|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 50.0, |
|
"global_step": 7800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.275e-06, |
|
"loss": 16.819, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.4775e-05, |
|
"loss": 6.1908, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.2274999999999996e-05, |
|
"loss": 4.1987, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.9775e-05, |
|
"loss": 3.489, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 3.3590216636657715, |
|
"eval_runtime": 66.6533, |
|
"eval_samples_per_second": 32.226, |
|
"eval_steps_per_second": 2.025, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 3.7275e-05, |
|
"loss": 3.2231, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 4.4775e-05, |
|
"loss": 3.1038, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 5.227499999999999e-05, |
|
"loss": 3.0217, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 5.9774999999999996e-05, |
|
"loss": 2.9903, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"eval_loss": 2.970390796661377, |
|
"eval_runtime": 66.5925, |
|
"eval_samples_per_second": 32.256, |
|
"eval_steps_per_second": 2.027, |
|
"eval_wer": 1.0000977995110025, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 6.7275e-05, |
|
"loss": 2.906, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 7.477499999999999e-05, |
|
"loss": 2.5346, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 7.393014705882353e-05, |
|
"loss": 1.8923, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 7.282720588235294e-05, |
|
"loss": 1.6712, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"eval_loss": 0.6178616285324097, |
|
"eval_runtime": 66.5699, |
|
"eval_samples_per_second": 32.267, |
|
"eval_steps_per_second": 2.028, |
|
"eval_wer": 0.6566259168704156, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 7.172426470588234e-05, |
|
"loss": 1.5317, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 7.062132352941176e-05, |
|
"loss": 1.4151, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 6.951838235294117e-05, |
|
"loss": 1.3153, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"learning_rate": 6.841544117647059e-05, |
|
"loss": 1.2635, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"eval_loss": 0.31762251257896423, |
|
"eval_runtime": 67.311, |
|
"eval_samples_per_second": 31.912, |
|
"eval_steps_per_second": 2.006, |
|
"eval_wer": 0.45310513447432765, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"learning_rate": 6.731249999999999e-05, |
|
"loss": 1.1929, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 6.62095588235294e-05, |
|
"loss": 1.166, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 12.18, |
|
"learning_rate": 6.510661764705882e-05, |
|
"loss": 1.0971, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 6.400367647058824e-05, |
|
"loss": 1.0819, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"eval_loss": 0.2516830265522003, |
|
"eval_runtime": 66.7082, |
|
"eval_samples_per_second": 32.2, |
|
"eval_steps_per_second": 2.024, |
|
"eval_wer": 0.35080684596577016, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 13.46, |
|
"learning_rate": 6.290073529411764e-05, |
|
"loss": 1.0892, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 6.179779411764705e-05, |
|
"loss": 1.0534, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"learning_rate": 6.069485294117646e-05, |
|
"loss": 1.012, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"learning_rate": 5.9591911764705876e-05, |
|
"loss": 1.0136, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"eval_loss": 0.22566433250904083, |
|
"eval_runtime": 66.3228, |
|
"eval_samples_per_second": 32.387, |
|
"eval_steps_per_second": 2.036, |
|
"eval_wer": 0.3123716381418093, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 5.848897058823529e-05, |
|
"loss": 0.9991, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 5.73860294117647e-05, |
|
"loss": 0.9722, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 17.31, |
|
"learning_rate": 5.6283088235294115e-05, |
|
"loss": 0.9794, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 17.95, |
|
"learning_rate": 5.5180147058823523e-05, |
|
"loss": 0.9625, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 17.95, |
|
"eval_loss": 0.19747723639011383, |
|
"eval_runtime": 67.1687, |
|
"eval_samples_per_second": 31.979, |
|
"eval_steps_per_second": 2.01, |
|
"eval_wer": 0.2311002444987775, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 18.59, |
|
"learning_rate": 5.407720588235294e-05, |
|
"loss": 0.9505, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 5.297426470588235e-05, |
|
"loss": 0.9337, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 19.87, |
|
"learning_rate": 5.187132352941176e-05, |
|
"loss": 0.936, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 20.51, |
|
"learning_rate": 5.076838235294117e-05, |
|
"loss": 0.901, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 20.51, |
|
"eval_loss": 0.19860759377479553, |
|
"eval_runtime": 66.128, |
|
"eval_samples_per_second": 32.482, |
|
"eval_steps_per_second": 2.041, |
|
"eval_wer": 0.20968215158924206, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 21.15, |
|
"learning_rate": 4.9665441176470586e-05, |
|
"loss": 0.9179, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 21.79, |
|
"learning_rate": 4.8562499999999995e-05, |
|
"loss": 0.8963, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 4.745955882352941e-05, |
|
"loss": 0.8826, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 23.08, |
|
"learning_rate": 4.635661764705882e-05, |
|
"loss": 0.8842, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 23.08, |
|
"eval_loss": 0.19037693738937378, |
|
"eval_runtime": 67.9694, |
|
"eval_samples_per_second": 31.602, |
|
"eval_steps_per_second": 1.986, |
|
"eval_wer": 0.2039119804400978, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 23.72, |
|
"learning_rate": 4.5253676470588234e-05, |
|
"loss": 0.8683, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 24.36, |
|
"learning_rate": 4.416176470588235e-05, |
|
"loss": 0.8599, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 4.305882352941176e-05, |
|
"loss": 0.8658, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 4.1955882352941173e-05, |
|
"loss": 0.8542, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"eval_loss": 0.1846681386232376, |
|
"eval_runtime": 67.5681, |
|
"eval_samples_per_second": 31.79, |
|
"eval_steps_per_second": 1.998, |
|
"eval_wer": 0.19814180929095354, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 26.28, |
|
"learning_rate": 4.085294117647058e-05, |
|
"loss": 0.8468, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 26.92, |
|
"learning_rate": 3.975e-05, |
|
"loss": 0.8462, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 27.56, |
|
"learning_rate": 3.8647058823529406e-05, |
|
"loss": 0.8284, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 28.21, |
|
"learning_rate": 3.754411764705882e-05, |
|
"loss": 0.8244, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 28.21, |
|
"eval_loss": 0.1804967224597931, |
|
"eval_runtime": 66.5917, |
|
"eval_samples_per_second": 32.256, |
|
"eval_steps_per_second": 2.027, |
|
"eval_wer": 0.18474327628361858, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 3.6441176470588236e-05, |
|
"loss": 0.8288, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 29.49, |
|
"learning_rate": 3.5338235294117645e-05, |
|
"loss": 0.7904, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 30.13, |
|
"learning_rate": 3.423529411764706e-05, |
|
"loss": 0.8157, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 30.77, |
|
"learning_rate": 3.313235294117647e-05, |
|
"loss": 0.7689, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 30.77, |
|
"eval_loss": 0.17355979979038239, |
|
"eval_runtime": 69.3305, |
|
"eval_samples_per_second": 30.982, |
|
"eval_steps_per_second": 1.947, |
|
"eval_wer": 0.18317848410757948, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 31.41, |
|
"learning_rate": 3.202941176470588e-05, |
|
"loss": 0.7948, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 3.092647058823529e-05, |
|
"loss": 0.8035, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 32.69, |
|
"learning_rate": 2.9823529411764704e-05, |
|
"loss": 0.7754, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 2.8720588235294116e-05, |
|
"loss": 0.7825, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"eval_loss": 0.1698261797428131, |
|
"eval_runtime": 67.6625, |
|
"eval_samples_per_second": 31.746, |
|
"eval_steps_per_second": 1.995, |
|
"eval_wer": 0.18210268948655256, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 33.97, |
|
"learning_rate": 2.7617647058823528e-05, |
|
"loss": 0.7748, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 34.62, |
|
"learning_rate": 2.651470588235294e-05, |
|
"loss": 0.7704, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 2.5411764705882348e-05, |
|
"loss": 0.7597, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 35.9, |
|
"learning_rate": 2.430882352941176e-05, |
|
"loss": 0.7817, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 35.9, |
|
"eval_loss": 0.17581327259540558, |
|
"eval_runtime": 67.4489, |
|
"eval_samples_per_second": 31.846, |
|
"eval_steps_per_second": 2.002, |
|
"eval_wer": 0.18034229828850856, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 36.54, |
|
"learning_rate": 2.3205882352941172e-05, |
|
"loss": 0.744, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 37.18, |
|
"learning_rate": 2.2102941176470584e-05, |
|
"loss": 0.7508, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 37.82, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.746, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 1.989705882352941e-05, |
|
"loss": 0.7488, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"eval_loss": 0.16634251177310944, |
|
"eval_runtime": 67.3054, |
|
"eval_samples_per_second": 31.914, |
|
"eval_steps_per_second": 2.006, |
|
"eval_wer": 0.17603911980440098, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 39.1, |
|
"learning_rate": 1.8794117647058823e-05, |
|
"loss": 0.7486, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 39.74, |
|
"learning_rate": 1.7691176470588234e-05, |
|
"loss": 0.7281, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 40.38, |
|
"learning_rate": 1.6588235294117646e-05, |
|
"loss": 0.7317, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 41.03, |
|
"learning_rate": 1.5485294117647058e-05, |
|
"loss": 0.7171, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 41.03, |
|
"eval_loss": 0.16363976895809174, |
|
"eval_runtime": 68.4211, |
|
"eval_samples_per_second": 31.394, |
|
"eval_steps_per_second": 1.973, |
|
"eval_wer": 0.17212713936430318, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 1.438235294117647e-05, |
|
"loss": 0.7196, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 42.31, |
|
"learning_rate": 1.3279411764705882e-05, |
|
"loss": 0.7292, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 42.95, |
|
"learning_rate": 1.2176470588235294e-05, |
|
"loss": 0.7319, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 43.59, |
|
"learning_rate": 1.1073529411764704e-05, |
|
"loss": 0.7222, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 43.59, |
|
"eval_loss": 0.16627496480941772, |
|
"eval_runtime": 67.2415, |
|
"eval_samples_per_second": 31.945, |
|
"eval_steps_per_second": 2.008, |
|
"eval_wer": 0.17290953545232274, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 44.23, |
|
"learning_rate": 9.970588235294116e-06, |
|
"loss": 0.709, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 8.86764705882353e-06, |
|
"loss": 0.7162, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 45.51, |
|
"learning_rate": 7.764705882352941e-06, |
|
"loss": 0.7282, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 46.15, |
|
"learning_rate": 6.6838235294117634e-06, |
|
"loss": 0.7156, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 46.15, |
|
"eval_loss": 0.16330638527870178, |
|
"eval_runtime": 67.815, |
|
"eval_samples_per_second": 31.674, |
|
"eval_steps_per_second": 1.991, |
|
"eval_wer": 0.17154034229828852, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 46.79, |
|
"learning_rate": 5.580882352941176e-06, |
|
"loss": 0.7001, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 47.44, |
|
"learning_rate": 4.477941176470588e-06, |
|
"loss": 0.7034, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 3.3749999999999995e-06, |
|
"loss": 0.6991, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 48.72, |
|
"learning_rate": 2.2720588235294113e-06, |
|
"loss": 0.7121, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 48.72, |
|
"eval_loss": 0.16656027734279633, |
|
"eval_runtime": 66.7215, |
|
"eval_samples_per_second": 32.194, |
|
"eval_steps_per_second": 2.023, |
|
"eval_wer": 0.17183374083129585, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 49.36, |
|
"learning_rate": 1.1691176470588234e-06, |
|
"loss": 0.7012, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 6.617647058823529e-08, |
|
"loss": 0.6972, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"step": 7800, |
|
"total_flos": 2.5302348778981876e+19, |
|
"train_loss": 1.4054935827010717, |
|
"train_runtime": 11681.5311, |
|
"train_samples_per_second": 21.243, |
|
"train_steps_per_second": 0.668 |
|
} |
|
], |
|
"max_steps": 7800, |
|
"num_train_epochs": 50, |
|
"total_flos": 2.5302348778981876e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|