|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 30.0, |
|
"global_step": 3870, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 4.564502716064453, |
|
"eval_runtime": 179.128, |
|
"eval_samples_per_second": 20.142, |
|
"eval_steps_per_second": 2.518, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 2.9015955924987793, |
|
"eval_runtime": 175.3683, |
|
"eval_samples_per_second": 20.574, |
|
"eval_steps_per_second": 2.572, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 2.266641616821289, |
|
"eval_runtime": 174.1906, |
|
"eval_samples_per_second": 20.713, |
|
"eval_steps_per_second": 2.589, |
|
"eval_wer": 1.098215533709174, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_loss": 0.6078956127166748, |
|
"eval_runtime": 181.241, |
|
"eval_samples_per_second": 19.907, |
|
"eval_steps_per_second": 2.488, |
|
"eval_wer": 0.6375623165447515, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 0.0002988, |
|
"loss": 3.2188, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"eval_loss": 0.49847397208213806, |
|
"eval_runtime": 175.9858, |
|
"eval_samples_per_second": 20.502, |
|
"eval_steps_per_second": 2.563, |
|
"eval_wer": 0.5007687648511392, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_loss": 0.447698175907135, |
|
"eval_runtime": 175.6036, |
|
"eval_samples_per_second": 20.546, |
|
"eval_steps_per_second": 2.568, |
|
"eval_wer": 0.44690863346223736, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"eval_loss": 0.39529213309288025, |
|
"eval_runtime": 175.4478, |
|
"eval_samples_per_second": 20.565, |
|
"eval_steps_per_second": 2.571, |
|
"eval_wer": 0.3914643805618972, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"eval_loss": 0.43193477392196655, |
|
"eval_runtime": 176.2224, |
|
"eval_samples_per_second": 20.474, |
|
"eval_steps_per_second": 2.559, |
|
"eval_wer": 0.3921166658901365, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_loss": 0.4170827865600586, |
|
"eval_runtime": 175.9073, |
|
"eval_samples_per_second": 20.511, |
|
"eval_steps_per_second": 2.564, |
|
"eval_wer": 0.3698457811116806, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 0.00025566765578635014, |
|
"loss": 0.2193, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"eval_loss": 0.3956995904445648, |
|
"eval_runtime": 178.52, |
|
"eval_samples_per_second": 20.211, |
|
"eval_steps_per_second": 2.526, |
|
"eval_wer": 0.36001490937893116, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"eval_loss": 0.37300992012023926, |
|
"eval_runtime": 175.8463, |
|
"eval_samples_per_second": 20.518, |
|
"eval_steps_per_second": 2.565, |
|
"eval_wer": 0.34929879327214275, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"eval_loss": 0.3779752850532532, |
|
"eval_runtime": 176.2153, |
|
"eval_samples_per_second": 20.475, |
|
"eval_steps_per_second": 2.559, |
|
"eval_wer": 0.3348087406233984, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"eval_loss": 0.41326919198036194, |
|
"eval_runtime": 173.7883, |
|
"eval_samples_per_second": 20.761, |
|
"eval_steps_per_second": 2.595, |
|
"eval_wer": 0.35680007454689466, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"eval_loss": 0.3984449505805969, |
|
"eval_runtime": 175.4199, |
|
"eval_samples_per_second": 20.568, |
|
"eval_steps_per_second": 2.571, |
|
"eval_wer": 0.31929366817313515, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"learning_rate": 0.00021115727002967357, |
|
"loss": 0.1129, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"eval_loss": 0.38447898626327515, |
|
"eval_runtime": 175.2875, |
|
"eval_samples_per_second": 20.583, |
|
"eval_steps_per_second": 2.573, |
|
"eval_wer": 0.3174299958067372, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"eval_loss": 0.3882218301296234, |
|
"eval_runtime": 175.3702, |
|
"eval_samples_per_second": 20.574, |
|
"eval_steps_per_second": 2.572, |
|
"eval_wer": 0.3162186087685785, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"eval_loss": 0.39824405312538147, |
|
"eval_runtime": 175.3906, |
|
"eval_samples_per_second": 20.571, |
|
"eval_steps_per_second": 2.571, |
|
"eval_wer": 0.3008433117457951, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 13.95, |
|
"eval_loss": 0.3901589810848236, |
|
"eval_runtime": 177.0061, |
|
"eval_samples_per_second": 20.383, |
|
"eval_steps_per_second": 2.548, |
|
"eval_wer": 0.3198061780738946, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 14.73, |
|
"eval_loss": 0.4082184135913849, |
|
"eval_runtime": 175.0779, |
|
"eval_samples_per_second": 20.608, |
|
"eval_steps_per_second": 2.576, |
|
"eval_wer": 0.3237198900433304, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"learning_rate": 0.000166646884272997, |
|
"loss": 0.0765, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"eval_loss": 0.3732178211212158, |
|
"eval_runtime": 175.4721, |
|
"eval_samples_per_second": 20.562, |
|
"eval_steps_per_second": 2.57, |
|
"eval_wer": 0.3125844476541024, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 16.28, |
|
"eval_loss": 0.3892667293548584, |
|
"eval_runtime": 178.0744, |
|
"eval_samples_per_second": 20.261, |
|
"eval_steps_per_second": 2.533, |
|
"eval_wer": 0.3000978427992359, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 17.05, |
|
"eval_loss": 0.4168277978897095, |
|
"eval_runtime": 176.2583, |
|
"eval_samples_per_second": 20.47, |
|
"eval_steps_per_second": 2.559, |
|
"eval_wer": 0.308344593020547, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 17.83, |
|
"eval_loss": 0.4192778170108795, |
|
"eval_runtime": 172.3953, |
|
"eval_samples_per_second": 20.929, |
|
"eval_steps_per_second": 2.616, |
|
"eval_wer": 0.3044308810511112, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"eval_loss": 0.40058156847953796, |
|
"eval_runtime": 174.3893, |
|
"eval_samples_per_second": 20.689, |
|
"eval_steps_per_second": 2.586, |
|
"eval_wer": 0.3013092298373946, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 19.38, |
|
"learning_rate": 0.00012213649851632047, |
|
"loss": 0.0588, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 19.38, |
|
"eval_loss": 0.38357821106910706, |
|
"eval_runtime": 180.4646, |
|
"eval_samples_per_second": 19.993, |
|
"eval_steps_per_second": 2.499, |
|
"eval_wer": 0.2892419512649676, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 20.16, |
|
"eval_loss": 0.3760845959186554, |
|
"eval_runtime": 191.865, |
|
"eval_samples_per_second": 18.805, |
|
"eval_steps_per_second": 2.351, |
|
"eval_wer": 0.2902669710664865, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 20.93, |
|
"eval_loss": 0.38948509097099304, |
|
"eval_runtime": 181.0859, |
|
"eval_samples_per_second": 19.924, |
|
"eval_steps_per_second": 2.491, |
|
"eval_wer": 0.29301588780692356, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 21.71, |
|
"eval_loss": 0.3884966969490051, |
|
"eval_runtime": 179.119, |
|
"eval_samples_per_second": 20.143, |
|
"eval_steps_per_second": 2.518, |
|
"eval_wer": 0.27913152867725854, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 22.48, |
|
"eval_loss": 0.3901614248752594, |
|
"eval_runtime": 182.8151, |
|
"eval_samples_per_second": 19.736, |
|
"eval_steps_per_second": 2.467, |
|
"eval_wer": 0.2891487676466477, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 23.26, |
|
"learning_rate": 7.762611275964391e-05, |
|
"loss": 0.0448, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 23.26, |
|
"eval_loss": 0.42003825306892395, |
|
"eval_runtime": 180.6554, |
|
"eval_samples_per_second": 19.972, |
|
"eval_steps_per_second": 2.496, |
|
"eval_wer": 0.2849089130130923, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 24.03, |
|
"eval_loss": 0.40127792954444885, |
|
"eval_runtime": 180.093, |
|
"eval_samples_per_second": 20.034, |
|
"eval_steps_per_second": 2.504, |
|
"eval_wer": 0.27987699762381774, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 24.81, |
|
"eval_loss": 0.4039434492588043, |
|
"eval_runtime": 182.9792, |
|
"eval_samples_per_second": 19.718, |
|
"eval_steps_per_second": 2.465, |
|
"eval_wer": 0.273121185295625, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 25.58, |
|
"eval_loss": 0.397048681974411, |
|
"eval_runtime": 186.4145, |
|
"eval_samples_per_second": 19.355, |
|
"eval_steps_per_second": 2.419, |
|
"eval_wer": 0.26473465964683407, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 26.36, |
|
"eval_loss": 0.4080738127231598, |
|
"eval_runtime": 183.3845, |
|
"eval_samples_per_second": 19.675, |
|
"eval_steps_per_second": 2.459, |
|
"eval_wer": 0.26902110608954943, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 27.13, |
|
"learning_rate": 3.311572700296736e-05, |
|
"loss": 0.0351, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 27.13, |
|
"eval_loss": 0.4090190827846527, |
|
"eval_runtime": 188.4549, |
|
"eval_samples_per_second": 19.145, |
|
"eval_steps_per_second": 2.393, |
|
"eval_wer": 0.26743698457811116, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 27.91, |
|
"eval_loss": 0.3952561318874359, |
|
"eval_runtime": 183.4934, |
|
"eval_samples_per_second": 19.663, |
|
"eval_steps_per_second": 2.458, |
|
"eval_wer": 0.26627218934911245, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 28.68, |
|
"eval_loss": 0.40437063574790955, |
|
"eval_runtime": 188.3196, |
|
"eval_samples_per_second": 19.159, |
|
"eval_steps_per_second": 2.395, |
|
"eval_wer": 0.26496761869263386, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 29.46, |
|
"eval_loss": 0.3968600630760193, |
|
"eval_runtime": 182.2466, |
|
"eval_samples_per_second": 19.797, |
|
"eval_steps_per_second": 2.475, |
|
"eval_wer": 0.26459488421935423, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 3870, |
|
"total_flos": 4.209827274605221e+19, |
|
"train_loss": 0.4894287838800317, |
|
"train_runtime": 22523.7187, |
|
"train_samples_per_second": 10.974, |
|
"train_steps_per_second": 0.172 |
|
} |
|
], |
|
"max_steps": 3870, |
|
"num_train_epochs": 30, |
|
"total_flos": 4.209827274605221e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|