|
{ |
|
"best_metric": 0.5565288662910461, |
|
"best_model_checkpoint": "/scratch/skscla001/results/mms-1b-all-bem-natbed-combined/checkpoint-3800", |
|
"epoch": 5.506883604505632, |
|
"eval_steps": 100, |
|
"global_step": 4400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1251564455569462, |
|
"grad_norm": 4.582801342010498, |
|
"learning_rate": 0.00029099999999999997, |
|
"loss": 8.2239, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1251564455569462, |
|
"eval_loss": 1.3301187753677368, |
|
"eval_runtime": 95.4493, |
|
"eval_samples_per_second": 14.227, |
|
"eval_steps_per_second": 1.781, |
|
"eval_wer": 0.8959954139710098, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2503128911138924, |
|
"grad_norm": 2.8414461612701416, |
|
"learning_rate": 0.000298780896522832, |
|
"loss": 0.8992, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2503128911138924, |
|
"eval_loss": 0.7774138450622559, |
|
"eval_runtime": 95.7815, |
|
"eval_samples_per_second": 14.178, |
|
"eval_steps_per_second": 1.775, |
|
"eval_wer": 0.6657112439603636, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.37546933667083854, |
|
"grad_norm": 1.6420665979385376, |
|
"learning_rate": 0.00029752408881441136, |
|
"loss": 0.8263, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.37546933667083854, |
|
"eval_loss": 0.7549673914909363, |
|
"eval_runtime": 95.0731, |
|
"eval_samples_per_second": 14.284, |
|
"eval_steps_per_second": 1.788, |
|
"eval_wer": 0.5891409384980755, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5006257822277848, |
|
"grad_norm": 1.838030219078064, |
|
"learning_rate": 0.000296279849183075, |
|
"loss": 0.7846, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5006257822277848, |
|
"eval_loss": 0.6995427012443542, |
|
"eval_runtime": 94.8499, |
|
"eval_samples_per_second": 14.317, |
|
"eval_steps_per_second": 1.792, |
|
"eval_wer": 0.5665383670461059, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6257822277847309, |
|
"grad_norm": 3.311655282974243, |
|
"learning_rate": 0.00029502304147465433, |
|
"loss": 0.9046, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6257822277847309, |
|
"eval_loss": 0.6796234846115112, |
|
"eval_runtime": 96.1056, |
|
"eval_samples_per_second": 14.13, |
|
"eval_steps_per_second": 1.769, |
|
"eval_wer": 0.5545819343215134, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7509386733416771, |
|
"grad_norm": 1.0524609088897705, |
|
"learning_rate": 0.00029376623376623374, |
|
"loss": 0.7688, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.7509386733416771, |
|
"eval_loss": 0.6881176829338074, |
|
"eval_runtime": 95.4601, |
|
"eval_samples_per_second": 14.226, |
|
"eval_steps_per_second": 1.781, |
|
"eval_wer": 0.5436082220948325, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8760951188986232, |
|
"grad_norm": 1.534467339515686, |
|
"learning_rate": 0.00029250942605781315, |
|
"loss": 0.7139, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.8760951188986232, |
|
"eval_loss": 0.6998778581619263, |
|
"eval_runtime": 96.7901, |
|
"eval_samples_per_second": 14.03, |
|
"eval_steps_per_second": 1.756, |
|
"eval_wer": 0.5557284415690771, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.0012515644555695, |
|
"grad_norm": 4.051555633544922, |
|
"learning_rate": 0.0002912526183493925, |
|
"loss": 0.7922, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.0012515644555695, |
|
"eval_loss": 0.6810752153396606, |
|
"eval_runtime": 96.2586, |
|
"eval_samples_per_second": 14.108, |
|
"eval_steps_per_second": 1.766, |
|
"eval_wer": 0.542134141347965, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.1264080100125156, |
|
"grad_norm": 3.281071186065674, |
|
"learning_rate": 0.0002899958106409719, |
|
"loss": 0.7929, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.1264080100125156, |
|
"eval_loss": 0.6760120391845703, |
|
"eval_runtime": 97.2734, |
|
"eval_samples_per_second": 13.961, |
|
"eval_steps_per_second": 1.748, |
|
"eval_wer": 0.5433625419703546, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.2515644555694618, |
|
"grad_norm": 6.815141201019287, |
|
"learning_rate": 0.0002887515710096355, |
|
"loss": 0.7508, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.2515644555694618, |
|
"eval_loss": 0.6555261015892029, |
|
"eval_runtime": 96.3987, |
|
"eval_samples_per_second": 14.087, |
|
"eval_steps_per_second": 1.764, |
|
"eval_wer": 0.5660470067971501, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.3767209011264079, |
|
"grad_norm": 2.6557281017303467, |
|
"learning_rate": 0.0002874947633012149, |
|
"loss": 0.7534, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.3767209011264079, |
|
"eval_loss": 0.6411116719245911, |
|
"eval_runtime": 96.0443, |
|
"eval_samples_per_second": 14.139, |
|
"eval_steps_per_second": 1.77, |
|
"eval_wer": 0.5372205388584064, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.5018773466833542, |
|
"grad_norm": 19.327106475830078, |
|
"learning_rate": 0.00028623795559279424, |
|
"loss": 0.7316, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.5018773466833542, |
|
"eval_loss": 0.6420451998710632, |
|
"eval_runtime": 97.2234, |
|
"eval_samples_per_second": 13.968, |
|
"eval_steps_per_second": 1.749, |
|
"eval_wer": 0.5319793628695438, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.6270337922403004, |
|
"grad_norm": 4.125315189361572, |
|
"learning_rate": 0.00028498114788437365, |
|
"loss": 0.7147, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.6270337922403004, |
|
"eval_loss": 0.6725718975067139, |
|
"eval_runtime": 97.3227, |
|
"eval_samples_per_second": 13.954, |
|
"eval_steps_per_second": 1.747, |
|
"eval_wer": 0.5293587748751126, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.7521902377972465, |
|
"grad_norm": 10.938685417175293, |
|
"learning_rate": 0.00028372434017595306, |
|
"loss": 0.6734, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.7521902377972465, |
|
"eval_loss": 0.6308336853981018, |
|
"eval_runtime": 96.6381, |
|
"eval_samples_per_second": 14.052, |
|
"eval_steps_per_second": 1.759, |
|
"eval_wer": 0.5252641061338138, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.8773466833541927, |
|
"grad_norm": 7.951139450073242, |
|
"learning_rate": 0.00028246753246753247, |
|
"loss": 0.7084, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.8773466833541927, |
|
"eval_loss": 0.6205306649208069, |
|
"eval_runtime": 97.9744, |
|
"eval_samples_per_second": 13.861, |
|
"eval_steps_per_second": 1.735, |
|
"eval_wer": 0.5438539022193104, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.002503128911139, |
|
"grad_norm": 2.1400046348571777, |
|
"learning_rate": 0.00028121072475911183, |
|
"loss": 0.6714, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.002503128911139, |
|
"eval_loss": 0.6118720769882202, |
|
"eval_runtime": 97.507, |
|
"eval_samples_per_second": 13.927, |
|
"eval_steps_per_second": 1.743, |
|
"eval_wer": 0.5231348783883384, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.127659574468085, |
|
"grad_norm": 8.502281188964844, |
|
"learning_rate": 0.00027995391705069124, |
|
"loss": 0.6888, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.127659574468085, |
|
"eval_loss": 0.6349774599075317, |
|
"eval_runtime": 96.997, |
|
"eval_samples_per_second": 14.0, |
|
"eval_steps_per_second": 1.753, |
|
"eval_wer": 0.5166653017770862, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.252816020025031, |
|
"grad_norm": 1.059962272644043, |
|
"learning_rate": 0.0002786971093422706, |
|
"loss": 0.6871, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.252816020025031, |
|
"eval_loss": 0.6183043718338013, |
|
"eval_runtime": 97.927, |
|
"eval_samples_per_second": 13.867, |
|
"eval_steps_per_second": 1.736, |
|
"eval_wer": 0.5118335926623536, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.3779724655819776, |
|
"grad_norm": 1.3032373189926147, |
|
"learning_rate": 0.00027744030163385, |
|
"loss": 0.6882, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.3779724655819776, |
|
"eval_loss": 0.5973983407020569, |
|
"eval_runtime": 97.1298, |
|
"eval_samples_per_second": 13.981, |
|
"eval_steps_per_second": 1.75, |
|
"eval_wer": 0.5332896568667594, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.5031289111389237, |
|
"grad_norm": 1.9106544256210327, |
|
"learning_rate": 0.00027618349392542936, |
|
"loss": 0.6769, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.5031289111389237, |
|
"eval_loss": 0.5994674563407898, |
|
"eval_runtime": 96.7136, |
|
"eval_samples_per_second": 14.041, |
|
"eval_steps_per_second": 1.758, |
|
"eval_wer": 0.5300958152485464, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.6282853566958697, |
|
"grad_norm": 1.2871947288513184, |
|
"learning_rate": 0.00027492668621700877, |
|
"loss": 0.6801, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.6282853566958697, |
|
"eval_loss": 0.5880154371261597, |
|
"eval_runtime": 97.9989, |
|
"eval_samples_per_second": 13.857, |
|
"eval_steps_per_second": 1.735, |
|
"eval_wer": 0.5377937924821882, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.7534418022528158, |
|
"grad_norm": 0.619396448135376, |
|
"learning_rate": 0.0002736698785085881, |
|
"loss": 0.6695, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.7534418022528158, |
|
"eval_loss": 0.5972831845283508, |
|
"eval_runtime": 97.3411, |
|
"eval_samples_per_second": 13.951, |
|
"eval_steps_per_second": 1.746, |
|
"eval_wer": 0.5051183359266236, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.8785982478097623, |
|
"grad_norm": 1.0727863311767578, |
|
"learning_rate": 0.0002724130708001676, |
|
"loss": 0.6557, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.8785982478097623, |
|
"eval_loss": 0.6026735901832581, |
|
"eval_runtime": 97.4796, |
|
"eval_samples_per_second": 13.931, |
|
"eval_steps_per_second": 1.744, |
|
"eval_wer": 0.5056096961755794, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.0037546933667083, |
|
"grad_norm": 0.626075029373169, |
|
"learning_rate": 0.00027115626309174695, |
|
"loss": 0.6525, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.0037546933667083, |
|
"eval_loss": 0.59461510181427, |
|
"eval_runtime": 97.9487, |
|
"eval_samples_per_second": 13.864, |
|
"eval_steps_per_second": 1.736, |
|
"eval_wer": 0.4996314798132831, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.1289111389236544, |
|
"grad_norm": 4.777908802032471, |
|
"learning_rate": 0.00026989945538332636, |
|
"loss": 0.6829, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.1289111389236544, |
|
"eval_loss": 0.5881961584091187, |
|
"eval_runtime": 97.8698, |
|
"eval_samples_per_second": 13.876, |
|
"eval_steps_per_second": 1.737, |
|
"eval_wer": 0.49979526656293505, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.254067584480601, |
|
"grad_norm": 5.189509391784668, |
|
"learning_rate": 0.0002686426476749057, |
|
"loss": 0.6627, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.254067584480601, |
|
"eval_loss": 0.6009677052497864, |
|
"eval_runtime": 95.9556, |
|
"eval_samples_per_second": 14.152, |
|
"eval_steps_per_second": 1.772, |
|
"eval_wer": 0.49848497256571944, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.379224030037547, |
|
"grad_norm": 6.410580158233643, |
|
"learning_rate": 0.0002673858399664851, |
|
"loss": 0.6146, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.379224030037547, |
|
"eval_loss": 0.5770368576049805, |
|
"eval_runtime": 95.4343, |
|
"eval_samples_per_second": 14.23, |
|
"eval_steps_per_second": 1.781, |
|
"eval_wer": 0.5009417738104988, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.504380475594493, |
|
"grad_norm": 1.4566117525100708, |
|
"learning_rate": 0.0002661290322580645, |
|
"loss": 0.6205, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.504380475594493, |
|
"eval_loss": 0.5738538503646851, |
|
"eval_runtime": 95.9905, |
|
"eval_samples_per_second": 14.147, |
|
"eval_steps_per_second": 1.771, |
|
"eval_wer": 0.5020882810580624, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.6295369211514394, |
|
"grad_norm": 5.905794143676758, |
|
"learning_rate": 0.0002648722245496439, |
|
"loss": 0.7025, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.6295369211514394, |
|
"eval_loss": 0.5806447863578796, |
|
"eval_runtime": 96.9069, |
|
"eval_samples_per_second": 14.013, |
|
"eval_steps_per_second": 1.754, |
|
"eval_wer": 0.5223978380149046, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.7546933667083855, |
|
"grad_norm": 7.341220855712891, |
|
"learning_rate": 0.00026361541684122324, |
|
"loss": 0.6379, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.7546933667083855, |
|
"eval_loss": 0.6210225820541382, |
|
"eval_runtime": 96.7041, |
|
"eval_samples_per_second": 14.043, |
|
"eval_steps_per_second": 1.758, |
|
"eval_wer": 0.5064286299238392, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.8798498122653315, |
|
"grad_norm": 2.2515931129455566, |
|
"learning_rate": 0.00026235860913280265, |
|
"loss": 0.6104, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 3.8798498122653315, |
|
"eval_loss": 0.5702349543571472, |
|
"eval_runtime": 95.9116, |
|
"eval_samples_per_second": 14.159, |
|
"eval_steps_per_second": 1.772, |
|
"eval_wer": 0.5033985750552781, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 4.005006257822278, |
|
"grad_norm": 0.5629591941833496, |
|
"learning_rate": 0.00026110180142438206, |
|
"loss": 0.6607, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.005006257822278, |
|
"eval_loss": 0.5755676627159119, |
|
"eval_runtime": 96.4548, |
|
"eval_samples_per_second": 14.079, |
|
"eval_steps_per_second": 1.762, |
|
"eval_wer": 0.4891491278355581, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.130162703379224, |
|
"grad_norm": 6.9959259033203125, |
|
"learning_rate": 0.0002598449937159615, |
|
"loss": 0.6776, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.130162703379224, |
|
"eval_loss": 0.5678644180297852, |
|
"eval_runtime": 95.8409, |
|
"eval_samples_per_second": 14.169, |
|
"eval_steps_per_second": 1.774, |
|
"eval_wer": 0.4885758742117763, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.25531914893617, |
|
"grad_norm": 4.9695658683776855, |
|
"learning_rate": 0.00025860075408462504, |
|
"loss": 0.6343, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.25531914893617, |
|
"eval_loss": 0.5597887635231018, |
|
"eval_runtime": 95.5822, |
|
"eval_samples_per_second": 14.208, |
|
"eval_steps_per_second": 1.779, |
|
"eval_wer": 0.4898861682089919, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.380475594493117, |
|
"grad_norm": 1.5676358938217163, |
|
"learning_rate": 0.0002573439463762044, |
|
"loss": 0.5818, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.380475594493117, |
|
"eval_loss": 0.5806618928909302, |
|
"eval_runtime": 95.8037, |
|
"eval_samples_per_second": 14.175, |
|
"eval_steps_per_second": 1.774, |
|
"eval_wer": 0.49643763819507003, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.505632040050062, |
|
"grad_norm": 1.323522925376892, |
|
"learning_rate": 0.0002560871386677838, |
|
"loss": 0.6085, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.505632040050062, |
|
"eval_loss": 0.5931637287139893, |
|
"eval_runtime": 95.2763, |
|
"eval_samples_per_second": 14.253, |
|
"eval_steps_per_second": 1.784, |
|
"eval_wer": 0.4915240357055114, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.630788485607009, |
|
"grad_norm": 1.9220880270004272, |
|
"learning_rate": 0.0002548303309593632, |
|
"loss": 0.6648, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 4.630788485607009, |
|
"eval_loss": 0.5579622983932495, |
|
"eval_runtime": 96.2129, |
|
"eval_samples_per_second": 14.115, |
|
"eval_steps_per_second": 1.767, |
|
"eval_wer": 0.486037179592171, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 4.755944931163955, |
|
"grad_norm": 0.6678237915039062, |
|
"learning_rate": 0.00025357352325094257, |
|
"loss": 0.6359, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 4.755944931163955, |
|
"eval_loss": 0.5565288662910461, |
|
"eval_runtime": 95.7617, |
|
"eval_samples_per_second": 14.181, |
|
"eval_steps_per_second": 1.775, |
|
"eval_wer": 0.4959462779461142, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 4.881101376720901, |
|
"grad_norm": 0.8339139819145203, |
|
"learning_rate": 0.000252316715542522, |
|
"loss": 0.6139, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 4.881101376720901, |
|
"eval_loss": 0.5604941248893738, |
|
"eval_runtime": 95.8364, |
|
"eval_samples_per_second": 14.17, |
|
"eval_steps_per_second": 1.774, |
|
"eval_wer": 0.4885758742117763, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 5.006257822277847, |
|
"grad_norm": 0.6905695199966431, |
|
"learning_rate": 0.00025105990783410133, |
|
"loss": 0.5995, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.006257822277847, |
|
"eval_loss": 0.5719751715660095, |
|
"eval_runtime": 95.3867, |
|
"eval_samples_per_second": 14.237, |
|
"eval_steps_per_second": 1.782, |
|
"eval_wer": 0.48030464335435263, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.131414267834794, |
|
"grad_norm": 1.2827470302581787, |
|
"learning_rate": 0.00024980310012568074, |
|
"loss": 0.6349, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 5.131414267834794, |
|
"eval_loss": 0.5505784749984741, |
|
"eval_runtime": 96.3073, |
|
"eval_samples_per_second": 14.101, |
|
"eval_steps_per_second": 1.765, |
|
"eval_wer": 0.5011874539349767, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 5.256570713391739, |
|
"grad_norm": 1.4041779041290283, |
|
"learning_rate": 0.00024854629241726015, |
|
"loss": 0.6134, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 5.256570713391739, |
|
"eval_loss": 0.5602907538414001, |
|
"eval_runtime": 96.7539, |
|
"eval_samples_per_second": 14.036, |
|
"eval_steps_per_second": 1.757, |
|
"eval_wer": 0.4784210957333552, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 5.381727158948686, |
|
"grad_norm": 16.846094131469727, |
|
"learning_rate": 0.0002472894847088395, |
|
"loss": 0.5989, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 5.381727158948686, |
|
"eval_loss": 0.5714141726493835, |
|
"eval_runtime": 96.2578, |
|
"eval_samples_per_second": 14.108, |
|
"eval_steps_per_second": 1.766, |
|
"eval_wer": 0.48439931209565146, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 5.506883604505632, |
|
"grad_norm": 1.1656825542449951, |
|
"learning_rate": 0.0002460326770004189, |
|
"loss": 0.6083, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 5.506883604505632, |
|
"eval_loss": 0.5697967410087585, |
|
"eval_runtime": 96.7222, |
|
"eval_samples_per_second": 14.04, |
|
"eval_steps_per_second": 1.758, |
|
"eval_wer": 0.4758005077389239, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 5.506883604505632, |
|
"step": 4400, |
|
"total_flos": 2.278783878958534e+19, |
|
"train_loss": 0.858475610559637, |
|
"train_runtime": 9525.4326, |
|
"train_samples_per_second": 20.116, |
|
"train_steps_per_second": 2.516 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 23970, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.278783878958534e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|