{
  "best_metric": 55.5672268907563,
  "best_model_checkpoint": "./whisper-small-bn/checkpoint-3000",
  "epoch": 2.5,
  "eval_steps": 1000,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "grad_norm": 27.802248001098633,
      "learning_rate": 3.2e-07,
      "loss": 2.2811,
      "step": 20
    },
    {
      "epoch": 0.13,
      "grad_norm": 18.253421783447266,
      "learning_rate": 7.2e-07,
      "loss": 2.1964,
      "step": 40
    },
    {
      "epoch": 0.2,
      "grad_norm": 13.585515975952148,
      "learning_rate": 1.12e-06,
      "loss": 1.8635,
      "step": 60
    },
    {
      "epoch": 0.27,
      "grad_norm": 22.400362014770508,
      "learning_rate": 1.52e-06,
      "loss": 1.703,
      "step": 80
    },
    {
      "epoch": 0.33,
      "grad_norm": 13.77535343170166,
      "learning_rate": 1.9200000000000003e-06,
      "loss": 1.5336,
      "step": 100
    },
    {
      "epoch": 0.4,
      "grad_norm": 14.989215850830078,
      "learning_rate": 2.3200000000000002e-06,
      "loss": 1.4044,
      "step": 120
    },
    {
      "epoch": 0.47,
      "grad_norm": 22.01858139038086,
      "learning_rate": 2.7200000000000002e-06,
      "loss": 1.2402,
      "step": 140
    },
    {
      "epoch": 0.53,
      "grad_norm": 16.482311248779297,
      "learning_rate": 3.12e-06,
      "loss": 1.0922,
      "step": 160
    },
    {
      "epoch": 0.6,
      "grad_norm": 17.529874801635742,
      "learning_rate": 3.52e-06,
      "loss": 0.9583,
      "step": 180
    },
    {
      "epoch": 0.67,
      "grad_norm": 18.252761840820312,
      "learning_rate": 3.920000000000001e-06,
      "loss": 0.8783,
      "step": 200
    },
    {
      "epoch": 0.73,
      "grad_norm": 16.48067283630371,
      "learning_rate": 4.32e-06,
      "loss": 0.7643,
      "step": 220
    },
    {
      "epoch": 0.8,
      "grad_norm": 15.062904357910156,
      "learning_rate": 4.7200000000000005e-06,
      "loss": 0.6991,
      "step": 240
    },
    {
      "epoch": 0.87,
      "grad_norm": 14.231403350830078,
      "learning_rate": 5.12e-06,
      "loss": 0.6594,
      "step": 260
    },
    {
      "epoch": 0.93,
      "grad_norm": 18.425230026245117,
      "learning_rate": 5.5200000000000005e-06,
      "loss": 0.5724,
      "step": 280
    },
    {
      "epoch": 1.0,
      "grad_norm": 11.786844253540039,
      "learning_rate": 5.92e-06,
      "loss": 0.5687,
      "step": 300
    },
    {
      "epoch": 1.07,
      "grad_norm": 15.346015930175781,
      "learning_rate": 6.3200000000000005e-06,
      "loss": 0.4696,
      "step": 320
    },
    {
      "epoch": 1.13,
      "grad_norm": 7.936849594116211,
      "learning_rate": 6.720000000000001e-06,
      "loss": 0.4569,
      "step": 340
    },
    {
      "epoch": 1.2,
      "grad_norm": 13.894779205322266,
      "learning_rate": 7.1200000000000004e-06,
      "loss": 0.4089,
      "step": 360
    },
    {
      "epoch": 1.27,
      "grad_norm": 11.665081977844238,
      "learning_rate": 7.520000000000001e-06,
      "loss": 0.4168,
      "step": 380
    },
    {
      "epoch": 1.33,
      "grad_norm": 16.7696475982666,
      "learning_rate": 7.92e-06,
      "loss": 0.4433,
      "step": 400
    },
    {
      "epoch": 1.4,
      "grad_norm": 15.60254192352295,
      "learning_rate": 8.32e-06,
      "loss": 0.4013,
      "step": 420
    },
    {
      "epoch": 1.47,
      "grad_norm": 13.72472858428955,
      "learning_rate": 8.720000000000001e-06,
      "loss": 0.3903,
      "step": 440
    },
    {
      "epoch": 1.53,
      "grad_norm": 10.506444931030273,
      "learning_rate": 9.12e-06,
      "loss": 0.3581,
      "step": 460
    },
    {
      "epoch": 1.6,
      "grad_norm": 13.643900871276855,
      "learning_rate": 9.52e-06,
      "loss": 0.3605,
      "step": 480
    },
    {
      "epoch": 1.67,
      "grad_norm": 11.669689178466797,
      "learning_rate": 9.920000000000002e-06,
      "loss": 0.3548,
      "step": 500
    },
    {
      "epoch": 1.73,
      "grad_norm": 9.907474517822266,
      "learning_rate": 9.936000000000001e-06,
      "loss": 0.3535,
      "step": 520
    },
    {
      "epoch": 1.8,
      "grad_norm": 11.366904258728027,
      "learning_rate": 9.856000000000002e-06,
      "loss": 0.2943,
      "step": 540
    },
    {
      "epoch": 1.87,
      "grad_norm": 7.5114240646362305,
      "learning_rate": 9.776000000000001e-06,
      "loss": 0.3057,
      "step": 560
    },
    {
      "epoch": 1.93,
      "grad_norm": 11.717443466186523,
      "learning_rate": 9.696000000000002e-06,
      "loss": 0.2919,
      "step": 580
    },
    {
      "epoch": 2.0,
      "grad_norm": 9.275129318237305,
      "learning_rate": 9.616e-06,
      "loss": 0.3115,
      "step": 600
    },
    {
      "epoch": 2.07,
      "grad_norm": 9.388382911682129,
      "learning_rate": 9.536000000000002e-06,
      "loss": 0.1926,
      "step": 620
    },
    {
      "epoch": 2.13,
      "grad_norm": 8.574433326721191,
      "learning_rate": 9.456e-06,
      "loss": 0.2193,
      "step": 640
    },
    {
      "epoch": 2.2,
      "grad_norm": 5.512338638305664,
      "learning_rate": 9.376000000000001e-06,
      "loss": 0.18,
      "step": 660
    },
    {
      "epoch": 2.27,
      "grad_norm": 9.644317626953125,
      "learning_rate": 9.296e-06,
      "loss": 0.1916,
      "step": 680
    },
    {
      "epoch": 2.33,
      "grad_norm": 9.321474075317383,
      "learning_rate": 9.216000000000001e-06,
      "loss": 0.2055,
      "step": 700
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.868476390838623,
      "learning_rate": 9.136e-06,
      "loss": 0.1843,
      "step": 720
    },
    {
      "epoch": 2.47,
      "grad_norm": 6.592445373535156,
      "learning_rate": 9.056000000000001e-06,
      "loss": 0.1713,
      "step": 740
    },
    {
      "epoch": 2.53,
      "grad_norm": 8.138116836547852,
      "learning_rate": 8.976e-06,
      "loss": 0.1792,
      "step": 760
    },
    {
      "epoch": 2.6,
      "grad_norm": 9.782510757446289,
      "learning_rate": 8.896000000000001e-06,
      "loss": 0.1796,
      "step": 780
    },
    {
      "epoch": 2.67,
      "grad_norm": 5.867671012878418,
      "learning_rate": 8.816000000000002e-06,
      "loss": 0.1652,
      "step": 800
    },
    {
      "epoch": 2.73,
      "grad_norm": 6.637588977813721,
      "learning_rate": 8.736e-06,
      "loss": 0.1735,
      "step": 820
    },
    {
      "epoch": 2.8,
      "grad_norm": 11.652266502380371,
      "learning_rate": 8.656000000000001e-06,
      "loss": 0.1734,
      "step": 840
    },
    {
      "epoch": 2.87,
      "grad_norm": 6.051203727722168,
      "learning_rate": 8.576e-06,
      "loss": 0.1557,
      "step": 860
    },
    {
      "epoch": 2.93,
      "grad_norm": 11.018319129943848,
      "learning_rate": 8.496000000000001e-06,
      "loss": 0.1668,
      "step": 880
    },
    {
      "epoch": 3.0,
      "grad_norm": 7.878730297088623,
      "learning_rate": 8.416e-06,
      "loss": 0.1706,
      "step": 900
    },
    {
      "epoch": 3.07,
      "grad_norm": 6.223515510559082,
      "learning_rate": 8.336000000000001e-06,
      "loss": 0.0917,
      "step": 920
    },
    {
      "epoch": 3.13,
      "grad_norm": 7.449841022491455,
      "learning_rate": 8.256e-06,
      "loss": 0.0853,
      "step": 940
    },
    {
      "epoch": 3.2,
      "grad_norm": 3.824934244155884,
      "learning_rate": 8.176000000000001e-06,
      "loss": 0.1033,
      "step": 960
    },
    {
      "epoch": 3.27,
      "grad_norm": 6.369926929473877,
      "learning_rate": 8.096e-06,
      "loss": 0.0997,
      "step": 980
    },
    {
      "epoch": 3.33,
      "grad_norm": 6.938141822814941,
      "learning_rate": 8.016e-06,
      "loss": 0.095,
      "step": 1000
    },
    {
      "epoch": 3.33,
      "eval_loss": 0.27550819516181946,
      "eval_runtime": 345.1723,
      "eval_samples_per_second": 0.29,
      "eval_steps_per_second": 0.29,
      "eval_wer": 60.18099547511312,
      "step": 1000
    },
    {
      "epoch": 2.18,
      "grad_norm": 7.869150638580322,
      "learning_rate": 7.936e-06,
      "loss": 0.1443,
      "step": 1020
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.6447396278381348,
      "learning_rate": 7.860000000000001e-06,
      "loss": 0.1974,
      "step": 1040
    },
    {
      "epoch": 2.27,
      "grad_norm": 9.588157653808594,
      "learning_rate": 7.78e-06,
      "loss": 0.1427,
      "step": 1060
    },
    {
      "epoch": 2.31,
      "grad_norm": 10.796669006347656,
      "learning_rate": 7.7e-06,
      "loss": 0.187,
      "step": 1080
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.941339015960693,
      "learning_rate": 7.620000000000001e-06,
      "loss": 0.1568,
      "step": 1100
    },
    {
      "epoch": 2.4,
      "grad_norm": 11.114888191223145,
      "learning_rate": 7.540000000000001e-06,
      "loss": 0.1697,
      "step": 1120
    },
    {
      "epoch": 2.44,
      "grad_norm": 10.200749397277832,
      "learning_rate": 7.4600000000000006e-06,
      "loss": 0.1675,
      "step": 1140
    },
    {
      "epoch": 2.48,
      "grad_norm": 5.67927360534668,
      "learning_rate": 7.3800000000000005e-06,
      "loss": 0.1557,
      "step": 1160
    },
    {
      "epoch": 2.53,
      "grad_norm": 7.413832664489746,
      "learning_rate": 7.304000000000001e-06,
      "loss": 0.1629,
      "step": 1180
    },
    {
      "epoch": 2.57,
      "grad_norm": 6.933262348175049,
      "learning_rate": 7.224000000000001e-06,
      "loss": 0.17,
      "step": 1200
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.847285747528076,
      "learning_rate": 7.1440000000000005e-06,
      "loss": 0.1389,
      "step": 1220
    },
    {
      "epoch": 2.66,
      "grad_norm": 19.55495834350586,
      "learning_rate": 7.0640000000000005e-06,
      "loss": 0.1338,
      "step": 1240
    },
    {
      "epoch": 2.7,
      "grad_norm": 7.148222923278809,
      "learning_rate": 6.984e-06,
      "loss": 0.1296,
      "step": 1260
    },
    {
      "epoch": 2.74,
      "grad_norm": 6.323164939880371,
      "learning_rate": 6.904e-06,
      "loss": 0.163,
      "step": 1280
    },
    {
      "epoch": 2.78,
      "grad_norm": 9.096046447753906,
      "learning_rate": 6.824e-06,
      "loss": 0.1942,
      "step": 1300
    },
    {
      "epoch": 2.83,
      "grad_norm": 8.462915420532227,
      "learning_rate": 6.744e-06,
      "loss": 0.1576,
      "step": 1320
    },
    {
      "epoch": 2.87,
      "grad_norm": 7.848359107971191,
      "learning_rate": 6.664e-06,
      "loss": 0.1219,
      "step": 1340
    },
    {
      "epoch": 2.91,
      "grad_norm": 6.30383825302124,
      "learning_rate": 6.584e-06,
      "loss": 0.1404,
      "step": 1360
    },
    {
      "epoch": 2.96,
      "grad_norm": 5.657530784606934,
      "learning_rate": 6.504e-06,
      "loss": 0.1449,
      "step": 1380
    },
    {
      "epoch": 3.0,
      "grad_norm": 7.4172234535217285,
      "learning_rate": 6.424e-06,
      "loss": 0.1234,
      "step": 1400
    },
    {
      "epoch": 3.04,
      "grad_norm": 6.550841331481934,
      "learning_rate": 6.344e-06,
      "loss": 0.0885,
      "step": 1420
    },
    {
      "epoch": 3.08,
      "grad_norm": 6.212351322174072,
      "learning_rate": 6.264e-06,
      "loss": 0.0725,
      "step": 1440
    },
    {
      "epoch": 3.13,
      "grad_norm": 5.2562575340271,
      "learning_rate": 6.184e-06,
      "loss": 0.0886,
      "step": 1460
    },
    {
      "epoch": 3.17,
      "grad_norm": 7.921740531921387,
      "learning_rate": 6.104000000000001e-06,
      "loss": 0.0805,
      "step": 1480
    },
    {
      "epoch": 3.21,
      "grad_norm": 3.5878069400787354,
      "learning_rate": 6.024000000000001e-06,
      "loss": 0.0628,
      "step": 1500
    },
    {
      "epoch": 3.25,
      "grad_norm": 8.585208892822266,
      "learning_rate": 5.944000000000001e-06,
      "loss": 0.0812,
      "step": 1520
    },
    {
      "epoch": 3.3,
      "grad_norm": 4.111368656158447,
      "learning_rate": 5.868e-06,
      "loss": 0.0694,
      "step": 1540
    },
    {
      "epoch": 3.34,
      "grad_norm": 5.304944038391113,
      "learning_rate": 5.788e-06,
      "loss": 0.0739,
      "step": 1560
    },
    {
      "epoch": 3.38,
      "grad_norm": 6.168578147888184,
      "learning_rate": 5.708e-06,
      "loss": 0.0821,
      "step": 1580
    },
    {
      "epoch": 3.43,
      "grad_norm": 12.2472505569458,
      "learning_rate": 5.628e-06,
      "loss": 0.0885,
      "step": 1600
    },
    {
      "epoch": 3.47,
      "grad_norm": 9.349952697753906,
      "learning_rate": 5.548e-06,
      "loss": 0.0607,
      "step": 1620
    },
    {
      "epoch": 3.51,
      "grad_norm": 5.98253870010376,
      "learning_rate": 5.468e-06,
      "loss": 0.0619,
      "step": 1640
    },
    {
      "epoch": 3.55,
      "grad_norm": 7.106723785400391,
      "learning_rate": 5.388e-06,
      "loss": 0.0992,
      "step": 1660
    },
    {
      "epoch": 3.6,
      "grad_norm": 3.693020820617676,
      "learning_rate": 5.308000000000001e-06,
      "loss": 0.0855,
      "step": 1680
    },
    {
      "epoch": 3.64,
      "grad_norm": 7.3649678230285645,
      "learning_rate": 5.228000000000001e-06,
      "loss": 0.081,
      "step": 1700
    },
    {
      "epoch": 3.68,
      "grad_norm": 5.4454240798950195,
      "learning_rate": 5.1480000000000005e-06,
      "loss": 0.0872,
      "step": 1720
    },
    {
      "epoch": 3.73,
      "grad_norm": 8.854460716247559,
      "learning_rate": 5.0680000000000004e-06,
      "loss": 0.0869,
      "step": 1740
    },
    {
      "epoch": 3.77,
      "grad_norm": 3.8615269660949707,
      "learning_rate": 4.988e-06,
      "loss": 0.0821,
      "step": 1760
    },
    {
      "epoch": 3.81,
      "grad_norm": 3.7190287113189697,
      "learning_rate": 4.908e-06,
      "loss": 0.0636,
      "step": 1780
    },
    {
      "epoch": 3.85,
      "grad_norm": 7.683879375457764,
      "learning_rate": 4.828e-06,
      "loss": 0.0892,
      "step": 1800
    },
    {
      "epoch": 3.9,
      "grad_norm": 3.645141839981079,
      "learning_rate": 4.748e-06,
      "loss": 0.0803,
      "step": 1820
    },
    {
      "epoch": 3.94,
      "grad_norm": 3.8167316913604736,
      "learning_rate": 4.668e-06,
      "loss": 0.1012,
      "step": 1840
    },
    {
      "epoch": 3.98,
      "grad_norm": 7.247605323791504,
      "learning_rate": 4.588e-06,
      "loss": 0.0728,
      "step": 1860
    },
    {
      "epoch": 4.03,
      "grad_norm": 3.5495615005493164,
      "learning_rate": 4.508e-06,
      "loss": 0.0439,
      "step": 1880
    },
    {
      "epoch": 4.07,
      "grad_norm": 4.105050086975098,
      "learning_rate": 4.428000000000001e-06,
      "loss": 0.0357,
      "step": 1900
    },
    {
      "epoch": 4.11,
      "grad_norm": 9.399913787841797,
      "learning_rate": 4.3480000000000006e-06,
      "loss": 0.0363,
      "step": 1920
    },
    {
      "epoch": 4.15,
      "grad_norm": 3.0376510620117188,
      "learning_rate": 4.2680000000000005e-06,
      "loss": 0.0221,
      "step": 1940
    },
    {
      "epoch": 4.2,
      "grad_norm": 0.6908012628555298,
      "learning_rate": 4.188e-06,
      "loss": 0.0323,
      "step": 1960
    },
    {
      "epoch": 4.24,
      "grad_norm": 6.255786895751953,
      "learning_rate": 4.108e-06,
      "loss": 0.0327,
      "step": 1980
    },
    {
      "epoch": 4.28,
      "grad_norm": 7.1336669921875,
      "learning_rate": 4.028e-06,
      "loss": 0.0416,
      "step": 2000
    },
    {
      "epoch": 4.28,
      "eval_loss": 0.27948442101478577,
      "eval_runtime": 349.9899,
      "eval_samples_per_second": 0.286,
      "eval_steps_per_second": 0.286,
      "eval_wer": 59.0146750524109,
      "step": 2000
    },
    {
      "epoch": 1.68,
      "grad_norm": 13.214738845825195,
      "learning_rate": 7.250909090909092e-06,
      "loss": 0.2839,
      "step": 2020
    },
    {
      "epoch": 1.7,
      "grad_norm": 8.846616744995117,
      "learning_rate": 7.214545454545455e-06,
      "loss": 0.2221,
      "step": 2040
    },
    {
      "epoch": 1.72,
      "grad_norm": 11.778766632080078,
      "learning_rate": 7.178181818181818e-06,
      "loss": 0.2394,
      "step": 2060
    },
    {
      "epoch": 1.73,
      "grad_norm": 4.484768867492676,
      "learning_rate": 7.141818181818182e-06,
      "loss": 0.2299,
      "step": 2080
    },
    {
      "epoch": 1.75,
      "grad_norm": 4.385435104370117,
      "learning_rate": 7.105454545454546e-06,
      "loss": 0.2221,
      "step": 2100
    },
    {
      "epoch": 1.77,
      "grad_norm": 8.751326560974121,
      "learning_rate": 7.06909090909091e-06,
      "loss": 0.1966,
      "step": 2120
    },
    {
      "epoch": 1.78,
      "grad_norm": 8.837333679199219,
      "learning_rate": 7.032727272727273e-06,
      "loss": 0.1893,
      "step": 2140
    },
    {
      "epoch": 1.8,
      "grad_norm": 9.331002235412598,
      "learning_rate": 6.998181818181818e-06,
      "loss": 0.2365,
      "step": 2160
    },
    {
      "epoch": 1.82,
      "grad_norm": 6.599325180053711,
      "learning_rate": 6.961818181818183e-06,
      "loss": 0.2152,
      "step": 2180
    },
    {
      "epoch": 1.83,
      "grad_norm": 11.716184616088867,
      "learning_rate": 6.9254545454545464e-06,
      "loss": 0.203,
      "step": 2200
    },
    {
      "epoch": 1.85,
      "grad_norm": 6.506683349609375,
      "learning_rate": 6.88909090909091e-06,
      "loss": 0.2162,
      "step": 2220
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.64941930770874,
      "learning_rate": 6.852727272727273e-06,
      "loss": 0.214,
      "step": 2240
    },
    {
      "epoch": 1.88,
      "grad_norm": 8.344452857971191,
      "learning_rate": 6.816363636363637e-06,
      "loss": 0.2106,
      "step": 2260
    },
    {
      "epoch": 1.9,
      "grad_norm": 5.504006385803223,
      "learning_rate": 6.780000000000001e-06,
      "loss": 0.196,
      "step": 2280
    },
    {
      "epoch": 1.92,
      "grad_norm": 9.84432601928711,
      "learning_rate": 6.743636363636365e-06,
      "loss": 0.2207,
      "step": 2300
    },
    {
      "epoch": 1.93,
      "grad_norm": 7.112894535064697,
      "learning_rate": 6.707272727272728e-06,
      "loss": 0.2072,
      "step": 2320
    },
    {
      "epoch": 1.95,
      "grad_norm": 7.10872745513916,
      "learning_rate": 6.670909090909091e-06,
      "loss": 0.1925,
      "step": 2340
    },
    {
      "epoch": 1.97,
      "grad_norm": 8.612652778625488,
      "learning_rate": 6.634545454545455e-06,
      "loss": 0.2023,
      "step": 2360
    },
    {
      "epoch": 1.98,
      "grad_norm": 6.083488941192627,
      "learning_rate": 6.5981818181818195e-06,
      "loss": 0.2111,
      "step": 2380
    },
    {
      "epoch": 2.0,
      "grad_norm": 7.59145975112915,
      "learning_rate": 6.561818181818182e-06,
      "loss": 0.1934,
      "step": 2400
    },
    {
      "epoch": 2.02,
      "grad_norm": 6.529219627380371,
      "learning_rate": 6.525454545454546e-06,
      "loss": 0.1756,
      "step": 2420
    },
    {
      "epoch": 2.03,
      "grad_norm": 13.685791015625,
      "learning_rate": 6.48909090909091e-06,
      "loss": 0.1872,
      "step": 2440
    },
    {
      "epoch": 2.05,
      "grad_norm": 6.508156776428223,
      "learning_rate": 6.4527272727272725e-06,
      "loss": 0.1424,
      "step": 2460
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.847611427307129,
      "learning_rate": 6.416363636363637e-06,
      "loss": 0.1588,
      "step": 2480
    },
    {
      "epoch": 2.08,
      "grad_norm": 5.715017795562744,
      "learning_rate": 6.380000000000001e-06,
      "loss": 0.1896,
      "step": 2500
    },
    {
      "epoch": 2.1,
      "grad_norm": 10.562045097351074,
      "learning_rate": 6.3436363636363644e-06,
      "loss": 0.2007,
      "step": 2520
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.2870736122131348,
      "learning_rate": 6.307272727272727e-06,
      "loss": 0.179,
      "step": 2540
    },
    {
      "epoch": 2.13,
      "grad_norm": 11.021241188049316,
      "learning_rate": 6.270909090909092e-06,
      "loss": 0.1573,
      "step": 2560
    },
    {
      "epoch": 2.15,
      "grad_norm": 7.566544055938721,
      "learning_rate": 6.2345454545454555e-06,
      "loss": 0.1832,
      "step": 2580
    },
    {
      "epoch": 2.17,
      "grad_norm": 5.980641841888428,
      "learning_rate": 6.198181818181819e-06,
      "loss": 0.1498,
      "step": 2600
    },
    {
      "epoch": 2.18,
      "grad_norm": 5.697175025939941,
      "learning_rate": 6.161818181818182e-06,
      "loss": 0.1378,
      "step": 2620
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.451406002044678,
      "learning_rate": 6.125454545454546e-06,
      "loss": 0.152,
      "step": 2640
    },
    {
      "epoch": 2.22,
      "grad_norm": 5.854424476623535,
      "learning_rate": 6.08909090909091e-06,
      "loss": 0.1492,
      "step": 2660
    },
    {
      "epoch": 2.23,
      "grad_norm": 6.480546951293945,
      "learning_rate": 6.052727272727274e-06,
      "loss": 0.1501,
      "step": 2680
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.3347442150115967,
      "learning_rate": 6.016363636363637e-06,
      "loss": 0.132,
      "step": 2700
    },
    {
      "epoch": 2.27,
      "grad_norm": 5.9716081619262695,
      "learning_rate": 5.98e-06,
      "loss": 0.1483,
      "step": 2720
    },
    {
      "epoch": 2.28,
      "grad_norm": 5.842201232910156,
      "learning_rate": 5.943636363636364e-06,
      "loss": 0.1688,
      "step": 2740
    },
    {
      "epoch": 2.3,
      "grad_norm": 5.079952716827393,
      "learning_rate": 5.9072727272727285e-06,
      "loss": 0.167,
      "step": 2760
    },
    {
      "epoch": 2.32,
      "grad_norm": 9.07481575012207,
      "learning_rate": 5.870909090909091e-06,
      "loss": 0.1581,
      "step": 2780
    },
    {
      "epoch": 2.33,
      "grad_norm": 9.30324935913086,
      "learning_rate": 5.834545454545455e-06,
      "loss": 0.1704,
      "step": 2800
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.848841667175293,
      "learning_rate": 5.798181818181819e-06,
      "loss": 0.1894,
      "step": 2820
    },
    {
      "epoch": 2.37,
      "grad_norm": 7.52855110168457,
      "learning_rate": 5.7618181818181816e-06,
      "loss": 0.1702,
      "step": 2840
    },
    {
      "epoch": 2.38,
      "grad_norm": 6.624313831329346,
      "learning_rate": 5.725454545454546e-06,
      "loss": 0.1628,
      "step": 2860
    },
    {
      "epoch": 2.4,
      "grad_norm": 6.889793872833252,
      "learning_rate": 5.68909090909091e-06,
      "loss": 0.1705,
      "step": 2880
    },
    {
      "epoch": 2.42,
      "grad_norm": 5.635292053222656,
      "learning_rate": 5.6527272727272734e-06,
      "loss": 0.1531,
      "step": 2900
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.1545729637146,
      "learning_rate": 5.616363636363636e-06,
      "loss": 0.1371,
      "step": 2920
    },
    {
      "epoch": 2.45,
      "grad_norm": 7.1985697746276855,
      "learning_rate": 5.580000000000001e-06,
      "loss": 0.1571,
      "step": 2940
    },
    {
      "epoch": 2.47,
      "grad_norm": 7.112518310546875,
      "learning_rate": 5.5436363636363645e-06,
      "loss": 0.1525,
      "step": 2960
    },
    {
      "epoch": 2.48,
      "grad_norm": 9.503728866577148,
      "learning_rate": 5.507272727272728e-06,
      "loss": 0.1426,
      "step": 2980
    },
    {
      "epoch": 2.5,
      "grad_norm": 5.497040271759033,
      "learning_rate": 5.470909090909091e-06,
      "loss": 0.1501,
      "step": 3000
    },
    {
      "epoch": 2.5,
      "eval_loss": 0.2098405510187149,
      "eval_runtime": 325.0771,
      "eval_samples_per_second": 0.308,
      "eval_steps_per_second": 0.308,
      "eval_wer": 55.5672268907563,
      "step": 3000
    }
  ],
  "logging_steps": 20,
  "max_steps": 6000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 1000,
  "total_flos": 2.59669143207936e+18,
  "train_batch_size": 3,
  "trial_name": null,
  "trial_params": null
}