{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.537318712415989, "eval_steps": 1000, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.017686593562079942, "grad_norm": 32.10449981689453, "learning_rate": 4.6000000000000004e-07, "loss": 2.6364, "step": 25 }, { "epoch": 0.035373187124159884, "grad_norm": 20.653064727783203, "learning_rate": 9.600000000000001e-07, "loss": 1.5119, "step": 50 }, { "epoch": 0.05305978068623983, "grad_norm": 20.740116119384766, "learning_rate": 1.46e-06, "loss": 0.984, "step": 75 }, { "epoch": 0.07074637424831977, "grad_norm": 18.87639045715332, "learning_rate": 1.9600000000000003e-06, "loss": 0.9579, "step": 100 }, { "epoch": 0.08843296781039972, "grad_norm": 18.41526985168457, "learning_rate": 2.46e-06, "loss": 0.8896, "step": 125 }, { "epoch": 0.10611956137247966, "grad_norm": 17.025005340576172, "learning_rate": 2.96e-06, "loss": 0.8909, "step": 150 }, { "epoch": 0.1238061549345596, "grad_norm": 17.609508514404297, "learning_rate": 3.46e-06, "loss": 0.8571, "step": 175 }, { "epoch": 0.14149274849663954, "grad_norm": 18.9322452545166, "learning_rate": 3.96e-06, "loss": 0.906, "step": 200 }, { "epoch": 0.1591793420587195, "grad_norm": 20.501718521118164, "learning_rate": 4.4600000000000005e-06, "loss": 0.8095, "step": 225 }, { "epoch": 0.17686593562079944, "grad_norm": 19.373750686645508, "learning_rate": 4.960000000000001e-06, "loss": 0.8398, "step": 250 }, { "epoch": 0.19455252918287938, "grad_norm": 16.65863037109375, "learning_rate": 5.460000000000001e-06, "loss": 0.8129, "step": 275 }, { "epoch": 0.21223912274495932, "grad_norm": 20.23094940185547, "learning_rate": 5.9600000000000005e-06, "loss": 0.8192, "step": 300 }, { "epoch": 0.22992571630703926, "grad_norm": 19.06565284729004, "learning_rate": 6.460000000000001e-06, "loss": 0.849, "step": 325 }, { "epoch": 0.2476123098691192, "grad_norm": 21.71100425720215, "learning_rate": 6.96e-06, "loss": 0.7948, "step": 350 }, { "epoch": 0.26529890343119916, "grad_norm": 17.772262573242188, "learning_rate": 7.4600000000000006e-06, "loss": 0.7934, "step": 375 }, { "epoch": 0.2829854969932791, "grad_norm": 17.064373016357422, "learning_rate": 7.960000000000002e-06, "loss": 0.7988, "step": 400 }, { "epoch": 0.30067209055535904, "grad_norm": 16.245346069335938, "learning_rate": 8.46e-06, "loss": 0.8256, "step": 425 }, { "epoch": 0.318358684117439, "grad_norm": 15.647849082946777, "learning_rate": 8.96e-06, "loss": 0.7855, "step": 450 }, { "epoch": 0.3360452776795189, "grad_norm": 19.271818161010742, "learning_rate": 9.460000000000001e-06, "loss": 0.7986, "step": 475 }, { "epoch": 0.3537318712415989, "grad_norm": 15.884185791015625, "learning_rate": 9.960000000000001e-06, "loss": 0.8113, "step": 500 }, { "epoch": 0.3714184648036788, "grad_norm": 14.456477165222168, "learning_rate": 9.94888888888889e-06, "loss": 0.7269, "step": 525 }, { "epoch": 0.38910505836575876, "grad_norm": 15.575183868408203, "learning_rate": 9.893333333333334e-06, "loss": 0.8071, "step": 550 }, { "epoch": 0.40679165192783867, "grad_norm": 17.374601364135742, "learning_rate": 9.837777777777778e-06, "loss": 0.7626, "step": 575 }, { "epoch": 0.42447824548991864, "grad_norm": 16.48969268798828, "learning_rate": 9.782222222222222e-06, "loss": 0.7429, "step": 600 }, { "epoch": 0.4421648390519986, "grad_norm": 16.197696685791016, "learning_rate": 9.72888888888889e-06, "loss": 0.7456, "step": 625 }, { "epoch": 0.4598514326140785, "grad_norm": 15.233689308166504, "learning_rate": 9.673333333333334e-06, "loss": 0.7541, "step": 650 }, { "epoch": 0.4775380261761585, "grad_norm": 14.872835159301758, "learning_rate": 9.617777777777778e-06, "loss": 0.7168, "step": 675 }, { "epoch": 0.4952246197382384, "grad_norm": 16.02081298828125, "learning_rate": 9.562222222222223e-06, "loss": 0.7081, "step": 700 }, { "epoch": 0.5129112133003184, "grad_norm": 15.401288032531738, "learning_rate": 9.506666666666667e-06, "loss": 0.7298, "step": 725 }, { "epoch": 0.5305978068623983, "grad_norm": 15.33830738067627, "learning_rate": 9.451111111111112e-06, "loss": 0.6965, "step": 750 }, { "epoch": 0.5482844004244782, "grad_norm": 11.412097930908203, "learning_rate": 9.395555555555556e-06, "loss": 0.7183, "step": 775 }, { "epoch": 0.5659709939865581, "grad_norm": 13.304567337036133, "learning_rate": 9.340000000000002e-06, "loss": 0.6847, "step": 800 }, { "epoch": 0.5836575875486382, "grad_norm": 11.349915504455566, "learning_rate": 9.284444444444444e-06, "loss": 0.6513, "step": 825 }, { "epoch": 0.6013441811107181, "grad_norm": 14.461771011352539, "learning_rate": 9.22888888888889e-06, "loss": 0.6613, "step": 850 }, { "epoch": 0.619030774672798, "grad_norm": 24.84481430053711, "learning_rate": 9.173333333333334e-06, "loss": 0.6935, "step": 875 }, { "epoch": 0.636717368234878, "grad_norm": 14.775162696838379, "learning_rate": 9.117777777777778e-06, "loss": 0.6126, "step": 900 }, { "epoch": 0.6544039617969579, "grad_norm": 13.53603744506836, "learning_rate": 9.062222222222224e-06, "loss": 0.6542, "step": 925 }, { "epoch": 0.6720905553590378, "grad_norm": 12.807902336120605, "learning_rate": 9.006666666666666e-06, "loss": 0.6512, "step": 950 }, { "epoch": 0.6897771489211177, "grad_norm": 13.615946769714355, "learning_rate": 8.951111111111112e-06, "loss": 0.6428, "step": 975 }, { "epoch": 0.7074637424831978, "grad_norm": 16.28773307800293, "learning_rate": 8.895555555555556e-06, "loss": 0.6485, "step": 1000 }, { "epoch": 0.7074637424831978, "eval_loss": 0.3581026792526245, "eval_runtime": 4019.5457, "eval_samples_per_second": 2.678, "eval_steps_per_second": 0.167, "eval_wer": 0.22099663615739654, "step": 1000 }, { "epoch": 0.7251503360452777, "grad_norm": 12.776633262634277, "learning_rate": 8.84e-06, "loss": 0.6253, "step": 1025 }, { "epoch": 0.7428369296073576, "grad_norm": 13.185070991516113, "learning_rate": 8.784444444444446e-06, "loss": 0.599, "step": 1050 }, { "epoch": 0.7605235231694376, "grad_norm": 11.916515350341797, "learning_rate": 8.72888888888889e-06, "loss": 0.6254, "step": 1075 }, { "epoch": 0.7782101167315175, "grad_norm": 15.015724182128906, "learning_rate": 8.673333333333334e-06, "loss": 0.6457, "step": 1100 }, { "epoch": 0.7958967102935974, "grad_norm": 13.483101844787598, "learning_rate": 8.617777777777778e-06, "loss": 0.6214, "step": 1125 }, { "epoch": 0.8135833038556773, "grad_norm": 10.465691566467285, "learning_rate": 8.562222222222224e-06, "loss": 0.5961, "step": 1150 }, { "epoch": 0.8312698974177574, "grad_norm": 13.515427589416504, "learning_rate": 8.506666666666668e-06, "loss": 0.6166, "step": 1175 }, { "epoch": 0.8489564909798373, "grad_norm": 11.983545303344727, "learning_rate": 8.451111111111112e-06, "loss": 0.6157, "step": 1200 }, { "epoch": 0.8666430845419172, "grad_norm": 12.49000072479248, "learning_rate": 8.395555555555557e-06, "loss": 0.5511, "step": 1225 }, { "epoch": 0.8843296781039972, "grad_norm": 14.379878997802734, "learning_rate": 8.34e-06, "loss": 0.6323, "step": 1250 }, { "epoch": 0.9020162716660771, "grad_norm": 13.675074577331543, "learning_rate": 8.284444444444446e-06, "loss": 0.5503, "step": 1275 }, { "epoch": 0.919702865228157, "grad_norm": 14.126933097839355, "learning_rate": 8.22888888888889e-06, "loss": 0.5654, "step": 1300 }, { "epoch": 0.937389458790237, "grad_norm": 12.32470417022705, "learning_rate": 8.173333333333334e-06, "loss": 0.5526, "step": 1325 }, { "epoch": 0.955076052352317, "grad_norm": 15.61474323272705, "learning_rate": 8.11777777777778e-06, "loss": 0.5485, "step": 1350 }, { "epoch": 0.9727626459143969, "grad_norm": 11.217473983764648, "learning_rate": 8.062222222222222e-06, "loss": 0.6228, "step": 1375 }, { "epoch": 0.9904492394764768, "grad_norm": 13.000173568725586, "learning_rate": 8.006666666666667e-06, "loss": 0.5661, "step": 1400 }, { "epoch": 1.0081358330385568, "grad_norm": 10.341316223144531, "learning_rate": 7.951111111111111e-06, "loss": 0.474, "step": 1425 }, { "epoch": 1.0258224266006368, "grad_norm": 9.769425392150879, "learning_rate": 7.895555555555557e-06, "loss": 0.418, "step": 1450 }, { "epoch": 1.0435090201627166, "grad_norm": 10.41649341583252, "learning_rate": 7.840000000000001e-06, "loss": 0.3768, "step": 1475 }, { "epoch": 1.0611956137247966, "grad_norm": 7.566088676452637, "learning_rate": 7.784444444444445e-06, "loss": 0.388, "step": 1500 }, { "epoch": 1.0788822072868764, "grad_norm": 11.578902244567871, "learning_rate": 7.72888888888889e-06, "loss": 0.3349, "step": 1525 }, { "epoch": 1.0965688008489565, "grad_norm": 10.13636302947998, "learning_rate": 7.673333333333333e-06, "loss": 0.3656, "step": 1550 }, { "epoch": 1.1142553944110365, "grad_norm": 10.895235061645508, "learning_rate": 7.617777777777778e-06, "loss": 0.3707, "step": 1575 }, { "epoch": 1.1319419879731163, "grad_norm": 10.668679237365723, "learning_rate": 7.562222222222223e-06, "loss": 0.3588, "step": 1600 }, { "epoch": 1.1496285815351963, "grad_norm": 10.62954044342041, "learning_rate": 7.506666666666668e-06, "loss": 0.3737, "step": 1625 }, { "epoch": 1.1673151750972763, "grad_norm": 10.885923385620117, "learning_rate": 7.451111111111111e-06, "loss": 0.3484, "step": 1650 }, { "epoch": 1.1850017686593561, "grad_norm": 12.320752143859863, "learning_rate": 7.395555555555556e-06, "loss": 0.3775, "step": 1675 }, { "epoch": 1.2026883622214362, "grad_norm": 10.216126441955566, "learning_rate": 7.340000000000001e-06, "loss": 0.3369, "step": 1700 }, { "epoch": 1.2203749557835162, "grad_norm": 12.078413009643555, "learning_rate": 7.284444444444445e-06, "loss": 0.3669, "step": 1725 }, { "epoch": 1.238061549345596, "grad_norm": 9.149145126342773, "learning_rate": 7.22888888888889e-06, "loss": 0.3453, "step": 1750 }, { "epoch": 1.255748142907676, "grad_norm": 16.312984466552734, "learning_rate": 7.173333333333335e-06, "loss": 0.3405, "step": 1775 }, { "epoch": 1.2734347364697558, "grad_norm": 11.405570983886719, "learning_rate": 7.117777777777778e-06, "loss": 0.3665, "step": 1800 }, { "epoch": 1.2911213300318358, "grad_norm": 11.892732620239258, "learning_rate": 7.062222222222223e-06, "loss": 0.3561, "step": 1825 }, { "epoch": 1.3088079235939158, "grad_norm": 8.842497825622559, "learning_rate": 7.006666666666667e-06, "loss": 0.3218, "step": 1850 }, { "epoch": 1.3264945171559956, "grad_norm": 14.237404823303223, "learning_rate": 6.951111111111112e-06, "loss": 0.3334, "step": 1875 }, { "epoch": 1.3441811107180757, "grad_norm": 11.255355834960938, "learning_rate": 6.8955555555555565e-06, "loss": 0.3293, "step": 1900 }, { "epoch": 1.3618677042801557, "grad_norm": 10.295186042785645, "learning_rate": 6.8400000000000014e-06, "loss": 0.3085, "step": 1925 }, { "epoch": 1.3795542978422355, "grad_norm": 8.852261543273926, "learning_rate": 6.784444444444445e-06, "loss": 0.3423, "step": 1950 }, { "epoch": 1.3972408914043155, "grad_norm": 8.795380592346191, "learning_rate": 6.7288888888888895e-06, "loss": 0.3436, "step": 1975 }, { "epoch": 1.4149274849663955, "grad_norm": 8.382344245910645, "learning_rate": 6.6733333333333335e-06, "loss": 0.3362, "step": 2000 }, { "epoch": 1.4149274849663955, "eval_loss": 0.309411883354187, "eval_runtime": 4009.7652, "eval_samples_per_second": 2.684, "eval_steps_per_second": 0.168, "eval_wer": 0.1831142924196198, "step": 2000 }, { "epoch": 1.4326140785284753, "grad_norm": 9.291923522949219, "learning_rate": 6.617777777777778e-06, "loss": 0.3433, "step": 2025 }, { "epoch": 1.4503006720905554, "grad_norm": 8.35601806640625, "learning_rate": 6.562222222222223e-06, "loss": 0.3344, "step": 2050 }, { "epoch": 1.4679872656526354, "grad_norm": 8.72737979888916, "learning_rate": 6.5066666666666665e-06, "loss": 0.3333, "step": 2075 }, { "epoch": 1.4856738592147152, "grad_norm": 8.06108283996582, "learning_rate": 6.451111111111111e-06, "loss": 0.3291, "step": 2100 }, { "epoch": 1.5033604527767952, "grad_norm": 9.253275871276855, "learning_rate": 6.395555555555556e-06, "loss": 0.3341, "step": 2125 }, { "epoch": 1.5210470463388752, "grad_norm": 10.817941665649414, "learning_rate": 6.34e-06, "loss": 0.3549, "step": 2150 }, { "epoch": 1.538733639900955, "grad_norm": 10.792004585266113, "learning_rate": 6.284444444444445e-06, "loss": 0.3353, "step": 2175 }, { "epoch": 1.556420233463035, "grad_norm": 7.487943649291992, "learning_rate": 6.22888888888889e-06, "loss": 0.326, "step": 2200 }, { "epoch": 1.574106827025115, "grad_norm": 9.3944673538208, "learning_rate": 6.173333333333333e-06, "loss": 0.3214, "step": 2225 }, { "epoch": 1.5917934205871949, "grad_norm": 10.089088439941406, "learning_rate": 6.117777777777778e-06, "loss": 0.3066, "step": 2250 }, { "epoch": 1.6094800141492749, "grad_norm": 9.607513427734375, "learning_rate": 6.062222222222223e-06, "loss": 0.3492, "step": 2275 }, { "epoch": 1.627166607711355, "grad_norm": 9.252910614013672, "learning_rate": 6.006666666666667e-06, "loss": 0.3137, "step": 2300 }, { "epoch": 1.6448532012734347, "grad_norm": 11.22940444946289, "learning_rate": 5.951111111111112e-06, "loss": 0.3408, "step": 2325 }, { "epoch": 1.6625397948355147, "grad_norm": 9.891559600830078, "learning_rate": 5.895555555555557e-06, "loss": 0.306, "step": 2350 }, { "epoch": 1.6802263883975948, "grad_norm": 8.754281044006348, "learning_rate": 5.84e-06, "loss": 0.3356, "step": 2375 }, { "epoch": 1.6979129819596745, "grad_norm": 10.18521499633789, "learning_rate": 5.784444444444445e-06, "loss": 0.3167, "step": 2400 }, { "epoch": 1.7155995755217543, "grad_norm": 7.963505744934082, "learning_rate": 5.72888888888889e-06, "loss": 0.3229, "step": 2425 }, { "epoch": 1.7332861690838346, "grad_norm": 9.744851112365723, "learning_rate": 5.673333333333334e-06, "loss": 0.2961, "step": 2450 }, { "epoch": 1.7509727626459144, "grad_norm": 8.96956729888916, "learning_rate": 5.617777777777779e-06, "loss": 0.3399, "step": 2475 }, { "epoch": 1.7686593562079942, "grad_norm": 10.65976619720459, "learning_rate": 5.562222222222222e-06, "loss": 0.3205, "step": 2500 }, { "epoch": 1.7863459497700744, "grad_norm": 10.630120277404785, "learning_rate": 5.506666666666667e-06, "loss": 0.3188, "step": 2525 }, { "epoch": 1.8040325433321542, "grad_norm": 9.28016471862793, "learning_rate": 5.451111111111112e-06, "loss": 0.3033, "step": 2550 }, { "epoch": 1.821719136894234, "grad_norm": 9.342653274536133, "learning_rate": 5.3955555555555565e-06, "loss": 0.3299, "step": 2575 }, { "epoch": 1.839405730456314, "grad_norm": 9.810345649719238, "learning_rate": 5.3400000000000005e-06, "loss": 0.3141, "step": 2600 }, { "epoch": 1.857092324018394, "grad_norm": 9.378629684448242, "learning_rate": 5.2844444444444454e-06, "loss": 0.2861, "step": 2625 }, { "epoch": 1.8747789175804739, "grad_norm": 9.487212181091309, "learning_rate": 5.228888888888889e-06, "loss": 0.3064, "step": 2650 }, { "epoch": 1.892465511142554, "grad_norm": 10.776664733886719, "learning_rate": 5.1733333333333335e-06, "loss": 0.3091, "step": 2675 }, { "epoch": 1.910152104704634, "grad_norm": 9.766382217407227, "learning_rate": 5.117777777777778e-06, "loss": 0.2854, "step": 2700 }, { "epoch": 1.9278386982667137, "grad_norm": 10.020450592041016, "learning_rate": 5.062222222222222e-06, "loss": 0.3398, "step": 2725 }, { "epoch": 1.9455252918287937, "grad_norm": 10.089125633239746, "learning_rate": 5.006666666666667e-06, "loss": 0.3081, "step": 2750 }, { "epoch": 1.9632118853908738, "grad_norm": 8.752697944641113, "learning_rate": 4.951111111111111e-06, "loss": 0.3033, "step": 2775 }, { "epoch": 1.9808984789529536, "grad_norm": 9.102664947509766, "learning_rate": 4.895555555555556e-06, "loss": 0.3018, "step": 2800 }, { "epoch": 1.9985850725150336, "grad_norm": 8.133709907531738, "learning_rate": 4.84e-06, "loss": 0.3068, "step": 2825 }, { "epoch": 2.0162716660771136, "grad_norm": 7.3281779289245605, "learning_rate": 4.784444444444445e-06, "loss": 0.1808, "step": 2850 }, { "epoch": 2.0339582596391934, "grad_norm": 8.63786792755127, "learning_rate": 4.728888888888889e-06, "loss": 0.1844, "step": 2875 }, { "epoch": 2.0516448532012737, "grad_norm": 6.717278957366943, "learning_rate": 4.673333333333333e-06, "loss": 0.1663, "step": 2900 }, { "epoch": 2.0693314467633535, "grad_norm": 6.7627739906311035, "learning_rate": 4.617777777777778e-06, "loss": 0.1543, "step": 2925 }, { "epoch": 2.0870180403254333, "grad_norm": 8.074238777160645, "learning_rate": 4.562222222222222e-06, "loss": 0.1515, "step": 2950 }, { "epoch": 2.104704633887513, "grad_norm": 6.387521266937256, "learning_rate": 4.506666666666667e-06, "loss": 0.1572, "step": 2975 }, { "epoch": 2.1223912274495933, "grad_norm": 6.854986190795898, "learning_rate": 4.451111111111112e-06, "loss": 0.1504, "step": 3000 }, { "epoch": 2.1223912274495933, "eval_loss": 0.2956615090370178, "eval_runtime": 4074.1591, "eval_samples_per_second": 2.642, "eval_steps_per_second": 0.165, "eval_wer": 0.16988383008683408, "step": 3000 }, { "epoch": 2.140077821011673, "grad_norm": 7.486941814422607, "learning_rate": 4.395555555555556e-06, "loss": 0.139, "step": 3025 }, { "epoch": 2.157764414573753, "grad_norm": 10.396411895751953, "learning_rate": 4.34e-06, "loss": 0.166, "step": 3050 }, { "epoch": 2.175451008135833, "grad_norm": 7.398874282836914, "learning_rate": 4.284444444444445e-06, "loss": 0.1654, "step": 3075 }, { "epoch": 2.193137601697913, "grad_norm": 6.542088985443115, "learning_rate": 4.228888888888889e-06, "loss": 0.1559, "step": 3100 }, { "epoch": 2.2108241952599927, "grad_norm": 6.458364009857178, "learning_rate": 4.173333333333334e-06, "loss": 0.1493, "step": 3125 }, { "epoch": 2.228510788822073, "grad_norm": 7.253146648406982, "learning_rate": 4.117777777777779e-06, "loss": 0.1488, "step": 3150 }, { "epoch": 2.246197382384153, "grad_norm": 7.556519508361816, "learning_rate": 4.062222222222223e-06, "loss": 0.136, "step": 3175 }, { "epoch": 2.2638839759462326, "grad_norm": 6.949014186859131, "learning_rate": 4.006666666666667e-06, "loss": 0.1626, "step": 3200 }, { "epoch": 2.281570569508313, "grad_norm": 7.269302845001221, "learning_rate": 3.951111111111112e-06, "loss": 0.1635, "step": 3225 }, { "epoch": 2.2992571630703926, "grad_norm": 9.310769081115723, "learning_rate": 3.895555555555556e-06, "loss": 0.1593, "step": 3250 }, { "epoch": 2.3169437566324724, "grad_norm": 6.7611894607543945, "learning_rate": 3.8400000000000005e-06, "loss": 0.1331, "step": 3275 }, { "epoch": 2.3346303501945527, "grad_norm": 8.675508499145508, "learning_rate": 3.784444444444445e-06, "loss": 0.1607, "step": 3300 }, { "epoch": 2.3523169437566325, "grad_norm": 8.40794563293457, "learning_rate": 3.728888888888889e-06, "loss": 0.1566, "step": 3325 }, { "epoch": 2.3700035373187123, "grad_norm": 8.73206615447998, "learning_rate": 3.673333333333334e-06, "loss": 0.1499, "step": 3350 }, { "epoch": 2.3876901308807925, "grad_norm": 7.350528717041016, "learning_rate": 3.617777777777778e-06, "loss": 0.1645, "step": 3375 }, { "epoch": 2.4053767244428723, "grad_norm": 6.264787197113037, "learning_rate": 3.5622222222222224e-06, "loss": 0.1593, "step": 3400 }, { "epoch": 2.423063318004952, "grad_norm": 7.77794885635376, "learning_rate": 3.5066666666666673e-06, "loss": 0.1499, "step": 3425 }, { "epoch": 2.4407499115670324, "grad_norm": 6.835781574249268, "learning_rate": 3.4511111111111113e-06, "loss": 0.1475, "step": 3450 }, { "epoch": 2.458436505129112, "grad_norm": 7.863101482391357, "learning_rate": 3.3955555555555558e-06, "loss": 0.1495, "step": 3475 }, { "epoch": 2.476123098691192, "grad_norm": 5.20499849319458, "learning_rate": 3.3400000000000006e-06, "loss": 0.1486, "step": 3500 }, { "epoch": 2.493809692253272, "grad_norm": 7.780925750732422, "learning_rate": 3.2844444444444447e-06, "loss": 0.1324, "step": 3525 }, { "epoch": 2.511496285815352, "grad_norm": 8.029056549072266, "learning_rate": 3.228888888888889e-06, "loss": 0.1654, "step": 3550 }, { "epoch": 2.529182879377432, "grad_norm": 8.651548385620117, "learning_rate": 3.173333333333334e-06, "loss": 0.1409, "step": 3575 }, { "epoch": 2.5468694729395116, "grad_norm": 7.2054548263549805, "learning_rate": 3.117777777777778e-06, "loss": 0.1706, "step": 3600 }, { "epoch": 2.564556066501592, "grad_norm": 9.639912605285645, "learning_rate": 3.0622222222222225e-06, "loss": 0.1446, "step": 3625 }, { "epoch": 2.5822426600636716, "grad_norm": 8.069547653198242, "learning_rate": 3.0066666666666674e-06, "loss": 0.1492, "step": 3650 }, { "epoch": 2.599929253625752, "grad_norm": 7.179859638214111, "learning_rate": 2.9511111111111114e-06, "loss": 0.1401, "step": 3675 }, { "epoch": 2.6176158471878317, "grad_norm": 6.412818908691406, "learning_rate": 2.895555555555556e-06, "loss": 0.1364, "step": 3700 }, { "epoch": 2.6353024407499115, "grad_norm": 6.2440571784973145, "learning_rate": 2.84e-06, "loss": 0.143, "step": 3725 }, { "epoch": 2.6529890343119913, "grad_norm": 3.8863301277160645, "learning_rate": 2.784444444444445e-06, "loss": 0.1416, "step": 3750 }, { "epoch": 2.6706756278740715, "grad_norm": 7.729201793670654, "learning_rate": 2.7288888888888893e-06, "loss": 0.1407, "step": 3775 }, { "epoch": 2.6883622214361513, "grad_norm": 6.973865032196045, "learning_rate": 2.6733333333333333e-06, "loss": 0.1555, "step": 3800 }, { "epoch": 2.7060488149982316, "grad_norm": 7.154475212097168, "learning_rate": 2.617777777777778e-06, "loss": 0.1541, "step": 3825 }, { "epoch": 2.7237354085603114, "grad_norm": 8.38300609588623, "learning_rate": 2.5622222222222226e-06, "loss": 0.1345, "step": 3850 }, { "epoch": 2.741422002122391, "grad_norm": 7.105490684509277, "learning_rate": 2.5066666666666667e-06, "loss": 0.136, "step": 3875 }, { "epoch": 2.759108595684471, "grad_norm": 6.707284927368164, "learning_rate": 2.451111111111111e-06, "loss": 0.1454, "step": 3900 }, { "epoch": 2.776795189246551, "grad_norm": 4.504308223724365, "learning_rate": 2.3955555555555556e-06, "loss": 0.1383, "step": 3925 }, { "epoch": 2.794481782808631, "grad_norm": 8.98666763305664, "learning_rate": 2.3400000000000005e-06, "loss": 0.1247, "step": 3950 }, { "epoch": 2.8121683763707113, "grad_norm": 7.637745380401611, "learning_rate": 2.2844444444444445e-06, "loss": 0.1503, "step": 3975 }, { "epoch": 2.829854969932791, "grad_norm": 8.50109577178955, "learning_rate": 2.228888888888889e-06, "loss": 0.1558, "step": 4000 }, { "epoch": 2.829854969932791, "eval_loss": 0.2815879285335541, "eval_runtime": 3977.9422, "eval_samples_per_second": 2.706, "eval_steps_per_second": 0.169, "eval_wer": 0.16456426503950558, "step": 4000 }, { "epoch": 2.847541563494871, "grad_norm": 6.490965366363525, "learning_rate": 2.1733333333333334e-06, "loss": 0.146, "step": 4025 }, { "epoch": 2.8652281570569507, "grad_norm": 6.164106845855713, "learning_rate": 2.117777777777778e-06, "loss": 0.1306, "step": 4050 }, { "epoch": 2.882914750619031, "grad_norm": 6.784088611602783, "learning_rate": 2.0622222222222223e-06, "loss": 0.1362, "step": 4075 }, { "epoch": 2.9006013441811107, "grad_norm": 7.150921821594238, "learning_rate": 2.006666666666667e-06, "loss": 0.1483, "step": 4100 }, { "epoch": 2.9182879377431905, "grad_norm": 6.3694329261779785, "learning_rate": 1.9511111111111113e-06, "loss": 0.1545, "step": 4125 }, { "epoch": 2.9359745313052708, "grad_norm": 5.38779878616333, "learning_rate": 1.8955555555555557e-06, "loss": 0.1395, "step": 4150 }, { "epoch": 2.9536611248673506, "grad_norm": 7.62282657623291, "learning_rate": 1.8400000000000002e-06, "loss": 0.1436, "step": 4175 }, { "epoch": 2.9713477184294304, "grad_norm": 9.915560722351074, "learning_rate": 1.7844444444444444e-06, "loss": 0.1332, "step": 4200 }, { "epoch": 2.9890343119915106, "grad_norm": 6.760516166687012, "learning_rate": 1.728888888888889e-06, "loss": 0.1258, "step": 4225 }, { "epoch": 3.0067209055535904, "grad_norm": 3.125908136367798, "learning_rate": 1.6733333333333335e-06, "loss": 0.1119, "step": 4250 }, { "epoch": 3.02440749911567, "grad_norm": 4.832523345947266, "learning_rate": 1.6177777777777778e-06, "loss": 0.0726, "step": 4275 }, { "epoch": 3.0420940926777504, "grad_norm": 3.6243960857391357, "learning_rate": 1.5622222222222225e-06, "loss": 0.0672, "step": 4300 }, { "epoch": 3.0597806862398302, "grad_norm": 4.241108417510986, "learning_rate": 1.506666666666667e-06, "loss": 0.0654, "step": 4325 }, { "epoch": 3.07746727980191, "grad_norm": 4.0127644538879395, "learning_rate": 1.4511111111111112e-06, "loss": 0.0801, "step": 4350 }, { "epoch": 3.0951538733639903, "grad_norm": 4.87974739074707, "learning_rate": 1.3955555555555556e-06, "loss": 0.0599, "step": 4375 }, { "epoch": 3.11284046692607, "grad_norm": 5.309682846069336, "learning_rate": 1.34e-06, "loss": 0.0636, "step": 4400 }, { "epoch": 3.13052706048815, "grad_norm": 2.6454548835754395, "learning_rate": 1.2844444444444445e-06, "loss": 0.0539, "step": 4425 }, { "epoch": 3.14821365405023, "grad_norm": 3.234809637069702, "learning_rate": 1.228888888888889e-06, "loss": 0.0531, "step": 4450 }, { "epoch": 3.16590024761231, "grad_norm": 5.55891227722168, "learning_rate": 1.1733333333333335e-06, "loss": 0.0805, "step": 4475 }, { "epoch": 3.1835868411743897, "grad_norm": 5.280560493469238, "learning_rate": 1.117777777777778e-06, "loss": 0.0658, "step": 4500 }, { "epoch": 3.2012734347364695, "grad_norm": 4.731041431427002, "learning_rate": 1.0622222222222222e-06, "loss": 0.0653, "step": 4525 }, { "epoch": 3.2189600282985498, "grad_norm": 3.2889113426208496, "learning_rate": 1.0066666666666668e-06, "loss": 0.0628, "step": 4550 }, { "epoch": 3.2366466218606296, "grad_norm": 3.6610448360443115, "learning_rate": 9.511111111111111e-07, "loss": 0.0739, "step": 4575 }, { "epoch": 3.25433321542271, "grad_norm": 4.585997104644775, "learning_rate": 8.955555555555557e-07, "loss": 0.0733, "step": 4600 }, { "epoch": 3.2720198089847896, "grad_norm": 4.393402099609375, "learning_rate": 8.400000000000001e-07, "loss": 0.0589, "step": 4625 }, { "epoch": 3.2897064025468694, "grad_norm": 6.0053205490112305, "learning_rate": 7.844444444444445e-07, "loss": 0.0669, "step": 4650 }, { "epoch": 3.307392996108949, "grad_norm": 6.425204277038574, "learning_rate": 7.28888888888889e-07, "loss": 0.0633, "step": 4675 }, { "epoch": 3.3250795896710295, "grad_norm": 3.262568473815918, "learning_rate": 6.733333333333334e-07, "loss": 0.0601, "step": 4700 }, { "epoch": 3.3427661832331093, "grad_norm": 4.711056709289551, "learning_rate": 6.177777777777778e-07, "loss": 0.0694, "step": 4725 }, { "epoch": 3.360452776795189, "grad_norm": 3.0668253898620605, "learning_rate": 5.622222222222223e-07, "loss": 0.0453, "step": 4750 }, { "epoch": 3.3781393703572693, "grad_norm": 2.9075567722320557, "learning_rate": 5.066666666666667e-07, "loss": 0.0594, "step": 4775 }, { "epoch": 3.395825963919349, "grad_norm": 3.7801194190979004, "learning_rate": 4.511111111111111e-07, "loss": 0.0718, "step": 4800 }, { "epoch": 3.413512557481429, "grad_norm": 5.163565158843994, "learning_rate": 3.9555555555555557e-07, "loss": 0.058, "step": 4825 }, { "epoch": 3.431199151043509, "grad_norm": 4.530272960662842, "learning_rate": 3.4000000000000003e-07, "loss": 0.0533, "step": 4850 }, { "epoch": 3.448885744605589, "grad_norm": 3.2570483684539795, "learning_rate": 2.844444444444445e-07, "loss": 0.0603, "step": 4875 }, { "epoch": 3.4665723381676687, "grad_norm": 2.4571080207824707, "learning_rate": 2.2888888888888892e-07, "loss": 0.0597, "step": 4900 }, { "epoch": 3.484258931729749, "grad_norm": 3.9720897674560547, "learning_rate": 1.7333333333333335e-07, "loss": 0.0589, "step": 4925 }, { "epoch": 3.501945525291829, "grad_norm": 4.322543621063232, "learning_rate": 1.1777777777777778e-07, "loss": 0.0758, "step": 4950 }, { "epoch": 3.5196321188539086, "grad_norm": 6.084353446960449, "learning_rate": 6.222222222222223e-08, "loss": 0.0586, "step": 4975 }, { "epoch": 3.537318712415989, "grad_norm": 2.7334227561950684, "learning_rate": 6.666666666666667e-09, "loss": 0.0619, "step": 5000 }, { "epoch": 3.537318712415989, "eval_loss": 0.29267358779907227, "eval_runtime": 3980.6566, "eval_samples_per_second": 2.704, "eval_steps_per_second": 0.169, "eval_wer": 0.15772901509817727, "step": 5000 }, { "epoch": 3.537318712415989, "step": 5000, "total_flos": 2.7274615074127872e+20, "train_loss": 0.36250473141670225, "train_runtime": 65548.1959, "train_samples_per_second": 2.441, "train_steps_per_second": 0.076 } ], "logging_steps": 25, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.7274615074127872e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }