{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.4851953222194576, "global_step": 3900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.6399999999999995e-05, "loss": 12.4264, "step": 50 }, { "epoch": 0.01, "learning_rate": 5.6399999999999995e-05, "loss": 7.1545, "step": 100 }, { "epoch": 0.01, "eval_cer": 0.990295846526883, "eval_loss": 4.435502529144287, "eval_runtime": 2212.4333, "eval_samples_per_second": 21.798, "eval_steps_per_second": 1.363, "eval_wer": 0.9803344576185385, "step": 100 }, { "epoch": 0.02, "learning_rate": 8.639999999999999e-05, "loss": 3.2621, "step": 150 }, { "epoch": 0.02, "learning_rate": 0.0001164, "loss": 2.9431, "step": 200 }, { "epoch": 0.02, "eval_cer": 0.990295846526883, "eval_loss": 3.032039165496826, "eval_runtime": 2093.0103, "eval_samples_per_second": 23.041, "eval_steps_per_second": 1.441, "eval_wer": 0.9803344576185385, "step": 200 }, { "epoch": 0.03, "learning_rate": 0.00014639999999999998, "loss": 2.9236, "step": 250 }, { "epoch": 0.04, "learning_rate": 0.00017639999999999998, "loss": 2.9188, "step": 300 }, { "epoch": 0.04, "eval_cer": 0.990295846526883, "eval_loss": 2.976602077484131, "eval_runtime": 2082.4254, "eval_samples_per_second": 23.159, "eval_steps_per_second": 1.448, "eval_wer": 0.9803344576185385, "step": 300 }, { "epoch": 0.04, "learning_rate": 0.00020639999999999998, "loss": 2.8775, "step": 350 }, { "epoch": 0.05, "learning_rate": 0.0002364, "loss": 2.7809, "step": 400 }, { "epoch": 0.05, "eval_cer": 0.9696764691640024, "eval_loss": 2.7085533142089844, "eval_runtime": 2082.5137, "eval_samples_per_second": 23.158, "eval_steps_per_second": 1.448, "eval_wer": 0.9589848218162464, "step": 400 }, { "epoch": 0.06, "learning_rate": 0.00026639999999999997, "loss": 2.5919, "step": 450 }, { "epoch": 0.06, "learning_rate": 0.0002964, "loss": 2.3631, "step": 500 }, { "epoch": 0.06, "eval_cer": 0.862518823322377, "eval_loss": 1.9892584085464478, "eval_runtime": 2074.1857, "eval_samples_per_second": 23.251, "eval_steps_per_second": 1.454, "eval_wer": 0.9145354269893812, "step": 500 }, { "epoch": 0.07, "learning_rate": 0.0002994410095705937, "loss": 1.5803, "step": 550 }, { "epoch": 0.07, "learning_rate": 0.0002988057931735411, "loss": 0.9821, "step": 600 }, { "epoch": 0.07, "eval_cer": 0.1523255882620419, "eval_loss": 0.5998358726501465, "eval_runtime": 2075.5254, "eval_samples_per_second": 23.236, "eval_steps_per_second": 1.453, "eval_wer": 0.3576434163952664, "step": 600 }, { "epoch": 0.08, "learning_rate": 0.0002981705767764885, "loss": 0.7758, "step": 650 }, { "epoch": 0.09, "learning_rate": 0.0002975353603794359, "loss": 0.6585, "step": 700 }, { "epoch": 0.09, "eval_cer": 0.10982663055738294, "eval_loss": 0.4177716076374054, "eval_runtime": 2075.6462, "eval_samples_per_second": 23.234, "eval_steps_per_second": 1.453, "eval_wer": 0.25202041187922813, "step": 700 }, { "epoch": 0.09, "learning_rate": 0.0002969001439823833, "loss": 0.6077, "step": 750 }, { "epoch": 0.1, "learning_rate": 0.0002962649275853307, "loss": 0.5657, "step": 800 }, { "epoch": 0.1, "eval_cer": 0.09587676230587619, "eval_loss": 0.36425185203552246, "eval_runtime": 2094.5001, "eval_samples_per_second": 23.025, "eval_steps_per_second": 1.439, "eval_wer": 0.21289942193910186, "step": 800 }, { "epoch": 0.11, "learning_rate": 0.00029562971118827813, "loss": 0.5295, "step": 850 }, { "epoch": 0.11, "learning_rate": 0.0002949944947912255, "loss": 0.4998, "step": 900 }, { "epoch": 0.11, "eval_cer": 0.09022533657576749, "eval_loss": 0.3022037148475647, "eval_runtime": 2108.0878, "eval_samples_per_second": 22.877, "eval_steps_per_second": 1.43, "eval_wer": 0.20626376037718358, "step": 900 }, { "epoch": 0.12, "learning_rate": 0.00029435927839417295, "loss": 0.4887, "step": 950 }, { "epoch": 0.12, "learning_rate": 0.0002937240619971203, "loss": 0.4902, "step": 1000 }, { "epoch": 0.12, "eval_cer": 0.0833139440810026, "eval_loss": 0.2917855978012085, "eval_runtime": 2122.6451, "eval_samples_per_second": 22.72, "eval_steps_per_second": 1.42, "eval_wer": 0.1899083786482299, "step": 1000 }, { "epoch": 0.13, "learning_rate": 0.0002930888456000677, "loss": 0.4776, "step": 1050 }, { "epoch": 0.14, "learning_rate": 0.00029245362920301513, "loss": 0.468, "step": 1100 }, { "epoch": 0.14, "eval_cer": 0.08389881054318646, "eval_loss": 0.2726401686668396, "eval_runtime": 2129.6342, "eval_samples_per_second": 22.645, "eval_steps_per_second": 1.416, "eval_wer": 0.1888892520157431, "step": 1100 }, { "epoch": 0.14, "learning_rate": 0.00029181841280596254, "loss": 0.44, "step": 1150 }, { "epoch": 0.15, "learning_rate": 0.00029118319640890995, "loss": 0.4332, "step": 1200 }, { "epoch": 0.15, "eval_cer": 0.07579453612837772, "eval_loss": 0.2764703929424286, "eval_runtime": 2387.2613, "eval_samples_per_second": 20.201, "eval_steps_per_second": 1.263, "eval_wer": 0.1726946095004212, "step": 1200 }, { "epoch": 0.16, "learning_rate": 0.00029054798001185736, "loss": 0.4363, "step": 1250 }, { "epoch": 0.16, "learning_rate": 0.00028991276361480477, "loss": 0.4178, "step": 1300 }, { "epoch": 0.16, "eval_cer": 0.07956404299571626, "eval_loss": 0.2525430917739868, "eval_runtime": 2685.8951, "eval_samples_per_second": 17.955, "eval_steps_per_second": 1.123, "eval_wer": 0.18290121608219176, "step": 1300 }, { "epoch": 0.17, "learning_rate": 0.0002892775472177521, "loss": 0.4072, "step": 1350 }, { "epoch": 0.17, "learning_rate": 0.0002886423308206996, "loss": 0.4171, "step": 1400 }, { "epoch": 0.17, "eval_cer": 0.07121410576965095, "eval_loss": 0.24207398295402527, "eval_runtime": 2387.6421, "eval_samples_per_second": 20.198, "eval_steps_per_second": 1.263, "eval_wer": 0.16114939876197457, "step": 1400 }, { "epoch": 0.18, "learning_rate": 0.00028800711442364694, "loss": 0.4074, "step": 1450 }, { "epoch": 0.19, "learning_rate": 0.00028737189802659435, "loss": 0.4049, "step": 1500 }, { "epoch": 0.19, "eval_cer": 0.07119875184394742, "eval_loss": 0.23904423415660858, "eval_runtime": 2682.696, "eval_samples_per_second": 17.977, "eval_steps_per_second": 1.124, "eval_wer": 0.1614502011908041, "step": 1500 }, { "epoch": 0.19, "learning_rate": 0.00028673668162954176, "loss": 0.3901, "step": 1550 }, { "epoch": 0.2, "learning_rate": 0.0002861014652324892, "loss": 0.3963, "step": 1600 }, { "epoch": 0.2, "eval_cer": 0.07037200199837249, "eval_loss": 0.22700898349285126, "eval_runtime": 2633.1938, "eval_samples_per_second": 18.315, "eval_steps_per_second": 1.145, "eval_wer": 0.16208515443303412, "step": 1600 }, { "epoch": 0.21, "learning_rate": 0.0002854662488354366, "loss": 0.3813, "step": 1650 }, { "epoch": 0.21, "learning_rate": 0.000284831032438384, "loss": 0.3924, "step": 1700 }, { "epoch": 0.21, "eval_cer": 0.07119875184394742, "eval_loss": 0.2278161644935608, "eval_runtime": 2675.9933, "eval_samples_per_second": 18.022, "eval_steps_per_second": 1.127, "eval_wer": 0.16218386565136397, "step": 1700 }, { "epoch": 0.22, "learning_rate": 0.0002841958160413314, "loss": 0.3938, "step": 1750 }, { "epoch": 0.22, "learning_rate": 0.00028356059964427876, "loss": 0.3942, "step": 1800 }, { "epoch": 0.22, "eval_cer": 0.06874165130289871, "eval_loss": 0.22094863653182983, "eval_runtime": 2685.0595, "eval_samples_per_second": 17.961, "eval_steps_per_second": 1.123, "eval_wer": 0.1562058342331839, "step": 1800 }, { "epoch": 0.23, "learning_rate": 0.0002829253832472262, "loss": 0.3674, "step": 1850 }, { "epoch": 0.24, "learning_rate": 0.0002822901668501736, "loss": 0.3745, "step": 1900 }, { "epoch": 0.24, "eval_cer": 0.0659505438242377, "eval_loss": 0.21558238565921783, "eval_runtime": 2439.0418, "eval_samples_per_second": 19.773, "eval_steps_per_second": 1.236, "eval_wer": 0.1506740042005625, "step": 1900 }, { "epoch": 0.24, "learning_rate": 0.000281654950453121, "loss": 0.3711, "step": 1950 }, { "epoch": 0.25, "learning_rate": 0.0002810197340560684, "loss": 0.3598, "step": 2000 }, { "epoch": 0.25, "eval_cer": 0.06898353468628977, "eval_loss": 0.221131831407547, "eval_runtime": 2663.9985, "eval_samples_per_second": 18.103, "eval_steps_per_second": 1.132, "eval_wer": 0.15953867178719996, "step": 2000 }, { "epoch": 0.26, "learning_rate": 0.0002803845176590158, "loss": 0.3756, "step": 2050 }, { "epoch": 0.26, "learning_rate": 0.0002797493012619632, "loss": 0.3653, "step": 2100 }, { "epoch": 0.26, "eval_cer": 0.06659281034712863, "eval_loss": 0.2138465791940689, "eval_runtime": 2389.088, "eval_samples_per_second": 20.186, "eval_steps_per_second": 1.262, "eval_wer": 0.1510541757846708, "step": 2100 }, { "epoch": 0.27, "learning_rate": 0.00027911408486491063, "loss": 0.3751, "step": 2150 }, { "epoch": 0.27, "learning_rate": 0.00027847886846785804, "loss": 0.3576, "step": 2200 }, { "epoch": 0.27, "eval_cer": 0.06642438959287295, "eval_loss": 0.2111993283033371, "eval_runtime": 2676.0178, "eval_samples_per_second": 18.022, "eval_steps_per_second": 1.127, "eval_wer": 0.1520546273217979, "step": 2200 }, { "epoch": 0.28, "learning_rate": 0.00027784365207080545, "loss": 0.353, "step": 2250 }, { "epoch": 0.29, "learning_rate": 0.00027720843567375286, "loss": 0.3569, "step": 2300 }, { "epoch": 0.29, "eval_cer": 0.06564322909592542, "eval_loss": 0.21856307983398438, "eval_runtime": 2642.3302, "eval_samples_per_second": 18.251, "eval_steps_per_second": 1.141, "eval_wer": 0.14776135629213985, "step": 2300 }, { "epoch": 0.29, "learning_rate": 0.00027657321927670027, "loss": 0.371, "step": 2350 }, { "epoch": 0.3, "learning_rate": 0.0002759380028796476, "loss": 0.3473, "step": 2400 }, { "epoch": 0.3, "eval_cer": 0.0664718686554331, "eval_loss": 0.2098075896501541, "eval_runtime": 2685.1293, "eval_samples_per_second": 17.96, "eval_steps_per_second": 1.123, "eval_wer": 0.15094546005096968, "step": 2400 }, { "epoch": 0.3, "learning_rate": 0.0002753027864825951, "loss": 0.3574, "step": 2450 }, { "epoch": 0.31, "learning_rate": 0.00027466757008554244, "loss": 0.3735, "step": 2500 }, { "epoch": 0.31, "eval_cer": 0.06522796445920521, "eval_loss": 0.20497867465019226, "eval_runtime": 2686.3997, "eval_samples_per_second": 17.952, "eval_steps_per_second": 1.122, "eval_wer": 0.1481148491685914, "step": 2500 }, { "epoch": 0.32, "learning_rate": 0.00027403235368848985, "loss": 0.3617, "step": 2550 }, { "epoch": 0.32, "learning_rate": 0.00027339713729143726, "loss": 0.3523, "step": 2600 }, { "epoch": 0.32, "eval_cer": 0.06236906349320707, "eval_loss": 0.20248664915561676, "eval_runtime": 2418.5335, "eval_samples_per_second": 19.94, "eval_steps_per_second": 1.247, "eval_wer": 0.14132578503764698, "step": 2600 }, { "epoch": 0.33, "learning_rate": 0.0002727619208943847, "loss": 0.3466, "step": 2650 }, { "epoch": 0.34, "learning_rate": 0.0002721267044973321, "loss": 0.3443, "step": 2700 }, { "epoch": 0.34, "eval_cer": 0.0631530585610537, "eval_loss": 0.19711410999298096, "eval_runtime": 2678.8408, "eval_samples_per_second": 18.003, "eval_steps_per_second": 1.125, "eval_wer": 0.14343206900714522, "step": 2700 }, { "epoch": 0.34, "learning_rate": 0.0002714914881002795, "loss": 0.3353, "step": 2750 }, { "epoch": 0.35, "learning_rate": 0.0002708562717032269, "loss": 0.3271, "step": 2800 }, { "epoch": 0.35, "eval_cer": 0.06349556921136332, "eval_loss": 0.1974596232175827, "eval_runtime": 2413.2013, "eval_samples_per_second": 19.984, "eval_steps_per_second": 1.249, "eval_wer": 0.14444185809195217, "step": 2800 }, { "epoch": 0.35, "learning_rate": 0.00027022105530617426, "loss": 0.3411, "step": 2850 }, { "epoch": 0.36, "learning_rate": 0.0002695858389091217, "loss": 0.347, "step": 2900 }, { "epoch": 0.36, "eval_cer": 0.06194080707319925, "eval_loss": 0.19428882002830505, "eval_runtime": 2689.3153, "eval_samples_per_second": 17.932, "eval_steps_per_second": 1.121, "eval_wer": 0.14129443755615034, "step": 2900 }, { "epoch": 0.37, "learning_rate": 0.0002689506225120691, "loss": 0.3414, "step": 2950 }, { "epoch": 0.37, "learning_rate": 0.0002683154061150165, "loss": 0.3291, "step": 3000 }, { "epoch": 0.37, "eval_cer": 0.06220513080954164, "eval_loss": 0.19397367537021637, "eval_runtime": 2678.8937, "eval_samples_per_second": 18.002, "eval_steps_per_second": 1.125, "eval_wer": 0.14132445110226416, "step": 3000 }, { "epoch": 0.38, "learning_rate": 0.0002676801897179639, "loss": 0.3472, "step": 3050 }, { "epoch": 0.39, "learning_rate": 0.0002670449733209113, "loss": 0.3386, "step": 3100 }, { "epoch": 0.39, "eval_cer": 0.06227859343867701, "eval_loss": 0.18810917437076569, "eval_runtime": 2096.7687, "eval_samples_per_second": 23.0, "eval_steps_per_second": 1.438, "eval_wer": 0.143275331599662, "step": 3100 }, { "epoch": 0.39, "learning_rate": 0.0002664097569238587, "loss": 0.3561, "step": 3150 }, { "epoch": 0.4, "learning_rate": 0.00026577454052680613, "loss": 0.3346, "step": 3200 }, { "epoch": 0.4, "eval_cer": 0.062040017054668245, "eval_loss": 0.19153311848640442, "eval_runtime": 2107.1655, "eval_samples_per_second": 22.887, "eval_steps_per_second": 1.431, "eval_wer": 0.141297105426916, "step": 3200 }, { "epoch": 0.4, "learning_rate": 0.00026513932412975354, "loss": 0.3362, "step": 3250 }, { "epoch": 0.41, "learning_rate": 0.0002645041077327009, "loss": 0.3286, "step": 3300 }, { "epoch": 0.41, "eval_cer": 0.06275858077759366, "eval_loss": 0.18800656497478485, "eval_runtime": 2108.9053, "eval_samples_per_second": 22.868, "eval_steps_per_second": 1.43, "eval_wer": 0.14638073317090447, "step": 3300 }, { "epoch": 0.42, "learning_rate": 0.00026386889133564836, "loss": 0.3088, "step": 3350 }, { "epoch": 0.42, "learning_rate": 0.0002632336749385957, "loss": 0.3211, "step": 3400 }, { "epoch": 0.42, "eval_cer": 0.06097185625418542, "eval_loss": 0.1875077337026596, "eval_runtime": 2192.9622, "eval_samples_per_second": 21.991, "eval_steps_per_second": 1.375, "eval_wer": 0.1381210052803832, "step": 3400 }, { "epoch": 0.43, "learning_rate": 0.0002625984585415431, "loss": 0.3185, "step": 3450 }, { "epoch": 0.44, "learning_rate": 0.00026196324214449053, "loss": 0.3272, "step": 3500 }, { "epoch": 0.44, "eval_cer": 0.060900047124741195, "eval_loss": 0.18167421221733093, "eval_runtime": 2110.7367, "eval_samples_per_second": 22.848, "eval_steps_per_second": 1.428, "eval_wer": 0.14127176065464214, "step": 3500 }, { "epoch": 0.44, "learning_rate": 0.00026132802574743794, "loss": 0.321, "step": 3550 }, { "epoch": 0.45, "learning_rate": 0.00026069280935038535, "loss": 0.3246, "step": 3600 }, { "epoch": 0.45, "eval_cer": 0.05953118559471069, "eval_loss": 0.18253999948501587, "eval_runtime": 2122.2554, "eval_samples_per_second": 22.724, "eval_steps_per_second": 1.421, "eval_wer": 0.13588532957874988, "step": 3600 }, { "epoch": 0.45, "learning_rate": 0.00026005759295333276, "loss": 0.3266, "step": 3650 }, { "epoch": 0.46, "learning_rate": 0.0002594223765562802, "loss": 0.3239, "step": 3700 }, { "epoch": 0.46, "eval_cer": 0.06006573842343529, "eval_loss": 0.18680287897586823, "eval_runtime": 2677.6172, "eval_samples_per_second": 18.011, "eval_steps_per_second": 1.126, "eval_wer": 0.13703584884644604, "step": 3700 }, { "epoch": 0.47, "learning_rate": 0.00025878716015922753, "loss": 0.3272, "step": 3750 }, { "epoch": 0.47, "learning_rate": 0.000258151943762175, "loss": 0.32, "step": 3800 }, { "epoch": 0.47, "eval_cer": 0.05884947129347375, "eval_loss": 0.18254593014717102, "eval_runtime": 2588.5152, "eval_samples_per_second": 18.631, "eval_steps_per_second": 1.165, "eval_wer": 0.13350625582346165, "step": 3800 }, { "epoch": 0.48, "learning_rate": 0.00025751672736512235, "loss": 0.3148, "step": 3850 }, { "epoch": 0.49, "learning_rate": 0.00025688151096806976, "loss": 0.3185, "step": 3900 }, { "epoch": 0.49, "eval_cer": 0.06071367408812445, "eval_loss": 0.1804201900959015, "eval_runtime": 2677.674, "eval_samples_per_second": 18.01, "eval_steps_per_second": 1.126, "eval_wer": 0.14048273787569457, "step": 3900 } ], "max_steps": 24114, "num_train_epochs": 3, "total_flos": 3.282673713375086e+19, "trial_name": null, "trial_params": null }