{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.993114001530222, "eval_steps": 2000, "global_step": 24495, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 3.9999999999999996e-05, "loss": 33.0181, "step": 200 }, { "epoch": 0.24, "learning_rate": 7.999999999999999e-05, "loss": 8.7646, "step": 400 }, { "epoch": 0.37, "learning_rate": 0.00011999999999999999, "loss": 4.6933, "step": 600 }, { "epoch": 0.49, "learning_rate": 0.00015999999999999999, "loss": 4.5796, "step": 800 }, { "epoch": 0.61, "learning_rate": 0.00019999999999999998, "loss": 4.1047, "step": 1000 }, { "epoch": 0.73, "learning_rate": 0.00023999999999999998, "loss": 2.6327, "step": 1200 }, { "epoch": 0.86, "learning_rate": 0.00028, "loss": 1.9893, "step": 1400 }, { "epoch": 0.98, "learning_rate": 0.00029869536855838224, "loss": 1.7241, "step": 1600 }, { "epoch": 1.1, "learning_rate": 0.00029608610567514673, "loss": 1.5607, "step": 1800 }, { "epoch": 1.22, "learning_rate": 0.00029347684279191127, "loss": 1.4503, "step": 2000 }, { "epoch": 1.22, "eval_cer": 0.268688843618788, "eval_loss": 1.0610458850860596, "eval_runtime": 1586.9419, "eval_samples_per_second": 8.25, "eval_steps_per_second": 0.516, "step": 2000 }, { "epoch": 1.35, "learning_rate": 0.00029086757990867576, "loss": 1.3782, "step": 2200 }, { "epoch": 1.47, "learning_rate": 0.0002882583170254403, "loss": 1.3244, "step": 2400 }, { "epoch": 1.59, "learning_rate": 0.0002856490541422048, "loss": 1.2553, "step": 2600 }, { "epoch": 1.71, "learning_rate": 0.00028303979125896933, "loss": 1.2077, "step": 2800 }, { "epoch": 1.84, "learning_rate": 0.0002804305283757338, "loss": 1.1868, "step": 3000 }, { "epoch": 1.96, "learning_rate": 0.00027782126549249836, "loss": 1.1624, "step": 3200 }, { "epoch": 2.08, "learning_rate": 0.00027521200260926284, "loss": 1.1136, "step": 3400 }, { "epoch": 2.2, "learning_rate": 0.0002726027397260274, "loss": 1.077, "step": 3600 }, { "epoch": 2.33, "learning_rate": 0.00026999347684279187, "loss": 1.053, "step": 3800 }, { "epoch": 2.45, "learning_rate": 0.0002673842139595564, "loss": 1.0239, "step": 4000 }, { "epoch": 2.45, "eval_cer": 0.19039349567825709, "eval_loss": 0.6961866617202759, "eval_runtime": 1607.479, "eval_samples_per_second": 8.145, "eval_steps_per_second": 0.509, "step": 4000 }, { "epoch": 2.57, "learning_rate": 0.0002647749510763209, "loss": 1.0206, "step": 4200 }, { "epoch": 2.69, "learning_rate": 0.00026216568819308544, "loss": 1.0045, "step": 4400 }, { "epoch": 2.82, "learning_rate": 0.0002595564253098499, "loss": 0.9802, "step": 4600 }, { "epoch": 2.94, "learning_rate": 0.00025694716242661447, "loss": 0.9839, "step": 4800 }, { "epoch": 3.06, "learning_rate": 0.00025433789954337895, "loss": 0.9243, "step": 5000 }, { "epoch": 3.18, "learning_rate": 0.0002517286366601435, "loss": 0.9082, "step": 5200 }, { "epoch": 3.31, "learning_rate": 0.000249119373776908, "loss": 0.9017, "step": 5400 }, { "epoch": 3.43, "learning_rate": 0.0002465101108936725, "loss": 0.8871, "step": 5600 }, { "epoch": 3.55, "learning_rate": 0.00024390084801043704, "loss": 0.9036, "step": 5800 }, { "epoch": 3.67, "learning_rate": 0.00024129158512720155, "loss": 0.8977, "step": 6000 }, { "epoch": 3.67, "eval_cer": 0.16872254319465907, "eval_loss": 0.594495415687561, "eval_runtime": 1591.6049, "eval_samples_per_second": 8.226, "eval_steps_per_second": 0.515, "step": 6000 }, { "epoch": 3.79, "learning_rate": 0.00023868232224396607, "loss": 0.8843, "step": 6200 }, { "epoch": 3.92, "learning_rate": 0.00023607305936073058, "loss": 0.8757, "step": 6400 }, { "epoch": 4.04, "learning_rate": 0.0002334637964774951, "loss": 0.8573, "step": 6600 }, { "epoch": 4.16, "learning_rate": 0.0002308545335942596, "loss": 0.8126, "step": 6800 }, { "epoch": 4.28, "learning_rate": 0.00022824527071102412, "loss": 0.8192, "step": 7000 }, { "epoch": 4.41, "learning_rate": 0.00022563600782778863, "loss": 0.8061, "step": 7200 }, { "epoch": 4.53, "learning_rate": 0.00022302674494455315, "loss": 0.8123, "step": 7400 }, { "epoch": 4.65, "learning_rate": 0.00022041748206131766, "loss": 0.8046, "step": 7600 }, { "epoch": 4.77, "learning_rate": 0.00021780821917808218, "loss": 0.7979, "step": 7800 }, { "epoch": 4.9, "learning_rate": 0.0002151989562948467, "loss": 0.804, "step": 8000 }, { "epoch": 4.9, "eval_cer": 0.14924661713942214, "eval_loss": 0.5327703952789307, "eval_runtime": 1595.6324, "eval_samples_per_second": 8.206, "eval_steps_per_second": 0.513, "step": 8000 }, { "epoch": 5.02, "learning_rate": 0.0002125896934116112, "loss": 0.7867, "step": 8200 }, { "epoch": 5.14, "learning_rate": 0.00020998043052837572, "loss": 0.7557, "step": 8400 }, { "epoch": 5.26, "learning_rate": 0.00020737116764514023, "loss": 0.7478, "step": 8600 }, { "epoch": 5.39, "learning_rate": 0.00020476190476190475, "loss": 0.7398, "step": 8800 }, { "epoch": 5.51, "learning_rate": 0.00020215264187866926, "loss": 0.7408, "step": 9000 }, { "epoch": 5.63, "learning_rate": 0.00019954337899543377, "loss": 0.75, "step": 9200 }, { "epoch": 5.75, "learning_rate": 0.0001969341161121983, "loss": 0.7344, "step": 9400 }, { "epoch": 5.88, "learning_rate": 0.0001943248532289628, "loss": 0.738, "step": 9600 }, { "epoch": 6.0, "learning_rate": 0.00019171559034572732, "loss": 0.7373, "step": 9800 }, { "epoch": 6.12, "learning_rate": 0.00018910632746249183, "loss": 0.698, "step": 10000 }, { "epoch": 6.12, "eval_cer": 0.13653489424101573, "eval_loss": 0.5013594031333923, "eval_runtime": 1605.1161, "eval_samples_per_second": 8.157, "eval_steps_per_second": 0.51, "step": 10000 }, { "epoch": 6.24, "learning_rate": 0.00018649706457925634, "loss": 0.6943, "step": 10200 }, { "epoch": 6.37, "learning_rate": 0.00018388780169602086, "loss": 0.6997, "step": 10400 }, { "epoch": 6.49, "learning_rate": 0.00018127853881278537, "loss": 0.6929, "step": 10600 }, { "epoch": 6.61, "learning_rate": 0.00017866927592954989, "loss": 0.7003, "step": 10800 }, { "epoch": 6.73, "learning_rate": 0.0001760600130463144, "loss": 0.6863, "step": 11000 }, { "epoch": 6.86, "learning_rate": 0.00017345075016307891, "loss": 0.6883, "step": 11200 }, { "epoch": 6.98, "learning_rate": 0.00017084148727984343, "loss": 0.6787, "step": 11400 }, { "epoch": 7.1, "learning_rate": 0.00016823222439660794, "loss": 0.6518, "step": 11600 }, { "epoch": 7.22, "learning_rate": 0.00016562296151337246, "loss": 0.6494, "step": 11800 }, { "epoch": 7.35, "learning_rate": 0.00016301369863013697, "loss": 0.6426, "step": 12000 }, { "epoch": 7.35, "eval_cer": 0.13216305737125092, "eval_loss": 0.47150149941444397, "eval_runtime": 1597.7342, "eval_samples_per_second": 8.195, "eval_steps_per_second": 0.513, "step": 12000 }, { "epoch": 7.47, "learning_rate": 0.00016040443574690148, "loss": 0.6457, "step": 12200 }, { "epoch": 7.59, "learning_rate": 0.000157795172863666, "loss": 0.6429, "step": 12400 }, { "epoch": 7.71, "learning_rate": 0.0001551859099804305, "loss": 0.6512, "step": 12600 }, { "epoch": 7.83, "learning_rate": 0.00015257664709719503, "loss": 0.6458, "step": 12800 }, { "epoch": 7.96, "learning_rate": 0.00014996738421395954, "loss": 0.6501, "step": 13000 }, { "epoch": 8.08, "learning_rate": 0.00014735812133072405, "loss": 0.6095, "step": 13200 }, { "epoch": 8.2, "learning_rate": 0.00014474885844748857, "loss": 0.6132, "step": 13400 }, { "epoch": 8.32, "learning_rate": 0.00014213959556425308, "loss": 0.6102, "step": 13600 }, { "epoch": 8.45, "learning_rate": 0.0001395303326810176, "loss": 0.6125, "step": 13800 }, { "epoch": 8.57, "learning_rate": 0.0001369210697977821, "loss": 0.61, "step": 14000 }, { "epoch": 8.57, "eval_cer": 0.1257692459492199, "eval_loss": 0.45295360684394836, "eval_runtime": 1603.0798, "eval_samples_per_second": 8.167, "eval_steps_per_second": 0.511, "step": 14000 }, { "epoch": 8.69, "learning_rate": 0.00013431180691454662, "loss": 0.606, "step": 14200 }, { "epoch": 8.81, "learning_rate": 0.00013170254403131114, "loss": 0.5957, "step": 14400 }, { "epoch": 8.94, "learning_rate": 0.00012909328114807565, "loss": 0.5992, "step": 14600 }, { "epoch": 9.06, "learning_rate": 0.00012648401826484017, "loss": 0.5752, "step": 14800 }, { "epoch": 9.18, "learning_rate": 0.00012387475538160468, "loss": 0.5654, "step": 15000 }, { "epoch": 9.3, "learning_rate": 0.00012126549249836919, "loss": 0.5725, "step": 15200 }, { "epoch": 9.43, "learning_rate": 0.00011865622961513371, "loss": 0.5713, "step": 15400 }, { "epoch": 9.55, "learning_rate": 0.00011604696673189822, "loss": 0.5649, "step": 15600 }, { "epoch": 9.67, "learning_rate": 0.00011343770384866273, "loss": 0.5643, "step": 15800 }, { "epoch": 9.79, "learning_rate": 0.00011082844096542725, "loss": 0.5709, "step": 16000 }, { "epoch": 9.79, "eval_cer": 0.1200554980402634, "eval_loss": 0.4299587607383728, "eval_runtime": 1609.5227, "eval_samples_per_second": 8.135, "eval_steps_per_second": 0.509, "step": 16000 }, { "epoch": 9.92, "learning_rate": 0.00010821917808219176, "loss": 0.5666, "step": 16200 }, { "epoch": 10.04, "learning_rate": 0.00010560991519895628, "loss": 0.5531, "step": 16400 }, { "epoch": 10.16, "learning_rate": 0.00010300065231572079, "loss": 0.5389, "step": 16600 }, { "epoch": 10.28, "learning_rate": 0.0001003913894324853, "loss": 0.5456, "step": 16800 }, { "epoch": 10.41, "learning_rate": 9.778212654924982e-05, "loss": 0.5353, "step": 17000 }, { "epoch": 10.53, "learning_rate": 9.517286366601433e-05, "loss": 0.5337, "step": 17200 }, { "epoch": 10.65, "learning_rate": 9.256360078277885e-05, "loss": 0.5296, "step": 17400 }, { "epoch": 10.77, "learning_rate": 8.995433789954336e-05, "loss": 0.5372, "step": 17600 }, { "epoch": 10.9, "learning_rate": 8.734507501630787e-05, "loss": 0.5388, "step": 17800 }, { "epoch": 11.02, "learning_rate": 8.473581213307239e-05, "loss": 0.5235, "step": 18000 }, { "epoch": 11.02, "eval_cer": 0.11664607248141211, "eval_loss": 0.4167773723602295, "eval_runtime": 1608.2913, "eval_samples_per_second": 8.141, "eval_steps_per_second": 0.509, "step": 18000 }, { "epoch": 11.14, "learning_rate": 8.212654924983692e-05, "loss": 0.509, "step": 18200 }, { "epoch": 11.26, "learning_rate": 7.951728636660143e-05, "loss": 0.5116, "step": 18400 }, { "epoch": 11.38, "learning_rate": 7.690802348336594e-05, "loss": 0.4967, "step": 18600 }, { "epoch": 11.51, "learning_rate": 7.429876060013046e-05, "loss": 0.511, "step": 18800 }, { "epoch": 11.63, "learning_rate": 7.168949771689497e-05, "loss": 0.5056, "step": 19000 }, { "epoch": 11.75, "learning_rate": 6.908023483365949e-05, "loss": 0.5073, "step": 19200 }, { "epoch": 11.87, "learning_rate": 6.6470971950424e-05, "loss": 0.4968, "step": 19400 }, { "epoch": 12.0, "learning_rate": 6.386170906718851e-05, "loss": 0.5009, "step": 19600 }, { "epoch": 12.12, "learning_rate": 6.125244618395303e-05, "loss": 0.4832, "step": 19800 }, { "epoch": 12.24, "learning_rate": 5.864318330071754e-05, "loss": 0.4778, "step": 20000 }, { "epoch": 12.24, "eval_cer": 0.11294822712906938, "eval_loss": 0.40570223331451416, "eval_runtime": 1612.661, "eval_samples_per_second": 8.119, "eval_steps_per_second": 0.508, "step": 20000 }, { "epoch": 12.36, "learning_rate": 5.6033920417482055e-05, "loss": 0.4775, "step": 20200 }, { "epoch": 12.49, "learning_rate": 5.342465753424657e-05, "loss": 0.4855, "step": 20400 }, { "epoch": 12.61, "learning_rate": 5.081539465101108e-05, "loss": 0.4773, "step": 20600 }, { "epoch": 12.73, "learning_rate": 4.82061317677756e-05, "loss": 0.4745, "step": 20800 }, { "epoch": 12.85, "learning_rate": 4.559686888454011e-05, "loss": 0.48, "step": 21000 }, { "epoch": 12.98, "learning_rate": 4.2987606001304625e-05, "loss": 0.463, "step": 21200 }, { "epoch": 13.1, "learning_rate": 4.037834311806914e-05, "loss": 0.4643, "step": 21400 }, { "epoch": 13.22, "learning_rate": 3.776908023483365e-05, "loss": 0.449, "step": 21600 }, { "epoch": 13.34, "learning_rate": 3.515981735159817e-05, "loss": 0.4604, "step": 21800 }, { "epoch": 13.47, "learning_rate": 3.255055446836268e-05, "loss": 0.4571, "step": 22000 }, { "epoch": 13.47, "eval_cer": 0.10995473327241098, "eval_loss": 0.3945465385913849, "eval_runtime": 1610.0901, "eval_samples_per_second": 8.132, "eval_steps_per_second": 0.509, "step": 22000 }, { "epoch": 13.59, "learning_rate": 2.99412915851272e-05, "loss": 0.4539, "step": 22200 }, { "epoch": 13.71, "learning_rate": 2.7332028701891712e-05, "loss": 0.4569, "step": 22400 }, { "epoch": 13.83, "learning_rate": 2.4722765818656226e-05, "loss": 0.4533, "step": 22600 }, { "epoch": 13.96, "learning_rate": 2.211350293542074e-05, "loss": 0.4545, "step": 22800 }, { "epoch": 14.08, "learning_rate": 1.9504240052185254e-05, "loss": 0.447, "step": 23000 }, { "epoch": 14.2, "learning_rate": 1.6894977168949768e-05, "loss": 0.4443, "step": 23200 }, { "epoch": 14.32, "learning_rate": 1.4285714285714284e-05, "loss": 0.4423, "step": 23400 }, { "epoch": 14.45, "learning_rate": 1.1676451402478798e-05, "loss": 0.4434, "step": 23600 }, { "epoch": 14.57, "learning_rate": 9.067188519243312e-06, "loss": 0.438, "step": 23800 }, { "epoch": 14.69, "learning_rate": 6.4579256360078264e-06, "loss": 0.4388, "step": 24000 }, { "epoch": 14.69, "eval_cer": 0.10809973860058716, "eval_loss": 0.38906005024909973, "eval_runtime": 1607.4288, "eval_samples_per_second": 8.145, "eval_steps_per_second": 0.51, "step": 24000 }, { "epoch": 14.81, "learning_rate": 3.848662752772341e-06, "loss": 0.4406, "step": 24200 }, { "epoch": 14.93, "learning_rate": 1.2393998695368556e-06, "loss": 0.449, "step": 24400 }, { "epoch": 14.99, "step": 24495, "total_flos": 1.7220710227304147e+20, "train_loss": 1.1506779817299688, "train_runtime": 54167.2991, "train_samples_per_second": 28.951, "train_steps_per_second": 0.452 } ], "logging_steps": 200, "max_steps": 24495, "num_train_epochs": 15, "save_steps": 2000, "total_flos": 1.7220710227304147e+20, "trial_name": null, "trial_params": null }